{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7324, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00013653741125068268, "grad_norm": 8.987289428710938, "learning_rate": 4.545454545454546e-08, "loss": 1.2943, "step": 1 }, { "epoch": 0.00027307482250136535, "grad_norm": 12.404081344604492, "learning_rate": 9.090909090909091e-08, "loss": 1.623, "step": 2 }, { "epoch": 0.00040961223375204805, "grad_norm": 18.829774856567383, "learning_rate": 1.3636363636363637e-07, "loss": 1.7482, "step": 3 }, { "epoch": 0.0005461496450027307, "grad_norm": 11.324914932250977, "learning_rate": 1.8181818181818183e-07, "loss": 1.5179, "step": 4 }, { "epoch": 0.0006826870562534135, "grad_norm": 13.127245903015137, "learning_rate": 2.2727272727272729e-07, "loss": 1.646, "step": 5 }, { "epoch": 0.0008192244675040961, "grad_norm": 13.20601749420166, "learning_rate": 2.7272727272727274e-07, "loss": 1.6113, "step": 6 }, { "epoch": 0.0009557618787547789, "grad_norm": 16.113187789916992, "learning_rate": 3.181818181818182e-07, "loss": 1.6779, "step": 7 }, { "epoch": 0.0010922992900054614, "grad_norm": 10.614578247070312, "learning_rate": 3.6363636363636366e-07, "loss": 1.5373, "step": 8 }, { "epoch": 0.0012288367012561442, "grad_norm": 11.617382049560547, "learning_rate": 4.090909090909091e-07, "loss": 1.5863, "step": 9 }, { "epoch": 0.001365374112506827, "grad_norm": 17.82931137084961, "learning_rate": 4.5454545454545457e-07, "loss": 1.8616, "step": 10 }, { "epoch": 0.0015019115237575095, "grad_norm": 12.649788856506348, "learning_rate": 5.000000000000001e-07, "loss": 1.7328, "step": 11 }, { "epoch": 0.0016384489350081922, "grad_norm": 35.99782180786133, "learning_rate": 5.454545454545455e-07, "loss": 1.5127, "step": 12 }, { "epoch": 0.001774986346258875, "grad_norm": 13.534668922424316, "learning_rate": 5.90909090909091e-07, "loss": 1.6953, "step": 13 }, { "epoch": 0.0019115237575095577, "grad_norm": 12.925115585327148, "learning_rate": 6.363636363636364e-07, "loss": 1.6669, "step": 14 }, { "epoch": 0.0020480611687602405, "grad_norm": 12.660481452941895, "learning_rate": 6.818181818181818e-07, "loss": 1.5468, "step": 15 }, { "epoch": 0.002184598580010923, "grad_norm": 9.128573417663574, "learning_rate": 7.272727272727273e-07, "loss": 1.3397, "step": 16 }, { "epoch": 0.0023211359912616056, "grad_norm": 9.0270414352417, "learning_rate": 7.727272727272727e-07, "loss": 1.3341, "step": 17 }, { "epoch": 0.0024576734025122883, "grad_norm": 10.299264907836914, "learning_rate": 8.181818181818182e-07, "loss": 1.4737, "step": 18 }, { "epoch": 0.002594210813762971, "grad_norm": 10.913025856018066, "learning_rate": 8.636363636363637e-07, "loss": 1.3287, "step": 19 }, { "epoch": 0.002730748225013654, "grad_norm": 10.57042121887207, "learning_rate": 9.090909090909091e-07, "loss": 1.591, "step": 20 }, { "epoch": 0.0028672856362643366, "grad_norm": 9.5247802734375, "learning_rate": 9.545454545454548e-07, "loss": 1.5423, "step": 21 }, { "epoch": 0.003003823047515019, "grad_norm": 11.81783676147461, "learning_rate": 1.0000000000000002e-06, "loss": 1.7131, "step": 22 }, { "epoch": 0.0031403604587657017, "grad_norm": 27.643835067749023, "learning_rate": 1.0454545454545456e-06, "loss": 1.3972, "step": 23 }, { "epoch": 0.0032768978700163844, "grad_norm": 7.817766189575195, "learning_rate": 1.090909090909091e-06, "loss": 1.3563, "step": 24 }, { "epoch": 0.003413435281267067, "grad_norm": 11.848875999450684, "learning_rate": 1.1363636363636364e-06, "loss": 1.3461, "step": 25 }, { "epoch": 0.00354997269251775, "grad_norm": 8.956430435180664, "learning_rate": 1.181818181818182e-06, "loss": 1.5208, "step": 26 }, { "epoch": 0.0036865101037684327, "grad_norm": 10.038544654846191, "learning_rate": 1.2272727272727274e-06, "loss": 1.4397, "step": 27 }, { "epoch": 0.0038230475150191155, "grad_norm": 8.714576721191406, "learning_rate": 1.2727272727272728e-06, "loss": 1.2262, "step": 28 }, { "epoch": 0.003959584926269798, "grad_norm": 7.772757053375244, "learning_rate": 1.3181818181818182e-06, "loss": 1.2061, "step": 29 }, { "epoch": 0.004096122337520481, "grad_norm": 7.886837005615234, "learning_rate": 1.3636363636363636e-06, "loss": 1.3132, "step": 30 }, { "epoch": 0.004232659748771164, "grad_norm": 8.279170989990234, "learning_rate": 1.409090909090909e-06, "loss": 1.4731, "step": 31 }, { "epoch": 0.004369197160021846, "grad_norm": 8.376132011413574, "learning_rate": 1.4545454545454546e-06, "loss": 1.4162, "step": 32 }, { "epoch": 0.004505734571272528, "grad_norm": 9.420126914978027, "learning_rate": 1.5e-06, "loss": 1.3362, "step": 33 }, { "epoch": 0.004642271982523211, "grad_norm": 8.644148826599121, "learning_rate": 1.5454545454545454e-06, "loss": 1.1535, "step": 34 }, { "epoch": 0.004778809393773894, "grad_norm": 9.827284812927246, "learning_rate": 1.590909090909091e-06, "loss": 1.2638, "step": 35 }, { "epoch": 0.004915346805024577, "grad_norm": 10.540002822875977, "learning_rate": 1.6363636363636365e-06, "loss": 1.4462, "step": 36 }, { "epoch": 0.005051884216275259, "grad_norm": 30.93503761291504, "learning_rate": 1.6818181818181819e-06, "loss": 1.2882, "step": 37 }, { "epoch": 0.005188421627525942, "grad_norm": 8.514425277709961, "learning_rate": 1.7272727272727275e-06, "loss": 1.3616, "step": 38 }, { "epoch": 0.005324959038776625, "grad_norm": 9.465974807739258, "learning_rate": 1.7727272727272729e-06, "loss": 1.4472, "step": 39 }, { "epoch": 0.005461496450027308, "grad_norm": 6.578811168670654, "learning_rate": 1.8181818181818183e-06, "loss": 1.4673, "step": 40 }, { "epoch": 0.00559803386127799, "grad_norm": 8.418177604675293, "learning_rate": 1.863636363636364e-06, "loss": 1.3875, "step": 41 }, { "epoch": 0.005734571272528673, "grad_norm": 8.087608337402344, "learning_rate": 1.9090909090909095e-06, "loss": 1.1515, "step": 42 }, { "epoch": 0.005871108683779356, "grad_norm": 10.338250160217285, "learning_rate": 1.954545454545455e-06, "loss": 1.1104, "step": 43 }, { "epoch": 0.006007646095030038, "grad_norm": 6.382353782653809, "learning_rate": 2.0000000000000003e-06, "loss": 1.1516, "step": 44 }, { "epoch": 0.006144183506280721, "grad_norm": 7.540999889373779, "learning_rate": 2.0454545454545457e-06, "loss": 1.0226, "step": 45 }, { "epoch": 0.006280720917531403, "grad_norm": 8.347698211669922, "learning_rate": 2.090909090909091e-06, "loss": 1.0704, "step": 46 }, { "epoch": 0.006417258328782086, "grad_norm": 7.919347286224365, "learning_rate": 2.1363636363636365e-06, "loss": 1.2001, "step": 47 }, { "epoch": 0.006553795740032769, "grad_norm": 8.03372573852539, "learning_rate": 2.181818181818182e-06, "loss": 1.0667, "step": 48 }, { "epoch": 0.006690333151283452, "grad_norm": 8.294642448425293, "learning_rate": 2.2272727272727274e-06, "loss": 1.2381, "step": 49 }, { "epoch": 0.006826870562534134, "grad_norm": 23.87828826904297, "learning_rate": 2.2727272727272728e-06, "loss": 1.2045, "step": 50 }, { "epoch": 0.006963407973784817, "grad_norm": 26.372629165649414, "learning_rate": 2.318181818181818e-06, "loss": 1.1135, "step": 51 }, { "epoch": 0.0070999453850355, "grad_norm": 10.761754035949707, "learning_rate": 2.363636363636364e-06, "loss": 1.0831, "step": 52 }, { "epoch": 0.007236482796286183, "grad_norm": 9.564151763916016, "learning_rate": 2.4090909090909094e-06, "loss": 1.1165, "step": 53 }, { "epoch": 0.007373020207536865, "grad_norm": 9.100855827331543, "learning_rate": 2.454545454545455e-06, "loss": 1.1095, "step": 54 }, { "epoch": 0.007509557618787548, "grad_norm": 6.06331205368042, "learning_rate": 2.5e-06, "loss": 1.1877, "step": 55 }, { "epoch": 0.007646095030038231, "grad_norm": 6.6048359870910645, "learning_rate": 2.5454545454545456e-06, "loss": 1.2389, "step": 56 }, { "epoch": 0.007782632441288913, "grad_norm": 6.052951335906982, "learning_rate": 2.590909090909091e-06, "loss": 1.1538, "step": 57 }, { "epoch": 0.007919169852539596, "grad_norm": 6.015878200531006, "learning_rate": 2.6363636363636364e-06, "loss": 1.2016, "step": 58 }, { "epoch": 0.008055707263790278, "grad_norm": 94.88129425048828, "learning_rate": 2.6818181818181822e-06, "loss": 1.2186, "step": 59 }, { "epoch": 0.008192244675040962, "grad_norm": 6.332214832305908, "learning_rate": 2.7272727272727272e-06, "loss": 1.0892, "step": 60 }, { "epoch": 0.008328782086291644, "grad_norm": 10.593901634216309, "learning_rate": 2.772727272727273e-06, "loss": 1.2246, "step": 61 }, { "epoch": 0.008465319497542327, "grad_norm": 15.149567604064941, "learning_rate": 2.818181818181818e-06, "loss": 1.0866, "step": 62 }, { "epoch": 0.00860185690879301, "grad_norm": 6.232218265533447, "learning_rate": 2.863636363636364e-06, "loss": 1.238, "step": 63 }, { "epoch": 0.008738394320043691, "grad_norm": 7.861034870147705, "learning_rate": 2.9090909090909093e-06, "loss": 1.1761, "step": 64 }, { "epoch": 0.008874931731294375, "grad_norm": 6.95709753036499, "learning_rate": 2.954545454545455e-06, "loss": 1.2936, "step": 65 }, { "epoch": 0.009011469142545057, "grad_norm": 7.817255020141602, "learning_rate": 3e-06, "loss": 1.079, "step": 66 }, { "epoch": 0.00914800655379574, "grad_norm": 9.865900993347168, "learning_rate": 3.045454545454546e-06, "loss": 1.0475, "step": 67 }, { "epoch": 0.009284543965046422, "grad_norm": 6.828991413116455, "learning_rate": 3.090909090909091e-06, "loss": 1.2792, "step": 68 }, { "epoch": 0.009421081376297106, "grad_norm": 7.336574077606201, "learning_rate": 3.1363636363636367e-06, "loss": 1.1822, "step": 69 }, { "epoch": 0.009557618787547788, "grad_norm": 7.718461036682129, "learning_rate": 3.181818181818182e-06, "loss": 1.1597, "step": 70 }, { "epoch": 0.009694156198798471, "grad_norm": 6.948351860046387, "learning_rate": 3.227272727272728e-06, "loss": 1.0391, "step": 71 }, { "epoch": 0.009830693610049153, "grad_norm": 9.936028480529785, "learning_rate": 3.272727272727273e-06, "loss": 1.0793, "step": 72 }, { "epoch": 0.009967231021299837, "grad_norm": 7.504670143127441, "learning_rate": 3.3181818181818188e-06, "loss": 1.1579, "step": 73 }, { "epoch": 0.010103768432550519, "grad_norm": 6.294007301330566, "learning_rate": 3.3636363636363637e-06, "loss": 1.2029, "step": 74 }, { "epoch": 0.0102403058438012, "grad_norm": 8.15434741973877, "learning_rate": 3.409090909090909e-06, "loss": 1.0619, "step": 75 }, { "epoch": 0.010376843255051884, "grad_norm": 7.9301371574401855, "learning_rate": 3.454545454545455e-06, "loss": 1.2067, "step": 76 }, { "epoch": 0.010513380666302566, "grad_norm": 8.867652893066406, "learning_rate": 3.5e-06, "loss": 1.0848, "step": 77 }, { "epoch": 0.01064991807755325, "grad_norm": 6.980035305023193, "learning_rate": 3.5454545454545458e-06, "loss": 1.0161, "step": 78 }, { "epoch": 0.010786455488803932, "grad_norm": 8.527592658996582, "learning_rate": 3.590909090909091e-06, "loss": 1.0761, "step": 79 }, { "epoch": 0.010922992900054615, "grad_norm": 7.7525482177734375, "learning_rate": 3.6363636363636366e-06, "loss": 1.1013, "step": 80 }, { "epoch": 0.011059530311305297, "grad_norm": 6.853456974029541, "learning_rate": 3.681818181818182e-06, "loss": 1.0758, "step": 81 }, { "epoch": 0.01119606772255598, "grad_norm": 7.442591190338135, "learning_rate": 3.727272727272728e-06, "loss": 1.0745, "step": 82 }, { "epoch": 0.011332605133806663, "grad_norm": 5.621485710144043, "learning_rate": 3.772727272727273e-06, "loss": 1.0057, "step": 83 }, { "epoch": 0.011469142545057346, "grad_norm": 15.377763748168945, "learning_rate": 3.818181818181819e-06, "loss": 0.9859, "step": 84 }, { "epoch": 0.011605679956308028, "grad_norm": 11.054804801940918, "learning_rate": 3.863636363636364e-06, "loss": 1.0032, "step": 85 }, { "epoch": 0.011742217367558712, "grad_norm": 7.605411052703857, "learning_rate": 3.90909090909091e-06, "loss": 1.0557, "step": 86 }, { "epoch": 0.011878754778809394, "grad_norm": 12.800761222839355, "learning_rate": 3.954545454545454e-06, "loss": 1.1054, "step": 87 }, { "epoch": 0.012015292190060076, "grad_norm": 7.331326961517334, "learning_rate": 4.000000000000001e-06, "loss": 1.1093, "step": 88 }, { "epoch": 0.01215182960131076, "grad_norm": 5.327223300933838, "learning_rate": 4.045454545454546e-06, "loss": 1.0609, "step": 89 }, { "epoch": 0.012288367012561441, "grad_norm": 7.839710712432861, "learning_rate": 4.0909090909090915e-06, "loss": 1.0026, "step": 90 }, { "epoch": 0.012424904423812125, "grad_norm": 10.156535148620605, "learning_rate": 4.136363636363637e-06, "loss": 1.0987, "step": 91 }, { "epoch": 0.012561441835062807, "grad_norm": 6.819254398345947, "learning_rate": 4.181818181818182e-06, "loss": 1.0535, "step": 92 }, { "epoch": 0.01269797924631349, "grad_norm": 10.112334251403809, "learning_rate": 4.227272727272728e-06, "loss": 1.0525, "step": 93 }, { "epoch": 0.012834516657564172, "grad_norm": 10.082497596740723, "learning_rate": 4.272727272727273e-06, "loss": 1.1115, "step": 94 }, { "epoch": 0.012971054068814856, "grad_norm": 7.775457859039307, "learning_rate": 4.3181818181818185e-06, "loss": 0.9831, "step": 95 }, { "epoch": 0.013107591480065538, "grad_norm": 6.974147796630859, "learning_rate": 4.363636363636364e-06, "loss": 1.0418, "step": 96 }, { "epoch": 0.013244128891316221, "grad_norm": 7.971465110778809, "learning_rate": 4.409090909090909e-06, "loss": 1.0143, "step": 97 }, { "epoch": 0.013380666302566903, "grad_norm": 9.213125228881836, "learning_rate": 4.454545454545455e-06, "loss": 1.1022, "step": 98 }, { "epoch": 0.013517203713817587, "grad_norm": 14.815136909484863, "learning_rate": 4.5e-06, "loss": 1.098, "step": 99 }, { "epoch": 0.013653741125068269, "grad_norm": 5.615110874176025, "learning_rate": 4.5454545454545455e-06, "loss": 0.9458, "step": 100 }, { "epoch": 0.01379027853631895, "grad_norm": 11.310483932495117, "learning_rate": 4.590909090909092e-06, "loss": 0.9679, "step": 101 }, { "epoch": 0.013926815947569634, "grad_norm": 12.356385231018066, "learning_rate": 4.636363636363636e-06, "loss": 1.1086, "step": 102 }, { "epoch": 0.014063353358820316, "grad_norm": 15.9056396484375, "learning_rate": 4.681818181818183e-06, "loss": 0.9473, "step": 103 }, { "epoch": 0.014199890770071, "grad_norm": 9.187207221984863, "learning_rate": 4.727272727272728e-06, "loss": 1.13, "step": 104 }, { "epoch": 0.014336428181321682, "grad_norm": 11.438641548156738, "learning_rate": 4.772727272727273e-06, "loss": 1.1554, "step": 105 }, { "epoch": 0.014472965592572365, "grad_norm": 12.720915794372559, "learning_rate": 4.818181818181819e-06, "loss": 1.1412, "step": 106 }, { "epoch": 0.014609503003823047, "grad_norm": 8.082242012023926, "learning_rate": 4.863636363636364e-06, "loss": 1.0316, "step": 107 }, { "epoch": 0.01474604041507373, "grad_norm": 16.171316146850586, "learning_rate": 4.90909090909091e-06, "loss": 1.2446, "step": 108 }, { "epoch": 0.014882577826324413, "grad_norm": 10.801708221435547, "learning_rate": 4.954545454545455e-06, "loss": 1.1467, "step": 109 }, { "epoch": 0.015019115237575096, "grad_norm": 14.372072219848633, "learning_rate": 5e-06, "loss": 1.1326, "step": 110 }, { "epoch": 0.015155652648825778, "grad_norm": 8.159862518310547, "learning_rate": 5.045454545454546e-06, "loss": 0.99, "step": 111 }, { "epoch": 0.015292190060076462, "grad_norm": 6.170370101928711, "learning_rate": 5.090909090909091e-06, "loss": 0.9188, "step": 112 }, { "epoch": 0.015428727471327144, "grad_norm": 7.113578796386719, "learning_rate": 5.1363636363636375e-06, "loss": 0.9717, "step": 113 }, { "epoch": 0.015565264882577826, "grad_norm": 8.31975269317627, "learning_rate": 5.181818181818182e-06, "loss": 1.0749, "step": 114 }, { "epoch": 0.015701802293828507, "grad_norm": 12.390118598937988, "learning_rate": 5.2272727272727274e-06, "loss": 1.1905, "step": 115 }, { "epoch": 0.015838339705079193, "grad_norm": 7.244166851043701, "learning_rate": 5.272727272727273e-06, "loss": 1.1333, "step": 116 }, { "epoch": 0.015974877116329875, "grad_norm": 7.368595123291016, "learning_rate": 5.318181818181819e-06, "loss": 1.0633, "step": 117 }, { "epoch": 0.016111414527580557, "grad_norm": 12.796453475952148, "learning_rate": 5.3636363636363645e-06, "loss": 0.9804, "step": 118 }, { "epoch": 0.01624795193883124, "grad_norm": 8.9561185836792, "learning_rate": 5.409090909090909e-06, "loss": 1.0742, "step": 119 }, { "epoch": 0.016384489350081924, "grad_norm": 8.112398147583008, "learning_rate": 5.4545454545454545e-06, "loss": 1.1728, "step": 120 }, { "epoch": 0.016521026761332606, "grad_norm": 7.4385199546813965, "learning_rate": 5.500000000000001e-06, "loss": 1.1846, "step": 121 }, { "epoch": 0.016657564172583288, "grad_norm": 5.863475322723389, "learning_rate": 5.545454545454546e-06, "loss": 0.9351, "step": 122 }, { "epoch": 0.01679410158383397, "grad_norm": 8.732736587524414, "learning_rate": 5.5909090909090915e-06, "loss": 1.1295, "step": 123 }, { "epoch": 0.016930638995084655, "grad_norm": 9.324421882629395, "learning_rate": 5.636363636363636e-06, "loss": 1.051, "step": 124 }, { "epoch": 0.017067176406335337, "grad_norm": 9.045470237731934, "learning_rate": 5.681818181818183e-06, "loss": 1.1395, "step": 125 }, { "epoch": 0.01720371381758602, "grad_norm": 8.594548225402832, "learning_rate": 5.727272727272728e-06, "loss": 1.0948, "step": 126 }, { "epoch": 0.0173402512288367, "grad_norm": 11.689970016479492, "learning_rate": 5.772727272727273e-06, "loss": 1.2664, "step": 127 }, { "epoch": 0.017476788640087382, "grad_norm": 41.821998596191406, "learning_rate": 5.8181818181818185e-06, "loss": 1.0831, "step": 128 }, { "epoch": 0.017613326051338068, "grad_norm": 7.111389636993408, "learning_rate": 5.863636363636364e-06, "loss": 0.9916, "step": 129 }, { "epoch": 0.01774986346258875, "grad_norm": 14.742327690124512, "learning_rate": 5.90909090909091e-06, "loss": 1.1049, "step": 130 }, { "epoch": 0.01788640087383943, "grad_norm": 11.249462127685547, "learning_rate": 5.954545454545455e-06, "loss": 1.0347, "step": 131 }, { "epoch": 0.018022938285090113, "grad_norm": 8.444786071777344, "learning_rate": 6e-06, "loss": 1.1489, "step": 132 }, { "epoch": 0.0181594756963408, "grad_norm": 10.367302894592285, "learning_rate": 6.0454545454545456e-06, "loss": 0.9631, "step": 133 }, { "epoch": 0.01829601310759148, "grad_norm": 6.829505443572998, "learning_rate": 6.090909090909092e-06, "loss": 1.1105, "step": 134 }, { "epoch": 0.018432550518842163, "grad_norm": 9.45673656463623, "learning_rate": 6.136363636363637e-06, "loss": 0.967, "step": 135 }, { "epoch": 0.018569087930092845, "grad_norm": 7.11916446685791, "learning_rate": 6.181818181818182e-06, "loss": 0.9997, "step": 136 }, { "epoch": 0.01870562534134353, "grad_norm": 11.46143913269043, "learning_rate": 6.227272727272727e-06, "loss": 1.03, "step": 137 }, { "epoch": 0.018842162752594212, "grad_norm": 12.575772285461426, "learning_rate": 6.2727272727272734e-06, "loss": 1.0257, "step": 138 }, { "epoch": 0.018978700163844894, "grad_norm": 6.463825702667236, "learning_rate": 6.318181818181819e-06, "loss": 1.0473, "step": 139 }, { "epoch": 0.019115237575095576, "grad_norm": 7.806009769439697, "learning_rate": 6.363636363636364e-06, "loss": 0.9384, "step": 140 }, { "epoch": 0.019251774986346257, "grad_norm": 9.235309600830078, "learning_rate": 6.40909090909091e-06, "loss": 1.0836, "step": 141 }, { "epoch": 0.019388312397596943, "grad_norm": 7.583468437194824, "learning_rate": 6.454545454545456e-06, "loss": 0.9095, "step": 142 }, { "epoch": 0.019524849808847625, "grad_norm": 10.190034866333008, "learning_rate": 6.5000000000000004e-06, "loss": 1.002, "step": 143 }, { "epoch": 0.019661387220098307, "grad_norm": 10.040583610534668, "learning_rate": 6.545454545454546e-06, "loss": 1.1018, "step": 144 }, { "epoch": 0.01979792463134899, "grad_norm": 8.368958473205566, "learning_rate": 6.590909090909091e-06, "loss": 1.1982, "step": 145 }, { "epoch": 0.019934462042599674, "grad_norm": 6.8480048179626465, "learning_rate": 6.6363636363636375e-06, "loss": 1.0452, "step": 146 }, { "epoch": 0.020070999453850356, "grad_norm": 7.463661193847656, "learning_rate": 6.681818181818183e-06, "loss": 1.0742, "step": 147 }, { "epoch": 0.020207536865101038, "grad_norm": 8.023838996887207, "learning_rate": 6.7272727272727275e-06, "loss": 0.9931, "step": 148 }, { "epoch": 0.02034407427635172, "grad_norm": 14.87636947631836, "learning_rate": 6.772727272727273e-06, "loss": 0.9434, "step": 149 }, { "epoch": 0.0204806116876024, "grad_norm": 6.111929893493652, "learning_rate": 6.818181818181818e-06, "loss": 1.1309, "step": 150 }, { "epoch": 0.020617149098853087, "grad_norm": 7.559609889984131, "learning_rate": 6.8636363636363645e-06, "loss": 1.1599, "step": 151 }, { "epoch": 0.02075368651010377, "grad_norm": 57.313880920410156, "learning_rate": 6.90909090909091e-06, "loss": 0.8713, "step": 152 }, { "epoch": 0.02089022392135445, "grad_norm": 5.911097049713135, "learning_rate": 6.954545454545455e-06, "loss": 1.0848, "step": 153 }, { "epoch": 0.021026761332605132, "grad_norm": 7.213879108428955, "learning_rate": 7e-06, "loss": 1.0956, "step": 154 }, { "epoch": 0.021163298743855818, "grad_norm": 6.770841121673584, "learning_rate": 7.045454545454546e-06, "loss": 1.0196, "step": 155 }, { "epoch": 0.0212998361551065, "grad_norm": 10.876555442810059, "learning_rate": 7.0909090909090916e-06, "loss": 1.2797, "step": 156 }, { "epoch": 0.02143637356635718, "grad_norm": 9.169231414794922, "learning_rate": 7.136363636363637e-06, "loss": 1.0533, "step": 157 }, { "epoch": 0.021572910977607863, "grad_norm": 5.8935418128967285, "learning_rate": 7.181818181818182e-06, "loss": 0.9682, "step": 158 }, { "epoch": 0.02170944838885855, "grad_norm": 6.920186519622803, "learning_rate": 7.227272727272729e-06, "loss": 0.914, "step": 159 }, { "epoch": 0.02184598580010923, "grad_norm": 10.671875953674316, "learning_rate": 7.272727272727273e-06, "loss": 1.1501, "step": 160 }, { "epoch": 0.021982523211359913, "grad_norm": 5.621334552764893, "learning_rate": 7.3181818181818186e-06, "loss": 1.2339, "step": 161 }, { "epoch": 0.022119060622610594, "grad_norm": 13.285127639770508, "learning_rate": 7.363636363636364e-06, "loss": 1.1823, "step": 162 }, { "epoch": 0.022255598033861276, "grad_norm": 5.429306507110596, "learning_rate": 7.40909090909091e-06, "loss": 1.1293, "step": 163 }, { "epoch": 0.02239213544511196, "grad_norm": 6.6834492683410645, "learning_rate": 7.454545454545456e-06, "loss": 1.0352, "step": 164 }, { "epoch": 0.022528672856362644, "grad_norm": 10.923229217529297, "learning_rate": 7.500000000000001e-06, "loss": 1.0545, "step": 165 }, { "epoch": 0.022665210267613325, "grad_norm": 7.378855228424072, "learning_rate": 7.545454545454546e-06, "loss": 1.046, "step": 166 }, { "epoch": 0.022801747678864007, "grad_norm": 6.996416091918945, "learning_rate": 7.590909090909091e-06, "loss": 1.2398, "step": 167 }, { "epoch": 0.022938285090114693, "grad_norm": 5.84944486618042, "learning_rate": 7.636363636363638e-06, "loss": 1.0335, "step": 168 }, { "epoch": 0.023074822501365375, "grad_norm": 6.002893447875977, "learning_rate": 7.681818181818183e-06, "loss": 0.9936, "step": 169 }, { "epoch": 0.023211359912616056, "grad_norm": 6.06666898727417, "learning_rate": 7.727272727272727e-06, "loss": 0.9115, "step": 170 }, { "epoch": 0.02334789732386674, "grad_norm": 6.041037559509277, "learning_rate": 7.772727272727273e-06, "loss": 0.8468, "step": 171 }, { "epoch": 0.023484434735117424, "grad_norm": 7.37949800491333, "learning_rate": 7.81818181818182e-06, "loss": 0.9961, "step": 172 }, { "epoch": 0.023620972146368106, "grad_norm": 7.2195725440979, "learning_rate": 7.863636363636364e-06, "loss": 1.1353, "step": 173 }, { "epoch": 0.023757509557618788, "grad_norm": 5.882667064666748, "learning_rate": 7.909090909090909e-06, "loss": 0.9373, "step": 174 }, { "epoch": 0.02389404696886947, "grad_norm": 5.8069562911987305, "learning_rate": 7.954545454545455e-06, "loss": 0.9156, "step": 175 }, { "epoch": 0.02403058438012015, "grad_norm": 5.936809062957764, "learning_rate": 8.000000000000001e-06, "loss": 1.1687, "step": 176 }, { "epoch": 0.024167121791370837, "grad_norm": 5.9012675285339355, "learning_rate": 8.045454545454546e-06, "loss": 0.9718, "step": 177 }, { "epoch": 0.02430365920262152, "grad_norm": 5.836941242218018, "learning_rate": 8.090909090909092e-06, "loss": 0.9494, "step": 178 }, { "epoch": 0.0244401966138722, "grad_norm": 7.625082492828369, "learning_rate": 8.136363636363637e-06, "loss": 1.0671, "step": 179 }, { "epoch": 0.024576734025122882, "grad_norm": 6.572492599487305, "learning_rate": 8.181818181818183e-06, "loss": 0.9625, "step": 180 }, { "epoch": 0.024713271436373568, "grad_norm": 7.328122615814209, "learning_rate": 8.227272727272728e-06, "loss": 1.0266, "step": 181 }, { "epoch": 0.02484980884762425, "grad_norm": 6.2686767578125, "learning_rate": 8.272727272727274e-06, "loss": 1.0621, "step": 182 }, { "epoch": 0.02498634625887493, "grad_norm": 6.541305065155029, "learning_rate": 8.318181818181818e-06, "loss": 0.9842, "step": 183 }, { "epoch": 0.025122883670125613, "grad_norm": 6.871057510375977, "learning_rate": 8.363636363636365e-06, "loss": 1.1253, "step": 184 }, { "epoch": 0.0252594210813763, "grad_norm": 18.802967071533203, "learning_rate": 8.40909090909091e-06, "loss": 1.0306, "step": 185 }, { "epoch": 0.02539595849262698, "grad_norm": 6.875433921813965, "learning_rate": 8.454545454545455e-06, "loss": 1.0528, "step": 186 }, { "epoch": 0.025532495903877662, "grad_norm": 6.5471577644348145, "learning_rate": 8.5e-06, "loss": 0.8524, "step": 187 }, { "epoch": 0.025669033315128344, "grad_norm": 10.32519817352295, "learning_rate": 8.545454545454546e-06, "loss": 1.0136, "step": 188 }, { "epoch": 0.025805570726379026, "grad_norm": 7.083716869354248, "learning_rate": 8.590909090909092e-06, "loss": 1.1286, "step": 189 }, { "epoch": 0.02594210813762971, "grad_norm": 5.794723987579346, "learning_rate": 8.636363636363637e-06, "loss": 0.9652, "step": 190 }, { "epoch": 0.026078645548880394, "grad_norm": 5.845549583435059, "learning_rate": 8.681818181818182e-06, "loss": 1.0334, "step": 191 }, { "epoch": 0.026215182960131075, "grad_norm": 6.903234004974365, "learning_rate": 8.727272727272728e-06, "loss": 1.2033, "step": 192 }, { "epoch": 0.026351720371381757, "grad_norm": 7.957429885864258, "learning_rate": 8.772727272727274e-06, "loss": 1.1966, "step": 193 }, { "epoch": 0.026488257782632443, "grad_norm": 10.86413288116455, "learning_rate": 8.818181818181819e-06, "loss": 1.0177, "step": 194 }, { "epoch": 0.026624795193883125, "grad_norm": 6.8351149559021, "learning_rate": 8.863636363636365e-06, "loss": 0.9698, "step": 195 }, { "epoch": 0.026761332605133806, "grad_norm": 7.291882038116455, "learning_rate": 8.90909090909091e-06, "loss": 1.0181, "step": 196 }, { "epoch": 0.02689787001638449, "grad_norm": 6.478904724121094, "learning_rate": 8.954545454545456e-06, "loss": 1.1291, "step": 197 }, { "epoch": 0.027034407427635174, "grad_norm": 9.656041145324707, "learning_rate": 9e-06, "loss": 1.1816, "step": 198 }, { "epoch": 0.027170944838885856, "grad_norm": 6.0336408615112305, "learning_rate": 9.045454545454546e-06, "loss": 1.0047, "step": 199 }, { "epoch": 0.027307482250136537, "grad_norm": 6.436997413635254, "learning_rate": 9.090909090909091e-06, "loss": 1.0983, "step": 200 }, { "epoch": 0.02744401966138722, "grad_norm": 7.121159553527832, "learning_rate": 9.136363636363637e-06, "loss": 1.0684, "step": 201 }, { "epoch": 0.0275805570726379, "grad_norm": 5.096314430236816, "learning_rate": 9.181818181818184e-06, "loss": 0.8563, "step": 202 }, { "epoch": 0.027717094483888587, "grad_norm": 6.306644916534424, "learning_rate": 9.227272727272728e-06, "loss": 1.0449, "step": 203 }, { "epoch": 0.02785363189513927, "grad_norm": 6.955361366271973, "learning_rate": 9.272727272727273e-06, "loss": 0.9943, "step": 204 }, { "epoch": 0.02799016930638995, "grad_norm": 7.242427825927734, "learning_rate": 9.318181818181819e-06, "loss": 1.1825, "step": 205 }, { "epoch": 0.028126706717640632, "grad_norm": 8.421954154968262, "learning_rate": 9.363636363636365e-06, "loss": 1.0741, "step": 206 }, { "epoch": 0.028263244128891318, "grad_norm": 6.2889404296875, "learning_rate": 9.40909090909091e-06, "loss": 0.9862, "step": 207 }, { "epoch": 0.028399781540142, "grad_norm": 8.217613220214844, "learning_rate": 9.454545454545456e-06, "loss": 1.1225, "step": 208 }, { "epoch": 0.02853631895139268, "grad_norm": 6.89904260635376, "learning_rate": 9.5e-06, "loss": 0.9021, "step": 209 }, { "epoch": 0.028672856362643363, "grad_norm": 7.738643646240234, "learning_rate": 9.545454545454547e-06, "loss": 1.1158, "step": 210 }, { "epoch": 0.02880939377389405, "grad_norm": 8.02056884765625, "learning_rate": 9.590909090909091e-06, "loss": 1.0659, "step": 211 }, { "epoch": 0.02894593118514473, "grad_norm": 6.053958892822266, "learning_rate": 9.636363636363638e-06, "loss": 1.0904, "step": 212 }, { "epoch": 0.029082468596395412, "grad_norm": 5.873446941375732, "learning_rate": 9.681818181818182e-06, "loss": 1.0552, "step": 213 }, { "epoch": 0.029219006007646094, "grad_norm": 6.664444446563721, "learning_rate": 9.727272727272728e-06, "loss": 1.017, "step": 214 }, { "epoch": 0.029355543418896776, "grad_norm": 6.438930511474609, "learning_rate": 9.772727272727273e-06, "loss": 1.0967, "step": 215 }, { "epoch": 0.02949208083014746, "grad_norm": 6.546825885772705, "learning_rate": 9.81818181818182e-06, "loss": 1.1913, "step": 216 }, { "epoch": 0.029628618241398143, "grad_norm": 6.002410888671875, "learning_rate": 9.863636363636364e-06, "loss": 1.0792, "step": 217 }, { "epoch": 0.029765155652648825, "grad_norm": 14.918628692626953, "learning_rate": 9.90909090909091e-06, "loss": 1.0817, "step": 218 }, { "epoch": 0.029901693063899507, "grad_norm": 8.899186134338379, "learning_rate": 9.954545454545456e-06, "loss": 1.0104, "step": 219 }, { "epoch": 0.030038230475150193, "grad_norm": 7.404455184936523, "learning_rate": 1e-05, "loss": 1.0886, "step": 220 }, { "epoch": 0.030174767886400874, "grad_norm": 10.601614952087402, "learning_rate": 9.99999951108446e-06, "loss": 0.8562, "step": 221 }, { "epoch": 0.030311305297651556, "grad_norm": 6.781443119049072, "learning_rate": 9.999998044337937e-06, "loss": 1.112, "step": 222 }, { "epoch": 0.030447842708902238, "grad_norm": 6.163244724273682, "learning_rate": 9.999995599760715e-06, "loss": 1.0149, "step": 223 }, { "epoch": 0.030584380120152924, "grad_norm": 8.446272850036621, "learning_rate": 9.999992177353272e-06, "loss": 1.1065, "step": 224 }, { "epoch": 0.030720917531403606, "grad_norm": 10.697783470153809, "learning_rate": 9.99998777711628e-06, "loss": 1.0663, "step": 225 }, { "epoch": 0.030857454942654287, "grad_norm": 5.906458854675293, "learning_rate": 9.999982399050598e-06, "loss": 1.1416, "step": 226 }, { "epoch": 0.03099399235390497, "grad_norm": 11.393940925598145, "learning_rate": 9.999976043157277e-06, "loss": 0.9565, "step": 227 }, { "epoch": 0.03113052976515565, "grad_norm": 6.089052677154541, "learning_rate": 9.999968709437563e-06, "loss": 0.9726, "step": 228 }, { "epoch": 0.03126706717640634, "grad_norm": 6.497355937957764, "learning_rate": 9.999960397892886e-06, "loss": 1.0547, "step": 229 }, { "epoch": 0.031403604587657015, "grad_norm": 6.09201717376709, "learning_rate": 9.999951108524875e-06, "loss": 1.0195, "step": 230 }, { "epoch": 0.0315401419989077, "grad_norm": 7.273472785949707, "learning_rate": 9.999940841335346e-06, "loss": 1.0395, "step": 231 }, { "epoch": 0.031676679410158386, "grad_norm": 7.070492744445801, "learning_rate": 9.999929596326306e-06, "loss": 1.0887, "step": 232 }, { "epoch": 0.031813216821409064, "grad_norm": 7.052062511444092, "learning_rate": 9.999917373499953e-06, "loss": 1.0024, "step": 233 }, { "epoch": 0.03194975423265975, "grad_norm": 8.064093589782715, "learning_rate": 9.99990417285868e-06, "loss": 1.0361, "step": 234 }, { "epoch": 0.032086291643910435, "grad_norm": 6.21509313583374, "learning_rate": 9.999889994405068e-06, "loss": 1.0186, "step": 235 }, { "epoch": 0.03222282905516111, "grad_norm": 16.73869514465332, "learning_rate": 9.999874838141888e-06, "loss": 1.0112, "step": 236 }, { "epoch": 0.0323593664664118, "grad_norm": 7.248203754425049, "learning_rate": 9.999858704072104e-06, "loss": 0.9663, "step": 237 }, { "epoch": 0.03249590387766248, "grad_norm": 6.538657188415527, "learning_rate": 9.999841592198876e-06, "loss": 1.0491, "step": 238 }, { "epoch": 0.03263244128891316, "grad_norm": 6.265979766845703, "learning_rate": 9.999823502525545e-06, "loss": 0.9472, "step": 239 }, { "epoch": 0.03276897870016385, "grad_norm": 22.59429931640625, "learning_rate": 9.99980443505565e-06, "loss": 1.1628, "step": 240 }, { "epoch": 0.032905516111414526, "grad_norm": 8.019349098205566, "learning_rate": 9.999784389792924e-06, "loss": 1.176, "step": 241 }, { "epoch": 0.03304205352266521, "grad_norm": 8.775652885437012, "learning_rate": 9.99976336674128e-06, "loss": 0.9988, "step": 242 }, { "epoch": 0.03317859093391589, "grad_norm": 8.94062328338623, "learning_rate": 9.999741365904836e-06, "loss": 1.11, "step": 243 }, { "epoch": 0.033315128345166575, "grad_norm": 18.557416915893555, "learning_rate": 9.999718387287891e-06, "loss": 0.9988, "step": 244 }, { "epoch": 0.03345166575641726, "grad_norm": 7.798446178436279, "learning_rate": 9.999694430894939e-06, "loss": 1.1685, "step": 245 }, { "epoch": 0.03358820316766794, "grad_norm": 8.114873886108398, "learning_rate": 9.999669496730666e-06, "loss": 1.0043, "step": 246 }, { "epoch": 0.033724740578918624, "grad_norm": 8.795559883117676, "learning_rate": 9.99964358479995e-06, "loss": 1.0318, "step": 247 }, { "epoch": 0.03386127799016931, "grad_norm": 6.790772914886475, "learning_rate": 9.999616695107854e-06, "loss": 0.9434, "step": 248 }, { "epoch": 0.03399781540141999, "grad_norm": 6.535905838012695, "learning_rate": 9.99958882765964e-06, "loss": 0.8784, "step": 249 }, { "epoch": 0.034134352812670674, "grad_norm": 6.839535236358643, "learning_rate": 9.99955998246076e-06, "loss": 0.973, "step": 250 }, { "epoch": 0.03427089022392135, "grad_norm": 21.397554397583008, "learning_rate": 9.99953015951685e-06, "loss": 0.9551, "step": 251 }, { "epoch": 0.03440742763517204, "grad_norm": 6.476192474365234, "learning_rate": 9.999499358833745e-06, "loss": 1.0946, "step": 252 }, { "epoch": 0.03454396504642272, "grad_norm": 8.474649429321289, "learning_rate": 9.999467580417468e-06, "loss": 1.1078, "step": 253 }, { "epoch": 0.0346805024576734, "grad_norm": 21.63345718383789, "learning_rate": 9.999434824274235e-06, "loss": 1.0315, "step": 254 }, { "epoch": 0.034817039868924086, "grad_norm": 7.777858734130859, "learning_rate": 9.99940109041045e-06, "loss": 1.0178, "step": 255 }, { "epoch": 0.034953577280174765, "grad_norm": 8.227347373962402, "learning_rate": 9.999366378832714e-06, "loss": 0.9862, "step": 256 }, { "epoch": 0.03509011469142545, "grad_norm": 6.497341632843018, "learning_rate": 9.999330689547811e-06, "loss": 1.0353, "step": 257 }, { "epoch": 0.035226652102676136, "grad_norm": 7.82156229019165, "learning_rate": 9.999294022562721e-06, "loss": 1.2232, "step": 258 }, { "epoch": 0.035363189513926814, "grad_norm": 20.265504837036133, "learning_rate": 9.999256377884617e-06, "loss": 1.0388, "step": 259 }, { "epoch": 0.0354997269251775, "grad_norm": 8.066078186035156, "learning_rate": 9.99921775552086e-06, "loss": 0.9382, "step": 260 }, { "epoch": 0.035636264336428185, "grad_norm": 8.30470085144043, "learning_rate": 9.999178155479005e-06, "loss": 1.0885, "step": 261 }, { "epoch": 0.03577280174767886, "grad_norm": 7.538148403167725, "learning_rate": 9.999137577766793e-06, "loss": 1.0178, "step": 262 }, { "epoch": 0.03590933915892955, "grad_norm": 8.515132904052734, "learning_rate": 9.999096022392163e-06, "loss": 0.9988, "step": 263 }, { "epoch": 0.03604587657018023, "grad_norm": 10.6077241897583, "learning_rate": 9.99905348936324e-06, "loss": 1.0799, "step": 264 }, { "epoch": 0.03618241398143091, "grad_norm": 8.776172637939453, "learning_rate": 9.99900997868834e-06, "loss": 1.0198, "step": 265 }, { "epoch": 0.0363189513926816, "grad_norm": 31.528493881225586, "learning_rate": 9.998965490375979e-06, "loss": 1.0564, "step": 266 }, { "epoch": 0.036455488803932276, "grad_norm": 7.447332382202148, "learning_rate": 9.99892002443485e-06, "loss": 1.0163, "step": 267 }, { "epoch": 0.03659202621518296, "grad_norm": 7.083631992340088, "learning_rate": 9.998873580873848e-06, "loss": 1.2921, "step": 268 }, { "epoch": 0.03672856362643364, "grad_norm": 11.015118598937988, "learning_rate": 9.998826159702056e-06, "loss": 1.1479, "step": 269 }, { "epoch": 0.036865101037684325, "grad_norm": 6.183999538421631, "learning_rate": 9.998777760928747e-06, "loss": 0.9699, "step": 270 }, { "epoch": 0.03700163844893501, "grad_norm": 6.92440938949585, "learning_rate": 9.998728384563385e-06, "loss": 1.1267, "step": 271 }, { "epoch": 0.03713817586018569, "grad_norm": 5.186967372894287, "learning_rate": 9.99867803061563e-06, "loss": 0.9851, "step": 272 }, { "epoch": 0.037274713271436374, "grad_norm": 6.318822860717773, "learning_rate": 9.998626699095327e-06, "loss": 1.0962, "step": 273 }, { "epoch": 0.03741125068268706, "grad_norm": 7.323174953460693, "learning_rate": 9.998574390012514e-06, "loss": 1.1501, "step": 274 }, { "epoch": 0.03754778809393774, "grad_norm": 7.878437519073486, "learning_rate": 9.998521103377424e-06, "loss": 1.0624, "step": 275 }, { "epoch": 0.037684325505188423, "grad_norm": 6.398550987243652, "learning_rate": 9.998466839200474e-06, "loss": 1.1322, "step": 276 }, { "epoch": 0.0378208629164391, "grad_norm": 7.3576765060424805, "learning_rate": 9.99841159749228e-06, "loss": 1.1515, "step": 277 }, { "epoch": 0.03795740032768979, "grad_norm": 5.841732025146484, "learning_rate": 9.998355378263643e-06, "loss": 1.0612, "step": 278 }, { "epoch": 0.03809393773894047, "grad_norm": 7.271647930145264, "learning_rate": 9.998298181525556e-06, "loss": 1.1448, "step": 279 }, { "epoch": 0.03823047515019115, "grad_norm": 11.09030532836914, "learning_rate": 9.998240007289211e-06, "loss": 1.0503, "step": 280 }, { "epoch": 0.038367012561441836, "grad_norm": 9.045515060424805, "learning_rate": 9.998180855565978e-06, "loss": 1.1866, "step": 281 }, { "epoch": 0.038503549972692515, "grad_norm": 32.260257720947266, "learning_rate": 9.99812072636743e-06, "loss": 1.0178, "step": 282 }, { "epoch": 0.0386400873839432, "grad_norm": 27.870521545410156, "learning_rate": 9.998059619705323e-06, "loss": 1.1091, "step": 283 }, { "epoch": 0.038776624795193886, "grad_norm": 49.21774673461914, "learning_rate": 9.99799753559161e-06, "loss": 1.2064, "step": 284 }, { "epoch": 0.038913162206444564, "grad_norm": 6.680756092071533, "learning_rate": 9.997934474038429e-06, "loss": 0.997, "step": 285 }, { "epoch": 0.03904969961769525, "grad_norm": 6.869826793670654, "learning_rate": 9.997870435058117e-06, "loss": 1.1964, "step": 286 }, { "epoch": 0.03918623702894593, "grad_norm": 7.524827480316162, "learning_rate": 9.997805418663195e-06, "loss": 1.1173, "step": 287 }, { "epoch": 0.03932277444019661, "grad_norm": 5.13725471496582, "learning_rate": 9.997739424866379e-06, "loss": 1.0149, "step": 288 }, { "epoch": 0.0394593118514473, "grad_norm": 11.211112976074219, "learning_rate": 9.997672453680575e-06, "loss": 1.1824, "step": 289 }, { "epoch": 0.03959584926269798, "grad_norm": 7.798264980316162, "learning_rate": 9.997604505118879e-06, "loss": 1.04, "step": 290 }, { "epoch": 0.03973238667394866, "grad_norm": 17.2856502532959, "learning_rate": 9.997535579194583e-06, "loss": 1.0818, "step": 291 }, { "epoch": 0.03986892408519935, "grad_norm": 6.234739780426025, "learning_rate": 9.997465675921163e-06, "loss": 1.0266, "step": 292 }, { "epoch": 0.040005461496450026, "grad_norm": 10.67748737335205, "learning_rate": 9.997394795312291e-06, "loss": 1.0659, "step": 293 }, { "epoch": 0.04014199890770071, "grad_norm": 7.7242255210876465, "learning_rate": 9.997322937381829e-06, "loss": 1.0564, "step": 294 }, { "epoch": 0.04027853631895139, "grad_norm": 7.016628742218018, "learning_rate": 9.99725010214383e-06, "loss": 0.9475, "step": 295 }, { "epoch": 0.040415073730202075, "grad_norm": 6.051543235778809, "learning_rate": 9.997176289612537e-06, "loss": 1.0217, "step": 296 }, { "epoch": 0.04055161114145276, "grad_norm": 6.720175266265869, "learning_rate": 9.99710149980239e-06, "loss": 1.0142, "step": 297 }, { "epoch": 0.04068814855270344, "grad_norm": 25.230865478515625, "learning_rate": 9.997025732728007e-06, "loss": 1.1766, "step": 298 }, { "epoch": 0.040824685963954124, "grad_norm": 6.8942694664001465, "learning_rate": 9.996948988404212e-06, "loss": 0.9967, "step": 299 }, { "epoch": 0.0409612233752048, "grad_norm": 12.969277381896973, "learning_rate": 9.99687126684601e-06, "loss": 1.1245, "step": 300 }, { "epoch": 0.04109776078645549, "grad_norm": 26.627471923828125, "learning_rate": 9.996792568068604e-06, "loss": 1.1086, "step": 301 }, { "epoch": 0.04123429819770617, "grad_norm": 89.57864379882812, "learning_rate": 9.996712892087383e-06, "loss": 1.0768, "step": 302 }, { "epoch": 0.04137083560895685, "grad_norm": 12.59830093383789, "learning_rate": 9.99663223891793e-06, "loss": 1.1311, "step": 303 }, { "epoch": 0.04150737302020754, "grad_norm": 5.198631763458252, "learning_rate": 9.996550608576015e-06, "loss": 1.1242, "step": 304 }, { "epoch": 0.04164391043145822, "grad_norm": 7.160858631134033, "learning_rate": 9.996468001077606e-06, "loss": 1.1514, "step": 305 }, { "epoch": 0.0417804478427089, "grad_norm": 6.181071758270264, "learning_rate": 9.996384416438855e-06, "loss": 1.0825, "step": 306 }, { "epoch": 0.041916985253959586, "grad_norm": 8.167061805725098, "learning_rate": 9.99629985467611e-06, "loss": 0.9982, "step": 307 }, { "epoch": 0.042053522665210265, "grad_norm": 6.571996212005615, "learning_rate": 9.99621431580591e-06, "loss": 1.0656, "step": 308 }, { "epoch": 0.04219006007646095, "grad_norm": 5.8164963722229, "learning_rate": 9.996127799844982e-06, "loss": 0.7979, "step": 309 }, { "epoch": 0.042326597487711635, "grad_norm": 5.890752792358398, "learning_rate": 9.996040306810243e-06, "loss": 1.1091, "step": 310 }, { "epoch": 0.042463134898962314, "grad_norm": 6.73175048828125, "learning_rate": 9.995951836718806e-06, "loss": 1.1369, "step": 311 }, { "epoch": 0.042599672310213, "grad_norm": 10.447607040405273, "learning_rate": 9.995862389587976e-06, "loss": 1.0419, "step": 312 }, { "epoch": 0.04273620972146368, "grad_norm": 7.459190368652344, "learning_rate": 9.995771965435238e-06, "loss": 1.1511, "step": 313 }, { "epoch": 0.04287274713271436, "grad_norm": 7.0616135597229, "learning_rate": 9.995680564278284e-06, "loss": 1.1279, "step": 314 }, { "epoch": 0.04300928454396505, "grad_norm": 5.5964202880859375, "learning_rate": 9.995588186134985e-06, "loss": 0.9984, "step": 315 }, { "epoch": 0.04314582195521573, "grad_norm": 7.9305739402771, "learning_rate": 9.99549483102341e-06, "loss": 1.0637, "step": 316 }, { "epoch": 0.04328235936646641, "grad_norm": 10.202630996704102, "learning_rate": 9.99540049896181e-06, "loss": 0.947, "step": 317 }, { "epoch": 0.0434188967777171, "grad_norm": 8.32096004486084, "learning_rate": 9.995305189968637e-06, "loss": 1.0481, "step": 318 }, { "epoch": 0.043555434188967776, "grad_norm": 5.554686069488525, "learning_rate": 9.995208904062531e-06, "loss": 0.8886, "step": 319 }, { "epoch": 0.04369197160021846, "grad_norm": 8.242725372314453, "learning_rate": 9.995111641262322e-06, "loss": 1.0017, "step": 320 }, { "epoch": 0.04382850901146914, "grad_norm": 10.72989559173584, "learning_rate": 9.99501340158703e-06, "loss": 1.0881, "step": 321 }, { "epoch": 0.043965046422719825, "grad_norm": 8.692205429077148, "learning_rate": 9.994914185055869e-06, "loss": 1.0565, "step": 322 }, { "epoch": 0.04410158383397051, "grad_norm": 10.010233879089355, "learning_rate": 9.99481399168824e-06, "loss": 0.9631, "step": 323 }, { "epoch": 0.04423812124522119, "grad_norm": 7.297447681427002, "learning_rate": 9.994712821503737e-06, "loss": 1.1, "step": 324 }, { "epoch": 0.044374658656471874, "grad_norm": 6.710750102996826, "learning_rate": 9.994610674522151e-06, "loss": 1.1186, "step": 325 }, { "epoch": 0.04451119606772255, "grad_norm": 7.2071685791015625, "learning_rate": 9.994507550763453e-06, "loss": 1.0006, "step": 326 }, { "epoch": 0.04464773347897324, "grad_norm": 6.916559219360352, "learning_rate": 9.994403450247814e-06, "loss": 0.9996, "step": 327 }, { "epoch": 0.04478427089022392, "grad_norm": 5.86030387878418, "learning_rate": 9.99429837299559e-06, "loss": 1.0995, "step": 328 }, { "epoch": 0.0449208083014746, "grad_norm": 9.7324800491333, "learning_rate": 9.994192319027331e-06, "loss": 0.9884, "step": 329 }, { "epoch": 0.04505734571272529, "grad_norm": 5.971088409423828, "learning_rate": 9.994085288363777e-06, "loss": 1.1209, "step": 330 }, { "epoch": 0.04519388312397597, "grad_norm": 6.225163459777832, "learning_rate": 9.993977281025862e-06, "loss": 1.1379, "step": 331 }, { "epoch": 0.04533042053522665, "grad_norm": 6.96148681640625, "learning_rate": 9.993868297034709e-06, "loss": 0.9521, "step": 332 }, { "epoch": 0.045466957946477336, "grad_norm": 10.265271186828613, "learning_rate": 9.993758336411628e-06, "loss": 0.9767, "step": 333 }, { "epoch": 0.045603495357728015, "grad_norm": 11.113887786865234, "learning_rate": 9.993647399178124e-06, "loss": 1.0357, "step": 334 }, { "epoch": 0.0457400327689787, "grad_norm": 36.40565872192383, "learning_rate": 9.993535485355895e-06, "loss": 1.0479, "step": 335 }, { "epoch": 0.045876570180229385, "grad_norm": 7.278351306915283, "learning_rate": 9.993422594966827e-06, "loss": 1.054, "step": 336 }, { "epoch": 0.046013107591480064, "grad_norm": 7.715376853942871, "learning_rate": 9.993308728032997e-06, "loss": 0.9389, "step": 337 }, { "epoch": 0.04614964500273075, "grad_norm": 6.691707611083984, "learning_rate": 9.993193884576673e-06, "loss": 1.0758, "step": 338 }, { "epoch": 0.04628618241398143, "grad_norm": 6.1334967613220215, "learning_rate": 9.993078064620317e-06, "loss": 1.0259, "step": 339 }, { "epoch": 0.04642271982523211, "grad_norm": 11.08230209350586, "learning_rate": 9.992961268186575e-06, "loss": 1.1118, "step": 340 }, { "epoch": 0.0465592572364828, "grad_norm": 9.439099311828613, "learning_rate": 9.992843495298292e-06, "loss": 0.9755, "step": 341 }, { "epoch": 0.04669579464773348, "grad_norm": 8.544450759887695, "learning_rate": 9.992724745978499e-06, "loss": 1.1236, "step": 342 }, { "epoch": 0.04683233205898416, "grad_norm": 11.897004127502441, "learning_rate": 9.992605020250422e-06, "loss": 1.0625, "step": 343 }, { "epoch": 0.04696886947023485, "grad_norm": 10.843701362609863, "learning_rate": 9.99248431813747e-06, "loss": 0.9724, "step": 344 }, { "epoch": 0.047105406881485526, "grad_norm": 9.587562561035156, "learning_rate": 9.992362639663253e-06, "loss": 0.9783, "step": 345 }, { "epoch": 0.04724194429273621, "grad_norm": 11.26671028137207, "learning_rate": 9.992239984851565e-06, "loss": 1.1176, "step": 346 }, { "epoch": 0.04737848170398689, "grad_norm": 6.088156700134277, "learning_rate": 9.992116353726393e-06, "loss": 0.9956, "step": 347 }, { "epoch": 0.047515019115237575, "grad_norm": 21.225711822509766, "learning_rate": 9.991991746311916e-06, "loss": 1.0223, "step": 348 }, { "epoch": 0.04765155652648826, "grad_norm": 16.143600463867188, "learning_rate": 9.991866162632503e-06, "loss": 1.0625, "step": 349 }, { "epoch": 0.04778809393773894, "grad_norm": 10.986730575561523, "learning_rate": 9.991739602712714e-06, "loss": 1.0908, "step": 350 }, { "epoch": 0.047924631348989624, "grad_norm": 11.220099449157715, "learning_rate": 9.991612066577298e-06, "loss": 1.1408, "step": 351 }, { "epoch": 0.0480611687602403, "grad_norm": 11.498674392700195, "learning_rate": 9.9914835542512e-06, "loss": 1.1631, "step": 352 }, { "epoch": 0.04819770617149099, "grad_norm": 7.656613349914551, "learning_rate": 9.991354065759552e-06, "loss": 0.9807, "step": 353 }, { "epoch": 0.04833424358274167, "grad_norm": 13.854513168334961, "learning_rate": 9.991223601127675e-06, "loss": 1.0914, "step": 354 }, { "epoch": 0.04847078099399235, "grad_norm": 6.825024604797363, "learning_rate": 9.991092160381084e-06, "loss": 0.8731, "step": 355 }, { "epoch": 0.04860731840524304, "grad_norm": 5.815252304077148, "learning_rate": 9.990959743545487e-06, "loss": 1.0344, "step": 356 }, { "epoch": 0.04874385581649372, "grad_norm": 8.259517669677734, "learning_rate": 9.990826350646777e-06, "loss": 1.1182, "step": 357 }, { "epoch": 0.0488803932277444, "grad_norm": 6.19083309173584, "learning_rate": 9.990691981711043e-06, "loss": 0.9954, "step": 358 }, { "epoch": 0.049016930638995086, "grad_norm": 7.526218891143799, "learning_rate": 9.990556636764564e-06, "loss": 1.0011, "step": 359 }, { "epoch": 0.049153468050245765, "grad_norm": 10.310550689697266, "learning_rate": 9.990420315833807e-06, "loss": 1.1332, "step": 360 }, { "epoch": 0.04929000546149645, "grad_norm": 11.11314582824707, "learning_rate": 9.990283018945432e-06, "loss": 1.0935, "step": 361 }, { "epoch": 0.049426542872747135, "grad_norm": 7.739438056945801, "learning_rate": 9.990144746126292e-06, "loss": 1.011, "step": 362 }, { "epoch": 0.049563080283997814, "grad_norm": 6.4262800216674805, "learning_rate": 9.990005497403423e-06, "loss": 0.9384, "step": 363 }, { "epoch": 0.0496996176952485, "grad_norm": 6.063249588012695, "learning_rate": 9.989865272804064e-06, "loss": 1.0597, "step": 364 }, { "epoch": 0.04983615510649918, "grad_norm": 9.066120147705078, "learning_rate": 9.989724072355634e-06, "loss": 1.1365, "step": 365 }, { "epoch": 0.04997269251774986, "grad_norm": 9.420570373535156, "learning_rate": 9.989581896085746e-06, "loss": 1.0443, "step": 366 }, { "epoch": 0.05010922992900055, "grad_norm": 7.209034442901611, "learning_rate": 9.989438744022211e-06, "loss": 1.0533, "step": 367 }, { "epoch": 0.05024576734025123, "grad_norm": 9.87623405456543, "learning_rate": 9.989294616193018e-06, "loss": 1.0666, "step": 368 }, { "epoch": 0.05038230475150191, "grad_norm": 7.389622211456299, "learning_rate": 9.989149512626357e-06, "loss": 0.9098, "step": 369 }, { "epoch": 0.0505188421627526, "grad_norm": 7.482319355010986, "learning_rate": 9.989003433350606e-06, "loss": 0.9358, "step": 370 }, { "epoch": 0.050655379574003276, "grad_norm": 7.607789039611816, "learning_rate": 9.988856378394329e-06, "loss": 1.0919, "step": 371 }, { "epoch": 0.05079191698525396, "grad_norm": 45.31864547729492, "learning_rate": 9.988708347786288e-06, "loss": 1.105, "step": 372 }, { "epoch": 0.05092845439650464, "grad_norm": 8.657610893249512, "learning_rate": 9.988559341555435e-06, "loss": 1.0136, "step": 373 }, { "epoch": 0.051064991807755325, "grad_norm": 9.531627655029297, "learning_rate": 9.988409359730907e-06, "loss": 1.2016, "step": 374 }, { "epoch": 0.05120152921900601, "grad_norm": 8.150023460388184, "learning_rate": 9.988258402342036e-06, "loss": 1.0963, "step": 375 }, { "epoch": 0.05133806663025669, "grad_norm": 12.027748107910156, "learning_rate": 9.988106469418346e-06, "loss": 1.1059, "step": 376 }, { "epoch": 0.051474604041507374, "grad_norm": 14.789801597595215, "learning_rate": 9.987953560989548e-06, "loss": 1.1166, "step": 377 }, { "epoch": 0.05161114145275805, "grad_norm": 19.782440185546875, "learning_rate": 9.987799677085546e-06, "loss": 1.0136, "step": 378 }, { "epoch": 0.05174767886400874, "grad_norm": 11.537410736083984, "learning_rate": 9.987644817736435e-06, "loss": 1.171, "step": 379 }, { "epoch": 0.05188421627525942, "grad_norm": 6.852720260620117, "learning_rate": 9.9874889829725e-06, "loss": 0.9211, "step": 380 }, { "epoch": 0.0520207536865101, "grad_norm": 39.56999969482422, "learning_rate": 9.987332172824216e-06, "loss": 1.0318, "step": 381 }, { "epoch": 0.05215729109776079, "grad_norm": 7.822873115539551, "learning_rate": 9.987174387322251e-06, "loss": 1.0593, "step": 382 }, { "epoch": 0.05229382850901147, "grad_norm": 7.921478271484375, "learning_rate": 9.987015626497464e-06, "loss": 0.9061, "step": 383 }, { "epoch": 0.05243036592026215, "grad_norm": 8.507376670837402, "learning_rate": 9.986855890380901e-06, "loss": 1.143, "step": 384 }, { "epoch": 0.052566903331512836, "grad_norm": 6.1860857009887695, "learning_rate": 9.986695179003803e-06, "loss": 1.0144, "step": 385 }, { "epoch": 0.052703440742763515, "grad_norm": 7.266015529632568, "learning_rate": 9.986533492397596e-06, "loss": 0.9638, "step": 386 }, { "epoch": 0.0528399781540142, "grad_norm": 11.862972259521484, "learning_rate": 9.986370830593904e-06, "loss": 1.084, "step": 387 }, { "epoch": 0.052976515565264885, "grad_norm": 7.81760835647583, "learning_rate": 9.986207193624537e-06, "loss": 1.038, "step": 388 }, { "epoch": 0.053113052976515564, "grad_norm": 11.633219718933105, "learning_rate": 9.986042581521495e-06, "loss": 1.0963, "step": 389 }, { "epoch": 0.05324959038776625, "grad_norm": 7.667217254638672, "learning_rate": 9.985876994316974e-06, "loss": 1.158, "step": 390 }, { "epoch": 0.05338612779901693, "grad_norm": 9.950471878051758, "learning_rate": 9.985710432043357e-06, "loss": 0.9349, "step": 391 }, { "epoch": 0.05352266521026761, "grad_norm": 7.715943336486816, "learning_rate": 9.985542894733214e-06, "loss": 1.0905, "step": 392 }, { "epoch": 0.0536592026215183, "grad_norm": 6.669850826263428, "learning_rate": 9.985374382419312e-06, "loss": 1.0611, "step": 393 }, { "epoch": 0.05379574003276898, "grad_norm": 7.935279369354248, "learning_rate": 9.985204895134608e-06, "loss": 1.0381, "step": 394 }, { "epoch": 0.05393227744401966, "grad_norm": 9.493926048278809, "learning_rate": 9.985034432912247e-06, "loss": 1.0066, "step": 395 }, { "epoch": 0.05406881485527035, "grad_norm": 10.765724182128906, "learning_rate": 9.984862995785564e-06, "loss": 1.1755, "step": 396 }, { "epoch": 0.054205352266521026, "grad_norm": 6.897792816162109, "learning_rate": 9.984690583788088e-06, "loss": 1.1079, "step": 397 }, { "epoch": 0.05434188967777171, "grad_norm": 7.70706844329834, "learning_rate": 9.984517196953536e-06, "loss": 1.1486, "step": 398 }, { "epoch": 0.05447842708902239, "grad_norm": 5.226355075836182, "learning_rate": 9.984342835315819e-06, "loss": 1.0417, "step": 399 }, { "epoch": 0.054614964500273075, "grad_norm": 8.096595764160156, "learning_rate": 9.984167498909031e-06, "loss": 1.072, "step": 400 }, { "epoch": 0.05475150191152376, "grad_norm": 13.248932838439941, "learning_rate": 9.983991187767469e-06, "loss": 1.1287, "step": 401 }, { "epoch": 0.05488803932277444, "grad_norm": 27.12986183166504, "learning_rate": 9.983813901925607e-06, "loss": 1.1496, "step": 402 }, { "epoch": 0.055024576734025124, "grad_norm": 9.738059997558594, "learning_rate": 9.98363564141812e-06, "loss": 1.1718, "step": 403 }, { "epoch": 0.0551611141452758, "grad_norm": 7.462279796600342, "learning_rate": 9.983456406279866e-06, "loss": 1.0403, "step": 404 }, { "epoch": 0.05529765155652649, "grad_norm": 8.201712608337402, "learning_rate": 9.983276196545902e-06, "loss": 1.0227, "step": 405 }, { "epoch": 0.05543418896777717, "grad_norm": 9.171109199523926, "learning_rate": 9.98309501225147e-06, "loss": 1.089, "step": 406 }, { "epoch": 0.05557072637902785, "grad_norm": 7.323428153991699, "learning_rate": 9.982912853432e-06, "loss": 1.2101, "step": 407 }, { "epoch": 0.05570726379027854, "grad_norm": 8.126551628112793, "learning_rate": 9.982729720123118e-06, "loss": 1.1062, "step": 408 }, { "epoch": 0.05584380120152922, "grad_norm": 7.312544822692871, "learning_rate": 9.98254561236064e-06, "loss": 0.9347, "step": 409 }, { "epoch": 0.0559803386127799, "grad_norm": 5.9940996170043945, "learning_rate": 9.982360530180573e-06, "loss": 1.1223, "step": 410 }, { "epoch": 0.056116876024030586, "grad_norm": 9.34977912902832, "learning_rate": 9.982174473619109e-06, "loss": 1.2142, "step": 411 }, { "epoch": 0.056253413435281265, "grad_norm": 7.9049153327941895, "learning_rate": 9.981987442712634e-06, "loss": 1.0797, "step": 412 }, { "epoch": 0.05638995084653195, "grad_norm": 6.808762073516846, "learning_rate": 9.981799437497727e-06, "loss": 1.1624, "step": 413 }, { "epoch": 0.056526488257782635, "grad_norm": 6.695291519165039, "learning_rate": 9.981610458011155e-06, "loss": 1.148, "step": 414 }, { "epoch": 0.056663025669033314, "grad_norm": 6.1939520835876465, "learning_rate": 9.981420504289876e-06, "loss": 0.8842, "step": 415 }, { "epoch": 0.056799563080284, "grad_norm": 5.933281421661377, "learning_rate": 9.98122957637104e-06, "loss": 1.0623, "step": 416 }, { "epoch": 0.05693610049153468, "grad_norm": 6.002859592437744, "learning_rate": 9.981037674291983e-06, "loss": 1.1665, "step": 417 }, { "epoch": 0.05707263790278536, "grad_norm": 7.318545818328857, "learning_rate": 9.980844798090235e-06, "loss": 0.9638, "step": 418 }, { "epoch": 0.05720917531403605, "grad_norm": 8.141002655029297, "learning_rate": 9.98065094780352e-06, "loss": 1.0823, "step": 419 }, { "epoch": 0.05734571272528673, "grad_norm": 4.966285228729248, "learning_rate": 9.980456123469743e-06, "loss": 1.0557, "step": 420 }, { "epoch": 0.05748225013653741, "grad_norm": 5.565876007080078, "learning_rate": 9.980260325127009e-06, "loss": 1.1514, "step": 421 }, { "epoch": 0.0576187875477881, "grad_norm": 6.855783462524414, "learning_rate": 9.980063552813608e-06, "loss": 1.0729, "step": 422 }, { "epoch": 0.057755324959038776, "grad_norm": 7.369726657867432, "learning_rate": 9.979865806568023e-06, "loss": 0.9887, "step": 423 }, { "epoch": 0.05789186237028946, "grad_norm": 6.935479640960693, "learning_rate": 9.979667086428926e-06, "loss": 0.9318, "step": 424 }, { "epoch": 0.05802839978154014, "grad_norm": 7.203799247741699, "learning_rate": 9.97946739243518e-06, "loss": 1.1391, "step": 425 }, { "epoch": 0.058164937192790825, "grad_norm": 6.555767059326172, "learning_rate": 9.979266724625837e-06, "loss": 1.049, "step": 426 }, { "epoch": 0.05830147460404151, "grad_norm": 6.335944652557373, "learning_rate": 9.979065083040144e-06, "loss": 1.1043, "step": 427 }, { "epoch": 0.05843801201529219, "grad_norm": 8.077914237976074, "learning_rate": 9.978862467717532e-06, "loss": 1.0737, "step": 428 }, { "epoch": 0.058574549426542874, "grad_norm": 6.904454708099365, "learning_rate": 9.978658878697627e-06, "loss": 1.0039, "step": 429 }, { "epoch": 0.05871108683779355, "grad_norm": 6.141204833984375, "learning_rate": 9.978454316020246e-06, "loss": 0.9866, "step": 430 }, { "epoch": 0.05884762424904424, "grad_norm": 6.827988624572754, "learning_rate": 9.97824877972539e-06, "loss": 0.9677, "step": 431 }, { "epoch": 0.05898416166029492, "grad_norm": 5.821073532104492, "learning_rate": 9.978042269853259e-06, "loss": 1.0061, "step": 432 }, { "epoch": 0.0591206990715456, "grad_norm": 7.127789497375488, "learning_rate": 9.977834786444238e-06, "loss": 1.0851, "step": 433 }, { "epoch": 0.05925723648279629, "grad_norm": 7.102571964263916, "learning_rate": 9.977626329538904e-06, "loss": 0.9359, "step": 434 }, { "epoch": 0.05939377389404697, "grad_norm": 7.17198371887207, "learning_rate": 9.977416899178023e-06, "loss": 1.0952, "step": 435 }, { "epoch": 0.05953031130529765, "grad_norm": 13.053123474121094, "learning_rate": 9.977206495402554e-06, "loss": 0.9958, "step": 436 }, { "epoch": 0.059666848716548336, "grad_norm": 6.562597274780273, "learning_rate": 9.976995118253644e-06, "loss": 1.0697, "step": 437 }, { "epoch": 0.059803386127799014, "grad_norm": 5.986268520355225, "learning_rate": 9.976782767772632e-06, "loss": 1.1164, "step": 438 }, { "epoch": 0.0599399235390497, "grad_norm": 7.985202312469482, "learning_rate": 9.976569444001044e-06, "loss": 1.1017, "step": 439 }, { "epoch": 0.060076460950300385, "grad_norm": 7.539736747741699, "learning_rate": 9.976355146980605e-06, "loss": 1.0502, "step": 440 }, { "epoch": 0.060212998361551064, "grad_norm": 14.492462158203125, "learning_rate": 9.976139876753218e-06, "loss": 1.0116, "step": 441 }, { "epoch": 0.06034953577280175, "grad_norm": 6.935237407684326, "learning_rate": 9.975923633360985e-06, "loss": 1.2628, "step": 442 }, { "epoch": 0.06048607318405243, "grad_norm": 9.153454780578613, "learning_rate": 9.975706416846196e-06, "loss": 1.0697, "step": 443 }, { "epoch": 0.06062261059530311, "grad_norm": 5.770129680633545, "learning_rate": 9.97548822725133e-06, "loss": 1.0357, "step": 444 }, { "epoch": 0.0607591480065538, "grad_norm": 12.209389686584473, "learning_rate": 9.97526906461906e-06, "loss": 0.9891, "step": 445 }, { "epoch": 0.060895685417804477, "grad_norm": 7.420429229736328, "learning_rate": 9.975048928992245e-06, "loss": 1.1352, "step": 446 }, { "epoch": 0.06103222282905516, "grad_norm": 6.621999740600586, "learning_rate": 9.974827820413936e-06, "loss": 1.0842, "step": 447 }, { "epoch": 0.06116876024030585, "grad_norm": 6.435914039611816, "learning_rate": 9.974605738927374e-06, "loss": 0.9875, "step": 448 }, { "epoch": 0.061305297651556526, "grad_norm": 7.525317668914795, "learning_rate": 9.974382684575993e-06, "loss": 1.0785, "step": 449 }, { "epoch": 0.06144183506280721, "grad_norm": 6.288585186004639, "learning_rate": 9.974158657403413e-06, "loss": 1.0427, "step": 450 }, { "epoch": 0.06157837247405789, "grad_norm": 7.553632736206055, "learning_rate": 9.973933657453445e-06, "loss": 1.1556, "step": 451 }, { "epoch": 0.061714909885308575, "grad_norm": 9.074507713317871, "learning_rate": 9.973707684770095e-06, "loss": 0.9154, "step": 452 }, { "epoch": 0.06185144729655926, "grad_norm": 11.396561622619629, "learning_rate": 9.973480739397552e-06, "loss": 1.119, "step": 453 }, { "epoch": 0.06198798470780994, "grad_norm": 6.136778354644775, "learning_rate": 9.973252821380199e-06, "loss": 0.9707, "step": 454 }, { "epoch": 0.062124522119060624, "grad_norm": 6.821625709533691, "learning_rate": 9.973023930762614e-06, "loss": 0.9863, "step": 455 }, { "epoch": 0.0622610595303113, "grad_norm": 8.925457954406738, "learning_rate": 9.972794067589552e-06, "loss": 1.0689, "step": 456 }, { "epoch": 0.06239759694156199, "grad_norm": 6.570418834686279, "learning_rate": 9.972563231905975e-06, "loss": 1.125, "step": 457 }, { "epoch": 0.06253413435281267, "grad_norm": 7.758591175079346, "learning_rate": 9.97233142375702e-06, "loss": 1.1348, "step": 458 }, { "epoch": 0.06267067176406335, "grad_norm": 5.941880226135254, "learning_rate": 9.972098643188026e-06, "loss": 1.0108, "step": 459 }, { "epoch": 0.06280720917531403, "grad_norm": 7.095127582550049, "learning_rate": 9.971864890244514e-06, "loss": 1.0797, "step": 460 }, { "epoch": 0.06294374658656472, "grad_norm": 9.06072998046875, "learning_rate": 9.971630164972197e-06, "loss": 1.0183, "step": 461 }, { "epoch": 0.0630802839978154, "grad_norm": 6.913858413696289, "learning_rate": 9.971394467416984e-06, "loss": 1.0388, "step": 462 }, { "epoch": 0.06321682140906608, "grad_norm": 6.711099147796631, "learning_rate": 9.971157797624964e-06, "loss": 1.078, "step": 463 }, { "epoch": 0.06335335882031677, "grad_norm": 10.899479866027832, "learning_rate": 9.970920155642425e-06, "loss": 0.9736, "step": 464 }, { "epoch": 0.06348989623156745, "grad_norm": 8.62476634979248, "learning_rate": 9.970681541515841e-06, "loss": 1.0069, "step": 465 }, { "epoch": 0.06362643364281813, "grad_norm": 5.8636250495910645, "learning_rate": 9.970441955291878e-06, "loss": 1.0187, "step": 466 }, { "epoch": 0.06376297105406882, "grad_norm": 7.518034934997559, "learning_rate": 9.97020139701739e-06, "loss": 1.1402, "step": 467 }, { "epoch": 0.0638995084653195, "grad_norm": 47.3076171875, "learning_rate": 9.96995986673942e-06, "loss": 1.0728, "step": 468 }, { "epoch": 0.06403604587657018, "grad_norm": 12.71468448638916, "learning_rate": 9.969717364505205e-06, "loss": 0.9935, "step": 469 }, { "epoch": 0.06417258328782087, "grad_norm": 70.96749114990234, "learning_rate": 9.969473890362172e-06, "loss": 1.0149, "step": 470 }, { "epoch": 0.06430912069907155, "grad_norm": 7.937244415283203, "learning_rate": 9.969229444357933e-06, "loss": 1.1018, "step": 471 }, { "epoch": 0.06444565811032223, "grad_norm": 8.059639930725098, "learning_rate": 9.968984026540295e-06, "loss": 1.1557, "step": 472 }, { "epoch": 0.0645821955215729, "grad_norm": 16.06111717224121, "learning_rate": 9.968737636957254e-06, "loss": 1.0912, "step": 473 }, { "epoch": 0.0647187329328236, "grad_norm": 21.034284591674805, "learning_rate": 9.968490275656995e-06, "loss": 1.1584, "step": 474 }, { "epoch": 0.06485527034407428, "grad_norm": 9.831196784973145, "learning_rate": 9.968241942687895e-06, "loss": 1.1473, "step": 475 }, { "epoch": 0.06499180775532495, "grad_norm": 8.326192855834961, "learning_rate": 9.967992638098517e-06, "loss": 1.2217, "step": 476 }, { "epoch": 0.06512834516657565, "grad_norm": 12.756434440612793, "learning_rate": 9.967742361937616e-06, "loss": 1.2377, "step": 477 }, { "epoch": 0.06526488257782632, "grad_norm": 9.621971130371094, "learning_rate": 9.967491114254139e-06, "loss": 1.1072, "step": 478 }, { "epoch": 0.065401419989077, "grad_norm": 8.77814769744873, "learning_rate": 9.967238895097223e-06, "loss": 1.1642, "step": 479 }, { "epoch": 0.0655379574003277, "grad_norm": 9.399707794189453, "learning_rate": 9.966985704516191e-06, "loss": 1.098, "step": 480 }, { "epoch": 0.06567449481157837, "grad_norm": 14.020768165588379, "learning_rate": 9.966731542560561e-06, "loss": 1.0298, "step": 481 }, { "epoch": 0.06581103222282905, "grad_norm": 9.7642822265625, "learning_rate": 9.966476409280036e-06, "loss": 1.2219, "step": 482 }, { "epoch": 0.06594756963407974, "grad_norm": 7.952610015869141, "learning_rate": 9.966220304724515e-06, "loss": 1.2089, "step": 483 }, { "epoch": 0.06608410704533042, "grad_norm": 8.397884368896484, "learning_rate": 9.965963228944077e-06, "loss": 1.1208, "step": 484 }, { "epoch": 0.0662206444565811, "grad_norm": 8.838234901428223, "learning_rate": 9.965705181989005e-06, "loss": 1.1389, "step": 485 }, { "epoch": 0.06635718186783178, "grad_norm": 7.725074291229248, "learning_rate": 9.965446163909758e-06, "loss": 1.0131, "step": 486 }, { "epoch": 0.06649371927908247, "grad_norm": 8.16168212890625, "learning_rate": 9.965186174756996e-06, "loss": 1.187, "step": 487 }, { "epoch": 0.06663025669033315, "grad_norm": 11.386713027954102, "learning_rate": 9.96492521458156e-06, "loss": 1.0753, "step": 488 }, { "epoch": 0.06676679410158383, "grad_norm": 8.7830228805542, "learning_rate": 9.964663283434488e-06, "loss": 0.9255, "step": 489 }, { "epoch": 0.06690333151283452, "grad_norm": 8.198393821716309, "learning_rate": 9.964400381367004e-06, "loss": 1.1911, "step": 490 }, { "epoch": 0.0670398689240852, "grad_norm": 6.617398262023926, "learning_rate": 9.964136508430522e-06, "loss": 1.084, "step": 491 }, { "epoch": 0.06717640633533588, "grad_norm": 11.767572402954102, "learning_rate": 9.963871664676647e-06, "loss": 1.0612, "step": 492 }, { "epoch": 0.06731294374658657, "grad_norm": 11.061574935913086, "learning_rate": 9.963605850157173e-06, "loss": 1.2428, "step": 493 }, { "epoch": 0.06744948115783725, "grad_norm": 10.677107810974121, "learning_rate": 9.963339064924086e-06, "loss": 1.1812, "step": 494 }, { "epoch": 0.06758601856908793, "grad_norm": 9.93780517578125, "learning_rate": 9.963071309029561e-06, "loss": 1.2808, "step": 495 }, { "epoch": 0.06772255598033862, "grad_norm": 9.663151741027832, "learning_rate": 9.962802582525958e-06, "loss": 1.1549, "step": 496 }, { "epoch": 0.0678590933915893, "grad_norm": 10.092496871948242, "learning_rate": 9.962532885465835e-06, "loss": 1.1228, "step": 497 }, { "epoch": 0.06799563080283998, "grad_norm": 11.131919860839844, "learning_rate": 9.96226221790193e-06, "loss": 1.0983, "step": 498 }, { "epoch": 0.06813216821409065, "grad_norm": 8.003222465515137, "learning_rate": 9.961990579887183e-06, "loss": 1.0956, "step": 499 }, { "epoch": 0.06826870562534135, "grad_norm": 7.003744602203369, "learning_rate": 9.961717971474714e-06, "loss": 1.096, "step": 500 }, { "epoch": 0.06840524303659203, "grad_norm": 12.33846378326416, "learning_rate": 9.961444392717837e-06, "loss": 1.0265, "step": 501 }, { "epoch": 0.0685417804478427, "grad_norm": 11.093429565429688, "learning_rate": 9.961169843670051e-06, "loss": 0.9554, "step": 502 }, { "epoch": 0.0686783178590934, "grad_norm": 7.673842430114746, "learning_rate": 9.960894324385054e-06, "loss": 0.9935, "step": 503 }, { "epoch": 0.06881485527034407, "grad_norm": 7.5609893798828125, "learning_rate": 9.960617834916726e-06, "loss": 1.2907, "step": 504 }, { "epoch": 0.06895139268159475, "grad_norm": 6.69573974609375, "learning_rate": 9.960340375319138e-06, "loss": 1.01, "step": 505 }, { "epoch": 0.06908793009284545, "grad_norm": 9.986468315124512, "learning_rate": 9.960061945646554e-06, "loss": 1.0008, "step": 506 }, { "epoch": 0.06922446750409612, "grad_norm": 7.103825569152832, "learning_rate": 9.95978254595342e-06, "loss": 1.1274, "step": 507 }, { "epoch": 0.0693610049153468, "grad_norm": 5.764829158782959, "learning_rate": 9.959502176294384e-06, "loss": 0.9529, "step": 508 }, { "epoch": 0.0694975423265975, "grad_norm": 13.524877548217773, "learning_rate": 9.959220836724274e-06, "loss": 1.0929, "step": 509 }, { "epoch": 0.06963407973784817, "grad_norm": 6.53584098815918, "learning_rate": 9.958938527298113e-06, "loss": 1.1082, "step": 510 }, { "epoch": 0.06977061714909885, "grad_norm": 5.57343053817749, "learning_rate": 9.958655248071105e-06, "loss": 1.011, "step": 511 }, { "epoch": 0.06990715456034953, "grad_norm": 7.586821556091309, "learning_rate": 9.958370999098654e-06, "loss": 1.2024, "step": 512 }, { "epoch": 0.07004369197160022, "grad_norm": 6.948132038116455, "learning_rate": 9.95808578043635e-06, "loss": 1.0466, "step": 513 }, { "epoch": 0.0701802293828509, "grad_norm": 18.952350616455078, "learning_rate": 9.957799592139971e-06, "loss": 1.0899, "step": 514 }, { "epoch": 0.07031676679410158, "grad_norm": 5.935039520263672, "learning_rate": 9.957512434265486e-06, "loss": 1.1508, "step": 515 }, { "epoch": 0.07045330420535227, "grad_norm": 7.194721221923828, "learning_rate": 9.957224306869053e-06, "loss": 1.0677, "step": 516 }, { "epoch": 0.07058984161660295, "grad_norm": 24.654516220092773, "learning_rate": 9.95693521000702e-06, "loss": 1.0923, "step": 517 }, { "epoch": 0.07072637902785363, "grad_norm": 6.552839279174805, "learning_rate": 9.956645143735926e-06, "loss": 1.1995, "step": 518 }, { "epoch": 0.07086291643910432, "grad_norm": 61.21061706542969, "learning_rate": 9.956354108112496e-06, "loss": 1.0673, "step": 519 }, { "epoch": 0.070999453850355, "grad_norm": 8.373029708862305, "learning_rate": 9.956062103193648e-06, "loss": 1.0279, "step": 520 }, { "epoch": 0.07113599126160568, "grad_norm": 8.874249458312988, "learning_rate": 9.955769129036488e-06, "loss": 0.9574, "step": 521 }, { "epoch": 0.07127252867285637, "grad_norm": 6.208461284637451, "learning_rate": 9.95547518569831e-06, "loss": 1.0158, "step": 522 }, { "epoch": 0.07140906608410705, "grad_norm": 10.488898277282715, "learning_rate": 9.955180273236606e-06, "loss": 1.1785, "step": 523 }, { "epoch": 0.07154560349535773, "grad_norm": 8.536693572998047, "learning_rate": 9.954884391709043e-06, "loss": 0.9709, "step": 524 }, { "epoch": 0.0716821409066084, "grad_norm": 8.12423324584961, "learning_rate": 9.954587541173488e-06, "loss": 1.0211, "step": 525 }, { "epoch": 0.0718186783178591, "grad_norm": 11.935105323791504, "learning_rate": 9.954289721687997e-06, "loss": 0.9218, "step": 526 }, { "epoch": 0.07195521572910978, "grad_norm": 6.642602443695068, "learning_rate": 9.953990933310813e-06, "loss": 1.1321, "step": 527 }, { "epoch": 0.07209175314036045, "grad_norm": 7.558493137359619, "learning_rate": 9.953691176100367e-06, "loss": 0.9249, "step": 528 }, { "epoch": 0.07222829055161115, "grad_norm": 8.105002403259277, "learning_rate": 9.953390450115281e-06, "loss": 1.0086, "step": 529 }, { "epoch": 0.07236482796286182, "grad_norm": 11.003863334655762, "learning_rate": 9.953088755414369e-06, "loss": 1.1623, "step": 530 }, { "epoch": 0.0725013653741125, "grad_norm": 6.582425594329834, "learning_rate": 9.952786092056632e-06, "loss": 0.9456, "step": 531 }, { "epoch": 0.0726379027853632, "grad_norm": 8.663773536682129, "learning_rate": 9.95248246010126e-06, "loss": 0.9092, "step": 532 }, { "epoch": 0.07277444019661387, "grad_norm": 7.234334468841553, "learning_rate": 9.952177859607632e-06, "loss": 1.0698, "step": 533 }, { "epoch": 0.07291097760786455, "grad_norm": 8.449201583862305, "learning_rate": 9.951872290635321e-06, "loss": 0.9155, "step": 534 }, { "epoch": 0.07304751501911524, "grad_norm": 6.877994537353516, "learning_rate": 9.951565753244083e-06, "loss": 1.013, "step": 535 }, { "epoch": 0.07318405243036592, "grad_norm": 16.813125610351562, "learning_rate": 9.951258247493867e-06, "loss": 1.0147, "step": 536 }, { "epoch": 0.0733205898416166, "grad_norm": 7.62076997756958, "learning_rate": 9.95094977344481e-06, "loss": 1.0567, "step": 537 }, { "epoch": 0.07345712725286728, "grad_norm": 8.237454414367676, "learning_rate": 9.950640331157241e-06, "loss": 1.0712, "step": 538 }, { "epoch": 0.07359366466411797, "grad_norm": 15.649870872497559, "learning_rate": 9.950329920691676e-06, "loss": 1.0209, "step": 539 }, { "epoch": 0.07373020207536865, "grad_norm": 8.374606132507324, "learning_rate": 9.950018542108818e-06, "loss": 1.149, "step": 540 }, { "epoch": 0.07386673948661933, "grad_norm": 8.06190013885498, "learning_rate": 9.949706195469568e-06, "loss": 1.0214, "step": 541 }, { "epoch": 0.07400327689787002, "grad_norm": 8.00143814086914, "learning_rate": 9.949392880835005e-06, "loss": 1.0731, "step": 542 }, { "epoch": 0.0741398143091207, "grad_norm": 6.987200736999512, "learning_rate": 9.949078598266405e-06, "loss": 1.1158, "step": 543 }, { "epoch": 0.07427635172037138, "grad_norm": 8.161616325378418, "learning_rate": 9.948763347825229e-06, "loss": 0.9958, "step": 544 }, { "epoch": 0.07441288913162207, "grad_norm": 8.629700660705566, "learning_rate": 9.948447129573133e-06, "loss": 0.9354, "step": 545 }, { "epoch": 0.07454942654287275, "grad_norm": 6.935551166534424, "learning_rate": 9.948129943571959e-06, "loss": 1.047, "step": 546 }, { "epoch": 0.07468596395412343, "grad_norm": 7.240071773529053, "learning_rate": 9.94781178988373e-06, "loss": 1.0838, "step": 547 }, { "epoch": 0.07482250136537412, "grad_norm": 8.100765228271484, "learning_rate": 9.947492668570675e-06, "loss": 1.1392, "step": 548 }, { "epoch": 0.0749590387766248, "grad_norm": 7.918428897857666, "learning_rate": 9.947172579695199e-06, "loss": 1.0742, "step": 549 }, { "epoch": 0.07509557618787548, "grad_norm": 18.724864959716797, "learning_rate": 9.946851523319903e-06, "loss": 1.1208, "step": 550 }, { "epoch": 0.07523211359912615, "grad_norm": 8.143332481384277, "learning_rate": 9.946529499507572e-06, "loss": 1.0732, "step": 551 }, { "epoch": 0.07536865101037685, "grad_norm": 10.897701263427734, "learning_rate": 9.946206508321183e-06, "loss": 1.1041, "step": 552 }, { "epoch": 0.07550518842162753, "grad_norm": 7.314990043640137, "learning_rate": 9.945882549823906e-06, "loss": 1.144, "step": 553 }, { "epoch": 0.0756417258328782, "grad_norm": 7.440279960632324, "learning_rate": 9.945557624079092e-06, "loss": 0.9702, "step": 554 }, { "epoch": 0.0757782632441289, "grad_norm": 6.305679798126221, "learning_rate": 9.94523173115029e-06, "loss": 0.9455, "step": 555 }, { "epoch": 0.07591480065537957, "grad_norm": 7.331363677978516, "learning_rate": 9.944904871101227e-06, "loss": 1.0308, "step": 556 }, { "epoch": 0.07605133806663025, "grad_norm": 6.629897594451904, "learning_rate": 9.944577043995832e-06, "loss": 0.9365, "step": 557 }, { "epoch": 0.07618787547788095, "grad_norm": 7.544886112213135, "learning_rate": 9.944248249898215e-06, "loss": 0.9066, "step": 558 }, { "epoch": 0.07632441288913162, "grad_norm": 5.935361862182617, "learning_rate": 9.943918488872674e-06, "loss": 1.1311, "step": 559 }, { "epoch": 0.0764609503003823, "grad_norm": 7.792722702026367, "learning_rate": 9.943587760983706e-06, "loss": 0.9364, "step": 560 }, { "epoch": 0.07659748771163298, "grad_norm": 7.6601409912109375, "learning_rate": 9.943256066295983e-06, "loss": 1.0021, "step": 561 }, { "epoch": 0.07673402512288367, "grad_norm": 19.323131561279297, "learning_rate": 9.942923404874377e-06, "loss": 0.9899, "step": 562 }, { "epoch": 0.07687056253413435, "grad_norm": 6.944280624389648, "learning_rate": 9.942589776783945e-06, "loss": 0.9699, "step": 563 }, { "epoch": 0.07700709994538503, "grad_norm": 7.373135566711426, "learning_rate": 9.94225518208993e-06, "loss": 1.0414, "step": 564 }, { "epoch": 0.07714363735663572, "grad_norm": 5.787781715393066, "learning_rate": 9.941919620857773e-06, "loss": 1.0021, "step": 565 }, { "epoch": 0.0772801747678864, "grad_norm": 8.803869247436523, "learning_rate": 9.941583093153097e-06, "loss": 0.9899, "step": 566 }, { "epoch": 0.07741671217913708, "grad_norm": 5.849770545959473, "learning_rate": 9.941245599041713e-06, "loss": 1.0563, "step": 567 }, { "epoch": 0.07755324959038777, "grad_norm": 10.013983726501465, "learning_rate": 9.940907138589624e-06, "loss": 1.1451, "step": 568 }, { "epoch": 0.07768978700163845, "grad_norm": 9.510555267333984, "learning_rate": 9.940567711863022e-06, "loss": 1.0991, "step": 569 }, { "epoch": 0.07782632441288913, "grad_norm": 6.2042131423950195, "learning_rate": 9.94022731892829e-06, "loss": 0.9694, "step": 570 }, { "epoch": 0.07796286182413982, "grad_norm": 23.564268112182617, "learning_rate": 9.939885959851993e-06, "loss": 0.9367, "step": 571 }, { "epoch": 0.0780993992353905, "grad_norm": 8.106094360351562, "learning_rate": 9.939543634700891e-06, "loss": 1.225, "step": 572 }, { "epoch": 0.07823593664664118, "grad_norm": 7.477207183837891, "learning_rate": 9.939200343541933e-06, "loss": 0.9011, "step": 573 }, { "epoch": 0.07837247405789186, "grad_norm": 7.0610504150390625, "learning_rate": 9.938856086442252e-06, "loss": 1.0456, "step": 574 }, { "epoch": 0.07850901146914255, "grad_norm": 6.810762405395508, "learning_rate": 9.938510863469175e-06, "loss": 1.0353, "step": 575 }, { "epoch": 0.07864554888039323, "grad_norm": 7.124144077301025, "learning_rate": 9.938164674690215e-06, "loss": 1.108, "step": 576 }, { "epoch": 0.0787820862916439, "grad_norm": 6.193704128265381, "learning_rate": 9.937817520173077e-06, "loss": 1.0675, "step": 577 }, { "epoch": 0.0789186237028946, "grad_norm": 11.75933837890625, "learning_rate": 9.93746939998565e-06, "loss": 1.0357, "step": 578 }, { "epoch": 0.07905516111414528, "grad_norm": 16.49513053894043, "learning_rate": 9.937120314196016e-06, "loss": 1.0346, "step": 579 }, { "epoch": 0.07919169852539595, "grad_norm": 13.529397964477539, "learning_rate": 9.936770262872444e-06, "loss": 0.9486, "step": 580 }, { "epoch": 0.07932823593664665, "grad_norm": 5.9588236808776855, "learning_rate": 9.936419246083392e-06, "loss": 0.9331, "step": 581 }, { "epoch": 0.07946477334789732, "grad_norm": 5.873622417449951, "learning_rate": 9.936067263897507e-06, "loss": 1.0345, "step": 582 }, { "epoch": 0.079601310759148, "grad_norm": 5.907949447631836, "learning_rate": 9.935714316383627e-06, "loss": 1.2154, "step": 583 }, { "epoch": 0.0797378481703987, "grad_norm": 4.859532356262207, "learning_rate": 9.935360403610773e-06, "loss": 0.891, "step": 584 }, { "epoch": 0.07987438558164937, "grad_norm": 7.816528797149658, "learning_rate": 9.93500552564816e-06, "loss": 1.0162, "step": 585 }, { "epoch": 0.08001092299290005, "grad_norm": 11.550995826721191, "learning_rate": 9.934649682565192e-06, "loss": 0.9607, "step": 586 }, { "epoch": 0.08014746040415073, "grad_norm": 6.763953685760498, "learning_rate": 9.934292874431457e-06, "loss": 1.048, "step": 587 }, { "epoch": 0.08028399781540142, "grad_norm": 7.107987880706787, "learning_rate": 9.933935101316735e-06, "loss": 0.9637, "step": 588 }, { "epoch": 0.0804205352266521, "grad_norm": 7.154505252838135, "learning_rate": 9.933576363290995e-06, "loss": 1.0104, "step": 589 }, { "epoch": 0.08055707263790278, "grad_norm": 6.059754371643066, "learning_rate": 9.933216660424396e-06, "loss": 1.0994, "step": 590 }, { "epoch": 0.08069361004915347, "grad_norm": 8.587461471557617, "learning_rate": 9.93285599278728e-06, "loss": 1.1228, "step": 591 }, { "epoch": 0.08083014746040415, "grad_norm": 6.170499324798584, "learning_rate": 9.932494360450184e-06, "loss": 1.0492, "step": 592 }, { "epoch": 0.08096668487165483, "grad_norm": 9.466740608215332, "learning_rate": 9.93213176348383e-06, "loss": 0.8957, "step": 593 }, { "epoch": 0.08110322228290552, "grad_norm": 11.088088035583496, "learning_rate": 9.93176820195913e-06, "loss": 1.0328, "step": 594 }, { "epoch": 0.0812397596941562, "grad_norm": 5.289377212524414, "learning_rate": 9.931403675947187e-06, "loss": 0.9181, "step": 595 }, { "epoch": 0.08137629710540688, "grad_norm": 6.743936538696289, "learning_rate": 9.931038185519285e-06, "loss": 1.0795, "step": 596 }, { "epoch": 0.08151283451665757, "grad_norm": 9.223841667175293, "learning_rate": 9.930671730746903e-06, "loss": 0.9758, "step": 597 }, { "epoch": 0.08164937192790825, "grad_norm": 6.690703392028809, "learning_rate": 9.93030431170171e-06, "loss": 1.0586, "step": 598 }, { "epoch": 0.08178590933915893, "grad_norm": 5.420074462890625, "learning_rate": 9.929935928455558e-06, "loss": 1.1439, "step": 599 }, { "epoch": 0.0819224467504096, "grad_norm": 7.876078128814697, "learning_rate": 9.929566581080491e-06, "loss": 0.9426, "step": 600 }, { "epoch": 0.0820589841616603, "grad_norm": 7.8573503494262695, "learning_rate": 9.929196269648741e-06, "loss": 1.0572, "step": 601 }, { "epoch": 0.08219552157291098, "grad_norm": 6.792572021484375, "learning_rate": 9.928824994232731e-06, "loss": 1.0569, "step": 602 }, { "epoch": 0.08233205898416165, "grad_norm": 5.676190376281738, "learning_rate": 9.928452754905065e-06, "loss": 1.0747, "step": 603 }, { "epoch": 0.08246859639541235, "grad_norm": 6.366390228271484, "learning_rate": 9.928079551738542e-06, "loss": 0.8856, "step": 604 }, { "epoch": 0.08260513380666303, "grad_norm": 7.311120986938477, "learning_rate": 9.927705384806152e-06, "loss": 0.995, "step": 605 }, { "epoch": 0.0827416712179137, "grad_norm": 8.263860702514648, "learning_rate": 9.927330254181063e-06, "loss": 0.9105, "step": 606 }, { "epoch": 0.0828782086291644, "grad_norm": 7.200901031494141, "learning_rate": 9.926954159936643e-06, "loss": 0.9758, "step": 607 }, { "epoch": 0.08301474604041507, "grad_norm": 5.98473596572876, "learning_rate": 9.92657710214644e-06, "loss": 1.0747, "step": 608 }, { "epoch": 0.08315128345166575, "grad_norm": 8.052898406982422, "learning_rate": 9.926199080884197e-06, "loss": 0.9736, "step": 609 }, { "epoch": 0.08328782086291645, "grad_norm": 7.452002048492432, "learning_rate": 9.925820096223838e-06, "loss": 0.9944, "step": 610 }, { "epoch": 0.08342435827416712, "grad_norm": 7.499879837036133, "learning_rate": 9.925440148239483e-06, "loss": 1.053, "step": 611 }, { "epoch": 0.0835608956854178, "grad_norm": 15.702729225158691, "learning_rate": 9.925059237005437e-06, "loss": 0.8509, "step": 612 }, { "epoch": 0.08369743309666848, "grad_norm": 6.883962631225586, "learning_rate": 9.92467736259619e-06, "loss": 1.1427, "step": 613 }, { "epoch": 0.08383397050791917, "grad_norm": 7.578169822692871, "learning_rate": 9.924294525086427e-06, "loss": 1.1013, "step": 614 }, { "epoch": 0.08397050791916985, "grad_norm": 11.126921653747559, "learning_rate": 9.923910724551018e-06, "loss": 1.0211, "step": 615 }, { "epoch": 0.08410704533042053, "grad_norm": 9.266389846801758, "learning_rate": 9.92352596106502e-06, "loss": 1.0122, "step": 616 }, { "epoch": 0.08424358274167122, "grad_norm": 8.483704566955566, "learning_rate": 9.92314023470368e-06, "loss": 1.1395, "step": 617 }, { "epoch": 0.0843801201529219, "grad_norm": 13.890247344970703, "learning_rate": 9.922753545542433e-06, "loss": 1.1494, "step": 618 }, { "epoch": 0.08451665756417258, "grad_norm": 21.742395401000977, "learning_rate": 9.922365893656903e-06, "loss": 1.0871, "step": 619 }, { "epoch": 0.08465319497542327, "grad_norm": 69.384033203125, "learning_rate": 9.9219772791229e-06, "loss": 1.0142, "step": 620 }, { "epoch": 0.08478973238667395, "grad_norm": 19.770816802978516, "learning_rate": 9.921587702016429e-06, "loss": 1.3019, "step": 621 }, { "epoch": 0.08492626979792463, "grad_norm": 23.36754608154297, "learning_rate": 9.921197162413671e-06, "loss": 1.2471, "step": 622 }, { "epoch": 0.08506280720917532, "grad_norm": 34.89978790283203, "learning_rate": 9.920805660391006e-06, "loss": 1.3033, "step": 623 }, { "epoch": 0.085199344620426, "grad_norm": 31.79450035095215, "learning_rate": 9.920413196024998e-06, "loss": 1.1014, "step": 624 }, { "epoch": 0.08533588203167668, "grad_norm": 19.7933406829834, "learning_rate": 9.920019769392401e-06, "loss": 1.0786, "step": 625 }, { "epoch": 0.08547241944292736, "grad_norm": 23.2260799407959, "learning_rate": 9.919625380570154e-06, "loss": 1.1774, "step": 626 }, { "epoch": 0.08560895685417805, "grad_norm": 23.111881256103516, "learning_rate": 9.919230029635388e-06, "loss": 1.0849, "step": 627 }, { "epoch": 0.08574549426542873, "grad_norm": 30.206905364990234, "learning_rate": 9.91883371666542e-06, "loss": 1.1465, "step": 628 }, { "epoch": 0.0858820316766794, "grad_norm": 20.825210571289062, "learning_rate": 9.918436441737751e-06, "loss": 1.1743, "step": 629 }, { "epoch": 0.0860185690879301, "grad_norm": 13.444172859191895, "learning_rate": 9.918038204930082e-06, "loss": 1.2078, "step": 630 }, { "epoch": 0.08615510649918078, "grad_norm": 24.10356903076172, "learning_rate": 9.91763900632029e-06, "loss": 0.9479, "step": 631 }, { "epoch": 0.08629164391043145, "grad_norm": 13.952431678771973, "learning_rate": 9.917238845986446e-06, "loss": 1.1373, "step": 632 }, { "epoch": 0.08642818132168215, "grad_norm": 39.21952438354492, "learning_rate": 9.916837724006806e-06, "loss": 1.1075, "step": 633 }, { "epoch": 0.08656471873293282, "grad_norm": 160.41452026367188, "learning_rate": 9.916435640459819e-06, "loss": 1.1872, "step": 634 }, { "epoch": 0.0867012561441835, "grad_norm": 60.691062927246094, "learning_rate": 9.916032595424116e-06, "loss": 1.1251, "step": 635 }, { "epoch": 0.0868377935554342, "grad_norm": 39.131595611572266, "learning_rate": 9.915628588978522e-06, "loss": 1.0222, "step": 636 }, { "epoch": 0.08697433096668487, "grad_norm": 25.074953079223633, "learning_rate": 9.915223621202045e-06, "loss": 1.2576, "step": 637 }, { "epoch": 0.08711086837793555, "grad_norm": 53.14552307128906, "learning_rate": 9.914817692173883e-06, "loss": 1.1653, "step": 638 }, { "epoch": 0.08724740578918623, "grad_norm": 27.938920974731445, "learning_rate": 9.914410801973422e-06, "loss": 1.2265, "step": 639 }, { "epoch": 0.08738394320043692, "grad_norm": 149.00790405273438, "learning_rate": 9.914002950680238e-06, "loss": 1.2609, "step": 640 }, { "epoch": 0.0875204806116876, "grad_norm": 62.381065368652344, "learning_rate": 9.91359413837409e-06, "loss": 1.1369, "step": 641 }, { "epoch": 0.08765701802293828, "grad_norm": 57.87706756591797, "learning_rate": 9.91318436513493e-06, "loss": 1.07, "step": 642 }, { "epoch": 0.08779355543418897, "grad_norm": 25.35280418395996, "learning_rate": 9.912773631042895e-06, "loss": 1.0858, "step": 643 }, { "epoch": 0.08793009284543965, "grad_norm": 362.02947998046875, "learning_rate": 9.912361936178312e-06, "loss": 1.1724, "step": 644 }, { "epoch": 0.08806663025669033, "grad_norm": 11.029642105102539, "learning_rate": 9.911949280621692e-06, "loss": 0.9894, "step": 645 }, { "epoch": 0.08820316766794102, "grad_norm": 16.008508682250977, "learning_rate": 9.911535664453736e-06, "loss": 1.0754, "step": 646 }, { "epoch": 0.0883397050791917, "grad_norm": 13.757280349731445, "learning_rate": 9.911121087755337e-06, "loss": 1.0953, "step": 647 }, { "epoch": 0.08847624249044238, "grad_norm": 12.258698463439941, "learning_rate": 9.91070555060757e-06, "loss": 1.0228, "step": 648 }, { "epoch": 0.08861277990169307, "grad_norm": 18.537839889526367, "learning_rate": 9.910289053091702e-06, "loss": 0.9328, "step": 649 }, { "epoch": 0.08874931731294375, "grad_norm": 45.65349197387695, "learning_rate": 9.909871595289184e-06, "loss": 1.161, "step": 650 }, { "epoch": 0.08888585472419443, "grad_norm": 8.008685111999512, "learning_rate": 9.909453177281655e-06, "loss": 1.085, "step": 651 }, { "epoch": 0.0890223921354451, "grad_norm": 25.088502883911133, "learning_rate": 9.909033799150947e-06, "loss": 0.994, "step": 652 }, { "epoch": 0.0891589295466958, "grad_norm": 59.963260650634766, "learning_rate": 9.908613460979073e-06, "loss": 1.3993, "step": 653 }, { "epoch": 0.08929546695794648, "grad_norm": 19.86861801147461, "learning_rate": 9.90819216284824e-06, "loss": 1.2713, "step": 654 }, { "epoch": 0.08943200436919715, "grad_norm": 62.54096221923828, "learning_rate": 9.907769904840837e-06, "loss": 1.0339, "step": 655 }, { "epoch": 0.08956854178044785, "grad_norm": 103.5677261352539, "learning_rate": 9.907346687039445e-06, "loss": 1.1487, "step": 656 }, { "epoch": 0.08970507919169853, "grad_norm": 25.932085037231445, "learning_rate": 9.90692250952683e-06, "loss": 1.1397, "step": 657 }, { "epoch": 0.0898416166029492, "grad_norm": 32.802223205566406, "learning_rate": 9.906497372385949e-06, "loss": 1.134, "step": 658 }, { "epoch": 0.0899781540141999, "grad_norm": 347.5599365234375, "learning_rate": 9.906071275699941e-06, "loss": 1.1711, "step": 659 }, { "epoch": 0.09011469142545057, "grad_norm": 17.203805923461914, "learning_rate": 9.90564421955214e-06, "loss": 1.0832, "step": 660 }, { "epoch": 0.09025122883670125, "grad_norm": 33.25777053833008, "learning_rate": 9.905216204026063e-06, "loss": 1.1188, "step": 661 }, { "epoch": 0.09038776624795195, "grad_norm": 12.876509666442871, "learning_rate": 9.904787229205411e-06, "loss": 1.0964, "step": 662 }, { "epoch": 0.09052430365920262, "grad_norm": 19.849456787109375, "learning_rate": 9.90435729517408e-06, "loss": 1.0975, "step": 663 }, { "epoch": 0.0906608410704533, "grad_norm": 26.65281105041504, "learning_rate": 9.903926402016153e-06, "loss": 1.0741, "step": 664 }, { "epoch": 0.09079737848170398, "grad_norm": 30.680503845214844, "learning_rate": 9.903494549815896e-06, "loss": 1.2109, "step": 665 }, { "epoch": 0.09093391589295467, "grad_norm": 14.728971481323242, "learning_rate": 9.903061738657762e-06, "loss": 1.0336, "step": 666 }, { "epoch": 0.09107045330420535, "grad_norm": 8.730377197265625, "learning_rate": 9.9026279686264e-06, "loss": 1.0571, "step": 667 }, { "epoch": 0.09120699071545603, "grad_norm": 17.055891036987305, "learning_rate": 9.902193239806634e-06, "loss": 0.9821, "step": 668 }, { "epoch": 0.09134352812670672, "grad_norm": 18.23478126525879, "learning_rate": 9.90175755228349e-06, "loss": 0.9801, "step": 669 }, { "epoch": 0.0914800655379574, "grad_norm": 17.85597801208496, "learning_rate": 9.901320906142165e-06, "loss": 1.0372, "step": 670 }, { "epoch": 0.09161660294920808, "grad_norm": 18.538557052612305, "learning_rate": 9.90088330146806e-06, "loss": 1.0806, "step": 671 }, { "epoch": 0.09175314036045877, "grad_norm": 10.971503257751465, "learning_rate": 9.900444738346751e-06, "loss": 1.0848, "step": 672 }, { "epoch": 0.09188967777170945, "grad_norm": 7.44399881362915, "learning_rate": 9.900005216864008e-06, "loss": 1.134, "step": 673 }, { "epoch": 0.09202621518296013, "grad_norm": 10.081724166870117, "learning_rate": 9.899564737105786e-06, "loss": 1.0402, "step": 674 }, { "epoch": 0.09216275259421082, "grad_norm": 11.893204689025879, "learning_rate": 9.899123299158228e-06, "loss": 1.2656, "step": 675 }, { "epoch": 0.0922992900054615, "grad_norm": 8.132952690124512, "learning_rate": 9.898680903107668e-06, "loss": 1.0734, "step": 676 }, { "epoch": 0.09243582741671218, "grad_norm": 7.825572490692139, "learning_rate": 9.898237549040616e-06, "loss": 1.068, "step": 677 }, { "epoch": 0.09257236482796286, "grad_norm": 6.369556427001953, "learning_rate": 9.897793237043783e-06, "loss": 1.1454, "step": 678 }, { "epoch": 0.09270890223921355, "grad_norm": 11.835112571716309, "learning_rate": 9.897347967204059e-06, "loss": 1.0029, "step": 679 }, { "epoch": 0.09284543965046423, "grad_norm": 11.235885620117188, "learning_rate": 9.896901739608525e-06, "loss": 1.2173, "step": 680 }, { "epoch": 0.0929819770617149, "grad_norm": 13.353041648864746, "learning_rate": 9.896454554344448e-06, "loss": 1.0814, "step": 681 }, { "epoch": 0.0931185144729656, "grad_norm": 8.589726448059082, "learning_rate": 9.89600641149928e-06, "loss": 1.128, "step": 682 }, { "epoch": 0.09325505188421628, "grad_norm": 7.73782205581665, "learning_rate": 9.895557311160666e-06, "loss": 1.1065, "step": 683 }, { "epoch": 0.09339158929546695, "grad_norm": 8.88583755493164, "learning_rate": 9.895107253416434e-06, "loss": 1.1724, "step": 684 }, { "epoch": 0.09352812670671765, "grad_norm": 6.989463806152344, "learning_rate": 9.894656238354597e-06, "loss": 0.9976, "step": 685 }, { "epoch": 0.09366466411796832, "grad_norm": 8.322875022888184, "learning_rate": 9.894204266063362e-06, "loss": 1.0266, "step": 686 }, { "epoch": 0.093801201529219, "grad_norm": 38.83163070678711, "learning_rate": 9.89375133663112e-06, "loss": 1.1638, "step": 687 }, { "epoch": 0.0939377389404697, "grad_norm": 12.195005416870117, "learning_rate": 9.893297450146445e-06, "loss": 1.0071, "step": 688 }, { "epoch": 0.09407427635172037, "grad_norm": 30.113510131835938, "learning_rate": 9.892842606698104e-06, "loss": 1.1413, "step": 689 }, { "epoch": 0.09421081376297105, "grad_norm": 10.582636833190918, "learning_rate": 9.892386806375048e-06, "loss": 1.1308, "step": 690 }, { "epoch": 0.09434735117422173, "grad_norm": 8.679417610168457, "learning_rate": 9.891930049266417e-06, "loss": 0.9133, "step": 691 }, { "epoch": 0.09448388858547242, "grad_norm": 15.689891815185547, "learning_rate": 9.891472335461537e-06, "loss": 1.0705, "step": 692 }, { "epoch": 0.0946204259967231, "grad_norm": 10.571813583374023, "learning_rate": 9.891013665049924e-06, "loss": 1.172, "step": 693 }, { "epoch": 0.09475696340797378, "grad_norm": 13.000786781311035, "learning_rate": 9.890554038121274e-06, "loss": 1.0992, "step": 694 }, { "epoch": 0.09489350081922447, "grad_norm": 28.088382720947266, "learning_rate": 9.890093454765477e-06, "loss": 1.1, "step": 695 }, { "epoch": 0.09503003823047515, "grad_norm": 17.791053771972656, "learning_rate": 9.889631915072606e-06, "loss": 1.0131, "step": 696 }, { "epoch": 0.09516657564172583, "grad_norm": 13.234286308288574, "learning_rate": 9.889169419132924e-06, "loss": 1.0688, "step": 697 }, { "epoch": 0.09530311305297652, "grad_norm": 13.512242317199707, "learning_rate": 9.88870596703688e-06, "loss": 1.0387, "step": 698 }, { "epoch": 0.0954396504642272, "grad_norm": 14.84122371673584, "learning_rate": 9.888241558875109e-06, "loss": 0.9793, "step": 699 }, { "epoch": 0.09557618787547788, "grad_norm": 16.83757209777832, "learning_rate": 9.887776194738433e-06, "loss": 1.0963, "step": 700 }, { "epoch": 0.09571272528672857, "grad_norm": 19.230966567993164, "learning_rate": 9.88730987471786e-06, "loss": 1.0878, "step": 701 }, { "epoch": 0.09584926269797925, "grad_norm": 55.92375183105469, "learning_rate": 9.88684259890459e-06, "loss": 1.0165, "step": 702 }, { "epoch": 0.09598580010922993, "grad_norm": 8.882553100585938, "learning_rate": 9.886374367390003e-06, "loss": 1.0944, "step": 703 }, { "epoch": 0.0961223375204806, "grad_norm": 11.63900375366211, "learning_rate": 9.885905180265674e-06, "loss": 0.9664, "step": 704 }, { "epoch": 0.0962588749317313, "grad_norm": 44.62104034423828, "learning_rate": 9.885435037623355e-06, "loss": 0.946, "step": 705 }, { "epoch": 0.09639541234298198, "grad_norm": 9.073205947875977, "learning_rate": 9.884963939554991e-06, "loss": 1.0958, "step": 706 }, { "epoch": 0.09653194975423265, "grad_norm": 14.316548347473145, "learning_rate": 9.884491886152715e-06, "loss": 1.0509, "step": 707 }, { "epoch": 0.09666848716548335, "grad_norm": 12.807377815246582, "learning_rate": 9.884018877508844e-06, "loss": 1.1275, "step": 708 }, { "epoch": 0.09680502457673403, "grad_norm": 25.36724853515625, "learning_rate": 9.883544913715882e-06, "loss": 1.2555, "step": 709 }, { "epoch": 0.0969415619879847, "grad_norm": 7.670375347137451, "learning_rate": 9.88306999486652e-06, "loss": 1.1023, "step": 710 }, { "epoch": 0.0970780993992354, "grad_norm": 15.856683731079102, "learning_rate": 9.882594121053635e-06, "loss": 1.1113, "step": 711 }, { "epoch": 0.09721463681048607, "grad_norm": 87.13206481933594, "learning_rate": 9.882117292370296e-06, "loss": 0.9897, "step": 712 }, { "epoch": 0.09735117422173675, "grad_norm": 24.469999313354492, "learning_rate": 9.88163950890975e-06, "loss": 0.9595, "step": 713 }, { "epoch": 0.09748771163298744, "grad_norm": 25.405946731567383, "learning_rate": 9.881160770765438e-06, "loss": 0.9665, "step": 714 }, { "epoch": 0.09762424904423812, "grad_norm": 26.19294548034668, "learning_rate": 9.880681078030984e-06, "loss": 1.0256, "step": 715 }, { "epoch": 0.0977607864554888, "grad_norm": 26.515213012695312, "learning_rate": 9.8802004308002e-06, "loss": 1.1069, "step": 716 }, { "epoch": 0.09789732386673948, "grad_norm": 25.4880313873291, "learning_rate": 9.879718829167085e-06, "loss": 0.9888, "step": 717 }, { "epoch": 0.09803386127799017, "grad_norm": 93.64582061767578, "learning_rate": 9.879236273225822e-06, "loss": 1.0156, "step": 718 }, { "epoch": 0.09817039868924085, "grad_norm": 13.250457763671875, "learning_rate": 9.878752763070786e-06, "loss": 1.2005, "step": 719 }, { "epoch": 0.09830693610049153, "grad_norm": 13.648965835571289, "learning_rate": 9.878268298796531e-06, "loss": 1.1691, "step": 720 }, { "epoch": 0.09844347351174222, "grad_norm": 16.790372848510742, "learning_rate": 9.877782880497806e-06, "loss": 1.0386, "step": 721 }, { "epoch": 0.0985800109229929, "grad_norm": 26.925865173339844, "learning_rate": 9.877296508269538e-06, "loss": 1.2006, "step": 722 }, { "epoch": 0.09871654833424358, "grad_norm": 23.95745086669922, "learning_rate": 9.87680918220685e-06, "loss": 1.0244, "step": 723 }, { "epoch": 0.09885308574549427, "grad_norm": 97.30123138427734, "learning_rate": 9.876320902405041e-06, "loss": 1.0551, "step": 724 }, { "epoch": 0.09898962315674495, "grad_norm": 24.88794708251953, "learning_rate": 9.875831668959607e-06, "loss": 1.04, "step": 725 }, { "epoch": 0.09912616056799563, "grad_norm": 18.089813232421875, "learning_rate": 9.875341481966223e-06, "loss": 0.9945, "step": 726 }, { "epoch": 0.09926269797924632, "grad_norm": 37.41273880004883, "learning_rate": 9.874850341520754e-06, "loss": 1.0533, "step": 727 }, { "epoch": 0.099399235390497, "grad_norm": 22.282821655273438, "learning_rate": 9.874358247719251e-06, "loss": 1.0653, "step": 728 }, { "epoch": 0.09953577280174768, "grad_norm": 27.87940216064453, "learning_rate": 9.873865200657948e-06, "loss": 1.0885, "step": 729 }, { "epoch": 0.09967231021299836, "grad_norm": 16.78286361694336, "learning_rate": 9.87337120043327e-06, "loss": 1.0242, "step": 730 }, { "epoch": 0.09980884762424905, "grad_norm": 35.74118423461914, "learning_rate": 9.872876247141828e-06, "loss": 1.0825, "step": 731 }, { "epoch": 0.09994538503549973, "grad_norm": 34.63689041137695, "learning_rate": 9.872380340880416e-06, "loss": 1.1263, "step": 732 }, { "epoch": 0.1000819224467504, "grad_norm": 12.817477226257324, "learning_rate": 9.87188348174602e-06, "loss": 1.1079, "step": 733 }, { "epoch": 0.1002184598580011, "grad_norm": 14.915030479431152, "learning_rate": 9.871385669835805e-06, "loss": 1.1278, "step": 734 }, { "epoch": 0.10035499726925177, "grad_norm": 13.866483688354492, "learning_rate": 9.870886905247129e-06, "loss": 0.9271, "step": 735 }, { "epoch": 0.10049153468050245, "grad_norm": 16.138917922973633, "learning_rate": 9.87038718807753e-06, "loss": 0.9464, "step": 736 }, { "epoch": 0.10062807209175315, "grad_norm": 13.38328742980957, "learning_rate": 9.869886518424738e-06, "loss": 1.1191, "step": 737 }, { "epoch": 0.10076460950300382, "grad_norm": 15.898326873779297, "learning_rate": 9.869384896386669e-06, "loss": 0.8861, "step": 738 }, { "epoch": 0.1009011469142545, "grad_norm": 21.363554000854492, "learning_rate": 9.86888232206142e-06, "loss": 1.0712, "step": 739 }, { "epoch": 0.1010376843255052, "grad_norm": 24.868730545043945, "learning_rate": 9.86837879554728e-06, "loss": 1.1222, "step": 740 }, { "epoch": 0.10117422173675587, "grad_norm": 12.102543830871582, "learning_rate": 9.86787431694272e-06, "loss": 1.1591, "step": 741 }, { "epoch": 0.10131075914800655, "grad_norm": 230.59658813476562, "learning_rate": 9.8673688863464e-06, "loss": 1.0923, "step": 742 }, { "epoch": 0.10144729655925723, "grad_norm": 19.227453231811523, "learning_rate": 9.866862503857166e-06, "loss": 0.9286, "step": 743 }, { "epoch": 0.10158383397050792, "grad_norm": 16.205604553222656, "learning_rate": 9.866355169574047e-06, "loss": 0.9631, "step": 744 }, { "epoch": 0.1017203713817586, "grad_norm": 16.218448638916016, "learning_rate": 9.865846883596262e-06, "loss": 0.9188, "step": 745 }, { "epoch": 0.10185690879300928, "grad_norm": 14.286324501037598, "learning_rate": 9.865337646023214e-06, "loss": 1.0687, "step": 746 }, { "epoch": 0.10199344620425997, "grad_norm": 8.436545372009277, "learning_rate": 9.864827456954491e-06, "loss": 1.0221, "step": 747 }, { "epoch": 0.10212998361551065, "grad_norm": 13.809633255004883, "learning_rate": 9.864316316489873e-06, "loss": 1.0322, "step": 748 }, { "epoch": 0.10226652102676133, "grad_norm": 19.649749755859375, "learning_rate": 9.86380422472932e-06, "loss": 1.0525, "step": 749 }, { "epoch": 0.10240305843801202, "grad_norm": 13.649017333984375, "learning_rate": 9.863291181772979e-06, "loss": 1.038, "step": 750 }, { "epoch": 0.1025395958492627, "grad_norm": 10.44362735748291, "learning_rate": 9.862777187721182e-06, "loss": 1.216, "step": 751 }, { "epoch": 0.10267613326051338, "grad_norm": 20.870100021362305, "learning_rate": 9.862262242674451e-06, "loss": 0.972, "step": 752 }, { "epoch": 0.10281267067176407, "grad_norm": 11.092216491699219, "learning_rate": 9.861746346733494e-06, "loss": 1.1541, "step": 753 }, { "epoch": 0.10294920808301475, "grad_norm": 14.968749046325684, "learning_rate": 9.861229499999199e-06, "loss": 1.0041, "step": 754 }, { "epoch": 0.10308574549426543, "grad_norm": 8.556004524230957, "learning_rate": 9.860711702572647e-06, "loss": 1.1708, "step": 755 }, { "epoch": 0.1032222829055161, "grad_norm": 9.170053482055664, "learning_rate": 9.860192954555099e-06, "loss": 1.0704, "step": 756 }, { "epoch": 0.1033588203167668, "grad_norm": 9.101730346679688, "learning_rate": 9.859673256048005e-06, "loss": 1.0575, "step": 757 }, { "epoch": 0.10349535772801748, "grad_norm": 8.212737083435059, "learning_rate": 9.859152607153002e-06, "loss": 1.0539, "step": 758 }, { "epoch": 0.10363189513926815, "grad_norm": 6.061692237854004, "learning_rate": 9.858631007971912e-06, "loss": 0.9219, "step": 759 }, { "epoch": 0.10376843255051885, "grad_norm": 6.557736396789551, "learning_rate": 9.85810845860674e-06, "loss": 0.9791, "step": 760 }, { "epoch": 0.10390496996176952, "grad_norm": 8.414353370666504, "learning_rate": 9.857584959159679e-06, "loss": 1.0679, "step": 761 }, { "epoch": 0.1040415073730202, "grad_norm": 9.542908668518066, "learning_rate": 9.857060509733108e-06, "loss": 1.1493, "step": 762 }, { "epoch": 0.1041780447842709, "grad_norm": 10.078423500061035, "learning_rate": 9.856535110429593e-06, "loss": 1.1593, "step": 763 }, { "epoch": 0.10431458219552157, "grad_norm": 10.416378021240234, "learning_rate": 9.856008761351882e-06, "loss": 1.0569, "step": 764 }, { "epoch": 0.10445111960677225, "grad_norm": 19.542144775390625, "learning_rate": 9.855481462602914e-06, "loss": 1.1467, "step": 765 }, { "epoch": 0.10458765701802294, "grad_norm": 6.405394077301025, "learning_rate": 9.854953214285808e-06, "loss": 0.9303, "step": 766 }, { "epoch": 0.10472419442927362, "grad_norm": 13.53309154510498, "learning_rate": 9.854424016503874e-06, "loss": 1.1673, "step": 767 }, { "epoch": 0.1048607318405243, "grad_norm": 5.883976459503174, "learning_rate": 9.853893869360605e-06, "loss": 1.0036, "step": 768 }, { "epoch": 0.10499726925177498, "grad_norm": 8.149572372436523, "learning_rate": 9.853362772959678e-06, "loss": 1.2839, "step": 769 }, { "epoch": 0.10513380666302567, "grad_norm": 7.415305137634277, "learning_rate": 9.852830727404958e-06, "loss": 1.1751, "step": 770 }, { "epoch": 0.10527034407427635, "grad_norm": 5.797821521759033, "learning_rate": 9.852297732800496e-06, "loss": 1.1008, "step": 771 }, { "epoch": 0.10540688148552703, "grad_norm": 7.823464393615723, "learning_rate": 9.851763789250526e-06, "loss": 1.1143, "step": 772 }, { "epoch": 0.10554341889677772, "grad_norm": 15.413524627685547, "learning_rate": 9.851228896859472e-06, "loss": 1.2244, "step": 773 }, { "epoch": 0.1056799563080284, "grad_norm": 15.150097846984863, "learning_rate": 9.850693055731938e-06, "loss": 1.1713, "step": 774 }, { "epoch": 0.10581649371927908, "grad_norm": 7.89561653137207, "learning_rate": 9.850156265972722e-06, "loss": 1.128, "step": 775 }, { "epoch": 0.10595303113052977, "grad_norm": 8.291728973388672, "learning_rate": 9.849618527686793e-06, "loss": 0.9735, "step": 776 }, { "epoch": 0.10608956854178045, "grad_norm": 8.413064002990723, "learning_rate": 9.849079840979323e-06, "loss": 0.8901, "step": 777 }, { "epoch": 0.10622610595303113, "grad_norm": 9.737346649169922, "learning_rate": 9.848540205955656e-06, "loss": 1.0278, "step": 778 }, { "epoch": 0.10636264336428182, "grad_norm": 7.849782943725586, "learning_rate": 9.847999622721327e-06, "loss": 1.1303, "step": 779 }, { "epoch": 0.1064991807755325, "grad_norm": 9.257120132446289, "learning_rate": 9.847458091382057e-06, "loss": 1.0455, "step": 780 }, { "epoch": 0.10663571818678318, "grad_norm": 7.1808013916015625, "learning_rate": 9.846915612043751e-06, "loss": 0.9357, "step": 781 }, { "epoch": 0.10677225559803386, "grad_norm": 12.995445251464844, "learning_rate": 9.846372184812499e-06, "loss": 0.9891, "step": 782 }, { "epoch": 0.10690879300928455, "grad_norm": 6.9469099044799805, "learning_rate": 9.845827809794577e-06, "loss": 1.103, "step": 783 }, { "epoch": 0.10704533042053523, "grad_norm": 6.705833435058594, "learning_rate": 9.845282487096447e-06, "loss": 1.1516, "step": 784 }, { "epoch": 0.1071818678317859, "grad_norm": 8.683612823486328, "learning_rate": 9.844736216824755e-06, "loss": 1.194, "step": 785 }, { "epoch": 0.1073184052430366, "grad_norm": 10.579155921936035, "learning_rate": 9.844188999086336e-06, "loss": 0.9855, "step": 786 }, { "epoch": 0.10745494265428727, "grad_norm": 7.960040092468262, "learning_rate": 9.843640833988202e-06, "loss": 0.9246, "step": 787 }, { "epoch": 0.10759148006553795, "grad_norm": 11.181638717651367, "learning_rate": 9.843091721637559e-06, "loss": 1.0771, "step": 788 }, { "epoch": 0.10772801747678865, "grad_norm": 13.295620918273926, "learning_rate": 9.842541662141794e-06, "loss": 1.0711, "step": 789 }, { "epoch": 0.10786455488803932, "grad_norm": 9.553313255310059, "learning_rate": 9.84199065560848e-06, "loss": 0.9603, "step": 790 }, { "epoch": 0.10800109229929, "grad_norm": 18.9283504486084, "learning_rate": 9.841438702145374e-06, "loss": 0.9262, "step": 791 }, { "epoch": 0.1081376297105407, "grad_norm": 23.325611114501953, "learning_rate": 9.840885801860423e-06, "loss": 1.0848, "step": 792 }, { "epoch": 0.10827416712179137, "grad_norm": 7.072152614593506, "learning_rate": 9.840331954861752e-06, "loss": 1.1945, "step": 793 }, { "epoch": 0.10841070453304205, "grad_norm": 10.06103801727295, "learning_rate": 9.839777161257677e-06, "loss": 1.0501, "step": 794 }, { "epoch": 0.10854724194429273, "grad_norm": 7.242034435272217, "learning_rate": 9.839221421156697e-06, "loss": 1.1705, "step": 795 }, { "epoch": 0.10868377935554342, "grad_norm": 22.3081111907959, "learning_rate": 9.838664734667496e-06, "loss": 1.1503, "step": 796 }, { "epoch": 0.1088203167667941, "grad_norm": 11.476323127746582, "learning_rate": 9.83810710189894e-06, "loss": 1.0907, "step": 797 }, { "epoch": 0.10895685417804478, "grad_norm": 10.065070152282715, "learning_rate": 9.837548522960085e-06, "loss": 1.0291, "step": 798 }, { "epoch": 0.10909339158929547, "grad_norm": 30.058034896850586, "learning_rate": 9.836988997960173e-06, "loss": 1.1873, "step": 799 }, { "epoch": 0.10922992900054615, "grad_norm": 6.327232837677002, "learning_rate": 9.836428527008624e-06, "loss": 0.9242, "step": 800 }, { "epoch": 0.10936646641179683, "grad_norm": 11.04651165008545, "learning_rate": 9.835867110215048e-06, "loss": 0.9982, "step": 801 }, { "epoch": 0.10950300382304752, "grad_norm": 17.44797134399414, "learning_rate": 9.835304747689242e-06, "loss": 1.0392, "step": 802 }, { "epoch": 0.1096395412342982, "grad_norm": 6.767638683319092, "learning_rate": 9.834741439541183e-06, "loss": 1.0825, "step": 803 }, { "epoch": 0.10977607864554888, "grad_norm": 6.0564165115356445, "learning_rate": 9.834177185881033e-06, "loss": 1.0749, "step": 804 }, { "epoch": 0.10991261605679957, "grad_norm": 6.599329471588135, "learning_rate": 9.833611986819145e-06, "loss": 1.0301, "step": 805 }, { "epoch": 0.11004915346805025, "grad_norm": 8.69588851928711, "learning_rate": 9.833045842466051e-06, "loss": 1.0858, "step": 806 }, { "epoch": 0.11018569087930093, "grad_norm": 8.670390129089355, "learning_rate": 9.832478752932468e-06, "loss": 1.1187, "step": 807 }, { "epoch": 0.1103222282905516, "grad_norm": 7.683977127075195, "learning_rate": 9.831910718329304e-06, "loss": 1.0443, "step": 808 }, { "epoch": 0.1104587657018023, "grad_norm": 10.522838592529297, "learning_rate": 9.831341738767641e-06, "loss": 0.9991, "step": 809 }, { "epoch": 0.11059530311305298, "grad_norm": 15.13160514831543, "learning_rate": 9.83077181435876e-06, "loss": 0.9466, "step": 810 }, { "epoch": 0.11073184052430365, "grad_norm": 6.946547031402588, "learning_rate": 9.830200945214111e-06, "loss": 1.1537, "step": 811 }, { "epoch": 0.11086837793555435, "grad_norm": 7.103602409362793, "learning_rate": 9.829629131445342e-06, "loss": 1.3739, "step": 812 }, { "epoch": 0.11100491534680502, "grad_norm": 6.285361289978027, "learning_rate": 9.829056373164278e-06, "loss": 1.0734, "step": 813 }, { "epoch": 0.1111414527580557, "grad_norm": 10.898956298828125, "learning_rate": 9.828482670482936e-06, "loss": 1.1479, "step": 814 }, { "epoch": 0.1112779901693064, "grad_norm": 8.205595970153809, "learning_rate": 9.827908023513504e-06, "loss": 0.9552, "step": 815 }, { "epoch": 0.11141452758055707, "grad_norm": 6.174274444580078, "learning_rate": 9.82733243236837e-06, "loss": 1.0563, "step": 816 }, { "epoch": 0.11155106499180775, "grad_norm": 8.675193786621094, "learning_rate": 9.8267558971601e-06, "loss": 0.9931, "step": 817 }, { "epoch": 0.11168760240305844, "grad_norm": 13.697504997253418, "learning_rate": 9.826178418001443e-06, "loss": 1.0337, "step": 818 }, { "epoch": 0.11182413981430912, "grad_norm": 6.6956987380981445, "learning_rate": 9.825599995005334e-06, "loss": 0.9148, "step": 819 }, { "epoch": 0.1119606772255598, "grad_norm": 9.751168251037598, "learning_rate": 9.825020628284896e-06, "loss": 1.1238, "step": 820 }, { "epoch": 0.11209721463681048, "grad_norm": 15.191605567932129, "learning_rate": 9.82444031795343e-06, "loss": 1.0964, "step": 821 }, { "epoch": 0.11223375204806117, "grad_norm": 9.387804985046387, "learning_rate": 9.823859064124426e-06, "loss": 1.0776, "step": 822 }, { "epoch": 0.11237028945931185, "grad_norm": 7.906907081604004, "learning_rate": 9.82327686691156e-06, "loss": 1.0935, "step": 823 }, { "epoch": 0.11250682687056253, "grad_norm": 8.684904098510742, "learning_rate": 9.822693726428685e-06, "loss": 1.1705, "step": 824 }, { "epoch": 0.11264336428181322, "grad_norm": 8.960783958435059, "learning_rate": 9.82210964278985e-06, "loss": 1.0956, "step": 825 }, { "epoch": 0.1127799016930639, "grad_norm": 6.587287902832031, "learning_rate": 9.821524616109277e-06, "loss": 1.0699, "step": 826 }, { "epoch": 0.11291643910431458, "grad_norm": 6.524636268615723, "learning_rate": 9.82093864650138e-06, "loss": 0.8645, "step": 827 }, { "epoch": 0.11305297651556527, "grad_norm": 10.110618591308594, "learning_rate": 9.820351734080754e-06, "loss": 1.1518, "step": 828 }, { "epoch": 0.11318951392681595, "grad_norm": 9.679329872131348, "learning_rate": 9.81976387896218e-06, "loss": 1.0947, "step": 829 }, { "epoch": 0.11332605133806663, "grad_norm": 8.542322158813477, "learning_rate": 9.819175081260622e-06, "loss": 1.0416, "step": 830 }, { "epoch": 0.11346258874931732, "grad_norm": 6.323790550231934, "learning_rate": 9.818585341091228e-06, "loss": 1.0458, "step": 831 }, { "epoch": 0.113599126160568, "grad_norm": 14.482656478881836, "learning_rate": 9.817994658569333e-06, "loss": 0.8707, "step": 832 }, { "epoch": 0.11373566357181868, "grad_norm": 7.3750319480896, "learning_rate": 9.817403033810454e-06, "loss": 1.0083, "step": 833 }, { "epoch": 0.11387220098306935, "grad_norm": 7.603151798248291, "learning_rate": 9.816810466930291e-06, "loss": 1.1113, "step": 834 }, { "epoch": 0.11400873839432005, "grad_norm": 7.3793816566467285, "learning_rate": 9.816216958044733e-06, "loss": 1.1375, "step": 835 }, { "epoch": 0.11414527580557073, "grad_norm": 10.399057388305664, "learning_rate": 9.81562250726985e-06, "loss": 1.0698, "step": 836 }, { "epoch": 0.1142818132168214, "grad_norm": 7.3843793869018555, "learning_rate": 9.815027114721894e-06, "loss": 1.0085, "step": 837 }, { "epoch": 0.1144183506280721, "grad_norm": 9.528382301330566, "learning_rate": 9.814430780517306e-06, "loss": 1.0193, "step": 838 }, { "epoch": 0.11455488803932277, "grad_norm": 7.696165084838867, "learning_rate": 9.813833504772706e-06, "loss": 1.0981, "step": 839 }, { "epoch": 0.11469142545057345, "grad_norm": 7.234599590301514, "learning_rate": 9.813235287604904e-06, "loss": 0.9957, "step": 840 }, { "epoch": 0.11482796286182415, "grad_norm": 6.675375938415527, "learning_rate": 9.81263612913089e-06, "loss": 1.0258, "step": 841 }, { "epoch": 0.11496450027307482, "grad_norm": 8.31371784210205, "learning_rate": 9.81203602946784e-06, "loss": 1.1901, "step": 842 }, { "epoch": 0.1151010376843255, "grad_norm": 7.848668575286865, "learning_rate": 9.811434988733111e-06, "loss": 0.9806, "step": 843 }, { "epoch": 0.1152375750955762, "grad_norm": 8.892669677734375, "learning_rate": 9.810833007044247e-06, "loss": 1.043, "step": 844 }, { "epoch": 0.11537411250682687, "grad_norm": 9.455756187438965, "learning_rate": 9.810230084518977e-06, "loss": 1.0971, "step": 845 }, { "epoch": 0.11551064991807755, "grad_norm": 9.494386672973633, "learning_rate": 9.80962622127521e-06, "loss": 1.1108, "step": 846 }, { "epoch": 0.11564718732932823, "grad_norm": 8.47288990020752, "learning_rate": 9.809021417431045e-06, "loss": 1.1016, "step": 847 }, { "epoch": 0.11578372474057892, "grad_norm": 6.7064948081970215, "learning_rate": 9.808415673104757e-06, "loss": 1.1106, "step": 848 }, { "epoch": 0.1159202621518296, "grad_norm": 9.608433723449707, "learning_rate": 9.807808988414811e-06, "loss": 0.9246, "step": 849 }, { "epoch": 0.11605679956308028, "grad_norm": 9.175115585327148, "learning_rate": 9.807201363479852e-06, "loss": 1.1673, "step": 850 }, { "epoch": 0.11619333697433097, "grad_norm": 10.788400650024414, "learning_rate": 9.806592798418714e-06, "loss": 1.0272, "step": 851 }, { "epoch": 0.11632987438558165, "grad_norm": 10.475918769836426, "learning_rate": 9.805983293350413e-06, "loss": 1.028, "step": 852 }, { "epoch": 0.11646641179683233, "grad_norm": 9.230118751525879, "learning_rate": 9.80537284839414e-06, "loss": 0.9738, "step": 853 }, { "epoch": 0.11660294920808302, "grad_norm": 12.158004760742188, "learning_rate": 9.804761463669286e-06, "loss": 1.0041, "step": 854 }, { "epoch": 0.1167394866193337, "grad_norm": 10.9843111038208, "learning_rate": 9.804149139295412e-06, "loss": 1.0927, "step": 855 }, { "epoch": 0.11687602403058438, "grad_norm": 11.516792297363281, "learning_rate": 9.80353587539227e-06, "loss": 1.1091, "step": 856 }, { "epoch": 0.11701256144183507, "grad_norm": 17.030086517333984, "learning_rate": 9.802921672079794e-06, "loss": 1.1048, "step": 857 }, { "epoch": 0.11714909885308575, "grad_norm": 14.3440580368042, "learning_rate": 9.8023065294781e-06, "loss": 1.1767, "step": 858 }, { "epoch": 0.11728563626433643, "grad_norm": 11.655410766601562, "learning_rate": 9.80169044770749e-06, "loss": 0.9786, "step": 859 }, { "epoch": 0.1174221736755871, "grad_norm": 9.423626899719238, "learning_rate": 9.801073426888447e-06, "loss": 1.0578, "step": 860 }, { "epoch": 0.1175587110868378, "grad_norm": 20.416040420532227, "learning_rate": 9.800455467141642e-06, "loss": 1.2134, "step": 861 }, { "epoch": 0.11769524849808848, "grad_norm": 7.652032375335693, "learning_rate": 9.799836568587928e-06, "loss": 1.0284, "step": 862 }, { "epoch": 0.11783178590933915, "grad_norm": 8.455526351928711, "learning_rate": 9.799216731348338e-06, "loss": 1.211, "step": 863 }, { "epoch": 0.11796832332058985, "grad_norm": 10.148366928100586, "learning_rate": 9.79859595554409e-06, "loss": 1.0917, "step": 864 }, { "epoch": 0.11810486073184052, "grad_norm": 10.748922348022461, "learning_rate": 9.79797424129659e-06, "loss": 1.058, "step": 865 }, { "epoch": 0.1182413981430912, "grad_norm": 7.0171637535095215, "learning_rate": 9.797351588727424e-06, "loss": 1.1392, "step": 866 }, { "epoch": 0.1183779355543419, "grad_norm": 8.741509437561035, "learning_rate": 9.79672799795836e-06, "loss": 1.0308, "step": 867 }, { "epoch": 0.11851447296559257, "grad_norm": 7.134588718414307, "learning_rate": 9.796103469111352e-06, "loss": 0.9423, "step": 868 }, { "epoch": 0.11865101037684325, "grad_norm": 6.152257442474365, "learning_rate": 9.795478002308535e-06, "loss": 0.8172, "step": 869 }, { "epoch": 0.11878754778809394, "grad_norm": 18.028377532958984, "learning_rate": 9.794851597672234e-06, "loss": 0.9923, "step": 870 }, { "epoch": 0.11892408519934462, "grad_norm": 38.45817184448242, "learning_rate": 9.794224255324947e-06, "loss": 1.0135, "step": 871 }, { "epoch": 0.1190606226105953, "grad_norm": 10.018363952636719, "learning_rate": 9.793595975389366e-06, "loss": 1.0641, "step": 872 }, { "epoch": 0.11919716002184598, "grad_norm": 7.267457962036133, "learning_rate": 9.792966757988358e-06, "loss": 1.1051, "step": 873 }, { "epoch": 0.11933369743309667, "grad_norm": 9.606276512145996, "learning_rate": 9.792336603244977e-06, "loss": 1.0209, "step": 874 }, { "epoch": 0.11947023484434735, "grad_norm": 7.541828155517578, "learning_rate": 9.791705511282462e-06, "loss": 0.9197, "step": 875 }, { "epoch": 0.11960677225559803, "grad_norm": 36.8525390625, "learning_rate": 9.791073482224229e-06, "loss": 1.1159, "step": 876 }, { "epoch": 0.11974330966684872, "grad_norm": 14.264284133911133, "learning_rate": 9.790440516193887e-06, "loss": 1.0013, "step": 877 }, { "epoch": 0.1198798470780994, "grad_norm": 6.624886989593506, "learning_rate": 9.78980661331522e-06, "loss": 1.0037, "step": 878 }, { "epoch": 0.12001638448935008, "grad_norm": 7.203617572784424, "learning_rate": 9.789171773712197e-06, "loss": 1.0511, "step": 879 }, { "epoch": 0.12015292190060077, "grad_norm": 20.006458282470703, "learning_rate": 9.788535997508972e-06, "loss": 0.9383, "step": 880 }, { "epoch": 0.12028945931185145, "grad_norm": 9.469712257385254, "learning_rate": 9.787899284829883e-06, "loss": 0.9729, "step": 881 }, { "epoch": 0.12042599672310213, "grad_norm": 6.219909191131592, "learning_rate": 9.787261635799448e-06, "loss": 0.9793, "step": 882 }, { "epoch": 0.12056253413435282, "grad_norm": 9.762345314025879, "learning_rate": 9.786623050542368e-06, "loss": 1.0054, "step": 883 }, { "epoch": 0.1206990715456035, "grad_norm": 8.474611282348633, "learning_rate": 9.785983529183533e-06, "loss": 1.0496, "step": 884 }, { "epoch": 0.12083560895685418, "grad_norm": 8.852746963500977, "learning_rate": 9.785343071848008e-06, "loss": 1.0097, "step": 885 }, { "epoch": 0.12097214636810485, "grad_norm": 8.868712425231934, "learning_rate": 9.784701678661045e-06, "loss": 1.0602, "step": 886 }, { "epoch": 0.12110868377935555, "grad_norm": 13.393411636352539, "learning_rate": 9.78405934974808e-06, "loss": 1.0538, "step": 887 }, { "epoch": 0.12124522119060623, "grad_norm": 15.350007057189941, "learning_rate": 9.783416085234732e-06, "loss": 1.1112, "step": 888 }, { "epoch": 0.1213817586018569, "grad_norm": 8.363614082336426, "learning_rate": 9.7827718852468e-06, "loss": 1.0834, "step": 889 }, { "epoch": 0.1215182960131076, "grad_norm": 15.807332038879395, "learning_rate": 9.782126749910268e-06, "loss": 0.9212, "step": 890 }, { "epoch": 0.12165483342435827, "grad_norm": 5.594084739685059, "learning_rate": 9.781480679351305e-06, "loss": 0.9005, "step": 891 }, { "epoch": 0.12179137083560895, "grad_norm": 7.4885969161987305, "learning_rate": 9.780833673696255e-06, "loss": 1.0867, "step": 892 }, { "epoch": 0.12192790824685965, "grad_norm": 6.817811012268066, "learning_rate": 9.780185733071656e-06, "loss": 1.0188, "step": 893 }, { "epoch": 0.12206444565811032, "grad_norm": 8.992538452148438, "learning_rate": 9.779536857604222e-06, "loss": 0.9134, "step": 894 }, { "epoch": 0.122200983069361, "grad_norm": 14.12388801574707, "learning_rate": 9.778887047420848e-06, "loss": 0.9068, "step": 895 }, { "epoch": 0.1223375204806117, "grad_norm": 13.694872856140137, "learning_rate": 9.778236302648618e-06, "loss": 1.1286, "step": 896 }, { "epoch": 0.12247405789186237, "grad_norm": 8.62972354888916, "learning_rate": 9.777584623414796e-06, "loss": 1.0686, "step": 897 }, { "epoch": 0.12261059530311305, "grad_norm": 6.318917274475098, "learning_rate": 9.776932009846826e-06, "loss": 1.0107, "step": 898 }, { "epoch": 0.12274713271436373, "grad_norm": 6.955656051635742, "learning_rate": 9.776278462072342e-06, "loss": 1.0156, "step": 899 }, { "epoch": 0.12288367012561442, "grad_norm": 12.045774459838867, "learning_rate": 9.775623980219149e-06, "loss": 1.1771, "step": 900 }, { "epoch": 0.1230202075368651, "grad_norm": 6.661323547363281, "learning_rate": 9.774968564415246e-06, "loss": 1.0173, "step": 901 }, { "epoch": 0.12315674494811578, "grad_norm": 6.792392730712891, "learning_rate": 9.77431221478881e-06, "loss": 1.0031, "step": 902 }, { "epoch": 0.12329328235936647, "grad_norm": 8.489592552185059, "learning_rate": 9.7736549314682e-06, "loss": 1.0897, "step": 903 }, { "epoch": 0.12342981977061715, "grad_norm": 6.573811054229736, "learning_rate": 9.772996714581957e-06, "loss": 1.0089, "step": 904 }, { "epoch": 0.12356635718186783, "grad_norm": 8.823612213134766, "learning_rate": 9.772337564258807e-06, "loss": 1.0985, "step": 905 }, { "epoch": 0.12370289459311852, "grad_norm": 7.346874713897705, "learning_rate": 9.771677480627659e-06, "loss": 0.9826, "step": 906 }, { "epoch": 0.1238394320043692, "grad_norm": 6.620998382568359, "learning_rate": 9.771016463817602e-06, "loss": 1.0219, "step": 907 }, { "epoch": 0.12397596941561988, "grad_norm": 8.147665023803711, "learning_rate": 9.77035451395791e-06, "loss": 0.8922, "step": 908 }, { "epoch": 0.12411250682687057, "grad_norm": 8.210898399353027, "learning_rate": 9.769691631178035e-06, "loss": 1.1464, "step": 909 }, { "epoch": 0.12424904423812125, "grad_norm": 7.724538803100586, "learning_rate": 9.769027815607616e-06, "loss": 0.9771, "step": 910 }, { "epoch": 0.12438558164937193, "grad_norm": 7.786626815795898, "learning_rate": 9.768363067376473e-06, "loss": 0.977, "step": 911 }, { "epoch": 0.1245221190606226, "grad_norm": 6.7598443031311035, "learning_rate": 9.767697386614609e-06, "loss": 1.0811, "step": 912 }, { "epoch": 0.1246586564718733, "grad_norm": 9.953804969787598, "learning_rate": 9.767030773452207e-06, "loss": 0.938, "step": 913 }, { "epoch": 0.12479519388312398, "grad_norm": 8.881417274475098, "learning_rate": 9.766363228019635e-06, "loss": 0.955, "step": 914 }, { "epoch": 0.12493173129437465, "grad_norm": 10.115408897399902, "learning_rate": 9.765694750447443e-06, "loss": 1.0202, "step": 915 }, { "epoch": 0.12506826870562535, "grad_norm": 7.568378448486328, "learning_rate": 9.76502534086636e-06, "loss": 0.9907, "step": 916 }, { "epoch": 0.125204806116876, "grad_norm": 8.32648754119873, "learning_rate": 9.764354999407303e-06, "loss": 0.9633, "step": 917 }, { "epoch": 0.1253413435281267, "grad_norm": 6.804895401000977, "learning_rate": 9.76368372620137e-06, "loss": 1.0956, "step": 918 }, { "epoch": 0.1254778809393774, "grad_norm": 15.133220672607422, "learning_rate": 9.763011521379833e-06, "loss": 0.9855, "step": 919 }, { "epoch": 0.12561441835062806, "grad_norm": 12.472837448120117, "learning_rate": 9.762338385074155e-06, "loss": 0.9935, "step": 920 }, { "epoch": 0.12575095576187875, "grad_norm": 10.05013370513916, "learning_rate": 9.761664317415981e-06, "loss": 1.042, "step": 921 }, { "epoch": 0.12588749317312944, "grad_norm": 7.897647380828857, "learning_rate": 9.760989318537135e-06, "loss": 1.1325, "step": 922 }, { "epoch": 0.1260240305843801, "grad_norm": 7.791382312774658, "learning_rate": 9.760313388569621e-06, "loss": 1.0497, "step": 923 }, { "epoch": 0.1261605679956308, "grad_norm": 17.14434242248535, "learning_rate": 9.759636527645633e-06, "loss": 1.0352, "step": 924 }, { "epoch": 0.1262971054068815, "grad_norm": 26.321882247924805, "learning_rate": 9.75895873589754e-06, "loss": 0.8973, "step": 925 }, { "epoch": 0.12643364281813216, "grad_norm": 28.182659149169922, "learning_rate": 9.758280013457893e-06, "loss": 1.0865, "step": 926 }, { "epoch": 0.12657018022938285, "grad_norm": 6.13004207611084, "learning_rate": 9.757600360459429e-06, "loss": 1.0655, "step": 927 }, { "epoch": 0.12670671764063354, "grad_norm": 11.413946151733398, "learning_rate": 9.756919777035066e-06, "loss": 1.0176, "step": 928 }, { "epoch": 0.1268432550518842, "grad_norm": 11.599974632263184, "learning_rate": 9.756238263317902e-06, "loss": 1.0361, "step": 929 }, { "epoch": 0.1269797924631349, "grad_norm": 8.810538291931152, "learning_rate": 9.755555819441219e-06, "loss": 0.98, "step": 930 }, { "epoch": 0.1271163298743856, "grad_norm": 11.659756660461426, "learning_rate": 9.754872445538477e-06, "loss": 0.9216, "step": 931 }, { "epoch": 0.12725286728563626, "grad_norm": 6.7393364906311035, "learning_rate": 9.754188141743326e-06, "loss": 1.0521, "step": 932 }, { "epoch": 0.12738940469688695, "grad_norm": 7.350715637207031, "learning_rate": 9.753502908189588e-06, "loss": 1.1097, "step": 933 }, { "epoch": 0.12752594210813764, "grad_norm": 8.394755363464355, "learning_rate": 9.752816745011272e-06, "loss": 0.9619, "step": 934 }, { "epoch": 0.1276624795193883, "grad_norm": 10.867347717285156, "learning_rate": 9.752129652342572e-06, "loss": 1.0036, "step": 935 }, { "epoch": 0.127799016930639, "grad_norm": 6.545230388641357, "learning_rate": 9.751441630317858e-06, "loss": 1.0089, "step": 936 }, { "epoch": 0.1279355543418897, "grad_norm": 7.702572345733643, "learning_rate": 9.750752679071683e-06, "loss": 1.032, "step": 937 }, { "epoch": 0.12807209175314035, "grad_norm": 11.883520126342773, "learning_rate": 9.750062798738784e-06, "loss": 1.0521, "step": 938 }, { "epoch": 0.12820862916439105, "grad_norm": 5.942498207092285, "learning_rate": 9.749371989454076e-06, "loss": 1.0333, "step": 939 }, { "epoch": 0.12834516657564174, "grad_norm": 8.528575897216797, "learning_rate": 9.74868025135266e-06, "loss": 1.0162, "step": 940 }, { "epoch": 0.1284817039868924, "grad_norm": 6.754151344299316, "learning_rate": 9.747987584569817e-06, "loss": 0.969, "step": 941 }, { "epoch": 0.1286182413981431, "grad_norm": 6.491395473480225, "learning_rate": 9.747293989241007e-06, "loss": 1.1748, "step": 942 }, { "epoch": 0.12875477880939376, "grad_norm": 9.443376541137695, "learning_rate": 9.746599465501876e-06, "loss": 0.8774, "step": 943 }, { "epoch": 0.12889131622064445, "grad_norm": 40.51387405395508, "learning_rate": 9.745904013488248e-06, "loss": 0.9759, "step": 944 }, { "epoch": 0.12902785363189515, "grad_norm": 12.348345756530762, "learning_rate": 9.745207633336132e-06, "loss": 1.025, "step": 945 }, { "epoch": 0.1291643910431458, "grad_norm": 7.450240135192871, "learning_rate": 9.744510325181714e-06, "loss": 0.9963, "step": 946 }, { "epoch": 0.1293009284543965, "grad_norm": 6.479504585266113, "learning_rate": 9.743812089161362e-06, "loss": 0.9671, "step": 947 }, { "epoch": 0.1294374658656472, "grad_norm": 10.329021453857422, "learning_rate": 9.743112925411633e-06, "loss": 1.0548, "step": 948 }, { "epoch": 0.12957400327689786, "grad_norm": 10.367941856384277, "learning_rate": 9.742412834069257e-06, "loss": 1.0876, "step": 949 }, { "epoch": 0.12971054068814855, "grad_norm": 8.15021800994873, "learning_rate": 9.741711815271148e-06, "loss": 1.1314, "step": 950 }, { "epoch": 0.12984707809939924, "grad_norm": 8.814618110656738, "learning_rate": 9.7410098691544e-06, "loss": 0.9519, "step": 951 }, { "epoch": 0.1299836155106499, "grad_norm": 9.21642017364502, "learning_rate": 9.740306995856294e-06, "loss": 0.9924, "step": 952 }, { "epoch": 0.1301201529219006, "grad_norm": 7.041244983673096, "learning_rate": 9.739603195514287e-06, "loss": 0.9244, "step": 953 }, { "epoch": 0.1302566903331513, "grad_norm": 8.674356460571289, "learning_rate": 9.738898468266014e-06, "loss": 0.9631, "step": 954 }, { "epoch": 0.13039322774440196, "grad_norm": 6.805272579193115, "learning_rate": 9.738192814249302e-06, "loss": 1.1146, "step": 955 }, { "epoch": 0.13052976515565265, "grad_norm": 10.82956600189209, "learning_rate": 9.737486233602149e-06, "loss": 0.9418, "step": 956 }, { "epoch": 0.13066630256690334, "grad_norm": 10.391427040100098, "learning_rate": 9.736778726462742e-06, "loss": 1.1418, "step": 957 }, { "epoch": 0.130802839978154, "grad_norm": 10.648874282836914, "learning_rate": 9.736070292969441e-06, "loss": 1.1134, "step": 958 }, { "epoch": 0.1309393773894047, "grad_norm": 14.634666442871094, "learning_rate": 9.735360933260795e-06, "loss": 1.0428, "step": 959 }, { "epoch": 0.1310759148006554, "grad_norm": 27.93881607055664, "learning_rate": 9.73465064747553e-06, "loss": 1.0136, "step": 960 }, { "epoch": 0.13121245221190606, "grad_norm": 16.09466552734375, "learning_rate": 9.733939435752552e-06, "loss": 0.9139, "step": 961 }, { "epoch": 0.13134898962315675, "grad_norm": 9.545394897460938, "learning_rate": 9.733227298230952e-06, "loss": 0.9618, "step": 962 }, { "epoch": 0.13148552703440744, "grad_norm": 10.284571647644043, "learning_rate": 9.732514235050002e-06, "loss": 1.0025, "step": 963 }, { "epoch": 0.1316220644456581, "grad_norm": 15.433235168457031, "learning_rate": 9.731800246349149e-06, "loss": 1.0907, "step": 964 }, { "epoch": 0.1317586018569088, "grad_norm": 9.819815635681152, "learning_rate": 9.731085332268026e-06, "loss": 1.0236, "step": 965 }, { "epoch": 0.1318951392681595, "grad_norm": 8.763428688049316, "learning_rate": 9.730369492946447e-06, "loss": 1.0636, "step": 966 }, { "epoch": 0.13203167667941015, "grad_norm": 12.793198585510254, "learning_rate": 9.729652728524408e-06, "loss": 1.137, "step": 967 }, { "epoch": 0.13216821409066085, "grad_norm": 7.979931354522705, "learning_rate": 9.72893503914208e-06, "loss": 1.1041, "step": 968 }, { "epoch": 0.1323047515019115, "grad_norm": 15.27701473236084, "learning_rate": 9.728216424939821e-06, "loss": 1.0514, "step": 969 }, { "epoch": 0.1324412889131622, "grad_norm": 8.16024398803711, "learning_rate": 9.727496886058168e-06, "loss": 1.0621, "step": 970 }, { "epoch": 0.1325778263244129, "grad_norm": 14.12614917755127, "learning_rate": 9.726776422637835e-06, "loss": 0.9873, "step": 971 }, { "epoch": 0.13271436373566356, "grad_norm": 7.685389518737793, "learning_rate": 9.726055034819726e-06, "loss": 0.9625, "step": 972 }, { "epoch": 0.13285090114691425, "grad_norm": 7.104608058929443, "learning_rate": 9.725332722744915e-06, "loss": 1.0193, "step": 973 }, { "epoch": 0.13298743855816494, "grad_norm": 14.162872314453125, "learning_rate": 9.724609486554666e-06, "loss": 0.9824, "step": 974 }, { "epoch": 0.1331239759694156, "grad_norm": 16.206499099731445, "learning_rate": 9.723885326390417e-06, "loss": 1.0289, "step": 975 }, { "epoch": 0.1332605133806663, "grad_norm": 8.137534141540527, "learning_rate": 9.723160242393788e-06, "loss": 0.9249, "step": 976 }, { "epoch": 0.133397050791917, "grad_norm": 6.32620906829834, "learning_rate": 9.722434234706584e-06, "loss": 1.0737, "step": 977 }, { "epoch": 0.13353358820316766, "grad_norm": 7.100324630737305, "learning_rate": 9.721707303470786e-06, "loss": 0.8832, "step": 978 }, { "epoch": 0.13367012561441835, "grad_norm": 7.333224773406982, "learning_rate": 9.720979448828557e-06, "loss": 0.8669, "step": 979 }, { "epoch": 0.13380666302566904, "grad_norm": 6.856069087982178, "learning_rate": 9.720250670922242e-06, "loss": 1.0422, "step": 980 }, { "epoch": 0.1339432004369197, "grad_norm": 5.2987589836120605, "learning_rate": 9.719520969894365e-06, "loss": 1.0123, "step": 981 }, { "epoch": 0.1340797378481704, "grad_norm": 8.586170196533203, "learning_rate": 9.71879034588763e-06, "loss": 0.9694, "step": 982 }, { "epoch": 0.1342162752594211, "grad_norm": 12.084134101867676, "learning_rate": 9.718058799044922e-06, "loss": 1.0609, "step": 983 }, { "epoch": 0.13435281267067176, "grad_norm": 8.264341354370117, "learning_rate": 9.717326329509308e-06, "loss": 1.1034, "step": 984 }, { "epoch": 0.13448935008192245, "grad_norm": 12.783570289611816, "learning_rate": 9.716592937424034e-06, "loss": 0.9926, "step": 985 }, { "epoch": 0.13462588749317314, "grad_norm": 6.192535400390625, "learning_rate": 9.715858622932529e-06, "loss": 1.0826, "step": 986 }, { "epoch": 0.1347624249044238, "grad_norm": 7.393129348754883, "learning_rate": 9.715123386178393e-06, "loss": 0.8988, "step": 987 }, { "epoch": 0.1348989623156745, "grad_norm": 10.279806137084961, "learning_rate": 9.714387227305422e-06, "loss": 0.971, "step": 988 }, { "epoch": 0.1350354997269252, "grad_norm": 5.957614421844482, "learning_rate": 9.713650146457578e-06, "loss": 0.9644, "step": 989 }, { "epoch": 0.13517203713817585, "grad_norm": 7.5411529541015625, "learning_rate": 9.712912143779012e-06, "loss": 1.0431, "step": 990 }, { "epoch": 0.13530857454942655, "grad_norm": 6.581211090087891, "learning_rate": 9.712173219414052e-06, "loss": 1.1028, "step": 991 }, { "epoch": 0.13544511196067724, "grad_norm": 7.47784948348999, "learning_rate": 9.711433373507206e-06, "loss": 1.016, "step": 992 }, { "epoch": 0.1355816493719279, "grad_norm": 8.036261558532715, "learning_rate": 9.710692606203162e-06, "loss": 1.0637, "step": 993 }, { "epoch": 0.1357181867831786, "grad_norm": 9.519754409790039, "learning_rate": 9.709950917646793e-06, "loss": 1.1505, "step": 994 }, { "epoch": 0.13585472419442926, "grad_norm": 5.406389236450195, "learning_rate": 9.709208307983142e-06, "loss": 1.0583, "step": 995 }, { "epoch": 0.13599126160567995, "grad_norm": 9.16183853149414, "learning_rate": 9.708464777357444e-06, "loss": 1.0386, "step": 996 }, { "epoch": 0.13612779901693065, "grad_norm": 9.822042465209961, "learning_rate": 9.707720325915105e-06, "loss": 1.0594, "step": 997 }, { "epoch": 0.1362643364281813, "grad_norm": 8.374072074890137, "learning_rate": 9.706974953801715e-06, "loss": 1.0506, "step": 998 }, { "epoch": 0.136400873839432, "grad_norm": 7.7896199226379395, "learning_rate": 9.706228661163046e-06, "loss": 0.9474, "step": 999 }, { "epoch": 0.1365374112506827, "grad_norm": 7.846332550048828, "learning_rate": 9.705481448145046e-06, "loss": 0.983, "step": 1000 }, { "epoch": 0.13667394866193336, "grad_norm": 8.389957427978516, "learning_rate": 9.704733314893843e-06, "loss": 0.9607, "step": 1001 }, { "epoch": 0.13681048607318405, "grad_norm": 8.354280471801758, "learning_rate": 9.70398426155575e-06, "loss": 1.2118, "step": 1002 }, { "epoch": 0.13694702348443474, "grad_norm": 8.02209186553955, "learning_rate": 9.703234288277252e-06, "loss": 1.0366, "step": 1003 }, { "epoch": 0.1370835608956854, "grad_norm": 7.241204261779785, "learning_rate": 9.702483395205023e-06, "loss": 1.1139, "step": 1004 }, { "epoch": 0.1372200983069361, "grad_norm": 7.559254169464111, "learning_rate": 9.701731582485912e-06, "loss": 1.0124, "step": 1005 }, { "epoch": 0.1373566357181868, "grad_norm": 8.455316543579102, "learning_rate": 9.700978850266945e-06, "loss": 0.9651, "step": 1006 }, { "epoch": 0.13749317312943746, "grad_norm": 6.083085060119629, "learning_rate": 9.700225198695333e-06, "loss": 1.1205, "step": 1007 }, { "epoch": 0.13762971054068815, "grad_norm": 7.506846904754639, "learning_rate": 9.699470627918462e-06, "loss": 1.1261, "step": 1008 }, { "epoch": 0.13776624795193884, "grad_norm": 9.434876441955566, "learning_rate": 9.698715138083906e-06, "loss": 1.1213, "step": 1009 }, { "epoch": 0.1379027853631895, "grad_norm": 6.321789741516113, "learning_rate": 9.697958729339408e-06, "loss": 0.9799, "step": 1010 }, { "epoch": 0.1380393227744402, "grad_norm": 7.359473705291748, "learning_rate": 9.6972014018329e-06, "loss": 1.1277, "step": 1011 }, { "epoch": 0.1381758601856909, "grad_norm": 8.165733337402344, "learning_rate": 9.696443155712488e-06, "loss": 0.9312, "step": 1012 }, { "epoch": 0.13831239759694156, "grad_norm": 6.28787899017334, "learning_rate": 9.695683991126458e-06, "loss": 1.0064, "step": 1013 }, { "epoch": 0.13844893500819225, "grad_norm": 13.889725685119629, "learning_rate": 9.694923908223279e-06, "loss": 0.9412, "step": 1014 }, { "epoch": 0.13858547241944294, "grad_norm": 7.531134605407715, "learning_rate": 9.694162907151596e-06, "loss": 1.123, "step": 1015 }, { "epoch": 0.1387220098306936, "grad_norm": 6.232512950897217, "learning_rate": 9.693400988060236e-06, "loss": 1.0708, "step": 1016 }, { "epoch": 0.1388585472419443, "grad_norm": 7.089717388153076, "learning_rate": 9.692638151098204e-06, "loss": 0.999, "step": 1017 }, { "epoch": 0.138995084653195, "grad_norm": 9.256784439086914, "learning_rate": 9.691874396414687e-06, "loss": 1.128, "step": 1018 }, { "epoch": 0.13913162206444565, "grad_norm": 6.941555976867676, "learning_rate": 9.691109724159047e-06, "loss": 1.1688, "step": 1019 }, { "epoch": 0.13926815947569635, "grad_norm": 7.970249652862549, "learning_rate": 9.69034413448083e-06, "loss": 1.0551, "step": 1020 }, { "epoch": 0.139404696886947, "grad_norm": 9.75619888305664, "learning_rate": 9.689577627529758e-06, "loss": 0.9509, "step": 1021 }, { "epoch": 0.1395412342981977, "grad_norm": 7.622045040130615, "learning_rate": 9.688810203455737e-06, "loss": 1.0498, "step": 1022 }, { "epoch": 0.1396777717094484, "grad_norm": 8.09350872039795, "learning_rate": 9.688041862408843e-06, "loss": 1.0884, "step": 1023 }, { "epoch": 0.13981430912069906, "grad_norm": 7.626909255981445, "learning_rate": 9.687272604539344e-06, "loss": 1.0455, "step": 1024 }, { "epoch": 0.13995084653194975, "grad_norm": 6.759965419769287, "learning_rate": 9.686502429997677e-06, "loss": 1.0565, "step": 1025 }, { "epoch": 0.14008738394320044, "grad_norm": 5.8203277587890625, "learning_rate": 9.685731338934463e-06, "loss": 0.9503, "step": 1026 }, { "epoch": 0.1402239213544511, "grad_norm": 8.259676933288574, "learning_rate": 9.684959331500504e-06, "loss": 0.997, "step": 1027 }, { "epoch": 0.1403604587657018, "grad_norm": 5.538718223571777, "learning_rate": 9.684186407846774e-06, "loss": 1.0058, "step": 1028 }, { "epoch": 0.1404969961769525, "grad_norm": 8.121193885803223, "learning_rate": 9.683412568124434e-06, "loss": 1.0924, "step": 1029 }, { "epoch": 0.14063353358820316, "grad_norm": 11.526751518249512, "learning_rate": 9.68263781248482e-06, "loss": 1.0362, "step": 1030 }, { "epoch": 0.14077007099945385, "grad_norm": 9.010273933410645, "learning_rate": 9.681862141079449e-06, "loss": 1.1003, "step": 1031 }, { "epoch": 0.14090660841070454, "grad_norm": 6.125271797180176, "learning_rate": 9.681085554060013e-06, "loss": 1.0359, "step": 1032 }, { "epoch": 0.1410431458219552, "grad_norm": 6.301988124847412, "learning_rate": 9.680308051578389e-06, "loss": 1.0563, "step": 1033 }, { "epoch": 0.1411796832332059, "grad_norm": 11.446797370910645, "learning_rate": 9.67952963378663e-06, "loss": 1.2054, "step": 1034 }, { "epoch": 0.1413162206444566, "grad_norm": 8.450840950012207, "learning_rate": 9.678750300836966e-06, "loss": 1.0414, "step": 1035 }, { "epoch": 0.14145275805570726, "grad_norm": 10.880522727966309, "learning_rate": 9.677970052881811e-06, "loss": 1.0784, "step": 1036 }, { "epoch": 0.14158929546695795, "grad_norm": 7.206618309020996, "learning_rate": 9.677188890073756e-06, "loss": 0.955, "step": 1037 }, { "epoch": 0.14172583287820864, "grad_norm": 6.989908218383789, "learning_rate": 9.676406812565566e-06, "loss": 1.0026, "step": 1038 }, { "epoch": 0.1418623702894593, "grad_norm": 7.253908157348633, "learning_rate": 9.675623820510191e-06, "loss": 0.9126, "step": 1039 }, { "epoch": 0.14199890770071, "grad_norm": 5.420611381530762, "learning_rate": 9.67483991406076e-06, "loss": 0.9459, "step": 1040 }, { "epoch": 0.1421354451119607, "grad_norm": 6.887893199920654, "learning_rate": 9.674055093370574e-06, "loss": 1.0594, "step": 1041 }, { "epoch": 0.14227198252321135, "grad_norm": 6.928985595703125, "learning_rate": 9.67326935859312e-06, "loss": 0.8751, "step": 1042 }, { "epoch": 0.14240851993446205, "grad_norm": 6.836937427520752, "learning_rate": 9.67248270988206e-06, "loss": 1.08, "step": 1043 }, { "epoch": 0.14254505734571274, "grad_norm": 10.296745300292969, "learning_rate": 9.67169514739124e-06, "loss": 0.9388, "step": 1044 }, { "epoch": 0.1426815947569634, "grad_norm": 12.438305854797363, "learning_rate": 9.670906671274675e-06, "loss": 1.0692, "step": 1045 }, { "epoch": 0.1428181321682141, "grad_norm": 6.37188196182251, "learning_rate": 9.670117281686569e-06, "loss": 1.028, "step": 1046 }, { "epoch": 0.14295466957946476, "grad_norm": 15.840499877929688, "learning_rate": 9.669326978781297e-06, "loss": 0.8196, "step": 1047 }, { "epoch": 0.14309120699071545, "grad_norm": 7.875833034515381, "learning_rate": 9.668535762713416e-06, "loss": 1.1751, "step": 1048 }, { "epoch": 0.14322774440196614, "grad_norm": 7.607682704925537, "learning_rate": 9.667743633637663e-06, "loss": 0.9867, "step": 1049 }, { "epoch": 0.1433642818132168, "grad_norm": 6.648364543914795, "learning_rate": 9.666950591708947e-06, "loss": 0.9774, "step": 1050 }, { "epoch": 0.1435008192244675, "grad_norm": 8.174484252929688, "learning_rate": 9.666156637082367e-06, "loss": 1.1638, "step": 1051 }, { "epoch": 0.1436373566357182, "grad_norm": 8.004630088806152, "learning_rate": 9.665361769913187e-06, "loss": 0.8404, "step": 1052 }, { "epoch": 0.14377389404696886, "grad_norm": 13.931015014648438, "learning_rate": 9.664565990356861e-06, "loss": 0.9517, "step": 1053 }, { "epoch": 0.14391043145821955, "grad_norm": 5.0958027839660645, "learning_rate": 9.663769298569015e-06, "loss": 0.9819, "step": 1054 }, { "epoch": 0.14404696886947024, "grad_norm": 8.664600372314453, "learning_rate": 9.662971694705454e-06, "loss": 1.0213, "step": 1055 }, { "epoch": 0.1441835062807209, "grad_norm": 9.147215843200684, "learning_rate": 9.662173178922163e-06, "loss": 1.0095, "step": 1056 }, { "epoch": 0.1443200436919716, "grad_norm": 6.128600597381592, "learning_rate": 9.661373751375306e-06, "loss": 0.9706, "step": 1057 }, { "epoch": 0.1444565811032223, "grad_norm": 8.162605285644531, "learning_rate": 9.660573412221221e-06, "loss": 1.0511, "step": 1058 }, { "epoch": 0.14459311851447296, "grad_norm": 9.399548530578613, "learning_rate": 9.659772161616431e-06, "loss": 0.9145, "step": 1059 }, { "epoch": 0.14472965592572365, "grad_norm": 6.124495506286621, "learning_rate": 9.658969999717631e-06, "loss": 0.995, "step": 1060 }, { "epoch": 0.14486619333697434, "grad_norm": 7.291552543640137, "learning_rate": 9.658166926681699e-06, "loss": 1.1108, "step": 1061 }, { "epoch": 0.145002730748225, "grad_norm": 8.982306480407715, "learning_rate": 9.657362942665687e-06, "loss": 0.9815, "step": 1062 }, { "epoch": 0.1451392681594757, "grad_norm": 10.967436790466309, "learning_rate": 9.656558047826826e-06, "loss": 1.0951, "step": 1063 }, { "epoch": 0.1452758055707264, "grad_norm": 8.362929344177246, "learning_rate": 9.65575224232253e-06, "loss": 1.0256, "step": 1064 }, { "epoch": 0.14541234298197706, "grad_norm": 6.414985179901123, "learning_rate": 9.654945526310382e-06, "loss": 0.9977, "step": 1065 }, { "epoch": 0.14554888039322775, "grad_norm": 5.820162296295166, "learning_rate": 9.654137899948155e-06, "loss": 1.0302, "step": 1066 }, { "epoch": 0.14568541780447844, "grad_norm": 7.595294952392578, "learning_rate": 9.65332936339379e-06, "loss": 1.0275, "step": 1067 }, { "epoch": 0.1458219552157291, "grad_norm": 6.123447418212891, "learning_rate": 9.652519916805406e-06, "loss": 1.0576, "step": 1068 }, { "epoch": 0.1459584926269798, "grad_norm": 10.953108787536621, "learning_rate": 9.65170956034131e-06, "loss": 0.8784, "step": 1069 }, { "epoch": 0.1460950300382305, "grad_norm": 13.519688606262207, "learning_rate": 9.650898294159976e-06, "loss": 1.0739, "step": 1070 }, { "epoch": 0.14623156744948115, "grad_norm": 7.220661640167236, "learning_rate": 9.65008611842006e-06, "loss": 1.1271, "step": 1071 }, { "epoch": 0.14636810486073185, "grad_norm": 5.2602338790893555, "learning_rate": 9.649273033280401e-06, "loss": 0.9867, "step": 1072 }, { "epoch": 0.1465046422719825, "grad_norm": 8.155426979064941, "learning_rate": 9.648459038900005e-06, "loss": 0.9778, "step": 1073 }, { "epoch": 0.1466411796832332, "grad_norm": 9.012592315673828, "learning_rate": 9.647644135438065e-06, "loss": 1.1476, "step": 1074 }, { "epoch": 0.1467777170944839, "grad_norm": 7.156071186065674, "learning_rate": 9.64682832305395e-06, "loss": 0.9925, "step": 1075 }, { "epoch": 0.14691425450573456, "grad_norm": 14.388676643371582, "learning_rate": 9.6460116019072e-06, "loss": 1.2818, "step": 1076 }, { "epoch": 0.14705079191698525, "grad_norm": 8.494617462158203, "learning_rate": 9.645193972157543e-06, "loss": 1.053, "step": 1077 }, { "epoch": 0.14718732932823594, "grad_norm": 5.890790939331055, "learning_rate": 9.64437543396488e-06, "loss": 1.0944, "step": 1078 }, { "epoch": 0.1473238667394866, "grad_norm": 6.374682426452637, "learning_rate": 9.643555987489284e-06, "loss": 0.9742, "step": 1079 }, { "epoch": 0.1474604041507373, "grad_norm": 7.643800258636475, "learning_rate": 9.642735632891017e-06, "loss": 1.217, "step": 1080 }, { "epoch": 0.147596941561988, "grad_norm": 6.145362854003906, "learning_rate": 9.641914370330509e-06, "loss": 1.023, "step": 1081 }, { "epoch": 0.14773347897323866, "grad_norm": 8.373120307922363, "learning_rate": 9.641092199968373e-06, "loss": 1.0495, "step": 1082 }, { "epoch": 0.14787001638448935, "grad_norm": 8.17119312286377, "learning_rate": 9.640269121965397e-06, "loss": 1.1614, "step": 1083 }, { "epoch": 0.14800655379574004, "grad_norm": 5.774254322052002, "learning_rate": 9.639445136482549e-06, "loss": 1.0868, "step": 1084 }, { "epoch": 0.1481430912069907, "grad_norm": 10.798376083374023, "learning_rate": 9.638620243680968e-06, "loss": 0.9531, "step": 1085 }, { "epoch": 0.1482796286182414, "grad_norm": 7.946506023406982, "learning_rate": 9.63779444372198e-06, "loss": 1.0689, "step": 1086 }, { "epoch": 0.1484161660294921, "grad_norm": 8.863298416137695, "learning_rate": 9.636967736767082e-06, "loss": 1.1162, "step": 1087 }, { "epoch": 0.14855270344074276, "grad_norm": 8.13926887512207, "learning_rate": 9.63614012297795e-06, "loss": 1.0714, "step": 1088 }, { "epoch": 0.14868924085199345, "grad_norm": 5.965102672576904, "learning_rate": 9.635311602516437e-06, "loss": 0.9432, "step": 1089 }, { "epoch": 0.14882577826324414, "grad_norm": 6.98094367980957, "learning_rate": 9.634482175544574e-06, "loss": 0.9912, "step": 1090 }, { "epoch": 0.1489623156744948, "grad_norm": 6.1961774826049805, "learning_rate": 9.633651842224568e-06, "loss": 1.0534, "step": 1091 }, { "epoch": 0.1490988530857455, "grad_norm": 15.026382446289062, "learning_rate": 9.632820602718806e-06, "loss": 1.164, "step": 1092 }, { "epoch": 0.1492353904969962, "grad_norm": 6.549170970916748, "learning_rate": 9.631988457189847e-06, "loss": 0.9874, "step": 1093 }, { "epoch": 0.14937192790824685, "grad_norm": 9.228849411010742, "learning_rate": 9.631155405800436e-06, "loss": 1.0212, "step": 1094 }, { "epoch": 0.14950846531949755, "grad_norm": 10.567547798156738, "learning_rate": 9.630321448713484e-06, "loss": 1.042, "step": 1095 }, { "epoch": 0.14964500273074824, "grad_norm": 7.4318108558654785, "learning_rate": 9.629486586092088e-06, "loss": 1.0591, "step": 1096 }, { "epoch": 0.1497815401419989, "grad_norm": 15.326586723327637, "learning_rate": 9.628650818099518e-06, "loss": 0.9855, "step": 1097 }, { "epoch": 0.1499180775532496, "grad_norm": 7.6866888999938965, "learning_rate": 9.627814144899222e-06, "loss": 1.0186, "step": 1098 }, { "epoch": 0.15005461496450026, "grad_norm": 10.249095916748047, "learning_rate": 9.626976566654825e-06, "loss": 1.073, "step": 1099 }, { "epoch": 0.15019115237575095, "grad_norm": 12.137853622436523, "learning_rate": 9.62613808353013e-06, "loss": 0.9838, "step": 1100 }, { "epoch": 0.15032768978700164, "grad_norm": 8.97667407989502, "learning_rate": 9.625298695689116e-06, "loss": 0.9559, "step": 1101 }, { "epoch": 0.1504642271982523, "grad_norm": 8.7179536819458, "learning_rate": 9.624458403295935e-06, "loss": 0.9465, "step": 1102 }, { "epoch": 0.150600764609503, "grad_norm": 11.540604591369629, "learning_rate": 9.623617206514925e-06, "loss": 0.9281, "step": 1103 }, { "epoch": 0.1507373020207537, "grad_norm": 6.9372334480285645, "learning_rate": 9.622775105510593e-06, "loss": 1.0247, "step": 1104 }, { "epoch": 0.15087383943200436, "grad_norm": 7.971857070922852, "learning_rate": 9.621932100447627e-06, "loss": 0.9353, "step": 1105 }, { "epoch": 0.15101037684325505, "grad_norm": 6.446276664733887, "learning_rate": 9.621088191490889e-06, "loss": 1.1677, "step": 1106 }, { "epoch": 0.15114691425450574, "grad_norm": 7.577133655548096, "learning_rate": 9.620243378805418e-06, "loss": 0.9408, "step": 1107 }, { "epoch": 0.1512834516657564, "grad_norm": 7.07135534286499, "learning_rate": 9.619397662556434e-06, "loss": 0.9661, "step": 1108 }, { "epoch": 0.1514199890770071, "grad_norm": 14.848194122314453, "learning_rate": 9.61855104290933e-06, "loss": 0.9561, "step": 1109 }, { "epoch": 0.1515565264882578, "grad_norm": 8.083752632141113, "learning_rate": 9.617703520029672e-06, "loss": 1.1159, "step": 1110 }, { "epoch": 0.15169306389950846, "grad_norm": 8.106919288635254, "learning_rate": 9.61685509408321e-06, "loss": 1.0348, "step": 1111 }, { "epoch": 0.15182960131075915, "grad_norm": 10.860791206359863, "learning_rate": 9.61600576523587e-06, "loss": 1.0122, "step": 1112 }, { "epoch": 0.15196613872200984, "grad_norm": 9.74930191040039, "learning_rate": 9.615155533653748e-06, "loss": 0.9253, "step": 1113 }, { "epoch": 0.1521026761332605, "grad_norm": 6.268348693847656, "learning_rate": 9.61430439950312e-06, "loss": 0.8952, "step": 1114 }, { "epoch": 0.1522392135445112, "grad_norm": 6.737515449523926, "learning_rate": 9.613452362950444e-06, "loss": 0.9687, "step": 1115 }, { "epoch": 0.1523757509557619, "grad_norm": 8.040236473083496, "learning_rate": 9.612599424162344e-06, "loss": 0.9244, "step": 1116 }, { "epoch": 0.15251228836701256, "grad_norm": 7.888452053070068, "learning_rate": 9.611745583305629e-06, "loss": 0.8619, "step": 1117 }, { "epoch": 0.15264882577826325, "grad_norm": 7.04509162902832, "learning_rate": 9.610890840547283e-06, "loss": 1.0651, "step": 1118 }, { "epoch": 0.15278536318951394, "grad_norm": 7.814815044403076, "learning_rate": 9.610035196054461e-06, "loss": 1.0289, "step": 1119 }, { "epoch": 0.1529219006007646, "grad_norm": 6.5771894454956055, "learning_rate": 9.609178649994499e-06, "loss": 1.0125, "step": 1120 }, { "epoch": 0.1530584380120153, "grad_norm": 7.153321266174316, "learning_rate": 9.608321202534912e-06, "loss": 0.9737, "step": 1121 }, { "epoch": 0.15319497542326596, "grad_norm": 6.632976055145264, "learning_rate": 9.607462853843384e-06, "loss": 0.8843, "step": 1122 }, { "epoch": 0.15333151283451665, "grad_norm": 13.821470260620117, "learning_rate": 9.60660360408778e-06, "loss": 0.9825, "step": 1123 }, { "epoch": 0.15346805024576735, "grad_norm": 7.72769021987915, "learning_rate": 9.60574345343614e-06, "loss": 1.1298, "step": 1124 }, { "epoch": 0.153604587657018, "grad_norm": 15.799240112304688, "learning_rate": 9.604882402056681e-06, "loss": 0.9501, "step": 1125 }, { "epoch": 0.1537411250682687, "grad_norm": 7.734097480773926, "learning_rate": 9.604020450117795e-06, "loss": 1.0414, "step": 1126 }, { "epoch": 0.1538776624795194, "grad_norm": 12.514607429504395, "learning_rate": 9.603157597788053e-06, "loss": 0.9764, "step": 1127 }, { "epoch": 0.15401419989077006, "grad_norm": 7.128668785095215, "learning_rate": 9.602293845236198e-06, "loss": 1.2188, "step": 1128 }, { "epoch": 0.15415073730202075, "grad_norm": 7.128610134124756, "learning_rate": 9.601429192631147e-06, "loss": 0.932, "step": 1129 }, { "epoch": 0.15428727471327144, "grad_norm": 6.4979567527771, "learning_rate": 9.600563640142002e-06, "loss": 1.031, "step": 1130 }, { "epoch": 0.1544238121245221, "grad_norm": 7.73478364944458, "learning_rate": 9.599697187938035e-06, "loss": 1.0056, "step": 1131 }, { "epoch": 0.1545603495357728, "grad_norm": 6.983908653259277, "learning_rate": 9.598829836188694e-06, "loss": 1.0412, "step": 1132 }, { "epoch": 0.1546968869470235, "grad_norm": 6.7610554695129395, "learning_rate": 9.597961585063604e-06, "loss": 0.9607, "step": 1133 }, { "epoch": 0.15483342435827416, "grad_norm": 6.469634056091309, "learning_rate": 9.597092434732565e-06, "loss": 1.0163, "step": 1134 }, { "epoch": 0.15496996176952485, "grad_norm": 9.312721252441406, "learning_rate": 9.596222385365554e-06, "loss": 1.0659, "step": 1135 }, { "epoch": 0.15510649918077554, "grad_norm": 12.315463066101074, "learning_rate": 9.595351437132722e-06, "loss": 1.0688, "step": 1136 }, { "epoch": 0.1552430365920262, "grad_norm": 6.918793678283691, "learning_rate": 9.5944795902044e-06, "loss": 1.006, "step": 1137 }, { "epoch": 0.1553795740032769, "grad_norm": 5.781917095184326, "learning_rate": 9.593606844751088e-06, "loss": 1.0717, "step": 1138 }, { "epoch": 0.1555161114145276, "grad_norm": 6.596312046051025, "learning_rate": 9.59273320094347e-06, "loss": 1.1153, "step": 1139 }, { "epoch": 0.15565264882577826, "grad_norm": 8.105413436889648, "learning_rate": 9.591858658952396e-06, "loss": 0.8708, "step": 1140 }, { "epoch": 0.15578918623702895, "grad_norm": 6.608616352081299, "learning_rate": 9.5909832189489e-06, "loss": 1.0226, "step": 1141 }, { "epoch": 0.15592572364827964, "grad_norm": 6.602327823638916, "learning_rate": 9.590106881104188e-06, "loss": 1.001, "step": 1142 }, { "epoch": 0.1560622610595303, "grad_norm": 9.548921585083008, "learning_rate": 9.589229645589645e-06, "loss": 1.0404, "step": 1143 }, { "epoch": 0.156198798470781, "grad_norm": 7.659726619720459, "learning_rate": 9.588351512576822e-06, "loss": 1.0798, "step": 1144 }, { "epoch": 0.1563353358820317, "grad_norm": 8.227075576782227, "learning_rate": 9.587472482237457e-06, "loss": 1.1461, "step": 1145 }, { "epoch": 0.15647187329328235, "grad_norm": 6.328829765319824, "learning_rate": 9.586592554743458e-06, "loss": 1.0909, "step": 1146 }, { "epoch": 0.15660841070453305, "grad_norm": 11.891766548156738, "learning_rate": 9.585711730266908e-06, "loss": 1.073, "step": 1147 }, { "epoch": 0.1567449481157837, "grad_norm": 8.119528770446777, "learning_rate": 9.584830008980068e-06, "loss": 0.9107, "step": 1148 }, { "epoch": 0.1568814855270344, "grad_norm": 8.25195026397705, "learning_rate": 9.583947391055371e-06, "loss": 1.0426, "step": 1149 }, { "epoch": 0.1570180229382851, "grad_norm": 6.32529878616333, "learning_rate": 9.583063876665428e-06, "loss": 1.0504, "step": 1150 }, { "epoch": 0.15715456034953576, "grad_norm": 24.502700805664062, "learning_rate": 9.582179465983026e-06, "loss": 0.9046, "step": 1151 }, { "epoch": 0.15729109776078645, "grad_norm": 11.201688766479492, "learning_rate": 9.581294159181124e-06, "loss": 1.0131, "step": 1152 }, { "epoch": 0.15742763517203714, "grad_norm": 7.30430269241333, "learning_rate": 9.58040795643286e-06, "loss": 1.0182, "step": 1153 }, { "epoch": 0.1575641725832878, "grad_norm": 8.526731491088867, "learning_rate": 9.579520857911542e-06, "loss": 1.1634, "step": 1154 }, { "epoch": 0.1577007099945385, "grad_norm": 6.524833679199219, "learning_rate": 9.57863286379066e-06, "loss": 1.1317, "step": 1155 }, { "epoch": 0.1578372474057892, "grad_norm": 6.346664905548096, "learning_rate": 9.577743974243875e-06, "loss": 0.9099, "step": 1156 }, { "epoch": 0.15797378481703986, "grad_norm": 6.231741905212402, "learning_rate": 9.57685418944502e-06, "loss": 1.0231, "step": 1157 }, { "epoch": 0.15811032222829055, "grad_norm": 6.447360038757324, "learning_rate": 9.575963509568111e-06, "loss": 1.0825, "step": 1158 }, { "epoch": 0.15824685963954124, "grad_norm": 5.206186294555664, "learning_rate": 9.575071934787336e-06, "loss": 1.0068, "step": 1159 }, { "epoch": 0.1583833970507919, "grad_norm": 8.380304336547852, "learning_rate": 9.574179465277051e-06, "loss": 1.0552, "step": 1160 }, { "epoch": 0.1585199344620426, "grad_norm": 7.102015495300293, "learning_rate": 9.573286101211801e-06, "loss": 1.152, "step": 1161 }, { "epoch": 0.1586564718732933, "grad_norm": 7.183263778686523, "learning_rate": 9.57239184276629e-06, "loss": 1.1152, "step": 1162 }, { "epoch": 0.15879300928454396, "grad_norm": 7.31404447555542, "learning_rate": 9.57149669011541e-06, "loss": 1.0967, "step": 1163 }, { "epoch": 0.15892954669579465, "grad_norm": 6.643958568572998, "learning_rate": 9.570600643434217e-06, "loss": 1.147, "step": 1164 }, { "epoch": 0.15906608410704534, "grad_norm": 9.715282440185547, "learning_rate": 9.569703702897956e-06, "loss": 1.0003, "step": 1165 }, { "epoch": 0.159202621518296, "grad_norm": 8.055109024047852, "learning_rate": 9.568805868682031e-06, "loss": 1.1554, "step": 1166 }, { "epoch": 0.1593391589295467, "grad_norm": 8.853145599365234, "learning_rate": 9.56790714096203e-06, "loss": 1.0358, "step": 1167 }, { "epoch": 0.1594756963407974, "grad_norm": 6.316333293914795, "learning_rate": 9.567007519913716e-06, "loss": 0.996, "step": 1168 }, { "epoch": 0.15961223375204805, "grad_norm": 6.743781566619873, "learning_rate": 9.566107005713021e-06, "loss": 0.9859, "step": 1169 }, { "epoch": 0.15974877116329875, "grad_norm": 6.970876216888428, "learning_rate": 9.565205598536059e-06, "loss": 1.1616, "step": 1170 }, { "epoch": 0.15988530857454944, "grad_norm": 12.716506004333496, "learning_rate": 9.56430329855911e-06, "loss": 0.9668, "step": 1171 }, { "epoch": 0.1600218459858001, "grad_norm": 9.101361274719238, "learning_rate": 9.563400105958638e-06, "loss": 0.9294, "step": 1172 }, { "epoch": 0.1601583833970508, "grad_norm": 5.786417007446289, "learning_rate": 9.562496020911274e-06, "loss": 1.0774, "step": 1173 }, { "epoch": 0.16029492080830146, "grad_norm": 7.3655524253845215, "learning_rate": 9.561591043593828e-06, "loss": 0.8395, "step": 1174 }, { "epoch": 0.16043145821955215, "grad_norm": 10.097489356994629, "learning_rate": 9.560685174183285e-06, "loss": 0.9381, "step": 1175 }, { "epoch": 0.16056799563080285, "grad_norm": 8.880616188049316, "learning_rate": 9.559778412856795e-06, "loss": 0.9137, "step": 1176 }, { "epoch": 0.1607045330420535, "grad_norm": 7.70898962020874, "learning_rate": 9.558870759791698e-06, "loss": 0.8393, "step": 1177 }, { "epoch": 0.1608410704533042, "grad_norm": 8.643007278442383, "learning_rate": 9.557962215165496e-06, "loss": 1.0118, "step": 1178 }, { "epoch": 0.1609776078645549, "grad_norm": 21.564733505249023, "learning_rate": 9.557052779155871e-06, "loss": 1.082, "step": 1179 }, { "epoch": 0.16111414527580556, "grad_norm": 6.690162658691406, "learning_rate": 9.55614245194068e-06, "loss": 0.99, "step": 1180 }, { "epoch": 0.16125068268705625, "grad_norm": 10.110111236572266, "learning_rate": 9.555231233697948e-06, "loss": 0.9739, "step": 1181 }, { "epoch": 0.16138722009830694, "grad_norm": 5.481503963470459, "learning_rate": 9.55431912460588e-06, "loss": 0.9795, "step": 1182 }, { "epoch": 0.1615237575095576, "grad_norm": 6.40487813949585, "learning_rate": 9.553406124842855e-06, "loss": 1.1447, "step": 1183 }, { "epoch": 0.1616602949208083, "grad_norm": 7.03510046005249, "learning_rate": 9.552492234587423e-06, "loss": 0.8938, "step": 1184 }, { "epoch": 0.161796832332059, "grad_norm": 6.879603385925293, "learning_rate": 9.551577454018312e-06, "loss": 0.9834, "step": 1185 }, { "epoch": 0.16193336974330966, "grad_norm": 17.36176872253418, "learning_rate": 9.550661783314421e-06, "loss": 0.9541, "step": 1186 }, { "epoch": 0.16206990715456035, "grad_norm": 10.213103294372559, "learning_rate": 9.549745222654825e-06, "loss": 0.9496, "step": 1187 }, { "epoch": 0.16220644456581104, "grad_norm": 6.082911968231201, "learning_rate": 9.548827772218772e-06, "loss": 1.0924, "step": 1188 }, { "epoch": 0.1623429819770617, "grad_norm": 5.282127380371094, "learning_rate": 9.547909432185685e-06, "loss": 1.0352, "step": 1189 }, { "epoch": 0.1624795193883124, "grad_norm": 8.504545211791992, "learning_rate": 9.546990202735159e-06, "loss": 1.1682, "step": 1190 }, { "epoch": 0.1626160567995631, "grad_norm": 5.618490695953369, "learning_rate": 9.546070084046964e-06, "loss": 1.1768, "step": 1191 }, { "epoch": 0.16275259421081376, "grad_norm": 16.24967384338379, "learning_rate": 9.545149076301045e-06, "loss": 1.1682, "step": 1192 }, { "epoch": 0.16288913162206445, "grad_norm": 8.832948684692383, "learning_rate": 9.54422717967752e-06, "loss": 1.1312, "step": 1193 }, { "epoch": 0.16302566903331514, "grad_norm": 7.563710689544678, "learning_rate": 9.54330439435668e-06, "loss": 1.0233, "step": 1194 }, { "epoch": 0.1631622064445658, "grad_norm": 5.582520008087158, "learning_rate": 9.542380720518992e-06, "loss": 1.1029, "step": 1195 }, { "epoch": 0.1632987438558165, "grad_norm": 7.423349857330322, "learning_rate": 9.541456158345094e-06, "loss": 0.9511, "step": 1196 }, { "epoch": 0.1634352812670672, "grad_norm": 5.864078998565674, "learning_rate": 9.540530708015801e-06, "loss": 0.9522, "step": 1197 }, { "epoch": 0.16357181867831785, "grad_norm": 5.88934326171875, "learning_rate": 9.539604369712099e-06, "loss": 0.9902, "step": 1198 }, { "epoch": 0.16370835608956855, "grad_norm": 17.254188537597656, "learning_rate": 9.538677143615147e-06, "loss": 1.1639, "step": 1199 }, { "epoch": 0.1638448935008192, "grad_norm": 8.349756240844727, "learning_rate": 9.53774902990628e-06, "loss": 1.0743, "step": 1200 }, { "epoch": 0.1639814309120699, "grad_norm": 7.481032371520996, "learning_rate": 9.536820028767007e-06, "loss": 0.9817, "step": 1201 }, { "epoch": 0.1641179683233206, "grad_norm": 9.010658264160156, "learning_rate": 9.535890140379008e-06, "loss": 0.9616, "step": 1202 }, { "epoch": 0.16425450573457126, "grad_norm": 6.696835517883301, "learning_rate": 9.534959364924137e-06, "loss": 1.0188, "step": 1203 }, { "epoch": 0.16439104314582195, "grad_norm": 8.029718399047852, "learning_rate": 9.534027702584425e-06, "loss": 1.0682, "step": 1204 }, { "epoch": 0.16452758055707264, "grad_norm": 6.100505352020264, "learning_rate": 9.53309515354207e-06, "loss": 1.019, "step": 1205 }, { "epoch": 0.1646641179683233, "grad_norm": 7.044552803039551, "learning_rate": 9.53216171797945e-06, "loss": 1.036, "step": 1206 }, { "epoch": 0.164800655379574, "grad_norm": 6.937235355377197, "learning_rate": 9.531227396079111e-06, "loss": 1.1491, "step": 1207 }, { "epoch": 0.1649371927908247, "grad_norm": 9.733360290527344, "learning_rate": 9.530292188023778e-06, "loss": 1.0273, "step": 1208 }, { "epoch": 0.16507373020207536, "grad_norm": 6.554324150085449, "learning_rate": 9.529356093996342e-06, "loss": 1.0008, "step": 1209 }, { "epoch": 0.16521026761332605, "grad_norm": 9.49962043762207, "learning_rate": 9.528419114179876e-06, "loss": 0.9795, "step": 1210 }, { "epoch": 0.16534680502457674, "grad_norm": 7.326340198516846, "learning_rate": 9.527481248757618e-06, "loss": 1.0543, "step": 1211 }, { "epoch": 0.1654833424358274, "grad_norm": 38.641700744628906, "learning_rate": 9.526542497912984e-06, "loss": 0.9326, "step": 1212 }, { "epoch": 0.1656198798470781, "grad_norm": 6.997072219848633, "learning_rate": 9.525602861829562e-06, "loss": 1.0976, "step": 1213 }, { "epoch": 0.1657564172583288, "grad_norm": 7.181764125823975, "learning_rate": 9.524662340691113e-06, "loss": 1.0787, "step": 1214 }, { "epoch": 0.16589295466957946, "grad_norm": 7.117163181304932, "learning_rate": 9.523720934681573e-06, "loss": 1.0175, "step": 1215 }, { "epoch": 0.16602949208083015, "grad_norm": 11.846736907958984, "learning_rate": 9.522778643985045e-06, "loss": 0.8927, "step": 1216 }, { "epoch": 0.16616602949208084, "grad_norm": 46.479854583740234, "learning_rate": 9.521835468785812e-06, "loss": 1.1626, "step": 1217 }, { "epoch": 0.1663025669033315, "grad_norm": 6.767431259155273, "learning_rate": 9.520891409268328e-06, "loss": 1.0681, "step": 1218 }, { "epoch": 0.1664391043145822, "grad_norm": 10.66124153137207, "learning_rate": 9.519946465617217e-06, "loss": 1.1267, "step": 1219 }, { "epoch": 0.1665756417258329, "grad_norm": 6.208431243896484, "learning_rate": 9.51900063801728e-06, "loss": 1.0276, "step": 1220 }, { "epoch": 0.16671217913708355, "grad_norm": 6.407414436340332, "learning_rate": 9.518053926653488e-06, "loss": 1.0736, "step": 1221 }, { "epoch": 0.16684871654833425, "grad_norm": 6.907487392425537, "learning_rate": 9.517106331710984e-06, "loss": 1.1244, "step": 1222 }, { "epoch": 0.16698525395958494, "grad_norm": 6.531860828399658, "learning_rate": 9.516157853375089e-06, "loss": 1.0344, "step": 1223 }, { "epoch": 0.1671217913708356, "grad_norm": 10.352866172790527, "learning_rate": 9.515208491831292e-06, "loss": 1.0487, "step": 1224 }, { "epoch": 0.1672583287820863, "grad_norm": 7.8848395347595215, "learning_rate": 9.514258247265253e-06, "loss": 0.9554, "step": 1225 }, { "epoch": 0.16739486619333696, "grad_norm": 8.043700218200684, "learning_rate": 9.513307119862814e-06, "loss": 0.9962, "step": 1226 }, { "epoch": 0.16753140360458765, "grad_norm": 7.825257301330566, "learning_rate": 9.512355109809977e-06, "loss": 0.9853, "step": 1227 }, { "epoch": 0.16766794101583835, "grad_norm": 9.804315567016602, "learning_rate": 9.511402217292927e-06, "loss": 0.9709, "step": 1228 }, { "epoch": 0.167804478427089, "grad_norm": 21.853796005249023, "learning_rate": 9.510448442498015e-06, "loss": 1.0436, "step": 1229 }, { "epoch": 0.1679410158383397, "grad_norm": 6.4918622970581055, "learning_rate": 9.50949378561177e-06, "loss": 1.0888, "step": 1230 }, { "epoch": 0.1680775532495904, "grad_norm": 6.533638000488281, "learning_rate": 9.508538246820889e-06, "loss": 0.9404, "step": 1231 }, { "epoch": 0.16821409066084106, "grad_norm": 6.383194446563721, "learning_rate": 9.507581826312243e-06, "loss": 1.1514, "step": 1232 }, { "epoch": 0.16835062807209175, "grad_norm": 8.416783332824707, "learning_rate": 9.506624524272876e-06, "loss": 1.0197, "step": 1233 }, { "epoch": 0.16848716548334244, "grad_norm": 8.175089836120605, "learning_rate": 9.505666340890004e-06, "loss": 0.9375, "step": 1234 }, { "epoch": 0.1686237028945931, "grad_norm": 7.21132755279541, "learning_rate": 9.504707276351014e-06, "loss": 1.1142, "step": 1235 }, { "epoch": 0.1687602403058438, "grad_norm": 10.461348533630371, "learning_rate": 9.503747330843468e-06, "loss": 0.7938, "step": 1236 }, { "epoch": 0.1688967777170945, "grad_norm": 10.043424606323242, "learning_rate": 9.502786504555099e-06, "loss": 1.1523, "step": 1237 }, { "epoch": 0.16903331512834516, "grad_norm": 5.857701778411865, "learning_rate": 9.501824797673812e-06, "loss": 1.0372, "step": 1238 }, { "epoch": 0.16916985253959585, "grad_norm": 9.475350379943848, "learning_rate": 9.500862210387682e-06, "loss": 1.0957, "step": 1239 }, { "epoch": 0.16930638995084654, "grad_norm": 8.698984146118164, "learning_rate": 9.499898742884962e-06, "loss": 1.1107, "step": 1240 }, { "epoch": 0.1694429273620972, "grad_norm": 5.628172397613525, "learning_rate": 9.498934395354074e-06, "loss": 1.1243, "step": 1241 }, { "epoch": 0.1695794647733479, "grad_norm": 6.586719512939453, "learning_rate": 9.497969167983608e-06, "loss": 0.9366, "step": 1242 }, { "epoch": 0.1697160021845986, "grad_norm": 6.109160900115967, "learning_rate": 9.497003060962334e-06, "loss": 1.0115, "step": 1243 }, { "epoch": 0.16985253959584926, "grad_norm": 6.29781436920166, "learning_rate": 9.496036074479184e-06, "loss": 0.9832, "step": 1244 }, { "epoch": 0.16998907700709995, "grad_norm": 7.573486328125, "learning_rate": 9.495068208723274e-06, "loss": 0.9489, "step": 1245 }, { "epoch": 0.17012561441835064, "grad_norm": 6.814439296722412, "learning_rate": 9.494099463883885e-06, "loss": 0.994, "step": 1246 }, { "epoch": 0.1702621518296013, "grad_norm": 7.548239707946777, "learning_rate": 9.493129840150467e-06, "loss": 0.9716, "step": 1247 }, { "epoch": 0.170398689240852, "grad_norm": 9.713972091674805, "learning_rate": 9.49215933771265e-06, "loss": 1.0649, "step": 1248 }, { "epoch": 0.1705352266521027, "grad_norm": 7.553433418273926, "learning_rate": 9.491187956760229e-06, "loss": 0.9162, "step": 1249 }, { "epoch": 0.17067176406335335, "grad_norm": 7.984651565551758, "learning_rate": 9.490215697483174e-06, "loss": 1.1713, "step": 1250 }, { "epoch": 0.17080830147460405, "grad_norm": 6.274826526641846, "learning_rate": 9.489242560071626e-06, "loss": 1.0954, "step": 1251 }, { "epoch": 0.1709448388858547, "grad_norm": 6.418497562408447, "learning_rate": 9.488268544715897e-06, "loss": 0.9939, "step": 1252 }, { "epoch": 0.1710813762971054, "grad_norm": 6.557459831237793, "learning_rate": 9.487293651606472e-06, "loss": 1.0091, "step": 1253 }, { "epoch": 0.1712179137083561, "grad_norm": 5.447089672088623, "learning_rate": 9.486317880934007e-06, "loss": 1.0451, "step": 1254 }, { "epoch": 0.17135445111960676, "grad_norm": 6.182375907897949, "learning_rate": 9.485341232889331e-06, "loss": 0.9141, "step": 1255 }, { "epoch": 0.17149098853085745, "grad_norm": 6.883936882019043, "learning_rate": 9.484363707663443e-06, "loss": 0.9305, "step": 1256 }, { "epoch": 0.17162752594210814, "grad_norm": 10.733736991882324, "learning_rate": 9.483385305447513e-06, "loss": 1.008, "step": 1257 }, { "epoch": 0.1717640633533588, "grad_norm": 7.473027229309082, "learning_rate": 9.482406026432882e-06, "loss": 0.8995, "step": 1258 }, { "epoch": 0.1719006007646095, "grad_norm": 6.316264629364014, "learning_rate": 9.481425870811067e-06, "loss": 0.8962, "step": 1259 }, { "epoch": 0.1720371381758602, "grad_norm": 5.912388324737549, "learning_rate": 9.480444838773753e-06, "loss": 0.9498, "step": 1260 }, { "epoch": 0.17217367558711086, "grad_norm": 6.505964756011963, "learning_rate": 9.479462930512795e-06, "loss": 1.0342, "step": 1261 }, { "epoch": 0.17231021299836155, "grad_norm": 8.93424129486084, "learning_rate": 9.47848014622022e-06, "loss": 1.1059, "step": 1262 }, { "epoch": 0.17244675040961224, "grad_norm": 16.90530014038086, "learning_rate": 9.477496486088232e-06, "loss": 0.9208, "step": 1263 }, { "epoch": 0.1725832878208629, "grad_norm": 7.074974060058594, "learning_rate": 9.476511950309198e-06, "loss": 1.13, "step": 1264 }, { "epoch": 0.1727198252321136, "grad_norm": 6.534042835235596, "learning_rate": 9.475526539075661e-06, "loss": 1.0524, "step": 1265 }, { "epoch": 0.1728563626433643, "grad_norm": 6.731245040893555, "learning_rate": 9.474540252580332e-06, "loss": 0.9859, "step": 1266 }, { "epoch": 0.17299290005461496, "grad_norm": 5.736403942108154, "learning_rate": 9.4735530910161e-06, "loss": 0.9339, "step": 1267 }, { "epoch": 0.17312943746586565, "grad_norm": 5.825185298919678, "learning_rate": 9.472565054576017e-06, "loss": 0.9255, "step": 1268 }, { "epoch": 0.17326597487711634, "grad_norm": 10.1139554977417, "learning_rate": 9.471576143453312e-06, "loss": 1.1138, "step": 1269 }, { "epoch": 0.173402512288367, "grad_norm": 6.094748020172119, "learning_rate": 9.470586357841378e-06, "loss": 1.0855, "step": 1270 }, { "epoch": 0.1735390496996177, "grad_norm": 7.634471893310547, "learning_rate": 9.469595697933788e-06, "loss": 1.1979, "step": 1271 }, { "epoch": 0.1736755871108684, "grad_norm": 7.776664733886719, "learning_rate": 9.46860416392428e-06, "loss": 1.0361, "step": 1272 }, { "epoch": 0.17381212452211905, "grad_norm": 6.564837455749512, "learning_rate": 9.467611756006764e-06, "loss": 0.8675, "step": 1273 }, { "epoch": 0.17394866193336975, "grad_norm": 8.645862579345703, "learning_rate": 9.466618474375323e-06, "loss": 1.0636, "step": 1274 }, { "epoch": 0.17408519934462044, "grad_norm": 6.905599117279053, "learning_rate": 9.465624319224207e-06, "loss": 1.0034, "step": 1275 }, { "epoch": 0.1742217367558711, "grad_norm": 10.776083946228027, "learning_rate": 9.464629290747844e-06, "loss": 1.0091, "step": 1276 }, { "epoch": 0.1743582741671218, "grad_norm": 6.243834018707275, "learning_rate": 9.463633389140821e-06, "loss": 0.7698, "step": 1277 }, { "epoch": 0.17449481157837246, "grad_norm": 7.878570079803467, "learning_rate": 9.46263661459791e-06, "loss": 1.1699, "step": 1278 }, { "epoch": 0.17463134898962315, "grad_norm": 8.997087478637695, "learning_rate": 9.46163896731404e-06, "loss": 1.0146, "step": 1279 }, { "epoch": 0.17476788640087385, "grad_norm": 6.09397029876709, "learning_rate": 9.46064044748432e-06, "loss": 0.9648, "step": 1280 }, { "epoch": 0.1749044238121245, "grad_norm": 6.2424540519714355, "learning_rate": 9.459641055304027e-06, "loss": 1.1373, "step": 1281 }, { "epoch": 0.1750409612233752, "grad_norm": 8.288773536682129, "learning_rate": 9.458640790968608e-06, "loss": 0.8652, "step": 1282 }, { "epoch": 0.1751774986346259, "grad_norm": 6.750994682312012, "learning_rate": 9.457639654673682e-06, "loss": 0.9553, "step": 1283 }, { "epoch": 0.17531403604587656, "grad_norm": 10.912541389465332, "learning_rate": 9.456637646615035e-06, "loss": 0.9326, "step": 1284 }, { "epoch": 0.17545057345712725, "grad_norm": 7.236667156219482, "learning_rate": 9.455634766988628e-06, "loss": 1.0884, "step": 1285 }, { "epoch": 0.17558711086837794, "grad_norm": 8.598258018493652, "learning_rate": 9.454631015990587e-06, "loss": 0.9126, "step": 1286 }, { "epoch": 0.1757236482796286, "grad_norm": 27.458005905151367, "learning_rate": 9.453626393817216e-06, "loss": 0.824, "step": 1287 }, { "epoch": 0.1758601856908793, "grad_norm": 7.393378734588623, "learning_rate": 9.452620900664986e-06, "loss": 0.9781, "step": 1288 }, { "epoch": 0.17599672310213, "grad_norm": 6.6686577796936035, "learning_rate": 9.451614536730534e-06, "loss": 1.0913, "step": 1289 }, { "epoch": 0.17613326051338066, "grad_norm": 9.033621788024902, "learning_rate": 9.450607302210671e-06, "loss": 0.9716, "step": 1290 }, { "epoch": 0.17626979792463135, "grad_norm": 9.511388778686523, "learning_rate": 9.449599197302377e-06, "loss": 1.1045, "step": 1291 }, { "epoch": 0.17640633533588204, "grad_norm": 30.203428268432617, "learning_rate": 9.448590222202808e-06, "loss": 0.9876, "step": 1292 }, { "epoch": 0.1765428727471327, "grad_norm": 9.69135856628418, "learning_rate": 9.447580377109281e-06, "loss": 1.0436, "step": 1293 }, { "epoch": 0.1766794101583834, "grad_norm": 14.732564926147461, "learning_rate": 9.44656966221929e-06, "loss": 1.0551, "step": 1294 }, { "epoch": 0.1768159475696341, "grad_norm": 8.724034309387207, "learning_rate": 9.445558077730494e-06, "loss": 1.0231, "step": 1295 }, { "epoch": 0.17695248498088476, "grad_norm": 10.179417610168457, "learning_rate": 9.444545623840728e-06, "loss": 1.0223, "step": 1296 }, { "epoch": 0.17708902239213545, "grad_norm": 7.485844612121582, "learning_rate": 9.443532300747992e-06, "loss": 1.0635, "step": 1297 }, { "epoch": 0.17722555980338614, "grad_norm": 5.689070701599121, "learning_rate": 9.44251810865046e-06, "loss": 1.1047, "step": 1298 }, { "epoch": 0.1773620972146368, "grad_norm": 7.397756099700928, "learning_rate": 9.44150304774647e-06, "loss": 0.9282, "step": 1299 }, { "epoch": 0.1774986346258875, "grad_norm": 10.25770092010498, "learning_rate": 9.440487118234536e-06, "loss": 0.9932, "step": 1300 }, { "epoch": 0.1776351720371382, "grad_norm": 6.210300445556641, "learning_rate": 9.43947032031334e-06, "loss": 1.0219, "step": 1301 }, { "epoch": 0.17777170944838885, "grad_norm": 8.644829750061035, "learning_rate": 9.43845265418173e-06, "loss": 1.0298, "step": 1302 }, { "epoch": 0.17790824685963955, "grad_norm": 7.57055139541626, "learning_rate": 9.437434120038731e-06, "loss": 1.0548, "step": 1303 }, { "epoch": 0.1780447842708902, "grad_norm": 5.6771674156188965, "learning_rate": 9.436414718083531e-06, "loss": 1.2865, "step": 1304 }, { "epoch": 0.1781813216821409, "grad_norm": 6.980106353759766, "learning_rate": 9.435394448515494e-06, "loss": 1.0235, "step": 1305 }, { "epoch": 0.1783178590933916, "grad_norm": 11.646650314331055, "learning_rate": 9.434373311534146e-06, "loss": 1.0344, "step": 1306 }, { "epoch": 0.17845439650464226, "grad_norm": 10.584572792053223, "learning_rate": 9.433351307339189e-06, "loss": 1.0686, "step": 1307 }, { "epoch": 0.17859093391589295, "grad_norm": 5.2553253173828125, "learning_rate": 9.432328436130493e-06, "loss": 0.9782, "step": 1308 }, { "epoch": 0.17872747132714364, "grad_norm": 5.565936088562012, "learning_rate": 9.431304698108097e-06, "loss": 0.9, "step": 1309 }, { "epoch": 0.1788640087383943, "grad_norm": 11.999773979187012, "learning_rate": 9.430280093472209e-06, "loss": 1.1172, "step": 1310 }, { "epoch": 0.179000546149645, "grad_norm": 7.502378463745117, "learning_rate": 9.429254622423207e-06, "loss": 1.0415, "step": 1311 }, { "epoch": 0.1791370835608957, "grad_norm": 6.5778679847717285, "learning_rate": 9.42822828516164e-06, "loss": 1.0438, "step": 1312 }, { "epoch": 0.17927362097214636, "grad_norm": 7.898289203643799, "learning_rate": 9.427201081888223e-06, "loss": 1.0257, "step": 1313 }, { "epoch": 0.17941015838339705, "grad_norm": 7.25940465927124, "learning_rate": 9.42617301280384e-06, "loss": 0.8575, "step": 1314 }, { "epoch": 0.17954669579464774, "grad_norm": 6.735111236572266, "learning_rate": 9.42514407810955e-06, "loss": 1.0648, "step": 1315 }, { "epoch": 0.1796832332058984, "grad_norm": 11.214339256286621, "learning_rate": 9.42411427800658e-06, "loss": 0.8824, "step": 1316 }, { "epoch": 0.1798197706171491, "grad_norm": 13.946009635925293, "learning_rate": 9.42308361269632e-06, "loss": 1.0501, "step": 1317 }, { "epoch": 0.1799563080283998, "grad_norm": 11.200096130371094, "learning_rate": 9.422052082380335e-06, "loss": 0.9443, "step": 1318 }, { "epoch": 0.18009284543965046, "grad_norm": 6.688594341278076, "learning_rate": 9.421019687260358e-06, "loss": 1.0636, "step": 1319 }, { "epoch": 0.18022938285090115, "grad_norm": 12.319500923156738, "learning_rate": 9.419986427538287e-06, "loss": 1.156, "step": 1320 }, { "epoch": 0.18036592026215184, "grad_norm": 9.303262710571289, "learning_rate": 9.418952303416198e-06, "loss": 1.1381, "step": 1321 }, { "epoch": 0.1805024576734025, "grad_norm": 15.1398344039917, "learning_rate": 9.417917315096327e-06, "loss": 1.1668, "step": 1322 }, { "epoch": 0.1806389950846532, "grad_norm": 25.54405403137207, "learning_rate": 9.416881462781085e-06, "loss": 1.0752, "step": 1323 }, { "epoch": 0.1807755324959039, "grad_norm": 65.6520004272461, "learning_rate": 9.415844746673047e-06, "loss": 1.1398, "step": 1324 }, { "epoch": 0.18091206990715455, "grad_norm": 45.79397964477539, "learning_rate": 9.414807166974962e-06, "loss": 1.3167, "step": 1325 }, { "epoch": 0.18104860731840525, "grad_norm": 35.293861389160156, "learning_rate": 9.413768723889746e-06, "loss": 1.2009, "step": 1326 }, { "epoch": 0.18118514472965594, "grad_norm": 22.678546905517578, "learning_rate": 9.41272941762048e-06, "loss": 1.06, "step": 1327 }, { "epoch": 0.1813216821409066, "grad_norm": 47.87980270385742, "learning_rate": 9.411689248370421e-06, "loss": 1.1213, "step": 1328 }, { "epoch": 0.1814582195521573, "grad_norm": 64.4864501953125, "learning_rate": 9.41064821634299e-06, "loss": 1.0829, "step": 1329 }, { "epoch": 0.18159475696340796, "grad_norm": 23.849088668823242, "learning_rate": 9.409606321741776e-06, "loss": 1.0045, "step": 1330 }, { "epoch": 0.18173129437465865, "grad_norm": 8.35268497467041, "learning_rate": 9.40856356477054e-06, "loss": 1.0355, "step": 1331 }, { "epoch": 0.18186783178590935, "grad_norm": 16.467029571533203, "learning_rate": 9.40751994563321e-06, "loss": 1.0077, "step": 1332 }, { "epoch": 0.18200436919716, "grad_norm": 6.559985637664795, "learning_rate": 9.40647546453388e-06, "loss": 1.1151, "step": 1333 }, { "epoch": 0.1821409066084107, "grad_norm": 13.288018226623535, "learning_rate": 9.40543012167682e-06, "loss": 0.9331, "step": 1334 }, { "epoch": 0.1822774440196614, "grad_norm": 12.635666847229004, "learning_rate": 9.404383917266459e-06, "loss": 1.1305, "step": 1335 }, { "epoch": 0.18241398143091206, "grad_norm": 10.421608924865723, "learning_rate": 9.403336851507403e-06, "loss": 1.0072, "step": 1336 }, { "epoch": 0.18255051884216275, "grad_norm": 7.505956649780273, "learning_rate": 9.40228892460442e-06, "loss": 1.1439, "step": 1337 }, { "epoch": 0.18268705625341344, "grad_norm": 8.183819770812988, "learning_rate": 9.40124013676245e-06, "loss": 1.0761, "step": 1338 }, { "epoch": 0.1828235936646641, "grad_norm": 5.947787284851074, "learning_rate": 9.4001904881866e-06, "loss": 0.9788, "step": 1339 }, { "epoch": 0.1829601310759148, "grad_norm": 9.14639949798584, "learning_rate": 9.399139979082148e-06, "loss": 0.9894, "step": 1340 }, { "epoch": 0.1830966684871655, "grad_norm": 6.151947021484375, "learning_rate": 9.398088609654535e-06, "loss": 0.9066, "step": 1341 }, { "epoch": 0.18323320589841616, "grad_norm": 7.021195411682129, "learning_rate": 9.397036380109376e-06, "loss": 1.083, "step": 1342 }, { "epoch": 0.18336974330966685, "grad_norm": 8.697460174560547, "learning_rate": 9.39598329065245e-06, "loss": 0.9709, "step": 1343 }, { "epoch": 0.18350628072091754, "grad_norm": 24.95224380493164, "learning_rate": 9.394929341489706e-06, "loss": 1.0895, "step": 1344 }, { "epoch": 0.1836428181321682, "grad_norm": 10.311819076538086, "learning_rate": 9.393874532827262e-06, "loss": 1.1092, "step": 1345 }, { "epoch": 0.1837793555434189, "grad_norm": 8.706442832946777, "learning_rate": 9.3928188648714e-06, "loss": 1.1137, "step": 1346 }, { "epoch": 0.1839158929546696, "grad_norm": 26.16192054748535, "learning_rate": 9.391762337828577e-06, "loss": 0.9602, "step": 1347 }, { "epoch": 0.18405243036592026, "grad_norm": 11.131927490234375, "learning_rate": 9.390704951905412e-06, "loss": 1.1281, "step": 1348 }, { "epoch": 0.18418896777717095, "grad_norm": 10.949265480041504, "learning_rate": 9.389646707308691e-06, "loss": 1.1003, "step": 1349 }, { "epoch": 0.18432550518842164, "grad_norm": 8.78908920288086, "learning_rate": 9.388587604245376e-06, "loss": 1.0086, "step": 1350 }, { "epoch": 0.1844620425996723, "grad_norm": 7.431546688079834, "learning_rate": 9.38752764292259e-06, "loss": 0.9314, "step": 1351 }, { "epoch": 0.184598580010923, "grad_norm": 9.131193161010742, "learning_rate": 9.386466823547623e-06, "loss": 0.9952, "step": 1352 }, { "epoch": 0.1847351174221737, "grad_norm": 9.047117233276367, "learning_rate": 9.385405146327939e-06, "loss": 0.9931, "step": 1353 }, { "epoch": 0.18487165483342435, "grad_norm": 8.096878051757812, "learning_rate": 9.384342611471164e-06, "loss": 1.0452, "step": 1354 }, { "epoch": 0.18500819224467505, "grad_norm": 7.530014514923096, "learning_rate": 9.383279219185096e-06, "loss": 1.0146, "step": 1355 }, { "epoch": 0.1851447296559257, "grad_norm": 7.217423439025879, "learning_rate": 9.382214969677697e-06, "loss": 0.9747, "step": 1356 }, { "epoch": 0.1852812670671764, "grad_norm": 7.179719924926758, "learning_rate": 9.381149863157097e-06, "loss": 1.037, "step": 1357 }, { "epoch": 0.1854178044784271, "grad_norm": 6.8965020179748535, "learning_rate": 9.380083899831598e-06, "loss": 0.9867, "step": 1358 }, { "epoch": 0.18555434188967776, "grad_norm": 8.157299041748047, "learning_rate": 9.379017079909664e-06, "loss": 0.9522, "step": 1359 }, { "epoch": 0.18569087930092845, "grad_norm": 6.1058349609375, "learning_rate": 9.377949403599928e-06, "loss": 1.0382, "step": 1360 }, { "epoch": 0.18582741671217914, "grad_norm": 7.365131378173828, "learning_rate": 9.376880871111196e-06, "loss": 1.0135, "step": 1361 }, { "epoch": 0.1859639541234298, "grad_norm": 14.035136222839355, "learning_rate": 9.375811482652432e-06, "loss": 1.2324, "step": 1362 }, { "epoch": 0.1861004915346805, "grad_norm": 8.13170051574707, "learning_rate": 9.374741238432775e-06, "loss": 1.089, "step": 1363 }, { "epoch": 0.1862370289459312, "grad_norm": 23.83106231689453, "learning_rate": 9.37367013866153e-06, "loss": 0.9669, "step": 1364 }, { "epoch": 0.18637356635718186, "grad_norm": 8.520120620727539, "learning_rate": 9.372598183548166e-06, "loss": 1.0723, "step": 1365 }, { "epoch": 0.18651010376843255, "grad_norm": 7.113424777984619, "learning_rate": 9.371525373302317e-06, "loss": 0.913, "step": 1366 }, { "epoch": 0.18664664117968324, "grad_norm": 9.450668334960938, "learning_rate": 9.370451708133795e-06, "loss": 1.0259, "step": 1367 }, { "epoch": 0.1867831785909339, "grad_norm": 6.924631595611572, "learning_rate": 9.36937718825257e-06, "loss": 0.9605, "step": 1368 }, { "epoch": 0.1869197160021846, "grad_norm": 8.570900917053223, "learning_rate": 9.36830181386878e-06, "loss": 0.9613, "step": 1369 }, { "epoch": 0.1870562534134353, "grad_norm": 6.718122959136963, "learning_rate": 9.367225585192736e-06, "loss": 1.0523, "step": 1370 }, { "epoch": 0.18719279082468596, "grad_norm": 8.402220726013184, "learning_rate": 9.366148502434909e-06, "loss": 1.0466, "step": 1371 }, { "epoch": 0.18732932823593665, "grad_norm": 5.4882049560546875, "learning_rate": 9.365070565805941e-06, "loss": 1.0235, "step": 1372 }, { "epoch": 0.18746586564718734, "grad_norm": 7.516458034515381, "learning_rate": 9.363991775516642e-06, "loss": 1.0251, "step": 1373 }, { "epoch": 0.187602403058438, "grad_norm": 8.159358978271484, "learning_rate": 9.362912131777982e-06, "loss": 1.1624, "step": 1374 }, { "epoch": 0.1877389404696887, "grad_norm": 8.242350578308105, "learning_rate": 9.361831634801104e-06, "loss": 1.0418, "step": 1375 }, { "epoch": 0.1878754778809394, "grad_norm": 7.208111763000488, "learning_rate": 9.36075028479732e-06, "loss": 1.0141, "step": 1376 }, { "epoch": 0.18801201529219005, "grad_norm": 11.9007568359375, "learning_rate": 9.359668081978104e-06, "loss": 0.9274, "step": 1377 }, { "epoch": 0.18814855270344075, "grad_norm": 5.835931301116943, "learning_rate": 9.358585026555099e-06, "loss": 1.0681, "step": 1378 }, { "epoch": 0.18828509011469144, "grad_norm": 9.013005256652832, "learning_rate": 9.357501118740112e-06, "loss": 1.0743, "step": 1379 }, { "epoch": 0.1884216275259421, "grad_norm": 9.200258255004883, "learning_rate": 9.356416358745119e-06, "loss": 0.9935, "step": 1380 }, { "epoch": 0.1885581649371928, "grad_norm": 11.271373748779297, "learning_rate": 9.355330746782263e-06, "loss": 1.151, "step": 1381 }, { "epoch": 0.18869470234844346, "grad_norm": 6.529872417449951, "learning_rate": 9.354244283063855e-06, "loss": 1.0735, "step": 1382 }, { "epoch": 0.18883123975969415, "grad_norm": 7.053716659545898, "learning_rate": 9.353156967802368e-06, "loss": 0.964, "step": 1383 }, { "epoch": 0.18896777717094485, "grad_norm": 8.338112831115723, "learning_rate": 9.352068801210446e-06, "loss": 0.9638, "step": 1384 }, { "epoch": 0.1891043145821955, "grad_norm": 6.011890888214111, "learning_rate": 9.350979783500895e-06, "loss": 1.0525, "step": 1385 }, { "epoch": 0.1892408519934462, "grad_norm": 7.273273944854736, "learning_rate": 9.349889914886692e-06, "loss": 1.0091, "step": 1386 }, { "epoch": 0.1893773894046969, "grad_norm": 7.602698802947998, "learning_rate": 9.34879919558098e-06, "loss": 1.0016, "step": 1387 }, { "epoch": 0.18951392681594756, "grad_norm": 6.970663547515869, "learning_rate": 9.347707625797062e-06, "loss": 1.0859, "step": 1388 }, { "epoch": 0.18965046422719825, "grad_norm": 19.8358211517334, "learning_rate": 9.346615205748418e-06, "loss": 1.0933, "step": 1389 }, { "epoch": 0.18978700163844894, "grad_norm": 8.09011459350586, "learning_rate": 9.345521935648685e-06, "loss": 1.1895, "step": 1390 }, { "epoch": 0.1899235390496996, "grad_norm": 5.655594348907471, "learning_rate": 9.34442781571167e-06, "loss": 1.1659, "step": 1391 }, { "epoch": 0.1900600764609503, "grad_norm": 7.191570281982422, "learning_rate": 9.343332846151347e-06, "loss": 1.0403, "step": 1392 }, { "epoch": 0.190196613872201, "grad_norm": 8.465242385864258, "learning_rate": 9.342237027181853e-06, "loss": 1.0155, "step": 1393 }, { "epoch": 0.19033315128345166, "grad_norm": 11.575764656066895, "learning_rate": 9.341140359017495e-06, "loss": 1.0278, "step": 1394 }, { "epoch": 0.19046968869470235, "grad_norm": 10.411417007446289, "learning_rate": 9.340042841872743e-06, "loss": 0.97, "step": 1395 }, { "epoch": 0.19060622610595304, "grad_norm": 7.735202789306641, "learning_rate": 9.338944475962236e-06, "loss": 0.931, "step": 1396 }, { "epoch": 0.1907427635172037, "grad_norm": 9.008576393127441, "learning_rate": 9.337845261500777e-06, "loss": 1.0052, "step": 1397 }, { "epoch": 0.1908793009284544, "grad_norm": 11.744087219238281, "learning_rate": 9.336745198703334e-06, "loss": 1.0359, "step": 1398 }, { "epoch": 0.1910158383397051, "grad_norm": 6.918254852294922, "learning_rate": 9.335644287785042e-06, "loss": 1.0872, "step": 1399 }, { "epoch": 0.19115237575095576, "grad_norm": 14.4006929397583, "learning_rate": 9.334542528961203e-06, "loss": 1.041, "step": 1400 }, { "epoch": 0.19128891316220645, "grad_norm": 7.324592590332031, "learning_rate": 9.333439922447283e-06, "loss": 1.1087, "step": 1401 }, { "epoch": 0.19142545057345714, "grad_norm": 8.247533798217773, "learning_rate": 9.332336468458914e-06, "loss": 1.1168, "step": 1402 }, { "epoch": 0.1915619879847078, "grad_norm": 13.131689071655273, "learning_rate": 9.331232167211897e-06, "loss": 0.9931, "step": 1403 }, { "epoch": 0.1916985253959585, "grad_norm": 6.865470886230469, "learning_rate": 9.330127018922195e-06, "loss": 1.0755, "step": 1404 }, { "epoch": 0.1918350628072092, "grad_norm": 7.789769649505615, "learning_rate": 9.329021023805935e-06, "loss": 1.0449, "step": 1405 }, { "epoch": 0.19197160021845985, "grad_norm": 6.320048809051514, "learning_rate": 9.327914182079414e-06, "loss": 0.9173, "step": 1406 }, { "epoch": 0.19210813762971055, "grad_norm": 9.362077713012695, "learning_rate": 9.326806493959095e-06, "loss": 0.8783, "step": 1407 }, { "epoch": 0.1922446750409612, "grad_norm": 31.67044448852539, "learning_rate": 9.325697959661602e-06, "loss": 1.0841, "step": 1408 }, { "epoch": 0.1923812124522119, "grad_norm": 12.116443634033203, "learning_rate": 9.324588579403725e-06, "loss": 1.1085, "step": 1409 }, { "epoch": 0.1925177498634626, "grad_norm": 15.77743148803711, "learning_rate": 9.323478353402427e-06, "loss": 1.0792, "step": 1410 }, { "epoch": 0.19265428727471326, "grad_norm": 8.035379409790039, "learning_rate": 9.322367281874825e-06, "loss": 1.1312, "step": 1411 }, { "epoch": 0.19279082468596395, "grad_norm": 9.699896812438965, "learning_rate": 9.32125536503821e-06, "loss": 1.0882, "step": 1412 }, { "epoch": 0.19292736209721464, "grad_norm": 9.839787483215332, "learning_rate": 9.320142603110036e-06, "loss": 0.8632, "step": 1413 }, { "epoch": 0.1930638995084653, "grad_norm": 12.444212913513184, "learning_rate": 9.31902899630792e-06, "loss": 0.9798, "step": 1414 }, { "epoch": 0.193200436919716, "grad_norm": 7.800527095794678, "learning_rate": 9.317914544849645e-06, "loss": 1.0696, "step": 1415 }, { "epoch": 0.1933369743309667, "grad_norm": 7.9847612380981445, "learning_rate": 9.316799248953162e-06, "loss": 1.0004, "step": 1416 }, { "epoch": 0.19347351174221736, "grad_norm": 6.473366737365723, "learning_rate": 9.315683108836585e-06, "loss": 1.0903, "step": 1417 }, { "epoch": 0.19361004915346805, "grad_norm": 11.181233406066895, "learning_rate": 9.314566124718193e-06, "loss": 1.0267, "step": 1418 }, { "epoch": 0.19374658656471874, "grad_norm": 9.82568645477295, "learning_rate": 9.313448296816429e-06, "loss": 1.1185, "step": 1419 }, { "epoch": 0.1938831239759694, "grad_norm": 7.431239604949951, "learning_rate": 9.312329625349903e-06, "loss": 0.9711, "step": 1420 }, { "epoch": 0.1940196613872201, "grad_norm": 7.931659698486328, "learning_rate": 9.311210110537392e-06, "loss": 1.0041, "step": 1421 }, { "epoch": 0.1941561987984708, "grad_norm": 7.027667999267578, "learning_rate": 9.310089752597833e-06, "loss": 0.9946, "step": 1422 }, { "epoch": 0.19429273620972146, "grad_norm": 7.610822677612305, "learning_rate": 9.308968551750327e-06, "loss": 1.0056, "step": 1423 }, { "epoch": 0.19442927362097215, "grad_norm": 5.058792591094971, "learning_rate": 9.30784650821415e-06, "loss": 1.1116, "step": 1424 }, { "epoch": 0.19456581103222284, "grad_norm": 7.992805480957031, "learning_rate": 9.30672362220873e-06, "loss": 0.9547, "step": 1425 }, { "epoch": 0.1947023484434735, "grad_norm": 7.579679012298584, "learning_rate": 9.30559989395367e-06, "loss": 1.1633, "step": 1426 }, { "epoch": 0.1948388858547242, "grad_norm": 9.26533031463623, "learning_rate": 9.304475323668728e-06, "loss": 0.7979, "step": 1427 }, { "epoch": 0.1949754232659749, "grad_norm": 6.664769172668457, "learning_rate": 9.303349911573838e-06, "loss": 1.0893, "step": 1428 }, { "epoch": 0.19511196067722555, "grad_norm": 7.567628383636475, "learning_rate": 9.302223657889088e-06, "loss": 1.0713, "step": 1429 }, { "epoch": 0.19524849808847625, "grad_norm": 28.204002380371094, "learning_rate": 9.301096562834737e-06, "loss": 1.1255, "step": 1430 }, { "epoch": 0.19538503549972694, "grad_norm": 10.445733070373535, "learning_rate": 9.299968626631207e-06, "loss": 1.0167, "step": 1431 }, { "epoch": 0.1955215729109776, "grad_norm": 6.65501594543457, "learning_rate": 9.298839849499082e-06, "loss": 1.1596, "step": 1432 }, { "epoch": 0.1956581103222283, "grad_norm": 9.986204147338867, "learning_rate": 9.297710231659115e-06, "loss": 0.9618, "step": 1433 }, { "epoch": 0.19579464773347896, "grad_norm": 16.327768325805664, "learning_rate": 9.29657977333222e-06, "loss": 1.0548, "step": 1434 }, { "epoch": 0.19593118514472965, "grad_norm": 12.609517097473145, "learning_rate": 9.295448474739479e-06, "loss": 1.1968, "step": 1435 }, { "epoch": 0.19606772255598034, "grad_norm": 7.178225517272949, "learning_rate": 9.294316336102132e-06, "loss": 1.0879, "step": 1436 }, { "epoch": 0.196204259967231, "grad_norm": 7.652967929840088, "learning_rate": 9.293183357641588e-06, "loss": 0.9759, "step": 1437 }, { "epoch": 0.1963407973784817, "grad_norm": 8.41903305053711, "learning_rate": 9.292049539579421e-06, "loss": 0.9007, "step": 1438 }, { "epoch": 0.1964773347897324, "grad_norm": 8.334145545959473, "learning_rate": 9.290914882137367e-06, "loss": 0.9543, "step": 1439 }, { "epoch": 0.19661387220098306, "grad_norm": 11.139703750610352, "learning_rate": 9.289779385537325e-06, "loss": 1.1743, "step": 1440 }, { "epoch": 0.19675040961223375, "grad_norm": 8.674994468688965, "learning_rate": 9.288643050001362e-06, "loss": 0.9431, "step": 1441 }, { "epoch": 0.19688694702348444, "grad_norm": 9.716414451599121, "learning_rate": 9.287505875751705e-06, "loss": 0.9352, "step": 1442 }, { "epoch": 0.1970234844347351, "grad_norm": 7.701615810394287, "learning_rate": 9.286367863010748e-06, "loss": 1.1528, "step": 1443 }, { "epoch": 0.1971600218459858, "grad_norm": 22.06209373474121, "learning_rate": 9.285229012001047e-06, "loss": 0.9583, "step": 1444 }, { "epoch": 0.1972965592572365, "grad_norm": 6.708016872406006, "learning_rate": 9.284089322945324e-06, "loss": 1.1423, "step": 1445 }, { "epoch": 0.19743309666848716, "grad_norm": 9.30356502532959, "learning_rate": 9.282948796066462e-06, "loss": 1.0593, "step": 1446 }, { "epoch": 0.19756963407973785, "grad_norm": 11.681610107421875, "learning_rate": 9.281807431587512e-06, "loss": 0.964, "step": 1447 }, { "epoch": 0.19770617149098854, "grad_norm": 7.432068347930908, "learning_rate": 9.280665229731685e-06, "loss": 1.0606, "step": 1448 }, { "epoch": 0.1978427089022392, "grad_norm": 7.756032943725586, "learning_rate": 9.279522190722355e-06, "loss": 1.144, "step": 1449 }, { "epoch": 0.1979792463134899, "grad_norm": 6.973230838775635, "learning_rate": 9.278378314783065e-06, "loss": 1.0426, "step": 1450 }, { "epoch": 0.1981157837247406, "grad_norm": 7.968820095062256, "learning_rate": 9.277233602137516e-06, "loss": 1.0549, "step": 1451 }, { "epoch": 0.19825232113599126, "grad_norm": 6.602351665496826, "learning_rate": 9.276088053009578e-06, "loss": 1.0128, "step": 1452 }, { "epoch": 0.19838885854724195, "grad_norm": 6.8908281326293945, "learning_rate": 9.27494166762328e-06, "loss": 1.1155, "step": 1453 }, { "epoch": 0.19852539595849264, "grad_norm": 7.61147928237915, "learning_rate": 9.273794446202816e-06, "loss": 1.0773, "step": 1454 }, { "epoch": 0.1986619333697433, "grad_norm": 7.105641841888428, "learning_rate": 9.272646388972543e-06, "loss": 0.8513, "step": 1455 }, { "epoch": 0.198798470780994, "grad_norm": 7.612560272216797, "learning_rate": 9.271497496156985e-06, "loss": 0.9435, "step": 1456 }, { "epoch": 0.1989350081922447, "grad_norm": 6.961950302124023, "learning_rate": 9.270347767980823e-06, "loss": 0.9775, "step": 1457 }, { "epoch": 0.19907154560349535, "grad_norm": 6.990511417388916, "learning_rate": 9.269197204668908e-06, "loss": 0.9492, "step": 1458 }, { "epoch": 0.19920808301474605, "grad_norm": 8.129257202148438, "learning_rate": 9.26804580644625e-06, "loss": 1.1159, "step": 1459 }, { "epoch": 0.1993446204259967, "grad_norm": 6.361772537231445, "learning_rate": 9.266893573538023e-06, "loss": 1.0951, "step": 1460 }, { "epoch": 0.1994811578372474, "grad_norm": 6.285001277923584, "learning_rate": 9.265740506169567e-06, "loss": 0.9615, "step": 1461 }, { "epoch": 0.1996176952484981, "grad_norm": 5.746885299682617, "learning_rate": 9.26458660456638e-06, "loss": 1.101, "step": 1462 }, { "epoch": 0.19975423265974876, "grad_norm": 9.977544784545898, "learning_rate": 9.26343186895413e-06, "loss": 1.0733, "step": 1463 }, { "epoch": 0.19989077007099945, "grad_norm": 6.936085224151611, "learning_rate": 9.262276299558642e-06, "loss": 1.0354, "step": 1464 }, { "epoch": 0.20002730748225014, "grad_norm": 13.275028228759766, "learning_rate": 9.261119896605905e-06, "loss": 0.9483, "step": 1465 }, { "epoch": 0.2001638448935008, "grad_norm": 8.029072761535645, "learning_rate": 9.259962660322073e-06, "loss": 0.8952, "step": 1466 }, { "epoch": 0.2003003823047515, "grad_norm": 5.435176849365234, "learning_rate": 9.258804590933465e-06, "loss": 0.9912, "step": 1467 }, { "epoch": 0.2004369197160022, "grad_norm": 6.924093723297119, "learning_rate": 9.257645688666557e-06, "loss": 0.9926, "step": 1468 }, { "epoch": 0.20057345712725286, "grad_norm": 7.705539703369141, "learning_rate": 9.256485953747993e-06, "loss": 0.8967, "step": 1469 }, { "epoch": 0.20070999453850355, "grad_norm": 7.600363254547119, "learning_rate": 9.255325386404578e-06, "loss": 0.9993, "step": 1470 }, { "epoch": 0.20084653194975424, "grad_norm": 7.03790283203125, "learning_rate": 9.254163986863278e-06, "loss": 1.0767, "step": 1471 }, { "epoch": 0.2009830693610049, "grad_norm": 7.781857013702393, "learning_rate": 9.253001755351225e-06, "loss": 1.0495, "step": 1472 }, { "epoch": 0.2011196067722556, "grad_norm": 6.236697673797607, "learning_rate": 9.251838692095714e-06, "loss": 1.0765, "step": 1473 }, { "epoch": 0.2012561441835063, "grad_norm": 7.2099289894104, "learning_rate": 9.250674797324197e-06, "loss": 0.9779, "step": 1474 }, { "epoch": 0.20139268159475696, "grad_norm": 8.280847549438477, "learning_rate": 9.249510071264295e-06, "loss": 1.0202, "step": 1475 }, { "epoch": 0.20152921900600765, "grad_norm": 5.9115681648254395, "learning_rate": 9.248344514143786e-06, "loss": 1.0061, "step": 1476 }, { "epoch": 0.20166575641725834, "grad_norm": 6.321501731872559, "learning_rate": 9.24717812619062e-06, "loss": 1.0578, "step": 1477 }, { "epoch": 0.201802293828509, "grad_norm": 6.2368292808532715, "learning_rate": 9.246010907632894e-06, "loss": 0.8762, "step": 1478 }, { "epoch": 0.2019388312397597, "grad_norm": 7.657657623291016, "learning_rate": 9.244842858698886e-06, "loss": 1.0418, "step": 1479 }, { "epoch": 0.2020753686510104, "grad_norm": 6.628024101257324, "learning_rate": 9.243673979617021e-06, "loss": 1.1216, "step": 1480 }, { "epoch": 0.20221190606226105, "grad_norm": 7.353935718536377, "learning_rate": 9.242504270615893e-06, "loss": 0.9548, "step": 1481 }, { "epoch": 0.20234844347351175, "grad_norm": 10.310465812683105, "learning_rate": 9.241333731924259e-06, "loss": 1.0023, "step": 1482 }, { "epoch": 0.2024849808847624, "grad_norm": 5.71497106552124, "learning_rate": 9.240162363771036e-06, "loss": 1.0434, "step": 1483 }, { "epoch": 0.2026215182960131, "grad_norm": 6.545828819274902, "learning_rate": 9.238990166385304e-06, "loss": 0.939, "step": 1484 }, { "epoch": 0.2027580557072638, "grad_norm": 5.810062885284424, "learning_rate": 9.237817139996306e-06, "loss": 0.8731, "step": 1485 }, { "epoch": 0.20289459311851446, "grad_norm": 8.840611457824707, "learning_rate": 9.236643284833445e-06, "loss": 1.0535, "step": 1486 }, { "epoch": 0.20303113052976515, "grad_norm": 11.303881645202637, "learning_rate": 9.23546860112629e-06, "loss": 1.0416, "step": 1487 }, { "epoch": 0.20316766794101584, "grad_norm": 8.487915992736816, "learning_rate": 9.234293089104564e-06, "loss": 0.9354, "step": 1488 }, { "epoch": 0.2033042053522665, "grad_norm": 15.390558242797852, "learning_rate": 9.233116748998164e-06, "loss": 0.9813, "step": 1489 }, { "epoch": 0.2034407427635172, "grad_norm": 16.182201385498047, "learning_rate": 9.231939581037138e-06, "loss": 0.979, "step": 1490 }, { "epoch": 0.2035772801747679, "grad_norm": 6.069469928741455, "learning_rate": 9.230761585451702e-06, "loss": 0.9048, "step": 1491 }, { "epoch": 0.20371381758601856, "grad_norm": 11.347345352172852, "learning_rate": 9.229582762472232e-06, "loss": 0.9284, "step": 1492 }, { "epoch": 0.20385035499726925, "grad_norm": 8.320094108581543, "learning_rate": 9.228403112329265e-06, "loss": 1.1233, "step": 1493 }, { "epoch": 0.20398689240851994, "grad_norm": 6.4287309646606445, "learning_rate": 9.227222635253503e-06, "loss": 0.923, "step": 1494 }, { "epoch": 0.2041234298197706, "grad_norm": 9.159597396850586, "learning_rate": 9.226041331475805e-06, "loss": 1.0388, "step": 1495 }, { "epoch": 0.2042599672310213, "grad_norm": 6.441023826599121, "learning_rate": 9.224859201227195e-06, "loss": 0.9519, "step": 1496 }, { "epoch": 0.204396504642272, "grad_norm": 10.811750411987305, "learning_rate": 9.223676244738858e-06, "loss": 0.9362, "step": 1497 }, { "epoch": 0.20453304205352266, "grad_norm": 7.480386734008789, "learning_rate": 9.222492462242139e-06, "loss": 1.161, "step": 1498 }, { "epoch": 0.20466957946477335, "grad_norm": 6.08917760848999, "learning_rate": 9.221307853968548e-06, "loss": 0.9407, "step": 1499 }, { "epoch": 0.20480611687602404, "grad_norm": 8.308331489562988, "learning_rate": 9.220122420149753e-06, "loss": 1.0771, "step": 1500 }, { "epoch": 0.2049426542872747, "grad_norm": 7.307647228240967, "learning_rate": 9.218936161017585e-06, "loss": 0.9242, "step": 1501 }, { "epoch": 0.2050791916985254, "grad_norm": 7.390491008758545, "learning_rate": 9.217749076804037e-06, "loss": 1.0382, "step": 1502 }, { "epoch": 0.2052157291097761, "grad_norm": 12.776845932006836, "learning_rate": 9.21656116774126e-06, "loss": 0.9186, "step": 1503 }, { "epoch": 0.20535226652102676, "grad_norm": 8.538631439208984, "learning_rate": 9.215372434061573e-06, "loss": 0.9641, "step": 1504 }, { "epoch": 0.20548880393227745, "grad_norm": 19.72527313232422, "learning_rate": 9.214182875997449e-06, "loss": 0.9598, "step": 1505 }, { "epoch": 0.20562534134352814, "grad_norm": 8.509109497070312, "learning_rate": 9.212992493781525e-06, "loss": 1.0741, "step": 1506 }, { "epoch": 0.2057618787547788, "grad_norm": 8.70642375946045, "learning_rate": 9.211801287646602e-06, "loss": 0.897, "step": 1507 }, { "epoch": 0.2058984161660295, "grad_norm": 6.472919464111328, "learning_rate": 9.21060925782564e-06, "loss": 1.0986, "step": 1508 }, { "epoch": 0.20603495357728016, "grad_norm": 21.30291748046875, "learning_rate": 9.209416404551756e-06, "loss": 0.9821, "step": 1509 }, { "epoch": 0.20617149098853085, "grad_norm": 11.157583236694336, "learning_rate": 9.208222728058237e-06, "loss": 0.9214, "step": 1510 }, { "epoch": 0.20630802839978155, "grad_norm": 8.765081405639648, "learning_rate": 9.207028228578519e-06, "loss": 0.8542, "step": 1511 }, { "epoch": 0.2064445658110322, "grad_norm": 8.76642894744873, "learning_rate": 9.205832906346211e-06, "loss": 0.9219, "step": 1512 }, { "epoch": 0.2065811032222829, "grad_norm": 7.122570514678955, "learning_rate": 9.204636761595077e-06, "loss": 0.8197, "step": 1513 }, { "epoch": 0.2067176406335336, "grad_norm": 8.919891357421875, "learning_rate": 9.203439794559042e-06, "loss": 1.0579, "step": 1514 }, { "epoch": 0.20685417804478426, "grad_norm": 7.157680988311768, "learning_rate": 9.20224200547219e-06, "loss": 1.0528, "step": 1515 }, { "epoch": 0.20699071545603495, "grad_norm": 8.201748847961426, "learning_rate": 9.201043394568773e-06, "loss": 1.0007, "step": 1516 }, { "epoch": 0.20712725286728564, "grad_norm": 6.51995325088501, "learning_rate": 9.199843962083195e-06, "loss": 0.9784, "step": 1517 }, { "epoch": 0.2072637902785363, "grad_norm": 8.124167442321777, "learning_rate": 9.198643708250024e-06, "loss": 1.0558, "step": 1518 }, { "epoch": 0.207400327689787, "grad_norm": 8.233346939086914, "learning_rate": 9.197442633303991e-06, "loss": 1.101, "step": 1519 }, { "epoch": 0.2075368651010377, "grad_norm": 6.5110392570495605, "learning_rate": 9.196240737479987e-06, "loss": 0.917, "step": 1520 }, { "epoch": 0.20767340251228836, "grad_norm": 8.114928245544434, "learning_rate": 9.19503802101306e-06, "loss": 0.9664, "step": 1521 }, { "epoch": 0.20780993992353905, "grad_norm": 9.081069946289062, "learning_rate": 9.193834484138418e-06, "loss": 1.017, "step": 1522 }, { "epoch": 0.20794647733478974, "grad_norm": 8.98887825012207, "learning_rate": 9.192630127091437e-06, "loss": 0.9138, "step": 1523 }, { "epoch": 0.2080830147460404, "grad_norm": 8.437406539916992, "learning_rate": 9.191424950107648e-06, "loss": 1.161, "step": 1524 }, { "epoch": 0.2082195521572911, "grad_norm": 5.568582057952881, "learning_rate": 9.190218953422742e-06, "loss": 1.152, "step": 1525 }, { "epoch": 0.2083560895685418, "grad_norm": 7.691720485687256, "learning_rate": 9.18901213727257e-06, "loss": 1.0878, "step": 1526 }, { "epoch": 0.20849262697979246, "grad_norm": 6.376018524169922, "learning_rate": 9.187804501893145e-06, "loss": 1.0107, "step": 1527 }, { "epoch": 0.20862916439104315, "grad_norm": 7.658590793609619, "learning_rate": 9.186596047520639e-06, "loss": 0.8731, "step": 1528 }, { "epoch": 0.20876570180229384, "grad_norm": 7.332942008972168, "learning_rate": 9.185386774391388e-06, "loss": 0.9072, "step": 1529 }, { "epoch": 0.2089022392135445, "grad_norm": 8.405695915222168, "learning_rate": 9.184176682741883e-06, "loss": 1.135, "step": 1530 }, { "epoch": 0.2090387766247952, "grad_norm": 7.072011947631836, "learning_rate": 9.182965772808776e-06, "loss": 1.0565, "step": 1531 }, { "epoch": 0.2091753140360459, "grad_norm": 10.43355655670166, "learning_rate": 9.181754044828882e-06, "loss": 0.896, "step": 1532 }, { "epoch": 0.20931185144729655, "grad_norm": 33.007938385009766, "learning_rate": 9.180541499039171e-06, "loss": 0.9673, "step": 1533 }, { "epoch": 0.20944838885854725, "grad_norm": 6.772249698638916, "learning_rate": 9.179328135676779e-06, "loss": 0.9589, "step": 1534 }, { "epoch": 0.2095849262697979, "grad_norm": 20.774089813232422, "learning_rate": 9.178113954979e-06, "loss": 1.1015, "step": 1535 }, { "epoch": 0.2097214636810486, "grad_norm": 8.320184707641602, "learning_rate": 9.176898957183282e-06, "loss": 1.0325, "step": 1536 }, { "epoch": 0.2098580010922993, "grad_norm": 6.95117712020874, "learning_rate": 9.175683142527242e-06, "loss": 0.9726, "step": 1537 }, { "epoch": 0.20999453850354996, "grad_norm": 8.178778648376465, "learning_rate": 9.17446651124865e-06, "loss": 0.9872, "step": 1538 }, { "epoch": 0.21013107591480065, "grad_norm": 26.84575843811035, "learning_rate": 9.173249063585438e-06, "loss": 1.0251, "step": 1539 }, { "epoch": 0.21026761332605134, "grad_norm": 11.034449577331543, "learning_rate": 9.172030799775698e-06, "loss": 1.0638, "step": 1540 }, { "epoch": 0.210404150737302, "grad_norm": 7.010817050933838, "learning_rate": 9.170811720057683e-06, "loss": 1.0462, "step": 1541 }, { "epoch": 0.2105406881485527, "grad_norm": 7.687351703643799, "learning_rate": 9.169591824669798e-06, "loss": 1.0805, "step": 1542 }, { "epoch": 0.2106772255598034, "grad_norm": 17.550323486328125, "learning_rate": 9.168371113850623e-06, "loss": 1.1246, "step": 1543 }, { "epoch": 0.21081376297105406, "grad_norm": 8.48725414276123, "learning_rate": 9.167149587838878e-06, "loss": 1.0595, "step": 1544 }, { "epoch": 0.21095030038230475, "grad_norm": 8.009523391723633, "learning_rate": 9.165927246873458e-06, "loss": 1.0306, "step": 1545 }, { "epoch": 0.21108683779355544, "grad_norm": 8.969228744506836, "learning_rate": 9.16470409119341e-06, "loss": 1.0323, "step": 1546 }, { "epoch": 0.2112233752048061, "grad_norm": 12.068535804748535, "learning_rate": 9.163480121037942e-06, "loss": 1.1335, "step": 1547 }, { "epoch": 0.2113599126160568, "grad_norm": 5.796160697937012, "learning_rate": 9.162255336646422e-06, "loss": 0.9931, "step": 1548 }, { "epoch": 0.2114964500273075, "grad_norm": 8.08561897277832, "learning_rate": 9.161029738258374e-06, "loss": 1.064, "step": 1549 }, { "epoch": 0.21163298743855816, "grad_norm": 7.705301284790039, "learning_rate": 9.159803326113487e-06, "loss": 0.964, "step": 1550 }, { "epoch": 0.21176952484980885, "grad_norm": 5.903691291809082, "learning_rate": 9.158576100451602e-06, "loss": 1.133, "step": 1551 }, { "epoch": 0.21190606226105954, "grad_norm": 6.485862731933594, "learning_rate": 9.157348061512728e-06, "loss": 1.0606, "step": 1552 }, { "epoch": 0.2120425996723102, "grad_norm": 6.833629131317139, "learning_rate": 9.156119209537022e-06, "loss": 1.0531, "step": 1553 }, { "epoch": 0.2121791370835609, "grad_norm": 11.36345386505127, "learning_rate": 9.15488954476481e-06, "loss": 1.0127, "step": 1554 }, { "epoch": 0.2123156744948116, "grad_norm": 14.588425636291504, "learning_rate": 9.153659067436572e-06, "loss": 0.975, "step": 1555 }, { "epoch": 0.21245221190606225, "grad_norm": 8.921573638916016, "learning_rate": 9.152427777792947e-06, "loss": 1.0187, "step": 1556 }, { "epoch": 0.21258874931731295, "grad_norm": 8.798787117004395, "learning_rate": 9.151195676074733e-06, "loss": 0.9943, "step": 1557 }, { "epoch": 0.21272528672856364, "grad_norm": 9.686378479003906, "learning_rate": 9.149962762522891e-06, "loss": 0.8805, "step": 1558 }, { "epoch": 0.2128618241398143, "grad_norm": 7.287022590637207, "learning_rate": 9.148729037378534e-06, "loss": 1.039, "step": 1559 }, { "epoch": 0.212998361551065, "grad_norm": 6.196048736572266, "learning_rate": 9.147494500882937e-06, "loss": 0.9358, "step": 1560 }, { "epoch": 0.21313489896231566, "grad_norm": 8.575185775756836, "learning_rate": 9.146259153277534e-06, "loss": 0.9449, "step": 1561 }, { "epoch": 0.21327143637356635, "grad_norm": 12.67035961151123, "learning_rate": 9.14502299480392e-06, "loss": 0.9595, "step": 1562 }, { "epoch": 0.21340797378481705, "grad_norm": 7.13870096206665, "learning_rate": 9.143786025703842e-06, "loss": 1.0052, "step": 1563 }, { "epoch": 0.2135445111960677, "grad_norm": 6.028109073638916, "learning_rate": 9.142548246219212e-06, "loss": 1.0267, "step": 1564 }, { "epoch": 0.2136810486073184, "grad_norm": 8.119118690490723, "learning_rate": 9.141309656592095e-06, "loss": 1.0758, "step": 1565 }, { "epoch": 0.2138175860185691, "grad_norm": 6.282076358795166, "learning_rate": 9.14007025706472e-06, "loss": 1.0765, "step": 1566 }, { "epoch": 0.21395412342981976, "grad_norm": 5.54152774810791, "learning_rate": 9.13883004787947e-06, "loss": 0.8532, "step": 1567 }, { "epoch": 0.21409066084107045, "grad_norm": 6.4829864501953125, "learning_rate": 9.13758902927889e-06, "loss": 0.8602, "step": 1568 }, { "epoch": 0.21422719825232114, "grad_norm": 7.105710506439209, "learning_rate": 9.13634720150568e-06, "loss": 0.9201, "step": 1569 }, { "epoch": 0.2143637356635718, "grad_norm": 8.445049285888672, "learning_rate": 9.135104564802698e-06, "loss": 0.8982, "step": 1570 }, { "epoch": 0.2145002730748225, "grad_norm": 9.21277141571045, "learning_rate": 9.133861119412966e-06, "loss": 1.0924, "step": 1571 }, { "epoch": 0.2146368104860732, "grad_norm": 8.443033218383789, "learning_rate": 9.132616865579655e-06, "loss": 0.9276, "step": 1572 }, { "epoch": 0.21477334789732386, "grad_norm": 11.95068359375, "learning_rate": 9.131371803546102e-06, "loss": 1.1649, "step": 1573 }, { "epoch": 0.21490988530857455, "grad_norm": 7.080075740814209, "learning_rate": 9.130125933555798e-06, "loss": 0.9705, "step": 1574 }, { "epoch": 0.21504642271982524, "grad_norm": 7.328812599182129, "learning_rate": 9.128879255852396e-06, "loss": 1.1217, "step": 1575 }, { "epoch": 0.2151829601310759, "grad_norm": 5.5290751457214355, "learning_rate": 9.127631770679699e-06, "loss": 0.9384, "step": 1576 }, { "epoch": 0.2153194975423266, "grad_norm": 7.73374605178833, "learning_rate": 9.126383478281675e-06, "loss": 1.1292, "step": 1577 }, { "epoch": 0.2154560349535773, "grad_norm": 11.546578407287598, "learning_rate": 9.12513437890245e-06, "loss": 0.936, "step": 1578 }, { "epoch": 0.21559257236482796, "grad_norm": 6.714505672454834, "learning_rate": 9.123884472786302e-06, "loss": 1.0597, "step": 1579 }, { "epoch": 0.21572910977607865, "grad_norm": 10.051525115966797, "learning_rate": 9.122633760177674e-06, "loss": 1.0014, "step": 1580 }, { "epoch": 0.21586564718732934, "grad_norm": 6.2739577293396, "learning_rate": 9.12138224132116e-06, "loss": 1.0549, "step": 1581 }, { "epoch": 0.21600218459858, "grad_norm": 11.786907196044922, "learning_rate": 9.120129916461518e-06, "loss": 0.9053, "step": 1582 }, { "epoch": 0.2161387220098307, "grad_norm": 7.275577068328857, "learning_rate": 9.118876785843656e-06, "loss": 0.9478, "step": 1583 }, { "epoch": 0.2162752594210814, "grad_norm": 10.04375171661377, "learning_rate": 9.117622849712649e-06, "loss": 1.0119, "step": 1584 }, { "epoch": 0.21641179683233205, "grad_norm": 8.160940170288086, "learning_rate": 9.116368108313722e-06, "loss": 1.0717, "step": 1585 }, { "epoch": 0.21654833424358275, "grad_norm": 10.877127647399902, "learning_rate": 9.11511256189226e-06, "loss": 0.8074, "step": 1586 }, { "epoch": 0.2166848716548334, "grad_norm": 7.368432998657227, "learning_rate": 9.113856210693805e-06, "loss": 0.9368, "step": 1587 }, { "epoch": 0.2168214090660841, "grad_norm": 5.281885147094727, "learning_rate": 9.112599054964058e-06, "loss": 0.952, "step": 1588 }, { "epoch": 0.2169579464773348, "grad_norm": 6.834371566772461, "learning_rate": 9.111341094948876e-06, "loss": 1.2004, "step": 1589 }, { "epoch": 0.21709448388858546, "grad_norm": 7.927865505218506, "learning_rate": 9.110082330894275e-06, "loss": 1.0165, "step": 1590 }, { "epoch": 0.21723102129983615, "grad_norm": 8.961710929870605, "learning_rate": 9.10882276304642e-06, "loss": 0.9915, "step": 1591 }, { "epoch": 0.21736755871108684, "grad_norm": 16.149538040161133, "learning_rate": 9.10756239165165e-06, "loss": 0.976, "step": 1592 }, { "epoch": 0.2175040961223375, "grad_norm": 7.968392848968506, "learning_rate": 9.106301216956443e-06, "loss": 1.1236, "step": 1593 }, { "epoch": 0.2176406335335882, "grad_norm": 5.4191155433654785, "learning_rate": 9.105039239207447e-06, "loss": 0.8721, "step": 1594 }, { "epoch": 0.2177771709448389, "grad_norm": 6.901122093200684, "learning_rate": 9.103776458651458e-06, "loss": 1.0658, "step": 1595 }, { "epoch": 0.21791370835608956, "grad_norm": 7.895171165466309, "learning_rate": 9.102512875535439e-06, "loss": 0.8856, "step": 1596 }, { "epoch": 0.21805024576734025, "grad_norm": 7.080287456512451, "learning_rate": 9.101248490106497e-06, "loss": 1.0153, "step": 1597 }, { "epoch": 0.21818678317859094, "grad_norm": 10.519644737243652, "learning_rate": 9.09998330261191e-06, "loss": 1.014, "step": 1598 }, { "epoch": 0.2183233205898416, "grad_norm": 8.686219215393066, "learning_rate": 9.098717313299101e-06, "loss": 0.9768, "step": 1599 }, { "epoch": 0.2184598580010923, "grad_norm": 6.818769931793213, "learning_rate": 9.097450522415656e-06, "loss": 0.966, "step": 1600 }, { "epoch": 0.218596395412343, "grad_norm": 26.346506118774414, "learning_rate": 9.096182930209318e-06, "loss": 1.1338, "step": 1601 }, { "epoch": 0.21873293282359366, "grad_norm": 15.493473052978516, "learning_rate": 9.094914536927983e-06, "loss": 1.1301, "step": 1602 }, { "epoch": 0.21886947023484435, "grad_norm": 5.39259672164917, "learning_rate": 9.093645342819708e-06, "loss": 1.0907, "step": 1603 }, { "epoch": 0.21900600764609504, "grad_norm": 10.311751365661621, "learning_rate": 9.092375348132704e-06, "loss": 1.1147, "step": 1604 }, { "epoch": 0.2191425450573457, "grad_norm": 7.2263078689575195, "learning_rate": 9.091104553115338e-06, "loss": 1.0808, "step": 1605 }, { "epoch": 0.2192790824685964, "grad_norm": 47.87458801269531, "learning_rate": 9.089832958016135e-06, "loss": 1.163, "step": 1606 }, { "epoch": 0.2194156198798471, "grad_norm": 7.704683780670166, "learning_rate": 9.088560563083777e-06, "loss": 1.0165, "step": 1607 }, { "epoch": 0.21955215729109775, "grad_norm": 11.272551536560059, "learning_rate": 9.0872873685671e-06, "loss": 0.9903, "step": 1608 }, { "epoch": 0.21968869470234845, "grad_norm": 6.1117682456970215, "learning_rate": 9.086013374715099e-06, "loss": 0.8451, "step": 1609 }, { "epoch": 0.21982523211359914, "grad_norm": 7.351281642913818, "learning_rate": 9.084738581776921e-06, "loss": 1.0154, "step": 1610 }, { "epoch": 0.2199617695248498, "grad_norm": 13.159271240234375, "learning_rate": 9.08346299000188e-06, "loss": 1.0387, "step": 1611 }, { "epoch": 0.2200983069361005, "grad_norm": 9.023297309875488, "learning_rate": 9.082186599639429e-06, "loss": 1.0501, "step": 1612 }, { "epoch": 0.22023484434735116, "grad_norm": 6.486201286315918, "learning_rate": 9.080909410939194e-06, "loss": 0.8716, "step": 1613 }, { "epoch": 0.22037138175860185, "grad_norm": 8.288872718811035, "learning_rate": 9.079631424150947e-06, "loss": 1.0047, "step": 1614 }, { "epoch": 0.22050791916985255, "grad_norm": 7.404196262359619, "learning_rate": 9.078352639524618e-06, "loss": 1.0579, "step": 1615 }, { "epoch": 0.2206444565811032, "grad_norm": 7.5792717933654785, "learning_rate": 9.0770730573103e-06, "loss": 1.0801, "step": 1616 }, { "epoch": 0.2207809939923539, "grad_norm": 9.174310684204102, "learning_rate": 9.07579267775823e-06, "loss": 0.8025, "step": 1617 }, { "epoch": 0.2209175314036046, "grad_norm": 7.256665229797363, "learning_rate": 9.074511501118806e-06, "loss": 1.1911, "step": 1618 }, { "epoch": 0.22105406881485526, "grad_norm": 8.251877784729004, "learning_rate": 9.073229527642587e-06, "loss": 1.0498, "step": 1619 }, { "epoch": 0.22119060622610595, "grad_norm": 7.019141674041748, "learning_rate": 9.071946757580282e-06, "loss": 1.0628, "step": 1620 }, { "epoch": 0.22132714363735664, "grad_norm": 6.047471046447754, "learning_rate": 9.070663191182758e-06, "loss": 0.9471, "step": 1621 }, { "epoch": 0.2214636810486073, "grad_norm": 7.771193504333496, "learning_rate": 9.069378828701038e-06, "loss": 1.0879, "step": 1622 }, { "epoch": 0.221600218459858, "grad_norm": 6.235985279083252, "learning_rate": 9.068093670386298e-06, "loss": 1.1496, "step": 1623 }, { "epoch": 0.2217367558711087, "grad_norm": 5.496775150299072, "learning_rate": 9.066807716489871e-06, "loss": 0.9706, "step": 1624 }, { "epoch": 0.22187329328235936, "grad_norm": 11.24832534790039, "learning_rate": 9.065520967263251e-06, "loss": 1.0593, "step": 1625 }, { "epoch": 0.22200983069361005, "grad_norm": 6.42877197265625, "learning_rate": 9.064233422958078e-06, "loss": 1.0543, "step": 1626 }, { "epoch": 0.22214636810486074, "grad_norm": 7.003305912017822, "learning_rate": 9.06294508382615e-06, "loss": 1.0441, "step": 1627 }, { "epoch": 0.2222829055161114, "grad_norm": 12.25634765625, "learning_rate": 9.06165595011943e-06, "loss": 1.0354, "step": 1628 }, { "epoch": 0.2224194429273621, "grad_norm": 6.803445339202881, "learning_rate": 9.060366022090024e-06, "loss": 0.9752, "step": 1629 }, { "epoch": 0.2225559803386128, "grad_norm": 9.007088661193848, "learning_rate": 9.059075299990199e-06, "loss": 1.1067, "step": 1630 }, { "epoch": 0.22269251774986346, "grad_norm": 7.895106315612793, "learning_rate": 9.057783784072377e-06, "loss": 0.9957, "step": 1631 }, { "epoch": 0.22282905516111415, "grad_norm": 11.767343521118164, "learning_rate": 9.056491474589136e-06, "loss": 1.0328, "step": 1632 }, { "epoch": 0.22296559257236484, "grad_norm": 7.004368782043457, "learning_rate": 9.055198371793205e-06, "loss": 0.8625, "step": 1633 }, { "epoch": 0.2231021299836155, "grad_norm": 10.698566436767578, "learning_rate": 9.053904475937473e-06, "loss": 0.9352, "step": 1634 }, { "epoch": 0.2232386673948662, "grad_norm": 7.430522918701172, "learning_rate": 9.052609787274985e-06, "loss": 1.0152, "step": 1635 }, { "epoch": 0.2233752048061169, "grad_norm": 6.075687408447266, "learning_rate": 9.051314306058934e-06, "loss": 1.1253, "step": 1636 }, { "epoch": 0.22351174221736755, "grad_norm": 6.998076915740967, "learning_rate": 9.050018032542676e-06, "loss": 1.0915, "step": 1637 }, { "epoch": 0.22364827962861825, "grad_norm": 8.336559295654297, "learning_rate": 9.048720966979714e-06, "loss": 1.0074, "step": 1638 }, { "epoch": 0.2237848170398689, "grad_norm": 6.099851131439209, "learning_rate": 9.047423109623714e-06, "loss": 0.829, "step": 1639 }, { "epoch": 0.2239213544511196, "grad_norm": 5.7710700035095215, "learning_rate": 9.046124460728491e-06, "loss": 1.0943, "step": 1640 }, { "epoch": 0.2240578918623703, "grad_norm": 7.263605117797852, "learning_rate": 9.04482502054802e-06, "loss": 1.0574, "step": 1641 }, { "epoch": 0.22419442927362096, "grad_norm": 7.108891487121582, "learning_rate": 9.043524789336424e-06, "loss": 1.0363, "step": 1642 }, { "epoch": 0.22433096668487165, "grad_norm": 8.487008094787598, "learning_rate": 9.042223767347985e-06, "loss": 0.9805, "step": 1643 }, { "epoch": 0.22446750409612234, "grad_norm": 6.939018726348877, "learning_rate": 9.040921954837139e-06, "loss": 0.9905, "step": 1644 }, { "epoch": 0.224604041507373, "grad_norm": 8.003236770629883, "learning_rate": 9.039619352058478e-06, "loss": 1.0911, "step": 1645 }, { "epoch": 0.2247405789186237, "grad_norm": 8.253698348999023, "learning_rate": 9.038315959266745e-06, "loss": 0.9203, "step": 1646 }, { "epoch": 0.2248771163298744, "grad_norm": 13.218046188354492, "learning_rate": 9.037011776716841e-06, "loss": 0.9914, "step": 1647 }, { "epoch": 0.22501365374112506, "grad_norm": 9.388579368591309, "learning_rate": 9.035706804663819e-06, "loss": 1.034, "step": 1648 }, { "epoch": 0.22515019115237575, "grad_norm": 12.666531562805176, "learning_rate": 9.03440104336289e-06, "loss": 1.0885, "step": 1649 }, { "epoch": 0.22528672856362644, "grad_norm": 11.230344772338867, "learning_rate": 9.033094493069414e-06, "loss": 1.1202, "step": 1650 }, { "epoch": 0.2254232659748771, "grad_norm": 8.372713088989258, "learning_rate": 9.03178715403891e-06, "loss": 0.951, "step": 1651 }, { "epoch": 0.2255598033861278, "grad_norm": 9.002799987792969, "learning_rate": 9.030479026527048e-06, "loss": 1.2028, "step": 1652 }, { "epoch": 0.2256963407973785, "grad_norm": 17.056591033935547, "learning_rate": 9.029170110789654e-06, "loss": 0.9591, "step": 1653 }, { "epoch": 0.22583287820862916, "grad_norm": 23.125703811645508, "learning_rate": 9.027860407082707e-06, "loss": 1.0512, "step": 1654 }, { "epoch": 0.22596941561987985, "grad_norm": 6.951831817626953, "learning_rate": 9.026549915662341e-06, "loss": 1.0518, "step": 1655 }, { "epoch": 0.22610595303113054, "grad_norm": 6.931041717529297, "learning_rate": 9.025238636784847e-06, "loss": 0.9361, "step": 1656 }, { "epoch": 0.2262424904423812, "grad_norm": 7.445444583892822, "learning_rate": 9.023926570706662e-06, "loss": 0.9883, "step": 1657 }, { "epoch": 0.2263790278536319, "grad_norm": 5.717973709106445, "learning_rate": 9.022613717684385e-06, "loss": 1.0165, "step": 1658 }, { "epoch": 0.2265155652648826, "grad_norm": 7.874797344207764, "learning_rate": 9.021300077974764e-06, "loss": 1.0487, "step": 1659 }, { "epoch": 0.22665210267613325, "grad_norm": 7.993814945220947, "learning_rate": 9.019985651834703e-06, "loss": 1.0122, "step": 1660 }, { "epoch": 0.22678864008738395, "grad_norm": 15.317946434020996, "learning_rate": 9.018670439521261e-06, "loss": 0.8658, "step": 1661 }, { "epoch": 0.22692517749863464, "grad_norm": 6.906681060791016, "learning_rate": 9.017354441291647e-06, "loss": 0.9727, "step": 1662 }, { "epoch": 0.2270617149098853, "grad_norm": 7.536348342895508, "learning_rate": 9.016037657403225e-06, "loss": 0.993, "step": 1663 }, { "epoch": 0.227198252321136, "grad_norm": 8.926419258117676, "learning_rate": 9.014720088113517e-06, "loss": 1.0011, "step": 1664 }, { "epoch": 0.22733478973238666, "grad_norm": 7.269813060760498, "learning_rate": 9.013401733680191e-06, "loss": 0.9856, "step": 1665 }, { "epoch": 0.22747132714363735, "grad_norm": 7.723622798919678, "learning_rate": 9.012082594361076e-06, "loss": 1.1082, "step": 1666 }, { "epoch": 0.22760786455488805, "grad_norm": 9.908987998962402, "learning_rate": 9.01076267041415e-06, "loss": 1.0046, "step": 1667 }, { "epoch": 0.2277444019661387, "grad_norm": 6.036856174468994, "learning_rate": 9.009441962097543e-06, "loss": 0.9795, "step": 1668 }, { "epoch": 0.2278809393773894, "grad_norm": 8.8262300491333, "learning_rate": 9.008120469669545e-06, "loss": 1.0358, "step": 1669 }, { "epoch": 0.2280174767886401, "grad_norm": 15.659740447998047, "learning_rate": 9.006798193388592e-06, "loss": 0.9586, "step": 1670 }, { "epoch": 0.22815401419989076, "grad_norm": 7.036919593811035, "learning_rate": 9.005475133513279e-06, "loss": 1.0473, "step": 1671 }, { "epoch": 0.22829055161114145, "grad_norm": 8.227021217346191, "learning_rate": 9.00415129030235e-06, "loss": 0.9071, "step": 1672 }, { "epoch": 0.22842708902239214, "grad_norm": 6.470198631286621, "learning_rate": 9.002826664014705e-06, "loss": 0.9657, "step": 1673 }, { "epoch": 0.2285636264336428, "grad_norm": 7.749699592590332, "learning_rate": 9.001501254909396e-06, "loss": 0.7357, "step": 1674 }, { "epoch": 0.2287001638448935, "grad_norm": 6.748370170593262, "learning_rate": 9.000175063245628e-06, "loss": 1.0974, "step": 1675 }, { "epoch": 0.2288367012561442, "grad_norm": 6.436516761779785, "learning_rate": 8.99884808928276e-06, "loss": 1.1927, "step": 1676 }, { "epoch": 0.22897323866739486, "grad_norm": 9.006427764892578, "learning_rate": 8.997520333280303e-06, "loss": 0.8563, "step": 1677 }, { "epoch": 0.22910977607864555, "grad_norm": 5.859609603881836, "learning_rate": 8.99619179549792e-06, "loss": 1.0386, "step": 1678 }, { "epoch": 0.22924631348989624, "grad_norm": 5.102894306182861, "learning_rate": 8.99486247619543e-06, "loss": 0.8305, "step": 1679 }, { "epoch": 0.2293828509011469, "grad_norm": 8.840161323547363, "learning_rate": 8.9935323756328e-06, "loss": 1.0582, "step": 1680 }, { "epoch": 0.2295193883123976, "grad_norm": 7.252627849578857, "learning_rate": 8.992201494070157e-06, "loss": 0.8858, "step": 1681 }, { "epoch": 0.2296559257236483, "grad_norm": 5.631767749786377, "learning_rate": 8.990869831767773e-06, "loss": 1.1049, "step": 1682 }, { "epoch": 0.22979246313489896, "grad_norm": 7.249933242797852, "learning_rate": 8.989537388986077e-06, "loss": 1.0772, "step": 1683 }, { "epoch": 0.22992900054614965, "grad_norm": 6.183664798736572, "learning_rate": 8.98820416598565e-06, "loss": 1.0275, "step": 1684 }, { "epoch": 0.23006553795740034, "grad_norm": 5.655548572540283, "learning_rate": 8.986870163027225e-06, "loss": 1.0367, "step": 1685 }, { "epoch": 0.230202075368651, "grad_norm": 7.799781799316406, "learning_rate": 8.98553538037169e-06, "loss": 0.8639, "step": 1686 }, { "epoch": 0.2303386127799017, "grad_norm": 7.0912275314331055, "learning_rate": 8.984199818280082e-06, "loss": 1.01, "step": 1687 }, { "epoch": 0.2304751501911524, "grad_norm": 10.236945152282715, "learning_rate": 8.982863477013591e-06, "loss": 0.8918, "step": 1688 }, { "epoch": 0.23061168760240305, "grad_norm": 7.482415199279785, "learning_rate": 8.981526356833559e-06, "loss": 0.9109, "step": 1689 }, { "epoch": 0.23074822501365375, "grad_norm": 8.914116859436035, "learning_rate": 8.980188458001487e-06, "loss": 0.8846, "step": 1690 }, { "epoch": 0.2308847624249044, "grad_norm": 6.592989444732666, "learning_rate": 8.978849780779015e-06, "loss": 1.0931, "step": 1691 }, { "epoch": 0.2310212998361551, "grad_norm": 10.229009628295898, "learning_rate": 8.97751032542795e-06, "loss": 1.1002, "step": 1692 }, { "epoch": 0.2311578372474058, "grad_norm": 5.90974760055542, "learning_rate": 8.976170092210241e-06, "loss": 0.9947, "step": 1693 }, { "epoch": 0.23129437465865646, "grad_norm": 7.570229530334473, "learning_rate": 8.974829081387993e-06, "loss": 1.0132, "step": 1694 }, { "epoch": 0.23143091206990715, "grad_norm": 6.80957555770874, "learning_rate": 8.973487293223461e-06, "loss": 1.173, "step": 1695 }, { "epoch": 0.23156744948115784, "grad_norm": 7.6865925788879395, "learning_rate": 8.972144727979057e-06, "loss": 1.1176, "step": 1696 }, { "epoch": 0.2317039868924085, "grad_norm": 7.284778594970703, "learning_rate": 8.970801385917337e-06, "loss": 1.0154, "step": 1697 }, { "epoch": 0.2318405243036592, "grad_norm": 5.853013038635254, "learning_rate": 8.969457267301016e-06, "loss": 0.9189, "step": 1698 }, { "epoch": 0.2319770617149099, "grad_norm": 7.266374111175537, "learning_rate": 8.968112372392956e-06, "loss": 1.0518, "step": 1699 }, { "epoch": 0.23211359912616056, "grad_norm": 6.766361236572266, "learning_rate": 8.966766701456177e-06, "loss": 0.9998, "step": 1700 }, { "epoch": 0.23225013653741125, "grad_norm": 5.472480773925781, "learning_rate": 8.965420254753843e-06, "loss": 0.9128, "step": 1701 }, { "epoch": 0.23238667394866194, "grad_norm": 6.403226852416992, "learning_rate": 8.964073032549274e-06, "loss": 1.0849, "step": 1702 }, { "epoch": 0.2325232113599126, "grad_norm": 11.68002986907959, "learning_rate": 8.962725035105944e-06, "loss": 1.1052, "step": 1703 }, { "epoch": 0.2326597487711633, "grad_norm": 6.214273929595947, "learning_rate": 8.961376262687473e-06, "loss": 1.0709, "step": 1704 }, { "epoch": 0.232796286182414, "grad_norm": 7.287885665893555, "learning_rate": 8.960026715557635e-06, "loss": 0.9255, "step": 1705 }, { "epoch": 0.23293282359366466, "grad_norm": 6.68980598449707, "learning_rate": 8.958676393980357e-06, "loss": 1.0271, "step": 1706 }, { "epoch": 0.23306936100491535, "grad_norm": 6.272366046905518, "learning_rate": 8.957325298219716e-06, "loss": 0.8523, "step": 1707 }, { "epoch": 0.23320589841616604, "grad_norm": 6.865006446838379, "learning_rate": 8.955973428539943e-06, "loss": 0.9402, "step": 1708 }, { "epoch": 0.2333424358274167, "grad_norm": 7.36637020111084, "learning_rate": 8.954620785205414e-06, "loss": 0.958, "step": 1709 }, { "epoch": 0.2334789732386674, "grad_norm": 7.7383503913879395, "learning_rate": 8.953267368480663e-06, "loss": 0.9858, "step": 1710 }, { "epoch": 0.2336155106499181, "grad_norm": 5.414590358734131, "learning_rate": 8.951913178630372e-06, "loss": 1.0955, "step": 1711 }, { "epoch": 0.23375204806116875, "grad_norm": 6.915541648864746, "learning_rate": 8.950558215919373e-06, "loss": 0.9899, "step": 1712 }, { "epoch": 0.23388858547241945, "grad_norm": 10.025419235229492, "learning_rate": 8.949202480612653e-06, "loss": 1.0569, "step": 1713 }, { "epoch": 0.23402512288367014, "grad_norm": 7.543872356414795, "learning_rate": 8.947845972975348e-06, "loss": 1.0577, "step": 1714 }, { "epoch": 0.2341616602949208, "grad_norm": 8.394279479980469, "learning_rate": 8.946488693272746e-06, "loss": 0.9569, "step": 1715 }, { "epoch": 0.2342981977061715, "grad_norm": 7.1770172119140625, "learning_rate": 8.945130641770281e-06, "loss": 1.0822, "step": 1716 }, { "epoch": 0.23443473511742216, "grad_norm": 6.906083106994629, "learning_rate": 8.943771818733547e-06, "loss": 1.0669, "step": 1717 }, { "epoch": 0.23457127252867285, "grad_norm": 6.500915050506592, "learning_rate": 8.942412224428281e-06, "loss": 1.015, "step": 1718 }, { "epoch": 0.23470780993992355, "grad_norm": 6.413971424102783, "learning_rate": 8.941051859120372e-06, "loss": 0.9257, "step": 1719 }, { "epoch": 0.2348443473511742, "grad_norm": 5.3994364738464355, "learning_rate": 8.939690723075865e-06, "loss": 1.0677, "step": 1720 }, { "epoch": 0.2349808847624249, "grad_norm": 5.5543107986450195, "learning_rate": 8.938328816560953e-06, "loss": 1.0348, "step": 1721 }, { "epoch": 0.2351174221736756, "grad_norm": 8.568669319152832, "learning_rate": 8.936966139841973e-06, "loss": 1.0773, "step": 1722 }, { "epoch": 0.23525395958492626, "grad_norm": 9.866353034973145, "learning_rate": 8.935602693185425e-06, "loss": 1.0423, "step": 1723 }, { "epoch": 0.23539049699617695, "grad_norm": 6.118907928466797, "learning_rate": 8.93423847685795e-06, "loss": 1.2117, "step": 1724 }, { "epoch": 0.23552703440742764, "grad_norm": 6.421145915985107, "learning_rate": 8.932873491126344e-06, "loss": 1.0954, "step": 1725 }, { "epoch": 0.2356635718186783, "grad_norm": 6.46424674987793, "learning_rate": 8.93150773625755e-06, "loss": 1.0848, "step": 1726 }, { "epoch": 0.235800109229929, "grad_norm": 10.111395835876465, "learning_rate": 8.930141212518666e-06, "loss": 0.959, "step": 1727 }, { "epoch": 0.2359366466411797, "grad_norm": 6.9195380210876465, "learning_rate": 8.928773920176935e-06, "loss": 0.9334, "step": 1728 }, { "epoch": 0.23607318405243036, "grad_norm": 6.3333210945129395, "learning_rate": 8.927405859499757e-06, "loss": 0.9962, "step": 1729 }, { "epoch": 0.23620972146368105, "grad_norm": 5.678621292114258, "learning_rate": 8.926037030754672e-06, "loss": 0.8589, "step": 1730 }, { "epoch": 0.23634625887493174, "grad_norm": 9.840557098388672, "learning_rate": 8.924667434209385e-06, "loss": 1.1626, "step": 1731 }, { "epoch": 0.2364827962861824, "grad_norm": 8.783963203430176, "learning_rate": 8.923297070131738e-06, "loss": 1.0604, "step": 1732 }, { "epoch": 0.2366193336974331, "grad_norm": 11.604878425598145, "learning_rate": 8.921925938789728e-06, "loss": 1.1008, "step": 1733 }, { "epoch": 0.2367558711086838, "grad_norm": 9.358281135559082, "learning_rate": 8.920554040451502e-06, "loss": 1.082, "step": 1734 }, { "epoch": 0.23689240851993446, "grad_norm": 8.140381813049316, "learning_rate": 8.919181375385357e-06, "loss": 0.9425, "step": 1735 }, { "epoch": 0.23702894593118515, "grad_norm": 16.919265747070312, "learning_rate": 8.917807943859742e-06, "loss": 1.0339, "step": 1736 }, { "epoch": 0.23716548334243584, "grad_norm": 8.592205047607422, "learning_rate": 8.916433746143253e-06, "loss": 0.899, "step": 1737 }, { "epoch": 0.2373020207536865, "grad_norm": 10.333584785461426, "learning_rate": 8.915058782504634e-06, "loss": 1.0436, "step": 1738 }, { "epoch": 0.2374385581649372, "grad_norm": 5.671545505523682, "learning_rate": 8.913683053212784e-06, "loss": 1.145, "step": 1739 }, { "epoch": 0.2375750955761879, "grad_norm": 17.813339233398438, "learning_rate": 8.91230655853675e-06, "loss": 1.0053, "step": 1740 }, { "epoch": 0.23771163298743855, "grad_norm": 9.099871635437012, "learning_rate": 8.910929298745726e-06, "loss": 1.0585, "step": 1741 }, { "epoch": 0.23784817039868925, "grad_norm": 9.107704162597656, "learning_rate": 8.909551274109057e-06, "loss": 1.1116, "step": 1742 }, { "epoch": 0.2379847078099399, "grad_norm": 7.830885887145996, "learning_rate": 8.908172484896237e-06, "loss": 1.0671, "step": 1743 }, { "epoch": 0.2381212452211906, "grad_norm": 7.934291362762451, "learning_rate": 8.906792931376916e-06, "loss": 0.9325, "step": 1744 }, { "epoch": 0.2382577826324413, "grad_norm": 7.606125354766846, "learning_rate": 8.905412613820881e-06, "loss": 0.9524, "step": 1745 }, { "epoch": 0.23839432004369196, "grad_norm": 5.424095153808594, "learning_rate": 8.904031532498082e-06, "loss": 0.8542, "step": 1746 }, { "epoch": 0.23853085745494265, "grad_norm": 6.526294231414795, "learning_rate": 8.902649687678609e-06, "loss": 0.8999, "step": 1747 }, { "epoch": 0.23866739486619334, "grad_norm": 8.047200202941895, "learning_rate": 8.901267079632703e-06, "loss": 0.9828, "step": 1748 }, { "epoch": 0.238803932277444, "grad_norm": 5.571774482727051, "learning_rate": 8.899883708630757e-06, "loss": 1.0584, "step": 1749 }, { "epoch": 0.2389404696886947, "grad_norm": 6.76044225692749, "learning_rate": 8.89849957494331e-06, "loss": 0.9143, "step": 1750 }, { "epoch": 0.2390770070999454, "grad_norm": 6.158073902130127, "learning_rate": 8.897114678841054e-06, "loss": 0.9946, "step": 1751 }, { "epoch": 0.23921354451119606, "grad_norm": 6.436885356903076, "learning_rate": 8.895729020594826e-06, "loss": 1.018, "step": 1752 }, { "epoch": 0.23935008192244675, "grad_norm": 6.751821994781494, "learning_rate": 8.894342600475616e-06, "loss": 0.949, "step": 1753 }, { "epoch": 0.23948661933369744, "grad_norm": 6.03473424911499, "learning_rate": 8.892955418754559e-06, "loss": 0.9979, "step": 1754 }, { "epoch": 0.2396231567449481, "grad_norm": 6.614584922790527, "learning_rate": 8.891567475702942e-06, "loss": 0.9229, "step": 1755 }, { "epoch": 0.2397596941561988, "grad_norm": 8.440226554870605, "learning_rate": 8.890178771592198e-06, "loss": 1.0168, "step": 1756 }, { "epoch": 0.2398962315674495, "grad_norm": 9.392389297485352, "learning_rate": 8.888789306693913e-06, "loss": 1.0402, "step": 1757 }, { "epoch": 0.24003276897870016, "grad_norm": 8.017632484436035, "learning_rate": 8.887399081279818e-06, "loss": 0.9741, "step": 1758 }, { "epoch": 0.24016930638995085, "grad_norm": 10.281259536743164, "learning_rate": 8.886008095621795e-06, "loss": 1.0716, "step": 1759 }, { "epoch": 0.24030584380120154, "grad_norm": 7.825941562652588, "learning_rate": 8.884616349991874e-06, "loss": 0.9682, "step": 1760 }, { "epoch": 0.2404423812124522, "grad_norm": 6.6843366622924805, "learning_rate": 8.88322384466223e-06, "loss": 1.0098, "step": 1761 }, { "epoch": 0.2405789186237029, "grad_norm": 6.3463850021362305, "learning_rate": 8.881830579905196e-06, "loss": 0.9039, "step": 1762 }, { "epoch": 0.2407154560349536, "grad_norm": 7.674259662628174, "learning_rate": 8.880436555993242e-06, "loss": 0.8453, "step": 1763 }, { "epoch": 0.24085199344620425, "grad_norm": 8.375870704650879, "learning_rate": 8.879041773198996e-06, "loss": 0.9719, "step": 1764 }, { "epoch": 0.24098853085745495, "grad_norm": 6.672438621520996, "learning_rate": 8.877646231795227e-06, "loss": 1.0562, "step": 1765 }, { "epoch": 0.24112506826870564, "grad_norm": 7.4836530685424805, "learning_rate": 8.876249932054857e-06, "loss": 1.0739, "step": 1766 }, { "epoch": 0.2412616056799563, "grad_norm": 7.217228889465332, "learning_rate": 8.874852874250956e-06, "loss": 0.9328, "step": 1767 }, { "epoch": 0.241398143091207, "grad_norm": 10.065325736999512, "learning_rate": 8.873455058656741e-06, "loss": 1.0768, "step": 1768 }, { "epoch": 0.24153468050245766, "grad_norm": 5.60721492767334, "learning_rate": 8.872056485545578e-06, "loss": 1.2086, "step": 1769 }, { "epoch": 0.24167121791370835, "grad_norm": 5.513501167297363, "learning_rate": 8.870657155190978e-06, "loss": 1.0453, "step": 1770 }, { "epoch": 0.24180775532495905, "grad_norm": 7.092294216156006, "learning_rate": 8.869257067866606e-06, "loss": 1.0017, "step": 1771 }, { "epoch": 0.2419442927362097, "grad_norm": 5.751553058624268, "learning_rate": 8.86785622384627e-06, "loss": 1.1214, "step": 1772 }, { "epoch": 0.2420808301474604, "grad_norm": 27.531108856201172, "learning_rate": 8.866454623403928e-06, "loss": 1.058, "step": 1773 }, { "epoch": 0.2422173675587111, "grad_norm": 7.729981899261475, "learning_rate": 8.865052266813686e-06, "loss": 0.9522, "step": 1774 }, { "epoch": 0.24235390496996176, "grad_norm": 7.785430431365967, "learning_rate": 8.863649154349796e-06, "loss": 1.0797, "step": 1775 }, { "epoch": 0.24249044238121245, "grad_norm": 8.650733947753906, "learning_rate": 8.862245286286665e-06, "loss": 1.0479, "step": 1776 }, { "epoch": 0.24262697979246314, "grad_norm": 10.149774551391602, "learning_rate": 8.860840662898833e-06, "loss": 1.0254, "step": 1777 }, { "epoch": 0.2427635172037138, "grad_norm": 6.922665119171143, "learning_rate": 8.859435284461004e-06, "loss": 1.0801, "step": 1778 }, { "epoch": 0.2429000546149645, "grad_norm": 8.229588508605957, "learning_rate": 8.85802915124802e-06, "loss": 0.9331, "step": 1779 }, { "epoch": 0.2430365920262152, "grad_norm": 11.465883255004883, "learning_rate": 8.856622263534875e-06, "loss": 1.0534, "step": 1780 }, { "epoch": 0.24317312943746586, "grad_norm": 5.634054660797119, "learning_rate": 8.855214621596707e-06, "loss": 0.9695, "step": 1781 }, { "epoch": 0.24330966684871655, "grad_norm": 5.811212539672852, "learning_rate": 8.853806225708801e-06, "loss": 1.0278, "step": 1782 }, { "epoch": 0.24344620425996724, "grad_norm": 5.748698711395264, "learning_rate": 8.852397076146597e-06, "loss": 0.899, "step": 1783 }, { "epoch": 0.2435827416712179, "grad_norm": 7.750949859619141, "learning_rate": 8.850987173185673e-06, "loss": 0.8375, "step": 1784 }, { "epoch": 0.2437192790824686, "grad_norm": 5.413559913635254, "learning_rate": 8.84957651710176e-06, "loss": 1.2312, "step": 1785 }, { "epoch": 0.2438558164937193, "grad_norm": 6.607126712799072, "learning_rate": 8.848165108170733e-06, "loss": 0.9919, "step": 1786 }, { "epoch": 0.24399235390496996, "grad_norm": 7.609501838684082, "learning_rate": 8.846752946668618e-06, "loss": 0.994, "step": 1787 }, { "epoch": 0.24412889131622065, "grad_norm": 5.773645877838135, "learning_rate": 8.845340032871584e-06, "loss": 1.0931, "step": 1788 }, { "epoch": 0.24426542872747134, "grad_norm": 6.788311958312988, "learning_rate": 8.843926367055952e-06, "loss": 0.9938, "step": 1789 }, { "epoch": 0.244401966138722, "grad_norm": 8.456381797790527, "learning_rate": 8.842511949498185e-06, "loss": 1.0153, "step": 1790 }, { "epoch": 0.2445385035499727, "grad_norm": 8.759236335754395, "learning_rate": 8.841096780474895e-06, "loss": 1.0952, "step": 1791 }, { "epoch": 0.2446750409612234, "grad_norm": 5.88118839263916, "learning_rate": 8.839680860262844e-06, "loss": 1.0166, "step": 1792 }, { "epoch": 0.24481157837247405, "grad_norm": 7.158264636993408, "learning_rate": 8.838264189138936e-06, "loss": 0.9746, "step": 1793 }, { "epoch": 0.24494811578372475, "grad_norm": 6.6205220222473145, "learning_rate": 8.836846767380224e-06, "loss": 1.0194, "step": 1794 }, { "epoch": 0.2450846531949754, "grad_norm": 7.4593505859375, "learning_rate": 8.83542859526391e-06, "loss": 1.0379, "step": 1795 }, { "epoch": 0.2452211906062261, "grad_norm": 10.549972534179688, "learning_rate": 8.834009673067337e-06, "loss": 1.075, "step": 1796 }, { "epoch": 0.2453577280174768, "grad_norm": 7.379745960235596, "learning_rate": 8.832590001068e-06, "loss": 1.0938, "step": 1797 }, { "epoch": 0.24549426542872746, "grad_norm": 9.712757110595703, "learning_rate": 8.831169579543539e-06, "loss": 0.9762, "step": 1798 }, { "epoch": 0.24563080283997815, "grad_norm": 8.048026084899902, "learning_rate": 8.829748408771742e-06, "loss": 1.0595, "step": 1799 }, { "epoch": 0.24576734025122884, "grad_norm": 7.540980339050293, "learning_rate": 8.828326489030538e-06, "loss": 0.9766, "step": 1800 }, { "epoch": 0.2459038776624795, "grad_norm": 8.200517654418945, "learning_rate": 8.82690382059801e-06, "loss": 1.0264, "step": 1801 }, { "epoch": 0.2460404150737302, "grad_norm": 7.815308094024658, "learning_rate": 8.825480403752382e-06, "loss": 1.1712, "step": 1802 }, { "epoch": 0.2461769524849809, "grad_norm": 6.728014945983887, "learning_rate": 8.824056238772027e-06, "loss": 1.0716, "step": 1803 }, { "epoch": 0.24631348989623156, "grad_norm": 6.311887741088867, "learning_rate": 8.822631325935463e-06, "loss": 0.8698, "step": 1804 }, { "epoch": 0.24645002730748225, "grad_norm": 5.680947780609131, "learning_rate": 8.821205665521357e-06, "loss": 1.0937, "step": 1805 }, { "epoch": 0.24658656471873294, "grad_norm": 7.44262170791626, "learning_rate": 8.819779257808517e-06, "loss": 1.0262, "step": 1806 }, { "epoch": 0.2467231021299836, "grad_norm": 10.825685501098633, "learning_rate": 8.818352103075902e-06, "loss": 1.204, "step": 1807 }, { "epoch": 0.2468596395412343, "grad_norm": 9.88280200958252, "learning_rate": 8.816924201602614e-06, "loss": 1.1326, "step": 1808 }, { "epoch": 0.246996176952485, "grad_norm": 12.65449333190918, "learning_rate": 8.815495553667904e-06, "loss": 0.9782, "step": 1809 }, { "epoch": 0.24713271436373566, "grad_norm": 9.49329948425293, "learning_rate": 8.814066159551166e-06, "loss": 1.0261, "step": 1810 }, { "epoch": 0.24726925177498635, "grad_norm": 8.882302284240723, "learning_rate": 8.812636019531942e-06, "loss": 0.9924, "step": 1811 }, { "epoch": 0.24740578918623704, "grad_norm": 8.893527030944824, "learning_rate": 8.811205133889917e-06, "loss": 0.9527, "step": 1812 }, { "epoch": 0.2475423265974877, "grad_norm": 5.4755377769470215, "learning_rate": 8.809773502904927e-06, "loss": 0.9095, "step": 1813 }, { "epoch": 0.2476788640087384, "grad_norm": 6.238205909729004, "learning_rate": 8.808341126856947e-06, "loss": 0.8951, "step": 1814 }, { "epoch": 0.2478154014199891, "grad_norm": 8.372099876403809, "learning_rate": 8.806908006026106e-06, "loss": 0.9775, "step": 1815 }, { "epoch": 0.24795193883123975, "grad_norm": 7.419745922088623, "learning_rate": 8.80547414069267e-06, "loss": 0.997, "step": 1816 }, { "epoch": 0.24808847624249045, "grad_norm": 12.494742393493652, "learning_rate": 8.804039531137057e-06, "loss": 1.0654, "step": 1817 }, { "epoch": 0.24822501365374114, "grad_norm": 8.39792251586914, "learning_rate": 8.802604177639826e-06, "loss": 0.9953, "step": 1818 }, { "epoch": 0.2483615510649918, "grad_norm": 6.332118034362793, "learning_rate": 8.801168080481686e-06, "loss": 0.9144, "step": 1819 }, { "epoch": 0.2484980884762425, "grad_norm": 6.842358112335205, "learning_rate": 8.799731239943488e-06, "loss": 1.0089, "step": 1820 }, { "epoch": 0.24863462588749316, "grad_norm": 7.544071197509766, "learning_rate": 8.79829365630623e-06, "loss": 1.0118, "step": 1821 }, { "epoch": 0.24877116329874385, "grad_norm": 10.972503662109375, "learning_rate": 8.796855329851053e-06, "loss": 1.0512, "step": 1822 }, { "epoch": 0.24890770070999454, "grad_norm": 6.498275279998779, "learning_rate": 8.795416260859247e-06, "loss": 1.1278, "step": 1823 }, { "epoch": 0.2490442381212452, "grad_norm": 7.671143054962158, "learning_rate": 8.793976449612244e-06, "loss": 1.2045, "step": 1824 }, { "epoch": 0.2491807755324959, "grad_norm": 8.01452922821045, "learning_rate": 8.792535896391622e-06, "loss": 1.0422, "step": 1825 }, { "epoch": 0.2493173129437466, "grad_norm": 8.506482124328613, "learning_rate": 8.791094601479109e-06, "loss": 1.0421, "step": 1826 }, { "epoch": 0.24945385035499726, "grad_norm": 8.619180679321289, "learning_rate": 8.789652565156568e-06, "loss": 1.0711, "step": 1827 }, { "epoch": 0.24959038776624795, "grad_norm": 7.4283223152160645, "learning_rate": 8.788209787706014e-06, "loss": 1.0601, "step": 1828 }, { "epoch": 0.24972692517749864, "grad_norm": 5.7286200523376465, "learning_rate": 8.786766269409607e-06, "loss": 1.105, "step": 1829 }, { "epoch": 0.2498634625887493, "grad_norm": 6.373295307159424, "learning_rate": 8.78532201054965e-06, "loss": 1.032, "step": 1830 }, { "epoch": 0.25, "grad_norm": 7.343088626861572, "learning_rate": 8.783877011408593e-06, "loss": 1.0674, "step": 1831 }, { "epoch": 0.2501365374112507, "grad_norm": 7.837392330169678, "learning_rate": 8.782431272269023e-06, "loss": 1.1073, "step": 1832 }, { "epoch": 0.2502730748225014, "grad_norm": 12.533609390258789, "learning_rate": 8.780984793413684e-06, "loss": 0.9305, "step": 1833 }, { "epoch": 0.250409612233752, "grad_norm": 7.0389933586120605, "learning_rate": 8.779537575125457e-06, "loss": 1.0158, "step": 1834 }, { "epoch": 0.2505461496450027, "grad_norm": 6.749795436859131, "learning_rate": 8.778089617687365e-06, "loss": 1.1896, "step": 1835 }, { "epoch": 0.2506826870562534, "grad_norm": 6.332571029663086, "learning_rate": 8.776640921382585e-06, "loss": 0.9892, "step": 1836 }, { "epoch": 0.2508192244675041, "grad_norm": 8.719735145568848, "learning_rate": 8.77519148649443e-06, "loss": 1.1732, "step": 1837 }, { "epoch": 0.2509557618787548, "grad_norm": 8.456766128540039, "learning_rate": 8.773741313306361e-06, "loss": 1.0804, "step": 1838 }, { "epoch": 0.2510922992900055, "grad_norm": 6.232743740081787, "learning_rate": 8.772290402101983e-06, "loss": 0.9417, "step": 1839 }, { "epoch": 0.2512288367012561, "grad_norm": 6.859513282775879, "learning_rate": 8.770838753165045e-06, "loss": 1.141, "step": 1840 }, { "epoch": 0.2513653741125068, "grad_norm": 8.437198638916016, "learning_rate": 8.769386366779442e-06, "loss": 1.1171, "step": 1841 }, { "epoch": 0.2515019115237575, "grad_norm": 7.334499835968018, "learning_rate": 8.76793324322921e-06, "loss": 1.19, "step": 1842 }, { "epoch": 0.2516384489350082, "grad_norm": 7.51288366317749, "learning_rate": 8.766479382798532e-06, "loss": 1.0563, "step": 1843 }, { "epoch": 0.2517749863462589, "grad_norm": 17.808197021484375, "learning_rate": 8.765024785771732e-06, "loss": 1.0371, "step": 1844 }, { "epoch": 0.2519115237575096, "grad_norm": 11.70899486541748, "learning_rate": 8.763569452433281e-06, "loss": 0.9878, "step": 1845 }, { "epoch": 0.2520480611687602, "grad_norm": 7.021222114562988, "learning_rate": 8.762113383067793e-06, "loss": 1.024, "step": 1846 }, { "epoch": 0.2521845985800109, "grad_norm": 5.804854869842529, "learning_rate": 8.76065657796003e-06, "loss": 1.0498, "step": 1847 }, { "epoch": 0.2523211359912616, "grad_norm": 6.851361274719238, "learning_rate": 8.759199037394888e-06, "loss": 1.0845, "step": 1848 }, { "epoch": 0.2524576734025123, "grad_norm": 7.831886291503906, "learning_rate": 8.757740761657414e-06, "loss": 0.9428, "step": 1849 }, { "epoch": 0.252594210813763, "grad_norm": 7.590631484985352, "learning_rate": 8.7562817510328e-06, "loss": 0.8424, "step": 1850 }, { "epoch": 0.2527307482250137, "grad_norm": 5.738604545593262, "learning_rate": 8.754822005806377e-06, "loss": 0.9089, "step": 1851 }, { "epoch": 0.2528672856362643, "grad_norm": 25.807355880737305, "learning_rate": 8.753361526263622e-06, "loss": 0.9851, "step": 1852 }, { "epoch": 0.253003823047515, "grad_norm": 8.303184509277344, "learning_rate": 8.751900312690158e-06, "loss": 0.9773, "step": 1853 }, { "epoch": 0.2531403604587657, "grad_norm": 5.453091621398926, "learning_rate": 8.750438365371745e-06, "loss": 0.9908, "step": 1854 }, { "epoch": 0.2532768978700164, "grad_norm": 10.478045463562012, "learning_rate": 8.748975684594292e-06, "loss": 1.0155, "step": 1855 }, { "epoch": 0.2534134352812671, "grad_norm": 6.902614116668701, "learning_rate": 8.74751227064385e-06, "loss": 0.9497, "step": 1856 }, { "epoch": 0.2535499726925177, "grad_norm": 6.41688346862793, "learning_rate": 8.746048123806617e-06, "loss": 1.0228, "step": 1857 }, { "epoch": 0.2536865101037684, "grad_norm": 10.322389602661133, "learning_rate": 8.744583244368923e-06, "loss": 0.9715, "step": 1858 }, { "epoch": 0.2538230475150191, "grad_norm": 8.085564613342285, "learning_rate": 8.743117632617256e-06, "loss": 1.0264, "step": 1859 }, { "epoch": 0.2539595849262698, "grad_norm": 7.114721775054932, "learning_rate": 8.741651288838237e-06, "loss": 0.8537, "step": 1860 }, { "epoch": 0.2540961223375205, "grad_norm": 6.562750339508057, "learning_rate": 8.740184213318635e-06, "loss": 1.0311, "step": 1861 }, { "epoch": 0.2542326597487712, "grad_norm": 7.134526252746582, "learning_rate": 8.738716406345356e-06, "loss": 1.143, "step": 1862 }, { "epoch": 0.2543691971600218, "grad_norm": 7.70328950881958, "learning_rate": 8.73724786820546e-06, "loss": 1.1344, "step": 1863 }, { "epoch": 0.2545057345712725, "grad_norm": 4.963126182556152, "learning_rate": 8.735778599186138e-06, "loss": 0.9406, "step": 1864 }, { "epoch": 0.2546422719825232, "grad_norm": 7.992384433746338, "learning_rate": 8.73430859957473e-06, "loss": 1.0632, "step": 1865 }, { "epoch": 0.2547788093937739, "grad_norm": 6.107133388519287, "learning_rate": 8.732837869658722e-06, "loss": 1.0505, "step": 1866 }, { "epoch": 0.2549153468050246, "grad_norm": 8.176409721374512, "learning_rate": 8.731366409725737e-06, "loss": 1.0954, "step": 1867 }, { "epoch": 0.2550518842162753, "grad_norm": 5.7823166847229, "learning_rate": 8.729894220063542e-06, "loss": 0.9673, "step": 1868 }, { "epoch": 0.2551884216275259, "grad_norm": 6.847221374511719, "learning_rate": 8.728421300960049e-06, "loss": 0.9925, "step": 1869 }, { "epoch": 0.2553249590387766, "grad_norm": 12.222972869873047, "learning_rate": 8.726947652703307e-06, "loss": 1.0007, "step": 1870 }, { "epoch": 0.2554614964500273, "grad_norm": 6.086280345916748, "learning_rate": 8.72547327558152e-06, "loss": 1.1295, "step": 1871 }, { "epoch": 0.255598033861278, "grad_norm": 16.678173065185547, "learning_rate": 8.72399816988302e-06, "loss": 0.9276, "step": 1872 }, { "epoch": 0.2557345712725287, "grad_norm": 8.54294490814209, "learning_rate": 8.722522335896287e-06, "loss": 1.1056, "step": 1873 }, { "epoch": 0.2558711086837794, "grad_norm": 7.181792736053467, "learning_rate": 8.721045773909948e-06, "loss": 1.1412, "step": 1874 }, { "epoch": 0.25600764609503, "grad_norm": 7.097261905670166, "learning_rate": 8.719568484212769e-06, "loss": 1.046, "step": 1875 }, { "epoch": 0.2561441835062807, "grad_norm": 6.674594879150391, "learning_rate": 8.718090467093654e-06, "loss": 1.0598, "step": 1876 }, { "epoch": 0.2562807209175314, "grad_norm": 7.108286380767822, "learning_rate": 8.716611722841656e-06, "loss": 0.872, "step": 1877 }, { "epoch": 0.2564172583287821, "grad_norm": 45.059776306152344, "learning_rate": 8.715132251745968e-06, "loss": 1.1588, "step": 1878 }, { "epoch": 0.2565537957400328, "grad_norm": 5.567859649658203, "learning_rate": 8.713652054095923e-06, "loss": 1.0268, "step": 1879 }, { "epoch": 0.2566903331512835, "grad_norm": 8.503434181213379, "learning_rate": 8.712171130180997e-06, "loss": 1.0626, "step": 1880 }, { "epoch": 0.2568268705625341, "grad_norm": 5.477012634277344, "learning_rate": 8.710689480290811e-06, "loss": 1.0852, "step": 1881 }, { "epoch": 0.2569634079737848, "grad_norm": 9.732892036437988, "learning_rate": 8.709207104715124e-06, "loss": 0.9657, "step": 1882 }, { "epoch": 0.2570999453850355, "grad_norm": 10.624225616455078, "learning_rate": 8.70772400374384e-06, "loss": 1.2364, "step": 1883 }, { "epoch": 0.2572364827962862, "grad_norm": 6.16495418548584, "learning_rate": 8.706240177667003e-06, "loss": 0.9896, "step": 1884 }, { "epoch": 0.2573730202075369, "grad_norm": 7.532153129577637, "learning_rate": 8.704755626774796e-06, "loss": 0.975, "step": 1885 }, { "epoch": 0.2575095576187875, "grad_norm": 16.078723907470703, "learning_rate": 8.703270351357552e-06, "loss": 1.024, "step": 1886 }, { "epoch": 0.2576460950300382, "grad_norm": 7.389825820922852, "learning_rate": 8.701784351705738e-06, "loss": 1.0425, "step": 1887 }, { "epoch": 0.2577826324412889, "grad_norm": 7.625925540924072, "learning_rate": 8.700297628109964e-06, "loss": 1.0149, "step": 1888 }, { "epoch": 0.2579191698525396, "grad_norm": 8.744148254394531, "learning_rate": 8.698810180860987e-06, "loss": 1.0708, "step": 1889 }, { "epoch": 0.2580557072637903, "grad_norm": 8.827646255493164, "learning_rate": 8.697322010249697e-06, "loss": 0.9196, "step": 1890 }, { "epoch": 0.258192244675041, "grad_norm": 9.626229286193848, "learning_rate": 8.695833116567134e-06, "loss": 1.2212, "step": 1891 }, { "epoch": 0.2583287820862916, "grad_norm": 6.693582057952881, "learning_rate": 8.694343500104474e-06, "loss": 1.1063, "step": 1892 }, { "epoch": 0.2584653194975423, "grad_norm": 8.634584426879883, "learning_rate": 8.692853161153033e-06, "loss": 0.9913, "step": 1893 }, { "epoch": 0.258601856908793, "grad_norm": 6.699746131896973, "learning_rate": 8.691362100004273e-06, "loss": 1.0048, "step": 1894 }, { "epoch": 0.2587383943200437, "grad_norm": 6.987297058105469, "learning_rate": 8.689870316949796e-06, "loss": 1.0716, "step": 1895 }, { "epoch": 0.2588749317312944, "grad_norm": 6.54337215423584, "learning_rate": 8.688377812281344e-06, "loss": 1.0997, "step": 1896 }, { "epoch": 0.2590114691425451, "grad_norm": 9.142366409301758, "learning_rate": 8.686884586290798e-06, "loss": 0.9604, "step": 1897 }, { "epoch": 0.2591480065537957, "grad_norm": 6.315212249755859, "learning_rate": 8.685390639270186e-06, "loss": 1.0153, "step": 1898 }, { "epoch": 0.2592845439650464, "grad_norm": 6.078569412231445, "learning_rate": 8.68389597151167e-06, "loss": 1.1505, "step": 1899 }, { "epoch": 0.2594210813762971, "grad_norm": 6.739510536193848, "learning_rate": 8.682400583307562e-06, "loss": 0.9057, "step": 1900 }, { "epoch": 0.2595576187875478, "grad_norm": 16.5374755859375, "learning_rate": 8.680904474950304e-06, "loss": 1.0654, "step": 1901 }, { "epoch": 0.2596941561987985, "grad_norm": 6.8267436027526855, "learning_rate": 8.679407646732487e-06, "loss": 0.8811, "step": 1902 }, { "epoch": 0.2598306936100492, "grad_norm": 8.801745414733887, "learning_rate": 8.677910098946838e-06, "loss": 1.0845, "step": 1903 }, { "epoch": 0.2599672310212998, "grad_norm": 7.0646162033081055, "learning_rate": 8.676411831886228e-06, "loss": 1.1084, "step": 1904 }, { "epoch": 0.2601037684325505, "grad_norm": 5.59908390045166, "learning_rate": 8.67491284584367e-06, "loss": 1.0164, "step": 1905 }, { "epoch": 0.2602403058438012, "grad_norm": 8.574299812316895, "learning_rate": 8.67341314111231e-06, "loss": 1.0113, "step": 1906 }, { "epoch": 0.2603768432550519, "grad_norm": 10.930458068847656, "learning_rate": 8.671912717985442e-06, "loss": 1.0605, "step": 1907 }, { "epoch": 0.2605133806663026, "grad_norm": 8.158284187316895, "learning_rate": 8.670411576756502e-06, "loss": 1.0156, "step": 1908 }, { "epoch": 0.2606499180775532, "grad_norm": 6.809046745300293, "learning_rate": 8.668909717719054e-06, "loss": 0.9477, "step": 1909 }, { "epoch": 0.2607864554888039, "grad_norm": 5.718589782714844, "learning_rate": 8.667407141166818e-06, "loss": 0.9615, "step": 1910 }, { "epoch": 0.2609229929000546, "grad_norm": 6.737512111663818, "learning_rate": 8.665903847393644e-06, "loss": 1.1291, "step": 1911 }, { "epoch": 0.2610595303113053, "grad_norm": 7.357619762420654, "learning_rate": 8.664399836693526e-06, "loss": 1.1585, "step": 1912 }, { "epoch": 0.261196067722556, "grad_norm": 4.918781280517578, "learning_rate": 8.662895109360598e-06, "loss": 1.0259, "step": 1913 }, { "epoch": 0.2613326051338067, "grad_norm": 10.441486358642578, "learning_rate": 8.661389665689134e-06, "loss": 0.9852, "step": 1914 }, { "epoch": 0.2614691425450573, "grad_norm": 13.721099853515625, "learning_rate": 8.659883505973547e-06, "loss": 1.078, "step": 1915 }, { "epoch": 0.261605679956308, "grad_norm": 7.231611728668213, "learning_rate": 8.658376630508391e-06, "loss": 0.9852, "step": 1916 }, { "epoch": 0.2617422173675587, "grad_norm": 6.650347709655762, "learning_rate": 8.656869039588362e-06, "loss": 0.9818, "step": 1917 }, { "epoch": 0.2618787547788094, "grad_norm": 6.217762470245361, "learning_rate": 8.655360733508293e-06, "loss": 0.9222, "step": 1918 }, { "epoch": 0.2620152921900601, "grad_norm": 7.883362770080566, "learning_rate": 8.653851712563158e-06, "loss": 1.0127, "step": 1919 }, { "epoch": 0.2621518296013108, "grad_norm": 12.689401626586914, "learning_rate": 8.652341977048067e-06, "loss": 1.0473, "step": 1920 }, { "epoch": 0.2622883670125614, "grad_norm": 10.957810401916504, "learning_rate": 8.650831527258277e-06, "loss": 1.0361, "step": 1921 }, { "epoch": 0.2624249044238121, "grad_norm": 6.895299434661865, "learning_rate": 8.649320363489178e-06, "loss": 0.9064, "step": 1922 }, { "epoch": 0.2625614418350628, "grad_norm": 6.20728063583374, "learning_rate": 8.647808486036306e-06, "loss": 1.0732, "step": 1923 }, { "epoch": 0.2626979792463135, "grad_norm": 7.01143741607666, "learning_rate": 8.646295895195334e-06, "loss": 1.0234, "step": 1924 }, { "epoch": 0.2628345166575642, "grad_norm": 5.568572998046875, "learning_rate": 8.644782591262068e-06, "loss": 1.014, "step": 1925 }, { "epoch": 0.2629710540688149, "grad_norm": 10.615517616271973, "learning_rate": 8.643268574532463e-06, "loss": 0.9191, "step": 1926 }, { "epoch": 0.2631075914800655, "grad_norm": 6.874980926513672, "learning_rate": 8.64175384530261e-06, "loss": 0.9709, "step": 1927 }, { "epoch": 0.2632441288913162, "grad_norm": 10.203253746032715, "learning_rate": 8.640238403868738e-06, "loss": 0.9282, "step": 1928 }, { "epoch": 0.2633806663025669, "grad_norm": 6.052663326263428, "learning_rate": 8.638722250527214e-06, "loss": 0.8941, "step": 1929 }, { "epoch": 0.2635172037138176, "grad_norm": 4.958773612976074, "learning_rate": 8.637205385574547e-06, "loss": 0.9796, "step": 1930 }, { "epoch": 0.2636537411250683, "grad_norm": 9.658924102783203, "learning_rate": 8.63568780930739e-06, "loss": 1.1013, "step": 1931 }, { "epoch": 0.263790278536319, "grad_norm": 6.997915744781494, "learning_rate": 8.634169522022522e-06, "loss": 0.9577, "step": 1932 }, { "epoch": 0.2639268159475696, "grad_norm": 8.762690544128418, "learning_rate": 8.632650524016875e-06, "loss": 0.9698, "step": 1933 }, { "epoch": 0.2640633533588203, "grad_norm": 9.3893404006958, "learning_rate": 8.63113081558751e-06, "loss": 0.9526, "step": 1934 }, { "epoch": 0.264199890770071, "grad_norm": 8.542679786682129, "learning_rate": 8.629610397031629e-06, "loss": 0.9914, "step": 1935 }, { "epoch": 0.2643364281813217, "grad_norm": 7.178361892700195, "learning_rate": 8.62808926864658e-06, "loss": 1.141, "step": 1936 }, { "epoch": 0.2644729655925724, "grad_norm": 8.932392120361328, "learning_rate": 8.62656743072984e-06, "loss": 1.1403, "step": 1937 }, { "epoch": 0.264609503003823, "grad_norm": 6.084834098815918, "learning_rate": 8.62504488357903e-06, "loss": 1.0918, "step": 1938 }, { "epoch": 0.2647460404150737, "grad_norm": 6.910654544830322, "learning_rate": 8.62352162749191e-06, "loss": 1.0402, "step": 1939 }, { "epoch": 0.2648825778263244, "grad_norm": 8.777710914611816, "learning_rate": 8.621997662766378e-06, "loss": 1.1324, "step": 1940 }, { "epoch": 0.2650191152375751, "grad_norm": 6.602145195007324, "learning_rate": 8.620472989700465e-06, "loss": 0.9729, "step": 1941 }, { "epoch": 0.2651556526488258, "grad_norm": 5.806883811950684, "learning_rate": 8.618947608592351e-06, "loss": 1.0051, "step": 1942 }, { "epoch": 0.2652921900600765, "grad_norm": 7.6148505210876465, "learning_rate": 8.617421519740347e-06, "loss": 1.0349, "step": 1943 }, { "epoch": 0.2654287274713271, "grad_norm": 8.73187255859375, "learning_rate": 8.615894723442905e-06, "loss": 0.9115, "step": 1944 }, { "epoch": 0.2655652648825778, "grad_norm": 6.790077209472656, "learning_rate": 8.614367219998615e-06, "loss": 1.1128, "step": 1945 }, { "epoch": 0.2657018022938285, "grad_norm": 8.921399116516113, "learning_rate": 8.612839009706205e-06, "loss": 1.0107, "step": 1946 }, { "epoch": 0.2658383397050792, "grad_norm": 7.474118232727051, "learning_rate": 8.611310092864538e-06, "loss": 1.0054, "step": 1947 }, { "epoch": 0.2659748771163299, "grad_norm": 7.765749931335449, "learning_rate": 8.609780469772623e-06, "loss": 0.98, "step": 1948 }, { "epoch": 0.2661114145275806, "grad_norm": 6.903172016143799, "learning_rate": 8.6082501407296e-06, "loss": 0.9721, "step": 1949 }, { "epoch": 0.2662479519388312, "grad_norm": 6.338351726531982, "learning_rate": 8.606719106034751e-06, "loss": 0.9686, "step": 1950 }, { "epoch": 0.2663844893500819, "grad_norm": 8.97138500213623, "learning_rate": 8.605187365987495e-06, "loss": 0.8733, "step": 1951 }, { "epoch": 0.2665210267613326, "grad_norm": 6.3150153160095215, "learning_rate": 8.603654920887386e-06, "loss": 1.0592, "step": 1952 }, { "epoch": 0.2666575641725833, "grad_norm": 11.983983039855957, "learning_rate": 8.602121771034122e-06, "loss": 0.9622, "step": 1953 }, { "epoch": 0.266794101583834, "grad_norm": 6.450122356414795, "learning_rate": 8.600587916727533e-06, "loss": 1.0426, "step": 1954 }, { "epoch": 0.2669306389950847, "grad_norm": 10.102849006652832, "learning_rate": 8.599053358267588e-06, "loss": 1.1097, "step": 1955 }, { "epoch": 0.2670671764063353, "grad_norm": 6.931760787963867, "learning_rate": 8.597518095954399e-06, "loss": 1.0258, "step": 1956 }, { "epoch": 0.267203713817586, "grad_norm": 7.476613521575928, "learning_rate": 8.595982130088207e-06, "loss": 1.1342, "step": 1957 }, { "epoch": 0.2673402512288367, "grad_norm": 5.318270206451416, "learning_rate": 8.594445460969399e-06, "loss": 1.0281, "step": 1958 }, { "epoch": 0.2674767886400874, "grad_norm": 9.251049995422363, "learning_rate": 8.59290808889849e-06, "loss": 0.988, "step": 1959 }, { "epoch": 0.2676133260513381, "grad_norm": 9.035605430603027, "learning_rate": 8.591370014176145e-06, "loss": 1.1287, "step": 1960 }, { "epoch": 0.2677498634625887, "grad_norm": 8.11251163482666, "learning_rate": 8.589831237103155e-06, "loss": 1.2194, "step": 1961 }, { "epoch": 0.2678864008738394, "grad_norm": 4.98154354095459, "learning_rate": 8.588291757980451e-06, "loss": 1.0093, "step": 1962 }, { "epoch": 0.2680229382850901, "grad_norm": 10.27852725982666, "learning_rate": 8.586751577109109e-06, "loss": 0.9776, "step": 1963 }, { "epoch": 0.2681594756963408, "grad_norm": 6.290530681610107, "learning_rate": 8.585210694790333e-06, "loss": 0.9506, "step": 1964 }, { "epoch": 0.2682960131075915, "grad_norm": 7.940451145172119, "learning_rate": 8.583669111325468e-06, "loss": 0.9569, "step": 1965 }, { "epoch": 0.2684325505188422, "grad_norm": 6.29770565032959, "learning_rate": 8.582126827015993e-06, "loss": 0.9941, "step": 1966 }, { "epoch": 0.2685690879300928, "grad_norm": 5.732116222381592, "learning_rate": 8.580583842163532e-06, "loss": 0.979, "step": 1967 }, { "epoch": 0.2687056253413435, "grad_norm": 14.416977882385254, "learning_rate": 8.579040157069836e-06, "loss": 0.9994, "step": 1968 }, { "epoch": 0.2688421627525942, "grad_norm": 7.023555755615234, "learning_rate": 8.5774957720368e-06, "loss": 1.0084, "step": 1969 }, { "epoch": 0.2689787001638449, "grad_norm": 8.139918327331543, "learning_rate": 8.575950687366453e-06, "loss": 0.9908, "step": 1970 }, { "epoch": 0.2691152375750956, "grad_norm": 6.42880916595459, "learning_rate": 8.57440490336096e-06, "loss": 1.053, "step": 1971 }, { "epoch": 0.2692517749863463, "grad_norm": 6.6067280769348145, "learning_rate": 8.572858420322626e-06, "loss": 0.9364, "step": 1972 }, { "epoch": 0.2693883123975969, "grad_norm": 6.356932640075684, "learning_rate": 8.571311238553893e-06, "loss": 1.158, "step": 1973 }, { "epoch": 0.2695248498088476, "grad_norm": 21.70123863220215, "learning_rate": 8.569763358357332e-06, "loss": 1.0129, "step": 1974 }, { "epoch": 0.2696613872200983, "grad_norm": 7.9755659103393555, "learning_rate": 8.56821478003566e-06, "loss": 1.0583, "step": 1975 }, { "epoch": 0.269797924631349, "grad_norm": 6.048696994781494, "learning_rate": 8.566665503891725e-06, "loss": 1.0919, "step": 1976 }, { "epoch": 0.2699344620425997, "grad_norm": 6.154782772064209, "learning_rate": 8.565115530228513e-06, "loss": 0.9246, "step": 1977 }, { "epoch": 0.2700709994538504, "grad_norm": 7.127429008483887, "learning_rate": 8.563564859349149e-06, "loss": 1.0478, "step": 1978 }, { "epoch": 0.270207536865101, "grad_norm": 9.281728744506836, "learning_rate": 8.562013491556888e-06, "loss": 1.0224, "step": 1979 }, { "epoch": 0.2703440742763517, "grad_norm": 8.067935943603516, "learning_rate": 8.56046142715513e-06, "loss": 0.9083, "step": 1980 }, { "epoch": 0.2704806116876024, "grad_norm": 8.071939468383789, "learning_rate": 8.558908666447399e-06, "loss": 1.0511, "step": 1981 }, { "epoch": 0.2706171490988531, "grad_norm": 7.690673351287842, "learning_rate": 8.55735520973737e-06, "loss": 1.0378, "step": 1982 }, { "epoch": 0.2707536865101038, "grad_norm": 8.745827674865723, "learning_rate": 8.555801057328841e-06, "loss": 1.0628, "step": 1983 }, { "epoch": 0.2708902239213545, "grad_norm": 8.283588409423828, "learning_rate": 8.554246209525755e-06, "loss": 0.9014, "step": 1984 }, { "epoch": 0.2710267613326051, "grad_norm": 8.06218147277832, "learning_rate": 8.55269066663219e-06, "loss": 1.0358, "step": 1985 }, { "epoch": 0.2711632987438558, "grad_norm": 6.999236106872559, "learning_rate": 8.551134428952353e-06, "loss": 0.9623, "step": 1986 }, { "epoch": 0.2712998361551065, "grad_norm": 6.190275192260742, "learning_rate": 8.549577496790591e-06, "loss": 0.978, "step": 1987 }, { "epoch": 0.2714363735663572, "grad_norm": 6.5048980712890625, "learning_rate": 8.548019870451391e-06, "loss": 1.0684, "step": 1988 }, { "epoch": 0.2715729109776079, "grad_norm": 5.981325149536133, "learning_rate": 8.54646155023937e-06, "loss": 0.9533, "step": 1989 }, { "epoch": 0.2717094483888585, "grad_norm": 7.240347385406494, "learning_rate": 8.544902536459283e-06, "loss": 1.1136, "step": 1990 }, { "epoch": 0.2718459858001092, "grad_norm": 7.085204124450684, "learning_rate": 8.543342829416022e-06, "loss": 0.9644, "step": 1991 }, { "epoch": 0.2719825232113599, "grad_norm": 5.296578407287598, "learning_rate": 8.54178242941461e-06, "loss": 0.8788, "step": 1992 }, { "epoch": 0.2721190606226106, "grad_norm": 13.244375228881836, "learning_rate": 8.540221336760212e-06, "loss": 1.0659, "step": 1993 }, { "epoch": 0.2722555980338613, "grad_norm": 5.874307155609131, "learning_rate": 8.538659551758124e-06, "loss": 0.9941, "step": 1994 }, { "epoch": 0.272392135445112, "grad_norm": 6.058582782745361, "learning_rate": 8.537097074713776e-06, "loss": 0.9893, "step": 1995 }, { "epoch": 0.2725286728563626, "grad_norm": 7.089010238647461, "learning_rate": 8.535533905932739e-06, "loss": 1.0196, "step": 1996 }, { "epoch": 0.2726652102676133, "grad_norm": 6.543280124664307, "learning_rate": 8.533970045720712e-06, "loss": 0.8361, "step": 1997 }, { "epoch": 0.272801747678864, "grad_norm": 6.453683376312256, "learning_rate": 8.532405494383538e-06, "loss": 1.1325, "step": 1998 }, { "epoch": 0.2729382850901147, "grad_norm": 7.096835613250732, "learning_rate": 8.530840252227187e-06, "loss": 1.1636, "step": 1999 }, { "epoch": 0.2730748225013654, "grad_norm": 8.035272598266602, "learning_rate": 8.529274319557767e-06, "loss": 1.028, "step": 2000 }, { "epoch": 0.2732113599126161, "grad_norm": 5.148586273193359, "learning_rate": 8.527707696681525e-06, "loss": 0.959, "step": 2001 }, { "epoch": 0.2733478973238667, "grad_norm": 7.781858921051025, "learning_rate": 8.526140383904836e-06, "loss": 0.9993, "step": 2002 }, { "epoch": 0.2734844347351174, "grad_norm": 5.9349870681762695, "learning_rate": 8.524572381534215e-06, "loss": 0.9497, "step": 2003 }, { "epoch": 0.2736209721463681, "grad_norm": 6.74819803237915, "learning_rate": 8.523003689876312e-06, "loss": 0.9338, "step": 2004 }, { "epoch": 0.2737575095576188, "grad_norm": 6.221861839294434, "learning_rate": 8.521434309237906e-06, "loss": 1.0273, "step": 2005 }, { "epoch": 0.2738940469688695, "grad_norm": 13.520655632019043, "learning_rate": 8.519864239925919e-06, "loss": 1.1518, "step": 2006 }, { "epoch": 0.2740305843801202, "grad_norm": 7.081543922424316, "learning_rate": 8.5182934822474e-06, "loss": 0.9399, "step": 2007 }, { "epoch": 0.2741671217913708, "grad_norm": 11.325624465942383, "learning_rate": 8.516722036509539e-06, "loss": 1.0296, "step": 2008 }, { "epoch": 0.2743036592026215, "grad_norm": 5.825429439544678, "learning_rate": 8.515149903019657e-06, "loss": 0.9937, "step": 2009 }, { "epoch": 0.2744401966138722, "grad_norm": 7.717718601226807, "learning_rate": 8.513577082085209e-06, "loss": 1.0024, "step": 2010 }, { "epoch": 0.2745767340251229, "grad_norm": 6.42721700668335, "learning_rate": 8.512003574013785e-06, "loss": 0.994, "step": 2011 }, { "epoch": 0.2747132714363736, "grad_norm": 8.380824089050293, "learning_rate": 8.510429379113114e-06, "loss": 1.0201, "step": 2012 }, { "epoch": 0.2748498088476242, "grad_norm": 6.320898056030273, "learning_rate": 8.50885449769105e-06, "loss": 0.9963, "step": 2013 }, { "epoch": 0.2749863462588749, "grad_norm": 5.999427795410156, "learning_rate": 8.507278930055592e-06, "loss": 0.927, "step": 2014 }, { "epoch": 0.2751228836701256, "grad_norm": 7.321274280548096, "learning_rate": 8.505702676514864e-06, "loss": 0.9561, "step": 2015 }, { "epoch": 0.2752594210813763, "grad_norm": 6.75630521774292, "learning_rate": 8.504125737377126e-06, "loss": 1.0476, "step": 2016 }, { "epoch": 0.275395958492627, "grad_norm": 6.644586563110352, "learning_rate": 8.502548112950781e-06, "loss": 1.0331, "step": 2017 }, { "epoch": 0.2755324959038777, "grad_norm": 8.314475059509277, "learning_rate": 8.500969803544354e-06, "loss": 1.0813, "step": 2018 }, { "epoch": 0.2756690333151283, "grad_norm": 5.679462432861328, "learning_rate": 8.49939080946651e-06, "loss": 1.0124, "step": 2019 }, { "epoch": 0.275805570726379, "grad_norm": 6.975249290466309, "learning_rate": 8.497811131026046e-06, "loss": 1.0928, "step": 2020 }, { "epoch": 0.2759421081376297, "grad_norm": 54.92338943481445, "learning_rate": 8.496230768531896e-06, "loss": 1.0137, "step": 2021 }, { "epoch": 0.2760786455488804, "grad_norm": 6.879970073699951, "learning_rate": 8.494649722293125e-06, "loss": 1.1859, "step": 2022 }, { "epoch": 0.2762151829601311, "grad_norm": 11.296605110168457, "learning_rate": 8.493067992618929e-06, "loss": 1.0245, "step": 2023 }, { "epoch": 0.2763517203713818, "grad_norm": 19.337717056274414, "learning_rate": 8.491485579818646e-06, "loss": 0.9757, "step": 2024 }, { "epoch": 0.2764882577826324, "grad_norm": 5.718870639801025, "learning_rate": 8.489902484201738e-06, "loss": 0.892, "step": 2025 }, { "epoch": 0.2766247951938831, "grad_norm": 6.745364665985107, "learning_rate": 8.488318706077805e-06, "loss": 1.1275, "step": 2026 }, { "epoch": 0.2767613326051338, "grad_norm": 7.459939479827881, "learning_rate": 8.486734245756587e-06, "loss": 0.9976, "step": 2027 }, { "epoch": 0.2768978700163845, "grad_norm": 5.417336940765381, "learning_rate": 8.485149103547943e-06, "loss": 0.9869, "step": 2028 }, { "epoch": 0.2770344074276352, "grad_norm": 6.595572471618652, "learning_rate": 8.483563279761877e-06, "loss": 1.1577, "step": 2029 }, { "epoch": 0.2771709448388859, "grad_norm": 6.489832878112793, "learning_rate": 8.481976774708523e-06, "loss": 1.0823, "step": 2030 }, { "epoch": 0.2773074822501365, "grad_norm": 10.37623405456543, "learning_rate": 8.480389588698145e-06, "loss": 0.9491, "step": 2031 }, { "epoch": 0.2774440196613872, "grad_norm": 7.725444316864014, "learning_rate": 8.478801722041147e-06, "loss": 0.9955, "step": 2032 }, { "epoch": 0.2775805570726379, "grad_norm": 6.4594035148620605, "learning_rate": 8.477213175048059e-06, "loss": 1.0519, "step": 2033 }, { "epoch": 0.2777170944838886, "grad_norm": 6.778250217437744, "learning_rate": 8.475623948029548e-06, "loss": 1.0248, "step": 2034 }, { "epoch": 0.2778536318951393, "grad_norm": 7.889559745788574, "learning_rate": 8.474034041296412e-06, "loss": 0.9951, "step": 2035 }, { "epoch": 0.27799016930639, "grad_norm": 5.75142240524292, "learning_rate": 8.472443455159586e-06, "loss": 0.8229, "step": 2036 }, { "epoch": 0.2781267067176406, "grad_norm": 5.741940498352051, "learning_rate": 8.470852189930133e-06, "loss": 0.9591, "step": 2037 }, { "epoch": 0.2782632441288913, "grad_norm": 14.088173866271973, "learning_rate": 8.469260245919251e-06, "loss": 1.0128, "step": 2038 }, { "epoch": 0.278399781540142, "grad_norm": 7.34263277053833, "learning_rate": 8.467667623438269e-06, "loss": 0.9413, "step": 2039 }, { "epoch": 0.2785363189513927, "grad_norm": 7.221748352050781, "learning_rate": 8.466074322798651e-06, "loss": 1.0167, "step": 2040 }, { "epoch": 0.2786728563626434, "grad_norm": 7.757517337799072, "learning_rate": 8.464480344311995e-06, "loss": 0.9846, "step": 2041 }, { "epoch": 0.278809393773894, "grad_norm": 9.39764404296875, "learning_rate": 8.462885688290027e-06, "loss": 1.1415, "step": 2042 }, { "epoch": 0.2789459311851447, "grad_norm": 7.335814952850342, "learning_rate": 8.461290355044608e-06, "loss": 1.0147, "step": 2043 }, { "epoch": 0.2790824685963954, "grad_norm": 7.036930561065674, "learning_rate": 8.459694344887732e-06, "loss": 1.0837, "step": 2044 }, { "epoch": 0.2792190060076461, "grad_norm": 8.337998390197754, "learning_rate": 8.458097658131523e-06, "loss": 0.9685, "step": 2045 }, { "epoch": 0.2793555434188968, "grad_norm": 6.802257537841797, "learning_rate": 8.456500295088243e-06, "loss": 0.8968, "step": 2046 }, { "epoch": 0.2794920808301475, "grad_norm": 6.312107086181641, "learning_rate": 8.454902256070277e-06, "loss": 1.054, "step": 2047 }, { "epoch": 0.2796286182413981, "grad_norm": 6.697347164154053, "learning_rate": 8.453303541390152e-06, "loss": 1.0504, "step": 2048 }, { "epoch": 0.2797651556526488, "grad_norm": 7.941572189331055, "learning_rate": 8.451704151360519e-06, "loss": 1.0917, "step": 2049 }, { "epoch": 0.2799016930638995, "grad_norm": 6.538543701171875, "learning_rate": 8.450104086294166e-06, "loss": 0.9355, "step": 2050 }, { "epoch": 0.2800382304751502, "grad_norm": 8.832320213317871, "learning_rate": 8.448503346504013e-06, "loss": 1.0158, "step": 2051 }, { "epoch": 0.2801747678864009, "grad_norm": 9.932576179504395, "learning_rate": 8.446901932303112e-06, "loss": 1.0167, "step": 2052 }, { "epoch": 0.2803113052976516, "grad_norm": 5.931886196136475, "learning_rate": 8.44529984400464e-06, "loss": 1.118, "step": 2053 }, { "epoch": 0.2804478427089022, "grad_norm": 6.88953161239624, "learning_rate": 8.443697081921915e-06, "loss": 1.0426, "step": 2054 }, { "epoch": 0.2805843801201529, "grad_norm": 7.017373085021973, "learning_rate": 8.442093646368381e-06, "loss": 0.987, "step": 2055 }, { "epoch": 0.2807209175314036, "grad_norm": 9.203167915344238, "learning_rate": 8.440489537657619e-06, "loss": 1.1745, "step": 2056 }, { "epoch": 0.2808574549426543, "grad_norm": 5.566797733306885, "learning_rate": 8.438884756103336e-06, "loss": 1.007, "step": 2057 }, { "epoch": 0.280993992353905, "grad_norm": 6.675853252410889, "learning_rate": 8.437279302019376e-06, "loss": 1.0029, "step": 2058 }, { "epoch": 0.2811305297651557, "grad_norm": 10.364462852478027, "learning_rate": 8.435673175719708e-06, "loss": 1.0699, "step": 2059 }, { "epoch": 0.2812670671764063, "grad_norm": 6.59923791885376, "learning_rate": 8.434066377518437e-06, "loss": 1.0281, "step": 2060 }, { "epoch": 0.281403604587657, "grad_norm": 6.499609470367432, "learning_rate": 8.4324589077298e-06, "loss": 0.9332, "step": 2061 }, { "epoch": 0.2815401419989077, "grad_norm": 6.293932914733887, "learning_rate": 8.430850766668161e-06, "loss": 0.8313, "step": 2062 }, { "epoch": 0.2816766794101584, "grad_norm": 6.4274187088012695, "learning_rate": 8.429241954648022e-06, "loss": 0.9748, "step": 2063 }, { "epoch": 0.2818132168214091, "grad_norm": 6.871764183044434, "learning_rate": 8.427632471984008e-06, "loss": 0.8623, "step": 2064 }, { "epoch": 0.2819497542326597, "grad_norm": 9.497662544250488, "learning_rate": 8.426022318990882e-06, "loss": 1.1038, "step": 2065 }, { "epoch": 0.2820862916439104, "grad_norm": 11.096906661987305, "learning_rate": 8.424411495983537e-06, "loss": 1.0978, "step": 2066 }, { "epoch": 0.2822228290551611, "grad_norm": 7.594355583190918, "learning_rate": 8.422800003276993e-06, "loss": 1.0037, "step": 2067 }, { "epoch": 0.2823593664664118, "grad_norm": 9.225275039672852, "learning_rate": 8.421187841186402e-06, "loss": 0.8717, "step": 2068 }, { "epoch": 0.2824959038776625, "grad_norm": 5.65740442276001, "learning_rate": 8.419575010027052e-06, "loss": 1.0101, "step": 2069 }, { "epoch": 0.2826324412889132, "grad_norm": 11.259809494018555, "learning_rate": 8.417961510114357e-06, "loss": 0.9264, "step": 2070 }, { "epoch": 0.2827689787001638, "grad_norm": 6.5590291023254395, "learning_rate": 8.416347341763862e-06, "loss": 0.9571, "step": 2071 }, { "epoch": 0.2829055161114145, "grad_norm": 7.262763977050781, "learning_rate": 8.414732505291247e-06, "loss": 0.9945, "step": 2072 }, { "epoch": 0.2830420535226652, "grad_norm": 7.240426063537598, "learning_rate": 8.413117001012315e-06, "loss": 1.0973, "step": 2073 }, { "epoch": 0.2831785909339159, "grad_norm": 6.8112006187438965, "learning_rate": 8.411500829243006e-06, "loss": 1.1603, "step": 2074 }, { "epoch": 0.2833151283451666, "grad_norm": 12.44117546081543, "learning_rate": 8.409883990299391e-06, "loss": 1.0732, "step": 2075 }, { "epoch": 0.2834516657564173, "grad_norm": 11.326740264892578, "learning_rate": 8.408266484497664e-06, "loss": 1.1051, "step": 2076 }, { "epoch": 0.2835882031676679, "grad_norm": 7.726027011871338, "learning_rate": 8.40664831215416e-06, "loss": 0.93, "step": 2077 }, { "epoch": 0.2837247405789186, "grad_norm": 20.390850067138672, "learning_rate": 8.405029473585336e-06, "loss": 1.0783, "step": 2078 }, { "epoch": 0.2838612779901693, "grad_norm": 6.0269904136657715, "learning_rate": 8.40340996910778e-06, "loss": 0.9705, "step": 2079 }, { "epoch": 0.28399781540142, "grad_norm": 6.394077777862549, "learning_rate": 8.401789799038217e-06, "loss": 0.9882, "step": 2080 }, { "epoch": 0.2841343528126707, "grad_norm": 7.112863540649414, "learning_rate": 8.400168963693494e-06, "loss": 1.0222, "step": 2081 }, { "epoch": 0.2842708902239214, "grad_norm": 6.198776721954346, "learning_rate": 8.398547463390595e-06, "loss": 1.015, "step": 2082 }, { "epoch": 0.284407427635172, "grad_norm": 9.04934310913086, "learning_rate": 8.396925298446627e-06, "loss": 1.0525, "step": 2083 }, { "epoch": 0.2845439650464227, "grad_norm": 5.4860334396362305, "learning_rate": 8.395302469178832e-06, "loss": 0.9078, "step": 2084 }, { "epoch": 0.2846805024576734, "grad_norm": 15.145487785339355, "learning_rate": 8.39367897590458e-06, "loss": 1.0434, "step": 2085 }, { "epoch": 0.2848170398689241, "grad_norm": 6.496155261993408, "learning_rate": 8.392054818941375e-06, "loss": 0.9669, "step": 2086 }, { "epoch": 0.2849535772801748, "grad_norm": 7.7399582862854, "learning_rate": 8.390429998606841e-06, "loss": 0.9052, "step": 2087 }, { "epoch": 0.2850901146914255, "grad_norm": 7.275129318237305, "learning_rate": 8.388804515218745e-06, "loss": 1.0724, "step": 2088 }, { "epoch": 0.2852266521026761, "grad_norm": 16.866363525390625, "learning_rate": 8.38717836909497e-06, "loss": 1.0764, "step": 2089 }, { "epoch": 0.2853631895139268, "grad_norm": 6.271788120269775, "learning_rate": 8.38555156055354e-06, "loss": 1.0002, "step": 2090 }, { "epoch": 0.2854997269251775, "grad_norm": 5.607553005218506, "learning_rate": 8.383924089912603e-06, "loss": 1.0086, "step": 2091 }, { "epoch": 0.2856362643364282, "grad_norm": 5.705012798309326, "learning_rate": 8.382295957490435e-06, "loss": 0.8314, "step": 2092 }, { "epoch": 0.2857728017476789, "grad_norm": 11.564478874206543, "learning_rate": 8.380667163605446e-06, "loss": 0.9782, "step": 2093 }, { "epoch": 0.2859093391589295, "grad_norm": 9.671404838562012, "learning_rate": 8.379037708576173e-06, "loss": 1.0266, "step": 2094 }, { "epoch": 0.2860458765701802, "grad_norm": 6.579284191131592, "learning_rate": 8.377407592721279e-06, "loss": 1.0767, "step": 2095 }, { "epoch": 0.2861824139814309, "grad_norm": 7.957174777984619, "learning_rate": 8.375776816359561e-06, "loss": 1.074, "step": 2096 }, { "epoch": 0.2863189513926816, "grad_norm": 5.306970596313477, "learning_rate": 8.374145379809948e-06, "loss": 1.0402, "step": 2097 }, { "epoch": 0.2864554888039323, "grad_norm": 6.468954563140869, "learning_rate": 8.37251328339149e-06, "loss": 0.9776, "step": 2098 }, { "epoch": 0.286592026215183, "grad_norm": 12.03419017791748, "learning_rate": 8.37088052742337e-06, "loss": 0.8617, "step": 2099 }, { "epoch": 0.2867285636264336, "grad_norm": 6.289488792419434, "learning_rate": 8.369247112224901e-06, "loss": 1.0631, "step": 2100 }, { "epoch": 0.2868651010376843, "grad_norm": 5.643879413604736, "learning_rate": 8.367613038115524e-06, "loss": 1.0677, "step": 2101 }, { "epoch": 0.287001638448935, "grad_norm": 5.915124416351318, "learning_rate": 8.365978305414808e-06, "loss": 0.9145, "step": 2102 }, { "epoch": 0.2871381758601857, "grad_norm": 7.25880765914917, "learning_rate": 8.364342914442452e-06, "loss": 1.08, "step": 2103 }, { "epoch": 0.2872747132714364, "grad_norm": 6.685667991638184, "learning_rate": 8.362706865518281e-06, "loss": 1.1071, "step": 2104 }, { "epoch": 0.2874112506826871, "grad_norm": 5.731158256530762, "learning_rate": 8.361070158962253e-06, "loss": 1.0057, "step": 2105 }, { "epoch": 0.2875477880939377, "grad_norm": 8.273614883422852, "learning_rate": 8.359432795094453e-06, "loss": 1.0437, "step": 2106 }, { "epoch": 0.2876843255051884, "grad_norm": 8.036625862121582, "learning_rate": 8.357794774235094e-06, "loss": 1.0352, "step": 2107 }, { "epoch": 0.2878208629164391, "grad_norm": 7.407632827758789, "learning_rate": 8.356156096704516e-06, "loss": 0.9668, "step": 2108 }, { "epoch": 0.2879574003276898, "grad_norm": 7.452439308166504, "learning_rate": 8.354516762823188e-06, "loss": 0.9782, "step": 2109 }, { "epoch": 0.2880939377389405, "grad_norm": 6.340099334716797, "learning_rate": 8.352876772911712e-06, "loss": 0.9854, "step": 2110 }, { "epoch": 0.2882304751501912, "grad_norm": 6.925042152404785, "learning_rate": 8.351236127290811e-06, "loss": 0.8539, "step": 2111 }, { "epoch": 0.2883670125614418, "grad_norm": 5.159319877624512, "learning_rate": 8.349594826281344e-06, "loss": 0.8408, "step": 2112 }, { "epoch": 0.2885035499726925, "grad_norm": 10.975370407104492, "learning_rate": 8.34795287020429e-06, "loss": 1.0451, "step": 2113 }, { "epoch": 0.2886400873839432, "grad_norm": 7.075092792510986, "learning_rate": 8.346310259380763e-06, "loss": 1.0804, "step": 2114 }, { "epoch": 0.2887766247951939, "grad_norm": 7.667272567749023, "learning_rate": 8.344666994132001e-06, "loss": 1.0377, "step": 2115 }, { "epoch": 0.2889131622064446, "grad_norm": 6.837903022766113, "learning_rate": 8.343023074779368e-06, "loss": 0.8641, "step": 2116 }, { "epoch": 0.2890496996176952, "grad_norm": 6.663847923278809, "learning_rate": 8.341378501644366e-06, "loss": 1.1418, "step": 2117 }, { "epoch": 0.2891862370289459, "grad_norm": 7.401825428009033, "learning_rate": 8.339733275048611e-06, "loss": 1.0679, "step": 2118 }, { "epoch": 0.2893227744401966, "grad_norm": 7.782190799713135, "learning_rate": 8.338087395313858e-06, "loss": 1.0608, "step": 2119 }, { "epoch": 0.2894593118514473, "grad_norm": 7.514100551605225, "learning_rate": 8.336440862761985e-06, "loss": 0.8835, "step": 2120 }, { "epoch": 0.289595849262698, "grad_norm": 7.247591495513916, "learning_rate": 8.334793677714998e-06, "loss": 1.0058, "step": 2121 }, { "epoch": 0.2897323866739487, "grad_norm": 9.73189640045166, "learning_rate": 8.333145840495028e-06, "loss": 1.0154, "step": 2122 }, { "epoch": 0.2898689240851993, "grad_norm": 7.753100872039795, "learning_rate": 8.33149735142434e-06, "loss": 1.0013, "step": 2123 }, { "epoch": 0.29000546149645, "grad_norm": 19.34882354736328, "learning_rate": 8.329848210825322e-06, "loss": 1.0271, "step": 2124 }, { "epoch": 0.2901419989077007, "grad_norm": 17.896297454833984, "learning_rate": 8.328198419020488e-06, "loss": 1.1406, "step": 2125 }, { "epoch": 0.2902785363189514, "grad_norm": 31.45242691040039, "learning_rate": 8.326547976332484e-06, "loss": 1.1592, "step": 2126 }, { "epoch": 0.2904150737302021, "grad_norm": 137.677978515625, "learning_rate": 8.324896883084079e-06, "loss": 1.0049, "step": 2127 }, { "epoch": 0.2905516111414528, "grad_norm": 10.949028015136719, "learning_rate": 8.323245139598172e-06, "loss": 0.9035, "step": 2128 }, { "epoch": 0.2906881485527034, "grad_norm": 8.571060180664062, "learning_rate": 8.321592746197788e-06, "loss": 1.0151, "step": 2129 }, { "epoch": 0.2908246859639541, "grad_norm": 9.010289192199707, "learning_rate": 8.319939703206078e-06, "loss": 1.0712, "step": 2130 }, { "epoch": 0.2909612233752048, "grad_norm": 7.393627166748047, "learning_rate": 8.318286010946327e-06, "loss": 1.1534, "step": 2131 }, { "epoch": 0.2910977607864555, "grad_norm": 6.746450901031494, "learning_rate": 8.316631669741934e-06, "loss": 0.9973, "step": 2132 }, { "epoch": 0.2912342981977062, "grad_norm": 7.079291820526123, "learning_rate": 8.314976679916435e-06, "loss": 1.1055, "step": 2133 }, { "epoch": 0.2913708356089569, "grad_norm": 7.345869064331055, "learning_rate": 8.313321041793493e-06, "loss": 1.1582, "step": 2134 }, { "epoch": 0.2915073730202075, "grad_norm": 7.319872856140137, "learning_rate": 8.311664755696892e-06, "loss": 1.0781, "step": 2135 }, { "epoch": 0.2916439104314582, "grad_norm": 9.319795608520508, "learning_rate": 8.310007821950542e-06, "loss": 1.0629, "step": 2136 }, { "epoch": 0.2917804478427089, "grad_norm": 7.33242654800415, "learning_rate": 8.30835024087849e-06, "loss": 1.1296, "step": 2137 }, { "epoch": 0.2919169852539596, "grad_norm": 7.400509834289551, "learning_rate": 8.306692012804901e-06, "loss": 1.0337, "step": 2138 }, { "epoch": 0.2920535226652103, "grad_norm": 9.836797714233398, "learning_rate": 8.305033138054068e-06, "loss": 1.0666, "step": 2139 }, { "epoch": 0.292190060076461, "grad_norm": 10.99081802368164, "learning_rate": 8.303373616950408e-06, "loss": 1.0085, "step": 2140 }, { "epoch": 0.2923265974877116, "grad_norm": 13.923578262329102, "learning_rate": 8.30171344981847e-06, "loss": 1.186, "step": 2141 }, { "epoch": 0.2924631348989623, "grad_norm": 12.596421241760254, "learning_rate": 8.300052636982928e-06, "loss": 1.0388, "step": 2142 }, { "epoch": 0.292599672310213, "grad_norm": 10.862865447998047, "learning_rate": 8.298391178768577e-06, "loss": 1.0316, "step": 2143 }, { "epoch": 0.2927362097214637, "grad_norm": 40.231651306152344, "learning_rate": 8.296729075500345e-06, "loss": 1.0334, "step": 2144 }, { "epoch": 0.2928727471327144, "grad_norm": 10.55799388885498, "learning_rate": 8.295066327503283e-06, "loss": 1.027, "step": 2145 }, { "epoch": 0.293009284543965, "grad_norm": 9.646060943603516, "learning_rate": 8.293402935102567e-06, "loss": 0.977, "step": 2146 }, { "epoch": 0.2931458219552157, "grad_norm": 8.405832290649414, "learning_rate": 8.291738898623501e-06, "loss": 1.0875, "step": 2147 }, { "epoch": 0.2932823593664664, "grad_norm": 7.7974724769592285, "learning_rate": 8.290074218391515e-06, "loss": 1.0524, "step": 2148 }, { "epoch": 0.2934188967777171, "grad_norm": 8.398768424987793, "learning_rate": 8.288408894732163e-06, "loss": 1.1628, "step": 2149 }, { "epoch": 0.2935554341889678, "grad_norm": 5.831943511962891, "learning_rate": 8.286742927971128e-06, "loss": 0.9925, "step": 2150 }, { "epoch": 0.2936919716002185, "grad_norm": 10.75561809539795, "learning_rate": 8.285076318434212e-06, "loss": 1.1354, "step": 2151 }, { "epoch": 0.2938285090114691, "grad_norm": 10.131570816040039, "learning_rate": 8.283409066447355e-06, "loss": 1.2429, "step": 2152 }, { "epoch": 0.2939650464227198, "grad_norm": 13.234832763671875, "learning_rate": 8.28174117233661e-06, "loss": 1.1411, "step": 2153 }, { "epoch": 0.2941015838339705, "grad_norm": 8.467374801635742, "learning_rate": 8.280072636428163e-06, "loss": 1.1395, "step": 2154 }, { "epoch": 0.2942381212452212, "grad_norm": 8.302349090576172, "learning_rate": 8.278403459048322e-06, "loss": 1.1205, "step": 2155 }, { "epoch": 0.2943746586564719, "grad_norm": 8.918286323547363, "learning_rate": 8.27673364052352e-06, "loss": 1.0675, "step": 2156 }, { "epoch": 0.2945111960677226, "grad_norm": 10.216395378112793, "learning_rate": 8.275063181180319e-06, "loss": 1.4109, "step": 2157 }, { "epoch": 0.2946477334789732, "grad_norm": 12.133328437805176, "learning_rate": 8.273392081345405e-06, "loss": 0.918, "step": 2158 }, { "epoch": 0.2947842708902239, "grad_norm": 8.711065292358398, "learning_rate": 8.271720341345588e-06, "loss": 0.9773, "step": 2159 }, { "epoch": 0.2949208083014746, "grad_norm": 10.373221397399902, "learning_rate": 8.270047961507804e-06, "loss": 0.9671, "step": 2160 }, { "epoch": 0.2950573457127253, "grad_norm": 10.455913543701172, "learning_rate": 8.268374942159114e-06, "loss": 0.9829, "step": 2161 }, { "epoch": 0.295193883123976, "grad_norm": 14.527731895446777, "learning_rate": 8.266701283626705e-06, "loss": 1.0013, "step": 2162 }, { "epoch": 0.2953304205352267, "grad_norm": 7.712882995605469, "learning_rate": 8.265026986237887e-06, "loss": 1.0986, "step": 2163 }, { "epoch": 0.2954669579464773, "grad_norm": 10.070378303527832, "learning_rate": 8.263352050320094e-06, "loss": 1.1161, "step": 2164 }, { "epoch": 0.295603495357728, "grad_norm": 11.381475448608398, "learning_rate": 8.26167647620089e-06, "loss": 1.1944, "step": 2165 }, { "epoch": 0.2957400327689787, "grad_norm": 11.334471702575684, "learning_rate": 8.26000026420796e-06, "loss": 1.186, "step": 2166 }, { "epoch": 0.2958765701802294, "grad_norm": 6.633096694946289, "learning_rate": 8.258323414669114e-06, "loss": 0.9223, "step": 2167 }, { "epoch": 0.2960131075914801, "grad_norm": 11.275524139404297, "learning_rate": 8.256645927912288e-06, "loss": 0.9421, "step": 2168 }, { "epoch": 0.2961496450027307, "grad_norm": 9.269476890563965, "learning_rate": 8.254967804265538e-06, "loss": 1.0212, "step": 2169 }, { "epoch": 0.2962861824139814, "grad_norm": 9.588215827941895, "learning_rate": 8.253289044057053e-06, "loss": 1.1106, "step": 2170 }, { "epoch": 0.2964227198252321, "grad_norm": 9.052478790283203, "learning_rate": 8.251609647615142e-06, "loss": 1.0822, "step": 2171 }, { "epoch": 0.2965592572364828, "grad_norm": 8.07905101776123, "learning_rate": 8.249929615268234e-06, "loss": 0.9899, "step": 2172 }, { "epoch": 0.2966957946477335, "grad_norm": 10.264089584350586, "learning_rate": 8.24824894734489e-06, "loss": 1.076, "step": 2173 }, { "epoch": 0.2968323320589842, "grad_norm": 7.931914806365967, "learning_rate": 8.246567644173789e-06, "loss": 1.1439, "step": 2174 }, { "epoch": 0.2969688694702348, "grad_norm": 7.75913667678833, "learning_rate": 8.244885706083741e-06, "loss": 1.1884, "step": 2175 }, { "epoch": 0.2971054068814855, "grad_norm": 9.115120887756348, "learning_rate": 8.243203133403672e-06, "loss": 1.06, "step": 2176 }, { "epoch": 0.2972419442927362, "grad_norm": 12.238795280456543, "learning_rate": 8.241519926462638e-06, "loss": 1.1586, "step": 2177 }, { "epoch": 0.2973784817039869, "grad_norm": 11.09077262878418, "learning_rate": 8.239836085589818e-06, "loss": 0.9209, "step": 2178 }, { "epoch": 0.2975150191152376, "grad_norm": 8.177552223205566, "learning_rate": 8.238151611114515e-06, "loss": 1.0065, "step": 2179 }, { "epoch": 0.2976515565264883, "grad_norm": 11.716320991516113, "learning_rate": 8.236466503366155e-06, "loss": 0.9385, "step": 2180 }, { "epoch": 0.2977880939377389, "grad_norm": 8.70767879486084, "learning_rate": 8.234780762674288e-06, "loss": 1.0841, "step": 2181 }, { "epoch": 0.2979246313489896, "grad_norm": 7.509238243103027, "learning_rate": 8.233094389368585e-06, "loss": 1.0698, "step": 2182 }, { "epoch": 0.2980611687602403, "grad_norm": 5.913687229156494, "learning_rate": 8.23140738377885e-06, "loss": 1.0418, "step": 2183 }, { "epoch": 0.298197706171491, "grad_norm": 8.279830932617188, "learning_rate": 8.229719746234997e-06, "loss": 1.1535, "step": 2184 }, { "epoch": 0.2983342435827417, "grad_norm": 9.551103591918945, "learning_rate": 8.228031477067077e-06, "loss": 1.0803, "step": 2185 }, { "epoch": 0.2984707809939924, "grad_norm": 6.099183559417725, "learning_rate": 8.226342576605252e-06, "loss": 0.9404, "step": 2186 }, { "epoch": 0.298607318405243, "grad_norm": 7.884206295013428, "learning_rate": 8.224653045179822e-06, "loss": 1.0625, "step": 2187 }, { "epoch": 0.2987438558164937, "grad_norm": 7.011703968048096, "learning_rate": 8.222962883121196e-06, "loss": 0.8758, "step": 2188 }, { "epoch": 0.2988803932277444, "grad_norm": 8.503881454467773, "learning_rate": 8.221272090759914e-06, "loss": 1.0553, "step": 2189 }, { "epoch": 0.2990169306389951, "grad_norm": 8.812241554260254, "learning_rate": 8.219580668426638e-06, "loss": 1.0652, "step": 2190 }, { "epoch": 0.2991534680502458, "grad_norm": 9.75148868560791, "learning_rate": 8.217888616452156e-06, "loss": 0.9763, "step": 2191 }, { "epoch": 0.2992900054614965, "grad_norm": 6.1007843017578125, "learning_rate": 8.216195935167373e-06, "loss": 1.0433, "step": 2192 }, { "epoch": 0.2994265428727471, "grad_norm": 7.343416213989258, "learning_rate": 8.21450262490332e-06, "loss": 0.9163, "step": 2193 }, { "epoch": 0.2995630802839978, "grad_norm": 7.3555803298950195, "learning_rate": 8.212808685991151e-06, "loss": 0.8465, "step": 2194 }, { "epoch": 0.2996996176952485, "grad_norm": 7.104743480682373, "learning_rate": 8.211114118762147e-06, "loss": 1.016, "step": 2195 }, { "epoch": 0.2998361551064992, "grad_norm": 7.869146347045898, "learning_rate": 8.209418923547706e-06, "loss": 0.9968, "step": 2196 }, { "epoch": 0.2999726925177499, "grad_norm": 7.025898456573486, "learning_rate": 8.207723100679346e-06, "loss": 0.9247, "step": 2197 }, { "epoch": 0.3001092299290005, "grad_norm": 6.813436031341553, "learning_rate": 8.206026650488722e-06, "loss": 0.9982, "step": 2198 }, { "epoch": 0.3002457673402512, "grad_norm": 8.015617370605469, "learning_rate": 8.204329573307594e-06, "loss": 1.0356, "step": 2199 }, { "epoch": 0.3003823047515019, "grad_norm": 4.941858768463135, "learning_rate": 8.202631869467858e-06, "loss": 1.0372, "step": 2200 }, { "epoch": 0.3005188421627526, "grad_norm": 6.498075485229492, "learning_rate": 8.200933539301525e-06, "loss": 1.1007, "step": 2201 }, { "epoch": 0.3006553795740033, "grad_norm": 6.5641770362854, "learning_rate": 8.199234583140734e-06, "loss": 0.9715, "step": 2202 }, { "epoch": 0.300791916985254, "grad_norm": 7.054961681365967, "learning_rate": 8.19753500131774e-06, "loss": 1.0442, "step": 2203 }, { "epoch": 0.3009284543965046, "grad_norm": 7.061356544494629, "learning_rate": 8.195834794164925e-06, "loss": 0.9382, "step": 2204 }, { "epoch": 0.3010649918077553, "grad_norm": 6.506701469421387, "learning_rate": 8.194133962014794e-06, "loss": 1.0612, "step": 2205 }, { "epoch": 0.301201529219006, "grad_norm": 9.902569770812988, "learning_rate": 8.192432505199968e-06, "loss": 1.0147, "step": 2206 }, { "epoch": 0.3013380666302567, "grad_norm": 8.699676513671875, "learning_rate": 8.1907304240532e-06, "loss": 1.0929, "step": 2207 }, { "epoch": 0.3014746040415074, "grad_norm": 8.25078296661377, "learning_rate": 8.189027718907353e-06, "loss": 1.1635, "step": 2208 }, { "epoch": 0.3016111414527581, "grad_norm": 9.083823204040527, "learning_rate": 8.187324390095424e-06, "loss": 1.0566, "step": 2209 }, { "epoch": 0.3017476788640087, "grad_norm": 8.918213844299316, "learning_rate": 8.185620437950526e-06, "loss": 1.0284, "step": 2210 }, { "epoch": 0.3018842162752594, "grad_norm": 8.352091789245605, "learning_rate": 8.18391586280589e-06, "loss": 1.1034, "step": 2211 }, { "epoch": 0.3020207536865101, "grad_norm": 8.945951461791992, "learning_rate": 8.182210664994879e-06, "loss": 0.963, "step": 2212 }, { "epoch": 0.3021572910977608, "grad_norm": 7.448283672332764, "learning_rate": 8.18050484485097e-06, "loss": 1.0679, "step": 2213 }, { "epoch": 0.3022938285090115, "grad_norm": 6.234255313873291, "learning_rate": 8.178798402707762e-06, "loss": 1.0822, "step": 2214 }, { "epoch": 0.3024303659202622, "grad_norm": 7.597780704498291, "learning_rate": 8.177091338898979e-06, "loss": 0.9423, "step": 2215 }, { "epoch": 0.3025669033315128, "grad_norm": 5.272780418395996, "learning_rate": 8.175383653758467e-06, "loss": 0.8437, "step": 2216 }, { "epoch": 0.3027034407427635, "grad_norm": 5.897809982299805, "learning_rate": 8.173675347620186e-06, "loss": 0.9711, "step": 2217 }, { "epoch": 0.3028399781540142, "grad_norm": 6.87273645401001, "learning_rate": 8.171966420818227e-06, "loss": 1.0897, "step": 2218 }, { "epoch": 0.3029765155652649, "grad_norm": 8.07485580444336, "learning_rate": 8.170256873686799e-06, "loss": 1.0337, "step": 2219 }, { "epoch": 0.3031130529765156, "grad_norm": 10.21430778503418, "learning_rate": 8.168546706560231e-06, "loss": 0.9679, "step": 2220 }, { "epoch": 0.3032495903877662, "grad_norm": 10.265007019042969, "learning_rate": 8.166835919772972e-06, "loss": 1.0502, "step": 2221 }, { "epoch": 0.3033861277990169, "grad_norm": 4.69850492477417, "learning_rate": 8.165124513659595e-06, "loss": 1.1021, "step": 2222 }, { "epoch": 0.3035226652102676, "grad_norm": 8.40811824798584, "learning_rate": 8.163412488554796e-06, "loss": 1.222, "step": 2223 }, { "epoch": 0.3036592026215183, "grad_norm": 9.434910774230957, "learning_rate": 8.161699844793384e-06, "loss": 1.1466, "step": 2224 }, { "epoch": 0.303795740032769, "grad_norm": 5.53639030456543, "learning_rate": 8.1599865827103e-06, "loss": 1.0315, "step": 2225 }, { "epoch": 0.3039322774440197, "grad_norm": 7.351401329040527, "learning_rate": 8.158272702640596e-06, "loss": 0.998, "step": 2226 }, { "epoch": 0.3040688148552703, "grad_norm": 14.765775680541992, "learning_rate": 8.15655820491945e-06, "loss": 1.0729, "step": 2227 }, { "epoch": 0.304205352266521, "grad_norm": 7.466633319854736, "learning_rate": 8.154843089882159e-06, "loss": 0.9156, "step": 2228 }, { "epoch": 0.3043418896777717, "grad_norm": 5.5799174308776855, "learning_rate": 8.153127357864143e-06, "loss": 0.9886, "step": 2229 }, { "epoch": 0.3044784270890224, "grad_norm": 11.225911140441895, "learning_rate": 8.151411009200942e-06, "loss": 0.9667, "step": 2230 }, { "epoch": 0.3046149645002731, "grad_norm": 8.289618492126465, "learning_rate": 8.149694044228216e-06, "loss": 1.1009, "step": 2231 }, { "epoch": 0.3047515019115238, "grad_norm": 6.795470714569092, "learning_rate": 8.147976463281741e-06, "loss": 1.0178, "step": 2232 }, { "epoch": 0.3048880393227744, "grad_norm": 6.466415882110596, "learning_rate": 8.146258266697424e-06, "loss": 0.9722, "step": 2233 }, { "epoch": 0.3050245767340251, "grad_norm": 6.251246929168701, "learning_rate": 8.144539454811283e-06, "loss": 1.0049, "step": 2234 }, { "epoch": 0.3051611141452758, "grad_norm": 6.860640048980713, "learning_rate": 8.142820027959457e-06, "loss": 0.9777, "step": 2235 }, { "epoch": 0.3052976515565265, "grad_norm": 6.854926109313965, "learning_rate": 8.141099986478212e-06, "loss": 0.9071, "step": 2236 }, { "epoch": 0.3054341889677772, "grad_norm": 6.7642340660095215, "learning_rate": 8.139379330703929e-06, "loss": 1.0136, "step": 2237 }, { "epoch": 0.3055707263790279, "grad_norm": 9.484954833984375, "learning_rate": 8.137658060973108e-06, "loss": 1.1417, "step": 2238 }, { "epoch": 0.3057072637902785, "grad_norm": 5.6228790283203125, "learning_rate": 8.135936177622375e-06, "loss": 1.139, "step": 2239 }, { "epoch": 0.3058438012015292, "grad_norm": 8.877588272094727, "learning_rate": 8.134213680988468e-06, "loss": 1.0061, "step": 2240 }, { "epoch": 0.3059803386127799, "grad_norm": 7.3777875900268555, "learning_rate": 8.132490571408253e-06, "loss": 1.0839, "step": 2241 }, { "epoch": 0.3061168760240306, "grad_norm": 6.717514991760254, "learning_rate": 8.130766849218708e-06, "loss": 1.0229, "step": 2242 }, { "epoch": 0.3062534134352813, "grad_norm": 9.181564331054688, "learning_rate": 8.12904251475694e-06, "loss": 1.0228, "step": 2243 }, { "epoch": 0.3063899508465319, "grad_norm": 7.798862457275391, "learning_rate": 8.127317568360164e-06, "loss": 0.947, "step": 2244 }, { "epoch": 0.3065264882577826, "grad_norm": 5.793305397033691, "learning_rate": 8.125592010365729e-06, "loss": 1.1158, "step": 2245 }, { "epoch": 0.3066630256690333, "grad_norm": 5.375869274139404, "learning_rate": 8.123865841111089e-06, "loss": 1.1124, "step": 2246 }, { "epoch": 0.306799563080284, "grad_norm": 7.19471549987793, "learning_rate": 8.122139060933827e-06, "loss": 1.1445, "step": 2247 }, { "epoch": 0.3069361004915347, "grad_norm": 6.554323673248291, "learning_rate": 8.120411670171642e-06, "loss": 1.0318, "step": 2248 }, { "epoch": 0.3070726379027854, "grad_norm": 6.8486504554748535, "learning_rate": 8.118683669162357e-06, "loss": 1.1054, "step": 2249 }, { "epoch": 0.307209175314036, "grad_norm": 8.917920112609863, "learning_rate": 8.116955058243905e-06, "loss": 0.9936, "step": 2250 }, { "epoch": 0.3073457127252867, "grad_norm": 22.152000427246094, "learning_rate": 8.115225837754348e-06, "loss": 1.1498, "step": 2251 }, { "epoch": 0.3074822501365374, "grad_norm": 6.8146162033081055, "learning_rate": 8.113496008031863e-06, "loss": 0.9478, "step": 2252 }, { "epoch": 0.3076187875477881, "grad_norm": 5.731667518615723, "learning_rate": 8.111765569414745e-06, "loss": 1.003, "step": 2253 }, { "epoch": 0.3077553249590388, "grad_norm": 6.869544982910156, "learning_rate": 8.110034522241408e-06, "loss": 0.989, "step": 2254 }, { "epoch": 0.3078918623702895, "grad_norm": 6.764086723327637, "learning_rate": 8.108302866850388e-06, "loss": 1.026, "step": 2255 }, { "epoch": 0.3080283997815401, "grad_norm": 9.13305377960205, "learning_rate": 8.106570603580338e-06, "loss": 1.0043, "step": 2256 }, { "epoch": 0.3081649371927908, "grad_norm": 5.537441730499268, "learning_rate": 8.10483773277003e-06, "loss": 1.061, "step": 2257 }, { "epoch": 0.3083014746040415, "grad_norm": 8.55178451538086, "learning_rate": 8.103104254758355e-06, "loss": 0.9382, "step": 2258 }, { "epoch": 0.3084380120152922, "grad_norm": 10.15920639038086, "learning_rate": 8.101370169884326e-06, "loss": 1.0847, "step": 2259 }, { "epoch": 0.3085745494265429, "grad_norm": 7.126048564910889, "learning_rate": 8.099635478487064e-06, "loss": 0.9968, "step": 2260 }, { "epoch": 0.3087110868377936, "grad_norm": 8.278207778930664, "learning_rate": 8.097900180905822e-06, "loss": 1.0686, "step": 2261 }, { "epoch": 0.3088476242490442, "grad_norm": 6.110464572906494, "learning_rate": 8.096164277479966e-06, "loss": 0.9157, "step": 2262 }, { "epoch": 0.3089841616602949, "grad_norm": 8.877846717834473, "learning_rate": 8.094427768548975e-06, "loss": 1.0761, "step": 2263 }, { "epoch": 0.3091206990715456, "grad_norm": 6.224483966827393, "learning_rate": 8.092690654452457e-06, "loss": 0.8841, "step": 2264 }, { "epoch": 0.3092572364827963, "grad_norm": 6.451091289520264, "learning_rate": 8.09095293553013e-06, "loss": 0.9272, "step": 2265 }, { "epoch": 0.309393773894047, "grad_norm": 7.371923446655273, "learning_rate": 8.089214612121833e-06, "loss": 0.9413, "step": 2266 }, { "epoch": 0.3095303113052977, "grad_norm": 7.857203483581543, "learning_rate": 8.087475684567523e-06, "loss": 1.1152, "step": 2267 }, { "epoch": 0.3096668487165483, "grad_norm": 8.200831413269043, "learning_rate": 8.085736153207277e-06, "loss": 0.9745, "step": 2268 }, { "epoch": 0.309803386127799, "grad_norm": 5.865789413452148, "learning_rate": 8.083996018381287e-06, "loss": 0.9781, "step": 2269 }, { "epoch": 0.3099399235390497, "grad_norm": 6.402328968048096, "learning_rate": 8.082255280429867e-06, "loss": 0.8684, "step": 2270 }, { "epoch": 0.3100764609503004, "grad_norm": 14.338569641113281, "learning_rate": 8.080513939693445e-06, "loss": 0.9955, "step": 2271 }, { "epoch": 0.3102129983615511, "grad_norm": 7.557278633117676, "learning_rate": 8.078771996512567e-06, "loss": 1.0293, "step": 2272 }, { "epoch": 0.3103495357728017, "grad_norm": 8.555275917053223, "learning_rate": 8.077029451227899e-06, "loss": 1.1007, "step": 2273 }, { "epoch": 0.3104860731840524, "grad_norm": 6.400882720947266, "learning_rate": 8.075286304180226e-06, "loss": 0.9597, "step": 2274 }, { "epoch": 0.3106226105953031, "grad_norm": 8.131205558776855, "learning_rate": 8.073542555710447e-06, "loss": 0.9817, "step": 2275 }, { "epoch": 0.3107591480065538, "grad_norm": 7.536998748779297, "learning_rate": 8.07179820615958e-06, "loss": 0.9637, "step": 2276 }, { "epoch": 0.3108956854178045, "grad_norm": 6.429612636566162, "learning_rate": 8.070053255868762e-06, "loss": 0.9048, "step": 2277 }, { "epoch": 0.3110322228290552, "grad_norm": 11.20511245727539, "learning_rate": 8.068307705179246e-06, "loss": 1.1239, "step": 2278 }, { "epoch": 0.3111687602403058, "grad_norm": 6.366639137268066, "learning_rate": 8.066561554432402e-06, "loss": 1.016, "step": 2279 }, { "epoch": 0.3113052976515565, "grad_norm": 8.1592378616333, "learning_rate": 8.064814803969719e-06, "loss": 1.0007, "step": 2280 }, { "epoch": 0.3114418350628072, "grad_norm": 11.753116607666016, "learning_rate": 8.063067454132802e-06, "loss": 0.8552, "step": 2281 }, { "epoch": 0.3115783724740579, "grad_norm": 14.113029479980469, "learning_rate": 8.061319505263373e-06, "loss": 1.0512, "step": 2282 }, { "epoch": 0.3117149098853086, "grad_norm": 6.399592399597168, "learning_rate": 8.059570957703273e-06, "loss": 0.9946, "step": 2283 }, { "epoch": 0.3118514472965593, "grad_norm": 7.2612738609313965, "learning_rate": 8.057821811794457e-06, "loss": 0.8407, "step": 2284 }, { "epoch": 0.3119879847078099, "grad_norm": 5.963510990142822, "learning_rate": 8.056072067879002e-06, "loss": 0.916, "step": 2285 }, { "epoch": 0.3121245221190606, "grad_norm": 6.293915271759033, "learning_rate": 8.054321726299094e-06, "loss": 1.0208, "step": 2286 }, { "epoch": 0.3122610595303113, "grad_norm": 7.9803361892700195, "learning_rate": 8.052570787397045e-06, "loss": 1.0336, "step": 2287 }, { "epoch": 0.312397596941562, "grad_norm": 7.996541500091553, "learning_rate": 8.050819251515278e-06, "loss": 1.0221, "step": 2288 }, { "epoch": 0.3125341343528127, "grad_norm": 9.510490417480469, "learning_rate": 8.049067118996334e-06, "loss": 0.9338, "step": 2289 }, { "epoch": 0.3126706717640634, "grad_norm": 9.65205192565918, "learning_rate": 8.047314390182873e-06, "loss": 1.1174, "step": 2290 }, { "epoch": 0.312807209175314, "grad_norm": 5.8015522956848145, "learning_rate": 8.045561065417664e-06, "loss": 0.9588, "step": 2291 }, { "epoch": 0.3129437465865647, "grad_norm": 5.9073166847229, "learning_rate": 8.043807145043604e-06, "loss": 0.9544, "step": 2292 }, { "epoch": 0.3130802839978154, "grad_norm": 6.671653747558594, "learning_rate": 8.042052629403697e-06, "loss": 0.9891, "step": 2293 }, { "epoch": 0.3132168214090661, "grad_norm": 16.982067108154297, "learning_rate": 8.04029751884107e-06, "loss": 0.923, "step": 2294 }, { "epoch": 0.3133533588203168, "grad_norm": 7.95582389831543, "learning_rate": 8.038541813698962e-06, "loss": 1.1159, "step": 2295 }, { "epoch": 0.3134898962315674, "grad_norm": 10.12961196899414, "learning_rate": 8.036785514320726e-06, "loss": 1.0497, "step": 2296 }, { "epoch": 0.3136264336428181, "grad_norm": 10.363160133361816, "learning_rate": 8.03502862104984e-06, "loss": 1.0465, "step": 2297 }, { "epoch": 0.3137629710540688, "grad_norm": 12.494643211364746, "learning_rate": 8.033271134229893e-06, "loss": 0.9954, "step": 2298 }, { "epoch": 0.3138995084653195, "grad_norm": 15.55480670928955, "learning_rate": 8.031513054204584e-06, "loss": 0.9111, "step": 2299 }, { "epoch": 0.3140360458765702, "grad_norm": 21.02598762512207, "learning_rate": 8.029754381317741e-06, "loss": 1.0773, "step": 2300 }, { "epoch": 0.3141725832878209, "grad_norm": 9.819600105285645, "learning_rate": 8.027995115913299e-06, "loss": 1.0637, "step": 2301 }, { "epoch": 0.3143091206990715, "grad_norm": 31.658058166503906, "learning_rate": 8.026235258335307e-06, "loss": 0.9795, "step": 2302 }, { "epoch": 0.3144456581103222, "grad_norm": 11.453607559204102, "learning_rate": 8.024474808927938e-06, "loss": 0.8848, "step": 2303 }, { "epoch": 0.3145821955215729, "grad_norm": 12.474359512329102, "learning_rate": 8.022713768035477e-06, "loss": 0.9905, "step": 2304 }, { "epoch": 0.3147187329328236, "grad_norm": 16.096654891967773, "learning_rate": 8.02095213600232e-06, "loss": 0.8643, "step": 2305 }, { "epoch": 0.3148552703440743, "grad_norm": 10.696681022644043, "learning_rate": 8.019189913172986e-06, "loss": 0.9876, "step": 2306 }, { "epoch": 0.314991807755325, "grad_norm": 7.298346996307373, "learning_rate": 8.017427099892104e-06, "loss": 1.0297, "step": 2307 }, { "epoch": 0.3151283451665756, "grad_norm": 12.819706916809082, "learning_rate": 8.015663696504424e-06, "loss": 0.9143, "step": 2308 }, { "epoch": 0.3152648825778263, "grad_norm": 15.279690742492676, "learning_rate": 8.013899703354804e-06, "loss": 1.0899, "step": 2309 }, { "epoch": 0.315401419989077, "grad_norm": 22.490869522094727, "learning_rate": 8.012135120788223e-06, "loss": 1.0132, "step": 2310 }, { "epoch": 0.3155379574003277, "grad_norm": 36.83845901489258, "learning_rate": 8.010369949149776e-06, "loss": 1.1057, "step": 2311 }, { "epoch": 0.3156744948115784, "grad_norm": 13.461432456970215, "learning_rate": 8.00860418878467e-06, "loss": 0.9657, "step": 2312 }, { "epoch": 0.3158110322228291, "grad_norm": 8.906269073486328, "learning_rate": 8.006837840038224e-06, "loss": 0.9651, "step": 2313 }, { "epoch": 0.3159475696340797, "grad_norm": 9.195684432983398, "learning_rate": 8.005070903255883e-06, "loss": 1.2068, "step": 2314 }, { "epoch": 0.3160841070453304, "grad_norm": 9.427898406982422, "learning_rate": 8.003303378783193e-06, "loss": 0.9276, "step": 2315 }, { "epoch": 0.3162206444565811, "grad_norm": 8.633831024169922, "learning_rate": 8.001535266965829e-06, "loss": 1.0636, "step": 2316 }, { "epoch": 0.3163571818678318, "grad_norm": 7.550094127655029, "learning_rate": 7.99976656814957e-06, "loss": 1.094, "step": 2317 }, { "epoch": 0.3164937192790825, "grad_norm": 66.04327392578125, "learning_rate": 7.997997282680313e-06, "loss": 0.9907, "step": 2318 }, { "epoch": 0.3166302566903332, "grad_norm": 12.063087463378906, "learning_rate": 7.996227410904072e-06, "loss": 0.8624, "step": 2319 }, { "epoch": 0.3167667941015838, "grad_norm": 33.64094161987305, "learning_rate": 7.994456953166974e-06, "loss": 1.0393, "step": 2320 }, { "epoch": 0.3169033315128345, "grad_norm": 6.0893120765686035, "learning_rate": 7.99268590981526e-06, "loss": 1.064, "step": 2321 }, { "epoch": 0.3170398689240852, "grad_norm": 8.47836685180664, "learning_rate": 7.990914281195287e-06, "loss": 1.0295, "step": 2322 }, { "epoch": 0.3171764063353359, "grad_norm": 8.533049583435059, "learning_rate": 7.989142067653525e-06, "loss": 1.0499, "step": 2323 }, { "epoch": 0.3173129437465866, "grad_norm": 6.811569690704346, "learning_rate": 7.987369269536563e-06, "loss": 0.9554, "step": 2324 }, { "epoch": 0.3174494811578372, "grad_norm": 7.408911228179932, "learning_rate": 7.985595887191094e-06, "loss": 1.0255, "step": 2325 }, { "epoch": 0.3175860185690879, "grad_norm": 5.961880207061768, "learning_rate": 7.983821920963935e-06, "loss": 1.0435, "step": 2326 }, { "epoch": 0.3177225559803386, "grad_norm": 8.553694725036621, "learning_rate": 7.982047371202016e-06, "loss": 0.9624, "step": 2327 }, { "epoch": 0.3178590933915893, "grad_norm": 12.211134910583496, "learning_rate": 7.980272238252373e-06, "loss": 1.0287, "step": 2328 }, { "epoch": 0.31799563080284, "grad_norm": 7.167140960693359, "learning_rate": 7.978496522462167e-06, "loss": 0.9743, "step": 2329 }, { "epoch": 0.3181321682140907, "grad_norm": 5.8687543869018555, "learning_rate": 7.976720224178666e-06, "loss": 1.0122, "step": 2330 }, { "epoch": 0.3182687056253413, "grad_norm": 7.217541694641113, "learning_rate": 7.974943343749258e-06, "loss": 0.9691, "step": 2331 }, { "epoch": 0.318405243036592, "grad_norm": 5.828256130218506, "learning_rate": 7.973165881521435e-06, "loss": 0.9703, "step": 2332 }, { "epoch": 0.3185417804478427, "grad_norm": 6.42768669128418, "learning_rate": 7.97138783784281e-06, "loss": 1.0071, "step": 2333 }, { "epoch": 0.3186783178590934, "grad_norm": 5.205018997192383, "learning_rate": 7.96960921306111e-06, "loss": 0.9057, "step": 2334 }, { "epoch": 0.3188148552703441, "grad_norm": 8.263187408447266, "learning_rate": 7.967830007524174e-06, "loss": 0.9941, "step": 2335 }, { "epoch": 0.3189513926815948, "grad_norm": 4.803721904754639, "learning_rate": 7.966050221579951e-06, "loss": 1.0149, "step": 2336 }, { "epoch": 0.3190879300928454, "grad_norm": 19.77713394165039, "learning_rate": 7.964269855576512e-06, "loss": 0.8448, "step": 2337 }, { "epoch": 0.3192244675040961, "grad_norm": 5.737462043762207, "learning_rate": 7.962488909862034e-06, "loss": 0.8237, "step": 2338 }, { "epoch": 0.3193610049153468, "grad_norm": 7.955691337585449, "learning_rate": 7.960707384784809e-06, "loss": 0.9584, "step": 2339 }, { "epoch": 0.3194975423265975, "grad_norm": 7.848254203796387, "learning_rate": 7.958925280693243e-06, "loss": 0.9501, "step": 2340 }, { "epoch": 0.3196340797378482, "grad_norm": 8.027532577514648, "learning_rate": 7.957142597935859e-06, "loss": 1.0941, "step": 2341 }, { "epoch": 0.3197706171490989, "grad_norm": 8.694445610046387, "learning_rate": 7.955359336861284e-06, "loss": 1.0415, "step": 2342 }, { "epoch": 0.3199071545603495, "grad_norm": 7.9696946144104, "learning_rate": 7.95357549781827e-06, "loss": 0.9843, "step": 2343 }, { "epoch": 0.3200436919716002, "grad_norm": 7.321447849273682, "learning_rate": 7.95179108115567e-06, "loss": 0.8419, "step": 2344 }, { "epoch": 0.3201802293828509, "grad_norm": 8.48193073272705, "learning_rate": 7.950006087222457e-06, "loss": 0.7937, "step": 2345 }, { "epoch": 0.3203167667941016, "grad_norm": 10.316190719604492, "learning_rate": 7.948220516367718e-06, "loss": 1.1084, "step": 2346 }, { "epoch": 0.3204533042053523, "grad_norm": 7.9369797706604, "learning_rate": 7.946434368940645e-06, "loss": 1.0003, "step": 2347 }, { "epoch": 0.3205898416166029, "grad_norm": 18.82132339477539, "learning_rate": 7.944647645290555e-06, "loss": 1.0718, "step": 2348 }, { "epoch": 0.3207263790278536, "grad_norm": 7.463727951049805, "learning_rate": 7.942860345766867e-06, "loss": 1.0116, "step": 2349 }, { "epoch": 0.3208629164391043, "grad_norm": 6.498861789703369, "learning_rate": 7.941072470719117e-06, "loss": 1.1018, "step": 2350 }, { "epoch": 0.320999453850355, "grad_norm": 7.7288498878479, "learning_rate": 7.93928402049695e-06, "loss": 1.1424, "step": 2351 }, { "epoch": 0.3211359912616057, "grad_norm": 6.030069828033447, "learning_rate": 7.937494995450135e-06, "loss": 1.0132, "step": 2352 }, { "epoch": 0.3212725286728564, "grad_norm": 9.789459228515625, "learning_rate": 7.935705395928534e-06, "loss": 1.0885, "step": 2353 }, { "epoch": 0.321409066084107, "grad_norm": 9.859770774841309, "learning_rate": 7.93391522228214e-06, "loss": 0.8843, "step": 2354 }, { "epoch": 0.3215456034953577, "grad_norm": 6.853219509124756, "learning_rate": 7.932124474861047e-06, "loss": 0.9021, "step": 2355 }, { "epoch": 0.3216821409066084, "grad_norm": 7.465497016906738, "learning_rate": 7.930333154015467e-06, "loss": 0.9725, "step": 2356 }, { "epoch": 0.3218186783178591, "grad_norm": 8.346929550170898, "learning_rate": 7.928541260095716e-06, "loss": 0.9565, "step": 2357 }, { "epoch": 0.3219552157291098, "grad_norm": 5.932945251464844, "learning_rate": 7.926748793452236e-06, "loss": 1.0795, "step": 2358 }, { "epoch": 0.3220917531403605, "grad_norm": 6.087224960327148, "learning_rate": 7.92495575443557e-06, "loss": 1.1323, "step": 2359 }, { "epoch": 0.3222282905516111, "grad_norm": 7.876840591430664, "learning_rate": 7.923162143396373e-06, "loss": 1.0604, "step": 2360 }, { "epoch": 0.3223648279628618, "grad_norm": 8.6082124710083, "learning_rate": 7.921367960685417e-06, "loss": 1.1179, "step": 2361 }, { "epoch": 0.3225013653741125, "grad_norm": 8.613194465637207, "learning_rate": 7.919573206653583e-06, "loss": 0.9477, "step": 2362 }, { "epoch": 0.3226379027853632, "grad_norm": 7.620638847351074, "learning_rate": 7.917777881651864e-06, "loss": 1.0277, "step": 2363 }, { "epoch": 0.3227744401966139, "grad_norm": 7.083987236022949, "learning_rate": 7.915981986031367e-06, "loss": 0.9465, "step": 2364 }, { "epoch": 0.3229109776078646, "grad_norm": 17.57803726196289, "learning_rate": 7.914185520143307e-06, "loss": 0.8811, "step": 2365 }, { "epoch": 0.3230475150191152, "grad_norm": 12.122920989990234, "learning_rate": 7.912388484339012e-06, "loss": 0.898, "step": 2366 }, { "epoch": 0.3231840524303659, "grad_norm": 7.15609073638916, "learning_rate": 7.91059087896992e-06, "loss": 0.9266, "step": 2367 }, { "epoch": 0.3233205898416166, "grad_norm": 7.23237419128418, "learning_rate": 7.908792704387584e-06, "loss": 0.9737, "step": 2368 }, { "epoch": 0.3234571272528673, "grad_norm": 9.003904342651367, "learning_rate": 7.906993960943668e-06, "loss": 0.9552, "step": 2369 }, { "epoch": 0.323593664664118, "grad_norm": 10.190507888793945, "learning_rate": 7.90519464898994e-06, "loss": 1.0233, "step": 2370 }, { "epoch": 0.3237302020753687, "grad_norm": 7.171715259552002, "learning_rate": 7.90339476887829e-06, "loss": 1.0345, "step": 2371 }, { "epoch": 0.3238667394866193, "grad_norm": 9.368165016174316, "learning_rate": 7.901594320960709e-06, "loss": 0.9948, "step": 2372 }, { "epoch": 0.32400327689787, "grad_norm": 6.032124042510986, "learning_rate": 7.899793305589307e-06, "loss": 1.0412, "step": 2373 }, { "epoch": 0.3241398143091207, "grad_norm": 8.002161026000977, "learning_rate": 7.897991723116301e-06, "loss": 1.0683, "step": 2374 }, { "epoch": 0.3242763517203714, "grad_norm": 13.020090103149414, "learning_rate": 7.89618957389402e-06, "loss": 0.964, "step": 2375 }, { "epoch": 0.3244128891316221, "grad_norm": 6.176939010620117, "learning_rate": 7.894386858274903e-06, "loss": 1.1582, "step": 2376 }, { "epoch": 0.3245494265428727, "grad_norm": 8.045456886291504, "learning_rate": 7.8925835766115e-06, "loss": 1.1166, "step": 2377 }, { "epoch": 0.3246859639541234, "grad_norm": 6.814077854156494, "learning_rate": 7.890779729256472e-06, "loss": 0.9211, "step": 2378 }, { "epoch": 0.3248225013653741, "grad_norm": 16.96174430847168, "learning_rate": 7.888975316562593e-06, "loss": 0.9847, "step": 2379 }, { "epoch": 0.3249590387766248, "grad_norm": 12.232844352722168, "learning_rate": 7.887170338882742e-06, "loss": 1.1496, "step": 2380 }, { "epoch": 0.3250955761878755, "grad_norm": 6.82275915145874, "learning_rate": 7.885364796569911e-06, "loss": 0.9276, "step": 2381 }, { "epoch": 0.3252321135991262, "grad_norm": 8.179162979125977, "learning_rate": 7.883558689977208e-06, "loss": 1.0576, "step": 2382 }, { "epoch": 0.3253686510103768, "grad_norm": 9.982139587402344, "learning_rate": 7.881752019457841e-06, "loss": 1.0726, "step": 2383 }, { "epoch": 0.3255051884216275, "grad_norm": 6.216320991516113, "learning_rate": 7.879944785365136e-06, "loss": 0.8721, "step": 2384 }, { "epoch": 0.3256417258328782, "grad_norm": 5.83876371383667, "learning_rate": 7.878136988052529e-06, "loss": 1.0054, "step": 2385 }, { "epoch": 0.3257782632441289, "grad_norm": 6.839359283447266, "learning_rate": 7.876328627873561e-06, "loss": 1.0575, "step": 2386 }, { "epoch": 0.3259148006553796, "grad_norm": 5.817805767059326, "learning_rate": 7.874519705181887e-06, "loss": 1.0552, "step": 2387 }, { "epoch": 0.3260513380666303, "grad_norm": 6.565758228302002, "learning_rate": 7.872710220331271e-06, "loss": 1.0007, "step": 2388 }, { "epoch": 0.3261878754778809, "grad_norm": 9.512808799743652, "learning_rate": 7.87090017367559e-06, "loss": 0.9967, "step": 2389 }, { "epoch": 0.3263244128891316, "grad_norm": 8.30820369720459, "learning_rate": 7.869089565568822e-06, "loss": 0.9077, "step": 2390 }, { "epoch": 0.3264609503003823, "grad_norm": 8.724743843078613, "learning_rate": 7.867278396365064e-06, "loss": 0.9603, "step": 2391 }, { "epoch": 0.326597487711633, "grad_norm": 7.272090435028076, "learning_rate": 7.865466666418523e-06, "loss": 1.0804, "step": 2392 }, { "epoch": 0.3267340251228837, "grad_norm": 7.4595947265625, "learning_rate": 7.863654376083504e-06, "loss": 0.9888, "step": 2393 }, { "epoch": 0.3268705625341344, "grad_norm": 6.734989643096924, "learning_rate": 7.86184152571444e-06, "loss": 0.9613, "step": 2394 }, { "epoch": 0.327007099945385, "grad_norm": 11.489699363708496, "learning_rate": 7.860028115665855e-06, "loss": 0.9814, "step": 2395 }, { "epoch": 0.3271436373566357, "grad_norm": 6.982486248016357, "learning_rate": 7.858214146292394e-06, "loss": 0.954, "step": 2396 }, { "epoch": 0.3272801747678864, "grad_norm": 6.933880805969238, "learning_rate": 7.85639961794881e-06, "loss": 1.1011, "step": 2397 }, { "epoch": 0.3274167121791371, "grad_norm": 7.8203277587890625, "learning_rate": 7.854584530989958e-06, "loss": 0.9799, "step": 2398 }, { "epoch": 0.3275532495903878, "grad_norm": 8.487804412841797, "learning_rate": 7.852768885770811e-06, "loss": 0.9761, "step": 2399 }, { "epoch": 0.3276897870016384, "grad_norm": 5.944777965545654, "learning_rate": 7.850952682646451e-06, "loss": 1.0939, "step": 2400 }, { "epoch": 0.3278263244128891, "grad_norm": 5.241250038146973, "learning_rate": 7.84913592197206e-06, "loss": 1.0022, "step": 2401 }, { "epoch": 0.3279628618241398, "grad_norm": 5.928726673126221, "learning_rate": 7.847318604102939e-06, "loss": 1.0374, "step": 2402 }, { "epoch": 0.3280993992353905, "grad_norm": 6.08997917175293, "learning_rate": 7.845500729394492e-06, "loss": 1.0478, "step": 2403 }, { "epoch": 0.3282359366466412, "grad_norm": 9.80699348449707, "learning_rate": 7.843682298202235e-06, "loss": 1.1917, "step": 2404 }, { "epoch": 0.3283724740578919, "grad_norm": 9.511377334594727, "learning_rate": 7.841863310881792e-06, "loss": 1.0358, "step": 2405 }, { "epoch": 0.3285090114691425, "grad_norm": 6.991609573364258, "learning_rate": 7.840043767788894e-06, "loss": 1.0651, "step": 2406 }, { "epoch": 0.3286455488803932, "grad_norm": 8.19912338256836, "learning_rate": 7.838223669279384e-06, "loss": 0.9502, "step": 2407 }, { "epoch": 0.3287820862916439, "grad_norm": 7.827434062957764, "learning_rate": 7.836403015709208e-06, "loss": 1.1172, "step": 2408 }, { "epoch": 0.3289186237028946, "grad_norm": 5.553248405456543, "learning_rate": 7.834581807434429e-06, "loss": 1.0735, "step": 2409 }, { "epoch": 0.3290551611141453, "grad_norm": 5.371456623077393, "learning_rate": 7.832760044811212e-06, "loss": 1.0373, "step": 2410 }, { "epoch": 0.329191698525396, "grad_norm": 5.89978551864624, "learning_rate": 7.830937728195832e-06, "loss": 0.9399, "step": 2411 }, { "epoch": 0.3293282359366466, "grad_norm": 4.907297134399414, "learning_rate": 7.829114857944672e-06, "loss": 0.9831, "step": 2412 }, { "epoch": 0.3294647733478973, "grad_norm": 30.221717834472656, "learning_rate": 7.827291434414226e-06, "loss": 0.97, "step": 2413 }, { "epoch": 0.329601310759148, "grad_norm": 8.265303611755371, "learning_rate": 7.825467457961092e-06, "loss": 0.8163, "step": 2414 }, { "epoch": 0.3297378481703987, "grad_norm": 16.95279884338379, "learning_rate": 7.82364292894198e-06, "loss": 1.0375, "step": 2415 }, { "epoch": 0.3298743855816494, "grad_norm": 7.310180187225342, "learning_rate": 7.821817847713703e-06, "loss": 0.9839, "step": 2416 }, { "epoch": 0.3300109229929001, "grad_norm": 5.804140090942383, "learning_rate": 7.819992214633188e-06, "loss": 1.0572, "step": 2417 }, { "epoch": 0.3301474604041507, "grad_norm": 8.68124771118164, "learning_rate": 7.818166030057465e-06, "loss": 0.8138, "step": 2418 }, { "epoch": 0.3302839978154014, "grad_norm": 9.474159240722656, "learning_rate": 7.816339294343675e-06, "loss": 0.9889, "step": 2419 }, { "epoch": 0.3304205352266521, "grad_norm": 8.274681091308594, "learning_rate": 7.814512007849069e-06, "loss": 1.0817, "step": 2420 }, { "epoch": 0.3305570726379028, "grad_norm": 10.462203025817871, "learning_rate": 7.812684170930998e-06, "loss": 1.1128, "step": 2421 }, { "epoch": 0.3306936100491535, "grad_norm": 6.0273261070251465, "learning_rate": 7.810855783946926e-06, "loss": 1.1309, "step": 2422 }, { "epoch": 0.3308301474604042, "grad_norm": 12.840606689453125, "learning_rate": 7.809026847254426e-06, "loss": 1.0585, "step": 2423 }, { "epoch": 0.3309666848716548, "grad_norm": 8.438720703125, "learning_rate": 7.807197361211173e-06, "loss": 1.0483, "step": 2424 }, { "epoch": 0.3311032222829055, "grad_norm": 8.18388557434082, "learning_rate": 7.805367326174957e-06, "loss": 1.1867, "step": 2425 }, { "epoch": 0.3312397596941562, "grad_norm": 10.287498474121094, "learning_rate": 7.803536742503667e-06, "loss": 0.9394, "step": 2426 }, { "epoch": 0.3313762971054069, "grad_norm": 7.462771892547607, "learning_rate": 7.801705610555305e-06, "loss": 1.0312, "step": 2427 }, { "epoch": 0.3315128345166576, "grad_norm": 9.378389358520508, "learning_rate": 7.799873930687979e-06, "loss": 1.0973, "step": 2428 }, { "epoch": 0.3316493719279082, "grad_norm": 6.543299674987793, "learning_rate": 7.798041703259902e-06, "loss": 1.0042, "step": 2429 }, { "epoch": 0.3317859093391589, "grad_norm": 6.396316051483154, "learning_rate": 7.796208928629396e-06, "loss": 1.0874, "step": 2430 }, { "epoch": 0.3319224467504096, "grad_norm": 11.778031349182129, "learning_rate": 7.794375607154891e-06, "loss": 1.1003, "step": 2431 }, { "epoch": 0.3320589841616603, "grad_norm": 5.050673484802246, "learning_rate": 7.792541739194924e-06, "loss": 1.0617, "step": 2432 }, { "epoch": 0.332195521572911, "grad_norm": 6.660134792327881, "learning_rate": 7.790707325108134e-06, "loss": 0.996, "step": 2433 }, { "epoch": 0.3323320589841617, "grad_norm": 6.287275791168213, "learning_rate": 7.788872365253273e-06, "loss": 1.0064, "step": 2434 }, { "epoch": 0.3324685963954123, "grad_norm": 8.955965995788574, "learning_rate": 7.787036859989197e-06, "loss": 1.1369, "step": 2435 }, { "epoch": 0.332605133806663, "grad_norm": 6.311997413635254, "learning_rate": 7.785200809674869e-06, "loss": 1.0805, "step": 2436 }, { "epoch": 0.3327416712179137, "grad_norm": 11.285745620727539, "learning_rate": 7.783364214669356e-06, "loss": 0.9714, "step": 2437 }, { "epoch": 0.3328782086291644, "grad_norm": 6.354136943817139, "learning_rate": 7.781527075331836e-06, "loss": 1.0123, "step": 2438 }, { "epoch": 0.3330147460404151, "grad_norm": 30.376087188720703, "learning_rate": 7.779689392021592e-06, "loss": 0.9388, "step": 2439 }, { "epoch": 0.3331512834516658, "grad_norm": 8.90121078491211, "learning_rate": 7.777851165098012e-06, "loss": 1.047, "step": 2440 }, { "epoch": 0.3332878208629164, "grad_norm": 8.417560577392578, "learning_rate": 7.77601239492059e-06, "loss": 0.8724, "step": 2441 }, { "epoch": 0.3334243582741671, "grad_norm": 6.985981464385986, "learning_rate": 7.77417308184893e-06, "loss": 0.9286, "step": 2442 }, { "epoch": 0.3335608956854178, "grad_norm": 6.7494635581970215, "learning_rate": 7.772333226242736e-06, "loss": 0.9302, "step": 2443 }, { "epoch": 0.3336974330966685, "grad_norm": 7.838825225830078, "learning_rate": 7.770492828461824e-06, "loss": 1.029, "step": 2444 }, { "epoch": 0.3338339705079192, "grad_norm": 6.484485149383545, "learning_rate": 7.768651888866112e-06, "loss": 0.8932, "step": 2445 }, { "epoch": 0.3339705079191699, "grad_norm": 6.151934623718262, "learning_rate": 7.766810407815628e-06, "loss": 1.0012, "step": 2446 }, { "epoch": 0.3341070453304205, "grad_norm": 7.9484710693359375, "learning_rate": 7.764968385670501e-06, "loss": 1.0131, "step": 2447 }, { "epoch": 0.3342435827416712, "grad_norm": 8.68266487121582, "learning_rate": 7.76312582279097e-06, "loss": 1.0176, "step": 2448 }, { "epoch": 0.3343801201529219, "grad_norm": 19.535470962524414, "learning_rate": 7.761282719537377e-06, "loss": 0.9715, "step": 2449 }, { "epoch": 0.3345166575641726, "grad_norm": 7.959295749664307, "learning_rate": 7.759439076270173e-06, "loss": 1.0137, "step": 2450 }, { "epoch": 0.3346531949754233, "grad_norm": 6.212903022766113, "learning_rate": 7.757594893349909e-06, "loss": 0.8467, "step": 2451 }, { "epoch": 0.3347897323866739, "grad_norm": 8.40816879272461, "learning_rate": 7.755750171137245e-06, "loss": 1.0207, "step": 2452 }, { "epoch": 0.3349262697979246, "grad_norm": 6.926759719848633, "learning_rate": 7.753904909992948e-06, "loss": 1.0626, "step": 2453 }, { "epoch": 0.3350628072091753, "grad_norm": 5.9420390129089355, "learning_rate": 7.75205911027789e-06, "loss": 1.0034, "step": 2454 }, { "epoch": 0.335199344620426, "grad_norm": 14.149885177612305, "learning_rate": 7.750212772353046e-06, "loss": 1.1213, "step": 2455 }, { "epoch": 0.3353358820316767, "grad_norm": 7.540037631988525, "learning_rate": 7.748365896579497e-06, "loss": 1.0048, "step": 2456 }, { "epoch": 0.3354724194429274, "grad_norm": 8.660224914550781, "learning_rate": 7.746518483318426e-06, "loss": 0.9733, "step": 2457 }, { "epoch": 0.335608956854178, "grad_norm": 6.574601173400879, "learning_rate": 7.744670532931131e-06, "loss": 0.9032, "step": 2458 }, { "epoch": 0.3357454942654287, "grad_norm": 7.087325572967529, "learning_rate": 7.742822045779005e-06, "loss": 0.9575, "step": 2459 }, { "epoch": 0.3358820316766794, "grad_norm": 6.9509172439575195, "learning_rate": 7.74097302222355e-06, "loss": 0.7675, "step": 2460 }, { "epoch": 0.3360185690879301, "grad_norm": 4.7987284660339355, "learning_rate": 7.739123462626371e-06, "loss": 1.0624, "step": 2461 }, { "epoch": 0.3361551064991808, "grad_norm": 10.100525856018066, "learning_rate": 7.737273367349184e-06, "loss": 0.9307, "step": 2462 }, { "epoch": 0.3362916439104315, "grad_norm": 11.608060836791992, "learning_rate": 7.735422736753802e-06, "loss": 1.0881, "step": 2463 }, { "epoch": 0.3364281813216821, "grad_norm": 6.23374605178833, "learning_rate": 7.733571571202146e-06, "loss": 0.9643, "step": 2464 }, { "epoch": 0.3365647187329328, "grad_norm": 9.883541107177734, "learning_rate": 7.73171987105624e-06, "loss": 1.0151, "step": 2465 }, { "epoch": 0.3367012561441835, "grad_norm": 6.364637851715088, "learning_rate": 7.729867636678217e-06, "loss": 0.9171, "step": 2466 }, { "epoch": 0.3368377935554342, "grad_norm": 5.94024133682251, "learning_rate": 7.728014868430309e-06, "loss": 0.9113, "step": 2467 }, { "epoch": 0.3369743309666849, "grad_norm": 6.016084671020508, "learning_rate": 7.726161566674856e-06, "loss": 0.9525, "step": 2468 }, { "epoch": 0.3371108683779356, "grad_norm": 10.518181800842285, "learning_rate": 7.724307731774303e-06, "loss": 0.9895, "step": 2469 }, { "epoch": 0.3372474057891862, "grad_norm": 8.893365859985352, "learning_rate": 7.722453364091194e-06, "loss": 0.9709, "step": 2470 }, { "epoch": 0.3373839432004369, "grad_norm": 8.60717487335205, "learning_rate": 7.720598463988182e-06, "loss": 0.9949, "step": 2471 }, { "epoch": 0.3375204806116876, "grad_norm": 7.214848518371582, "learning_rate": 7.718743031828022e-06, "loss": 1.0046, "step": 2472 }, { "epoch": 0.3376570180229383, "grad_norm": 8.1732816696167, "learning_rate": 7.716887067973578e-06, "loss": 0.9784, "step": 2473 }, { "epoch": 0.337793555434189, "grad_norm": 9.664238929748535, "learning_rate": 7.71503057278781e-06, "loss": 0.9867, "step": 2474 }, { "epoch": 0.3379300928454397, "grad_norm": 5.696558952331543, "learning_rate": 7.713173546633784e-06, "loss": 1.0921, "step": 2475 }, { "epoch": 0.3380666302566903, "grad_norm": 5.018639087677002, "learning_rate": 7.711315989874677e-06, "loss": 1.0633, "step": 2476 }, { "epoch": 0.338203167667941, "grad_norm": 9.81341552734375, "learning_rate": 7.709457902873759e-06, "loss": 0.9495, "step": 2477 }, { "epoch": 0.3383397050791917, "grad_norm": 8.115705490112305, "learning_rate": 7.707599285994415e-06, "loss": 0.9989, "step": 2478 }, { "epoch": 0.3384762424904424, "grad_norm": 6.396160125732422, "learning_rate": 7.70574013960012e-06, "loss": 0.9626, "step": 2479 }, { "epoch": 0.3386127799016931, "grad_norm": 14.885916709899902, "learning_rate": 7.703880464054465e-06, "loss": 0.9983, "step": 2480 }, { "epoch": 0.3387493173129437, "grad_norm": 11.23185920715332, "learning_rate": 7.702020259721142e-06, "loss": 1.079, "step": 2481 }, { "epoch": 0.3388858547241944, "grad_norm": 8.35418701171875, "learning_rate": 7.700159526963937e-06, "loss": 0.9421, "step": 2482 }, { "epoch": 0.3390223921354451, "grad_norm": 9.393770217895508, "learning_rate": 7.698298266146753e-06, "loss": 0.9863, "step": 2483 }, { "epoch": 0.3391589295466958, "grad_norm": 6.655555725097656, "learning_rate": 7.696436477633588e-06, "loss": 1.0375, "step": 2484 }, { "epoch": 0.3392954669579465, "grad_norm": 4.51173734664917, "learning_rate": 7.694574161788543e-06, "loss": 0.8696, "step": 2485 }, { "epoch": 0.3394320043691972, "grad_norm": 7.170347690582275, "learning_rate": 7.692711318975827e-06, "loss": 1.0278, "step": 2486 }, { "epoch": 0.3395685417804478, "grad_norm": 7.522616386413574, "learning_rate": 7.690847949559746e-06, "loss": 0.8974, "step": 2487 }, { "epoch": 0.3397050791916985, "grad_norm": 7.158158302307129, "learning_rate": 7.688984053904713e-06, "loss": 1.0924, "step": 2488 }, { "epoch": 0.3398416166029492, "grad_norm": 7.551046371459961, "learning_rate": 7.687119632375245e-06, "loss": 0.9781, "step": 2489 }, { "epoch": 0.3399781540141999, "grad_norm": 6.8274760246276855, "learning_rate": 7.685254685335958e-06, "loss": 1.0336, "step": 2490 }, { "epoch": 0.3401146914254506, "grad_norm": 5.095887184143066, "learning_rate": 7.683389213151574e-06, "loss": 0.8146, "step": 2491 }, { "epoch": 0.3402512288367013, "grad_norm": 7.2189435958862305, "learning_rate": 7.681523216186912e-06, "loss": 1.048, "step": 2492 }, { "epoch": 0.3403877662479519, "grad_norm": 6.414522171020508, "learning_rate": 7.679656694806905e-06, "loss": 0.9823, "step": 2493 }, { "epoch": 0.3405243036592026, "grad_norm": 5.491333484649658, "learning_rate": 7.677789649376577e-06, "loss": 1.0094, "step": 2494 }, { "epoch": 0.3406608410704533, "grad_norm": 6.273065090179443, "learning_rate": 7.67592208026106e-06, "loss": 0.8539, "step": 2495 }, { "epoch": 0.340797378481704, "grad_norm": 7.40518856048584, "learning_rate": 7.674053987825588e-06, "loss": 1.0756, "step": 2496 }, { "epoch": 0.3409339158929547, "grad_norm": 5.831241607666016, "learning_rate": 7.672185372435494e-06, "loss": 1.1272, "step": 2497 }, { "epoch": 0.3410704533042054, "grad_norm": 6.451030731201172, "learning_rate": 7.670316234456222e-06, "loss": 1.0198, "step": 2498 }, { "epoch": 0.341206990715456, "grad_norm": 10.394025802612305, "learning_rate": 7.668446574253303e-06, "loss": 0.9095, "step": 2499 }, { "epoch": 0.3413435281267067, "grad_norm": 8.111407279968262, "learning_rate": 7.666576392192389e-06, "loss": 0.9763, "step": 2500 }, { "epoch": 0.3414800655379574, "grad_norm": 7.894060134887695, "learning_rate": 7.664705688639218e-06, "loss": 0.993, "step": 2501 }, { "epoch": 0.3416166029492081, "grad_norm": 6.763756275177002, "learning_rate": 7.66283446395964e-06, "loss": 1.0703, "step": 2502 }, { "epoch": 0.3417531403604588, "grad_norm": 7.419184684753418, "learning_rate": 7.660962718519602e-06, "loss": 0.9635, "step": 2503 }, { "epoch": 0.3418896777717094, "grad_norm": 6.592366695404053, "learning_rate": 7.659090452685155e-06, "loss": 1.1224, "step": 2504 }, { "epoch": 0.3420262151829601, "grad_norm": 6.636856555938721, "learning_rate": 7.657217666822447e-06, "loss": 1.015, "step": 2505 }, { "epoch": 0.3421627525942108, "grad_norm": 5.853488445281982, "learning_rate": 7.655344361297736e-06, "loss": 1.0375, "step": 2506 }, { "epoch": 0.3422992900054615, "grad_norm": 5.759478569030762, "learning_rate": 7.653470536477377e-06, "loss": 0.9557, "step": 2507 }, { "epoch": 0.3424358274167122, "grad_norm": 7.939628601074219, "learning_rate": 7.651596192727826e-06, "loss": 1.0488, "step": 2508 }, { "epoch": 0.3425723648279629, "grad_norm": 6.834944725036621, "learning_rate": 7.64972133041564e-06, "loss": 0.8855, "step": 2509 }, { "epoch": 0.3427089022392135, "grad_norm": 7.003521919250488, "learning_rate": 7.64784594990748e-06, "loss": 1.0228, "step": 2510 }, { "epoch": 0.3428454396504642, "grad_norm": 7.969171047210693, "learning_rate": 7.645970051570109e-06, "loss": 1.074, "step": 2511 }, { "epoch": 0.3429819770617149, "grad_norm": 5.378918647766113, "learning_rate": 7.644093635770385e-06, "loss": 0.797, "step": 2512 }, { "epoch": 0.3431185144729656, "grad_norm": 6.451955795288086, "learning_rate": 7.642216702875273e-06, "loss": 0.9664, "step": 2513 }, { "epoch": 0.3432550518842163, "grad_norm": 5.73826265335083, "learning_rate": 7.64033925325184e-06, "loss": 1.0862, "step": 2514 }, { "epoch": 0.343391589295467, "grad_norm": 7.202812671661377, "learning_rate": 7.638461287267251e-06, "loss": 1.1305, "step": 2515 }, { "epoch": 0.3435281267067176, "grad_norm": 9.027494430541992, "learning_rate": 7.636582805288771e-06, "loss": 0.9009, "step": 2516 }, { "epoch": 0.3436646641179683, "grad_norm": 5.498115539550781, "learning_rate": 7.634703807683769e-06, "loss": 0.8853, "step": 2517 }, { "epoch": 0.343801201529219, "grad_norm": 6.644163131713867, "learning_rate": 7.632824294819712e-06, "loss": 0.954, "step": 2518 }, { "epoch": 0.3439377389404697, "grad_norm": 7.346612930297852, "learning_rate": 7.630944267064171e-06, "loss": 1.0361, "step": 2519 }, { "epoch": 0.3440742763517204, "grad_norm": 6.011077404022217, "learning_rate": 7.629063724784815e-06, "loss": 1.0582, "step": 2520 }, { "epoch": 0.3442108137629711, "grad_norm": 6.186016082763672, "learning_rate": 7.6271826683494145e-06, "loss": 0.9133, "step": 2521 }, { "epoch": 0.3443473511742217, "grad_norm": 7.202065467834473, "learning_rate": 7.62530109812584e-06, "loss": 0.9894, "step": 2522 }, { "epoch": 0.3444838885854724, "grad_norm": 7.527756690979004, "learning_rate": 7.6234190144820645e-06, "loss": 1.0488, "step": 2523 }, { "epoch": 0.3446204259967231, "grad_norm": 6.054769515991211, "learning_rate": 7.621536417786159e-06, "loss": 1.0552, "step": 2524 }, { "epoch": 0.3447569634079738, "grad_norm": 6.3508148193359375, "learning_rate": 7.619653308406296e-06, "loss": 0.9004, "step": 2525 }, { "epoch": 0.3448935008192245, "grad_norm": 6.279088973999023, "learning_rate": 7.61776968671075e-06, "loss": 1.0403, "step": 2526 }, { "epoch": 0.3450300382304752, "grad_norm": 10.733922958374023, "learning_rate": 7.6158855530678895e-06, "loss": 1.0822, "step": 2527 }, { "epoch": 0.3451665756417258, "grad_norm": 5.7853899002075195, "learning_rate": 7.6140009078461904e-06, "loss": 0.9336, "step": 2528 }, { "epoch": 0.3453031130529765, "grad_norm": 9.495141983032227, "learning_rate": 7.612115751414227e-06, "loss": 1.1336, "step": 2529 }, { "epoch": 0.3454396504642272, "grad_norm": 6.987584114074707, "learning_rate": 7.6102300841406675e-06, "loss": 1.02, "step": 2530 }, { "epoch": 0.3455761878754779, "grad_norm": 6.817749977111816, "learning_rate": 7.608343906394289e-06, "loss": 0.8925, "step": 2531 }, { "epoch": 0.3457127252867286, "grad_norm": 5.499338626861572, "learning_rate": 7.606457218543961e-06, "loss": 0.9209, "step": 2532 }, { "epoch": 0.3458492626979792, "grad_norm": 6.149592399597168, "learning_rate": 7.604570020958659e-06, "loss": 0.9354, "step": 2533 }, { "epoch": 0.3459858001092299, "grad_norm": 7.384950160980225, "learning_rate": 7.602682314007455e-06, "loss": 0.9388, "step": 2534 }, { "epoch": 0.3461223375204806, "grad_norm": 6.796448707580566, "learning_rate": 7.6007940980595165e-06, "loss": 1.1138, "step": 2535 }, { "epoch": 0.3462588749317313, "grad_norm": 11.222159385681152, "learning_rate": 7.59890537348412e-06, "loss": 1.0401, "step": 2536 }, { "epoch": 0.346395412342982, "grad_norm": 7.540632247924805, "learning_rate": 7.597016140650631e-06, "loss": 1.0658, "step": 2537 }, { "epoch": 0.3465319497542327, "grad_norm": 10.741629600524902, "learning_rate": 7.595126399928524e-06, "loss": 1.0633, "step": 2538 }, { "epoch": 0.3466684871654833, "grad_norm": 7.247337818145752, "learning_rate": 7.593236151687366e-06, "loss": 0.9324, "step": 2539 }, { "epoch": 0.346805024576734, "grad_norm": 7.459514617919922, "learning_rate": 7.5913453962968296e-06, "loss": 0.9077, "step": 2540 }, { "epoch": 0.3469415619879847, "grad_norm": 6.544034481048584, "learning_rate": 7.589454134126677e-06, "loss": 0.9482, "step": 2541 }, { "epoch": 0.3470780993992354, "grad_norm": 6.870880126953125, "learning_rate": 7.5875623655467764e-06, "loss": 1.1662, "step": 2542 }, { "epoch": 0.3472146368104861, "grad_norm": 6.624366760253906, "learning_rate": 7.585670090927098e-06, "loss": 1.0291, "step": 2543 }, { "epoch": 0.3473511742217368, "grad_norm": 9.016044616699219, "learning_rate": 7.583777310637703e-06, "loss": 0.9793, "step": 2544 }, { "epoch": 0.3474877116329874, "grad_norm": 8.452563285827637, "learning_rate": 7.581884025048755e-06, "loss": 0.9041, "step": 2545 }, { "epoch": 0.3476242490442381, "grad_norm": 6.192422389984131, "learning_rate": 7.579990234530521e-06, "loss": 1.0546, "step": 2546 }, { "epoch": 0.3477607864554888, "grad_norm": 7.229462623596191, "learning_rate": 7.578095939453358e-06, "loss": 1.0073, "step": 2547 }, { "epoch": 0.3478973238667395, "grad_norm": 9.221029281616211, "learning_rate": 7.576201140187727e-06, "loss": 1.0279, "step": 2548 }, { "epoch": 0.3480338612779902, "grad_norm": 5.8431901931762695, "learning_rate": 7.574305837104188e-06, "loss": 0.9233, "step": 2549 }, { "epoch": 0.3481703986892409, "grad_norm": 6.1318559646606445, "learning_rate": 7.572410030573396e-06, "loss": 0.9449, "step": 2550 }, { "epoch": 0.3483069361004915, "grad_norm": 7.587551116943359, "learning_rate": 7.570513720966108e-06, "loss": 0.9941, "step": 2551 }, { "epoch": 0.3484434735117422, "grad_norm": 7.921436309814453, "learning_rate": 7.568616908653182e-06, "loss": 1.027, "step": 2552 }, { "epoch": 0.3485800109229929, "grad_norm": 6.661026954650879, "learning_rate": 7.566719594005564e-06, "loss": 1.0545, "step": 2553 }, { "epoch": 0.3487165483342436, "grad_norm": 8.391724586486816, "learning_rate": 7.564821777394307e-06, "loss": 0.7982, "step": 2554 }, { "epoch": 0.3488530857454943, "grad_norm": 10.669842720031738, "learning_rate": 7.562923459190561e-06, "loss": 1.1079, "step": 2555 }, { "epoch": 0.3489896231567449, "grad_norm": 78.10692596435547, "learning_rate": 7.5610246397655715e-06, "loss": 0.8137, "step": 2556 }, { "epoch": 0.3491261605679956, "grad_norm": 7.15521240234375, "learning_rate": 7.5591253194906856e-06, "loss": 0.9, "step": 2557 }, { "epoch": 0.3492626979792463, "grad_norm": 5.631014823913574, "learning_rate": 7.557225498737344e-06, "loss": 0.8949, "step": 2558 }, { "epoch": 0.349399235390497, "grad_norm": 9.567737579345703, "learning_rate": 7.555325177877086e-06, "loss": 1.139, "step": 2559 }, { "epoch": 0.3495357728017477, "grad_norm": 8.211870193481445, "learning_rate": 7.553424357281556e-06, "loss": 0.9149, "step": 2560 }, { "epoch": 0.3496723102129984, "grad_norm": 7.235172271728516, "learning_rate": 7.551523037322483e-06, "loss": 0.9791, "step": 2561 }, { "epoch": 0.349808847624249, "grad_norm": 6.127881050109863, "learning_rate": 7.549621218371706e-06, "loss": 1.1554, "step": 2562 }, { "epoch": 0.3499453850354997, "grad_norm": 7.261926651000977, "learning_rate": 7.547718900801155e-06, "loss": 1.0531, "step": 2563 }, { "epoch": 0.3500819224467504, "grad_norm": 6.9740824699401855, "learning_rate": 7.54581608498286e-06, "loss": 0.9247, "step": 2564 }, { "epoch": 0.3502184598580011, "grad_norm": 5.931049346923828, "learning_rate": 7.5439127712889455e-06, "loss": 0.9874, "step": 2565 }, { "epoch": 0.3503549972692518, "grad_norm": 9.371994972229004, "learning_rate": 7.542008960091636e-06, "loss": 0.9863, "step": 2566 }, { "epoch": 0.3504915346805025, "grad_norm": 8.761381149291992, "learning_rate": 7.540104651763255e-06, "loss": 1.0119, "step": 2567 }, { "epoch": 0.3506280720917531, "grad_norm": 12.430277824401855, "learning_rate": 7.5381998466762175e-06, "loss": 1.0083, "step": 2568 }, { "epoch": 0.3507646095030038, "grad_norm": 5.887928009033203, "learning_rate": 7.53629454520304e-06, "loss": 0.9365, "step": 2569 }, { "epoch": 0.3509011469142545, "grad_norm": 6.478551387786865, "learning_rate": 7.534388747716334e-06, "loss": 0.8755, "step": 2570 }, { "epoch": 0.3510376843255052, "grad_norm": 9.455679893493652, "learning_rate": 7.5324824545888145e-06, "loss": 0.9656, "step": 2571 }, { "epoch": 0.3511742217367559, "grad_norm": 8.162528038024902, "learning_rate": 7.530575666193283e-06, "loss": 1.021, "step": 2572 }, { "epoch": 0.3513107591480066, "grad_norm": 7.0807294845581055, "learning_rate": 7.528668382902642e-06, "loss": 0.885, "step": 2573 }, { "epoch": 0.3514472965592572, "grad_norm": 6.339987754821777, "learning_rate": 7.526760605089895e-06, "loss": 0.9734, "step": 2574 }, { "epoch": 0.3515838339705079, "grad_norm": 17.877159118652344, "learning_rate": 7.524852333128139e-06, "loss": 1.0695, "step": 2575 }, { "epoch": 0.3517203713817586, "grad_norm": 6.187435150146484, "learning_rate": 7.522943567390564e-06, "loss": 0.9487, "step": 2576 }, { "epoch": 0.3518569087930093, "grad_norm": 7.5790605545043945, "learning_rate": 7.521034308250463e-06, "loss": 0.8684, "step": 2577 }, { "epoch": 0.35199344620426, "grad_norm": 5.057061672210693, "learning_rate": 7.519124556081223e-06, "loss": 0.905, "step": 2578 }, { "epoch": 0.3521299836155106, "grad_norm": 6.283287048339844, "learning_rate": 7.517214311256325e-06, "loss": 1.0073, "step": 2579 }, { "epoch": 0.3522665210267613, "grad_norm": 5.592400550842285, "learning_rate": 7.515303574149348e-06, "loss": 1.0344, "step": 2580 }, { "epoch": 0.352403058438012, "grad_norm": 5.906271934509277, "learning_rate": 7.5133923451339705e-06, "loss": 0.9967, "step": 2581 }, { "epoch": 0.3525395958492627, "grad_norm": 12.456277847290039, "learning_rate": 7.511480624583962e-06, "loss": 0.9218, "step": 2582 }, { "epoch": 0.3526761332605134, "grad_norm": 5.510994911193848, "learning_rate": 7.509568412873191e-06, "loss": 1.1712, "step": 2583 }, { "epoch": 0.3528126706717641, "grad_norm": 6.572891712188721, "learning_rate": 7.507655710375622e-06, "loss": 1.1457, "step": 2584 }, { "epoch": 0.3529492080830147, "grad_norm": 5.899937152862549, "learning_rate": 7.505742517465315e-06, "loss": 0.9392, "step": 2585 }, { "epoch": 0.3530857454942654, "grad_norm": 5.246906757354736, "learning_rate": 7.503828834516424e-06, "loss": 1.0316, "step": 2586 }, { "epoch": 0.3532222829055161, "grad_norm": 7.944967269897461, "learning_rate": 7.501914661903205e-06, "loss": 0.9036, "step": 2587 }, { "epoch": 0.3533588203167668, "grad_norm": 8.311027526855469, "learning_rate": 7.500000000000001e-06, "loss": 1.1082, "step": 2588 }, { "epoch": 0.3534953577280175, "grad_norm": 7.009125709533691, "learning_rate": 7.498084849181257e-06, "loss": 0.985, "step": 2589 }, { "epoch": 0.3536318951392682, "grad_norm": 5.317987442016602, "learning_rate": 7.496169209821511e-06, "loss": 0.9585, "step": 2590 }, { "epoch": 0.3537684325505188, "grad_norm": 6.293808460235596, "learning_rate": 7.494253082295399e-06, "loss": 0.8781, "step": 2591 }, { "epoch": 0.3539049699617695, "grad_norm": 7.195811748504639, "learning_rate": 7.49233646697765e-06, "loss": 1.0444, "step": 2592 }, { "epoch": 0.3540415073730202, "grad_norm": 14.437593460083008, "learning_rate": 7.490419364243089e-06, "loss": 0.9758, "step": 2593 }, { "epoch": 0.3541780447842709, "grad_norm": 8.204058647155762, "learning_rate": 7.488501774466636e-06, "loss": 1.0114, "step": 2594 }, { "epoch": 0.3543145821955216, "grad_norm": 6.901512622833252, "learning_rate": 7.4865836980233065e-06, "loss": 1.1201, "step": 2595 }, { "epoch": 0.3544511196067723, "grad_norm": 6.163673400878906, "learning_rate": 7.484665135288214e-06, "loss": 1.0089, "step": 2596 }, { "epoch": 0.3545876570180229, "grad_norm": 6.351656436920166, "learning_rate": 7.482746086636563e-06, "loss": 0.9144, "step": 2597 }, { "epoch": 0.3547241944292736, "grad_norm": 7.194968223571777, "learning_rate": 7.480826552443654e-06, "loss": 0.889, "step": 2598 }, { "epoch": 0.3548607318405243, "grad_norm": 6.635287284851074, "learning_rate": 7.478906533084884e-06, "loss": 1.0151, "step": 2599 }, { "epoch": 0.354997269251775, "grad_norm": 6.993618011474609, "learning_rate": 7.476986028935742e-06, "loss": 0.9788, "step": 2600 }, { "epoch": 0.3551338066630257, "grad_norm": 5.670629024505615, "learning_rate": 7.475065040371816e-06, "loss": 0.9338, "step": 2601 }, { "epoch": 0.3552703440742764, "grad_norm": 12.250588417053223, "learning_rate": 7.4731435677687856e-06, "loss": 0.9955, "step": 2602 }, { "epoch": 0.355406881485527, "grad_norm": 8.624670028686523, "learning_rate": 7.471221611502426e-06, "loss": 0.9546, "step": 2603 }, { "epoch": 0.3555434188967777, "grad_norm": 6.815893650054932, "learning_rate": 7.469299171948608e-06, "loss": 0.8751, "step": 2604 }, { "epoch": 0.3556799563080284, "grad_norm": 7.865246772766113, "learning_rate": 7.467376249483292e-06, "loss": 1.0284, "step": 2605 }, { "epoch": 0.3558164937192791, "grad_norm": 6.464951515197754, "learning_rate": 7.4654528444825415e-06, "loss": 1.0667, "step": 2606 }, { "epoch": 0.3559530311305298, "grad_norm": 7.6376519203186035, "learning_rate": 7.463528957322507e-06, "loss": 0.8997, "step": 2607 }, { "epoch": 0.3560895685417804, "grad_norm": 7.021105766296387, "learning_rate": 7.461604588379436e-06, "loss": 0.9016, "step": 2608 }, { "epoch": 0.3562261059530311, "grad_norm": 6.252638816833496, "learning_rate": 7.459679738029671e-06, "loss": 0.9234, "step": 2609 }, { "epoch": 0.3563626433642818, "grad_norm": 8.187629699707031, "learning_rate": 7.457754406649645e-06, "loss": 0.8405, "step": 2610 }, { "epoch": 0.3564991807755325, "grad_norm": 7.390028476715088, "learning_rate": 7.455828594615891e-06, "loss": 0.9177, "step": 2611 }, { "epoch": 0.3566357181867832, "grad_norm": 6.066237449645996, "learning_rate": 7.453902302305032e-06, "loss": 0.8679, "step": 2612 }, { "epoch": 0.3567722555980339, "grad_norm": 5.0087690353393555, "learning_rate": 7.451975530093784e-06, "loss": 1.084, "step": 2613 }, { "epoch": 0.3569087930092845, "grad_norm": 7.324928283691406, "learning_rate": 7.450048278358961e-06, "loss": 1.014, "step": 2614 }, { "epoch": 0.3570453304205352, "grad_norm": 6.281166076660156, "learning_rate": 7.4481205474774665e-06, "loss": 1.0374, "step": 2615 }, { "epoch": 0.3571818678317859, "grad_norm": 5.794029712677002, "learning_rate": 7.4461923378263006e-06, "loss": 1.0726, "step": 2616 }, { "epoch": 0.3573184052430366, "grad_norm": 5.495463848114014, "learning_rate": 7.444263649782555e-06, "loss": 0.9362, "step": 2617 }, { "epoch": 0.3574549426542873, "grad_norm": 8.403618812561035, "learning_rate": 7.4423344837234175e-06, "loss": 0.9016, "step": 2618 }, { "epoch": 0.357591480065538, "grad_norm": 6.73108434677124, "learning_rate": 7.440404840026165e-06, "loss": 1.0136, "step": 2619 }, { "epoch": 0.3577280174767886, "grad_norm": 8.979459762573242, "learning_rate": 7.438474719068174e-06, "loss": 0.996, "step": 2620 }, { "epoch": 0.3578645548880393, "grad_norm": 6.773321628570557, "learning_rate": 7.4365441212269076e-06, "loss": 1.0566, "step": 2621 }, { "epoch": 0.35800109229929, "grad_norm": 7.685944080352783, "learning_rate": 7.434613046879928e-06, "loss": 0.9594, "step": 2622 }, { "epoch": 0.3581376297105407, "grad_norm": 8.016117095947266, "learning_rate": 7.4326814964048875e-06, "loss": 0.9052, "step": 2623 }, { "epoch": 0.3582741671217914, "grad_norm": 8.61867904663086, "learning_rate": 7.430749470179533e-06, "loss": 0.9058, "step": 2624 }, { "epoch": 0.3584107045330421, "grad_norm": 10.874334335327148, "learning_rate": 7.428816968581701e-06, "loss": 1.1271, "step": 2625 }, { "epoch": 0.3585472419442927, "grad_norm": 8.837441444396973, "learning_rate": 7.426883991989325e-06, "loss": 0.8967, "step": 2626 }, { "epoch": 0.3586837793555434, "grad_norm": 8.535466194152832, "learning_rate": 7.424950540780431e-06, "loss": 1.0492, "step": 2627 }, { "epoch": 0.3588203167667941, "grad_norm": 14.411042213439941, "learning_rate": 7.423016615333135e-06, "loss": 1.0663, "step": 2628 }, { "epoch": 0.3589568541780448, "grad_norm": 7.40132999420166, "learning_rate": 7.421082216025648e-06, "loss": 0.9405, "step": 2629 }, { "epoch": 0.3590933915892955, "grad_norm": 7.149345874786377, "learning_rate": 7.419147343236274e-06, "loss": 1.1292, "step": 2630 }, { "epoch": 0.3592299290005461, "grad_norm": 6.9629011154174805, "learning_rate": 7.417211997343408e-06, "loss": 0.966, "step": 2631 }, { "epoch": 0.3593664664117968, "grad_norm": 11.2891206741333, "learning_rate": 7.415276178725538e-06, "loss": 0.9224, "step": 2632 }, { "epoch": 0.3595030038230475, "grad_norm": 7.436181545257568, "learning_rate": 7.413339887761244e-06, "loss": 0.9148, "step": 2633 }, { "epoch": 0.3596395412342982, "grad_norm": 14.618906021118164, "learning_rate": 7.411403124829202e-06, "loss": 1.0267, "step": 2634 }, { "epoch": 0.3597760786455489, "grad_norm": 15.461623191833496, "learning_rate": 7.409465890308174e-06, "loss": 1.0369, "step": 2635 }, { "epoch": 0.3599126160567996, "grad_norm": 8.125219345092773, "learning_rate": 7.4075281845770196e-06, "loss": 0.865, "step": 2636 }, { "epoch": 0.3600491534680502, "grad_norm": 8.401772499084473, "learning_rate": 7.40559000801469e-06, "loss": 1.2223, "step": 2637 }, { "epoch": 0.3601856908793009, "grad_norm": 8.743890762329102, "learning_rate": 7.403651361000224e-06, "loss": 1.0775, "step": 2638 }, { "epoch": 0.3603222282905516, "grad_norm": 23.476848602294922, "learning_rate": 7.401712243912755e-06, "loss": 1.0703, "step": 2639 }, { "epoch": 0.3604587657018023, "grad_norm": 11.75575065612793, "learning_rate": 7.399772657131512e-06, "loss": 0.9274, "step": 2640 }, { "epoch": 0.360595303113053, "grad_norm": 8.743170738220215, "learning_rate": 7.3978326010358106e-06, "loss": 0.9872, "step": 2641 }, { "epoch": 0.3607318405243037, "grad_norm": 30.055301666259766, "learning_rate": 7.395892076005061e-06, "loss": 1.0507, "step": 2642 }, { "epoch": 0.3608683779355543, "grad_norm": 12.55659008026123, "learning_rate": 7.393951082418765e-06, "loss": 1.0452, "step": 2643 }, { "epoch": 0.361004915346805, "grad_norm": 12.33983039855957, "learning_rate": 7.392009620656513e-06, "loss": 0.9083, "step": 2644 }, { "epoch": 0.3611414527580557, "grad_norm": 8.225340843200684, "learning_rate": 7.390067691097991e-06, "loss": 1.0437, "step": 2645 }, { "epoch": 0.3612779901693064, "grad_norm": 11.515753746032715, "learning_rate": 7.388125294122974e-06, "loss": 0.9401, "step": 2646 }, { "epoch": 0.3614145275805571, "grad_norm": 5.888173580169678, "learning_rate": 7.386182430111333e-06, "loss": 0.9745, "step": 2647 }, { "epoch": 0.3615510649918078, "grad_norm": 6.498175621032715, "learning_rate": 7.38423909944302e-06, "loss": 1.0039, "step": 2648 }, { "epoch": 0.3616876024030584, "grad_norm": 7.541513442993164, "learning_rate": 7.382295302498089e-06, "loss": 1.0095, "step": 2649 }, { "epoch": 0.3618241398143091, "grad_norm": 6.532063961029053, "learning_rate": 7.380351039656681e-06, "loss": 1.0715, "step": 2650 }, { "epoch": 0.3619606772255598, "grad_norm": 8.58214282989502, "learning_rate": 7.3784063112990264e-06, "loss": 1.0039, "step": 2651 }, { "epoch": 0.3620972146368105, "grad_norm": 5.824608325958252, "learning_rate": 7.37646111780545e-06, "loss": 0.905, "step": 2652 }, { "epoch": 0.3622337520480612, "grad_norm": 5.625657081604004, "learning_rate": 7.374515459556365e-06, "loss": 0.994, "step": 2653 }, { "epoch": 0.3623702894593119, "grad_norm": 5.451712131500244, "learning_rate": 7.372569336932277e-06, "loss": 0.9636, "step": 2654 }, { "epoch": 0.3625068268705625, "grad_norm": 4.934938430786133, "learning_rate": 7.370622750313781e-06, "loss": 1.0615, "step": 2655 }, { "epoch": 0.3626433642818132, "grad_norm": 11.308733940124512, "learning_rate": 7.368675700081565e-06, "loss": 0.8884, "step": 2656 }, { "epoch": 0.3627799016930639, "grad_norm": 12.65997314453125, "learning_rate": 7.366728186616405e-06, "loss": 0.949, "step": 2657 }, { "epoch": 0.3629164391043146, "grad_norm": 13.098514556884766, "learning_rate": 7.36478021029917e-06, "loss": 0.882, "step": 2658 }, { "epoch": 0.3630529765155653, "grad_norm": 8.643465042114258, "learning_rate": 7.362831771510815e-06, "loss": 1.0054, "step": 2659 }, { "epoch": 0.3631895139268159, "grad_norm": 11.17980670928955, "learning_rate": 7.360882870632393e-06, "loss": 0.8579, "step": 2660 }, { "epoch": 0.3633260513380666, "grad_norm": 7.040585517883301, "learning_rate": 7.358933508045041e-06, "loss": 0.9604, "step": 2661 }, { "epoch": 0.3634625887493173, "grad_norm": 7.028109550476074, "learning_rate": 7.3569836841299905e-06, "loss": 0.9436, "step": 2662 }, { "epoch": 0.363599126160568, "grad_norm": 5.450358867645264, "learning_rate": 7.355033399268557e-06, "loss": 1.0283, "step": 2663 }, { "epoch": 0.3637356635718187, "grad_norm": 7.867987155914307, "learning_rate": 7.353082653842154e-06, "loss": 1.0454, "step": 2664 }, { "epoch": 0.3638722009830694, "grad_norm": 6.701916217803955, "learning_rate": 7.3511314482322805e-06, "loss": 0.9809, "step": 2665 }, { "epoch": 0.36400873839432, "grad_norm": 7.9462971687316895, "learning_rate": 7.3491797828205255e-06, "loss": 0.9701, "step": 2666 }, { "epoch": 0.3641452758055707, "grad_norm": 7.042110443115234, "learning_rate": 7.347227657988572e-06, "loss": 0.9676, "step": 2667 }, { "epoch": 0.3642818132168214, "grad_norm": 7.408754348754883, "learning_rate": 7.3452750741181855e-06, "loss": 0.9675, "step": 2668 }, { "epoch": 0.3644183506280721, "grad_norm": 6.3924431800842285, "learning_rate": 7.343322031591229e-06, "loss": 1.0222, "step": 2669 }, { "epoch": 0.3645548880393228, "grad_norm": 6.674106121063232, "learning_rate": 7.341368530789648e-06, "loss": 0.9427, "step": 2670 }, { "epoch": 0.3646914254505735, "grad_norm": 6.759739398956299, "learning_rate": 7.339414572095482e-06, "loss": 0.9644, "step": 2671 }, { "epoch": 0.3648279628618241, "grad_norm": 9.244881629943848, "learning_rate": 7.337460155890862e-06, "loss": 0.9634, "step": 2672 }, { "epoch": 0.3649645002730748, "grad_norm": 7.201916694641113, "learning_rate": 7.335505282558004e-06, "loss": 0.9956, "step": 2673 }, { "epoch": 0.3651010376843255, "grad_norm": 7.759944915771484, "learning_rate": 7.333549952479215e-06, "loss": 0.9523, "step": 2674 }, { "epoch": 0.3652375750955762, "grad_norm": 6.611997127532959, "learning_rate": 7.331594166036893e-06, "loss": 0.9839, "step": 2675 }, { "epoch": 0.3653741125068269, "grad_norm": 6.374034881591797, "learning_rate": 7.329637923613522e-06, "loss": 0.8686, "step": 2676 }, { "epoch": 0.3655106499180776, "grad_norm": 10.349431037902832, "learning_rate": 7.327681225591677e-06, "loss": 1.0072, "step": 2677 }, { "epoch": 0.3656471873293282, "grad_norm": 6.203850746154785, "learning_rate": 7.3257240723540234e-06, "loss": 1.1209, "step": 2678 }, { "epoch": 0.3657837247405789, "grad_norm": 6.343413829803467, "learning_rate": 7.323766464283314e-06, "loss": 1.1467, "step": 2679 }, { "epoch": 0.3659202621518296, "grad_norm": 7.681708812713623, "learning_rate": 7.321808401762389e-06, "loss": 0.937, "step": 2680 }, { "epoch": 0.3660567995630803, "grad_norm": 19.493968963623047, "learning_rate": 7.319849885174181e-06, "loss": 0.9967, "step": 2681 }, { "epoch": 0.366193336974331, "grad_norm": 6.9773664474487305, "learning_rate": 7.317890914901709e-06, "loss": 0.8881, "step": 2682 }, { "epoch": 0.3663298743855816, "grad_norm": 6.501558780670166, "learning_rate": 7.315931491328081e-06, "loss": 0.9929, "step": 2683 }, { "epoch": 0.3664664117968323, "grad_norm": 6.032958507537842, "learning_rate": 7.313971614836496e-06, "loss": 0.9487, "step": 2684 }, { "epoch": 0.366602949208083, "grad_norm": 22.391307830810547, "learning_rate": 7.312011285810237e-06, "loss": 1.0067, "step": 2685 }, { "epoch": 0.3667394866193337, "grad_norm": 5.811027526855469, "learning_rate": 7.31005050463268e-06, "loss": 0.9909, "step": 2686 }, { "epoch": 0.3668760240305844, "grad_norm": 8.318014144897461, "learning_rate": 7.308089271687286e-06, "loss": 0.9295, "step": 2687 }, { "epoch": 0.3670125614418351, "grad_norm": 6.897480010986328, "learning_rate": 7.3061275873576085e-06, "loss": 0.8888, "step": 2688 }, { "epoch": 0.3671490988530857, "grad_norm": 6.409003257751465, "learning_rate": 7.304165452027285e-06, "loss": 0.9309, "step": 2689 }, { "epoch": 0.3672856362643364, "grad_norm": 7.623099327087402, "learning_rate": 7.302202866080042e-06, "loss": 0.9545, "step": 2690 }, { "epoch": 0.3674221736755871, "grad_norm": 6.841815948486328, "learning_rate": 7.300239829899697e-06, "loss": 1.0693, "step": 2691 }, { "epoch": 0.3675587110868378, "grad_norm": 6.05837345123291, "learning_rate": 7.298276343870152e-06, "loss": 0.8833, "step": 2692 }, { "epoch": 0.3676952484980885, "grad_norm": 7.685089111328125, "learning_rate": 7.296312408375397e-06, "loss": 1.0334, "step": 2693 }, { "epoch": 0.3678317859093392, "grad_norm": 6.745669364929199, "learning_rate": 7.294348023799517e-06, "loss": 0.9237, "step": 2694 }, { "epoch": 0.3679683233205898, "grad_norm": 7.057027816772461, "learning_rate": 7.292383190526674e-06, "loss": 0.9035, "step": 2695 }, { "epoch": 0.3681048607318405, "grad_norm": 5.674225330352783, "learning_rate": 7.290417908941125e-06, "loss": 1.0584, "step": 2696 }, { "epoch": 0.3682413981430912, "grad_norm": 8.096081733703613, "learning_rate": 7.288452179427211e-06, "loss": 1.2118, "step": 2697 }, { "epoch": 0.3683779355543419, "grad_norm": 8.626683235168457, "learning_rate": 7.286486002369366e-06, "loss": 0.9411, "step": 2698 }, { "epoch": 0.3685144729655926, "grad_norm": 11.967741012573242, "learning_rate": 7.284519378152104e-06, "loss": 1.0262, "step": 2699 }, { "epoch": 0.3686510103768433, "grad_norm": 6.270383834838867, "learning_rate": 7.282552307160033e-06, "loss": 1.0976, "step": 2700 }, { "epoch": 0.3687875477880939, "grad_norm": 7.145039081573486, "learning_rate": 7.280584789777844e-06, "loss": 0.957, "step": 2701 }, { "epoch": 0.3689240851993446, "grad_norm": 5.5502800941467285, "learning_rate": 7.278616826390317e-06, "loss": 1.0032, "step": 2702 }, { "epoch": 0.3690606226105953, "grad_norm": 6.480877876281738, "learning_rate": 7.276648417382318e-06, "loss": 0.895, "step": 2703 }, { "epoch": 0.369197160021846, "grad_norm": 6.08402156829834, "learning_rate": 7.274679563138805e-06, "loss": 1.0967, "step": 2704 }, { "epoch": 0.3693336974330967, "grad_norm": 6.234459400177002, "learning_rate": 7.2727102640448155e-06, "loss": 0.9936, "step": 2705 }, { "epoch": 0.3694702348443474, "grad_norm": 8.55860424041748, "learning_rate": 7.27074052048548e-06, "loss": 0.977, "step": 2706 }, { "epoch": 0.369606772255598, "grad_norm": 7.765544891357422, "learning_rate": 7.268770332846015e-06, "loss": 0.9707, "step": 2707 }, { "epoch": 0.3697433096668487, "grad_norm": 7.630166053771973, "learning_rate": 7.26679970151172e-06, "loss": 1.0149, "step": 2708 }, { "epoch": 0.3698798470780994, "grad_norm": 8.454901695251465, "learning_rate": 7.264828626867983e-06, "loss": 0.9598, "step": 2709 }, { "epoch": 0.3700163844893501, "grad_norm": 6.687481880187988, "learning_rate": 7.262857109300283e-06, "loss": 0.9353, "step": 2710 }, { "epoch": 0.3701529219006008, "grad_norm": 7.679237365722656, "learning_rate": 7.260885149194181e-06, "loss": 1.0653, "step": 2711 }, { "epoch": 0.3702894593118514, "grad_norm": 5.738134860992432, "learning_rate": 7.258912746935325e-06, "loss": 0.9622, "step": 2712 }, { "epoch": 0.3704259967231021, "grad_norm": 6.2817182540893555, "learning_rate": 7.25693990290945e-06, "loss": 0.9903, "step": 2713 }, { "epoch": 0.3705625341343528, "grad_norm": 14.541528701782227, "learning_rate": 7.254966617502381e-06, "loss": 1.0505, "step": 2714 }, { "epoch": 0.3706990715456035, "grad_norm": 6.701182842254639, "learning_rate": 7.2529928911000215e-06, "loss": 0.8566, "step": 2715 }, { "epoch": 0.3708356089568542, "grad_norm": 5.574336051940918, "learning_rate": 7.251018724088367e-06, "loss": 0.9621, "step": 2716 }, { "epoch": 0.3709721463681049, "grad_norm": 5.905509948730469, "learning_rate": 7.249044116853498e-06, "loss": 0.9305, "step": 2717 }, { "epoch": 0.3711086837793555, "grad_norm": 5.838621616363525, "learning_rate": 7.2470690697815835e-06, "loss": 0.9155, "step": 2718 }, { "epoch": 0.3712452211906062, "grad_norm": 5.521225452423096, "learning_rate": 7.245093583258873e-06, "loss": 0.8891, "step": 2719 }, { "epoch": 0.3713817586018569, "grad_norm": 6.825730800628662, "learning_rate": 7.243117657671706e-06, "loss": 0.9467, "step": 2720 }, { "epoch": 0.3715182960131076, "grad_norm": 6.858492851257324, "learning_rate": 7.241141293406506e-06, "loss": 0.9523, "step": 2721 }, { "epoch": 0.3716548334243583, "grad_norm": 7.4043378829956055, "learning_rate": 7.239164490849784e-06, "loss": 1.079, "step": 2722 }, { "epoch": 0.371791370835609, "grad_norm": 6.700211524963379, "learning_rate": 7.237187250388136e-06, "loss": 0.9668, "step": 2723 }, { "epoch": 0.3719279082468596, "grad_norm": 5.506198406219482, "learning_rate": 7.235209572408241e-06, "loss": 0.9218, "step": 2724 }, { "epoch": 0.3720644456581103, "grad_norm": 7.053493976593018, "learning_rate": 7.233231457296869e-06, "loss": 1.1045, "step": 2725 }, { "epoch": 0.372200983069361, "grad_norm": 7.008048057556152, "learning_rate": 7.231252905440872e-06, "loss": 0.9227, "step": 2726 }, { "epoch": 0.3723375204806117, "grad_norm": 5.687405109405518, "learning_rate": 7.229273917227187e-06, "loss": 1.0284, "step": 2727 }, { "epoch": 0.3724740578918624, "grad_norm": 5.811182498931885, "learning_rate": 7.227294493042838e-06, "loss": 0.9805, "step": 2728 }, { "epoch": 0.3726105953031131, "grad_norm": 5.9255170822143555, "learning_rate": 7.225314633274931e-06, "loss": 1.1509, "step": 2729 }, { "epoch": 0.3727471327143637, "grad_norm": 8.553253173828125, "learning_rate": 7.2233343383106635e-06, "loss": 1.0111, "step": 2730 }, { "epoch": 0.3728836701256144, "grad_norm": 9.991944313049316, "learning_rate": 7.221353608537311e-06, "loss": 0.8735, "step": 2731 }, { "epoch": 0.3730202075368651, "grad_norm": 5.826121807098389, "learning_rate": 7.2193724443422405e-06, "loss": 0.8612, "step": 2732 }, { "epoch": 0.3731567449481158, "grad_norm": 6.141218662261963, "learning_rate": 7.217390846112899e-06, "loss": 0.8477, "step": 2733 }, { "epoch": 0.3732932823593665, "grad_norm": 6.184194564819336, "learning_rate": 7.21540881423682e-06, "loss": 0.9717, "step": 2734 }, { "epoch": 0.3734298197706171, "grad_norm": 6.330240249633789, "learning_rate": 7.213426349101621e-06, "loss": 0.973, "step": 2735 }, { "epoch": 0.3735663571818678, "grad_norm": 5.480973243713379, "learning_rate": 7.211443451095007e-06, "loss": 0.9904, "step": 2736 }, { "epoch": 0.3737028945931185, "grad_norm": 7.48714542388916, "learning_rate": 7.209460120604766e-06, "loss": 0.934, "step": 2737 }, { "epoch": 0.3738394320043692, "grad_norm": 7.373696327209473, "learning_rate": 7.207476358018769e-06, "loss": 1.0268, "step": 2738 }, { "epoch": 0.3739759694156199, "grad_norm": 7.819951057434082, "learning_rate": 7.205492163724974e-06, "loss": 0.9649, "step": 2739 }, { "epoch": 0.3741125068268706, "grad_norm": 5.871792316436768, "learning_rate": 7.203507538111423e-06, "loss": 0.9729, "step": 2740 }, { "epoch": 0.3742490442381212, "grad_norm": 6.672013759613037, "learning_rate": 7.20152248156624e-06, "loss": 0.9945, "step": 2741 }, { "epoch": 0.3743855816493719, "grad_norm": 6.107138156890869, "learning_rate": 7.199536994477635e-06, "loss": 0.9338, "step": 2742 }, { "epoch": 0.3745221190606226, "grad_norm": 6.66179084777832, "learning_rate": 7.197551077233903e-06, "loss": 0.9907, "step": 2743 }, { "epoch": 0.3746586564718733, "grad_norm": 8.038701057434082, "learning_rate": 7.1955647302234235e-06, "loss": 0.9519, "step": 2744 }, { "epoch": 0.374795193883124, "grad_norm": 6.580724239349365, "learning_rate": 7.193577953834656e-06, "loss": 0.8973, "step": 2745 }, { "epoch": 0.3749317312943747, "grad_norm": 5.830915451049805, "learning_rate": 7.191590748456151e-06, "loss": 0.9787, "step": 2746 }, { "epoch": 0.3750682687056253, "grad_norm": 6.175323963165283, "learning_rate": 7.189603114476535e-06, "loss": 0.8487, "step": 2747 }, { "epoch": 0.375204806116876, "grad_norm": 10.13574504852295, "learning_rate": 7.187615052284522e-06, "loss": 0.8815, "step": 2748 }, { "epoch": 0.3753413435281267, "grad_norm": 7.616255283355713, "learning_rate": 7.1856265622689125e-06, "loss": 0.9905, "step": 2749 }, { "epoch": 0.3754778809393774, "grad_norm": 5.846999168395996, "learning_rate": 7.183637644818586e-06, "loss": 0.9834, "step": 2750 }, { "epoch": 0.3756144183506281, "grad_norm": 6.27462911605835, "learning_rate": 7.181648300322508e-06, "loss": 0.8196, "step": 2751 }, { "epoch": 0.3757509557618788, "grad_norm": 6.308337211608887, "learning_rate": 7.179658529169728e-06, "loss": 0.8272, "step": 2752 }, { "epoch": 0.3758874931731294, "grad_norm": 6.071825981140137, "learning_rate": 7.177668331749376e-06, "loss": 0.8243, "step": 2753 }, { "epoch": 0.3760240305843801, "grad_norm": 6.669151306152344, "learning_rate": 7.1756777084506704e-06, "loss": 0.8912, "step": 2754 }, { "epoch": 0.3761605679956308, "grad_norm": 6.9293131828308105, "learning_rate": 7.173686659662906e-06, "loss": 0.9439, "step": 2755 }, { "epoch": 0.3762971054068815, "grad_norm": 6.624861717224121, "learning_rate": 7.171695185775468e-06, "loss": 0.8765, "step": 2756 }, { "epoch": 0.3764336428181322, "grad_norm": 5.981417179107666, "learning_rate": 7.169703287177821e-06, "loss": 0.9885, "step": 2757 }, { "epoch": 0.3765701802293829, "grad_norm": 12.044288635253906, "learning_rate": 7.167710964259511e-06, "loss": 1.1271, "step": 2758 }, { "epoch": 0.3767067176406335, "grad_norm": 6.803135395050049, "learning_rate": 7.16571821741017e-06, "loss": 1.0864, "step": 2759 }, { "epoch": 0.3768432550518842, "grad_norm": 25.033782958984375, "learning_rate": 7.163725047019513e-06, "loss": 1.1022, "step": 2760 }, { "epoch": 0.3769797924631349, "grad_norm": 6.462336540222168, "learning_rate": 7.161731453477336e-06, "loss": 0.9808, "step": 2761 }, { "epoch": 0.3771163298743856, "grad_norm": 6.963551998138428, "learning_rate": 7.159737437173518e-06, "loss": 1.0662, "step": 2762 }, { "epoch": 0.3772528672856363, "grad_norm": 8.027510643005371, "learning_rate": 7.157742998498022e-06, "loss": 1.0341, "step": 2763 }, { "epoch": 0.3773894046968869, "grad_norm": 6.3856096267700195, "learning_rate": 7.155748137840892e-06, "loss": 0.9428, "step": 2764 }, { "epoch": 0.3775259421081376, "grad_norm": 6.866258144378662, "learning_rate": 7.153752855592258e-06, "loss": 0.9542, "step": 2765 }, { "epoch": 0.3776624795193883, "grad_norm": 8.768871307373047, "learning_rate": 7.151757152142325e-06, "loss": 0.9508, "step": 2766 }, { "epoch": 0.377799016930639, "grad_norm": 6.95012903213501, "learning_rate": 7.1497610278813885e-06, "loss": 0.996, "step": 2767 }, { "epoch": 0.3779355543418897, "grad_norm": 7.177189826965332, "learning_rate": 7.147764483199825e-06, "loss": 1.0182, "step": 2768 }, { "epoch": 0.3780720917531404, "grad_norm": 26.788427352905273, "learning_rate": 7.145767518488086e-06, "loss": 1.1938, "step": 2769 }, { "epoch": 0.378208629164391, "grad_norm": 12.838212013244629, "learning_rate": 7.143770134136714e-06, "loss": 1.0163, "step": 2770 }, { "epoch": 0.3783451665756417, "grad_norm": 7.794022083282471, "learning_rate": 7.141772330536327e-06, "loss": 1.0565, "step": 2771 }, { "epoch": 0.3784817039868924, "grad_norm": 8.020102500915527, "learning_rate": 7.139774108077633e-06, "loss": 1.018, "step": 2772 }, { "epoch": 0.3786182413981431, "grad_norm": 6.30324125289917, "learning_rate": 7.137775467151411e-06, "loss": 0.9553, "step": 2773 }, { "epoch": 0.3787547788093938, "grad_norm": 6.209074020385742, "learning_rate": 7.135776408148532e-06, "loss": 0.7992, "step": 2774 }, { "epoch": 0.3788913162206445, "grad_norm": 6.74443244934082, "learning_rate": 7.13377693145994e-06, "loss": 0.9263, "step": 2775 }, { "epoch": 0.3790278536318951, "grad_norm": 22.94153594970703, "learning_rate": 7.131777037476669e-06, "loss": 0.9275, "step": 2776 }, { "epoch": 0.3791643910431458, "grad_norm": 5.614070415496826, "learning_rate": 7.12977672658983e-06, "loss": 1.0942, "step": 2777 }, { "epoch": 0.3793009284543965, "grad_norm": 8.532584190368652, "learning_rate": 7.127775999190616e-06, "loss": 0.827, "step": 2778 }, { "epoch": 0.3794374658656472, "grad_norm": 18.863527297973633, "learning_rate": 7.125774855670301e-06, "loss": 1.1474, "step": 2779 }, { "epoch": 0.3795740032768979, "grad_norm": 8.643842697143555, "learning_rate": 7.12377329642024e-06, "loss": 0.8615, "step": 2780 }, { "epoch": 0.3797105406881486, "grad_norm": 8.33103084564209, "learning_rate": 7.121771321831872e-06, "loss": 1.1905, "step": 2781 }, { "epoch": 0.3798470780993992, "grad_norm": 10.723105430603027, "learning_rate": 7.119768932296716e-06, "loss": 1.0536, "step": 2782 }, { "epoch": 0.3799836155106499, "grad_norm": 7.007081031799316, "learning_rate": 7.11776612820637e-06, "loss": 0.7779, "step": 2783 }, { "epoch": 0.3801201529219006, "grad_norm": 36.11051559448242, "learning_rate": 7.115762909952517e-06, "loss": 0.9305, "step": 2784 }, { "epoch": 0.3802566903331513, "grad_norm": 16.038284301757812, "learning_rate": 7.113759277926918e-06, "loss": 0.8523, "step": 2785 }, { "epoch": 0.380393227744402, "grad_norm": 10.777405738830566, "learning_rate": 7.1117552325214145e-06, "loss": 0.9663, "step": 2786 }, { "epoch": 0.3805297651556526, "grad_norm": 6.678280830383301, "learning_rate": 7.109750774127931e-06, "loss": 0.9203, "step": 2787 }, { "epoch": 0.3806663025669033, "grad_norm": 6.40794038772583, "learning_rate": 7.107745903138472e-06, "loss": 0.808, "step": 2788 }, { "epoch": 0.380802839978154, "grad_norm": 5.913449287414551, "learning_rate": 7.105740619945123e-06, "loss": 0.8464, "step": 2789 }, { "epoch": 0.3809393773894047, "grad_norm": 12.2850341796875, "learning_rate": 7.1037349249400485e-06, "loss": 0.9765, "step": 2790 }, { "epoch": 0.3810759148006554, "grad_norm": 5.583169460296631, "learning_rate": 7.101728818515496e-06, "loss": 0.9718, "step": 2791 }, { "epoch": 0.3812124522119061, "grad_norm": 5.608798503875732, "learning_rate": 7.09972230106379e-06, "loss": 0.8871, "step": 2792 }, { "epoch": 0.3813489896231567, "grad_norm": 7.7044830322265625, "learning_rate": 7.09771537297734e-06, "loss": 1.0453, "step": 2793 }, { "epoch": 0.3814855270344074, "grad_norm": 8.288396835327148, "learning_rate": 7.0957080346486305e-06, "loss": 1.1044, "step": 2794 }, { "epoch": 0.3816220644456581, "grad_norm": 6.634851455688477, "learning_rate": 7.093700286470232e-06, "loss": 1.0549, "step": 2795 }, { "epoch": 0.3817586018569088, "grad_norm": 5.444485664367676, "learning_rate": 7.09169212883479e-06, "loss": 0.914, "step": 2796 }, { "epoch": 0.3818951392681595, "grad_norm": 9.504552841186523, "learning_rate": 7.089683562135034e-06, "loss": 0.8352, "step": 2797 }, { "epoch": 0.3820316766794102, "grad_norm": 8.939574241638184, "learning_rate": 7.087674586763772e-06, "loss": 1.0085, "step": 2798 }, { "epoch": 0.3821682140906608, "grad_norm": 9.090300559997559, "learning_rate": 7.08566520311389e-06, "loss": 0.9646, "step": 2799 }, { "epoch": 0.3823047515019115, "grad_norm": 5.948248863220215, "learning_rate": 7.083655411578356e-06, "loss": 0.9004, "step": 2800 }, { "epoch": 0.3824412889131622, "grad_norm": 10.712512969970703, "learning_rate": 7.081645212550219e-06, "loss": 0.9985, "step": 2801 }, { "epoch": 0.3825778263244129, "grad_norm": 6.343214511871338, "learning_rate": 7.079634606422603e-06, "loss": 0.9875, "step": 2802 }, { "epoch": 0.3827143637356636, "grad_norm": 7.0884833335876465, "learning_rate": 7.077623593588716e-06, "loss": 0.9946, "step": 2803 }, { "epoch": 0.3828509011469143, "grad_norm": 7.574934959411621, "learning_rate": 7.075612174441846e-06, "loss": 0.9842, "step": 2804 }, { "epoch": 0.3829874385581649, "grad_norm": 7.263979911804199, "learning_rate": 7.073600349375356e-06, "loss": 1.0356, "step": 2805 }, { "epoch": 0.3831239759694156, "grad_norm": 5.864504814147949, "learning_rate": 7.071588118782693e-06, "loss": 0.9676, "step": 2806 }, { "epoch": 0.3832605133806663, "grad_norm": 6.871547222137451, "learning_rate": 7.069575483057379e-06, "loss": 1.0289, "step": 2807 }, { "epoch": 0.383397050791917, "grad_norm": 7.492237567901611, "learning_rate": 7.06756244259302e-06, "loss": 1.0001, "step": 2808 }, { "epoch": 0.3835335882031677, "grad_norm": 6.772663116455078, "learning_rate": 7.065548997783298e-06, "loss": 0.9674, "step": 2809 }, { "epoch": 0.3836701256144184, "grad_norm": 6.2256317138671875, "learning_rate": 7.063535149021974e-06, "loss": 0.9148, "step": 2810 }, { "epoch": 0.383806663025669, "grad_norm": 9.326070785522461, "learning_rate": 7.0615208967028904e-06, "loss": 1.079, "step": 2811 }, { "epoch": 0.3839432004369197, "grad_norm": 7.8320136070251465, "learning_rate": 7.059506241219964e-06, "loss": 0.9433, "step": 2812 }, { "epoch": 0.3840797378481704, "grad_norm": 8.780533790588379, "learning_rate": 7.057491182967196e-06, "loss": 0.952, "step": 2813 }, { "epoch": 0.3842162752594211, "grad_norm": 7.955201625823975, "learning_rate": 7.055475722338663e-06, "loss": 1.0003, "step": 2814 }, { "epoch": 0.3843528126706718, "grad_norm": 9.202345848083496, "learning_rate": 7.05345985972852e-06, "loss": 0.7635, "step": 2815 }, { "epoch": 0.3844893500819224, "grad_norm": 6.854211807250977, "learning_rate": 7.051443595531002e-06, "loss": 0.9762, "step": 2816 }, { "epoch": 0.3846258874931731, "grad_norm": 6.216058731079102, "learning_rate": 7.049426930140425e-06, "loss": 0.9979, "step": 2817 }, { "epoch": 0.3847624249044238, "grad_norm": 6.601439952850342, "learning_rate": 7.047409863951177e-06, "loss": 1.0096, "step": 2818 }, { "epoch": 0.3848989623156745, "grad_norm": 6.210318565368652, "learning_rate": 7.045392397357729e-06, "loss": 1.0578, "step": 2819 }, { "epoch": 0.3850354997269252, "grad_norm": 5.7793354988098145, "learning_rate": 7.04337453075463e-06, "loss": 0.8855, "step": 2820 }, { "epoch": 0.3851720371381759, "grad_norm": 6.738264083862305, "learning_rate": 7.041356264536505e-06, "loss": 0.8634, "step": 2821 }, { "epoch": 0.3853085745494265, "grad_norm": 7.976448059082031, "learning_rate": 7.03933759909806e-06, "loss": 1.1802, "step": 2822 }, { "epoch": 0.3854451119606772, "grad_norm": 6.846953868865967, "learning_rate": 7.0373185348340785e-06, "loss": 0.8909, "step": 2823 }, { "epoch": 0.3855816493719279, "grad_norm": 8.084505081176758, "learning_rate": 7.03529907213942e-06, "loss": 1.0645, "step": 2824 }, { "epoch": 0.3857181867831786, "grad_norm": 10.851642608642578, "learning_rate": 7.033279211409023e-06, "loss": 1.068, "step": 2825 }, { "epoch": 0.3858547241944293, "grad_norm": 6.529760837554932, "learning_rate": 7.031258953037905e-06, "loss": 1.0218, "step": 2826 }, { "epoch": 0.38599126160568, "grad_norm": 8.12089729309082, "learning_rate": 7.029238297421159e-06, "loss": 0.8929, "step": 2827 }, { "epoch": 0.3861277990169306, "grad_norm": 9.073348045349121, "learning_rate": 7.027217244953958e-06, "loss": 0.983, "step": 2828 }, { "epoch": 0.3862643364281813, "grad_norm": 7.633033752441406, "learning_rate": 7.025195796031552e-06, "loss": 0.9105, "step": 2829 }, { "epoch": 0.386400873839432, "grad_norm": 7.244134426116943, "learning_rate": 7.023173951049268e-06, "loss": 1.0275, "step": 2830 }, { "epoch": 0.3865374112506827, "grad_norm": 8.7642822265625, "learning_rate": 7.021151710402509e-06, "loss": 0.952, "step": 2831 }, { "epoch": 0.3866739486619334, "grad_norm": 7.080837726593018, "learning_rate": 7.019129074486758e-06, "loss": 1.1161, "step": 2832 }, { "epoch": 0.3868104860731841, "grad_norm": 7.726204872131348, "learning_rate": 7.017106043697573e-06, "loss": 1.0156, "step": 2833 }, { "epoch": 0.3869470234844347, "grad_norm": 6.0648722648620605, "learning_rate": 7.015082618430593e-06, "loss": 0.8194, "step": 2834 }, { "epoch": 0.3870835608956854, "grad_norm": 7.534506797790527, "learning_rate": 7.013058799081531e-06, "loss": 1.0423, "step": 2835 }, { "epoch": 0.3872200983069361, "grad_norm": 6.901264667510986, "learning_rate": 7.011034586046177e-06, "loss": 0.9208, "step": 2836 }, { "epoch": 0.3873566357181868, "grad_norm": 6.714628219604492, "learning_rate": 7.009009979720398e-06, "loss": 0.9541, "step": 2837 }, { "epoch": 0.3874931731294375, "grad_norm": 5.180017471313477, "learning_rate": 7.006984980500139e-06, "loss": 0.8897, "step": 2838 }, { "epoch": 0.3876297105406881, "grad_norm": 6.444457530975342, "learning_rate": 7.004959588781423e-06, "loss": 1.1415, "step": 2839 }, { "epoch": 0.3877662479519388, "grad_norm": 6.229708194732666, "learning_rate": 7.002933804960345e-06, "loss": 0.9482, "step": 2840 }, { "epoch": 0.3879027853631895, "grad_norm": 19.415733337402344, "learning_rate": 7.000907629433083e-06, "loss": 0.9476, "step": 2841 }, { "epoch": 0.3880393227744402, "grad_norm": 6.883073806762695, "learning_rate": 6.998881062595887e-06, "loss": 1.0837, "step": 2842 }, { "epoch": 0.3881758601856909, "grad_norm": 6.307773113250732, "learning_rate": 6.996854104845086e-06, "loss": 1.0226, "step": 2843 }, { "epoch": 0.3883123975969416, "grad_norm": 8.055567741394043, "learning_rate": 6.994826756577082e-06, "loss": 0.995, "step": 2844 }, { "epoch": 0.3884489350081922, "grad_norm": 7.649487495422363, "learning_rate": 6.992799018188359e-06, "loss": 1.0201, "step": 2845 }, { "epoch": 0.3885854724194429, "grad_norm": 7.752221584320068, "learning_rate": 6.9907708900754725e-06, "loss": 0.8531, "step": 2846 }, { "epoch": 0.3887220098306936, "grad_norm": 5.937368869781494, "learning_rate": 6.988742372635056e-06, "loss": 1.0577, "step": 2847 }, { "epoch": 0.3888585472419443, "grad_norm": 9.542723655700684, "learning_rate": 6.9867134662638185e-06, "loss": 0.9298, "step": 2848 }, { "epoch": 0.388995084653195, "grad_norm": 6.883297920227051, "learning_rate": 6.984684171358547e-06, "loss": 0.8764, "step": 2849 }, { "epoch": 0.3891316220644457, "grad_norm": 7.091235637664795, "learning_rate": 6.982654488316103e-06, "loss": 1.0028, "step": 2850 }, { "epoch": 0.3892681594756963, "grad_norm": 7.665216445922852, "learning_rate": 6.980624417533421e-06, "loss": 0.8576, "step": 2851 }, { "epoch": 0.389404696886947, "grad_norm": 8.305477142333984, "learning_rate": 6.978593959407516e-06, "loss": 1.2156, "step": 2852 }, { "epoch": 0.3895412342981977, "grad_norm": 5.808934211730957, "learning_rate": 6.9765631143354785e-06, "loss": 0.88, "step": 2853 }, { "epoch": 0.3896777717094484, "grad_norm": 7.725901126861572, "learning_rate": 6.9745318827144716e-06, "loss": 1.0612, "step": 2854 }, { "epoch": 0.3898143091206991, "grad_norm": 6.32650899887085, "learning_rate": 6.972500264941737e-06, "loss": 1.0263, "step": 2855 }, { "epoch": 0.3899508465319498, "grad_norm": 8.862229347229004, "learning_rate": 6.970468261414588e-06, "loss": 1.1363, "step": 2856 }, { "epoch": 0.3900873839432004, "grad_norm": 5.949523448944092, "learning_rate": 6.968435872530417e-06, "loss": 0.9721, "step": 2857 }, { "epoch": 0.3902239213544511, "grad_norm": 5.6574506759643555, "learning_rate": 6.966403098686691e-06, "loss": 0.8849, "step": 2858 }, { "epoch": 0.3903604587657018, "grad_norm": 9.759318351745605, "learning_rate": 6.964369940280955e-06, "loss": 0.9364, "step": 2859 }, { "epoch": 0.3904969961769525, "grad_norm": 8.564919471740723, "learning_rate": 6.962336397710819e-06, "loss": 1.0193, "step": 2860 }, { "epoch": 0.3906335335882032, "grad_norm": 6.7046308517456055, "learning_rate": 6.9603024713739806e-06, "loss": 1.1022, "step": 2861 }, { "epoch": 0.3907700709994539, "grad_norm": 7.076629161834717, "learning_rate": 6.9582681616682065e-06, "loss": 1.0024, "step": 2862 }, { "epoch": 0.3909066084107045, "grad_norm": 5.371557235717773, "learning_rate": 6.956233468991337e-06, "loss": 0.9677, "step": 2863 }, { "epoch": 0.3910431458219552, "grad_norm": 6.0893473625183105, "learning_rate": 6.954198393741291e-06, "loss": 1.0309, "step": 2864 }, { "epoch": 0.3911796832332059, "grad_norm": 5.041922092437744, "learning_rate": 6.952162936316058e-06, "loss": 1.0855, "step": 2865 }, { "epoch": 0.3913162206444566, "grad_norm": 7.019600868225098, "learning_rate": 6.950127097113708e-06, "loss": 1.0141, "step": 2866 }, { "epoch": 0.3914527580557073, "grad_norm": 5.69437313079834, "learning_rate": 6.94809087653238e-06, "loss": 1.0578, "step": 2867 }, { "epoch": 0.3915892954669579, "grad_norm": 6.907191276550293, "learning_rate": 6.946054274970292e-06, "loss": 0.9322, "step": 2868 }, { "epoch": 0.3917258328782086, "grad_norm": 5.88911771774292, "learning_rate": 6.944017292825733e-06, "loss": 1.0004, "step": 2869 }, { "epoch": 0.3918623702894593, "grad_norm": 6.573517799377441, "learning_rate": 6.941979930497067e-06, "loss": 1.0344, "step": 2870 }, { "epoch": 0.39199890770071, "grad_norm": 6.193339824676514, "learning_rate": 6.939942188382736e-06, "loss": 0.8766, "step": 2871 }, { "epoch": 0.3921354451119607, "grad_norm": 6.438813209533691, "learning_rate": 6.93790406688125e-06, "loss": 1.0307, "step": 2872 }, { "epoch": 0.3922719825232114, "grad_norm": 5.306576728820801, "learning_rate": 6.9358655663912e-06, "loss": 1.0015, "step": 2873 }, { "epoch": 0.392408519934462, "grad_norm": 9.510895729064941, "learning_rate": 6.933826687311245e-06, "loss": 1.0348, "step": 2874 }, { "epoch": 0.3925450573457127, "grad_norm": 10.081445693969727, "learning_rate": 6.931787430040125e-06, "loss": 1.0068, "step": 2875 }, { "epoch": 0.3926815947569634, "grad_norm": 5.9786248207092285, "learning_rate": 6.9297477949766445e-06, "loss": 0.9917, "step": 2876 }, { "epoch": 0.3928181321682141, "grad_norm": 6.605868816375732, "learning_rate": 6.9277077825196905e-06, "loss": 0.9958, "step": 2877 }, { "epoch": 0.3929546695794648, "grad_norm": 6.33040189743042, "learning_rate": 6.92566739306822e-06, "loss": 0.8613, "step": 2878 }, { "epoch": 0.3930912069907155, "grad_norm": 6.370571136474609, "learning_rate": 6.923626627021265e-06, "loss": 0.8006, "step": 2879 }, { "epoch": 0.3932277444019661, "grad_norm": 14.266075134277344, "learning_rate": 6.921585484777929e-06, "loss": 1.1411, "step": 2880 }, { "epoch": 0.3933642818132168, "grad_norm": 6.85063362121582, "learning_rate": 6.91954396673739e-06, "loss": 0.8835, "step": 2881 }, { "epoch": 0.3935008192244675, "grad_norm": 7.057925701141357, "learning_rate": 6.917502073298903e-06, "loss": 1.118, "step": 2882 }, { "epoch": 0.3936373566357182, "grad_norm": 8.48973560333252, "learning_rate": 6.9154598048617895e-06, "loss": 0.8946, "step": 2883 }, { "epoch": 0.3937738940469689, "grad_norm": 8.041266441345215, "learning_rate": 6.913417161825449e-06, "loss": 0.9187, "step": 2884 }, { "epoch": 0.3939104314582196, "grad_norm": 5.746150016784668, "learning_rate": 6.911374144589356e-06, "loss": 0.9641, "step": 2885 }, { "epoch": 0.3940469688694702, "grad_norm": 8.907888412475586, "learning_rate": 6.909330753553054e-06, "loss": 0.9269, "step": 2886 }, { "epoch": 0.3941835062807209, "grad_norm": 5.900776386260986, "learning_rate": 6.907286989116162e-06, "loss": 1.0074, "step": 2887 }, { "epoch": 0.3943200436919716, "grad_norm": 5.783077716827393, "learning_rate": 6.90524285167837e-06, "loss": 1.0073, "step": 2888 }, { "epoch": 0.3944565811032223, "grad_norm": 5.735586166381836, "learning_rate": 6.903198341639443e-06, "loss": 0.9639, "step": 2889 }, { "epoch": 0.394593118514473, "grad_norm": 5.767056465148926, "learning_rate": 6.9011534593992176e-06, "loss": 0.9671, "step": 2890 }, { "epoch": 0.3947296559257236, "grad_norm": 7.540747165679932, "learning_rate": 6.899108205357605e-06, "loss": 0.97, "step": 2891 }, { "epoch": 0.3948661933369743, "grad_norm": 6.680800437927246, "learning_rate": 6.897062579914587e-06, "loss": 1.061, "step": 2892 }, { "epoch": 0.395002730748225, "grad_norm": 11.82414436340332, "learning_rate": 6.8950165834702165e-06, "loss": 1.091, "step": 2893 }, { "epoch": 0.3951392681594757, "grad_norm": 12.34029483795166, "learning_rate": 6.8929702164246255e-06, "loss": 1.0253, "step": 2894 }, { "epoch": 0.3952758055707264, "grad_norm": 7.343029022216797, "learning_rate": 6.890923479178011e-06, "loss": 0.9712, "step": 2895 }, { "epoch": 0.3954123429819771, "grad_norm": 7.0168561935424805, "learning_rate": 6.888876372130648e-06, "loss": 0.9139, "step": 2896 }, { "epoch": 0.3955488803932277, "grad_norm": 6.541500091552734, "learning_rate": 6.886828895682879e-06, "loss": 1.0865, "step": 2897 }, { "epoch": 0.3956854178044784, "grad_norm": 5.844142436981201, "learning_rate": 6.884781050235123e-06, "loss": 1.0865, "step": 2898 }, { "epoch": 0.3958219552157291, "grad_norm": 9.77907943725586, "learning_rate": 6.882732836187869e-06, "loss": 1.0082, "step": 2899 }, { "epoch": 0.3959584926269798, "grad_norm": 6.365231037139893, "learning_rate": 6.88068425394168e-06, "loss": 0.9818, "step": 2900 }, { "epoch": 0.3960950300382305, "grad_norm": 4.980734825134277, "learning_rate": 6.878635303897188e-06, "loss": 0.9061, "step": 2901 }, { "epoch": 0.3962315674494812, "grad_norm": 5.451747417449951, "learning_rate": 6.876585986455096e-06, "loss": 1.0583, "step": 2902 }, { "epoch": 0.3963681048607318, "grad_norm": 5.484940052032471, "learning_rate": 6.874536302016184e-06, "loss": 0.8583, "step": 2903 }, { "epoch": 0.3965046422719825, "grad_norm": 6.8520355224609375, "learning_rate": 6.872486250981301e-06, "loss": 0.9467, "step": 2904 }, { "epoch": 0.3966411796832332, "grad_norm": 7.844563007354736, "learning_rate": 6.8704358337513675e-06, "loss": 1.0325, "step": 2905 }, { "epoch": 0.3967777170944839, "grad_norm": 7.541085720062256, "learning_rate": 6.868385050727375e-06, "loss": 1.0528, "step": 2906 }, { "epoch": 0.3969142545057346, "grad_norm": 6.101463794708252, "learning_rate": 6.8663339023103895e-06, "loss": 0.9446, "step": 2907 }, { "epoch": 0.3970507919169853, "grad_norm": 8.892609596252441, "learning_rate": 6.864282388901544e-06, "loss": 1.1027, "step": 2908 }, { "epoch": 0.3971873293282359, "grad_norm": 5.566604137420654, "learning_rate": 6.862230510902046e-06, "loss": 1.0841, "step": 2909 }, { "epoch": 0.3973238667394866, "grad_norm": 6.6276140213012695, "learning_rate": 6.860178268713173e-06, "loss": 1.0073, "step": 2910 }, { "epoch": 0.3974604041507373, "grad_norm": 6.497176647186279, "learning_rate": 6.858125662736277e-06, "loss": 0.9169, "step": 2911 }, { "epoch": 0.397596941561988, "grad_norm": 7.142061710357666, "learning_rate": 6.8560726933727735e-06, "loss": 0.964, "step": 2912 }, { "epoch": 0.3977334789732387, "grad_norm": 5.433043003082275, "learning_rate": 6.8540193610241576e-06, "loss": 0.9656, "step": 2913 }, { "epoch": 0.3978700163844894, "grad_norm": 8.576324462890625, "learning_rate": 6.851965666091993e-06, "loss": 0.9524, "step": 2914 }, { "epoch": 0.39800655379574, "grad_norm": 6.2250871658325195, "learning_rate": 6.849911608977908e-06, "loss": 0.9883, "step": 2915 }, { "epoch": 0.3981430912069907, "grad_norm": 6.39654016494751, "learning_rate": 6.847857190083611e-06, "loss": 0.9858, "step": 2916 }, { "epoch": 0.3982796286182414, "grad_norm": 5.57852029800415, "learning_rate": 6.845802409810875e-06, "loss": 0.9901, "step": 2917 }, { "epoch": 0.3984161660294921, "grad_norm": 6.289798259735107, "learning_rate": 6.843747268561546e-06, "loss": 0.9277, "step": 2918 }, { "epoch": 0.3985527034407428, "grad_norm": 6.256983757019043, "learning_rate": 6.841691766737541e-06, "loss": 0.9028, "step": 2919 }, { "epoch": 0.3986892408519934, "grad_norm": 8.054781913757324, "learning_rate": 6.839635904740847e-06, "loss": 0.9229, "step": 2920 }, { "epoch": 0.3988257782632441, "grad_norm": 9.098484992980957, "learning_rate": 6.837579682973519e-06, "loss": 1.0318, "step": 2921 }, { "epoch": 0.3989623156744948, "grad_norm": 7.167291164398193, "learning_rate": 6.835523101837686e-06, "loss": 0.9274, "step": 2922 }, { "epoch": 0.3990988530857455, "grad_norm": 7.027278900146484, "learning_rate": 6.833466161735545e-06, "loss": 1.0229, "step": 2923 }, { "epoch": 0.3992353904969962, "grad_norm": 5.5256853103637695, "learning_rate": 6.831408863069364e-06, "loss": 1.1068, "step": 2924 }, { "epoch": 0.3993719279082469, "grad_norm": 8.419180870056152, "learning_rate": 6.829351206241484e-06, "loss": 0.9484, "step": 2925 }, { "epoch": 0.3995084653194975, "grad_norm": 6.675102233886719, "learning_rate": 6.8272931916543095e-06, "loss": 0.9795, "step": 2926 }, { "epoch": 0.3996450027307482, "grad_norm": 12.838143348693848, "learning_rate": 6.825234819710319e-06, "loss": 1.0488, "step": 2927 }, { "epoch": 0.3997815401419989, "grad_norm": 9.86237907409668, "learning_rate": 6.823176090812063e-06, "loss": 0.9794, "step": 2928 }, { "epoch": 0.3999180775532496, "grad_norm": 8.767800331115723, "learning_rate": 6.821117005362157e-06, "loss": 1.0286, "step": 2929 }, { "epoch": 0.4000546149645003, "grad_norm": 6.631206035614014, "learning_rate": 6.819057563763289e-06, "loss": 1.0322, "step": 2930 }, { "epoch": 0.400191152375751, "grad_norm": 7.521854877471924, "learning_rate": 6.8169977664182164e-06, "loss": 1.0699, "step": 2931 }, { "epoch": 0.4003276897870016, "grad_norm": 6.319944858551025, "learning_rate": 6.814937613729766e-06, "loss": 0.9257, "step": 2932 }, { "epoch": 0.4004642271982523, "grad_norm": 5.079736709594727, "learning_rate": 6.812877106100836e-06, "loss": 1.0428, "step": 2933 }, { "epoch": 0.400600764609503, "grad_norm": 8.80451774597168, "learning_rate": 6.810816243934388e-06, "loss": 0.9661, "step": 2934 }, { "epoch": 0.4007373020207537, "grad_norm": 7.565907001495361, "learning_rate": 6.80875502763346e-06, "loss": 1.1412, "step": 2935 }, { "epoch": 0.4008738394320044, "grad_norm": 7.719516277313232, "learning_rate": 6.806693457601157e-06, "loss": 1.0766, "step": 2936 }, { "epoch": 0.4010103768432551, "grad_norm": 8.725255966186523, "learning_rate": 6.804631534240649e-06, "loss": 1.0658, "step": 2937 }, { "epoch": 0.4011469142545057, "grad_norm": 7.798548698425293, "learning_rate": 6.802569257955181e-06, "loss": 0.9721, "step": 2938 }, { "epoch": 0.4012834516657564, "grad_norm": 6.847436904907227, "learning_rate": 6.8005066291480645e-06, "loss": 1.0904, "step": 2939 }, { "epoch": 0.4014199890770071, "grad_norm": 8.018086433410645, "learning_rate": 6.79844364822268e-06, "loss": 0.9934, "step": 2940 }, { "epoch": 0.4015565264882578, "grad_norm": 7.85576057434082, "learning_rate": 6.796380315582476e-06, "loss": 0.9082, "step": 2941 }, { "epoch": 0.4016930638995085, "grad_norm": 7.630638122558594, "learning_rate": 6.79431663163097e-06, "loss": 0.9979, "step": 2942 }, { "epoch": 0.4018296013107591, "grad_norm": 5.516631126403809, "learning_rate": 6.792252596771751e-06, "loss": 0.8556, "step": 2943 }, { "epoch": 0.4019661387220098, "grad_norm": 8.661415100097656, "learning_rate": 6.790188211408473e-06, "loss": 1.0615, "step": 2944 }, { "epoch": 0.4021026761332605, "grad_norm": 6.558718681335449, "learning_rate": 6.788123475944861e-06, "loss": 0.9912, "step": 2945 }, { "epoch": 0.4022392135445112, "grad_norm": 7.787026882171631, "learning_rate": 6.786058390784708e-06, "loss": 1.0025, "step": 2946 }, { "epoch": 0.4023757509557619, "grad_norm": 5.593625068664551, "learning_rate": 6.783992956331873e-06, "loss": 1.0632, "step": 2947 }, { "epoch": 0.4025122883670126, "grad_norm": 6.014603137969971, "learning_rate": 6.781927172990285e-06, "loss": 0.9384, "step": 2948 }, { "epoch": 0.4026488257782632, "grad_norm": 7.424574375152588, "learning_rate": 6.779861041163943e-06, "loss": 0.9085, "step": 2949 }, { "epoch": 0.4027853631895139, "grad_norm": 5.701840400695801, "learning_rate": 6.777794561256914e-06, "loss": 0.9484, "step": 2950 }, { "epoch": 0.4029219006007646, "grad_norm": 5.130801200866699, "learning_rate": 6.7757277336733276e-06, "loss": 1.0441, "step": 2951 }, { "epoch": 0.4030584380120153, "grad_norm": 11.330381393432617, "learning_rate": 6.77366055881739e-06, "loss": 0.991, "step": 2952 }, { "epoch": 0.403194975423266, "grad_norm": 7.511411666870117, "learning_rate": 6.771593037093366e-06, "loss": 1.024, "step": 2953 }, { "epoch": 0.4033315128345167, "grad_norm": 5.596879959106445, "learning_rate": 6.769525168905596e-06, "loss": 0.9325, "step": 2954 }, { "epoch": 0.4034680502457673, "grad_norm": 6.646936893463135, "learning_rate": 6.767456954658485e-06, "loss": 0.9722, "step": 2955 }, { "epoch": 0.403604587657018, "grad_norm": 5.840404987335205, "learning_rate": 6.765388394756504e-06, "loss": 0.9587, "step": 2956 }, { "epoch": 0.4037411250682687, "grad_norm": 6.840742588043213, "learning_rate": 6.763319489604196e-06, "loss": 0.8848, "step": 2957 }, { "epoch": 0.4038776624795194, "grad_norm": 6.3301286697387695, "learning_rate": 6.7612502396061685e-06, "loss": 0.933, "step": 2958 }, { "epoch": 0.4040141998907701, "grad_norm": 7.573635578155518, "learning_rate": 6.759180645167095e-06, "loss": 0.9189, "step": 2959 }, { "epoch": 0.4041507373020208, "grad_norm": 6.295337200164795, "learning_rate": 6.757110706691721e-06, "loss": 1.1058, "step": 2960 }, { "epoch": 0.4042872747132714, "grad_norm": 7.065178394317627, "learning_rate": 6.755040424584853e-06, "loss": 0.9499, "step": 2961 }, { "epoch": 0.4044238121245221, "grad_norm": 6.851754665374756, "learning_rate": 6.752969799251371e-06, "loss": 1.1179, "step": 2962 }, { "epoch": 0.4045603495357728, "grad_norm": 5.8907294273376465, "learning_rate": 6.7508988310962176e-06, "loss": 0.8759, "step": 2963 }, { "epoch": 0.4046968869470235, "grad_norm": 6.407512187957764, "learning_rate": 6.748827520524406e-06, "loss": 0.8877, "step": 2964 }, { "epoch": 0.4048334243582742, "grad_norm": 7.420782089233398, "learning_rate": 6.746755867941015e-06, "loss": 1.0603, "step": 2965 }, { "epoch": 0.4049699617695248, "grad_norm": 6.331903457641602, "learning_rate": 6.744683873751186e-06, "loss": 1.1196, "step": 2966 }, { "epoch": 0.4051064991807755, "grad_norm": 6.942225456237793, "learning_rate": 6.742611538360135e-06, "loss": 0.8494, "step": 2967 }, { "epoch": 0.4052430365920262, "grad_norm": 5.651515960693359, "learning_rate": 6.740538862173139e-06, "loss": 0.9787, "step": 2968 }, { "epoch": 0.4053795740032769, "grad_norm": 6.894955158233643, "learning_rate": 6.738465845595544e-06, "loss": 1.162, "step": 2969 }, { "epoch": 0.4055161114145276, "grad_norm": 7.25398063659668, "learning_rate": 6.736392489032764e-06, "loss": 0.8934, "step": 2970 }, { "epoch": 0.4056526488257783, "grad_norm": 6.885441780090332, "learning_rate": 6.734318792890273e-06, "loss": 1.0488, "step": 2971 }, { "epoch": 0.4057891862370289, "grad_norm": 6.967933177947998, "learning_rate": 6.732244757573619e-06, "loss": 1.0149, "step": 2972 }, { "epoch": 0.4059257236482796, "grad_norm": 5.446812152862549, "learning_rate": 6.730170383488412e-06, "loss": 0.8895, "step": 2973 }, { "epoch": 0.4060622610595303, "grad_norm": 13.7422456741333, "learning_rate": 6.72809567104033e-06, "loss": 1.011, "step": 2974 }, { "epoch": 0.406198798470781, "grad_norm": 9.403554916381836, "learning_rate": 6.726020620635117e-06, "loss": 1.0019, "step": 2975 }, { "epoch": 0.4063353358820317, "grad_norm": 7.976624488830566, "learning_rate": 6.723945232678582e-06, "loss": 1.1138, "step": 2976 }, { "epoch": 0.4064718732932824, "grad_norm": 6.764908790588379, "learning_rate": 6.721869507576602e-06, "loss": 0.9273, "step": 2977 }, { "epoch": 0.406608410704533, "grad_norm": 8.294069290161133, "learning_rate": 6.719793445735117e-06, "loss": 0.9754, "step": 2978 }, { "epoch": 0.4067449481157837, "grad_norm": 6.8963847160339355, "learning_rate": 6.717717047560136e-06, "loss": 0.9658, "step": 2979 }, { "epoch": 0.4068814855270344, "grad_norm": 6.62798547744751, "learning_rate": 6.715640313457733e-06, "loss": 0.8998, "step": 2980 }, { "epoch": 0.4070180229382851, "grad_norm": 5.809681415557861, "learning_rate": 6.713563243834045e-06, "loss": 0.9823, "step": 2981 }, { "epoch": 0.4071545603495358, "grad_norm": 7.8230767250061035, "learning_rate": 6.711485839095277e-06, "loss": 1.0536, "step": 2982 }, { "epoch": 0.4072910977607865, "grad_norm": 10.49423599243164, "learning_rate": 6.7094080996476985e-06, "loss": 0.9265, "step": 2983 }, { "epoch": 0.4074276351720371, "grad_norm": 7.921358108520508, "learning_rate": 6.707330025897648e-06, "loss": 0.9724, "step": 2984 }, { "epoch": 0.4075641725832878, "grad_norm": 8.384347915649414, "learning_rate": 6.705251618251525e-06, "loss": 0.8902, "step": 2985 }, { "epoch": 0.4077007099945385, "grad_norm": 8.033731460571289, "learning_rate": 6.703172877115795e-06, "loss": 1.0524, "step": 2986 }, { "epoch": 0.4078372474057892, "grad_norm": 6.498462677001953, "learning_rate": 6.7010938028969895e-06, "loss": 1.0006, "step": 2987 }, { "epoch": 0.4079737848170399, "grad_norm": 9.819449424743652, "learning_rate": 6.699014396001707e-06, "loss": 1.0117, "step": 2988 }, { "epoch": 0.4081103222282906, "grad_norm": 11.916315078735352, "learning_rate": 6.6969346568366065e-06, "loss": 1.0227, "step": 2989 }, { "epoch": 0.4082468596395412, "grad_norm": 6.624394416809082, "learning_rate": 6.694854585808418e-06, "loss": 0.9267, "step": 2990 }, { "epoch": 0.4083833970507919, "grad_norm": 6.168055534362793, "learning_rate": 6.692774183323931e-06, "loss": 1.0029, "step": 2991 }, { "epoch": 0.4085199344620426, "grad_norm": 8.413952827453613, "learning_rate": 6.690693449790003e-06, "loss": 1.1278, "step": 2992 }, { "epoch": 0.4086564718732933, "grad_norm": 8.050894737243652, "learning_rate": 6.6886123856135535e-06, "loss": 1.0017, "step": 2993 }, { "epoch": 0.408793009284544, "grad_norm": 9.405301094055176, "learning_rate": 6.68653099120157e-06, "loss": 0.9469, "step": 2994 }, { "epoch": 0.4089295466957946, "grad_norm": 5.99675989151001, "learning_rate": 6.684449266961101e-06, "loss": 0.9808, "step": 2995 }, { "epoch": 0.4090660841070453, "grad_norm": 6.438608169555664, "learning_rate": 6.682367213299264e-06, "loss": 1.0663, "step": 2996 }, { "epoch": 0.409202621518296, "grad_norm": 6.9482808113098145, "learning_rate": 6.680284830623237e-06, "loss": 0.9584, "step": 2997 }, { "epoch": 0.4093391589295467, "grad_norm": 5.961461544036865, "learning_rate": 6.678202119340264e-06, "loss": 0.9198, "step": 2998 }, { "epoch": 0.4094756963407974, "grad_norm": 8.130098342895508, "learning_rate": 6.676119079857651e-06, "loss": 0.8666, "step": 2999 }, { "epoch": 0.4096122337520481, "grad_norm": 5.409849166870117, "learning_rate": 6.674035712582774e-06, "loss": 0.9609, "step": 3000 }, { "epoch": 0.4097487711632987, "grad_norm": 5.739963054656982, "learning_rate": 6.671952017923067e-06, "loss": 1.087, "step": 3001 }, { "epoch": 0.4098853085745494, "grad_norm": 8.44023323059082, "learning_rate": 6.66986799628603e-06, "loss": 0.9887, "step": 3002 }, { "epoch": 0.4100218459858001, "grad_norm": 7.033815860748291, "learning_rate": 6.667783648079228e-06, "loss": 0.9248, "step": 3003 }, { "epoch": 0.4101583833970508, "grad_norm": 8.163522720336914, "learning_rate": 6.665698973710289e-06, "loss": 1.0755, "step": 3004 }, { "epoch": 0.4102949208083015, "grad_norm": 5.969165325164795, "learning_rate": 6.663613973586905e-06, "loss": 1.0326, "step": 3005 }, { "epoch": 0.4104314582195522, "grad_norm": 8.22364330291748, "learning_rate": 6.66152864811683e-06, "loss": 1.0348, "step": 3006 }, { "epoch": 0.4105679956308028, "grad_norm": 7.647712707519531, "learning_rate": 6.659442997707883e-06, "loss": 0.8616, "step": 3007 }, { "epoch": 0.4107045330420535, "grad_norm": 5.964879989624023, "learning_rate": 6.657357022767951e-06, "loss": 1.1183, "step": 3008 }, { "epoch": 0.4108410704533042, "grad_norm": 5.182024002075195, "learning_rate": 6.655270723704977e-06, "loss": 0.9032, "step": 3009 }, { "epoch": 0.4109776078645549, "grad_norm": 6.4399733543396, "learning_rate": 6.6531841009269704e-06, "loss": 1.0131, "step": 3010 }, { "epoch": 0.4111141452758056, "grad_norm": 14.161690711975098, "learning_rate": 6.651097154842005e-06, "loss": 0.9884, "step": 3011 }, { "epoch": 0.4112506826870563, "grad_norm": 6.956021785736084, "learning_rate": 6.6490098858582176e-06, "loss": 0.8866, "step": 3012 }, { "epoch": 0.4113872200983069, "grad_norm": 7.347654819488525, "learning_rate": 6.646922294383806e-06, "loss": 0.9764, "step": 3013 }, { "epoch": 0.4115237575095576, "grad_norm": 6.649476051330566, "learning_rate": 6.644834380827032e-06, "loss": 1.0839, "step": 3014 }, { "epoch": 0.4116602949208083, "grad_norm": 6.558155059814453, "learning_rate": 6.642746145596224e-06, "loss": 0.9369, "step": 3015 }, { "epoch": 0.411796832332059, "grad_norm": 5.790913105010986, "learning_rate": 6.640657589099769e-06, "loss": 0.9864, "step": 3016 }, { "epoch": 0.4119333697433097, "grad_norm": 5.897333145141602, "learning_rate": 6.638568711746114e-06, "loss": 1.0817, "step": 3017 }, { "epoch": 0.4120699071545603, "grad_norm": 5.400352478027344, "learning_rate": 6.636479513943779e-06, "loss": 0.9802, "step": 3018 }, { "epoch": 0.412206444565811, "grad_norm": 7.06073522567749, "learning_rate": 6.6343899961013366e-06, "loss": 1.0742, "step": 3019 }, { "epoch": 0.4123429819770617, "grad_norm": 6.796563148498535, "learning_rate": 6.632300158627427e-06, "loss": 0.9596, "step": 3020 }, { "epoch": 0.4124795193883124, "grad_norm": 6.764845848083496, "learning_rate": 6.630210001930754e-06, "loss": 0.8584, "step": 3021 }, { "epoch": 0.4126160567995631, "grad_norm": 5.347965717315674, "learning_rate": 6.628119526420078e-06, "loss": 0.9941, "step": 3022 }, { "epoch": 0.4127525942108138, "grad_norm": 5.896217346191406, "learning_rate": 6.626028732504228e-06, "loss": 1.0989, "step": 3023 }, { "epoch": 0.4128891316220644, "grad_norm": 5.455431938171387, "learning_rate": 6.623937620592091e-06, "loss": 1.0864, "step": 3024 }, { "epoch": 0.4130256690333151, "grad_norm": 5.537895202636719, "learning_rate": 6.621846191092618e-06, "loss": 1.0331, "step": 3025 }, { "epoch": 0.4131622064445658, "grad_norm": 6.2731218338012695, "learning_rate": 6.619754444414823e-06, "loss": 0.9284, "step": 3026 }, { "epoch": 0.4132987438558165, "grad_norm": 6.857913494110107, "learning_rate": 6.61766238096778e-06, "loss": 0.9665, "step": 3027 }, { "epoch": 0.4134352812670672, "grad_norm": 12.6488618850708, "learning_rate": 6.615570001160626e-06, "loss": 1.0626, "step": 3028 }, { "epoch": 0.4135718186783179, "grad_norm": 6.342931270599365, "learning_rate": 6.613477305402561e-06, "loss": 0.959, "step": 3029 }, { "epoch": 0.4137083560895685, "grad_norm": 8.173354148864746, "learning_rate": 6.611384294102845e-06, "loss": 1.0348, "step": 3030 }, { "epoch": 0.4138448935008192, "grad_norm": 8.035192489624023, "learning_rate": 6.6092909676708e-06, "loss": 0.8994, "step": 3031 }, { "epoch": 0.4139814309120699, "grad_norm": 6.542652130126953, "learning_rate": 6.607197326515808e-06, "loss": 0.9059, "step": 3032 }, { "epoch": 0.4141179683233206, "grad_norm": 7.9112548828125, "learning_rate": 6.605103371047319e-06, "loss": 1.0602, "step": 3033 }, { "epoch": 0.4142545057345713, "grad_norm": 10.077544212341309, "learning_rate": 6.603009101674835e-06, "loss": 1.0119, "step": 3034 }, { "epoch": 0.414391043145822, "grad_norm": 56.21882247924805, "learning_rate": 6.600914518807928e-06, "loss": 1.0439, "step": 3035 }, { "epoch": 0.4145275805570726, "grad_norm": 30.132801055908203, "learning_rate": 6.598819622856227e-06, "loss": 1.0482, "step": 3036 }, { "epoch": 0.4146641179683233, "grad_norm": 28.087129592895508, "learning_rate": 6.596724414229422e-06, "loss": 1.0843, "step": 3037 }, { "epoch": 0.414800655379574, "grad_norm": 18.373130798339844, "learning_rate": 6.594628893337265e-06, "loss": 1.0249, "step": 3038 }, { "epoch": 0.4149371927908247, "grad_norm": 11.016571998596191, "learning_rate": 6.592533060589568e-06, "loss": 0.8675, "step": 3039 }, { "epoch": 0.4150737302020754, "grad_norm": 6.495385646820068, "learning_rate": 6.590436916396208e-06, "loss": 0.98, "step": 3040 }, { "epoch": 0.4152102676133261, "grad_norm": 5.709208965301514, "learning_rate": 6.588340461167119e-06, "loss": 0.9954, "step": 3041 }, { "epoch": 0.4153468050245767, "grad_norm": 13.988382339477539, "learning_rate": 6.586243695312295e-06, "loss": 1.1201, "step": 3042 }, { "epoch": 0.4154833424358274, "grad_norm": 6.481481075286865, "learning_rate": 6.584146619241795e-06, "loss": 1.016, "step": 3043 }, { "epoch": 0.4156198798470781, "grad_norm": 7.253641128540039, "learning_rate": 6.582049233365734e-06, "loss": 0.9958, "step": 3044 }, { "epoch": 0.4157564172583288, "grad_norm": 6.332427978515625, "learning_rate": 6.57995153809429e-06, "loss": 1.0861, "step": 3045 }, { "epoch": 0.4158929546695795, "grad_norm": 7.25539493560791, "learning_rate": 6.577853533837704e-06, "loss": 0.9999, "step": 3046 }, { "epoch": 0.4160294920808301, "grad_norm": 8.045851707458496, "learning_rate": 6.5757552210062705e-06, "loss": 0.9826, "step": 3047 }, { "epoch": 0.4161660294920808, "grad_norm": 8.645273208618164, "learning_rate": 6.5736566000103545e-06, "loss": 1.0341, "step": 3048 }, { "epoch": 0.4163025669033315, "grad_norm": 6.86408805847168, "learning_rate": 6.571557671260369e-06, "loss": 1.1234, "step": 3049 }, { "epoch": 0.4164391043145822, "grad_norm": 6.460883617401123, "learning_rate": 6.569458435166798e-06, "loss": 1.1224, "step": 3050 }, { "epoch": 0.4165756417258329, "grad_norm": 10.479613304138184, "learning_rate": 6.567358892140178e-06, "loss": 1.0767, "step": 3051 }, { "epoch": 0.4167121791370836, "grad_norm": 6.974365711212158, "learning_rate": 6.565259042591112e-06, "loss": 0.844, "step": 3052 }, { "epoch": 0.4168487165483342, "grad_norm": 7.220246315002441, "learning_rate": 6.563158886930258e-06, "loss": 1.0884, "step": 3053 }, { "epoch": 0.4169852539595849, "grad_norm": 7.1056718826293945, "learning_rate": 6.561058425568334e-06, "loss": 0.9449, "step": 3054 }, { "epoch": 0.4171217913708356, "grad_norm": 5.715883731842041, "learning_rate": 6.558957658916123e-06, "loss": 0.9922, "step": 3055 }, { "epoch": 0.4172583287820863, "grad_norm": 6.198915004730225, "learning_rate": 6.556856587384459e-06, "loss": 0.8434, "step": 3056 }, { "epoch": 0.417394866193337, "grad_norm": 6.773345470428467, "learning_rate": 6.554755211384245e-06, "loss": 0.8811, "step": 3057 }, { "epoch": 0.4175314036045877, "grad_norm": 5.809813976287842, "learning_rate": 6.552653531326436e-06, "loss": 1.0789, "step": 3058 }, { "epoch": 0.4176679410158383, "grad_norm": 7.109429836273193, "learning_rate": 6.550551547622053e-06, "loss": 0.8451, "step": 3059 }, { "epoch": 0.417804478427089, "grad_norm": 13.590129852294922, "learning_rate": 6.548449260682169e-06, "loss": 1.0047, "step": 3060 }, { "epoch": 0.4179410158383397, "grad_norm": 5.842122554779053, "learning_rate": 6.546346670917925e-06, "loss": 1.0293, "step": 3061 }, { "epoch": 0.4180775532495904, "grad_norm": 6.808965682983398, "learning_rate": 6.544243778740512e-06, "loss": 1.1373, "step": 3062 }, { "epoch": 0.4182140906608411, "grad_norm": 6.584280490875244, "learning_rate": 6.5421405845611875e-06, "loss": 1.0093, "step": 3063 }, { "epoch": 0.4183506280720918, "grad_norm": 5.653814792633057, "learning_rate": 6.540037088791263e-06, "loss": 0.9394, "step": 3064 }, { "epoch": 0.4184871654833424, "grad_norm": 8.252896308898926, "learning_rate": 6.537933291842114e-06, "loss": 1.0629, "step": 3065 }, { "epoch": 0.4186237028945931, "grad_norm": 9.617838859558105, "learning_rate": 6.535829194125169e-06, "loss": 1.0728, "step": 3066 }, { "epoch": 0.4187602403058438, "grad_norm": 8.748862266540527, "learning_rate": 6.53372479605192e-06, "loss": 0.9672, "step": 3067 }, { "epoch": 0.4188967777170945, "grad_norm": 14.641648292541504, "learning_rate": 6.531620098033919e-06, "loss": 0.8785, "step": 3068 }, { "epoch": 0.4190333151283452, "grad_norm": 15.657959938049316, "learning_rate": 6.529515100482768e-06, "loss": 0.9055, "step": 3069 }, { "epoch": 0.4191698525395958, "grad_norm": 21.38869285583496, "learning_rate": 6.5274098038101375e-06, "loss": 1.1109, "step": 3070 }, { "epoch": 0.4193063899508465, "grad_norm": 31.253141403198242, "learning_rate": 6.525304208427752e-06, "loss": 1.1617, "step": 3071 }, { "epoch": 0.4194429273620972, "grad_norm": 58.5184211730957, "learning_rate": 6.523198314747393e-06, "loss": 1.11, "step": 3072 }, { "epoch": 0.4195794647733479, "grad_norm": 25.83836555480957, "learning_rate": 6.521092123180903e-06, "loss": 1.079, "step": 3073 }, { "epoch": 0.4197160021845986, "grad_norm": 62.252349853515625, "learning_rate": 6.518985634140184e-06, "loss": 1.2859, "step": 3074 }, { "epoch": 0.4198525395958493, "grad_norm": 44.069129943847656, "learning_rate": 6.516878848037191e-06, "loss": 1.158, "step": 3075 }, { "epoch": 0.4199890770070999, "grad_norm": 7.740957736968994, "learning_rate": 6.514771765283942e-06, "loss": 1.0394, "step": 3076 }, { "epoch": 0.4201256144183506, "grad_norm": 7.841851234436035, "learning_rate": 6.512664386292511e-06, "loss": 0.8999, "step": 3077 }, { "epoch": 0.4202621518296013, "grad_norm": 7.22431755065918, "learning_rate": 6.510556711475029e-06, "loss": 1.0707, "step": 3078 }, { "epoch": 0.420398689240852, "grad_norm": 8.200579643249512, "learning_rate": 6.508448741243687e-06, "loss": 0.9879, "step": 3079 }, { "epoch": 0.4205352266521027, "grad_norm": 8.539970397949219, "learning_rate": 6.506340476010734e-06, "loss": 1.1757, "step": 3080 }, { "epoch": 0.4206717640633534, "grad_norm": 8.308406829833984, "learning_rate": 6.504231916188474e-06, "loss": 0.8953, "step": 3081 }, { "epoch": 0.420808301474604, "grad_norm": 8.534614562988281, "learning_rate": 6.502123062189269e-06, "loss": 1.174, "step": 3082 }, { "epoch": 0.4209448388858547, "grad_norm": 6.751463890075684, "learning_rate": 6.50001391442554e-06, "loss": 0.9644, "step": 3083 }, { "epoch": 0.4210813762971054, "grad_norm": 5.426738262176514, "learning_rate": 6.497904473309766e-06, "loss": 0.9774, "step": 3084 }, { "epoch": 0.4212179137083561, "grad_norm": 7.133547782897949, "learning_rate": 6.495794739254483e-06, "loss": 0.9114, "step": 3085 }, { "epoch": 0.4213544511196068, "grad_norm": 6.214235305786133, "learning_rate": 6.493684712672282e-06, "loss": 1.0698, "step": 3086 }, { "epoch": 0.4214909885308575, "grad_norm": 14.247042655944824, "learning_rate": 6.491574393975815e-06, "loss": 0.9371, "step": 3087 }, { "epoch": 0.4216275259421081, "grad_norm": 7.350670337677002, "learning_rate": 6.489463783577787e-06, "loss": 0.9413, "step": 3088 }, { "epoch": 0.4217640633533588, "grad_norm": 6.394527912139893, "learning_rate": 6.487352881890964e-06, "loss": 0.9869, "step": 3089 }, { "epoch": 0.4219006007646095, "grad_norm": 6.098810195922852, "learning_rate": 6.485241689328164e-06, "loss": 1.0214, "step": 3090 }, { "epoch": 0.4220371381758602, "grad_norm": 7.089245796203613, "learning_rate": 6.483130206302269e-06, "loss": 0.9327, "step": 3091 }, { "epoch": 0.4221736755871109, "grad_norm": 5.948686599731445, "learning_rate": 6.481018433226212e-06, "loss": 1.0029, "step": 3092 }, { "epoch": 0.4223102129983616, "grad_norm": 7.758563995361328, "learning_rate": 6.478906370512982e-06, "loss": 1.024, "step": 3093 }, { "epoch": 0.4224467504096122, "grad_norm": 8.316962242126465, "learning_rate": 6.4767940185756305e-06, "loss": 1.0588, "step": 3094 }, { "epoch": 0.4225832878208629, "grad_norm": 11.343836784362793, "learning_rate": 6.474681377827261e-06, "loss": 0.999, "step": 3095 }, { "epoch": 0.4227198252321136, "grad_norm": 13.34882926940918, "learning_rate": 6.4725684486810335e-06, "loss": 0.8682, "step": 3096 }, { "epoch": 0.4228563626433643, "grad_norm": 7.519565105438232, "learning_rate": 6.470455231550167e-06, "loss": 1.0569, "step": 3097 }, { "epoch": 0.422992900054615, "grad_norm": 10.60969352722168, "learning_rate": 6.468341726847935e-06, "loss": 1.0157, "step": 3098 }, { "epoch": 0.4231294374658656, "grad_norm": 9.320820808410645, "learning_rate": 6.4662279349876674e-06, "loss": 1.0225, "step": 3099 }, { "epoch": 0.4232659748771163, "grad_norm": 30.271705627441406, "learning_rate": 6.464113856382752e-06, "loss": 1.0413, "step": 3100 }, { "epoch": 0.423402512288367, "grad_norm": 41.99192428588867, "learning_rate": 6.461999491446631e-06, "loss": 0.9998, "step": 3101 }, { "epoch": 0.4235390496996177, "grad_norm": 17.60826301574707, "learning_rate": 6.459884840592799e-06, "loss": 1.2674, "step": 3102 }, { "epoch": 0.4236755871108684, "grad_norm": 32.41632843017578, "learning_rate": 6.457769904234814e-06, "loss": 1.0092, "step": 3103 }, { "epoch": 0.4238121245221191, "grad_norm": 6.967065334320068, "learning_rate": 6.455654682786284e-06, "loss": 1.1219, "step": 3104 }, { "epoch": 0.4239486619333697, "grad_norm": 8.68334674835205, "learning_rate": 6.453539176660877e-06, "loss": 1.0576, "step": 3105 }, { "epoch": 0.4240851993446204, "grad_norm": 8.054780006408691, "learning_rate": 6.451423386272312e-06, "loss": 1.0363, "step": 3106 }, { "epoch": 0.4242217367558711, "grad_norm": 7.956326007843018, "learning_rate": 6.44930731203437e-06, "loss": 1.1793, "step": 3107 }, { "epoch": 0.4243582741671218, "grad_norm": 19.704805374145508, "learning_rate": 6.447190954360878e-06, "loss": 1.0257, "step": 3108 }, { "epoch": 0.4244948115783725, "grad_norm": 9.125407218933105, "learning_rate": 6.445074313665729e-06, "loss": 1.0198, "step": 3109 }, { "epoch": 0.4246313489896232, "grad_norm": 9.588525772094727, "learning_rate": 6.442957390362864e-06, "loss": 1.025, "step": 3110 }, { "epoch": 0.4247678864008738, "grad_norm": 7.688592910766602, "learning_rate": 6.440840184866281e-06, "loss": 1.0411, "step": 3111 }, { "epoch": 0.4249044238121245, "grad_norm": 7.163168907165527, "learning_rate": 6.438722697590038e-06, "loss": 1.0904, "step": 3112 }, { "epoch": 0.4250409612233752, "grad_norm": 7.7545671463012695, "learning_rate": 6.436604928948241e-06, "loss": 0.9871, "step": 3113 }, { "epoch": 0.4251774986346259, "grad_norm": 8.558239936828613, "learning_rate": 6.434486879355053e-06, "loss": 0.9722, "step": 3114 }, { "epoch": 0.4253140360458766, "grad_norm": 7.109350681304932, "learning_rate": 6.432368549224693e-06, "loss": 0.9551, "step": 3115 }, { "epoch": 0.4254505734571273, "grad_norm": 14.79693603515625, "learning_rate": 6.430249938971438e-06, "loss": 1.202, "step": 3116 }, { "epoch": 0.4255871108683779, "grad_norm": 8.65604305267334, "learning_rate": 6.428131049009613e-06, "loss": 0.8361, "step": 3117 }, { "epoch": 0.4257236482796286, "grad_norm": 10.758996963500977, "learning_rate": 6.426011879753602e-06, "loss": 0.9713, "step": 3118 }, { "epoch": 0.4258601856908793, "grad_norm": 7.78549861907959, "learning_rate": 6.423892431617847e-06, "loss": 1.014, "step": 3119 }, { "epoch": 0.42599672310213, "grad_norm": 9.16383171081543, "learning_rate": 6.4217727050168336e-06, "loss": 1.0627, "step": 3120 }, { "epoch": 0.4261332605133807, "grad_norm": 9.215062141418457, "learning_rate": 6.419652700365113e-06, "loss": 0.9525, "step": 3121 }, { "epoch": 0.4262697979246313, "grad_norm": 8.939230918884277, "learning_rate": 6.417532418077287e-06, "loss": 0.9945, "step": 3122 }, { "epoch": 0.426406335335882, "grad_norm": 8.102400779724121, "learning_rate": 6.4154118585680085e-06, "loss": 0.9601, "step": 3123 }, { "epoch": 0.4265428727471327, "grad_norm": 7.343319416046143, "learning_rate": 6.41329102225199e-06, "loss": 0.9221, "step": 3124 }, { "epoch": 0.4266794101583834, "grad_norm": 8.155294418334961, "learning_rate": 6.411169909543993e-06, "loss": 1.0067, "step": 3125 }, { "epoch": 0.4268159475696341, "grad_norm": 7.493625640869141, "learning_rate": 6.409048520858837e-06, "loss": 0.7809, "step": 3126 }, { "epoch": 0.4269524849808848, "grad_norm": 6.5978217124938965, "learning_rate": 6.406926856611393e-06, "loss": 0.9894, "step": 3127 }, { "epoch": 0.4270890223921354, "grad_norm": 9.890664100646973, "learning_rate": 6.404804917216588e-06, "loss": 0.9962, "step": 3128 }, { "epoch": 0.4272255598033861, "grad_norm": 6.339402675628662, "learning_rate": 6.402682703089401e-06, "loss": 0.8546, "step": 3129 }, { "epoch": 0.4273620972146368, "grad_norm": 7.786122798919678, "learning_rate": 6.400560214644866e-06, "loss": 0.9151, "step": 3130 }, { "epoch": 0.4274986346258875, "grad_norm": 6.973571300506592, "learning_rate": 6.398437452298068e-06, "loss": 0.9229, "step": 3131 }, { "epoch": 0.4276351720371382, "grad_norm": 6.028951168060303, "learning_rate": 6.396314416464151e-06, "loss": 0.9969, "step": 3132 }, { "epoch": 0.4277717094483889, "grad_norm": 9.272881507873535, "learning_rate": 6.394191107558307e-06, "loss": 0.9299, "step": 3133 }, { "epoch": 0.4279082468596395, "grad_norm": 8.260950088500977, "learning_rate": 6.392067525995783e-06, "loss": 1.0397, "step": 3134 }, { "epoch": 0.4280447842708902, "grad_norm": 9.512899398803711, "learning_rate": 6.3899436721918805e-06, "loss": 0.8825, "step": 3135 }, { "epoch": 0.4281813216821409, "grad_norm": 5.848815441131592, "learning_rate": 6.387819546561953e-06, "loss": 1.0213, "step": 3136 }, { "epoch": 0.4283178590933916, "grad_norm": 6.352452278137207, "learning_rate": 6.385695149521408e-06, "loss": 1.0673, "step": 3137 }, { "epoch": 0.4284543965046423, "grad_norm": 7.925137519836426, "learning_rate": 6.383570481485708e-06, "loss": 0.8532, "step": 3138 }, { "epoch": 0.428590933915893, "grad_norm": 6.98323392868042, "learning_rate": 6.381445542870363e-06, "loss": 1.0859, "step": 3139 }, { "epoch": 0.4287274713271436, "grad_norm": 7.9195475578308105, "learning_rate": 6.37932033409094e-06, "loss": 0.919, "step": 3140 }, { "epoch": 0.4288640087383943, "grad_norm": 5.388498783111572, "learning_rate": 6.377194855563059e-06, "loss": 1.0446, "step": 3141 }, { "epoch": 0.429000546149645, "grad_norm": 6.129664897918701, "learning_rate": 6.375069107702392e-06, "loss": 0.9124, "step": 3142 }, { "epoch": 0.4291370835608957, "grad_norm": 8.659010887145996, "learning_rate": 6.3729430909246625e-06, "loss": 0.6908, "step": 3143 }, { "epoch": 0.4292736209721464, "grad_norm": 7.561368942260742, "learning_rate": 6.370816805645647e-06, "loss": 0.9331, "step": 3144 }, { "epoch": 0.4294101583833971, "grad_norm": 6.24357795715332, "learning_rate": 6.368690252281178e-06, "loss": 1.1045, "step": 3145 }, { "epoch": 0.4295466957946477, "grad_norm": 6.379855155944824, "learning_rate": 6.366563431247134e-06, "loss": 1.0213, "step": 3146 }, { "epoch": 0.4296832332058984, "grad_norm": 9.425196647644043, "learning_rate": 6.364436342959451e-06, "loss": 0.9743, "step": 3147 }, { "epoch": 0.4298197706171491, "grad_norm": 6.550610065460205, "learning_rate": 6.3623089878341146e-06, "loss": 0.921, "step": 3148 }, { "epoch": 0.4299563080283998, "grad_norm": 8.748936653137207, "learning_rate": 6.3601813662871646e-06, "loss": 0.9682, "step": 3149 }, { "epoch": 0.4300928454396505, "grad_norm": 8.688496589660645, "learning_rate": 6.358053478734693e-06, "loss": 1.0233, "step": 3150 }, { "epoch": 0.4302293828509011, "grad_norm": 8.629069328308105, "learning_rate": 6.355925325592841e-06, "loss": 0.9598, "step": 3151 }, { "epoch": 0.4303659202621518, "grad_norm": 13.220932960510254, "learning_rate": 6.353796907277804e-06, "loss": 0.9974, "step": 3152 }, { "epoch": 0.4305024576734025, "grad_norm": 23.41579246520996, "learning_rate": 6.351668224205828e-06, "loss": 1.0448, "step": 3153 }, { "epoch": 0.4306389950846532, "grad_norm": 11.386629104614258, "learning_rate": 6.349539276793212e-06, "loss": 1.0522, "step": 3154 }, { "epoch": 0.4307755324959039, "grad_norm": 8.975319862365723, "learning_rate": 6.347410065456305e-06, "loss": 0.881, "step": 3155 }, { "epoch": 0.4309120699071546, "grad_norm": 9.86809253692627, "learning_rate": 6.345280590611512e-06, "loss": 0.9427, "step": 3156 }, { "epoch": 0.4310486073184052, "grad_norm": 21.961685180664062, "learning_rate": 6.343150852675284e-06, "loss": 0.9028, "step": 3157 }, { "epoch": 0.4311851447296559, "grad_norm": 10.829675674438477, "learning_rate": 6.341020852064126e-06, "loss": 0.8969, "step": 3158 }, { "epoch": 0.4313216821409066, "grad_norm": 6.995420455932617, "learning_rate": 6.338890589194594e-06, "loss": 1.0377, "step": 3159 }, { "epoch": 0.4314582195521573, "grad_norm": 5.012026786804199, "learning_rate": 6.336760064483296e-06, "loss": 0.9658, "step": 3160 }, { "epoch": 0.431594756963408, "grad_norm": 11.749007225036621, "learning_rate": 6.334629278346891e-06, "loss": 1.0646, "step": 3161 }, { "epoch": 0.4317312943746587, "grad_norm": 6.960147380828857, "learning_rate": 6.332498231202088e-06, "loss": 0.8351, "step": 3162 }, { "epoch": 0.4318678317859093, "grad_norm": 6.466558456420898, "learning_rate": 6.330366923465647e-06, "loss": 0.926, "step": 3163 }, { "epoch": 0.43200436919716, "grad_norm": 5.9127092361450195, "learning_rate": 6.328235355554382e-06, "loss": 1.0168, "step": 3164 }, { "epoch": 0.4321409066084107, "grad_norm": 5.904780387878418, "learning_rate": 6.326103527885155e-06, "loss": 0.9838, "step": 3165 }, { "epoch": 0.4322774440196614, "grad_norm": 9.005804061889648, "learning_rate": 6.323971440874878e-06, "loss": 1.0853, "step": 3166 }, { "epoch": 0.4324139814309121, "grad_norm": 5.807237148284912, "learning_rate": 6.3218390949405165e-06, "loss": 1.0117, "step": 3167 }, { "epoch": 0.4325505188421628, "grad_norm": 9.299508094787598, "learning_rate": 6.319706490499085e-06, "loss": 0.8824, "step": 3168 }, { "epoch": 0.4326870562534134, "grad_norm": 8.4609956741333, "learning_rate": 6.317573627967648e-06, "loss": 1.0442, "step": 3169 }, { "epoch": 0.4328235936646641, "grad_norm": 5.909036636352539, "learning_rate": 6.315440507763324e-06, "loss": 0.9076, "step": 3170 }, { "epoch": 0.4329601310759148, "grad_norm": 9.764897346496582, "learning_rate": 6.3133071303032755e-06, "loss": 0.8594, "step": 3171 }, { "epoch": 0.4330966684871655, "grad_norm": 6.269824028015137, "learning_rate": 6.311173496004723e-06, "loss": 1.1332, "step": 3172 }, { "epoch": 0.4332332058984162, "grad_norm": 6.683200836181641, "learning_rate": 6.3090396052849305e-06, "loss": 0.896, "step": 3173 }, { "epoch": 0.4333697433096668, "grad_norm": 10.658092498779297, "learning_rate": 6.306905458561214e-06, "loss": 1.0178, "step": 3174 }, { "epoch": 0.4335062807209175, "grad_norm": 11.423171043395996, "learning_rate": 6.304771056250945e-06, "loss": 1.1157, "step": 3175 }, { "epoch": 0.4336428181321682, "grad_norm": 5.855122089385986, "learning_rate": 6.302636398771536e-06, "loss": 0.9909, "step": 3176 }, { "epoch": 0.4337793555434189, "grad_norm": 6.405190467834473, "learning_rate": 6.300501486540457e-06, "loss": 1.0476, "step": 3177 }, { "epoch": 0.4339158929546696, "grad_norm": 7.209458351135254, "learning_rate": 6.298366319975222e-06, "loss": 0.9412, "step": 3178 }, { "epoch": 0.4340524303659203, "grad_norm": 12.25793170928955, "learning_rate": 6.2962308994933996e-06, "loss": 1.0703, "step": 3179 }, { "epoch": 0.4341889677771709, "grad_norm": 6.145587921142578, "learning_rate": 6.294095225512604e-06, "loss": 0.9492, "step": 3180 }, { "epoch": 0.4343255051884216, "grad_norm": 12.538482666015625, "learning_rate": 6.2919592984505025e-06, "loss": 1.0512, "step": 3181 }, { "epoch": 0.4344620425996723, "grad_norm": 14.642934799194336, "learning_rate": 6.289823118724812e-06, "loss": 1.0742, "step": 3182 }, { "epoch": 0.434598580010923, "grad_norm": 14.477701187133789, "learning_rate": 6.287686686753291e-06, "loss": 0.8472, "step": 3183 }, { "epoch": 0.4347351174221737, "grad_norm": 15.334817886352539, "learning_rate": 6.285550002953761e-06, "loss": 0.9512, "step": 3184 }, { "epoch": 0.4348716548334244, "grad_norm": 25.218326568603516, "learning_rate": 6.2834130677440786e-06, "loss": 1.1288, "step": 3185 }, { "epoch": 0.435008192244675, "grad_norm": 14.34673023223877, "learning_rate": 6.281275881542159e-06, "loss": 1.0293, "step": 3186 }, { "epoch": 0.4351447296559257, "grad_norm": 11.09196949005127, "learning_rate": 6.279138444765964e-06, "loss": 0.9504, "step": 3187 }, { "epoch": 0.4352812670671764, "grad_norm": 7.5194501876831055, "learning_rate": 6.2770007578335044e-06, "loss": 0.9777, "step": 3188 }, { "epoch": 0.4354178044784271, "grad_norm": 17.85151481628418, "learning_rate": 6.274862821162838e-06, "loss": 1.0316, "step": 3189 }, { "epoch": 0.4355543418896778, "grad_norm": 5.258164882659912, "learning_rate": 6.272724635172075e-06, "loss": 0.9283, "step": 3190 }, { "epoch": 0.4356908793009285, "grad_norm": 5.692591190338135, "learning_rate": 6.27058620027937e-06, "loss": 1.0248, "step": 3191 }, { "epoch": 0.4358274167121791, "grad_norm": 9.133248329162598, "learning_rate": 6.26844751690293e-06, "loss": 1.0085, "step": 3192 }, { "epoch": 0.4359639541234298, "grad_norm": 11.233198165893555, "learning_rate": 6.2663085854610084e-06, "loss": 0.9353, "step": 3193 }, { "epoch": 0.4361004915346805, "grad_norm": 9.14649486541748, "learning_rate": 6.264169406371908e-06, "loss": 1.0273, "step": 3194 }, { "epoch": 0.4362370289459312, "grad_norm": 11.081869125366211, "learning_rate": 6.262029980053981e-06, "loss": 0.931, "step": 3195 }, { "epoch": 0.4363735663571819, "grad_norm": 8.042177200317383, "learning_rate": 6.259890306925627e-06, "loss": 0.9377, "step": 3196 }, { "epoch": 0.4365101037684326, "grad_norm": 10.476299285888672, "learning_rate": 6.257750387405293e-06, "loss": 1.0117, "step": 3197 }, { "epoch": 0.4366466411796832, "grad_norm": 6.694096088409424, "learning_rate": 6.255610221911473e-06, "loss": 1.0246, "step": 3198 }, { "epoch": 0.4367831785909339, "grad_norm": 8.345856666564941, "learning_rate": 6.253469810862715e-06, "loss": 0.9988, "step": 3199 }, { "epoch": 0.4369197160021846, "grad_norm": 9.62939739227295, "learning_rate": 6.251329154677608e-06, "loss": 0.9071, "step": 3200 }, { "epoch": 0.4370562534134353, "grad_norm": 13.91903305053711, "learning_rate": 6.249188253774794e-06, "loss": 0.9962, "step": 3201 }, { "epoch": 0.437192790824686, "grad_norm": 6.507308006286621, "learning_rate": 6.24704710857296e-06, "loss": 1.0425, "step": 3202 }, { "epoch": 0.4373293282359366, "grad_norm": 15.132488250732422, "learning_rate": 6.244905719490841e-06, "loss": 0.9503, "step": 3203 }, { "epoch": 0.4374658656471873, "grad_norm": 7.567615032196045, "learning_rate": 6.2427640869472235e-06, "loss": 1.0078, "step": 3204 }, { "epoch": 0.437602403058438, "grad_norm": 7.341262340545654, "learning_rate": 6.240622211360934e-06, "loss": 0.9984, "step": 3205 }, { "epoch": 0.4377389404696887, "grad_norm": 5.388112545013428, "learning_rate": 6.238480093150854e-06, "loss": 1.0577, "step": 3206 }, { "epoch": 0.4378754778809394, "grad_norm": 6.6639404296875, "learning_rate": 6.236337732735907e-06, "loss": 1.0886, "step": 3207 }, { "epoch": 0.4380120152921901, "grad_norm": 7.045592784881592, "learning_rate": 6.234195130535069e-06, "loss": 0.8673, "step": 3208 }, { "epoch": 0.4381485527034407, "grad_norm": 8.05612564086914, "learning_rate": 6.232052286967361e-06, "loss": 0.9847, "step": 3209 }, { "epoch": 0.4382850901146914, "grad_norm": 8.621404647827148, "learning_rate": 6.229909202451847e-06, "loss": 1.0182, "step": 3210 }, { "epoch": 0.4384216275259421, "grad_norm": 6.799225807189941, "learning_rate": 6.2277658774076445e-06, "loss": 1.1425, "step": 3211 }, { "epoch": 0.4385581649371928, "grad_norm": 6.769572734832764, "learning_rate": 6.225622312253916e-06, "loss": 1.2251, "step": 3212 }, { "epoch": 0.4386947023484435, "grad_norm": 6.767605304718018, "learning_rate": 6.223478507409869e-06, "loss": 0.9074, "step": 3213 }, { "epoch": 0.4388312397596942, "grad_norm": 8.168427467346191, "learning_rate": 6.22133446329476e-06, "loss": 1.0341, "step": 3214 }, { "epoch": 0.4389677771709448, "grad_norm": 8.81424331665039, "learning_rate": 6.219190180327891e-06, "loss": 1.0143, "step": 3215 }, { "epoch": 0.4391043145821955, "grad_norm": 93.41905975341797, "learning_rate": 6.217045658928613e-06, "loss": 0.9537, "step": 3216 }, { "epoch": 0.4392408519934462, "grad_norm": 8.897623062133789, "learning_rate": 6.21490089951632e-06, "loss": 0.8779, "step": 3217 }, { "epoch": 0.4393773894046969, "grad_norm": 11.563440322875977, "learning_rate": 6.2127559025104555e-06, "loss": 0.957, "step": 3218 }, { "epoch": 0.4395139268159476, "grad_norm": 10.088201522827148, "learning_rate": 6.210610668330508e-06, "loss": 1.0337, "step": 3219 }, { "epoch": 0.4396504642271983, "grad_norm": 6.896104335784912, "learning_rate": 6.208465197396013e-06, "loss": 1.0226, "step": 3220 }, { "epoch": 0.4397870016384489, "grad_norm": 7.971157073974609, "learning_rate": 6.206319490126552e-06, "loss": 0.8789, "step": 3221 }, { "epoch": 0.4399235390496996, "grad_norm": 8.356730461120605, "learning_rate": 6.204173546941754e-06, "loss": 1.0628, "step": 3222 }, { "epoch": 0.4400600764609503, "grad_norm": 7.020260334014893, "learning_rate": 6.202027368261292e-06, "loss": 0.8254, "step": 3223 }, { "epoch": 0.440196613872201, "grad_norm": 9.603171348571777, "learning_rate": 6.199880954504884e-06, "loss": 1.2761, "step": 3224 }, { "epoch": 0.4403331512834517, "grad_norm": 7.574984550476074, "learning_rate": 6.1977343060923e-06, "loss": 1.1428, "step": 3225 }, { "epoch": 0.4404696886947023, "grad_norm": 9.557082176208496, "learning_rate": 6.195587423443349e-06, "loss": 0.8839, "step": 3226 }, { "epoch": 0.440606226105953, "grad_norm": 9.234078407287598, "learning_rate": 6.1934403069778895e-06, "loss": 1.0886, "step": 3227 }, { "epoch": 0.4407427635172037, "grad_norm": 8.507935523986816, "learning_rate": 6.191292957115825e-06, "loss": 0.9999, "step": 3228 }, { "epoch": 0.4408793009284544, "grad_norm": 8.966573715209961, "learning_rate": 6.189145374277105e-06, "loss": 0.9395, "step": 3229 }, { "epoch": 0.4410158383397051, "grad_norm": 6.166525840759277, "learning_rate": 6.186997558881724e-06, "loss": 1.1195, "step": 3230 }, { "epoch": 0.4411523757509558, "grad_norm": 6.043544292449951, "learning_rate": 6.184849511349723e-06, "loss": 1.0181, "step": 3231 }, { "epoch": 0.4412889131622064, "grad_norm": 10.8473482131958, "learning_rate": 6.182701232101184e-06, "loss": 0.9992, "step": 3232 }, { "epoch": 0.4414254505734571, "grad_norm": 6.798439979553223, "learning_rate": 6.180552721556244e-06, "loss": 0.974, "step": 3233 }, { "epoch": 0.4415619879847078, "grad_norm": 7.755008220672607, "learning_rate": 6.1784039801350726e-06, "loss": 0.906, "step": 3234 }, { "epoch": 0.4416985253959585, "grad_norm": 14.577199935913086, "learning_rate": 6.176255008257895e-06, "loss": 0.9416, "step": 3235 }, { "epoch": 0.4418350628072092, "grad_norm": 6.040658950805664, "learning_rate": 6.174105806344975e-06, "loss": 0.9425, "step": 3236 }, { "epoch": 0.4419716002184599, "grad_norm": 7.715141296386719, "learning_rate": 6.171956374816627e-06, "loss": 0.9786, "step": 3237 }, { "epoch": 0.4421081376297105, "grad_norm": 5.674663066864014, "learning_rate": 6.1698067140932035e-06, "loss": 1.0559, "step": 3238 }, { "epoch": 0.4422446750409612, "grad_norm": 9.928032875061035, "learning_rate": 6.167656824595109e-06, "loss": 0.9431, "step": 3239 }, { "epoch": 0.4423812124522119, "grad_norm": 7.244250297546387, "learning_rate": 6.165506706742786e-06, "loss": 0.8245, "step": 3240 }, { "epoch": 0.4425177498634626, "grad_norm": 6.483587265014648, "learning_rate": 6.163356360956729e-06, "loss": 1.1447, "step": 3241 }, { "epoch": 0.4426542872747133, "grad_norm": 7.482090950012207, "learning_rate": 6.161205787657469e-06, "loss": 0.9762, "step": 3242 }, { "epoch": 0.442790824685964, "grad_norm": 7.114530563354492, "learning_rate": 6.159054987265588e-06, "loss": 0.9274, "step": 3243 }, { "epoch": 0.4429273620972146, "grad_norm": 8.527421951293945, "learning_rate": 6.156903960201709e-06, "loss": 1.0376, "step": 3244 }, { "epoch": 0.4430638995084653, "grad_norm": 6.847373962402344, "learning_rate": 6.154752706886499e-06, "loss": 0.8713, "step": 3245 }, { "epoch": 0.443200436919716, "grad_norm": 5.507210731506348, "learning_rate": 6.152601227740672e-06, "loss": 1.0046, "step": 3246 }, { "epoch": 0.4433369743309667, "grad_norm": 7.028973579406738, "learning_rate": 6.150449523184985e-06, "loss": 1.0004, "step": 3247 }, { "epoch": 0.4434735117422174, "grad_norm": 9.997851371765137, "learning_rate": 6.148297593640238e-06, "loss": 1.0161, "step": 3248 }, { "epoch": 0.4436100491534681, "grad_norm": 9.306519508361816, "learning_rate": 6.146145439527274e-06, "loss": 1.0014, "step": 3249 }, { "epoch": 0.4437465865647187, "grad_norm": 6.1528191566467285, "learning_rate": 6.143993061266985e-06, "loss": 1.0323, "step": 3250 }, { "epoch": 0.4438831239759694, "grad_norm": 6.59522819519043, "learning_rate": 6.1418404592803015e-06, "loss": 0.9957, "step": 3251 }, { "epoch": 0.4440196613872201, "grad_norm": 8.937469482421875, "learning_rate": 6.1396876339882e-06, "loss": 0.913, "step": 3252 }, { "epoch": 0.4441561987984708, "grad_norm": 12.220613479614258, "learning_rate": 6.137534585811701e-06, "loss": 1.0533, "step": 3253 }, { "epoch": 0.4442927362097215, "grad_norm": 5.916696071624756, "learning_rate": 6.135381315171867e-06, "loss": 1.0109, "step": 3254 }, { "epoch": 0.4444292736209721, "grad_norm": 6.553743839263916, "learning_rate": 6.133227822489806e-06, "loss": 1.0741, "step": 3255 }, { "epoch": 0.4445658110322228, "grad_norm": 7.299474239349365, "learning_rate": 6.131074108186667e-06, "loss": 1.017, "step": 3256 }, { "epoch": 0.4447023484434735, "grad_norm": 7.5961384773254395, "learning_rate": 6.128920172683644e-06, "loss": 0.9089, "step": 3257 }, { "epoch": 0.4448388858547242, "grad_norm": 5.6889824867248535, "learning_rate": 6.126766016401976e-06, "loss": 1.0091, "step": 3258 }, { "epoch": 0.4449754232659749, "grad_norm": 7.203798770904541, "learning_rate": 6.1246116397629405e-06, "loss": 1.0255, "step": 3259 }, { "epoch": 0.4451119606772256, "grad_norm": 6.307995796203613, "learning_rate": 6.122457043187863e-06, "loss": 1.0657, "step": 3260 }, { "epoch": 0.4452484980884762, "grad_norm": 7.49009370803833, "learning_rate": 6.1203022270981095e-06, "loss": 0.945, "step": 3261 }, { "epoch": 0.4453850354997269, "grad_norm": 9.849950790405273, "learning_rate": 6.118147191915088e-06, "loss": 0.9816, "step": 3262 }, { "epoch": 0.4455215729109776, "grad_norm": 7.745129585266113, "learning_rate": 6.115991938060252e-06, "loss": 0.9254, "step": 3263 }, { "epoch": 0.4456581103222283, "grad_norm": 6.859891414642334, "learning_rate": 6.113836465955094e-06, "loss": 1.056, "step": 3264 }, { "epoch": 0.445794647733479, "grad_norm": 7.133142471313477, "learning_rate": 6.111680776021154e-06, "loss": 0.9536, "step": 3265 }, { "epoch": 0.4459311851447297, "grad_norm": 6.212580680847168, "learning_rate": 6.1095248686800105e-06, "loss": 1.0078, "step": 3266 }, { "epoch": 0.4460677225559803, "grad_norm": 6.722409725189209, "learning_rate": 6.107368744353288e-06, "loss": 1.0682, "step": 3267 }, { "epoch": 0.446204259967231, "grad_norm": 7.177855968475342, "learning_rate": 6.10521240346265e-06, "loss": 1.085, "step": 3268 }, { "epoch": 0.4463407973784817, "grad_norm": 5.742508888244629, "learning_rate": 6.103055846429804e-06, "loss": 0.9046, "step": 3269 }, { "epoch": 0.4464773347897324, "grad_norm": 6.713708877563477, "learning_rate": 6.1008990736765e-06, "loss": 0.8982, "step": 3270 }, { "epoch": 0.4466138722009831, "grad_norm": 8.210559844970703, "learning_rate": 6.098742085624529e-06, "loss": 0.8418, "step": 3271 }, { "epoch": 0.4467504096122338, "grad_norm": 7.8428215980529785, "learning_rate": 6.096584882695728e-06, "loss": 0.8678, "step": 3272 }, { "epoch": 0.4468869470234844, "grad_norm": 9.893601417541504, "learning_rate": 6.0944274653119695e-06, "loss": 1.068, "step": 3273 }, { "epoch": 0.4470234844347351, "grad_norm": 10.564047813415527, "learning_rate": 6.092269833895174e-06, "loss": 1.0131, "step": 3274 }, { "epoch": 0.4471600218459858, "grad_norm": 7.778263568878174, "learning_rate": 6.0901119888673e-06, "loss": 1.0176, "step": 3275 }, { "epoch": 0.4472965592572365, "grad_norm": 7.1538214683532715, "learning_rate": 6.087953930650349e-06, "loss": 1.0858, "step": 3276 }, { "epoch": 0.4474330966684872, "grad_norm": 7.523618221282959, "learning_rate": 6.085795659666364e-06, "loss": 0.8977, "step": 3277 }, { "epoch": 0.4475696340797378, "grad_norm": 6.627319812774658, "learning_rate": 6.08363717633743e-06, "loss": 0.921, "step": 3278 }, { "epoch": 0.4477061714909885, "grad_norm": 8.31324291229248, "learning_rate": 6.081478481085675e-06, "loss": 1.0877, "step": 3279 }, { "epoch": 0.4478427089022392, "grad_norm": 5.437684059143066, "learning_rate": 6.079319574333266e-06, "loss": 0.8509, "step": 3280 }, { "epoch": 0.4479792463134899, "grad_norm": 6.517136573791504, "learning_rate": 6.077160456502411e-06, "loss": 0.9831, "step": 3281 }, { "epoch": 0.4481157837247406, "grad_norm": 10.968679428100586, "learning_rate": 6.075001128015362e-06, "loss": 1.1104, "step": 3282 }, { "epoch": 0.4482523211359913, "grad_norm": 5.5145978927612305, "learning_rate": 6.0728415892944105e-06, "loss": 1.0193, "step": 3283 }, { "epoch": 0.4483888585472419, "grad_norm": 7.044874668121338, "learning_rate": 6.070681840761889e-06, "loss": 0.981, "step": 3284 }, { "epoch": 0.4485253959584926, "grad_norm": 7.5828537940979, "learning_rate": 6.06852188284017e-06, "loss": 1.1426, "step": 3285 }, { "epoch": 0.4486619333697433, "grad_norm": 7.683312892913818, "learning_rate": 6.0663617159516705e-06, "loss": 0.8729, "step": 3286 }, { "epoch": 0.448798470780994, "grad_norm": 10.364477157592773, "learning_rate": 6.064201340518846e-06, "loss": 1.0047, "step": 3287 }, { "epoch": 0.4489350081922447, "grad_norm": 18.26905632019043, "learning_rate": 6.062040756964192e-06, "loss": 1.0452, "step": 3288 }, { "epoch": 0.4490715456034954, "grad_norm": 7.801833152770996, "learning_rate": 6.059879965710245e-06, "loss": 1.0105, "step": 3289 }, { "epoch": 0.449208083014746, "grad_norm": 10.087602615356445, "learning_rate": 6.057718967179583e-06, "loss": 1.1145, "step": 3290 }, { "epoch": 0.4493446204259967, "grad_norm": 10.35301685333252, "learning_rate": 6.055557761794826e-06, "loss": 0.96, "step": 3291 }, { "epoch": 0.4494811578372474, "grad_norm": 10.633131980895996, "learning_rate": 6.053396349978632e-06, "loss": 0.9464, "step": 3292 }, { "epoch": 0.4496176952484981, "grad_norm": 7.570821285247803, "learning_rate": 6.0512347321537e-06, "loss": 1.1844, "step": 3293 }, { "epoch": 0.4497542326597488, "grad_norm": 12.550382614135742, "learning_rate": 6.049072908742771e-06, "loss": 0.9411, "step": 3294 }, { "epoch": 0.4498907700709995, "grad_norm": 18.06302261352539, "learning_rate": 6.046910880168619e-06, "loss": 0.8944, "step": 3295 }, { "epoch": 0.4500273074822501, "grad_norm": 6.862484455108643, "learning_rate": 6.04474864685407e-06, "loss": 0.9854, "step": 3296 }, { "epoch": 0.4501638448935008, "grad_norm": 10.46908187866211, "learning_rate": 6.04258620922198e-06, "loss": 1.059, "step": 3297 }, { "epoch": 0.4503003823047515, "grad_norm": 6.808405876159668, "learning_rate": 6.040423567695251e-06, "loss": 0.9998, "step": 3298 }, { "epoch": 0.4504369197160022, "grad_norm": 15.314498901367188, "learning_rate": 6.0382607226968226e-06, "loss": 1.0819, "step": 3299 }, { "epoch": 0.4505734571272529, "grad_norm": 31.092958450317383, "learning_rate": 6.036097674649672e-06, "loss": 1.0113, "step": 3300 }, { "epoch": 0.4507099945385035, "grad_norm": 5.997395038604736, "learning_rate": 6.03393442397682e-06, "loss": 1.1075, "step": 3301 }, { "epoch": 0.4508465319497542, "grad_norm": 8.946466445922852, "learning_rate": 6.031770971101325e-06, "loss": 0.9154, "step": 3302 }, { "epoch": 0.4509830693610049, "grad_norm": 12.147799491882324, "learning_rate": 6.029607316446286e-06, "loss": 0.9302, "step": 3303 }, { "epoch": 0.4511196067722556, "grad_norm": 9.843483924865723, "learning_rate": 6.027443460434841e-06, "loss": 0.9159, "step": 3304 }, { "epoch": 0.4512561441835063, "grad_norm": 11.928657531738281, "learning_rate": 6.025279403490165e-06, "loss": 0.8426, "step": 3305 }, { "epoch": 0.451392681594757, "grad_norm": 25.330020904541016, "learning_rate": 6.023115146035477e-06, "loss": 1.0449, "step": 3306 }, { "epoch": 0.4515292190060076, "grad_norm": 22.469728469848633, "learning_rate": 6.02095068849403e-06, "loss": 1.0141, "step": 3307 }, { "epoch": 0.4516657564172583, "grad_norm": 48.62971878051758, "learning_rate": 6.018786031289119e-06, "loss": 0.9723, "step": 3308 }, { "epoch": 0.451802293828509, "grad_norm": 9.79395580291748, "learning_rate": 6.01662117484408e-06, "loss": 0.9323, "step": 3309 }, { "epoch": 0.4519388312397597, "grad_norm": 8.554288864135742, "learning_rate": 6.014456119582285e-06, "loss": 0.9012, "step": 3310 }, { "epoch": 0.4520753686510104, "grad_norm": 11.151392936706543, "learning_rate": 6.012290865927145e-06, "loss": 0.903, "step": 3311 }, { "epoch": 0.4522119060622611, "grad_norm": 8.190482139587402, "learning_rate": 6.010125414302112e-06, "loss": 0.9085, "step": 3312 }, { "epoch": 0.4523484434735117, "grad_norm": 7.72269344329834, "learning_rate": 6.007959765130674e-06, "loss": 0.9641, "step": 3313 }, { "epoch": 0.4524849808847624, "grad_norm": 6.8534255027771, "learning_rate": 6.005793918836358e-06, "loss": 1.0323, "step": 3314 }, { "epoch": 0.4526215182960131, "grad_norm": 6.0790486335754395, "learning_rate": 6.00362787584273e-06, "loss": 1.0288, "step": 3315 }, { "epoch": 0.4527580557072638, "grad_norm": 5.076391696929932, "learning_rate": 6.001461636573397e-06, "loss": 0.8725, "step": 3316 }, { "epoch": 0.4528945931185145, "grad_norm": 6.65741491317749, "learning_rate": 5.9992952014520015e-06, "loss": 0.9756, "step": 3317 }, { "epoch": 0.4530311305297652, "grad_norm": 9.055595397949219, "learning_rate": 5.997128570902225e-06, "loss": 0.9724, "step": 3318 }, { "epoch": 0.4531676679410158, "grad_norm": 5.328054428100586, "learning_rate": 5.994961745347787e-06, "loss": 0.9143, "step": 3319 }, { "epoch": 0.4533042053522665, "grad_norm": 8.55007266998291, "learning_rate": 5.992794725212444e-06, "loss": 0.8531, "step": 3320 }, { "epoch": 0.4534407427635172, "grad_norm": 10.974757194519043, "learning_rate": 5.9906275109199954e-06, "loss": 0.8616, "step": 3321 }, { "epoch": 0.4535772801747679, "grad_norm": 6.590741157531738, "learning_rate": 5.988460102894271e-06, "loss": 0.9374, "step": 3322 }, { "epoch": 0.4537138175860186, "grad_norm": 10.351899147033691, "learning_rate": 5.9862925015591455e-06, "loss": 1.0084, "step": 3323 }, { "epoch": 0.4538503549972693, "grad_norm": 6.684643745422363, "learning_rate": 5.984124707338528e-06, "loss": 0.9219, "step": 3324 }, { "epoch": 0.4539868924085199, "grad_norm": 7.484306335449219, "learning_rate": 5.981956720656364e-06, "loss": 1.007, "step": 3325 }, { "epoch": 0.4541234298197706, "grad_norm": 9.240147590637207, "learning_rate": 5.979788541936642e-06, "loss": 0.9684, "step": 3326 }, { "epoch": 0.4542599672310213, "grad_norm": 7.5723347663879395, "learning_rate": 5.977620171603381e-06, "loss": 1.0864, "step": 3327 }, { "epoch": 0.454396504642272, "grad_norm": 11.119094848632812, "learning_rate": 5.975451610080643e-06, "loss": 0.9541, "step": 3328 }, { "epoch": 0.4545330420535227, "grad_norm": 10.261183738708496, "learning_rate": 5.973282857792523e-06, "loss": 0.8627, "step": 3329 }, { "epoch": 0.4546695794647733, "grad_norm": 11.609720230102539, "learning_rate": 5.971113915163158e-06, "loss": 0.997, "step": 3330 }, { "epoch": 0.454806116876024, "grad_norm": 10.54614543914795, "learning_rate": 5.968944782616721e-06, "loss": 1.1353, "step": 3331 }, { "epoch": 0.4549426542872747, "grad_norm": 29.03822898864746, "learning_rate": 5.966775460577418e-06, "loss": 0.901, "step": 3332 }, { "epoch": 0.4550791916985254, "grad_norm": 12.423521041870117, "learning_rate": 5.964605949469497e-06, "loss": 1.0159, "step": 3333 }, { "epoch": 0.4552157291097761, "grad_norm": 16.810747146606445, "learning_rate": 5.962436249717239e-06, "loss": 1.017, "step": 3334 }, { "epoch": 0.4553522665210268, "grad_norm": 6.05726957321167, "learning_rate": 5.960266361744966e-06, "loss": 0.8895, "step": 3335 }, { "epoch": 0.4554888039322774, "grad_norm": 6.1171112060546875, "learning_rate": 5.958096285977035e-06, "loss": 0.9599, "step": 3336 }, { "epoch": 0.4556253413435281, "grad_norm": 7.094620227813721, "learning_rate": 5.955926022837839e-06, "loss": 0.8488, "step": 3337 }, { "epoch": 0.4557618787547788, "grad_norm": 5.770688056945801, "learning_rate": 5.9537555727518085e-06, "loss": 0.9847, "step": 3338 }, { "epoch": 0.4558984161660295, "grad_norm": 7.702932357788086, "learning_rate": 5.951584936143407e-06, "loss": 0.9648, "step": 3339 }, { "epoch": 0.4560349535772802, "grad_norm": 6.455895900726318, "learning_rate": 5.949414113437142e-06, "loss": 0.9768, "step": 3340 }, { "epoch": 0.4561714909885309, "grad_norm": 6.064666748046875, "learning_rate": 5.94724310505755e-06, "loss": 1.0428, "step": 3341 }, { "epoch": 0.4563080283997815, "grad_norm": 9.381836891174316, "learning_rate": 5.94507191142921e-06, "loss": 0.8128, "step": 3342 }, { "epoch": 0.4564445658110322, "grad_norm": 12.32673168182373, "learning_rate": 5.942900532976732e-06, "loss": 1.0102, "step": 3343 }, { "epoch": 0.4565811032222829, "grad_norm": 4.839799404144287, "learning_rate": 5.940728970124765e-06, "loss": 1.0926, "step": 3344 }, { "epoch": 0.4567176406335336, "grad_norm": 6.662607669830322, "learning_rate": 5.9385572232979935e-06, "loss": 1.1221, "step": 3345 }, { "epoch": 0.4568541780447843, "grad_norm": 5.217293739318848, "learning_rate": 5.9363852929211365e-06, "loss": 0.9318, "step": 3346 }, { "epoch": 0.456990715456035, "grad_norm": 7.119763374328613, "learning_rate": 5.9342131794189505e-06, "loss": 0.9863, "step": 3347 }, { "epoch": 0.4571272528672856, "grad_norm": 6.140427589416504, "learning_rate": 5.932040883216228e-06, "loss": 0.9309, "step": 3348 }, { "epoch": 0.4572637902785363, "grad_norm": 9.383124351501465, "learning_rate": 5.929868404737798e-06, "loss": 0.9454, "step": 3349 }, { "epoch": 0.457400327689787, "grad_norm": 8.435185432434082, "learning_rate": 5.927695744408521e-06, "loss": 1.0377, "step": 3350 }, { "epoch": 0.4575368651010377, "grad_norm": 6.332075595855713, "learning_rate": 5.925522902653299e-06, "loss": 1.0671, "step": 3351 }, { "epoch": 0.4576734025122884, "grad_norm": 11.715008735656738, "learning_rate": 5.9233498798970645e-06, "loss": 0.8391, "step": 3352 }, { "epoch": 0.457809939923539, "grad_norm": 14.199630737304688, "learning_rate": 5.921176676564788e-06, "loss": 0.9516, "step": 3353 }, { "epoch": 0.4579464773347897, "grad_norm": 6.330979824066162, "learning_rate": 5.919003293081476e-06, "loss": 0.8006, "step": 3354 }, { "epoch": 0.4580830147460404, "grad_norm": 5.633138656616211, "learning_rate": 5.916829729872166e-06, "loss": 1.0062, "step": 3355 }, { "epoch": 0.4582195521572911, "grad_norm": 7.389068603515625, "learning_rate": 5.914655987361934e-06, "loss": 0.9763, "step": 3356 }, { "epoch": 0.4583560895685418, "grad_norm": 6.768261909484863, "learning_rate": 5.912482065975892e-06, "loss": 1.048, "step": 3357 }, { "epoch": 0.4584926269797925, "grad_norm": 6.272348403930664, "learning_rate": 5.910307966139187e-06, "loss": 0.9533, "step": 3358 }, { "epoch": 0.4586291643910431, "grad_norm": 7.770414352416992, "learning_rate": 5.9081336882769955e-06, "loss": 0.9258, "step": 3359 }, { "epoch": 0.4587657018022938, "grad_norm": 6.507904052734375, "learning_rate": 5.905959232814537e-06, "loss": 0.9156, "step": 3360 }, { "epoch": 0.4589022392135445, "grad_norm": 4.942061424255371, "learning_rate": 5.903784600177057e-06, "loss": 0.8689, "step": 3361 }, { "epoch": 0.4590387766247952, "grad_norm": 7.836349964141846, "learning_rate": 5.901609790789844e-06, "loss": 0.9303, "step": 3362 }, { "epoch": 0.4591753140360459, "grad_norm": 7.1297807693481445, "learning_rate": 5.8994348050782165e-06, "loss": 0.9179, "step": 3363 }, { "epoch": 0.4593118514472966, "grad_norm": 8.89971923828125, "learning_rate": 5.897259643467528e-06, "loss": 1.0446, "step": 3364 }, { "epoch": 0.4594483888585472, "grad_norm": 5.695171356201172, "learning_rate": 5.895084306383166e-06, "loss": 0.9698, "step": 3365 }, { "epoch": 0.4595849262697979, "grad_norm": 6.997114658355713, "learning_rate": 5.892908794250552e-06, "loss": 0.974, "step": 3366 }, { "epoch": 0.4597214636810486, "grad_norm": 7.91954231262207, "learning_rate": 5.8907331074951425e-06, "loss": 1.0311, "step": 3367 }, { "epoch": 0.4598580010922993, "grad_norm": 6.167180061340332, "learning_rate": 5.888557246542431e-06, "loss": 0.9172, "step": 3368 }, { "epoch": 0.45999453850355, "grad_norm": 7.49533224105835, "learning_rate": 5.886381211817942e-06, "loss": 0.9082, "step": 3369 }, { "epoch": 0.4601310759148007, "grad_norm": 6.383411407470703, "learning_rate": 5.884205003747233e-06, "loss": 1.0597, "step": 3370 }, { "epoch": 0.4602676133260513, "grad_norm": 6.688462257385254, "learning_rate": 5.882028622755898e-06, "loss": 0.9558, "step": 3371 }, { "epoch": 0.460404150737302, "grad_norm": 6.166485786437988, "learning_rate": 5.8798520692695605e-06, "loss": 0.9519, "step": 3372 }, { "epoch": 0.4605406881485527, "grad_norm": 6.30013370513916, "learning_rate": 5.877675343713884e-06, "loss": 0.9323, "step": 3373 }, { "epoch": 0.4606772255598034, "grad_norm": 7.299749851226807, "learning_rate": 5.875498446514564e-06, "loss": 1.0703, "step": 3374 }, { "epoch": 0.4608137629710541, "grad_norm": 10.2733736038208, "learning_rate": 5.873321378097323e-06, "loss": 0.9008, "step": 3375 }, { "epoch": 0.4609503003823048, "grad_norm": 7.062094211578369, "learning_rate": 5.871144138887925e-06, "loss": 0.8679, "step": 3376 }, { "epoch": 0.4610868377935554, "grad_norm": 6.473434925079346, "learning_rate": 5.868966729312166e-06, "loss": 1.0695, "step": 3377 }, { "epoch": 0.4612233752048061, "grad_norm": 11.326369285583496, "learning_rate": 5.86678914979587e-06, "loss": 1.0439, "step": 3378 }, { "epoch": 0.4613599126160568, "grad_norm": 8.08911418914795, "learning_rate": 5.8646114007649e-06, "loss": 1.0875, "step": 3379 }, { "epoch": 0.4614964500273075, "grad_norm": 41.30638122558594, "learning_rate": 5.862433482645151e-06, "loss": 1.0336, "step": 3380 }, { "epoch": 0.4616329874385582, "grad_norm": 11.790706634521484, "learning_rate": 5.860255395862549e-06, "loss": 1.0028, "step": 3381 }, { "epoch": 0.4617695248498088, "grad_norm": 5.326590061187744, "learning_rate": 5.858077140843052e-06, "loss": 0.8506, "step": 3382 }, { "epoch": 0.4619060622610595, "grad_norm": 12.033537864685059, "learning_rate": 5.855898718012659e-06, "loss": 1.0116, "step": 3383 }, { "epoch": 0.4620425996723102, "grad_norm": 8.419950485229492, "learning_rate": 5.853720127797392e-06, "loss": 1.047, "step": 3384 }, { "epoch": 0.4621791370835609, "grad_norm": 15.060348510742188, "learning_rate": 5.851541370623309e-06, "loss": 1.0514, "step": 3385 }, { "epoch": 0.4623156744948116, "grad_norm": 8.0593843460083, "learning_rate": 5.849362446916502e-06, "loss": 1.0788, "step": 3386 }, { "epoch": 0.4624522119060623, "grad_norm": 7.213581085205078, "learning_rate": 5.847183357103096e-06, "loss": 1.0892, "step": 3387 }, { "epoch": 0.4625887493173129, "grad_norm": 5.771481037139893, "learning_rate": 5.8450041016092465e-06, "loss": 0.915, "step": 3388 }, { "epoch": 0.4627252867285636, "grad_norm": 7.873781681060791, "learning_rate": 5.842824680861141e-06, "loss": 1.0711, "step": 3389 }, { "epoch": 0.4628618241398143, "grad_norm": 15.300519943237305, "learning_rate": 5.840645095285004e-06, "loss": 1.0827, "step": 3390 }, { "epoch": 0.462998361551065, "grad_norm": 7.146687030792236, "learning_rate": 5.838465345307085e-06, "loss": 0.9293, "step": 3391 }, { "epoch": 0.4631348989623157, "grad_norm": 15.2657470703125, "learning_rate": 5.836285431353672e-06, "loss": 0.9849, "step": 3392 }, { "epoch": 0.4632714363735664, "grad_norm": 6.110131740570068, "learning_rate": 5.834105353851081e-06, "loss": 0.9596, "step": 3393 }, { "epoch": 0.463407973784817, "grad_norm": 7.939927101135254, "learning_rate": 5.831925113225664e-06, "loss": 0.953, "step": 3394 }, { "epoch": 0.4635445111960677, "grad_norm": 5.458503246307373, "learning_rate": 5.829744709903798e-06, "loss": 0.9676, "step": 3395 }, { "epoch": 0.4636810486073184, "grad_norm": 6.696799278259277, "learning_rate": 5.8275641443119015e-06, "loss": 1.048, "step": 3396 }, { "epoch": 0.4638175860185691, "grad_norm": 21.74042320251465, "learning_rate": 5.825383416876414e-06, "loss": 0.9888, "step": 3397 }, { "epoch": 0.4639541234298198, "grad_norm": 6.335643768310547, "learning_rate": 5.823202528023817e-06, "loss": 1.1948, "step": 3398 }, { "epoch": 0.4640906608410705, "grad_norm": 6.271256446838379, "learning_rate": 5.821021478180615e-06, "loss": 1.0837, "step": 3399 }, { "epoch": 0.4642271982523211, "grad_norm": 9.366411209106445, "learning_rate": 5.81884026777335e-06, "loss": 1.0678, "step": 3400 }, { "epoch": 0.4643637356635718, "grad_norm": 5.880775451660156, "learning_rate": 5.816658897228592e-06, "loss": 0.844, "step": 3401 }, { "epoch": 0.4645002730748225, "grad_norm": 6.397569179534912, "learning_rate": 5.814477366972945e-06, "loss": 0.8915, "step": 3402 }, { "epoch": 0.4646368104860732, "grad_norm": 5.654773712158203, "learning_rate": 5.812295677433039e-06, "loss": 0.8549, "step": 3403 }, { "epoch": 0.4647733478973239, "grad_norm": 5.759537220001221, "learning_rate": 5.810113829035544e-06, "loss": 1.0279, "step": 3404 }, { "epoch": 0.4649098853085745, "grad_norm": 7.036959648132324, "learning_rate": 5.807931822207152e-06, "loss": 0.9286, "step": 3405 }, { "epoch": 0.4650464227198252, "grad_norm": 7.070478439331055, "learning_rate": 5.805749657374589e-06, "loss": 0.9924, "step": 3406 }, { "epoch": 0.4651829601310759, "grad_norm": 7.7452850341796875, "learning_rate": 5.803567334964615e-06, "loss": 0.904, "step": 3407 }, { "epoch": 0.4653194975423266, "grad_norm": 8.038347244262695, "learning_rate": 5.801384855404018e-06, "loss": 1.0614, "step": 3408 }, { "epoch": 0.4654560349535773, "grad_norm": 11.4511079788208, "learning_rate": 5.799202219119619e-06, "loss": 0.9731, "step": 3409 }, { "epoch": 0.465592572364828, "grad_norm": 5.376178741455078, "learning_rate": 5.797019426538264e-06, "loss": 0.9394, "step": 3410 }, { "epoch": 0.4657291097760786, "grad_norm": 35.569087982177734, "learning_rate": 5.794836478086836e-06, "loss": 1.0054, "step": 3411 }, { "epoch": 0.4658656471873293, "grad_norm": 5.633117198944092, "learning_rate": 5.792653374192245e-06, "loss": 0.9029, "step": 3412 }, { "epoch": 0.46600218459858, "grad_norm": 7.183730602264404, "learning_rate": 5.790470115281434e-06, "loss": 0.9958, "step": 3413 }, { "epoch": 0.4661387220098307, "grad_norm": 5.967844009399414, "learning_rate": 5.788286701781375e-06, "loss": 1.0378, "step": 3414 }, { "epoch": 0.4662752594210814, "grad_norm": 14.209881782531738, "learning_rate": 5.786103134119067e-06, "loss": 0.9525, "step": 3415 }, { "epoch": 0.4664117968323321, "grad_norm": 5.738059043884277, "learning_rate": 5.7839194127215445e-06, "loss": 0.9099, "step": 3416 }, { "epoch": 0.4665483342435827, "grad_norm": 8.898516654968262, "learning_rate": 5.781735538015868e-06, "loss": 1.1809, "step": 3417 }, { "epoch": 0.4666848716548334, "grad_norm": 5.952216148376465, "learning_rate": 5.77955151042913e-06, "loss": 1.0001, "step": 3418 }, { "epoch": 0.4668214090660841, "grad_norm": 8.303669929504395, "learning_rate": 5.777367330388453e-06, "loss": 0.9832, "step": 3419 }, { "epoch": 0.4669579464773348, "grad_norm": 7.72039270401001, "learning_rate": 5.77518299832099e-06, "loss": 0.9947, "step": 3420 }, { "epoch": 0.4670944838885855, "grad_norm": 5.36533260345459, "learning_rate": 5.77299851465392e-06, "loss": 0.9552, "step": 3421 }, { "epoch": 0.4672310212998362, "grad_norm": 5.9728779792785645, "learning_rate": 5.770813879814456e-06, "loss": 1.0035, "step": 3422 }, { "epoch": 0.4673675587110868, "grad_norm": 5.887731075286865, "learning_rate": 5.768629094229838e-06, "loss": 0.8314, "step": 3423 }, { "epoch": 0.4675040961223375, "grad_norm": 6.884525775909424, "learning_rate": 5.7664441583273374e-06, "loss": 0.9849, "step": 3424 }, { "epoch": 0.4676406335335882, "grad_norm": 11.125741958618164, "learning_rate": 5.764259072534253e-06, "loss": 1.0206, "step": 3425 }, { "epoch": 0.4677771709448389, "grad_norm": 8.181407928466797, "learning_rate": 5.7620738372779125e-06, "loss": 1.0804, "step": 3426 }, { "epoch": 0.4679137083560896, "grad_norm": 8.495160102844238, "learning_rate": 5.759888452985676e-06, "loss": 0.9757, "step": 3427 }, { "epoch": 0.4680502457673403, "grad_norm": 5.788748264312744, "learning_rate": 5.757702920084931e-06, "loss": 0.912, "step": 3428 }, { "epoch": 0.4681867831785909, "grad_norm": 7.579591274261475, "learning_rate": 5.755517239003091e-06, "loss": 1.0996, "step": 3429 }, { "epoch": 0.4683233205898416, "grad_norm": 6.269713878631592, "learning_rate": 5.753331410167604e-06, "loss": 1.0165, "step": 3430 }, { "epoch": 0.4684598580010923, "grad_norm": 7.853512287139893, "learning_rate": 5.751145434005945e-06, "loss": 1.0399, "step": 3431 }, { "epoch": 0.468596395412343, "grad_norm": 6.575924873352051, "learning_rate": 5.748959310945614e-06, "loss": 1.1196, "step": 3432 }, { "epoch": 0.4687329328235937, "grad_norm": 10.859123229980469, "learning_rate": 5.7467730414141454e-06, "loss": 0.932, "step": 3433 }, { "epoch": 0.4688694702348443, "grad_norm": 6.316227436065674, "learning_rate": 5.744586625839101e-06, "loss": 0.9145, "step": 3434 }, { "epoch": 0.469006007646095, "grad_norm": 9.029763221740723, "learning_rate": 5.742400064648066e-06, "loss": 1.0212, "step": 3435 }, { "epoch": 0.4691425450573457, "grad_norm": 7.899065017700195, "learning_rate": 5.740213358268658e-06, "loss": 1.0301, "step": 3436 }, { "epoch": 0.4692790824685964, "grad_norm": 5.8091630935668945, "learning_rate": 5.738026507128526e-06, "loss": 1.0396, "step": 3437 }, { "epoch": 0.4694156198798471, "grad_norm": 6.788754463195801, "learning_rate": 5.735839511655342e-06, "loss": 1.1198, "step": 3438 }, { "epoch": 0.4695521572910978, "grad_norm": 6.68763542175293, "learning_rate": 5.733652372276809e-06, "loss": 0.9382, "step": 3439 }, { "epoch": 0.4696886947023484, "grad_norm": 5.5990800857543945, "learning_rate": 5.731465089420658e-06, "loss": 1.0107, "step": 3440 }, { "epoch": 0.4698252321135991, "grad_norm": 6.505922794342041, "learning_rate": 5.729277663514648e-06, "loss": 0.8497, "step": 3441 }, { "epoch": 0.4699617695248498, "grad_norm": 6.1970930099487305, "learning_rate": 5.727090094986565e-06, "loss": 0.8786, "step": 3442 }, { "epoch": 0.4700983069361005, "grad_norm": 7.955367565155029, "learning_rate": 5.724902384264221e-06, "loss": 1.0315, "step": 3443 }, { "epoch": 0.4702348443473512, "grad_norm": 4.8032450675964355, "learning_rate": 5.722714531775463e-06, "loss": 0.9438, "step": 3444 }, { "epoch": 0.4703713817586019, "grad_norm": 5.996164798736572, "learning_rate": 5.720526537948159e-06, "loss": 0.9855, "step": 3445 }, { "epoch": 0.4705079191698525, "grad_norm": 9.986010551452637, "learning_rate": 5.718338403210206e-06, "loss": 0.9527, "step": 3446 }, { "epoch": 0.4706444565811032, "grad_norm": 8.422688484191895, "learning_rate": 5.716150127989529e-06, "loss": 0.8393, "step": 3447 }, { "epoch": 0.4707809939923539, "grad_norm": 11.042016983032227, "learning_rate": 5.7139617127140825e-06, "loss": 0.9368, "step": 3448 }, { "epoch": 0.4709175314036046, "grad_norm": 5.822388648986816, "learning_rate": 5.711773157811844e-06, "loss": 0.9297, "step": 3449 }, { "epoch": 0.4710540688148553, "grad_norm": 9.202298164367676, "learning_rate": 5.709584463710824e-06, "loss": 0.9466, "step": 3450 }, { "epoch": 0.471190606226106, "grad_norm": 4.577877998352051, "learning_rate": 5.707395630839053e-06, "loss": 0.9671, "step": 3451 }, { "epoch": 0.4713271436373566, "grad_norm": 12.530285835266113, "learning_rate": 5.705206659624597e-06, "loss": 0.8876, "step": 3452 }, { "epoch": 0.4714636810486073, "grad_norm": 7.5959954261779785, "learning_rate": 5.703017550495543e-06, "loss": 1.1045, "step": 3453 }, { "epoch": 0.471600218459858, "grad_norm": 11.465561866760254, "learning_rate": 5.7008283038800075e-06, "loss": 1.0013, "step": 3454 }, { "epoch": 0.4717367558711087, "grad_norm": 5.14430046081543, "learning_rate": 5.698638920206132e-06, "loss": 0.9844, "step": 3455 }, { "epoch": 0.4718732932823594, "grad_norm": 6.2734456062316895, "learning_rate": 5.696449399902085e-06, "loss": 0.9987, "step": 3456 }, { "epoch": 0.47200983069361, "grad_norm": 10.630733489990234, "learning_rate": 5.694259743396066e-06, "loss": 0.9161, "step": 3457 }, { "epoch": 0.4721463681048607, "grad_norm": 7.0476226806640625, "learning_rate": 5.692069951116294e-06, "loss": 1.101, "step": 3458 }, { "epoch": 0.4722829055161114, "grad_norm": 5.703925609588623, "learning_rate": 5.689880023491022e-06, "loss": 0.9268, "step": 3459 }, { "epoch": 0.4724194429273621, "grad_norm": 9.826427459716797, "learning_rate": 5.687689960948526e-06, "loss": 0.8642, "step": 3460 }, { "epoch": 0.4725559803386128, "grad_norm": 6.160983085632324, "learning_rate": 5.685499763917104e-06, "loss": 0.875, "step": 3461 }, { "epoch": 0.4726925177498635, "grad_norm": 5.448974609375, "learning_rate": 5.683309432825087e-06, "loss": 1.0077, "step": 3462 }, { "epoch": 0.4728290551611141, "grad_norm": 6.828988075256348, "learning_rate": 5.681118968100831e-06, "loss": 1.0647, "step": 3463 }, { "epoch": 0.4729655925723648, "grad_norm": 18.3389892578125, "learning_rate": 5.678928370172715e-06, "loss": 0.9433, "step": 3464 }, { "epoch": 0.4731021299836155, "grad_norm": 10.048956871032715, "learning_rate": 5.676737639469148e-06, "loss": 1.1597, "step": 3465 }, { "epoch": 0.4732386673948662, "grad_norm": 6.660247325897217, "learning_rate": 5.67454677641856e-06, "loss": 0.8757, "step": 3466 }, { "epoch": 0.4733752048061169, "grad_norm": 13.337262153625488, "learning_rate": 5.6723557814494125e-06, "loss": 0.9324, "step": 3467 }, { "epoch": 0.4735117422173676, "grad_norm": 6.088951587677002, "learning_rate": 5.670164654990189e-06, "loss": 0.8871, "step": 3468 }, { "epoch": 0.4736482796286182, "grad_norm": 9.153749465942383, "learning_rate": 5.667973397469398e-06, "loss": 1.0314, "step": 3469 }, { "epoch": 0.4737848170398689, "grad_norm": 6.928051471710205, "learning_rate": 5.665782009315579e-06, "loss": 1.0037, "step": 3470 }, { "epoch": 0.4739213544511196, "grad_norm": 6.421050071716309, "learning_rate": 5.663590490957291e-06, "loss": 0.9512, "step": 3471 }, { "epoch": 0.4740578918623703, "grad_norm": 8.202956199645996, "learning_rate": 5.661398842823122e-06, "loss": 1.0275, "step": 3472 }, { "epoch": 0.474194429273621, "grad_norm": 6.396574974060059, "learning_rate": 5.659207065341684e-06, "loss": 1.143, "step": 3473 }, { "epoch": 0.4743309666848717, "grad_norm": 31.140356063842773, "learning_rate": 5.657015158941615e-06, "loss": 0.8327, "step": 3474 }, { "epoch": 0.4744675040961223, "grad_norm": 6.925636291503906, "learning_rate": 5.6548231240515785e-06, "loss": 1.0229, "step": 3475 }, { "epoch": 0.474604041507373, "grad_norm": 5.973821640014648, "learning_rate": 5.65263096110026e-06, "loss": 1.0353, "step": 3476 }, { "epoch": 0.4747405789186237, "grad_norm": 7.304678440093994, "learning_rate": 5.6504386705163725e-06, "loss": 1.0223, "step": 3477 }, { "epoch": 0.4748771163298744, "grad_norm": 6.437880039215088, "learning_rate": 5.648246252728658e-06, "loss": 1.0837, "step": 3478 }, { "epoch": 0.4750136537411251, "grad_norm": 7.81486177444458, "learning_rate": 5.646053708165875e-06, "loss": 0.8402, "step": 3479 }, { "epoch": 0.4751501911523758, "grad_norm": 6.610736846923828, "learning_rate": 5.643861037256815e-06, "loss": 0.9964, "step": 3480 }, { "epoch": 0.4752867285636264, "grad_norm": 8.049823760986328, "learning_rate": 5.641668240430286e-06, "loss": 1.0275, "step": 3481 }, { "epoch": 0.4754232659748771, "grad_norm": 6.52759313583374, "learning_rate": 5.639475318115128e-06, "loss": 1.0131, "step": 3482 }, { "epoch": 0.4755598033861278, "grad_norm": 7.59115743637085, "learning_rate": 5.637282270740202e-06, "loss": 0.9973, "step": 3483 }, { "epoch": 0.4756963407973785, "grad_norm": 8.739601135253906, "learning_rate": 5.635089098734394e-06, "loss": 0.8634, "step": 3484 }, { "epoch": 0.4758328782086292, "grad_norm": 9.621499061584473, "learning_rate": 5.6328958025266145e-06, "loss": 0.9474, "step": 3485 }, { "epoch": 0.4759694156198798, "grad_norm": 6.239101409912109, "learning_rate": 5.630702382545797e-06, "loss": 0.9854, "step": 3486 }, { "epoch": 0.4761059530311305, "grad_norm": 9.185111045837402, "learning_rate": 5.628508839220902e-06, "loss": 1.13, "step": 3487 }, { "epoch": 0.4762424904423812, "grad_norm": 7.4505615234375, "learning_rate": 5.626315172980911e-06, "loss": 0.9166, "step": 3488 }, { "epoch": 0.4763790278536319, "grad_norm": 6.829624176025391, "learning_rate": 5.624121384254831e-06, "loss": 0.8916, "step": 3489 }, { "epoch": 0.4765155652648826, "grad_norm": 8.664773941040039, "learning_rate": 5.621927473471694e-06, "loss": 0.886, "step": 3490 }, { "epoch": 0.4766521026761333, "grad_norm": 5.82222843170166, "learning_rate": 5.619733441060554e-06, "loss": 1.0356, "step": 3491 }, { "epoch": 0.4767886400873839, "grad_norm": 7.93876838684082, "learning_rate": 5.617539287450492e-06, "loss": 1.0428, "step": 3492 }, { "epoch": 0.4769251774986346, "grad_norm": 11.174074172973633, "learning_rate": 5.615345013070605e-06, "loss": 0.9296, "step": 3493 }, { "epoch": 0.4770617149098853, "grad_norm": 7.2681169509887695, "learning_rate": 5.6131506183500255e-06, "loss": 0.888, "step": 3494 }, { "epoch": 0.477198252321136, "grad_norm": 5.749581336975098, "learning_rate": 5.610956103717898e-06, "loss": 0.9631, "step": 3495 }, { "epoch": 0.4773347897323867, "grad_norm": 8.252660751342773, "learning_rate": 5.608761469603398e-06, "loss": 1.013, "step": 3496 }, { "epoch": 0.4774713271436374, "grad_norm": 17.252567291259766, "learning_rate": 5.6065667164357195e-06, "loss": 1.0244, "step": 3497 }, { "epoch": 0.477607864554888, "grad_norm": 15.677563667297363, "learning_rate": 5.604371844644083e-06, "loss": 1.0153, "step": 3498 }, { "epoch": 0.4777444019661387, "grad_norm": 9.169370651245117, "learning_rate": 5.602176854657735e-06, "loss": 0.9969, "step": 3499 }, { "epoch": 0.4778809393773894, "grad_norm": 7.229883670806885, "learning_rate": 5.599981746905935e-06, "loss": 1.094, "step": 3500 }, { "epoch": 0.4780174767886401, "grad_norm": 5.577108860015869, "learning_rate": 5.597786521817976e-06, "loss": 1.1713, "step": 3501 }, { "epoch": 0.4781540141998908, "grad_norm": 10.581486701965332, "learning_rate": 5.595591179823169e-06, "loss": 1.0596, "step": 3502 }, { "epoch": 0.4782905516111415, "grad_norm": 11.756385803222656, "learning_rate": 5.593395721350848e-06, "loss": 0.9387, "step": 3503 }, { "epoch": 0.4784270890223921, "grad_norm": 8.368131637573242, "learning_rate": 5.591200146830372e-06, "loss": 0.9017, "step": 3504 }, { "epoch": 0.4785636264336428, "grad_norm": 8.031614303588867, "learning_rate": 5.5890044566911215e-06, "loss": 0.925, "step": 3505 }, { "epoch": 0.4787001638448935, "grad_norm": 8.126554489135742, "learning_rate": 5.586808651362498e-06, "loss": 1.0256, "step": 3506 }, { "epoch": 0.4788367012561442, "grad_norm": 8.462515830993652, "learning_rate": 5.584612731273927e-06, "loss": 0.8746, "step": 3507 }, { "epoch": 0.4789732386673949, "grad_norm": 5.687477111816406, "learning_rate": 5.582416696854853e-06, "loss": 1.0555, "step": 3508 }, { "epoch": 0.4791097760786455, "grad_norm": 6.030755519866943, "learning_rate": 5.580220548534753e-06, "loss": 0.9217, "step": 3509 }, { "epoch": 0.4792463134898962, "grad_norm": 5.704596519470215, "learning_rate": 5.578024286743113e-06, "loss": 1.0091, "step": 3510 }, { "epoch": 0.4793828509011469, "grad_norm": 5.294273853302002, "learning_rate": 5.575827911909453e-06, "loss": 0.801, "step": 3511 }, { "epoch": 0.4795193883123976, "grad_norm": 10.670400619506836, "learning_rate": 5.5736314244633075e-06, "loss": 1.0218, "step": 3512 }, { "epoch": 0.4796559257236483, "grad_norm": 8.891175270080566, "learning_rate": 5.571434824834233e-06, "loss": 1.0047, "step": 3513 }, { "epoch": 0.479792463134899, "grad_norm": 5.46375036239624, "learning_rate": 5.569238113451813e-06, "loss": 0.9218, "step": 3514 }, { "epoch": 0.4799290005461496, "grad_norm": 6.540562629699707, "learning_rate": 5.56704129074565e-06, "loss": 0.9268, "step": 3515 }, { "epoch": 0.4800655379574003, "grad_norm": 6.141508102416992, "learning_rate": 5.564844357145365e-06, "loss": 0.9229, "step": 3516 }, { "epoch": 0.480202075368651, "grad_norm": 12.698073387145996, "learning_rate": 5.562647313080608e-06, "loss": 1.0466, "step": 3517 }, { "epoch": 0.4803386127799017, "grad_norm": 5.663780212402344, "learning_rate": 5.560450158981045e-06, "loss": 0.8662, "step": 3518 }, { "epoch": 0.4804751501911524, "grad_norm": 7.0449628829956055, "learning_rate": 5.5582528952763645e-06, "loss": 0.9261, "step": 3519 }, { "epoch": 0.4806116876024031, "grad_norm": 8.490678787231445, "learning_rate": 5.556055522396279e-06, "loss": 1.06, "step": 3520 }, { "epoch": 0.4807482250136537, "grad_norm": 6.750999450683594, "learning_rate": 5.553858040770518e-06, "loss": 1.0137, "step": 3521 }, { "epoch": 0.4808847624249044, "grad_norm": 7.294538497924805, "learning_rate": 5.551660450828835e-06, "loss": 0.9948, "step": 3522 }, { "epoch": 0.4810212998361551, "grad_norm": 9.396686553955078, "learning_rate": 5.549462753001006e-06, "loss": 1.0268, "step": 3523 }, { "epoch": 0.4811578372474058, "grad_norm": 7.508344650268555, "learning_rate": 5.5472649477168264e-06, "loss": 0.9209, "step": 3524 }, { "epoch": 0.4812943746586565, "grad_norm": 5.124869346618652, "learning_rate": 5.545067035406112e-06, "loss": 0.9261, "step": 3525 }, { "epoch": 0.4814309120699072, "grad_norm": 5.696589469909668, "learning_rate": 5.5428690164986984e-06, "loss": 0.9214, "step": 3526 }, { "epoch": 0.4815674494811578, "grad_norm": 6.213076591491699, "learning_rate": 5.540670891424445e-06, "loss": 0.8711, "step": 3527 }, { "epoch": 0.4817039868924085, "grad_norm": 5.8470683097839355, "learning_rate": 5.538472660613233e-06, "loss": 1.1289, "step": 3528 }, { "epoch": 0.4818405243036592, "grad_norm": 7.864144802093506, "learning_rate": 5.536274324494959e-06, "loss": 0.9323, "step": 3529 }, { "epoch": 0.4819770617149099, "grad_norm": 7.113290786743164, "learning_rate": 5.534075883499545e-06, "loss": 0.9395, "step": 3530 }, { "epoch": 0.4821135991261606, "grad_norm": 6.196209907531738, "learning_rate": 5.5318773380569326e-06, "loss": 0.9531, "step": 3531 }, { "epoch": 0.4822501365374113, "grad_norm": 5.754477500915527, "learning_rate": 5.529678688597081e-06, "loss": 0.9939, "step": 3532 }, { "epoch": 0.4823866739486619, "grad_norm": 5.456782817840576, "learning_rate": 5.527479935549972e-06, "loss": 0.8815, "step": 3533 }, { "epoch": 0.4825232113599126, "grad_norm": 9.013300895690918, "learning_rate": 5.525281079345608e-06, "loss": 1.1136, "step": 3534 }, { "epoch": 0.4826597487711633, "grad_norm": 5.5354132652282715, "learning_rate": 5.523082120414013e-06, "loss": 1.0446, "step": 3535 }, { "epoch": 0.482796286182414, "grad_norm": 6.764456272125244, "learning_rate": 5.5208830591852245e-06, "loss": 1.0872, "step": 3536 }, { "epoch": 0.4829328235936647, "grad_norm": 7.642219543457031, "learning_rate": 5.518683896089307e-06, "loss": 0.9714, "step": 3537 }, { "epoch": 0.4830693610049153, "grad_norm": 6.078707218170166, "learning_rate": 5.516484631556345e-06, "loss": 0.9552, "step": 3538 }, { "epoch": 0.483205898416166, "grad_norm": 7.576251029968262, "learning_rate": 5.514285266016437e-06, "loss": 0.991, "step": 3539 }, { "epoch": 0.4833424358274167, "grad_norm": 7.445967674255371, "learning_rate": 5.512085799899705e-06, "loss": 0.8931, "step": 3540 }, { "epoch": 0.4834789732386674, "grad_norm": 8.33027172088623, "learning_rate": 5.50988623363629e-06, "loss": 1.0056, "step": 3541 }, { "epoch": 0.4836155106499181, "grad_norm": 7.272780895233154, "learning_rate": 5.507686567656354e-06, "loss": 0.8456, "step": 3542 }, { "epoch": 0.4837520480611688, "grad_norm": 5.843495845794678, "learning_rate": 5.5054868023900776e-06, "loss": 1.0058, "step": 3543 }, { "epoch": 0.4838885854724194, "grad_norm": 8.718984603881836, "learning_rate": 5.503286938267661e-06, "loss": 0.9297, "step": 3544 }, { "epoch": 0.4840251228836701, "grad_norm": 6.735233783721924, "learning_rate": 5.501086975719323e-06, "loss": 0.946, "step": 3545 }, { "epoch": 0.4841616602949208, "grad_norm": 6.346378803253174, "learning_rate": 5.498886915175299e-06, "loss": 1.0059, "step": 3546 }, { "epoch": 0.4842981977061715, "grad_norm": 5.425680160522461, "learning_rate": 5.496686757065849e-06, "loss": 1.0629, "step": 3547 }, { "epoch": 0.4844347351174222, "grad_norm": 6.225181579589844, "learning_rate": 5.49448650182125e-06, "loss": 1.0926, "step": 3548 }, { "epoch": 0.4845712725286729, "grad_norm": 6.5565948486328125, "learning_rate": 5.492286149871796e-06, "loss": 1.0468, "step": 3549 }, { "epoch": 0.4847078099399235, "grad_norm": 5.310577392578125, "learning_rate": 5.490085701647805e-06, "loss": 1.0361, "step": 3550 }, { "epoch": 0.4848443473511742, "grad_norm": 10.755179405212402, "learning_rate": 5.487885157579606e-06, "loss": 1.0465, "step": 3551 }, { "epoch": 0.4849808847624249, "grad_norm": 7.0420308113098145, "learning_rate": 5.485684518097552e-06, "loss": 0.8107, "step": 3552 }, { "epoch": 0.4851174221736756, "grad_norm": 7.357022285461426, "learning_rate": 5.4834837836320156e-06, "loss": 0.8876, "step": 3553 }, { "epoch": 0.4852539595849263, "grad_norm": 5.948049545288086, "learning_rate": 5.481282954613384e-06, "loss": 1.0629, "step": 3554 }, { "epoch": 0.485390496996177, "grad_norm": 7.612827301025391, "learning_rate": 5.479082031472067e-06, "loss": 0.9008, "step": 3555 }, { "epoch": 0.4855270344074276, "grad_norm": 5.557114601135254, "learning_rate": 5.476881014638491e-06, "loss": 1.0958, "step": 3556 }, { "epoch": 0.4856635718186783, "grad_norm": 7.6853532791137695, "learning_rate": 5.4746799045431e-06, "loss": 1.0228, "step": 3557 }, { "epoch": 0.485800109229929, "grad_norm": 7.864725589752197, "learning_rate": 5.472478701616354e-06, "loss": 1.0419, "step": 3558 }, { "epoch": 0.4859366466411797, "grad_norm": 6.06391716003418, "learning_rate": 5.470277406288736e-06, "loss": 1.0547, "step": 3559 }, { "epoch": 0.4860731840524304, "grad_norm": 7.002217769622803, "learning_rate": 5.468076018990747e-06, "loss": 1.0759, "step": 3560 }, { "epoch": 0.486209721463681, "grad_norm": 9.99905014038086, "learning_rate": 5.4658745401529005e-06, "loss": 1.0463, "step": 3561 }, { "epoch": 0.4863462588749317, "grad_norm": 7.514871120452881, "learning_rate": 5.463672970205733e-06, "loss": 1.0488, "step": 3562 }, { "epoch": 0.4864827962861824, "grad_norm": 5.094854354858398, "learning_rate": 5.4614713095798e-06, "loss": 0.9061, "step": 3563 }, { "epoch": 0.4866193336974331, "grad_norm": 7.496458053588867, "learning_rate": 5.459269558705667e-06, "loss": 1.017, "step": 3564 }, { "epoch": 0.4867558711086838, "grad_norm": 5.9604411125183105, "learning_rate": 5.457067718013924e-06, "loss": 0.8535, "step": 3565 }, { "epoch": 0.4868924085199345, "grad_norm": 9.207576751708984, "learning_rate": 5.454865787935178e-06, "loss": 1.155, "step": 3566 }, { "epoch": 0.4870289459311851, "grad_norm": 10.66580867767334, "learning_rate": 5.452663768900051e-06, "loss": 1.0768, "step": 3567 }, { "epoch": 0.4871654833424358, "grad_norm": 9.33138656616211, "learning_rate": 5.450461661339183e-06, "loss": 1.0126, "step": 3568 }, { "epoch": 0.4873020207536865, "grad_norm": 7.063398361206055, "learning_rate": 5.448259465683231e-06, "loss": 0.9135, "step": 3569 }, { "epoch": 0.4874385581649372, "grad_norm": 7.228682518005371, "learning_rate": 5.446057182362874e-06, "loss": 0.9824, "step": 3570 }, { "epoch": 0.4875750955761879, "grad_norm": 5.838810443878174, "learning_rate": 5.443854811808801e-06, "loss": 0.8968, "step": 3571 }, { "epoch": 0.4877116329874386, "grad_norm": 7.791773319244385, "learning_rate": 5.441652354451721e-06, "loss": 1.0425, "step": 3572 }, { "epoch": 0.4878481703986892, "grad_norm": 5.6887898445129395, "learning_rate": 5.4394498107223616e-06, "loss": 1.0354, "step": 3573 }, { "epoch": 0.4879847078099399, "grad_norm": 6.51300573348999, "learning_rate": 5.437247181051466e-06, "loss": 1.111, "step": 3574 }, { "epoch": 0.4881212452211906, "grad_norm": 17.426883697509766, "learning_rate": 5.435044465869793e-06, "loss": 1.0158, "step": 3575 }, { "epoch": 0.4882577826324413, "grad_norm": 11.178483963012695, "learning_rate": 5.432841665608122e-06, "loss": 0.9363, "step": 3576 }, { "epoch": 0.488394320043692, "grad_norm": 6.300640106201172, "learning_rate": 5.4306387806972434e-06, "loss": 0.9578, "step": 3577 }, { "epoch": 0.4885308574549427, "grad_norm": 8.928197860717773, "learning_rate": 5.428435811567967e-06, "loss": 1.05, "step": 3578 }, { "epoch": 0.4886673948661933, "grad_norm": 8.4735689163208, "learning_rate": 5.426232758651121e-06, "loss": 1.0001, "step": 3579 }, { "epoch": 0.488803932277444, "grad_norm": 7.0446600914001465, "learning_rate": 5.4240296223775465e-06, "loss": 1.0717, "step": 3580 }, { "epoch": 0.4889404696886947, "grad_norm": 10.6903076171875, "learning_rate": 5.421826403178104e-06, "loss": 1.0017, "step": 3581 }, { "epoch": 0.4890770070999454, "grad_norm": 6.7438859939575195, "learning_rate": 5.419623101483667e-06, "loss": 1.0962, "step": 3582 }, { "epoch": 0.4892135445111961, "grad_norm": 10.606842041015625, "learning_rate": 5.41741971772513e-06, "loss": 0.9912, "step": 3583 }, { "epoch": 0.4893500819224468, "grad_norm": 8.061169624328613, "learning_rate": 5.415216252333396e-06, "loss": 0.9716, "step": 3584 }, { "epoch": 0.4894866193336974, "grad_norm": 5.66438102722168, "learning_rate": 5.4130127057393915e-06, "loss": 1.129, "step": 3585 }, { "epoch": 0.4896231567449481, "grad_norm": 8.784912109375, "learning_rate": 5.410809078374055e-06, "loss": 1.005, "step": 3586 }, { "epoch": 0.4897596941561988, "grad_norm": 7.683396816253662, "learning_rate": 5.408605370668342e-06, "loss": 1.0252, "step": 3587 }, { "epoch": 0.4898962315674495, "grad_norm": 19.32404327392578, "learning_rate": 5.406401583053222e-06, "loss": 1.1035, "step": 3588 }, { "epoch": 0.4900327689787002, "grad_norm": 10.666996955871582, "learning_rate": 5.404197715959683e-06, "loss": 1.0357, "step": 3589 }, { "epoch": 0.4901693063899508, "grad_norm": 11.888504981994629, "learning_rate": 5.401993769818727e-06, "loss": 1.0518, "step": 3590 }, { "epoch": 0.4903058438012015, "grad_norm": 10.52580738067627, "learning_rate": 5.399789745061368e-06, "loss": 0.998, "step": 3591 }, { "epoch": 0.4904423812124522, "grad_norm": 92.28585052490234, "learning_rate": 5.397585642118642e-06, "loss": 0.9465, "step": 3592 }, { "epoch": 0.4905789186237029, "grad_norm": 9.812294960021973, "learning_rate": 5.3953814614215975e-06, "loss": 0.9341, "step": 3593 }, { "epoch": 0.4907154560349536, "grad_norm": 42.223854064941406, "learning_rate": 5.393177203401296e-06, "loss": 0.9812, "step": 3594 }, { "epoch": 0.4908519934462043, "grad_norm": 8.186102867126465, "learning_rate": 5.390972868488818e-06, "loss": 0.9927, "step": 3595 }, { "epoch": 0.4909885308574549, "grad_norm": 6.241161823272705, "learning_rate": 5.388768457115254e-06, "loss": 1.065, "step": 3596 }, { "epoch": 0.4911250682687056, "grad_norm": 6.7195024490356445, "learning_rate": 5.3865639697117155e-06, "loss": 0.9679, "step": 3597 }, { "epoch": 0.4912616056799563, "grad_norm": 5.963768005371094, "learning_rate": 5.3843594067093215e-06, "loss": 1.0233, "step": 3598 }, { "epoch": 0.491398143091207, "grad_norm": 6.3305559158325195, "learning_rate": 5.382154768539215e-06, "loss": 0.9758, "step": 3599 }, { "epoch": 0.4915346805024577, "grad_norm": 5.8190999031066895, "learning_rate": 5.3799500556325455e-06, "loss": 0.9124, "step": 3600 }, { "epoch": 0.4916712179137084, "grad_norm": 7.018558502197266, "learning_rate": 5.377745268420481e-06, "loss": 1.0552, "step": 3601 }, { "epoch": 0.491807755324959, "grad_norm": 7.926124572753906, "learning_rate": 5.3755404073342055e-06, "loss": 0.9223, "step": 3602 }, { "epoch": 0.4919442927362097, "grad_norm": 5.845036029815674, "learning_rate": 5.373335472804912e-06, "loss": 1.0675, "step": 3603 }, { "epoch": 0.4920808301474604, "grad_norm": 6.597814559936523, "learning_rate": 5.371130465263813e-06, "loss": 0.913, "step": 3604 }, { "epoch": 0.4922173675587111, "grad_norm": 7.312215328216553, "learning_rate": 5.3689253851421334e-06, "loss": 0.9585, "step": 3605 }, { "epoch": 0.4923539049699618, "grad_norm": 19.61456298828125, "learning_rate": 5.366720232871114e-06, "loss": 0.9764, "step": 3606 }, { "epoch": 0.4924904423812125, "grad_norm": 5.406713008880615, "learning_rate": 5.364515008882003e-06, "loss": 0.9299, "step": 3607 }, { "epoch": 0.4926269797924631, "grad_norm": 7.920365810394287, "learning_rate": 5.362309713606073e-06, "loss": 1.0116, "step": 3608 }, { "epoch": 0.4927635172037138, "grad_norm": 7.106398582458496, "learning_rate": 5.360104347474603e-06, "loss": 1.0078, "step": 3609 }, { "epoch": 0.4929000546149645, "grad_norm": 6.57130241394043, "learning_rate": 5.357898910918888e-06, "loss": 0.9721, "step": 3610 }, { "epoch": 0.4930365920262152, "grad_norm": 6.806454181671143, "learning_rate": 5.355693404370238e-06, "loss": 1.024, "step": 3611 }, { "epoch": 0.4931731294374659, "grad_norm": 5.218612194061279, "learning_rate": 5.353487828259973e-06, "loss": 0.8743, "step": 3612 }, { "epoch": 0.4933096668487165, "grad_norm": 9.399069786071777, "learning_rate": 5.351282183019433e-06, "loss": 0.9732, "step": 3613 }, { "epoch": 0.4934462042599672, "grad_norm": 5.911714553833008, "learning_rate": 5.349076469079966e-06, "loss": 1.0805, "step": 3614 }, { "epoch": 0.4935827416712179, "grad_norm": 7.142038822174072, "learning_rate": 5.346870686872934e-06, "loss": 1.0469, "step": 3615 }, { "epoch": 0.4937192790824686, "grad_norm": 6.54102897644043, "learning_rate": 5.344664836829715e-06, "loss": 1.0371, "step": 3616 }, { "epoch": 0.4938558164937193, "grad_norm": 7.741968154907227, "learning_rate": 5.342458919381697e-06, "loss": 1.0933, "step": 3617 }, { "epoch": 0.49399235390497, "grad_norm": 7.883018493652344, "learning_rate": 5.340252934960284e-06, "loss": 1.0086, "step": 3618 }, { "epoch": 0.4941288913162206, "grad_norm": 6.8229546546936035, "learning_rate": 5.338046883996892e-06, "loss": 0.9473, "step": 3619 }, { "epoch": 0.4942654287274713, "grad_norm": 11.348180770874023, "learning_rate": 5.33584076692295e-06, "loss": 1.0596, "step": 3620 }, { "epoch": 0.494401966138722, "grad_norm": 7.644598007202148, "learning_rate": 5.333634584169901e-06, "loss": 0.96, "step": 3621 }, { "epoch": 0.4945385035499727, "grad_norm": 5.012483596801758, "learning_rate": 5.331428336169199e-06, "loss": 0.8675, "step": 3622 }, { "epoch": 0.4946750409612234, "grad_norm": 7.059633731842041, "learning_rate": 5.3292220233523095e-06, "loss": 1.0598, "step": 3623 }, { "epoch": 0.4948115783724741, "grad_norm": 6.087562561035156, "learning_rate": 5.327015646150716e-06, "loss": 1.0447, "step": 3624 }, { "epoch": 0.4949481157837247, "grad_norm": 6.22674560546875, "learning_rate": 5.3248092049959085e-06, "loss": 0.8738, "step": 3625 }, { "epoch": 0.4950846531949754, "grad_norm": 5.870251178741455, "learning_rate": 5.322602700319397e-06, "loss": 1.1095, "step": 3626 }, { "epoch": 0.4952211906062261, "grad_norm": 7.349368095397949, "learning_rate": 5.320396132552694e-06, "loss": 0.9371, "step": 3627 }, { "epoch": 0.4953577280174768, "grad_norm": 5.72018575668335, "learning_rate": 5.318189502127332e-06, "loss": 0.9667, "step": 3628 }, { "epoch": 0.4954942654287275, "grad_norm": 6.102813243865967, "learning_rate": 5.315982809474854e-06, "loss": 0.9412, "step": 3629 }, { "epoch": 0.4956308028399782, "grad_norm": 8.519122123718262, "learning_rate": 5.313776055026811e-06, "loss": 1.0596, "step": 3630 }, { "epoch": 0.4957673402512288, "grad_norm": 6.754581451416016, "learning_rate": 5.3115692392147745e-06, "loss": 0.998, "step": 3631 }, { "epoch": 0.4959038776624795, "grad_norm": 7.398264408111572, "learning_rate": 5.309362362470321e-06, "loss": 0.9199, "step": 3632 }, { "epoch": 0.4960404150737302, "grad_norm": 6.172832489013672, "learning_rate": 5.307155425225039e-06, "loss": 0.9829, "step": 3633 }, { "epoch": 0.4961769524849809, "grad_norm": 5.835134029388428, "learning_rate": 5.304948427910534e-06, "loss": 0.9978, "step": 3634 }, { "epoch": 0.4963134898962316, "grad_norm": 6.741804599761963, "learning_rate": 5.3027413709584195e-06, "loss": 1.0888, "step": 3635 }, { "epoch": 0.4964500273074823, "grad_norm": 6.135282039642334, "learning_rate": 5.300534254800321e-06, "loss": 1.009, "step": 3636 }, { "epoch": 0.4965865647187329, "grad_norm": 6.230905532836914, "learning_rate": 5.2983270798678744e-06, "loss": 1.1203, "step": 3637 }, { "epoch": 0.4967231021299836, "grad_norm": 5.427338600158691, "learning_rate": 5.2961198465927284e-06, "loss": 0.9217, "step": 3638 }, { "epoch": 0.4968596395412343, "grad_norm": 4.857357978820801, "learning_rate": 5.293912555406546e-06, "loss": 0.8205, "step": 3639 }, { "epoch": 0.496996176952485, "grad_norm": 5.3615851402282715, "learning_rate": 5.291705206740997e-06, "loss": 0.9552, "step": 3640 }, { "epoch": 0.4971327143637357, "grad_norm": 6.220595359802246, "learning_rate": 5.289497801027766e-06, "loss": 0.898, "step": 3641 }, { "epoch": 0.4972692517749863, "grad_norm": 5.248448371887207, "learning_rate": 5.287290338698543e-06, "loss": 0.8621, "step": 3642 }, { "epoch": 0.497405789186237, "grad_norm": 6.822656631469727, "learning_rate": 5.285082820185036e-06, "loss": 0.9975, "step": 3643 }, { "epoch": 0.4975423265974877, "grad_norm": 6.106515884399414, "learning_rate": 5.282875245918963e-06, "loss": 1.0309, "step": 3644 }, { "epoch": 0.4976788640087384, "grad_norm": 7.056214332580566, "learning_rate": 5.280667616332046e-06, "loss": 0.9273, "step": 3645 }, { "epoch": 0.4978154014199891, "grad_norm": 5.579257011413574, "learning_rate": 5.278459931856027e-06, "loss": 0.8796, "step": 3646 }, { "epoch": 0.4979519388312398, "grad_norm": 6.440056800842285, "learning_rate": 5.276252192922652e-06, "loss": 1.1214, "step": 3647 }, { "epoch": 0.4980884762424904, "grad_norm": 8.407042503356934, "learning_rate": 5.274044399963682e-06, "loss": 0.9743, "step": 3648 }, { "epoch": 0.4982250136537411, "grad_norm": 6.996539115905762, "learning_rate": 5.271836553410884e-06, "loss": 1.0587, "step": 3649 }, { "epoch": 0.4983615510649918, "grad_norm": 6.444116115570068, "learning_rate": 5.269628653696039e-06, "loss": 1.0527, "step": 3650 }, { "epoch": 0.4984980884762425, "grad_norm": 8.594717979431152, "learning_rate": 5.2674207012509405e-06, "loss": 1.0233, "step": 3651 }, { "epoch": 0.4986346258874932, "grad_norm": 5.4339094161987305, "learning_rate": 5.265212696507387e-06, "loss": 0.9966, "step": 3652 }, { "epoch": 0.4987711632987439, "grad_norm": 6.299079418182373, "learning_rate": 5.26300463989719e-06, "loss": 1.1114, "step": 3653 }, { "epoch": 0.4989077007099945, "grad_norm": 8.258943557739258, "learning_rate": 5.26079653185217e-06, "loss": 1.0263, "step": 3654 }, { "epoch": 0.4990442381212452, "grad_norm": 7.880128383636475, "learning_rate": 5.25858837280416e-06, "loss": 0.8074, "step": 3655 }, { "epoch": 0.4991807755324959, "grad_norm": 10.728532791137695, "learning_rate": 5.256380163185001e-06, "loss": 0.9732, "step": 3656 }, { "epoch": 0.4993173129437466, "grad_norm": 10.6724853515625, "learning_rate": 5.254171903426543e-06, "loss": 0.9906, "step": 3657 }, { "epoch": 0.4994538503549973, "grad_norm": 6.289631366729736, "learning_rate": 5.251963593960646e-06, "loss": 0.8713, "step": 3658 }, { "epoch": 0.499590387766248, "grad_norm": 7.011536598205566, "learning_rate": 5.249755235219184e-06, "loss": 0.9023, "step": 3659 }, { "epoch": 0.4997269251774986, "grad_norm": 5.061919212341309, "learning_rate": 5.247546827634035e-06, "loss": 0.8349, "step": 3660 }, { "epoch": 0.4998634625887493, "grad_norm": 6.376471996307373, "learning_rate": 5.245338371637091e-06, "loss": 0.9309, "step": 3661 }, { "epoch": 0.5, "grad_norm": 6.692419528961182, "learning_rate": 5.243129867660249e-06, "loss": 0.9874, "step": 3662 }, { "epoch": 0.5001365374112506, "grad_norm": 5.539783477783203, "learning_rate": 5.240921316135419e-06, "loss": 0.9961, "step": 3663 }, { "epoch": 0.5002730748225014, "grad_norm": 6.6232757568359375, "learning_rate": 5.238712717494519e-06, "loss": 0.8672, "step": 3664 }, { "epoch": 0.500409612233752, "grad_norm": 8.50538158416748, "learning_rate": 5.2365040721694745e-06, "loss": 1.0804, "step": 3665 }, { "epoch": 0.5005461496450028, "grad_norm": 13.712178230285645, "learning_rate": 5.234295380592226e-06, "loss": 0.9705, "step": 3666 }, { "epoch": 0.5006826870562534, "grad_norm": 5.708759307861328, "learning_rate": 5.232086643194716e-06, "loss": 0.9842, "step": 3667 }, { "epoch": 0.500819224467504, "grad_norm": 6.1808037757873535, "learning_rate": 5.229877860408899e-06, "loss": 0.9723, "step": 3668 }, { "epoch": 0.5009557618787548, "grad_norm": 5.506880283355713, "learning_rate": 5.227669032666738e-06, "loss": 0.9375, "step": 3669 }, { "epoch": 0.5010922992900054, "grad_norm": 5.634519577026367, "learning_rate": 5.225460160400205e-06, "loss": 0.8762, "step": 3670 }, { "epoch": 0.5012288367012562, "grad_norm": 6.242835998535156, "learning_rate": 5.223251244041283e-06, "loss": 0.9379, "step": 3671 }, { "epoch": 0.5013653741125068, "grad_norm": 6.379541873931885, "learning_rate": 5.2210422840219595e-06, "loss": 1.0754, "step": 3672 }, { "epoch": 0.5015019115237576, "grad_norm": 8.118326187133789, "learning_rate": 5.218833280774233e-06, "loss": 0.9426, "step": 3673 }, { "epoch": 0.5016384489350082, "grad_norm": 9.852163314819336, "learning_rate": 5.216624234730111e-06, "loss": 0.8829, "step": 3674 }, { "epoch": 0.5017749863462588, "grad_norm": 17.349882125854492, "learning_rate": 5.214415146321605e-06, "loss": 1.0514, "step": 3675 }, { "epoch": 0.5019115237575096, "grad_norm": 11.573907852172852, "learning_rate": 5.212206015980742e-06, "loss": 1.0015, "step": 3676 }, { "epoch": 0.5020480611687602, "grad_norm": 6.472723007202148, "learning_rate": 5.209996844139551e-06, "loss": 0.8426, "step": 3677 }, { "epoch": 0.502184598580011, "grad_norm": 7.8112664222717285, "learning_rate": 5.207787631230071e-06, "loss": 0.9632, "step": 3678 }, { "epoch": 0.5023211359912616, "grad_norm": 6.712269306182861, "learning_rate": 5.205578377684351e-06, "loss": 0.9504, "step": 3679 }, { "epoch": 0.5024576734025122, "grad_norm": 5.518418788909912, "learning_rate": 5.2033690839344465e-06, "loss": 0.9573, "step": 3680 }, { "epoch": 0.502594210813763, "grad_norm": 8.344756126403809, "learning_rate": 5.201159750412418e-06, "loss": 0.9206, "step": 3681 }, { "epoch": 0.5027307482250136, "grad_norm": 6.030433177947998, "learning_rate": 5.198950377550339e-06, "loss": 0.9767, "step": 3682 }, { "epoch": 0.5028672856362644, "grad_norm": 5.438810348510742, "learning_rate": 5.196740965780287e-06, "loss": 1.0301, "step": 3683 }, { "epoch": 0.503003823047515, "grad_norm": 5.573031902313232, "learning_rate": 5.194531515534349e-06, "loss": 0.892, "step": 3684 }, { "epoch": 0.5031403604587658, "grad_norm": 7.388181209564209, "learning_rate": 5.1923220272446195e-06, "loss": 0.9192, "step": 3685 }, { "epoch": 0.5032768978700164, "grad_norm": 5.850457191467285, "learning_rate": 5.1901125013431974e-06, "loss": 0.8965, "step": 3686 }, { "epoch": 0.503413435281267, "grad_norm": 14.596317291259766, "learning_rate": 5.187902938262195e-06, "loss": 1.0199, "step": 3687 }, { "epoch": 0.5035499726925178, "grad_norm": 5.122547626495361, "learning_rate": 5.185693338433724e-06, "loss": 0.9156, "step": 3688 }, { "epoch": 0.5036865101037684, "grad_norm": 5.62335729598999, "learning_rate": 5.18348370228991e-06, "loss": 0.9043, "step": 3689 }, { "epoch": 0.5038230475150192, "grad_norm": 5.979723930358887, "learning_rate": 5.1812740302628825e-06, "loss": 0.943, "step": 3690 }, { "epoch": 0.5039595849262698, "grad_norm": 5.913339614868164, "learning_rate": 5.1790643227847795e-06, "loss": 0.9734, "step": 3691 }, { "epoch": 0.5040961223375204, "grad_norm": 6.715885162353516, "learning_rate": 5.176854580287744e-06, "loss": 1.0006, "step": 3692 }, { "epoch": 0.5042326597487712, "grad_norm": 5.214709758758545, "learning_rate": 5.174644803203928e-06, "loss": 0.9244, "step": 3693 }, { "epoch": 0.5043691971600218, "grad_norm": 6.991945743560791, "learning_rate": 5.172434991965487e-06, "loss": 0.9155, "step": 3694 }, { "epoch": 0.5045057345712726, "grad_norm": 25.68096351623535, "learning_rate": 5.170225147004588e-06, "loss": 1.0732, "step": 3695 }, { "epoch": 0.5046422719825232, "grad_norm": 5.6365509033203125, "learning_rate": 5.168015268753401e-06, "loss": 0.9308, "step": 3696 }, { "epoch": 0.5047788093937738, "grad_norm": 6.212463855743408, "learning_rate": 5.165805357644105e-06, "loss": 1.0643, "step": 3697 }, { "epoch": 0.5049153468050246, "grad_norm": 6.7276082038879395, "learning_rate": 5.1635954141088815e-06, "loss": 0.8644, "step": 3698 }, { "epoch": 0.5050518842162752, "grad_norm": 7.959217071533203, "learning_rate": 5.161385438579922e-06, "loss": 1.0412, "step": 3699 }, { "epoch": 0.505188421627526, "grad_norm": 7.106665134429932, "learning_rate": 5.159175431489424e-06, "loss": 1.0408, "step": 3700 }, { "epoch": 0.5053249590387766, "grad_norm": 7.133647918701172, "learning_rate": 5.156965393269587e-06, "loss": 0.8569, "step": 3701 }, { "epoch": 0.5054614964500274, "grad_norm": 5.489775657653809, "learning_rate": 5.154755324352623e-06, "loss": 0.8938, "step": 3702 }, { "epoch": 0.505598033861278, "grad_norm": 8.116087913513184, "learning_rate": 5.152545225170745e-06, "loss": 0.8704, "step": 3703 }, { "epoch": 0.5057345712725286, "grad_norm": 5.10756254196167, "learning_rate": 5.150335096156176e-06, "loss": 0.9539, "step": 3704 }, { "epoch": 0.5058711086837794, "grad_norm": 7.684436321258545, "learning_rate": 5.148124937741141e-06, "loss": 1.0906, "step": 3705 }, { "epoch": 0.50600764609503, "grad_norm": 7.68075704574585, "learning_rate": 5.145914750357872e-06, "loss": 0.9029, "step": 3706 }, { "epoch": 0.5061441835062808, "grad_norm": 6.020623207092285, "learning_rate": 5.143704534438608e-06, "loss": 1.0228, "step": 3707 }, { "epoch": 0.5062807209175314, "grad_norm": 6.824230670928955, "learning_rate": 5.141494290415592e-06, "loss": 1.0407, "step": 3708 }, { "epoch": 0.506417258328782, "grad_norm": 7.502054691314697, "learning_rate": 5.139284018721071e-06, "loss": 0.9428, "step": 3709 }, { "epoch": 0.5065537957400328, "grad_norm": 5.613166809082031, "learning_rate": 5.137073719787303e-06, "loss": 1.0403, "step": 3710 }, { "epoch": 0.5066903331512834, "grad_norm": 7.964564800262451, "learning_rate": 5.134863394046548e-06, "loss": 0.8451, "step": 3711 }, { "epoch": 0.5068268705625342, "grad_norm": 6.594479560852051, "learning_rate": 5.132653041931067e-06, "loss": 0.9205, "step": 3712 }, { "epoch": 0.5069634079737848, "grad_norm": 5.862914085388184, "learning_rate": 5.130442663873133e-06, "loss": 1.0495, "step": 3713 }, { "epoch": 0.5070999453850354, "grad_norm": 11.253378868103027, "learning_rate": 5.128232260305021e-06, "loss": 0.8802, "step": 3714 }, { "epoch": 0.5072364827962862, "grad_norm": 5.683630466461182, "learning_rate": 5.12602183165901e-06, "loss": 0.9295, "step": 3715 }, { "epoch": 0.5073730202075368, "grad_norm": 6.0259690284729, "learning_rate": 5.123811378367387e-06, "loss": 1.0203, "step": 3716 }, { "epoch": 0.5075095576187876, "grad_norm": 7.675084590911865, "learning_rate": 5.121600900862443e-06, "loss": 1.0387, "step": 3717 }, { "epoch": 0.5076460950300382, "grad_norm": 5.294970512390137, "learning_rate": 5.1193903995764695e-06, "loss": 1.0491, "step": 3718 }, { "epoch": 0.507782632441289, "grad_norm": 5.816165447235107, "learning_rate": 5.117179874941766e-06, "loss": 0.96, "step": 3719 }, { "epoch": 0.5079191698525396, "grad_norm": 7.444000244140625, "learning_rate": 5.11496932739064e-06, "loss": 1.0854, "step": 3720 }, { "epoch": 0.5080557072637902, "grad_norm": 8.126455307006836, "learning_rate": 5.112758757355396e-06, "loss": 0.9535, "step": 3721 }, { "epoch": 0.508192244675041, "grad_norm": 5.611153602600098, "learning_rate": 5.11054816526835e-06, "loss": 0.9503, "step": 3722 }, { "epoch": 0.5083287820862916, "grad_norm": 7.688157081604004, "learning_rate": 5.108337551561816e-06, "loss": 1.1507, "step": 3723 }, { "epoch": 0.5084653194975424, "grad_norm": 6.6236186027526855, "learning_rate": 5.106126916668118e-06, "loss": 0.9936, "step": 3724 }, { "epoch": 0.508601856908793, "grad_norm": 8.888197898864746, "learning_rate": 5.10391626101958e-06, "loss": 0.9074, "step": 3725 }, { "epoch": 0.5087383943200436, "grad_norm": 6.617262840270996, "learning_rate": 5.101705585048533e-06, "loss": 1.0531, "step": 3726 }, { "epoch": 0.5088749317312944, "grad_norm": 5.39443826675415, "learning_rate": 5.09949488918731e-06, "loss": 0.9569, "step": 3727 }, { "epoch": 0.509011469142545, "grad_norm": 5.100245952606201, "learning_rate": 5.0972841738682464e-06, "loss": 0.9423, "step": 3728 }, { "epoch": 0.5091480065537958, "grad_norm": 8.03077220916748, "learning_rate": 5.095073439523685e-06, "loss": 0.9846, "step": 3729 }, { "epoch": 0.5092845439650464, "grad_norm": 6.077859401702881, "learning_rate": 5.092862686585971e-06, "loss": 0.9415, "step": 3730 }, { "epoch": 0.5094210813762972, "grad_norm": 8.099133491516113, "learning_rate": 5.090651915487454e-06, "loss": 1.0133, "step": 3731 }, { "epoch": 0.5095576187875478, "grad_norm": 6.824978351593018, "learning_rate": 5.088441126660484e-06, "loss": 0.8662, "step": 3732 }, { "epoch": 0.5096941561987984, "grad_norm": 7.240400791168213, "learning_rate": 5.086230320537417e-06, "loss": 1.0095, "step": 3733 }, { "epoch": 0.5098306936100492, "grad_norm": 7.892816543579102, "learning_rate": 5.084019497550613e-06, "loss": 0.9882, "step": 3734 }, { "epoch": 0.5099672310212998, "grad_norm": 8.1260986328125, "learning_rate": 5.0818086581324345e-06, "loss": 0.9459, "step": 3735 }, { "epoch": 0.5101037684325506, "grad_norm": 6.474001407623291, "learning_rate": 5.079597802715245e-06, "loss": 1.05, "step": 3736 }, { "epoch": 0.5102403058438012, "grad_norm": 9.885919570922852, "learning_rate": 5.077386931731416e-06, "loss": 1.0445, "step": 3737 }, { "epoch": 0.5103768432550518, "grad_norm": 9.286192893981934, "learning_rate": 5.075176045613317e-06, "loss": 0.9827, "step": 3738 }, { "epoch": 0.5105133806663026, "grad_norm": 6.956995010375977, "learning_rate": 5.072965144793323e-06, "loss": 0.9865, "step": 3739 }, { "epoch": 0.5106499180775532, "grad_norm": 5.926479816436768, "learning_rate": 5.070754229703811e-06, "loss": 0.9581, "step": 3740 }, { "epoch": 0.510786455488804, "grad_norm": 6.993411540985107, "learning_rate": 5.068543300777163e-06, "loss": 1.0202, "step": 3741 }, { "epoch": 0.5109229929000546, "grad_norm": 5.008316993713379, "learning_rate": 5.06633235844576e-06, "loss": 1.0354, "step": 3742 }, { "epoch": 0.5110595303113052, "grad_norm": 6.90936803817749, "learning_rate": 5.064121403141991e-06, "loss": 1.0312, "step": 3743 }, { "epoch": 0.511196067722556, "grad_norm": 6.107491493225098, "learning_rate": 5.06191043529824e-06, "loss": 0.8609, "step": 3744 }, { "epoch": 0.5113326051338066, "grad_norm": 6.84547233581543, "learning_rate": 5.059699455346899e-06, "loss": 0.9763, "step": 3745 }, { "epoch": 0.5114691425450574, "grad_norm": 8.549927711486816, "learning_rate": 5.057488463720361e-06, "loss": 1.0245, "step": 3746 }, { "epoch": 0.511605679956308, "grad_norm": 5.408423900604248, "learning_rate": 5.055277460851024e-06, "loss": 0.9855, "step": 3747 }, { "epoch": 0.5117422173675588, "grad_norm": 9.616259574890137, "learning_rate": 5.053066447171282e-06, "loss": 0.8631, "step": 3748 }, { "epoch": 0.5118787547788094, "grad_norm": 8.924840927124023, "learning_rate": 5.050855423113535e-06, "loss": 0.9101, "step": 3749 }, { "epoch": 0.51201529219006, "grad_norm": 6.576691627502441, "learning_rate": 5.0486443891101865e-06, "loss": 0.9747, "step": 3750 }, { "epoch": 0.5121518296013108, "grad_norm": 7.166160583496094, "learning_rate": 5.046433345593639e-06, "loss": 0.7989, "step": 3751 }, { "epoch": 0.5122883670125614, "grad_norm": 6.035069465637207, "learning_rate": 5.044222292996296e-06, "loss": 0.8913, "step": 3752 }, { "epoch": 0.5124249044238122, "grad_norm": 14.328235626220703, "learning_rate": 5.042011231750568e-06, "loss": 1.0221, "step": 3753 }, { "epoch": 0.5125614418350628, "grad_norm": 6.459346294403076, "learning_rate": 5.039800162288861e-06, "loss": 0.9027, "step": 3754 }, { "epoch": 0.5126979792463134, "grad_norm": 78.23435974121094, "learning_rate": 5.037589085043588e-06, "loss": 1.0061, "step": 3755 }, { "epoch": 0.5128345166575642, "grad_norm": 7.337204456329346, "learning_rate": 5.0353780004471605e-06, "loss": 0.8631, "step": 3756 }, { "epoch": 0.5129710540688148, "grad_norm": 5.653942584991455, "learning_rate": 5.033166908931991e-06, "loss": 0.9071, "step": 3757 }, { "epoch": 0.5131075914800656, "grad_norm": 6.461834907531738, "learning_rate": 5.030955810930495e-06, "loss": 0.9426, "step": 3758 }, { "epoch": 0.5132441288913162, "grad_norm": 4.759790897369385, "learning_rate": 5.028744706875086e-06, "loss": 0.9357, "step": 3759 }, { "epoch": 0.513380666302567, "grad_norm": 9.68362045288086, "learning_rate": 5.026533597198185e-06, "loss": 1.1534, "step": 3760 }, { "epoch": 0.5135172037138176, "grad_norm": 7.187854290008545, "learning_rate": 5.0243224823322075e-06, "loss": 1.0877, "step": 3761 }, { "epoch": 0.5136537411250682, "grad_norm": 7.234663963317871, "learning_rate": 5.0221113627095765e-06, "loss": 0.9684, "step": 3762 }, { "epoch": 0.513790278536319, "grad_norm": 6.248347282409668, "learning_rate": 5.019900238762709e-06, "loss": 0.8946, "step": 3763 }, { "epoch": 0.5139268159475696, "grad_norm": 7.536035537719727, "learning_rate": 5.0176891109240265e-06, "loss": 1.0614, "step": 3764 }, { "epoch": 0.5140633533588204, "grad_norm": 4.625940799713135, "learning_rate": 5.015477979625951e-06, "loss": 0.9304, "step": 3765 }, { "epoch": 0.514199890770071, "grad_norm": 6.928626537322998, "learning_rate": 5.0132668453009074e-06, "loss": 0.8732, "step": 3766 }, { "epoch": 0.5143364281813216, "grad_norm": 5.803050518035889, "learning_rate": 5.0110557083813165e-06, "loss": 0.9773, "step": 3767 }, { "epoch": 0.5144729655925724, "grad_norm": 8.284955978393555, "learning_rate": 5.008844569299603e-06, "loss": 0.9656, "step": 3768 }, { "epoch": 0.514609503003823, "grad_norm": 6.239803791046143, "learning_rate": 5.006633428488188e-06, "loss": 0.8837, "step": 3769 }, { "epoch": 0.5147460404150738, "grad_norm": 6.990610599517822, "learning_rate": 5.004422286379501e-06, "loss": 0.9019, "step": 3770 }, { "epoch": 0.5148825778263244, "grad_norm": 9.476832389831543, "learning_rate": 5.002211143405964e-06, "loss": 0.8907, "step": 3771 }, { "epoch": 0.515019115237575, "grad_norm": 5.596862316131592, "learning_rate": 5e-06, "loss": 0.8279, "step": 3772 }, { "epoch": 0.5151556526488258, "grad_norm": 6.547582149505615, "learning_rate": 4.9977888565940395e-06, "loss": 1.0073, "step": 3773 }, { "epoch": 0.5152921900600764, "grad_norm": 4.903007984161377, "learning_rate": 4.9955777136205e-06, "loss": 0.9728, "step": 3774 }, { "epoch": 0.5154287274713272, "grad_norm": 9.841032981872559, "learning_rate": 4.993366571511813e-06, "loss": 1.0618, "step": 3775 }, { "epoch": 0.5155652648825778, "grad_norm": 6.331923484802246, "learning_rate": 4.9911554307004e-06, "loss": 1.1992, "step": 3776 }, { "epoch": 0.5157018022938286, "grad_norm": 6.943817615509033, "learning_rate": 4.988944291618686e-06, "loss": 1.0424, "step": 3777 }, { "epoch": 0.5158383397050792, "grad_norm": 7.297945022583008, "learning_rate": 4.986733154699093e-06, "loss": 0.9526, "step": 3778 }, { "epoch": 0.5159748771163298, "grad_norm": 6.583631992340088, "learning_rate": 4.98452202037405e-06, "loss": 0.8859, "step": 3779 }, { "epoch": 0.5161114145275806, "grad_norm": 6.436054229736328, "learning_rate": 4.9823108890759735e-06, "loss": 1.0476, "step": 3780 }, { "epoch": 0.5162479519388312, "grad_norm": 6.396273136138916, "learning_rate": 4.980099761237292e-06, "loss": 0.9291, "step": 3781 }, { "epoch": 0.516384489350082, "grad_norm": 11.543529510498047, "learning_rate": 4.977888637290424e-06, "loss": 1.0516, "step": 3782 }, { "epoch": 0.5165210267613326, "grad_norm": 5.592321395874023, "learning_rate": 4.975677517667793e-06, "loss": 0.9719, "step": 3783 }, { "epoch": 0.5166575641725832, "grad_norm": 5.682465076446533, "learning_rate": 4.973466402801817e-06, "loss": 0.7747, "step": 3784 }, { "epoch": 0.516794101583834, "grad_norm": 12.483003616333008, "learning_rate": 4.971255293124915e-06, "loss": 0.9442, "step": 3785 }, { "epoch": 0.5169306389950846, "grad_norm": 5.147092342376709, "learning_rate": 4.969044189069509e-06, "loss": 0.9798, "step": 3786 }, { "epoch": 0.5170671764063354, "grad_norm": 6.097100734710693, "learning_rate": 4.96683309106801e-06, "loss": 0.8908, "step": 3787 }, { "epoch": 0.517203713817586, "grad_norm": 7.654575347900391, "learning_rate": 4.964621999552841e-06, "loss": 1.0578, "step": 3788 }, { "epoch": 0.5173402512288368, "grad_norm": 5.804990291595459, "learning_rate": 4.9624109149564125e-06, "loss": 0.8153, "step": 3789 }, { "epoch": 0.5174767886400874, "grad_norm": 6.242343425750732, "learning_rate": 4.96019983771114e-06, "loss": 0.9303, "step": 3790 }, { "epoch": 0.517613326051338, "grad_norm": 7.454705715179443, "learning_rate": 4.957988768249432e-06, "loss": 0.9253, "step": 3791 }, { "epoch": 0.5177498634625888, "grad_norm": 8.08651065826416, "learning_rate": 4.955777707003705e-06, "loss": 0.9655, "step": 3792 }, { "epoch": 0.5178864008738394, "grad_norm": 5.749599933624268, "learning_rate": 4.953566654406364e-06, "loss": 0.867, "step": 3793 }, { "epoch": 0.5180229382850902, "grad_norm": 9.772473335266113, "learning_rate": 4.951355610889815e-06, "loss": 0.9238, "step": 3794 }, { "epoch": 0.5181594756963408, "grad_norm": 5.956388473510742, "learning_rate": 4.949144576886466e-06, "loss": 0.9309, "step": 3795 }, { "epoch": 0.5182960131075914, "grad_norm": 11.493518829345703, "learning_rate": 4.94693355282872e-06, "loss": 1.1169, "step": 3796 }, { "epoch": 0.5184325505188422, "grad_norm": 4.9545698165893555, "learning_rate": 4.944722539148979e-06, "loss": 0.9673, "step": 3797 }, { "epoch": 0.5185690879300928, "grad_norm": 5.862629413604736, "learning_rate": 4.942511536279639e-06, "loss": 1.1068, "step": 3798 }, { "epoch": 0.5187056253413436, "grad_norm": 5.311477184295654, "learning_rate": 4.9403005446531024e-06, "loss": 0.9075, "step": 3799 }, { "epoch": 0.5188421627525942, "grad_norm": 5.228984832763672, "learning_rate": 4.938089564701762e-06, "loss": 1.0245, "step": 3800 }, { "epoch": 0.5189787001638448, "grad_norm": 8.182540893554688, "learning_rate": 4.935878596858011e-06, "loss": 0.9562, "step": 3801 }, { "epoch": 0.5191152375750956, "grad_norm": 5.250361442565918, "learning_rate": 4.9336676415542405e-06, "loss": 0.9032, "step": 3802 }, { "epoch": 0.5192517749863462, "grad_norm": 6.400282859802246, "learning_rate": 4.931456699222838e-06, "loss": 0.9544, "step": 3803 }, { "epoch": 0.519388312397597, "grad_norm": 6.4816460609436035, "learning_rate": 4.929245770296191e-06, "loss": 0.8613, "step": 3804 }, { "epoch": 0.5195248498088476, "grad_norm": 9.061352729797363, "learning_rate": 4.927034855206678e-06, "loss": 0.924, "step": 3805 }, { "epoch": 0.5196613872200984, "grad_norm": 6.189514636993408, "learning_rate": 4.924823954386685e-06, "loss": 0.9839, "step": 3806 }, { "epoch": 0.519797924631349, "grad_norm": 5.695917129516602, "learning_rate": 4.922613068268586e-06, "loss": 1.0025, "step": 3807 }, { "epoch": 0.5199344620425996, "grad_norm": 7.696769714355469, "learning_rate": 4.920402197284756e-06, "loss": 0.9683, "step": 3808 }, { "epoch": 0.5200709994538504, "grad_norm": 5.3738789558410645, "learning_rate": 4.918191341867566e-06, "loss": 0.8723, "step": 3809 }, { "epoch": 0.520207536865101, "grad_norm": 6.347646236419678, "learning_rate": 4.915980502449388e-06, "loss": 1.0413, "step": 3810 }, { "epoch": 0.5203440742763518, "grad_norm": 10.122615814208984, "learning_rate": 4.9137696794625826e-06, "loss": 0.9271, "step": 3811 }, { "epoch": 0.5204806116876024, "grad_norm": 11.426087379455566, "learning_rate": 4.911558873339517e-06, "loss": 1.0921, "step": 3812 }, { "epoch": 0.520617149098853, "grad_norm": 13.686214447021484, "learning_rate": 4.909348084512548e-06, "loss": 1.0209, "step": 3813 }, { "epoch": 0.5207536865101038, "grad_norm": 4.9201226234436035, "learning_rate": 4.90713731341403e-06, "loss": 0.877, "step": 3814 }, { "epoch": 0.5208902239213544, "grad_norm": 8.757646560668945, "learning_rate": 4.904926560476317e-06, "loss": 0.9154, "step": 3815 }, { "epoch": 0.5210267613326052, "grad_norm": 5.167112827301025, "learning_rate": 4.902715826131755e-06, "loss": 0.852, "step": 3816 }, { "epoch": 0.5211632987438558, "grad_norm": 7.56147575378418, "learning_rate": 4.9005051108126926e-06, "loss": 0.9514, "step": 3817 }, { "epoch": 0.5212998361551064, "grad_norm": 6.918664932250977, "learning_rate": 4.8982944149514675e-06, "loss": 1.1441, "step": 3818 }, { "epoch": 0.5214363735663572, "grad_norm": 6.492950916290283, "learning_rate": 4.896083738980421e-06, "loss": 1.0709, "step": 3819 }, { "epoch": 0.5215729109776078, "grad_norm": 7.608251094818115, "learning_rate": 4.8938730833318825e-06, "loss": 0.7848, "step": 3820 }, { "epoch": 0.5217094483888586, "grad_norm": 10.66708755493164, "learning_rate": 4.891662448438186e-06, "loss": 0.9996, "step": 3821 }, { "epoch": 0.5218459858001092, "grad_norm": 8.100761413574219, "learning_rate": 4.889451834731651e-06, "loss": 1.023, "step": 3822 }, { "epoch": 0.52198252321136, "grad_norm": 8.248516082763672, "learning_rate": 4.887241242644605e-06, "loss": 1.1581, "step": 3823 }, { "epoch": 0.5221190606226106, "grad_norm": 7.089481830596924, "learning_rate": 4.885030672609362e-06, "loss": 0.9617, "step": 3824 }, { "epoch": 0.5222555980338612, "grad_norm": 5.405861854553223, "learning_rate": 4.8828201250582345e-06, "loss": 1.0272, "step": 3825 }, { "epoch": 0.522392135445112, "grad_norm": 12.360124588012695, "learning_rate": 4.880609600423533e-06, "loss": 0.9683, "step": 3826 }, { "epoch": 0.5225286728563626, "grad_norm": 8.092246055603027, "learning_rate": 4.878399099137559e-06, "loss": 0.9699, "step": 3827 }, { "epoch": 0.5226652102676134, "grad_norm": 40.37392044067383, "learning_rate": 4.876188621632614e-06, "loss": 0.8993, "step": 3828 }, { "epoch": 0.522801747678864, "grad_norm": 6.660440444946289, "learning_rate": 4.873978168340991e-06, "loss": 0.9013, "step": 3829 }, { "epoch": 0.5229382850901146, "grad_norm": 9.444583892822266, "learning_rate": 4.871767739694982e-06, "loss": 1.0264, "step": 3830 }, { "epoch": 0.5230748225013654, "grad_norm": 9.683099746704102, "learning_rate": 4.869557336126868e-06, "loss": 1.0048, "step": 3831 }, { "epoch": 0.523211359912616, "grad_norm": 9.852189064025879, "learning_rate": 4.867346958068934e-06, "loss": 1.1242, "step": 3832 }, { "epoch": 0.5233478973238668, "grad_norm": 7.4009175300598145, "learning_rate": 4.865136605953455e-06, "loss": 1.0134, "step": 3833 }, { "epoch": 0.5234844347351174, "grad_norm": 8.835874557495117, "learning_rate": 4.862926280212698e-06, "loss": 1.0471, "step": 3834 }, { "epoch": 0.5236209721463682, "grad_norm": 6.666238307952881, "learning_rate": 4.860715981278931e-06, "loss": 1.0009, "step": 3835 }, { "epoch": 0.5237575095576188, "grad_norm": 16.294763565063477, "learning_rate": 4.85850570958441e-06, "loss": 0.8877, "step": 3836 }, { "epoch": 0.5238940469688694, "grad_norm": 6.843860149383545, "learning_rate": 4.856295465561394e-06, "loss": 0.8754, "step": 3837 }, { "epoch": 0.5240305843801202, "grad_norm": 5.971340179443359, "learning_rate": 4.854085249642128e-06, "loss": 0.8831, "step": 3838 }, { "epoch": 0.5241671217913708, "grad_norm": 9.891092300415039, "learning_rate": 4.85187506225886e-06, "loss": 0.9466, "step": 3839 }, { "epoch": 0.5243036592026216, "grad_norm": 7.6544342041015625, "learning_rate": 4.849664903843824e-06, "loss": 0.8854, "step": 3840 }, { "epoch": 0.5244401966138722, "grad_norm": 6.53659725189209, "learning_rate": 4.847454774829256e-06, "loss": 0.9128, "step": 3841 }, { "epoch": 0.5245767340251228, "grad_norm": 6.113401412963867, "learning_rate": 4.845244675647377e-06, "loss": 0.8547, "step": 3842 }, { "epoch": 0.5247132714363736, "grad_norm": 7.5341010093688965, "learning_rate": 4.843034606730414e-06, "loss": 0.9989, "step": 3843 }, { "epoch": 0.5248498088476242, "grad_norm": 7.117054462432861, "learning_rate": 4.840824568510579e-06, "loss": 1.034, "step": 3844 }, { "epoch": 0.524986346258875, "grad_norm": 6.081263542175293, "learning_rate": 4.838614561420079e-06, "loss": 0.8107, "step": 3845 }, { "epoch": 0.5251228836701256, "grad_norm": 6.330902576446533, "learning_rate": 4.83640458589112e-06, "loss": 0.9654, "step": 3846 }, { "epoch": 0.5252594210813762, "grad_norm": 7.463233470916748, "learning_rate": 4.834194642355897e-06, "loss": 1.0485, "step": 3847 }, { "epoch": 0.525395958492627, "grad_norm": 7.7807416915893555, "learning_rate": 4.8319847312466e-06, "loss": 0.9248, "step": 3848 }, { "epoch": 0.5255324959038776, "grad_norm": 8.939536094665527, "learning_rate": 4.8297748529954125e-06, "loss": 0.9936, "step": 3849 }, { "epoch": 0.5256690333151284, "grad_norm": 4.991199016571045, "learning_rate": 4.827565008034514e-06, "loss": 0.9688, "step": 3850 }, { "epoch": 0.525805570726379, "grad_norm": 5.728866100311279, "learning_rate": 4.825355196796073e-06, "loss": 1.1224, "step": 3851 }, { "epoch": 0.5259421081376298, "grad_norm": 6.325470924377441, "learning_rate": 4.8231454197122575e-06, "loss": 1.0496, "step": 3852 }, { "epoch": 0.5260786455488804, "grad_norm": 7.057547092437744, "learning_rate": 4.820935677215223e-06, "loss": 0.9988, "step": 3853 }, { "epoch": 0.526215182960131, "grad_norm": 5.851417541503906, "learning_rate": 4.818725969737119e-06, "loss": 1.0915, "step": 3854 }, { "epoch": 0.5263517203713818, "grad_norm": 6.916220188140869, "learning_rate": 4.816516297710093e-06, "loss": 0.9667, "step": 3855 }, { "epoch": 0.5264882577826324, "grad_norm": 6.781264305114746, "learning_rate": 4.814306661566277e-06, "loss": 1.1014, "step": 3856 }, { "epoch": 0.5266247951938832, "grad_norm": 6.75055456161499, "learning_rate": 4.8120970617378075e-06, "loss": 1.0446, "step": 3857 }, { "epoch": 0.5267613326051338, "grad_norm": 7.1732683181762695, "learning_rate": 4.8098874986568025e-06, "loss": 1.002, "step": 3858 }, { "epoch": 0.5268978700163844, "grad_norm": 8.601685523986816, "learning_rate": 4.807677972755382e-06, "loss": 0.9055, "step": 3859 }, { "epoch": 0.5270344074276352, "grad_norm": 5.986949920654297, "learning_rate": 4.805468484465651e-06, "loss": 1.0897, "step": 3860 }, { "epoch": 0.5271709448388858, "grad_norm": 6.749411582946777, "learning_rate": 4.803259034219714e-06, "loss": 1.0358, "step": 3861 }, { "epoch": 0.5273074822501366, "grad_norm": 7.488998889923096, "learning_rate": 4.801049622449662e-06, "loss": 0.8818, "step": 3862 }, { "epoch": 0.5274440196613872, "grad_norm": 5.148072242736816, "learning_rate": 4.798840249587584e-06, "loss": 0.937, "step": 3863 }, { "epoch": 0.527580557072638, "grad_norm": 14.667706489562988, "learning_rate": 4.796630916065556e-06, "loss": 0.9802, "step": 3864 }, { "epoch": 0.5277170944838886, "grad_norm": 7.945551872253418, "learning_rate": 4.7944216223156494e-06, "loss": 1.0304, "step": 3865 }, { "epoch": 0.5278536318951392, "grad_norm": 6.683437347412109, "learning_rate": 4.7922123687699305e-06, "loss": 0.9622, "step": 3866 }, { "epoch": 0.52799016930639, "grad_norm": 6.033742904663086, "learning_rate": 4.79000315586045e-06, "loss": 1.0194, "step": 3867 }, { "epoch": 0.5281267067176406, "grad_norm": 6.932093620300293, "learning_rate": 4.78779398401926e-06, "loss": 0.9191, "step": 3868 }, { "epoch": 0.5282632441288914, "grad_norm": 8.565435409545898, "learning_rate": 4.785584853678395e-06, "loss": 0.8504, "step": 3869 }, { "epoch": 0.528399781540142, "grad_norm": 5.708729267120361, "learning_rate": 4.783375765269891e-06, "loss": 0.9758, "step": 3870 }, { "epoch": 0.5285363189513926, "grad_norm": 27.19120979309082, "learning_rate": 4.7811667192257675e-06, "loss": 1.0362, "step": 3871 }, { "epoch": 0.5286728563626434, "grad_norm": 5.733111381530762, "learning_rate": 4.778957715978042e-06, "loss": 0.793, "step": 3872 }, { "epoch": 0.528809393773894, "grad_norm": 8.416876792907715, "learning_rate": 4.776748755958718e-06, "loss": 0.8829, "step": 3873 }, { "epoch": 0.5289459311851448, "grad_norm": 6.510391712188721, "learning_rate": 4.774539839599796e-06, "loss": 1.0986, "step": 3874 }, { "epoch": 0.5290824685963954, "grad_norm": 8.946467399597168, "learning_rate": 4.772330967333265e-06, "loss": 1.0788, "step": 3875 }, { "epoch": 0.529219006007646, "grad_norm": 5.481148719787598, "learning_rate": 4.770122139591103e-06, "loss": 1.0041, "step": 3876 }, { "epoch": 0.5293555434188968, "grad_norm": 9.440217971801758, "learning_rate": 4.767913356805286e-06, "loss": 1.0471, "step": 3877 }, { "epoch": 0.5294920808301474, "grad_norm": 6.253282070159912, "learning_rate": 4.765704619407775e-06, "loss": 0.9459, "step": 3878 }, { "epoch": 0.5296286182413982, "grad_norm": 6.6307692527771, "learning_rate": 4.763495927830527e-06, "loss": 0.9039, "step": 3879 }, { "epoch": 0.5297651556526488, "grad_norm": 6.7851176261901855, "learning_rate": 4.761287282505482e-06, "loss": 1.0945, "step": 3880 }, { "epoch": 0.5299016930638996, "grad_norm": 6.2800469398498535, "learning_rate": 4.759078683864583e-06, "loss": 0.8429, "step": 3881 }, { "epoch": 0.5300382304751502, "grad_norm": 9.677416801452637, "learning_rate": 4.7568701323397515e-06, "loss": 1.0077, "step": 3882 }, { "epoch": 0.5301747678864008, "grad_norm": 10.38083267211914, "learning_rate": 4.75466162836291e-06, "loss": 1.0248, "step": 3883 }, { "epoch": 0.5303113052976516, "grad_norm": 9.04879379272461, "learning_rate": 4.752453172365966e-06, "loss": 0.9706, "step": 3884 }, { "epoch": 0.5304478427089022, "grad_norm": 5.585054874420166, "learning_rate": 4.750244764780818e-06, "loss": 1.054, "step": 3885 }, { "epoch": 0.530584380120153, "grad_norm": 6.307790756225586, "learning_rate": 4.748036406039356e-06, "loss": 1.0016, "step": 3886 }, { "epoch": 0.5307209175314036, "grad_norm": 8.28049087524414, "learning_rate": 4.745828096573459e-06, "loss": 0.9982, "step": 3887 }, { "epoch": 0.5308574549426542, "grad_norm": 6.863428592681885, "learning_rate": 4.743619836815002e-06, "loss": 1.1109, "step": 3888 }, { "epoch": 0.530993992353905, "grad_norm": 7.112967491149902, "learning_rate": 4.741411627195841e-06, "loss": 1.0325, "step": 3889 }, { "epoch": 0.5311305297651556, "grad_norm": 8.557655334472656, "learning_rate": 4.739203468147831e-06, "loss": 0.9771, "step": 3890 }, { "epoch": 0.5312670671764064, "grad_norm": 9.994078636169434, "learning_rate": 4.736995360102811e-06, "loss": 0.8905, "step": 3891 }, { "epoch": 0.531403604587657, "grad_norm": 6.2537922859191895, "learning_rate": 4.734787303492615e-06, "loss": 0.8264, "step": 3892 }, { "epoch": 0.5315401419989078, "grad_norm": 10.88942813873291, "learning_rate": 4.7325792987490595e-06, "loss": 1.0162, "step": 3893 }, { "epoch": 0.5316766794101584, "grad_norm": 6.3643341064453125, "learning_rate": 4.730371346303962e-06, "loss": 1.0321, "step": 3894 }, { "epoch": 0.531813216821409, "grad_norm": 9.924310684204102, "learning_rate": 4.728163446589119e-06, "loss": 0.9522, "step": 3895 }, { "epoch": 0.5319497542326598, "grad_norm": 5.987548828125, "learning_rate": 4.725955600036321e-06, "loss": 0.9669, "step": 3896 }, { "epoch": 0.5320862916439104, "grad_norm": 9.571248054504395, "learning_rate": 4.72374780707735e-06, "loss": 1.037, "step": 3897 }, { "epoch": 0.5322228290551612, "grad_norm": 8.250645637512207, "learning_rate": 4.721540068143975e-06, "loss": 1.0085, "step": 3898 }, { "epoch": 0.5323593664664118, "grad_norm": 6.808598041534424, "learning_rate": 4.719332383667956e-06, "loss": 0.8576, "step": 3899 }, { "epoch": 0.5324959038776624, "grad_norm": 5.791086673736572, "learning_rate": 4.717124754081038e-06, "loss": 1.1088, "step": 3900 }, { "epoch": 0.5326324412889132, "grad_norm": 14.326767921447754, "learning_rate": 4.714917179814964e-06, "loss": 0.9466, "step": 3901 }, { "epoch": 0.5327689787001638, "grad_norm": 35.02191162109375, "learning_rate": 4.7127096613014565e-06, "loss": 0.9813, "step": 3902 }, { "epoch": 0.5329055161114146, "grad_norm": 10.769547462463379, "learning_rate": 4.710502198972236e-06, "loss": 0.9547, "step": 3903 }, { "epoch": 0.5330420535226652, "grad_norm": 6.115740776062012, "learning_rate": 4.708294793259004e-06, "loss": 1.0385, "step": 3904 }, { "epoch": 0.5331785909339158, "grad_norm": 9.745681762695312, "learning_rate": 4.706087444593455e-06, "loss": 1.0303, "step": 3905 }, { "epoch": 0.5333151283451666, "grad_norm": 5.875186920166016, "learning_rate": 4.703880153407274e-06, "loss": 0.9482, "step": 3906 }, { "epoch": 0.5334516657564172, "grad_norm": 13.844706535339355, "learning_rate": 4.701672920132127e-06, "loss": 1.0193, "step": 3907 }, { "epoch": 0.533588203167668, "grad_norm": 6.520347595214844, "learning_rate": 4.6994657451996815e-06, "loss": 0.8865, "step": 3908 }, { "epoch": 0.5337247405789186, "grad_norm": 5.442863941192627, "learning_rate": 4.6972586290415805e-06, "loss": 1.0179, "step": 3909 }, { "epoch": 0.5338612779901694, "grad_norm": 6.258813858032227, "learning_rate": 4.695051572089466e-06, "loss": 1.0258, "step": 3910 }, { "epoch": 0.53399781540142, "grad_norm": 6.974420070648193, "learning_rate": 4.692844574774961e-06, "loss": 0.9288, "step": 3911 }, { "epoch": 0.5341343528126706, "grad_norm": 20.39899253845215, "learning_rate": 4.6906376375296815e-06, "loss": 0.9861, "step": 3912 }, { "epoch": 0.5342708902239214, "grad_norm": 16.70686912536621, "learning_rate": 4.6884307607852254e-06, "loss": 1.0523, "step": 3913 }, { "epoch": 0.534407427635172, "grad_norm": 15.829236030578613, "learning_rate": 4.68622394497319e-06, "loss": 1.0476, "step": 3914 }, { "epoch": 0.5345439650464228, "grad_norm": 8.793954849243164, "learning_rate": 4.684017190525149e-06, "loss": 0.8852, "step": 3915 }, { "epoch": 0.5346805024576734, "grad_norm": 11.36697769165039, "learning_rate": 4.6818104978726685e-06, "loss": 1.0097, "step": 3916 }, { "epoch": 0.534817039868924, "grad_norm": 9.409612655639648, "learning_rate": 4.679603867447308e-06, "loss": 0.9734, "step": 3917 }, { "epoch": 0.5349535772801748, "grad_norm": 7.716702938079834, "learning_rate": 4.677397299680605e-06, "loss": 1.0187, "step": 3918 }, { "epoch": 0.5350901146914254, "grad_norm": 9.17917251586914, "learning_rate": 4.675190795004092e-06, "loss": 1.0098, "step": 3919 }, { "epoch": 0.5352266521026762, "grad_norm": 14.555163383483887, "learning_rate": 4.672984353849285e-06, "loss": 1.0305, "step": 3920 }, { "epoch": 0.5353631895139268, "grad_norm": 8.735223770141602, "learning_rate": 4.670777976647692e-06, "loss": 1.1685, "step": 3921 }, { "epoch": 0.5354997269251774, "grad_norm": 17.574485778808594, "learning_rate": 4.668571663830802e-06, "loss": 0.8441, "step": 3922 }, { "epoch": 0.5356362643364282, "grad_norm": 12.434253692626953, "learning_rate": 4.6663654158301e-06, "loss": 0.9642, "step": 3923 }, { "epoch": 0.5357728017476788, "grad_norm": 8.41952896118164, "learning_rate": 4.664159233077051e-06, "loss": 1.0051, "step": 3924 }, { "epoch": 0.5359093391589296, "grad_norm": 8.882777214050293, "learning_rate": 4.66195311600311e-06, "loss": 0.8763, "step": 3925 }, { "epoch": 0.5360458765701802, "grad_norm": 12.375126838684082, "learning_rate": 4.659747065039719e-06, "loss": 0.932, "step": 3926 }, { "epoch": 0.536182413981431, "grad_norm": 8.93920612335205, "learning_rate": 4.657541080618305e-06, "loss": 0.9454, "step": 3927 }, { "epoch": 0.5363189513926816, "grad_norm": 6.7119832038879395, "learning_rate": 4.655335163170288e-06, "loss": 0.9966, "step": 3928 }, { "epoch": 0.5364554888039322, "grad_norm": 6.204195976257324, "learning_rate": 4.653129313127068e-06, "loss": 0.9319, "step": 3929 }, { "epoch": 0.536592026215183, "grad_norm": 20.783184051513672, "learning_rate": 4.6509235309200356e-06, "loss": 1.0676, "step": 3930 }, { "epoch": 0.5367285636264336, "grad_norm": 7.149247646331787, "learning_rate": 4.648717816980567e-06, "loss": 0.8524, "step": 3931 }, { "epoch": 0.5368651010376844, "grad_norm": 7.248298168182373, "learning_rate": 4.646512171740028e-06, "loss": 0.9752, "step": 3932 }, { "epoch": 0.537001638448935, "grad_norm": 7.681949615478516, "learning_rate": 4.644306595629763e-06, "loss": 1.0063, "step": 3933 }, { "epoch": 0.5371381758601856, "grad_norm": 9.997994422912598, "learning_rate": 4.642101089081113e-06, "loss": 0.9987, "step": 3934 }, { "epoch": 0.5372747132714364, "grad_norm": 11.05584716796875, "learning_rate": 4.6398956525253994e-06, "loss": 0.9127, "step": 3935 }, { "epoch": 0.537411250682687, "grad_norm": 6.219005584716797, "learning_rate": 4.637690286393929e-06, "loss": 0.7351, "step": 3936 }, { "epoch": 0.5375477880939378, "grad_norm": 6.07413911819458, "learning_rate": 4.6354849911179995e-06, "loss": 0.9587, "step": 3937 }, { "epoch": 0.5376843255051884, "grad_norm": 21.051918029785156, "learning_rate": 4.633279767128889e-06, "loss": 0.9788, "step": 3938 }, { "epoch": 0.5378208629164392, "grad_norm": 11.491430282592773, "learning_rate": 4.631074614857868e-06, "loss": 0.9339, "step": 3939 }, { "epoch": 0.5379574003276898, "grad_norm": 8.168171882629395, "learning_rate": 4.628869534736187e-06, "loss": 1.0671, "step": 3940 }, { "epoch": 0.5380939377389404, "grad_norm": 9.884180068969727, "learning_rate": 4.626664527195089e-06, "loss": 0.9081, "step": 3941 }, { "epoch": 0.5382304751501912, "grad_norm": 6.053835868835449, "learning_rate": 4.624459592665796e-06, "loss": 1.0081, "step": 3942 }, { "epoch": 0.5383670125614418, "grad_norm": 7.0319061279296875, "learning_rate": 4.6222547315795205e-06, "loss": 1.1485, "step": 3943 }, { "epoch": 0.5385035499726926, "grad_norm": 6.769811153411865, "learning_rate": 4.620049944367457e-06, "loss": 0.9375, "step": 3944 }, { "epoch": 0.5386400873839432, "grad_norm": 20.24485969543457, "learning_rate": 4.617845231460787e-06, "loss": 1.0032, "step": 3945 }, { "epoch": 0.5387766247951938, "grad_norm": 13.899285316467285, "learning_rate": 4.61564059329068e-06, "loss": 1.1176, "step": 3946 }, { "epoch": 0.5389131622064446, "grad_norm": 6.0577311515808105, "learning_rate": 4.613436030288286e-06, "loss": 1.076, "step": 3947 }, { "epoch": 0.5390496996176952, "grad_norm": 6.775046348571777, "learning_rate": 4.611231542884747e-06, "loss": 1.1271, "step": 3948 }, { "epoch": 0.539186237028946, "grad_norm": 6.544516086578369, "learning_rate": 4.609027131511184e-06, "loss": 0.9162, "step": 3949 }, { "epoch": 0.5393227744401966, "grad_norm": 8.374985694885254, "learning_rate": 4.6068227965987055e-06, "loss": 0.927, "step": 3950 }, { "epoch": 0.5394593118514472, "grad_norm": 5.835503578186035, "learning_rate": 4.604618538578403e-06, "loss": 0.8783, "step": 3951 }, { "epoch": 0.539595849262698, "grad_norm": 5.137890338897705, "learning_rate": 4.6024143578813594e-06, "loss": 0.9374, "step": 3952 }, { "epoch": 0.5397323866739486, "grad_norm": 6.0806074142456055, "learning_rate": 4.6002102549386325e-06, "loss": 0.9742, "step": 3953 }, { "epoch": 0.5398689240851994, "grad_norm": 5.054799556732178, "learning_rate": 4.598006230181276e-06, "loss": 0.9481, "step": 3954 }, { "epoch": 0.54000546149645, "grad_norm": 8.939061164855957, "learning_rate": 4.595802284040319e-06, "loss": 1.015, "step": 3955 }, { "epoch": 0.5401419989077008, "grad_norm": 7.568939685821533, "learning_rate": 4.593598416946779e-06, "loss": 0.8488, "step": 3956 }, { "epoch": 0.5402785363189514, "grad_norm": 6.577822685241699, "learning_rate": 4.5913946293316605e-06, "loss": 1.0446, "step": 3957 }, { "epoch": 0.540415073730202, "grad_norm": 5.993860721588135, "learning_rate": 4.589190921625946e-06, "loss": 0.9278, "step": 3958 }, { "epoch": 0.5405516111414528, "grad_norm": 6.450240135192871, "learning_rate": 4.58698729426061e-06, "loss": 1.0447, "step": 3959 }, { "epoch": 0.5406881485527034, "grad_norm": 9.540741920471191, "learning_rate": 4.584783747666605e-06, "loss": 1.038, "step": 3960 }, { "epoch": 0.5408246859639542, "grad_norm": 11.020204544067383, "learning_rate": 4.582580282274873e-06, "loss": 1.0176, "step": 3961 }, { "epoch": 0.5409612233752048, "grad_norm": 7.059102535247803, "learning_rate": 4.5803768985163336e-06, "loss": 0.9335, "step": 3962 }, { "epoch": 0.5410977607864554, "grad_norm": 6.181499004364014, "learning_rate": 4.578173596821897e-06, "loss": 1.0232, "step": 3963 }, { "epoch": 0.5412342981977062, "grad_norm": 6.593588352203369, "learning_rate": 4.575970377622456e-06, "loss": 1.0197, "step": 3964 }, { "epoch": 0.5413708356089568, "grad_norm": 16.450944900512695, "learning_rate": 4.57376724134888e-06, "loss": 0.9372, "step": 3965 }, { "epoch": 0.5415073730202076, "grad_norm": 5.53275728225708, "learning_rate": 4.571564188432035e-06, "loss": 1.0301, "step": 3966 }, { "epoch": 0.5416439104314582, "grad_norm": 7.877674102783203, "learning_rate": 4.569361219302758e-06, "loss": 0.9265, "step": 3967 }, { "epoch": 0.541780447842709, "grad_norm": 6.687263011932373, "learning_rate": 4.56715833439188e-06, "loss": 1.1911, "step": 3968 }, { "epoch": 0.5419169852539596, "grad_norm": 5.421719074249268, "learning_rate": 4.564955534130207e-06, "loss": 0.8763, "step": 3969 }, { "epoch": 0.5420535226652102, "grad_norm": 6.542791366577148, "learning_rate": 4.562752818948536e-06, "loss": 0.9471, "step": 3970 }, { "epoch": 0.542190060076461, "grad_norm": 7.506113052368164, "learning_rate": 4.560550189277638e-06, "loss": 0.8839, "step": 3971 }, { "epoch": 0.5423265974877116, "grad_norm": 7.898138999938965, "learning_rate": 4.55834764554828e-06, "loss": 0.9017, "step": 3972 }, { "epoch": 0.5424631348989624, "grad_norm": 5.633675575256348, "learning_rate": 4.556145188191199e-06, "loss": 0.9103, "step": 3973 }, { "epoch": 0.542599672310213, "grad_norm": 6.469141960144043, "learning_rate": 4.553942817637127e-06, "loss": 0.9514, "step": 3974 }, { "epoch": 0.5427362097214636, "grad_norm": 6.587597846984863, "learning_rate": 4.55174053431677e-06, "loss": 0.9746, "step": 3975 }, { "epoch": 0.5428727471327144, "grad_norm": 5.503188133239746, "learning_rate": 4.54953833866082e-06, "loss": 0.8679, "step": 3976 }, { "epoch": 0.543009284543965, "grad_norm": 6.073029041290283, "learning_rate": 4.547336231099952e-06, "loss": 0.9891, "step": 3977 }, { "epoch": 0.5431458219552158, "grad_norm": 6.41379976272583, "learning_rate": 4.545134212064823e-06, "loss": 0.9795, "step": 3978 }, { "epoch": 0.5432823593664664, "grad_norm": 5.291617393493652, "learning_rate": 4.5429322819860775e-06, "loss": 0.9017, "step": 3979 }, { "epoch": 0.543418896777717, "grad_norm": 5.973079204559326, "learning_rate": 4.540730441294334e-06, "loss": 0.9528, "step": 3980 }, { "epoch": 0.5435554341889678, "grad_norm": 9.012115478515625, "learning_rate": 4.5385286904202015e-06, "loss": 0.9771, "step": 3981 }, { "epoch": 0.5436919716002184, "grad_norm": 5.668509006500244, "learning_rate": 4.536327029794267e-06, "loss": 0.9552, "step": 3982 }, { "epoch": 0.5438285090114692, "grad_norm": 10.146949768066406, "learning_rate": 4.5341254598471e-06, "loss": 0.9331, "step": 3983 }, { "epoch": 0.5439650464227198, "grad_norm": 8.114529609680176, "learning_rate": 4.531923981009256e-06, "loss": 0.9717, "step": 3984 }, { "epoch": 0.5441015838339706, "grad_norm": 8.061179161071777, "learning_rate": 4.529722593711265e-06, "loss": 1.0954, "step": 3985 }, { "epoch": 0.5442381212452212, "grad_norm": 10.265938758850098, "learning_rate": 4.5275212983836486e-06, "loss": 0.8862, "step": 3986 }, { "epoch": 0.5443746586564718, "grad_norm": 6.617290496826172, "learning_rate": 4.525320095456902e-06, "loss": 0.9037, "step": 3987 }, { "epoch": 0.5445111960677226, "grad_norm": 6.930297374725342, "learning_rate": 4.523118985361511e-06, "loss": 0.9643, "step": 3988 }, { "epoch": 0.5446477334789732, "grad_norm": 6.537635803222656, "learning_rate": 4.520917968527933e-06, "loss": 0.9943, "step": 3989 }, { "epoch": 0.544784270890224, "grad_norm": 5.405265808105469, "learning_rate": 4.5187170453866175e-06, "loss": 0.8596, "step": 3990 }, { "epoch": 0.5449208083014746, "grad_norm": 5.981689453125, "learning_rate": 4.516516216367985e-06, "loss": 0.9982, "step": 3991 }, { "epoch": 0.5450573457127252, "grad_norm": 6.887039661407471, "learning_rate": 4.51431548190245e-06, "loss": 0.987, "step": 3992 }, { "epoch": 0.545193883123976, "grad_norm": 15.722081184387207, "learning_rate": 4.512114842420397e-06, "loss": 0.9372, "step": 3993 }, { "epoch": 0.5453304205352266, "grad_norm": 8.717489242553711, "learning_rate": 4.509914298352197e-06, "loss": 0.7519, "step": 3994 }, { "epoch": 0.5454669579464774, "grad_norm": 8.595357894897461, "learning_rate": 4.507713850128205e-06, "loss": 1.0717, "step": 3995 }, { "epoch": 0.545603495357728, "grad_norm": 6.414422512054443, "learning_rate": 4.505513498178752e-06, "loss": 1.0192, "step": 3996 }, { "epoch": 0.5457400327689788, "grad_norm": 6.13639497756958, "learning_rate": 4.503313242934153e-06, "loss": 0.9632, "step": 3997 }, { "epoch": 0.5458765701802294, "grad_norm": 9.991554260253906, "learning_rate": 4.501113084824702e-06, "loss": 0.8676, "step": 3998 }, { "epoch": 0.54601310759148, "grad_norm": 5.827604293823242, "learning_rate": 4.49891302428068e-06, "loss": 0.9949, "step": 3999 }, { "epoch": 0.5461496450027308, "grad_norm": 7.034242630004883, "learning_rate": 4.49671306173234e-06, "loss": 0.945, "step": 4000 }, { "epoch": 0.5462861824139814, "grad_norm": 6.735865592956543, "learning_rate": 4.494513197609923e-06, "loss": 0.8549, "step": 4001 }, { "epoch": 0.5464227198252322, "grad_norm": 11.24351978302002, "learning_rate": 4.492313432343646e-06, "loss": 0.9066, "step": 4002 }, { "epoch": 0.5465592572364828, "grad_norm": 7.121833801269531, "learning_rate": 4.4901137663637115e-06, "loss": 1.1049, "step": 4003 }, { "epoch": 0.5466957946477334, "grad_norm": 9.033473014831543, "learning_rate": 4.487914200100296e-06, "loss": 1.0031, "step": 4004 }, { "epoch": 0.5468323320589842, "grad_norm": 5.061344623565674, "learning_rate": 4.4857147339835646e-06, "loss": 0.9614, "step": 4005 }, { "epoch": 0.5469688694702348, "grad_norm": 15.1470308303833, "learning_rate": 4.483515368443657e-06, "loss": 0.9207, "step": 4006 }, { "epoch": 0.5471054068814856, "grad_norm": 9.993692398071289, "learning_rate": 4.481316103910694e-06, "loss": 1.0007, "step": 4007 }, { "epoch": 0.5472419442927362, "grad_norm": 6.826722145080566, "learning_rate": 4.479116940814778e-06, "loss": 1.0155, "step": 4008 }, { "epoch": 0.5473784817039868, "grad_norm": 9.011756896972656, "learning_rate": 4.476917879585989e-06, "loss": 0.9449, "step": 4009 }, { "epoch": 0.5475150191152376, "grad_norm": 34.7252197265625, "learning_rate": 4.474718920654394e-06, "loss": 0.9984, "step": 4010 }, { "epoch": 0.5476515565264882, "grad_norm": 10.216498374938965, "learning_rate": 4.4725200644500285e-06, "loss": 1.037, "step": 4011 }, { "epoch": 0.547788093937739, "grad_norm": 8.050246238708496, "learning_rate": 4.47032131140292e-06, "loss": 0.9951, "step": 4012 }, { "epoch": 0.5479246313489896, "grad_norm": 7.103221416473389, "learning_rate": 4.468122661943068e-06, "loss": 0.9503, "step": 4013 }, { "epoch": 0.5480611687602404, "grad_norm": 8.722744941711426, "learning_rate": 4.4659241165004556e-06, "loss": 0.849, "step": 4014 }, { "epoch": 0.548197706171491, "grad_norm": 9.172613143920898, "learning_rate": 4.463725675505043e-06, "loss": 1.0351, "step": 4015 }, { "epoch": 0.5483342435827416, "grad_norm": 9.301980018615723, "learning_rate": 4.4615273393867685e-06, "loss": 0.9969, "step": 4016 }, { "epoch": 0.5484707809939924, "grad_norm": 7.739041805267334, "learning_rate": 4.459329108575556e-06, "loss": 1.0598, "step": 4017 }, { "epoch": 0.548607318405243, "grad_norm": 7.476820468902588, "learning_rate": 4.457130983501302e-06, "loss": 0.875, "step": 4018 }, { "epoch": 0.5487438558164938, "grad_norm": 6.300267219543457, "learning_rate": 4.45493296459389e-06, "loss": 1.1044, "step": 4019 }, { "epoch": 0.5488803932277444, "grad_norm": 6.773159980773926, "learning_rate": 4.452735052283175e-06, "loss": 0.9912, "step": 4020 }, { "epoch": 0.549016930638995, "grad_norm": 9.686066627502441, "learning_rate": 4.4505372469989955e-06, "loss": 1.1589, "step": 4021 }, { "epoch": 0.5491534680502458, "grad_norm": 6.1597208976745605, "learning_rate": 4.448339549171165e-06, "loss": 0.8838, "step": 4022 }, { "epoch": 0.5492900054614964, "grad_norm": 5.383895397186279, "learning_rate": 4.446141959229484e-06, "loss": 1.0284, "step": 4023 }, { "epoch": 0.5494265428727472, "grad_norm": 6.934307098388672, "learning_rate": 4.443944477603722e-06, "loss": 0.8753, "step": 4024 }, { "epoch": 0.5495630802839978, "grad_norm": 6.697490692138672, "learning_rate": 4.441747104723636e-06, "loss": 0.8506, "step": 4025 }, { "epoch": 0.5496996176952484, "grad_norm": 9.728121757507324, "learning_rate": 4.439549841018958e-06, "loss": 0.855, "step": 4026 }, { "epoch": 0.5498361551064992, "grad_norm": 8.536943435668945, "learning_rate": 4.437352686919394e-06, "loss": 0.9759, "step": 4027 }, { "epoch": 0.5499726925177498, "grad_norm": 8.390037536621094, "learning_rate": 4.435155642854637e-06, "loss": 1.0954, "step": 4028 }, { "epoch": 0.5501092299290006, "grad_norm": 17.587553024291992, "learning_rate": 4.432958709254353e-06, "loss": 0.9003, "step": 4029 }, { "epoch": 0.5502457673402512, "grad_norm": 6.568607330322266, "learning_rate": 4.4307618865481896e-06, "loss": 0.8673, "step": 4030 }, { "epoch": 0.550382304751502, "grad_norm": 7.86309289932251, "learning_rate": 4.4285651751657676e-06, "loss": 0.8268, "step": 4031 }, { "epoch": 0.5505188421627526, "grad_norm": 12.700871467590332, "learning_rate": 4.426368575536695e-06, "loss": 1.0302, "step": 4032 }, { "epoch": 0.5506553795740032, "grad_norm": 6.249841690063477, "learning_rate": 4.424172088090547e-06, "loss": 0.9437, "step": 4033 }, { "epoch": 0.550791916985254, "grad_norm": 5.601725101470947, "learning_rate": 4.421975713256888e-06, "loss": 0.9369, "step": 4034 }, { "epoch": 0.5509284543965046, "grad_norm": 39.83314514160156, "learning_rate": 4.41977945146525e-06, "loss": 1.0596, "step": 4035 }, { "epoch": 0.5510649918077554, "grad_norm": 6.903526306152344, "learning_rate": 4.4175833031451475e-06, "loss": 0.9228, "step": 4036 }, { "epoch": 0.551201529219006, "grad_norm": 6.570939064025879, "learning_rate": 4.415387268726077e-06, "loss": 0.9023, "step": 4037 }, { "epoch": 0.5513380666302566, "grad_norm": 7.1141252517700195, "learning_rate": 4.413191348637503e-06, "loss": 0.8939, "step": 4038 }, { "epoch": 0.5514746040415074, "grad_norm": 7.126589775085449, "learning_rate": 4.410995543308879e-06, "loss": 0.8494, "step": 4039 }, { "epoch": 0.551611141452758, "grad_norm": 5.457694053649902, "learning_rate": 4.408799853169628e-06, "loss": 0.9713, "step": 4040 }, { "epoch": 0.5517476788640088, "grad_norm": 6.457379341125488, "learning_rate": 4.4066042786491524e-06, "loss": 1.0664, "step": 4041 }, { "epoch": 0.5518842162752594, "grad_norm": 8.389650344848633, "learning_rate": 4.404408820176831e-06, "loss": 1.0211, "step": 4042 }, { "epoch": 0.5520207536865102, "grad_norm": 7.446523666381836, "learning_rate": 4.402213478182026e-06, "loss": 1.0186, "step": 4043 }, { "epoch": 0.5521572910977608, "grad_norm": 5.606820583343506, "learning_rate": 4.400018253094065e-06, "loss": 1.0061, "step": 4044 }, { "epoch": 0.5522938285090114, "grad_norm": 5.453819751739502, "learning_rate": 4.3978231453422674e-06, "loss": 0.8495, "step": 4045 }, { "epoch": 0.5524303659202622, "grad_norm": 6.996252536773682, "learning_rate": 4.395628155355918e-06, "loss": 0.9527, "step": 4046 }, { "epoch": 0.5525669033315128, "grad_norm": 8.452753067016602, "learning_rate": 4.393433283564282e-06, "loss": 1.16, "step": 4047 }, { "epoch": 0.5527034407427636, "grad_norm": 5.720831394195557, "learning_rate": 4.391238530396606e-06, "loss": 0.913, "step": 4048 }, { "epoch": 0.5528399781540142, "grad_norm": 6.188658237457275, "learning_rate": 4.389043896282103e-06, "loss": 1.0599, "step": 4049 }, { "epoch": 0.5529765155652648, "grad_norm": 7.675411224365234, "learning_rate": 4.386849381649977e-06, "loss": 0.9163, "step": 4050 }, { "epoch": 0.5531130529765156, "grad_norm": 10.818305969238281, "learning_rate": 4.384654986929394e-06, "loss": 0.9977, "step": 4051 }, { "epoch": 0.5532495903877662, "grad_norm": 9.4747896194458, "learning_rate": 4.38246071254951e-06, "loss": 0.9088, "step": 4052 }, { "epoch": 0.553386127799017, "grad_norm": 7.657349586486816, "learning_rate": 4.380266558939446e-06, "loss": 0.9737, "step": 4053 }, { "epoch": 0.5535226652102676, "grad_norm": 5.341177940368652, "learning_rate": 4.378072526528307e-06, "loss": 1.0778, "step": 4054 }, { "epoch": 0.5536592026215182, "grad_norm": 8.173754692077637, "learning_rate": 4.375878615745171e-06, "loss": 0.8233, "step": 4055 }, { "epoch": 0.553795740032769, "grad_norm": 6.085881233215332, "learning_rate": 4.37368482701909e-06, "loss": 0.9171, "step": 4056 }, { "epoch": 0.5539322774440196, "grad_norm": 7.759738445281982, "learning_rate": 4.3714911607791e-06, "loss": 0.7894, "step": 4057 }, { "epoch": 0.5540688148552704, "grad_norm": 5.900169372558594, "learning_rate": 4.3692976174542044e-06, "loss": 1.0901, "step": 4058 }, { "epoch": 0.554205352266521, "grad_norm": 6.005524635314941, "learning_rate": 4.367104197473387e-06, "loss": 0.949, "step": 4059 }, { "epoch": 0.5543418896777718, "grad_norm": 11.58770751953125, "learning_rate": 4.364910901265607e-06, "loss": 0.977, "step": 4060 }, { "epoch": 0.5544784270890224, "grad_norm": 8.594623565673828, "learning_rate": 4.362717729259799e-06, "loss": 0.9352, "step": 4061 }, { "epoch": 0.554614964500273, "grad_norm": 6.059980869293213, "learning_rate": 4.3605246818848725e-06, "loss": 0.9943, "step": 4062 }, { "epoch": 0.5547515019115238, "grad_norm": 8.18249225616455, "learning_rate": 4.358331759569715e-06, "loss": 1.0165, "step": 4063 }, { "epoch": 0.5548880393227744, "grad_norm": 7.542844772338867, "learning_rate": 4.356138962743187e-06, "loss": 0.9979, "step": 4064 }, { "epoch": 0.5550245767340252, "grad_norm": 5.971932411193848, "learning_rate": 4.353946291834127e-06, "loss": 0.8428, "step": 4065 }, { "epoch": 0.5551611141452758, "grad_norm": 9.407501220703125, "learning_rate": 4.351753747271346e-06, "loss": 1.1121, "step": 4066 }, { "epoch": 0.5552976515565264, "grad_norm": 7.772112846374512, "learning_rate": 4.349561329483628e-06, "loss": 1.0115, "step": 4067 }, { "epoch": 0.5554341889677772, "grad_norm": 8.52226448059082, "learning_rate": 4.347369038899744e-06, "loss": 1.0079, "step": 4068 }, { "epoch": 0.5555707263790278, "grad_norm": 8.099661827087402, "learning_rate": 4.345176875948424e-06, "loss": 1.0252, "step": 4069 }, { "epoch": 0.5557072637902786, "grad_norm": 8.96290397644043, "learning_rate": 4.342984841058386e-06, "loss": 1.074, "step": 4070 }, { "epoch": 0.5558438012015292, "grad_norm": 9.46536636352539, "learning_rate": 4.340792934658317e-06, "loss": 0.8996, "step": 4071 }, { "epoch": 0.55598033861278, "grad_norm": 5.380434513092041, "learning_rate": 4.33860115717688e-06, "loss": 1.0257, "step": 4072 }, { "epoch": 0.5561168760240306, "grad_norm": 8.039323806762695, "learning_rate": 4.336409509042709e-06, "loss": 0.9309, "step": 4073 }, { "epoch": 0.5562534134352812, "grad_norm": 7.6056084632873535, "learning_rate": 4.334217990684423e-06, "loss": 0.9829, "step": 4074 }, { "epoch": 0.556389950846532, "grad_norm": 8.75869083404541, "learning_rate": 4.332026602530604e-06, "loss": 0.9992, "step": 4075 }, { "epoch": 0.5565264882577826, "grad_norm": 10.54147720336914, "learning_rate": 4.329835345009813e-06, "loss": 1.0419, "step": 4076 }, { "epoch": 0.5566630256690334, "grad_norm": 16.655784606933594, "learning_rate": 4.327644218550589e-06, "loss": 0.9858, "step": 4077 }, { "epoch": 0.556799563080284, "grad_norm": 6.59839391708374, "learning_rate": 4.325453223581442e-06, "loss": 0.9916, "step": 4078 }, { "epoch": 0.5569361004915346, "grad_norm": 9.186866760253906, "learning_rate": 4.323262360530855e-06, "loss": 0.9911, "step": 4079 }, { "epoch": 0.5570726379027854, "grad_norm": 9.914010047912598, "learning_rate": 4.3210716298272856e-06, "loss": 1.0576, "step": 4080 }, { "epoch": 0.557209175314036, "grad_norm": 5.643657207489014, "learning_rate": 4.318881031899171e-06, "loss": 0.9441, "step": 4081 }, { "epoch": 0.5573457127252868, "grad_norm": 13.875289916992188, "learning_rate": 4.316690567174913e-06, "loss": 0.9487, "step": 4082 }, { "epoch": 0.5574822501365374, "grad_norm": 7.748137474060059, "learning_rate": 4.314500236082898e-06, "loss": 1.048, "step": 4083 }, { "epoch": 0.557618787547788, "grad_norm": 6.150937557220459, "learning_rate": 4.312310039051476e-06, "loss": 0.9362, "step": 4084 }, { "epoch": 0.5577553249590388, "grad_norm": 6.904571533203125, "learning_rate": 4.310119976508979e-06, "loss": 0.9961, "step": 4085 }, { "epoch": 0.5578918623702894, "grad_norm": 7.043313026428223, "learning_rate": 4.307930048883708e-06, "loss": 0.9892, "step": 4086 }, { "epoch": 0.5580283997815402, "grad_norm": 7.776768684387207, "learning_rate": 4.305740256603936e-06, "loss": 0.9474, "step": 4087 }, { "epoch": 0.5581649371927908, "grad_norm": 7.59912633895874, "learning_rate": 4.303550600097917e-06, "loss": 1.0045, "step": 4088 }, { "epoch": 0.5583014746040416, "grad_norm": 5.327816486358643, "learning_rate": 4.30136107979387e-06, "loss": 0.8335, "step": 4089 }, { "epoch": 0.5584380120152922, "grad_norm": 5.799037456512451, "learning_rate": 4.299171696119995e-06, "loss": 0.9602, "step": 4090 }, { "epoch": 0.5585745494265428, "grad_norm": 8.3775053024292, "learning_rate": 4.296982449504458e-06, "loss": 0.933, "step": 4091 }, { "epoch": 0.5587110868377936, "grad_norm": 5.762325286865234, "learning_rate": 4.294793340375405e-06, "loss": 1.1035, "step": 4092 }, { "epoch": 0.5588476242490442, "grad_norm": 12.6303071975708, "learning_rate": 4.292604369160947e-06, "loss": 0.8924, "step": 4093 }, { "epoch": 0.558984161660295, "grad_norm": 12.266947746276855, "learning_rate": 4.290415536289179e-06, "loss": 1.0769, "step": 4094 }, { "epoch": 0.5591206990715456, "grad_norm": 33.18797302246094, "learning_rate": 4.288226842188158e-06, "loss": 1.028, "step": 4095 }, { "epoch": 0.5592572364827962, "grad_norm": 6.019954204559326, "learning_rate": 4.286038287285919e-06, "loss": 0.8851, "step": 4096 }, { "epoch": 0.559393773894047, "grad_norm": 9.16659927368164, "learning_rate": 4.283849872010473e-06, "loss": 0.9556, "step": 4097 }, { "epoch": 0.5595303113052976, "grad_norm": 26.363800048828125, "learning_rate": 4.281661596789796e-06, "loss": 1.0777, "step": 4098 }, { "epoch": 0.5596668487165484, "grad_norm": 20.54505729675293, "learning_rate": 4.279473462051843e-06, "loss": 0.9961, "step": 4099 }, { "epoch": 0.559803386127799, "grad_norm": 14.359708786010742, "learning_rate": 4.2772854682245365e-06, "loss": 1.0415, "step": 4100 }, { "epoch": 0.5599399235390496, "grad_norm": 12.387316703796387, "learning_rate": 4.27509761573578e-06, "loss": 1.0516, "step": 4101 }, { "epoch": 0.5600764609503004, "grad_norm": 14.239012718200684, "learning_rate": 4.272909905013436e-06, "loss": 0.7719, "step": 4102 }, { "epoch": 0.560212998361551, "grad_norm": 7.742232322692871, "learning_rate": 4.270722336485353e-06, "loss": 0.8991, "step": 4103 }, { "epoch": 0.5603495357728018, "grad_norm": 9.280793190002441, "learning_rate": 4.268534910579342e-06, "loss": 0.9481, "step": 4104 }, { "epoch": 0.5604860731840524, "grad_norm": 8.082914352416992, "learning_rate": 4.266347627723192e-06, "loss": 0.826, "step": 4105 }, { "epoch": 0.5606226105953032, "grad_norm": 8.001077651977539, "learning_rate": 4.264160488344661e-06, "loss": 0.9464, "step": 4106 }, { "epoch": 0.5607591480065538, "grad_norm": 8.247674942016602, "learning_rate": 4.261973492871476e-06, "loss": 1.1509, "step": 4107 }, { "epoch": 0.5608956854178044, "grad_norm": 7.359442234039307, "learning_rate": 4.259786641731344e-06, "loss": 0.9243, "step": 4108 }, { "epoch": 0.5610322228290552, "grad_norm": 7.168264865875244, "learning_rate": 4.257599935351936e-06, "loss": 0.9184, "step": 4109 }, { "epoch": 0.5611687602403058, "grad_norm": 5.480323791503906, "learning_rate": 4.255413374160902e-06, "loss": 0.801, "step": 4110 }, { "epoch": 0.5613052976515566, "grad_norm": 7.0277252197265625, "learning_rate": 4.2532269585858545e-06, "loss": 0.8547, "step": 4111 }, { "epoch": 0.5614418350628072, "grad_norm": 7.477377891540527, "learning_rate": 4.251040689054387e-06, "loss": 0.9993, "step": 4112 }, { "epoch": 0.5615783724740578, "grad_norm": 6.268578052520752, "learning_rate": 4.248854565994056e-06, "loss": 0.8973, "step": 4113 }, { "epoch": 0.5617149098853086, "grad_norm": 12.923929214477539, "learning_rate": 4.246668589832397e-06, "loss": 0.8635, "step": 4114 }, { "epoch": 0.5618514472965592, "grad_norm": 9.895533561706543, "learning_rate": 4.244482760996909e-06, "loss": 1.099, "step": 4115 }, { "epoch": 0.56198798470781, "grad_norm": 5.969226837158203, "learning_rate": 4.242297079915071e-06, "loss": 0.821, "step": 4116 }, { "epoch": 0.5621245221190606, "grad_norm": 7.712716102600098, "learning_rate": 4.240111547014326e-06, "loss": 0.9925, "step": 4117 }, { "epoch": 0.5622610595303114, "grad_norm": 5.696356296539307, "learning_rate": 4.237926162722088e-06, "loss": 1.0309, "step": 4118 }, { "epoch": 0.562397596941562, "grad_norm": 7.5438947677612305, "learning_rate": 4.235740927465749e-06, "loss": 0.9372, "step": 4119 }, { "epoch": 0.5625341343528126, "grad_norm": 6.880177974700928, "learning_rate": 4.233555841672663e-06, "loss": 1.0099, "step": 4120 }, { "epoch": 0.5626706717640634, "grad_norm": 8.792606353759766, "learning_rate": 4.231370905770163e-06, "loss": 1.1415, "step": 4121 }, { "epoch": 0.562807209175314, "grad_norm": 8.274511337280273, "learning_rate": 4.229186120185545e-06, "loss": 1.0131, "step": 4122 }, { "epoch": 0.5629437465865648, "grad_norm": 8.811211585998535, "learning_rate": 4.227001485346081e-06, "loss": 1.0348, "step": 4123 }, { "epoch": 0.5630802839978154, "grad_norm": 8.874906539916992, "learning_rate": 4.224817001679011e-06, "loss": 0.8785, "step": 4124 }, { "epoch": 0.563216821409066, "grad_norm": 11.88424015045166, "learning_rate": 4.2226326696115475e-06, "loss": 0.972, "step": 4125 }, { "epoch": 0.5633533588203168, "grad_norm": 6.340921878814697, "learning_rate": 4.220448489570872e-06, "loss": 1.0664, "step": 4126 }, { "epoch": 0.5634898962315674, "grad_norm": 8.680340766906738, "learning_rate": 4.218264461984133e-06, "loss": 0.9541, "step": 4127 }, { "epoch": 0.5636264336428182, "grad_norm": 6.524031162261963, "learning_rate": 4.216080587278458e-06, "loss": 0.8947, "step": 4128 }, { "epoch": 0.5637629710540688, "grad_norm": 5.611642837524414, "learning_rate": 4.213896865880935e-06, "loss": 0.8604, "step": 4129 }, { "epoch": 0.5638995084653194, "grad_norm": 9.727931022644043, "learning_rate": 4.211713298218627e-06, "loss": 0.9428, "step": 4130 }, { "epoch": 0.5640360458765702, "grad_norm": 6.71394681930542, "learning_rate": 4.2095298847185665e-06, "loss": 1.1198, "step": 4131 }, { "epoch": 0.5641725832878208, "grad_norm": 6.471746444702148, "learning_rate": 4.2073466258077564e-06, "loss": 1.0346, "step": 4132 }, { "epoch": 0.5643091206990716, "grad_norm": 7.11635160446167, "learning_rate": 4.205163521913165e-06, "loss": 0.9519, "step": 4133 }, { "epoch": 0.5644456581103222, "grad_norm": 6.774502754211426, "learning_rate": 4.202980573461738e-06, "loss": 0.9406, "step": 4134 }, { "epoch": 0.564582195521573, "grad_norm": 5.384120941162109, "learning_rate": 4.200797780880384e-06, "loss": 1.0665, "step": 4135 }, { "epoch": 0.5647187329328236, "grad_norm": 6.825501441955566, "learning_rate": 4.198615144595984e-06, "loss": 0.9202, "step": 4136 }, { "epoch": 0.5648552703440742, "grad_norm": 9.446950912475586, "learning_rate": 4.196432665035388e-06, "loss": 1.0103, "step": 4137 }, { "epoch": 0.564991807755325, "grad_norm": 7.218953609466553, "learning_rate": 4.194250342625413e-06, "loss": 1.0048, "step": 4138 }, { "epoch": 0.5651283451665756, "grad_norm": 8.119199752807617, "learning_rate": 4.192068177792852e-06, "loss": 1.1328, "step": 4139 }, { "epoch": 0.5652648825778264, "grad_norm": 5.242656707763672, "learning_rate": 4.189886170964458e-06, "loss": 1.0962, "step": 4140 }, { "epoch": 0.565401419989077, "grad_norm": 7.323511123657227, "learning_rate": 4.1877043225669615e-06, "loss": 0.8005, "step": 4141 }, { "epoch": 0.5655379574003276, "grad_norm": 9.716525077819824, "learning_rate": 4.185522633027057e-06, "loss": 0.9055, "step": 4142 }, { "epoch": 0.5656744948115784, "grad_norm": 9.218575477600098, "learning_rate": 4.18334110277141e-06, "loss": 1.1091, "step": 4143 }, { "epoch": 0.565811032222829, "grad_norm": 6.550674915313721, "learning_rate": 4.181159732226651e-06, "loss": 1.0565, "step": 4144 }, { "epoch": 0.5659475696340798, "grad_norm": 6.163348197937012, "learning_rate": 4.178978521819386e-06, "loss": 1.0459, "step": 4145 }, { "epoch": 0.5660841070453304, "grad_norm": 22.7617130279541, "learning_rate": 4.176797471976186e-06, "loss": 0.933, "step": 4146 }, { "epoch": 0.5662206444565812, "grad_norm": 6.829190254211426, "learning_rate": 4.174616583123587e-06, "loss": 0.8874, "step": 4147 }, { "epoch": 0.5663571818678318, "grad_norm": 6.783851146697998, "learning_rate": 4.172435855688101e-06, "loss": 0.9175, "step": 4148 }, { "epoch": 0.5664937192790824, "grad_norm": 6.540674686431885, "learning_rate": 4.1702552900962025e-06, "loss": 1.0306, "step": 4149 }, { "epoch": 0.5666302566903332, "grad_norm": 6.834727764129639, "learning_rate": 4.1680748867743394e-06, "loss": 1.0082, "step": 4150 }, { "epoch": 0.5667667941015838, "grad_norm": 14.528191566467285, "learning_rate": 4.165894646148919e-06, "loss": 1.0416, "step": 4151 }, { "epoch": 0.5669033315128346, "grad_norm": 10.89067268371582, "learning_rate": 4.16371456864633e-06, "loss": 0.9389, "step": 4152 }, { "epoch": 0.5670398689240852, "grad_norm": 6.3214945793151855, "learning_rate": 4.161534654692915e-06, "loss": 0.8562, "step": 4153 }, { "epoch": 0.5671764063353358, "grad_norm": 7.797123908996582, "learning_rate": 4.159354904714997e-06, "loss": 0.8992, "step": 4154 }, { "epoch": 0.5673129437465866, "grad_norm": 6.33941650390625, "learning_rate": 4.157175319138859e-06, "loss": 1.0338, "step": 4155 }, { "epoch": 0.5674494811578372, "grad_norm": 8.643257141113281, "learning_rate": 4.154995898390756e-06, "loss": 1.0459, "step": 4156 }, { "epoch": 0.567586018569088, "grad_norm": 6.28251838684082, "learning_rate": 4.1528166428969066e-06, "loss": 0.9768, "step": 4157 }, { "epoch": 0.5677225559803386, "grad_norm": 8.284003257751465, "learning_rate": 4.1506375530834995e-06, "loss": 1.0308, "step": 4158 }, { "epoch": 0.5678590933915892, "grad_norm": 6.758110046386719, "learning_rate": 4.148458629376693e-06, "loss": 1.0149, "step": 4159 }, { "epoch": 0.56799563080284, "grad_norm": 8.68240737915039, "learning_rate": 4.146279872202609e-06, "loss": 0.9147, "step": 4160 }, { "epoch": 0.5681321682140906, "grad_norm": 6.218994617462158, "learning_rate": 4.144101281987342e-06, "loss": 1.0261, "step": 4161 }, { "epoch": 0.5682687056253414, "grad_norm": 5.066781997680664, "learning_rate": 4.141922859156947e-06, "loss": 0.9659, "step": 4162 }, { "epoch": 0.568405243036592, "grad_norm": 10.266014099121094, "learning_rate": 4.139744604137453e-06, "loss": 0.8994, "step": 4163 }, { "epoch": 0.5685417804478428, "grad_norm": 5.561988830566406, "learning_rate": 4.13756651735485e-06, "loss": 1.0207, "step": 4164 }, { "epoch": 0.5686783178590934, "grad_norm": 6.2753448486328125, "learning_rate": 4.135388599235101e-06, "loss": 0.9894, "step": 4165 }, { "epoch": 0.568814855270344, "grad_norm": 6.698072910308838, "learning_rate": 4.133210850204132e-06, "loss": 1.0261, "step": 4166 }, { "epoch": 0.5689513926815948, "grad_norm": 7.360587120056152, "learning_rate": 4.131033270687835e-06, "loss": 0.9922, "step": 4167 }, { "epoch": 0.5690879300928454, "grad_norm": 7.768197059631348, "learning_rate": 4.128855861112076e-06, "loss": 0.8799, "step": 4168 }, { "epoch": 0.5692244675040962, "grad_norm": 7.212032318115234, "learning_rate": 4.126678621902678e-06, "loss": 0.8895, "step": 4169 }, { "epoch": 0.5693610049153468, "grad_norm": 8.844584465026855, "learning_rate": 4.124501553485439e-06, "loss": 1.0264, "step": 4170 }, { "epoch": 0.5694975423265974, "grad_norm": 6.865130424499512, "learning_rate": 4.122324656286116e-06, "loss": 0.8349, "step": 4171 }, { "epoch": 0.5696340797378482, "grad_norm": 6.626981735229492, "learning_rate": 4.12014793073044e-06, "loss": 1.0156, "step": 4172 }, { "epoch": 0.5697706171490988, "grad_norm": 8.379866600036621, "learning_rate": 4.117971377244103e-06, "loss": 1.0171, "step": 4173 }, { "epoch": 0.5699071545603496, "grad_norm": 6.1726274490356445, "learning_rate": 4.115794996252768e-06, "loss": 0.9013, "step": 4174 }, { "epoch": 0.5700436919716002, "grad_norm": 10.641180992126465, "learning_rate": 4.1136187881820586e-06, "loss": 0.969, "step": 4175 }, { "epoch": 0.570180229382851, "grad_norm": 6.904456615447998, "learning_rate": 4.11144275345757e-06, "loss": 1.0687, "step": 4176 }, { "epoch": 0.5703167667941016, "grad_norm": 6.859570503234863, "learning_rate": 4.10926689250486e-06, "loss": 0.9716, "step": 4177 }, { "epoch": 0.5704533042053522, "grad_norm": 8.41302490234375, "learning_rate": 4.107091205749451e-06, "loss": 0.9484, "step": 4178 }, { "epoch": 0.570589841616603, "grad_norm": 7.122895240783691, "learning_rate": 4.104915693616838e-06, "loss": 0.909, "step": 4179 }, { "epoch": 0.5707263790278536, "grad_norm": 12.05252742767334, "learning_rate": 4.102740356532474e-06, "loss": 0.9859, "step": 4180 }, { "epoch": 0.5708629164391044, "grad_norm": 7.253874778747559, "learning_rate": 4.100565194921785e-06, "loss": 0.85, "step": 4181 }, { "epoch": 0.570999453850355, "grad_norm": 8.002450942993164, "learning_rate": 4.0983902092101565e-06, "loss": 1.0049, "step": 4182 }, { "epoch": 0.5711359912616056, "grad_norm": 6.156628131866455, "learning_rate": 4.0962153998229445e-06, "loss": 1.039, "step": 4183 }, { "epoch": 0.5712725286728564, "grad_norm": 6.805959701538086, "learning_rate": 4.094040767185464e-06, "loss": 0.8675, "step": 4184 }, { "epoch": 0.571409066084107, "grad_norm": 5.472182750701904, "learning_rate": 4.091866311723005e-06, "loss": 0.8986, "step": 4185 }, { "epoch": 0.5715456034953578, "grad_norm": 8.209964752197266, "learning_rate": 4.089692033860815e-06, "loss": 0.9106, "step": 4186 }, { "epoch": 0.5716821409066084, "grad_norm": 5.262819766998291, "learning_rate": 4.0875179340241085e-06, "loss": 0.9338, "step": 4187 }, { "epoch": 0.571818678317859, "grad_norm": 5.074063301086426, "learning_rate": 4.085344012638067e-06, "loss": 0.9305, "step": 4188 }, { "epoch": 0.5719552157291098, "grad_norm": 7.983865261077881, "learning_rate": 4.083170270127836e-06, "loss": 0.9045, "step": 4189 }, { "epoch": 0.5720917531403604, "grad_norm": 8.147480964660645, "learning_rate": 4.080996706918528e-06, "loss": 1.04, "step": 4190 }, { "epoch": 0.5722282905516112, "grad_norm": 5.889437198638916, "learning_rate": 4.078823323435213e-06, "loss": 0.9403, "step": 4191 }, { "epoch": 0.5723648279628618, "grad_norm": 6.550772666931152, "learning_rate": 4.076650120102937e-06, "loss": 1.0337, "step": 4192 }, { "epoch": 0.5725013653741126, "grad_norm": 7.205765247344971, "learning_rate": 4.0744770973467015e-06, "loss": 1.0061, "step": 4193 }, { "epoch": 0.5726379027853632, "grad_norm": 5.976070880889893, "learning_rate": 4.07230425559148e-06, "loss": 1.0472, "step": 4194 }, { "epoch": 0.5727744401966138, "grad_norm": 7.947186470031738, "learning_rate": 4.070131595262203e-06, "loss": 1.0051, "step": 4195 }, { "epoch": 0.5729109776078646, "grad_norm": 5.8222880363464355, "learning_rate": 4.0679591167837725e-06, "loss": 0.8094, "step": 4196 }, { "epoch": 0.5730475150191152, "grad_norm": 9.111194610595703, "learning_rate": 4.065786820581052e-06, "loss": 1.0579, "step": 4197 }, { "epoch": 0.573184052430366, "grad_norm": 7.711875915527344, "learning_rate": 4.063614707078865e-06, "loss": 0.9306, "step": 4198 }, { "epoch": 0.5733205898416166, "grad_norm": 7.4979729652404785, "learning_rate": 4.061442776702009e-06, "loss": 0.8976, "step": 4199 }, { "epoch": 0.5734571272528672, "grad_norm": 7.697362422943115, "learning_rate": 4.059271029875236e-06, "loss": 1.1182, "step": 4200 }, { "epoch": 0.573593664664118, "grad_norm": 5.741271495819092, "learning_rate": 4.057099467023269e-06, "loss": 0.7872, "step": 4201 }, { "epoch": 0.5737302020753686, "grad_norm": 5.570954322814941, "learning_rate": 4.054928088570789e-06, "loss": 0.9179, "step": 4202 }, { "epoch": 0.5738667394866194, "grad_norm": 6.557390213012695, "learning_rate": 4.0527568949424505e-06, "loss": 1.0157, "step": 4203 }, { "epoch": 0.57400327689787, "grad_norm": 31.089889526367188, "learning_rate": 4.050585886562858e-06, "loss": 0.8275, "step": 4204 }, { "epoch": 0.5741398143091206, "grad_norm": 4.877368450164795, "learning_rate": 4.048415063856594e-06, "loss": 1.0014, "step": 4205 }, { "epoch": 0.5742763517203714, "grad_norm": 6.675382614135742, "learning_rate": 4.046244427248195e-06, "loss": 0.915, "step": 4206 }, { "epoch": 0.574412889131622, "grad_norm": 5.95986795425415, "learning_rate": 4.044073977162163e-06, "loss": 0.9821, "step": 4207 }, { "epoch": 0.5745494265428728, "grad_norm": 6.138588905334473, "learning_rate": 4.041903714022967e-06, "loss": 1.0371, "step": 4208 }, { "epoch": 0.5746859639541234, "grad_norm": 6.201237678527832, "learning_rate": 4.039733638255035e-06, "loss": 0.9164, "step": 4209 }, { "epoch": 0.5748225013653742, "grad_norm": 6.365601539611816, "learning_rate": 4.0375637502827626e-06, "loss": 0.8935, "step": 4210 }, { "epoch": 0.5749590387766248, "grad_norm": 6.716522216796875, "learning_rate": 4.035394050530504e-06, "loss": 0.9862, "step": 4211 }, { "epoch": 0.5750955761878754, "grad_norm": 7.479371547698975, "learning_rate": 4.033224539422584e-06, "loss": 1.0442, "step": 4212 }, { "epoch": 0.5752321135991262, "grad_norm": 6.406385898590088, "learning_rate": 4.031055217383281e-06, "loss": 0.9026, "step": 4213 }, { "epoch": 0.5753686510103768, "grad_norm": 7.720452785491943, "learning_rate": 4.0288860848368436e-06, "loss": 0.8725, "step": 4214 }, { "epoch": 0.5755051884216276, "grad_norm": 8.337135314941406, "learning_rate": 4.026717142207478e-06, "loss": 0.9398, "step": 4215 }, { "epoch": 0.5756417258328782, "grad_norm": 8.03817081451416, "learning_rate": 4.02454838991936e-06, "loss": 1.053, "step": 4216 }, { "epoch": 0.5757782632441288, "grad_norm": 5.958836555480957, "learning_rate": 4.022379828396621e-06, "loss": 0.8855, "step": 4217 }, { "epoch": 0.5759148006553796, "grad_norm": 6.562881946563721, "learning_rate": 4.02021145806336e-06, "loss": 1.0493, "step": 4218 }, { "epoch": 0.5760513380666302, "grad_norm": 5.982101917266846, "learning_rate": 4.018043279343637e-06, "loss": 0.9355, "step": 4219 }, { "epoch": 0.576187875477881, "grad_norm": 7.994453430175781, "learning_rate": 4.015875292661474e-06, "loss": 0.9942, "step": 4220 }, { "epoch": 0.5763244128891316, "grad_norm": 5.9698333740234375, "learning_rate": 4.013707498440856e-06, "loss": 0.829, "step": 4221 }, { "epoch": 0.5764609503003824, "grad_norm": 7.175675868988037, "learning_rate": 4.01153989710573e-06, "loss": 0.8498, "step": 4222 }, { "epoch": 0.576597487711633, "grad_norm": 10.22332763671875, "learning_rate": 4.009372489080007e-06, "loss": 0.9347, "step": 4223 }, { "epoch": 0.5767340251228836, "grad_norm": 6.554703712463379, "learning_rate": 4.007205274787556e-06, "loss": 1.0904, "step": 4224 }, { "epoch": 0.5768705625341344, "grad_norm": 6.86953067779541, "learning_rate": 4.005038254652215e-06, "loss": 0.927, "step": 4225 }, { "epoch": 0.577007099945385, "grad_norm": 6.05958890914917, "learning_rate": 4.002871429097776e-06, "loss": 1.0218, "step": 4226 }, { "epoch": 0.5771436373566358, "grad_norm": 7.997840404510498, "learning_rate": 4.000704798547999e-06, "loss": 1.0014, "step": 4227 }, { "epoch": 0.5772801747678864, "grad_norm": 6.745728015899658, "learning_rate": 3.998538363426605e-06, "loss": 1.0362, "step": 4228 }, { "epoch": 0.577416712179137, "grad_norm": 7.29318380355835, "learning_rate": 3.996372124157271e-06, "loss": 0.9987, "step": 4229 }, { "epoch": 0.5775532495903878, "grad_norm": 6.660798072814941, "learning_rate": 3.994206081163645e-06, "loss": 1.0652, "step": 4230 }, { "epoch": 0.5776897870016384, "grad_norm": 14.25586223602295, "learning_rate": 3.992040234869328e-06, "loss": 1.0506, "step": 4231 }, { "epoch": 0.5778263244128892, "grad_norm": 6.640712261199951, "learning_rate": 3.989874585697889e-06, "loss": 0.9217, "step": 4232 }, { "epoch": 0.5779628618241398, "grad_norm": 6.110358715057373, "learning_rate": 3.987709134072855e-06, "loss": 0.9177, "step": 4233 }, { "epoch": 0.5780993992353904, "grad_norm": 8.323915481567383, "learning_rate": 3.985543880417717e-06, "loss": 0.9099, "step": 4234 }, { "epoch": 0.5782359366466412, "grad_norm": 5.768202781677246, "learning_rate": 3.9833788251559206e-06, "loss": 1.0769, "step": 4235 }, { "epoch": 0.5783724740578918, "grad_norm": 6.09627628326416, "learning_rate": 3.981213968710882e-06, "loss": 0.9613, "step": 4236 }, { "epoch": 0.5785090114691426, "grad_norm": 5.203184127807617, "learning_rate": 3.979049311505973e-06, "loss": 0.8954, "step": 4237 }, { "epoch": 0.5786455488803932, "grad_norm": 7.981082439422607, "learning_rate": 3.976884853964525e-06, "loss": 0.9376, "step": 4238 }, { "epoch": 0.578782086291644, "grad_norm": 7.737185955047607, "learning_rate": 3.974720596509837e-06, "loss": 0.9958, "step": 4239 }, { "epoch": 0.5789186237028946, "grad_norm": 5.808172225952148, "learning_rate": 3.9725565395651614e-06, "loss": 1.0311, "step": 4240 }, { "epoch": 0.5790551611141452, "grad_norm": 5.854875087738037, "learning_rate": 3.9703926835537156e-06, "loss": 1.0202, "step": 4241 }, { "epoch": 0.579191698525396, "grad_norm": 6.762170791625977, "learning_rate": 3.968229028898675e-06, "loss": 0.9381, "step": 4242 }, { "epoch": 0.5793282359366466, "grad_norm": 8.884984970092773, "learning_rate": 3.966065576023181e-06, "loss": 0.9813, "step": 4243 }, { "epoch": 0.5794647733478974, "grad_norm": 7.322936058044434, "learning_rate": 3.96390232535033e-06, "loss": 0.931, "step": 4244 }, { "epoch": 0.579601310759148, "grad_norm": 6.910751819610596, "learning_rate": 3.961739277303179e-06, "loss": 0.9066, "step": 4245 }, { "epoch": 0.5797378481703986, "grad_norm": 7.546841144561768, "learning_rate": 3.95957643230475e-06, "loss": 1.0206, "step": 4246 }, { "epoch": 0.5798743855816494, "grad_norm": 8.714784622192383, "learning_rate": 3.957413790778022e-06, "loss": 0.9034, "step": 4247 }, { "epoch": 0.5800109229929, "grad_norm": 10.43252944946289, "learning_rate": 3.955251353145934e-06, "loss": 1.0803, "step": 4248 }, { "epoch": 0.5801474604041508, "grad_norm": 6.702269077301025, "learning_rate": 3.953089119831383e-06, "loss": 0.9349, "step": 4249 }, { "epoch": 0.5802839978154014, "grad_norm": 6.5153584480285645, "learning_rate": 3.950927091257233e-06, "loss": 0.9365, "step": 4250 }, { "epoch": 0.5804205352266522, "grad_norm": 5.938937187194824, "learning_rate": 3.948765267846302e-06, "loss": 0.9635, "step": 4251 }, { "epoch": 0.5805570726379028, "grad_norm": 7.200220108032227, "learning_rate": 3.94660365002137e-06, "loss": 0.9647, "step": 4252 }, { "epoch": 0.5806936100491534, "grad_norm": 5.744198322296143, "learning_rate": 3.944442238205174e-06, "loss": 0.8947, "step": 4253 }, { "epoch": 0.5808301474604042, "grad_norm": 8.937034606933594, "learning_rate": 3.9422810328204185e-06, "loss": 0.9848, "step": 4254 }, { "epoch": 0.5809666848716548, "grad_norm": 6.353508472442627, "learning_rate": 3.940120034289756e-06, "loss": 0.9315, "step": 4255 }, { "epoch": 0.5811032222829056, "grad_norm": 9.416091918945312, "learning_rate": 3.937959243035811e-06, "loss": 0.9131, "step": 4256 }, { "epoch": 0.5812397596941562, "grad_norm": 6.053955554962158, "learning_rate": 3.935798659481156e-06, "loss": 0.9867, "step": 4257 }, { "epoch": 0.5813762971054068, "grad_norm": 6.736166000366211, "learning_rate": 3.933638284048331e-06, "loss": 0.9417, "step": 4258 }, { "epoch": 0.5815128345166576, "grad_norm": 7.393416404724121, "learning_rate": 3.931478117159832e-06, "loss": 0.8855, "step": 4259 }, { "epoch": 0.5816493719279082, "grad_norm": 5.444993019104004, "learning_rate": 3.929318159238113e-06, "loss": 0.952, "step": 4260 }, { "epoch": 0.581785909339159, "grad_norm": 7.225527286529541, "learning_rate": 3.927158410705592e-06, "loss": 0.8912, "step": 4261 }, { "epoch": 0.5819224467504096, "grad_norm": 6.230487823486328, "learning_rate": 3.924998871984638e-06, "loss": 0.9488, "step": 4262 }, { "epoch": 0.5820589841616602, "grad_norm": 6.440472602844238, "learning_rate": 3.92283954349759e-06, "loss": 0.8913, "step": 4263 }, { "epoch": 0.582195521572911, "grad_norm": 6.513551235198975, "learning_rate": 3.920680425666735e-06, "loss": 0.9596, "step": 4264 }, { "epoch": 0.5823320589841616, "grad_norm": 7.333292484283447, "learning_rate": 3.9185215189143265e-06, "loss": 1.1869, "step": 4265 }, { "epoch": 0.5824685963954124, "grad_norm": 6.993584632873535, "learning_rate": 3.91636282366257e-06, "loss": 1.0518, "step": 4266 }, { "epoch": 0.582605133806663, "grad_norm": 6.480946063995361, "learning_rate": 3.914204340333638e-06, "loss": 1.1097, "step": 4267 }, { "epoch": 0.5827416712179138, "grad_norm": 10.758556365966797, "learning_rate": 3.912046069349654e-06, "loss": 0.9261, "step": 4268 }, { "epoch": 0.5828782086291644, "grad_norm": 9.68719482421875, "learning_rate": 3.9098880111327015e-06, "loss": 0.977, "step": 4269 }, { "epoch": 0.583014746040415, "grad_norm": 4.85245943069458, "learning_rate": 3.907730166104828e-06, "loss": 0.9757, "step": 4270 }, { "epoch": 0.5831512834516658, "grad_norm": 7.888553142547607, "learning_rate": 3.905572534688031e-06, "loss": 1.0044, "step": 4271 }, { "epoch": 0.5832878208629164, "grad_norm": 5.853946685791016, "learning_rate": 3.903415117304274e-06, "loss": 1.0313, "step": 4272 }, { "epoch": 0.5834243582741672, "grad_norm": 5.240073204040527, "learning_rate": 3.901257914375471e-06, "loss": 1.081, "step": 4273 }, { "epoch": 0.5835608956854178, "grad_norm": 6.1538472175598145, "learning_rate": 3.899100926323501e-06, "loss": 0.8529, "step": 4274 }, { "epoch": 0.5836974330966684, "grad_norm": 6.905930042266846, "learning_rate": 3.896944153570197e-06, "loss": 1.0186, "step": 4275 }, { "epoch": 0.5838339705079192, "grad_norm": 7.59943151473999, "learning_rate": 3.894787596537352e-06, "loss": 0.9786, "step": 4276 }, { "epoch": 0.5839705079191698, "grad_norm": 7.726747512817383, "learning_rate": 3.892631255646714e-06, "loss": 1.005, "step": 4277 }, { "epoch": 0.5841070453304206, "grad_norm": 5.150697231292725, "learning_rate": 3.89047513131999e-06, "loss": 1.0687, "step": 4278 }, { "epoch": 0.5842435827416712, "grad_norm": 5.35971736907959, "learning_rate": 3.888319223978849e-06, "loss": 1.0105, "step": 4279 }, { "epoch": 0.584380120152922, "grad_norm": 7.755530834197998, "learning_rate": 3.8861635340449065e-06, "loss": 0.9829, "step": 4280 }, { "epoch": 0.5845166575641726, "grad_norm": 7.026715278625488, "learning_rate": 3.88400806193975e-06, "loss": 0.9444, "step": 4281 }, { "epoch": 0.5846531949754232, "grad_norm": 7.832117557525635, "learning_rate": 3.8818528080849125e-06, "loss": 1.1319, "step": 4282 }, { "epoch": 0.584789732386674, "grad_norm": 6.698554039001465, "learning_rate": 3.879697772901891e-06, "loss": 0.9665, "step": 4283 }, { "epoch": 0.5849262697979246, "grad_norm": 5.198868751525879, "learning_rate": 3.877542956812137e-06, "loss": 0.9743, "step": 4284 }, { "epoch": 0.5850628072091754, "grad_norm": 8.381553649902344, "learning_rate": 3.87538836023706e-06, "loss": 1.0285, "step": 4285 }, { "epoch": 0.585199344620426, "grad_norm": 7.231939792633057, "learning_rate": 3.8732339835980245e-06, "loss": 0.9394, "step": 4286 }, { "epoch": 0.5853358820316766, "grad_norm": 6.545104026794434, "learning_rate": 3.8710798273163565e-06, "loss": 1.0251, "step": 4287 }, { "epoch": 0.5854724194429274, "grad_norm": 6.108163356781006, "learning_rate": 3.868925891813336e-06, "loss": 1.0088, "step": 4288 }, { "epoch": 0.585608956854178, "grad_norm": 7.365631580352783, "learning_rate": 3.866772177510196e-06, "loss": 0.869, "step": 4289 }, { "epoch": 0.5857454942654288, "grad_norm": 5.773995876312256, "learning_rate": 3.864618684828135e-06, "loss": 0.9713, "step": 4290 }, { "epoch": 0.5858820316766794, "grad_norm": 7.362401962280273, "learning_rate": 3.8624654141883005e-06, "loss": 0.9469, "step": 4291 }, { "epoch": 0.58601856908793, "grad_norm": 7.182982444763184, "learning_rate": 3.860312366011802e-06, "loss": 0.9903, "step": 4292 }, { "epoch": 0.5861551064991808, "grad_norm": 6.95276403427124, "learning_rate": 3.858159540719699e-06, "loss": 0.9435, "step": 4293 }, { "epoch": 0.5862916439104314, "grad_norm": 5.493736743927002, "learning_rate": 3.856006938733016e-06, "loss": 0.8653, "step": 4294 }, { "epoch": 0.5864281813216822, "grad_norm": 6.381621837615967, "learning_rate": 3.853854560472726e-06, "loss": 0.9726, "step": 4295 }, { "epoch": 0.5865647187329328, "grad_norm": 7.213822364807129, "learning_rate": 3.851702406359764e-06, "loss": 0.8817, "step": 4296 }, { "epoch": 0.5867012561441836, "grad_norm": 7.091149806976318, "learning_rate": 3.8495504768150175e-06, "loss": 1.032, "step": 4297 }, { "epoch": 0.5868377935554342, "grad_norm": 4.8797831535339355, "learning_rate": 3.84739877225933e-06, "loss": 1.0394, "step": 4298 }, { "epoch": 0.5869743309666848, "grad_norm": 5.877305030822754, "learning_rate": 3.845247293113504e-06, "loss": 1.0801, "step": 4299 }, { "epoch": 0.5871108683779356, "grad_norm": 5.685977458953857, "learning_rate": 3.843096039798293e-06, "loss": 1.0519, "step": 4300 }, { "epoch": 0.5872474057891862, "grad_norm": 7.195107460021973, "learning_rate": 3.840945012734414e-06, "loss": 0.9842, "step": 4301 }, { "epoch": 0.587383943200437, "grad_norm": 6.200474739074707, "learning_rate": 3.838794212342531e-06, "loss": 0.9547, "step": 4302 }, { "epoch": 0.5875204806116876, "grad_norm": 6.529054641723633, "learning_rate": 3.836643639043272e-06, "loss": 0.971, "step": 4303 }, { "epoch": 0.5876570180229382, "grad_norm": 11.122239112854004, "learning_rate": 3.834493293257214e-06, "loss": 1.0156, "step": 4304 }, { "epoch": 0.587793555434189, "grad_norm": 8.584301948547363, "learning_rate": 3.832343175404893e-06, "loss": 1.0496, "step": 4305 }, { "epoch": 0.5879300928454396, "grad_norm": 7.262170791625977, "learning_rate": 3.8301932859067965e-06, "loss": 1.0282, "step": 4306 }, { "epoch": 0.5880666302566904, "grad_norm": 6.693784713745117, "learning_rate": 3.828043625183375e-06, "loss": 0.8654, "step": 4307 }, { "epoch": 0.588203167667941, "grad_norm": 10.142651557922363, "learning_rate": 3.825894193655026e-06, "loss": 1.0439, "step": 4308 }, { "epoch": 0.5883397050791916, "grad_norm": 6.512868881225586, "learning_rate": 3.823744991742106e-06, "loss": 0.9552, "step": 4309 }, { "epoch": 0.5884762424904424, "grad_norm": 7.649118900299072, "learning_rate": 3.821596019864929e-06, "loss": 0.942, "step": 4310 }, { "epoch": 0.588612779901693, "grad_norm": 4.9107890129089355, "learning_rate": 3.819447278443757e-06, "loss": 0.9329, "step": 4311 }, { "epoch": 0.5887493173129438, "grad_norm": 6.779836654663086, "learning_rate": 3.8172987678988165e-06, "loss": 1.0025, "step": 4312 }, { "epoch": 0.5888858547241944, "grad_norm": 6.410350322723389, "learning_rate": 3.815150488650278e-06, "loss": 0.8202, "step": 4313 }, { "epoch": 0.5890223921354452, "grad_norm": 7.489186763763428, "learning_rate": 3.8130024411182763e-06, "loss": 1.1203, "step": 4314 }, { "epoch": 0.5891589295466958, "grad_norm": 14.81029987335205, "learning_rate": 3.810854625722895e-06, "loss": 0.9747, "step": 4315 }, { "epoch": 0.5892954669579464, "grad_norm": 7.5212578773498535, "learning_rate": 3.808707042884176e-06, "loss": 0.9737, "step": 4316 }, { "epoch": 0.5894320043691972, "grad_norm": 5.46960973739624, "learning_rate": 3.806559693022112e-06, "loss": 0.8993, "step": 4317 }, { "epoch": 0.5895685417804478, "grad_norm": 9.254508018493652, "learning_rate": 3.8044125765566525e-06, "loss": 1.1356, "step": 4318 }, { "epoch": 0.5897050791916986, "grad_norm": 5.996488571166992, "learning_rate": 3.802265693907703e-06, "loss": 0.9137, "step": 4319 }, { "epoch": 0.5898416166029492, "grad_norm": 6.874297142028809, "learning_rate": 3.8001190454951164e-06, "loss": 1.0493, "step": 4320 }, { "epoch": 0.5899781540141998, "grad_norm": 7.6856818199157715, "learning_rate": 3.7979726317387104e-06, "loss": 0.9634, "step": 4321 }, { "epoch": 0.5901146914254506, "grad_norm": 7.830516815185547, "learning_rate": 3.7958264530582478e-06, "loss": 1.1493, "step": 4322 }, { "epoch": 0.5902512288367012, "grad_norm": 7.014950275421143, "learning_rate": 3.79368050987345e-06, "loss": 0.94, "step": 4323 }, { "epoch": 0.590387766247952, "grad_norm": 6.348411560058594, "learning_rate": 3.7915348026039877e-06, "loss": 0.8357, "step": 4324 }, { "epoch": 0.5905243036592026, "grad_norm": 6.788508892059326, "learning_rate": 3.7893893316694936e-06, "loss": 0.9935, "step": 4325 }, { "epoch": 0.5906608410704534, "grad_norm": 10.54659366607666, "learning_rate": 3.7872440974895445e-06, "loss": 0.9528, "step": 4326 }, { "epoch": 0.590797378481704, "grad_norm": 5.522327899932861, "learning_rate": 3.7850991004836813e-06, "loss": 0.9787, "step": 4327 }, { "epoch": 0.5909339158929546, "grad_norm": 8.546070098876953, "learning_rate": 3.7829543410713887e-06, "loss": 0.958, "step": 4328 }, { "epoch": 0.5910704533042054, "grad_norm": 6.3056464195251465, "learning_rate": 3.78080981967211e-06, "loss": 0.868, "step": 4329 }, { "epoch": 0.591206990715456, "grad_norm": 5.988211631774902, "learning_rate": 3.778665536705243e-06, "loss": 0.9754, "step": 4330 }, { "epoch": 0.5913435281267068, "grad_norm": 17.644935607910156, "learning_rate": 3.7765214925901323e-06, "loss": 0.9086, "step": 4331 }, { "epoch": 0.5914800655379574, "grad_norm": 8.264376640319824, "learning_rate": 3.7743776877460864e-06, "loss": 0.9496, "step": 4332 }, { "epoch": 0.591616602949208, "grad_norm": 12.720810890197754, "learning_rate": 3.7722341225923564e-06, "loss": 0.9392, "step": 4333 }, { "epoch": 0.5917531403604588, "grad_norm": 7.658515453338623, "learning_rate": 3.770090797548155e-06, "loss": 0.8518, "step": 4334 }, { "epoch": 0.5918896777717094, "grad_norm": 7.119770050048828, "learning_rate": 3.7679477130326414e-06, "loss": 0.9309, "step": 4335 }, { "epoch": 0.5920262151829602, "grad_norm": 9.247293472290039, "learning_rate": 3.7658048694649324e-06, "loss": 0.7201, "step": 4336 }, { "epoch": 0.5921627525942108, "grad_norm": 6.101698398590088, "learning_rate": 3.763662267264093e-06, "loss": 1.0017, "step": 4337 }, { "epoch": 0.5922992900054614, "grad_norm": 10.21976375579834, "learning_rate": 3.7615199068491483e-06, "loss": 1.1826, "step": 4338 }, { "epoch": 0.5924358274167122, "grad_norm": 6.780646324157715, "learning_rate": 3.759377788639068e-06, "loss": 0.9811, "step": 4339 }, { "epoch": 0.5925723648279628, "grad_norm": 9.3480806350708, "learning_rate": 3.757235913052778e-06, "loss": 0.959, "step": 4340 }, { "epoch": 0.5927089022392136, "grad_norm": 10.824969291687012, "learning_rate": 3.7550942805091594e-06, "loss": 1.0613, "step": 4341 }, { "epoch": 0.5928454396504642, "grad_norm": 17.67361068725586, "learning_rate": 3.7529528914270408e-06, "loss": 0.9729, "step": 4342 }, { "epoch": 0.592981977061715, "grad_norm": 18.231592178344727, "learning_rate": 3.750811746225208e-06, "loss": 0.8549, "step": 4343 }, { "epoch": 0.5931185144729656, "grad_norm": 8.173707008361816, "learning_rate": 3.748670845322392e-06, "loss": 0.8829, "step": 4344 }, { "epoch": 0.5932550518842162, "grad_norm": 10.590411186218262, "learning_rate": 3.746530189137287e-06, "loss": 1.0332, "step": 4345 }, { "epoch": 0.593391589295467, "grad_norm": 6.877011775970459, "learning_rate": 3.744389778088527e-06, "loss": 0.8795, "step": 4346 }, { "epoch": 0.5935281267067176, "grad_norm": 8.22133731842041, "learning_rate": 3.742249612594709e-06, "loss": 0.9593, "step": 4347 }, { "epoch": 0.5936646641179684, "grad_norm": 15.10015869140625, "learning_rate": 3.7401096930743753e-06, "loss": 0.9408, "step": 4348 }, { "epoch": 0.593801201529219, "grad_norm": 31.779542922973633, "learning_rate": 3.73797001994602e-06, "loss": 1.006, "step": 4349 }, { "epoch": 0.5939377389404696, "grad_norm": 7.697597503662109, "learning_rate": 3.7358305936280938e-06, "loss": 0.9285, "step": 4350 }, { "epoch": 0.5940742763517204, "grad_norm": 7.5200910568237305, "learning_rate": 3.7336914145389924e-06, "loss": 1.0955, "step": 4351 }, { "epoch": 0.594210813762971, "grad_norm": 14.133238792419434, "learning_rate": 3.731552483097072e-06, "loss": 0.9895, "step": 4352 }, { "epoch": 0.5943473511742218, "grad_norm": 6.067554473876953, "learning_rate": 3.7294137997206302e-06, "loss": 0.9918, "step": 4353 }, { "epoch": 0.5944838885854724, "grad_norm": 14.111966133117676, "learning_rate": 3.7272753648279263e-06, "loss": 0.9491, "step": 4354 }, { "epoch": 0.5946204259967232, "grad_norm": 7.764437198638916, "learning_rate": 3.725137178837162e-06, "loss": 1.0044, "step": 4355 }, { "epoch": 0.5947569634079738, "grad_norm": 20.241159439086914, "learning_rate": 3.722999242166497e-06, "loss": 0.9327, "step": 4356 }, { "epoch": 0.5948935008192244, "grad_norm": 7.256432056427002, "learning_rate": 3.7208615552340354e-06, "loss": 0.9027, "step": 4357 }, { "epoch": 0.5950300382304752, "grad_norm": 6.083212375640869, "learning_rate": 3.7187241184578417e-06, "loss": 1.1973, "step": 4358 }, { "epoch": 0.5951665756417258, "grad_norm": 7.6639180183410645, "learning_rate": 3.7165869322559235e-06, "loss": 1.0223, "step": 4359 }, { "epoch": 0.5953031130529766, "grad_norm": 8.147726058959961, "learning_rate": 3.714449997046241e-06, "loss": 0.955, "step": 4360 }, { "epoch": 0.5954396504642272, "grad_norm": 32.7466926574707, "learning_rate": 3.7123133132467097e-06, "loss": 0.7897, "step": 4361 }, { "epoch": 0.5955761878754778, "grad_norm": 8.12600326538086, "learning_rate": 3.7101768812751904e-06, "loss": 0.9261, "step": 4362 }, { "epoch": 0.5957127252867286, "grad_norm": 10.298309326171875, "learning_rate": 3.708040701549499e-06, "loss": 0.9341, "step": 4363 }, { "epoch": 0.5958492626979792, "grad_norm": 7.930052280426025, "learning_rate": 3.705904774487396e-06, "loss": 0.906, "step": 4364 }, { "epoch": 0.59598580010923, "grad_norm": 7.051095485687256, "learning_rate": 3.7037691005066025e-06, "loss": 1.026, "step": 4365 }, { "epoch": 0.5961223375204806, "grad_norm": 6.713619232177734, "learning_rate": 3.7016336800247783e-06, "loss": 0.9156, "step": 4366 }, { "epoch": 0.5962588749317312, "grad_norm": 6.097678184509277, "learning_rate": 3.6994985134595445e-06, "loss": 1.032, "step": 4367 }, { "epoch": 0.596395412342982, "grad_norm": 9.562681198120117, "learning_rate": 3.697363601228465e-06, "loss": 1.0464, "step": 4368 }, { "epoch": 0.5965319497542326, "grad_norm": 9.583341598510742, "learning_rate": 3.695228943749057e-06, "loss": 0.9283, "step": 4369 }, { "epoch": 0.5966684871654834, "grad_norm": 7.729695796966553, "learning_rate": 3.6930945414387875e-06, "loss": 0.9007, "step": 4370 }, { "epoch": 0.596805024576734, "grad_norm": 8.638556480407715, "learning_rate": 3.690960394715071e-06, "loss": 0.8849, "step": 4371 }, { "epoch": 0.5969415619879848, "grad_norm": 7.207937717437744, "learning_rate": 3.6888265039952796e-06, "loss": 0.9656, "step": 4372 }, { "epoch": 0.5970780993992354, "grad_norm": 5.477370262145996, "learning_rate": 3.6866928696967258e-06, "loss": 0.877, "step": 4373 }, { "epoch": 0.597214636810486, "grad_norm": 6.918509483337402, "learning_rate": 3.684559492236678e-06, "loss": 0.9411, "step": 4374 }, { "epoch": 0.5973511742217368, "grad_norm": 6.174928665161133, "learning_rate": 3.682426372032353e-06, "loss": 0.8112, "step": 4375 }, { "epoch": 0.5974877116329874, "grad_norm": 10.572176933288574, "learning_rate": 3.6802935095009173e-06, "loss": 0.9715, "step": 4376 }, { "epoch": 0.5976242490442382, "grad_norm": 6.911671161651611, "learning_rate": 3.6781609050594847e-06, "loss": 1.0255, "step": 4377 }, { "epoch": 0.5977607864554888, "grad_norm": 6.571045398712158, "learning_rate": 3.6760285591251233e-06, "loss": 0.875, "step": 4378 }, { "epoch": 0.5978973238667394, "grad_norm": 5.9743170738220215, "learning_rate": 3.6738964721148475e-06, "loss": 1.0305, "step": 4379 }, { "epoch": 0.5980338612779902, "grad_norm": 21.57502555847168, "learning_rate": 3.6717646444456196e-06, "loss": 0.9716, "step": 4380 }, { "epoch": 0.5981703986892408, "grad_norm": 8.756229400634766, "learning_rate": 3.6696330765343547e-06, "loss": 1.063, "step": 4381 }, { "epoch": 0.5983069361004916, "grad_norm": 6.924428462982178, "learning_rate": 3.667501768797913e-06, "loss": 1.0954, "step": 4382 }, { "epoch": 0.5984434735117422, "grad_norm": 5.725757598876953, "learning_rate": 3.6653707216531108e-06, "loss": 1.0262, "step": 4383 }, { "epoch": 0.598580010922993, "grad_norm": 6.836084842681885, "learning_rate": 3.663239935516704e-06, "loss": 0.9799, "step": 4384 }, { "epoch": 0.5987165483342436, "grad_norm": 33.83891296386719, "learning_rate": 3.661109410805407e-06, "loss": 0.7877, "step": 4385 }, { "epoch": 0.5988530857454942, "grad_norm": 6.507673740386963, "learning_rate": 3.658979147935875e-06, "loss": 0.9957, "step": 4386 }, { "epoch": 0.598989623156745, "grad_norm": 9.931584358215332, "learning_rate": 3.656849147324718e-06, "loss": 1.0128, "step": 4387 }, { "epoch": 0.5991261605679956, "grad_norm": 7.116394996643066, "learning_rate": 3.6547194093884907e-06, "loss": 1.0021, "step": 4388 }, { "epoch": 0.5992626979792464, "grad_norm": 4.876944541931152, "learning_rate": 3.6525899345436953e-06, "loss": 0.9799, "step": 4389 }, { "epoch": 0.599399235390497, "grad_norm": 7.559394359588623, "learning_rate": 3.650460723206791e-06, "loss": 0.8029, "step": 4390 }, { "epoch": 0.5995357728017476, "grad_norm": 6.426578521728516, "learning_rate": 3.648331775794174e-06, "loss": 1.0583, "step": 4391 }, { "epoch": 0.5996723102129984, "grad_norm": 6.018967628479004, "learning_rate": 3.646203092722198e-06, "loss": 0.8516, "step": 4392 }, { "epoch": 0.599808847624249, "grad_norm": 7.117590427398682, "learning_rate": 3.644074674407161e-06, "loss": 1.0698, "step": 4393 }, { "epoch": 0.5999453850354998, "grad_norm": 10.714136123657227, "learning_rate": 3.6419465212653093e-06, "loss": 0.9743, "step": 4394 }, { "epoch": 0.6000819224467504, "grad_norm": 6.8585052490234375, "learning_rate": 3.639818633712835e-06, "loss": 0.8917, "step": 4395 }, { "epoch": 0.600218459858001, "grad_norm": 7.514493942260742, "learning_rate": 3.6376910121658867e-06, "loss": 0.9855, "step": 4396 }, { "epoch": 0.6003549972692518, "grad_norm": 7.191768646240234, "learning_rate": 3.6355636570405493e-06, "loss": 0.8778, "step": 4397 }, { "epoch": 0.6004915346805024, "grad_norm": 6.652954578399658, "learning_rate": 3.6334365687528673e-06, "loss": 0.9442, "step": 4398 }, { "epoch": 0.6006280720917532, "grad_norm": 7.891591548919678, "learning_rate": 3.631309747718824e-06, "loss": 1.0419, "step": 4399 }, { "epoch": 0.6007646095030038, "grad_norm": 9.616731643676758, "learning_rate": 3.629183194354354e-06, "loss": 0.858, "step": 4400 }, { "epoch": 0.6009011469142546, "grad_norm": 32.329673767089844, "learning_rate": 3.62705690907534e-06, "loss": 0.9733, "step": 4401 }, { "epoch": 0.6010376843255052, "grad_norm": 5.9138336181640625, "learning_rate": 3.624930892297609e-06, "loss": 0.9465, "step": 4402 }, { "epoch": 0.6011742217367558, "grad_norm": 6.307674407958984, "learning_rate": 3.6228051444369427e-06, "loss": 1.0009, "step": 4403 }, { "epoch": 0.6013107591480066, "grad_norm": 6.621613025665283, "learning_rate": 3.6206796659090605e-06, "loss": 0.9197, "step": 4404 }, { "epoch": 0.6014472965592572, "grad_norm": 13.786382675170898, "learning_rate": 3.6185544571296382e-06, "loss": 1.1007, "step": 4405 }, { "epoch": 0.601583833970508, "grad_norm": 5.460282802581787, "learning_rate": 3.6164295185142938e-06, "loss": 1.0344, "step": 4406 }, { "epoch": 0.6017203713817586, "grad_norm": 7.116756439208984, "learning_rate": 3.6143048504785925e-06, "loss": 0.9568, "step": 4407 }, { "epoch": 0.6018569087930092, "grad_norm": 7.108888626098633, "learning_rate": 3.61218045343805e-06, "loss": 0.9754, "step": 4408 }, { "epoch": 0.60199344620426, "grad_norm": 13.152957916259766, "learning_rate": 3.610056327808121e-06, "loss": 1.0581, "step": 4409 }, { "epoch": 0.6021299836155106, "grad_norm": 9.655712127685547, "learning_rate": 3.60793247400422e-06, "loss": 0.9858, "step": 4410 }, { "epoch": 0.6022665210267614, "grad_norm": 6.212085723876953, "learning_rate": 3.6058088924416946e-06, "loss": 1.0008, "step": 4411 }, { "epoch": 0.602403058438012, "grad_norm": 6.709236145019531, "learning_rate": 3.60368558353585e-06, "loss": 1.0074, "step": 4412 }, { "epoch": 0.6025395958492626, "grad_norm": 9.911942481994629, "learning_rate": 3.601562547701932e-06, "loss": 0.9531, "step": 4413 }, { "epoch": 0.6026761332605134, "grad_norm": 5.2138352394104, "learning_rate": 3.599439785355136e-06, "loss": 0.9359, "step": 4414 }, { "epoch": 0.602812670671764, "grad_norm": 6.731888294219971, "learning_rate": 3.5973172969105997e-06, "loss": 0.9046, "step": 4415 }, { "epoch": 0.6029492080830148, "grad_norm": 5.179233551025391, "learning_rate": 3.595195082783413e-06, "loss": 0.9391, "step": 4416 }, { "epoch": 0.6030857454942654, "grad_norm": 6.314576148986816, "learning_rate": 3.593073143388607e-06, "loss": 0.906, "step": 4417 }, { "epoch": 0.6032222829055162, "grad_norm": 5.936521530151367, "learning_rate": 3.5909514791411643e-06, "loss": 0.9342, "step": 4418 }, { "epoch": 0.6033588203167668, "grad_norm": 7.672661781311035, "learning_rate": 3.588830090456009e-06, "loss": 1.1051, "step": 4419 }, { "epoch": 0.6034953577280174, "grad_norm": 5.834810256958008, "learning_rate": 3.5867089777480124e-06, "loss": 1.0101, "step": 4420 }, { "epoch": 0.6036318951392682, "grad_norm": 5.406373977661133, "learning_rate": 3.584588141431994e-06, "loss": 1.0805, "step": 4421 }, { "epoch": 0.6037684325505188, "grad_norm": 6.026113033294678, "learning_rate": 3.5824675819227145e-06, "loss": 1.0392, "step": 4422 }, { "epoch": 0.6039049699617696, "grad_norm": 7.656580448150635, "learning_rate": 3.580347299634889e-06, "loss": 1.0015, "step": 4423 }, { "epoch": 0.6040415073730202, "grad_norm": 5.851015090942383, "learning_rate": 3.578227294983167e-06, "loss": 0.9292, "step": 4424 }, { "epoch": 0.6041780447842708, "grad_norm": 7.689148902893066, "learning_rate": 3.5761075683821557e-06, "loss": 1.0036, "step": 4425 }, { "epoch": 0.6043145821955216, "grad_norm": 9.474185943603516, "learning_rate": 3.5739881202463978e-06, "loss": 0.9497, "step": 4426 }, { "epoch": 0.6044511196067722, "grad_norm": 4.983151912689209, "learning_rate": 3.5718689509903893e-06, "loss": 0.9448, "step": 4427 }, { "epoch": 0.604587657018023, "grad_norm": 6.654464244842529, "learning_rate": 3.569750061028565e-06, "loss": 0.9494, "step": 4428 }, { "epoch": 0.6047241944292736, "grad_norm": 8.464898109436035, "learning_rate": 3.5676314507753074e-06, "loss": 0.9984, "step": 4429 }, { "epoch": 0.6048607318405244, "grad_norm": 4.883274555206299, "learning_rate": 3.5655131206449497e-06, "loss": 1.0508, "step": 4430 }, { "epoch": 0.604997269251775, "grad_norm": 5.557490825653076, "learning_rate": 3.56339507105176e-06, "loss": 1.1085, "step": 4431 }, { "epoch": 0.6051338066630256, "grad_norm": 10.412681579589844, "learning_rate": 3.5612773024099624e-06, "loss": 1.146, "step": 4432 }, { "epoch": 0.6052703440742764, "grad_norm": 9.392373085021973, "learning_rate": 3.5591598151337183e-06, "loss": 0.9836, "step": 4433 }, { "epoch": 0.605406881485527, "grad_norm": 5.96383810043335, "learning_rate": 3.5570426096371376e-06, "loss": 0.9924, "step": 4434 }, { "epoch": 0.6055434188967778, "grad_norm": 5.870004177093506, "learning_rate": 3.5549256863342712e-06, "loss": 0.953, "step": 4435 }, { "epoch": 0.6056799563080284, "grad_norm": 7.206234455108643, "learning_rate": 3.552809045639123e-06, "loss": 0.9468, "step": 4436 }, { "epoch": 0.605816493719279, "grad_norm": 4.357130527496338, "learning_rate": 3.5506926879656324e-06, "loss": 0.9382, "step": 4437 }, { "epoch": 0.6059530311305298, "grad_norm": 8.106184959411621, "learning_rate": 3.5485766137276894e-06, "loss": 0.9695, "step": 4438 }, { "epoch": 0.6060895685417804, "grad_norm": 6.188693046569824, "learning_rate": 3.546460823339125e-06, "loss": 0.9876, "step": 4439 }, { "epoch": 0.6062261059530312, "grad_norm": 6.1762285232543945, "learning_rate": 3.5443453172137175e-06, "loss": 0.8822, "step": 4440 }, { "epoch": 0.6063626433642818, "grad_norm": 7.589461803436279, "learning_rate": 3.542230095765189e-06, "loss": 0.9299, "step": 4441 }, { "epoch": 0.6064991807755324, "grad_norm": 6.370148181915283, "learning_rate": 3.5401151594072024e-06, "loss": 1.0691, "step": 4442 }, { "epoch": 0.6066357181867832, "grad_norm": 5.548425674438477, "learning_rate": 3.538000508553372e-06, "loss": 1.0167, "step": 4443 }, { "epoch": 0.6067722555980338, "grad_norm": 5.900299549102783, "learning_rate": 3.5358861436172487e-06, "loss": 1.0787, "step": 4444 }, { "epoch": 0.6069087930092846, "grad_norm": 6.714040756225586, "learning_rate": 3.533772065012333e-06, "loss": 1.0634, "step": 4445 }, { "epoch": 0.6070453304205352, "grad_norm": 6.208914756774902, "learning_rate": 3.5316582731520654e-06, "loss": 0.8813, "step": 4446 }, { "epoch": 0.607181867831786, "grad_norm": 4.553153991699219, "learning_rate": 3.5295447684498342e-06, "loss": 0.9396, "step": 4447 }, { "epoch": 0.6073184052430366, "grad_norm": 7.001908779144287, "learning_rate": 3.5274315513189673e-06, "loss": 0.8553, "step": 4448 }, { "epoch": 0.6074549426542872, "grad_norm": 6.232903480529785, "learning_rate": 3.525318622172741e-06, "loss": 0.9116, "step": 4449 }, { "epoch": 0.607591480065538, "grad_norm": 6.970930099487305, "learning_rate": 3.523205981424372e-06, "loss": 0.9302, "step": 4450 }, { "epoch": 0.6077280174767886, "grad_norm": 8.505130767822266, "learning_rate": 3.5210936294870204e-06, "loss": 0.9553, "step": 4451 }, { "epoch": 0.6078645548880394, "grad_norm": 6.890498161315918, "learning_rate": 3.5189815667737916e-06, "loss": 1.1101, "step": 4452 }, { "epoch": 0.60800109229929, "grad_norm": 10.53338623046875, "learning_rate": 3.516869793697732e-06, "loss": 0.986, "step": 4453 }, { "epoch": 0.6081376297105406, "grad_norm": 7.531799793243408, "learning_rate": 3.514758310671837e-06, "loss": 0.8857, "step": 4454 }, { "epoch": 0.6082741671217914, "grad_norm": 6.376864433288574, "learning_rate": 3.512647118109037e-06, "loss": 0.8477, "step": 4455 }, { "epoch": 0.608410704533042, "grad_norm": 6.116745948791504, "learning_rate": 3.5105362164222135e-06, "loss": 0.9458, "step": 4456 }, { "epoch": 0.6085472419442928, "grad_norm": 5.2634596824646, "learning_rate": 3.5084256060241863e-06, "loss": 0.8776, "step": 4457 }, { "epoch": 0.6086837793555434, "grad_norm": 4.999838829040527, "learning_rate": 3.506315287327719e-06, "loss": 0.9764, "step": 4458 }, { "epoch": 0.6088203167667942, "grad_norm": 7.107117176055908, "learning_rate": 3.5042052607455197e-06, "loss": 0.9819, "step": 4459 }, { "epoch": 0.6089568541780448, "grad_norm": 11.910131454467773, "learning_rate": 3.5020955266902344e-06, "loss": 0.911, "step": 4460 }, { "epoch": 0.6090933915892954, "grad_norm": 6.449159145355225, "learning_rate": 3.499986085574462e-06, "loss": 1.0153, "step": 4461 }, { "epoch": 0.6092299290005462, "grad_norm": 6.690553188323975, "learning_rate": 3.4978769378107326e-06, "loss": 0.9679, "step": 4462 }, { "epoch": 0.6093664664117968, "grad_norm": 5.352859020233154, "learning_rate": 3.4957680838115287e-06, "loss": 0.8018, "step": 4463 }, { "epoch": 0.6095030038230476, "grad_norm": 5.306438446044922, "learning_rate": 3.4936595239892675e-06, "loss": 0.8918, "step": 4464 }, { "epoch": 0.6096395412342982, "grad_norm": 14.046384811401367, "learning_rate": 3.4915512587563138e-06, "loss": 0.8971, "step": 4465 }, { "epoch": 0.6097760786455488, "grad_norm": 4.999622344970703, "learning_rate": 3.4894432885249714e-06, "loss": 0.9377, "step": 4466 }, { "epoch": 0.6099126160567996, "grad_norm": 7.047876358032227, "learning_rate": 3.4873356137074906e-06, "loss": 0.96, "step": 4467 }, { "epoch": 0.6100491534680502, "grad_norm": 7.423439025878906, "learning_rate": 3.485228234716058e-06, "loss": 1.1066, "step": 4468 }, { "epoch": 0.610185690879301, "grad_norm": 21.056224822998047, "learning_rate": 3.4831211519628096e-06, "loss": 0.8218, "step": 4469 }, { "epoch": 0.6103222282905516, "grad_norm": 4.95994758605957, "learning_rate": 3.4810143658598178e-06, "loss": 0.9293, "step": 4470 }, { "epoch": 0.6104587657018022, "grad_norm": 5.836236000061035, "learning_rate": 3.4789078768190974e-06, "loss": 0.9817, "step": 4471 }, { "epoch": 0.610595303113053, "grad_norm": 7.672433853149414, "learning_rate": 3.4768016852526093e-06, "loss": 0.941, "step": 4472 }, { "epoch": 0.6107318405243036, "grad_norm": 5.879073143005371, "learning_rate": 3.4746957915722496e-06, "loss": 1.0571, "step": 4473 }, { "epoch": 0.6108683779355544, "grad_norm": 7.595875263214111, "learning_rate": 3.4725901961898646e-06, "loss": 0.9556, "step": 4474 }, { "epoch": 0.611004915346805, "grad_norm": 6.386652946472168, "learning_rate": 3.4704848995172326e-06, "loss": 0.9307, "step": 4475 }, { "epoch": 0.6111414527580558, "grad_norm": 5.786144733428955, "learning_rate": 3.4683799019660834e-06, "loss": 0.9969, "step": 4476 }, { "epoch": 0.6112779901693064, "grad_norm": 8.460765838623047, "learning_rate": 3.4662752039480803e-06, "loss": 0.8943, "step": 4477 }, { "epoch": 0.611414527580557, "grad_norm": 7.295679092407227, "learning_rate": 3.4641708058748327e-06, "loss": 1.0832, "step": 4478 }, { "epoch": 0.6115510649918078, "grad_norm": 6.4967217445373535, "learning_rate": 3.4620667081578897e-06, "loss": 0.8889, "step": 4479 }, { "epoch": 0.6116876024030584, "grad_norm": 7.454122543334961, "learning_rate": 3.4599629112087387e-06, "loss": 0.9712, "step": 4480 }, { "epoch": 0.6118241398143092, "grad_norm": 6.111247539520264, "learning_rate": 3.457859415438816e-06, "loss": 0.9871, "step": 4481 }, { "epoch": 0.6119606772255598, "grad_norm": 8.13853645324707, "learning_rate": 3.4557562212594885e-06, "loss": 0.9111, "step": 4482 }, { "epoch": 0.6120972146368104, "grad_norm": 7.759593486785889, "learning_rate": 3.453653329082077e-06, "loss": 0.9585, "step": 4483 }, { "epoch": 0.6122337520480612, "grad_norm": 6.551658630371094, "learning_rate": 3.4515507393178316e-06, "loss": 0.8746, "step": 4484 }, { "epoch": 0.6123702894593118, "grad_norm": 9.923174858093262, "learning_rate": 3.449448452377949e-06, "loss": 0.9329, "step": 4485 }, { "epoch": 0.6125068268705626, "grad_norm": 10.036072731018066, "learning_rate": 3.4473464686735636e-06, "loss": 0.9562, "step": 4486 }, { "epoch": 0.6126433642818132, "grad_norm": 5.982027053833008, "learning_rate": 3.445244788615757e-06, "loss": 0.8778, "step": 4487 }, { "epoch": 0.6127799016930638, "grad_norm": 4.941713333129883, "learning_rate": 3.4431434126155406e-06, "loss": 0.8101, "step": 4488 }, { "epoch": 0.6129164391043146, "grad_norm": 8.781547546386719, "learning_rate": 3.441042341083879e-06, "loss": 0.9654, "step": 4489 }, { "epoch": 0.6130529765155652, "grad_norm": 5.900252819061279, "learning_rate": 3.4389415744316667e-06, "loss": 0.883, "step": 4490 }, { "epoch": 0.613189513926816, "grad_norm": 5.864494323730469, "learning_rate": 3.4368411130697437e-06, "loss": 1.012, "step": 4491 }, { "epoch": 0.6133260513380666, "grad_norm": 5.235367298126221, "learning_rate": 3.4347409574088896e-06, "loss": 0.8992, "step": 4492 }, { "epoch": 0.6134625887493174, "grad_norm": 5.546283721923828, "learning_rate": 3.432641107859822e-06, "loss": 0.8591, "step": 4493 }, { "epoch": 0.613599126160568, "grad_norm": 10.038716316223145, "learning_rate": 3.4305415648332043e-06, "loss": 0.9313, "step": 4494 }, { "epoch": 0.6137356635718186, "grad_norm": 5.754940509796143, "learning_rate": 3.428442328739631e-06, "loss": 1.0475, "step": 4495 }, { "epoch": 0.6138722009830694, "grad_norm": 5.588577747344971, "learning_rate": 3.426343399989648e-06, "loss": 0.8705, "step": 4496 }, { "epoch": 0.61400873839432, "grad_norm": 5.5026936531066895, "learning_rate": 3.4242447789937295e-06, "loss": 0.8714, "step": 4497 }, { "epoch": 0.6141452758055708, "grad_norm": 5.609668254852295, "learning_rate": 3.4221464661622983e-06, "loss": 1.0769, "step": 4498 }, { "epoch": 0.6142818132168214, "grad_norm": 6.108980178833008, "learning_rate": 3.4200484619057122e-06, "loss": 0.9357, "step": 4499 }, { "epoch": 0.614418350628072, "grad_norm": 9.631689071655273, "learning_rate": 3.417950766634268e-06, "loss": 0.9623, "step": 4500 }, { "epoch": 0.6145548880393228, "grad_norm": 6.131313800811768, "learning_rate": 3.4158533807582076e-06, "loss": 0.8058, "step": 4501 }, { "epoch": 0.6146914254505734, "grad_norm": 5.5355401039123535, "learning_rate": 3.4137563046877066e-06, "loss": 0.9371, "step": 4502 }, { "epoch": 0.6148279628618242, "grad_norm": 6.382054805755615, "learning_rate": 3.4116595388328827e-06, "loss": 0.8514, "step": 4503 }, { "epoch": 0.6149645002730748, "grad_norm": 5.093729019165039, "learning_rate": 3.409563083603793e-06, "loss": 0.8162, "step": 4504 }, { "epoch": 0.6151010376843256, "grad_norm": 10.19255542755127, "learning_rate": 3.4074669394104332e-06, "loss": 1.0166, "step": 4505 }, { "epoch": 0.6152375750955762, "grad_norm": 6.139233589172363, "learning_rate": 3.405371106662736e-06, "loss": 0.8414, "step": 4506 }, { "epoch": 0.6153741125068268, "grad_norm": 6.353785037994385, "learning_rate": 3.4032755857705797e-06, "loss": 0.7835, "step": 4507 }, { "epoch": 0.6155106499180776, "grad_norm": 17.61574363708496, "learning_rate": 3.401180377143774e-06, "loss": 1.0995, "step": 4508 }, { "epoch": 0.6156471873293282, "grad_norm": 5.219788551330566, "learning_rate": 3.399085481192073e-06, "loss": 0.9089, "step": 4509 }, { "epoch": 0.615783724740579, "grad_norm": 5.346512317657471, "learning_rate": 3.3969908983251667e-06, "loss": 0.9718, "step": 4510 }, { "epoch": 0.6159202621518296, "grad_norm": 8.719682693481445, "learning_rate": 3.394896628952683e-06, "loss": 0.9426, "step": 4511 }, { "epoch": 0.6160567995630802, "grad_norm": 6.230128765106201, "learning_rate": 3.3928026734841935e-06, "loss": 0.9355, "step": 4512 }, { "epoch": 0.616193336974331, "grad_norm": 5.1659345626831055, "learning_rate": 3.390709032329201e-06, "loss": 0.9997, "step": 4513 }, { "epoch": 0.6163298743855816, "grad_norm": 8.261083602905273, "learning_rate": 3.388615705897157e-06, "loss": 0.9058, "step": 4514 }, { "epoch": 0.6164664117968324, "grad_norm": 5.518463611602783, "learning_rate": 3.38652269459744e-06, "loss": 1.0565, "step": 4515 }, { "epoch": 0.616602949208083, "grad_norm": 8.655237197875977, "learning_rate": 3.3844299988393757e-06, "loss": 0.9824, "step": 4516 }, { "epoch": 0.6167394866193336, "grad_norm": 5.309066295623779, "learning_rate": 3.382337619032221e-06, "loss": 0.8995, "step": 4517 }, { "epoch": 0.6168760240305844, "grad_norm": 7.606470108032227, "learning_rate": 3.3802455555851787e-06, "loss": 1.0028, "step": 4518 }, { "epoch": 0.617012561441835, "grad_norm": 7.1098952293396, "learning_rate": 3.378153808907384e-06, "loss": 0.8833, "step": 4519 }, { "epoch": 0.6171490988530858, "grad_norm": 6.075589656829834, "learning_rate": 3.3760623794079105e-06, "loss": 1.0437, "step": 4520 }, { "epoch": 0.6172856362643364, "grad_norm": 5.699235916137695, "learning_rate": 3.373971267495774e-06, "loss": 0.9907, "step": 4521 }, { "epoch": 0.6174221736755872, "grad_norm": 9.373231887817383, "learning_rate": 3.3718804735799233e-06, "loss": 0.9652, "step": 4522 }, { "epoch": 0.6175587110868378, "grad_norm": 7.889226913452148, "learning_rate": 3.369789998069248e-06, "loss": 1.0113, "step": 4523 }, { "epoch": 0.6176952484980884, "grad_norm": 4.857365131378174, "learning_rate": 3.3676998413725726e-06, "loss": 0.8647, "step": 4524 }, { "epoch": 0.6178317859093392, "grad_norm": 7.70547342300415, "learning_rate": 3.365610003898665e-06, "loss": 1.0388, "step": 4525 }, { "epoch": 0.6179683233205898, "grad_norm": 7.295742034912109, "learning_rate": 3.363520486056222e-06, "loss": 0.9645, "step": 4526 }, { "epoch": 0.6181048607318406, "grad_norm": 5.9853105545043945, "learning_rate": 3.361431288253887e-06, "loss": 1.0491, "step": 4527 }, { "epoch": 0.6182413981430912, "grad_norm": 7.189521312713623, "learning_rate": 3.359342410900234e-06, "loss": 0.8914, "step": 4528 }, { "epoch": 0.6183779355543418, "grad_norm": 5.709507465362549, "learning_rate": 3.357253854403778e-06, "loss": 0.8806, "step": 4529 }, { "epoch": 0.6185144729655926, "grad_norm": 6.608227729797363, "learning_rate": 3.3551656191729696e-06, "loss": 0.8953, "step": 4530 }, { "epoch": 0.6186510103768432, "grad_norm": 8.40975570678711, "learning_rate": 3.3530777056161958e-06, "loss": 0.9534, "step": 4531 }, { "epoch": 0.618787547788094, "grad_norm": 6.2403411865234375, "learning_rate": 3.3509901141417845e-06, "loss": 1.1314, "step": 4532 }, { "epoch": 0.6189240851993446, "grad_norm": 10.263176918029785, "learning_rate": 3.3489028451579952e-06, "loss": 0.9669, "step": 4533 }, { "epoch": 0.6190606226105954, "grad_norm": 6.483567714691162, "learning_rate": 3.3468158990730304e-06, "loss": 0.9427, "step": 4534 }, { "epoch": 0.619197160021846, "grad_norm": 13.686463356018066, "learning_rate": 3.344729276295024e-06, "loss": 0.947, "step": 4535 }, { "epoch": 0.6193336974330966, "grad_norm": 5.868316650390625, "learning_rate": 3.34264297723205e-06, "loss": 1.0377, "step": 4536 }, { "epoch": 0.6194702348443474, "grad_norm": 8.625507354736328, "learning_rate": 3.340557002292116e-06, "loss": 0.9152, "step": 4537 }, { "epoch": 0.619606772255598, "grad_norm": 6.603318691253662, "learning_rate": 3.3384713518831723e-06, "loss": 0.9275, "step": 4538 }, { "epoch": 0.6197433096668488, "grad_norm": 5.652337074279785, "learning_rate": 3.3363860264130987e-06, "loss": 0.9788, "step": 4539 }, { "epoch": 0.6198798470780994, "grad_norm": 6.607895374298096, "learning_rate": 3.3343010262897125e-06, "loss": 0.855, "step": 4540 }, { "epoch": 0.62001638448935, "grad_norm": 7.982262134552002, "learning_rate": 3.332216351920774e-06, "loss": 0.8981, "step": 4541 }, { "epoch": 0.6201529219006008, "grad_norm": 6.785465717315674, "learning_rate": 3.330132003713971e-06, "loss": 1.1368, "step": 4542 }, { "epoch": 0.6202894593118514, "grad_norm": 5.202209949493408, "learning_rate": 3.328047982076935e-06, "loss": 0.8548, "step": 4543 }, { "epoch": 0.6204259967231022, "grad_norm": 5.950862884521484, "learning_rate": 3.3259642874172266e-06, "loss": 0.9586, "step": 4544 }, { "epoch": 0.6205625341343528, "grad_norm": 5.368066310882568, "learning_rate": 3.3238809201423493e-06, "loss": 0.9351, "step": 4545 }, { "epoch": 0.6206990715456034, "grad_norm": 6.385456085205078, "learning_rate": 3.3217978806597375e-06, "loss": 0.9125, "step": 4546 }, { "epoch": 0.6208356089568542, "grad_norm": 7.981788635253906, "learning_rate": 3.319715169376764e-06, "loss": 0.8398, "step": 4547 }, { "epoch": 0.6209721463681048, "grad_norm": 6.427757263183594, "learning_rate": 3.3176327867007376e-06, "loss": 0.9897, "step": 4548 }, { "epoch": 0.6211086837793556, "grad_norm": 5.283452033996582, "learning_rate": 3.3155507330389004e-06, "loss": 0.8767, "step": 4549 }, { "epoch": 0.6212452211906062, "grad_norm": 8.619776725769043, "learning_rate": 3.3134690087984335e-06, "loss": 0.8805, "step": 4550 }, { "epoch": 0.621381758601857, "grad_norm": 6.317007541656494, "learning_rate": 3.3113876143864485e-06, "loss": 0.8743, "step": 4551 }, { "epoch": 0.6215182960131076, "grad_norm": 4.657472610473633, "learning_rate": 3.30930655021e-06, "loss": 0.9569, "step": 4552 }, { "epoch": 0.6216548334243582, "grad_norm": 5.4181647300720215, "learning_rate": 3.3072258166760694e-06, "loss": 0.8512, "step": 4553 }, { "epoch": 0.621791370835609, "grad_norm": 5.8463969230651855, "learning_rate": 3.3051454141915833e-06, "loss": 0.802, "step": 4554 }, { "epoch": 0.6219279082468596, "grad_norm": 5.548891067504883, "learning_rate": 3.3030653431633943e-06, "loss": 0.8839, "step": 4555 }, { "epoch": 0.6220644456581104, "grad_norm": 6.010339260101318, "learning_rate": 3.300985603998296e-06, "loss": 0.8394, "step": 4556 }, { "epoch": 0.622200983069361, "grad_norm": 8.785501480102539, "learning_rate": 3.2989061971030113e-06, "loss": 0.9518, "step": 4557 }, { "epoch": 0.6223375204806116, "grad_norm": 5.292012691497803, "learning_rate": 3.2968271228842074e-06, "loss": 0.9785, "step": 4558 }, { "epoch": 0.6224740578918624, "grad_norm": 6.1288323402404785, "learning_rate": 3.2947483817484773e-06, "loss": 1.0313, "step": 4559 }, { "epoch": 0.622610595303113, "grad_norm": 5.588454246520996, "learning_rate": 3.2926699741023525e-06, "loss": 0.8702, "step": 4560 }, { "epoch": 0.6227471327143638, "grad_norm": 5.958776950836182, "learning_rate": 3.2905919003523028e-06, "loss": 1.012, "step": 4561 }, { "epoch": 0.6228836701256144, "grad_norm": 6.335391998291016, "learning_rate": 3.2885141609047257e-06, "loss": 1.0, "step": 4562 }, { "epoch": 0.6230202075368652, "grad_norm": 5.53788423538208, "learning_rate": 3.2864367561659582e-06, "loss": 0.9937, "step": 4563 }, { "epoch": 0.6231567449481158, "grad_norm": 5.12812614440918, "learning_rate": 3.2843596865422687e-06, "loss": 0.9128, "step": 4564 }, { "epoch": 0.6232932823593664, "grad_norm": 5.729053974151611, "learning_rate": 3.2822829524398645e-06, "loss": 0.7785, "step": 4565 }, { "epoch": 0.6234298197706172, "grad_norm": 10.465906143188477, "learning_rate": 3.2802065542648835e-06, "loss": 1.0902, "step": 4566 }, { "epoch": 0.6235663571818678, "grad_norm": 7.322951793670654, "learning_rate": 3.2781304924234005e-06, "loss": 0.8959, "step": 4567 }, { "epoch": 0.6237028945931186, "grad_norm": 5.385303497314453, "learning_rate": 3.2760547673214183e-06, "loss": 0.885, "step": 4568 }, { "epoch": 0.6238394320043692, "grad_norm": 5.225915431976318, "learning_rate": 3.2739793793648846e-06, "loss": 0.9259, "step": 4569 }, { "epoch": 0.6239759694156198, "grad_norm": 5.812171936035156, "learning_rate": 3.2719043289596727e-06, "loss": 0.7686, "step": 4570 }, { "epoch": 0.6241125068268706, "grad_norm": 6.325955390930176, "learning_rate": 3.2698296165115896e-06, "loss": 1.096, "step": 4571 }, { "epoch": 0.6242490442381212, "grad_norm": 11.738037109375, "learning_rate": 3.2677552424263836e-06, "loss": 0.931, "step": 4572 }, { "epoch": 0.624385581649372, "grad_norm": 6.633430004119873, "learning_rate": 3.265681207109729e-06, "loss": 0.9339, "step": 4573 }, { "epoch": 0.6245221190606226, "grad_norm": 5.004945755004883, "learning_rate": 3.2636075109672395e-06, "loss": 0.9085, "step": 4574 }, { "epoch": 0.6246586564718732, "grad_norm": 6.239785671234131, "learning_rate": 3.261534154404456e-06, "loss": 0.9905, "step": 4575 }, { "epoch": 0.624795193883124, "grad_norm": 7.093977928161621, "learning_rate": 3.259461137826862e-06, "loss": 0.9951, "step": 4576 }, { "epoch": 0.6249317312943746, "grad_norm": 6.127622127532959, "learning_rate": 3.2573884616398655e-06, "loss": 0.9342, "step": 4577 }, { "epoch": 0.6250682687056254, "grad_norm": 6.76707649230957, "learning_rate": 3.255316126248815e-06, "loss": 0.865, "step": 4578 }, { "epoch": 0.625204806116876, "grad_norm": 6.966607093811035, "learning_rate": 3.253244132058987e-06, "loss": 1.0636, "step": 4579 }, { "epoch": 0.6253413435281268, "grad_norm": 6.374328136444092, "learning_rate": 3.251172479475595e-06, "loss": 0.9917, "step": 4580 }, { "epoch": 0.6254778809393774, "grad_norm": 8.240182876586914, "learning_rate": 3.2491011689037845e-06, "loss": 1.0729, "step": 4581 }, { "epoch": 0.625614418350628, "grad_norm": 7.258090972900391, "learning_rate": 3.247030200748631e-06, "loss": 0.8121, "step": 4582 }, { "epoch": 0.6257509557618788, "grad_norm": 7.695972442626953, "learning_rate": 3.2449595754151498e-06, "loss": 0.8737, "step": 4583 }, { "epoch": 0.6258874931731294, "grad_norm": 6.037177562713623, "learning_rate": 3.2428892933082805e-06, "loss": 0.8597, "step": 4584 }, { "epoch": 0.6260240305843802, "grad_norm": 5.633379936218262, "learning_rate": 3.2408193548329066e-06, "loss": 0.9706, "step": 4585 }, { "epoch": 0.6261605679956308, "grad_norm": 7.582866191864014, "learning_rate": 3.2387497603938327e-06, "loss": 0.9642, "step": 4586 }, { "epoch": 0.6262971054068814, "grad_norm": 7.839961051940918, "learning_rate": 3.2366805103958055e-06, "loss": 0.9445, "step": 4587 }, { "epoch": 0.6264336428181322, "grad_norm": 8.917119979858398, "learning_rate": 3.234611605243496e-06, "loss": 0.961, "step": 4588 }, { "epoch": 0.6265701802293828, "grad_norm": 13.572163581848145, "learning_rate": 3.232543045341517e-06, "loss": 0.9196, "step": 4589 }, { "epoch": 0.6267067176406336, "grad_norm": 6.889333724975586, "learning_rate": 3.2304748310944066e-06, "loss": 1.0494, "step": 4590 }, { "epoch": 0.6268432550518842, "grad_norm": 6.724116802215576, "learning_rate": 3.228406962906635e-06, "loss": 0.9144, "step": 4591 }, { "epoch": 0.6269797924631348, "grad_norm": 6.349323749542236, "learning_rate": 3.2263394411826133e-06, "loss": 1.1579, "step": 4592 }, { "epoch": 0.6271163298743856, "grad_norm": 8.109363555908203, "learning_rate": 3.2242722663266733e-06, "loss": 1.0156, "step": 4593 }, { "epoch": 0.6272528672856362, "grad_norm": 7.4195780754089355, "learning_rate": 3.222205438743089e-06, "loss": 1.0431, "step": 4594 }, { "epoch": 0.627389404696887, "grad_norm": 7.495914936065674, "learning_rate": 3.220138958836057e-06, "loss": 0.9028, "step": 4595 }, { "epoch": 0.6275259421081376, "grad_norm": 6.001664161682129, "learning_rate": 3.2180728270097163e-06, "loss": 0.9427, "step": 4596 }, { "epoch": 0.6276624795193884, "grad_norm": 5.808532238006592, "learning_rate": 3.2160070436681278e-06, "loss": 0.9454, "step": 4597 }, { "epoch": 0.627799016930639, "grad_norm": 7.091921329498291, "learning_rate": 3.2139416092152937e-06, "loss": 0.8211, "step": 4598 }, { "epoch": 0.6279355543418896, "grad_norm": 5.475137710571289, "learning_rate": 3.21187652405514e-06, "loss": 0.9145, "step": 4599 }, { "epoch": 0.6280720917531404, "grad_norm": 8.966647148132324, "learning_rate": 3.209811788591528e-06, "loss": 1.1895, "step": 4600 }, { "epoch": 0.628208629164391, "grad_norm": 7.719625473022461, "learning_rate": 3.207747403228251e-06, "loss": 0.9832, "step": 4601 }, { "epoch": 0.6283451665756418, "grad_norm": 6.78692102432251, "learning_rate": 3.205683368369031e-06, "loss": 1.0323, "step": 4602 }, { "epoch": 0.6284817039868924, "grad_norm": 6.946160316467285, "learning_rate": 3.2036196844175266e-06, "loss": 0.8346, "step": 4603 }, { "epoch": 0.628618241398143, "grad_norm": 6.724095821380615, "learning_rate": 3.2015563517773214e-06, "loss": 0.918, "step": 4604 }, { "epoch": 0.6287547788093938, "grad_norm": 5.599284648895264, "learning_rate": 3.199493370851937e-06, "loss": 0.9943, "step": 4605 }, { "epoch": 0.6288913162206444, "grad_norm": 5.3876214027404785, "learning_rate": 3.1974307420448203e-06, "loss": 0.8834, "step": 4606 }, { "epoch": 0.6290278536318952, "grad_norm": 6.04674768447876, "learning_rate": 3.195368465759353e-06, "loss": 0.8058, "step": 4607 }, { "epoch": 0.6291643910431458, "grad_norm": 6.1451826095581055, "learning_rate": 3.193306542398844e-06, "loss": 0.9683, "step": 4608 }, { "epoch": 0.6293009284543966, "grad_norm": 6.65691614151001, "learning_rate": 3.1912449723665405e-06, "loss": 0.9666, "step": 4609 }, { "epoch": 0.6294374658656472, "grad_norm": 7.108993053436279, "learning_rate": 3.1891837560656135e-06, "loss": 0.8817, "step": 4610 }, { "epoch": 0.6295740032768978, "grad_norm": 6.528343200683594, "learning_rate": 3.187122893899165e-06, "loss": 0.8258, "step": 4611 }, { "epoch": 0.6297105406881486, "grad_norm": 5.818915843963623, "learning_rate": 3.1850623862702344e-06, "loss": 1.0, "step": 4612 }, { "epoch": 0.6298470780993992, "grad_norm": 4.581451892852783, "learning_rate": 3.1830022335817844e-06, "loss": 0.941, "step": 4613 }, { "epoch": 0.62998361551065, "grad_norm": 6.402774333953857, "learning_rate": 3.1809424362367136e-06, "loss": 1.045, "step": 4614 }, { "epoch": 0.6301201529219006, "grad_norm": 7.184240341186523, "learning_rate": 3.1788829946378443e-06, "loss": 1.0049, "step": 4615 }, { "epoch": 0.6302566903331512, "grad_norm": 18.79808235168457, "learning_rate": 3.1768239091879393e-06, "loss": 0.9856, "step": 4616 }, { "epoch": 0.630393227744402, "grad_norm": 5.890898704528809, "learning_rate": 3.174765180289681e-06, "loss": 0.8564, "step": 4617 }, { "epoch": 0.6305297651556526, "grad_norm": 5.995928764343262, "learning_rate": 3.1727068083456926e-06, "loss": 0.8055, "step": 4618 }, { "epoch": 0.6306663025669034, "grad_norm": 8.385224342346191, "learning_rate": 3.170648793758517e-06, "loss": 1.0224, "step": 4619 }, { "epoch": 0.630802839978154, "grad_norm": 10.218295097351074, "learning_rate": 3.1685911369306364e-06, "loss": 0.9306, "step": 4620 }, { "epoch": 0.6309393773894046, "grad_norm": 6.788443088531494, "learning_rate": 3.1665338382644575e-06, "loss": 1.0088, "step": 4621 }, { "epoch": 0.6310759148006554, "grad_norm": 7.561661720275879, "learning_rate": 3.1644768981623154e-06, "loss": 0.8494, "step": 4622 }, { "epoch": 0.631212452211906, "grad_norm": 5.486880302429199, "learning_rate": 3.1624203170264832e-06, "loss": 0.8593, "step": 4623 }, { "epoch": 0.6313489896231568, "grad_norm": 5.5033135414123535, "learning_rate": 3.160364095259154e-06, "loss": 0.9736, "step": 4624 }, { "epoch": 0.6314855270344074, "grad_norm": 7.092715740203857, "learning_rate": 3.15830823326246e-06, "loss": 0.926, "step": 4625 }, { "epoch": 0.6316220644456582, "grad_norm": 5.634692668914795, "learning_rate": 3.156252731438454e-06, "loss": 0.8816, "step": 4626 }, { "epoch": 0.6317586018569088, "grad_norm": 5.474454879760742, "learning_rate": 3.1541975901891265e-06, "loss": 1.0348, "step": 4627 }, { "epoch": 0.6318951392681594, "grad_norm": 9.233160018920898, "learning_rate": 3.1521428099163897e-06, "loss": 0.8389, "step": 4628 }, { "epoch": 0.6320316766794102, "grad_norm": 7.100677967071533, "learning_rate": 3.1500883910220932e-06, "loss": 0.9162, "step": 4629 }, { "epoch": 0.6321682140906608, "grad_norm": 6.87846040725708, "learning_rate": 3.14803433390801e-06, "loss": 0.9248, "step": 4630 }, { "epoch": 0.6323047515019116, "grad_norm": 7.262454032897949, "learning_rate": 3.145980638975843e-06, "loss": 1.0556, "step": 4631 }, { "epoch": 0.6324412889131622, "grad_norm": 5.656414031982422, "learning_rate": 3.1439273066272273e-06, "loss": 0.9498, "step": 4632 }, { "epoch": 0.6325778263244128, "grad_norm": 5.506000995635986, "learning_rate": 3.141874337263725e-06, "loss": 0.9261, "step": 4633 }, { "epoch": 0.6327143637356636, "grad_norm": 6.0308451652526855, "learning_rate": 3.1398217312868284e-06, "loss": 0.9425, "step": 4634 }, { "epoch": 0.6328509011469142, "grad_norm": 6.3605732917785645, "learning_rate": 3.1377694890979547e-06, "loss": 1.0478, "step": 4635 }, { "epoch": 0.632987438558165, "grad_norm": 5.899538993835449, "learning_rate": 3.1357176110984578e-06, "loss": 1.0106, "step": 4636 }, { "epoch": 0.6331239759694156, "grad_norm": 7.441431045532227, "learning_rate": 3.1336660976896118e-06, "loss": 1.0076, "step": 4637 }, { "epoch": 0.6332605133806664, "grad_norm": 6.5998711585998535, "learning_rate": 3.1316149492726266e-06, "loss": 0.9267, "step": 4638 }, { "epoch": 0.633397050791917, "grad_norm": 7.4612298011779785, "learning_rate": 3.1295641662486333e-06, "loss": 0.9934, "step": 4639 }, { "epoch": 0.6335335882031676, "grad_norm": 7.321218490600586, "learning_rate": 3.1275137490187003e-06, "loss": 0.9264, "step": 4640 }, { "epoch": 0.6336701256144184, "grad_norm": 5.958755970001221, "learning_rate": 3.125463697983818e-06, "loss": 0.863, "step": 4641 }, { "epoch": 0.633806663025669, "grad_norm": 6.9423828125, "learning_rate": 3.1234140135449055e-06, "loss": 1.0348, "step": 4642 }, { "epoch": 0.6339432004369198, "grad_norm": 13.76129150390625, "learning_rate": 3.1213646961028155e-06, "loss": 0.8999, "step": 4643 }, { "epoch": 0.6340797378481704, "grad_norm": 5.286762237548828, "learning_rate": 3.1193157460583217e-06, "loss": 0.8842, "step": 4644 }, { "epoch": 0.634216275259421, "grad_norm": 6.7931413650512695, "learning_rate": 3.117267163812132e-06, "loss": 0.9484, "step": 4645 }, { "epoch": 0.6343528126706718, "grad_norm": 4.668332099914551, "learning_rate": 3.115218949764877e-06, "loss": 0.9098, "step": 4646 }, { "epoch": 0.6344893500819224, "grad_norm": 5.645944595336914, "learning_rate": 3.113171104317122e-06, "loss": 1.0484, "step": 4647 }, { "epoch": 0.6346258874931732, "grad_norm": 34.07417678833008, "learning_rate": 3.111123627869353e-06, "loss": 1.0951, "step": 4648 }, { "epoch": 0.6347624249044238, "grad_norm": 8.415307998657227, "learning_rate": 3.1090765208219895e-06, "loss": 1.003, "step": 4649 }, { "epoch": 0.6348989623156744, "grad_norm": 6.636249542236328, "learning_rate": 3.107029783575377e-06, "loss": 0.8869, "step": 4650 }, { "epoch": 0.6350354997269252, "grad_norm": 7.462479591369629, "learning_rate": 3.1049834165297847e-06, "loss": 1.0751, "step": 4651 }, { "epoch": 0.6351720371381758, "grad_norm": 24.86771583557129, "learning_rate": 3.1029374200854167e-06, "loss": 0.9123, "step": 4652 }, { "epoch": 0.6353085745494266, "grad_norm": 7.137205600738525, "learning_rate": 3.1008917946423965e-06, "loss": 0.9249, "step": 4653 }, { "epoch": 0.6354451119606772, "grad_norm": 11.840239524841309, "learning_rate": 3.098846540600784e-06, "loss": 1.0536, "step": 4654 }, { "epoch": 0.635581649371928, "grad_norm": 7.856432914733887, "learning_rate": 3.0968016583605574e-06, "loss": 0.9772, "step": 4655 }, { "epoch": 0.6357181867831786, "grad_norm": 9.050811767578125, "learning_rate": 3.094757148321631e-06, "loss": 0.8737, "step": 4656 }, { "epoch": 0.6358547241944292, "grad_norm": 6.023827075958252, "learning_rate": 3.0927130108838387e-06, "loss": 1.0118, "step": 4657 }, { "epoch": 0.63599126160568, "grad_norm": 5.045204162597656, "learning_rate": 3.0906692464469475e-06, "loss": 0.9622, "step": 4658 }, { "epoch": 0.6361277990169306, "grad_norm": 6.696101188659668, "learning_rate": 3.088625855410644e-06, "loss": 0.9559, "step": 4659 }, { "epoch": 0.6362643364281814, "grad_norm": 7.2188639640808105, "learning_rate": 3.0865828381745515e-06, "loss": 0.9809, "step": 4660 }, { "epoch": 0.636400873839432, "grad_norm": 5.430547714233398, "learning_rate": 3.084540195138214e-06, "loss": 1.0605, "step": 4661 }, { "epoch": 0.6365374112506826, "grad_norm": 6.051831245422363, "learning_rate": 3.082497926701099e-06, "loss": 0.9881, "step": 4662 }, { "epoch": 0.6366739486619334, "grad_norm": 7.477881908416748, "learning_rate": 3.0804560332626116e-06, "loss": 0.7905, "step": 4663 }, { "epoch": 0.636810486073184, "grad_norm": 7.836913108825684, "learning_rate": 3.078414515222073e-06, "loss": 0.8062, "step": 4664 }, { "epoch": 0.6369470234844348, "grad_norm": 5.8339667320251465, "learning_rate": 3.0763733729787375e-06, "loss": 0.9566, "step": 4665 }, { "epoch": 0.6370835608956854, "grad_norm": 6.1981425285339355, "learning_rate": 3.07433260693178e-06, "loss": 0.9164, "step": 4666 }, { "epoch": 0.6372200983069362, "grad_norm": 6.149043560028076, "learning_rate": 3.0722922174803103e-06, "loss": 0.9871, "step": 4667 }, { "epoch": 0.6373566357181868, "grad_norm": 6.745606422424316, "learning_rate": 3.070252205023356e-06, "loss": 0.9266, "step": 4668 }, { "epoch": 0.6374931731294374, "grad_norm": 6.331937789916992, "learning_rate": 3.0682125699598775e-06, "loss": 0.8448, "step": 4669 }, { "epoch": 0.6376297105406882, "grad_norm": 6.536730766296387, "learning_rate": 3.066173312688755e-06, "loss": 1.015, "step": 4670 }, { "epoch": 0.6377662479519388, "grad_norm": 5.97411584854126, "learning_rate": 3.064134433608802e-06, "loss": 0.8639, "step": 4671 }, { "epoch": 0.6379027853631896, "grad_norm": 5.2847137451171875, "learning_rate": 3.062095933118752e-06, "loss": 0.9996, "step": 4672 }, { "epoch": 0.6380393227744402, "grad_norm": 5.596935749053955, "learning_rate": 3.0600578116172665e-06, "loss": 0.9344, "step": 4673 }, { "epoch": 0.6381758601856908, "grad_norm": 5.738983631134033, "learning_rate": 3.0580200695029348e-06, "loss": 0.9547, "step": 4674 }, { "epoch": 0.6383123975969416, "grad_norm": 8.327960014343262, "learning_rate": 3.0559827071742682e-06, "loss": 0.9894, "step": 4675 }, { "epoch": 0.6384489350081922, "grad_norm": 6.867223739624023, "learning_rate": 3.0539457250297095e-06, "loss": 0.9622, "step": 4676 }, { "epoch": 0.638585472419443, "grad_norm": 5.524543285369873, "learning_rate": 3.0519091234676207e-06, "loss": 1.0411, "step": 4677 }, { "epoch": 0.6387220098306936, "grad_norm": 6.501236915588379, "learning_rate": 3.0498729028862937e-06, "loss": 0.9713, "step": 4678 }, { "epoch": 0.6388585472419442, "grad_norm": 7.221127986907959, "learning_rate": 3.047837063683942e-06, "loss": 1.0273, "step": 4679 }, { "epoch": 0.638995084653195, "grad_norm": 6.803359031677246, "learning_rate": 3.0458016062587116e-06, "loss": 1.0087, "step": 4680 }, { "epoch": 0.6391316220644456, "grad_norm": 6.446500301361084, "learning_rate": 3.043766531008665e-06, "loss": 0.8672, "step": 4681 }, { "epoch": 0.6392681594756964, "grad_norm": 5.354035377502441, "learning_rate": 3.041731838331795e-06, "loss": 0.9054, "step": 4682 }, { "epoch": 0.639404696886947, "grad_norm": 4.741690158843994, "learning_rate": 3.0396975286260207e-06, "loss": 0.949, "step": 4683 }, { "epoch": 0.6395412342981978, "grad_norm": 6.32820463180542, "learning_rate": 3.0376636022891813e-06, "loss": 0.886, "step": 4684 }, { "epoch": 0.6396777717094484, "grad_norm": 5.467116832733154, "learning_rate": 3.035630059719048e-06, "loss": 1.0056, "step": 4685 }, { "epoch": 0.639814309120699, "grad_norm": 7.839003562927246, "learning_rate": 3.0335969013133083e-06, "loss": 1.0135, "step": 4686 }, { "epoch": 0.6399508465319498, "grad_norm": 6.6141557693481445, "learning_rate": 3.031564127469584e-06, "loss": 0.9223, "step": 4687 }, { "epoch": 0.6400873839432004, "grad_norm": 11.74671459197998, "learning_rate": 3.0295317385854134e-06, "loss": 0.9962, "step": 4688 }, { "epoch": 0.6402239213544512, "grad_norm": 5.917082786560059, "learning_rate": 3.0274997350582647e-06, "loss": 0.879, "step": 4689 }, { "epoch": 0.6403604587657018, "grad_norm": 6.134444236755371, "learning_rate": 3.0254681172855292e-06, "loss": 1.1594, "step": 4690 }, { "epoch": 0.6404969961769524, "grad_norm": 7.112154006958008, "learning_rate": 3.023436885664523e-06, "loss": 0.8437, "step": 4691 }, { "epoch": 0.6406335335882032, "grad_norm": 5.431486129760742, "learning_rate": 3.0214060405924863e-06, "loss": 0.9224, "step": 4692 }, { "epoch": 0.6407700709994538, "grad_norm": 8.719795227050781, "learning_rate": 3.0193755824665805e-06, "loss": 0.9413, "step": 4693 }, { "epoch": 0.6409066084107046, "grad_norm": 5.121359348297119, "learning_rate": 3.0173455116839002e-06, "loss": 0.927, "step": 4694 }, { "epoch": 0.6410431458219552, "grad_norm": 6.080813884735107, "learning_rate": 3.0153158286414542e-06, "loss": 0.9759, "step": 4695 }, { "epoch": 0.6411796832332058, "grad_norm": 5.223971366882324, "learning_rate": 3.013286533736183e-06, "loss": 0.9387, "step": 4696 }, { "epoch": 0.6413162206444566, "grad_norm": 5.004194259643555, "learning_rate": 3.011257627364945e-06, "loss": 0.9113, "step": 4697 }, { "epoch": 0.6414527580557072, "grad_norm": 5.959400653839111, "learning_rate": 3.0092291099245292e-06, "loss": 0.9928, "step": 4698 }, { "epoch": 0.641589295466958, "grad_norm": 8.267775535583496, "learning_rate": 3.007200981811641e-06, "loss": 1.0212, "step": 4699 }, { "epoch": 0.6417258328782086, "grad_norm": 6.067785739898682, "learning_rate": 3.0051732434229185e-06, "loss": 0.9603, "step": 4700 }, { "epoch": 0.6418623702894594, "grad_norm": 7.663427352905273, "learning_rate": 3.0031458951549162e-06, "loss": 1.0083, "step": 4701 }, { "epoch": 0.64199890770071, "grad_norm": 8.414738655090332, "learning_rate": 3.001118937404115e-06, "loss": 0.9395, "step": 4702 }, { "epoch": 0.6421354451119606, "grad_norm": 4.867571830749512, "learning_rate": 2.9990923705669194e-06, "loss": 0.9094, "step": 4703 }, { "epoch": 0.6422719825232114, "grad_norm": 5.941793441772461, "learning_rate": 2.9970661950396563e-06, "loss": 0.8847, "step": 4704 }, { "epoch": 0.642408519934462, "grad_norm": 5.284521102905273, "learning_rate": 2.99504041121858e-06, "loss": 0.9546, "step": 4705 }, { "epoch": 0.6425450573457128, "grad_norm": 6.947103977203369, "learning_rate": 2.993015019499862e-06, "loss": 1.0245, "step": 4706 }, { "epoch": 0.6426815947569634, "grad_norm": 5.9234619140625, "learning_rate": 2.990990020279604e-06, "loss": 0.86, "step": 4707 }, { "epoch": 0.642818132168214, "grad_norm": 23.29233169555664, "learning_rate": 2.988965413953825e-06, "loss": 0.9325, "step": 4708 }, { "epoch": 0.6429546695794648, "grad_norm": 9.259940147399902, "learning_rate": 2.9869412009184705e-06, "loss": 1.0571, "step": 4709 }, { "epoch": 0.6430912069907154, "grad_norm": 5.355065822601318, "learning_rate": 2.9849173815694067e-06, "loss": 0.8979, "step": 4710 }, { "epoch": 0.6432277444019662, "grad_norm": 5.938404083251953, "learning_rate": 2.982893956302427e-06, "loss": 0.8827, "step": 4711 }, { "epoch": 0.6433642818132168, "grad_norm": 5.964612007141113, "learning_rate": 2.9808709255132444e-06, "loss": 0.9796, "step": 4712 }, { "epoch": 0.6435008192244676, "grad_norm": 6.495572090148926, "learning_rate": 2.9788482895974923e-06, "loss": 1.0143, "step": 4713 }, { "epoch": 0.6436373566357182, "grad_norm": 5.6476006507873535, "learning_rate": 2.976826048950734e-06, "loss": 0.7885, "step": 4714 }, { "epoch": 0.6437738940469688, "grad_norm": 6.7072248458862305, "learning_rate": 2.9748042039684487e-06, "loss": 1.0246, "step": 4715 }, { "epoch": 0.6439104314582196, "grad_norm": 11.743192672729492, "learning_rate": 2.972782755046043e-06, "loss": 0.9888, "step": 4716 }, { "epoch": 0.6440469688694702, "grad_norm": 6.7654500007629395, "learning_rate": 2.9707617025788418e-06, "loss": 0.9968, "step": 4717 }, { "epoch": 0.644183506280721, "grad_norm": 5.313817501068115, "learning_rate": 2.968741046962097e-06, "loss": 0.9905, "step": 4718 }, { "epoch": 0.6443200436919716, "grad_norm": 5.857702732086182, "learning_rate": 2.966720788590977e-06, "loss": 1.1007, "step": 4719 }, { "epoch": 0.6444565811032222, "grad_norm": 6.500277042388916, "learning_rate": 2.964700927860581e-06, "loss": 0.8864, "step": 4720 }, { "epoch": 0.644593118514473, "grad_norm": 5.855020046234131, "learning_rate": 2.962681465165923e-06, "loss": 0.9969, "step": 4721 }, { "epoch": 0.6447296559257236, "grad_norm": 12.2373628616333, "learning_rate": 2.9606624009019413e-06, "loss": 1.0273, "step": 4722 }, { "epoch": 0.6448661933369744, "grad_norm": 7.490725040435791, "learning_rate": 2.958643735463497e-06, "loss": 1.0392, "step": 4723 }, { "epoch": 0.645002730748225, "grad_norm": 6.247919082641602, "learning_rate": 2.956625469245372e-06, "loss": 1.0696, "step": 4724 }, { "epoch": 0.6451392681594756, "grad_norm": 12.813472747802734, "learning_rate": 2.954607602642273e-06, "loss": 0.9832, "step": 4725 }, { "epoch": 0.6452758055707264, "grad_norm": 7.255091667175293, "learning_rate": 2.952590136048824e-06, "loss": 0.8604, "step": 4726 }, { "epoch": 0.645412342981977, "grad_norm": 4.471803665161133, "learning_rate": 2.950573069859577e-06, "loss": 0.8052, "step": 4727 }, { "epoch": 0.6455488803932278, "grad_norm": 6.122821807861328, "learning_rate": 2.9485564044689978e-06, "loss": 0.8446, "step": 4728 }, { "epoch": 0.6456854178044784, "grad_norm": 5.9916534423828125, "learning_rate": 2.9465401402714817e-06, "loss": 0.9378, "step": 4729 }, { "epoch": 0.6458219552157292, "grad_norm": 5.697536945343018, "learning_rate": 2.944524277661338e-06, "loss": 1.0293, "step": 4730 }, { "epoch": 0.6459584926269798, "grad_norm": 7.060524940490723, "learning_rate": 2.9425088170328055e-06, "loss": 1.0615, "step": 4731 }, { "epoch": 0.6460950300382304, "grad_norm": 5.3204145431518555, "learning_rate": 2.9404937587800374e-06, "loss": 1.0117, "step": 4732 }, { "epoch": 0.6462315674494812, "grad_norm": 6.876022815704346, "learning_rate": 2.9384791032971112e-06, "loss": 0.9678, "step": 4733 }, { "epoch": 0.6463681048607318, "grad_norm": 8.31568431854248, "learning_rate": 2.936464850978027e-06, "loss": 0.9013, "step": 4734 }, { "epoch": 0.6465046422719826, "grad_norm": 4.993836402893066, "learning_rate": 2.9344510022167027e-06, "loss": 0.9415, "step": 4735 }, { "epoch": 0.6466411796832332, "grad_norm": 6.294283866882324, "learning_rate": 2.9324375574069814e-06, "loss": 0.8752, "step": 4736 }, { "epoch": 0.6467777170944838, "grad_norm": 7.9740118980407715, "learning_rate": 2.930424516942621e-06, "loss": 1.2005, "step": 4737 }, { "epoch": 0.6469142545057346, "grad_norm": 9.178372383117676, "learning_rate": 2.928411881217309e-06, "loss": 0.9967, "step": 4738 }, { "epoch": 0.6470507919169852, "grad_norm": 8.428943634033203, "learning_rate": 2.926399650624644e-06, "loss": 1.0674, "step": 4739 }, { "epoch": 0.647187329328236, "grad_norm": 5.723480224609375, "learning_rate": 2.924387825558155e-06, "loss": 0.9452, "step": 4740 }, { "epoch": 0.6473238667394866, "grad_norm": 9.761069297790527, "learning_rate": 2.922376406411285e-06, "loss": 0.9436, "step": 4741 }, { "epoch": 0.6474604041507374, "grad_norm": 6.314396381378174, "learning_rate": 2.920365393577399e-06, "loss": 0.9192, "step": 4742 }, { "epoch": 0.647596941561988, "grad_norm": 5.295228004455566, "learning_rate": 2.9183547874497833e-06, "loss": 1.021, "step": 4743 }, { "epoch": 0.6477334789732386, "grad_norm": 6.900853157043457, "learning_rate": 2.9163445884216457e-06, "loss": 1.0184, "step": 4744 }, { "epoch": 0.6478700163844894, "grad_norm": 5.673403739929199, "learning_rate": 2.9143347968861123e-06, "loss": 0.9519, "step": 4745 }, { "epoch": 0.64800655379574, "grad_norm": 6.210951328277588, "learning_rate": 2.912325413236229e-06, "loss": 0.9035, "step": 4746 }, { "epoch": 0.6481430912069908, "grad_norm": 7.94413423538208, "learning_rate": 2.9103164378649673e-06, "loss": 0.9661, "step": 4747 }, { "epoch": 0.6482796286182414, "grad_norm": 7.977102279663086, "learning_rate": 2.90830787116521e-06, "loss": 1.0139, "step": 4748 }, { "epoch": 0.648416166029492, "grad_norm": 6.077450752258301, "learning_rate": 2.90629971352977e-06, "loss": 0.9085, "step": 4749 }, { "epoch": 0.6485527034407428, "grad_norm": 6.9181084632873535, "learning_rate": 2.9042919653513694e-06, "loss": 0.9867, "step": 4750 }, { "epoch": 0.6486892408519934, "grad_norm": 7.7746195793151855, "learning_rate": 2.9022846270226623e-06, "loss": 0.8871, "step": 4751 }, { "epoch": 0.6488257782632442, "grad_norm": 6.464641571044922, "learning_rate": 2.9002776989362126e-06, "loss": 0.9815, "step": 4752 }, { "epoch": 0.6489623156744948, "grad_norm": 5.7354960441589355, "learning_rate": 2.8982711814845056e-06, "loss": 0.9768, "step": 4753 }, { "epoch": 0.6490988530857454, "grad_norm": 5.761993408203125, "learning_rate": 2.896265075059953e-06, "loss": 1.0611, "step": 4754 }, { "epoch": 0.6492353904969962, "grad_norm": 5.4821696281433105, "learning_rate": 2.8942593800548767e-06, "loss": 1.1166, "step": 4755 }, { "epoch": 0.6493719279082468, "grad_norm": 6.428526401519775, "learning_rate": 2.892254096861529e-06, "loss": 1.1559, "step": 4756 }, { "epoch": 0.6495084653194976, "grad_norm": 9.594759941101074, "learning_rate": 2.8902492258720704e-06, "loss": 0.94, "step": 4757 }, { "epoch": 0.6496450027307482, "grad_norm": 5.708981990814209, "learning_rate": 2.8882447674785876e-06, "loss": 0.9328, "step": 4758 }, { "epoch": 0.649781540141999, "grad_norm": 5.79365873336792, "learning_rate": 2.886240722073084e-06, "loss": 0.8483, "step": 4759 }, { "epoch": 0.6499180775532496, "grad_norm": 6.696018695831299, "learning_rate": 2.8842370900474846e-06, "loss": 0.9149, "step": 4760 }, { "epoch": 0.6500546149645002, "grad_norm": 4.723262786865234, "learning_rate": 2.8822338717936316e-06, "loss": 1.0194, "step": 4761 }, { "epoch": 0.650191152375751, "grad_norm": 11.370518684387207, "learning_rate": 2.8802310677032852e-06, "loss": 0.8931, "step": 4762 }, { "epoch": 0.6503276897870016, "grad_norm": 9.09052562713623, "learning_rate": 2.87822867816813e-06, "loss": 0.8749, "step": 4763 }, { "epoch": 0.6504642271982524, "grad_norm": 9.411837577819824, "learning_rate": 2.8762267035797607e-06, "loss": 0.9874, "step": 4764 }, { "epoch": 0.650600764609503, "grad_norm": 6.927007675170898, "learning_rate": 2.874225144329702e-06, "loss": 1.032, "step": 4765 }, { "epoch": 0.6507373020207536, "grad_norm": 11.825226783752441, "learning_rate": 2.872224000809385e-06, "loss": 1.0022, "step": 4766 }, { "epoch": 0.6508738394320044, "grad_norm": 7.59103536605835, "learning_rate": 2.8702232734101707e-06, "loss": 0.9462, "step": 4767 }, { "epoch": 0.651010376843255, "grad_norm": 4.930685997009277, "learning_rate": 2.86822296252333e-06, "loss": 0.9192, "step": 4768 }, { "epoch": 0.6511469142545058, "grad_norm": 5.665060520172119, "learning_rate": 2.8662230685400597e-06, "loss": 0.9629, "step": 4769 }, { "epoch": 0.6512834516657564, "grad_norm": 5.819179058074951, "learning_rate": 2.8642235918514703e-06, "loss": 0.9599, "step": 4770 }, { "epoch": 0.651419989077007, "grad_norm": 9.90715503692627, "learning_rate": 2.862224532848591e-06, "loss": 0.8893, "step": 4771 }, { "epoch": 0.6515565264882578, "grad_norm": 17.7382755279541, "learning_rate": 2.8602258919223703e-06, "loss": 1.0383, "step": 4772 }, { "epoch": 0.6516930638995084, "grad_norm": 6.144195556640625, "learning_rate": 2.8582276694636734e-06, "loss": 1.1304, "step": 4773 }, { "epoch": 0.6518296013107592, "grad_norm": 5.3380656242370605, "learning_rate": 2.8562298658632884e-06, "loss": 0.9276, "step": 4774 }, { "epoch": 0.6519661387220098, "grad_norm": 10.29845905303955, "learning_rate": 2.854232481511915e-06, "loss": 0.9649, "step": 4775 }, { "epoch": 0.6521026761332606, "grad_norm": 5.275804042816162, "learning_rate": 2.852235516800178e-06, "loss": 1.023, "step": 4776 }, { "epoch": 0.6522392135445112, "grad_norm": 5.9315505027771, "learning_rate": 2.8502389721186106e-06, "loss": 0.9225, "step": 4777 }, { "epoch": 0.6523757509557618, "grad_norm": 5.325796127319336, "learning_rate": 2.848242847857676e-06, "loss": 0.9989, "step": 4778 }, { "epoch": 0.6525122883670126, "grad_norm": 33.32158279418945, "learning_rate": 2.8462471444077433e-06, "loss": 0.8626, "step": 4779 }, { "epoch": 0.6526488257782632, "grad_norm": 8.383137702941895, "learning_rate": 2.8442518621591085e-06, "loss": 0.8946, "step": 4780 }, { "epoch": 0.652785363189514, "grad_norm": 6.480694770812988, "learning_rate": 2.84225700150198e-06, "loss": 0.9464, "step": 4781 }, { "epoch": 0.6529219006007646, "grad_norm": 5.861283779144287, "learning_rate": 2.8402625628264823e-06, "loss": 0.9798, "step": 4782 }, { "epoch": 0.6530584380120152, "grad_norm": 5.862580299377441, "learning_rate": 2.8382685465226655e-06, "loss": 1.0297, "step": 4783 }, { "epoch": 0.653194975423266, "grad_norm": 8.199628829956055, "learning_rate": 2.836274952980489e-06, "loss": 0.976, "step": 4784 }, { "epoch": 0.6533315128345166, "grad_norm": 4.817512512207031, "learning_rate": 2.834281782589833e-06, "loss": 0.9685, "step": 4785 }, { "epoch": 0.6534680502457674, "grad_norm": 6.435152530670166, "learning_rate": 2.8322890357404907e-06, "loss": 0.8133, "step": 4786 }, { "epoch": 0.653604587657018, "grad_norm": 4.914725303649902, "learning_rate": 2.8302967128221816e-06, "loss": 0.8768, "step": 4787 }, { "epoch": 0.6537411250682688, "grad_norm": 5.424465179443359, "learning_rate": 2.828304814224532e-06, "loss": 0.9879, "step": 4788 }, { "epoch": 0.6538776624795194, "grad_norm": 4.92141056060791, "learning_rate": 2.8263133403370946e-06, "loss": 0.9556, "step": 4789 }, { "epoch": 0.65401419989077, "grad_norm": 5.285379409790039, "learning_rate": 2.8243222915493295e-06, "loss": 0.9753, "step": 4790 }, { "epoch": 0.6541507373020208, "grad_norm": 6.792632579803467, "learning_rate": 2.822331668250624e-06, "loss": 1.0261, "step": 4791 }, { "epoch": 0.6542872747132714, "grad_norm": 6.205959320068359, "learning_rate": 2.8203414708302733e-06, "loss": 1.0162, "step": 4792 }, { "epoch": 0.6544238121245222, "grad_norm": 7.4750237464904785, "learning_rate": 2.818351699677492e-06, "loss": 0.8665, "step": 4793 }, { "epoch": 0.6545603495357728, "grad_norm": 5.752437114715576, "learning_rate": 2.816362355181415e-06, "loss": 0.8755, "step": 4794 }, { "epoch": 0.6546968869470234, "grad_norm": 4.604861736297607, "learning_rate": 2.8143734377310896e-06, "loss": 1.071, "step": 4795 }, { "epoch": 0.6548334243582742, "grad_norm": 5.141453266143799, "learning_rate": 2.8123849477154808e-06, "loss": 0.9421, "step": 4796 }, { "epoch": 0.6549699617695248, "grad_norm": 5.3008880615234375, "learning_rate": 2.8103968855234675e-06, "loss": 0.8408, "step": 4797 }, { "epoch": 0.6551064991807756, "grad_norm": 7.072658061981201, "learning_rate": 2.808409251543852e-06, "loss": 0.9442, "step": 4798 }, { "epoch": 0.6552430365920262, "grad_norm": 6.9900031089782715, "learning_rate": 2.8064220461653443e-06, "loss": 1.1257, "step": 4799 }, { "epoch": 0.6553795740032768, "grad_norm": 5.714847564697266, "learning_rate": 2.8044352697765786e-06, "loss": 1.1499, "step": 4800 }, { "epoch": 0.6555161114145276, "grad_norm": 7.653200149536133, "learning_rate": 2.8024489227660967e-06, "loss": 1.1847, "step": 4801 }, { "epoch": 0.6556526488257782, "grad_norm": 5.390381813049316, "learning_rate": 2.8004630055223666e-06, "loss": 0.8385, "step": 4802 }, { "epoch": 0.655789186237029, "grad_norm": 4.958990573883057, "learning_rate": 2.798477518433763e-06, "loss": 0.9236, "step": 4803 }, { "epoch": 0.6559257236482796, "grad_norm": 5.953808784484863, "learning_rate": 2.796492461888578e-06, "loss": 1.1138, "step": 4804 }, { "epoch": 0.6560622610595304, "grad_norm": 6.2463603019714355, "learning_rate": 2.794507836275027e-06, "loss": 1.0943, "step": 4805 }, { "epoch": 0.656198798470781, "grad_norm": 7.887307643890381, "learning_rate": 2.7925236419812305e-06, "loss": 1.1149, "step": 4806 }, { "epoch": 0.6563353358820316, "grad_norm": 6.725759029388428, "learning_rate": 2.7905398793952355e-06, "loss": 1.0543, "step": 4807 }, { "epoch": 0.6564718732932824, "grad_norm": 6.9426798820495605, "learning_rate": 2.7885565489049948e-06, "loss": 0.9282, "step": 4808 }, { "epoch": 0.656608410704533, "grad_norm": 6.781834125518799, "learning_rate": 2.7865736508983814e-06, "loss": 1.0284, "step": 4809 }, { "epoch": 0.6567449481157838, "grad_norm": 5.5143723487854, "learning_rate": 2.7845911857631823e-06, "loss": 0.864, "step": 4810 }, { "epoch": 0.6568814855270344, "grad_norm": 5.335553169250488, "learning_rate": 2.7826091538871034e-06, "loss": 0.9822, "step": 4811 }, { "epoch": 0.657018022938285, "grad_norm": 5.658422470092773, "learning_rate": 2.7806275556577624e-06, "loss": 1.0259, "step": 4812 }, { "epoch": 0.6571545603495358, "grad_norm": 5.947118759155273, "learning_rate": 2.7786463914626894e-06, "loss": 0.928, "step": 4813 }, { "epoch": 0.6572910977607864, "grad_norm": 7.050957679748535, "learning_rate": 2.776665661689339e-06, "loss": 0.9018, "step": 4814 }, { "epoch": 0.6574276351720372, "grad_norm": 7.055345058441162, "learning_rate": 2.7746853667250695e-06, "loss": 0.922, "step": 4815 }, { "epoch": 0.6575641725832878, "grad_norm": 5.978343486785889, "learning_rate": 2.7727055069571646e-06, "loss": 0.9946, "step": 4816 }, { "epoch": 0.6577007099945386, "grad_norm": 6.677016735076904, "learning_rate": 2.7707260827728133e-06, "loss": 1.0069, "step": 4817 }, { "epoch": 0.6578372474057892, "grad_norm": 5.798194885253906, "learning_rate": 2.7687470945591288e-06, "loss": 0.974, "step": 4818 }, { "epoch": 0.6579737848170398, "grad_norm": 5.258587837219238, "learning_rate": 2.7667685427031305e-06, "loss": 0.971, "step": 4819 }, { "epoch": 0.6581103222282906, "grad_norm": 8.99306583404541, "learning_rate": 2.764790427591759e-06, "loss": 1.0738, "step": 4820 }, { "epoch": 0.6582468596395412, "grad_norm": 6.214727878570557, "learning_rate": 2.762812749611866e-06, "loss": 0.9273, "step": 4821 }, { "epoch": 0.658383397050792, "grad_norm": 5.071146011352539, "learning_rate": 2.760835509150218e-06, "loss": 0.9293, "step": 4822 }, { "epoch": 0.6585199344620426, "grad_norm": 5.916341781616211, "learning_rate": 2.758858706593497e-06, "loss": 0.9895, "step": 4823 }, { "epoch": 0.6586564718732932, "grad_norm": 7.694231986999512, "learning_rate": 2.756882342328296e-06, "loss": 0.8822, "step": 4824 }, { "epoch": 0.658793009284544, "grad_norm": 9.424131393432617, "learning_rate": 2.754906416741129e-06, "loss": 1.1219, "step": 4825 }, { "epoch": 0.6589295466957946, "grad_norm": 5.26901912689209, "learning_rate": 2.752930930218417e-06, "loss": 1.0037, "step": 4826 }, { "epoch": 0.6590660841070454, "grad_norm": 5.462457656860352, "learning_rate": 2.750955883146502e-06, "loss": 0.9621, "step": 4827 }, { "epoch": 0.659202621518296, "grad_norm": 6.1847405433654785, "learning_rate": 2.748981275911633e-06, "loss": 1.0436, "step": 4828 }, { "epoch": 0.6593391589295466, "grad_norm": 10.407098770141602, "learning_rate": 2.7470071088999806e-06, "loss": 1.0058, "step": 4829 }, { "epoch": 0.6594756963407974, "grad_norm": 5.922900199890137, "learning_rate": 2.7450333824976193e-06, "loss": 1.0192, "step": 4830 }, { "epoch": 0.659612233752048, "grad_norm": 5.622966766357422, "learning_rate": 2.7430600970905496e-06, "loss": 0.947, "step": 4831 }, { "epoch": 0.6597487711632988, "grad_norm": 24.07126808166504, "learning_rate": 2.7410872530646766e-06, "loss": 0.9637, "step": 4832 }, { "epoch": 0.6598853085745494, "grad_norm": 11.716585159301758, "learning_rate": 2.7391148508058198e-06, "loss": 0.9699, "step": 4833 }, { "epoch": 0.6600218459858002, "grad_norm": 6.198192596435547, "learning_rate": 2.7371428906997178e-06, "loss": 0.9957, "step": 4834 }, { "epoch": 0.6601583833970508, "grad_norm": 7.585477828979492, "learning_rate": 2.7351713731320185e-06, "loss": 1.0725, "step": 4835 }, { "epoch": 0.6602949208083014, "grad_norm": 4.953169822692871, "learning_rate": 2.733200298488284e-06, "loss": 0.9828, "step": 4836 }, { "epoch": 0.6604314582195522, "grad_norm": 5.929437637329102, "learning_rate": 2.731229667153987e-06, "loss": 0.9327, "step": 4837 }, { "epoch": 0.6605679956308028, "grad_norm": 6.755898475646973, "learning_rate": 2.7292594795145216e-06, "loss": 1.1558, "step": 4838 }, { "epoch": 0.6607045330420536, "grad_norm": 6.388064384460449, "learning_rate": 2.7272897359551853e-06, "loss": 1.0443, "step": 4839 }, { "epoch": 0.6608410704533042, "grad_norm": 6.081600189208984, "learning_rate": 2.7253204368611975e-06, "loss": 0.9025, "step": 4840 }, { "epoch": 0.6609776078645548, "grad_norm": 6.054614543914795, "learning_rate": 2.7233515826176822e-06, "loss": 1.0146, "step": 4841 }, { "epoch": 0.6611141452758056, "grad_norm": 5.5838212966918945, "learning_rate": 2.721383173609685e-06, "loss": 0.8364, "step": 4842 }, { "epoch": 0.6612506826870562, "grad_norm": 6.09340238571167, "learning_rate": 2.719415210222159e-06, "loss": 0.8255, "step": 4843 }, { "epoch": 0.661387220098307, "grad_norm": 5.361234188079834, "learning_rate": 2.7174476928399685e-06, "loss": 0.9195, "step": 4844 }, { "epoch": 0.6615237575095576, "grad_norm": 5.157100200653076, "learning_rate": 2.715480621847897e-06, "loss": 1.0191, "step": 4845 }, { "epoch": 0.6616602949208084, "grad_norm": 5.544739723205566, "learning_rate": 2.7135139976306344e-06, "loss": 1.1427, "step": 4846 }, { "epoch": 0.661796832332059, "grad_norm": 6.785170078277588, "learning_rate": 2.7115478205727887e-06, "loss": 1.0199, "step": 4847 }, { "epoch": 0.6619333697433096, "grad_norm": 5.935640811920166, "learning_rate": 2.7095820910588766e-06, "loss": 0.9391, "step": 4848 }, { "epoch": 0.6620699071545604, "grad_norm": 6.633614540100098, "learning_rate": 2.7076168094733284e-06, "loss": 1.0615, "step": 4849 }, { "epoch": 0.662206444565811, "grad_norm": 5.918429374694824, "learning_rate": 2.705651976200484e-06, "loss": 0.8239, "step": 4850 }, { "epoch": 0.6623429819770618, "grad_norm": 6.088216304779053, "learning_rate": 2.703687591624603e-06, "loss": 1.055, "step": 4851 }, { "epoch": 0.6624795193883124, "grad_norm": 5.436710357666016, "learning_rate": 2.701723656129851e-06, "loss": 0.9159, "step": 4852 }, { "epoch": 0.662616056799563, "grad_norm": 4.636074542999268, "learning_rate": 2.699760170100304e-06, "loss": 0.8585, "step": 4853 }, { "epoch": 0.6627525942108138, "grad_norm": 5.25913143157959, "learning_rate": 2.6977971339199594e-06, "loss": 1.0903, "step": 4854 }, { "epoch": 0.6628891316220644, "grad_norm": 6.249119758605957, "learning_rate": 2.6958345479727154e-06, "loss": 0.7561, "step": 4855 }, { "epoch": 0.6630256690333152, "grad_norm": 7.442941188812256, "learning_rate": 2.693872412642392e-06, "loss": 1.1191, "step": 4856 }, { "epoch": 0.6631622064445658, "grad_norm": 6.264682769775391, "learning_rate": 2.691910728312713e-06, "loss": 0.9722, "step": 4857 }, { "epoch": 0.6632987438558164, "grad_norm": 6.004727840423584, "learning_rate": 2.6899494953673204e-06, "loss": 0.9318, "step": 4858 }, { "epoch": 0.6634352812670672, "grad_norm": 5.304535388946533, "learning_rate": 2.6879887141897645e-06, "loss": 0.9343, "step": 4859 }, { "epoch": 0.6635718186783178, "grad_norm": 5.254663944244385, "learning_rate": 2.6860283851635067e-06, "loss": 1.0244, "step": 4860 }, { "epoch": 0.6637083560895686, "grad_norm": 7.333578586578369, "learning_rate": 2.68406850867192e-06, "loss": 0.9854, "step": 4861 }, { "epoch": 0.6638448935008192, "grad_norm": 6.484597206115723, "learning_rate": 2.6821090850982935e-06, "loss": 1.0319, "step": 4862 }, { "epoch": 0.66398143091207, "grad_norm": 5.20726203918457, "learning_rate": 2.680150114825822e-06, "loss": 1.0199, "step": 4863 }, { "epoch": 0.6641179683233206, "grad_norm": 6.1719818115234375, "learning_rate": 2.678191598237613e-06, "loss": 1.0795, "step": 4864 }, { "epoch": 0.6642545057345712, "grad_norm": 6.383916854858398, "learning_rate": 2.676233535716689e-06, "loss": 0.9163, "step": 4865 }, { "epoch": 0.664391043145822, "grad_norm": 7.0356855392456055, "learning_rate": 2.6742759276459774e-06, "loss": 0.9636, "step": 4866 }, { "epoch": 0.6645275805570726, "grad_norm": 12.356621742248535, "learning_rate": 2.672318774408324e-06, "loss": 0.8598, "step": 4867 }, { "epoch": 0.6646641179683234, "grad_norm": 5.288106441497803, "learning_rate": 2.670362076386478e-06, "loss": 0.8387, "step": 4868 }, { "epoch": 0.664800655379574, "grad_norm": 6.264438629150391, "learning_rate": 2.668405833963108e-06, "loss": 0.9905, "step": 4869 }, { "epoch": 0.6649371927908246, "grad_norm": 7.086028575897217, "learning_rate": 2.666450047520784e-06, "loss": 0.7694, "step": 4870 }, { "epoch": 0.6650737302020754, "grad_norm": 10.746251106262207, "learning_rate": 2.6644947174419965e-06, "loss": 0.917, "step": 4871 }, { "epoch": 0.665210267613326, "grad_norm": 9.32685375213623, "learning_rate": 2.662539844109139e-06, "loss": 0.8957, "step": 4872 }, { "epoch": 0.6653468050245768, "grad_norm": 5.486771583557129, "learning_rate": 2.6605854279045195e-06, "loss": 0.8591, "step": 4873 }, { "epoch": 0.6654833424358274, "grad_norm": 8.98688793182373, "learning_rate": 2.6586314692103555e-06, "loss": 1.0377, "step": 4874 }, { "epoch": 0.665619879847078, "grad_norm": 6.266202926635742, "learning_rate": 2.656677968408774e-06, "loss": 0.9921, "step": 4875 }, { "epoch": 0.6657564172583288, "grad_norm": 4.9416093826293945, "learning_rate": 2.6547249258818162e-06, "loss": 0.9612, "step": 4876 }, { "epoch": 0.6658929546695794, "grad_norm": 6.669625759124756, "learning_rate": 2.652772342011429e-06, "loss": 0.8626, "step": 4877 }, { "epoch": 0.6660294920808302, "grad_norm": 6.995275974273682, "learning_rate": 2.650820217179475e-06, "loss": 1.042, "step": 4878 }, { "epoch": 0.6661660294920808, "grad_norm": 7.792511940002441, "learning_rate": 2.64886855176772e-06, "loss": 0.909, "step": 4879 }, { "epoch": 0.6663025669033316, "grad_norm": 4.916767120361328, "learning_rate": 2.646917346157847e-06, "loss": 0.9485, "step": 4880 }, { "epoch": 0.6664391043145822, "grad_norm": 6.782994270324707, "learning_rate": 2.6449666007314435e-06, "loss": 1.1007, "step": 4881 }, { "epoch": 0.6665756417258328, "grad_norm": 8.117804527282715, "learning_rate": 2.6430163158700116e-06, "loss": 0.9064, "step": 4882 }, { "epoch": 0.6667121791370836, "grad_norm": 5.108698844909668, "learning_rate": 2.641066491954961e-06, "loss": 0.9586, "step": 4883 }, { "epoch": 0.6668487165483342, "grad_norm": 5.302891731262207, "learning_rate": 2.6391171293676077e-06, "loss": 0.9846, "step": 4884 }, { "epoch": 0.666985253959585, "grad_norm": 5.789632797241211, "learning_rate": 2.6371682284891864e-06, "loss": 0.7066, "step": 4885 }, { "epoch": 0.6671217913708356, "grad_norm": 6.52791166305542, "learning_rate": 2.6352197897008337e-06, "loss": 0.9904, "step": 4886 }, { "epoch": 0.6672583287820862, "grad_norm": 8.658780097961426, "learning_rate": 2.633271813383598e-06, "loss": 0.979, "step": 4887 }, { "epoch": 0.667394866193337, "grad_norm": 7.167264938354492, "learning_rate": 2.6313242999184364e-06, "loss": 0.892, "step": 4888 }, { "epoch": 0.6675314036045876, "grad_norm": 6.901003837585449, "learning_rate": 2.62937724968622e-06, "loss": 0.8734, "step": 4889 }, { "epoch": 0.6676679410158384, "grad_norm": 6.94549560546875, "learning_rate": 2.6274306630677237e-06, "loss": 0.8104, "step": 4890 }, { "epoch": 0.667804478427089, "grad_norm": 5.568278789520264, "learning_rate": 2.625484540443636e-06, "loss": 0.9943, "step": 4891 }, { "epoch": 0.6679410158383398, "grad_norm": 7.858736515045166, "learning_rate": 2.6235388821945497e-06, "loss": 0.9216, "step": 4892 }, { "epoch": 0.6680775532495904, "grad_norm": 5.62117862701416, "learning_rate": 2.621593688700974e-06, "loss": 0.892, "step": 4893 }, { "epoch": 0.668214090660841, "grad_norm": 5.277308464050293, "learning_rate": 2.6196489603433207e-06, "loss": 0.9513, "step": 4894 }, { "epoch": 0.6683506280720918, "grad_norm": 5.726161003112793, "learning_rate": 2.617704697501911e-06, "loss": 0.8864, "step": 4895 }, { "epoch": 0.6684871654833424, "grad_norm": 6.022412300109863, "learning_rate": 2.615760900556981e-06, "loss": 0.9142, "step": 4896 }, { "epoch": 0.6686237028945932, "grad_norm": 6.076082229614258, "learning_rate": 2.613817569888668e-06, "loss": 0.9234, "step": 4897 }, { "epoch": 0.6687602403058438, "grad_norm": 6.225438594818115, "learning_rate": 2.6118747058770255e-06, "loss": 0.978, "step": 4898 }, { "epoch": 0.6688967777170944, "grad_norm": 9.424436569213867, "learning_rate": 2.60993230890201e-06, "loss": 0.9111, "step": 4899 }, { "epoch": 0.6690333151283452, "grad_norm": 6.674483776092529, "learning_rate": 2.607990379343489e-06, "loss": 1.0379, "step": 4900 }, { "epoch": 0.6691698525395958, "grad_norm": 6.186288833618164, "learning_rate": 2.6060489175812366e-06, "loss": 0.9152, "step": 4901 }, { "epoch": 0.6693063899508466, "grad_norm": 7.3669610023498535, "learning_rate": 2.604107923994941e-06, "loss": 1.0196, "step": 4902 }, { "epoch": 0.6694429273620972, "grad_norm": 8.475544929504395, "learning_rate": 2.6021673989641915e-06, "loss": 0.9529, "step": 4903 }, { "epoch": 0.6695794647733478, "grad_norm": 5.840922832489014, "learning_rate": 2.600227342868489e-06, "loss": 0.9296, "step": 4904 }, { "epoch": 0.6697160021845986, "grad_norm": 15.681486129760742, "learning_rate": 2.5982877560872467e-06, "loss": 0.8932, "step": 4905 }, { "epoch": 0.6698525395958492, "grad_norm": 7.266645431518555, "learning_rate": 2.5963486389997785e-06, "loss": 0.8508, "step": 4906 }, { "epoch": 0.6699890770071, "grad_norm": 6.154879570007324, "learning_rate": 2.594409991985313e-06, "loss": 1.0299, "step": 4907 }, { "epoch": 0.6701256144183506, "grad_norm": 7.100042819976807, "learning_rate": 2.59247181542298e-06, "loss": 0.9565, "step": 4908 }, { "epoch": 0.6702621518296014, "grad_norm": 6.013652324676514, "learning_rate": 2.590534109691827e-06, "loss": 1.0682, "step": 4909 }, { "epoch": 0.670398689240852, "grad_norm": 6.502292156219482, "learning_rate": 2.5885968751708002e-06, "loss": 0.9723, "step": 4910 }, { "epoch": 0.6705352266521026, "grad_norm": 9.734871864318848, "learning_rate": 2.5866601122387556e-06, "loss": 1.1224, "step": 4911 }, { "epoch": 0.6706717640633534, "grad_norm": 6.342564105987549, "learning_rate": 2.584723821274464e-06, "loss": 1.0276, "step": 4912 }, { "epoch": 0.670808301474604, "grad_norm": 5.930066108703613, "learning_rate": 2.5827880026565945e-06, "loss": 0.9632, "step": 4913 }, { "epoch": 0.6709448388858548, "grad_norm": 5.63728666305542, "learning_rate": 2.580852656763729e-06, "loss": 0.9297, "step": 4914 }, { "epoch": 0.6710813762971054, "grad_norm": 5.670009613037109, "learning_rate": 2.578917783974353e-06, "loss": 0.9886, "step": 4915 }, { "epoch": 0.671217913708356, "grad_norm": 6.712139129638672, "learning_rate": 2.576983384666867e-06, "loss": 0.9939, "step": 4916 }, { "epoch": 0.6713544511196068, "grad_norm": 6.370323181152344, "learning_rate": 2.5750494592195694e-06, "loss": 0.9809, "step": 4917 }, { "epoch": 0.6714909885308574, "grad_norm": 6.678493499755859, "learning_rate": 2.5731160080106765e-06, "loss": 0.8492, "step": 4918 }, { "epoch": 0.6716275259421082, "grad_norm": 6.307890892028809, "learning_rate": 2.5711830314182996e-06, "loss": 0.9986, "step": 4919 }, { "epoch": 0.6717640633533588, "grad_norm": 5.931622505187988, "learning_rate": 2.5692505298204684e-06, "loss": 0.9819, "step": 4920 }, { "epoch": 0.6719006007646096, "grad_norm": 6.49144983291626, "learning_rate": 2.567318503595112e-06, "loss": 1.0307, "step": 4921 }, { "epoch": 0.6720371381758602, "grad_norm": 8.239789962768555, "learning_rate": 2.565386953120072e-06, "loss": 0.9205, "step": 4922 }, { "epoch": 0.6721736755871108, "grad_norm": 6.446512222290039, "learning_rate": 2.5634558787730933e-06, "loss": 0.9011, "step": 4923 }, { "epoch": 0.6723102129983616, "grad_norm": 6.424502849578857, "learning_rate": 2.5615252809318287e-06, "loss": 0.8997, "step": 4924 }, { "epoch": 0.6724467504096122, "grad_norm": 7.630763530731201, "learning_rate": 2.559595159973838e-06, "loss": 1.0291, "step": 4925 }, { "epoch": 0.672583287820863, "grad_norm": 6.141809463500977, "learning_rate": 2.557665516276585e-06, "loss": 0.8819, "step": 4926 }, { "epoch": 0.6727198252321136, "grad_norm": 6.288203239440918, "learning_rate": 2.555736350217447e-06, "loss": 1.0071, "step": 4927 }, { "epoch": 0.6728563626433642, "grad_norm": 5.351778984069824, "learning_rate": 2.5538076621737007e-06, "loss": 0.8674, "step": 4928 }, { "epoch": 0.672992900054615, "grad_norm": 5.712267875671387, "learning_rate": 2.5518794525225356e-06, "loss": 0.9188, "step": 4929 }, { "epoch": 0.6731294374658656, "grad_norm": 5.115518093109131, "learning_rate": 2.54995172164104e-06, "loss": 0.865, "step": 4930 }, { "epoch": 0.6732659748771164, "grad_norm": 6.230163097381592, "learning_rate": 2.5480244699062174e-06, "loss": 0.9422, "step": 4931 }, { "epoch": 0.673402512288367, "grad_norm": 5.810431957244873, "learning_rate": 2.5460976976949686e-06, "loss": 0.899, "step": 4932 }, { "epoch": 0.6735390496996176, "grad_norm": 5.846758842468262, "learning_rate": 2.54417140538411e-06, "loss": 0.9868, "step": 4933 }, { "epoch": 0.6736755871108684, "grad_norm": 5.397334575653076, "learning_rate": 2.5422455933503566e-06, "loss": 0.7875, "step": 4934 }, { "epoch": 0.673812124522119, "grad_norm": 5.42236328125, "learning_rate": 2.5403202619703306e-06, "loss": 0.831, "step": 4935 }, { "epoch": 0.6739486619333698, "grad_norm": 6.8280348777771, "learning_rate": 2.5383954116205657e-06, "loss": 1.03, "step": 4936 }, { "epoch": 0.6740851993446204, "grad_norm": 5.771665096282959, "learning_rate": 2.536471042677495e-06, "loss": 0.8113, "step": 4937 }, { "epoch": 0.6742217367558712, "grad_norm": 7.5228166580200195, "learning_rate": 2.534547155517462e-06, "loss": 0.8963, "step": 4938 }, { "epoch": 0.6743582741671218, "grad_norm": 9.60419750213623, "learning_rate": 2.5326237505167097e-06, "loss": 1.0957, "step": 4939 }, { "epoch": 0.6744948115783724, "grad_norm": 10.535232543945312, "learning_rate": 2.5307008280513956e-06, "loss": 0.9674, "step": 4940 }, { "epoch": 0.6746313489896232, "grad_norm": 8.08364486694336, "learning_rate": 2.528778388497575e-06, "loss": 1.0428, "step": 4941 }, { "epoch": 0.6747678864008738, "grad_norm": 6.393430709838867, "learning_rate": 2.526856432231216e-06, "loss": 1.0972, "step": 4942 }, { "epoch": 0.6749044238121246, "grad_norm": 7.30908727645874, "learning_rate": 2.5249349596281863e-06, "loss": 0.9821, "step": 4943 }, { "epoch": 0.6750409612233752, "grad_norm": 6.989886283874512, "learning_rate": 2.5230139710642587e-06, "loss": 0.9939, "step": 4944 }, { "epoch": 0.6751774986346258, "grad_norm": 5.286975383758545, "learning_rate": 2.521093466915119e-06, "loss": 0.9877, "step": 4945 }, { "epoch": 0.6753140360458766, "grad_norm": 5.281793117523193, "learning_rate": 2.5191734475563468e-06, "loss": 0.9375, "step": 4946 }, { "epoch": 0.6754505734571272, "grad_norm": 6.529286861419678, "learning_rate": 2.517253913363438e-06, "loss": 0.881, "step": 4947 }, { "epoch": 0.675587110868378, "grad_norm": 6.2674431800842285, "learning_rate": 2.515334864711786e-06, "loss": 1.052, "step": 4948 }, { "epoch": 0.6757236482796286, "grad_norm": 5.5501708984375, "learning_rate": 2.5134163019766935e-06, "loss": 0.9164, "step": 4949 }, { "epoch": 0.6758601856908794, "grad_norm": 6.586105823516846, "learning_rate": 2.511498225533366e-06, "loss": 1.0064, "step": 4950 }, { "epoch": 0.67599672310213, "grad_norm": 6.815552234649658, "learning_rate": 2.509580635756914e-06, "loss": 1.1164, "step": 4951 }, { "epoch": 0.6761332605133806, "grad_norm": 8.227566719055176, "learning_rate": 2.507663533022351e-06, "loss": 1.0307, "step": 4952 }, { "epoch": 0.6762697979246314, "grad_norm": 9.820321083068848, "learning_rate": 2.5057469177046025e-06, "loss": 0.9307, "step": 4953 }, { "epoch": 0.676406335335882, "grad_norm": 5.0254974365234375, "learning_rate": 2.5038307901784908e-06, "loss": 0.976, "step": 4954 }, { "epoch": 0.6765428727471328, "grad_norm": 4.9741010665893555, "learning_rate": 2.5019151508187444e-06, "loss": 0.9953, "step": 4955 }, { "epoch": 0.6766794101583834, "grad_norm": 5.4419846534729, "learning_rate": 2.5000000000000015e-06, "loss": 0.7366, "step": 4956 }, { "epoch": 0.676815947569634, "grad_norm": 7.46063232421875, "learning_rate": 2.498085338096796e-06, "loss": 0.8161, "step": 4957 }, { "epoch": 0.6769524849808848, "grad_norm": 7.274188995361328, "learning_rate": 2.4961711654835764e-06, "loss": 0.9317, "step": 4958 }, { "epoch": 0.6770890223921354, "grad_norm": 6.020046234130859, "learning_rate": 2.494257482534685e-06, "loss": 0.847, "step": 4959 }, { "epoch": 0.6772255598033862, "grad_norm": 9.374177932739258, "learning_rate": 2.4923442896243784e-06, "loss": 0.989, "step": 4960 }, { "epoch": 0.6773620972146368, "grad_norm": 7.492412090301514, "learning_rate": 2.4904315871268083e-06, "loss": 0.9409, "step": 4961 }, { "epoch": 0.6774986346258874, "grad_norm": 7.32387638092041, "learning_rate": 2.488519375416038e-06, "loss": 0.8718, "step": 4962 }, { "epoch": 0.6776351720371382, "grad_norm": 7.158420085906982, "learning_rate": 2.4866076548660303e-06, "loss": 0.975, "step": 4963 }, { "epoch": 0.6777717094483888, "grad_norm": 8.231603622436523, "learning_rate": 2.484696425850653e-06, "loss": 0.8475, "step": 4964 }, { "epoch": 0.6779082468596396, "grad_norm": 5.566094398498535, "learning_rate": 2.482785688743678e-06, "loss": 0.9611, "step": 4965 }, { "epoch": 0.6780447842708902, "grad_norm": 6.440509796142578, "learning_rate": 2.480875443918779e-06, "loss": 0.896, "step": 4966 }, { "epoch": 0.678181321682141, "grad_norm": 5.043124198913574, "learning_rate": 2.4789656917495387e-06, "loss": 0.8783, "step": 4967 }, { "epoch": 0.6783178590933916, "grad_norm": 7.390796184539795, "learning_rate": 2.4770564326094367e-06, "loss": 0.9309, "step": 4968 }, { "epoch": 0.6784543965046422, "grad_norm": 5.118606090545654, "learning_rate": 2.4751476668718634e-06, "loss": 0.9548, "step": 4969 }, { "epoch": 0.678590933915893, "grad_norm": 7.718662738800049, "learning_rate": 2.4732393949101054e-06, "loss": 0.9759, "step": 4970 }, { "epoch": 0.6787274713271436, "grad_norm": 5.7631683349609375, "learning_rate": 2.471331617097359e-06, "loss": 0.7794, "step": 4971 }, { "epoch": 0.6788640087383944, "grad_norm": 6.523501396179199, "learning_rate": 2.469424333806718e-06, "loss": 0.9591, "step": 4972 }, { "epoch": 0.679000546149645, "grad_norm": 14.802566528320312, "learning_rate": 2.4675175454111867e-06, "loss": 0.9929, "step": 4973 }, { "epoch": 0.6791370835608956, "grad_norm": 7.197484016418457, "learning_rate": 2.465611252283666e-06, "loss": 0.8977, "step": 4974 }, { "epoch": 0.6792736209721464, "grad_norm": 5.792260646820068, "learning_rate": 2.463705454796961e-06, "loss": 1.0014, "step": 4975 }, { "epoch": 0.679410158383397, "grad_norm": 5.7821526527404785, "learning_rate": 2.4618001533237846e-06, "loss": 0.7504, "step": 4976 }, { "epoch": 0.6795466957946478, "grad_norm": 6.361316204071045, "learning_rate": 2.459895348236748e-06, "loss": 0.8633, "step": 4977 }, { "epoch": 0.6796832332058984, "grad_norm": 5.720603942871094, "learning_rate": 2.457991039908366e-06, "loss": 0.923, "step": 4978 }, { "epoch": 0.679819770617149, "grad_norm": 5.975800514221191, "learning_rate": 2.456087228711056e-06, "loss": 1.0477, "step": 4979 }, { "epoch": 0.6799563080283998, "grad_norm": 6.534903049468994, "learning_rate": 2.454183915017142e-06, "loss": 1.0255, "step": 4980 }, { "epoch": 0.6800928454396504, "grad_norm": 6.973656177520752, "learning_rate": 2.4522810991988447e-06, "loss": 1.1211, "step": 4981 }, { "epoch": 0.6802293828509012, "grad_norm": 6.400303840637207, "learning_rate": 2.4503787816282946e-06, "loss": 0.8603, "step": 4982 }, { "epoch": 0.6803659202621518, "grad_norm": 6.916758060455322, "learning_rate": 2.4484769626775184e-06, "loss": 0.8325, "step": 4983 }, { "epoch": 0.6805024576734026, "grad_norm": 10.631697654724121, "learning_rate": 2.4465756427184457e-06, "loss": 1.0362, "step": 4984 }, { "epoch": 0.6806389950846532, "grad_norm": 6.788855075836182, "learning_rate": 2.4446748221229142e-06, "loss": 0.9572, "step": 4985 }, { "epoch": 0.6807755324959038, "grad_norm": 7.301462173461914, "learning_rate": 2.4427745012626567e-06, "loss": 1.02, "step": 4986 }, { "epoch": 0.6809120699071546, "grad_norm": 5.986110210418701, "learning_rate": 2.4408746805093157e-06, "loss": 0.9612, "step": 4987 }, { "epoch": 0.6810486073184052, "grad_norm": 5.088779449462891, "learning_rate": 2.4389753602344298e-06, "loss": 0.9401, "step": 4988 }, { "epoch": 0.681185144729656, "grad_norm": 8.191258430480957, "learning_rate": 2.437076540809441e-06, "loss": 0.8843, "step": 4989 }, { "epoch": 0.6813216821409066, "grad_norm": 8.12574291229248, "learning_rate": 2.435178222605694e-06, "loss": 0.877, "step": 4990 }, { "epoch": 0.6814582195521572, "grad_norm": 6.639814853668213, "learning_rate": 2.4332804059944387e-06, "loss": 1.0942, "step": 4991 }, { "epoch": 0.681594756963408, "grad_norm": 5.034160614013672, "learning_rate": 2.43138309134682e-06, "loss": 0.9423, "step": 4992 }, { "epoch": 0.6817312943746586, "grad_norm": 5.42127799987793, "learning_rate": 2.429486279033892e-06, "loss": 0.7305, "step": 4993 }, { "epoch": 0.6818678317859094, "grad_norm": 7.198551177978516, "learning_rate": 2.4275899694266062e-06, "loss": 0.854, "step": 4994 }, { "epoch": 0.68200436919716, "grad_norm": 6.478899955749512, "learning_rate": 2.425694162895814e-06, "loss": 1.0627, "step": 4995 }, { "epoch": 0.6821409066084108, "grad_norm": 6.930584907531738, "learning_rate": 2.423798859812275e-06, "loss": 0.9649, "step": 4996 }, { "epoch": 0.6822774440196614, "grad_norm": 4.820116996765137, "learning_rate": 2.4219040605466433e-06, "loss": 0.9521, "step": 4997 }, { "epoch": 0.682413981430912, "grad_norm": 6.55518913269043, "learning_rate": 2.420009765469481e-06, "loss": 1.0896, "step": 4998 }, { "epoch": 0.6825505188421628, "grad_norm": 5.845508098602295, "learning_rate": 2.4181159749512437e-06, "loss": 0.9598, "step": 4999 }, { "epoch": 0.6826870562534134, "grad_norm": 6.4376373291015625, "learning_rate": 2.416222689362298e-06, "loss": 1.0247, "step": 5000 }, { "epoch": 0.6828235936646642, "grad_norm": 8.240621566772461, "learning_rate": 2.414329909072904e-06, "loss": 1.0277, "step": 5001 }, { "epoch": 0.6829601310759148, "grad_norm": 9.325630187988281, "learning_rate": 2.412437634453225e-06, "loss": 1.1334, "step": 5002 }, { "epoch": 0.6830966684871654, "grad_norm": 7.148828029632568, "learning_rate": 2.410545865873325e-06, "loss": 0.933, "step": 5003 }, { "epoch": 0.6832332058984162, "grad_norm": 7.885014057159424, "learning_rate": 2.4086546037031734e-06, "loss": 0.934, "step": 5004 }, { "epoch": 0.6833697433096668, "grad_norm": 6.277070999145508, "learning_rate": 2.4067638483126353e-06, "loss": 0.817, "step": 5005 }, { "epoch": 0.6835062807209176, "grad_norm": 7.82966423034668, "learning_rate": 2.404873600071477e-06, "loss": 0.9825, "step": 5006 }, { "epoch": 0.6836428181321682, "grad_norm": 7.457578659057617, "learning_rate": 2.4029838593493703e-06, "loss": 0.9868, "step": 5007 }, { "epoch": 0.6837793555434188, "grad_norm": 7.8008646965026855, "learning_rate": 2.401094626515882e-06, "loss": 1.1005, "step": 5008 }, { "epoch": 0.6839158929546696, "grad_norm": 6.399770736694336, "learning_rate": 2.399205901940485e-06, "loss": 0.994, "step": 5009 }, { "epoch": 0.6840524303659202, "grad_norm": 6.6056318283081055, "learning_rate": 2.397317685992547e-06, "loss": 0.9497, "step": 5010 }, { "epoch": 0.684188967777171, "grad_norm": 6.321259021759033, "learning_rate": 2.3954299790413415e-06, "loss": 0.9468, "step": 5011 }, { "epoch": 0.6843255051884216, "grad_norm": 9.086098670959473, "learning_rate": 2.393542781456038e-06, "loss": 0.996, "step": 5012 }, { "epoch": 0.6844620425996724, "grad_norm": 11.734248161315918, "learning_rate": 2.391656093605712e-06, "loss": 1.0485, "step": 5013 }, { "epoch": 0.684598580010923, "grad_norm": 5.896494388580322, "learning_rate": 2.389769915859334e-06, "loss": 1.0236, "step": 5014 }, { "epoch": 0.6847351174221736, "grad_norm": 7.632261753082275, "learning_rate": 2.3878842485857763e-06, "loss": 1.1153, "step": 5015 }, { "epoch": 0.6848716548334244, "grad_norm": 5.573274612426758, "learning_rate": 2.3859990921538116e-06, "loss": 1.008, "step": 5016 }, { "epoch": 0.685008192244675, "grad_norm": 5.63569450378418, "learning_rate": 2.384114446932112e-06, "loss": 0.9837, "step": 5017 }, { "epoch": 0.6851447296559258, "grad_norm": 42.214019775390625, "learning_rate": 2.3822303132892533e-06, "loss": 0.9572, "step": 5018 }, { "epoch": 0.6852812670671764, "grad_norm": 5.391456604003906, "learning_rate": 2.3803466915937044e-06, "loss": 0.9198, "step": 5019 }, { "epoch": 0.685417804478427, "grad_norm": 6.080112457275391, "learning_rate": 2.3784635822138424e-06, "loss": 0.9172, "step": 5020 }, { "epoch": 0.6855543418896778, "grad_norm": 7.642206192016602, "learning_rate": 2.3765809855179363e-06, "loss": 0.9784, "step": 5021 }, { "epoch": 0.6856908793009284, "grad_norm": 7.191618919372559, "learning_rate": 2.3746989018741613e-06, "loss": 0.974, "step": 5022 }, { "epoch": 0.6858274167121792, "grad_norm": 19.46882438659668, "learning_rate": 2.372817331650586e-06, "loss": 0.9704, "step": 5023 }, { "epoch": 0.6859639541234298, "grad_norm": 6.2811126708984375, "learning_rate": 2.3709362752151866e-06, "loss": 0.9968, "step": 5024 }, { "epoch": 0.6861004915346806, "grad_norm": 6.5015645027160645, "learning_rate": 2.369055732935831e-06, "loss": 0.9726, "step": 5025 }, { "epoch": 0.6862370289459312, "grad_norm": 5.355893611907959, "learning_rate": 2.3671757051802887e-06, "loss": 0.9273, "step": 5026 }, { "epoch": 0.6863735663571818, "grad_norm": 5.648413181304932, "learning_rate": 2.3652961923162328e-06, "loss": 1.0137, "step": 5027 }, { "epoch": 0.6865101037684326, "grad_norm": 5.505655765533447, "learning_rate": 2.3634171947112307e-06, "loss": 0.8794, "step": 5028 }, { "epoch": 0.6866466411796832, "grad_norm": 5.947563648223877, "learning_rate": 2.3615387127327517e-06, "loss": 0.9609, "step": 5029 }, { "epoch": 0.686783178590934, "grad_norm": 4.638413429260254, "learning_rate": 2.3596607467481602e-06, "loss": 0.8737, "step": 5030 }, { "epoch": 0.6869197160021846, "grad_norm": 4.726334571838379, "learning_rate": 2.3577832971247277e-06, "loss": 0.9616, "step": 5031 }, { "epoch": 0.6870562534134352, "grad_norm": 4.946816444396973, "learning_rate": 2.3559063642296166e-06, "loss": 0.8967, "step": 5032 }, { "epoch": 0.687192790824686, "grad_norm": 6.773372173309326, "learning_rate": 2.3540299484298934e-06, "loss": 1.0181, "step": 5033 }, { "epoch": 0.6873293282359366, "grad_norm": 6.531459331512451, "learning_rate": 2.3521540500925215e-06, "loss": 0.9591, "step": 5034 }, { "epoch": 0.6874658656471874, "grad_norm": 8.15267276763916, "learning_rate": 2.350278669584361e-06, "loss": 0.9541, "step": 5035 }, { "epoch": 0.687602403058438, "grad_norm": 6.660778999328613, "learning_rate": 2.348403807272176e-06, "loss": 0.9764, "step": 5036 }, { "epoch": 0.6877389404696886, "grad_norm": 6.866642475128174, "learning_rate": 2.3465294635226233e-06, "loss": 0.9346, "step": 5037 }, { "epoch": 0.6878754778809394, "grad_norm": 4.038094997406006, "learning_rate": 2.3446556387022644e-06, "loss": 0.8661, "step": 5038 }, { "epoch": 0.68801201529219, "grad_norm": 6.4330525398254395, "learning_rate": 2.3427823331775546e-06, "loss": 0.9416, "step": 5039 }, { "epoch": 0.6881485527034408, "grad_norm": 7.937891960144043, "learning_rate": 2.3409095473148476e-06, "loss": 0.8851, "step": 5040 }, { "epoch": 0.6882850901146914, "grad_norm": 8.92888069152832, "learning_rate": 2.3390372814804e-06, "loss": 0.879, "step": 5041 }, { "epoch": 0.6884216275259422, "grad_norm": 5.45948600769043, "learning_rate": 2.3371655360403617e-06, "loss": 0.9747, "step": 5042 }, { "epoch": 0.6885581649371928, "grad_norm": 5.494053363800049, "learning_rate": 2.3352943113607824e-06, "loss": 1.0042, "step": 5043 }, { "epoch": 0.6886947023484434, "grad_norm": 6.0307087898254395, "learning_rate": 2.3334236078076126e-06, "loss": 1.0171, "step": 5044 }, { "epoch": 0.6888312397596942, "grad_norm": 7.924975395202637, "learning_rate": 2.3315534257466983e-06, "loss": 0.988, "step": 5045 }, { "epoch": 0.6889677771709448, "grad_norm": 6.206173419952393, "learning_rate": 2.329683765543781e-06, "loss": 1.0218, "step": 5046 }, { "epoch": 0.6891043145821956, "grad_norm": 5.28834867477417, "learning_rate": 2.3278146275645074e-06, "loss": 1.0053, "step": 5047 }, { "epoch": 0.6892408519934462, "grad_norm": 5.715757846832275, "learning_rate": 2.325946012174414e-06, "loss": 0.9105, "step": 5048 }, { "epoch": 0.6893773894046968, "grad_norm": 5.505260944366455, "learning_rate": 2.3240779197389413e-06, "loss": 0.9836, "step": 5049 }, { "epoch": 0.6895139268159476, "grad_norm": 6.524370193481445, "learning_rate": 2.3222103506234235e-06, "loss": 0.8699, "step": 5050 }, { "epoch": 0.6896504642271982, "grad_norm": 6.317988872528076, "learning_rate": 2.320343305193096e-06, "loss": 0.8704, "step": 5051 }, { "epoch": 0.689787001638449, "grad_norm": 6.6989922523498535, "learning_rate": 2.318476783813088e-06, "loss": 0.9206, "step": 5052 }, { "epoch": 0.6899235390496996, "grad_norm": 6.003871440887451, "learning_rate": 2.3166107868484296e-06, "loss": 1.0028, "step": 5053 }, { "epoch": 0.6900600764609504, "grad_norm": 10.937309265136719, "learning_rate": 2.3147453146640453e-06, "loss": 1.0378, "step": 5054 }, { "epoch": 0.690196613872201, "grad_norm": 6.834426403045654, "learning_rate": 2.3128803676247575e-06, "loss": 0.9077, "step": 5055 }, { "epoch": 0.6903331512834516, "grad_norm": 6.672553539276123, "learning_rate": 2.3110159460952895e-06, "loss": 0.8091, "step": 5056 }, { "epoch": 0.6904696886947024, "grad_norm": 5.786807060241699, "learning_rate": 2.3091520504402557e-06, "loss": 1.1056, "step": 5057 }, { "epoch": 0.690606226105953, "grad_norm": 7.9763407707214355, "learning_rate": 2.3072886810241763e-06, "loss": 1.0665, "step": 5058 }, { "epoch": 0.6907427635172038, "grad_norm": 5.26152229309082, "learning_rate": 2.305425838211458e-06, "loss": 0.9967, "step": 5059 }, { "epoch": 0.6908793009284544, "grad_norm": 5.8641581535339355, "learning_rate": 2.3035635223664136e-06, "loss": 0.9188, "step": 5060 }, { "epoch": 0.691015838339705, "grad_norm": 5.718111038208008, "learning_rate": 2.301701733853247e-06, "loss": 0.984, "step": 5061 }, { "epoch": 0.6911523757509558, "grad_norm": 5.740975379943848, "learning_rate": 2.2998404730360634e-06, "loss": 1.073, "step": 5062 }, { "epoch": 0.6912889131622064, "grad_norm": 7.783509254455566, "learning_rate": 2.2979797402788594e-06, "loss": 0.9783, "step": 5063 }, { "epoch": 0.6914254505734572, "grad_norm": 5.853060245513916, "learning_rate": 2.296119535945535e-06, "loss": 0.8214, "step": 5064 }, { "epoch": 0.6915619879847078, "grad_norm": 7.31341552734375, "learning_rate": 2.2942598603998816e-06, "loss": 0.9212, "step": 5065 }, { "epoch": 0.6916985253959584, "grad_norm": 6.312413692474365, "learning_rate": 2.2924007140055893e-06, "loss": 0.9746, "step": 5066 }, { "epoch": 0.6918350628072092, "grad_norm": 6.069666862487793, "learning_rate": 2.290542097126243e-06, "loss": 1.0614, "step": 5067 }, { "epoch": 0.6919716002184598, "grad_norm": 5.2039475440979, "learning_rate": 2.288684010125325e-06, "loss": 0.8628, "step": 5068 }, { "epoch": 0.6921081376297106, "grad_norm": 11.204469680786133, "learning_rate": 2.2868264533662177e-06, "loss": 1.0222, "step": 5069 }, { "epoch": 0.6922446750409612, "grad_norm": 8.902286529541016, "learning_rate": 2.284969427212192e-06, "loss": 0.9355, "step": 5070 }, { "epoch": 0.692381212452212, "grad_norm": 7.391424655914307, "learning_rate": 2.2831129320264237e-06, "loss": 0.9423, "step": 5071 }, { "epoch": 0.6925177498634626, "grad_norm": 11.102987289428711, "learning_rate": 2.281256968171977e-06, "loss": 0.9233, "step": 5072 }, { "epoch": 0.6926542872747132, "grad_norm": 6.1535563468933105, "learning_rate": 2.2794015360118193e-06, "loss": 0.863, "step": 5073 }, { "epoch": 0.692790824685964, "grad_norm": 5.36200475692749, "learning_rate": 2.2775466359088084e-06, "loss": 1.0312, "step": 5074 }, { "epoch": 0.6929273620972146, "grad_norm": 7.414661884307861, "learning_rate": 2.2756922682256983e-06, "loss": 1.0939, "step": 5075 }, { "epoch": 0.6930638995084654, "grad_norm": 7.1884074211120605, "learning_rate": 2.2738384333251447e-06, "loss": 0.9345, "step": 5076 }, { "epoch": 0.693200436919716, "grad_norm": 6.524356365203857, "learning_rate": 2.271985131569691e-06, "loss": 0.9071, "step": 5077 }, { "epoch": 0.6933369743309666, "grad_norm": 6.17726469039917, "learning_rate": 2.2701323633217843e-06, "loss": 0.9297, "step": 5078 }, { "epoch": 0.6934735117422174, "grad_norm": 8.139671325683594, "learning_rate": 2.268280128943762e-06, "loss": 0.9389, "step": 5079 }, { "epoch": 0.693610049153468, "grad_norm": 4.220712184906006, "learning_rate": 2.266428428797857e-06, "loss": 0.8976, "step": 5080 }, { "epoch": 0.6937465865647188, "grad_norm": 7.558180332183838, "learning_rate": 2.264577263246199e-06, "loss": 1.0025, "step": 5081 }, { "epoch": 0.6938831239759694, "grad_norm": 6.927279472351074, "learning_rate": 2.2627266326508173e-06, "loss": 1.0766, "step": 5082 }, { "epoch": 0.69401966138722, "grad_norm": 8.772239685058594, "learning_rate": 2.260876537373628e-06, "loss": 0.9978, "step": 5083 }, { "epoch": 0.6941561987984708, "grad_norm": 5.634762763977051, "learning_rate": 2.2590269777764516e-06, "loss": 0.9902, "step": 5084 }, { "epoch": 0.6942927362097214, "grad_norm": 6.6486921310424805, "learning_rate": 2.2571779542209977e-06, "loss": 0.9563, "step": 5085 }, { "epoch": 0.6944292736209722, "grad_norm": 7.040818214416504, "learning_rate": 2.25532946706887e-06, "loss": 1.1754, "step": 5086 }, { "epoch": 0.6945658110322228, "grad_norm": 8.005040168762207, "learning_rate": 2.2534815166815748e-06, "loss": 1.0756, "step": 5087 }, { "epoch": 0.6947023484434736, "grad_norm": 6.045426845550537, "learning_rate": 2.2516341034205047e-06, "loss": 0.9428, "step": 5088 }, { "epoch": 0.6948388858547242, "grad_norm": 6.373175621032715, "learning_rate": 2.2497872276469553e-06, "loss": 1.021, "step": 5089 }, { "epoch": 0.6949754232659748, "grad_norm": 6.5377912521362305, "learning_rate": 2.247940889722109e-06, "loss": 0.9315, "step": 5090 }, { "epoch": 0.6951119606772256, "grad_norm": 6.552042484283447, "learning_rate": 2.246095090007051e-06, "loss": 0.813, "step": 5091 }, { "epoch": 0.6952484980884762, "grad_norm": 7.713925361633301, "learning_rate": 2.2442498288627555e-06, "loss": 0.8292, "step": 5092 }, { "epoch": 0.695385035499727, "grad_norm": 5.764345169067383, "learning_rate": 2.242405106650094e-06, "loss": 1.0116, "step": 5093 }, { "epoch": 0.6955215729109776, "grad_norm": 6.195940017700195, "learning_rate": 2.2405609237298303e-06, "loss": 1.0416, "step": 5094 }, { "epoch": 0.6956581103222282, "grad_norm": 9.02599048614502, "learning_rate": 2.238717280462624e-06, "loss": 0.9046, "step": 5095 }, { "epoch": 0.695794647733479, "grad_norm": 10.531936645507812, "learning_rate": 2.236874177209032e-06, "loss": 1.0186, "step": 5096 }, { "epoch": 0.6959311851447296, "grad_norm": 7.529582500457764, "learning_rate": 2.2350316143295e-06, "loss": 0.8993, "step": 5097 }, { "epoch": 0.6960677225559804, "grad_norm": 7.749948024749756, "learning_rate": 2.2331895921843737e-06, "loss": 0.9582, "step": 5098 }, { "epoch": 0.696204259967231, "grad_norm": 6.112221717834473, "learning_rate": 2.2313481111338882e-06, "loss": 0.8999, "step": 5099 }, { "epoch": 0.6963407973784818, "grad_norm": 5.85003662109375, "learning_rate": 2.229507171538178e-06, "loss": 0.8642, "step": 5100 }, { "epoch": 0.6964773347897324, "grad_norm": 6.396047115325928, "learning_rate": 2.2276667737572643e-06, "loss": 0.9729, "step": 5101 }, { "epoch": 0.696613872200983, "grad_norm": 7.238321781158447, "learning_rate": 2.2258269181510717e-06, "loss": 0.8883, "step": 5102 }, { "epoch": 0.6967504096122338, "grad_norm": 7.042076110839844, "learning_rate": 2.223987605079411e-06, "loss": 1.0031, "step": 5103 }, { "epoch": 0.6968869470234844, "grad_norm": 6.143781661987305, "learning_rate": 2.2221488349019903e-06, "loss": 1.0723, "step": 5104 }, { "epoch": 0.6970234844347352, "grad_norm": 12.459723472595215, "learning_rate": 2.220310607978409e-06, "loss": 0.9516, "step": 5105 }, { "epoch": 0.6971600218459858, "grad_norm": 7.108977794647217, "learning_rate": 2.218472924668165e-06, "loss": 0.8368, "step": 5106 }, { "epoch": 0.6972965592572364, "grad_norm": 9.708449363708496, "learning_rate": 2.2166357853306463e-06, "loss": 0.9332, "step": 5107 }, { "epoch": 0.6974330966684872, "grad_norm": 5.8265252113342285, "learning_rate": 2.214799190325133e-06, "loss": 0.9046, "step": 5108 }, { "epoch": 0.6975696340797378, "grad_norm": 7.299825191497803, "learning_rate": 2.2129631400108047e-06, "loss": 0.9964, "step": 5109 }, { "epoch": 0.6977061714909886, "grad_norm": 6.799757957458496, "learning_rate": 2.2111276347467277e-06, "loss": 0.9034, "step": 5110 }, { "epoch": 0.6978427089022392, "grad_norm": 12.070245742797852, "learning_rate": 2.2092926748918673e-06, "loss": 1.0609, "step": 5111 }, { "epoch": 0.6979792463134898, "grad_norm": 5.102197170257568, "learning_rate": 2.207458260805077e-06, "loss": 0.8399, "step": 5112 }, { "epoch": 0.6981157837247406, "grad_norm": 10.035292625427246, "learning_rate": 2.20562439284511e-06, "loss": 1.0581, "step": 5113 }, { "epoch": 0.6982523211359912, "grad_norm": 6.066649436950684, "learning_rate": 2.203791071370604e-06, "loss": 0.8657, "step": 5114 }, { "epoch": 0.698388858547242, "grad_norm": 7.765335559844971, "learning_rate": 2.2019582967401e-06, "loss": 0.9126, "step": 5115 }, { "epoch": 0.6985253959584926, "grad_norm": 6.791998863220215, "learning_rate": 2.2001260693120236e-06, "loss": 0.994, "step": 5116 }, { "epoch": 0.6986619333697434, "grad_norm": 6.348181247711182, "learning_rate": 2.1982943894446974e-06, "loss": 0.8504, "step": 5117 }, { "epoch": 0.698798470780994, "grad_norm": 6.618621349334717, "learning_rate": 2.1964632574963356e-06, "loss": 0.9925, "step": 5118 }, { "epoch": 0.6989350081922446, "grad_norm": 6.570781707763672, "learning_rate": 2.1946326738250444e-06, "loss": 0.9869, "step": 5119 }, { "epoch": 0.6990715456034954, "grad_norm": 5.900721073150635, "learning_rate": 2.192802638788828e-06, "loss": 0.9288, "step": 5120 }, { "epoch": 0.699208083014746, "grad_norm": 5.513760089874268, "learning_rate": 2.1909731527455752e-06, "loss": 0.8979, "step": 5121 }, { "epoch": 0.6993446204259968, "grad_norm": 5.820621967315674, "learning_rate": 2.1891442160530753e-06, "loss": 1.0621, "step": 5122 }, { "epoch": 0.6994811578372474, "grad_norm": 7.051481246948242, "learning_rate": 2.1873158290690035e-06, "loss": 0.9279, "step": 5123 }, { "epoch": 0.699617695248498, "grad_norm": 10.159491539001465, "learning_rate": 2.185487992150933e-06, "loss": 0.8753, "step": 5124 }, { "epoch": 0.6997542326597488, "grad_norm": 4.917572975158691, "learning_rate": 2.1836607056563256e-06, "loss": 0.8824, "step": 5125 }, { "epoch": 0.6998907700709994, "grad_norm": 5.575085639953613, "learning_rate": 2.1818339699425362e-06, "loss": 0.8367, "step": 5126 }, { "epoch": 0.7000273074822502, "grad_norm": 5.158543586730957, "learning_rate": 2.180007785366815e-06, "loss": 0.9694, "step": 5127 }, { "epoch": 0.7001638448935008, "grad_norm": 8.048929214477539, "learning_rate": 2.178182152286298e-06, "loss": 1.0649, "step": 5128 }, { "epoch": 0.7003003823047516, "grad_norm": 7.148622035980225, "learning_rate": 2.1763570710580224e-06, "loss": 0.847, "step": 5129 }, { "epoch": 0.7004369197160022, "grad_norm": 6.223512172698975, "learning_rate": 2.1745325420389095e-06, "loss": 0.9383, "step": 5130 }, { "epoch": 0.7005734571272528, "grad_norm": 6.693835735321045, "learning_rate": 2.1727085655857762e-06, "loss": 0.9608, "step": 5131 }, { "epoch": 0.7007099945385036, "grad_norm": 6.495735168457031, "learning_rate": 2.1708851420553277e-06, "loss": 0.963, "step": 5132 }, { "epoch": 0.7008465319497542, "grad_norm": 8.264199256896973, "learning_rate": 2.1690622718041694e-06, "loss": 1.0387, "step": 5133 }, { "epoch": 0.700983069361005, "grad_norm": 6.807207107543945, "learning_rate": 2.1672399551887884e-06, "loss": 0.9147, "step": 5134 }, { "epoch": 0.7011196067722556, "grad_norm": 5.692448616027832, "learning_rate": 2.165418192565572e-06, "loss": 0.9027, "step": 5135 }, { "epoch": 0.7012561441835062, "grad_norm": 5.865970134735107, "learning_rate": 2.163596984290793e-06, "loss": 0.9809, "step": 5136 }, { "epoch": 0.701392681594757, "grad_norm": 4.542954444885254, "learning_rate": 2.1617763307206185e-06, "loss": 1.1337, "step": 5137 }, { "epoch": 0.7015292190060076, "grad_norm": 5.765435218811035, "learning_rate": 2.159956232211108e-06, "loss": 0.9731, "step": 5138 }, { "epoch": 0.7016657564172584, "grad_norm": 7.14558744430542, "learning_rate": 2.158136689118209e-06, "loss": 1.008, "step": 5139 }, { "epoch": 0.701802293828509, "grad_norm": 7.465306282043457, "learning_rate": 2.156317701797766e-06, "loss": 0.8756, "step": 5140 }, { "epoch": 0.7019388312397596, "grad_norm": 5.5127105712890625, "learning_rate": 2.154499270605508e-06, "loss": 1.0959, "step": 5141 }, { "epoch": 0.7020753686510104, "grad_norm": 6.217054843902588, "learning_rate": 2.1526813958970622e-06, "loss": 0.984, "step": 5142 }, { "epoch": 0.702211906062261, "grad_norm": 6.098620414733887, "learning_rate": 2.1508640780279414e-06, "loss": 1.0072, "step": 5143 }, { "epoch": 0.7023484434735118, "grad_norm": 7.767393589019775, "learning_rate": 2.149047317353552e-06, "loss": 1.0501, "step": 5144 }, { "epoch": 0.7024849808847624, "grad_norm": 6.4393134117126465, "learning_rate": 2.1472311142291906e-06, "loss": 0.9086, "step": 5145 }, { "epoch": 0.7026215182960132, "grad_norm": 5.077963352203369, "learning_rate": 2.1454154690100436e-06, "loss": 0.8812, "step": 5146 }, { "epoch": 0.7027580557072638, "grad_norm": 5.675973415374756, "learning_rate": 2.1436003820511943e-06, "loss": 1.0979, "step": 5147 }, { "epoch": 0.7028945931185144, "grad_norm": 7.233415603637695, "learning_rate": 2.141785853707607e-06, "loss": 0.9749, "step": 5148 }, { "epoch": 0.7030311305297652, "grad_norm": 6.19469690322876, "learning_rate": 2.139971884334147e-06, "loss": 0.9905, "step": 5149 }, { "epoch": 0.7031676679410158, "grad_norm": 5.380542755126953, "learning_rate": 2.138158474285561e-06, "loss": 0.9017, "step": 5150 }, { "epoch": 0.7033042053522666, "grad_norm": 8.60727596282959, "learning_rate": 2.1363456239164954e-06, "loss": 0.9733, "step": 5151 }, { "epoch": 0.7034407427635172, "grad_norm": 5.671201229095459, "learning_rate": 2.134533333581478e-06, "loss": 0.897, "step": 5152 }, { "epoch": 0.7035772801747678, "grad_norm": 5.250146389007568, "learning_rate": 2.1327216036349364e-06, "loss": 1.0441, "step": 5153 }, { "epoch": 0.7037138175860186, "grad_norm": 5.856002330780029, "learning_rate": 2.130910434431179e-06, "loss": 1.0129, "step": 5154 }, { "epoch": 0.7038503549972692, "grad_norm": 6.08221960067749, "learning_rate": 2.1290998263244124e-06, "loss": 1.014, "step": 5155 }, { "epoch": 0.70398689240852, "grad_norm": 8.328256607055664, "learning_rate": 2.12728977966873e-06, "loss": 0.9636, "step": 5156 }, { "epoch": 0.7041234298197706, "grad_norm": 5.501182556152344, "learning_rate": 2.1254802948181145e-06, "loss": 1.0576, "step": 5157 }, { "epoch": 0.7042599672310212, "grad_norm": 6.5866923332214355, "learning_rate": 2.1236713721264417e-06, "loss": 0.9876, "step": 5158 }, { "epoch": 0.704396504642272, "grad_norm": 5.767144203186035, "learning_rate": 2.121863011947472e-06, "loss": 0.8036, "step": 5159 }, { "epoch": 0.7045330420535226, "grad_norm": 5.807214260101318, "learning_rate": 2.1200552146348645e-06, "loss": 0.9366, "step": 5160 }, { "epoch": 0.7046695794647734, "grad_norm": 5.895829677581787, "learning_rate": 2.118247980542159e-06, "loss": 0.9193, "step": 5161 }, { "epoch": 0.704806116876024, "grad_norm": 6.384842872619629, "learning_rate": 2.1164413100227936e-06, "loss": 0.9458, "step": 5162 }, { "epoch": 0.7049426542872748, "grad_norm": 4.916213035583496, "learning_rate": 2.114635203430088e-06, "loss": 0.9368, "step": 5163 }, { "epoch": 0.7050791916985254, "grad_norm": 5.32282018661499, "learning_rate": 2.1128296611172593e-06, "loss": 0.9417, "step": 5164 }, { "epoch": 0.705215729109776, "grad_norm": 6.183126926422119, "learning_rate": 2.1110246834374087e-06, "loss": 0.9266, "step": 5165 }, { "epoch": 0.7053522665210268, "grad_norm": 5.396843433380127, "learning_rate": 2.1092202707435278e-06, "loss": 1.0197, "step": 5166 }, { "epoch": 0.7054888039322774, "grad_norm": 4.9715495109558105, "learning_rate": 2.107416423388501e-06, "loss": 0.9426, "step": 5167 }, { "epoch": 0.7056253413435282, "grad_norm": 5.676435947418213, "learning_rate": 2.105613141725099e-06, "loss": 0.968, "step": 5168 }, { "epoch": 0.7057618787547788, "grad_norm": 6.062934398651123, "learning_rate": 2.103810426105981e-06, "loss": 1.0556, "step": 5169 }, { "epoch": 0.7058984161660294, "grad_norm": 7.260133743286133, "learning_rate": 2.1020082768837e-06, "loss": 0.8874, "step": 5170 }, { "epoch": 0.7060349535772802, "grad_norm": 6.225526809692383, "learning_rate": 2.100206694410695e-06, "loss": 0.9949, "step": 5171 }, { "epoch": 0.7061714909885308, "grad_norm": 6.467231750488281, "learning_rate": 2.0984056790392926e-06, "loss": 1.1249, "step": 5172 }, { "epoch": 0.7063080283997816, "grad_norm": 5.146481990814209, "learning_rate": 2.096605231121713e-06, "loss": 0.979, "step": 5173 }, { "epoch": 0.7064445658110322, "grad_norm": 5.8692708015441895, "learning_rate": 2.094805351010061e-06, "loss": 0.7378, "step": 5174 }, { "epoch": 0.706581103222283, "grad_norm": 7.716945648193359, "learning_rate": 2.0930060390563346e-06, "loss": 0.8989, "step": 5175 }, { "epoch": 0.7067176406335336, "grad_norm": 4.941687107086182, "learning_rate": 2.091207295612417e-06, "loss": 0.9459, "step": 5176 }, { "epoch": 0.7068541780447842, "grad_norm": 8.868212699890137, "learning_rate": 2.0894091210300805e-06, "loss": 1.0004, "step": 5177 }, { "epoch": 0.706990715456035, "grad_norm": 6.480724334716797, "learning_rate": 2.08761151566099e-06, "loss": 0.9984, "step": 5178 }, { "epoch": 0.7071272528672856, "grad_norm": 5.460892677307129, "learning_rate": 2.085814479856693e-06, "loss": 0.9506, "step": 5179 }, { "epoch": 0.7072637902785364, "grad_norm": 5.975342750549316, "learning_rate": 2.0840180139686333e-06, "loss": 0.926, "step": 5180 }, { "epoch": 0.707400327689787, "grad_norm": 12.728937149047852, "learning_rate": 2.0822221183481366e-06, "loss": 0.9787, "step": 5181 }, { "epoch": 0.7075368651010376, "grad_norm": 7.619952201843262, "learning_rate": 2.0804267933464197e-06, "loss": 0.9605, "step": 5182 }, { "epoch": 0.7076734025122884, "grad_norm": 6.065814018249512, "learning_rate": 2.078632039314585e-06, "loss": 0.8818, "step": 5183 }, { "epoch": 0.707809939923539, "grad_norm": 5.894173622131348, "learning_rate": 2.0768378566036294e-06, "loss": 1.0385, "step": 5184 }, { "epoch": 0.7079464773347898, "grad_norm": 4.61968469619751, "learning_rate": 2.075044245564434e-06, "loss": 0.7723, "step": 5185 }, { "epoch": 0.7080830147460404, "grad_norm": 7.6902689933776855, "learning_rate": 2.073251206547765e-06, "loss": 0.9336, "step": 5186 }, { "epoch": 0.708219552157291, "grad_norm": 6.6241559982299805, "learning_rate": 2.0714587399042847e-06, "loss": 0.8936, "step": 5187 }, { "epoch": 0.7083560895685418, "grad_norm": 5.827635288238525, "learning_rate": 2.0696668459845354e-06, "loss": 0.8559, "step": 5188 }, { "epoch": 0.7084926269797924, "grad_norm": 6.251188278198242, "learning_rate": 2.0678755251389547e-06, "loss": 0.8647, "step": 5189 }, { "epoch": 0.7086291643910432, "grad_norm": 6.757892608642578, "learning_rate": 2.0660847777178606e-06, "loss": 1.0318, "step": 5190 }, { "epoch": 0.7087657018022938, "grad_norm": 4.400653839111328, "learning_rate": 2.064294604071466e-06, "loss": 0.9834, "step": 5191 }, { "epoch": 0.7089022392135446, "grad_norm": 4.840196132659912, "learning_rate": 2.0625050045498664e-06, "loss": 0.9774, "step": 5192 }, { "epoch": 0.7090387766247952, "grad_norm": 7.559979438781738, "learning_rate": 2.0607159795030483e-06, "loss": 0.988, "step": 5193 }, { "epoch": 0.7091753140360458, "grad_norm": 5.481594085693359, "learning_rate": 2.0589275292808847e-06, "loss": 0.9155, "step": 5194 }, { "epoch": 0.7093118514472966, "grad_norm": 10.011117935180664, "learning_rate": 2.057139654233135e-06, "loss": 1.0249, "step": 5195 }, { "epoch": 0.7094483888585472, "grad_norm": 6.299411773681641, "learning_rate": 2.0553523547094473e-06, "loss": 1.0289, "step": 5196 }, { "epoch": 0.709584926269798, "grad_norm": 6.250232696533203, "learning_rate": 2.0535656310593555e-06, "loss": 0.9127, "step": 5197 }, { "epoch": 0.7097214636810486, "grad_norm": 6.639812469482422, "learning_rate": 2.0517794836322857e-06, "loss": 0.8972, "step": 5198 }, { "epoch": 0.7098580010922992, "grad_norm": 5.163991928100586, "learning_rate": 2.0499939127775447e-06, "loss": 0.8598, "step": 5199 }, { "epoch": 0.70999453850355, "grad_norm": 17.917701721191406, "learning_rate": 2.048208918844333e-06, "loss": 0.8872, "step": 5200 }, { "epoch": 0.7101310759148006, "grad_norm": 7.215757846832275, "learning_rate": 2.046424502181732e-06, "loss": 0.9949, "step": 5201 }, { "epoch": 0.7102676133260514, "grad_norm": 5.845452785491943, "learning_rate": 2.0446406631387166e-06, "loss": 0.9031, "step": 5202 }, { "epoch": 0.710404150737302, "grad_norm": 6.432675361633301, "learning_rate": 2.0428574020641422e-06, "loss": 0.8851, "step": 5203 }, { "epoch": 0.7105406881485528, "grad_norm": 5.474089622497559, "learning_rate": 2.041074719306757e-06, "loss": 0.946, "step": 5204 }, { "epoch": 0.7106772255598034, "grad_norm": 5.903942108154297, "learning_rate": 2.039292615215193e-06, "loss": 0.9842, "step": 5205 }, { "epoch": 0.710813762971054, "grad_norm": 5.381400108337402, "learning_rate": 2.0375110901379674e-06, "loss": 0.989, "step": 5206 }, { "epoch": 0.7109503003823048, "grad_norm": 10.477408409118652, "learning_rate": 2.0357301444234893e-06, "loss": 0.8426, "step": 5207 }, { "epoch": 0.7110868377935554, "grad_norm": 6.1019206047058105, "learning_rate": 2.03394977842005e-06, "loss": 0.8756, "step": 5208 }, { "epoch": 0.7112233752048062, "grad_norm": 5.217455863952637, "learning_rate": 2.03216999247583e-06, "loss": 0.8477, "step": 5209 }, { "epoch": 0.7113599126160568, "grad_norm": 5.063220977783203, "learning_rate": 2.030390786938892e-06, "loss": 0.9452, "step": 5210 }, { "epoch": 0.7114964500273074, "grad_norm": 6.794870376586914, "learning_rate": 2.0286121621571926e-06, "loss": 1.0005, "step": 5211 }, { "epoch": 0.7116329874385582, "grad_norm": 5.206624507904053, "learning_rate": 2.0268341184785674e-06, "loss": 0.9133, "step": 5212 }, { "epoch": 0.7117695248498088, "grad_norm": 7.476443767547607, "learning_rate": 2.0250566562507444e-06, "loss": 0.8404, "step": 5213 }, { "epoch": 0.7119060622610596, "grad_norm": 8.66901969909668, "learning_rate": 2.023279775821333e-06, "loss": 0.9406, "step": 5214 }, { "epoch": 0.7120425996723102, "grad_norm": 6.140585899353027, "learning_rate": 2.0215034775378336e-06, "loss": 0.9318, "step": 5215 }, { "epoch": 0.7121791370835608, "grad_norm": 5.157886981964111, "learning_rate": 2.0197277617476287e-06, "loss": 1.0127, "step": 5216 }, { "epoch": 0.7123156744948116, "grad_norm": 6.565325736999512, "learning_rate": 2.017952628797986e-06, "loss": 1.1365, "step": 5217 }, { "epoch": 0.7124522119060622, "grad_norm": 6.557471752166748, "learning_rate": 2.016178079036066e-06, "loss": 0.8857, "step": 5218 }, { "epoch": 0.712588749317313, "grad_norm": 4.998987197875977, "learning_rate": 2.0144041128089065e-06, "loss": 0.8561, "step": 5219 }, { "epoch": 0.7127252867285636, "grad_norm": 7.719844818115234, "learning_rate": 2.0126307304634383e-06, "loss": 0.8953, "step": 5220 }, { "epoch": 0.7128618241398144, "grad_norm": 5.942051410675049, "learning_rate": 2.010857932346475e-06, "loss": 0.9193, "step": 5221 }, { "epoch": 0.712998361551065, "grad_norm": 7.5037522315979, "learning_rate": 2.009085718804715e-06, "loss": 0.8398, "step": 5222 }, { "epoch": 0.7131348989623156, "grad_norm": 6.566201686859131, "learning_rate": 2.007314090184741e-06, "loss": 0.9313, "step": 5223 }, { "epoch": 0.7132714363735664, "grad_norm": 4.946251392364502, "learning_rate": 2.0055430468330283e-06, "loss": 1.0112, "step": 5224 }, { "epoch": 0.713407973784817, "grad_norm": 6.909111022949219, "learning_rate": 2.003772589095929e-06, "loss": 0.9991, "step": 5225 }, { "epoch": 0.7135445111960678, "grad_norm": 7.246105670928955, "learning_rate": 2.002002717319689e-06, "loss": 0.9903, "step": 5226 }, { "epoch": 0.7136810486073184, "grad_norm": 7.889798641204834, "learning_rate": 2.000233431850433e-06, "loss": 1.0469, "step": 5227 }, { "epoch": 0.713817586018569, "grad_norm": 4.601850509643555, "learning_rate": 1.998464733034172e-06, "loss": 0.9163, "step": 5228 }, { "epoch": 0.7139541234298198, "grad_norm": 5.543924808502197, "learning_rate": 1.996696621216807e-06, "loss": 1.0377, "step": 5229 }, { "epoch": 0.7140906608410704, "grad_norm": 6.582374572753906, "learning_rate": 1.9949290967441183e-06, "loss": 1.0439, "step": 5230 }, { "epoch": 0.7142271982523212, "grad_norm": 4.606156349182129, "learning_rate": 1.993162159961776e-06, "loss": 1.0032, "step": 5231 }, { "epoch": 0.7143637356635718, "grad_norm": 17.311059951782227, "learning_rate": 1.9913958112153326e-06, "loss": 0.9862, "step": 5232 }, { "epoch": 0.7145002730748226, "grad_norm": 12.256556510925293, "learning_rate": 1.9896300508502258e-06, "loss": 1.0159, "step": 5233 }, { "epoch": 0.7146368104860732, "grad_norm": 6.6112895011901855, "learning_rate": 1.9878648792117775e-06, "loss": 1.1471, "step": 5234 }, { "epoch": 0.7147733478973238, "grad_norm": 6.942371845245361, "learning_rate": 1.9861002966451987e-06, "loss": 1.0327, "step": 5235 }, { "epoch": 0.7149098853085746, "grad_norm": 12.47995662689209, "learning_rate": 1.98433630349558e-06, "loss": 0.8696, "step": 5236 }, { "epoch": 0.7150464227198252, "grad_norm": 6.9250335693359375, "learning_rate": 1.9825729001078974e-06, "loss": 1.0967, "step": 5237 }, { "epoch": 0.715182960131076, "grad_norm": 6.822649955749512, "learning_rate": 1.980810086827017e-06, "loss": 0.8955, "step": 5238 }, { "epoch": 0.7153194975423266, "grad_norm": 7.894099712371826, "learning_rate": 1.9790478639976813e-06, "loss": 0.8443, "step": 5239 }, { "epoch": 0.7154560349535772, "grad_norm": 6.942162036895752, "learning_rate": 1.977286231964525e-06, "loss": 0.815, "step": 5240 }, { "epoch": 0.715592572364828, "grad_norm": 7.521786212921143, "learning_rate": 1.9755251910720617e-06, "loss": 0.8499, "step": 5241 }, { "epoch": 0.7157291097760786, "grad_norm": 5.922510147094727, "learning_rate": 1.973764741664694e-06, "loss": 0.8597, "step": 5242 }, { "epoch": 0.7158656471873294, "grad_norm": 6.2369232177734375, "learning_rate": 1.9720048840867017e-06, "loss": 0.8376, "step": 5243 }, { "epoch": 0.71600218459858, "grad_norm": 6.135618209838867, "learning_rate": 1.9702456186822595e-06, "loss": 0.8536, "step": 5244 }, { "epoch": 0.7161387220098306, "grad_norm": 7.539278507232666, "learning_rate": 1.9684869457954163e-06, "loss": 1.0528, "step": 5245 }, { "epoch": 0.7162752594210814, "grad_norm": 5.484525680541992, "learning_rate": 1.9667288657701102e-06, "loss": 0.9646, "step": 5246 }, { "epoch": 0.716411796832332, "grad_norm": 5.291676998138428, "learning_rate": 1.9649713789501617e-06, "loss": 0.9298, "step": 5247 }, { "epoch": 0.7165483342435828, "grad_norm": 5.522354602813721, "learning_rate": 1.963214485679275e-06, "loss": 0.812, "step": 5248 }, { "epoch": 0.7166848716548334, "grad_norm": 9.140140533447266, "learning_rate": 1.9614581863010414e-06, "loss": 1.0298, "step": 5249 }, { "epoch": 0.7168214090660842, "grad_norm": 5.8655500411987305, "learning_rate": 1.959702481158931e-06, "loss": 0.8985, "step": 5250 }, { "epoch": 0.7169579464773348, "grad_norm": 13.053121566772461, "learning_rate": 1.9579473705963044e-06, "loss": 1.0257, "step": 5251 }, { "epoch": 0.7170944838885854, "grad_norm": 7.534231185913086, "learning_rate": 1.956192854956397e-06, "loss": 0.9891, "step": 5252 }, { "epoch": 0.7172310212998362, "grad_norm": 6.960418701171875, "learning_rate": 1.9544389345823367e-06, "loss": 0.8237, "step": 5253 }, { "epoch": 0.7173675587110868, "grad_norm": 5.419609546661377, "learning_rate": 1.9526856098171286e-06, "loss": 0.7598, "step": 5254 }, { "epoch": 0.7175040961223376, "grad_norm": 5.939371109008789, "learning_rate": 1.9509328810036664e-06, "loss": 0.9287, "step": 5255 }, { "epoch": 0.7176406335335882, "grad_norm": 8.050980567932129, "learning_rate": 1.949180748484723e-06, "loss": 0.8515, "step": 5256 }, { "epoch": 0.7177771709448388, "grad_norm": 6.752748489379883, "learning_rate": 1.9474292126029548e-06, "loss": 0.9926, "step": 5257 }, { "epoch": 0.7179137083560896, "grad_norm": 8.682927131652832, "learning_rate": 1.945678273700906e-06, "loss": 1.1161, "step": 5258 }, { "epoch": 0.7180502457673402, "grad_norm": 6.7312397956848145, "learning_rate": 1.943927932121e-06, "loss": 0.9974, "step": 5259 }, { "epoch": 0.718186783178591, "grad_norm": 6.841413497924805, "learning_rate": 1.9421781882055447e-06, "loss": 0.9675, "step": 5260 }, { "epoch": 0.7183233205898416, "grad_norm": 7.006195068359375, "learning_rate": 1.940429042296728e-06, "loss": 1.011, "step": 5261 }, { "epoch": 0.7184598580010922, "grad_norm": 5.793015003204346, "learning_rate": 1.9386804947366285e-06, "loss": 1.0853, "step": 5262 }, { "epoch": 0.718596395412343, "grad_norm": 6.448946952819824, "learning_rate": 1.936932545867199e-06, "loss": 0.928, "step": 5263 }, { "epoch": 0.7187329328235936, "grad_norm": 17.24244499206543, "learning_rate": 1.935185196030282e-06, "loss": 0.9521, "step": 5264 }, { "epoch": 0.7188694702348444, "grad_norm": 8.826805114746094, "learning_rate": 1.933438445567598e-06, "loss": 0.8467, "step": 5265 }, { "epoch": 0.719006007646095, "grad_norm": 6.128154754638672, "learning_rate": 1.9316922948207545e-06, "loss": 0.9462, "step": 5266 }, { "epoch": 0.7191425450573458, "grad_norm": 6.714038372039795, "learning_rate": 1.9299467441312394e-06, "loss": 0.9595, "step": 5267 }, { "epoch": 0.7192790824685964, "grad_norm": 6.561349868774414, "learning_rate": 1.9282017938404202e-06, "loss": 1.0108, "step": 5268 }, { "epoch": 0.719415619879847, "grad_norm": 5.888653755187988, "learning_rate": 1.926457444289554e-06, "loss": 0.8736, "step": 5269 }, { "epoch": 0.7195521572910978, "grad_norm": 6.109442234039307, "learning_rate": 1.9247136958197744e-06, "loss": 0.8443, "step": 5270 }, { "epoch": 0.7196886947023484, "grad_norm": 8.77908706665039, "learning_rate": 1.9229705487721014e-06, "loss": 0.7785, "step": 5271 }, { "epoch": 0.7198252321135992, "grad_norm": 8.009835243225098, "learning_rate": 1.9212280034874352e-06, "loss": 0.9113, "step": 5272 }, { "epoch": 0.7199617695248498, "grad_norm": 6.744925022125244, "learning_rate": 1.9194860603065584e-06, "loss": 0.9504, "step": 5273 }, { "epoch": 0.7200983069361004, "grad_norm": 6.041810512542725, "learning_rate": 1.9177447195701345e-06, "loss": 0.8184, "step": 5274 }, { "epoch": 0.7202348443473512, "grad_norm": 6.46185302734375, "learning_rate": 1.9160039816187143e-06, "loss": 0.8867, "step": 5275 }, { "epoch": 0.7203713817586018, "grad_norm": 8.025424003601074, "learning_rate": 1.9142638467927254e-06, "loss": 0.8986, "step": 5276 }, { "epoch": 0.7205079191698526, "grad_norm": 6.066869258880615, "learning_rate": 1.9125243154324786e-06, "loss": 0.9567, "step": 5277 }, { "epoch": 0.7206444565811032, "grad_norm": 7.681621074676514, "learning_rate": 1.9107853878781695e-06, "loss": 0.8383, "step": 5278 }, { "epoch": 0.720780993992354, "grad_norm": 6.259346008300781, "learning_rate": 1.909047064469871e-06, "loss": 1.0545, "step": 5279 }, { "epoch": 0.7209175314036046, "grad_norm": 8.45454216003418, "learning_rate": 1.9073093455475444e-06, "loss": 0.9513, "step": 5280 }, { "epoch": 0.7210540688148552, "grad_norm": 5.808821201324463, "learning_rate": 1.9055722314510238e-06, "loss": 0.8996, "step": 5281 }, { "epoch": 0.721190606226106, "grad_norm": 5.197171211242676, "learning_rate": 1.9038357225200355e-06, "loss": 0.9136, "step": 5282 }, { "epoch": 0.7213271436373566, "grad_norm": 6.1996259689331055, "learning_rate": 1.9020998190941763e-06, "loss": 1.0661, "step": 5283 }, { "epoch": 0.7214636810486074, "grad_norm": 7.79416561126709, "learning_rate": 1.9003645215129356e-06, "loss": 0.93, "step": 5284 }, { "epoch": 0.721600218459858, "grad_norm": 7.265057563781738, "learning_rate": 1.8986298301156765e-06, "loss": 1.0532, "step": 5285 }, { "epoch": 0.7217367558711086, "grad_norm": 7.5267815589904785, "learning_rate": 1.8968957452416453e-06, "loss": 0.9421, "step": 5286 }, { "epoch": 0.7218732932823594, "grad_norm": 6.472428321838379, "learning_rate": 1.8951622672299719e-06, "loss": 0.7924, "step": 5287 }, { "epoch": 0.72200983069361, "grad_norm": 6.184980392456055, "learning_rate": 1.8934293964196632e-06, "loss": 1.0613, "step": 5288 }, { "epoch": 0.7221463681048608, "grad_norm": 6.140648365020752, "learning_rate": 1.8916971331496143e-06, "loss": 0.8569, "step": 5289 }, { "epoch": 0.7222829055161114, "grad_norm": 7.065980911254883, "learning_rate": 1.8899654777585935e-06, "loss": 0.8373, "step": 5290 }, { "epoch": 0.722419442927362, "grad_norm": 7.267586708068848, "learning_rate": 1.8882344305852574e-06, "loss": 0.9891, "step": 5291 }, { "epoch": 0.7225559803386128, "grad_norm": 39.853206634521484, "learning_rate": 1.8865039919681377e-06, "loss": 0.9379, "step": 5292 }, { "epoch": 0.7226925177498634, "grad_norm": 5.487471580505371, "learning_rate": 1.8847741622456523e-06, "loss": 0.9549, "step": 5293 }, { "epoch": 0.7228290551611142, "grad_norm": 6.701192855834961, "learning_rate": 1.8830449417560943e-06, "loss": 0.7899, "step": 5294 }, { "epoch": 0.7229655925723648, "grad_norm": 6.972287654876709, "learning_rate": 1.8813163308376443e-06, "loss": 0.9715, "step": 5295 }, { "epoch": 0.7231021299836156, "grad_norm": 8.019269943237305, "learning_rate": 1.8795883298283585e-06, "loss": 1.0474, "step": 5296 }, { "epoch": 0.7232386673948662, "grad_norm": 5.822124481201172, "learning_rate": 1.8778609390661756e-06, "loss": 1.0228, "step": 5297 }, { "epoch": 0.7233752048061168, "grad_norm": 9.584644317626953, "learning_rate": 1.8761341588889125e-06, "loss": 0.8947, "step": 5298 }, { "epoch": 0.7235117422173676, "grad_norm": 8.571166038513184, "learning_rate": 1.8744079896342738e-06, "loss": 1.0833, "step": 5299 }, { "epoch": 0.7236482796286182, "grad_norm": 6.688860893249512, "learning_rate": 1.8726824316398372e-06, "loss": 1.105, "step": 5300 }, { "epoch": 0.723784817039869, "grad_norm": 6.248779296875, "learning_rate": 1.8709574852430618e-06, "loss": 1.0179, "step": 5301 }, { "epoch": 0.7239213544511196, "grad_norm": 5.72178840637207, "learning_rate": 1.8692331507812928e-06, "loss": 0.9581, "step": 5302 }, { "epoch": 0.7240578918623702, "grad_norm": 6.342770576477051, "learning_rate": 1.8675094285917482e-06, "loss": 0.7921, "step": 5303 }, { "epoch": 0.724194429273621, "grad_norm": 4.350861072540283, "learning_rate": 1.8657863190115328e-06, "loss": 0.8173, "step": 5304 }, { "epoch": 0.7243309666848716, "grad_norm": 5.8906073570251465, "learning_rate": 1.8640638223776258e-06, "loss": 0.9894, "step": 5305 }, { "epoch": 0.7244675040961224, "grad_norm": 5.685700416564941, "learning_rate": 1.8623419390268922e-06, "loss": 0.9902, "step": 5306 }, { "epoch": 0.724604041507373, "grad_norm": 6.360250949859619, "learning_rate": 1.8606206692960727e-06, "loss": 0.9177, "step": 5307 }, { "epoch": 0.7247405789186238, "grad_norm": 5.134634971618652, "learning_rate": 1.8589000135217882e-06, "loss": 1.0262, "step": 5308 }, { "epoch": 0.7248771163298744, "grad_norm": 6.060389995574951, "learning_rate": 1.8571799720405436e-06, "loss": 0.9102, "step": 5309 }, { "epoch": 0.725013653741125, "grad_norm": 6.5071120262146, "learning_rate": 1.8554605451887198e-06, "loss": 1.0753, "step": 5310 }, { "epoch": 0.7251501911523758, "grad_norm": 6.756542682647705, "learning_rate": 1.8537417333025782e-06, "loss": 0.9857, "step": 5311 }, { "epoch": 0.7252867285636264, "grad_norm": 5.792132377624512, "learning_rate": 1.8520235367182588e-06, "loss": 0.8751, "step": 5312 }, { "epoch": 0.7254232659748772, "grad_norm": 6.658812046051025, "learning_rate": 1.8503059557717861e-06, "loss": 0.9458, "step": 5313 }, { "epoch": 0.7255598033861278, "grad_norm": 7.493373870849609, "learning_rate": 1.848588990799058e-06, "loss": 0.9514, "step": 5314 }, { "epoch": 0.7256963407973784, "grad_norm": 5.752963542938232, "learning_rate": 1.8468726421358574e-06, "loss": 0.9037, "step": 5315 }, { "epoch": 0.7258328782086292, "grad_norm": 5.3337554931640625, "learning_rate": 1.845156910117843e-06, "loss": 0.9588, "step": 5316 }, { "epoch": 0.7259694156198798, "grad_norm": 7.538841247558594, "learning_rate": 1.8434417950805523e-06, "loss": 0.9874, "step": 5317 }, { "epoch": 0.7261059530311306, "grad_norm": 6.285089492797852, "learning_rate": 1.841727297359407e-06, "loss": 0.9036, "step": 5318 }, { "epoch": 0.7262424904423812, "grad_norm": 13.967082977294922, "learning_rate": 1.8400134172897016e-06, "loss": 0.9136, "step": 5319 }, { "epoch": 0.7263790278536318, "grad_norm": 7.1436028480529785, "learning_rate": 1.8383001552066166e-06, "loss": 0.9651, "step": 5320 }, { "epoch": 0.7265155652648826, "grad_norm": 5.116337299346924, "learning_rate": 1.8365875114452048e-06, "loss": 0.9594, "step": 5321 }, { "epoch": 0.7266521026761332, "grad_norm": 6.546178817749023, "learning_rate": 1.8348754863404046e-06, "loss": 0.9511, "step": 5322 }, { "epoch": 0.726788640087384, "grad_norm": 6.69089412689209, "learning_rate": 1.833164080227029e-06, "loss": 0.8463, "step": 5323 }, { "epoch": 0.7269251774986346, "grad_norm": 6.305819988250732, "learning_rate": 1.831453293439771e-06, "loss": 0.8509, "step": 5324 }, { "epoch": 0.7270617149098854, "grad_norm": 5.5850419998168945, "learning_rate": 1.829743126313201e-06, "loss": 0.8631, "step": 5325 }, { "epoch": 0.727198252321136, "grad_norm": 5.635329723358154, "learning_rate": 1.8280335791817733e-06, "loss": 0.9227, "step": 5326 }, { "epoch": 0.7273347897323866, "grad_norm": 19.375381469726562, "learning_rate": 1.8263246523798162e-06, "loss": 0.975, "step": 5327 }, { "epoch": 0.7274713271436374, "grad_norm": 4.2479753494262695, "learning_rate": 1.8246163462415355e-06, "loss": 0.7736, "step": 5328 }, { "epoch": 0.727607864554888, "grad_norm": 6.322738170623779, "learning_rate": 1.8229086611010228e-06, "loss": 0.8668, "step": 5329 }, { "epoch": 0.7277444019661388, "grad_norm": 5.6758294105529785, "learning_rate": 1.8212015972922393e-06, "loss": 0.8446, "step": 5330 }, { "epoch": 0.7278809393773894, "grad_norm": 4.833553791046143, "learning_rate": 1.819495155149032e-06, "loss": 1.0198, "step": 5331 }, { "epoch": 0.72801747678864, "grad_norm": 7.363951683044434, "learning_rate": 1.8177893350051213e-06, "loss": 0.9946, "step": 5332 }, { "epoch": 0.7281540141998908, "grad_norm": 5.2222514152526855, "learning_rate": 1.8160841371941101e-06, "loss": 0.9135, "step": 5333 }, { "epoch": 0.7282905516111414, "grad_norm": 5.813876628875732, "learning_rate": 1.8143795620494753e-06, "loss": 0.9671, "step": 5334 }, { "epoch": 0.7284270890223922, "grad_norm": 5.304929733276367, "learning_rate": 1.8126756099045767e-06, "loss": 1.0837, "step": 5335 }, { "epoch": 0.7285636264336428, "grad_norm": 6.087500095367432, "learning_rate": 1.8109722810926483e-06, "loss": 0.9696, "step": 5336 }, { "epoch": 0.7287001638448936, "grad_norm": 6.124519348144531, "learning_rate": 1.8092695759468037e-06, "loss": 0.8704, "step": 5337 }, { "epoch": 0.7288367012561442, "grad_norm": 7.362490653991699, "learning_rate": 1.8075674948000344e-06, "loss": 0.8176, "step": 5338 }, { "epoch": 0.7289732386673948, "grad_norm": 6.249159336090088, "learning_rate": 1.8058660379852082e-06, "loss": 0.9005, "step": 5339 }, { "epoch": 0.7291097760786456, "grad_norm": 5.465653896331787, "learning_rate": 1.8041652058350768e-06, "loss": 0.9512, "step": 5340 }, { "epoch": 0.7292463134898962, "grad_norm": 6.763309478759766, "learning_rate": 1.8024649986822607e-06, "loss": 1.024, "step": 5341 }, { "epoch": 0.729382850901147, "grad_norm": 5.799649238586426, "learning_rate": 1.8007654168592676e-06, "loss": 0.8854, "step": 5342 }, { "epoch": 0.7295193883123976, "grad_norm": 5.130634784698486, "learning_rate": 1.7990664606984743e-06, "loss": 0.9542, "step": 5343 }, { "epoch": 0.7296559257236482, "grad_norm": 6.877966403961182, "learning_rate": 1.7973681305321427e-06, "loss": 0.9237, "step": 5344 }, { "epoch": 0.729792463134899, "grad_norm": 12.299071311950684, "learning_rate": 1.7956704266924057e-06, "loss": 0.9371, "step": 5345 }, { "epoch": 0.7299290005461496, "grad_norm": 12.361834526062012, "learning_rate": 1.79397334951128e-06, "loss": 0.9263, "step": 5346 }, { "epoch": 0.7300655379574004, "grad_norm": 6.423850059509277, "learning_rate": 1.792276899320654e-06, "loss": 1.0555, "step": 5347 }, { "epoch": 0.730202075368651, "grad_norm": 6.55225133895874, "learning_rate": 1.7905810764522963e-06, "loss": 0.9475, "step": 5348 }, { "epoch": 0.7303386127799016, "grad_norm": 6.143751621246338, "learning_rate": 1.788885881237854e-06, "loss": 0.8465, "step": 5349 }, { "epoch": 0.7304751501911524, "grad_norm": 7.235813140869141, "learning_rate": 1.7871913140088499e-06, "loss": 0.9942, "step": 5350 }, { "epoch": 0.730611687602403, "grad_norm": 5.541832447052002, "learning_rate": 1.7854973750966825e-06, "loss": 0.9657, "step": 5351 }, { "epoch": 0.7307482250136538, "grad_norm": 11.684595108032227, "learning_rate": 1.7838040648326288e-06, "loss": 0.9411, "step": 5352 }, { "epoch": 0.7308847624249044, "grad_norm": 9.871325492858887, "learning_rate": 1.7821113835478454e-06, "loss": 1.0456, "step": 5353 }, { "epoch": 0.7310212998361552, "grad_norm": 7.123288154602051, "learning_rate": 1.780419331573361e-06, "loss": 0.8944, "step": 5354 }, { "epoch": 0.7311578372474058, "grad_norm": 6.041117191314697, "learning_rate": 1.7787279092400865e-06, "loss": 1.0228, "step": 5355 }, { "epoch": 0.7312943746586564, "grad_norm": 6.2357401847839355, "learning_rate": 1.7770371168788042e-06, "loss": 0.9186, "step": 5356 }, { "epoch": 0.7314309120699072, "grad_norm": 4.989100933074951, "learning_rate": 1.775346954820179e-06, "loss": 0.8931, "step": 5357 }, { "epoch": 0.7315674494811578, "grad_norm": 7.079780101776123, "learning_rate": 1.773657423394748e-06, "loss": 0.8545, "step": 5358 }, { "epoch": 0.7317039868924086, "grad_norm": 5.473432540893555, "learning_rate": 1.7719685229329243e-06, "loss": 0.8941, "step": 5359 }, { "epoch": 0.7318405243036592, "grad_norm": 6.473773002624512, "learning_rate": 1.7702802537650038e-06, "loss": 1.0101, "step": 5360 }, { "epoch": 0.7319770617149098, "grad_norm": 5.462620735168457, "learning_rate": 1.7685926162211525e-06, "loss": 1.0935, "step": 5361 }, { "epoch": 0.7321135991261606, "grad_norm": 6.655271053314209, "learning_rate": 1.7669056106314165e-06, "loss": 0.9045, "step": 5362 }, { "epoch": 0.7322501365374112, "grad_norm": 6.412947177886963, "learning_rate": 1.7652192373257137e-06, "loss": 0.9107, "step": 5363 }, { "epoch": 0.732386673948662, "grad_norm": 6.716574192047119, "learning_rate": 1.7635334966338463e-06, "loss": 1.0147, "step": 5364 }, { "epoch": 0.7325232113599126, "grad_norm": 7.795173168182373, "learning_rate": 1.7618483888854848e-06, "loss": 0.8018, "step": 5365 }, { "epoch": 0.7326597487711632, "grad_norm": 15.10838794708252, "learning_rate": 1.7601639144101823e-06, "loss": 1.005, "step": 5366 }, { "epoch": 0.732796286182414, "grad_norm": 5.928388595581055, "learning_rate": 1.7584800735373636e-06, "loss": 1.0477, "step": 5367 }, { "epoch": 0.7329328235936646, "grad_norm": 6.927799224853516, "learning_rate": 1.7567968665963297e-06, "loss": 0.9307, "step": 5368 }, { "epoch": 0.7330693610049154, "grad_norm": 5.946130275726318, "learning_rate": 1.7551142939162618e-06, "loss": 0.8939, "step": 5369 }, { "epoch": 0.733205898416166, "grad_norm": 6.741263389587402, "learning_rate": 1.7534323558262118e-06, "loss": 0.8132, "step": 5370 }, { "epoch": 0.7333424358274168, "grad_norm": 6.377094268798828, "learning_rate": 1.7517510526551118e-06, "loss": 1.0761, "step": 5371 }, { "epoch": 0.7334789732386674, "grad_norm": 5.458950519561768, "learning_rate": 1.7500703847317663e-06, "loss": 0.9343, "step": 5372 }, { "epoch": 0.733615510649918, "grad_norm": 5.1593098640441895, "learning_rate": 1.7483903523848594e-06, "loss": 0.8971, "step": 5373 }, { "epoch": 0.7337520480611688, "grad_norm": 8.184839248657227, "learning_rate": 1.746710955942947e-06, "loss": 0.8668, "step": 5374 }, { "epoch": 0.7338885854724194, "grad_norm": 5.880495548248291, "learning_rate": 1.745032195734463e-06, "loss": 0.8083, "step": 5375 }, { "epoch": 0.7340251228836702, "grad_norm": 5.04026985168457, "learning_rate": 1.743354072087714e-06, "loss": 0.9215, "step": 5376 }, { "epoch": 0.7341616602949208, "grad_norm": 5.316815376281738, "learning_rate": 1.7416765853308876e-06, "loss": 0.9367, "step": 5377 }, { "epoch": 0.7342981977061714, "grad_norm": 5.656385898590088, "learning_rate": 1.7399997357920424e-06, "loss": 0.9063, "step": 5378 }, { "epoch": 0.7344347351174222, "grad_norm": 10.118496894836426, "learning_rate": 1.7383235237991108e-06, "loss": 0.8137, "step": 5379 }, { "epoch": 0.7345712725286728, "grad_norm": 7.162223815917969, "learning_rate": 1.7366479496799076e-06, "loss": 0.7838, "step": 5380 }, { "epoch": 0.7347078099399236, "grad_norm": 7.625694751739502, "learning_rate": 1.734973013762115e-06, "loss": 0.93, "step": 5381 }, { "epoch": 0.7348443473511742, "grad_norm": 6.036022186279297, "learning_rate": 1.7332987163732967e-06, "loss": 0.9659, "step": 5382 }, { "epoch": 0.734980884762425, "grad_norm": 6.9776387214660645, "learning_rate": 1.731625057840886e-06, "loss": 1.1259, "step": 5383 }, { "epoch": 0.7351174221736756, "grad_norm": 5.606903076171875, "learning_rate": 1.7299520384921965e-06, "loss": 0.9881, "step": 5384 }, { "epoch": 0.7352539595849262, "grad_norm": 4.98411750793457, "learning_rate": 1.7282796586544115e-06, "loss": 1.0344, "step": 5385 }, { "epoch": 0.735390496996177, "grad_norm": 6.040095329284668, "learning_rate": 1.7266079186545958e-06, "loss": 0.8555, "step": 5386 }, { "epoch": 0.7355270344074276, "grad_norm": 5.767264366149902, "learning_rate": 1.7249368188196824e-06, "loss": 0.8369, "step": 5387 }, { "epoch": 0.7356635718186784, "grad_norm": 9.282707214355469, "learning_rate": 1.723266359476483e-06, "loss": 0.9851, "step": 5388 }, { "epoch": 0.735800109229929, "grad_norm": 5.685998439788818, "learning_rate": 1.721596540951682e-06, "loss": 0.7926, "step": 5389 }, { "epoch": 0.7359366466411796, "grad_norm": 8.69946002960205, "learning_rate": 1.7199273635718394e-06, "loss": 1.0525, "step": 5390 }, { "epoch": 0.7360731840524304, "grad_norm": 7.076837539672852, "learning_rate": 1.7182588276633915e-06, "loss": 0.8575, "step": 5391 }, { "epoch": 0.736209721463681, "grad_norm": 4.9631781578063965, "learning_rate": 1.7165909335526454e-06, "loss": 0.9597, "step": 5392 }, { "epoch": 0.7363462588749318, "grad_norm": 6.022247314453125, "learning_rate": 1.7149236815657876e-06, "loss": 0.7956, "step": 5393 }, { "epoch": 0.7364827962861824, "grad_norm": 7.367210388183594, "learning_rate": 1.7132570720288733e-06, "loss": 0.8952, "step": 5394 }, { "epoch": 0.736619333697433, "grad_norm": 5.441461086273193, "learning_rate": 1.711591105267838e-06, "loss": 0.9123, "step": 5395 }, { "epoch": 0.7367558711086838, "grad_norm": 6.245781898498535, "learning_rate": 1.7099257816084851e-06, "loss": 0.9246, "step": 5396 }, { "epoch": 0.7368924085199344, "grad_norm": 8.09118366241455, "learning_rate": 1.7082611013764998e-06, "loss": 0.9266, "step": 5397 }, { "epoch": 0.7370289459311852, "grad_norm": 9.240830421447754, "learning_rate": 1.7065970648974344e-06, "loss": 0.926, "step": 5398 }, { "epoch": 0.7371654833424358, "grad_norm": 7.7893548011779785, "learning_rate": 1.704933672496718e-06, "loss": 1.0273, "step": 5399 }, { "epoch": 0.7373020207536866, "grad_norm": 5.396602630615234, "learning_rate": 1.7032709244996559e-06, "loss": 0.9231, "step": 5400 }, { "epoch": 0.7374385581649372, "grad_norm": 5.280228614807129, "learning_rate": 1.7016088212314247e-06, "loss": 0.9924, "step": 5401 }, { "epoch": 0.7375750955761878, "grad_norm": 5.628167629241943, "learning_rate": 1.699947363017075e-06, "loss": 0.9221, "step": 5402 }, { "epoch": 0.7377116329874386, "grad_norm": 6.500683307647705, "learning_rate": 1.6982865501815304e-06, "loss": 1.0441, "step": 5403 }, { "epoch": 0.7378481703986892, "grad_norm": 5.2245192527771, "learning_rate": 1.6966263830495939e-06, "loss": 0.8912, "step": 5404 }, { "epoch": 0.73798470780994, "grad_norm": 6.34283447265625, "learning_rate": 1.6949668619459336e-06, "loss": 1.0079, "step": 5405 }, { "epoch": 0.7381212452211906, "grad_norm": 4.4289231300354, "learning_rate": 1.6933079871950997e-06, "loss": 0.9481, "step": 5406 }, { "epoch": 0.7382577826324412, "grad_norm": 6.599897384643555, "learning_rate": 1.691649759121511e-06, "loss": 0.8778, "step": 5407 }, { "epoch": 0.738394320043692, "grad_norm": 5.126126289367676, "learning_rate": 1.6899921780494577e-06, "loss": 1.0327, "step": 5408 }, { "epoch": 0.7385308574549426, "grad_norm": 6.943210124969482, "learning_rate": 1.688335244303111e-06, "loss": 0.9486, "step": 5409 }, { "epoch": 0.7386673948661934, "grad_norm": 6.362957954406738, "learning_rate": 1.686678958206508e-06, "loss": 0.857, "step": 5410 }, { "epoch": 0.738803932277444, "grad_norm": 4.855505466461182, "learning_rate": 1.6850233200835654e-06, "loss": 0.9588, "step": 5411 }, { "epoch": 0.7389404696886948, "grad_norm": 6.547317028045654, "learning_rate": 1.6833683302580661e-06, "loss": 0.9428, "step": 5412 }, { "epoch": 0.7390770070999454, "grad_norm": 9.201554298400879, "learning_rate": 1.6817139890536743e-06, "loss": 1.1426, "step": 5413 }, { "epoch": 0.739213544511196, "grad_norm": 9.072770118713379, "learning_rate": 1.6800602967939216e-06, "loss": 0.9227, "step": 5414 }, { "epoch": 0.7393500819224468, "grad_norm": 6.972456932067871, "learning_rate": 1.678407253802214e-06, "loss": 0.9784, "step": 5415 }, { "epoch": 0.7394866193336974, "grad_norm": 5.128940582275391, "learning_rate": 1.6767548604018292e-06, "loss": 0.8461, "step": 5416 }, { "epoch": 0.7396231567449482, "grad_norm": 6.411998271942139, "learning_rate": 1.6751031169159227e-06, "loss": 0.9961, "step": 5417 }, { "epoch": 0.7397596941561988, "grad_norm": 6.311262607574463, "learning_rate": 1.673452023667519e-06, "loss": 0.9968, "step": 5418 }, { "epoch": 0.7398962315674494, "grad_norm": 6.738217830657959, "learning_rate": 1.671801580979513e-06, "loss": 1.0101, "step": 5419 }, { "epoch": 0.7400327689787002, "grad_norm": 19.708864212036133, "learning_rate": 1.6701517891746805e-06, "loss": 0.9264, "step": 5420 }, { "epoch": 0.7401693063899508, "grad_norm": 5.952019691467285, "learning_rate": 1.66850264857566e-06, "loss": 0.9418, "step": 5421 }, { "epoch": 0.7403058438012016, "grad_norm": 6.49573278427124, "learning_rate": 1.6668541595049726e-06, "loss": 0.808, "step": 5422 }, { "epoch": 0.7404423812124522, "grad_norm": 24.68328285217285, "learning_rate": 1.6652063222850029e-06, "loss": 1.0816, "step": 5423 }, { "epoch": 0.7405789186237028, "grad_norm": 7.27242374420166, "learning_rate": 1.6635591372380156e-06, "loss": 1.0958, "step": 5424 }, { "epoch": 0.7407154560349536, "grad_norm": 6.736164093017578, "learning_rate": 1.6619126046861428e-06, "loss": 0.9492, "step": 5425 }, { "epoch": 0.7408519934462042, "grad_norm": 9.06712532043457, "learning_rate": 1.6602667249513904e-06, "loss": 0.8473, "step": 5426 }, { "epoch": 0.740988530857455, "grad_norm": 8.262860298156738, "learning_rate": 1.6586214983556358e-06, "loss": 0.9991, "step": 5427 }, { "epoch": 0.7411250682687056, "grad_norm": 5.3303632736206055, "learning_rate": 1.656976925220633e-06, "loss": 1.1455, "step": 5428 }, { "epoch": 0.7412616056799564, "grad_norm": 7.613128662109375, "learning_rate": 1.6553330058680028e-06, "loss": 1.005, "step": 5429 }, { "epoch": 0.741398143091207, "grad_norm": 5.591047286987305, "learning_rate": 1.6536897406192387e-06, "loss": 0.8938, "step": 5430 }, { "epoch": 0.7415346805024576, "grad_norm": 5.6875691413879395, "learning_rate": 1.6520471297957114e-06, "loss": 0.9393, "step": 5431 }, { "epoch": 0.7416712179137084, "grad_norm": 7.427462577819824, "learning_rate": 1.6504051737186572e-06, "loss": 0.8586, "step": 5432 }, { "epoch": 0.741807755324959, "grad_norm": 6.886147499084473, "learning_rate": 1.6487638727091892e-06, "loss": 0.9924, "step": 5433 }, { "epoch": 0.7419442927362098, "grad_norm": 10.716341018676758, "learning_rate": 1.6471232270882887e-06, "loss": 0.8895, "step": 5434 }, { "epoch": 0.7420808301474604, "grad_norm": 7.350010871887207, "learning_rate": 1.645483237176813e-06, "loss": 1.0215, "step": 5435 }, { "epoch": 0.742217367558711, "grad_norm": 6.856513500213623, "learning_rate": 1.6438439032954857e-06, "loss": 0.9467, "step": 5436 }, { "epoch": 0.7423539049699618, "grad_norm": 7.694068908691406, "learning_rate": 1.642205225764908e-06, "loss": 0.9418, "step": 5437 }, { "epoch": 0.7424904423812124, "grad_norm": 6.223573207855225, "learning_rate": 1.6405672049055482e-06, "loss": 1.0389, "step": 5438 }, { "epoch": 0.7426269797924632, "grad_norm": 5.833287239074707, "learning_rate": 1.6389298410377485e-06, "loss": 1.0939, "step": 5439 }, { "epoch": 0.7427635172037138, "grad_norm": 7.12050724029541, "learning_rate": 1.6372931344817217e-06, "loss": 1.0221, "step": 5440 }, { "epoch": 0.7429000546149646, "grad_norm": 9.198968887329102, "learning_rate": 1.6356570855575505e-06, "loss": 0.8973, "step": 5441 }, { "epoch": 0.7430365920262152, "grad_norm": 5.852164268493652, "learning_rate": 1.6340216945851944e-06, "loss": 1.0215, "step": 5442 }, { "epoch": 0.7431731294374658, "grad_norm": 7.947495937347412, "learning_rate": 1.6323869618844767e-06, "loss": 0.9772, "step": 5443 }, { "epoch": 0.7433096668487166, "grad_norm": 7.68768310546875, "learning_rate": 1.6307528877751e-06, "loss": 1.0225, "step": 5444 }, { "epoch": 0.7434462042599672, "grad_norm": 8.58976936340332, "learning_rate": 1.6291194725766297e-06, "loss": 0.8896, "step": 5445 }, { "epoch": 0.743582741671218, "grad_norm": 8.626755714416504, "learning_rate": 1.6274867166085106e-06, "loss": 0.9565, "step": 5446 }, { "epoch": 0.7437192790824686, "grad_norm": 10.464741706848145, "learning_rate": 1.6258546201900516e-06, "loss": 1.13, "step": 5447 }, { "epoch": 0.7438558164937192, "grad_norm": 10.810221672058105, "learning_rate": 1.6242231836404382e-06, "loss": 0.8891, "step": 5448 }, { "epoch": 0.74399235390497, "grad_norm": 6.1493353843688965, "learning_rate": 1.622592407278723e-06, "loss": 0.9317, "step": 5449 }, { "epoch": 0.7441288913162206, "grad_norm": 6.855332851409912, "learning_rate": 1.6209622914238294e-06, "loss": 0.8733, "step": 5450 }, { "epoch": 0.7442654287274714, "grad_norm": 5.346677303314209, "learning_rate": 1.6193328363945554e-06, "loss": 0.9404, "step": 5451 }, { "epoch": 0.744401966138722, "grad_norm": 5.85287618637085, "learning_rate": 1.6177040425095664e-06, "loss": 0.9406, "step": 5452 }, { "epoch": 0.7445385035499726, "grad_norm": 6.8572916984558105, "learning_rate": 1.6160759100873991e-06, "loss": 0.88, "step": 5453 }, { "epoch": 0.7446750409612234, "grad_norm": 8.925386428833008, "learning_rate": 1.6144484394464599e-06, "loss": 0.8469, "step": 5454 }, { "epoch": 0.744811578372474, "grad_norm": 7.370643615722656, "learning_rate": 1.6128216309050305e-06, "loss": 0.9999, "step": 5455 }, { "epoch": 0.7449481157837248, "grad_norm": 6.756781578063965, "learning_rate": 1.611195484781256e-06, "loss": 0.9587, "step": 5456 }, { "epoch": 0.7450846531949754, "grad_norm": 11.431467056274414, "learning_rate": 1.6095700013931592e-06, "loss": 0.8155, "step": 5457 }, { "epoch": 0.7452211906062262, "grad_norm": 8.235432624816895, "learning_rate": 1.6079451810586278e-06, "loss": 1.0925, "step": 5458 }, { "epoch": 0.7453577280174768, "grad_norm": 10.884265899658203, "learning_rate": 1.6063210240954202e-06, "loss": 0.9665, "step": 5459 }, { "epoch": 0.7454942654287274, "grad_norm": 6.388981342315674, "learning_rate": 1.6046975308211699e-06, "loss": 0.9314, "step": 5460 }, { "epoch": 0.7456308028399782, "grad_norm": 6.471247673034668, "learning_rate": 1.6030747015533742e-06, "loss": 0.9562, "step": 5461 }, { "epoch": 0.7457673402512288, "grad_norm": 4.95382022857666, "learning_rate": 1.601452536609407e-06, "loss": 0.9971, "step": 5462 }, { "epoch": 0.7459038776624796, "grad_norm": 5.611825942993164, "learning_rate": 1.599831036306505e-06, "loss": 0.9092, "step": 5463 }, { "epoch": 0.7460404150737302, "grad_norm": 8.237981796264648, "learning_rate": 1.5982102009617834e-06, "loss": 0.9552, "step": 5464 }, { "epoch": 0.7461769524849808, "grad_norm": 6.931246757507324, "learning_rate": 1.5965900308922206e-06, "loss": 0.8803, "step": 5465 }, { "epoch": 0.7463134898962316, "grad_norm": 6.057047367095947, "learning_rate": 1.5949705264146669e-06, "loss": 0.7462, "step": 5466 }, { "epoch": 0.7464500273074822, "grad_norm": 7.7313055992126465, "learning_rate": 1.593351687845841e-06, "loss": 1.0506, "step": 5467 }, { "epoch": 0.746586564718733, "grad_norm": 6.695274829864502, "learning_rate": 1.5917335155023368e-06, "loss": 0.9378, "step": 5468 }, { "epoch": 0.7467231021299836, "grad_norm": 6.946703910827637, "learning_rate": 1.5901160097006118e-06, "loss": 0.7384, "step": 5469 }, { "epoch": 0.7468596395412342, "grad_norm": 7.054993629455566, "learning_rate": 1.5884991707569947e-06, "loss": 1.0254, "step": 5470 }, { "epoch": 0.746996176952485, "grad_norm": 6.198383331298828, "learning_rate": 1.5868829989876865e-06, "loss": 0.8798, "step": 5471 }, { "epoch": 0.7471327143637356, "grad_norm": 10.783284187316895, "learning_rate": 1.5852674947087543e-06, "loss": 1.0294, "step": 5472 }, { "epoch": 0.7472692517749864, "grad_norm": 6.4701247215271, "learning_rate": 1.5836526582361384e-06, "loss": 1.035, "step": 5473 }, { "epoch": 0.747405789186237, "grad_norm": 6.839522838592529, "learning_rate": 1.5820384898856433e-06, "loss": 1.0107, "step": 5474 }, { "epoch": 0.7475423265974878, "grad_norm": 7.446542739868164, "learning_rate": 1.5804249899729485e-06, "loss": 0.9391, "step": 5475 }, { "epoch": 0.7476788640087384, "grad_norm": 5.719664096832275, "learning_rate": 1.5788121588135975e-06, "loss": 0.8811, "step": 5476 }, { "epoch": 0.747815401419989, "grad_norm": 19.459495544433594, "learning_rate": 1.5771999967230084e-06, "loss": 1.0256, "step": 5477 }, { "epoch": 0.7479519388312398, "grad_norm": 6.9660258293151855, "learning_rate": 1.5755885040164642e-06, "loss": 0.9628, "step": 5478 }, { "epoch": 0.7480884762424904, "grad_norm": 5.144484519958496, "learning_rate": 1.5739776810091184e-06, "loss": 0.8102, "step": 5479 }, { "epoch": 0.7482250136537412, "grad_norm": 6.520107746124268, "learning_rate": 1.5723675280159934e-06, "loss": 0.9244, "step": 5480 }, { "epoch": 0.7483615510649918, "grad_norm": 7.2037353515625, "learning_rate": 1.5707580453519795e-06, "loss": 1.0556, "step": 5481 }, { "epoch": 0.7484980884762424, "grad_norm": 6.575085163116455, "learning_rate": 1.5691492333318403e-06, "loss": 0.855, "step": 5482 }, { "epoch": 0.7486346258874932, "grad_norm": 7.262720584869385, "learning_rate": 1.5675410922702012e-06, "loss": 1.0584, "step": 5483 }, { "epoch": 0.7487711632987438, "grad_norm": 6.983469009399414, "learning_rate": 1.5659336224815642e-06, "loss": 0.9849, "step": 5484 }, { "epoch": 0.7489077007099946, "grad_norm": 5.822343826293945, "learning_rate": 1.5643268242802928e-06, "loss": 1.0242, "step": 5485 }, { "epoch": 0.7490442381212452, "grad_norm": 5.7765212059021, "learning_rate": 1.5627206979806254e-06, "loss": 0.9259, "step": 5486 }, { "epoch": 0.749180775532496, "grad_norm": 6.202066898345947, "learning_rate": 1.561115243896663e-06, "loss": 0.8236, "step": 5487 }, { "epoch": 0.7493173129437466, "grad_norm": 8.22131633758545, "learning_rate": 1.5595104623423812e-06, "loss": 0.9142, "step": 5488 }, { "epoch": 0.7494538503549972, "grad_norm": 6.247834205627441, "learning_rate": 1.5579063536316198e-06, "loss": 0.9717, "step": 5489 }, { "epoch": 0.749590387766248, "grad_norm": 5.18611478805542, "learning_rate": 1.5563029180780876e-06, "loss": 1.0416, "step": 5490 }, { "epoch": 0.7497269251774986, "grad_norm": 4.891385555267334, "learning_rate": 1.5547001559953634e-06, "loss": 0.8385, "step": 5491 }, { "epoch": 0.7498634625887494, "grad_norm": 5.233121395111084, "learning_rate": 1.553098067696891e-06, "loss": 0.8766, "step": 5492 }, { "epoch": 0.75, "grad_norm": 6.194385051727295, "learning_rate": 1.551496653495988e-06, "loss": 0.924, "step": 5493 }, { "epoch": 0.7501365374112506, "grad_norm": 5.278480529785156, "learning_rate": 1.549895913705834e-06, "loss": 1.0175, "step": 5494 }, { "epoch": 0.7502730748225014, "grad_norm": 4.483291149139404, "learning_rate": 1.5482958486394822e-06, "loss": 1.1385, "step": 5495 }, { "epoch": 0.750409612233752, "grad_norm": 6.312232971191406, "learning_rate": 1.5466964586098487e-06, "loss": 0.9825, "step": 5496 }, { "epoch": 0.7505461496450028, "grad_norm": 6.75675630569458, "learning_rate": 1.5450977439297237e-06, "loss": 0.9683, "step": 5497 }, { "epoch": 0.7506826870562534, "grad_norm": 5.689468860626221, "learning_rate": 1.5434997049117595e-06, "loss": 1.0261, "step": 5498 }, { "epoch": 0.750819224467504, "grad_norm": 5.644844055175781, "learning_rate": 1.541902341868477e-06, "loss": 0.9952, "step": 5499 }, { "epoch": 0.7509557618787548, "grad_norm": 5.714580059051514, "learning_rate": 1.5403056551122697e-06, "loss": 0.9516, "step": 5500 }, { "epoch": 0.7510922992900054, "grad_norm": 6.279755592346191, "learning_rate": 1.5387096449553924e-06, "loss": 0.8322, "step": 5501 }, { "epoch": 0.7512288367012562, "grad_norm": 6.5505170822143555, "learning_rate": 1.5371143117099741e-06, "loss": 0.855, "step": 5502 }, { "epoch": 0.7513653741125068, "grad_norm": 8.800761222839355, "learning_rate": 1.5355196556880065e-06, "loss": 0.9693, "step": 5503 }, { "epoch": 0.7515019115237576, "grad_norm": 6.718132972717285, "learning_rate": 1.5339256772013507e-06, "loss": 0.9461, "step": 5504 }, { "epoch": 0.7516384489350082, "grad_norm": 7.769277095794678, "learning_rate": 1.532332376561732e-06, "loss": 1.1699, "step": 5505 }, { "epoch": 0.7517749863462588, "grad_norm": 9.254415512084961, "learning_rate": 1.530739754080751e-06, "loss": 0.9484, "step": 5506 }, { "epoch": 0.7519115237575096, "grad_norm": 5.1873555183410645, "learning_rate": 1.5291478100698676e-06, "loss": 0.8629, "step": 5507 }, { "epoch": 0.7520480611687602, "grad_norm": 7.250380039215088, "learning_rate": 1.5275565448404146e-06, "loss": 1.029, "step": 5508 }, { "epoch": 0.752184598580011, "grad_norm": 9.442801475524902, "learning_rate": 1.5259659587035886e-06, "loss": 1.0865, "step": 5509 }, { "epoch": 0.7523211359912616, "grad_norm": 5.271417617797852, "learning_rate": 1.524376051970453e-06, "loss": 1.0696, "step": 5510 }, { "epoch": 0.7524576734025122, "grad_norm": 5.279184341430664, "learning_rate": 1.5227868249519423e-06, "loss": 0.968, "step": 5511 }, { "epoch": 0.752594210813763, "grad_norm": 7.222929000854492, "learning_rate": 1.5211982779588535e-06, "loss": 0.9543, "step": 5512 }, { "epoch": 0.7527307482250136, "grad_norm": 5.994690418243408, "learning_rate": 1.519610411301855e-06, "loss": 0.9865, "step": 5513 }, { "epoch": 0.7528672856362644, "grad_norm": 6.7898850440979, "learning_rate": 1.5180232252914778e-06, "loss": 0.8349, "step": 5514 }, { "epoch": 0.753003823047515, "grad_norm": 5.227200508117676, "learning_rate": 1.5164367202381236e-06, "loss": 0.8805, "step": 5515 }, { "epoch": 0.7531403604587658, "grad_norm": 6.53846549987793, "learning_rate": 1.5148508964520586e-06, "loss": 0.9809, "step": 5516 }, { "epoch": 0.7532768978700164, "grad_norm": 5.594987392425537, "learning_rate": 1.513265754243416e-06, "loss": 0.8683, "step": 5517 }, { "epoch": 0.753413435281267, "grad_norm": 5.93937873840332, "learning_rate": 1.5116812939221964e-06, "loss": 1.003, "step": 5518 }, { "epoch": 0.7535499726925178, "grad_norm": 7.887880802154541, "learning_rate": 1.5100975157982645e-06, "loss": 1.0103, "step": 5519 }, { "epoch": 0.7536865101037684, "grad_norm": 4.290431499481201, "learning_rate": 1.5085144201813572e-06, "loss": 0.9406, "step": 5520 }, { "epoch": 0.7538230475150192, "grad_norm": 6.555044651031494, "learning_rate": 1.5069320073810722e-06, "loss": 0.9715, "step": 5521 }, { "epoch": 0.7539595849262698, "grad_norm": 7.067183494567871, "learning_rate": 1.505350277706878e-06, "loss": 1.0956, "step": 5522 }, { "epoch": 0.7540961223375204, "grad_norm": 5.6782145500183105, "learning_rate": 1.5037692314681047e-06, "loss": 0.8922, "step": 5523 }, { "epoch": 0.7542326597487712, "grad_norm": 6.920835971832275, "learning_rate": 1.502188868973955e-06, "loss": 0.9049, "step": 5524 }, { "epoch": 0.7543691971600218, "grad_norm": 6.343334197998047, "learning_rate": 1.5006091905334907e-06, "loss": 0.9048, "step": 5525 }, { "epoch": 0.7545057345712726, "grad_norm": 5.526154041290283, "learning_rate": 1.499030196455647e-06, "loss": 0.8098, "step": 5526 }, { "epoch": 0.7546422719825232, "grad_norm": 5.437511444091797, "learning_rate": 1.4974518870492189e-06, "loss": 0.9652, "step": 5527 }, { "epoch": 0.7547788093937738, "grad_norm": 5.9558868408203125, "learning_rate": 1.4958742626228728e-06, "loss": 0.9758, "step": 5528 }, { "epoch": 0.7549153468050246, "grad_norm": 6.278284072875977, "learning_rate": 1.4942973234851383e-06, "loss": 1.0188, "step": 5529 }, { "epoch": 0.7550518842162752, "grad_norm": 6.620179653167725, "learning_rate": 1.4927210699444106e-06, "loss": 1.0071, "step": 5530 }, { "epoch": 0.755188421627526, "grad_norm": 9.065377235412598, "learning_rate": 1.4911455023089516e-06, "loss": 1.0958, "step": 5531 }, { "epoch": 0.7553249590387766, "grad_norm": 5.981166839599609, "learning_rate": 1.4895706208868876e-06, "loss": 0.975, "step": 5532 }, { "epoch": 0.7554614964500274, "grad_norm": 6.369169235229492, "learning_rate": 1.4879964259862162e-06, "loss": 0.8955, "step": 5533 }, { "epoch": 0.755598033861278, "grad_norm": 5.225766181945801, "learning_rate": 1.486422917914792e-06, "loss": 1.0195, "step": 5534 }, { "epoch": 0.7557345712725286, "grad_norm": 5.824159622192383, "learning_rate": 1.4848500969803447e-06, "loss": 0.8652, "step": 5535 }, { "epoch": 0.7558711086837794, "grad_norm": 8.87770938873291, "learning_rate": 1.483277963490461e-06, "loss": 1.0084, "step": 5536 }, { "epoch": 0.75600764609503, "grad_norm": 5.560022830963135, "learning_rate": 1.4817065177526002e-06, "loss": 0.9617, "step": 5537 }, { "epoch": 0.7561441835062808, "grad_norm": 11.149993896484375, "learning_rate": 1.480135760074083e-06, "loss": 1.0929, "step": 5538 }, { "epoch": 0.7562807209175314, "grad_norm": 6.989748477935791, "learning_rate": 1.4785656907620937e-06, "loss": 0.7216, "step": 5539 }, { "epoch": 0.756417258328782, "grad_norm": 5.189471244812012, "learning_rate": 1.4769963101236894e-06, "loss": 0.8561, "step": 5540 }, { "epoch": 0.7565537957400328, "grad_norm": 7.012948036193848, "learning_rate": 1.4754276184657846e-06, "loss": 1.0862, "step": 5541 }, { "epoch": 0.7566903331512834, "grad_norm": 5.873268127441406, "learning_rate": 1.4738596160951646e-06, "loss": 0.9411, "step": 5542 }, { "epoch": 0.7568268705625342, "grad_norm": 7.889925479888916, "learning_rate": 1.4722923033184772e-06, "loss": 0.9273, "step": 5543 }, { "epoch": 0.7569634079737848, "grad_norm": 7.335233688354492, "learning_rate": 1.4707256804422348e-06, "loss": 0.9133, "step": 5544 }, { "epoch": 0.7570999453850354, "grad_norm": 5.826662063598633, "learning_rate": 1.4691597477728147e-06, "loss": 0.9168, "step": 5545 }, { "epoch": 0.7572364827962862, "grad_norm": 6.488889694213867, "learning_rate": 1.4675945056164642e-06, "loss": 0.9393, "step": 5546 }, { "epoch": 0.7573730202075368, "grad_norm": 4.962029457092285, "learning_rate": 1.4660299542792882e-06, "loss": 0.9476, "step": 5547 }, { "epoch": 0.7575095576187876, "grad_norm": 5.849429130554199, "learning_rate": 1.4644660940672628e-06, "loss": 0.9714, "step": 5548 }, { "epoch": 0.7576460950300382, "grad_norm": 5.463021755218506, "learning_rate": 1.4629029252862253e-06, "loss": 0.8732, "step": 5549 }, { "epoch": 0.757782632441289, "grad_norm": 6.7772216796875, "learning_rate": 1.4613404482418765e-06, "loss": 0.8814, "step": 5550 }, { "epoch": 0.7579191698525396, "grad_norm": 6.345326900482178, "learning_rate": 1.459778663239788e-06, "loss": 0.776, "step": 5551 }, { "epoch": 0.7580557072637902, "grad_norm": 5.327242851257324, "learning_rate": 1.4582175705853886e-06, "loss": 1.0749, "step": 5552 }, { "epoch": 0.758192244675041, "grad_norm": 7.080816268920898, "learning_rate": 1.4566571705839782e-06, "loss": 0.9104, "step": 5553 }, { "epoch": 0.7583287820862916, "grad_norm": 6.217944622039795, "learning_rate": 1.4550974635407173e-06, "loss": 0.9152, "step": 5554 }, { "epoch": 0.7584653194975424, "grad_norm": 7.275456428527832, "learning_rate": 1.453538449760632e-06, "loss": 0.9391, "step": 5555 }, { "epoch": 0.758601856908793, "grad_norm": 9.964015007019043, "learning_rate": 1.4519801295486102e-06, "loss": 0.9736, "step": 5556 }, { "epoch": 0.7587383943200436, "grad_norm": 6.5865302085876465, "learning_rate": 1.450422503209411e-06, "loss": 0.937, "step": 5557 }, { "epoch": 0.7588749317312944, "grad_norm": 5.91324520111084, "learning_rate": 1.448865571047649e-06, "loss": 0.9294, "step": 5558 }, { "epoch": 0.759011469142545, "grad_norm": 8.104875564575195, "learning_rate": 1.4473093333678124e-06, "loss": 0.9266, "step": 5559 }, { "epoch": 0.7591480065537958, "grad_norm": 5.633307933807373, "learning_rate": 1.4457537904742451e-06, "loss": 1.0736, "step": 5560 }, { "epoch": 0.7592845439650464, "grad_norm": 4.841339588165283, "learning_rate": 1.4441989426711595e-06, "loss": 1.0469, "step": 5561 }, { "epoch": 0.7594210813762972, "grad_norm": 6.934905529022217, "learning_rate": 1.4426447902626323e-06, "loss": 1.0446, "step": 5562 }, { "epoch": 0.7595576187875478, "grad_norm": 12.991809844970703, "learning_rate": 1.4410913335526012e-06, "loss": 1.0541, "step": 5563 }, { "epoch": 0.7596941561987984, "grad_norm": 5.218640327453613, "learning_rate": 1.439538572844873e-06, "loss": 0.9194, "step": 5564 }, { "epoch": 0.7598306936100492, "grad_norm": 6.119372367858887, "learning_rate": 1.4379865084431115e-06, "loss": 0.8474, "step": 5565 }, { "epoch": 0.7599672310212998, "grad_norm": 5.952753067016602, "learning_rate": 1.436435140650852e-06, "loss": 0.957, "step": 5566 }, { "epoch": 0.7601037684325506, "grad_norm": 6.096498012542725, "learning_rate": 1.4348844697714875e-06, "loss": 1.0166, "step": 5567 }, { "epoch": 0.7602403058438012, "grad_norm": 4.898193359375, "learning_rate": 1.4333344961082767e-06, "loss": 0.9698, "step": 5568 }, { "epoch": 0.7603768432550518, "grad_norm": 6.910530090332031, "learning_rate": 1.4317852199643428e-06, "loss": 1.2029, "step": 5569 }, { "epoch": 0.7605133806663026, "grad_norm": 6.609477519989014, "learning_rate": 1.4302366416426694e-06, "loss": 0.8463, "step": 5570 }, { "epoch": 0.7606499180775532, "grad_norm": 5.456730842590332, "learning_rate": 1.4286887614461098e-06, "loss": 0.9608, "step": 5571 }, { "epoch": 0.760786455488804, "grad_norm": 6.119638919830322, "learning_rate": 1.4271415796773742e-06, "loss": 1.0359, "step": 5572 }, { "epoch": 0.7609229929000546, "grad_norm": 6.854240894317627, "learning_rate": 1.4255950966390414e-06, "loss": 0.8766, "step": 5573 }, { "epoch": 0.7610595303113052, "grad_norm": 4.278013706207275, "learning_rate": 1.4240493126335487e-06, "loss": 0.9402, "step": 5574 }, { "epoch": 0.761196067722556, "grad_norm": 8.274951934814453, "learning_rate": 1.4225042279632017e-06, "loss": 0.7625, "step": 5575 }, { "epoch": 0.7613326051338066, "grad_norm": 6.8625640869140625, "learning_rate": 1.4209598429301646e-06, "loss": 0.976, "step": 5576 }, { "epoch": 0.7614691425450574, "grad_norm": 9.642528533935547, "learning_rate": 1.4194161578364696e-06, "loss": 0.8775, "step": 5577 }, { "epoch": 0.761605679956308, "grad_norm": 6.229416370391846, "learning_rate": 1.4178731729840061e-06, "loss": 0.9223, "step": 5578 }, { "epoch": 0.7617422173675588, "grad_norm": 6.202539920806885, "learning_rate": 1.4163308886745335e-06, "loss": 0.9032, "step": 5579 }, { "epoch": 0.7618787547788094, "grad_norm": 5.970988750457764, "learning_rate": 1.4147893052096684e-06, "loss": 1.0288, "step": 5580 }, { "epoch": 0.76201529219006, "grad_norm": 6.009920597076416, "learning_rate": 1.413248422890892e-06, "loss": 1.0307, "step": 5581 }, { "epoch": 0.7621518296013108, "grad_norm": 6.137457847595215, "learning_rate": 1.41170824201955e-06, "loss": 0.735, "step": 5582 }, { "epoch": 0.7622883670125614, "grad_norm": 8.284481048583984, "learning_rate": 1.410168762896847e-06, "loss": 1.0059, "step": 5583 }, { "epoch": 0.7624249044238122, "grad_norm": 5.671302795410156, "learning_rate": 1.4086299858238573e-06, "loss": 0.9255, "step": 5584 }, { "epoch": 0.7625614418350628, "grad_norm": 6.473966598510742, "learning_rate": 1.4070919111015097e-06, "loss": 0.8301, "step": 5585 }, { "epoch": 0.7626979792463134, "grad_norm": 5.826488971710205, "learning_rate": 1.4055545390306036e-06, "loss": 0.9353, "step": 5586 }, { "epoch": 0.7628345166575642, "grad_norm": 7.0617241859436035, "learning_rate": 1.4040178699117928e-06, "loss": 0.9376, "step": 5587 }, { "epoch": 0.7629710540688148, "grad_norm": 6.5181732177734375, "learning_rate": 1.4024819040456023e-06, "loss": 0.9059, "step": 5588 }, { "epoch": 0.7631075914800656, "grad_norm": 6.5886054039001465, "learning_rate": 1.4009466417324126e-06, "loss": 0.9444, "step": 5589 }, { "epoch": 0.7632441288913162, "grad_norm": 15.588363647460938, "learning_rate": 1.3994120832724678e-06, "loss": 0.8224, "step": 5590 }, { "epoch": 0.763380666302567, "grad_norm": 5.224120616912842, "learning_rate": 1.3978782289658793e-06, "loss": 0.8432, "step": 5591 }, { "epoch": 0.7635172037138176, "grad_norm": 9.698444366455078, "learning_rate": 1.3963450791126137e-06, "loss": 0.954, "step": 5592 }, { "epoch": 0.7636537411250682, "grad_norm": 6.5652031898498535, "learning_rate": 1.3948126340125063e-06, "loss": 0.9475, "step": 5593 }, { "epoch": 0.763790278536319, "grad_norm": 8.205672264099121, "learning_rate": 1.3932808939652498e-06, "loss": 1.0428, "step": 5594 }, { "epoch": 0.7639268159475696, "grad_norm": 7.239740371704102, "learning_rate": 1.3917498592704016e-06, "loss": 0.9388, "step": 5595 }, { "epoch": 0.7640633533588204, "grad_norm": 7.898396968841553, "learning_rate": 1.390219530227378e-06, "loss": 1.0241, "step": 5596 }, { "epoch": 0.764199890770071, "grad_norm": 4.585214138031006, "learning_rate": 1.388689907135463e-06, "loss": 0.8886, "step": 5597 }, { "epoch": 0.7643364281813216, "grad_norm": 7.911113739013672, "learning_rate": 1.3871609902937965e-06, "loss": 1.0112, "step": 5598 }, { "epoch": 0.7644729655925724, "grad_norm": 6.399995803833008, "learning_rate": 1.3856327800013864e-06, "loss": 0.8762, "step": 5599 }, { "epoch": 0.764609503003823, "grad_norm": 5.72900915145874, "learning_rate": 1.3841052765570962e-06, "loss": 0.9354, "step": 5600 }, { "epoch": 0.7647460404150738, "grad_norm": 6.715089797973633, "learning_rate": 1.382578480259653e-06, "loss": 0.809, "step": 5601 }, { "epoch": 0.7648825778263244, "grad_norm": 6.155733108520508, "learning_rate": 1.38105239140765e-06, "loss": 0.9806, "step": 5602 }, { "epoch": 0.765019115237575, "grad_norm": 5.696329593658447, "learning_rate": 1.379527010299535e-06, "loss": 0.882, "step": 5603 }, { "epoch": 0.7651556526488258, "grad_norm": 12.594465255737305, "learning_rate": 1.378002337233625e-06, "loss": 1.0084, "step": 5604 }, { "epoch": 0.7652921900600764, "grad_norm": 5.805490970611572, "learning_rate": 1.37647837250809e-06, "loss": 1.0391, "step": 5605 }, { "epoch": 0.7654287274713272, "grad_norm": 5.250081539154053, "learning_rate": 1.3749551164209707e-06, "loss": 0.8011, "step": 5606 }, { "epoch": 0.7655652648825778, "grad_norm": 4.62357234954834, "learning_rate": 1.3734325692701617e-06, "loss": 0.814, "step": 5607 }, { "epoch": 0.7657018022938286, "grad_norm": 4.106560707092285, "learning_rate": 1.3719107313534224e-06, "loss": 0.8824, "step": 5608 }, { "epoch": 0.7658383397050792, "grad_norm": 7.007771015167236, "learning_rate": 1.3703896029683728e-06, "loss": 0.9334, "step": 5609 }, { "epoch": 0.7659748771163298, "grad_norm": 5.535504341125488, "learning_rate": 1.368869184412493e-06, "loss": 0.8446, "step": 5610 }, { "epoch": 0.7661114145275806, "grad_norm": 8.865386009216309, "learning_rate": 1.3673494759831279e-06, "loss": 0.9716, "step": 5611 }, { "epoch": 0.7662479519388312, "grad_norm": 5.76627254486084, "learning_rate": 1.3658304779774784e-06, "loss": 0.8773, "step": 5612 }, { "epoch": 0.766384489350082, "grad_norm": 7.309541702270508, "learning_rate": 1.3643121906926127e-06, "loss": 0.9546, "step": 5613 }, { "epoch": 0.7665210267613326, "grad_norm": 5.674097537994385, "learning_rate": 1.3627946144254522e-06, "loss": 1.0568, "step": 5614 }, { "epoch": 0.7666575641725832, "grad_norm": 7.903212547302246, "learning_rate": 1.361277749472788e-06, "loss": 0.8887, "step": 5615 }, { "epoch": 0.766794101583834, "grad_norm": 7.022305011749268, "learning_rate": 1.3597615961312638e-06, "loss": 0.9265, "step": 5616 }, { "epoch": 0.7669306389950846, "grad_norm": 6.023830413818359, "learning_rate": 1.3582461546973913e-06, "loss": 0.8752, "step": 5617 }, { "epoch": 0.7670671764063354, "grad_norm": 5.849997520446777, "learning_rate": 1.3567314254675385e-06, "loss": 0.8263, "step": 5618 }, { "epoch": 0.767203713817586, "grad_norm": 5.070760726928711, "learning_rate": 1.3552174087379345e-06, "loss": 1.0689, "step": 5619 }, { "epoch": 0.7673402512288368, "grad_norm": 10.708206176757812, "learning_rate": 1.3537041048046696e-06, "loss": 0.9811, "step": 5620 }, { "epoch": 0.7674767886400874, "grad_norm": 6.054457664489746, "learning_rate": 1.3521915139636943e-06, "loss": 0.9394, "step": 5621 }, { "epoch": 0.767613326051338, "grad_norm": 7.273226261138916, "learning_rate": 1.3506796365108232e-06, "loss": 1.0075, "step": 5622 }, { "epoch": 0.7677498634625888, "grad_norm": 5.627878189086914, "learning_rate": 1.3491684727417249e-06, "loss": 1.0065, "step": 5623 }, { "epoch": 0.7678864008738394, "grad_norm": 12.190152168273926, "learning_rate": 1.3476580229519354e-06, "loss": 0.927, "step": 5624 }, { "epoch": 0.7680229382850902, "grad_norm": 10.417658805847168, "learning_rate": 1.3461482874368442e-06, "loss": 0.8529, "step": 5625 }, { "epoch": 0.7681594756963408, "grad_norm": 9.119796752929688, "learning_rate": 1.3446392664917079e-06, "loss": 0.9837, "step": 5626 }, { "epoch": 0.7682960131075914, "grad_norm": 6.344748497009277, "learning_rate": 1.3431309604116372e-06, "loss": 0.9786, "step": 5627 }, { "epoch": 0.7684325505188422, "grad_norm": 6.077754497528076, "learning_rate": 1.3416233694916086e-06, "loss": 0.831, "step": 5628 }, { "epoch": 0.7685690879300928, "grad_norm": 6.2380876541137695, "learning_rate": 1.3401164940264543e-06, "loss": 0.9173, "step": 5629 }, { "epoch": 0.7687056253413436, "grad_norm": 5.2987751960754395, "learning_rate": 1.338610334310867e-06, "loss": 0.9616, "step": 5630 }, { "epoch": 0.7688421627525942, "grad_norm": 9.727751731872559, "learning_rate": 1.3371048906394036e-06, "loss": 1.0701, "step": 5631 }, { "epoch": 0.7689787001638448, "grad_norm": 6.685088157653809, "learning_rate": 1.3356001633064762e-06, "loss": 1.0344, "step": 5632 }, { "epoch": 0.7691152375750956, "grad_norm": 7.248749732971191, "learning_rate": 1.334096152606359e-06, "loss": 0.9147, "step": 5633 }, { "epoch": 0.7692517749863462, "grad_norm": 5.03084659576416, "learning_rate": 1.3325928588331838e-06, "loss": 0.9353, "step": 5634 }, { "epoch": 0.769388312397597, "grad_norm": 6.039592742919922, "learning_rate": 1.3310902822809479e-06, "loss": 0.9302, "step": 5635 }, { "epoch": 0.7695248498088476, "grad_norm": 8.216293334960938, "learning_rate": 1.3295884232435008e-06, "loss": 1.0569, "step": 5636 }, { "epoch": 0.7696613872200984, "grad_norm": 5.939653396606445, "learning_rate": 1.328087282014558e-06, "loss": 0.8688, "step": 5637 }, { "epoch": 0.769797924631349, "grad_norm": 8.525836944580078, "learning_rate": 1.3265868588876902e-06, "loss": 1.096, "step": 5638 }, { "epoch": 0.7699344620425996, "grad_norm": 8.282645225524902, "learning_rate": 1.3250871541563316e-06, "loss": 1.0363, "step": 5639 }, { "epoch": 0.7700709994538504, "grad_norm": 5.716739654541016, "learning_rate": 1.3235881681137724e-06, "loss": 0.8388, "step": 5640 }, { "epoch": 0.770207536865101, "grad_norm": 6.771587371826172, "learning_rate": 1.3220899010531624e-06, "loss": 1.0373, "step": 5641 }, { "epoch": 0.7703440742763518, "grad_norm": 5.635565280914307, "learning_rate": 1.3205923532675146e-06, "loss": 1.0579, "step": 5642 }, { "epoch": 0.7704806116876024, "grad_norm": 7.794984817504883, "learning_rate": 1.3190955250496962e-06, "loss": 1.0165, "step": 5643 }, { "epoch": 0.770617149098853, "grad_norm": 6.116494178771973, "learning_rate": 1.3175994166924394e-06, "loss": 0.9262, "step": 5644 }, { "epoch": 0.7707536865101038, "grad_norm": 5.673656463623047, "learning_rate": 1.3161040284883297e-06, "loss": 0.9375, "step": 5645 }, { "epoch": 0.7708902239213544, "grad_norm": 8.932343482971191, "learning_rate": 1.314609360729816e-06, "loss": 0.9394, "step": 5646 }, { "epoch": 0.7710267613326052, "grad_norm": 7.0869927406311035, "learning_rate": 1.3131154137092028e-06, "loss": 1.0229, "step": 5647 }, { "epoch": 0.7711632987438558, "grad_norm": 5.5869317054748535, "learning_rate": 1.3116221877186585e-06, "loss": 0.9262, "step": 5648 }, { "epoch": 0.7712998361551064, "grad_norm": 6.464390754699707, "learning_rate": 1.3101296830502064e-06, "loss": 0.9053, "step": 5649 }, { "epoch": 0.7714363735663572, "grad_norm": 11.509420394897461, "learning_rate": 1.3086378999957277e-06, "loss": 1.036, "step": 5650 }, { "epoch": 0.7715729109776078, "grad_norm": 7.032009124755859, "learning_rate": 1.3071468388469688e-06, "loss": 0.8452, "step": 5651 }, { "epoch": 0.7717094483888586, "grad_norm": 6.019516468048096, "learning_rate": 1.3056564998955274e-06, "loss": 0.9506, "step": 5652 }, { "epoch": 0.7718459858001092, "grad_norm": 9.136763572692871, "learning_rate": 1.3041668834328669e-06, "loss": 0.8732, "step": 5653 }, { "epoch": 0.77198252321136, "grad_norm": 8.838974952697754, "learning_rate": 1.3026779897503023e-06, "loss": 0.9433, "step": 5654 }, { "epoch": 0.7721190606226106, "grad_norm": 7.117635726928711, "learning_rate": 1.3011898191390137e-06, "loss": 1.0656, "step": 5655 }, { "epoch": 0.7722555980338612, "grad_norm": 6.070577144622803, "learning_rate": 1.2997023718900354e-06, "loss": 0.8401, "step": 5656 }, { "epoch": 0.772392135445112, "grad_norm": 7.352748394012451, "learning_rate": 1.2982156482942632e-06, "loss": 1.0143, "step": 5657 }, { "epoch": 0.7725286728563626, "grad_norm": 7.554833889007568, "learning_rate": 1.2967296486424497e-06, "loss": 0.9943, "step": 5658 }, { "epoch": 0.7726652102676134, "grad_norm": 7.033071041107178, "learning_rate": 1.2952443732252058e-06, "loss": 0.9143, "step": 5659 }, { "epoch": 0.772801747678864, "grad_norm": 7.636633396148682, "learning_rate": 1.2937598223330006e-06, "loss": 0.8779, "step": 5660 }, { "epoch": 0.7729382850901146, "grad_norm": 14.795381546020508, "learning_rate": 1.2922759962561615e-06, "loss": 0.9363, "step": 5661 }, { "epoch": 0.7730748225013654, "grad_norm": 6.986961364746094, "learning_rate": 1.2907928952848775e-06, "loss": 0.9455, "step": 5662 }, { "epoch": 0.773211359912616, "grad_norm": 6.263961315155029, "learning_rate": 1.2893105197091898e-06, "loss": 0.9217, "step": 5663 }, { "epoch": 0.7733478973238668, "grad_norm": 5.962589263916016, "learning_rate": 1.287828869819004e-06, "loss": 0.9405, "step": 5664 }, { "epoch": 0.7734844347351174, "grad_norm": 6.907599925994873, "learning_rate": 1.286347945904078e-06, "loss": 1.0982, "step": 5665 }, { "epoch": 0.7736209721463682, "grad_norm": 7.217462539672852, "learning_rate": 1.2848677482540333e-06, "loss": 0.7837, "step": 5666 }, { "epoch": 0.7737575095576188, "grad_norm": 5.661107063293457, "learning_rate": 1.283388277158344e-06, "loss": 0.921, "step": 5667 }, { "epoch": 0.7738940469688694, "grad_norm": 7.810755252838135, "learning_rate": 1.2819095329063469e-06, "loss": 1.0537, "step": 5668 }, { "epoch": 0.7740305843801202, "grad_norm": 4.880729675292969, "learning_rate": 1.2804315157872332e-06, "loss": 1.0234, "step": 5669 }, { "epoch": 0.7741671217913708, "grad_norm": 7.261683940887451, "learning_rate": 1.2789542260900522e-06, "loss": 0.9141, "step": 5670 }, { "epoch": 0.7743036592026216, "grad_norm": 7.057079792022705, "learning_rate": 1.277477664103714e-06, "loss": 0.9733, "step": 5671 }, { "epoch": 0.7744401966138722, "grad_norm": 6.8397979736328125, "learning_rate": 1.2760018301169829e-06, "loss": 0.847, "step": 5672 }, { "epoch": 0.7745767340251228, "grad_norm": 7.5987372398376465, "learning_rate": 1.2745267244184828e-06, "loss": 0.9317, "step": 5673 }, { "epoch": 0.7747132714363736, "grad_norm": 5.563663959503174, "learning_rate": 1.2730523472966927e-06, "loss": 0.9062, "step": 5674 }, { "epoch": 0.7748498088476242, "grad_norm": 11.153419494628906, "learning_rate": 1.2715786990399536e-06, "loss": 0.9949, "step": 5675 }, { "epoch": 0.774986346258875, "grad_norm": 8.310120582580566, "learning_rate": 1.2701057799364591e-06, "loss": 1.0206, "step": 5676 }, { "epoch": 0.7751228836701256, "grad_norm": 5.84478759765625, "learning_rate": 1.2686335902742647e-06, "loss": 0.9751, "step": 5677 }, { "epoch": 0.7752594210813762, "grad_norm": 5.166982650756836, "learning_rate": 1.2671621303412778e-06, "loss": 0.9321, "step": 5678 }, { "epoch": 0.775395958492627, "grad_norm": 6.113218307495117, "learning_rate": 1.2656914004252696e-06, "loss": 1.0564, "step": 5679 }, { "epoch": 0.7755324959038776, "grad_norm": 6.014479160308838, "learning_rate": 1.2642214008138642e-06, "loss": 0.8975, "step": 5680 }, { "epoch": 0.7756690333151284, "grad_norm": 7.056690692901611, "learning_rate": 1.2627521317945418e-06, "loss": 0.877, "step": 5681 }, { "epoch": 0.775805570726379, "grad_norm": 9.460527420043945, "learning_rate": 1.2612835936546446e-06, "loss": 1.0084, "step": 5682 }, { "epoch": 0.7759421081376298, "grad_norm": 5.4615044593811035, "learning_rate": 1.2598157866813676e-06, "loss": 0.9817, "step": 5683 }, { "epoch": 0.7760786455488804, "grad_norm": 5.331623554229736, "learning_rate": 1.2583487111617647e-06, "loss": 0.9969, "step": 5684 }, { "epoch": 0.776215182960131, "grad_norm": 7.275086879730225, "learning_rate": 1.2568823673827453e-06, "loss": 0.9977, "step": 5685 }, { "epoch": 0.7763517203713818, "grad_norm": 5.383200168609619, "learning_rate": 1.255416755631078e-06, "loss": 0.8481, "step": 5686 }, { "epoch": 0.7764882577826324, "grad_norm": 11.649757385253906, "learning_rate": 1.253951876193385e-06, "loss": 0.9595, "step": 5687 }, { "epoch": 0.7766247951938832, "grad_norm": 6.802255630493164, "learning_rate": 1.2524877293561504e-06, "loss": 0.9588, "step": 5688 }, { "epoch": 0.7767613326051338, "grad_norm": 6.346439361572266, "learning_rate": 1.2510243154057088e-06, "loss": 0.9253, "step": 5689 }, { "epoch": 0.7768978700163844, "grad_norm": 5.397683620452881, "learning_rate": 1.2495616346282569e-06, "loss": 0.8618, "step": 5690 }, { "epoch": 0.7770344074276352, "grad_norm": 7.4069504737854, "learning_rate": 1.248099687309845e-06, "loss": 1.0453, "step": 5691 }, { "epoch": 0.7771709448388858, "grad_norm": 6.025253772735596, "learning_rate": 1.246638473736378e-06, "loss": 0.946, "step": 5692 }, { "epoch": 0.7773074822501366, "grad_norm": 5.710239887237549, "learning_rate": 1.2451779941936243e-06, "loss": 0.9553, "step": 5693 }, { "epoch": 0.7774440196613872, "grad_norm": 6.900992393493652, "learning_rate": 1.2437182489672e-06, "loss": 0.9704, "step": 5694 }, { "epoch": 0.777580557072638, "grad_norm": 7.016232967376709, "learning_rate": 1.2422592383425864e-06, "loss": 0.8566, "step": 5695 }, { "epoch": 0.7777170944838886, "grad_norm": 5.81416130065918, "learning_rate": 1.2408009626051137e-06, "loss": 0.9157, "step": 5696 }, { "epoch": 0.7778536318951392, "grad_norm": 5.9544677734375, "learning_rate": 1.2393434220399724e-06, "loss": 0.9041, "step": 5697 }, { "epoch": 0.77799016930639, "grad_norm": 7.9266767501831055, "learning_rate": 1.2378866169322063e-06, "loss": 0.8241, "step": 5698 }, { "epoch": 0.7781267067176406, "grad_norm": 6.39110803604126, "learning_rate": 1.23643054756672e-06, "loss": 0.9069, "step": 5699 }, { "epoch": 0.7782632441288914, "grad_norm": 8.324947357177734, "learning_rate": 1.2349752142282706e-06, "loss": 0.9249, "step": 5700 }, { "epoch": 0.778399781540142, "grad_norm": 6.141880035400391, "learning_rate": 1.2335206172014702e-06, "loss": 1.0248, "step": 5701 }, { "epoch": 0.7785363189513926, "grad_norm": 8.511384963989258, "learning_rate": 1.2320667567707916e-06, "loss": 0.9204, "step": 5702 }, { "epoch": 0.7786728563626434, "grad_norm": 7.011524200439453, "learning_rate": 1.2306136332205586e-06, "loss": 0.9282, "step": 5703 }, { "epoch": 0.778809393773894, "grad_norm": 8.178071022033691, "learning_rate": 1.2291612468349556e-06, "loss": 0.8388, "step": 5704 }, { "epoch": 0.7789459311851448, "grad_norm": 6.150355815887451, "learning_rate": 1.2277095978980174e-06, "loss": 1.1418, "step": 5705 }, { "epoch": 0.7790824685963954, "grad_norm": 5.928557872772217, "learning_rate": 1.2262586866936404e-06, "loss": 0.8368, "step": 5706 }, { "epoch": 0.779219006007646, "grad_norm": 7.196935653686523, "learning_rate": 1.2248085135055704e-06, "loss": 1.0339, "step": 5707 }, { "epoch": 0.7793555434188968, "grad_norm": 6.021081924438477, "learning_rate": 1.223359078617416e-06, "loss": 0.9527, "step": 5708 }, { "epoch": 0.7794920808301474, "grad_norm": 6.304210186004639, "learning_rate": 1.2219103823126361e-06, "loss": 1.0471, "step": 5709 }, { "epoch": 0.7796286182413982, "grad_norm": 6.6449360847473145, "learning_rate": 1.2204624248745461e-06, "loss": 1.0576, "step": 5710 }, { "epoch": 0.7797651556526488, "grad_norm": 4.779585838317871, "learning_rate": 1.219015206586318e-06, "loss": 0.8083, "step": 5711 }, { "epoch": 0.7799016930638996, "grad_norm": 6.420902729034424, "learning_rate": 1.2175687277309777e-06, "loss": 0.9413, "step": 5712 }, { "epoch": 0.7800382304751502, "grad_norm": 5.529566764831543, "learning_rate": 1.2161229885914099e-06, "loss": 1.0297, "step": 5713 }, { "epoch": 0.7801747678864008, "grad_norm": 7.878076076507568, "learning_rate": 1.2146779894503501e-06, "loss": 1.0686, "step": 5714 }, { "epoch": 0.7803113052976516, "grad_norm": 9.030872344970703, "learning_rate": 1.2132337305903934e-06, "loss": 0.9998, "step": 5715 }, { "epoch": 0.7804478427089022, "grad_norm": 9.344771385192871, "learning_rate": 1.2117902122939861e-06, "loss": 0.9548, "step": 5716 }, { "epoch": 0.780584380120153, "grad_norm": 6.616122722625732, "learning_rate": 1.2103474348434335e-06, "loss": 0.8253, "step": 5717 }, { "epoch": 0.7807209175314036, "grad_norm": 6.8468828201293945, "learning_rate": 1.2089053985208915e-06, "loss": 0.9652, "step": 5718 }, { "epoch": 0.7808574549426542, "grad_norm": 6.019960403442383, "learning_rate": 1.2074641036083774e-06, "loss": 0.9466, "step": 5719 }, { "epoch": 0.780993992353905, "grad_norm": 6.477540969848633, "learning_rate": 1.2060235503877571e-06, "loss": 1.0578, "step": 5720 }, { "epoch": 0.7811305297651556, "grad_norm": 5.606204509735107, "learning_rate": 1.2045837391407539e-06, "loss": 1.0043, "step": 5721 }, { "epoch": 0.7812670671764064, "grad_norm": 6.354002952575684, "learning_rate": 1.203144670148948e-06, "loss": 1.1609, "step": 5722 }, { "epoch": 0.781403604587657, "grad_norm": 11.728978157043457, "learning_rate": 1.201706343693772e-06, "loss": 0.9985, "step": 5723 }, { "epoch": 0.7815401419989078, "grad_norm": 7.320222854614258, "learning_rate": 1.2002687600565138e-06, "loss": 0.9677, "step": 5724 }, { "epoch": 0.7816766794101584, "grad_norm": 7.615493297576904, "learning_rate": 1.1988319195183146e-06, "loss": 0.9938, "step": 5725 }, { "epoch": 0.781813216821409, "grad_norm": 8.407721519470215, "learning_rate": 1.197395822360175e-06, "loss": 0.8943, "step": 5726 }, { "epoch": 0.7819497542326598, "grad_norm": 7.799405097961426, "learning_rate": 1.1959604688629439e-06, "loss": 0.8967, "step": 5727 }, { "epoch": 0.7820862916439104, "grad_norm": 8.49462604522705, "learning_rate": 1.1945258593073311e-06, "loss": 1.0279, "step": 5728 }, { "epoch": 0.7822228290551612, "grad_norm": 5.765975475311279, "learning_rate": 1.1930919939738945e-06, "loss": 0.8511, "step": 5729 }, { "epoch": 0.7823593664664118, "grad_norm": 6.7273030281066895, "learning_rate": 1.1916588731430528e-06, "loss": 0.9205, "step": 5730 }, { "epoch": 0.7824959038776624, "grad_norm": 5.4793171882629395, "learning_rate": 1.190226497095075e-06, "loss": 0.8766, "step": 5731 }, { "epoch": 0.7826324412889132, "grad_norm": 6.170335292816162, "learning_rate": 1.1887948661100833e-06, "loss": 0.8942, "step": 5732 }, { "epoch": 0.7827689787001638, "grad_norm": 7.896617889404297, "learning_rate": 1.1873639804680599e-06, "loss": 0.8789, "step": 5733 }, { "epoch": 0.7829055161114146, "grad_norm": 6.948099136352539, "learning_rate": 1.1859338404488342e-06, "loss": 0.9517, "step": 5734 }, { "epoch": 0.7830420535226652, "grad_norm": 4.717222213745117, "learning_rate": 1.1845044463320971e-06, "loss": 0.8968, "step": 5735 }, { "epoch": 0.7831785909339158, "grad_norm": 14.82169246673584, "learning_rate": 1.1830757983973868e-06, "loss": 0.9778, "step": 5736 }, { "epoch": 0.7833151283451666, "grad_norm": 5.615795612335205, "learning_rate": 1.1816478969241002e-06, "loss": 0.932, "step": 5737 }, { "epoch": 0.7834516657564172, "grad_norm": 6.610168933868408, "learning_rate": 1.180220742191484e-06, "loss": 0.9485, "step": 5738 }, { "epoch": 0.783588203167668, "grad_norm": 5.760245323181152, "learning_rate": 1.178794334478645e-06, "loss": 0.9667, "step": 5739 }, { "epoch": 0.7837247405789186, "grad_norm": 6.962228298187256, "learning_rate": 1.1773686740645384e-06, "loss": 1.0836, "step": 5740 }, { "epoch": 0.7838612779901694, "grad_norm": 5.390321254730225, "learning_rate": 1.1759437612279745e-06, "loss": 0.8687, "step": 5741 }, { "epoch": 0.78399781540142, "grad_norm": 6.160682678222656, "learning_rate": 1.1745195962476202e-06, "loss": 0.9535, "step": 5742 }, { "epoch": 0.7841343528126706, "grad_norm": 5.898110389709473, "learning_rate": 1.173096179401992e-06, "loss": 0.9042, "step": 5743 }, { "epoch": 0.7842708902239214, "grad_norm": 5.719261169433594, "learning_rate": 1.1716735109694637e-06, "loss": 0.9502, "step": 5744 }, { "epoch": 0.784407427635172, "grad_norm": 6.366700172424316, "learning_rate": 1.1702515912282598e-06, "loss": 0.891, "step": 5745 }, { "epoch": 0.7845439650464228, "grad_norm": 6.301692485809326, "learning_rate": 1.1688304204564616e-06, "loss": 0.8113, "step": 5746 }, { "epoch": 0.7846805024576734, "grad_norm": 6.437831401824951, "learning_rate": 1.1674099989320015e-06, "loss": 0.7033, "step": 5747 }, { "epoch": 0.784817039868924, "grad_norm": 12.574041366577148, "learning_rate": 1.165990326932665e-06, "loss": 0.9734, "step": 5748 }, { "epoch": 0.7849535772801748, "grad_norm": 7.032948017120361, "learning_rate": 1.1645714047360918e-06, "loss": 1.0441, "step": 5749 }, { "epoch": 0.7850901146914254, "grad_norm": 5.340752601623535, "learning_rate": 1.1631532326197765e-06, "loss": 0.8758, "step": 5750 }, { "epoch": 0.7852266521026762, "grad_norm": 6.167428016662598, "learning_rate": 1.1617358108610654e-06, "loss": 0.857, "step": 5751 }, { "epoch": 0.7853631895139268, "grad_norm": 5.5704874992370605, "learning_rate": 1.160319139737156e-06, "loss": 0.9066, "step": 5752 }, { "epoch": 0.7854997269251774, "grad_norm": 7.15016508102417, "learning_rate": 1.1589032195251053e-06, "loss": 0.9003, "step": 5753 }, { "epoch": 0.7856362643364282, "grad_norm": 14.262809753417969, "learning_rate": 1.1574880505018156e-06, "loss": 1.0149, "step": 5754 }, { "epoch": 0.7857728017476788, "grad_norm": 6.900885581970215, "learning_rate": 1.1560736329440492e-06, "loss": 0.9703, "step": 5755 }, { "epoch": 0.7859093391589296, "grad_norm": 9.256402015686035, "learning_rate": 1.1546599671284158e-06, "loss": 0.9704, "step": 5756 }, { "epoch": 0.7860458765701802, "grad_norm": 10.494734764099121, "learning_rate": 1.1532470533313834e-06, "loss": 0.8418, "step": 5757 }, { "epoch": 0.786182413981431, "grad_norm": 4.971269130706787, "learning_rate": 1.1518348918292677e-06, "loss": 1.0006, "step": 5758 }, { "epoch": 0.7863189513926816, "grad_norm": 6.3500189781188965, "learning_rate": 1.1504234828982419e-06, "loss": 0.9607, "step": 5759 }, { "epoch": 0.7864554888039322, "grad_norm": 5.054460525512695, "learning_rate": 1.149012826814329e-06, "loss": 0.9098, "step": 5760 }, { "epoch": 0.786592026215183, "grad_norm": 5.30594539642334, "learning_rate": 1.1476029238534053e-06, "loss": 0.9794, "step": 5761 }, { "epoch": 0.7867285636264336, "grad_norm": 5.527027130126953, "learning_rate": 1.1461937742912004e-06, "loss": 0.9217, "step": 5762 }, { "epoch": 0.7868651010376844, "grad_norm": 7.63683557510376, "learning_rate": 1.1447853784032952e-06, "loss": 0.8419, "step": 5763 }, { "epoch": 0.787001638448935, "grad_norm": 5.518123626708984, "learning_rate": 1.1433777364651272e-06, "loss": 0.968, "step": 5764 }, { "epoch": 0.7871381758601856, "grad_norm": 5.914290428161621, "learning_rate": 1.1419708487519803e-06, "loss": 0.867, "step": 5765 }, { "epoch": 0.7872747132714364, "grad_norm": 5.669515609741211, "learning_rate": 1.140564715538997e-06, "loss": 0.816, "step": 5766 }, { "epoch": 0.787411250682687, "grad_norm": 6.610101222991943, "learning_rate": 1.1391593371011673e-06, "loss": 1.0268, "step": 5767 }, { "epoch": 0.7875477880939378, "grad_norm": 5.464537620544434, "learning_rate": 1.1377547137133382e-06, "loss": 0.9472, "step": 5768 }, { "epoch": 0.7876843255051884, "grad_norm": 8.927734375, "learning_rate": 1.1363508456502032e-06, "loss": 0.9828, "step": 5769 }, { "epoch": 0.7878208629164392, "grad_norm": 5.60807991027832, "learning_rate": 1.134947733186315e-06, "loss": 0.847, "step": 5770 }, { "epoch": 0.7879574003276898, "grad_norm": 6.583324909210205, "learning_rate": 1.1335453765960735e-06, "loss": 1.0474, "step": 5771 }, { "epoch": 0.7880939377389404, "grad_norm": 177.78163146972656, "learning_rate": 1.1321437761537307e-06, "loss": 0.9652, "step": 5772 }, { "epoch": 0.7882304751501912, "grad_norm": 5.468012809753418, "learning_rate": 1.1307429321333951e-06, "loss": 0.9914, "step": 5773 }, { "epoch": 0.7883670125614418, "grad_norm": 10.024352073669434, "learning_rate": 1.1293428448090233e-06, "loss": 1.0107, "step": 5774 }, { "epoch": 0.7885035499726926, "grad_norm": 5.682775020599365, "learning_rate": 1.1279435144544248e-06, "loss": 0.812, "step": 5775 }, { "epoch": 0.7886400873839432, "grad_norm": 7.5887861251831055, "learning_rate": 1.12654494134326e-06, "loss": 1.019, "step": 5776 }, { "epoch": 0.7887766247951938, "grad_norm": 7.1279296875, "learning_rate": 1.1251471257490454e-06, "loss": 1.0006, "step": 5777 }, { "epoch": 0.7889131622064446, "grad_norm": 7.472787380218506, "learning_rate": 1.1237500679451435e-06, "loss": 0.9287, "step": 5778 }, { "epoch": 0.7890496996176952, "grad_norm": 6.034247875213623, "learning_rate": 1.1223537682047753e-06, "loss": 0.9342, "step": 5779 }, { "epoch": 0.789186237028946, "grad_norm": 5.168729305267334, "learning_rate": 1.1209582268010056e-06, "loss": 0.8481, "step": 5780 }, { "epoch": 0.7893227744401966, "grad_norm": 6.131038665771484, "learning_rate": 1.1195634440067587e-06, "loss": 0.9358, "step": 5781 }, { "epoch": 0.7894593118514472, "grad_norm": 9.275487899780273, "learning_rate": 1.1181694200948063e-06, "loss": 0.8655, "step": 5782 }, { "epoch": 0.789595849262698, "grad_norm": 5.774051666259766, "learning_rate": 1.11677615533777e-06, "loss": 0.9873, "step": 5783 }, { "epoch": 0.7897323866739486, "grad_norm": 7.164634704589844, "learning_rate": 1.1153836500081278e-06, "loss": 0.8978, "step": 5784 }, { "epoch": 0.7898689240851994, "grad_norm": 10.130523681640625, "learning_rate": 1.113991904378205e-06, "loss": 0.9205, "step": 5785 }, { "epoch": 0.79000546149645, "grad_norm": 6.210947036743164, "learning_rate": 1.1126009187201824e-06, "loss": 0.8869, "step": 5786 }, { "epoch": 0.7901419989077008, "grad_norm": 4.6152663230896, "learning_rate": 1.1112106933060878e-06, "loss": 0.875, "step": 5787 }, { "epoch": 0.7902785363189514, "grad_norm": 18.081098556518555, "learning_rate": 1.1098212284078037e-06, "loss": 1.0556, "step": 5788 }, { "epoch": 0.790415073730202, "grad_norm": 4.86842155456543, "learning_rate": 1.1084325242970594e-06, "loss": 0.8005, "step": 5789 }, { "epoch": 0.7905516111414528, "grad_norm": 9.026623725891113, "learning_rate": 1.1070445812454428e-06, "loss": 0.8664, "step": 5790 }, { "epoch": 0.7906881485527034, "grad_norm": 5.557648181915283, "learning_rate": 1.1056573995243862e-06, "loss": 0.9004, "step": 5791 }, { "epoch": 0.7908246859639542, "grad_norm": 6.769309043884277, "learning_rate": 1.1042709794051749e-06, "loss": 1.0975, "step": 5792 }, { "epoch": 0.7909612233752048, "grad_norm": 5.7863969802856445, "learning_rate": 1.102885321158948e-06, "loss": 1.0668, "step": 5793 }, { "epoch": 0.7910977607864554, "grad_norm": 4.838612079620361, "learning_rate": 1.1015004250566908e-06, "loss": 0.9199, "step": 5794 }, { "epoch": 0.7912342981977062, "grad_norm": 7.35879373550415, "learning_rate": 1.1001162913692447e-06, "loss": 1.0474, "step": 5795 }, { "epoch": 0.7913708356089568, "grad_norm": 5.8956618309021, "learning_rate": 1.098732920367298e-06, "loss": 0.969, "step": 5796 }, { "epoch": 0.7915073730202076, "grad_norm": 5.469350337982178, "learning_rate": 1.0973503123213925e-06, "loss": 1.0825, "step": 5797 }, { "epoch": 0.7916439104314582, "grad_norm": 6.855193614959717, "learning_rate": 1.0959684675019194e-06, "loss": 1.0172, "step": 5798 }, { "epoch": 0.791780447842709, "grad_norm": 7.593465328216553, "learning_rate": 1.0945873861791185e-06, "loss": 0.9744, "step": 5799 }, { "epoch": 0.7919169852539596, "grad_norm": 7.469038009643555, "learning_rate": 1.093207068623086e-06, "loss": 0.9433, "step": 5800 }, { "epoch": 0.7920535226652102, "grad_norm": 7.030568599700928, "learning_rate": 1.091827515103764e-06, "loss": 0.7489, "step": 5801 }, { "epoch": 0.792190060076461, "grad_norm": 8.262043952941895, "learning_rate": 1.0904487258909463e-06, "loss": 1.0606, "step": 5802 }, { "epoch": 0.7923265974877116, "grad_norm": 4.751641273498535, "learning_rate": 1.0890707012542767e-06, "loss": 0.8492, "step": 5803 }, { "epoch": 0.7924631348989624, "grad_norm": 6.443160057067871, "learning_rate": 1.0876934414632523e-06, "loss": 0.8984, "step": 5804 }, { "epoch": 0.792599672310213, "grad_norm": 6.553399562835693, "learning_rate": 1.0863169467872165e-06, "loss": 1.0104, "step": 5805 }, { "epoch": 0.7927362097214636, "grad_norm": 7.676197052001953, "learning_rate": 1.0849412174953672e-06, "loss": 1.0773, "step": 5806 }, { "epoch": 0.7928727471327144, "grad_norm": 8.345806121826172, "learning_rate": 1.0835662538567482e-06, "loss": 0.9428, "step": 5807 }, { "epoch": 0.793009284543965, "grad_norm": 7.409294605255127, "learning_rate": 1.0821920561402583e-06, "loss": 0.7727, "step": 5808 }, { "epoch": 0.7931458219552158, "grad_norm": 7.485914707183838, "learning_rate": 1.0808186246146424e-06, "loss": 1.0007, "step": 5809 }, { "epoch": 0.7932823593664664, "grad_norm": 6.405949592590332, "learning_rate": 1.0794459595484985e-06, "loss": 0.8553, "step": 5810 }, { "epoch": 0.793418896777717, "grad_norm": 7.802053451538086, "learning_rate": 1.0780740612102735e-06, "loss": 0.9891, "step": 5811 }, { "epoch": 0.7935554341889678, "grad_norm": 8.952622413635254, "learning_rate": 1.0767029298682642e-06, "loss": 0.9887, "step": 5812 }, { "epoch": 0.7936919716002184, "grad_norm": 4.36614465713501, "learning_rate": 1.0753325657906171e-06, "loss": 0.8067, "step": 5813 }, { "epoch": 0.7938285090114692, "grad_norm": 6.326979160308838, "learning_rate": 1.0739629692453279e-06, "loss": 0.9921, "step": 5814 }, { "epoch": 0.7939650464227198, "grad_norm": 6.5043559074401855, "learning_rate": 1.072594140500246e-06, "loss": 1.0472, "step": 5815 }, { "epoch": 0.7941015838339706, "grad_norm": 4.455220699310303, "learning_rate": 1.0712260798230656e-06, "loss": 0.9958, "step": 5816 }, { "epoch": 0.7942381212452212, "grad_norm": 7.039586067199707, "learning_rate": 1.069858787481336e-06, "loss": 0.9846, "step": 5817 }, { "epoch": 0.7943746586564718, "grad_norm": 10.16119384765625, "learning_rate": 1.0684922637424504e-06, "loss": 0.9381, "step": 5818 }, { "epoch": 0.7945111960677226, "grad_norm": 7.586671352386475, "learning_rate": 1.067126508873657e-06, "loss": 1.1054, "step": 5819 }, { "epoch": 0.7946477334789732, "grad_norm": 18.79142189025879, "learning_rate": 1.0657615231420492e-06, "loss": 0.9483, "step": 5820 }, { "epoch": 0.794784270890224, "grad_norm": 6.192427635192871, "learning_rate": 1.0643973068145752e-06, "loss": 0.9687, "step": 5821 }, { "epoch": 0.7949208083014746, "grad_norm": 8.782366752624512, "learning_rate": 1.063033860158027e-06, "loss": 0.8656, "step": 5822 }, { "epoch": 0.7950573457127252, "grad_norm": 11.744020462036133, "learning_rate": 1.0616711834390486e-06, "loss": 0.8023, "step": 5823 }, { "epoch": 0.795193883123976, "grad_norm": 8.494800567626953, "learning_rate": 1.0603092769241352e-06, "loss": 0.845, "step": 5824 }, { "epoch": 0.7953304205352266, "grad_norm": 7.779788017272949, "learning_rate": 1.0589481408796292e-06, "loss": 1.1566, "step": 5825 }, { "epoch": 0.7954669579464774, "grad_norm": 6.721142768859863, "learning_rate": 1.0575877755717223e-06, "loss": 1.0467, "step": 5826 }, { "epoch": 0.795603495357728, "grad_norm": 5.931054592132568, "learning_rate": 1.056228181266455e-06, "loss": 1.0334, "step": 5827 }, { "epoch": 0.7957400327689788, "grad_norm": 7.151946544647217, "learning_rate": 1.0548693582297203e-06, "loss": 1.0169, "step": 5828 }, { "epoch": 0.7958765701802294, "grad_norm": 10.341690063476562, "learning_rate": 1.0535113067272556e-06, "loss": 1.0217, "step": 5829 }, { "epoch": 0.79601310759148, "grad_norm": 6.7493438720703125, "learning_rate": 1.0521540270246527e-06, "loss": 1.0209, "step": 5830 }, { "epoch": 0.7961496450027308, "grad_norm": 6.377974033355713, "learning_rate": 1.0507975193873481e-06, "loss": 1.0706, "step": 5831 }, { "epoch": 0.7962861824139814, "grad_norm": 5.872397422790527, "learning_rate": 1.0494417840806276e-06, "loss": 1.0799, "step": 5832 }, { "epoch": 0.7964227198252322, "grad_norm": 5.473605632781982, "learning_rate": 1.0480868213696298e-06, "loss": 0.9115, "step": 5833 }, { "epoch": 0.7965592572364828, "grad_norm": 10.257668495178223, "learning_rate": 1.0467326315193377e-06, "loss": 0.9071, "step": 5834 }, { "epoch": 0.7966957946477334, "grad_norm": 7.995219707489014, "learning_rate": 1.0453792147945867e-06, "loss": 0.931, "step": 5835 }, { "epoch": 0.7968323320589842, "grad_norm": 5.524139881134033, "learning_rate": 1.0440265714600573e-06, "loss": 0.9138, "step": 5836 }, { "epoch": 0.7969688694702348, "grad_norm": 5.824769973754883, "learning_rate": 1.0426747017802836e-06, "loss": 1.0261, "step": 5837 }, { "epoch": 0.7971054068814856, "grad_norm": 10.241632461547852, "learning_rate": 1.0413236060196435e-06, "loss": 1.0441, "step": 5838 }, { "epoch": 0.7972419442927362, "grad_norm": 7.66450309753418, "learning_rate": 1.0399732844423666e-06, "loss": 0.9286, "step": 5839 }, { "epoch": 0.7973784817039868, "grad_norm": 5.473629951477051, "learning_rate": 1.0386237373125286e-06, "loss": 0.9334, "step": 5840 }, { "epoch": 0.7975150191152376, "grad_norm": 7.235651969909668, "learning_rate": 1.0372749648940577e-06, "loss": 0.9159, "step": 5841 }, { "epoch": 0.7976515565264882, "grad_norm": 6.39370584487915, "learning_rate": 1.0359269674507272e-06, "loss": 0.9749, "step": 5842 }, { "epoch": 0.797788093937739, "grad_norm": 16.993572235107422, "learning_rate": 1.034579745246158e-06, "loss": 1.0055, "step": 5843 }, { "epoch": 0.7979246313489896, "grad_norm": 6.455846786499023, "learning_rate": 1.0332332985438248e-06, "loss": 0.861, "step": 5844 }, { "epoch": 0.7980611687602404, "grad_norm": 7.436452865600586, "learning_rate": 1.0318876276070439e-06, "loss": 0.8668, "step": 5845 }, { "epoch": 0.798197706171491, "grad_norm": 6.649466037750244, "learning_rate": 1.030542732698986e-06, "loss": 0.8188, "step": 5846 }, { "epoch": 0.7983342435827416, "grad_norm": 41.53883743286133, "learning_rate": 1.0291986140826637e-06, "loss": 0.9555, "step": 5847 }, { "epoch": 0.7984707809939924, "grad_norm": 5.605208396911621, "learning_rate": 1.0278552720209451e-06, "loss": 0.8803, "step": 5848 }, { "epoch": 0.798607318405243, "grad_norm": 11.424134254455566, "learning_rate": 1.0265127067765384e-06, "loss": 0.8841, "step": 5849 }, { "epoch": 0.7987438558164938, "grad_norm": 12.54396915435791, "learning_rate": 1.0251709186120079e-06, "loss": 0.8844, "step": 5850 }, { "epoch": 0.7988803932277444, "grad_norm": 6.0731940269470215, "learning_rate": 1.0238299077897596e-06, "loss": 0.9248, "step": 5851 }, { "epoch": 0.799016930638995, "grad_norm": 5.251492977142334, "learning_rate": 1.0224896745720513e-06, "loss": 0.9619, "step": 5852 }, { "epoch": 0.7991534680502458, "grad_norm": 5.757622718811035, "learning_rate": 1.021150219220986e-06, "loss": 0.9288, "step": 5853 }, { "epoch": 0.7992900054614964, "grad_norm": 6.607485294342041, "learning_rate": 1.0198115419985155e-06, "loss": 0.9388, "step": 5854 }, { "epoch": 0.7994265428727472, "grad_norm": 5.714757442474365, "learning_rate": 1.018473643166442e-06, "loss": 0.8467, "step": 5855 }, { "epoch": 0.7995630802839978, "grad_norm": 5.393362522125244, "learning_rate": 1.0171365229864106e-06, "loss": 0.9358, "step": 5856 }, { "epoch": 0.7996996176952484, "grad_norm": 6.263640403747559, "learning_rate": 1.0158001817199204e-06, "loss": 0.9747, "step": 5857 }, { "epoch": 0.7998361551064992, "grad_norm": 5.244487285614014, "learning_rate": 1.0144646196283103e-06, "loss": 0.935, "step": 5858 }, { "epoch": 0.7999726925177498, "grad_norm": 7.454324722290039, "learning_rate": 1.0131298369727755e-06, "loss": 0.8358, "step": 5859 }, { "epoch": 0.8001092299290006, "grad_norm": 7.257071495056152, "learning_rate": 1.0117958340143508e-06, "loss": 0.9601, "step": 5860 }, { "epoch": 0.8002457673402512, "grad_norm": 8.487951278686523, "learning_rate": 1.0104626110139243e-06, "loss": 0.9488, "step": 5861 }, { "epoch": 0.800382304751502, "grad_norm": 10.930228233337402, "learning_rate": 1.009130168232229e-06, "loss": 0.9887, "step": 5862 }, { "epoch": 0.8005188421627526, "grad_norm": 7.357706069946289, "learning_rate": 1.0077985059298445e-06, "loss": 1.0148, "step": 5863 }, { "epoch": 0.8006553795740032, "grad_norm": 8.247533798217773, "learning_rate": 1.0064676243672012e-06, "loss": 0.8724, "step": 5864 }, { "epoch": 0.800791916985254, "grad_norm": 8.777915000915527, "learning_rate": 1.0051375238045724e-06, "loss": 0.8993, "step": 5865 }, { "epoch": 0.8009284543965046, "grad_norm": 6.321505546569824, "learning_rate": 1.0038082045020825e-06, "loss": 0.9107, "step": 5866 }, { "epoch": 0.8010649918077554, "grad_norm": 5.302130699157715, "learning_rate": 1.0024796667196983e-06, "loss": 0.9054, "step": 5867 }, { "epoch": 0.801201529219006, "grad_norm": 9.021751403808594, "learning_rate": 1.0011519107172413e-06, "loss": 0.8437, "step": 5868 }, { "epoch": 0.8013380666302566, "grad_norm": 6.4111223220825195, "learning_rate": 9.998249367543722e-07, "loss": 0.9477, "step": 5869 }, { "epoch": 0.8014746040415074, "grad_norm": 14.506977081298828, "learning_rate": 9.984987450906048e-07, "loss": 0.9075, "step": 5870 }, { "epoch": 0.801611141452758, "grad_norm": 8.429649353027344, "learning_rate": 9.971733359852964e-07, "loss": 1.0353, "step": 5871 }, { "epoch": 0.8017476788640088, "grad_norm": 7.600343227386475, "learning_rate": 9.958487096976505e-07, "loss": 1.1081, "step": 5872 }, { "epoch": 0.8018842162752594, "grad_norm": 7.7318501472473145, "learning_rate": 9.945248664867225e-07, "loss": 0.8281, "step": 5873 }, { "epoch": 0.8020207536865102, "grad_norm": 6.263496398925781, "learning_rate": 9.93201806611408e-07, "loss": 1.0035, "step": 5874 }, { "epoch": 0.8021572910977608, "grad_norm": 6.05348014831543, "learning_rate": 9.918795303304562e-07, "loss": 0.9908, "step": 5875 }, { "epoch": 0.8022938285090114, "grad_norm": 5.841392517089844, "learning_rate": 9.905580379024581e-07, "loss": 0.8202, "step": 5876 }, { "epoch": 0.8024303659202622, "grad_norm": 7.349368095397949, "learning_rate": 9.89237329585853e-07, "loss": 0.9781, "step": 5877 }, { "epoch": 0.8025669033315128, "grad_norm": 5.69992208480835, "learning_rate": 9.87917405638925e-07, "loss": 0.8274, "step": 5878 }, { "epoch": 0.8027034407427636, "grad_norm": 9.737996101379395, "learning_rate": 9.865982663198103e-07, "loss": 1.0042, "step": 5879 }, { "epoch": 0.8028399781540142, "grad_norm": 5.224078178405762, "learning_rate": 9.852799118864841e-07, "loss": 0.817, "step": 5880 }, { "epoch": 0.8029765155652648, "grad_norm": 6.899978160858154, "learning_rate": 9.83962342596776e-07, "loss": 0.936, "step": 5881 }, { "epoch": 0.8031130529765156, "grad_norm": 7.219719409942627, "learning_rate": 9.826455587083555e-07, "loss": 0.9487, "step": 5882 }, { "epoch": 0.8032495903877662, "grad_norm": 7.528860092163086, "learning_rate": 9.813295604787404e-07, "loss": 1.0174, "step": 5883 }, { "epoch": 0.803386127799017, "grad_norm": 6.373664379119873, "learning_rate": 9.80014348165298e-07, "loss": 0.805, "step": 5884 }, { "epoch": 0.8035226652102676, "grad_norm": 6.079888343811035, "learning_rate": 9.786999220252363e-07, "loss": 1.0353, "step": 5885 }, { "epoch": 0.8036592026215182, "grad_norm": 5.180588722229004, "learning_rate": 9.77386282315616e-07, "loss": 0.9397, "step": 5886 }, { "epoch": 0.803795740032769, "grad_norm": 8.36640453338623, "learning_rate": 9.760734292933382e-07, "loss": 0.9875, "step": 5887 }, { "epoch": 0.8039322774440196, "grad_norm": 6.858026504516602, "learning_rate": 9.747613632151543e-07, "loss": 0.9478, "step": 5888 }, { "epoch": 0.8040688148552704, "grad_norm": 5.859564304351807, "learning_rate": 9.734500843376587e-07, "loss": 0.9228, "step": 5889 }, { "epoch": 0.804205352266521, "grad_norm": 6.844244480133057, "learning_rate": 9.721395929172944e-07, "loss": 0.9546, "step": 5890 }, { "epoch": 0.8043418896777718, "grad_norm": 7.676883220672607, "learning_rate": 9.708298892103473e-07, "loss": 0.8557, "step": 5891 }, { "epoch": 0.8044784270890224, "grad_norm": 7.576914310455322, "learning_rate": 9.695209734729533e-07, "loss": 0.9296, "step": 5892 }, { "epoch": 0.804614964500273, "grad_norm": 5.08367395401001, "learning_rate": 9.682128459610919e-07, "loss": 0.7355, "step": 5893 }, { "epoch": 0.8047515019115238, "grad_norm": 6.592347145080566, "learning_rate": 9.669055069305867e-07, "loss": 1.0119, "step": 5894 }, { "epoch": 0.8048880393227744, "grad_norm": 5.850371837615967, "learning_rate": 9.65598956637111e-07, "loss": 0.9095, "step": 5895 }, { "epoch": 0.8050245767340252, "grad_norm": 6.979963779449463, "learning_rate": 9.642931953361806e-07, "loss": 0.9522, "step": 5896 }, { "epoch": 0.8051611141452758, "grad_norm": 7.617821216583252, "learning_rate": 9.629882232831604e-07, "loss": 0.9412, "step": 5897 }, { "epoch": 0.8052976515565264, "grad_norm": 6.89281702041626, "learning_rate": 9.616840407332563e-07, "loss": 0.9044, "step": 5898 }, { "epoch": 0.8054341889677772, "grad_norm": 5.861178398132324, "learning_rate": 9.60380647941524e-07, "loss": 0.9951, "step": 5899 }, { "epoch": 0.8055707263790278, "grad_norm": 6.417260646820068, "learning_rate": 9.590780451628617e-07, "loss": 1.0601, "step": 5900 }, { "epoch": 0.8057072637902786, "grad_norm": 5.80917501449585, "learning_rate": 9.577762326520169e-07, "loss": 0.8751, "step": 5901 }, { "epoch": 0.8058438012015292, "grad_norm": 7.423742771148682, "learning_rate": 9.564752106635783e-07, "loss": 0.825, "step": 5902 }, { "epoch": 0.80598033861278, "grad_norm": 4.744889736175537, "learning_rate": 9.55174979451982e-07, "loss": 0.9208, "step": 5903 }, { "epoch": 0.8061168760240306, "grad_norm": 6.659989833831787, "learning_rate": 9.538755392715105e-07, "loss": 0.8742, "step": 5904 }, { "epoch": 0.8062534134352812, "grad_norm": 6.737648963928223, "learning_rate": 9.525768903762872e-07, "loss": 0.8361, "step": 5905 }, { "epoch": 0.806389950846532, "grad_norm": 7.984147071838379, "learning_rate": 9.512790330202876e-07, "loss": 0.8943, "step": 5906 }, { "epoch": 0.8065264882577826, "grad_norm": 5.586496829986572, "learning_rate": 9.499819674573257e-07, "loss": 0.9814, "step": 5907 }, { "epoch": 0.8066630256690334, "grad_norm": 7.567931652069092, "learning_rate": 9.486856939410672e-07, "loss": 0.9178, "step": 5908 }, { "epoch": 0.806799563080284, "grad_norm": 8.651440620422363, "learning_rate": 9.473902127250156e-07, "loss": 0.888, "step": 5909 }, { "epoch": 0.8069361004915346, "grad_norm": 7.224522113800049, "learning_rate": 9.460955240625269e-07, "loss": 0.971, "step": 5910 }, { "epoch": 0.8070726379027854, "grad_norm": 6.890003681182861, "learning_rate": 9.448016282067951e-07, "loss": 1.012, "step": 5911 }, { "epoch": 0.807209175314036, "grad_norm": 6.908063888549805, "learning_rate": 9.435085254108656e-07, "loss": 0.8736, "step": 5912 }, { "epoch": 0.8073457127252868, "grad_norm": 7.886530876159668, "learning_rate": 9.422162159276238e-07, "loss": 0.9548, "step": 5913 }, { "epoch": 0.8074822501365374, "grad_norm": 5.243919849395752, "learning_rate": 9.40924700009801e-07, "loss": 1.0298, "step": 5914 }, { "epoch": 0.807618787547788, "grad_norm": 6.67543363571167, "learning_rate": 9.396339779099767e-07, "loss": 0.8721, "step": 5915 }, { "epoch": 0.8077553249590388, "grad_norm": 9.689397811889648, "learning_rate": 9.383440498805712e-07, "loss": 0.977, "step": 5916 }, { "epoch": 0.8078918623702894, "grad_norm": 6.027979373931885, "learning_rate": 9.370549161738502e-07, "loss": 0.8146, "step": 5917 }, { "epoch": 0.8080283997815402, "grad_norm": 5.692111015319824, "learning_rate": 9.357665770419244e-07, "loss": 0.9564, "step": 5918 }, { "epoch": 0.8081649371927908, "grad_norm": 8.911596298217773, "learning_rate": 9.344790327367509e-07, "loss": 1.0345, "step": 5919 }, { "epoch": 0.8083014746040416, "grad_norm": 6.02752685546875, "learning_rate": 9.331922835101281e-07, "loss": 1.0223, "step": 5920 }, { "epoch": 0.8084380120152922, "grad_norm": 6.240177631378174, "learning_rate": 9.319063296137032e-07, "loss": 0.8294, "step": 5921 }, { "epoch": 0.8085745494265428, "grad_norm": 5.833874225616455, "learning_rate": 9.306211712989637e-07, "loss": 0.8897, "step": 5922 }, { "epoch": 0.8087110868377936, "grad_norm": 6.139307022094727, "learning_rate": 9.293368088172422e-07, "loss": 0.9109, "step": 5923 }, { "epoch": 0.8088476242490442, "grad_norm": 5.117588520050049, "learning_rate": 9.280532424197192e-07, "loss": 0.7815, "step": 5924 }, { "epoch": 0.808984161660295, "grad_norm": 6.496694564819336, "learning_rate": 9.267704723574139e-07, "loss": 1.0383, "step": 5925 }, { "epoch": 0.8091206990715456, "grad_norm": 5.285203456878662, "learning_rate": 9.254884988811952e-07, "loss": 0.9153, "step": 5926 }, { "epoch": 0.8092572364827962, "grad_norm": 10.018228530883789, "learning_rate": 9.242073222417736e-07, "loss": 0.9341, "step": 5927 }, { "epoch": 0.809393773894047, "grad_norm": 9.594717979431152, "learning_rate": 9.22926942689702e-07, "loss": 0.9449, "step": 5928 }, { "epoch": 0.8095303113052976, "grad_norm": 6.690884113311768, "learning_rate": 9.216473604753817e-07, "loss": 1.033, "step": 5929 }, { "epoch": 0.8096668487165484, "grad_norm": 5.9738078117370605, "learning_rate": 9.20368575849055e-07, "loss": 0.9255, "step": 5930 }, { "epoch": 0.809803386127799, "grad_norm": 5.678671360015869, "learning_rate": 9.190905890608071e-07, "loss": 1.0221, "step": 5931 }, { "epoch": 0.8099399235390496, "grad_norm": 8.08583927154541, "learning_rate": 9.178134003605721e-07, "loss": 0.9585, "step": 5932 }, { "epoch": 0.8100764609503004, "grad_norm": 6.514730930328369, "learning_rate": 9.165370099981235e-07, "loss": 1.0219, "step": 5933 }, { "epoch": 0.810212998361551, "grad_norm": 15.342748641967773, "learning_rate": 9.15261418223079e-07, "loss": 0.9008, "step": 5934 }, { "epoch": 0.8103495357728018, "grad_norm": 7.17624568939209, "learning_rate": 9.139866252849034e-07, "loss": 0.8347, "step": 5935 }, { "epoch": 0.8104860731840524, "grad_norm": 6.171482086181641, "learning_rate": 9.12712631432901e-07, "loss": 0.9416, "step": 5936 }, { "epoch": 0.8106226105953032, "grad_norm": 6.535037517547607, "learning_rate": 9.114394369162249e-07, "loss": 1.0166, "step": 5937 }, { "epoch": 0.8107591480065538, "grad_norm": 6.464998722076416, "learning_rate": 9.101670419838654e-07, "loss": 0.9543, "step": 5938 }, { "epoch": 0.8108956854178044, "grad_norm": 6.686229705810547, "learning_rate": 9.088954468846628e-07, "loss": 1.0498, "step": 5939 }, { "epoch": 0.8110322228290552, "grad_norm": 8.15402603149414, "learning_rate": 9.076246518672971e-07, "loss": 0.9331, "step": 5940 }, { "epoch": 0.8111687602403058, "grad_norm": 9.762567520141602, "learning_rate": 9.063546571802934e-07, "loss": 1.0441, "step": 5941 }, { "epoch": 0.8113052976515566, "grad_norm": 5.119861602783203, "learning_rate": 9.050854630720185e-07, "loss": 1.0635, "step": 5942 }, { "epoch": 0.8114418350628072, "grad_norm": 9.486417770385742, "learning_rate": 9.038170697906834e-07, "loss": 0.8971, "step": 5943 }, { "epoch": 0.8115783724740578, "grad_norm": 8.284789085388184, "learning_rate": 9.025494775843457e-07, "loss": 0.9889, "step": 5944 }, { "epoch": 0.8117149098853086, "grad_norm": 7.147170543670654, "learning_rate": 9.012826867009005e-07, "loss": 0.9858, "step": 5945 }, { "epoch": 0.8118514472965592, "grad_norm": 8.32954216003418, "learning_rate": 9.000166973880919e-07, "loss": 0.8993, "step": 5946 }, { "epoch": 0.81198798470781, "grad_norm": 7.1288628578186035, "learning_rate": 8.987515098935029e-07, "loss": 0.8023, "step": 5947 }, { "epoch": 0.8121245221190606, "grad_norm": 6.6995930671691895, "learning_rate": 8.974871244645628e-07, "loss": 1.0252, "step": 5948 }, { "epoch": 0.8122610595303114, "grad_norm": 15.057552337646484, "learning_rate": 8.96223541348541e-07, "loss": 0.9276, "step": 5949 }, { "epoch": 0.812397596941562, "grad_norm": 7.255823135375977, "learning_rate": 8.949607607925542e-07, "loss": 0.9798, "step": 5950 }, { "epoch": 0.8125341343528126, "grad_norm": 7.416062355041504, "learning_rate": 8.936987830435562e-07, "loss": 0.9639, "step": 5951 }, { "epoch": 0.8126706717640634, "grad_norm": 5.996987342834473, "learning_rate": 8.924376083483511e-07, "loss": 0.9962, "step": 5952 }, { "epoch": 0.812807209175314, "grad_norm": 7.322890758514404, "learning_rate": 8.911772369535793e-07, "loss": 0.8848, "step": 5953 }, { "epoch": 0.8129437465865648, "grad_norm": 25.736860275268555, "learning_rate": 8.899176691057282e-07, "loss": 1.0966, "step": 5954 }, { "epoch": 0.8130802839978154, "grad_norm": 5.582528591156006, "learning_rate": 8.886589050511257e-07, "loss": 0.944, "step": 5955 }, { "epoch": 0.813216821409066, "grad_norm": 5.7572503089904785, "learning_rate": 8.874009450359428e-07, "loss": 0.8675, "step": 5956 }, { "epoch": 0.8133533588203168, "grad_norm": 7.147692680358887, "learning_rate": 8.861437893061964e-07, "loss": 0.9974, "step": 5957 }, { "epoch": 0.8134898962315674, "grad_norm": 5.58314323425293, "learning_rate": 8.848874381077411e-07, "loss": 1.0053, "step": 5958 }, { "epoch": 0.8136264336428182, "grad_norm": 9.588831901550293, "learning_rate": 8.836318916862796e-07, "loss": 1.0675, "step": 5959 }, { "epoch": 0.8137629710540688, "grad_norm": 6.3535475730896, "learning_rate": 8.823771502873513e-07, "loss": 0.8646, "step": 5960 }, { "epoch": 0.8138995084653194, "grad_norm": 11.128878593444824, "learning_rate": 8.811232141563441e-07, "loss": 0.9439, "step": 5961 }, { "epoch": 0.8140360458765702, "grad_norm": 7.098822116851807, "learning_rate": 8.798700835384844e-07, "loss": 0.9705, "step": 5962 }, { "epoch": 0.8141725832878208, "grad_norm": 5.40207052230835, "learning_rate": 8.786177586788408e-07, "loss": 0.9085, "step": 5963 }, { "epoch": 0.8143091206990716, "grad_norm": 12.659236907958984, "learning_rate": 8.773662398223276e-07, "loss": 0.8447, "step": 5964 }, { "epoch": 0.8144456581103222, "grad_norm": 6.4095869064331055, "learning_rate": 8.761155272136984e-07, "loss": 0.9615, "step": 5965 }, { "epoch": 0.814582195521573, "grad_norm": 5.848822116851807, "learning_rate": 8.748656210975515e-07, "loss": 0.9278, "step": 5966 }, { "epoch": 0.8147187329328236, "grad_norm": 7.827000617980957, "learning_rate": 8.736165217183262e-07, "loss": 0.9753, "step": 5967 }, { "epoch": 0.8148552703440742, "grad_norm": 5.997834205627441, "learning_rate": 8.723682293203034e-07, "loss": 0.9854, "step": 5968 }, { "epoch": 0.814991807755325, "grad_norm": 7.894003868103027, "learning_rate": 8.711207441476061e-07, "loss": 1.0249, "step": 5969 }, { "epoch": 0.8151283451665756, "grad_norm": 6.1814751625061035, "learning_rate": 8.698740664442024e-07, "loss": 0.9206, "step": 5970 }, { "epoch": 0.8152648825778264, "grad_norm": 7.674288749694824, "learning_rate": 8.686281964538979e-07, "loss": 1.1375, "step": 5971 }, { "epoch": 0.815401419989077, "grad_norm": 5.3032026290893555, "learning_rate": 8.673831344203454e-07, "loss": 0.8795, "step": 5972 }, { "epoch": 0.8155379574003276, "grad_norm": 5.541345596313477, "learning_rate": 8.661388805870357e-07, "loss": 0.859, "step": 5973 }, { "epoch": 0.8156744948115784, "grad_norm": 6.40965461730957, "learning_rate": 8.648954351973015e-07, "loss": 1.0326, "step": 5974 }, { "epoch": 0.815811032222829, "grad_norm": 6.304429531097412, "learning_rate": 8.636527984943211e-07, "loss": 1.0209, "step": 5975 }, { "epoch": 0.8159475696340798, "grad_norm": 5.093228340148926, "learning_rate": 8.624109707211098e-07, "loss": 0.8839, "step": 5976 }, { "epoch": 0.8160841070453304, "grad_norm": 7.053525447845459, "learning_rate": 8.611699521205297e-07, "loss": 0.9518, "step": 5977 }, { "epoch": 0.8162206444565812, "grad_norm": 5.5177001953125, "learning_rate": 8.599297429352798e-07, "loss": 0.9769, "step": 5978 }, { "epoch": 0.8163571818678318, "grad_norm": 7.0840277671813965, "learning_rate": 8.586903434079053e-07, "loss": 0.9947, "step": 5979 }, { "epoch": 0.8164937192790824, "grad_norm": 4.966041564941406, "learning_rate": 8.574517537807897e-07, "loss": 0.8318, "step": 5980 }, { "epoch": 0.8166302566903332, "grad_norm": 5.8195648193359375, "learning_rate": 8.562139742961595e-07, "loss": 1.0423, "step": 5981 }, { "epoch": 0.8167667941015838, "grad_norm": 6.269500255584717, "learning_rate": 8.549770051960821e-07, "loss": 0.8572, "step": 5982 }, { "epoch": 0.8169033315128346, "grad_norm": 5.861905097961426, "learning_rate": 8.537408467224662e-07, "loss": 0.8453, "step": 5983 }, { "epoch": 0.8170398689240852, "grad_norm": 9.141486167907715, "learning_rate": 8.525054991170644e-07, "loss": 0.8328, "step": 5984 }, { "epoch": 0.8171764063353358, "grad_norm": 8.49560832977295, "learning_rate": 8.512709626214677e-07, "loss": 0.8062, "step": 5985 }, { "epoch": 0.8173129437465866, "grad_norm": 5.6569504737854, "learning_rate": 8.500372374771105e-07, "loss": 0.9479, "step": 5986 }, { "epoch": 0.8174494811578372, "grad_norm": 6.44003963470459, "learning_rate": 8.488043239252669e-07, "loss": 0.9769, "step": 5987 }, { "epoch": 0.817586018569088, "grad_norm": 8.550592422485352, "learning_rate": 8.475722222070542e-07, "loss": 0.914, "step": 5988 }, { "epoch": 0.8177225559803386, "grad_norm": 5.327877044677734, "learning_rate": 8.463409325634286e-07, "loss": 0.8307, "step": 5989 }, { "epoch": 0.8178590933915892, "grad_norm": 8.696457862854004, "learning_rate": 8.451104552351908e-07, "loss": 0.6662, "step": 5990 }, { "epoch": 0.81799563080284, "grad_norm": 4.953520774841309, "learning_rate": 8.438807904629792e-07, "loss": 0.9557, "step": 5991 }, { "epoch": 0.8181321682140906, "grad_norm": 5.1043596267700195, "learning_rate": 8.426519384872733e-07, "loss": 0.8912, "step": 5992 }, { "epoch": 0.8182687056253414, "grad_norm": 5.390047073364258, "learning_rate": 8.414238995483981e-07, "loss": 0.9266, "step": 5993 }, { "epoch": 0.818405243036592, "grad_norm": 14.079085350036621, "learning_rate": 8.401966738865148e-07, "loss": 1.0876, "step": 5994 }, { "epoch": 0.8185417804478428, "grad_norm": 6.665754318237305, "learning_rate": 8.389702617416274e-07, "loss": 0.8763, "step": 5995 }, { "epoch": 0.8186783178590934, "grad_norm": 6.6927618980407715, "learning_rate": 8.377446633535797e-07, "loss": 0.9292, "step": 5996 }, { "epoch": 0.818814855270344, "grad_norm": 5.071844577789307, "learning_rate": 8.365198789620593e-07, "loss": 0.9516, "step": 5997 }, { "epoch": 0.8189513926815948, "grad_norm": 5.804231643676758, "learning_rate": 8.352959088065904e-07, "loss": 0.8888, "step": 5998 }, { "epoch": 0.8190879300928454, "grad_norm": 7.512393951416016, "learning_rate": 8.340727531265431e-07, "loss": 0.8564, "step": 5999 }, { "epoch": 0.8192244675040962, "grad_norm": 5.519015789031982, "learning_rate": 8.32850412161122e-07, "loss": 1.038, "step": 6000 }, { "epoch": 0.8193610049153468, "grad_norm": 6.77590799331665, "learning_rate": 8.316288861493788e-07, "loss": 0.9789, "step": 6001 }, { "epoch": 0.8194975423265974, "grad_norm": 6.216150283813477, "learning_rate": 8.304081753302007e-07, "loss": 0.8924, "step": 6002 }, { "epoch": 0.8196340797378482, "grad_norm": 6.603560447692871, "learning_rate": 8.291882799423184e-07, "loss": 0.9665, "step": 6003 }, { "epoch": 0.8197706171490988, "grad_norm": 7.312281131744385, "learning_rate": 8.279692002243028e-07, "loss": 0.9279, "step": 6004 }, { "epoch": 0.8199071545603496, "grad_norm": 5.679546356201172, "learning_rate": 8.267509364145637e-07, "loss": 0.8474, "step": 6005 }, { "epoch": 0.8200436919716002, "grad_norm": 7.42680549621582, "learning_rate": 8.255334887513522e-07, "loss": 0.9622, "step": 6006 }, { "epoch": 0.820180229382851, "grad_norm": 5.97423791885376, "learning_rate": 8.243168574727589e-07, "loss": 1.1192, "step": 6007 }, { "epoch": 0.8203167667941016, "grad_norm": 6.244233131408691, "learning_rate": 8.231010428167191e-07, "loss": 0.9477, "step": 6008 }, { "epoch": 0.8204533042053522, "grad_norm": 11.378974914550781, "learning_rate": 8.218860450210015e-07, "loss": 1.0829, "step": 6009 }, { "epoch": 0.820589841616603, "grad_norm": 6.126101016998291, "learning_rate": 8.206718643232209e-07, "loss": 1.0084, "step": 6010 }, { "epoch": 0.8207263790278536, "grad_norm": 5.967619895935059, "learning_rate": 8.194585009608286e-07, "loss": 0.8289, "step": 6011 }, { "epoch": 0.8208629164391044, "grad_norm": 6.696547508239746, "learning_rate": 8.182459551711197e-07, "loss": 0.9629, "step": 6012 }, { "epoch": 0.820999453850355, "grad_norm": 6.674386501312256, "learning_rate": 8.170342271912251e-07, "loss": 0.9746, "step": 6013 }, { "epoch": 0.8211359912616056, "grad_norm": 6.012960910797119, "learning_rate": 8.158233172581181e-07, "loss": 0.9967, "step": 6014 }, { "epoch": 0.8212725286728564, "grad_norm": 4.651568412780762, "learning_rate": 8.146132256086126e-07, "loss": 0.8254, "step": 6015 }, { "epoch": 0.821409066084107, "grad_norm": 5.14337682723999, "learning_rate": 8.134039524793603e-07, "loss": 0.9179, "step": 6016 }, { "epoch": 0.8215456034953578, "grad_norm": 6.157440662384033, "learning_rate": 8.121954981068564e-07, "loss": 1.0143, "step": 6017 }, { "epoch": 0.8216821409066084, "grad_norm": 7.132279396057129, "learning_rate": 8.109878627274321e-07, "loss": 0.9044, "step": 6018 }, { "epoch": 0.821818678317859, "grad_norm": 12.394059181213379, "learning_rate": 8.097810465772604e-07, "loss": 0.9215, "step": 6019 }, { "epoch": 0.8219552157291098, "grad_norm": 6.1917948722839355, "learning_rate": 8.085750498923528e-07, "loss": 0.8528, "step": 6020 }, { "epoch": 0.8220917531403604, "grad_norm": 6.703836441040039, "learning_rate": 8.073698729085638e-07, "loss": 1.0166, "step": 6021 }, { "epoch": 0.8222282905516112, "grad_norm": 10.431328773498535, "learning_rate": 8.061655158615822e-07, "loss": 1.0058, "step": 6022 }, { "epoch": 0.8223648279628618, "grad_norm": 6.187649250030518, "learning_rate": 8.049619789869428e-07, "loss": 0.947, "step": 6023 }, { "epoch": 0.8225013653741126, "grad_norm": 7.580956935882568, "learning_rate": 8.037592625200152e-07, "loss": 0.9013, "step": 6024 }, { "epoch": 0.8226379027853632, "grad_norm": 5.231788158416748, "learning_rate": 8.025573666960096e-07, "loss": 1.0029, "step": 6025 }, { "epoch": 0.8227744401966138, "grad_norm": 20.828853607177734, "learning_rate": 8.013562917499773e-07, "loss": 1.0015, "step": 6026 }, { "epoch": 0.8229109776078646, "grad_norm": 5.275928020477295, "learning_rate": 8.001560379168066e-07, "loss": 0.9182, "step": 6027 }, { "epoch": 0.8230475150191152, "grad_norm": 9.093202590942383, "learning_rate": 7.989566054312286e-07, "loss": 0.9449, "step": 6028 }, { "epoch": 0.823184052430366, "grad_norm": 5.879027366638184, "learning_rate": 7.977579945278091e-07, "loss": 0.8183, "step": 6029 }, { "epoch": 0.8233205898416166, "grad_norm": 6.377237796783447, "learning_rate": 7.965602054409594e-07, "loss": 0.9043, "step": 6030 }, { "epoch": 0.8234571272528672, "grad_norm": 5.981852054595947, "learning_rate": 7.953632384049242e-07, "loss": 1.0703, "step": 6031 }, { "epoch": 0.823593664664118, "grad_norm": 7.254115581512451, "learning_rate": 7.941670936537904e-07, "loss": 0.8721, "step": 6032 }, { "epoch": 0.8237302020753686, "grad_norm": 5.294435024261475, "learning_rate": 7.929717714214829e-07, "loss": 0.9127, "step": 6033 }, { "epoch": 0.8238667394866194, "grad_norm": 6.4421892166137695, "learning_rate": 7.91777271941766e-07, "loss": 1.0088, "step": 6034 }, { "epoch": 0.82400327689787, "grad_norm": 7.773678779602051, "learning_rate": 7.905835954482455e-07, "loss": 0.904, "step": 6035 }, { "epoch": 0.8241398143091206, "grad_norm": 6.209051132202148, "learning_rate": 7.893907421743613e-07, "loss": 0.8198, "step": 6036 }, { "epoch": 0.8242763517203714, "grad_norm": 5.972301483154297, "learning_rate": 7.881987123533985e-07, "loss": 0.8949, "step": 6037 }, { "epoch": 0.824412889131622, "grad_norm": 7.232142448425293, "learning_rate": 7.870075062184751e-07, "loss": 0.9861, "step": 6038 }, { "epoch": 0.8245494265428728, "grad_norm": 4.805367469787598, "learning_rate": 7.858171240025525e-07, "loss": 0.9109, "step": 6039 }, { "epoch": 0.8246859639541234, "grad_norm": 6.361180305480957, "learning_rate": 7.846275659384278e-07, "loss": 0.8849, "step": 6040 }, { "epoch": 0.8248225013653742, "grad_norm": 5.290686130523682, "learning_rate": 7.834388322587405e-07, "loss": 0.9603, "step": 6041 }, { "epoch": 0.8249590387766248, "grad_norm": 10.295391082763672, "learning_rate": 7.822509231959641e-07, "loss": 0.9317, "step": 6042 }, { "epoch": 0.8250955761878754, "grad_norm": 6.320725917816162, "learning_rate": 7.81063838982416e-07, "loss": 0.859, "step": 6043 }, { "epoch": 0.8252321135991262, "grad_norm": 7.2736382484436035, "learning_rate": 7.798775798502484e-07, "loss": 0.9528, "step": 6044 }, { "epoch": 0.8253686510103768, "grad_norm": 5.263650417327881, "learning_rate": 7.78692146031454e-07, "loss": 0.9176, "step": 6045 }, { "epoch": 0.8255051884216276, "grad_norm": 6.519360542297363, "learning_rate": 7.775075377578633e-07, "loss": 0.8727, "step": 6046 }, { "epoch": 0.8256417258328782, "grad_norm": 5.862558841705322, "learning_rate": 7.763237552611441e-07, "loss": 0.8537, "step": 6047 }, { "epoch": 0.8257782632441288, "grad_norm": 7.207602024078369, "learning_rate": 7.751407987728071e-07, "loss": 0.9767, "step": 6048 }, { "epoch": 0.8259148006553796, "grad_norm": 7.550887107849121, "learning_rate": 7.739586685241967e-07, "loss": 0.8571, "step": 6049 }, { "epoch": 0.8260513380666302, "grad_norm": 7.448858261108398, "learning_rate": 7.727773647464987e-07, "loss": 0.8597, "step": 6050 }, { "epoch": 0.826187875477881, "grad_norm": 6.79356575012207, "learning_rate": 7.715968876707353e-07, "loss": 1.0523, "step": 6051 }, { "epoch": 0.8263244128891316, "grad_norm": 7.5771613121032715, "learning_rate": 7.704172375277691e-07, "loss": 1.1209, "step": 6052 }, { "epoch": 0.8264609503003824, "grad_norm": 8.521787643432617, "learning_rate": 7.692384145482996e-07, "loss": 1.0225, "step": 6053 }, { "epoch": 0.826597487711633, "grad_norm": 9.747159957885742, "learning_rate": 7.680604189628627e-07, "loss": 1.0222, "step": 6054 }, { "epoch": 0.8267340251228836, "grad_norm": 6.231647491455078, "learning_rate": 7.668832510018376e-07, "loss": 0.8776, "step": 6055 }, { "epoch": 0.8268705625341344, "grad_norm": 5.331812858581543, "learning_rate": 7.657069108954368e-07, "loss": 1.005, "step": 6056 }, { "epoch": 0.827007099945385, "grad_norm": 5.211276531219482, "learning_rate": 7.645313988737119e-07, "loss": 0.975, "step": 6057 }, { "epoch": 0.8271436373566358, "grad_norm": 7.5025811195373535, "learning_rate": 7.633567151665561e-07, "loss": 1.021, "step": 6058 }, { "epoch": 0.8272801747678864, "grad_norm": 5.764501571655273, "learning_rate": 7.621828600036957e-07, "loss": 1.0113, "step": 6059 }, { "epoch": 0.827416712179137, "grad_norm": 6.687558650970459, "learning_rate": 7.610098336146965e-07, "loss": 0.9671, "step": 6060 }, { "epoch": 0.8275532495903878, "grad_norm": 8.84978199005127, "learning_rate": 7.59837636228965e-07, "loss": 0.9248, "step": 6061 }, { "epoch": 0.8276897870016384, "grad_norm": 6.430138111114502, "learning_rate": 7.586662680757418e-07, "loss": 0.9056, "step": 6062 }, { "epoch": 0.8278263244128892, "grad_norm": 6.109920978546143, "learning_rate": 7.574957293841079e-07, "loss": 1.1182, "step": 6063 }, { "epoch": 0.8279628618241398, "grad_norm": 34.759979248046875, "learning_rate": 7.563260203829809e-07, "loss": 0.9258, "step": 6064 }, { "epoch": 0.8280993992353904, "grad_norm": 5.545628547668457, "learning_rate": 7.55157141301115e-07, "loss": 0.8538, "step": 6065 }, { "epoch": 0.8282359366466412, "grad_norm": 8.200223922729492, "learning_rate": 7.539890923671061e-07, "loss": 0.8268, "step": 6066 }, { "epoch": 0.8283724740578918, "grad_norm": 6.090317726135254, "learning_rate": 7.528218738093823e-07, "loss": 0.8226, "step": 6067 }, { "epoch": 0.8285090114691426, "grad_norm": 5.621297359466553, "learning_rate": 7.516554858562142e-07, "loss": 0.9855, "step": 6068 }, { "epoch": 0.8286455488803932, "grad_norm": 7.2935028076171875, "learning_rate": 7.504899287357076e-07, "loss": 1.1073, "step": 6069 }, { "epoch": 0.828782086291644, "grad_norm": 10.052055358886719, "learning_rate": 7.493252026758052e-07, "loss": 1.0047, "step": 6070 }, { "epoch": 0.8289186237028946, "grad_norm": 8.83743667602539, "learning_rate": 7.481613079042876e-07, "loss": 1.0257, "step": 6071 }, { "epoch": 0.8290551611141452, "grad_norm": 6.618685245513916, "learning_rate": 7.469982446487756e-07, "loss": 1.0501, "step": 6072 }, { "epoch": 0.829191698525396, "grad_norm": 7.0428266525268555, "learning_rate": 7.458360131367237e-07, "loss": 0.9794, "step": 6073 }, { "epoch": 0.8293282359366466, "grad_norm": 6.873630046844482, "learning_rate": 7.446746135954236e-07, "loss": 1.0155, "step": 6074 }, { "epoch": 0.8294647733478974, "grad_norm": 5.76023530960083, "learning_rate": 7.435140462520085e-07, "loss": 0.9733, "step": 6075 }, { "epoch": 0.829601310759148, "grad_norm": 5.494194984436035, "learning_rate": 7.423543113334436e-07, "loss": 0.8645, "step": 6076 }, { "epoch": 0.8297378481703986, "grad_norm": 7.7363739013671875, "learning_rate": 7.411954090665368e-07, "loss": 0.8715, "step": 6077 }, { "epoch": 0.8298743855816494, "grad_norm": 6.5160136222839355, "learning_rate": 7.40037339677927e-07, "loss": 0.9124, "step": 6078 }, { "epoch": 0.8300109229929, "grad_norm": 6.639175891876221, "learning_rate": 7.388801033940968e-07, "loss": 0.8801, "step": 6079 }, { "epoch": 0.8301474604041508, "grad_norm": 4.496313571929932, "learning_rate": 7.377237004413596e-07, "loss": 0.9741, "step": 6080 }, { "epoch": 0.8302839978154014, "grad_norm": 6.7021965980529785, "learning_rate": 7.365681310458705e-07, "loss": 0.7748, "step": 6081 }, { "epoch": 0.8304205352266522, "grad_norm": 7.160914897918701, "learning_rate": 7.354133954336201e-07, "loss": 0.9035, "step": 6082 }, { "epoch": 0.8305570726379028, "grad_norm": 12.622906684875488, "learning_rate": 7.342594938304343e-07, "loss": 0.9599, "step": 6083 }, { "epoch": 0.8306936100491534, "grad_norm": 5.87579870223999, "learning_rate": 7.331064264619786e-07, "loss": 1.0108, "step": 6084 }, { "epoch": 0.8308301474604042, "grad_norm": 5.386961460113525, "learning_rate": 7.319541935537516e-07, "loss": 1.0978, "step": 6085 }, { "epoch": 0.8309666848716548, "grad_norm": 5.860728740692139, "learning_rate": 7.308027953310937e-07, "loss": 0.951, "step": 6086 }, { "epoch": 0.8311032222829056, "grad_norm": 6.299342632293701, "learning_rate": 7.296522320191779e-07, "loss": 0.8602, "step": 6087 }, { "epoch": 0.8312397596941562, "grad_norm": 4.898527145385742, "learning_rate": 7.285025038430171e-07, "loss": 0.9507, "step": 6088 }, { "epoch": 0.8313762971054068, "grad_norm": 8.513933181762695, "learning_rate": 7.273536110274576e-07, "loss": 0.9346, "step": 6089 }, { "epoch": 0.8315128345166576, "grad_norm": 5.0287275314331055, "learning_rate": 7.262055537971858e-07, "loss": 0.9075, "step": 6090 }, { "epoch": 0.8316493719279082, "grad_norm": 12.724555015563965, "learning_rate": 7.250583323767208e-07, "loss": 0.8491, "step": 6091 }, { "epoch": 0.831785909339159, "grad_norm": 6.622172832489014, "learning_rate": 7.239119469904227e-07, "loss": 0.8477, "step": 6092 }, { "epoch": 0.8319224467504096, "grad_norm": 6.290123462677002, "learning_rate": 7.227663978624844e-07, "loss": 0.8992, "step": 6093 }, { "epoch": 0.8320589841616602, "grad_norm": 5.3805251121521, "learning_rate": 7.216216852169361e-07, "loss": 0.8154, "step": 6094 }, { "epoch": 0.832195521572911, "grad_norm": 6.853226661682129, "learning_rate": 7.20477809277646e-07, "loss": 0.8819, "step": 6095 }, { "epoch": 0.8323320589841616, "grad_norm": 6.106152057647705, "learning_rate": 7.193347702683173e-07, "loss": 0.9044, "step": 6096 }, { "epoch": 0.8324685963954124, "grad_norm": 43.25228500366211, "learning_rate": 7.181925684124902e-07, "loss": 1.074, "step": 6097 }, { "epoch": 0.832605133806663, "grad_norm": 7.100437164306641, "learning_rate": 7.170512039335386e-07, "loss": 0.854, "step": 6098 }, { "epoch": 0.8327416712179138, "grad_norm": 6.411056041717529, "learning_rate": 7.159106770546775e-07, "loss": 0.9075, "step": 6099 }, { "epoch": 0.8328782086291644, "grad_norm": 4.869194507598877, "learning_rate": 7.147709879989539e-07, "loss": 0.7654, "step": 6100 }, { "epoch": 0.833014746040415, "grad_norm": 7.032118797302246, "learning_rate": 7.13632136989254e-07, "loss": 0.9623, "step": 6101 }, { "epoch": 0.8331512834516658, "grad_norm": 8.000211715698242, "learning_rate": 7.124941242482963e-07, "loss": 1.0374, "step": 6102 }, { "epoch": 0.8332878208629164, "grad_norm": 5.933933258056641, "learning_rate": 7.113569499986401e-07, "loss": 0.93, "step": 6103 }, { "epoch": 0.8334243582741672, "grad_norm": 5.9407219886779785, "learning_rate": 7.102206144626766e-07, "loss": 0.9104, "step": 6104 }, { "epoch": 0.8335608956854178, "grad_norm": 8.242212295532227, "learning_rate": 7.090851178626346e-07, "loss": 0.9646, "step": 6105 }, { "epoch": 0.8336974330966684, "grad_norm": 5.8169779777526855, "learning_rate": 7.079504604205806e-07, "loss": 0.9421, "step": 6106 }, { "epoch": 0.8338339705079192, "grad_norm": 5.283357620239258, "learning_rate": 7.068166423584127e-07, "loss": 0.7849, "step": 6107 }, { "epoch": 0.8339705079191698, "grad_norm": 7.411425590515137, "learning_rate": 7.056836638978698e-07, "loss": 1.0856, "step": 6108 }, { "epoch": 0.8341070453304206, "grad_norm": 5.9339518547058105, "learning_rate": 7.045515252605234e-07, "loss": 1.0594, "step": 6109 }, { "epoch": 0.8342435827416712, "grad_norm": 6.470942497253418, "learning_rate": 7.034202266677814e-07, "loss": 0.8657, "step": 6110 }, { "epoch": 0.834380120152922, "grad_norm": 6.940715312957764, "learning_rate": 7.022897683408858e-07, "loss": 0.9294, "step": 6111 }, { "epoch": 0.8345166575641726, "grad_norm": 5.9534807205200195, "learning_rate": 7.011601505009196e-07, "loss": 0.8063, "step": 6112 }, { "epoch": 0.8346531949754232, "grad_norm": 9.039587020874023, "learning_rate": 7.000313733687947e-07, "loss": 0.9951, "step": 6113 }, { "epoch": 0.834789732386674, "grad_norm": 5.211368083953857, "learning_rate": 6.989034371652648e-07, "loss": 0.8437, "step": 6114 }, { "epoch": 0.8349262697979246, "grad_norm": 4.799892902374268, "learning_rate": 6.97776342110914e-07, "loss": 0.8805, "step": 6115 }, { "epoch": 0.8350628072091754, "grad_norm": 7.0157575607299805, "learning_rate": 6.966500884261635e-07, "loss": 0.9909, "step": 6116 }, { "epoch": 0.835199344620426, "grad_norm": 7.283013343811035, "learning_rate": 6.955246763312722e-07, "loss": 0.9545, "step": 6117 }, { "epoch": 0.8353358820316766, "grad_norm": 6.141777992248535, "learning_rate": 6.944001060463313e-07, "loss": 0.9221, "step": 6118 }, { "epoch": 0.8354724194429274, "grad_norm": 6.244899749755859, "learning_rate": 6.932763777912704e-07, "loss": 0.8838, "step": 6119 }, { "epoch": 0.835608956854178, "grad_norm": 5.046258926391602, "learning_rate": 6.921534917858513e-07, "loss": 0.937, "step": 6120 }, { "epoch": 0.8357454942654288, "grad_norm": 4.845920562744141, "learning_rate": 6.910314482496738e-07, "loss": 0.8784, "step": 6121 }, { "epoch": 0.8358820316766794, "grad_norm": 5.6691741943359375, "learning_rate": 6.899102474021696e-07, "loss": 0.9632, "step": 6122 }, { "epoch": 0.83601856908793, "grad_norm": 6.214330196380615, "learning_rate": 6.887898894626099e-07, "loss": 0.9459, "step": 6123 }, { "epoch": 0.8361551064991808, "grad_norm": 8.975057601928711, "learning_rate": 6.876703746500984e-07, "loss": 0.9694, "step": 6124 }, { "epoch": 0.8362916439104314, "grad_norm": 5.69432258605957, "learning_rate": 6.865517031835727e-07, "loss": 0.9089, "step": 6125 }, { "epoch": 0.8364281813216822, "grad_norm": 5.826721668243408, "learning_rate": 6.854338752818097e-07, "loss": 0.9768, "step": 6126 }, { "epoch": 0.8365647187329328, "grad_norm": 5.4837727546691895, "learning_rate": 6.843168911634163e-07, "loss": 0.9475, "step": 6127 }, { "epoch": 0.8367012561441836, "grad_norm": 5.303565979003906, "learning_rate": 6.832007510468392e-07, "loss": 0.9406, "step": 6128 }, { "epoch": 0.8368377935554342, "grad_norm": 6.036495685577393, "learning_rate": 6.820854551503553e-07, "loss": 0.94, "step": 6129 }, { "epoch": 0.8369743309666848, "grad_norm": 6.683126926422119, "learning_rate": 6.809710036920819e-07, "loss": 0.8622, "step": 6130 }, { "epoch": 0.8371108683779356, "grad_norm": 5.792947769165039, "learning_rate": 6.798573968899642e-07, "loss": 0.9533, "step": 6131 }, { "epoch": 0.8372474057891862, "grad_norm": 6.681478977203369, "learning_rate": 6.787446349617899e-07, "loss": 0.9413, "step": 6132 }, { "epoch": 0.837383943200437, "grad_norm": 5.526304244995117, "learning_rate": 6.776327181251758e-07, "loss": 1.0269, "step": 6133 }, { "epoch": 0.8375204806116876, "grad_norm": 7.038468360900879, "learning_rate": 6.765216465975749e-07, "loss": 1.0069, "step": 6134 }, { "epoch": 0.8376570180229382, "grad_norm": 7.266366481781006, "learning_rate": 6.754114205962759e-07, "loss": 1.0836, "step": 6135 }, { "epoch": 0.837793555434189, "grad_norm": 6.23276424407959, "learning_rate": 6.743020403383999e-07, "loss": 0.9397, "step": 6136 }, { "epoch": 0.8379300928454396, "grad_norm": 5.8401665687561035, "learning_rate": 6.731935060409067e-07, "loss": 0.9283, "step": 6137 }, { "epoch": 0.8380666302566904, "grad_norm": 6.016854286193848, "learning_rate": 6.720858179205863e-07, "loss": 0.7604, "step": 6138 }, { "epoch": 0.838203167667941, "grad_norm": 7.217895030975342, "learning_rate": 6.709789761940666e-07, "loss": 1.0583, "step": 6139 }, { "epoch": 0.8383397050791916, "grad_norm": 5.559060096740723, "learning_rate": 6.698729810778065e-07, "loss": 0.867, "step": 6140 }, { "epoch": 0.8384762424904424, "grad_norm": 7.054479598999023, "learning_rate": 6.687678327881036e-07, "loss": 1.0144, "step": 6141 }, { "epoch": 0.838612779901693, "grad_norm": 5.730830192565918, "learning_rate": 6.676635315410856e-07, "loss": 0.8438, "step": 6142 }, { "epoch": 0.8387493173129438, "grad_norm": 9.605636596679688, "learning_rate": 6.665600775527181e-07, "loss": 0.9304, "step": 6143 }, { "epoch": 0.8388858547241944, "grad_norm": 6.1067094802856445, "learning_rate": 6.65457471038799e-07, "loss": 0.9712, "step": 6144 }, { "epoch": 0.8390223921354452, "grad_norm": 7.384527683258057, "learning_rate": 6.643557122149591e-07, "loss": 1.021, "step": 6145 }, { "epoch": 0.8391589295466958, "grad_norm": 7.595494270324707, "learning_rate": 6.632548012966678e-07, "loss": 0.9425, "step": 6146 }, { "epoch": 0.8392954669579464, "grad_norm": 5.12518835067749, "learning_rate": 6.621547384992244e-07, "loss": 0.9651, "step": 6147 }, { "epoch": 0.8394320043691972, "grad_norm": 6.505200386047363, "learning_rate": 6.610555240377653e-07, "loss": 0.939, "step": 6148 }, { "epoch": 0.8395685417804478, "grad_norm": 5.2892045974731445, "learning_rate": 6.599571581272573e-07, "loss": 0.9596, "step": 6149 }, { "epoch": 0.8397050791916986, "grad_norm": 6.420322895050049, "learning_rate": 6.588596409825065e-07, "loss": 0.8305, "step": 6150 }, { "epoch": 0.8398416166029492, "grad_norm": 8.236823081970215, "learning_rate": 6.577629728181479e-07, "loss": 1.061, "step": 6151 }, { "epoch": 0.8399781540141998, "grad_norm": 6.0547637939453125, "learning_rate": 6.566671538486552e-07, "loss": 0.8163, "step": 6152 }, { "epoch": 0.8401146914254506, "grad_norm": 6.772252082824707, "learning_rate": 6.555721842883306e-07, "loss": 1.0329, "step": 6153 }, { "epoch": 0.8402512288367012, "grad_norm": 6.026567459106445, "learning_rate": 6.54478064351316e-07, "loss": 0.8229, "step": 6154 }, { "epoch": 0.840387766247952, "grad_norm": 5.093323707580566, "learning_rate": 6.53384794251583e-07, "loss": 0.9012, "step": 6155 }, { "epoch": 0.8405243036592026, "grad_norm": 6.2133331298828125, "learning_rate": 6.522923742029374e-07, "loss": 1.0056, "step": 6156 }, { "epoch": 0.8406608410704534, "grad_norm": 5.771066188812256, "learning_rate": 6.512008044190215e-07, "loss": 0.9468, "step": 6157 }, { "epoch": 0.840797378481704, "grad_norm": 5.544821739196777, "learning_rate": 6.50110085113308e-07, "loss": 0.9416, "step": 6158 }, { "epoch": 0.8409339158929546, "grad_norm": 5.829370498657227, "learning_rate": 6.49020216499106e-07, "loss": 1.0814, "step": 6159 }, { "epoch": 0.8410704533042054, "grad_norm": 13.910468101501465, "learning_rate": 6.479311987895559e-07, "loss": 0.9314, "step": 6160 }, { "epoch": 0.841206990715456, "grad_norm": 7.195474624633789, "learning_rate": 6.468430321976338e-07, "loss": 1.014, "step": 6161 }, { "epoch": 0.8413435281267068, "grad_norm": 5.765955924987793, "learning_rate": 6.457557169361461e-07, "loss": 0.9787, "step": 6162 }, { "epoch": 0.8414800655379574, "grad_norm": 6.5870280265808105, "learning_rate": 6.446692532177379e-07, "loss": 1.0473, "step": 6163 }, { "epoch": 0.841616602949208, "grad_norm": 4.966012001037598, "learning_rate": 6.435836412548835e-07, "loss": 0.8971, "step": 6164 }, { "epoch": 0.8417531403604588, "grad_norm": 6.991256237030029, "learning_rate": 6.424988812598903e-07, "loss": 0.8977, "step": 6165 }, { "epoch": 0.8418896777717094, "grad_norm": 7.11104154586792, "learning_rate": 6.414149734449037e-07, "loss": 0.9282, "step": 6166 }, { "epoch": 0.8420262151829602, "grad_norm": 5.655010223388672, "learning_rate": 6.40331918021897e-07, "loss": 0.9251, "step": 6167 }, { "epoch": 0.8421627525942108, "grad_norm": 8.47278881072998, "learning_rate": 6.392497152026811e-07, "loss": 0.7716, "step": 6168 }, { "epoch": 0.8422992900054614, "grad_norm": 10.435815811157227, "learning_rate": 6.381683651988963e-07, "loss": 0.9084, "step": 6169 }, { "epoch": 0.8424358274167122, "grad_norm": 5.797774314880371, "learning_rate": 6.370878682220205e-07, "loss": 1.0807, "step": 6170 }, { "epoch": 0.8425723648279628, "grad_norm": 6.9679765701293945, "learning_rate": 6.360082244833599e-07, "loss": 0.8469, "step": 6171 }, { "epoch": 0.8427089022392136, "grad_norm": 6.223174095153809, "learning_rate": 6.349294341940593e-07, "loss": 0.8916, "step": 6172 }, { "epoch": 0.8428454396504642, "grad_norm": 5.373178005218506, "learning_rate": 6.338514975650912e-07, "loss": 0.9371, "step": 6173 }, { "epoch": 0.842981977061715, "grad_norm": 5.662460803985596, "learning_rate": 6.327744148072651e-07, "loss": 0.9131, "step": 6174 }, { "epoch": 0.8431185144729656, "grad_norm": 4.898725509643555, "learning_rate": 6.316981861312204e-07, "loss": 0.8166, "step": 6175 }, { "epoch": 0.8432550518842162, "grad_norm": 6.983028411865234, "learning_rate": 6.306228117474317e-07, "loss": 0.8253, "step": 6176 }, { "epoch": 0.843391589295467, "grad_norm": 5.705809116363525, "learning_rate": 6.295482918662066e-07, "loss": 0.9125, "step": 6177 }, { "epoch": 0.8435281267067176, "grad_norm": 5.968257904052734, "learning_rate": 6.284746266976832e-07, "loss": 0.8586, "step": 6178 }, { "epoch": 0.8436646641179684, "grad_norm": 6.035765647888184, "learning_rate": 6.27401816451837e-07, "loss": 0.9319, "step": 6179 }, { "epoch": 0.843801201529219, "grad_norm": 8.055394172668457, "learning_rate": 6.263298613384705e-07, "loss": 0.9919, "step": 6180 }, { "epoch": 0.8439377389404696, "grad_norm": 6.426917552947998, "learning_rate": 6.25258761567224e-07, "loss": 0.8392, "step": 6181 }, { "epoch": 0.8440742763517204, "grad_norm": 7.83585786819458, "learning_rate": 6.24188517347567e-07, "loss": 1.011, "step": 6182 }, { "epoch": 0.844210813762971, "grad_norm": 4.806547164916992, "learning_rate": 6.231191288888044e-07, "loss": 0.8999, "step": 6183 }, { "epoch": 0.8443473511742218, "grad_norm": 14.591889381408691, "learning_rate": 6.220505964000717e-07, "loss": 0.9885, "step": 6184 }, { "epoch": 0.8444838885854724, "grad_norm": 5.290890693664551, "learning_rate": 6.209829200903383e-07, "loss": 0.8935, "step": 6185 }, { "epoch": 0.8446204259967232, "grad_norm": 5.18202543258667, "learning_rate": 6.19916100168404e-07, "loss": 0.856, "step": 6186 }, { "epoch": 0.8447569634079738, "grad_norm": 6.375435829162598, "learning_rate": 6.188501368429045e-07, "loss": 0.9652, "step": 6187 }, { "epoch": 0.8448935008192244, "grad_norm": 8.380054473876953, "learning_rate": 6.177850303223059e-07, "loss": 0.9642, "step": 6188 }, { "epoch": 0.8450300382304752, "grad_norm": 6.034421443939209, "learning_rate": 6.167207808149056e-07, "loss": 0.802, "step": 6189 }, { "epoch": 0.8451665756417258, "grad_norm": 7.463138580322266, "learning_rate": 6.156573885288375e-07, "loss": 0.9408, "step": 6190 }, { "epoch": 0.8453031130529766, "grad_norm": 6.0779242515563965, "learning_rate": 6.14594853672062e-07, "loss": 0.9556, "step": 6191 }, { "epoch": 0.8454396504642272, "grad_norm": 8.211406707763672, "learning_rate": 6.135331764523783e-07, "loss": 0.8722, "step": 6192 }, { "epoch": 0.8455761878754778, "grad_norm": 5.11403751373291, "learning_rate": 6.124723570774116e-07, "loss": 0.8727, "step": 6193 }, { "epoch": 0.8457127252867286, "grad_norm": 11.17186450958252, "learning_rate": 6.114123957546248e-07, "loss": 0.8468, "step": 6194 }, { "epoch": 0.8458492626979792, "grad_norm": 11.5364990234375, "learning_rate": 6.103532926913097e-07, "loss": 0.9842, "step": 6195 }, { "epoch": 0.84598580010923, "grad_norm": 4.942721366882324, "learning_rate": 6.092950480945897e-07, "loss": 0.9642, "step": 6196 }, { "epoch": 0.8461223375204806, "grad_norm": 7.9674482345581055, "learning_rate": 6.082376621714242e-07, "loss": 1.0814, "step": 6197 }, { "epoch": 0.8462588749317312, "grad_norm": 6.834774017333984, "learning_rate": 6.071811351286005e-07, "loss": 0.889, "step": 6198 }, { "epoch": 0.846395412342982, "grad_norm": 6.710633277893066, "learning_rate": 6.061254671727402e-07, "loss": 0.903, "step": 6199 }, { "epoch": 0.8465319497542326, "grad_norm": 6.798560619354248, "learning_rate": 6.050706585102945e-07, "loss": 0.9629, "step": 6200 }, { "epoch": 0.8466684871654834, "grad_norm": 5.714810848236084, "learning_rate": 6.040167093475513e-07, "loss": 0.8663, "step": 6201 }, { "epoch": 0.846805024576734, "grad_norm": 9.800137519836426, "learning_rate": 6.029636198906247e-07, "loss": 1.0452, "step": 6202 }, { "epoch": 0.8469415619879848, "grad_norm": 6.404882431030273, "learning_rate": 6.019113903454654e-07, "loss": 0.8573, "step": 6203 }, { "epoch": 0.8470780993992354, "grad_norm": 10.232120513916016, "learning_rate": 6.008600209178539e-07, "loss": 0.7825, "step": 6204 }, { "epoch": 0.847214636810486, "grad_norm": 6.865848064422607, "learning_rate": 5.998095118134006e-07, "loss": 0.9466, "step": 6205 }, { "epoch": 0.8473511742217368, "grad_norm": 8.626975059509277, "learning_rate": 5.987598632375518e-07, "loss": 0.9397, "step": 6206 }, { "epoch": 0.8474877116329874, "grad_norm": 8.083962440490723, "learning_rate": 5.977110753955817e-07, "loss": 0.8531, "step": 6207 }, { "epoch": 0.8476242490442382, "grad_norm": 6.906410217285156, "learning_rate": 5.966631484925994e-07, "loss": 1.0387, "step": 6208 }, { "epoch": 0.8477607864554888, "grad_norm": 6.163895606994629, "learning_rate": 5.956160827335416e-07, "loss": 0.9543, "step": 6209 }, { "epoch": 0.8478973238667394, "grad_norm": 7.743660926818848, "learning_rate": 5.945698783231818e-07, "loss": 0.802, "step": 6210 }, { "epoch": 0.8480338612779902, "grad_norm": 6.731042861938477, "learning_rate": 5.935245354661212e-07, "loss": 0.781, "step": 6211 }, { "epoch": 0.8481703986892408, "grad_norm": 7.279829978942871, "learning_rate": 5.92480054366793e-07, "loss": 1.0542, "step": 6212 }, { "epoch": 0.8483069361004916, "grad_norm": 6.815313816070557, "learning_rate": 5.914364352294616e-07, "loss": 0.9237, "step": 6213 }, { "epoch": 0.8484434735117422, "grad_norm": 7.229732513427734, "learning_rate": 5.903936782582253e-07, "loss": 0.974, "step": 6214 }, { "epoch": 0.848580010922993, "grad_norm": 8.073955535888672, "learning_rate": 5.893517836570117e-07, "loss": 1.0243, "step": 6215 }, { "epoch": 0.8487165483342436, "grad_norm": 5.376828193664551, "learning_rate": 5.883107516295794e-07, "loss": 1.0479, "step": 6216 }, { "epoch": 0.8488530857454942, "grad_norm": 11.596636772155762, "learning_rate": 5.872705823795205e-07, "loss": 0.9729, "step": 6217 }, { "epoch": 0.848989623156745, "grad_norm": 6.9137749671936035, "learning_rate": 5.86231276110255e-07, "loss": 0.8341, "step": 6218 }, { "epoch": 0.8491261605679956, "grad_norm": 6.489985466003418, "learning_rate": 5.851928330250389e-07, "loss": 1.0691, "step": 6219 }, { "epoch": 0.8492626979792464, "grad_norm": 6.213458061218262, "learning_rate": 5.841552533269534e-07, "loss": 0.9691, "step": 6220 }, { "epoch": 0.849399235390497, "grad_norm": 6.445953369140625, "learning_rate": 5.831185372189163e-07, "loss": 0.9943, "step": 6221 }, { "epoch": 0.8495357728017476, "grad_norm": 5.788681507110596, "learning_rate": 5.820826849036732e-07, "loss": 0.8107, "step": 6222 }, { "epoch": 0.8496723102129984, "grad_norm": 4.929882526397705, "learning_rate": 5.81047696583803e-07, "loss": 0.9928, "step": 6223 }, { "epoch": 0.849808847624249, "grad_norm": 5.639886379241943, "learning_rate": 5.800135724617134e-07, "loss": 0.945, "step": 6224 }, { "epoch": 0.8499453850354998, "grad_norm": 6.0965142250061035, "learning_rate": 5.789803127396442e-07, "loss": 0.9993, "step": 6225 }, { "epoch": 0.8500819224467504, "grad_norm": 5.445735454559326, "learning_rate": 5.779479176196667e-07, "loss": 0.8608, "step": 6226 }, { "epoch": 0.850218459858001, "grad_norm": 5.670139312744141, "learning_rate": 5.769163873036803e-07, "loss": 0.9521, "step": 6227 }, { "epoch": 0.8503549972692518, "grad_norm": 4.5511555671691895, "learning_rate": 5.75885721993421e-07, "loss": 0.928, "step": 6228 }, { "epoch": 0.8504915346805024, "grad_norm": 5.467923164367676, "learning_rate": 5.748559218904493e-07, "loss": 0.7946, "step": 6229 }, { "epoch": 0.8506280720917532, "grad_norm": 5.3641581535339355, "learning_rate": 5.73826987196161e-07, "loss": 0.9951, "step": 6230 }, { "epoch": 0.8507646095030038, "grad_norm": 5.727939128875732, "learning_rate": 5.727989181117794e-07, "loss": 1.0042, "step": 6231 }, { "epoch": 0.8509011469142546, "grad_norm": 5.733470439910889, "learning_rate": 5.717717148383617e-07, "loss": 1.001, "step": 6232 }, { "epoch": 0.8510376843255052, "grad_norm": 8.524284362792969, "learning_rate": 5.70745377576793e-07, "loss": 0.8526, "step": 6233 }, { "epoch": 0.8511742217367558, "grad_norm": 6.911466598510742, "learning_rate": 5.697199065277914e-07, "loss": 0.8572, "step": 6234 }, { "epoch": 0.8513107591480066, "grad_norm": 5.206319808959961, "learning_rate": 5.686953018919034e-07, "loss": 0.8573, "step": 6235 }, { "epoch": 0.8514472965592572, "grad_norm": 5.9929585456848145, "learning_rate": 5.676715638695063e-07, "loss": 1.0422, "step": 6236 }, { "epoch": 0.851583833970508, "grad_norm": 5.499980449676514, "learning_rate": 5.666486926608111e-07, "loss": 0.8003, "step": 6237 }, { "epoch": 0.8517203713817586, "grad_norm": 5.453682899475098, "learning_rate": 5.656266884658551e-07, "loss": 0.9439, "step": 6238 }, { "epoch": 0.8518569087930092, "grad_norm": 6.310804843902588, "learning_rate": 5.646055514845083e-07, "loss": 0.9069, "step": 6239 }, { "epoch": 0.85199344620426, "grad_norm": 6.670948505401611, "learning_rate": 5.635852819164695e-07, "loss": 0.99, "step": 6240 }, { "epoch": 0.8521299836155106, "grad_norm": 5.3728742599487305, "learning_rate": 5.625658799612704e-07, "loss": 0.8978, "step": 6241 }, { "epoch": 0.8522665210267614, "grad_norm": 6.383598327636719, "learning_rate": 5.615473458182702e-07, "loss": 0.9524, "step": 6242 }, { "epoch": 0.852403058438012, "grad_norm": 6.090155601501465, "learning_rate": 5.605296796866616e-07, "loss": 0.823, "step": 6243 }, { "epoch": 0.8525395958492626, "grad_norm": 8.038678169250488, "learning_rate": 5.595128817654638e-07, "loss": 0.8377, "step": 6244 }, { "epoch": 0.8526761332605134, "grad_norm": 6.254675388336182, "learning_rate": 5.5849695225353e-07, "loss": 1.0591, "step": 6245 }, { "epoch": 0.852812670671764, "grad_norm": 5.508857250213623, "learning_rate": 5.574818913495411e-07, "loss": 0.9645, "step": 6246 }, { "epoch": 0.8529492080830148, "grad_norm": 6.805173873901367, "learning_rate": 5.564676992520068e-07, "loss": 0.869, "step": 6247 }, { "epoch": 0.8530857454942654, "grad_norm": 7.351226329803467, "learning_rate": 5.554543761592717e-07, "loss": 0.9404, "step": 6248 }, { "epoch": 0.8532222829055162, "grad_norm": 4.934357643127441, "learning_rate": 5.544419222695064e-07, "loss": 0.9702, "step": 6249 }, { "epoch": 0.8533588203167668, "grad_norm": 6.071849822998047, "learning_rate": 5.534303377807121e-07, "loss": 0.9969, "step": 6250 }, { "epoch": 0.8534953577280174, "grad_norm": 5.2313313484191895, "learning_rate": 5.524196228907203e-07, "loss": 0.7734, "step": 6251 }, { "epoch": 0.8536318951392682, "grad_norm": 5.3352251052856445, "learning_rate": 5.514097777971939e-07, "loss": 1.0148, "step": 6252 }, { "epoch": 0.8537684325505188, "grad_norm": 6.814446449279785, "learning_rate": 5.504008026976232e-07, "loss": 0.9058, "step": 6253 }, { "epoch": 0.8539049699617696, "grad_norm": 6.504136562347412, "learning_rate": 5.493926977893315e-07, "loss": 0.8857, "step": 6254 }, { "epoch": 0.8540415073730202, "grad_norm": 5.7828688621521, "learning_rate": 5.483854632694685e-07, "loss": 1.144, "step": 6255 }, { "epoch": 0.8541780447842708, "grad_norm": 6.501652240753174, "learning_rate": 5.473790993350153e-07, "loss": 0.9188, "step": 6256 }, { "epoch": 0.8543145821955216, "grad_norm": 5.44770622253418, "learning_rate": 5.463736061827834e-07, "loss": 0.7978, "step": 6257 }, { "epoch": 0.8544511196067722, "grad_norm": 5.539835453033447, "learning_rate": 5.453689840094123e-07, "loss": 0.9871, "step": 6258 }, { "epoch": 0.854587657018023, "grad_norm": 5.288881778717041, "learning_rate": 5.443652330113741e-07, "loss": 0.9686, "step": 6259 }, { "epoch": 0.8547241944292736, "grad_norm": 5.982603073120117, "learning_rate": 5.433623533849658e-07, "loss": 0.7963, "step": 6260 }, { "epoch": 0.8548607318405244, "grad_norm": 4.817323207855225, "learning_rate": 5.423603453263199e-07, "loss": 0.8164, "step": 6261 }, { "epoch": 0.854997269251775, "grad_norm": 5.5579423904418945, "learning_rate": 5.413592090313929e-07, "loss": 0.9822, "step": 6262 }, { "epoch": 0.8551338066630256, "grad_norm": 6.4277753829956055, "learning_rate": 5.403589446959745e-07, "loss": 0.9877, "step": 6263 }, { "epoch": 0.8552703440742764, "grad_norm": 6.150827407836914, "learning_rate": 5.393595525156809e-07, "loss": 0.9717, "step": 6264 }, { "epoch": 0.855406881485527, "grad_norm": 7.243844509124756, "learning_rate": 5.383610326859617e-07, "loss": 0.9069, "step": 6265 }, { "epoch": 0.8555434188967778, "grad_norm": 7.15041971206665, "learning_rate": 5.373633854020932e-07, "loss": 1.0468, "step": 6266 }, { "epoch": 0.8556799563080284, "grad_norm": 5.178158283233643, "learning_rate": 5.363666108591792e-07, "loss": 0.865, "step": 6267 }, { "epoch": 0.855816493719279, "grad_norm": 6.029728889465332, "learning_rate": 5.353707092521581e-07, "loss": 1.0633, "step": 6268 }, { "epoch": 0.8559530311305298, "grad_norm": 5.143277645111084, "learning_rate": 5.343756807757927e-07, "loss": 1.0238, "step": 6269 }, { "epoch": 0.8560895685417804, "grad_norm": 6.031569957733154, "learning_rate": 5.333815256246783e-07, "loss": 0.922, "step": 6270 }, { "epoch": 0.8562261059530312, "grad_norm": 6.411166191101074, "learning_rate": 5.323882439932365e-07, "loss": 0.7828, "step": 6271 }, { "epoch": 0.8563626433642818, "grad_norm": 24.556629180908203, "learning_rate": 5.313958360757215e-07, "loss": 0.987, "step": 6272 }, { "epoch": 0.8564991807755324, "grad_norm": 5.956310749053955, "learning_rate": 5.304043020662125e-07, "loss": 1.0124, "step": 6273 }, { "epoch": 0.8566357181867832, "grad_norm": 6.365175724029541, "learning_rate": 5.294136421586227e-07, "loss": 0.9377, "step": 6274 }, { "epoch": 0.8567722555980338, "grad_norm": 5.991331577301025, "learning_rate": 5.284238565466898e-07, "loss": 0.9034, "step": 6275 }, { "epoch": 0.8569087930092846, "grad_norm": 6.632915019989014, "learning_rate": 5.274349454239836e-07, "loss": 0.8518, "step": 6276 }, { "epoch": 0.8570453304205352, "grad_norm": 13.006275177001953, "learning_rate": 5.264469089839008e-07, "loss": 0.9088, "step": 6277 }, { "epoch": 0.857181867831786, "grad_norm": 6.542431831359863, "learning_rate": 5.254597474196671e-07, "loss": 0.7776, "step": 6278 }, { "epoch": 0.8573184052430366, "grad_norm": 8.139970779418945, "learning_rate": 5.244734609243407e-07, "loss": 0.7877, "step": 6279 }, { "epoch": 0.8574549426542872, "grad_norm": 6.850924968719482, "learning_rate": 5.234880496908029e-07, "loss": 0.7799, "step": 6280 }, { "epoch": 0.857591480065538, "grad_norm": 26.942588806152344, "learning_rate": 5.22503513911769e-07, "loss": 0.9326, "step": 6281 }, { "epoch": 0.8577280174767886, "grad_norm": 5.606826305389404, "learning_rate": 5.215198537797794e-07, "loss": 1.0268, "step": 6282 }, { "epoch": 0.8578645548880394, "grad_norm": 6.465070724487305, "learning_rate": 5.205370694872064e-07, "loss": 0.9797, "step": 6283 }, { "epoch": 0.85800109229929, "grad_norm": 8.217486381530762, "learning_rate": 5.195551612262478e-07, "loss": 1.1785, "step": 6284 }, { "epoch": 0.8581376297105406, "grad_norm": 6.103366374969482, "learning_rate": 5.185741291889334e-07, "loss": 0.9926, "step": 6285 }, { "epoch": 0.8582741671217914, "grad_norm": 5.7780609130859375, "learning_rate": 5.175939735671187e-07, "loss": 1.0697, "step": 6286 }, { "epoch": 0.858410704533042, "grad_norm": 6.6736979484558105, "learning_rate": 5.166146945524886e-07, "loss": 0.9373, "step": 6287 }, { "epoch": 0.8585472419442928, "grad_norm": 10.213388442993164, "learning_rate": 5.156362923365587e-07, "loss": 0.9726, "step": 6288 }, { "epoch": 0.8586837793555434, "grad_norm": 6.580459117889404, "learning_rate": 5.146587671106701e-07, "loss": 1.0781, "step": 6289 }, { "epoch": 0.8588203167667942, "grad_norm": 7.402894496917725, "learning_rate": 5.136821190659947e-07, "loss": 0.8634, "step": 6290 }, { "epoch": 0.8589568541780448, "grad_norm": 7.262302398681641, "learning_rate": 5.127063483935296e-07, "loss": 1.067, "step": 6291 }, { "epoch": 0.8590933915892954, "grad_norm": 4.4744977951049805, "learning_rate": 5.117314552841052e-07, "loss": 1.0322, "step": 6292 }, { "epoch": 0.8592299290005462, "grad_norm": 6.873678684234619, "learning_rate": 5.107574399283755e-07, "loss": 0.835, "step": 6293 }, { "epoch": 0.8593664664117968, "grad_norm": 5.556891441345215, "learning_rate": 5.097843025168275e-07, "loss": 0.8857, "step": 6294 }, { "epoch": 0.8595030038230476, "grad_norm": 17.822402954101562, "learning_rate": 5.088120432397725e-07, "loss": 0.858, "step": 6295 }, { "epoch": 0.8596395412342982, "grad_norm": 6.763442516326904, "learning_rate": 5.078406622873505e-07, "loss": 0.9333, "step": 6296 }, { "epoch": 0.8597760786455488, "grad_norm": 5.831841468811035, "learning_rate": 5.068701598495334e-07, "loss": 0.908, "step": 6297 }, { "epoch": 0.8599126160567996, "grad_norm": 7.375953674316406, "learning_rate": 5.059005361161157e-07, "loss": 0.8565, "step": 6298 }, { "epoch": 0.8600491534680502, "grad_norm": 7.387597560882568, "learning_rate": 5.04931791276726e-07, "loss": 0.8373, "step": 6299 }, { "epoch": 0.860185690879301, "grad_norm": 6.993311405181885, "learning_rate": 5.039639255208156e-07, "loss": 1.1292, "step": 6300 }, { "epoch": 0.8603222282905516, "grad_norm": 7.702173709869385, "learning_rate": 5.029969390376682e-07, "loss": 0.89, "step": 6301 }, { "epoch": 0.8604587657018022, "grad_norm": 10.024561882019043, "learning_rate": 5.020308320163935e-07, "loss": 1.0425, "step": 6302 }, { "epoch": 0.860595303113053, "grad_norm": 6.827041149139404, "learning_rate": 5.010656046459284e-07, "loss": 0.9026, "step": 6303 }, { "epoch": 0.8607318405243036, "grad_norm": 7.2027764320373535, "learning_rate": 5.001012571150382e-07, "loss": 0.8582, "step": 6304 }, { "epoch": 0.8608683779355544, "grad_norm": 5.1170244216918945, "learning_rate": 4.991377896123184e-07, "loss": 0.9363, "step": 6305 }, { "epoch": 0.861004915346805, "grad_norm": 7.483628273010254, "learning_rate": 4.981752023261905e-07, "loss": 1.1987, "step": 6306 }, { "epoch": 0.8611414527580558, "grad_norm": 5.400750637054443, "learning_rate": 4.972134954449021e-07, "loss": 0.8805, "step": 6307 }, { "epoch": 0.8612779901693064, "grad_norm": 7.047628879547119, "learning_rate": 4.962526691565333e-07, "loss": 1.0641, "step": 6308 }, { "epoch": 0.861414527580557, "grad_norm": 7.989495754241943, "learning_rate": 4.952927236489868e-07, "loss": 0.8702, "step": 6309 }, { "epoch": 0.8615510649918078, "grad_norm": 5.745270252227783, "learning_rate": 4.943336591099979e-07, "loss": 0.9397, "step": 6310 }, { "epoch": 0.8616876024030584, "grad_norm": 6.098723888397217, "learning_rate": 4.933754757271247e-07, "loss": 0.8546, "step": 6311 }, { "epoch": 0.8618241398143092, "grad_norm": 38.6884651184082, "learning_rate": 4.924181736877576e-07, "loss": 0.8764, "step": 6312 }, { "epoch": 0.8619606772255598, "grad_norm": 5.889823913574219, "learning_rate": 4.914617531791121e-07, "loss": 1.0297, "step": 6313 }, { "epoch": 0.8620972146368104, "grad_norm": 5.174919605255127, "learning_rate": 4.905062143882311e-07, "loss": 1.0382, "step": 6314 }, { "epoch": 0.8622337520480612, "grad_norm": 5.216951847076416, "learning_rate": 4.895515575019849e-07, "loss": 0.8551, "step": 6315 }, { "epoch": 0.8623702894593118, "grad_norm": 5.295199871063232, "learning_rate": 4.885977827070748e-07, "loss": 0.9006, "step": 6316 }, { "epoch": 0.8625068268705626, "grad_norm": 5.732710361480713, "learning_rate": 4.876448901900243e-07, "loss": 0.9824, "step": 6317 }, { "epoch": 0.8626433642818132, "grad_norm": 5.938845634460449, "learning_rate": 4.866928801371878e-07, "loss": 0.7696, "step": 6318 }, { "epoch": 0.8627799016930638, "grad_norm": 7.4895339012146, "learning_rate": 4.857417527347474e-07, "loss": 0.8609, "step": 6319 }, { "epoch": 0.8629164391043146, "grad_norm": 7.715083122253418, "learning_rate": 4.847915081687099e-07, "loss": 0.9062, "step": 6320 }, { "epoch": 0.8630529765155652, "grad_norm": 6.909310340881348, "learning_rate": 4.838421466249121e-07, "loss": 0.861, "step": 6321 }, { "epoch": 0.863189513926816, "grad_norm": 5.299135208129883, "learning_rate": 4.828936682890162e-07, "loss": 1.0311, "step": 6322 }, { "epoch": 0.8633260513380666, "grad_norm": 6.44394588470459, "learning_rate": 4.819460733465137e-07, "loss": 0.8644, "step": 6323 }, { "epoch": 0.8634625887493174, "grad_norm": 6.181793212890625, "learning_rate": 4.809993619827203e-07, "loss": 1.0523, "step": 6324 }, { "epoch": 0.863599126160568, "grad_norm": 11.505426406860352, "learning_rate": 4.800535343827834e-07, "loss": 0.8273, "step": 6325 }, { "epoch": 0.8637356635718186, "grad_norm": 5.280917167663574, "learning_rate": 4.791085907316728e-07, "loss": 0.9518, "step": 6326 }, { "epoch": 0.8638722009830694, "grad_norm": 7.803527355194092, "learning_rate": 4.781645312141886e-07, "loss": 0.9712, "step": 6327 }, { "epoch": 0.86400873839432, "grad_norm": 6.083357334136963, "learning_rate": 4.772213560149569e-07, "loss": 0.8621, "step": 6328 }, { "epoch": 0.8641452758055708, "grad_norm": 9.604944229125977, "learning_rate": 4.7627906531842904e-07, "loss": 0.9781, "step": 6329 }, { "epoch": 0.8642818132168214, "grad_norm": 6.131002902984619, "learning_rate": 4.7533765930888766e-07, "loss": 0.7995, "step": 6330 }, { "epoch": 0.864418350628072, "grad_norm": 5.932621479034424, "learning_rate": 4.7439713817043876e-07, "loss": 1.0183, "step": 6331 }, { "epoch": 0.8645548880393228, "grad_norm": 5.33446741104126, "learning_rate": 4.734575020870169e-07, "loss": 0.9453, "step": 6332 }, { "epoch": 0.8646914254505734, "grad_norm": 5.906506538391113, "learning_rate": 4.725187512423829e-07, "loss": 0.8543, "step": 6333 }, { "epoch": 0.8648279628618242, "grad_norm": 6.1058878898620605, "learning_rate": 4.715808858201254e-07, "loss": 1.0073, "step": 6334 }, { "epoch": 0.8649645002730748, "grad_norm": 8.003154754638672, "learning_rate": 4.706439060036577e-07, "loss": 1.0057, "step": 6335 }, { "epoch": 0.8651010376843256, "grad_norm": 5.550704479217529, "learning_rate": 4.697078119762233e-07, "loss": 0.8003, "step": 6336 }, { "epoch": 0.8652375750955762, "grad_norm": 7.283102989196777, "learning_rate": 4.687726039208895e-07, "loss": 1.0167, "step": 6337 }, { "epoch": 0.8653741125068268, "grad_norm": 4.968751907348633, "learning_rate": 4.678382820205507e-07, "loss": 0.9342, "step": 6338 }, { "epoch": 0.8655106499180776, "grad_norm": 5.693424701690674, "learning_rate": 4.6690484645793066e-07, "loss": 1.0093, "step": 6339 }, { "epoch": 0.8656471873293282, "grad_norm": 7.172761917114258, "learning_rate": 4.659722974155767e-07, "loss": 0.8511, "step": 6340 }, { "epoch": 0.865783724740579, "grad_norm": 5.674288272857666, "learning_rate": 4.6504063507586404e-07, "loss": 0.9525, "step": 6341 }, { "epoch": 0.8659202621518296, "grad_norm": 6.397520542144775, "learning_rate": 4.641098596209931e-07, "loss": 0.9361, "step": 6342 }, { "epoch": 0.8660567995630802, "grad_norm": 6.906615734100342, "learning_rate": 4.631799712329937e-07, "loss": 1.1432, "step": 6343 }, { "epoch": 0.866193336974331, "grad_norm": 6.313215255737305, "learning_rate": 4.6225097009372e-07, "loss": 0.8803, "step": 6344 }, { "epoch": 0.8663298743855816, "grad_norm": 10.460848808288574, "learning_rate": 4.6132285638485375e-07, "loss": 0.925, "step": 6345 }, { "epoch": 0.8664664117968324, "grad_norm": 6.837290287017822, "learning_rate": 4.603956302879026e-07, "loss": 0.9981, "step": 6346 }, { "epoch": 0.866602949208083, "grad_norm": 6.1032819747924805, "learning_rate": 4.5946929198419923e-07, "loss": 0.88, "step": 6347 }, { "epoch": 0.8667394866193336, "grad_norm": 5.902693271636963, "learning_rate": 4.5854384165490596e-07, "loss": 0.9276, "step": 6348 }, { "epoch": 0.8668760240305844, "grad_norm": 7.210457801818848, "learning_rate": 4.5761927948100806e-07, "loss": 1.0191, "step": 6349 }, { "epoch": 0.867012561441835, "grad_norm": 6.690125942230225, "learning_rate": 4.5669560564332025e-07, "loss": 0.919, "step": 6350 }, { "epoch": 0.8671490988530858, "grad_norm": 6.462133407592773, "learning_rate": 4.557728203224804e-07, "loss": 0.9236, "step": 6351 }, { "epoch": 0.8672856362643364, "grad_norm": 5.592453956604004, "learning_rate": 4.548509236989562e-07, "loss": 0.9544, "step": 6352 }, { "epoch": 0.8674221736755872, "grad_norm": 24.01266860961914, "learning_rate": 4.539299159530375e-07, "loss": 0.903, "step": 6353 }, { "epoch": 0.8675587110868378, "grad_norm": 6.653442859649658, "learning_rate": 4.5300979726484286e-07, "loss": 0.9984, "step": 6354 }, { "epoch": 0.8676952484980884, "grad_norm": 7.889250755310059, "learning_rate": 4.5209056781431615e-07, "loss": 0.9849, "step": 6355 }, { "epoch": 0.8678317859093392, "grad_norm": 6.143606662750244, "learning_rate": 4.511722277812286e-07, "loss": 0.9726, "step": 6356 }, { "epoch": 0.8679683233205898, "grad_norm": 5.655711650848389, "learning_rate": 4.50254777345176e-07, "loss": 1.0024, "step": 6357 }, { "epoch": 0.8681048607318406, "grad_norm": 9.728277206420898, "learning_rate": 4.493382166855792e-07, "loss": 0.9383, "step": 6358 }, { "epoch": 0.8682413981430912, "grad_norm": 5.800633907318115, "learning_rate": 4.4842254598168875e-07, "loss": 0.8573, "step": 6359 }, { "epoch": 0.8683779355543418, "grad_norm": 7.768548011779785, "learning_rate": 4.47507765412577e-07, "loss": 0.8338, "step": 6360 }, { "epoch": 0.8685144729655926, "grad_norm": 10.460402488708496, "learning_rate": 4.465938751571464e-07, "loss": 1.0739, "step": 6361 }, { "epoch": 0.8686510103768432, "grad_norm": 5.589339256286621, "learning_rate": 4.456808753941205e-07, "loss": 0.9706, "step": 6362 }, { "epoch": 0.868787547788094, "grad_norm": 5.434919357299805, "learning_rate": 4.4476876630205333e-07, "loss": 0.9844, "step": 6363 }, { "epoch": 0.8689240851993446, "grad_norm": 5.248823165893555, "learning_rate": 4.43857548059321e-07, "loss": 0.8655, "step": 6364 }, { "epoch": 0.8690606226105954, "grad_norm": 9.601181030273438, "learning_rate": 4.429472208441282e-07, "loss": 0.9385, "step": 6365 }, { "epoch": 0.869197160021846, "grad_norm": 7.216343879699707, "learning_rate": 4.4203778483450423e-07, "loss": 1.0212, "step": 6366 }, { "epoch": 0.8693336974330966, "grad_norm": 6.360830307006836, "learning_rate": 4.4112924020830285e-07, "loss": 1.0437, "step": 6367 }, { "epoch": 0.8694702348443474, "grad_norm": 4.65792179107666, "learning_rate": 4.4022158714320584e-07, "loss": 0.9904, "step": 6368 }, { "epoch": 0.869606772255598, "grad_norm": 5.618222236633301, "learning_rate": 4.393148258167179e-07, "loss": 1.0363, "step": 6369 }, { "epoch": 0.8697433096668488, "grad_norm": 6.224900722503662, "learning_rate": 4.384089564061728e-07, "loss": 0.9093, "step": 6370 }, { "epoch": 0.8698798470780994, "grad_norm": 7.888637065887451, "learning_rate": 4.3750397908872646e-07, "loss": 0.8894, "step": 6371 }, { "epoch": 0.87001638448935, "grad_norm": 15.959565162658691, "learning_rate": 4.365998940413629e-07, "loss": 0.9241, "step": 6372 }, { "epoch": 0.8701529219006008, "grad_norm": 6.122472763061523, "learning_rate": 4.3569670144089025e-07, "loss": 0.9495, "step": 6373 }, { "epoch": 0.8702894593118514, "grad_norm": 5.608402729034424, "learning_rate": 4.347944014639427e-07, "loss": 1.0622, "step": 6374 }, { "epoch": 0.8704259967231022, "grad_norm": 12.652383804321289, "learning_rate": 4.338929942869791e-07, "loss": 1.057, "step": 6375 }, { "epoch": 0.8705625341343528, "grad_norm": 6.649057865142822, "learning_rate": 4.3299248008628503e-07, "loss": 0.9108, "step": 6376 }, { "epoch": 0.8706990715456034, "grad_norm": 5.681212425231934, "learning_rate": 4.3209285903797084e-07, "loss": 0.8943, "step": 6377 }, { "epoch": 0.8708356089568542, "grad_norm": 7.815115451812744, "learning_rate": 4.311941313179707e-07, "loss": 0.9263, "step": 6378 }, { "epoch": 0.8709721463681048, "grad_norm": 6.5339155197143555, "learning_rate": 4.3029629710204634e-07, "loss": 0.9286, "step": 6379 }, { "epoch": 0.8711086837793556, "grad_norm": 6.108555793762207, "learning_rate": 4.293993565657828e-07, "loss": 0.979, "step": 6380 }, { "epoch": 0.8712452211906062, "grad_norm": 5.8952956199646, "learning_rate": 4.2850330988459255e-07, "loss": 0.9049, "step": 6381 }, { "epoch": 0.871381758601857, "grad_norm": 6.185424327850342, "learning_rate": 4.27608157233711e-07, "loss": 0.873, "step": 6382 }, { "epoch": 0.8715182960131076, "grad_norm": 5.827213764190674, "learning_rate": 4.2671389878820137e-07, "loss": 0.9593, "step": 6383 }, { "epoch": 0.8716548334243582, "grad_norm": 7.316529750823975, "learning_rate": 4.2582053472294825e-07, "loss": 0.9724, "step": 6384 }, { "epoch": 0.871791370835609, "grad_norm": 5.438082218170166, "learning_rate": 4.2492806521266584e-07, "loss": 0.8791, "step": 6385 }, { "epoch": 0.8719279082468596, "grad_norm": 5.961911678314209, "learning_rate": 4.2403649043188945e-07, "loss": 1.0049, "step": 6386 }, { "epoch": 0.8720644456581104, "grad_norm": 5.678912162780762, "learning_rate": 4.231458105549807e-07, "loss": 0.946, "step": 6387 }, { "epoch": 0.872200983069361, "grad_norm": 4.2851057052612305, "learning_rate": 4.222560257561276e-07, "loss": 0.9502, "step": 6388 }, { "epoch": 0.8723375204806116, "grad_norm": 5.894918441772461, "learning_rate": 4.2136713620934076e-07, "loss": 0.9359, "step": 6389 }, { "epoch": 0.8724740578918624, "grad_norm": 7.037126064300537, "learning_rate": 4.2047914208845854e-07, "loss": 0.9216, "step": 6390 }, { "epoch": 0.872610595303113, "grad_norm": 5.764901161193848, "learning_rate": 4.195920435671419e-07, "loss": 0.9908, "step": 6391 }, { "epoch": 0.8727471327143638, "grad_norm": 5.8066911697387695, "learning_rate": 4.187058408188771e-07, "loss": 0.8347, "step": 6392 }, { "epoch": 0.8728836701256144, "grad_norm": 8.82292366027832, "learning_rate": 4.1782053401697496e-07, "loss": 1.0541, "step": 6393 }, { "epoch": 0.8730202075368652, "grad_norm": 6.738858699798584, "learning_rate": 4.1693612333457255e-07, "loss": 0.9927, "step": 6394 }, { "epoch": 0.8731567449481158, "grad_norm": 5.788300514221191, "learning_rate": 4.1605260894462984e-07, "loss": 0.7593, "step": 6395 }, { "epoch": 0.8732932823593664, "grad_norm": 4.9138665199279785, "learning_rate": 4.151699910199336e-07, "loss": 0.9309, "step": 6396 }, { "epoch": 0.8734298197706172, "grad_norm": 5.214917182922363, "learning_rate": 4.14288269733093e-07, "loss": 0.9104, "step": 6397 }, { "epoch": 0.8735663571818678, "grad_norm": 6.915849685668945, "learning_rate": 4.1340744525654287e-07, "loss": 0.9104, "step": 6398 }, { "epoch": 0.8737028945931186, "grad_norm": 5.143849849700928, "learning_rate": 4.1252751776254373e-07, "loss": 0.8855, "step": 6399 }, { "epoch": 0.8738394320043692, "grad_norm": 8.439248085021973, "learning_rate": 4.1164848742317853e-07, "loss": 0.8964, "step": 6400 }, { "epoch": 0.8739759694156198, "grad_norm": 10.900763511657715, "learning_rate": 4.1077035441035695e-07, "loss": 0.9898, "step": 6401 }, { "epoch": 0.8741125068268706, "grad_norm": 7.315414905548096, "learning_rate": 4.098931188958111e-07, "loss": 0.9419, "step": 6402 }, { "epoch": 0.8742490442381212, "grad_norm": 6.894789218902588, "learning_rate": 4.090167810511003e-07, "loss": 0.9519, "step": 6403 }, { "epoch": 0.874385581649372, "grad_norm": 9.525827407836914, "learning_rate": 4.0814134104760483e-07, "loss": 0.8914, "step": 6404 }, { "epoch": 0.8745221190606226, "grad_norm": 5.812859535217285, "learning_rate": 4.072667990565321e-07, "loss": 0.9177, "step": 6405 }, { "epoch": 0.8746586564718732, "grad_norm": 6.74946403503418, "learning_rate": 4.063931552489131e-07, "loss": 0.7597, "step": 6406 }, { "epoch": 0.874795193883124, "grad_norm": 5.361668109893799, "learning_rate": 4.0552040979560116e-07, "loss": 1.1013, "step": 6407 }, { "epoch": 0.8749317312943746, "grad_norm": 6.328123569488525, "learning_rate": 4.046485628672786e-07, "loss": 0.9516, "step": 6408 }, { "epoch": 0.8750682687056254, "grad_norm": 6.653213024139404, "learning_rate": 4.0377761463444685e-07, "loss": 0.7522, "step": 6409 }, { "epoch": 0.875204806116876, "grad_norm": 5.651281833648682, "learning_rate": 4.0290756526743637e-07, "loss": 1.0737, "step": 6410 }, { "epoch": 0.8753413435281268, "grad_norm": 5.686330318450928, "learning_rate": 4.020384149363965e-07, "loss": 0.9475, "step": 6411 }, { "epoch": 0.8754778809393774, "grad_norm": 6.1471381187438965, "learning_rate": 4.0117016381130636e-07, "loss": 0.9448, "step": 6412 }, { "epoch": 0.875614418350628, "grad_norm": 9.21238899230957, "learning_rate": 4.003028120619651e-07, "loss": 0.9972, "step": 6413 }, { "epoch": 0.8757509557618788, "grad_norm": 7.090991497039795, "learning_rate": 3.994363598579981e-07, "loss": 0.9236, "step": 6414 }, { "epoch": 0.8758874931731294, "grad_norm": 5.329564571380615, "learning_rate": 3.985708073688532e-07, "loss": 0.8206, "step": 6415 }, { "epoch": 0.8760240305843802, "grad_norm": 9.698780059814453, "learning_rate": 3.977061547638045e-07, "loss": 0.9391, "step": 6416 }, { "epoch": 0.8761605679956308, "grad_norm": 13.433427810668945, "learning_rate": 3.9684240221194827e-07, "loss": 0.8831, "step": 6417 }, { "epoch": 0.8762971054068814, "grad_norm": 47.7511100769043, "learning_rate": 3.9597954988220555e-07, "loss": 0.8532, "step": 6418 }, { "epoch": 0.8764336428181322, "grad_norm": 5.94598913192749, "learning_rate": 3.951175979433203e-07, "loss": 1.1139, "step": 6419 }, { "epoch": 0.8765701802293828, "grad_norm": 13.391172409057617, "learning_rate": 3.9425654656386094e-07, "loss": 0.8895, "step": 6420 }, { "epoch": 0.8767067176406336, "grad_norm": 5.502480983734131, "learning_rate": 3.933963959122217e-07, "loss": 1.0121, "step": 6421 }, { "epoch": 0.8768432550518842, "grad_norm": 7.147122383117676, "learning_rate": 3.925371461566174e-07, "loss": 1.0137, "step": 6422 }, { "epoch": 0.8769797924631348, "grad_norm": 5.505691051483154, "learning_rate": 3.916787974650893e-07, "loss": 0.9509, "step": 6423 }, { "epoch": 0.8771163298743856, "grad_norm": 6.5338311195373535, "learning_rate": 3.908213500055008e-07, "loss": 0.9779, "step": 6424 }, { "epoch": 0.8772528672856362, "grad_norm": 5.9932732582092285, "learning_rate": 3.8996480394554006e-07, "loss": 0.9254, "step": 6425 }, { "epoch": 0.877389404696887, "grad_norm": 4.583036422729492, "learning_rate": 3.891091594527191e-07, "loss": 0.9465, "step": 6426 }, { "epoch": 0.8775259421081376, "grad_norm": 4.88400936126709, "learning_rate": 3.882544166943708e-07, "loss": 0.8494, "step": 6427 }, { "epoch": 0.8776624795193884, "grad_norm": 7.585082054138184, "learning_rate": 3.87400575837657e-07, "loss": 0.9725, "step": 6428 }, { "epoch": 0.877799016930639, "grad_norm": 7.977452754974365, "learning_rate": 3.8654763704955744e-07, "loss": 0.8085, "step": 6429 }, { "epoch": 0.8779355543418896, "grad_norm": 6.696411609649658, "learning_rate": 3.8569560049688037e-07, "loss": 0.9133, "step": 6430 }, { "epoch": 0.8780720917531404, "grad_norm": 6.217384338378906, "learning_rate": 3.8484446634625415e-07, "loss": 0.9027, "step": 6431 }, { "epoch": 0.878208629164391, "grad_norm": 6.016598701477051, "learning_rate": 3.8399423476413234e-07, "loss": 0.8928, "step": 6432 }, { "epoch": 0.8783451665756418, "grad_norm": 5.800785064697266, "learning_rate": 3.8314490591678964e-07, "loss": 0.9218, "step": 6433 }, { "epoch": 0.8784817039868924, "grad_norm": 5.4834771156311035, "learning_rate": 3.8229647997032935e-07, "loss": 1.0456, "step": 6434 }, { "epoch": 0.878618241398143, "grad_norm": 7.123562812805176, "learning_rate": 3.8144895709067206e-07, "loss": 0.7872, "step": 6435 }, { "epoch": 0.8787547788093938, "grad_norm": 4.605392932891846, "learning_rate": 3.8060233744356634e-07, "loss": 0.9969, "step": 6436 }, { "epoch": 0.8788913162206444, "grad_norm": 5.3556108474731445, "learning_rate": 3.7975662119458245e-07, "loss": 0.9326, "step": 6437 }, { "epoch": 0.8790278536318952, "grad_norm": 7.577620506286621, "learning_rate": 3.78911808509112e-07, "loss": 0.9774, "step": 6438 }, { "epoch": 0.8791643910431458, "grad_norm": 5.88073205947876, "learning_rate": 3.78067899552374e-07, "loss": 1.0433, "step": 6439 }, { "epoch": 0.8793009284543966, "grad_norm": 6.045743942260742, "learning_rate": 3.772248944894069e-07, "loss": 1.1126, "step": 6440 }, { "epoch": 0.8794374658656472, "grad_norm": 5.722362041473389, "learning_rate": 3.7638279348507555e-07, "loss": 0.8136, "step": 6441 }, { "epoch": 0.8795740032768978, "grad_norm": 6.436522006988525, "learning_rate": 3.7554159670406544e-07, "loss": 0.9563, "step": 6442 }, { "epoch": 0.8797105406881486, "grad_norm": 8.595789909362793, "learning_rate": 3.7470130431088656e-07, "loss": 0.8832, "step": 6443 }, { "epoch": 0.8798470780993992, "grad_norm": 6.710079669952393, "learning_rate": 3.7386191646987094e-07, "loss": 0.944, "step": 6444 }, { "epoch": 0.87998361551065, "grad_norm": 5.401512622833252, "learning_rate": 3.73023433345176e-07, "loss": 1.1892, "step": 6445 }, { "epoch": 0.8801201529219006, "grad_norm": 22.56553840637207, "learning_rate": 3.7218585510077844e-07, "loss": 0.9835, "step": 6446 }, { "epoch": 0.8802566903331512, "grad_norm": 5.752763271331787, "learning_rate": 3.713491819004833e-07, "loss": 1.0543, "step": 6447 }, { "epoch": 0.880393227744402, "grad_norm": 5.886919975280762, "learning_rate": 3.7051341390791365e-07, "loss": 0.9048, "step": 6448 }, { "epoch": 0.8805297651556526, "grad_norm": 8.037768363952637, "learning_rate": 3.69678551286517e-07, "loss": 0.9685, "step": 6449 }, { "epoch": 0.8806663025669034, "grad_norm": 5.613559246063232, "learning_rate": 3.688445941995661e-07, "loss": 0.89, "step": 6450 }, { "epoch": 0.880802839978154, "grad_norm": 6.549896240234375, "learning_rate": 3.680115428101527e-07, "loss": 1.0042, "step": 6451 }, { "epoch": 0.8809393773894046, "grad_norm": 10.427637100219727, "learning_rate": 3.671793972811954e-07, "loss": 0.9856, "step": 6452 }, { "epoch": 0.8810759148006554, "grad_norm": 5.985243320465088, "learning_rate": 3.6634815777543186e-07, "loss": 1.0938, "step": 6453 }, { "epoch": 0.881212452211906, "grad_norm": 6.152195453643799, "learning_rate": 3.6551782445542696e-07, "loss": 0.872, "step": 6454 }, { "epoch": 0.8813489896231568, "grad_norm": 8.112650871276855, "learning_rate": 3.646883974835641e-07, "loss": 0.9723, "step": 6455 }, { "epoch": 0.8814855270344074, "grad_norm": 6.353419303894043, "learning_rate": 3.638598770220514e-07, "loss": 0.9707, "step": 6456 }, { "epoch": 0.8816220644456582, "grad_norm": 6.4555888175964355, "learning_rate": 3.6303226323291976e-07, "loss": 0.9484, "step": 6457 }, { "epoch": 0.8817586018569088, "grad_norm": 8.427528381347656, "learning_rate": 3.6220555627802076e-07, "loss": 1.0161, "step": 6458 }, { "epoch": 0.8818951392681594, "grad_norm": 5.91949987411499, "learning_rate": 3.6137975631903357e-07, "loss": 1.0882, "step": 6459 }, { "epoch": 0.8820316766794102, "grad_norm": 4.563174247741699, "learning_rate": 3.6055486351745327e-07, "loss": 0.9157, "step": 6460 }, { "epoch": 0.8821682140906608, "grad_norm": 6.372862339019775, "learning_rate": 3.597308780346043e-07, "loss": 1.007, "step": 6461 }, { "epoch": 0.8823047515019116, "grad_norm": 5.198858261108398, "learning_rate": 3.5890780003162815e-07, "loss": 0.993, "step": 6462 }, { "epoch": 0.8824412889131622, "grad_norm": 7.951730251312256, "learning_rate": 3.580856296694923e-07, "loss": 0.9779, "step": 6463 }, { "epoch": 0.8825778263244128, "grad_norm": 4.59386682510376, "learning_rate": 3.5726436710898426e-07, "loss": 0.8865, "step": 6464 }, { "epoch": 0.8827143637356636, "grad_norm": 6.340750217437744, "learning_rate": 3.564440125107166e-07, "loss": 0.9611, "step": 6465 }, { "epoch": 0.8828509011469142, "grad_norm": 6.258773326873779, "learning_rate": 3.5562456603512206e-07, "loss": 1.0104, "step": 6466 }, { "epoch": 0.882987438558165, "grad_norm": 9.62854290008545, "learning_rate": 3.54806027842457e-07, "loss": 0.8407, "step": 6467 }, { "epoch": 0.8831239759694156, "grad_norm": 4.447173595428467, "learning_rate": 3.539883980928005e-07, "loss": 0.9982, "step": 6468 }, { "epoch": 0.8832605133806664, "grad_norm": 7.025568008422852, "learning_rate": 3.531716769460525e-07, "loss": 0.7995, "step": 6469 }, { "epoch": 0.883397050791917, "grad_norm": 7.316929340362549, "learning_rate": 3.5235586456193573e-07, "loss": 0.8474, "step": 6470 }, { "epoch": 0.8835335882031676, "grad_norm": 8.21718692779541, "learning_rate": 3.5154096109999534e-07, "loss": 1.1784, "step": 6471 }, { "epoch": 0.8836701256144184, "grad_norm": 6.567658424377441, "learning_rate": 3.507269667196006e-07, "loss": 0.8116, "step": 6472 }, { "epoch": 0.883806663025669, "grad_norm": 5.969086170196533, "learning_rate": 3.4991388157993967e-07, "loss": 0.9453, "step": 6473 }, { "epoch": 0.8839432004369198, "grad_norm": 6.150145530700684, "learning_rate": 3.4910170584002535e-07, "loss": 0.8141, "step": 6474 }, { "epoch": 0.8840797378481704, "grad_norm": 7.057607173919678, "learning_rate": 3.4829043965869116e-07, "loss": 0.9154, "step": 6475 }, { "epoch": 0.884216275259421, "grad_norm": 12.811161041259766, "learning_rate": 3.4748008319459457e-07, "loss": 0.9108, "step": 6476 }, { "epoch": 0.8843528126706718, "grad_norm": 5.860852241516113, "learning_rate": 3.466706366062128e-07, "loss": 0.8543, "step": 6477 }, { "epoch": 0.8844893500819224, "grad_norm": 5.956218719482422, "learning_rate": 3.458621000518458e-07, "loss": 0.9123, "step": 6478 }, { "epoch": 0.8846258874931732, "grad_norm": 9.198786735534668, "learning_rate": 3.4505447368961776e-07, "loss": 1.0843, "step": 6479 }, { "epoch": 0.8847624249044238, "grad_norm": 6.627330303192139, "learning_rate": 3.4424775767747166e-07, "loss": 0.9083, "step": 6480 }, { "epoch": 0.8848989623156744, "grad_norm": 5.708579063415527, "learning_rate": 3.4344195217317467e-07, "loss": 0.9295, "step": 6481 }, { "epoch": 0.8850354997269252, "grad_norm": 6.06371545791626, "learning_rate": 3.426370573343152e-07, "loss": 0.9904, "step": 6482 }, { "epoch": 0.8851720371381758, "grad_norm": 7.492867469787598, "learning_rate": 3.418330733183028e-07, "loss": 0.9832, "step": 6483 }, { "epoch": 0.8853085745494266, "grad_norm": 6.057971477508545, "learning_rate": 3.410300002823691e-07, "loss": 0.9795, "step": 6484 }, { "epoch": 0.8854451119606772, "grad_norm": 13.919281959533691, "learning_rate": 3.4022783838357e-07, "loss": 1.0887, "step": 6485 }, { "epoch": 0.885581649371928, "grad_norm": 5.978420734405518, "learning_rate": 3.394265877787789e-07, "loss": 1.0082, "step": 6486 }, { "epoch": 0.8857181867831786, "grad_norm": 7.745957374572754, "learning_rate": 3.386262486246955e-07, "loss": 0.9134, "step": 6487 }, { "epoch": 0.8858547241944292, "grad_norm": 7.632265567779541, "learning_rate": 3.3782682107783794e-07, "loss": 0.9015, "step": 6488 }, { "epoch": 0.88599126160568, "grad_norm": 5.462253093719482, "learning_rate": 3.370283052945472e-07, "loss": 0.872, "step": 6489 }, { "epoch": 0.8861277990169306, "grad_norm": 5.3937201499938965, "learning_rate": 3.362307014309868e-07, "loss": 0.9504, "step": 6490 }, { "epoch": 0.8862643364281814, "grad_norm": 13.824981689453125, "learning_rate": 3.3543400964313964e-07, "loss": 0.943, "step": 6491 }, { "epoch": 0.886400873839432, "grad_norm": 6.626574993133545, "learning_rate": 3.346382300868134e-07, "loss": 1.2182, "step": 6492 }, { "epoch": 0.8865374112506826, "grad_norm": 10.846844673156738, "learning_rate": 3.338433629176341e-07, "loss": 1.0597, "step": 6493 }, { "epoch": 0.8866739486619334, "grad_norm": 9.220710754394531, "learning_rate": 3.330494082910529e-07, "loss": 0.97, "step": 6494 }, { "epoch": 0.886810486073184, "grad_norm": 12.650090217590332, "learning_rate": 3.3225636636233903e-07, "loss": 1.0473, "step": 6495 }, { "epoch": 0.8869470234844348, "grad_norm": 6.513047695159912, "learning_rate": 3.3146423728658507e-07, "loss": 0.9637, "step": 6496 }, { "epoch": 0.8870835608956854, "grad_norm": 5.750511646270752, "learning_rate": 3.3067302121870426e-07, "loss": 0.892, "step": 6497 }, { "epoch": 0.8872200983069362, "grad_norm": 5.943047523498535, "learning_rate": 3.298827183134318e-07, "loss": 1.024, "step": 6498 }, { "epoch": 0.8873566357181868, "grad_norm": 5.772302627563477, "learning_rate": 3.2909332872532505e-07, "loss": 0.952, "step": 6499 }, { "epoch": 0.8874931731294374, "grad_norm": 7.631951332092285, "learning_rate": 3.2830485260876064e-07, "loss": 0.9389, "step": 6500 }, { "epoch": 0.8876297105406882, "grad_norm": 5.470414638519287, "learning_rate": 3.275172901179396e-07, "loss": 1.0409, "step": 6501 }, { "epoch": 0.8877662479519388, "grad_norm": 10.451034545898438, "learning_rate": 3.26730641406881e-07, "loss": 0.9511, "step": 6502 }, { "epoch": 0.8879027853631896, "grad_norm": 6.3253092765808105, "learning_rate": 3.259449066294279e-07, "loss": 0.9536, "step": 6503 }, { "epoch": 0.8880393227744402, "grad_norm": 4.445725917816162, "learning_rate": 3.251600859392423e-07, "loss": 0.8257, "step": 6504 }, { "epoch": 0.8881758601856908, "grad_norm": 5.450277805328369, "learning_rate": 3.2437617948980983e-07, "loss": 0.9642, "step": 6505 }, { "epoch": 0.8883123975969416, "grad_norm": 6.1704936027526855, "learning_rate": 3.235931874344356e-07, "loss": 0.8798, "step": 6506 }, { "epoch": 0.8884489350081922, "grad_norm": 5.506032466888428, "learning_rate": 3.22811109926246e-07, "loss": 0.8056, "step": 6507 }, { "epoch": 0.888585472419443, "grad_norm": 8.042261123657227, "learning_rate": 3.220299471181898e-07, "loss": 0.929, "step": 6508 }, { "epoch": 0.8887220098306936, "grad_norm": 5.803826808929443, "learning_rate": 3.2124969916303427e-07, "loss": 0.8629, "step": 6509 }, { "epoch": 0.8888585472419442, "grad_norm": 6.534225940704346, "learning_rate": 3.204703662133724e-07, "loss": 0.9224, "step": 6510 }, { "epoch": 0.888995084653195, "grad_norm": 14.570290565490723, "learning_rate": 3.1969194842161213e-07, "loss": 0.9022, "step": 6511 }, { "epoch": 0.8891316220644456, "grad_norm": 7.568376541137695, "learning_rate": 3.18914445939989e-07, "loss": 1.0862, "step": 6512 }, { "epoch": 0.8892681594756964, "grad_norm": 6.242584228515625, "learning_rate": 3.1813785892055305e-07, "loss": 0.9176, "step": 6513 }, { "epoch": 0.889404696886947, "grad_norm": 10.999967575073242, "learning_rate": 3.1736218751518113e-07, "loss": 1.052, "step": 6514 }, { "epoch": 0.8895412342981978, "grad_norm": 10.691555976867676, "learning_rate": 3.1658743187556626e-07, "loss": 0.9498, "step": 6515 }, { "epoch": 0.8896777717094484, "grad_norm": 7.998223304748535, "learning_rate": 3.158135921532268e-07, "loss": 1.0703, "step": 6516 }, { "epoch": 0.889814309120699, "grad_norm": 7.154487609863281, "learning_rate": 3.1504066849949775e-07, "loss": 0.8697, "step": 6517 }, { "epoch": 0.8899508465319498, "grad_norm": 7.491081237792969, "learning_rate": 3.1426866106553656e-07, "loss": 1.0347, "step": 6518 }, { "epoch": 0.8900873839432004, "grad_norm": 6.669404029846191, "learning_rate": 3.1349757000232407e-07, "loss": 0.8945, "step": 6519 }, { "epoch": 0.8902239213544512, "grad_norm": 9.4568510055542, "learning_rate": 3.1272739546065743e-07, "loss": 1.0645, "step": 6520 }, { "epoch": 0.8903604587657018, "grad_norm": 6.381989479064941, "learning_rate": 3.119581375911579e-07, "loss": 1.022, "step": 6521 }, { "epoch": 0.8904969961769524, "grad_norm": 5.772890090942383, "learning_rate": 3.111897965442656e-07, "loss": 0.9365, "step": 6522 }, { "epoch": 0.8906335335882032, "grad_norm": 8.566405296325684, "learning_rate": 3.1042237247024265e-07, "loss": 0.8785, "step": 6523 }, { "epoch": 0.8907700709994538, "grad_norm": 5.312895774841309, "learning_rate": 3.096558655191706e-07, "loss": 0.8774, "step": 6524 }, { "epoch": 0.8909066084107046, "grad_norm": 5.374547481536865, "learning_rate": 3.08890275840954e-07, "loss": 0.8289, "step": 6525 }, { "epoch": 0.8910431458219552, "grad_norm": 5.58174991607666, "learning_rate": 3.081256035853136e-07, "loss": 0.9702, "step": 6526 }, { "epoch": 0.8911796832332058, "grad_norm": 6.742737770080566, "learning_rate": 3.0736184890179654e-07, "loss": 0.8078, "step": 6527 }, { "epoch": 0.8913162206444566, "grad_norm": 6.776114463806152, "learning_rate": 3.065990119397655e-07, "loss": 0.8516, "step": 6528 }, { "epoch": 0.8914527580557072, "grad_norm": 7.213008880615234, "learning_rate": 3.05837092848405e-07, "loss": 0.9761, "step": 6529 }, { "epoch": 0.891589295466958, "grad_norm": 8.655163764953613, "learning_rate": 3.050760917767226e-07, "loss": 0.9402, "step": 6530 }, { "epoch": 0.8917258328782086, "grad_norm": 8.196301460266113, "learning_rate": 3.0431600887354305e-07, "loss": 0.8288, "step": 6531 }, { "epoch": 0.8918623702894594, "grad_norm": 4.671411037445068, "learning_rate": 3.035568442875136e-07, "loss": 0.8398, "step": 6532 }, { "epoch": 0.89199890770071, "grad_norm": 5.545207500457764, "learning_rate": 3.027985981671011e-07, "loss": 1.0097, "step": 6533 }, { "epoch": 0.8921354451119606, "grad_norm": 5.444901466369629, "learning_rate": 3.0204127066059297e-07, "loss": 0.9528, "step": 6534 }, { "epoch": 0.8922719825232114, "grad_norm": 6.148550033569336, "learning_rate": 3.0128486191609574e-07, "loss": 0.8635, "step": 6535 }, { "epoch": 0.892408519934462, "grad_norm": 5.236995220184326, "learning_rate": 3.005293720815389e-07, "loss": 0.9689, "step": 6536 }, { "epoch": 0.8925450573457128, "grad_norm": 7.895675182342529, "learning_rate": 2.997748013046703e-07, "loss": 0.7332, "step": 6537 }, { "epoch": 0.8926815947569634, "grad_norm": 4.99729585647583, "learning_rate": 2.9902114973305696e-07, "loss": 0.826, "step": 6538 }, { "epoch": 0.892818132168214, "grad_norm": 18.298561096191406, "learning_rate": 2.982684175140904e-07, "loss": 0.8713, "step": 6539 }, { "epoch": 0.8929546695794648, "grad_norm": 5.24515962600708, "learning_rate": 2.9751660479497737e-07, "loss": 0.9455, "step": 6540 }, { "epoch": 0.8930912069907154, "grad_norm": 5.492649555206299, "learning_rate": 2.967657117227485e-07, "loss": 0.9886, "step": 6541 }, { "epoch": 0.8932277444019662, "grad_norm": 7.19160795211792, "learning_rate": 2.9601573844425133e-07, "loss": 0.8806, "step": 6542 }, { "epoch": 0.8933642818132168, "grad_norm": 5.140348434448242, "learning_rate": 2.9526668510615807e-07, "loss": 0.922, "step": 6543 }, { "epoch": 0.8935008192244676, "grad_norm": 7.321076393127441, "learning_rate": 2.9451855185495536e-07, "loss": 0.9095, "step": 6544 }, { "epoch": 0.8936373566357182, "grad_norm": 6.9850969314575195, "learning_rate": 2.937713388369545e-07, "loss": 0.9828, "step": 6545 }, { "epoch": 0.8937738940469688, "grad_norm": 7.789126873016357, "learning_rate": 2.9302504619828533e-07, "loss": 0.8819, "step": 6546 }, { "epoch": 0.8939104314582196, "grad_norm": 6.3266191482543945, "learning_rate": 2.9227967408489653e-07, "loss": 1.0363, "step": 6547 }, { "epoch": 0.8940469688694702, "grad_norm": 7.965027332305908, "learning_rate": 2.915352226425583e-07, "loss": 0.8598, "step": 6548 }, { "epoch": 0.894183506280721, "grad_norm": 5.872429370880127, "learning_rate": 2.907916920168585e-07, "loss": 0.8749, "step": 6549 }, { "epoch": 0.8943200436919716, "grad_norm": 6.3200883865356445, "learning_rate": 2.900490823532093e-07, "loss": 0.8819, "step": 6550 }, { "epoch": 0.8944565811032222, "grad_norm": 6.742278575897217, "learning_rate": 2.893073937968377e-07, "loss": 0.865, "step": 6551 }, { "epoch": 0.894593118514473, "grad_norm": 5.382570266723633, "learning_rate": 2.8856662649279497e-07, "loss": 0.9718, "step": 6552 }, { "epoch": 0.8947296559257236, "grad_norm": 6.784901142120361, "learning_rate": 2.878267805859486e-07, "loss": 0.9463, "step": 6553 }, { "epoch": 0.8948661933369744, "grad_norm": 6.651855945587158, "learning_rate": 2.870878562209883e-07, "loss": 0.9957, "step": 6554 }, { "epoch": 0.895002730748225, "grad_norm": 5.534138202667236, "learning_rate": 2.8634985354242197e-07, "loss": 0.9361, "step": 6555 }, { "epoch": 0.8951392681594756, "grad_norm": 5.508810043334961, "learning_rate": 2.85612772694579e-07, "loss": 0.8214, "step": 6556 }, { "epoch": 0.8952758055707264, "grad_norm": 6.874266147613525, "learning_rate": 2.848766138216075e-07, "loss": 0.9937, "step": 6557 }, { "epoch": 0.895412342981977, "grad_norm": 5.29581880569458, "learning_rate": 2.8414137706747334e-07, "loss": 0.8747, "step": 6558 }, { "epoch": 0.8955488803932278, "grad_norm": 6.841447353363037, "learning_rate": 2.8340706257596663e-07, "loss": 0.9853, "step": 6559 }, { "epoch": 0.8956854178044784, "grad_norm": 7.01485013961792, "learning_rate": 2.8267367049069296e-07, "loss": 0.8955, "step": 6560 }, { "epoch": 0.8958219552157292, "grad_norm": 5.529797554016113, "learning_rate": 2.8194120095507926e-07, "loss": 0.7915, "step": 6561 }, { "epoch": 0.8959584926269798, "grad_norm": 8.433340072631836, "learning_rate": 2.8120965411237155e-07, "loss": 1.0137, "step": 6562 }, { "epoch": 0.8960950300382304, "grad_norm": 4.988847255706787, "learning_rate": 2.804790301056365e-07, "loss": 0.9247, "step": 6563 }, { "epoch": 0.8962315674494812, "grad_norm": 7.263337135314941, "learning_rate": 2.7974932907775863e-07, "loss": 0.8627, "step": 6564 }, { "epoch": 0.8963681048607318, "grad_norm": 7.926014423370361, "learning_rate": 2.7902055117144334e-07, "loss": 0.8333, "step": 6565 }, { "epoch": 0.8965046422719826, "grad_norm": 7.27543830871582, "learning_rate": 2.782926965292143e-07, "loss": 0.9071, "step": 6566 }, { "epoch": 0.8966411796832332, "grad_norm": 7.482120513916016, "learning_rate": 2.775657652934166e-07, "loss": 1.0259, "step": 6567 }, { "epoch": 0.8967777170944838, "grad_norm": 6.002060413360596, "learning_rate": 2.7683975760621264e-07, "loss": 1.0622, "step": 6568 }, { "epoch": 0.8969142545057346, "grad_norm": 6.5901031494140625, "learning_rate": 2.761146736095843e-07, "loss": 0.9163, "step": 6569 }, { "epoch": 0.8970507919169852, "grad_norm": 9.658255577087402, "learning_rate": 2.7539051344533497e-07, "loss": 0.997, "step": 6570 }, { "epoch": 0.897187329328236, "grad_norm": 8.852822303771973, "learning_rate": 2.746672772550851e-07, "loss": 0.8861, "step": 6571 }, { "epoch": 0.8973238667394866, "grad_norm": 5.9460129737854, "learning_rate": 2.739449651802756e-07, "loss": 0.8904, "step": 6572 }, { "epoch": 0.8974604041507374, "grad_norm": 7.3332977294921875, "learning_rate": 2.73223577362165e-07, "loss": 0.9571, "step": 6573 }, { "epoch": 0.897596941561988, "grad_norm": 6.1086554527282715, "learning_rate": 2.7250311394183383e-07, "loss": 1.0064, "step": 6574 }, { "epoch": 0.8977334789732386, "grad_norm": 5.251235485076904, "learning_rate": 2.717835750601794e-07, "loss": 0.8688, "step": 6575 }, { "epoch": 0.8978700163844894, "grad_norm": 7.037664890289307, "learning_rate": 2.710649608579208e-07, "loss": 0.8675, "step": 6576 }, { "epoch": 0.89800655379574, "grad_norm": 5.368688106536865, "learning_rate": 2.7034727147559284e-07, "loss": 0.8698, "step": 6577 }, { "epoch": 0.8981430912069908, "grad_norm": 5.602945804595947, "learning_rate": 2.6963050705355265e-07, "loss": 0.6667, "step": 6578 }, { "epoch": 0.8982796286182414, "grad_norm": 5.784215450286865, "learning_rate": 2.6891466773197483e-07, "loss": 0.947, "step": 6579 }, { "epoch": 0.898416166029492, "grad_norm": 7.34066104888916, "learning_rate": 2.6819975365085237e-07, "loss": 0.9741, "step": 6580 }, { "epoch": 0.8985527034407428, "grad_norm": 9.904590606689453, "learning_rate": 2.6748576494999955e-07, "loss": 0.9399, "step": 6581 }, { "epoch": 0.8986892408519934, "grad_norm": 6.312925815582275, "learning_rate": 2.667727017690475e-07, "loss": 0.8905, "step": 6582 }, { "epoch": 0.8988257782632442, "grad_norm": 5.922350883483887, "learning_rate": 2.660605642474484e-07, "loss": 0.8557, "step": 6583 }, { "epoch": 0.8989623156744948, "grad_norm": 6.465730667114258, "learning_rate": 2.653493525244721e-07, "loss": 0.8356, "step": 6584 }, { "epoch": 0.8990988530857454, "grad_norm": 8.752839088439941, "learning_rate": 2.646390667392068e-07, "loss": 0.839, "step": 6585 }, { "epoch": 0.8992353904969962, "grad_norm": 6.710587978363037, "learning_rate": 2.639297070305602e-07, "loss": 0.8313, "step": 6586 }, { "epoch": 0.8993719279082468, "grad_norm": 6.198617935180664, "learning_rate": 2.6322127353726025e-07, "loss": 0.95, "step": 6587 }, { "epoch": 0.8995084653194976, "grad_norm": 7.384983539581299, "learning_rate": 2.6251376639785163e-07, "loss": 0.8981, "step": 6588 }, { "epoch": 0.8996450027307482, "grad_norm": 6.551940441131592, "learning_rate": 2.618071857506993e-07, "loss": 0.9936, "step": 6589 }, { "epoch": 0.899781540141999, "grad_norm": 5.99476957321167, "learning_rate": 2.611015317339866e-07, "loss": 0.8908, "step": 6590 }, { "epoch": 0.8999180775532496, "grad_norm": 6.5398850440979, "learning_rate": 2.603968044857147e-07, "loss": 0.876, "step": 6591 }, { "epoch": 0.9000546149645002, "grad_norm": 10.161198616027832, "learning_rate": 2.5969300414370624e-07, "loss": 0.8824, "step": 6592 }, { "epoch": 0.900191152375751, "grad_norm": 9.423365592956543, "learning_rate": 2.589901308455989e-07, "loss": 0.8769, "step": 6593 }, { "epoch": 0.9003276897870016, "grad_norm": 6.787641525268555, "learning_rate": 2.582881847288532e-07, "loss": 0.9118, "step": 6594 }, { "epoch": 0.9004642271982524, "grad_norm": 7.12251091003418, "learning_rate": 2.575871659307433e-07, "loss": 1.0645, "step": 6595 }, { "epoch": 0.900600764609503, "grad_norm": 7.315267562866211, "learning_rate": 2.5688707458836724e-07, "loss": 0.9747, "step": 6596 }, { "epoch": 0.9007373020207536, "grad_norm": 4.824131965637207, "learning_rate": 2.561879108386378e-07, "loss": 0.9739, "step": 6597 }, { "epoch": 0.9008738394320044, "grad_norm": 6.2472710609436035, "learning_rate": 2.5548967481828836e-07, "loss": 0.8445, "step": 6598 }, { "epoch": 0.901010376843255, "grad_norm": 6.914155006408691, "learning_rate": 2.547923666638702e-07, "loss": 0.9616, "step": 6599 }, { "epoch": 0.9011469142545058, "grad_norm": 6.3598952293396, "learning_rate": 2.540959865117526e-07, "loss": 1.0439, "step": 6600 }, { "epoch": 0.9012834516657564, "grad_norm": 4.600180625915527, "learning_rate": 2.5340053449812553e-07, "loss": 0.9262, "step": 6601 }, { "epoch": 0.901419989077007, "grad_norm": 6.777325630187988, "learning_rate": 2.527060107589935e-07, "loss": 0.9426, "step": 6602 }, { "epoch": 0.9015565264882578, "grad_norm": 8.924428939819336, "learning_rate": 2.520124154301845e-07, "loss": 0.9605, "step": 6603 }, { "epoch": 0.9016930638995084, "grad_norm": 6.481432914733887, "learning_rate": 2.5131974864734063e-07, "loss": 0.9187, "step": 6604 }, { "epoch": 0.9018296013107592, "grad_norm": 8.662337303161621, "learning_rate": 2.506280105459252e-07, "loss": 0.833, "step": 6605 }, { "epoch": 0.9019661387220098, "grad_norm": 5.880603313446045, "learning_rate": 2.499372012612172e-07, "loss": 0.9902, "step": 6606 }, { "epoch": 0.9021026761332606, "grad_norm": 6.686001777648926, "learning_rate": 2.492473209283175e-07, "loss": 0.9122, "step": 6607 }, { "epoch": 0.9022392135445112, "grad_norm": 7.823156356811523, "learning_rate": 2.4855836968214306e-07, "loss": 0.9567, "step": 6608 }, { "epoch": 0.9023757509557618, "grad_norm": 6.201719760894775, "learning_rate": 2.478703476574279e-07, "loss": 1.0442, "step": 6609 }, { "epoch": 0.9025122883670126, "grad_norm": 5.757351875305176, "learning_rate": 2.4718325498872765e-07, "loss": 0.9386, "step": 6610 }, { "epoch": 0.9026488257782632, "grad_norm": 7.8022661209106445, "learning_rate": 2.4649709181041373e-07, "loss": 1.0445, "step": 6611 }, { "epoch": 0.902785363189514, "grad_norm": 5.974910259246826, "learning_rate": 2.45811858256676e-07, "loss": 0.8954, "step": 6612 }, { "epoch": 0.9029219006007646, "grad_norm": 5.818890571594238, "learning_rate": 2.4512755446152336e-07, "loss": 0.9741, "step": 6613 }, { "epoch": 0.9030584380120152, "grad_norm": 6.53483247756958, "learning_rate": 2.4444418055878325e-07, "loss": 0.9101, "step": 6614 }, { "epoch": 0.903194975423266, "grad_norm": 6.583759307861328, "learning_rate": 2.437617366820988e-07, "loss": 0.9404, "step": 6615 }, { "epoch": 0.9033315128345166, "grad_norm": 6.256868362426758, "learning_rate": 2.4308022296493484e-07, "loss": 0.9203, "step": 6616 }, { "epoch": 0.9034680502457674, "grad_norm": 5.402975559234619, "learning_rate": 2.4239963954057146e-07, "loss": 0.7539, "step": 6617 }, { "epoch": 0.903604587657018, "grad_norm": 9.13315486907959, "learning_rate": 2.4171998654210826e-07, "loss": 0.9022, "step": 6618 }, { "epoch": 0.9037411250682688, "grad_norm": 5.545251369476318, "learning_rate": 2.4104126410246174e-07, "loss": 0.9617, "step": 6619 }, { "epoch": 0.9038776624795194, "grad_norm": 6.158647060394287, "learning_rate": 2.403634723543674e-07, "loss": 1.0911, "step": 6620 }, { "epoch": 0.90401419989077, "grad_norm": 8.847434043884277, "learning_rate": 2.3968661143037864e-07, "loss": 1.0805, "step": 6621 }, { "epoch": 0.9041507373020208, "grad_norm": 6.011013507843018, "learning_rate": 2.390106814628662e-07, "loss": 0.8185, "step": 6622 }, { "epoch": 0.9042872747132714, "grad_norm": 7.829250812530518, "learning_rate": 2.3833568258401996e-07, "loss": 0.8933, "step": 6623 }, { "epoch": 0.9044238121245222, "grad_norm": 4.222664833068848, "learning_rate": 2.3766161492584595e-07, "loss": 0.932, "step": 6624 }, { "epoch": 0.9045603495357728, "grad_norm": 7.575108528137207, "learning_rate": 2.3698847862016927e-07, "loss": 0.9237, "step": 6625 }, { "epoch": 0.9046968869470234, "grad_norm": 6.298488140106201, "learning_rate": 2.3631627379863242e-07, "loss": 0.9727, "step": 6626 }, { "epoch": 0.9048334243582742, "grad_norm": 5.885182857513428, "learning_rate": 2.356450005926969e-07, "loss": 0.9666, "step": 6627 }, { "epoch": 0.9049699617695248, "grad_norm": 6.715749740600586, "learning_rate": 2.3497465913364047e-07, "loss": 0.9142, "step": 6628 }, { "epoch": 0.9051064991807756, "grad_norm": 6.543333053588867, "learning_rate": 2.343052495525583e-07, "loss": 0.8799, "step": 6629 }, { "epoch": 0.9052430365920262, "grad_norm": 6.120357990264893, "learning_rate": 2.3363677198036616e-07, "loss": 1.057, "step": 6630 }, { "epoch": 0.9053795740032768, "grad_norm": 5.0387983322143555, "learning_rate": 2.3296922654779397e-07, "loss": 0.9434, "step": 6631 }, { "epoch": 0.9055161114145276, "grad_norm": 6.950046539306641, "learning_rate": 2.3230261338539283e-07, "loss": 1.0484, "step": 6632 }, { "epoch": 0.9056526488257782, "grad_norm": 5.248211860656738, "learning_rate": 2.3163693262352794e-07, "loss": 0.8973, "step": 6633 }, { "epoch": 0.905789186237029, "grad_norm": 7.791205883026123, "learning_rate": 2.3097218439238512e-07, "loss": 0.9676, "step": 6634 }, { "epoch": 0.9059257236482796, "grad_norm": 6.7082037925720215, "learning_rate": 2.303083688219665e-07, "loss": 0.982, "step": 6635 }, { "epoch": 0.9060622610595304, "grad_norm": 6.1314263343811035, "learning_rate": 2.2964548604209214e-07, "loss": 0.8039, "step": 6636 }, { "epoch": 0.906198798470781, "grad_norm": 5.015981197357178, "learning_rate": 2.2898353618239832e-07, "loss": 0.9491, "step": 6637 }, { "epoch": 0.9063353358820316, "grad_norm": 6.179503917694092, "learning_rate": 2.2832251937234152e-07, "loss": 0.9031, "step": 6638 }, { "epoch": 0.9064718732932824, "grad_norm": 6.527876853942871, "learning_rate": 2.2766243574119383e-07, "loss": 0.8595, "step": 6639 }, { "epoch": 0.906608410704533, "grad_norm": 6.4453535079956055, "learning_rate": 2.2700328541804428e-07, "loss": 0.9365, "step": 6640 }, { "epoch": 0.9067449481157838, "grad_norm": 5.444783687591553, "learning_rate": 2.263450685318025e-07, "loss": 0.8766, "step": 6641 }, { "epoch": 0.9068814855270344, "grad_norm": 7.96658182144165, "learning_rate": 2.2568778521119106e-07, "loss": 0.9271, "step": 6642 }, { "epoch": 0.907018022938285, "grad_norm": 5.528905391693115, "learning_rate": 2.2503143558475493e-07, "loss": 0.9041, "step": 6643 }, { "epoch": 0.9071545603495358, "grad_norm": 8.057743072509766, "learning_rate": 2.2437601978085144e-07, "loss": 1.0214, "step": 6644 }, { "epoch": 0.9072910977607864, "grad_norm": 5.54731559753418, "learning_rate": 2.2372153792765972e-07, "loss": 0.8668, "step": 6645 }, { "epoch": 0.9074276351720372, "grad_norm": 6.339807987213135, "learning_rate": 2.2306799015317292e-07, "loss": 0.9301, "step": 6646 }, { "epoch": 0.9075641725832878, "grad_norm": 8.03821086883545, "learning_rate": 2.224153765852044e-07, "loss": 0.8585, "step": 6647 }, { "epoch": 0.9077007099945386, "grad_norm": 8.41008472442627, "learning_rate": 2.2176369735138202e-07, "loss": 0.9184, "step": 6648 }, { "epoch": 0.9078372474057892, "grad_norm": 8.104436874389648, "learning_rate": 2.2111295257915278e-07, "loss": 0.8579, "step": 6649 }, { "epoch": 0.9079737848170398, "grad_norm": 9.873147964477539, "learning_rate": 2.2046314239578037e-07, "loss": 0.8597, "step": 6650 }, { "epoch": 0.9081103222282906, "grad_norm": 6.905699729919434, "learning_rate": 2.1981426692834485e-07, "loss": 1.0286, "step": 6651 }, { "epoch": 0.9082468596395412, "grad_norm": 6.278092861175537, "learning_rate": 2.1916632630374579e-07, "loss": 0.9674, "step": 6652 }, { "epoch": 0.908383397050792, "grad_norm": 7.609993934631348, "learning_rate": 2.1851932064869684e-07, "loss": 0.8668, "step": 6653 }, { "epoch": 0.9085199344620426, "grad_norm": 5.577683925628662, "learning_rate": 2.1787325008973236e-07, "loss": 0.6879, "step": 6654 }, { "epoch": 0.9086564718732932, "grad_norm": 6.397035121917725, "learning_rate": 2.1722811475320015e-07, "loss": 1.0092, "step": 6655 }, { "epoch": 0.908793009284544, "grad_norm": 7.337709426879883, "learning_rate": 2.1658391476526873e-07, "loss": 1.0394, "step": 6656 }, { "epoch": 0.9089295466957946, "grad_norm": 8.380420684814453, "learning_rate": 2.1594065025191958e-07, "loss": 0.8955, "step": 6657 }, { "epoch": 0.9090660841070454, "grad_norm": 7.6052141189575195, "learning_rate": 2.152983213389559e-07, "loss": 0.8716, "step": 6658 }, { "epoch": 0.909202621518296, "grad_norm": 5.069437503814697, "learning_rate": 2.146569281519939e-07, "loss": 0.9399, "step": 6659 }, { "epoch": 0.9093391589295466, "grad_norm": 5.842856407165527, "learning_rate": 2.1401647081646825e-07, "loss": 1.0339, "step": 6660 }, { "epoch": 0.9094756963407974, "grad_norm": 5.8611226081848145, "learning_rate": 2.133769494576321e-07, "loss": 1.0323, "step": 6661 }, { "epoch": 0.909612233752048, "grad_norm": 6.099756240844727, "learning_rate": 2.1273836420055372e-07, "loss": 0.9124, "step": 6662 }, { "epoch": 0.9097487711632988, "grad_norm": 5.820398807525635, "learning_rate": 2.121007151701182e-07, "loss": 0.8553, "step": 6663 }, { "epoch": 0.9098853085745494, "grad_norm": 12.987310409545898, "learning_rate": 2.1146400249102804e-07, "loss": 1.0757, "step": 6664 }, { "epoch": 0.9100218459858002, "grad_norm": 5.971721649169922, "learning_rate": 2.108282262878042e-07, "loss": 0.9977, "step": 6665 }, { "epoch": 0.9101583833970508, "grad_norm": 6.810041904449463, "learning_rate": 2.1019338668478107e-07, "loss": 0.9199, "step": 6666 }, { "epoch": 0.9102949208083014, "grad_norm": 5.375779628753662, "learning_rate": 2.095594838061138e-07, "loss": 0.9821, "step": 6667 }, { "epoch": 0.9104314582195522, "grad_norm": 9.243833541870117, "learning_rate": 2.0892651777577045e-07, "loss": 1.1607, "step": 6668 }, { "epoch": 0.9105679956308028, "grad_norm": 5.966073036193848, "learning_rate": 2.0829448871753976e-07, "loss": 1.0273, "step": 6669 }, { "epoch": 0.9107045330420536, "grad_norm": 6.416095733642578, "learning_rate": 2.07663396755024e-07, "loss": 0.8071, "step": 6670 }, { "epoch": 0.9108410704533042, "grad_norm": 4.867931365966797, "learning_rate": 2.0703324201164276e-07, "loss": 0.9847, "step": 6671 }, { "epoch": 0.9109776078645548, "grad_norm": 5.3086652755737305, "learning_rate": 2.0640402461063524e-07, "loss": 0.9193, "step": 6672 }, { "epoch": 0.9111141452758056, "grad_norm": 6.956286907196045, "learning_rate": 2.0577574467505246e-07, "loss": 0.8482, "step": 6673 }, { "epoch": 0.9112506826870562, "grad_norm": 7.111166477203369, "learning_rate": 2.0514840232776721e-07, "loss": 0.9659, "step": 6674 }, { "epoch": 0.911387220098307, "grad_norm": 5.971009731292725, "learning_rate": 2.0452199769146531e-07, "loss": 0.9047, "step": 6675 }, { "epoch": 0.9115237575095576, "grad_norm": 5.517773628234863, "learning_rate": 2.0389653088865035e-07, "loss": 0.8465, "step": 6676 }, { "epoch": 0.9116602949208084, "grad_norm": 7.6052165031433105, "learning_rate": 2.0327200204164176e-07, "loss": 0.9183, "step": 6677 }, { "epoch": 0.911796832332059, "grad_norm": 5.548340797424316, "learning_rate": 2.0264841127257738e-07, "loss": 0.9507, "step": 6678 }, { "epoch": 0.9119333697433096, "grad_norm": 8.05154800415039, "learning_rate": 2.020257587034108e-07, "loss": 0.9698, "step": 6679 }, { "epoch": 0.9120699071545604, "grad_norm": 5.44613790512085, "learning_rate": 2.014040444559101e-07, "loss": 0.9738, "step": 6680 }, { "epoch": 0.912206444565811, "grad_norm": 6.37185001373291, "learning_rate": 2.0078326865166363e-07, "loss": 0.8571, "step": 6681 }, { "epoch": 0.9123429819770618, "grad_norm": 7.007288932800293, "learning_rate": 2.0016343141207263e-07, "loss": 1.1018, "step": 6682 }, { "epoch": 0.9124795193883124, "grad_norm": 11.223559379577637, "learning_rate": 1.9954453285835728e-07, "loss": 0.9316, "step": 6683 }, { "epoch": 0.912616056799563, "grad_norm": 4.79846715927124, "learning_rate": 1.989265731115525e-07, "loss": 0.8467, "step": 6684 }, { "epoch": 0.9127525942108138, "grad_norm": 8.667881965637207, "learning_rate": 1.9830955229251102e-07, "loss": 0.9645, "step": 6685 }, { "epoch": 0.9128891316220644, "grad_norm": 9.020501136779785, "learning_rate": 1.9769347052190134e-07, "loss": 0.947, "step": 6686 }, { "epoch": 0.9130256690333152, "grad_norm": 6.464852809906006, "learning_rate": 1.970783279202071e-07, "loss": 0.8567, "step": 6687 }, { "epoch": 0.9131622064445658, "grad_norm": 6.168850421905518, "learning_rate": 1.9646412460773034e-07, "loss": 1.0156, "step": 6688 }, { "epoch": 0.9132987438558164, "grad_norm": 6.96959114074707, "learning_rate": 1.9585086070458892e-07, "loss": 0.9057, "step": 6689 }, { "epoch": 0.9134352812670672, "grad_norm": 4.610093116760254, "learning_rate": 1.9523853633071576e-07, "loss": 0.9007, "step": 6690 }, { "epoch": 0.9135718186783178, "grad_norm": 5.3296990394592285, "learning_rate": 1.946271516058601e-07, "loss": 0.9578, "step": 6691 }, { "epoch": 0.9137083560895686, "grad_norm": 7.103723049163818, "learning_rate": 1.940167066495896e-07, "loss": 0.9719, "step": 6692 }, { "epoch": 0.9138448935008192, "grad_norm": 5.314182281494141, "learning_rate": 1.9340720158128602e-07, "loss": 0.8884, "step": 6693 }, { "epoch": 0.91398143091207, "grad_norm": 7.2616119384765625, "learning_rate": 1.927986365201484e-07, "loss": 0.8289, "step": 6694 }, { "epoch": 0.9141179683233206, "grad_norm": 6.280640125274658, "learning_rate": 1.9219101158518993e-07, "loss": 0.8975, "step": 6695 }, { "epoch": 0.9142545057345712, "grad_norm": 5.490942478179932, "learning_rate": 1.9158432689524386e-07, "loss": 0.9202, "step": 6696 }, { "epoch": 0.914391043145822, "grad_norm": 4.753111839294434, "learning_rate": 1.9097858256895584e-07, "loss": 0.8677, "step": 6697 }, { "epoch": 0.9145275805570726, "grad_norm": 7.268146991729736, "learning_rate": 1.9037377872478947e-07, "loss": 0.9467, "step": 6698 }, { "epoch": 0.9146641179683234, "grad_norm": 8.43366813659668, "learning_rate": 1.8976991548102341e-07, "loss": 0.9211, "step": 6699 }, { "epoch": 0.914800655379574, "grad_norm": 4.70484733581543, "learning_rate": 1.8916699295575324e-07, "loss": 0.9865, "step": 6700 }, { "epoch": 0.9149371927908246, "grad_norm": 5.395211696624756, "learning_rate": 1.8856501126689075e-07, "loss": 0.9048, "step": 6701 }, { "epoch": 0.9150737302020754, "grad_norm": 13.045578956604004, "learning_rate": 1.8796397053216176e-07, "loss": 0.9016, "step": 6702 }, { "epoch": 0.915210267613326, "grad_norm": 9.034720420837402, "learning_rate": 1.8736387086911111e-07, "loss": 0.9049, "step": 6703 }, { "epoch": 0.9153468050245768, "grad_norm": 8.320977210998535, "learning_rate": 1.8676471239509664e-07, "loss": 0.9341, "step": 6704 }, { "epoch": 0.9154833424358274, "grad_norm": 5.957061767578125, "learning_rate": 1.8616649522729512e-07, "loss": 0.9986, "step": 6705 }, { "epoch": 0.915619879847078, "grad_norm": 5.876267910003662, "learning_rate": 1.855692194826958e-07, "loss": 0.9294, "step": 6706 }, { "epoch": 0.9157564172583288, "grad_norm": 6.353397846221924, "learning_rate": 1.8497288527810742e-07, "loss": 0.9152, "step": 6707 }, { "epoch": 0.9158929546695794, "grad_norm": 5.713481426239014, "learning_rate": 1.8437749273015116e-07, "loss": 0.9364, "step": 6708 }, { "epoch": 0.9160294920808302, "grad_norm": 7.235587120056152, "learning_rate": 1.837830419552672e-07, "loss": 1.0264, "step": 6709 }, { "epoch": 0.9161660294920808, "grad_norm": 10.223628044128418, "learning_rate": 1.831895330697092e-07, "loss": 0.9707, "step": 6710 }, { "epoch": 0.9163025669033316, "grad_norm": 5.174218654632568, "learning_rate": 1.8259696618954714e-07, "loss": 0.9482, "step": 6711 }, { "epoch": 0.9164391043145822, "grad_norm": 5.545658111572266, "learning_rate": 1.8200534143066772e-07, "loss": 1.0217, "step": 6712 }, { "epoch": 0.9165756417258328, "grad_norm": 6.318828105926514, "learning_rate": 1.8141465890877285e-07, "loss": 0.852, "step": 6713 }, { "epoch": 0.9167121791370836, "grad_norm": 5.558835983276367, "learning_rate": 1.8082491873938014e-07, "loss": 1.0356, "step": 6714 }, { "epoch": 0.9168487165483342, "grad_norm": 6.929923057556152, "learning_rate": 1.8023612103782117e-07, "loss": 0.997, "step": 6715 }, { "epoch": 0.916985253959585, "grad_norm": 6.334205627441406, "learning_rate": 1.7964826591924722e-07, "loss": 0.896, "step": 6716 }, { "epoch": 0.9171217913708356, "grad_norm": 6.017897129058838, "learning_rate": 1.7906135349862074e-07, "loss": 0.9418, "step": 6717 }, { "epoch": 0.9172583287820862, "grad_norm": 6.193016052246094, "learning_rate": 1.784753838907244e-07, "loss": 0.7922, "step": 6718 }, { "epoch": 0.917394866193337, "grad_norm": 5.58914852142334, "learning_rate": 1.77890357210152e-07, "loss": 0.8135, "step": 6719 }, { "epoch": 0.9175314036045876, "grad_norm": 6.249495506286621, "learning_rate": 1.7730627357131546e-07, "loss": 0.9286, "step": 6720 }, { "epoch": 0.9176679410158384, "grad_norm": 7.4020256996154785, "learning_rate": 1.767231330884428e-07, "loss": 0.9786, "step": 6721 }, { "epoch": 0.917804478427089, "grad_norm": 5.9552788734436035, "learning_rate": 1.76140935875575e-07, "loss": 1.0189, "step": 6722 }, { "epoch": 0.9179410158383398, "grad_norm": 16.572853088378906, "learning_rate": 1.755596820465716e-07, "loss": 0.8878, "step": 6723 }, { "epoch": 0.9180775532495904, "grad_norm": 5.332666873931885, "learning_rate": 1.749793717151055e-07, "loss": 0.8372, "step": 6724 }, { "epoch": 0.918214090660841, "grad_norm": 5.741370677947998, "learning_rate": 1.744000049946659e-07, "loss": 0.9746, "step": 6725 }, { "epoch": 0.9183506280720918, "grad_norm": 8.185474395751953, "learning_rate": 1.7382158199855782e-07, "loss": 0.963, "step": 6726 }, { "epoch": 0.9184871654833424, "grad_norm": 8.418864250183105, "learning_rate": 1.7324410283990123e-07, "loss": 0.8802, "step": 6727 }, { "epoch": 0.9186237028945932, "grad_norm": 5.224255084991455, "learning_rate": 1.7266756763162973e-07, "loss": 0.9259, "step": 6728 }, { "epoch": 0.9187602403058438, "grad_norm": 24.673864364624023, "learning_rate": 1.7209197648649644e-07, "loss": 0.978, "step": 6729 }, { "epoch": 0.9188967777170944, "grad_norm": 7.014693737030029, "learning_rate": 1.715173295170669e-07, "loss": 0.9754, "step": 6730 }, { "epoch": 0.9190333151283452, "grad_norm": 7.078821182250977, "learning_rate": 1.7094362683572174e-07, "loss": 0.9775, "step": 6731 }, { "epoch": 0.9191698525395958, "grad_norm": 8.632495880126953, "learning_rate": 1.7037086855465902e-07, "loss": 0.9098, "step": 6732 }, { "epoch": 0.9193063899508466, "grad_norm": 6.5251264572143555, "learning_rate": 1.6979905478588964e-07, "loss": 0.9984, "step": 6733 }, { "epoch": 0.9194429273620972, "grad_norm": 7.700799942016602, "learning_rate": 1.6922818564124255e-07, "loss": 0.9869, "step": 6734 }, { "epoch": 0.9195794647733478, "grad_norm": 6.358842849731445, "learning_rate": 1.6865826123235895e-07, "loss": 0.8497, "step": 6735 }, { "epoch": 0.9197160021845986, "grad_norm": 5.439366340637207, "learning_rate": 1.6808928167069806e-07, "loss": 0.9642, "step": 6736 }, { "epoch": 0.9198525395958492, "grad_norm": 6.1980485916137695, "learning_rate": 1.6752124706753249e-07, "loss": 0.8244, "step": 6737 }, { "epoch": 0.9199890770071, "grad_norm": 5.188318252563477, "learning_rate": 1.669541575339506e-07, "loss": 0.8664, "step": 6738 }, { "epoch": 0.9201256144183506, "grad_norm": 5.292796611785889, "learning_rate": 1.663880131808565e-07, "loss": 0.7875, "step": 6739 }, { "epoch": 0.9202621518296014, "grad_norm": 7.28897762298584, "learning_rate": 1.6582281411896827e-07, "loss": 0.8726, "step": 6740 }, { "epoch": 0.920398689240852, "grad_norm": 9.548260688781738, "learning_rate": 1.6525856045881972e-07, "loss": 1.0301, "step": 6741 }, { "epoch": 0.9205352266521026, "grad_norm": 6.070762634277344, "learning_rate": 1.6469525231075979e-07, "loss": 1.0029, "step": 6742 }, { "epoch": 0.9206717640633534, "grad_norm": 5.9863972663879395, "learning_rate": 1.641328897849531e-07, "loss": 0.7164, "step": 6743 }, { "epoch": 0.920808301474604, "grad_norm": 13.41379165649414, "learning_rate": 1.6357147299137733e-07, "loss": 0.867, "step": 6744 }, { "epoch": 0.9209448388858548, "grad_norm": 8.76972484588623, "learning_rate": 1.63011002039829e-07, "loss": 1.0106, "step": 6745 }, { "epoch": 0.9210813762971054, "grad_norm": 6.211139678955078, "learning_rate": 1.6245147703991493e-07, "loss": 0.8664, "step": 6746 }, { "epoch": 0.921217913708356, "grad_norm": 5.731220245361328, "learning_rate": 1.6189289810106147e-07, "loss": 0.795, "step": 6747 }, { "epoch": 0.9213544511196068, "grad_norm": 7.711384296417236, "learning_rate": 1.6133526533250566e-07, "loss": 0.8364, "step": 6748 }, { "epoch": 0.9214909885308574, "grad_norm": 5.97477388381958, "learning_rate": 1.6077857884330306e-07, "loss": 0.8081, "step": 6749 }, { "epoch": 0.9216275259421082, "grad_norm": 6.376023292541504, "learning_rate": 1.602228387423227e-07, "loss": 0.8612, "step": 6750 }, { "epoch": 0.9217640633533588, "grad_norm": 6.689574241638184, "learning_rate": 1.5966804513824764e-07, "loss": 0.9381, "step": 6751 }, { "epoch": 0.9219006007646096, "grad_norm": 4.911769390106201, "learning_rate": 1.5911419813957772e-07, "loss": 0.9819, "step": 6752 }, { "epoch": 0.9220371381758602, "grad_norm": 22.83432960510254, "learning_rate": 1.5856129785462636e-07, "loss": 0.928, "step": 6753 }, { "epoch": 0.9221736755871108, "grad_norm": 6.127509593963623, "learning_rate": 1.5800934439152205e-07, "loss": 1.1195, "step": 6754 }, { "epoch": 0.9223102129983616, "grad_norm": 4.920155048370361, "learning_rate": 1.5745833785820786e-07, "loss": 0.8047, "step": 6755 }, { "epoch": 0.9224467504096122, "grad_norm": 7.866464138031006, "learning_rate": 1.5690827836244317e-07, "loss": 0.8614, "step": 6756 }, { "epoch": 0.922583287820863, "grad_norm": 6.461967945098877, "learning_rate": 1.5635916601179967e-07, "loss": 0.8547, "step": 6757 }, { "epoch": 0.9227198252321136, "grad_norm": 7.625049591064453, "learning_rate": 1.5581100091366653e-07, "loss": 0.81, "step": 6758 }, { "epoch": 0.9228563626433642, "grad_norm": 7.082080841064453, "learning_rate": 1.552637831752457e-07, "loss": 0.9058, "step": 6759 }, { "epoch": 0.922992900054615, "grad_norm": 6.820964336395264, "learning_rate": 1.5471751290355387e-07, "loss": 1.0888, "step": 6760 }, { "epoch": 0.9231294374658656, "grad_norm": 7.238985538482666, "learning_rate": 1.5417219020542385e-07, "loss": 0.9835, "step": 6761 }, { "epoch": 0.9232659748771164, "grad_norm": 6.426520824432373, "learning_rate": 1.5362781518750204e-07, "loss": 0.8276, "step": 6762 }, { "epoch": 0.923402512288367, "grad_norm": 5.648866176605225, "learning_rate": 1.530843879562499e-07, "loss": 0.965, "step": 6763 }, { "epoch": 0.9235390496996176, "grad_norm": 4.918831825256348, "learning_rate": 1.5254190861794415e-07, "loss": 0.9208, "step": 6764 }, { "epoch": 0.9236755871108684, "grad_norm": 6.738744735717773, "learning_rate": 1.5200037727867434e-07, "loss": 0.9301, "step": 6765 }, { "epoch": 0.923812124522119, "grad_norm": 6.119943618774414, "learning_rate": 1.514597940443452e-07, "loss": 1.0392, "step": 6766 }, { "epoch": 0.9239486619333698, "grad_norm": 5.908400535583496, "learning_rate": 1.5092015902067824e-07, "loss": 0.9138, "step": 6767 }, { "epoch": 0.9240851993446204, "grad_norm": 6.378866195678711, "learning_rate": 1.503814723132069e-07, "loss": 0.9503, "step": 6768 }, { "epoch": 0.9242217367558712, "grad_norm": 6.023906230926514, "learning_rate": 1.4984373402728014e-07, "loss": 0.9561, "step": 6769 }, { "epoch": 0.9243582741671218, "grad_norm": 6.655737400054932, "learning_rate": 1.493069442680617e-07, "loss": 0.9401, "step": 6770 }, { "epoch": 0.9244948115783724, "grad_norm": 5.7029709815979, "learning_rate": 1.4877110314052867e-07, "loss": 0.9281, "step": 6771 }, { "epoch": 0.9246313489896232, "grad_norm": 5.949487209320068, "learning_rate": 1.4823621074947503e-07, "loss": 0.9184, "step": 6772 }, { "epoch": 0.9247678864008738, "grad_norm": 6.136383533477783, "learning_rate": 1.4770226719950598e-07, "loss": 1.0077, "step": 6773 }, { "epoch": 0.9249044238121246, "grad_norm": 6.009672164916992, "learning_rate": 1.4716927259504355e-07, "loss": 0.9981, "step": 6774 }, { "epoch": 0.9250409612233752, "grad_norm": 7.213639259338379, "learning_rate": 1.466372270403238e-07, "loss": 0.9241, "step": 6775 }, { "epoch": 0.9251774986346258, "grad_norm": 5.568517208099365, "learning_rate": 1.4610613063939682e-07, "loss": 0.9368, "step": 6776 }, { "epoch": 0.9253140360458766, "grad_norm": 4.955690383911133, "learning_rate": 1.4557598349612678e-07, "loss": 0.9648, "step": 6777 }, { "epoch": 0.9254505734571272, "grad_norm": 6.488436698913574, "learning_rate": 1.450467857141924e-07, "loss": 1.057, "step": 6778 }, { "epoch": 0.925587110868378, "grad_norm": 6.64835262298584, "learning_rate": 1.44518537397087e-07, "loss": 0.9559, "step": 6779 }, { "epoch": 0.9257236482796286, "grad_norm": 6.883822441101074, "learning_rate": 1.4399123864811904e-07, "loss": 0.9256, "step": 6780 }, { "epoch": 0.9258601856908794, "grad_norm": 8.198201179504395, "learning_rate": 1.4346488957040884e-07, "loss": 0.9429, "step": 6781 }, { "epoch": 0.92599672310213, "grad_norm": 6.513754844665527, "learning_rate": 1.4293949026689348e-07, "loss": 0.8306, "step": 6782 }, { "epoch": 0.9261332605133806, "grad_norm": 6.717618465423584, "learning_rate": 1.4241504084032297e-07, "loss": 0.9131, "step": 6783 }, { "epoch": 0.9262697979246314, "grad_norm": 6.499029636383057, "learning_rate": 1.4189154139326145e-07, "loss": 0.9034, "step": 6784 }, { "epoch": 0.926406335335882, "grad_norm": 6.192415237426758, "learning_rate": 1.4136899202808917e-07, "loss": 0.9429, "step": 6785 }, { "epoch": 0.9265428727471328, "grad_norm": 6.039337158203125, "learning_rate": 1.4084739284699722e-07, "loss": 0.9268, "step": 6786 }, { "epoch": 0.9266794101583834, "grad_norm": 5.678371906280518, "learning_rate": 1.4032674395199452e-07, "loss": 1.0084, "step": 6787 }, { "epoch": 0.926815947569634, "grad_norm": 6.231472969055176, "learning_rate": 1.398070454449013e-07, "loss": 1.0442, "step": 6788 }, { "epoch": 0.9269524849808848, "grad_norm": 6.013583183288574, "learning_rate": 1.3928829742735405e-07, "loss": 0.788, "step": 6789 }, { "epoch": 0.9270890223921354, "grad_norm": 6.705965995788574, "learning_rate": 1.3877050000080106e-07, "loss": 0.9207, "step": 6790 }, { "epoch": 0.9272255598033862, "grad_norm": 5.360812664031982, "learning_rate": 1.3825365326650685e-07, "loss": 1.1026, "step": 6791 }, { "epoch": 0.9273620972146368, "grad_norm": 7.479288578033447, "learning_rate": 1.377377573255495e-07, "loss": 0.9876, "step": 6792 }, { "epoch": 0.9274986346258874, "grad_norm": 6.677104949951172, "learning_rate": 1.372228122788194e-07, "loss": 1.0976, "step": 6793 }, { "epoch": 0.9276351720371382, "grad_norm": 5.836313247680664, "learning_rate": 1.3670881822702374e-07, "loss": 0.8848, "step": 6794 }, { "epoch": 0.9277717094483888, "grad_norm": 6.844132900238037, "learning_rate": 1.3619577527068107e-07, "loss": 0.9606, "step": 6795 }, { "epoch": 0.9279082468596396, "grad_norm": 7.185439109802246, "learning_rate": 1.3568368351012718e-07, "loss": 0.8959, "step": 6796 }, { "epoch": 0.9280447842708902, "grad_norm": 6.7374043464660645, "learning_rate": 1.351725430455081e-07, "loss": 1.0111, "step": 6797 }, { "epoch": 0.928181321682141, "grad_norm": 5.815807819366455, "learning_rate": 1.3466235397678717e-07, "loss": 0.9791, "step": 6798 }, { "epoch": 0.9283178590933916, "grad_norm": 6.254541397094727, "learning_rate": 1.3415311640373907e-07, "loss": 0.9655, "step": 6799 }, { "epoch": 0.9284543965046422, "grad_norm": 4.7020416259765625, "learning_rate": 1.336448304259541e-07, "loss": 0.8847, "step": 6800 }, { "epoch": 0.928590933915893, "grad_norm": 7.547082424163818, "learning_rate": 1.33137496142835e-07, "loss": 0.9507, "step": 6801 }, { "epoch": 0.9287274713271436, "grad_norm": 7.195004940032959, "learning_rate": 1.3263111365360014e-07, "loss": 0.8992, "step": 6802 }, { "epoch": 0.9288640087383944, "grad_norm": 4.562135219573975, "learning_rate": 1.3212568305728035e-07, "loss": 1.0258, "step": 6803 }, { "epoch": 0.929000546149645, "grad_norm": 6.543232440948486, "learning_rate": 1.3162120445272096e-07, "loss": 1.0087, "step": 6804 }, { "epoch": 0.9291370835608956, "grad_norm": 8.769084930419922, "learning_rate": 1.311176779385803e-07, "loss": 0.7494, "step": 6805 }, { "epoch": 0.9292736209721464, "grad_norm": 9.005562782287598, "learning_rate": 1.3061510361333186e-07, "loss": 1.0588, "step": 6806 }, { "epoch": 0.929410158383397, "grad_norm": 4.723135471343994, "learning_rate": 1.301134815752625e-07, "loss": 0.938, "step": 6807 }, { "epoch": 0.9295466957946478, "grad_norm": 9.371877670288086, "learning_rate": 1.2961281192247099e-07, "loss": 0.9236, "step": 6808 }, { "epoch": 0.9296832332058984, "grad_norm": 14.685009002685547, "learning_rate": 1.2911309475287348e-07, "loss": 0.8779, "step": 6809 }, { "epoch": 0.929819770617149, "grad_norm": 5.202442646026611, "learning_rate": 1.2861433016419622e-07, "loss": 0.7994, "step": 6810 }, { "epoch": 0.9299563080283998, "grad_norm": 4.349259853363037, "learning_rate": 1.281165182539812e-07, "loss": 0.8821, "step": 6811 }, { "epoch": 0.9300928454396504, "grad_norm": 6.420283317565918, "learning_rate": 1.2761965911958385e-07, "loss": 0.8058, "step": 6812 }, { "epoch": 0.9302293828509012, "grad_norm": 15.231624603271484, "learning_rate": 1.271237528581726e-07, "loss": 0.9677, "step": 6813 }, { "epoch": 0.9303659202621518, "grad_norm": 7.629288673400879, "learning_rate": 1.266287995667309e-07, "loss": 0.8069, "step": 6814 }, { "epoch": 0.9305024576734026, "grad_norm": 7.315296649932861, "learning_rate": 1.2613479934205363e-07, "loss": 0.9282, "step": 6815 }, { "epoch": 0.9306389950846532, "grad_norm": 6.207494735717773, "learning_rate": 1.256417522807507e-07, "loss": 1.0657, "step": 6816 }, { "epoch": 0.9307755324959038, "grad_norm": 7.579843044281006, "learning_rate": 1.2514965847924664e-07, "loss": 0.9865, "step": 6817 }, { "epoch": 0.9309120699071546, "grad_norm": 8.79889965057373, "learning_rate": 1.2465851803377727e-07, "loss": 1.0022, "step": 6818 }, { "epoch": 0.9310486073184052, "grad_norm": 8.648076057434082, "learning_rate": 1.241683310403935e-07, "loss": 0.9512, "step": 6819 }, { "epoch": 0.931185144729656, "grad_norm": 5.517149925231934, "learning_rate": 1.236790975949592e-07, "loss": 0.9809, "step": 6820 }, { "epoch": 0.9313216821409066, "grad_norm": 7.119357585906982, "learning_rate": 1.2319081779315178e-07, "loss": 0.843, "step": 6821 }, { "epoch": 0.9314582195521572, "grad_norm": 9.938267707824707, "learning_rate": 1.2270349173046204e-07, "loss": 1.079, "step": 6822 }, { "epoch": 0.931594756963408, "grad_norm": 6.208835601806641, "learning_rate": 1.2221711950219596e-07, "loss": 0.9077, "step": 6823 }, { "epoch": 0.9317312943746586, "grad_norm": 6.144247531890869, "learning_rate": 1.2173170120346911e-07, "loss": 0.8747, "step": 6824 }, { "epoch": 0.9318678317859094, "grad_norm": 7.528911113739014, "learning_rate": 1.2124723692921503e-07, "loss": 1.0401, "step": 6825 }, { "epoch": 0.93200436919716, "grad_norm": 7.157042503356934, "learning_rate": 1.2076372677417736e-07, "loss": 0.9013, "step": 6826 }, { "epoch": 0.9321409066084108, "grad_norm": 7.672157287597656, "learning_rate": 1.2028117083291547e-07, "loss": 0.9522, "step": 6827 }, { "epoch": 0.9322774440196614, "grad_norm": 8.415511131286621, "learning_rate": 1.1979956919979996e-07, "loss": 0.9257, "step": 6828 }, { "epoch": 0.932413981430912, "grad_norm": 7.523343086242676, "learning_rate": 1.193189219690166e-07, "loss": 0.9154, "step": 6829 }, { "epoch": 0.9325505188421628, "grad_norm": 6.943379878997803, "learning_rate": 1.1883922923456303e-07, "loss": 1.0778, "step": 6830 }, { "epoch": 0.9326870562534134, "grad_norm": 5.928445339202881, "learning_rate": 1.1836049109025083e-07, "loss": 0.9787, "step": 6831 }, { "epoch": 0.9328235936646642, "grad_norm": 6.531148433685303, "learning_rate": 1.1788270762970566e-07, "loss": 1.1063, "step": 6832 }, { "epoch": 0.9329601310759148, "grad_norm": 5.376271724700928, "learning_rate": 1.1740587894636501e-07, "loss": 0.8905, "step": 6833 }, { "epoch": 0.9330966684871654, "grad_norm": 6.710028171539307, "learning_rate": 1.1693000513348208e-07, "loss": 1.05, "step": 6834 }, { "epoch": 0.9332332058984162, "grad_norm": 8.775568008422852, "learning_rate": 1.1645508628411961e-07, "loss": 0.8341, "step": 6835 }, { "epoch": 0.9333697433096668, "grad_norm": 5.127921104431152, "learning_rate": 1.1598112249115723e-07, "loss": 1.0055, "step": 6836 }, { "epoch": 0.9335062807209176, "grad_norm": 7.389885425567627, "learning_rate": 1.1550811384728578e-07, "loss": 0.9141, "step": 6837 }, { "epoch": 0.9336428181321682, "grad_norm": 5.5109663009643555, "learning_rate": 1.150360604450096e-07, "loss": 0.8986, "step": 6838 }, { "epoch": 0.9337793555434188, "grad_norm": 8.132359504699707, "learning_rate": 1.1456496237664649e-07, "loss": 0.9365, "step": 6839 }, { "epoch": 0.9339158929546696, "grad_norm": 8.099593162536621, "learning_rate": 1.1409481973432778e-07, "loss": 0.8169, "step": 6840 }, { "epoch": 0.9340524303659202, "grad_norm": 6.852217674255371, "learning_rate": 1.1362563260999715e-07, "loss": 0.821, "step": 6841 }, { "epoch": 0.934188967777171, "grad_norm": 9.402530670166016, "learning_rate": 1.1315740109541118e-07, "loss": 0.9115, "step": 6842 }, { "epoch": 0.9343255051884216, "grad_norm": 7.60922908782959, "learning_rate": 1.1269012528214108e-07, "loss": 1.0089, "step": 6843 }, { "epoch": 0.9344620425996724, "grad_norm": 7.549257755279541, "learning_rate": 1.1222380526156929e-07, "loss": 0.9619, "step": 6844 }, { "epoch": 0.934598580010923, "grad_norm": 6.999926567077637, "learning_rate": 1.1175844112489286e-07, "loss": 0.8955, "step": 6845 }, { "epoch": 0.9347351174221736, "grad_norm": 6.600773811340332, "learning_rate": 1.1129403296312069e-07, "loss": 0.9101, "step": 6846 }, { "epoch": 0.9348716548334244, "grad_norm": 6.200679302215576, "learning_rate": 1.1083058086707677e-07, "loss": 0.9377, "step": 6847 }, { "epoch": 0.935008192244675, "grad_norm": 6.531972885131836, "learning_rate": 1.1036808492739471e-07, "loss": 0.9648, "step": 6848 }, { "epoch": 0.9351447296559258, "grad_norm": 6.886209011077881, "learning_rate": 1.0990654523452437e-07, "loss": 0.8551, "step": 6849 }, { "epoch": 0.9352812670671764, "grad_norm": 5.858019828796387, "learning_rate": 1.0944596187872747e-07, "loss": 0.8633, "step": 6850 }, { "epoch": 0.935417804478427, "grad_norm": 6.513808727264404, "learning_rate": 1.0898633495007748e-07, "loss": 0.8224, "step": 6851 }, { "epoch": 0.9355543418896778, "grad_norm": 6.58029842376709, "learning_rate": 1.0852766453846308e-07, "loss": 0.9063, "step": 6852 }, { "epoch": 0.9356908793009284, "grad_norm": 6.098498821258545, "learning_rate": 1.0806995073358362e-07, "loss": 0.9822, "step": 6853 }, { "epoch": 0.9358274167121792, "grad_norm": 6.430515289306641, "learning_rate": 1.076131936249536e-07, "loss": 0.8795, "step": 6854 }, { "epoch": 0.9359639541234298, "grad_norm": 6.522995471954346, "learning_rate": 1.0715739330189823e-07, "loss": 0.8902, "step": 6855 }, { "epoch": 0.9361004915346806, "grad_norm": 9.2955322265625, "learning_rate": 1.0670254985355733e-07, "loss": 1.0994, "step": 6856 }, { "epoch": 0.9362370289459312, "grad_norm": 7.02601432800293, "learning_rate": 1.0624866336888196e-07, "loss": 0.8712, "step": 6857 }, { "epoch": 0.9363735663571818, "grad_norm": 7.957219123840332, "learning_rate": 1.0579573393663834e-07, "loss": 0.8474, "step": 6858 }, { "epoch": 0.9365101037684326, "grad_norm": 5.434604644775391, "learning_rate": 1.0534376164540338e-07, "loss": 0.8993, "step": 6859 }, { "epoch": 0.9366466411796832, "grad_norm": 5.8749823570251465, "learning_rate": 1.0489274658356808e-07, "loss": 0.919, "step": 6860 }, { "epoch": 0.936783178590934, "grad_norm": 7.330767631530762, "learning_rate": 1.0444268883933518e-07, "loss": 0.7617, "step": 6861 }, { "epoch": 0.9369197160021846, "grad_norm": 6.547465801239014, "learning_rate": 1.0399358850072039e-07, "loss": 1.0575, "step": 6862 }, { "epoch": 0.9370562534134352, "grad_norm": 7.732560634613037, "learning_rate": 1.0354544565555402e-07, "loss": 0.8928, "step": 6863 }, { "epoch": 0.937192790824686, "grad_norm": 6.48198127746582, "learning_rate": 1.0309826039147597e-07, "loss": 0.8726, "step": 6864 }, { "epoch": 0.9373293282359366, "grad_norm": 7.188812732696533, "learning_rate": 1.0265203279594182e-07, "loss": 0.9015, "step": 6865 }, { "epoch": 0.9374658656471874, "grad_norm": 7.846114158630371, "learning_rate": 1.022067629562179e-07, "loss": 0.9889, "step": 6866 }, { "epoch": 0.937602403058438, "grad_norm": 6.309162139892578, "learning_rate": 1.0176245095938508e-07, "loss": 0.759, "step": 6867 }, { "epoch": 0.9377389404696886, "grad_norm": 8.188708305358887, "learning_rate": 1.0131909689233444e-07, "loss": 1.0046, "step": 6868 }, { "epoch": 0.9378754778809394, "grad_norm": 8.95218563079834, "learning_rate": 1.008767008417716e-07, "loss": 0.8372, "step": 6869 }, { "epoch": 0.93801201529219, "grad_norm": 5.548086166381836, "learning_rate": 1.0043526289421457e-07, "loss": 1.0685, "step": 6870 }, { "epoch": 0.9381485527034408, "grad_norm": 4.735276699066162, "learning_rate": 9.999478313599265e-08, "loss": 0.9206, "step": 6871 }, { "epoch": 0.9382850901146914, "grad_norm": 6.5888519287109375, "learning_rate": 9.955526165325025e-08, "loss": 0.9865, "step": 6872 }, { "epoch": 0.9384216275259422, "grad_norm": 13.384836196899414, "learning_rate": 9.911669853194139e-08, "loss": 0.8806, "step": 6873 }, { "epoch": 0.9385581649371928, "grad_norm": 5.398756504058838, "learning_rate": 9.86790938578358e-08, "loss": 1.0391, "step": 6874 }, { "epoch": 0.9386947023484434, "grad_norm": 7.163490295410156, "learning_rate": 9.824244771651281e-08, "loss": 0.8749, "step": 6875 }, { "epoch": 0.9388312397596942, "grad_norm": 6.609044075012207, "learning_rate": 9.780676019336632e-08, "loss": 1.2149, "step": 6876 }, { "epoch": 0.9389677771709448, "grad_norm": 7.089473724365234, "learning_rate": 9.737203137360207e-08, "loss": 1.008, "step": 6877 }, { "epoch": 0.9391043145821956, "grad_norm": 7.041528224945068, "learning_rate": 9.693826134223871e-08, "loss": 0.9016, "step": 6878 }, { "epoch": 0.9392408519934462, "grad_norm": 6.107686519622803, "learning_rate": 9.650545018410618e-08, "loss": 1.0079, "step": 6879 }, { "epoch": 0.9393773894046968, "grad_norm": 5.665398120880127, "learning_rate": 9.607359798384785e-08, "loss": 0.9555, "step": 6880 }, { "epoch": 0.9395139268159476, "grad_norm": 7.018929958343506, "learning_rate": 9.564270482592009e-08, "loss": 0.8135, "step": 6881 }, { "epoch": 0.9396504642271982, "grad_norm": 6.600776672363281, "learning_rate": 9.521277079459046e-08, "loss": 0.9187, "step": 6882 }, { "epoch": 0.939787001638449, "grad_norm": 6.5217485427856445, "learning_rate": 9.47837959739395e-08, "loss": 1.0337, "step": 6883 }, { "epoch": 0.9399235390496996, "grad_norm": 4.482770919799805, "learning_rate": 9.435578044786009e-08, "loss": 0.9528, "step": 6884 }, { "epoch": 0.9400600764609504, "grad_norm": 8.675463676452637, "learning_rate": 9.392872430005861e-08, "loss": 0.7597, "step": 6885 }, { "epoch": 0.940196613872201, "grad_norm": 6.264554977416992, "learning_rate": 9.35026276140516e-08, "loss": 0.9347, "step": 6886 }, { "epoch": 0.9403331512834516, "grad_norm": 6.365854263305664, "learning_rate": 9.307749047316961e-08, "loss": 0.8318, "step": 6887 }, { "epoch": 0.9404696886947024, "grad_norm": 5.680685997009277, "learning_rate": 9.265331296055558e-08, "loss": 0.8333, "step": 6888 }, { "epoch": 0.940606226105953, "grad_norm": 5.823029041290283, "learning_rate": 9.22300951591637e-08, "loss": 0.9118, "step": 6889 }, { "epoch": 0.9407427635172038, "grad_norm": 7.913331985473633, "learning_rate": 9.180783715176111e-08, "loss": 0.9179, "step": 6890 }, { "epoch": 0.9408793009284544, "grad_norm": 6.030888080596924, "learning_rate": 9.138653902092732e-08, "loss": 0.9077, "step": 6891 }, { "epoch": 0.941015838339705, "grad_norm": 6.952649116516113, "learning_rate": 9.096620084905472e-08, "loss": 0.8507, "step": 6892 }, { "epoch": 0.9411523757509558, "grad_norm": 7.587541580200195, "learning_rate": 9.054682271834592e-08, "loss": 0.9367, "step": 6893 }, { "epoch": 0.9412889131622064, "grad_norm": 8.222264289855957, "learning_rate": 9.012840471081808e-08, "loss": 0.9391, "step": 6894 }, { "epoch": 0.9414254505734572, "grad_norm": 6.624253273010254, "learning_rate": 8.971094690829907e-08, "loss": 0.9585, "step": 6895 }, { "epoch": 0.9415619879847078, "grad_norm": 5.7424798011779785, "learning_rate": 8.929444939243026e-08, "loss": 0.8928, "step": 6896 }, { "epoch": 0.9416985253959584, "grad_norm": 7.5715155601501465, "learning_rate": 8.887891224466316e-08, "loss": 0.9002, "step": 6897 }, { "epoch": 0.9418350628072092, "grad_norm": 6.137444972991943, "learning_rate": 8.846433554626444e-08, "loss": 0.8898, "step": 6898 }, { "epoch": 0.9419716002184598, "grad_norm": 7.277009010314941, "learning_rate": 8.805071937830978e-08, "loss": 1.0274, "step": 6899 }, { "epoch": 0.9421081376297106, "grad_norm": 8.959774017333984, "learning_rate": 8.763806382169005e-08, "loss": 0.9421, "step": 6900 }, { "epoch": 0.9422446750409612, "grad_norm": 7.339325428009033, "learning_rate": 8.722636895710567e-08, "loss": 1.0037, "step": 6901 }, { "epoch": 0.942381212452212, "grad_norm": 7.382093906402588, "learning_rate": 8.68156348650706e-08, "loss": 0.8342, "step": 6902 }, { "epoch": 0.9425177498634626, "grad_norm": 6.508423328399658, "learning_rate": 8.640586162591059e-08, "loss": 0.8973, "step": 6903 }, { "epoch": 0.9426542872747132, "grad_norm": 7.250967025756836, "learning_rate": 8.59970493197626e-08, "loss": 1.0466, "step": 6904 }, { "epoch": 0.942790824685964, "grad_norm": 4.771463871002197, "learning_rate": 8.558919802657828e-08, "loss": 0.9597, "step": 6905 }, { "epoch": 0.9429273620972146, "grad_norm": 6.1982598304748535, "learning_rate": 8.51823078261177e-08, "loss": 0.7961, "step": 6906 }, { "epoch": 0.9430638995084654, "grad_norm": 6.844623565673828, "learning_rate": 8.477637879795664e-08, "loss": 0.8982, "step": 6907 }, { "epoch": 0.943200436919716, "grad_norm": 6.218079090118408, "learning_rate": 8.437141102147883e-08, "loss": 0.9606, "step": 6908 }, { "epoch": 0.9433369743309666, "grad_norm": 7.853671550750732, "learning_rate": 8.396740457588426e-08, "loss": 0.8983, "step": 6909 }, { "epoch": 0.9434735117422174, "grad_norm": 7.393011093139648, "learning_rate": 8.356435954018193e-08, "loss": 0.8735, "step": 6910 }, { "epoch": 0.943610049153468, "grad_norm": 6.950826644897461, "learning_rate": 8.316227599319493e-08, "loss": 0.8355, "step": 6911 }, { "epoch": 0.9437465865647188, "grad_norm": 5.859259605407715, "learning_rate": 8.276115401355589e-08, "loss": 0.8795, "step": 6912 }, { "epoch": 0.9438831239759694, "grad_norm": 5.799868583679199, "learning_rate": 8.236099367971151e-08, "loss": 0.8399, "step": 6913 }, { "epoch": 0.94401966138722, "grad_norm": 5.566917419433594, "learning_rate": 8.19617950699192e-08, "loss": 0.9313, "step": 6914 }, { "epoch": 0.9441561987984708, "grad_norm": 9.743762016296387, "learning_rate": 8.15635582622487e-08, "loss": 0.9061, "step": 6915 }, { "epoch": 0.9442927362097214, "grad_norm": 4.783638954162598, "learning_rate": 8.11662833345822e-08, "loss": 0.8625, "step": 6916 }, { "epoch": 0.9444292736209722, "grad_norm": 8.970824241638184, "learning_rate": 8.076997036461254e-08, "loss": 0.9057, "step": 6917 }, { "epoch": 0.9445658110322228, "grad_norm": 8.0139741897583, "learning_rate": 8.037461942984603e-08, "loss": 1.1035, "step": 6918 }, { "epoch": 0.9447023484434736, "grad_norm": 5.536376476287842, "learning_rate": 7.998023060759974e-08, "loss": 0.9465, "step": 6919 }, { "epoch": 0.9448388858547242, "grad_norm": 5.858532905578613, "learning_rate": 7.958680397500251e-08, "loss": 1.067, "step": 6920 }, { "epoch": 0.9449754232659748, "grad_norm": 5.438150405883789, "learning_rate": 7.919433960899504e-08, "loss": 0.8669, "step": 6921 }, { "epoch": 0.9451119606772256, "grad_norm": 10.233525276184082, "learning_rate": 7.880283758633034e-08, "loss": 1.0119, "step": 6922 }, { "epoch": 0.9452484980884762, "grad_norm": 6.107775688171387, "learning_rate": 7.84122979835733e-08, "loss": 0.8915, "step": 6923 }, { "epoch": 0.945385035499727, "grad_norm": 6.963064193725586, "learning_rate": 7.802272087709951e-08, "loss": 0.8816, "step": 6924 }, { "epoch": 0.9455215729109776, "grad_norm": 6.167111396789551, "learning_rate": 7.763410634309798e-08, "loss": 0.7747, "step": 6925 }, { "epoch": 0.9456581103222282, "grad_norm": 5.545964241027832, "learning_rate": 7.724645445756796e-08, "loss": 0.9127, "step": 6926 }, { "epoch": 0.945794647733479, "grad_norm": 7.424250602722168, "learning_rate": 7.685976529632156e-08, "loss": 0.9156, "step": 6927 }, { "epoch": 0.9459311851447296, "grad_norm": 5.497802257537842, "learning_rate": 7.647403893498106e-08, "loss": 0.8743, "step": 6928 }, { "epoch": 0.9460677225559804, "grad_norm": 8.337976455688477, "learning_rate": 7.608927544898337e-08, "loss": 0.9628, "step": 6929 }, { "epoch": 0.946204259967231, "grad_norm": 6.460484504699707, "learning_rate": 7.570547491357328e-08, "loss": 0.9348, "step": 6930 }, { "epoch": 0.9463407973784818, "grad_norm": 7.393450736999512, "learning_rate": 7.532263740381074e-08, "loss": 0.9837, "step": 6931 }, { "epoch": 0.9464773347897324, "grad_norm": 10.23348617553711, "learning_rate": 7.494076299456531e-08, "loss": 0.986, "step": 6932 }, { "epoch": 0.946613872200983, "grad_norm": 5.842645168304443, "learning_rate": 7.455985176051838e-08, "loss": 0.8172, "step": 6933 }, { "epoch": 0.9467504096122338, "grad_norm": 5.621244430541992, "learning_rate": 7.417990377616313e-08, "loss": 1.066, "step": 6934 }, { "epoch": 0.9468869470234844, "grad_norm": 8.841137886047363, "learning_rate": 7.380091911580511e-08, "loss": 0.8949, "step": 6935 }, { "epoch": 0.9470234844347352, "grad_norm": 5.516604423522949, "learning_rate": 7.342289785356115e-08, "loss": 0.9162, "step": 6936 }, { "epoch": 0.9471600218459858, "grad_norm": 6.3790059089660645, "learning_rate": 7.304584006335824e-08, "loss": 0.9948, "step": 6937 }, { "epoch": 0.9472965592572364, "grad_norm": 6.062676906585693, "learning_rate": 7.266974581893793e-08, "loss": 0.9919, "step": 6938 }, { "epoch": 0.9474330966684872, "grad_norm": 7.880708694458008, "learning_rate": 7.229461519384973e-08, "loss": 0.9719, "step": 6939 }, { "epoch": 0.9475696340797378, "grad_norm": 5.878744602203369, "learning_rate": 7.192044826145772e-08, "loss": 0.856, "step": 6940 }, { "epoch": 0.9477061714909886, "grad_norm": 6.1231207847595215, "learning_rate": 7.154724509493615e-08, "loss": 0.9595, "step": 6941 }, { "epoch": 0.9478427089022392, "grad_norm": 11.875143051147461, "learning_rate": 7.117500576726999e-08, "loss": 1.0434, "step": 6942 }, { "epoch": 0.9479792463134898, "grad_norm": 10.956377983093262, "learning_rate": 7.08037303512582e-08, "loss": 0.9336, "step": 6943 }, { "epoch": 0.9481157837247406, "grad_norm": 7.665284156799316, "learning_rate": 7.043341891950883e-08, "loss": 1.0274, "step": 6944 }, { "epoch": 0.9482523211359912, "grad_norm": 8.4794282913208, "learning_rate": 7.006407154444228e-08, "loss": 0.9945, "step": 6945 }, { "epoch": 0.948388858547242, "grad_norm": 9.374035835266113, "learning_rate": 6.969568829829076e-08, "loss": 0.9575, "step": 6946 }, { "epoch": 0.9485253959584926, "grad_norm": 5.766533851623535, "learning_rate": 6.932826925309721e-08, "loss": 0.9896, "step": 6947 }, { "epoch": 0.9486619333697434, "grad_norm": 5.454185962677002, "learning_rate": 6.896181448071582e-08, "loss": 0.8407, "step": 6948 }, { "epoch": 0.948798470780994, "grad_norm": 13.230429649353027, "learning_rate": 6.859632405281425e-08, "loss": 0.947, "step": 6949 }, { "epoch": 0.9489350081922446, "grad_norm": 15.170241355895996, "learning_rate": 6.823179804086921e-08, "loss": 0.9254, "step": 6950 }, { "epoch": 0.9490715456034954, "grad_norm": 6.243165969848633, "learning_rate": 6.78682365161698e-08, "loss": 0.959, "step": 6951 }, { "epoch": 0.949208083014746, "grad_norm": 5.6639204025268555, "learning_rate": 6.750563954981638e-08, "loss": 0.9452, "step": 6952 }, { "epoch": 0.9493446204259968, "grad_norm": 6.9797587394714355, "learning_rate": 6.714400721271996e-08, "loss": 0.948, "step": 6953 }, { "epoch": 0.9494811578372474, "grad_norm": 9.978755950927734, "learning_rate": 6.678333957560513e-08, "loss": 0.9924, "step": 6954 }, { "epoch": 0.949617695248498, "grad_norm": 8.333573341369629, "learning_rate": 6.642363670900487e-08, "loss": 1.0401, "step": 6955 }, { "epoch": 0.9497542326597488, "grad_norm": 6.091446399688721, "learning_rate": 6.606489868326571e-08, "loss": 0.9358, "step": 6956 }, { "epoch": 0.9498907700709994, "grad_norm": 4.774864673614502, "learning_rate": 6.570712556854486e-08, "loss": 0.8681, "step": 6957 }, { "epoch": 0.9500273074822502, "grad_norm": 5.237020492553711, "learning_rate": 6.535031743480968e-08, "loss": 0.9089, "step": 6958 }, { "epoch": 0.9501638448935008, "grad_norm": 8.23719596862793, "learning_rate": 6.499447435184047e-08, "loss": 0.8935, "step": 6959 }, { "epoch": 0.9503003823047516, "grad_norm": 6.028432369232178, "learning_rate": 6.463959638922823e-08, "loss": 1.057, "step": 6960 }, { "epoch": 0.9504369197160022, "grad_norm": 7.918414115905762, "learning_rate": 6.428568361637466e-08, "loss": 0.9664, "step": 6961 }, { "epoch": 0.9505734571272528, "grad_norm": 6.776345729827881, "learning_rate": 6.393273610249329e-08, "loss": 0.9638, "step": 6962 }, { "epoch": 0.9507099945385036, "grad_norm": 6.551833629608154, "learning_rate": 6.358075391660889e-08, "loss": 0.7892, "step": 6963 }, { "epoch": 0.9508465319497542, "grad_norm": 4.957479953765869, "learning_rate": 6.322973712755698e-08, "loss": 0.8833, "step": 6964 }, { "epoch": 0.950983069361005, "grad_norm": 8.535771369934082, "learning_rate": 6.28796858039854e-08, "loss": 0.9355, "step": 6965 }, { "epoch": 0.9511196067722556, "grad_norm": 7.1530232429504395, "learning_rate": 6.253060001435108e-08, "loss": 0.7878, "step": 6966 }, { "epoch": 0.9512561441835062, "grad_norm": 7.109511375427246, "learning_rate": 6.218247982692438e-08, "loss": 1.0082, "step": 6967 }, { "epoch": 0.951392681594757, "grad_norm": 5.86331033706665, "learning_rate": 6.18353253097853e-08, "loss": 0.8748, "step": 6968 }, { "epoch": 0.9515292190060076, "grad_norm": 8.653367042541504, "learning_rate": 6.148913653082622e-08, "loss": 0.8585, "step": 6969 }, { "epoch": 0.9516657564172584, "grad_norm": 6.347375869750977, "learning_rate": 6.114391355774962e-08, "loss": 0.8014, "step": 6970 }, { "epoch": 0.951802293828509, "grad_norm": 7.155083656311035, "learning_rate": 6.079965645806929e-08, "loss": 1.0178, "step": 6971 }, { "epoch": 0.9519388312397596, "grad_norm": 6.066342830657959, "learning_rate": 6.045636529911025e-08, "loss": 0.9149, "step": 6972 }, { "epoch": 0.9520753686510104, "grad_norm": 6.746638774871826, "learning_rate": 6.011404014800825e-08, "loss": 0.8597, "step": 6973 }, { "epoch": 0.952211906062261, "grad_norm": 5.188093662261963, "learning_rate": 5.977268107171196e-08, "loss": 0.7986, "step": 6974 }, { "epoch": 0.9523484434735118, "grad_norm": 8.214299201965332, "learning_rate": 5.943228813697799e-08, "loss": 0.8689, "step": 6975 }, { "epoch": 0.9524849808847624, "grad_norm": 9.544525146484375, "learning_rate": 5.909286141037696e-08, "loss": 0.9563, "step": 6976 }, { "epoch": 0.9526215182960132, "grad_norm": 5.871176242828369, "learning_rate": 5.875440095828799e-08, "loss": 0.7282, "step": 6977 }, { "epoch": 0.9527580557072638, "grad_norm": 5.772797107696533, "learning_rate": 5.841690684690427e-08, "loss": 0.9335, "step": 6978 }, { "epoch": 0.9528945931185144, "grad_norm": 7.6105170249938965, "learning_rate": 5.8080379142226864e-08, "loss": 0.9043, "step": 6979 }, { "epoch": 0.9530311305297652, "grad_norm": 4.77163553237915, "learning_rate": 5.7744817910069804e-08, "loss": 0.8942, "step": 6980 }, { "epoch": 0.9531676679410158, "grad_norm": 7.015429973602295, "learning_rate": 5.741022321605727e-08, "loss": 0.996, "step": 6981 }, { "epoch": 0.9533042053522666, "grad_norm": 33.992218017578125, "learning_rate": 5.707659512562469e-08, "loss": 0.9484, "step": 6982 }, { "epoch": 0.9534407427635172, "grad_norm": 6.394505500793457, "learning_rate": 5.674393370401821e-08, "loss": 0.9608, "step": 6983 }, { "epoch": 0.9535772801747678, "grad_norm": 6.736581325531006, "learning_rate": 5.6412239016296334e-08, "loss": 0.8861, "step": 6984 }, { "epoch": 0.9537138175860186, "grad_norm": 6.615479946136475, "learning_rate": 5.608151112732607e-08, "loss": 1.0643, "step": 6985 }, { "epoch": 0.9538503549972692, "grad_norm": 6.569654941558838, "learning_rate": 5.575175010178679e-08, "loss": 1.0361, "step": 6986 }, { "epoch": 0.95398689240852, "grad_norm": 9.126017570495605, "learning_rate": 5.5422956004169134e-08, "loss": 1.0595, "step": 6987 }, { "epoch": 0.9541234298197706, "grad_norm": 5.583319664001465, "learning_rate": 5.509512889877333e-08, "loss": 0.8733, "step": 6988 }, { "epoch": 0.9542599672310212, "grad_norm": 7.54746150970459, "learning_rate": 5.476826884971253e-08, "loss": 0.9688, "step": 6989 }, { "epoch": 0.954396504642272, "grad_norm": 5.455805778503418, "learning_rate": 5.44423759209084e-08, "loss": 0.9498, "step": 6990 }, { "epoch": 0.9545330420535226, "grad_norm": 7.427881240844727, "learning_rate": 5.411745017609493e-08, "loss": 0.7877, "step": 6991 }, { "epoch": 0.9546695794647734, "grad_norm": 5.6472368240356445, "learning_rate": 5.3793491678817423e-08, "loss": 0.9316, "step": 6992 }, { "epoch": 0.954806116876024, "grad_norm": 5.930205821990967, "learning_rate": 5.3470500492429634e-08, "loss": 0.9546, "step": 6993 }, { "epoch": 0.9549426542872748, "grad_norm": 33.1263542175293, "learning_rate": 5.3148476680098814e-08, "loss": 0.8646, "step": 6994 }, { "epoch": 0.9550791916985254, "grad_norm": 7.412595272064209, "learning_rate": 5.282742030480126e-08, "loss": 0.7606, "step": 6995 }, { "epoch": 0.955215729109776, "grad_norm": 10.099181175231934, "learning_rate": 5.250733142932562e-08, "loss": 0.9274, "step": 6996 }, { "epoch": 0.9553522665210268, "grad_norm": 8.003251075744629, "learning_rate": 5.218821011627018e-08, "loss": 0.7523, "step": 6997 }, { "epoch": 0.9554888039322774, "grad_norm": 5.644485950469971, "learning_rate": 5.187005642804388e-08, "loss": 0.9132, "step": 6998 }, { "epoch": 0.9556253413435282, "grad_norm": 6.073544025421143, "learning_rate": 5.155287042686696e-08, "loss": 0.831, "step": 6999 }, { "epoch": 0.9557618787547788, "grad_norm": 5.143595218658447, "learning_rate": 5.1236652174770365e-08, "loss": 0.881, "step": 7000 }, { "epoch": 0.9558984161660294, "grad_norm": 6.46697998046875, "learning_rate": 5.092140173359572e-08, "loss": 0.959, "step": 7001 }, { "epoch": 0.9560349535772802, "grad_norm": 6.4335103034973145, "learning_rate": 5.060711916499539e-08, "loss": 0.7972, "step": 7002 }, { "epoch": 0.9561714909885308, "grad_norm": 6.370956897735596, "learning_rate": 5.029380453043298e-08, "loss": 1.1041, "step": 7003 }, { "epoch": 0.9563080283997816, "grad_norm": 7.423280715942383, "learning_rate": 4.998145789118114e-08, "loss": 0.9257, "step": 7004 }, { "epoch": 0.9564445658110322, "grad_norm": 7.000473499298096, "learning_rate": 4.9670079308324906e-08, "loss": 0.8096, "step": 7005 }, { "epoch": 0.956581103222283, "grad_norm": 8.010915756225586, "learning_rate": 4.935966884275945e-08, "loss": 0.8224, "step": 7006 }, { "epoch": 0.9567176406335336, "grad_norm": 6.433271884918213, "learning_rate": 4.905022655519009e-08, "loss": 0.9327, "step": 7007 }, { "epoch": 0.9568541780447842, "grad_norm": 6.224764823913574, "learning_rate": 4.874175250613455e-08, "loss": 1.0058, "step": 7008 }, { "epoch": 0.956990715456035, "grad_norm": 5.815981864929199, "learning_rate": 4.8434246755918455e-08, "loss": 0.8854, "step": 7009 }, { "epoch": 0.9571272528672856, "grad_norm": 12.856573104858398, "learning_rate": 4.812770936468036e-08, "loss": 1.0333, "step": 7010 }, { "epoch": 0.9572637902785364, "grad_norm": 5.276297092437744, "learning_rate": 4.782214039236843e-08, "loss": 0.7408, "step": 7011 }, { "epoch": 0.957400327689787, "grad_norm": 8.729162216186523, "learning_rate": 4.751753989874153e-08, "loss": 1.0091, "step": 7012 }, { "epoch": 0.9575368651010376, "grad_norm": 5.734604358673096, "learning_rate": 4.721390794336922e-08, "loss": 1.0585, "step": 7013 }, { "epoch": 0.9576734025122884, "grad_norm": 9.089288711547852, "learning_rate": 4.691124458563179e-08, "loss": 0.9538, "step": 7014 }, { "epoch": 0.957809939923539, "grad_norm": 7.487004280090332, "learning_rate": 4.6609549884719664e-08, "loss": 0.8602, "step": 7015 }, { "epoch": 0.9579464773347898, "grad_norm": 7.374166965484619, "learning_rate": 4.6308823899634535e-08, "loss": 0.8818, "step": 7016 }, { "epoch": 0.9580830147460404, "grad_norm": 7.094447135925293, "learning_rate": 4.600906668918825e-08, "loss": 1.0148, "step": 7017 }, { "epoch": 0.958219552157291, "grad_norm": 6.600050449371338, "learning_rate": 4.571027831200336e-08, "loss": 1.0041, "step": 7018 }, { "epoch": 0.9583560895685418, "grad_norm": 7.239684104919434, "learning_rate": 4.541245882651146e-08, "loss": 0.9869, "step": 7019 }, { "epoch": 0.9584926269797924, "grad_norm": 5.2728400230407715, "learning_rate": 4.511560829095818e-08, "loss": 0.9298, "step": 7020 }, { "epoch": 0.9586291643910432, "grad_norm": 5.088415145874023, "learning_rate": 4.481972676339541e-08, "loss": 0.9888, "step": 7021 }, { "epoch": 0.9587657018022938, "grad_norm": 9.383365631103516, "learning_rate": 4.452481430168909e-08, "loss": 0.9347, "step": 7022 }, { "epoch": 0.9589022392135446, "grad_norm": 5.650630950927734, "learning_rate": 4.4230870963513086e-08, "loss": 0.9148, "step": 7023 }, { "epoch": 0.9590387766247952, "grad_norm": 6.119711399078369, "learning_rate": 4.393789680635308e-08, "loss": 0.9866, "step": 7024 }, { "epoch": 0.9591753140360458, "grad_norm": 5.638863563537598, "learning_rate": 4.364589188750545e-08, "loss": 0.926, "step": 7025 }, { "epoch": 0.9593118514472966, "grad_norm": 7.291694164276123, "learning_rate": 4.3354856264075095e-08, "loss": 0.9629, "step": 7026 }, { "epoch": 0.9594483888585472, "grad_norm": 6.593517780303955, "learning_rate": 4.306478999298092e-08, "loss": 1.0208, "step": 7027 }, { "epoch": 0.959584926269798, "grad_norm": 6.304633140563965, "learning_rate": 4.2775693130948094e-08, "loss": 0.841, "step": 7028 }, { "epoch": 0.9597214636810486, "grad_norm": 6.351804733276367, "learning_rate": 4.248756573451529e-08, "loss": 0.9086, "step": 7029 }, { "epoch": 0.9598580010922992, "grad_norm": 5.125675201416016, "learning_rate": 4.220040786002966e-08, "loss": 0.8334, "step": 7030 }, { "epoch": 0.95999453850355, "grad_norm": 7.1198201179504395, "learning_rate": 4.191421956365072e-08, "loss": 0.8458, "step": 7031 }, { "epoch": 0.9601310759148006, "grad_norm": 6.562489032745361, "learning_rate": 4.1629000901346475e-08, "loss": 0.936, "step": 7032 }, { "epoch": 0.9602676133260514, "grad_norm": 6.516452312469482, "learning_rate": 4.13447519288962e-08, "loss": 0.8777, "step": 7033 }, { "epoch": 0.960404150737302, "grad_norm": 8.628989219665527, "learning_rate": 4.1061472701889317e-08, "loss": 0.9164, "step": 7034 }, { "epoch": 0.9605406881485528, "grad_norm": 5.694529056549072, "learning_rate": 4.077916327572595e-08, "loss": 0.8982, "step": 7035 }, { "epoch": 0.9606772255598034, "grad_norm": 5.068448066711426, "learning_rate": 4.0497823705615836e-08, "loss": 0.8516, "step": 7036 }, { "epoch": 0.960813762971054, "grad_norm": 7.525258541107178, "learning_rate": 4.0217454046579395e-08, "loss": 0.8406, "step": 7037 }, { "epoch": 0.9609503003823048, "grad_norm": 6.279467582702637, "learning_rate": 3.993805435344833e-08, "loss": 0.9023, "step": 7038 }, { "epoch": 0.9610868377935554, "grad_norm": 6.044496059417725, "learning_rate": 3.965962468086337e-08, "loss": 0.9913, "step": 7039 }, { "epoch": 0.9612233752048062, "grad_norm": 7.697057723999023, "learning_rate": 3.9382165083275413e-08, "loss": 0.755, "step": 7040 }, { "epoch": 0.9613599126160568, "grad_norm": 6.863320350646973, "learning_rate": 3.9105675614946606e-08, "loss": 0.9299, "step": 7041 }, { "epoch": 0.9614964500273074, "grad_norm": 18.526458740234375, "learning_rate": 3.8830156329949245e-08, "loss": 0.9981, "step": 7042 }, { "epoch": 0.9616329874385582, "grad_norm": 15.790956497192383, "learning_rate": 3.855560728216523e-08, "loss": 0.9907, "step": 7043 }, { "epoch": 0.9617695248498088, "grad_norm": 5.48340368270874, "learning_rate": 3.828202852528717e-08, "loss": 1.0017, "step": 7044 }, { "epoch": 0.9619060622610596, "grad_norm": 9.171648025512695, "learning_rate": 3.800942011281783e-08, "loss": 0.9256, "step": 7045 }, { "epoch": 0.9620425996723102, "grad_norm": 6.36826753616333, "learning_rate": 3.7737782098069554e-08, "loss": 0.9125, "step": 7046 }, { "epoch": 0.9621791370835608, "grad_norm": 9.643457412719727, "learning_rate": 3.746711453416707e-08, "loss": 1.0198, "step": 7047 }, { "epoch": 0.9623156744948116, "grad_norm": 6.360140800476074, "learning_rate": 3.719741747404249e-08, "loss": 0.9268, "step": 7048 }, { "epoch": 0.9624522119060622, "grad_norm": 16.953392028808594, "learning_rate": 3.6928690970440274e-08, "loss": 0.9314, "step": 7049 }, { "epoch": 0.962588749317313, "grad_norm": 6.509639263153076, "learning_rate": 3.66609350759134e-08, "loss": 0.9958, "step": 7050 }, { "epoch": 0.9627252867285636, "grad_norm": 6.174683094024658, "learning_rate": 3.6394149842826634e-08, "loss": 0.9866, "step": 7051 }, { "epoch": 0.9628618241398144, "grad_norm": 5.76990270614624, "learning_rate": 3.6128335323353804e-08, "loss": 0.8319, "step": 7052 }, { "epoch": 0.962998361551065, "grad_norm": 5.829864501953125, "learning_rate": 3.586349156947888e-08, "loss": 0.9105, "step": 7053 }, { "epoch": 0.9631348989623156, "grad_norm": 5.91876745223999, "learning_rate": 3.5599618632997104e-08, "loss": 0.9478, "step": 7054 }, { "epoch": 0.9632714363735664, "grad_norm": 6.099367141723633, "learning_rate": 3.533671656551274e-08, "loss": 0.9595, "step": 7055 }, { "epoch": 0.963407973784817, "grad_norm": 6.626838207244873, "learning_rate": 3.507478541844023e-08, "loss": 0.9076, "step": 7056 }, { "epoch": 0.9635445111960678, "grad_norm": 6.959403991699219, "learning_rate": 3.481382524300525e-08, "loss": 0.8203, "step": 7057 }, { "epoch": 0.9636810486073184, "grad_norm": 5.712878227233887, "learning_rate": 3.455383609024254e-08, "loss": 0.8986, "step": 7058 }, { "epoch": 0.963817586018569, "grad_norm": 7.697371006011963, "learning_rate": 3.429481801099643e-08, "loss": 0.9436, "step": 7059 }, { "epoch": 0.9639541234298198, "grad_norm": 8.095046043395996, "learning_rate": 3.4036771055923066e-08, "loss": 1.0135, "step": 7060 }, { "epoch": 0.9640906608410704, "grad_norm": 6.162755012512207, "learning_rate": 3.377969527548708e-08, "loss": 0.7878, "step": 7061 }, { "epoch": 0.9642271982523212, "grad_norm": 7.76952600479126, "learning_rate": 3.352359071996436e-08, "loss": 0.8615, "step": 7062 }, { "epoch": 0.9643637356635718, "grad_norm": 6.309835910797119, "learning_rate": 3.32684574394404e-08, "loss": 1.0008, "step": 7063 }, { "epoch": 0.9645002730748226, "grad_norm": 6.199411869049072, "learning_rate": 3.301429548380919e-08, "loss": 0.9742, "step": 7064 }, { "epoch": 0.9646368104860732, "grad_norm": 6.856508255004883, "learning_rate": 3.2761104902778175e-08, "loss": 0.784, "step": 7065 }, { "epoch": 0.9647733478973238, "grad_norm": 6.389617919921875, "learning_rate": 3.250888574586164e-08, "loss": 1.0235, "step": 7066 }, { "epoch": 0.9649098853085746, "grad_norm": 6.589426040649414, "learning_rate": 3.225763806238569e-08, "loss": 1.0238, "step": 7067 }, { "epoch": 0.9650464227198252, "grad_norm": 6.823286056518555, "learning_rate": 3.2007361901485455e-08, "loss": 0.9525, "step": 7068 }, { "epoch": 0.965182960131076, "grad_norm": 5.21692419052124, "learning_rate": 3.175805731210679e-08, "loss": 0.8893, "step": 7069 }, { "epoch": 0.9653194975423266, "grad_norm": 5.538044452667236, "learning_rate": 3.150972434300459e-08, "loss": 0.9982, "step": 7070 }, { "epoch": 0.9654560349535772, "grad_norm": 6.983372211456299, "learning_rate": 3.1262363042746106e-08, "loss": 0.9842, "step": 7071 }, { "epoch": 0.965592572364828, "grad_norm": 7.874731540679932, "learning_rate": 3.1015973459704864e-08, "loss": 0.7667, "step": 7072 }, { "epoch": 0.9657291097760786, "grad_norm": 7.547750949859619, "learning_rate": 3.077055564206788e-08, "loss": 0.8584, "step": 7073 }, { "epoch": 0.9658656471873294, "grad_norm": 5.701330661773682, "learning_rate": 3.052610963782954e-08, "loss": 1.0326, "step": 7074 }, { "epoch": 0.96600218459858, "grad_norm": 9.254070281982422, "learning_rate": 3.028263549479549e-08, "loss": 1.0027, "step": 7075 }, { "epoch": 0.9661387220098306, "grad_norm": 5.346912860870361, "learning_rate": 3.004013326058153e-08, "loss": 1.0025, "step": 7076 }, { "epoch": 0.9662752594210814, "grad_norm": 6.290789604187012, "learning_rate": 2.9798602982611946e-08, "loss": 0.8726, "step": 7077 }, { "epoch": 0.966411796832332, "grad_norm": 5.827367782592773, "learning_rate": 2.9558044708123402e-08, "loss": 1.0504, "step": 7078 }, { "epoch": 0.9665483342435828, "grad_norm": 4.6464009284973145, "learning_rate": 2.9318458484159373e-08, "loss": 0.8835, "step": 7079 }, { "epoch": 0.9666848716548334, "grad_norm": 6.933250904083252, "learning_rate": 2.9079844357575716e-08, "loss": 0.7529, "step": 7080 }, { "epoch": 0.9668214090660842, "grad_norm": 5.623373031616211, "learning_rate": 2.8842202375036764e-08, "loss": 0.8987, "step": 7081 }, { "epoch": 0.9669579464773348, "grad_norm": 16.831214904785156, "learning_rate": 2.860553258301757e-08, "loss": 0.9501, "step": 7082 }, { "epoch": 0.9670944838885854, "grad_norm": 5.652606964111328, "learning_rate": 2.8369835027803326e-08, "loss": 1.0358, "step": 7083 }, { "epoch": 0.9672310212998362, "grad_norm": 6.1747612953186035, "learning_rate": 2.8135109755487723e-08, "loss": 0.8732, "step": 7084 }, { "epoch": 0.9673675587110868, "grad_norm": 8.624218940734863, "learning_rate": 2.7901356811975144e-08, "loss": 0.9149, "step": 7085 }, { "epoch": 0.9675040961223376, "grad_norm": 6.376754283905029, "learning_rate": 2.7668576242980138e-08, "loss": 0.8854, "step": 7086 }, { "epoch": 0.9676406335335882, "grad_norm": 4.833987236022949, "learning_rate": 2.7436768094026843e-08, "loss": 0.9686, "step": 7087 }, { "epoch": 0.9677771709448388, "grad_norm": 7.653088569641113, "learning_rate": 2.7205932410447887e-08, "loss": 0.8668, "step": 7088 }, { "epoch": 0.9679137083560896, "grad_norm": 6.623289585113525, "learning_rate": 2.697606923738827e-08, "loss": 0.9716, "step": 7089 }, { "epoch": 0.9680502457673402, "grad_norm": 4.911900997161865, "learning_rate": 2.674717861980092e-08, "loss": 0.904, "step": 7090 }, { "epoch": 0.968186783178591, "grad_norm": 6.4445719718933105, "learning_rate": 2.651926060244947e-08, "loss": 0.9741, "step": 7091 }, { "epoch": 0.9683233205898416, "grad_norm": 6.127001762390137, "learning_rate": 2.629231522990716e-08, "loss": 0.9037, "step": 7092 }, { "epoch": 0.9684598580010922, "grad_norm": 6.1101274490356445, "learning_rate": 2.6066342546555712e-08, "loss": 0.918, "step": 7093 }, { "epoch": 0.968596395412343, "grad_norm": 5.580913543701172, "learning_rate": 2.584134259658866e-08, "loss": 0.8383, "step": 7094 }, { "epoch": 0.9687329328235936, "grad_norm": 5.075085639953613, "learning_rate": 2.5617315424008026e-08, "loss": 0.9499, "step": 7095 }, { "epoch": 0.9688694702348444, "grad_norm": 5.988424301147461, "learning_rate": 2.539426107262599e-08, "loss": 0.9686, "step": 7096 }, { "epoch": 0.969006007646095, "grad_norm": 6.253198146820068, "learning_rate": 2.517217958606488e-08, "loss": 0.9037, "step": 7097 }, { "epoch": 0.9691425450573458, "grad_norm": 22.721412658691406, "learning_rate": 2.4951071007756068e-08, "loss": 1.0138, "step": 7098 }, { "epoch": 0.9692790824685964, "grad_norm": 8.743348121643066, "learning_rate": 2.4730935380940512e-08, "loss": 1.0177, "step": 7099 }, { "epoch": 0.969415619879847, "grad_norm": 5.327219009399414, "learning_rate": 2.4511772748669894e-08, "loss": 1.0877, "step": 7100 }, { "epoch": 0.9695521572910978, "grad_norm": 6.861960411071777, "learning_rate": 2.4293583153804923e-08, "loss": 0.9857, "step": 7101 }, { "epoch": 0.9696886947023484, "grad_norm": 26.169456481933594, "learning_rate": 2.4076366639015914e-08, "loss": 0.8842, "step": 7102 }, { "epoch": 0.9698252321135992, "grad_norm": 8.024060249328613, "learning_rate": 2.3860123246782774e-08, "loss": 0.9615, "step": 7103 }, { "epoch": 0.9699617695248498, "grad_norm": 6.3968915939331055, "learning_rate": 2.364485301939612e-08, "loss": 0.8969, "step": 7104 }, { "epoch": 0.9700983069361004, "grad_norm": 10.649284362792969, "learning_rate": 2.343055599895505e-08, "loss": 0.9005, "step": 7105 }, { "epoch": 0.9702348443473512, "grad_norm": 8.400102615356445, "learning_rate": 2.3217232227368826e-08, "loss": 0.8708, "step": 7106 }, { "epoch": 0.9703713817586018, "grad_norm": 5.295313358306885, "learning_rate": 2.3004881746356843e-08, "loss": 0.9734, "step": 7107 }, { "epoch": 0.9705079191698526, "grad_norm": 50.2430305480957, "learning_rate": 2.2793504597447003e-08, "loss": 1.0051, "step": 7108 }, { "epoch": 0.9706444565811032, "grad_norm": 6.296533107757568, "learning_rate": 2.2583100821977898e-08, "loss": 0.9782, "step": 7109 }, { "epoch": 0.970780993992354, "grad_norm": 7.131013870239258, "learning_rate": 2.237367046109773e-08, "loss": 0.8588, "step": 7110 }, { "epoch": 0.9709175314036046, "grad_norm": 7.403499126434326, "learning_rate": 2.2165213555763177e-08, "loss": 0.9919, "step": 7111 }, { "epoch": 0.9710540688148552, "grad_norm": 10.140034675598145, "learning_rate": 2.1957730146742185e-08, "loss": 0.9225, "step": 7112 }, { "epoch": 0.971190606226106, "grad_norm": 6.357994079589844, "learning_rate": 2.1751220274611183e-08, "loss": 0.9639, "step": 7113 }, { "epoch": 0.9713271436373566, "grad_norm": 7.470787048339844, "learning_rate": 2.1545683979756205e-08, "loss": 0.954, "step": 7114 }, { "epoch": 0.9714636810486074, "grad_norm": 5.634312629699707, "learning_rate": 2.1341121302373978e-08, "loss": 0.9511, "step": 7115 }, { "epoch": 0.971600218459858, "grad_norm": 6.57702112197876, "learning_rate": 2.1137532282469176e-08, "loss": 0.9202, "step": 7116 }, { "epoch": 0.9717367558711086, "grad_norm": 5.518051624298096, "learning_rate": 2.0934916959857165e-08, "loss": 0.8707, "step": 7117 }, { "epoch": 0.9718732932823594, "grad_norm": 12.12330436706543, "learning_rate": 2.073327537416292e-08, "loss": 1.0495, "step": 7118 }, { "epoch": 0.97200983069361, "grad_norm": 7.233333587646484, "learning_rate": 2.0532607564821004e-08, "loss": 0.8954, "step": 7119 }, { "epoch": 0.9721463681048608, "grad_norm": 5.546310901641846, "learning_rate": 2.0332913571074476e-08, "loss": 0.8655, "step": 7120 }, { "epoch": 0.9722829055161114, "grad_norm": 5.532134532928467, "learning_rate": 2.013419343197709e-08, "loss": 1.0435, "step": 7121 }, { "epoch": 0.972419442927362, "grad_norm": 6.490372180938721, "learning_rate": 1.9936447186392205e-08, "loss": 0.8012, "step": 7122 }, { "epoch": 0.9725559803386128, "grad_norm": 6.261910438537598, "learning_rate": 1.9739674872991667e-08, "loss": 0.8967, "step": 7123 }, { "epoch": 0.9726925177498634, "grad_norm": 7.768855571746826, "learning_rate": 1.954387653025802e-08, "loss": 0.9422, "step": 7124 }, { "epoch": 0.9728290551611142, "grad_norm": 6.12750768661499, "learning_rate": 1.934905219648231e-08, "loss": 0.9328, "step": 7125 }, { "epoch": 0.9729655925723648, "grad_norm": 5.533084869384766, "learning_rate": 1.915520190976572e-08, "loss": 0.9428, "step": 7126 }, { "epoch": 0.9731021299836156, "grad_norm": 7.5294880867004395, "learning_rate": 1.8962325708019037e-08, "loss": 0.9095, "step": 7127 }, { "epoch": 0.9732386673948662, "grad_norm": 6.070545196533203, "learning_rate": 1.8770423628962642e-08, "loss": 0.8961, "step": 7128 }, { "epoch": 0.9733752048061168, "grad_norm": 6.126369953155518, "learning_rate": 1.857949571012485e-08, "loss": 0.9609, "step": 7129 }, { "epoch": 0.9735117422173676, "grad_norm": 9.891369819641113, "learning_rate": 1.838954198884635e-08, "loss": 1.0448, "step": 7130 }, { "epoch": 0.9736482796286182, "grad_norm": 6.774066925048828, "learning_rate": 1.8200562502274644e-08, "loss": 0.9964, "step": 7131 }, { "epoch": 0.973784817039869, "grad_norm": 7.6779584884643555, "learning_rate": 1.8012557287367394e-08, "loss": 1.001, "step": 7132 }, { "epoch": 0.9739213544511196, "grad_norm": 4.783486366271973, "learning_rate": 1.7825526380892966e-08, "loss": 0.9112, "step": 7133 }, { "epoch": 0.9740578918623702, "grad_norm": 4.942917346954346, "learning_rate": 1.7639469819428214e-08, "loss": 0.9057, "step": 7134 }, { "epoch": 0.974194429273621, "grad_norm": 7.597500324249268, "learning_rate": 1.7454387639359027e-08, "loss": 1.0354, "step": 7135 }, { "epoch": 0.9743309666848716, "grad_norm": 6.776873588562012, "learning_rate": 1.7270279876881457e-08, "loss": 0.8907, "step": 7136 }, { "epoch": 0.9744675040961224, "grad_norm": 9.367130279541016, "learning_rate": 1.7087146568001146e-08, "loss": 0.9904, "step": 7137 }, { "epoch": 0.974604041507373, "grad_norm": 6.38670015335083, "learning_rate": 1.690498774853222e-08, "loss": 1.0375, "step": 7138 }, { "epoch": 0.9747405789186238, "grad_norm": 7.434960842132568, "learning_rate": 1.6723803454098408e-08, "loss": 1.0938, "step": 7139 }, { "epoch": 0.9748771163298744, "grad_norm": 7.367375373840332, "learning_rate": 1.6543593720134142e-08, "loss": 0.9733, "step": 7140 }, { "epoch": 0.975013653741125, "grad_norm": 5.153769016265869, "learning_rate": 1.636435858188179e-08, "loss": 0.7823, "step": 7141 }, { "epoch": 0.9751501911523758, "grad_norm": 7.576878070831299, "learning_rate": 1.618609807439442e-08, "loss": 0.8498, "step": 7142 }, { "epoch": 0.9752867285636264, "grad_norm": 6.077991008758545, "learning_rate": 1.6008812232533034e-08, "loss": 1.0642, "step": 7143 }, { "epoch": 0.9754232659748772, "grad_norm": 7.532351016998291, "learning_rate": 1.5832501090968787e-08, "loss": 0.9845, "step": 7144 }, { "epoch": 0.9755598033861278, "grad_norm": 7.192622661590576, "learning_rate": 1.5657164684182434e-08, "loss": 0.8668, "step": 7145 }, { "epoch": 0.9756963407973784, "grad_norm": 6.7828216552734375, "learning_rate": 1.5482803046463768e-08, "loss": 0.9737, "step": 7146 }, { "epoch": 0.9758328782086292, "grad_norm": 16.202072143554688, "learning_rate": 1.5309416211912177e-08, "loss": 0.9659, "step": 7147 }, { "epoch": 0.9759694156198798, "grad_norm": 6.121826171875, "learning_rate": 1.513700421443609e-08, "loss": 0.884, "step": 7148 }, { "epoch": 0.9761059530311306, "grad_norm": 5.4878249168396, "learning_rate": 1.496556708775354e-08, "loss": 0.9632, "step": 7149 }, { "epoch": 0.9762424904423812, "grad_norm": 6.614801406860352, "learning_rate": 1.4795104865392151e-08, "loss": 0.8936, "step": 7150 }, { "epoch": 0.9763790278536318, "grad_norm": 39.50619125366211, "learning_rate": 1.4625617580688035e-08, "loss": 0.896, "step": 7151 }, { "epoch": 0.9765155652648826, "grad_norm": 7.4291672706604, "learning_rate": 1.4457105266786897e-08, "loss": 0.9713, "step": 7152 }, { "epoch": 0.9766521026761332, "grad_norm": 5.413311004638672, "learning_rate": 1.4289567956645157e-08, "loss": 0.9144, "step": 7153 }, { "epoch": 0.976788640087384, "grad_norm": 12.241065979003906, "learning_rate": 1.4123005683026603e-08, "loss": 0.9209, "step": 7154 }, { "epoch": 0.9769251774986346, "grad_norm": 6.65640926361084, "learning_rate": 1.395741847850518e-08, "loss": 0.9623, "step": 7155 }, { "epoch": 0.9770617149098854, "grad_norm": 5.498414993286133, "learning_rate": 1.379280637546443e-08, "loss": 0.7957, "step": 7156 }, { "epoch": 0.977198252321136, "grad_norm": 5.5066447257995605, "learning_rate": 1.3629169406096932e-08, "loss": 0.9429, "step": 7157 }, { "epoch": 0.9773347897323866, "grad_norm": 6.713356018066406, "learning_rate": 1.3466507602404866e-08, "loss": 0.9487, "step": 7158 }, { "epoch": 0.9774713271436374, "grad_norm": 7.29210090637207, "learning_rate": 1.3304820996198897e-08, "loss": 0.867, "step": 7159 }, { "epoch": 0.977607864554888, "grad_norm": 5.907466888427734, "learning_rate": 1.3144109619099288e-08, "loss": 0.9076, "step": 7160 }, { "epoch": 0.9777444019661388, "grad_norm": 6.526339530944824, "learning_rate": 1.2984373502535896e-08, "loss": 0.9783, "step": 7161 }, { "epoch": 0.9778809393773894, "grad_norm": 6.30711555480957, "learning_rate": 1.2825612677748733e-08, "loss": 1.0241, "step": 7162 }, { "epoch": 0.97801747678864, "grad_norm": 6.536374568939209, "learning_rate": 1.2667827175784631e-08, "loss": 0.8106, "step": 7163 }, { "epoch": 0.9781540141998908, "grad_norm": 6.188304901123047, "learning_rate": 1.2511017027501682e-08, "loss": 0.8775, "step": 7164 }, { "epoch": 0.9782905516111414, "grad_norm": 8.651385307312012, "learning_rate": 1.2355182263566468e-08, "loss": 0.9106, "step": 7165 }, { "epoch": 0.9784270890223922, "grad_norm": 6.185110092163086, "learning_rate": 1.2200322914455165e-08, "loss": 1.04, "step": 7166 }, { "epoch": 0.9785636264336428, "grad_norm": 14.230478286743164, "learning_rate": 1.2046439010453548e-08, "loss": 0.8368, "step": 7167 }, { "epoch": 0.9787001638448936, "grad_norm": 14.128826141357422, "learning_rate": 1.1893530581654767e-08, "loss": 1.0673, "step": 7168 }, { "epoch": 0.9788367012561442, "grad_norm": 7.001812934875488, "learning_rate": 1.1741597657964343e-08, "loss": 0.9766, "step": 7169 }, { "epoch": 0.9789732386673948, "grad_norm": 6.887838840484619, "learning_rate": 1.1590640269093512e-08, "loss": 1.0446, "step": 7170 }, { "epoch": 0.9791097760786456, "grad_norm": 7.583461284637451, "learning_rate": 1.1440658444565322e-08, "loss": 0.9777, "step": 7171 }, { "epoch": 0.9792463134898962, "grad_norm": 4.694572925567627, "learning_rate": 1.1291652213710758e-08, "loss": 0.8587, "step": 7172 }, { "epoch": 0.979382850901147, "grad_norm": 6.613863945007324, "learning_rate": 1.1143621605671507e-08, "loss": 0.9771, "step": 7173 }, { "epoch": 0.9795193883123976, "grad_norm": 9.878159523010254, "learning_rate": 1.0996566649395524e-08, "loss": 1.0382, "step": 7174 }, { "epoch": 0.9796559257236482, "grad_norm": 6.422177314758301, "learning_rate": 1.0850487373643137e-08, "loss": 0.9781, "step": 7175 }, { "epoch": 0.979792463134899, "grad_norm": 5.634557247161865, "learning_rate": 1.0705383806982606e-08, "loss": 0.8142, "step": 7176 }, { "epoch": 0.9799290005461496, "grad_norm": 7.829019069671631, "learning_rate": 1.056125597779012e-08, "loss": 0.9671, "step": 7177 }, { "epoch": 0.9800655379574004, "grad_norm": 11.466585159301758, "learning_rate": 1.0418103914253686e-08, "loss": 0.9128, "step": 7178 }, { "epoch": 0.980202075368651, "grad_norm": 8.804161071777344, "learning_rate": 1.0275927644367578e-08, "loss": 0.8622, "step": 7179 }, { "epoch": 0.9803386127799016, "grad_norm": 8.861454010009766, "learning_rate": 1.0134727195937332e-08, "loss": 0.9168, "step": 7180 }, { "epoch": 0.9804751501911524, "grad_norm": 7.508023262023926, "learning_rate": 9.994502596577527e-09, "loss": 0.9098, "step": 7181 }, { "epoch": 0.980611687602403, "grad_norm": 4.893029689788818, "learning_rate": 9.855253873710668e-09, "loss": 0.9906, "step": 7182 }, { "epoch": 0.9807482250136538, "grad_norm": 7.857944011688232, "learning_rate": 9.716981054568864e-09, "loss": 0.9239, "step": 7183 }, { "epoch": 0.9808847624249044, "grad_norm": 7.856436729431152, "learning_rate": 9.57968416619437e-09, "loss": 0.9799, "step": 7184 }, { "epoch": 0.9810212998361552, "grad_norm": 8.51899242401123, "learning_rate": 9.443363235437374e-09, "loss": 0.9064, "step": 7185 }, { "epoch": 0.9811578372474058, "grad_norm": 6.744991779327393, "learning_rate": 9.30801828895711e-09, "loss": 0.891, "step": 7186 }, { "epoch": 0.9812943746586564, "grad_norm": 5.941058158874512, "learning_rate": 9.173649353224067e-09, "loss": 0.8564, "step": 7187 }, { "epoch": 0.9814309120699072, "grad_norm": 7.202165126800537, "learning_rate": 9.04025645451445e-09, "loss": 1.0253, "step": 7188 }, { "epoch": 0.9815674494811578, "grad_norm": 5.321246147155762, "learning_rate": 8.907839618916836e-09, "loss": 0.8903, "step": 7189 }, { "epoch": 0.9817039868924086, "grad_norm": 6.348518371582031, "learning_rate": 8.77639887232662e-09, "loss": 1.0601, "step": 7190 }, { "epoch": 0.9818405243036592, "grad_norm": 5.908637046813965, "learning_rate": 8.645934240449905e-09, "loss": 0.903, "step": 7191 }, { "epoch": 0.9819770617149098, "grad_norm": 5.264166355133057, "learning_rate": 8.516445748800173e-09, "loss": 0.9622, "step": 7192 }, { "epoch": 0.9821135991261606, "grad_norm": 5.8920369148254395, "learning_rate": 8.38793342270161e-09, "loss": 1.0458, "step": 7193 }, { "epoch": 0.9822501365374112, "grad_norm": 5.802741050720215, "learning_rate": 8.260397287286893e-09, "loss": 0.9994, "step": 7194 }, { "epoch": 0.982386673948662, "grad_norm": 4.950692653656006, "learning_rate": 8.133837367497733e-09, "loss": 0.7885, "step": 7195 }, { "epoch": 0.9825232113599126, "grad_norm": 5.7580342292785645, "learning_rate": 8.008253688084888e-09, "loss": 0.9579, "step": 7196 }, { "epoch": 0.9826597487711632, "grad_norm": 5.691699981689453, "learning_rate": 7.883646273608158e-09, "loss": 0.9129, "step": 7197 }, { "epoch": 0.982796286182414, "grad_norm": 6.596006870269775, "learning_rate": 7.760015148436938e-09, "loss": 0.9364, "step": 7198 }, { "epoch": 0.9829328235936646, "grad_norm": 5.898108005523682, "learning_rate": 7.637360336748556e-09, "loss": 0.8872, "step": 7199 }, { "epoch": 0.9830693610049154, "grad_norm": 8.766241073608398, "learning_rate": 7.515681862531043e-09, "loss": 0.8761, "step": 7200 }, { "epoch": 0.983205898416166, "grad_norm": 5.777924537658691, "learning_rate": 7.394979749580367e-09, "loss": 0.9836, "step": 7201 }, { "epoch": 0.9833424358274168, "grad_norm": 5.049607753753662, "learning_rate": 7.275254021501532e-09, "loss": 0.8086, "step": 7202 }, { "epoch": 0.9834789732386674, "grad_norm": 6.617821216583252, "learning_rate": 7.156504701709144e-09, "loss": 1.0094, "step": 7203 }, { "epoch": 0.983615510649918, "grad_norm": 6.115789890289307, "learning_rate": 7.038731813426292e-09, "loss": 1.0204, "step": 7204 }, { "epoch": 0.9837520480611688, "grad_norm": 5.736260414123535, "learning_rate": 6.9219353796851075e-09, "loss": 0.9983, "step": 7205 }, { "epoch": 0.9838885854724194, "grad_norm": 5.932833671569824, "learning_rate": 6.80611542332732e-09, "loss": 1.0126, "step": 7206 }, { "epoch": 0.9840251228836702, "grad_norm": 5.995199680328369, "learning_rate": 6.691271967004253e-09, "loss": 1.1257, "step": 7207 }, { "epoch": 0.9841616602949208, "grad_norm": 7.616644859313965, "learning_rate": 6.5774050331735e-09, "loss": 1.007, "step": 7208 }, { "epoch": 0.9842981977061714, "grad_norm": 6.107201099395752, "learning_rate": 6.46451464410558e-09, "loss": 0.9155, "step": 7209 }, { "epoch": 0.9844347351174222, "grad_norm": 12.42308521270752, "learning_rate": 6.35260082187672e-09, "loss": 1.121, "step": 7210 }, { "epoch": 0.9845712725286728, "grad_norm": 5.552826881408691, "learning_rate": 6.241663588373858e-09, "loss": 1.1105, "step": 7211 }, { "epoch": 0.9847078099399236, "grad_norm": 6.482144832611084, "learning_rate": 6.1317029652929734e-09, "loss": 0.7762, "step": 7212 }, { "epoch": 0.9848443473511742, "grad_norm": 7.587658405303955, "learning_rate": 6.022718974137976e-09, "loss": 0.8974, "step": 7213 }, { "epoch": 0.984980884762425, "grad_norm": 6.682025909423828, "learning_rate": 5.914711636222925e-09, "loss": 0.9238, "step": 7214 }, { "epoch": 0.9851174221736756, "grad_norm": 6.970547676086426, "learning_rate": 5.807680972670371e-09, "loss": 0.8632, "step": 7215 }, { "epoch": 0.9852539595849262, "grad_norm": 5.788361549377441, "learning_rate": 5.701627004411347e-09, "loss": 0.8643, "step": 7216 }, { "epoch": 0.985390496996177, "grad_norm": 7.1819562911987305, "learning_rate": 5.5965497521870415e-09, "loss": 0.961, "step": 7217 }, { "epoch": 0.9855270344074276, "grad_norm": 5.4894022941589355, "learning_rate": 5.492449236547126e-09, "loss": 1.0175, "step": 7218 }, { "epoch": 0.9856635718186784, "grad_norm": 6.254721641540527, "learning_rate": 5.389325477849761e-09, "loss": 0.9864, "step": 7219 }, { "epoch": 0.985800109229929, "grad_norm": 6.470337390899658, "learning_rate": 5.2871784962627015e-09, "loss": 1.0473, "step": 7220 }, { "epoch": 0.9859366466411796, "grad_norm": 5.940365791320801, "learning_rate": 5.186008311761637e-09, "loss": 0.9901, "step": 7221 }, { "epoch": 0.9860731840524304, "grad_norm": 5.0772905349731445, "learning_rate": 5.085814944132961e-09, "loss": 0.9123, "step": 7222 }, { "epoch": 0.986209721463681, "grad_norm": 5.1766886711120605, "learning_rate": 4.986598412971e-09, "loss": 0.8845, "step": 7223 }, { "epoch": 0.9863462588749318, "grad_norm": 6.119506359100342, "learning_rate": 4.888358737679122e-09, "loss": 1.0055, "step": 7224 }, { "epoch": 0.9864827962861824, "grad_norm": 7.3483967781066895, "learning_rate": 4.791095937469736e-09, "loss": 0.94, "step": 7225 }, { "epoch": 0.986619333697433, "grad_norm": 4.72857666015625, "learning_rate": 4.694810031363739e-09, "loss": 0.8797, "step": 7226 }, { "epoch": 0.9867558711086838, "grad_norm": 6.129341125488281, "learning_rate": 4.5995010381916225e-09, "loss": 1.0102, "step": 7227 }, { "epoch": 0.9868924085199344, "grad_norm": 6.322046756744385, "learning_rate": 4.505168976592922e-09, "loss": 0.9645, "step": 7228 }, { "epoch": 0.9870289459311852, "grad_norm": 5.853535175323486, "learning_rate": 4.411813865015102e-09, "loss": 0.8793, "step": 7229 }, { "epoch": 0.9871654833424358, "grad_norm": 6.102413177490234, "learning_rate": 4.319435721715781e-09, "loss": 0.8107, "step": 7230 }, { "epoch": 0.9873020207536866, "grad_norm": 5.917200565338135, "learning_rate": 4.228034564761063e-09, "loss": 0.9384, "step": 7231 }, { "epoch": 0.9874385581649372, "grad_norm": 8.106110572814941, "learning_rate": 4.137610412025539e-09, "loss": 1.0321, "step": 7232 }, { "epoch": 0.9875750955761878, "grad_norm": 6.360005855560303, "learning_rate": 4.048163281193951e-09, "loss": 0.9464, "step": 7233 }, { "epoch": 0.9877116329874386, "grad_norm": 5.929665565490723, "learning_rate": 3.959693189757863e-09, "loss": 1.0284, "step": 7234 }, { "epoch": 0.9878481703986892, "grad_norm": 5.462950706481934, "learning_rate": 3.8722001550201e-09, "loss": 0.9347, "step": 7235 }, { "epoch": 0.98798470780994, "grad_norm": 6.4453043937683105, "learning_rate": 3.785684194090866e-09, "loss": 0.8728, "step": 7236 }, { "epoch": 0.9881212452211906, "grad_norm": 8.26285457611084, "learning_rate": 3.700145323889959e-09, "loss": 0.9938, "step": 7237 }, { "epoch": 0.9882577826324412, "grad_norm": 6.462268352508545, "learning_rate": 3.6155835611456635e-09, "loss": 0.9239, "step": 7238 }, { "epoch": 0.988394320043692, "grad_norm": 6.84260368347168, "learning_rate": 3.531998922395308e-09, "loss": 0.9631, "step": 7239 }, { "epoch": 0.9885308574549426, "grad_norm": 5.1418232917785645, "learning_rate": 3.4493914239858152e-09, "loss": 1.0617, "step": 7240 }, { "epoch": 0.9886673948661934, "grad_norm": 6.918455123901367, "learning_rate": 3.367761082072041e-09, "loss": 1.0411, "step": 7241 }, { "epoch": 0.988803932277444, "grad_norm": 10.931583404541016, "learning_rate": 3.2871079126178817e-09, "loss": 0.924, "step": 7242 }, { "epoch": 0.9889404696886948, "grad_norm": 6.751733779907227, "learning_rate": 3.2074319313968314e-09, "loss": 1.0479, "step": 7243 }, { "epoch": 0.9890770070999454, "grad_norm": 6.9778666496276855, "learning_rate": 3.1287331539903155e-09, "loss": 0.8323, "step": 7244 }, { "epoch": 0.989213544511196, "grad_norm": 6.345627784729004, "learning_rate": 3.05101159578991e-09, "loss": 0.8725, "step": 7245 }, { "epoch": 0.9893500819224468, "grad_norm": 11.253364562988281, "learning_rate": 2.9742672719940135e-09, "loss": 0.9443, "step": 7246 }, { "epoch": 0.9894866193336974, "grad_norm": 9.266310691833496, "learning_rate": 2.8985001976128415e-09, "loss": 0.8385, "step": 7247 }, { "epoch": 0.9896231567449482, "grad_norm": 7.584554672241211, "learning_rate": 2.8237103874628748e-09, "loss": 0.9423, "step": 7248 }, { "epoch": 0.9897596941561988, "grad_norm": 6.36651611328125, "learning_rate": 2.7498978561707466e-09, "loss": 0.9033, "step": 7249 }, { "epoch": 0.9898962315674494, "grad_norm": 7.6826252937316895, "learning_rate": 2.6770626181715776e-09, "loss": 1.0898, "step": 7250 }, { "epoch": 0.9900327689787002, "grad_norm": 9.285856246948242, "learning_rate": 2.6052046877100833e-09, "loss": 0.7928, "step": 7251 }, { "epoch": 0.9901693063899508, "grad_norm": 5.817430019378662, "learning_rate": 2.534324078837802e-09, "loss": 0.8109, "step": 7252 }, { "epoch": 0.9903058438012016, "grad_norm": 6.2283172607421875, "learning_rate": 2.4644208054180883e-09, "loss": 0.9904, "step": 7253 }, { "epoch": 0.9904423812124522, "grad_norm": 8.135821342468262, "learning_rate": 2.3954948811211186e-09, "loss": 1.0102, "step": 7254 }, { "epoch": 0.9905789186237028, "grad_norm": 6.097606658935547, "learning_rate": 2.3275463194261104e-09, "loss": 0.8308, "step": 7255 }, { "epoch": 0.9907154560349536, "grad_norm": 5.6423821449279785, "learning_rate": 2.2605751336224337e-09, "loss": 1.0598, "step": 7256 }, { "epoch": 0.9908519934462042, "grad_norm": 5.425329685211182, "learning_rate": 2.1945813368062787e-09, "loss": 0.9301, "step": 7257 }, { "epoch": 0.990988530857455, "grad_norm": 6.715761661529541, "learning_rate": 2.129564941883988e-09, "loss": 0.9103, "step": 7258 }, { "epoch": 0.9911250682687056, "grad_norm": 5.843687534332275, "learning_rate": 2.065525961571502e-09, "loss": 0.7379, "step": 7259 }, { "epoch": 0.9912616056799564, "grad_norm": 6.84116268157959, "learning_rate": 2.002464408392135e-09, "loss": 1.0586, "step": 7260 }, { "epoch": 0.991398143091207, "grad_norm": 5.968358993530273, "learning_rate": 1.9403802946776908e-09, "loss": 0.812, "step": 7261 }, { "epoch": 0.9915346805024576, "grad_norm": 6.928186893463135, "learning_rate": 1.8792736325712323e-09, "loss": 0.8539, "step": 7262 }, { "epoch": 0.9916712179137084, "grad_norm": 6.648662567138672, "learning_rate": 1.819144434022646e-09, "loss": 0.9575, "step": 7263 }, { "epoch": 0.991807755324959, "grad_norm": 7.282908916473389, "learning_rate": 1.7599927107908588e-09, "loss": 0.8919, "step": 7264 }, { "epoch": 0.9919442927362098, "grad_norm": 5.227563381195068, "learning_rate": 1.7018184744438393e-09, "loss": 0.8608, "step": 7265 }, { "epoch": 0.9920808301474604, "grad_norm": 5.581913948059082, "learning_rate": 1.6446217363591533e-09, "loss": 0.9859, "step": 7266 }, { "epoch": 0.992217367558711, "grad_norm": 6.79208517074585, "learning_rate": 1.5884025077217424e-09, "loss": 1.0234, "step": 7267 }, { "epoch": 0.9923539049699618, "grad_norm": 5.785965919494629, "learning_rate": 1.5331607995267006e-09, "loss": 0.9727, "step": 7268 }, { "epoch": 0.9924904423812124, "grad_norm": 6.56870174407959, "learning_rate": 1.478896622577608e-09, "loss": 0.8332, "step": 7269 }, { "epoch": 0.9926269797924632, "grad_norm": 6.309728145599365, "learning_rate": 1.4256099874865314e-09, "loss": 0.7648, "step": 7270 }, { "epoch": 0.9927635172037138, "grad_norm": 4.822063446044922, "learning_rate": 1.3733009046740243e-09, "loss": 0.7884, "step": 7271 }, { "epoch": 0.9929000546149646, "grad_norm": 6.725471496582031, "learning_rate": 1.3219693843707914e-09, "loss": 1.0288, "step": 7272 }, { "epoch": 0.9930365920262152, "grad_norm": 7.263617992401123, "learning_rate": 1.2716154366149147e-09, "loss": 1.032, "step": 7273 }, { "epoch": 0.9931731294374658, "grad_norm": 5.857482433319092, "learning_rate": 1.2222390712546273e-09, "loss": 0.9379, "step": 7274 }, { "epoch": 0.9933096668487166, "grad_norm": 6.04392147064209, "learning_rate": 1.1738402979455388e-09, "loss": 0.967, "step": 7275 }, { "epoch": 0.9934462042599672, "grad_norm": 6.593574523925781, "learning_rate": 1.1264191261528557e-09, "loss": 0.9332, "step": 7276 }, { "epoch": 0.993582741671218, "grad_norm": 6.8769145011901855, "learning_rate": 1.0799755651508259e-09, "loss": 0.9313, "step": 7277 }, { "epoch": 0.9937192790824686, "grad_norm": 7.5863213539123535, "learning_rate": 1.0345096240227393e-09, "loss": 1.0694, "step": 7278 }, { "epoch": 0.9938558164937192, "grad_norm": 8.365434646606445, "learning_rate": 9.900213116592616e-10, "loss": 1.0026, "step": 7279 }, { "epoch": 0.99399235390497, "grad_norm": 5.129082679748535, "learning_rate": 9.46510636761211e-10, "loss": 0.8782, "step": 7280 }, { "epoch": 0.9941288913162206, "grad_norm": 6.67662239074707, "learning_rate": 9.039776078378914e-10, "loss": 0.9779, "step": 7281 }, { "epoch": 0.9942654287274714, "grad_norm": 15.16599178314209, "learning_rate": 8.624222332070941e-10, "loss": 0.9062, "step": 7282 }, { "epoch": 0.994401966138722, "grad_norm": 6.967362880706787, "learning_rate": 8.218445209962067e-10, "loss": 0.8218, "step": 7283 }, { "epoch": 0.9945385035499726, "grad_norm": 6.516237735748291, "learning_rate": 7.82244479139993e-10, "loss": 0.8484, "step": 7284 }, { "epoch": 0.9946750409612234, "grad_norm": 11.855177879333496, "learning_rate": 7.436221153833689e-10, "loss": 1.0765, "step": 7285 }, { "epoch": 0.994811578372474, "grad_norm": 5.9910569190979, "learning_rate": 7.059774372797368e-10, "loss": 0.9753, "step": 7286 }, { "epoch": 0.9949481157837248, "grad_norm": 5.387778282165527, "learning_rate": 6.693104521909854e-10, "loss": 1.0262, "step": 7287 }, { "epoch": 0.9950846531949754, "grad_norm": 8.386541366577148, "learning_rate": 6.336211672880454e-10, "loss": 1.0248, "step": 7288 }, { "epoch": 0.9952211906062262, "grad_norm": 5.6666178703308105, "learning_rate": 5.989095895497787e-10, "loss": 0.8433, "step": 7289 }, { "epoch": 0.9953577280174768, "grad_norm": 6.136352062225342, "learning_rate": 5.651757257657542e-10, "loss": 1.0001, "step": 7290 }, { "epoch": 0.9954942654287274, "grad_norm": 4.869958400726318, "learning_rate": 5.32419582532362e-10, "loss": 0.9905, "step": 7291 }, { "epoch": 0.9956308028399782, "grad_norm": 6.3737406730651855, "learning_rate": 5.006411662555888e-10, "loss": 0.9611, "step": 7292 }, { "epoch": 0.9957673402512288, "grad_norm": 7.4499077796936035, "learning_rate": 4.698404831510184e-10, "loss": 0.8743, "step": 7293 }, { "epoch": 0.9959038776624796, "grad_norm": 6.468605995178223, "learning_rate": 4.400175392410555e-10, "loss": 0.8971, "step": 7294 }, { "epoch": 0.9960404150737302, "grad_norm": 8.152840614318848, "learning_rate": 4.1117234035936703e-10, "loss": 0.8053, "step": 7295 }, { "epoch": 0.9961769524849808, "grad_norm": 5.161272048950195, "learning_rate": 3.8330489214588593e-10, "loss": 0.8723, "step": 7296 }, { "epoch": 0.9963134898962316, "grad_norm": 7.7617621421813965, "learning_rate": 3.564152000512522e-10, "loss": 0.8656, "step": 7297 }, { "epoch": 0.9964500273074822, "grad_norm": 6.570938587188721, "learning_rate": 3.305032693340371e-10, "loss": 1.0316, "step": 7298 }, { "epoch": 0.996586564718733, "grad_norm": 6.940401077270508, "learning_rate": 3.0556910506129853e-10, "loss": 0.8875, "step": 7299 }, { "epoch": 0.9967231021299836, "grad_norm": 8.176926612854004, "learning_rate": 2.816127121102463e-10, "loss": 1.0223, "step": 7300 }, { "epoch": 0.9968596395412342, "grad_norm": 6.091250419616699, "learning_rate": 2.5863409516491134e-10, "loss": 0.9041, "step": 7301 }, { "epoch": 0.996996176952485, "grad_norm": 5.422081470489502, "learning_rate": 2.366332587200315e-10, "loss": 0.979, "step": 7302 }, { "epoch": 0.9971327143637356, "grad_norm": 5.517271995544434, "learning_rate": 2.1561020707772108e-10, "loss": 0.9023, "step": 7303 }, { "epoch": 0.9972692517749864, "grad_norm": 5.9540696144104, "learning_rate": 1.9556494434969097e-10, "loss": 1.0419, "step": 7304 }, { "epoch": 0.997405789186237, "grad_norm": 5.710447788238525, "learning_rate": 1.7649747445558362e-10, "loss": 0.8964, "step": 7305 }, { "epoch": 0.9975423265974878, "grad_norm": 5.022430419921875, "learning_rate": 1.5840780112519328e-10, "loss": 0.8323, "step": 7306 }, { "epoch": 0.9976788640087384, "grad_norm": 14.705644607543945, "learning_rate": 1.4129592789513536e-10, "loss": 0.8434, "step": 7307 }, { "epoch": 0.997815401419989, "grad_norm": 4.8128581047058105, "learning_rate": 1.251618581127323e-10, "loss": 0.9814, "step": 7308 }, { "epoch": 0.9979519388312398, "grad_norm": 11.16581916809082, "learning_rate": 1.1000559493323793e-10, "loss": 0.9434, "step": 7309 }, { "epoch": 0.9980884762424904, "grad_norm": 6.007442951202393, "learning_rate": 9.582714132039261e-11, "loss": 0.7967, "step": 7310 }, { "epoch": 0.9982250136537412, "grad_norm": 6.210345268249512, "learning_rate": 8.262650004753348e-11, "loss": 1.0851, "step": 7311 }, { "epoch": 0.9983615510649918, "grad_norm": 6.191189289093018, "learning_rate": 7.040367369537394e-11, "loss": 0.9016, "step": 7312 }, { "epoch": 0.9984980884762424, "grad_norm": 5.836175918579102, "learning_rate": 5.915866465477926e-11, "loss": 0.9773, "step": 7313 }, { "epoch": 0.9986346258874932, "grad_norm": 5.8090338706970215, "learning_rate": 4.889147512510129e-11, "loss": 0.8106, "step": 7314 }, { "epoch": 0.9987711632987438, "grad_norm": 8.648483276367188, "learning_rate": 3.960210711417834e-11, "loss": 0.9441, "step": 7315 }, { "epoch": 0.9989077007099946, "grad_norm": 8.439908981323242, "learning_rate": 3.129056243833528e-11, "loss": 0.9219, "step": 7316 }, { "epoch": 0.9990442381212452, "grad_norm": 7.683258056640625, "learning_rate": 2.3956842722938633e-11, "loss": 1.0567, "step": 7317 }, { "epoch": 0.999180775532496, "grad_norm": 7.236093997955322, "learning_rate": 1.760094940295165e-11, "loss": 0.9296, "step": 7318 }, { "epoch": 0.9993173129437466, "grad_norm": 5.005321025848389, "learning_rate": 1.2222883721269007e-11, "loss": 0.8793, "step": 7319 }, { "epoch": 0.9994538503549972, "grad_norm": 6.108964443206787, "learning_rate": 7.822646728716798e-12, "loss": 0.9472, "step": 7320 }, { "epoch": 0.999590387766248, "grad_norm": 5.024413585662842, "learning_rate": 4.40023928682809e-12, "loss": 1.1011, "step": 7321 }, { "epoch": 0.9997269251774986, "grad_norm": 6.2433576583862305, "learning_rate": 1.955662064512254e-12, "loss": 0.8759, "step": 7322 }, { "epoch": 0.9998634625887494, "grad_norm": 4.921255588531494, "learning_rate": 4.889155402754143e-13, "loss": 0.8851, "step": 7323 }, { "epoch": 1.0, "grad_norm": 5.667227745056152, "learning_rate": 0.0, "loss": 1.0258, "step": 7324 }, { "epoch": 1.0, "step": 7324, "total_flos": 1.7519533423192965e+18, "train_loss": 0.9869708416648799, "train_runtime": 13159.8051, "train_samples_per_second": 35.617, "train_steps_per_second": 0.557 } ], "logging_steps": 1.0, "max_steps": 7324, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7519533423192965e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }