{ "best_global_step": 33814, "best_metric": 0.7482216106755283, "best_model_checkpoint": "output/QA-ModernBERT-large/checkpoint-33814", "epoch": 4.0, "eval_steps": 500, "global_step": 67628, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005914709883480216, "grad_norm": 8.802539825439453, "learning_rate": 5.322294500295683e-08, "loss": 0.8356, "step": 10 }, { "epoch": 0.0011829419766960431, "grad_norm": 7.218952178955078, "learning_rate": 1.1235955056179776e-07, "loss": 0.8052, "step": 20 }, { "epoch": 0.0017744129650440646, "grad_norm": 5.774627208709717, "learning_rate": 1.714961561206387e-07, "loss": 0.7543, "step": 30 }, { "epoch": 0.0023658839533920862, "grad_norm": 5.247931480407715, "learning_rate": 2.3063276167947962e-07, "loss": 0.7271, "step": 40 }, { "epoch": 0.0029573549417401075, "grad_norm": 4.746769905090332, "learning_rate": 2.897693672383205e-07, "loss": 0.6972, "step": 50 }, { "epoch": 0.003548825930088129, "grad_norm": 6.610109329223633, "learning_rate": 3.4890597279716147e-07, "loss": 0.6421, "step": 60 }, { "epoch": 0.004140296918436151, "grad_norm": 4.935207366943359, "learning_rate": 4.0804257835600236e-07, "loss": 0.5885, "step": 70 }, { "epoch": 0.0047317679067841725, "grad_norm": 4.761286735534668, "learning_rate": 4.6717918391484326e-07, "loss": 0.5666, "step": 80 }, { "epoch": 0.005323238895132194, "grad_norm": 4.488841533660889, "learning_rate": 5.263157894736842e-07, "loss": 0.5388, "step": 90 }, { "epoch": 0.005914709883480215, "grad_norm": 3.960728406906128, "learning_rate": 5.854523950325252e-07, "loss": 0.5154, "step": 100 }, { "epoch": 0.006506180871828237, "grad_norm": 1.8189759254455566, "learning_rate": 6.445890005913661e-07, "loss": 0.2912, "step": 110 }, { "epoch": 0.007097651860176258, "grad_norm": 3.722106456756592, "learning_rate": 7.03725606150207e-07, "loss": 0.3218, "step": 120 }, { "epoch": 0.00768912284852428, "grad_norm": 2.9181010723114014, "learning_rate": 7.62862211709048e-07, "loss": 0.3289, "step": 130 }, { "epoch": 0.008280593836872302, "grad_norm": 2.398226499557495, "learning_rate": 8.219988172678889e-07, "loss": 0.2943, "step": 140 }, { "epoch": 0.008872064825220323, "grad_norm": 3.217151641845703, "learning_rate": 8.811354228267298e-07, "loss": 0.29, "step": 150 }, { "epoch": 0.009463535813568345, "grad_norm": 46.61650848388672, "learning_rate": 9.402720283855708e-07, "loss": 0.2428, "step": 160 }, { "epoch": 0.010055006801916367, "grad_norm": 2.776388645172119, "learning_rate": 9.994086339444114e-07, "loss": 0.2395, "step": 170 }, { "epoch": 0.010646477790264388, "grad_norm": 1.4118751287460327, "learning_rate": 1.0585452395032526e-06, "loss": 0.218, "step": 180 }, { "epoch": 0.011237948778612408, "grad_norm": 1.618638277053833, "learning_rate": 1.1176818450620935e-06, "loss": 0.2113, "step": 190 }, { "epoch": 0.01182941976696043, "grad_norm": 2.5771474838256836, "learning_rate": 1.1768184506209344e-06, "loss": 0.2024, "step": 200 }, { "epoch": 0.012420890755308452, "grad_norm": 1.8539259433746338, "learning_rate": 1.2359550561797752e-06, "loss": 0.2275, "step": 210 }, { "epoch": 0.013012361743656473, "grad_norm": 4.5911946296691895, "learning_rate": 1.2950916617386161e-06, "loss": 0.2068, "step": 220 }, { "epoch": 0.013603832732004495, "grad_norm": 1.4101959466934204, "learning_rate": 1.354228267297457e-06, "loss": 0.1894, "step": 230 }, { "epoch": 0.014195303720352517, "grad_norm": 1.2502838373184204, "learning_rate": 1.4133648728562982e-06, "loss": 0.1877, "step": 240 }, { "epoch": 0.014786774708700538, "grad_norm": 2.126499652862549, "learning_rate": 1.472501478415139e-06, "loss": 0.1799, "step": 250 }, { "epoch": 0.01537824569704856, "grad_norm": 2.89096999168396, "learning_rate": 1.53163808397398e-06, "loss": 0.2218, "step": 260 }, { "epoch": 0.01596971668539658, "grad_norm": 1.4436651468276978, "learning_rate": 1.5907746895328208e-06, "loss": 0.2102, "step": 270 }, { "epoch": 0.016561187673744603, "grad_norm": 1.9941529035568237, "learning_rate": 1.6499112950916617e-06, "loss": 0.1977, "step": 280 }, { "epoch": 0.017152658662092625, "grad_norm": 1.4458268880844116, "learning_rate": 1.7090479006505029e-06, "loss": 0.1766, "step": 290 }, { "epoch": 0.017744129650440647, "grad_norm": 2.2254538536071777, "learning_rate": 1.7681845062093438e-06, "loss": 0.1795, "step": 300 }, { "epoch": 0.01833560063878867, "grad_norm": 1.4980852603912354, "learning_rate": 1.8273211117681844e-06, "loss": 0.215, "step": 310 }, { "epoch": 0.01892707162713669, "grad_norm": 1.8602489233016968, "learning_rate": 1.8864577173270255e-06, "loss": 0.1895, "step": 320 }, { "epoch": 0.01951854261548471, "grad_norm": 0.8940849304199219, "learning_rate": 1.945594322885866e-06, "loss": 0.1593, "step": 330 }, { "epoch": 0.020110013603832733, "grad_norm": 1.2008174657821655, "learning_rate": 2.0047309284447076e-06, "loss": 0.1629, "step": 340 }, { "epoch": 0.020701484592180755, "grad_norm": 2.7451987266540527, "learning_rate": 2.0638675340035482e-06, "loss": 0.1581, "step": 350 }, { "epoch": 0.021292955580528777, "grad_norm": 2.5634405612945557, "learning_rate": 2.1230041395623893e-06, "loss": 0.2193, "step": 360 }, { "epoch": 0.0218844265688768, "grad_norm": 1.398598313331604, "learning_rate": 2.18214074512123e-06, "loss": 0.1776, "step": 370 }, { "epoch": 0.022475897557224817, "grad_norm": 1.6595708131790161, "learning_rate": 2.2412773506800707e-06, "loss": 0.1821, "step": 380 }, { "epoch": 0.023067368545572838, "grad_norm": 1.2330938577651978, "learning_rate": 2.300413956238912e-06, "loss": 0.1503, "step": 390 }, { "epoch": 0.02365883953392086, "grad_norm": 2.1451687812805176, "learning_rate": 2.359550561797753e-06, "loss": 0.1408, "step": 400 }, { "epoch": 0.02425031052226888, "grad_norm": 1.9059118032455444, "learning_rate": 2.418687167356594e-06, "loss": 0.1939, "step": 410 }, { "epoch": 0.024841781510616903, "grad_norm": 1.3768647909164429, "learning_rate": 2.4778237729154347e-06, "loss": 0.1905, "step": 420 }, { "epoch": 0.025433252498964925, "grad_norm": 1.5470619201660156, "learning_rate": 2.5369603784742754e-06, "loss": 0.1652, "step": 430 }, { "epoch": 0.026024723487312947, "grad_norm": 1.4683308601379395, "learning_rate": 2.5960969840331165e-06, "loss": 0.1616, "step": 440 }, { "epoch": 0.026616194475660968, "grad_norm": 1.5188425779342651, "learning_rate": 2.6552335895919576e-06, "loss": 0.1302, "step": 450 }, { "epoch": 0.02720766546400899, "grad_norm": 1.3792104721069336, "learning_rate": 2.7143701951507983e-06, "loss": 0.1872, "step": 460 }, { "epoch": 0.02779913645235701, "grad_norm": 1.243992567062378, "learning_rate": 2.7735068007096394e-06, "loss": 0.1687, "step": 470 }, { "epoch": 0.028390607440705033, "grad_norm": 1.3309744596481323, "learning_rate": 2.83264340626848e-06, "loss": 0.1755, "step": 480 }, { "epoch": 0.028982078429053055, "grad_norm": 1.4267258644104004, "learning_rate": 2.8917800118273212e-06, "loss": 0.1443, "step": 490 }, { "epoch": 0.029573549417401077, "grad_norm": 1.9535112380981445, "learning_rate": 2.950916617386162e-06, "loss": 0.1281, "step": 500 }, { "epoch": 0.030165020405749098, "grad_norm": 1.9775069952011108, "learning_rate": 3.010053222945003e-06, "loss": 0.2034, "step": 510 }, { "epoch": 0.03075649139409712, "grad_norm": 1.5109907388687134, "learning_rate": 3.069189828503844e-06, "loss": 0.1814, "step": 520 }, { "epoch": 0.03134796238244514, "grad_norm": 1.4241763353347778, "learning_rate": 3.128326434062685e-06, "loss": 0.1556, "step": 530 }, { "epoch": 0.03193943337079316, "grad_norm": 1.1549763679504395, "learning_rate": 3.187463039621526e-06, "loss": 0.143, "step": 540 }, { "epoch": 0.03253090435914118, "grad_norm": 1.5027859210968018, "learning_rate": 3.2465996451803666e-06, "loss": 0.1156, "step": 550 }, { "epoch": 0.03312237534748921, "grad_norm": 1.9123948812484741, "learning_rate": 3.3057362507392073e-06, "loss": 0.1871, "step": 560 }, { "epoch": 0.033713846335837225, "grad_norm": 1.3411476612091064, "learning_rate": 3.364872856298049e-06, "loss": 0.1681, "step": 570 }, { "epoch": 0.03430531732418525, "grad_norm": 1.4057272672653198, "learning_rate": 3.4240094618568895e-06, "loss": 0.1406, "step": 580 }, { "epoch": 0.03489678831253327, "grad_norm": 1.5951554775238037, "learning_rate": 3.4831460674157306e-06, "loss": 0.1374, "step": 590 }, { "epoch": 0.03548825930088129, "grad_norm": 1.9347423315048218, "learning_rate": 3.5422826729745713e-06, "loss": 0.1136, "step": 600 }, { "epoch": 0.03607973028922931, "grad_norm": 1.749595284461975, "learning_rate": 3.601419278533412e-06, "loss": 0.1935, "step": 610 }, { "epoch": 0.03667120127757734, "grad_norm": 1.5630292892456055, "learning_rate": 3.660555884092253e-06, "loss": 0.1738, "step": 620 }, { "epoch": 0.037262672265925355, "grad_norm": 2.172639846801758, "learning_rate": 3.7196924896510942e-06, "loss": 0.1428, "step": 630 }, { "epoch": 0.03785414325427338, "grad_norm": 1.9153077602386475, "learning_rate": 3.7788290952099353e-06, "loss": 0.1337, "step": 640 }, { "epoch": 0.0384456142426214, "grad_norm": 1.5925862789154053, "learning_rate": 3.837965700768776e-06, "loss": 0.114, "step": 650 }, { "epoch": 0.03903708523096942, "grad_norm": 1.748759150505066, "learning_rate": 3.897102306327617e-06, "loss": 0.1855, "step": 660 }, { "epoch": 0.03962855621931744, "grad_norm": 2.2314038276672363, "learning_rate": 3.956238911886458e-06, "loss": 0.17, "step": 670 }, { "epoch": 0.04022002720766547, "grad_norm": 1.2750163078308105, "learning_rate": 4.015375517445299e-06, "loss": 0.1384, "step": 680 }, { "epoch": 0.040811498196013485, "grad_norm": 1.7319550514221191, "learning_rate": 4.07451212300414e-06, "loss": 0.1138, "step": 690 }, { "epoch": 0.04140296918436151, "grad_norm": 1.5106467008590698, "learning_rate": 4.133648728562981e-06, "loss": 0.1272, "step": 700 }, { "epoch": 0.04199444017270953, "grad_norm": 5.195685386657715, "learning_rate": 4.192785334121821e-06, "loss": 0.1881, "step": 710 }, { "epoch": 0.04258591116105755, "grad_norm": 2.518991470336914, "learning_rate": 4.251921939680662e-06, "loss": 0.1562, "step": 720 }, { "epoch": 0.04317738214940557, "grad_norm": 1.7233519554138184, "learning_rate": 4.311058545239504e-06, "loss": 0.1435, "step": 730 }, { "epoch": 0.0437688531377536, "grad_norm": 1.7967174053192139, "learning_rate": 4.370195150798344e-06, "loss": 0.1355, "step": 740 }, { "epoch": 0.044360324126101615, "grad_norm": 1.0948567390441895, "learning_rate": 4.429331756357185e-06, "loss": 0.1194, "step": 750 }, { "epoch": 0.04495179511444963, "grad_norm": 1.9314794540405273, "learning_rate": 4.488468361916026e-06, "loss": 0.2057, "step": 760 }, { "epoch": 0.04554326610279766, "grad_norm": 1.3082349300384521, "learning_rate": 4.547604967474867e-06, "loss": 0.1465, "step": 770 }, { "epoch": 0.046134737091145676, "grad_norm": 1.0615453720092773, "learning_rate": 4.606741573033708e-06, "loss": 0.1405, "step": 780 }, { "epoch": 0.0467262080794937, "grad_norm": 1.120788812637329, "learning_rate": 4.6658781785925494e-06, "loss": 0.1297, "step": 790 }, { "epoch": 0.04731767906784172, "grad_norm": 1.4794665575027466, "learning_rate": 4.72501478415139e-06, "loss": 0.1187, "step": 800 }, { "epoch": 0.047909150056189745, "grad_norm": 1.3880072832107544, "learning_rate": 4.784151389710231e-06, "loss": 0.168, "step": 810 }, { "epoch": 0.04850062104453776, "grad_norm": 1.3812446594238281, "learning_rate": 4.8432879952690715e-06, "loss": 0.1463, "step": 820 }, { "epoch": 0.04909209203288579, "grad_norm": 1.1149697303771973, "learning_rate": 4.902424600827912e-06, "loss": 0.1314, "step": 830 }, { "epoch": 0.049683563021233806, "grad_norm": 0.9738759994506836, "learning_rate": 4.961561206386754e-06, "loss": 0.1282, "step": 840 }, { "epoch": 0.05027503400958183, "grad_norm": 1.2358518838882446, "learning_rate": 5.020697811945594e-06, "loss": 0.1121, "step": 850 }, { "epoch": 0.05086650499792985, "grad_norm": 1.2813243865966797, "learning_rate": 5.079834417504435e-06, "loss": 0.1586, "step": 860 }, { "epoch": 0.051457975986277875, "grad_norm": 2.145686388015747, "learning_rate": 5.138971023063276e-06, "loss": 0.1368, "step": 870 }, { "epoch": 0.05204944697462589, "grad_norm": 1.460083246231079, "learning_rate": 5.1981076286221165e-06, "loss": 0.1361, "step": 880 }, { "epoch": 0.05264091796297392, "grad_norm": 2.160054922103882, "learning_rate": 5.257244234180959e-06, "loss": 0.1297, "step": 890 }, { "epoch": 0.053232388951321936, "grad_norm": 4.355591297149658, "learning_rate": 5.3163808397397995e-06, "loss": 0.1147, "step": 900 }, { "epoch": 0.05382385993966996, "grad_norm": 1.9197818040847778, "learning_rate": 5.37551744529864e-06, "loss": 0.1704, "step": 910 }, { "epoch": 0.05441533092801798, "grad_norm": 1.8332459926605225, "learning_rate": 5.434654050857481e-06, "loss": 0.1355, "step": 920 }, { "epoch": 0.055006801916366005, "grad_norm": 1.043602466583252, "learning_rate": 5.493790656416322e-06, "loss": 0.1238, "step": 930 }, { "epoch": 0.05559827290471402, "grad_norm": 1.3634600639343262, "learning_rate": 5.552927261975163e-06, "loss": 0.1217, "step": 940 }, { "epoch": 0.05618974389306205, "grad_norm": 1.267054796218872, "learning_rate": 5.612063867534004e-06, "loss": 0.1088, "step": 950 }, { "epoch": 0.056781214881410066, "grad_norm": 1.1846832036972046, "learning_rate": 5.6712004730928445e-06, "loss": 0.1608, "step": 960 }, { "epoch": 0.05737268586975809, "grad_norm": 1.5894241333007812, "learning_rate": 5.730337078651685e-06, "loss": 0.133, "step": 970 }, { "epoch": 0.05796415685810611, "grad_norm": 8.251991271972656, "learning_rate": 5.789473684210526e-06, "loss": 0.1207, "step": 980 }, { "epoch": 0.05855562784645413, "grad_norm": 1.1410938501358032, "learning_rate": 5.848610289769367e-06, "loss": 0.1091, "step": 990 }, { "epoch": 0.05914709883480215, "grad_norm": 1.1493747234344482, "learning_rate": 5.907746895328209e-06, "loss": 0.0968, "step": 1000 }, { "epoch": 0.05973856982315017, "grad_norm": 2.52860951423645, "learning_rate": 5.96688350088705e-06, "loss": 0.154, "step": 1010 }, { "epoch": 0.060330040811498196, "grad_norm": 1.3178080320358276, "learning_rate": 6.02602010644589e-06, "loss": 0.1407, "step": 1020 }, { "epoch": 0.060921511799846215, "grad_norm": 1.6836622953414917, "learning_rate": 6.085156712004731e-06, "loss": 0.113, "step": 1030 }, { "epoch": 0.06151298278819424, "grad_norm": 0.9104619026184082, "learning_rate": 6.1442933175635725e-06, "loss": 0.1173, "step": 1040 }, { "epoch": 0.06210445377654226, "grad_norm": 1.5926438570022583, "learning_rate": 6.203429923122413e-06, "loss": 0.1021, "step": 1050 }, { "epoch": 0.06269592476489028, "grad_norm": 2.0736446380615234, "learning_rate": 6.262566528681254e-06, "loss": 0.1461, "step": 1060 }, { "epoch": 0.0632873957532383, "grad_norm": 1.0439884662628174, "learning_rate": 6.3217031342400946e-06, "loss": 0.1223, "step": 1070 }, { "epoch": 0.06387886674158633, "grad_norm": 1.662875771522522, "learning_rate": 6.380839739798935e-06, "loss": 0.1255, "step": 1080 }, { "epoch": 0.06447033772993435, "grad_norm": 1.1812007427215576, "learning_rate": 6.439976345357777e-06, "loss": 0.1164, "step": 1090 }, { "epoch": 0.06506180871828236, "grad_norm": 1.47392737865448, "learning_rate": 6.4991129509166175e-06, "loss": 0.1019, "step": 1100 }, { "epoch": 0.06565327970663039, "grad_norm": 1.1967815160751343, "learning_rate": 6.558249556475458e-06, "loss": 0.1421, "step": 1110 }, { "epoch": 0.06624475069497841, "grad_norm": 3.9282026290893555, "learning_rate": 6.6173861620343e-06, "loss": 0.1342, "step": 1120 }, { "epoch": 0.06683622168332644, "grad_norm": 0.9664806127548218, "learning_rate": 6.67652276759314e-06, "loss": 0.1313, "step": 1130 }, { "epoch": 0.06742769267167445, "grad_norm": 0.9938191175460815, "learning_rate": 6.735659373151982e-06, "loss": 0.1028, "step": 1140 }, { "epoch": 0.06801916366002247, "grad_norm": 1.2730629444122314, "learning_rate": 6.794795978710823e-06, "loss": 0.0962, "step": 1150 }, { "epoch": 0.0686106346483705, "grad_norm": 1.909461259841919, "learning_rate": 6.853932584269663e-06, "loss": 0.1593, "step": 1160 }, { "epoch": 0.06920210563671852, "grad_norm": 1.3908004760742188, "learning_rate": 6.913069189828504e-06, "loss": 0.1303, "step": 1170 }, { "epoch": 0.06979357662506654, "grad_norm": 1.061418056488037, "learning_rate": 6.972205795387345e-06, "loss": 0.1332, "step": 1180 }, { "epoch": 0.07038504761341456, "grad_norm": 1.091521978378296, "learning_rate": 7.031342400946185e-06, "loss": 0.0999, "step": 1190 }, { "epoch": 0.07097651860176259, "grad_norm": 1.1078569889068604, "learning_rate": 7.090479006505027e-06, "loss": 0.0964, "step": 1200 }, { "epoch": 0.07156798959011061, "grad_norm": 1.0520617961883545, "learning_rate": 7.1496156120638676e-06, "loss": 0.1547, "step": 1210 }, { "epoch": 0.07215946057845862, "grad_norm": 0.8578731417655945, "learning_rate": 7.208752217622708e-06, "loss": 0.1256, "step": 1220 }, { "epoch": 0.07275093156680665, "grad_norm": 1.6188583374023438, "learning_rate": 7.267888823181549e-06, "loss": 0.1215, "step": 1230 }, { "epoch": 0.07334240255515467, "grad_norm": 1.0055336952209473, "learning_rate": 7.3270254287403905e-06, "loss": 0.1056, "step": 1240 }, { "epoch": 0.07393387354350268, "grad_norm": 1.6526163816452026, "learning_rate": 7.386162034299232e-06, "loss": 0.098, "step": 1250 }, { "epoch": 0.07452534453185071, "grad_norm": 1.558734655380249, "learning_rate": 7.445298639858073e-06, "loss": 0.131, "step": 1260 }, { "epoch": 0.07511681552019873, "grad_norm": 1.1068843603134155, "learning_rate": 7.504435245416914e-06, "loss": 0.1232, "step": 1270 }, { "epoch": 0.07570828650854676, "grad_norm": 4.338015079498291, "learning_rate": 7.563571850975754e-06, "loss": 0.1213, "step": 1280 }, { "epoch": 0.07629975749689477, "grad_norm": 1.0286308526992798, "learning_rate": 7.622708456534596e-06, "loss": 0.1131, "step": 1290 }, { "epoch": 0.0768912284852428, "grad_norm": 1.196652889251709, "learning_rate": 7.681845062093435e-06, "loss": 0.0969, "step": 1300 }, { "epoch": 0.07748269947359082, "grad_norm": 1.0248245000839233, "learning_rate": 7.740981667652278e-06, "loss": 0.1219, "step": 1310 }, { "epoch": 0.07807417046193885, "grad_norm": 0.9763538241386414, "learning_rate": 7.800118273211118e-06, "loss": 0.1089, "step": 1320 }, { "epoch": 0.07866564145028686, "grad_norm": 0.8487405776977539, "learning_rate": 7.85925487876996e-06, "loss": 0.1101, "step": 1330 }, { "epoch": 0.07925711243863488, "grad_norm": 2.242201328277588, "learning_rate": 7.9183914843288e-06, "loss": 0.1046, "step": 1340 }, { "epoch": 0.07984858342698291, "grad_norm": 1.0400896072387695, "learning_rate": 7.97752808988764e-06, "loss": 0.0959, "step": 1350 }, { "epoch": 0.08044005441533093, "grad_norm": 1.3924500942230225, "learning_rate": 8.036664695446481e-06, "loss": 0.1304, "step": 1360 }, { "epoch": 0.08103152540367894, "grad_norm": 1.2205969095230103, "learning_rate": 8.095801301005322e-06, "loss": 0.1153, "step": 1370 }, { "epoch": 0.08162299639202697, "grad_norm": 1.060810923576355, "learning_rate": 8.154937906564163e-06, "loss": 0.1182, "step": 1380 }, { "epoch": 0.082214467380375, "grad_norm": 0.8278103470802307, "learning_rate": 8.214074512123005e-06, "loss": 0.1179, "step": 1390 }, { "epoch": 0.08280593836872302, "grad_norm": 1.1937655210494995, "learning_rate": 8.273211117681844e-06, "loss": 0.0906, "step": 1400 }, { "epoch": 0.08339740935707103, "grad_norm": 1.2828706502914429, "learning_rate": 8.332347723240686e-06, "loss": 0.1505, "step": 1410 }, { "epoch": 0.08398888034541906, "grad_norm": 1.3856922388076782, "learning_rate": 8.391484328799527e-06, "loss": 0.128, "step": 1420 }, { "epoch": 0.08458035133376708, "grad_norm": 1.1052805185317993, "learning_rate": 8.450620934358368e-06, "loss": 0.1192, "step": 1430 }, { "epoch": 0.0851718223221151, "grad_norm": 0.8539075255393982, "learning_rate": 8.50975753991721e-06, "loss": 0.1084, "step": 1440 }, { "epoch": 0.08576329331046312, "grad_norm": 3.1041576862335205, "learning_rate": 8.568894145476049e-06, "loss": 0.0971, "step": 1450 }, { "epoch": 0.08635476429881114, "grad_norm": 1.5595990419387817, "learning_rate": 8.628030751034891e-06, "loss": 0.1254, "step": 1460 }, { "epoch": 0.08694623528715917, "grad_norm": 1.4602948427200317, "learning_rate": 8.68716735659373e-06, "loss": 0.1242, "step": 1470 }, { "epoch": 0.0875377062755072, "grad_norm": 1.1192611455917358, "learning_rate": 8.746303962152573e-06, "loss": 0.115, "step": 1480 }, { "epoch": 0.0881291772638552, "grad_norm": 0.92164546251297, "learning_rate": 8.805440567711414e-06, "loss": 0.1052, "step": 1490 }, { "epoch": 0.08872064825220323, "grad_norm": 1.1066807508468628, "learning_rate": 8.864577173270254e-06, "loss": 0.1082, "step": 1500 }, { "epoch": 0.08931211924055125, "grad_norm": 1.2995595932006836, "learning_rate": 8.923713778829095e-06, "loss": 0.1404, "step": 1510 }, { "epoch": 0.08990359022889927, "grad_norm": 0.9651638865470886, "learning_rate": 8.982850384387936e-06, "loss": 0.1166, "step": 1520 }, { "epoch": 0.09049506121724729, "grad_norm": 0.9661513566970825, "learning_rate": 9.041986989946778e-06, "loss": 0.1163, "step": 1530 }, { "epoch": 0.09108653220559532, "grad_norm": 0.8070608973503113, "learning_rate": 9.101123595505619e-06, "loss": 0.0983, "step": 1540 }, { "epoch": 0.09167800319394334, "grad_norm": 0.7113871574401855, "learning_rate": 9.16026020106446e-06, "loss": 0.0879, "step": 1550 }, { "epoch": 0.09226947418229135, "grad_norm": 1.7236013412475586, "learning_rate": 9.2193968066233e-06, "loss": 0.1307, "step": 1560 }, { "epoch": 0.09286094517063938, "grad_norm": 1.10356605052948, "learning_rate": 9.27853341218214e-06, "loss": 0.1073, "step": 1570 }, { "epoch": 0.0934524161589874, "grad_norm": 1.4670889377593994, "learning_rate": 9.337670017740981e-06, "loss": 0.1205, "step": 1580 }, { "epoch": 0.09404388714733543, "grad_norm": 0.6251926422119141, "learning_rate": 9.396806623299824e-06, "loss": 0.1007, "step": 1590 }, { "epoch": 0.09463535813568344, "grad_norm": 0.9680246710777283, "learning_rate": 9.455943228858663e-06, "loss": 0.09, "step": 1600 }, { "epoch": 0.09522682912403146, "grad_norm": 1.4342759847640991, "learning_rate": 9.515079834417505e-06, "loss": 0.1239, "step": 1610 }, { "epoch": 0.09581830011237949, "grad_norm": 1.9606391191482544, "learning_rate": 9.574216439976344e-06, "loss": 0.1225, "step": 1620 }, { "epoch": 0.09640977110072751, "grad_norm": 1.2802199125289917, "learning_rate": 9.633353045535187e-06, "loss": 0.1086, "step": 1630 }, { "epoch": 0.09700124208907553, "grad_norm": 1.0158711671829224, "learning_rate": 9.692489651094029e-06, "loss": 0.1039, "step": 1640 }, { "epoch": 0.09759271307742355, "grad_norm": 1.2874104976654053, "learning_rate": 9.751626256652868e-06, "loss": 0.0962, "step": 1650 }, { "epoch": 0.09818418406577158, "grad_norm": 1.5606082677841187, "learning_rate": 9.81076286221171e-06, "loss": 0.1266, "step": 1660 }, { "epoch": 0.0987756550541196, "grad_norm": 2.3393404483795166, "learning_rate": 9.86989946777055e-06, "loss": 0.1268, "step": 1670 }, { "epoch": 0.09936712604246761, "grad_norm": 0.958175778388977, "learning_rate": 9.929036073329392e-06, "loss": 0.1177, "step": 1680 }, { "epoch": 0.09995859703081564, "grad_norm": 1.1530687808990479, "learning_rate": 9.988172678888232e-06, "loss": 0.0917, "step": 1690 }, { "epoch": 0.10055006801916366, "grad_norm": 0.7195730805397034, "learning_rate": 1.0047309284447073e-05, "loss": 0.0855, "step": 1700 }, { "epoch": 0.10114153900751169, "grad_norm": 0.8960280418395996, "learning_rate": 1.0106445890005914e-05, "loss": 0.1242, "step": 1710 }, { "epoch": 0.1017330099958597, "grad_norm": 1.1061089038848877, "learning_rate": 1.0165582495564754e-05, "loss": 0.1068, "step": 1720 }, { "epoch": 0.10232448098420772, "grad_norm": 1.083343744277954, "learning_rate": 1.0224719101123595e-05, "loss": 0.1055, "step": 1730 }, { "epoch": 0.10291595197255575, "grad_norm": 0.5255225300788879, "learning_rate": 1.0283855706682437e-05, "loss": 0.1123, "step": 1740 }, { "epoch": 0.10350742296090376, "grad_norm": 0.6270454525947571, "learning_rate": 1.0342992312241276e-05, "loss": 0.0853, "step": 1750 }, { "epoch": 0.10409889394925179, "grad_norm": 1.1417897939682007, "learning_rate": 1.0402128917800119e-05, "loss": 0.1215, "step": 1760 }, { "epoch": 0.10469036493759981, "grad_norm": 1.1659555435180664, "learning_rate": 1.046126552335896e-05, "loss": 0.1131, "step": 1770 }, { "epoch": 0.10528183592594784, "grad_norm": 1.1007463932037354, "learning_rate": 1.05204021289178e-05, "loss": 0.1078, "step": 1780 }, { "epoch": 0.10587330691429585, "grad_norm": 0.6095358729362488, "learning_rate": 1.0579538734476643e-05, "loss": 0.1029, "step": 1790 }, { "epoch": 0.10646477790264387, "grad_norm": 1.2525566816329956, "learning_rate": 1.0638675340035482e-05, "loss": 0.097, "step": 1800 }, { "epoch": 0.1070562488909919, "grad_norm": 1.0419878959655762, "learning_rate": 1.0697811945594324e-05, "loss": 0.1377, "step": 1810 }, { "epoch": 0.10764771987933992, "grad_norm": 0.8815398216247559, "learning_rate": 1.0756948551153163e-05, "loss": 0.1073, "step": 1820 }, { "epoch": 0.10823919086768793, "grad_norm": 0.9797380566596985, "learning_rate": 1.0816085156712005e-05, "loss": 0.1048, "step": 1830 }, { "epoch": 0.10883066185603596, "grad_norm": 0.8748274445533752, "learning_rate": 1.0875221762270846e-05, "loss": 0.0908, "step": 1840 }, { "epoch": 0.10942213284438398, "grad_norm": 0.8170437216758728, "learning_rate": 1.0934358367829687e-05, "loss": 0.0813, "step": 1850 }, { "epoch": 0.11001360383273201, "grad_norm": 1.4697694778442383, "learning_rate": 1.0993494973388527e-05, "loss": 0.1264, "step": 1860 }, { "epoch": 0.11060507482108002, "grad_norm": 1.1220688819885254, "learning_rate": 1.1052631578947368e-05, "loss": 0.1042, "step": 1870 }, { "epoch": 0.11119654580942805, "grad_norm": 0.8219739198684692, "learning_rate": 1.111176818450621e-05, "loss": 0.1148, "step": 1880 }, { "epoch": 0.11178801679777607, "grad_norm": 0.8747354745864868, "learning_rate": 1.1170904790065051e-05, "loss": 0.0957, "step": 1890 }, { "epoch": 0.1123794877861241, "grad_norm": 1.0249109268188477, "learning_rate": 1.1230041395623892e-05, "loss": 0.091, "step": 1900 }, { "epoch": 0.11297095877447211, "grad_norm": 1.0904954671859741, "learning_rate": 1.1289178001182733e-05, "loss": 0.1254, "step": 1910 }, { "epoch": 0.11356242976282013, "grad_norm": 1.290754795074463, "learning_rate": 1.1348314606741573e-05, "loss": 0.1095, "step": 1920 }, { "epoch": 0.11415390075116816, "grad_norm": 1.5418059825897217, "learning_rate": 1.1407451212300414e-05, "loss": 0.1123, "step": 1930 }, { "epoch": 0.11474537173951618, "grad_norm": 0.8294047713279724, "learning_rate": 1.1466587817859256e-05, "loss": 0.0701, "step": 1940 }, { "epoch": 0.1153368427278642, "grad_norm": 1.4641904830932617, "learning_rate": 1.1525724423418095e-05, "loss": 0.0789, "step": 1950 }, { "epoch": 0.11592831371621222, "grad_norm": 1.2698817253112793, "learning_rate": 1.1584861028976938e-05, "loss": 0.1259, "step": 1960 }, { "epoch": 0.11651978470456024, "grad_norm": 0.9772729873657227, "learning_rate": 1.1643997634535777e-05, "loss": 0.1226, "step": 1970 }, { "epoch": 0.11711125569290826, "grad_norm": 2.8346009254455566, "learning_rate": 1.1703134240094619e-05, "loss": 0.1138, "step": 1980 }, { "epoch": 0.11770272668125628, "grad_norm": 0.8168536424636841, "learning_rate": 1.1762270845653461e-05, "loss": 0.1073, "step": 1990 }, { "epoch": 0.1182941976696043, "grad_norm": 0.9471856951713562, "learning_rate": 1.18214074512123e-05, "loss": 0.0915, "step": 2000 }, { "epoch": 0.11888566865795233, "grad_norm": 1.3899338245391846, "learning_rate": 1.1880544056771143e-05, "loss": 0.1258, "step": 2010 }, { "epoch": 0.11947713964630034, "grad_norm": 0.7538505792617798, "learning_rate": 1.1939680662329982e-05, "loss": 0.1046, "step": 2020 }, { "epoch": 0.12006861063464837, "grad_norm": 0.9105229377746582, "learning_rate": 1.1998817267888824e-05, "loss": 0.1007, "step": 2030 }, { "epoch": 0.12066008162299639, "grad_norm": 0.7670049071311951, "learning_rate": 1.2057953873447665e-05, "loss": 0.1013, "step": 2040 }, { "epoch": 0.12125155261134442, "grad_norm": 0.8363207578659058, "learning_rate": 1.2117090479006506e-05, "loss": 0.0888, "step": 2050 }, { "epoch": 0.12184302359969243, "grad_norm": 0.8007575869560242, "learning_rate": 1.2176227084565346e-05, "loss": 0.1267, "step": 2060 }, { "epoch": 0.12243449458804045, "grad_norm": 0.7427950501441956, "learning_rate": 1.2235363690124187e-05, "loss": 0.106, "step": 2070 }, { "epoch": 0.12302596557638848, "grad_norm": 0.810699462890625, "learning_rate": 1.2294500295683028e-05, "loss": 0.1043, "step": 2080 }, { "epoch": 0.1236174365647365, "grad_norm": 1.0401771068572998, "learning_rate": 1.235363690124187e-05, "loss": 0.0972, "step": 2090 }, { "epoch": 0.12420890755308452, "grad_norm": 0.862919270992279, "learning_rate": 1.2412773506800709e-05, "loss": 0.0867, "step": 2100 }, { "epoch": 0.12480037854143254, "grad_norm": 3.2273213863372803, "learning_rate": 1.2471910112359551e-05, "loss": 0.1242, "step": 2110 }, { "epoch": 0.12539184952978055, "grad_norm": 0.8169100284576416, "learning_rate": 1.2531046717918392e-05, "loss": 0.1076, "step": 2120 }, { "epoch": 0.12598332051812858, "grad_norm": 0.6626930236816406, "learning_rate": 1.2590183323477233e-05, "loss": 0.1002, "step": 2130 }, { "epoch": 0.1265747915064766, "grad_norm": 1.0661871433258057, "learning_rate": 1.2649319929036075e-05, "loss": 0.1038, "step": 2140 }, { "epoch": 0.12716626249482463, "grad_norm": 0.884075939655304, "learning_rate": 1.2708456534594914e-05, "loss": 0.089, "step": 2150 }, { "epoch": 0.12775773348317265, "grad_norm": 0.9505901336669922, "learning_rate": 1.2767593140153756e-05, "loss": 0.1324, "step": 2160 }, { "epoch": 0.12834920447152068, "grad_norm": 0.9830239415168762, "learning_rate": 1.2826729745712595e-05, "loss": 0.1042, "step": 2170 }, { "epoch": 0.1289406754598687, "grad_norm": 3.122896671295166, "learning_rate": 1.2885866351271438e-05, "loss": 0.116, "step": 2180 }, { "epoch": 0.12953214644821673, "grad_norm": 2.2679991722106934, "learning_rate": 1.2945002956830277e-05, "loss": 0.1013, "step": 2190 }, { "epoch": 0.13012361743656473, "grad_norm": 1.319920539855957, "learning_rate": 1.300413956238912e-05, "loss": 0.0873, "step": 2200 }, { "epoch": 0.13071508842491275, "grad_norm": 1.0883764028549194, "learning_rate": 1.306327616794796e-05, "loss": 0.1151, "step": 2210 }, { "epoch": 0.13130655941326078, "grad_norm": 1.4667441844940186, "learning_rate": 1.31224127735068e-05, "loss": 0.1036, "step": 2220 }, { "epoch": 0.1318980304016088, "grad_norm": 0.7794870734214783, "learning_rate": 1.3181549379065643e-05, "loss": 0.106, "step": 2230 }, { "epoch": 0.13248950138995683, "grad_norm": 1.037962555885315, "learning_rate": 1.3240685984624482e-05, "loss": 0.0937, "step": 2240 }, { "epoch": 0.13308097237830485, "grad_norm": 0.948747456073761, "learning_rate": 1.3299822590183324e-05, "loss": 0.0907, "step": 2250 }, { "epoch": 0.13367244336665288, "grad_norm": 1.2789727449417114, "learning_rate": 1.3358959195742165e-05, "loss": 0.1171, "step": 2260 }, { "epoch": 0.13426391435500087, "grad_norm": 0.9090473055839539, "learning_rate": 1.3418095801301006e-05, "loss": 0.1094, "step": 2270 }, { "epoch": 0.1348553853433489, "grad_norm": 0.5976537466049194, "learning_rate": 1.3477232406859846e-05, "loss": 0.0972, "step": 2280 }, { "epoch": 0.13544685633169692, "grad_norm": 3.605243444442749, "learning_rate": 1.3536369012418687e-05, "loss": 0.1136, "step": 2290 }, { "epoch": 0.13603832732004495, "grad_norm": 0.8624510169029236, "learning_rate": 1.3595505617977528e-05, "loss": 0.0824, "step": 2300 }, { "epoch": 0.13662979830839297, "grad_norm": 0.8274652361869812, "learning_rate": 1.365464222353637e-05, "loss": 0.1231, "step": 2310 }, { "epoch": 0.137221269296741, "grad_norm": 1.1511255502700806, "learning_rate": 1.3713778829095209e-05, "loss": 0.1097, "step": 2320 }, { "epoch": 0.13781274028508902, "grad_norm": 0.7331304550170898, "learning_rate": 1.3772915434654051e-05, "loss": 0.0967, "step": 2330 }, { "epoch": 0.13840421127343705, "grad_norm": 1.1830743551254272, "learning_rate": 1.3832052040212892e-05, "loss": 0.0874, "step": 2340 }, { "epoch": 0.13899568226178505, "grad_norm": 0.8610976338386536, "learning_rate": 1.3891188645771733e-05, "loss": 0.0846, "step": 2350 }, { "epoch": 0.13958715325013307, "grad_norm": 1.0089398622512817, "learning_rate": 1.3950325251330575e-05, "loss": 0.1238, "step": 2360 }, { "epoch": 0.1401786242384811, "grad_norm": 0.9082810282707214, "learning_rate": 1.4009461856889414e-05, "loss": 0.1045, "step": 2370 }, { "epoch": 0.14077009522682912, "grad_norm": 0.7428750395774841, "learning_rate": 1.4068598462448257e-05, "loss": 0.1076, "step": 2380 }, { "epoch": 0.14136156621517715, "grad_norm": 0.8178524971008301, "learning_rate": 1.4127735068007096e-05, "loss": 0.0908, "step": 2390 }, { "epoch": 0.14195303720352517, "grad_norm": 0.6192349791526794, "learning_rate": 1.4186871673565938e-05, "loss": 0.0876, "step": 2400 }, { "epoch": 0.1425445081918732, "grad_norm": 2.8662919998168945, "learning_rate": 1.4246008279124779e-05, "loss": 0.117, "step": 2410 }, { "epoch": 0.14313597918022122, "grad_norm": 0.9834034442901611, "learning_rate": 1.430514488468362e-05, "loss": 0.1019, "step": 2420 }, { "epoch": 0.14372745016856922, "grad_norm": 0.8117826581001282, "learning_rate": 1.436428149024246e-05, "loss": 0.1046, "step": 2430 }, { "epoch": 0.14431892115691725, "grad_norm": 0.8213126063346863, "learning_rate": 1.44234180958013e-05, "loss": 0.0967, "step": 2440 }, { "epoch": 0.14491039214526527, "grad_norm": 1.3720089197158813, "learning_rate": 1.4482554701360141e-05, "loss": 0.0879, "step": 2450 }, { "epoch": 0.1455018631336133, "grad_norm": 0.9144525527954102, "learning_rate": 1.4541691306918984e-05, "loss": 0.1314, "step": 2460 }, { "epoch": 0.14609333412196132, "grad_norm": 0.8498542904853821, "learning_rate": 1.4600827912477824e-05, "loss": 0.1248, "step": 2470 }, { "epoch": 0.14668480511030935, "grad_norm": 0.7484121322631836, "learning_rate": 1.4659964518036665e-05, "loss": 0.102, "step": 2480 }, { "epoch": 0.14727627609865737, "grad_norm": 1.2402563095092773, "learning_rate": 1.4719101123595506e-05, "loss": 0.0886, "step": 2490 }, { "epoch": 0.14786774708700537, "grad_norm": 0.6737223267555237, "learning_rate": 1.4778237729154347e-05, "loss": 0.0831, "step": 2500 }, { "epoch": 0.1484592180753534, "grad_norm": 0.7072854042053223, "learning_rate": 1.4837374334713189e-05, "loss": 0.1114, "step": 2510 }, { "epoch": 0.14905068906370142, "grad_norm": 0.5983100533485413, "learning_rate": 1.4896510940272028e-05, "loss": 0.1068, "step": 2520 }, { "epoch": 0.14964216005204944, "grad_norm": 0.751998245716095, "learning_rate": 1.495564754583087e-05, "loss": 0.1012, "step": 2530 }, { "epoch": 0.15023363104039747, "grad_norm": 0.7250840663909912, "learning_rate": 1.501478415138971e-05, "loss": 0.1029, "step": 2540 }, { "epoch": 0.1508251020287455, "grad_norm": 0.8930765390396118, "learning_rate": 1.5073920756948552e-05, "loss": 0.0857, "step": 2550 }, { "epoch": 0.15141657301709352, "grad_norm": 0.9434906244277954, "learning_rate": 1.5133057362507392e-05, "loss": 0.1155, "step": 2560 }, { "epoch": 0.15200804400544154, "grad_norm": 0.8826856017112732, "learning_rate": 1.5192193968066235e-05, "loss": 0.12, "step": 2570 }, { "epoch": 0.15259951499378954, "grad_norm": 0.7198922038078308, "learning_rate": 1.5251330573625074e-05, "loss": 0.0994, "step": 2580 }, { "epoch": 0.15319098598213757, "grad_norm": 0.48092806339263916, "learning_rate": 1.5310467179183916e-05, "loss": 0.0816, "step": 2590 }, { "epoch": 0.1537824569704856, "grad_norm": 0.8785791397094727, "learning_rate": 1.5369603784742755e-05, "loss": 0.0823, "step": 2600 }, { "epoch": 0.15437392795883362, "grad_norm": 0.9513733386993408, "learning_rate": 1.5428740390301597e-05, "loss": 0.1294, "step": 2610 }, { "epoch": 0.15496539894718164, "grad_norm": 0.6453056335449219, "learning_rate": 1.548787699586044e-05, "loss": 0.1221, "step": 2620 }, { "epoch": 0.15555686993552967, "grad_norm": 1.15029776096344, "learning_rate": 1.554701360141928e-05, "loss": 0.1072, "step": 2630 }, { "epoch": 0.1561483409238777, "grad_norm": 0.6731084585189819, "learning_rate": 1.5606150206978118e-05, "loss": 0.0888, "step": 2640 }, { "epoch": 0.15673981191222572, "grad_norm": 0.9130929708480835, "learning_rate": 1.566528681253696e-05, "loss": 0.0872, "step": 2650 }, { "epoch": 0.15733128290057372, "grad_norm": 0.6773471236228943, "learning_rate": 1.5724423418095803e-05, "loss": 0.115, "step": 2660 }, { "epoch": 0.15792275388892174, "grad_norm": 2.178490161895752, "learning_rate": 1.5783560023654645e-05, "loss": 0.0999, "step": 2670 }, { "epoch": 0.15851422487726977, "grad_norm": 0.8213022947311401, "learning_rate": 1.5842696629213484e-05, "loss": 0.1057, "step": 2680 }, { "epoch": 0.1591056958656178, "grad_norm": 0.4557856619358063, "learning_rate": 1.5901833234772323e-05, "loss": 0.0856, "step": 2690 }, { "epoch": 0.15969716685396582, "grad_norm": 0.7958064079284668, "learning_rate": 1.5960969840331165e-05, "loss": 0.0946, "step": 2700 }, { "epoch": 0.16028863784231384, "grad_norm": 0.6608469486236572, "learning_rate": 1.6020106445890008e-05, "loss": 0.1193, "step": 2710 }, { "epoch": 0.16088010883066187, "grad_norm": 0.8138685822486877, "learning_rate": 1.607924305144885e-05, "loss": 0.1036, "step": 2720 }, { "epoch": 0.16147157981900986, "grad_norm": 0.7490956783294678, "learning_rate": 1.6138379657007686e-05, "loss": 0.1, "step": 2730 }, { "epoch": 0.1620630508073579, "grad_norm": 0.6062787771224976, "learning_rate": 1.6197516262566528e-05, "loss": 0.0901, "step": 2740 }, { "epoch": 0.16265452179570591, "grad_norm": 0.8111969232559204, "learning_rate": 1.625665286812537e-05, "loss": 0.0809, "step": 2750 }, { "epoch": 0.16324599278405394, "grad_norm": 0.6375450491905212, "learning_rate": 1.6315789473684213e-05, "loss": 0.1149, "step": 2760 }, { "epoch": 0.16383746377240196, "grad_norm": 0.723285436630249, "learning_rate": 1.6374926079243052e-05, "loss": 0.0987, "step": 2770 }, { "epoch": 0.16442893476075, "grad_norm": 0.7630077004432678, "learning_rate": 1.643406268480189e-05, "loss": 0.1122, "step": 2780 }, { "epoch": 0.16502040574909801, "grad_norm": 0.6838405728340149, "learning_rate": 1.6493199290360733e-05, "loss": 0.0914, "step": 2790 }, { "epoch": 0.16561187673744604, "grad_norm": 0.8457553386688232, "learning_rate": 1.6552335895919576e-05, "loss": 0.0808, "step": 2800 }, { "epoch": 0.16620334772579404, "grad_norm": 1.2897372245788574, "learning_rate": 1.6611472501478418e-05, "loss": 0.1056, "step": 2810 }, { "epoch": 0.16679481871414206, "grad_norm": 0.6709902286529541, "learning_rate": 1.6670609107037257e-05, "loss": 0.1149, "step": 2820 }, { "epoch": 0.1673862897024901, "grad_norm": 0.7361058592796326, "learning_rate": 1.6729745712596096e-05, "loss": 0.108, "step": 2830 }, { "epoch": 0.1679777606908381, "grad_norm": 0.48314210772514343, "learning_rate": 1.678888231815494e-05, "loss": 0.0854, "step": 2840 }, { "epoch": 0.16856923167918614, "grad_norm": 0.4741690754890442, "learning_rate": 1.684801892371378e-05, "loss": 0.0872, "step": 2850 }, { "epoch": 0.16916070266753416, "grad_norm": 0.6758005023002625, "learning_rate": 1.690715552927262e-05, "loss": 0.1059, "step": 2860 }, { "epoch": 0.1697521736558822, "grad_norm": 0.7478546500205994, "learning_rate": 1.6966292134831462e-05, "loss": 0.1194, "step": 2870 }, { "epoch": 0.1703436446442302, "grad_norm": 0.6190911531448364, "learning_rate": 1.70254287403903e-05, "loss": 0.1073, "step": 2880 }, { "epoch": 0.1709351156325782, "grad_norm": 0.738862156867981, "learning_rate": 1.7084565345949143e-05, "loss": 0.1063, "step": 2890 }, { "epoch": 0.17152658662092624, "grad_norm": 4.353343963623047, "learning_rate": 1.7143701951507982e-05, "loss": 0.0735, "step": 2900 }, { "epoch": 0.17211805760927426, "grad_norm": 1.2887223958969116, "learning_rate": 1.7202838557066825e-05, "loss": 0.12, "step": 2910 }, { "epoch": 0.17270952859762229, "grad_norm": 1.4853367805480957, "learning_rate": 1.7261975162625667e-05, "loss": 0.1145, "step": 2920 }, { "epoch": 0.1733009995859703, "grad_norm": 0.7316121459007263, "learning_rate": 1.7321111768184506e-05, "loss": 0.104, "step": 2930 }, { "epoch": 0.17389247057431834, "grad_norm": 0.5768103003501892, "learning_rate": 1.738024837374335e-05, "loss": 0.1014, "step": 2940 }, { "epoch": 0.17448394156266636, "grad_norm": 0.8338832855224609, "learning_rate": 1.7439384979302188e-05, "loss": 0.0838, "step": 2950 }, { "epoch": 0.1750754125510144, "grad_norm": 0.8248314261436462, "learning_rate": 1.749852158486103e-05, "loss": 0.1035, "step": 2960 }, { "epoch": 0.17566688353936238, "grad_norm": 0.5259304642677307, "learning_rate": 1.7557658190419872e-05, "loss": 0.1049, "step": 2970 }, { "epoch": 0.1762583545277104, "grad_norm": 0.7238562107086182, "learning_rate": 1.761679479597871e-05, "loss": 0.106, "step": 2980 }, { "epoch": 0.17684982551605843, "grad_norm": 0.5108389854431152, "learning_rate": 1.767593140153755e-05, "loss": 0.0951, "step": 2990 }, { "epoch": 0.17744129650440646, "grad_norm": 0.8610133528709412, "learning_rate": 1.7735068007096393e-05, "loss": 0.0903, "step": 3000 }, { "epoch": 0.17803276749275448, "grad_norm": 0.6845341920852661, "learning_rate": 1.7794204612655235e-05, "loss": 0.1128, "step": 3010 }, { "epoch": 0.1786242384811025, "grad_norm": 0.72154301404953, "learning_rate": 1.7853341218214077e-05, "loss": 0.117, "step": 3020 }, { "epoch": 0.17921570946945053, "grad_norm": 0.8999161720275879, "learning_rate": 1.7912477823772916e-05, "loss": 0.1006, "step": 3030 }, { "epoch": 0.17980718045779853, "grad_norm": 0.5463709831237793, "learning_rate": 1.7971614429331755e-05, "loss": 0.0976, "step": 3040 }, { "epoch": 0.18039865144614656, "grad_norm": 0.8493990302085876, "learning_rate": 1.8030751034890598e-05, "loss": 0.0856, "step": 3050 }, { "epoch": 0.18099012243449458, "grad_norm": 0.7477964162826538, "learning_rate": 1.808988764044944e-05, "loss": 0.1207, "step": 3060 }, { "epoch": 0.1815815934228426, "grad_norm": 0.90024334192276, "learning_rate": 1.8149024246008283e-05, "loss": 0.1036, "step": 3070 }, { "epoch": 0.18217306441119063, "grad_norm": 0.4558635354042053, "learning_rate": 1.8208160851567118e-05, "loss": 0.1082, "step": 3080 }, { "epoch": 0.18276453539953866, "grad_norm": 0.4179149270057678, "learning_rate": 1.826729745712596e-05, "loss": 0.0925, "step": 3090 }, { "epoch": 0.18335600638788668, "grad_norm": 0.6174872517585754, "learning_rate": 1.8326434062684803e-05, "loss": 0.0993, "step": 3100 }, { "epoch": 0.1839474773762347, "grad_norm": 0.6533388495445251, "learning_rate": 1.8385570668243645e-05, "loss": 0.1149, "step": 3110 }, { "epoch": 0.1845389483645827, "grad_norm": 0.87162184715271, "learning_rate": 1.8444707273802484e-05, "loss": 0.1142, "step": 3120 }, { "epoch": 0.18513041935293073, "grad_norm": 1.1266059875488281, "learning_rate": 1.8503843879361323e-05, "loss": 0.1028, "step": 3130 }, { "epoch": 0.18572189034127876, "grad_norm": 0.8105480670928955, "learning_rate": 1.8562980484920166e-05, "loss": 0.1002, "step": 3140 }, { "epoch": 0.18631336132962678, "grad_norm": 0.8620553016662598, "learning_rate": 1.8622117090479008e-05, "loss": 0.0735, "step": 3150 }, { "epoch": 0.1869048323179748, "grad_norm": 0.7717949151992798, "learning_rate": 1.868125369603785e-05, "loss": 0.1185, "step": 3160 }, { "epoch": 0.18749630330632283, "grad_norm": 1.0576605796813965, "learning_rate": 1.874039030159669e-05, "loss": 0.119, "step": 3170 }, { "epoch": 0.18808777429467086, "grad_norm": 0.592893123626709, "learning_rate": 1.879952690715553e-05, "loss": 0.1118, "step": 3180 }, { "epoch": 0.18867924528301888, "grad_norm": 0.4088200628757477, "learning_rate": 1.885866351271437e-05, "loss": 0.0886, "step": 3190 }, { "epoch": 0.18927071627136688, "grad_norm": 0.7400609254837036, "learning_rate": 1.8917800118273213e-05, "loss": 0.0814, "step": 3200 }, { "epoch": 0.1898621872597149, "grad_norm": 0.553082287311554, "learning_rate": 1.8976936723832052e-05, "loss": 0.108, "step": 3210 }, { "epoch": 0.19045365824806293, "grad_norm": 0.620814859867096, "learning_rate": 1.9036073329390895e-05, "loss": 0.1185, "step": 3220 }, { "epoch": 0.19104512923641095, "grad_norm": 0.7490543127059937, "learning_rate": 1.9095209934949734e-05, "loss": 0.0994, "step": 3230 }, { "epoch": 0.19163660022475898, "grad_norm": 0.8149442672729492, "learning_rate": 1.9154346540508576e-05, "loss": 0.0922, "step": 3240 }, { "epoch": 0.192228071213107, "grad_norm": 0.5334457159042358, "learning_rate": 1.9213483146067415e-05, "loss": 0.0876, "step": 3250 }, { "epoch": 0.19281954220145503, "grad_norm": 0.6213833093643188, "learning_rate": 1.9272619751626257e-05, "loss": 0.1226, "step": 3260 }, { "epoch": 0.19341101318980303, "grad_norm": 0.5302562117576599, "learning_rate": 1.93317563571851e-05, "loss": 0.1046, "step": 3270 }, { "epoch": 0.19400248417815105, "grad_norm": 0.7817535400390625, "learning_rate": 1.939089296274394e-05, "loss": 0.1019, "step": 3280 }, { "epoch": 0.19459395516649908, "grad_norm": 0.508794367313385, "learning_rate": 1.945002956830278e-05, "loss": 0.0781, "step": 3290 }, { "epoch": 0.1951854261548471, "grad_norm": 1.0402964353561401, "learning_rate": 1.950916617386162e-05, "loss": 0.0834, "step": 3300 }, { "epoch": 0.19577689714319513, "grad_norm": 0.5706701278686523, "learning_rate": 1.9568302779420462e-05, "loss": 0.111, "step": 3310 }, { "epoch": 0.19636836813154315, "grad_norm": 0.6753926277160645, "learning_rate": 1.9627439384979305e-05, "loss": 0.108, "step": 3320 }, { "epoch": 0.19695983911989118, "grad_norm": 0.6875175833702087, "learning_rate": 1.9686575990538144e-05, "loss": 0.1086, "step": 3330 }, { "epoch": 0.1975513101082392, "grad_norm": 0.38337966799736023, "learning_rate": 1.9745712596096983e-05, "loss": 0.093, "step": 3340 }, { "epoch": 0.1981427810965872, "grad_norm": 0.6686885356903076, "learning_rate": 1.9804849201655825e-05, "loss": 0.0765, "step": 3350 }, { "epoch": 0.19873425208493523, "grad_norm": 0.5502485036849976, "learning_rate": 1.9863985807214668e-05, "loss": 0.1146, "step": 3360 }, { "epoch": 0.19932572307328325, "grad_norm": 0.679628849029541, "learning_rate": 1.9923122412773507e-05, "loss": 0.0918, "step": 3370 }, { "epoch": 0.19991719406163128, "grad_norm": 0.742016613483429, "learning_rate": 1.998225901833235e-05, "loss": 0.0901, "step": 3380 }, { "epoch": 0.2005086650499793, "grad_norm": 0.5584272146224976, "learning_rate": 2.0041395623891188e-05, "loss": 0.0992, "step": 3390 }, { "epoch": 0.20110013603832733, "grad_norm": 0.4851781725883484, "learning_rate": 2.010053222945003e-05, "loss": 0.0862, "step": 3400 }, { "epoch": 0.20169160702667535, "grad_norm": 0.8838627934455872, "learning_rate": 2.0159668835008873e-05, "loss": 0.1085, "step": 3410 }, { "epoch": 0.20228307801502338, "grad_norm": 0.7821999788284302, "learning_rate": 2.021880544056771e-05, "loss": 0.1017, "step": 3420 }, { "epoch": 0.20287454900337137, "grad_norm": 0.4581637382507324, "learning_rate": 2.027794204612655e-05, "loss": 0.1108, "step": 3430 }, { "epoch": 0.2034660199917194, "grad_norm": 0.6102486252784729, "learning_rate": 2.0337078651685393e-05, "loss": 0.0961, "step": 3440 }, { "epoch": 0.20405749098006742, "grad_norm": 0.7160676121711731, "learning_rate": 2.0396215257244235e-05, "loss": 0.0816, "step": 3450 }, { "epoch": 0.20464896196841545, "grad_norm": 0.5592502355575562, "learning_rate": 2.0455351862803078e-05, "loss": 0.1139, "step": 3460 }, { "epoch": 0.20524043295676347, "grad_norm": 0.8437674045562744, "learning_rate": 2.0514488468361913e-05, "loss": 0.1074, "step": 3470 }, { "epoch": 0.2058319039451115, "grad_norm": 0.9372509717941284, "learning_rate": 2.0573625073920756e-05, "loss": 0.1002, "step": 3480 }, { "epoch": 0.20642337493345952, "grad_norm": 0.4822525084018707, "learning_rate": 2.0632761679479598e-05, "loss": 0.0959, "step": 3490 }, { "epoch": 0.20701484592180752, "grad_norm": 0.6867931485176086, "learning_rate": 2.069189828503844e-05, "loss": 0.0773, "step": 3500 }, { "epoch": 0.20760631691015555, "grad_norm": 0.7662633061408997, "learning_rate": 2.0751034890597283e-05, "loss": 0.1133, "step": 3510 }, { "epoch": 0.20819778789850357, "grad_norm": 0.8297512531280518, "learning_rate": 2.081017149615612e-05, "loss": 0.1204, "step": 3520 }, { "epoch": 0.2087892588868516, "grad_norm": 0.5180473923683167, "learning_rate": 2.086930810171496e-05, "loss": 0.1037, "step": 3530 }, { "epoch": 0.20938072987519962, "grad_norm": 1.1159570217132568, "learning_rate": 2.0928444707273803e-05, "loss": 0.084, "step": 3540 }, { "epoch": 0.20997220086354765, "grad_norm": 0.6880168914794922, "learning_rate": 2.0987581312832646e-05, "loss": 0.0816, "step": 3550 }, { "epoch": 0.21056367185189567, "grad_norm": 0.5288013219833374, "learning_rate": 2.1046717918391485e-05, "loss": 0.1186, "step": 3560 }, { "epoch": 0.2111551428402437, "grad_norm": 0.4509548842906952, "learning_rate": 2.1105854523950324e-05, "loss": 0.1053, "step": 3570 }, { "epoch": 0.2117466138285917, "grad_norm": 0.5973458290100098, "learning_rate": 2.1164991129509166e-05, "loss": 0.097, "step": 3580 }, { "epoch": 0.21233808481693972, "grad_norm": 1.134918451309204, "learning_rate": 2.122412773506801e-05, "loss": 0.0987, "step": 3590 }, { "epoch": 0.21292955580528775, "grad_norm": 0.7707091569900513, "learning_rate": 2.1283264340626847e-05, "loss": 0.0733, "step": 3600 }, { "epoch": 0.21352102679363577, "grad_norm": 0.6120101809501648, "learning_rate": 2.134240094618569e-05, "loss": 0.1109, "step": 3610 }, { "epoch": 0.2141124977819838, "grad_norm": 0.4861706495285034, "learning_rate": 2.140153755174453e-05, "loss": 0.097, "step": 3620 }, { "epoch": 0.21470396877033182, "grad_norm": 0.5677862763404846, "learning_rate": 2.146067415730337e-05, "loss": 0.1031, "step": 3630 }, { "epoch": 0.21529543975867985, "grad_norm": 0.5924938917160034, "learning_rate": 2.1519810762862214e-05, "loss": 0.0899, "step": 3640 }, { "epoch": 0.21588691074702787, "grad_norm": 0.7508660554885864, "learning_rate": 2.1578947368421053e-05, "loss": 0.0828, "step": 3650 }, { "epoch": 0.21647838173537587, "grad_norm": 0.5740810632705688, "learning_rate": 2.1638083973979895e-05, "loss": 0.1121, "step": 3660 }, { "epoch": 0.2170698527237239, "grad_norm": 0.6357908248901367, "learning_rate": 2.1697220579538734e-05, "loss": 0.0982, "step": 3670 }, { "epoch": 0.21766132371207192, "grad_norm": 0.8109158873558044, "learning_rate": 2.1756357185097576e-05, "loss": 0.1039, "step": 3680 }, { "epoch": 0.21825279470041994, "grad_norm": 0.40994781255722046, "learning_rate": 2.1815493790656415e-05, "loss": 0.0883, "step": 3690 }, { "epoch": 0.21884426568876797, "grad_norm": 0.7750908732414246, "learning_rate": 2.1874630396215258e-05, "loss": 0.0806, "step": 3700 }, { "epoch": 0.219435736677116, "grad_norm": 0.5586658120155334, "learning_rate": 2.19337670017741e-05, "loss": 0.1085, "step": 3710 }, { "epoch": 0.22002720766546402, "grad_norm": 0.6694844365119934, "learning_rate": 2.199290360733294e-05, "loss": 0.1083, "step": 3720 }, { "epoch": 0.22061867865381202, "grad_norm": 0.47450411319732666, "learning_rate": 2.205204021289178e-05, "loss": 0.0928, "step": 3730 }, { "epoch": 0.22121014964216004, "grad_norm": 0.506190836429596, "learning_rate": 2.211117681845062e-05, "loss": 0.0944, "step": 3740 }, { "epoch": 0.22180162063050807, "grad_norm": 0.5016607642173767, "learning_rate": 2.2170313424009463e-05, "loss": 0.0856, "step": 3750 }, { "epoch": 0.2223930916188561, "grad_norm": 0.949115514755249, "learning_rate": 2.2229450029568305e-05, "loss": 0.1165, "step": 3760 }, { "epoch": 0.22298456260720412, "grad_norm": 0.5426368117332458, "learning_rate": 2.2288586635127144e-05, "loss": 0.1104, "step": 3770 }, { "epoch": 0.22357603359555214, "grad_norm": 0.5375096797943115, "learning_rate": 2.2347723240685983e-05, "loss": 0.0898, "step": 3780 }, { "epoch": 0.22416750458390017, "grad_norm": 0.47455644607543945, "learning_rate": 2.2406859846244826e-05, "loss": 0.0816, "step": 3790 }, { "epoch": 0.2247589755722482, "grad_norm": 0.5627992153167725, "learning_rate": 2.2465996451803668e-05, "loss": 0.0805, "step": 3800 }, { "epoch": 0.2253504465605962, "grad_norm": 0.519260048866272, "learning_rate": 2.252513305736251e-05, "loss": 0.1253, "step": 3810 }, { "epoch": 0.22594191754894422, "grad_norm": 0.5100855231285095, "learning_rate": 2.2584269662921346e-05, "loss": 0.1055, "step": 3820 }, { "epoch": 0.22653338853729224, "grad_norm": 0.5852696299552917, "learning_rate": 2.2643406268480188e-05, "loss": 0.0834, "step": 3830 }, { "epoch": 0.22712485952564027, "grad_norm": 0.8379999399185181, "learning_rate": 2.270254287403903e-05, "loss": 0.0914, "step": 3840 }, { "epoch": 0.2277163305139883, "grad_norm": 1.2966054677963257, "learning_rate": 2.2761679479597873e-05, "loss": 0.0881, "step": 3850 }, { "epoch": 0.22830780150233632, "grad_norm": 0.39251694083213806, "learning_rate": 2.2820816085156715e-05, "loss": 0.1122, "step": 3860 }, { "epoch": 0.22889927249068434, "grad_norm": 0.6352452039718628, "learning_rate": 2.287995269071555e-05, "loss": 0.1101, "step": 3870 }, { "epoch": 0.22949074347903237, "grad_norm": 0.9457617998123169, "learning_rate": 2.2939089296274393e-05, "loss": 0.1083, "step": 3880 }, { "epoch": 0.23008221446738036, "grad_norm": 0.7608509063720703, "learning_rate": 2.2998225901833236e-05, "loss": 0.0924, "step": 3890 }, { "epoch": 0.2306736854557284, "grad_norm": 0.5494500994682312, "learning_rate": 2.3057362507392078e-05, "loss": 0.0903, "step": 3900 }, { "epoch": 0.23126515644407641, "grad_norm": 0.7322097420692444, "learning_rate": 2.3116499112950917e-05, "loss": 0.1119, "step": 3910 }, { "epoch": 0.23185662743242444, "grad_norm": 0.556820809841156, "learning_rate": 2.3175635718509756e-05, "loss": 0.1034, "step": 3920 }, { "epoch": 0.23244809842077246, "grad_norm": 0.47771599888801575, "learning_rate": 2.32347723240686e-05, "loss": 0.0961, "step": 3930 }, { "epoch": 0.2330395694091205, "grad_norm": 0.39273303747177124, "learning_rate": 2.329390892962744e-05, "loss": 0.0957, "step": 3940 }, { "epoch": 0.23363104039746851, "grad_norm": 0.6662347316741943, "learning_rate": 2.335304553518628e-05, "loss": 0.0827, "step": 3950 }, { "epoch": 0.2342225113858165, "grad_norm": 0.4206922948360443, "learning_rate": 2.3412182140745122e-05, "loss": 0.1035, "step": 3960 }, { "epoch": 0.23481398237416454, "grad_norm": 0.6769024133682251, "learning_rate": 2.347131874630396e-05, "loss": 0.1023, "step": 3970 }, { "epoch": 0.23540545336251256, "grad_norm": 0.33630305528640747, "learning_rate": 2.3530455351862804e-05, "loss": 0.0793, "step": 3980 }, { "epoch": 0.2359969243508606, "grad_norm": 0.36074501276016235, "learning_rate": 2.3589591957421646e-05, "loss": 0.0819, "step": 3990 }, { "epoch": 0.2365883953392086, "grad_norm": 0.4409344494342804, "learning_rate": 2.3648728562980485e-05, "loss": 0.0748, "step": 4000 }, { "epoch": 0.23717986632755664, "grad_norm": 0.5027844309806824, "learning_rate": 2.3707865168539327e-05, "loss": 0.1145, "step": 4010 }, { "epoch": 0.23777133731590466, "grad_norm": 0.7532927393913269, "learning_rate": 2.3767001774098166e-05, "loss": 0.1142, "step": 4020 }, { "epoch": 0.2383628083042527, "grad_norm": 0.5713790059089661, "learning_rate": 2.382613837965701e-05, "loss": 0.1107, "step": 4030 }, { "epoch": 0.23895427929260069, "grad_norm": 0.611487090587616, "learning_rate": 2.3885274985215848e-05, "loss": 0.0993, "step": 4040 }, { "epoch": 0.2395457502809487, "grad_norm": 0.5959345698356628, "learning_rate": 2.394441159077469e-05, "loss": 0.0687, "step": 4050 }, { "epoch": 0.24013722126929674, "grad_norm": 0.4257316589355469, "learning_rate": 2.4003548196333533e-05, "loss": 0.1067, "step": 4060 }, { "epoch": 0.24072869225764476, "grad_norm": 0.42096737027168274, "learning_rate": 2.406268480189237e-05, "loss": 0.093, "step": 4070 }, { "epoch": 0.24132016324599279, "grad_norm": 0.5533395409584045, "learning_rate": 2.4121821407451214e-05, "loss": 0.0849, "step": 4080 }, { "epoch": 0.2419116342343408, "grad_norm": 0.4856775999069214, "learning_rate": 2.4180958013010053e-05, "loss": 0.0916, "step": 4090 }, { "epoch": 0.24250310522268884, "grad_norm": 0.4168648421764374, "learning_rate": 2.4240094618568895e-05, "loss": 0.0819, "step": 4100 }, { "epoch": 0.24309457621103686, "grad_norm": 0.6705118417739868, "learning_rate": 2.4299231224127738e-05, "loss": 0.105, "step": 4110 }, { "epoch": 0.24368604719938486, "grad_norm": 1.5798367261886597, "learning_rate": 2.4358367829686577e-05, "loss": 0.096, "step": 4120 }, { "epoch": 0.24427751818773288, "grad_norm": 0.4839443266391754, "learning_rate": 2.4417504435245416e-05, "loss": 0.0923, "step": 4130 }, { "epoch": 0.2448689891760809, "grad_norm": 2.155184268951416, "learning_rate": 2.4476641040804258e-05, "loss": 0.0926, "step": 4140 }, { "epoch": 0.24546046016442893, "grad_norm": 0.6686860918998718, "learning_rate": 2.45357776463631e-05, "loss": 0.0727, "step": 4150 }, { "epoch": 0.24605193115277696, "grad_norm": 0.7138897180557251, "learning_rate": 2.4594914251921943e-05, "loss": 0.1105, "step": 4160 }, { "epoch": 0.24664340214112498, "grad_norm": 0.6278505921363831, "learning_rate": 2.465405085748078e-05, "loss": 0.101, "step": 4170 }, { "epoch": 0.247234873129473, "grad_norm": 0.6532488465309143, "learning_rate": 2.471318746303962e-05, "loss": 0.0846, "step": 4180 }, { "epoch": 0.24782634411782103, "grad_norm": 0.5302889943122864, "learning_rate": 2.4772324068598463e-05, "loss": 0.072, "step": 4190 }, { "epoch": 0.24841781510616903, "grad_norm": 0.8164333701133728, "learning_rate": 2.4831460674157305e-05, "loss": 0.0752, "step": 4200 }, { "epoch": 0.24900928609451706, "grad_norm": 0.6171656250953674, "learning_rate": 2.4890597279716148e-05, "loss": 0.1255, "step": 4210 }, { "epoch": 0.24960075708286508, "grad_norm": 0.6153189539909363, "learning_rate": 2.4949733885274983e-05, "loss": 0.0976, "step": 4220 }, { "epoch": 0.2501922280712131, "grad_norm": 0.4349226951599121, "learning_rate": 2.5008870490833826e-05, "loss": 0.0933, "step": 4230 }, { "epoch": 0.2507836990595611, "grad_norm": 0.6601523756980896, "learning_rate": 2.5068007096392668e-05, "loss": 0.0895, "step": 4240 }, { "epoch": 0.25137517004790916, "grad_norm": 0.659258246421814, "learning_rate": 2.512714370195151e-05, "loss": 0.0906, "step": 4250 }, { "epoch": 0.25196664103625716, "grad_norm": 0.7543184161186218, "learning_rate": 2.518628030751035e-05, "loss": 0.1096, "step": 4260 }, { "epoch": 0.2525581120246052, "grad_norm": 0.546157717704773, "learning_rate": 2.524541691306919e-05, "loss": 0.1049, "step": 4270 }, { "epoch": 0.2531495830129532, "grad_norm": 0.7393099069595337, "learning_rate": 2.530455351862803e-05, "loss": 0.0937, "step": 4280 }, { "epoch": 0.25374105400130126, "grad_norm": 0.5002017617225647, "learning_rate": 2.5363690124186873e-05, "loss": 0.088, "step": 4290 }, { "epoch": 0.25433252498964926, "grad_norm": 0.6293171048164368, "learning_rate": 2.5422826729745712e-05, "loss": 0.0782, "step": 4300 }, { "epoch": 0.25492399597799725, "grad_norm": 0.5364149808883667, "learning_rate": 2.5481963335304555e-05, "loss": 0.1182, "step": 4310 }, { "epoch": 0.2555154669663453, "grad_norm": 0.4713842272758484, "learning_rate": 2.5541099940863394e-05, "loss": 0.106, "step": 4320 }, { "epoch": 0.2561069379546933, "grad_norm": 0.318093866109848, "learning_rate": 2.5600236546422236e-05, "loss": 0.0775, "step": 4330 }, { "epoch": 0.25669840894304136, "grad_norm": 0.5131035447120667, "learning_rate": 2.565937315198108e-05, "loss": 0.079, "step": 4340 }, { "epoch": 0.25728987993138935, "grad_norm": 0.9621380567550659, "learning_rate": 2.5718509757539917e-05, "loss": 0.0895, "step": 4350 }, { "epoch": 0.2578813509197374, "grad_norm": 0.49918901920318604, "learning_rate": 2.577764636309876e-05, "loss": 0.1096, "step": 4360 }, { "epoch": 0.2584728219080854, "grad_norm": 0.5275120735168457, "learning_rate": 2.58367829686576e-05, "loss": 0.1101, "step": 4370 }, { "epoch": 0.25906429289643346, "grad_norm": 0.5116639137268066, "learning_rate": 2.589591957421644e-05, "loss": 0.1177, "step": 4380 }, { "epoch": 0.25965576388478145, "grad_norm": 0.7981589436531067, "learning_rate": 2.595505617977528e-05, "loss": 0.0854, "step": 4390 }, { "epoch": 0.26024723487312945, "grad_norm": 0.7899790406227112, "learning_rate": 2.6014192785334123e-05, "loss": 0.0818, "step": 4400 }, { "epoch": 0.2608387058614775, "grad_norm": 0.7144728302955627, "learning_rate": 2.6073329390892965e-05, "loss": 0.102, "step": 4410 }, { "epoch": 0.2614301768498255, "grad_norm": 0.4882543385028839, "learning_rate": 2.6132465996451804e-05, "loss": 0.1043, "step": 4420 }, { "epoch": 0.26202164783817355, "grad_norm": 0.4290657341480255, "learning_rate": 2.6191602602010646e-05, "loss": 0.0959, "step": 4430 }, { "epoch": 0.26261311882652155, "grad_norm": 0.4277171194553375, "learning_rate": 2.6250739207569485e-05, "loss": 0.0848, "step": 4440 }, { "epoch": 0.2632045898148696, "grad_norm": 0.3865758180618286, "learning_rate": 2.6309875813128328e-05, "loss": 0.0829, "step": 4450 }, { "epoch": 0.2637960608032176, "grad_norm": 0.6022497415542603, "learning_rate": 2.636901241868717e-05, "loss": 0.1052, "step": 4460 }, { "epoch": 0.2643875317915656, "grad_norm": 0.49171561002731323, "learning_rate": 2.642814902424601e-05, "loss": 0.0876, "step": 4470 }, { "epoch": 0.26497900277991365, "grad_norm": 0.5476326942443848, "learning_rate": 2.6487285629804848e-05, "loss": 0.0891, "step": 4480 }, { "epoch": 0.26557047376826165, "grad_norm": 0.4244706928730011, "learning_rate": 2.654642223536369e-05, "loss": 0.0903, "step": 4490 }, { "epoch": 0.2661619447566097, "grad_norm": 0.37157154083251953, "learning_rate": 2.6605558840922533e-05, "loss": 0.0803, "step": 4500 }, { "epoch": 0.2667534157449577, "grad_norm": 0.6135340332984924, "learning_rate": 2.6664695446481375e-05, "loss": 0.1034, "step": 4510 }, { "epoch": 0.26734488673330575, "grad_norm": 0.4272502362728119, "learning_rate": 2.672383205204021e-05, "loss": 0.1054, "step": 4520 }, { "epoch": 0.26793635772165375, "grad_norm": 1.873118281364441, "learning_rate": 2.6782968657599053e-05, "loss": 0.1045, "step": 4530 }, { "epoch": 0.26852782871000175, "grad_norm": 0.40855222940444946, "learning_rate": 2.6842105263157896e-05, "loss": 0.0906, "step": 4540 }, { "epoch": 0.2691192996983498, "grad_norm": 2.3345131874084473, "learning_rate": 2.6901241868716738e-05, "loss": 0.0785, "step": 4550 }, { "epoch": 0.2697107706866978, "grad_norm": 0.5167428851127625, "learning_rate": 2.696037847427558e-05, "loss": 0.1054, "step": 4560 }, { "epoch": 0.27030224167504585, "grad_norm": 0.608432412147522, "learning_rate": 2.7019515079834416e-05, "loss": 0.103, "step": 4570 }, { "epoch": 0.27089371266339385, "grad_norm": 0.3851298689842224, "learning_rate": 2.707865168539326e-05, "loss": 0.0933, "step": 4580 }, { "epoch": 0.2714851836517419, "grad_norm": 0.41576334834098816, "learning_rate": 2.71377882909521e-05, "loss": 0.0779, "step": 4590 }, { "epoch": 0.2720766546400899, "grad_norm": 0.9958471655845642, "learning_rate": 2.7196924896510943e-05, "loss": 0.0774, "step": 4600 }, { "epoch": 0.27266812562843795, "grad_norm": 0.5325009226799011, "learning_rate": 2.7256061502069782e-05, "loss": 0.1105, "step": 4610 }, { "epoch": 0.27325959661678595, "grad_norm": 0.4363190233707428, "learning_rate": 2.731519810762862e-05, "loss": 0.1096, "step": 4620 }, { "epoch": 0.27385106760513395, "grad_norm": 0.47465863823890686, "learning_rate": 2.7374334713187463e-05, "loss": 0.0967, "step": 4630 }, { "epoch": 0.274442538593482, "grad_norm": 0.3927568197250366, "learning_rate": 2.7433471318746306e-05, "loss": 0.0777, "step": 4640 }, { "epoch": 0.27503400958183, "grad_norm": 0.6077162027359009, "learning_rate": 2.7492607924305145e-05, "loss": 0.0928, "step": 4650 }, { "epoch": 0.27562548057017805, "grad_norm": 0.5975255966186523, "learning_rate": 2.7551744529863987e-05, "loss": 0.1125, "step": 4660 }, { "epoch": 0.27621695155852605, "grad_norm": 0.6954349875450134, "learning_rate": 2.7610881135422826e-05, "loss": 0.0997, "step": 4670 }, { "epoch": 0.2768084225468741, "grad_norm": 0.41666311025619507, "learning_rate": 2.767001774098167e-05, "loss": 0.0973, "step": 4680 }, { "epoch": 0.2773998935352221, "grad_norm": 0.4235331416130066, "learning_rate": 2.772915434654051e-05, "loss": 0.0805, "step": 4690 }, { "epoch": 0.2779913645235701, "grad_norm": 0.6315481662750244, "learning_rate": 2.778829095209935e-05, "loss": 0.0851, "step": 4700 }, { "epoch": 0.27858283551191815, "grad_norm": 0.5171540975570679, "learning_rate": 2.7847427557658192e-05, "loss": 0.1125, "step": 4710 }, { "epoch": 0.27917430650026615, "grad_norm": 0.7739179730415344, "learning_rate": 2.790656416321703e-05, "loss": 0.1003, "step": 4720 }, { "epoch": 0.2797657774886142, "grad_norm": 0.3568883240222931, "learning_rate": 2.7965700768775874e-05, "loss": 0.091, "step": 4730 }, { "epoch": 0.2803572484769622, "grad_norm": 0.34503424167633057, "learning_rate": 2.8024837374334713e-05, "loss": 0.089, "step": 4740 }, { "epoch": 0.28094871946531025, "grad_norm": 0.3675888776779175, "learning_rate": 2.8083973979893555e-05, "loss": 0.0883, "step": 4750 }, { "epoch": 0.28154019045365825, "grad_norm": 0.5029391646385193, "learning_rate": 2.8143110585452397e-05, "loss": 0.1058, "step": 4760 }, { "epoch": 0.28213166144200624, "grad_norm": 0.6557720899581909, "learning_rate": 2.8202247191011236e-05, "loss": 0.1109, "step": 4770 }, { "epoch": 0.2827231324303543, "grad_norm": 0.42117950320243835, "learning_rate": 2.826138379657008e-05, "loss": 0.0968, "step": 4780 }, { "epoch": 0.2833146034187023, "grad_norm": 1.9978711605072021, "learning_rate": 2.8320520402128918e-05, "loss": 0.0929, "step": 4790 }, { "epoch": 0.28390607440705035, "grad_norm": 0.4729589819908142, "learning_rate": 2.837965700768776e-05, "loss": 0.0768, "step": 4800 }, { "epoch": 0.28449754539539834, "grad_norm": 0.5765406489372253, "learning_rate": 2.8438793613246603e-05, "loss": 0.1051, "step": 4810 }, { "epoch": 0.2850890163837464, "grad_norm": 0.797124981880188, "learning_rate": 2.849793021880544e-05, "loss": 0.1028, "step": 4820 }, { "epoch": 0.2856804873720944, "grad_norm": 0.4349302649497986, "learning_rate": 2.855706682436428e-05, "loss": 0.1035, "step": 4830 }, { "epoch": 0.28627195836044245, "grad_norm": 0.5300995707511902, "learning_rate": 2.8616203429923123e-05, "loss": 0.0899, "step": 4840 }, { "epoch": 0.28686342934879044, "grad_norm": 0.43920189142227173, "learning_rate": 2.8675340035481965e-05, "loss": 0.0854, "step": 4850 }, { "epoch": 0.28745490033713844, "grad_norm": 0.7135322690010071, "learning_rate": 2.8734476641040808e-05, "loss": 0.1139, "step": 4860 }, { "epoch": 0.2880463713254865, "grad_norm": 0.38730135560035706, "learning_rate": 2.8793613246599643e-05, "loss": 0.1082, "step": 4870 }, { "epoch": 0.2886378423138345, "grad_norm": 0.4683602452278137, "learning_rate": 2.8852749852158486e-05, "loss": 0.0973, "step": 4880 }, { "epoch": 0.28922931330218254, "grad_norm": 0.6244880557060242, "learning_rate": 2.8911886457717328e-05, "loss": 0.0919, "step": 4890 }, { "epoch": 0.28982078429053054, "grad_norm": 0.39450111985206604, "learning_rate": 2.897102306327617e-05, "loss": 0.0879, "step": 4900 }, { "epoch": 0.2904122552788786, "grad_norm": 0.5704113841056824, "learning_rate": 2.9030159668835013e-05, "loss": 0.1076, "step": 4910 }, { "epoch": 0.2910037262672266, "grad_norm": 0.5140575170516968, "learning_rate": 2.908929627439385e-05, "loss": 0.1001, "step": 4920 }, { "epoch": 0.2915951972555746, "grad_norm": 0.5829914212226868, "learning_rate": 2.914843287995269e-05, "loss": 0.0839, "step": 4930 }, { "epoch": 0.29218666824392264, "grad_norm": 0.3897274136543274, "learning_rate": 2.9207569485511533e-05, "loss": 0.0818, "step": 4940 }, { "epoch": 0.29277813923227064, "grad_norm": 0.6907403469085693, "learning_rate": 2.9266706091070376e-05, "loss": 0.0942, "step": 4950 }, { "epoch": 0.2933696102206187, "grad_norm": 0.6229063272476196, "learning_rate": 2.932584269662921e-05, "loss": 0.1047, "step": 4960 }, { "epoch": 0.2939610812089667, "grad_norm": 0.6070343255996704, "learning_rate": 2.9384979302188054e-05, "loss": 0.1104, "step": 4970 }, { "epoch": 0.29455255219731474, "grad_norm": 0.47401708364486694, "learning_rate": 2.9444115907746896e-05, "loss": 0.0997, "step": 4980 }, { "epoch": 0.29514402318566274, "grad_norm": 0.4055856764316559, "learning_rate": 2.950325251330574e-05, "loss": 0.0907, "step": 4990 }, { "epoch": 0.29573549417401074, "grad_norm": 0.3559326231479645, "learning_rate": 2.9562389118864577e-05, "loss": 0.0825, "step": 5000 }, { "epoch": 0.2963269651623588, "grad_norm": 0.4869089126586914, "learning_rate": 2.9621525724423416e-05, "loss": 0.0966, "step": 5010 }, { "epoch": 0.2969184361507068, "grad_norm": 0.49755847454071045, "learning_rate": 2.968066232998226e-05, "loss": 0.0978, "step": 5020 }, { "epoch": 0.29750990713905484, "grad_norm": 0.4758862555027008, "learning_rate": 2.97397989355411e-05, "loss": 0.086, "step": 5030 }, { "epoch": 0.29810137812740284, "grad_norm": 0.5203420519828796, "learning_rate": 2.9798935541099943e-05, "loss": 0.0918, "step": 5040 }, { "epoch": 0.2986928491157509, "grad_norm": 0.8956958651542664, "learning_rate": 2.9858072146658782e-05, "loss": 0.0934, "step": 5050 }, { "epoch": 0.2992843201040989, "grad_norm": 0.677950382232666, "learning_rate": 2.991720875221762e-05, "loss": 0.1055, "step": 5060 }, { "epoch": 0.29987579109244694, "grad_norm": 0.483716756105423, "learning_rate": 2.9976345357776464e-05, "loss": 0.1017, "step": 5070 }, { "epoch": 0.30046726208079494, "grad_norm": 0.2728760540485382, "learning_rate": 2.9999999900918698e-05, "loss": 0.0944, "step": 5080 }, { "epoch": 0.30105873306914294, "grad_norm": 0.6176698803901672, "learning_rate": 2.9999999295421864e-05, "loss": 0.0949, "step": 5090 }, { "epoch": 0.301650204057491, "grad_norm": 0.36628374457359314, "learning_rate": 2.9999998139473382e-05, "loss": 0.0764, "step": 5100 }, { "epoch": 0.302241675045839, "grad_norm": 0.6645309925079346, "learning_rate": 2.999999643307329e-05, "loss": 0.1092, "step": 5110 }, { "epoch": 0.30283314603418704, "grad_norm": 0.6050965785980225, "learning_rate": 2.9999994176221662e-05, "loss": 0.1071, "step": 5120 }, { "epoch": 0.30342461702253504, "grad_norm": 0.4314238429069519, "learning_rate": 2.9999991368918572e-05, "loss": 0.0931, "step": 5130 }, { "epoch": 0.3040160880108831, "grad_norm": 0.4195494055747986, "learning_rate": 2.9999988011164123e-05, "loss": 0.0832, "step": 5140 }, { "epoch": 0.3046075589992311, "grad_norm": 1.1032463312149048, "learning_rate": 2.9999984102958444e-05, "loss": 0.0746, "step": 5150 }, { "epoch": 0.3051990299875791, "grad_norm": 0.5741855502128601, "learning_rate": 2.9999979644301676e-05, "loss": 0.1219, "step": 5160 }, { "epoch": 0.30579050097592714, "grad_norm": 0.5436108708381653, "learning_rate": 2.999997463519398e-05, "loss": 0.1135, "step": 5170 }, { "epoch": 0.30638197196427513, "grad_norm": 0.43459030985832214, "learning_rate": 2.9999969075635545e-05, "loss": 0.0849, "step": 5180 }, { "epoch": 0.3069734429526232, "grad_norm": 0.43470853567123413, "learning_rate": 2.9999962965626572e-05, "loss": 0.092, "step": 5190 }, { "epoch": 0.3075649139409712, "grad_norm": 0.41236287355422974, "learning_rate": 2.9999956305167282e-05, "loss": 0.0778, "step": 5200 }, { "epoch": 0.30815638492931924, "grad_norm": 0.49586328864097595, "learning_rate": 2.9999949094257926e-05, "loss": 0.1002, "step": 5210 }, { "epoch": 0.30874785591766724, "grad_norm": 0.44783398509025574, "learning_rate": 2.999994133289876e-05, "loss": 0.1021, "step": 5220 }, { "epoch": 0.30933932690601523, "grad_norm": 0.2820966839790344, "learning_rate": 2.9999933021090074e-05, "loss": 0.0883, "step": 5230 }, { "epoch": 0.3099307978943633, "grad_norm": 0.637581467628479, "learning_rate": 2.999992415883218e-05, "loss": 0.0887, "step": 5240 }, { "epoch": 0.3105222688827113, "grad_norm": 0.8021873831748962, "learning_rate": 2.999991474612539e-05, "loss": 0.084, "step": 5250 }, { "epoch": 0.31111373987105934, "grad_norm": 0.3142925202846527, "learning_rate": 2.999990478297006e-05, "loss": 0.104, "step": 5260 }, { "epoch": 0.31170521085940733, "grad_norm": 0.48665565252304077, "learning_rate": 2.9999894269366544e-05, "loss": 0.0945, "step": 5270 }, { "epoch": 0.3122966818477554, "grad_norm": 0.33794817328453064, "learning_rate": 2.9999883205315244e-05, "loss": 0.0965, "step": 5280 }, { "epoch": 0.3128881528361034, "grad_norm": 0.30997908115386963, "learning_rate": 2.999987159081655e-05, "loss": 0.0837, "step": 5290 }, { "epoch": 0.31347962382445144, "grad_norm": 0.5421434640884399, "learning_rate": 2.99998594258709e-05, "loss": 0.0735, "step": 5300 }, { "epoch": 0.31407109481279943, "grad_norm": 0.4763562083244324, "learning_rate": 2.999984671047873e-05, "loss": 0.1027, "step": 5310 }, { "epoch": 0.31466256580114743, "grad_norm": 0.4435054659843445, "learning_rate": 2.9999833444640517e-05, "loss": 0.1023, "step": 5320 }, { "epoch": 0.3152540367894955, "grad_norm": 0.518551766872406, "learning_rate": 2.9999819628356747e-05, "loss": 0.0944, "step": 5330 }, { "epoch": 0.3158455077778435, "grad_norm": 0.3684108257293701, "learning_rate": 2.9999805261627912e-05, "loss": 0.0841, "step": 5340 }, { "epoch": 0.31643697876619153, "grad_norm": 0.4948899447917938, "learning_rate": 2.999979034445456e-05, "loss": 0.0872, "step": 5350 }, { "epoch": 0.31702844975453953, "grad_norm": 0.5961499214172363, "learning_rate": 2.9999774876837224e-05, "loss": 0.1218, "step": 5360 }, { "epoch": 0.3176199207428876, "grad_norm": 0.32722043991088867, "learning_rate": 2.999975885877648e-05, "loss": 0.1019, "step": 5370 }, { "epoch": 0.3182113917312356, "grad_norm": 0.605175256729126, "learning_rate": 2.999974229027291e-05, "loss": 0.1046, "step": 5380 }, { "epoch": 0.3188028627195836, "grad_norm": 0.396962434053421, "learning_rate": 2.999972517132712e-05, "loss": 0.088, "step": 5390 }, { "epoch": 0.31939433370793163, "grad_norm": 0.8200657963752747, "learning_rate": 2.9999707501939747e-05, "loss": 0.0726, "step": 5400 }, { "epoch": 0.31998580469627963, "grad_norm": 0.4743416905403137, "learning_rate": 2.9999689282111435e-05, "loss": 0.1058, "step": 5410 }, { "epoch": 0.3205772756846277, "grad_norm": 0.3738054037094116, "learning_rate": 2.9999670511842848e-05, "loss": 0.0919, "step": 5420 }, { "epoch": 0.3211687466729757, "grad_norm": 0.587361752986908, "learning_rate": 2.9999651191134685e-05, "loss": 0.0858, "step": 5430 }, { "epoch": 0.32176021766132373, "grad_norm": 0.8471593260765076, "learning_rate": 2.999963131998765e-05, "loss": 0.0907, "step": 5440 }, { "epoch": 0.32235168864967173, "grad_norm": 0.3890211284160614, "learning_rate": 2.9999610898402464e-05, "loss": 0.085, "step": 5450 }, { "epoch": 0.32294315963801973, "grad_norm": 0.6676970720291138, "learning_rate": 2.999958992637989e-05, "loss": 0.1145, "step": 5460 }, { "epoch": 0.3235346306263678, "grad_norm": 0.5396118760108948, "learning_rate": 2.9999568403920687e-05, "loss": 0.103, "step": 5470 }, { "epoch": 0.3241261016147158, "grad_norm": 0.6588020324707031, "learning_rate": 2.9999546331025655e-05, "loss": 0.0847, "step": 5480 }, { "epoch": 0.32471757260306383, "grad_norm": 0.48856842517852783, "learning_rate": 2.999952370769559e-05, "loss": 0.0959, "step": 5490 }, { "epoch": 0.32530904359141183, "grad_norm": 0.2437646985054016, "learning_rate": 2.9999500533931338e-05, "loss": 0.0903, "step": 5500 }, { "epoch": 0.3259005145797599, "grad_norm": 1.331642508506775, "learning_rate": 2.999947680973374e-05, "loss": 0.1052, "step": 5510 }, { "epoch": 0.3264919855681079, "grad_norm": 0.4825616180896759, "learning_rate": 2.9999452535103667e-05, "loss": 0.1056, "step": 5520 }, { "epoch": 0.32708345655645593, "grad_norm": 0.4749957323074341, "learning_rate": 2.999942771004201e-05, "loss": 0.0863, "step": 5530 }, { "epoch": 0.32767492754480393, "grad_norm": 0.5640983581542969, "learning_rate": 2.9999402334549684e-05, "loss": 0.0932, "step": 5540 }, { "epoch": 0.3282663985331519, "grad_norm": 0.394968181848526, "learning_rate": 2.9999376408627617e-05, "loss": 0.0804, "step": 5550 }, { "epoch": 0.3288578695215, "grad_norm": 2.0595619678497314, "learning_rate": 2.9999349932276764e-05, "loss": 0.1102, "step": 5560 }, { "epoch": 0.329449340509848, "grad_norm": 0.763821005821228, "learning_rate": 2.9999322905498088e-05, "loss": 0.1038, "step": 5570 }, { "epoch": 0.33004081149819603, "grad_norm": 0.7225002646446228, "learning_rate": 2.9999295328292588e-05, "loss": 0.1037, "step": 5580 }, { "epoch": 0.330632282486544, "grad_norm": 0.3952047824859619, "learning_rate": 2.999926720066127e-05, "loss": 0.0863, "step": 5590 }, { "epoch": 0.3312237534748921, "grad_norm": 0.4874582886695862, "learning_rate": 2.999923852260518e-05, "loss": 0.0811, "step": 5600 }, { "epoch": 0.3318152244632401, "grad_norm": 0.4852536618709564, "learning_rate": 2.999920929412535e-05, "loss": 0.0936, "step": 5610 }, { "epoch": 0.3324066954515881, "grad_norm": 0.546252965927124, "learning_rate": 2.9999179515222876e-05, "loss": 0.1041, "step": 5620 }, { "epoch": 0.3329981664399361, "grad_norm": 0.4255697727203369, "learning_rate": 2.999914918589883e-05, "loss": 0.0893, "step": 5630 }, { "epoch": 0.3335896374282841, "grad_norm": 0.296194851398468, "learning_rate": 2.999911830615433e-05, "loss": 0.0867, "step": 5640 }, { "epoch": 0.3341811084166322, "grad_norm": 0.7092667818069458, "learning_rate": 2.9999086875990517e-05, "loss": 0.0752, "step": 5650 }, { "epoch": 0.3347725794049802, "grad_norm": 0.5450347661972046, "learning_rate": 2.999905489540854e-05, "loss": 0.1014, "step": 5660 }, { "epoch": 0.33536405039332823, "grad_norm": 0.43773722648620605, "learning_rate": 2.999902236440957e-05, "loss": 0.0916, "step": 5670 }, { "epoch": 0.3359555213816762, "grad_norm": 0.44019246101379395, "learning_rate": 2.9998989282994804e-05, "loss": 0.0916, "step": 5680 }, { "epoch": 0.3365469923700242, "grad_norm": 0.6297785043716431, "learning_rate": 2.9998955651165457e-05, "loss": 0.0868, "step": 5690 }, { "epoch": 0.3371384633583723, "grad_norm": 0.3961608409881592, "learning_rate": 2.9998921468922758e-05, "loss": 0.082, "step": 5700 }, { "epoch": 0.3377299343467203, "grad_norm": 0.6609804630279541, "learning_rate": 2.999888673626797e-05, "loss": 0.1097, "step": 5710 }, { "epoch": 0.3383214053350683, "grad_norm": 0.43394920229911804, "learning_rate": 2.9998851453202357e-05, "loss": 0.0915, "step": 5720 }, { "epoch": 0.3389128763234163, "grad_norm": 0.5366097688674927, "learning_rate": 2.9998815619727223e-05, "loss": 0.1026, "step": 5730 }, { "epoch": 0.3395043473117644, "grad_norm": 0.5314561724662781, "learning_rate": 2.9998779235843877e-05, "loss": 0.0816, "step": 5740 }, { "epoch": 0.3400958183001124, "grad_norm": 0.8055298328399658, "learning_rate": 2.999874230155366e-05, "loss": 0.0861, "step": 5750 }, { "epoch": 0.3406872892884604, "grad_norm": 0.4343501627445221, "learning_rate": 2.999870481685792e-05, "loss": 0.1092, "step": 5760 }, { "epoch": 0.3412787602768084, "grad_norm": 0.5138716697692871, "learning_rate": 2.9998666781758032e-05, "loss": 0.0996, "step": 5770 }, { "epoch": 0.3418702312651564, "grad_norm": 0.41791480779647827, "learning_rate": 2.9998628196255404e-05, "loss": 0.0835, "step": 5780 }, { "epoch": 0.3424617022535045, "grad_norm": 0.5399900078773499, "learning_rate": 2.9998589060351437e-05, "loss": 0.0947, "step": 5790 }, { "epoch": 0.34305317324185247, "grad_norm": 0.4277154505252838, "learning_rate": 2.9998549374047576e-05, "loss": 0.0693, "step": 5800 }, { "epoch": 0.3436446442302005, "grad_norm": 0.57532799243927, "learning_rate": 2.9998509137345273e-05, "loss": 0.1198, "step": 5810 }, { "epoch": 0.3442361152185485, "grad_norm": 0.6615387201309204, "learning_rate": 2.9998468350246013e-05, "loss": 0.1089, "step": 5820 }, { "epoch": 0.3448275862068966, "grad_norm": 0.39803552627563477, "learning_rate": 2.9998427012751283e-05, "loss": 0.0969, "step": 5830 }, { "epoch": 0.34541905719524457, "grad_norm": 0.4156382977962494, "learning_rate": 2.99983851248626e-05, "loss": 0.0859, "step": 5840 }, { "epoch": 0.34601052818359257, "grad_norm": 0.3197266161441803, "learning_rate": 2.9998342686581514e-05, "loss": 0.0705, "step": 5850 }, { "epoch": 0.3466019991719406, "grad_norm": 0.530516505241394, "learning_rate": 2.9998299697909562e-05, "loss": 0.1112, "step": 5860 }, { "epoch": 0.3471934701602886, "grad_norm": 0.5574976205825806, "learning_rate": 2.999825615884834e-05, "loss": 0.1185, "step": 5870 }, { "epoch": 0.3477849411486367, "grad_norm": 0.565134584903717, "learning_rate": 2.999821206939944e-05, "loss": 0.0958, "step": 5880 }, { "epoch": 0.34837641213698467, "grad_norm": 0.44386711716651917, "learning_rate": 2.9998167429564475e-05, "loss": 0.0901, "step": 5890 }, { "epoch": 0.3489678831253327, "grad_norm": 0.35916373133659363, "learning_rate": 2.9998122239345087e-05, "loss": 0.0765, "step": 5900 }, { "epoch": 0.3495593541136807, "grad_norm": 0.5703742504119873, "learning_rate": 2.9998076498742934e-05, "loss": 0.1053, "step": 5910 }, { "epoch": 0.3501508251020288, "grad_norm": 0.6213458776473999, "learning_rate": 2.9998030207759694e-05, "loss": 0.0944, "step": 5920 }, { "epoch": 0.35074229609037677, "grad_norm": 0.395303338766098, "learning_rate": 2.9997983366397068e-05, "loss": 0.0906, "step": 5930 }, { "epoch": 0.35133376707872477, "grad_norm": 0.4161030948162079, "learning_rate": 2.999793597465677e-05, "loss": 0.0832, "step": 5940 }, { "epoch": 0.3519252380670728, "grad_norm": 1.2720222473144531, "learning_rate": 2.999788803254054e-05, "loss": 0.0706, "step": 5950 }, { "epoch": 0.3525167090554208, "grad_norm": 0.4545113444328308, "learning_rate": 2.999783954005014e-05, "loss": 0.1034, "step": 5960 }, { "epoch": 0.35310818004376887, "grad_norm": 0.37914690375328064, "learning_rate": 2.9997790497187356e-05, "loss": 0.1074, "step": 5970 }, { "epoch": 0.35369965103211687, "grad_norm": 0.29220882058143616, "learning_rate": 2.9997740903953977e-05, "loss": 0.0928, "step": 5980 }, { "epoch": 0.3542911220204649, "grad_norm": 0.44229111075401306, "learning_rate": 2.9997690760351822e-05, "loss": 0.0886, "step": 5990 }, { "epoch": 0.3548825930088129, "grad_norm": 0.5695660710334778, "learning_rate": 2.999764006638274e-05, "loss": 0.0795, "step": 6000 }, { "epoch": 0.3554740639971609, "grad_norm": 0.5406869053840637, "learning_rate": 2.999758882204859e-05, "loss": 0.0958, "step": 6010 }, { "epoch": 0.35606553498550897, "grad_norm": 0.36774879693984985, "learning_rate": 2.9997537027351243e-05, "loss": 0.0903, "step": 6020 }, { "epoch": 0.35665700597385697, "grad_norm": 0.42426082491874695, "learning_rate": 2.999748468229261e-05, "loss": 0.0898, "step": 6030 }, { "epoch": 0.357248476962205, "grad_norm": 0.40181443095207214, "learning_rate": 2.9997431786874607e-05, "loss": 0.0885, "step": 6040 }, { "epoch": 0.357839947950553, "grad_norm": 0.41118308901786804, "learning_rate": 2.9997378341099177e-05, "loss": 0.0834, "step": 6050 }, { "epoch": 0.35843141893890107, "grad_norm": 0.46172159910202026, "learning_rate": 2.9997324344968277e-05, "loss": 0.103, "step": 6060 }, { "epoch": 0.35902288992724907, "grad_norm": 0.29163599014282227, "learning_rate": 2.9997269798483895e-05, "loss": 0.0948, "step": 6070 }, { "epoch": 0.35961436091559706, "grad_norm": 0.5637189149856567, "learning_rate": 2.9997214701648032e-05, "loss": 0.0906, "step": 6080 }, { "epoch": 0.3602058319039451, "grad_norm": 0.37268707156181335, "learning_rate": 2.9997159054462703e-05, "loss": 0.0823, "step": 6090 }, { "epoch": 0.3607973028922931, "grad_norm": 0.4051815867424011, "learning_rate": 2.9997102856929958e-05, "loss": 0.0758, "step": 6100 }, { "epoch": 0.36138877388064117, "grad_norm": 1.0991637706756592, "learning_rate": 2.9997046109051853e-05, "loss": 0.1084, "step": 6110 }, { "epoch": 0.36198024486898916, "grad_norm": 0.3870258629322052, "learning_rate": 2.9996988810830476e-05, "loss": 0.0995, "step": 6120 }, { "epoch": 0.3625717158573372, "grad_norm": 0.3901376724243164, "learning_rate": 2.9996930962267923e-05, "loss": 0.0917, "step": 6130 }, { "epoch": 0.3631631868456852, "grad_norm": 0.3451939523220062, "learning_rate": 2.9996872563366327e-05, "loss": 0.079, "step": 6140 }, { "epoch": 0.36375465783403327, "grad_norm": 0.5886303782463074, "learning_rate": 2.999681361412782e-05, "loss": 0.0923, "step": 6150 }, { "epoch": 0.36434612882238127, "grad_norm": 1.176507830619812, "learning_rate": 2.9996754114554574e-05, "loss": 0.1, "step": 6160 }, { "epoch": 0.36493759981072926, "grad_norm": 0.4554236829280853, "learning_rate": 2.9996694064648764e-05, "loss": 0.0936, "step": 6170 }, { "epoch": 0.3655290707990773, "grad_norm": 0.3519240915775299, "learning_rate": 2.99966334644126e-05, "loss": 0.0933, "step": 6180 }, { "epoch": 0.3661205417874253, "grad_norm": 0.4676471948623657, "learning_rate": 2.9996572313848306e-05, "loss": 0.0884, "step": 6190 }, { "epoch": 0.36671201277577337, "grad_norm": 0.5607316493988037, "learning_rate": 2.9996510612958124e-05, "loss": 0.0794, "step": 6200 }, { "epoch": 0.36730348376412136, "grad_norm": 0.3461562395095825, "learning_rate": 2.9996448361744313e-05, "loss": 0.1178, "step": 6210 }, { "epoch": 0.3678949547524694, "grad_norm": 0.4694616496562958, "learning_rate": 2.999638556020917e-05, "loss": 0.0976, "step": 6220 }, { "epoch": 0.3684864257408174, "grad_norm": 0.3697315454483032, "learning_rate": 2.9996322208354987e-05, "loss": 0.0997, "step": 6230 }, { "epoch": 0.3690778967291654, "grad_norm": 0.41962745785713196, "learning_rate": 2.9996258306184095e-05, "loss": 0.0946, "step": 6240 }, { "epoch": 0.36966936771751346, "grad_norm": 0.6190120577812195, "learning_rate": 2.9996193853698843e-05, "loss": 0.071, "step": 6250 }, { "epoch": 0.37026083870586146, "grad_norm": 0.4183938801288605, "learning_rate": 2.9996128850901586e-05, "loss": 0.1028, "step": 6260 }, { "epoch": 0.3708523096942095, "grad_norm": 0.3879983425140381, "learning_rate": 2.9996063297794715e-05, "loss": 0.0975, "step": 6270 }, { "epoch": 0.3714437806825575, "grad_norm": 0.39678114652633667, "learning_rate": 2.9995997194380638e-05, "loss": 0.0827, "step": 6280 }, { "epoch": 0.37203525167090556, "grad_norm": 0.2913842797279358, "learning_rate": 2.9995930540661778e-05, "loss": 0.0825, "step": 6290 }, { "epoch": 0.37262672265925356, "grad_norm": 1.6328439712524414, "learning_rate": 2.9995863336640577e-05, "loss": 0.0718, "step": 6300 }, { "epoch": 0.37321819364760156, "grad_norm": 0.4258565306663513, "learning_rate": 2.9995795582319513e-05, "loss": 0.094, "step": 6310 }, { "epoch": 0.3738096646359496, "grad_norm": 0.41400542855262756, "learning_rate": 2.9995727277701058e-05, "loss": 0.1118, "step": 6320 }, { "epoch": 0.3744011356242976, "grad_norm": 0.43831950426101685, "learning_rate": 2.9995658422787727e-05, "loss": 0.1006, "step": 6330 }, { "epoch": 0.37499260661264566, "grad_norm": 0.6847688555717468, "learning_rate": 2.9995589017582044e-05, "loss": 0.0847, "step": 6340 }, { "epoch": 0.37558407760099366, "grad_norm": 0.3831956088542938, "learning_rate": 2.9995519062086556e-05, "loss": 0.079, "step": 6350 }, { "epoch": 0.3761755485893417, "grad_norm": 0.6240316033363342, "learning_rate": 2.9995448556303835e-05, "loss": 0.1062, "step": 6360 }, { "epoch": 0.3767670195776897, "grad_norm": 0.4992639422416687, "learning_rate": 2.999537750023646e-05, "loss": 0.1058, "step": 6370 }, { "epoch": 0.37735849056603776, "grad_norm": 0.4374285042285919, "learning_rate": 2.999530589388705e-05, "loss": 0.0905, "step": 6380 }, { "epoch": 0.37794996155438576, "grad_norm": 0.4452453851699829, "learning_rate": 2.9995233737258217e-05, "loss": 0.0782, "step": 6390 }, { "epoch": 0.37854143254273376, "grad_norm": 0.4651772379875183, "learning_rate": 2.999516103035262e-05, "loss": 0.0736, "step": 6400 }, { "epoch": 0.3791329035310818, "grad_norm": 0.40845414996147156, "learning_rate": 2.9995087773172926e-05, "loss": 0.1063, "step": 6410 }, { "epoch": 0.3797243745194298, "grad_norm": 0.39161843061447144, "learning_rate": 2.999501396572182e-05, "loss": 0.087, "step": 6420 }, { "epoch": 0.38031584550777786, "grad_norm": 0.4753105342388153, "learning_rate": 2.9994939608002018e-05, "loss": 0.0956, "step": 6430 }, { "epoch": 0.38090731649612586, "grad_norm": 0.4397672116756439, "learning_rate": 2.999486470001624e-05, "loss": 0.0952, "step": 6440 }, { "epoch": 0.3814987874844739, "grad_norm": 2.445711135864258, "learning_rate": 2.9994789241767235e-05, "loss": 0.0777, "step": 6450 }, { "epoch": 0.3820902584728219, "grad_norm": 0.3835374712944031, "learning_rate": 2.9994713233257774e-05, "loss": 0.1028, "step": 6460 }, { "epoch": 0.3826817294611699, "grad_norm": 0.309417724609375, "learning_rate": 2.9994636674490656e-05, "loss": 0.0967, "step": 6470 }, { "epoch": 0.38327320044951796, "grad_norm": 0.49841833114624023, "learning_rate": 2.9994559565468676e-05, "loss": 0.0963, "step": 6480 }, { "epoch": 0.38386467143786596, "grad_norm": 0.6668940186500549, "learning_rate": 2.9994481906194672e-05, "loss": 0.0928, "step": 6490 }, { "epoch": 0.384456142426214, "grad_norm": 0.3276020586490631, "learning_rate": 2.9994403696671487e-05, "loss": 0.0748, "step": 6500 }, { "epoch": 0.385047613414562, "grad_norm": 0.5966588854789734, "learning_rate": 2.9994324936901995e-05, "loss": 0.1132, "step": 6510 }, { "epoch": 0.38563908440291006, "grad_norm": 0.8177466988563538, "learning_rate": 2.999424562688909e-05, "loss": 0.0921, "step": 6520 }, { "epoch": 0.38623055539125806, "grad_norm": 0.5286778211593628, "learning_rate": 2.999416576663568e-05, "loss": 0.1016, "step": 6530 }, { "epoch": 0.38682202637960605, "grad_norm": 0.41721311211586, "learning_rate": 2.9994085356144693e-05, "loss": 0.0785, "step": 6540 }, { "epoch": 0.3874134973679541, "grad_norm": 0.7414016723632812, "learning_rate": 2.9994004395419085e-05, "loss": 0.0842, "step": 6550 }, { "epoch": 0.3880049683563021, "grad_norm": 2.0572123527526855, "learning_rate": 2.9993922884461816e-05, "loss": 0.1206, "step": 6560 }, { "epoch": 0.38859643934465016, "grad_norm": 0.39591169357299805, "learning_rate": 2.9993840823275886e-05, "loss": 0.0973, "step": 6570 }, { "epoch": 0.38918791033299815, "grad_norm": 0.40155473351478577, "learning_rate": 2.9993758211864308e-05, "loss": 0.0948, "step": 6580 }, { "epoch": 0.3897793813213462, "grad_norm": 0.7107874751091003, "learning_rate": 2.999367505023011e-05, "loss": 0.0817, "step": 6590 }, { "epoch": 0.3903708523096942, "grad_norm": 0.3950653374195099, "learning_rate": 2.9993591338376346e-05, "loss": 0.0774, "step": 6600 }, { "epoch": 0.39096232329804226, "grad_norm": 0.4582042098045349, "learning_rate": 2.9993507076306087e-05, "loss": 0.1034, "step": 6610 }, { "epoch": 0.39155379428639026, "grad_norm": 0.37158411741256714, "learning_rate": 2.999342226402242e-05, "loss": 0.1079, "step": 6620 }, { "epoch": 0.39214526527473825, "grad_norm": 0.4069949686527252, "learning_rate": 2.9993336901528463e-05, "loss": 0.0877, "step": 6630 }, { "epoch": 0.3927367362630863, "grad_norm": 0.4168596863746643, "learning_rate": 2.999325098882735e-05, "loss": 0.0871, "step": 6640 }, { "epoch": 0.3933282072514343, "grad_norm": 0.4412901997566223, "learning_rate": 2.9993164525922226e-05, "loss": 0.0852, "step": 6650 }, { "epoch": 0.39391967823978236, "grad_norm": 0.408365935087204, "learning_rate": 2.9993077512816274e-05, "loss": 0.1016, "step": 6660 }, { "epoch": 0.39451114922813035, "grad_norm": 0.3175400197505951, "learning_rate": 2.999298994951268e-05, "loss": 0.0984, "step": 6670 }, { "epoch": 0.3951026202164784, "grad_norm": 0.26326173543930054, "learning_rate": 2.999290183601466e-05, "loss": 0.0799, "step": 6680 }, { "epoch": 0.3956940912048264, "grad_norm": 0.30345723032951355, "learning_rate": 2.9992813172325445e-05, "loss": 0.0831, "step": 6690 }, { "epoch": 0.3962855621931744, "grad_norm": 1.189917802810669, "learning_rate": 2.999272395844829e-05, "loss": 0.0749, "step": 6700 }, { "epoch": 0.39687703318152245, "grad_norm": 0.48308447003364563, "learning_rate": 2.999263419438647e-05, "loss": 0.1083, "step": 6710 }, { "epoch": 0.39746850416987045, "grad_norm": 0.4937615990638733, "learning_rate": 2.999254388014328e-05, "loss": 0.0953, "step": 6720 }, { "epoch": 0.3980599751582185, "grad_norm": 0.38444623351097107, "learning_rate": 2.999245301572203e-05, "loss": 0.0891, "step": 6730 }, { "epoch": 0.3986514461465665, "grad_norm": 0.3206271827220917, "learning_rate": 2.999236160112606e-05, "loss": 0.0783, "step": 6740 }, { "epoch": 0.39924291713491455, "grad_norm": 0.4873577952384949, "learning_rate": 2.9992269636358722e-05, "loss": 0.0796, "step": 6750 }, { "epoch": 0.39983438812326255, "grad_norm": 0.6545272469520569, "learning_rate": 2.999217712142339e-05, "loss": 0.1077, "step": 6760 }, { "epoch": 0.40042585911161055, "grad_norm": 0.5153070688247681, "learning_rate": 2.9992084056323456e-05, "loss": 0.1006, "step": 6770 }, { "epoch": 0.4010173300999586, "grad_norm": 0.36097973585128784, "learning_rate": 2.999199044106234e-05, "loss": 0.0845, "step": 6780 }, { "epoch": 0.4016088010883066, "grad_norm": 0.4028223752975464, "learning_rate": 2.9991896275643477e-05, "loss": 0.0983, "step": 6790 }, { "epoch": 0.40220027207665465, "grad_norm": 0.4205327332019806, "learning_rate": 2.9991801560070324e-05, "loss": 0.0712, "step": 6800 }, { "epoch": 0.40279174306500265, "grad_norm": 0.3164220452308655, "learning_rate": 2.999170629434635e-05, "loss": 0.1077, "step": 6810 }, { "epoch": 0.4033832140533507, "grad_norm": 0.3586147725582123, "learning_rate": 2.999161047847506e-05, "loss": 0.0968, "step": 6820 }, { "epoch": 0.4039746850416987, "grad_norm": 0.4027222692966461, "learning_rate": 2.9991514112459962e-05, "loss": 0.089, "step": 6830 }, { "epoch": 0.40456615603004675, "grad_norm": 0.39066195487976074, "learning_rate": 2.9991417196304597e-05, "loss": 0.0889, "step": 6840 }, { "epoch": 0.40515762701839475, "grad_norm": 0.3797062933444977, "learning_rate": 2.999131973001252e-05, "loss": 0.0698, "step": 6850 }, { "epoch": 0.40574909800674275, "grad_norm": 0.5629835724830627, "learning_rate": 2.9991221713587307e-05, "loss": 0.1094, "step": 6860 }, { "epoch": 0.4063405689950908, "grad_norm": 0.35187312960624695, "learning_rate": 2.999112314703256e-05, "loss": 0.0911, "step": 6870 }, { "epoch": 0.4069320399834388, "grad_norm": 0.4240199327468872, "learning_rate": 2.9991024030351888e-05, "loss": 0.0853, "step": 6880 }, { "epoch": 0.40752351097178685, "grad_norm": 0.43752628564834595, "learning_rate": 2.9990924363548933e-05, "loss": 0.0841, "step": 6890 }, { "epoch": 0.40811498196013485, "grad_norm": 0.8664826154708862, "learning_rate": 2.9990824146627353e-05, "loss": 0.0767, "step": 6900 }, { "epoch": 0.4087064529484829, "grad_norm": 0.6195040345191956, "learning_rate": 2.999072337959082e-05, "loss": 0.0963, "step": 6910 }, { "epoch": 0.4092979239368309, "grad_norm": 0.3410938084125519, "learning_rate": 2.999062206244304e-05, "loss": 0.1011, "step": 6920 }, { "epoch": 0.4098893949251789, "grad_norm": 0.3505179286003113, "learning_rate": 2.9990520195187726e-05, "loss": 0.0924, "step": 6930 }, { "epoch": 0.41048086591352695, "grad_norm": 0.4072246253490448, "learning_rate": 2.9990417777828617e-05, "loss": 0.0683, "step": 6940 }, { "epoch": 0.41107233690187495, "grad_norm": 0.5128532648086548, "learning_rate": 2.9990314810369473e-05, "loss": 0.085, "step": 6950 }, { "epoch": 0.411663807890223, "grad_norm": 0.43681731820106506, "learning_rate": 2.9990211292814072e-05, "loss": 0.0985, "step": 6960 }, { "epoch": 0.412255278878571, "grad_norm": 0.4175565540790558, "learning_rate": 2.9990107225166208e-05, "loss": 0.0994, "step": 6970 }, { "epoch": 0.41284674986691905, "grad_norm": 0.29840245842933655, "learning_rate": 2.9990002607429706e-05, "loss": 0.0919, "step": 6980 }, { "epoch": 0.41343822085526705, "grad_norm": 0.43594175577163696, "learning_rate": 2.9989897439608406e-05, "loss": 0.0851, "step": 6990 }, { "epoch": 0.41402969184361504, "grad_norm": 0.6718905568122864, "learning_rate": 2.9989791721706163e-05, "loss": 0.0761, "step": 7000 }, { "epoch": 0.4146211628319631, "grad_norm": 0.4135831296443939, "learning_rate": 2.9989685453726853e-05, "loss": 0.0947, "step": 7010 }, { "epoch": 0.4152126338203111, "grad_norm": 0.3237605690956116, "learning_rate": 2.998957863567439e-05, "loss": 0.1029, "step": 7020 }, { "epoch": 0.41580410480865915, "grad_norm": 0.42007118463516235, "learning_rate": 2.998947126755268e-05, "loss": 0.0867, "step": 7030 }, { "epoch": 0.41639557579700714, "grad_norm": 0.2662273943424225, "learning_rate": 2.9989363349365664e-05, "loss": 0.0844, "step": 7040 }, { "epoch": 0.4169870467853552, "grad_norm": 0.309138685464859, "learning_rate": 2.9989254881117308e-05, "loss": 0.0734, "step": 7050 }, { "epoch": 0.4175785177737032, "grad_norm": 0.8333616852760315, "learning_rate": 2.9989145862811593e-05, "loss": 0.0908, "step": 7060 }, { "epoch": 0.41816998876205125, "grad_norm": 0.525898814201355, "learning_rate": 2.9989036294452514e-05, "loss": 0.0962, "step": 7070 }, { "epoch": 0.41876145975039925, "grad_norm": 0.37844234704971313, "learning_rate": 2.9988926176044097e-05, "loss": 0.0958, "step": 7080 }, { "epoch": 0.41935293073874724, "grad_norm": 0.3182623088359833, "learning_rate": 2.998881550759038e-05, "loss": 0.0949, "step": 7090 }, { "epoch": 0.4199444017270953, "grad_norm": 0.36062946915626526, "learning_rate": 2.9988704289095425e-05, "loss": 0.0776, "step": 7100 }, { "epoch": 0.4205358727154433, "grad_norm": 0.5168910026550293, "learning_rate": 2.998859252056331e-05, "loss": 0.108, "step": 7110 }, { "epoch": 0.42112734370379135, "grad_norm": 0.47972092032432556, "learning_rate": 2.9988480201998145e-05, "loss": 0.1, "step": 7120 }, { "epoch": 0.42171881469213934, "grad_norm": 0.4920753240585327, "learning_rate": 2.9988367333404042e-05, "loss": 0.103, "step": 7130 }, { "epoch": 0.4223102856804874, "grad_norm": 0.4874896705150604, "learning_rate": 2.998825391478515e-05, "loss": 0.0988, "step": 7140 }, { "epoch": 0.4229017566688354, "grad_norm": 0.5665042400360107, "learning_rate": 2.9988139946145626e-05, "loss": 0.0779, "step": 7150 }, { "epoch": 0.4234932276571834, "grad_norm": 0.3206358253955841, "learning_rate": 2.9988025427489658e-05, "loss": 0.0886, "step": 7160 }, { "epoch": 0.42408469864553144, "grad_norm": 0.4694593846797943, "learning_rate": 2.9987910358821444e-05, "loss": 0.0976, "step": 7170 }, { "epoch": 0.42467616963387944, "grad_norm": 0.45653003454208374, "learning_rate": 2.998779474014521e-05, "loss": 0.1024, "step": 7180 }, { "epoch": 0.4252676406222275, "grad_norm": 0.27198299765586853, "learning_rate": 2.9987678571465195e-05, "loss": 0.0831, "step": 7190 }, { "epoch": 0.4258591116105755, "grad_norm": 0.486351877450943, "learning_rate": 2.9987561852785664e-05, "loss": 0.0783, "step": 7200 }, { "epoch": 0.42645058259892354, "grad_norm": 0.46887484192848206, "learning_rate": 2.9987444584110895e-05, "loss": 0.1042, "step": 7210 }, { "epoch": 0.42704205358727154, "grad_norm": 0.5453546643257141, "learning_rate": 2.9987326765445203e-05, "loss": 0.0992, "step": 7220 }, { "epoch": 0.42763352457561954, "grad_norm": 0.4675035774707794, "learning_rate": 2.9987208396792904e-05, "loss": 0.1069, "step": 7230 }, { "epoch": 0.4282249955639676, "grad_norm": 0.33599647879600525, "learning_rate": 2.9987089478158338e-05, "loss": 0.0823, "step": 7240 }, { "epoch": 0.4288164665523156, "grad_norm": 0.37970489263534546, "learning_rate": 2.998697000954588e-05, "loss": 0.0715, "step": 7250 }, { "epoch": 0.42940793754066364, "grad_norm": 0.35508739948272705, "learning_rate": 2.9986849990959904e-05, "loss": 0.0936, "step": 7260 }, { "epoch": 0.42999940852901164, "grad_norm": 0.580111026763916, "learning_rate": 2.998672942240482e-05, "loss": 0.0922, "step": 7270 }, { "epoch": 0.4305908795173597, "grad_norm": 0.5764631628990173, "learning_rate": 2.9986608303885047e-05, "loss": 0.0907, "step": 7280 }, { "epoch": 0.4311823505057077, "grad_norm": 0.36012327671051025, "learning_rate": 2.9986486635405035e-05, "loss": 0.0902, "step": 7290 }, { "epoch": 0.43177382149405574, "grad_norm": 0.3487236201763153, "learning_rate": 2.998636441696925e-05, "loss": 0.0788, "step": 7300 }, { "epoch": 0.43236529248240374, "grad_norm": 0.34666040539741516, "learning_rate": 2.9986241648582167e-05, "loss": 0.1053, "step": 7310 }, { "epoch": 0.43295676347075174, "grad_norm": 0.30986103415489197, "learning_rate": 2.9986118330248304e-05, "loss": 0.0901, "step": 7320 }, { "epoch": 0.4335482344590998, "grad_norm": 0.32929205894470215, "learning_rate": 2.998599446197218e-05, "loss": 0.0908, "step": 7330 }, { "epoch": 0.4341397054474478, "grad_norm": 0.37751081585884094, "learning_rate": 2.998587004375834e-05, "loss": 0.0943, "step": 7340 }, { "epoch": 0.43473117643579584, "grad_norm": 0.28586241602897644, "learning_rate": 2.998574507561135e-05, "loss": 0.0706, "step": 7350 }, { "epoch": 0.43532264742414384, "grad_norm": 0.4068446457386017, "learning_rate": 2.9985619557535797e-05, "loss": 0.1066, "step": 7360 }, { "epoch": 0.4359141184124919, "grad_norm": 0.3923264145851135, "learning_rate": 2.9985493489536292e-05, "loss": 0.0954, "step": 7370 }, { "epoch": 0.4365055894008399, "grad_norm": 0.5770062804222107, "learning_rate": 2.998536687161745e-05, "loss": 0.0996, "step": 7380 }, { "epoch": 0.4370970603891879, "grad_norm": 0.3249686658382416, "learning_rate": 2.9985239703783923e-05, "loss": 0.0722, "step": 7390 }, { "epoch": 0.43768853137753594, "grad_norm": 0.37385961413383484, "learning_rate": 2.9985111986040383e-05, "loss": 0.0841, "step": 7400 }, { "epoch": 0.43828000236588394, "grad_norm": 0.36809444427490234, "learning_rate": 2.9984983718391514e-05, "loss": 0.1122, "step": 7410 }, { "epoch": 0.438871473354232, "grad_norm": 0.2292686104774475, "learning_rate": 2.9984854900842015e-05, "loss": 0.0934, "step": 7420 }, { "epoch": 0.43946294434258, "grad_norm": 0.5835661292076111, "learning_rate": 2.9984725533396623e-05, "loss": 0.0875, "step": 7430 }, { "epoch": 0.44005441533092804, "grad_norm": 0.32870036363601685, "learning_rate": 2.9984595616060084e-05, "loss": 0.0868, "step": 7440 }, { "epoch": 0.44064588631927604, "grad_norm": 0.4500444233417511, "learning_rate": 2.9984465148837158e-05, "loss": 0.0898, "step": 7450 }, { "epoch": 0.44123735730762403, "grad_norm": 0.27681389451026917, "learning_rate": 2.9984334131732637e-05, "loss": 0.1109, "step": 7460 }, { "epoch": 0.4418288282959721, "grad_norm": 0.4758449196815491, "learning_rate": 2.9984202564751336e-05, "loss": 0.102, "step": 7470 }, { "epoch": 0.4424202992843201, "grad_norm": 0.4448201060295105, "learning_rate": 2.998407044789807e-05, "loss": 0.0879, "step": 7480 }, { "epoch": 0.44301177027266814, "grad_norm": 0.23302404582500458, "learning_rate": 2.99839377811777e-05, "loss": 0.0837, "step": 7490 }, { "epoch": 0.44360324126101613, "grad_norm": 0.4430330693721771, "learning_rate": 2.998380456459509e-05, "loss": 0.0764, "step": 7500 }, { "epoch": 0.4441947122493642, "grad_norm": 0.3749871551990509, "learning_rate": 2.9983670798155125e-05, "loss": 0.1053, "step": 7510 }, { "epoch": 0.4447861832377122, "grad_norm": 0.4552935063838959, "learning_rate": 2.9983536481862712e-05, "loss": 0.0872, "step": 7520 }, { "epoch": 0.44537765422606024, "grad_norm": 2.1190755367279053, "learning_rate": 2.998340161572279e-05, "loss": 0.0854, "step": 7530 }, { "epoch": 0.44596912521440824, "grad_norm": 0.40904027223587036, "learning_rate": 2.99832661997403e-05, "loss": 0.0816, "step": 7540 }, { "epoch": 0.44656059620275623, "grad_norm": 0.3417412042617798, "learning_rate": 2.998313023392021e-05, "loss": 0.0753, "step": 7550 }, { "epoch": 0.4471520671911043, "grad_norm": 0.3222808539867401, "learning_rate": 2.9982993718267522e-05, "loss": 0.096, "step": 7560 }, { "epoch": 0.4477435381794523, "grad_norm": 0.35867491364479065, "learning_rate": 2.998285665278723e-05, "loss": 0.0947, "step": 7570 }, { "epoch": 0.44833500916780034, "grad_norm": 0.38870301842689514, "learning_rate": 2.998271903748437e-05, "loss": 0.0998, "step": 7580 }, { "epoch": 0.44892648015614833, "grad_norm": 0.5798963308334351, "learning_rate": 2.9982580872363998e-05, "loss": 0.088, "step": 7590 }, { "epoch": 0.4495179511444964, "grad_norm": 0.48712655901908875, "learning_rate": 2.9982442157431174e-05, "loss": 0.0794, "step": 7600 }, { "epoch": 0.4501094221328444, "grad_norm": 0.4734156131744385, "learning_rate": 2.9982302892690996e-05, "loss": 0.1114, "step": 7610 }, { "epoch": 0.4507008931211924, "grad_norm": 0.3543788492679596, "learning_rate": 2.998216307814857e-05, "loss": 0.0954, "step": 7620 }, { "epoch": 0.45129236410954043, "grad_norm": 0.3643248677253723, "learning_rate": 2.998202271380903e-05, "loss": 0.0995, "step": 7630 }, { "epoch": 0.45188383509788843, "grad_norm": 0.28739550709724426, "learning_rate": 2.9981881799677527e-05, "loss": 0.0856, "step": 7640 }, { "epoch": 0.4524753060862365, "grad_norm": 0.570069432258606, "learning_rate": 2.9981740335759226e-05, "loss": 0.07, "step": 7650 }, { "epoch": 0.4530667770745845, "grad_norm": 0.3004268705844879, "learning_rate": 2.9981598322059327e-05, "loss": 0.0982, "step": 7660 }, { "epoch": 0.45365824806293253, "grad_norm": 0.35454240441322327, "learning_rate": 2.9981455758583037e-05, "loss": 0.0924, "step": 7670 }, { "epoch": 0.45424971905128053, "grad_norm": 0.34179621934890747, "learning_rate": 2.9981312645335585e-05, "loss": 0.0851, "step": 7680 }, { "epoch": 0.45484119003962853, "grad_norm": 0.48127299547195435, "learning_rate": 2.9981168982322225e-05, "loss": 0.0742, "step": 7690 }, { "epoch": 0.4554326610279766, "grad_norm": 0.4134199917316437, "learning_rate": 2.9981024769548235e-05, "loss": 0.0773, "step": 7700 }, { "epoch": 0.4560241320163246, "grad_norm": 0.3652089536190033, "learning_rate": 2.99808800070189e-05, "loss": 0.1058, "step": 7710 }, { "epoch": 0.45661560300467263, "grad_norm": 0.510755717754364, "learning_rate": 2.9980734694739532e-05, "loss": 0.0941, "step": 7720 }, { "epoch": 0.45720707399302063, "grad_norm": 0.40435662865638733, "learning_rate": 2.9980588832715467e-05, "loss": 0.1026, "step": 7730 }, { "epoch": 0.4577985449813687, "grad_norm": 1.0026781558990479, "learning_rate": 2.9980442420952058e-05, "loss": 0.0962, "step": 7740 }, { "epoch": 0.4583900159697167, "grad_norm": 1.6732622385025024, "learning_rate": 2.9980295459454673e-05, "loss": 0.0785, "step": 7750 }, { "epoch": 0.45898148695806473, "grad_norm": 0.4123741388320923, "learning_rate": 2.9980147948228712e-05, "loss": 0.1102, "step": 7760 }, { "epoch": 0.45957295794641273, "grad_norm": 0.45199766755104065, "learning_rate": 2.997999988727958e-05, "loss": 0.1015, "step": 7770 }, { "epoch": 0.4601644289347607, "grad_norm": 0.42394763231277466, "learning_rate": 2.9979851276612717e-05, "loss": 0.082, "step": 7780 }, { "epoch": 0.4607558999231088, "grad_norm": 0.3767097294330597, "learning_rate": 2.9979702116233577e-05, "loss": 0.0859, "step": 7790 }, { "epoch": 0.4613473709114568, "grad_norm": 0.6528733372688293, "learning_rate": 2.9979552406147625e-05, "loss": 0.0735, "step": 7800 }, { "epoch": 0.46193884189980483, "grad_norm": 0.3254058361053467, "learning_rate": 2.9979402146360363e-05, "loss": 0.1108, "step": 7810 }, { "epoch": 0.46253031288815283, "grad_norm": 0.4165489971637726, "learning_rate": 2.9979251336877308e-05, "loss": 0.101, "step": 7820 }, { "epoch": 0.4631217838765009, "grad_norm": 0.3659830391407013, "learning_rate": 2.997909997770398e-05, "loss": 0.0875, "step": 7830 }, { "epoch": 0.4637132548648489, "grad_norm": 0.3305671811103821, "learning_rate": 2.997894806884595e-05, "loss": 0.0803, "step": 7840 }, { "epoch": 0.4643047258531969, "grad_norm": 0.4912160038948059, "learning_rate": 2.997879561030878e-05, "loss": 0.0767, "step": 7850 }, { "epoch": 0.46489619684154493, "grad_norm": 0.4259221851825714, "learning_rate": 2.9978642602098076e-05, "loss": 0.1021, "step": 7860 }, { "epoch": 0.4654876678298929, "grad_norm": 0.6119204163551331, "learning_rate": 2.9978489044219444e-05, "loss": 0.0817, "step": 7870 }, { "epoch": 0.466079138818241, "grad_norm": 1.179253101348877, "learning_rate": 2.997833493667852e-05, "loss": 0.1014, "step": 7880 }, { "epoch": 0.466670609806589, "grad_norm": 0.40724268555641174, "learning_rate": 2.997818027948096e-05, "loss": 0.0798, "step": 7890 }, { "epoch": 0.46726208079493703, "grad_norm": 0.4630604088306427, "learning_rate": 2.9978025072632443e-05, "loss": 0.0823, "step": 7900 }, { "epoch": 0.467853551783285, "grad_norm": 0.30124643445014954, "learning_rate": 2.9977869316138664e-05, "loss": 0.1093, "step": 7910 }, { "epoch": 0.468445022771633, "grad_norm": 0.3362332880496979, "learning_rate": 2.9977713010005334e-05, "loss": 0.0906, "step": 7920 }, { "epoch": 0.4690364937599811, "grad_norm": 1.2026435136795044, "learning_rate": 2.9977556154238193e-05, "loss": 0.0929, "step": 7930 }, { "epoch": 0.4696279647483291, "grad_norm": 0.33686375617980957, "learning_rate": 2.9977398748842994e-05, "loss": 0.0869, "step": 7940 }, { "epoch": 0.4702194357366771, "grad_norm": 0.34143251180648804, "learning_rate": 2.9977240793825517e-05, "loss": 0.0791, "step": 7950 }, { "epoch": 0.4708109067250251, "grad_norm": 0.4877542555332184, "learning_rate": 2.9977082289191555e-05, "loss": 0.1151, "step": 7960 }, { "epoch": 0.4714023777133732, "grad_norm": 0.5574517846107483, "learning_rate": 2.9976923234946926e-05, "loss": 0.0919, "step": 7970 }, { "epoch": 0.4719938487017212, "grad_norm": 0.5290156602859497, "learning_rate": 2.997676363109747e-05, "loss": 0.0923, "step": 7980 }, { "epoch": 0.4725853196900692, "grad_norm": 0.4069325625896454, "learning_rate": 2.9976603477649037e-05, "loss": 0.0794, "step": 7990 }, { "epoch": 0.4731767906784172, "grad_norm": 0.48831215500831604, "learning_rate": 2.997644277460751e-05, "loss": 0.0756, "step": 8000 }, { "epoch": 0.4737682616667652, "grad_norm": 0.6384404301643372, "learning_rate": 2.9976281521978786e-05, "loss": 0.1126, "step": 8010 }, { "epoch": 0.4743597326551133, "grad_norm": 0.406911164522171, "learning_rate": 2.997611971976878e-05, "loss": 0.0936, "step": 8020 }, { "epoch": 0.4749512036434613, "grad_norm": 0.34492945671081543, "learning_rate": 2.9975957367983426e-05, "loss": 0.0879, "step": 8030 }, { "epoch": 0.4755426746318093, "grad_norm": 0.34715041518211365, "learning_rate": 2.997579446662869e-05, "loss": 0.0798, "step": 8040 }, { "epoch": 0.4761341456201573, "grad_norm": 0.5800969004631042, "learning_rate": 2.9975631015710546e-05, "loss": 0.0797, "step": 8050 }, { "epoch": 0.4767256166085054, "grad_norm": 0.4562055766582489, "learning_rate": 2.9975467015234993e-05, "loss": 0.1097, "step": 8060 }, { "epoch": 0.4773170875968534, "grad_norm": 0.3860180675983429, "learning_rate": 2.997530246520805e-05, "loss": 0.0896, "step": 8070 }, { "epoch": 0.47790855858520137, "grad_norm": 0.2844473421573639, "learning_rate": 2.997513736563575e-05, "loss": 0.0891, "step": 8080 }, { "epoch": 0.4785000295735494, "grad_norm": 0.32709795236587524, "learning_rate": 2.9974971716524155e-05, "loss": 0.0812, "step": 8090 }, { "epoch": 0.4790915005618974, "grad_norm": 0.3930027484893799, "learning_rate": 2.997480551787935e-05, "loss": 0.0897, "step": 8100 }, { "epoch": 0.4796829715502455, "grad_norm": 0.5116729736328125, "learning_rate": 2.9974638769707425e-05, "loss": 0.0998, "step": 8110 }, { "epoch": 0.48027444253859347, "grad_norm": 0.46249106526374817, "learning_rate": 2.99744714720145e-05, "loss": 0.1024, "step": 8120 }, { "epoch": 0.4808659135269415, "grad_norm": 1.240351676940918, "learning_rate": 2.9974303624806722e-05, "loss": 0.0895, "step": 8130 }, { "epoch": 0.4814573845152895, "grad_norm": 0.4114325940608978, "learning_rate": 2.9974135228090242e-05, "loss": 0.0869, "step": 8140 }, { "epoch": 0.4820488555036375, "grad_norm": 0.4182482063770294, "learning_rate": 2.997396628187124e-05, "loss": 0.0813, "step": 8150 }, { "epoch": 0.48264032649198557, "grad_norm": 0.5645573139190674, "learning_rate": 2.9973796786155924e-05, "loss": 0.0919, "step": 8160 }, { "epoch": 0.48323179748033357, "grad_norm": 0.454366534948349, "learning_rate": 2.997362674095051e-05, "loss": 0.0875, "step": 8170 }, { "epoch": 0.4838232684686816, "grad_norm": 0.4592689871788025, "learning_rate": 2.9973456146261228e-05, "loss": 0.0826, "step": 8180 }, { "epoch": 0.4844147394570296, "grad_norm": 0.34621739387512207, "learning_rate": 2.9973285002094355e-05, "loss": 0.0799, "step": 8190 }, { "epoch": 0.4850062104453777, "grad_norm": 0.7299017310142517, "learning_rate": 2.9973113308456157e-05, "loss": 0.082, "step": 8200 }, { "epoch": 0.48559768143372567, "grad_norm": 0.5149027705192566, "learning_rate": 2.9972941065352947e-05, "loss": 0.108, "step": 8210 }, { "epoch": 0.4861891524220737, "grad_norm": 0.3488397002220154, "learning_rate": 2.9972768272791034e-05, "loss": 0.1067, "step": 8220 }, { "epoch": 0.4867806234104217, "grad_norm": 0.4332522451877594, "learning_rate": 2.997259493077677e-05, "loss": 0.103, "step": 8230 }, { "epoch": 0.4873720943987697, "grad_norm": 0.3877393901348114, "learning_rate": 2.997242103931651e-05, "loss": 0.0751, "step": 8240 }, { "epoch": 0.48796356538711777, "grad_norm": 0.35592296719551086, "learning_rate": 2.9972246598416634e-05, "loss": 0.0691, "step": 8250 }, { "epoch": 0.48855503637546577, "grad_norm": 0.42144080996513367, "learning_rate": 2.9972071608083548e-05, "loss": 0.1096, "step": 8260 }, { "epoch": 0.4891465073638138, "grad_norm": 0.6215307712554932, "learning_rate": 2.9971896068323666e-05, "loss": 0.102, "step": 8270 }, { "epoch": 0.4897379783521618, "grad_norm": 0.3667503297328949, "learning_rate": 2.997171997914344e-05, "loss": 0.0893, "step": 8280 }, { "epoch": 0.49032944934050987, "grad_norm": 0.38080206513404846, "learning_rate": 2.9971543340549322e-05, "loss": 0.0863, "step": 8290 }, { "epoch": 0.49092092032885787, "grad_norm": 0.3141956925392151, "learning_rate": 2.9971366152547804e-05, "loss": 0.0778, "step": 8300 }, { "epoch": 0.49151239131720587, "grad_norm": 0.4494202435016632, "learning_rate": 2.997118841514538e-05, "loss": 0.1018, "step": 8310 }, { "epoch": 0.4921038623055539, "grad_norm": 0.26992538571357727, "learning_rate": 2.9971010128348577e-05, "loss": 0.0828, "step": 8320 }, { "epoch": 0.4926953332939019, "grad_norm": 0.321652889251709, "learning_rate": 2.9970831292163936e-05, "loss": 0.083, "step": 8330 }, { "epoch": 0.49328680428224997, "grad_norm": 0.35223260521888733, "learning_rate": 2.997065190659802e-05, "loss": 0.0809, "step": 8340 }, { "epoch": 0.49387827527059797, "grad_norm": 0.3650332987308502, "learning_rate": 2.9970471971657407e-05, "loss": 0.0679, "step": 8350 }, { "epoch": 0.494469746258946, "grad_norm": 0.589424192905426, "learning_rate": 2.997029148734871e-05, "loss": 0.117, "step": 8360 }, { "epoch": 0.495061217247294, "grad_norm": 0.3164695203304291, "learning_rate": 2.997011045367854e-05, "loss": 0.0796, "step": 8370 }, { "epoch": 0.49565268823564207, "grad_norm": 0.5559983849525452, "learning_rate": 2.9969928870653554e-05, "loss": 0.0921, "step": 8380 }, { "epoch": 0.49624415922399007, "grad_norm": 0.3761603832244873, "learning_rate": 2.9969746738280406e-05, "loss": 0.0833, "step": 8390 }, { "epoch": 0.49683563021233806, "grad_norm": 0.44659337401390076, "learning_rate": 2.9969564056565782e-05, "loss": 0.0692, "step": 8400 }, { "epoch": 0.4974271012006861, "grad_norm": 0.37508320808410645, "learning_rate": 2.9969380825516386e-05, "loss": 0.0986, "step": 8410 }, { "epoch": 0.4980185721890341, "grad_norm": 0.483776718378067, "learning_rate": 2.9969197045138944e-05, "loss": 0.0924, "step": 8420 }, { "epoch": 0.49861004317738217, "grad_norm": 0.40941905975341797, "learning_rate": 2.99690127154402e-05, "loss": 0.0888, "step": 8430 }, { "epoch": 0.49920151416573016, "grad_norm": 0.34611526131629944, "learning_rate": 2.996882783642691e-05, "loss": 0.0835, "step": 8440 }, { "epoch": 0.4997929851540782, "grad_norm": 0.5906410813331604, "learning_rate": 2.996864240810587e-05, "loss": 0.0769, "step": 8450 }, { "epoch": 0.5003844561424262, "grad_norm": 0.3573364019393921, "learning_rate": 2.9968456430483874e-05, "loss": 0.0999, "step": 8460 }, { "epoch": 0.5009759271307742, "grad_norm": 0.5096628069877625, "learning_rate": 2.996826990356776e-05, "loss": 0.0998, "step": 8470 }, { "epoch": 0.5015673981191222, "grad_norm": 0.3323473334312439, "learning_rate": 2.9968082827364357e-05, "loss": 0.0926, "step": 8480 }, { "epoch": 0.5021588691074703, "grad_norm": 0.47174322605133057, "learning_rate": 2.996789520188055e-05, "loss": 0.0723, "step": 8490 }, { "epoch": 0.5027503400958183, "grad_norm": 0.34649306535720825, "learning_rate": 2.99677070271232e-05, "loss": 0.0845, "step": 8500 }, { "epoch": 0.5033418110841663, "grad_norm": 0.5018144249916077, "learning_rate": 2.996751830309923e-05, "loss": 0.1036, "step": 8510 }, { "epoch": 0.5039332820725143, "grad_norm": 0.24817676842212677, "learning_rate": 2.9967329029815566e-05, "loss": 0.0888, "step": 8520 }, { "epoch": 0.5045247530608624, "grad_norm": 0.6448086500167847, "learning_rate": 2.9967139207279143e-05, "loss": 0.0943, "step": 8530 }, { "epoch": 0.5051162240492104, "grad_norm": 0.26994988322257996, "learning_rate": 2.9966948835496932e-05, "loss": 0.0906, "step": 8540 }, { "epoch": 0.5057076950375584, "grad_norm": 0.35430335998535156, "learning_rate": 2.9966757914475924e-05, "loss": 0.0861, "step": 8550 }, { "epoch": 0.5062991660259064, "grad_norm": 0.5497093796730042, "learning_rate": 2.9966566444223115e-05, "loss": 0.1034, "step": 8560 }, { "epoch": 0.5068906370142544, "grad_norm": 0.38396987318992615, "learning_rate": 2.996637442474554e-05, "loss": 0.0898, "step": 8570 }, { "epoch": 0.5074821080026025, "grad_norm": 0.8556404709815979, "learning_rate": 2.996618185605024e-05, "loss": 0.087, "step": 8580 }, { "epoch": 0.5080735789909505, "grad_norm": 0.4999160170555115, "learning_rate": 2.9965988738144285e-05, "loss": 0.09, "step": 8590 }, { "epoch": 0.5086650499792985, "grad_norm": 0.3395746052265167, "learning_rate": 2.996579507103476e-05, "loss": 0.0752, "step": 8600 }, { "epoch": 0.5092565209676465, "grad_norm": 0.5743008852005005, "learning_rate": 2.9965600854728777e-05, "loss": 0.1041, "step": 8610 }, { "epoch": 0.5098479919559945, "grad_norm": 0.32282838225364685, "learning_rate": 2.996540608923345e-05, "loss": 0.1099, "step": 8620 }, { "epoch": 0.5104394629443426, "grad_norm": 0.40972578525543213, "learning_rate": 2.9965210774555946e-05, "loss": 0.0812, "step": 8630 }, { "epoch": 0.5110309339326906, "grad_norm": 0.30699703097343445, "learning_rate": 2.9965014910703418e-05, "loss": 0.0837, "step": 8640 }, { "epoch": 0.5116224049210386, "grad_norm": 0.2400931566953659, "learning_rate": 2.9964818497683052e-05, "loss": 0.0801, "step": 8650 }, { "epoch": 0.5122138759093866, "grad_norm": 0.6607128977775574, "learning_rate": 2.9964621535502064e-05, "loss": 0.1113, "step": 8660 }, { "epoch": 0.5128053468977347, "grad_norm": 0.32740047574043274, "learning_rate": 2.996442402416768e-05, "loss": 0.0927, "step": 8670 }, { "epoch": 0.5133968178860827, "grad_norm": 0.3822545111179352, "learning_rate": 2.9964225963687147e-05, "loss": 0.0981, "step": 8680 }, { "epoch": 0.5139882888744307, "grad_norm": 0.36452430486679077, "learning_rate": 2.9964027354067728e-05, "loss": 0.088, "step": 8690 }, { "epoch": 0.5145797598627787, "grad_norm": 0.4185750484466553, "learning_rate": 2.996382819531672e-05, "loss": 0.0776, "step": 8700 }, { "epoch": 0.5151712308511267, "grad_norm": 0.32123032212257385, "learning_rate": 2.996362848744143e-05, "loss": 0.0958, "step": 8710 }, { "epoch": 0.5157627018394748, "grad_norm": 0.4125857651233673, "learning_rate": 2.996342823044918e-05, "loss": 0.0911, "step": 8720 }, { "epoch": 0.5163541728278228, "grad_norm": 0.3385552167892456, "learning_rate": 2.9963227424347327e-05, "loss": 0.0888, "step": 8730 }, { "epoch": 0.5169456438161708, "grad_norm": 0.36953485012054443, "learning_rate": 2.9963026069143234e-05, "loss": 0.0925, "step": 8740 }, { "epoch": 0.5175371148045188, "grad_norm": 0.4910293519496918, "learning_rate": 2.996282416484429e-05, "loss": 0.0774, "step": 8750 }, { "epoch": 0.5181285857928669, "grad_norm": 0.3263454735279083, "learning_rate": 2.9962621711457908e-05, "loss": 0.1047, "step": 8760 }, { "epoch": 0.5187200567812149, "grad_norm": 0.35855013132095337, "learning_rate": 2.9962418708991516e-05, "loss": 0.0875, "step": 8770 }, { "epoch": 0.5193115277695629, "grad_norm": 0.7009475231170654, "learning_rate": 2.9962215157452566e-05, "loss": 0.0944, "step": 8780 }, { "epoch": 0.5199029987579109, "grad_norm": 0.32110416889190674, "learning_rate": 2.996201105684852e-05, "loss": 0.0784, "step": 8790 }, { "epoch": 0.5204944697462589, "grad_norm": 0.526898205280304, "learning_rate": 2.996180640718688e-05, "loss": 0.0742, "step": 8800 }, { "epoch": 0.521085940734607, "grad_norm": 0.5232031941413879, "learning_rate": 2.996160120847514e-05, "loss": 0.0926, "step": 8810 }, { "epoch": 0.521677411722955, "grad_norm": 0.5559702515602112, "learning_rate": 2.9961395460720848e-05, "loss": 0.0994, "step": 8820 }, { "epoch": 0.522268882711303, "grad_norm": 0.3953031897544861, "learning_rate": 2.996118916393154e-05, "loss": 0.1016, "step": 8830 }, { "epoch": 0.522860353699651, "grad_norm": 0.35693132877349854, "learning_rate": 2.996098231811479e-05, "loss": 0.0728, "step": 8840 }, { "epoch": 0.523451824687999, "grad_norm": 0.3822118639945984, "learning_rate": 2.9960774923278196e-05, "loss": 0.0716, "step": 8850 }, { "epoch": 0.5240432956763471, "grad_norm": 0.43887388706207275, "learning_rate": 2.9960566979429356e-05, "loss": 0.1004, "step": 8860 }, { "epoch": 0.5246347666646951, "grad_norm": 0.483318030834198, "learning_rate": 2.9960358486575915e-05, "loss": 0.0971, "step": 8870 }, { "epoch": 0.5252262376530431, "grad_norm": 0.3590041697025299, "learning_rate": 2.9960149444725513e-05, "loss": 0.0941, "step": 8880 }, { "epoch": 0.5258177086413911, "grad_norm": 0.3141467273235321, "learning_rate": 2.9959939853885828e-05, "loss": 0.0925, "step": 8890 }, { "epoch": 0.5264091796297392, "grad_norm": 0.40355902910232544, "learning_rate": 2.9959729714064546e-05, "loss": 0.0773, "step": 8900 }, { "epoch": 0.5270006506180872, "grad_norm": 0.32483258843421936, "learning_rate": 2.995951902526938e-05, "loss": 0.0952, "step": 8910 }, { "epoch": 0.5275921216064352, "grad_norm": 0.3437613546848297, "learning_rate": 2.9959307787508058e-05, "loss": 0.0979, "step": 8920 }, { "epoch": 0.5281835925947832, "grad_norm": 0.2980209290981293, "learning_rate": 2.9959096000788345e-05, "loss": 0.0817, "step": 8930 }, { "epoch": 0.5287750635831312, "grad_norm": 0.3825405538082123, "learning_rate": 2.9958883665117998e-05, "loss": 0.0739, "step": 8940 }, { "epoch": 0.5293665345714793, "grad_norm": 0.4605342745780945, "learning_rate": 2.995867078050482e-05, "loss": 0.0841, "step": 8950 }, { "epoch": 0.5299580055598273, "grad_norm": 0.390678346157074, "learning_rate": 2.9958457346956615e-05, "loss": 0.0941, "step": 8960 }, { "epoch": 0.5305494765481753, "grad_norm": 0.5045586824417114, "learning_rate": 2.995824336448122e-05, "loss": 0.09, "step": 8970 }, { "epoch": 0.5311409475365233, "grad_norm": 0.28925448656082153, "learning_rate": 2.9958028833086482e-05, "loss": 0.0909, "step": 8980 }, { "epoch": 0.5317324185248714, "grad_norm": 0.29900431632995605, "learning_rate": 2.9957813752780283e-05, "loss": 0.0835, "step": 8990 }, { "epoch": 0.5323238895132194, "grad_norm": 0.4429897964000702, "learning_rate": 2.9957598123570507e-05, "loss": 0.0685, "step": 9000 }, { "epoch": 0.5329153605015674, "grad_norm": 0.5068618059158325, "learning_rate": 2.9957381945465074e-05, "loss": 0.102, "step": 9010 }, { "epoch": 0.5335068314899154, "grad_norm": 0.45222359895706177, "learning_rate": 2.995716521847191e-05, "loss": 0.1027, "step": 9020 }, { "epoch": 0.5340983024782634, "grad_norm": 0.4112400710582733, "learning_rate": 2.995694794259897e-05, "loss": 0.1014, "step": 9030 }, { "epoch": 0.5346897734666115, "grad_norm": 0.3610832691192627, "learning_rate": 2.9956730117854232e-05, "loss": 0.0767, "step": 9040 }, { "epoch": 0.5352812444549595, "grad_norm": 0.5332263708114624, "learning_rate": 2.9956511744245686e-05, "loss": 0.0932, "step": 9050 }, { "epoch": 0.5358727154433075, "grad_norm": 0.5238906741142273, "learning_rate": 2.9956292821781348e-05, "loss": 0.0958, "step": 9060 }, { "epoch": 0.5364641864316555, "grad_norm": 0.35042300820350647, "learning_rate": 2.9956073350469246e-05, "loss": 0.0854, "step": 9070 }, { "epoch": 0.5370556574200035, "grad_norm": 0.42112380266189575, "learning_rate": 2.995585333031744e-05, "loss": 0.0873, "step": 9080 }, { "epoch": 0.5376471284083516, "grad_norm": 0.3694233000278473, "learning_rate": 2.9955632761333995e-05, "loss": 0.0801, "step": 9090 }, { "epoch": 0.5382385993966996, "grad_norm": 0.5599299073219299, "learning_rate": 2.995541164352702e-05, "loss": 0.0812, "step": 9100 }, { "epoch": 0.5388300703850476, "grad_norm": 0.360370010137558, "learning_rate": 2.995518997690462e-05, "loss": 0.1078, "step": 9110 }, { "epoch": 0.5394215413733956, "grad_norm": 0.3549087345600128, "learning_rate": 2.9954967761474925e-05, "loss": 0.1045, "step": 9120 }, { "epoch": 0.5400130123617437, "grad_norm": 0.317585289478302, "learning_rate": 2.99547449972461e-05, "loss": 0.0902, "step": 9130 }, { "epoch": 0.5406044833500917, "grad_norm": 0.2955344319343567, "learning_rate": 2.9954521684226313e-05, "loss": 0.0848, "step": 9140 }, { "epoch": 0.5411959543384397, "grad_norm": 0.3687433898448944, "learning_rate": 2.995429782242376e-05, "loss": 0.0733, "step": 9150 }, { "epoch": 0.5417874253267877, "grad_norm": 0.5759553909301758, "learning_rate": 2.995407341184666e-05, "loss": 0.1069, "step": 9160 }, { "epoch": 0.5423788963151357, "grad_norm": 0.26839494705200195, "learning_rate": 2.995384845250324e-05, "loss": 0.0851, "step": 9170 }, { "epoch": 0.5429703673034838, "grad_norm": 0.32091617584228516, "learning_rate": 2.9953622944401764e-05, "loss": 0.0929, "step": 9180 }, { "epoch": 0.5435618382918318, "grad_norm": 0.3602246642112732, "learning_rate": 2.9953396887550503e-05, "loss": 0.0799, "step": 9190 }, { "epoch": 0.5441533092801798, "grad_norm": 0.36736828088760376, "learning_rate": 2.9953170281957754e-05, "loss": 0.0834, "step": 9200 }, { "epoch": 0.5447447802685278, "grad_norm": 0.4556533396244049, "learning_rate": 2.9952943127631833e-05, "loss": 0.1052, "step": 9210 }, { "epoch": 0.5453362512568759, "grad_norm": 0.30887410044670105, "learning_rate": 2.995271542458107e-05, "loss": 0.1041, "step": 9220 }, { "epoch": 0.5459277222452239, "grad_norm": 0.5061690807342529, "learning_rate": 2.995248717281383e-05, "loss": 0.0949, "step": 9230 }, { "epoch": 0.5465191932335719, "grad_norm": 0.3270837962627411, "learning_rate": 2.995225837233848e-05, "loss": 0.0807, "step": 9240 }, { "epoch": 0.5471106642219199, "grad_norm": 0.4200716018676758, "learning_rate": 2.9952029023163424e-05, "loss": 0.0682, "step": 9250 }, { "epoch": 0.5477021352102679, "grad_norm": 0.4221612513065338, "learning_rate": 2.9951799125297075e-05, "loss": 0.0971, "step": 9260 }, { "epoch": 0.548293606198616, "grad_norm": 0.27233487367630005, "learning_rate": 2.995156867874787e-05, "loss": 0.0964, "step": 9270 }, { "epoch": 0.548885077186964, "grad_norm": 0.32229700684547424, "learning_rate": 2.9951337683524262e-05, "loss": 0.0925, "step": 9280 }, { "epoch": 0.549476548175312, "grad_norm": 0.3860439956188202, "learning_rate": 2.9951106139634735e-05, "loss": 0.0997, "step": 9290 }, { "epoch": 0.55006801916366, "grad_norm": 0.4712941646575928, "learning_rate": 2.995087404708778e-05, "loss": 0.0779, "step": 9300 }, { "epoch": 0.550659490152008, "grad_norm": 0.2397492527961731, "learning_rate": 2.995064140589192e-05, "loss": 0.1009, "step": 9310 }, { "epoch": 0.5512509611403561, "grad_norm": 0.5343261361122131, "learning_rate": 2.9950408216055685e-05, "loss": 0.097, "step": 9320 }, { "epoch": 0.5518424321287041, "grad_norm": 0.37581515312194824, "learning_rate": 2.9950174477587637e-05, "loss": 0.1023, "step": 9330 }, { "epoch": 0.5524339031170521, "grad_norm": 0.4306933581829071, "learning_rate": 2.9949940190496357e-05, "loss": 0.0859, "step": 9340 }, { "epoch": 0.5530253741054001, "grad_norm": 0.3492285907268524, "learning_rate": 2.994970535479043e-05, "loss": 0.0686, "step": 9350 }, { "epoch": 0.5536168450937482, "grad_norm": 0.33390623331069946, "learning_rate": 2.9949469970478486e-05, "loss": 0.0875, "step": 9360 }, { "epoch": 0.5542083160820962, "grad_norm": 0.35284870862960815, "learning_rate": 2.9949234037569158e-05, "loss": 0.0915, "step": 9370 }, { "epoch": 0.5547997870704442, "grad_norm": 1.1866803169250488, "learning_rate": 2.99489975560711e-05, "loss": 0.0893, "step": 9380 }, { "epoch": 0.5553912580587922, "grad_norm": 0.37359052896499634, "learning_rate": 2.9948760525992997e-05, "loss": 0.0875, "step": 9390 }, { "epoch": 0.5559827290471402, "grad_norm": 0.45234647393226624, "learning_rate": 2.994852294734355e-05, "loss": 0.077, "step": 9400 }, { "epoch": 0.5565742000354883, "grad_norm": 0.46365395188331604, "learning_rate": 2.9948284820131464e-05, "loss": 0.1045, "step": 9410 }, { "epoch": 0.5571656710238363, "grad_norm": 0.4560779631137848, "learning_rate": 2.9948046144365488e-05, "loss": 0.0836, "step": 9420 }, { "epoch": 0.5577571420121843, "grad_norm": 0.2708345353603363, "learning_rate": 2.994780692005438e-05, "loss": 0.0988, "step": 9430 }, { "epoch": 0.5583486130005323, "grad_norm": 0.29789504408836365, "learning_rate": 2.9947567147206914e-05, "loss": 0.0894, "step": 9440 }, { "epoch": 0.5589400839888804, "grad_norm": 0.33837342262268066, "learning_rate": 2.9947326825831894e-05, "loss": 0.0724, "step": 9450 }, { "epoch": 0.5595315549772284, "grad_norm": 0.418983519077301, "learning_rate": 2.9947085955938137e-05, "loss": 0.1059, "step": 9460 }, { "epoch": 0.5601230259655764, "grad_norm": 0.4764470160007477, "learning_rate": 2.994684453753448e-05, "loss": 0.0901, "step": 9470 }, { "epoch": 0.5607144969539244, "grad_norm": 0.4244707226753235, "learning_rate": 2.9946602570629787e-05, "loss": 0.0979, "step": 9480 }, { "epoch": 0.5613059679422724, "grad_norm": 0.36740249395370483, "learning_rate": 2.9946360055232934e-05, "loss": 0.0792, "step": 9490 }, { "epoch": 0.5618974389306205, "grad_norm": 0.47081083059310913, "learning_rate": 2.9946116991352818e-05, "loss": 0.082, "step": 9500 }, { "epoch": 0.5624889099189685, "grad_norm": 0.37772810459136963, "learning_rate": 2.9945873378998367e-05, "loss": 0.0943, "step": 9510 }, { "epoch": 0.5630803809073165, "grad_norm": 0.30243468284606934, "learning_rate": 2.9945629218178516e-05, "loss": 0.0847, "step": 9520 }, { "epoch": 0.5636718518956645, "grad_norm": 0.30133286118507385, "learning_rate": 2.994538450890222e-05, "loss": 0.0929, "step": 9530 }, { "epoch": 0.5642633228840125, "grad_norm": 0.2604788839817047, "learning_rate": 2.994513925117846e-05, "loss": 0.0825, "step": 9540 }, { "epoch": 0.5648547938723606, "grad_norm": 0.31509456038475037, "learning_rate": 2.994489344501625e-05, "loss": 0.082, "step": 9550 }, { "epoch": 0.5654462648607086, "grad_norm": 0.3090815842151642, "learning_rate": 2.9944647090424596e-05, "loss": 0.0991, "step": 9560 }, { "epoch": 0.5660377358490566, "grad_norm": 0.5116276741027832, "learning_rate": 2.994440018741254e-05, "loss": 0.0964, "step": 9570 }, { "epoch": 0.5666292068374046, "grad_norm": 0.27522197365760803, "learning_rate": 2.9944152735989143e-05, "loss": 0.0768, "step": 9580 }, { "epoch": 0.5672206778257527, "grad_norm": 0.313790887594223, "learning_rate": 2.9943904736163494e-05, "loss": 0.0703, "step": 9590 }, { "epoch": 0.5678121488141007, "grad_norm": 0.5991852283477783, "learning_rate": 2.9943656187944684e-05, "loss": 0.0787, "step": 9600 }, { "epoch": 0.5684036198024487, "grad_norm": 0.504159688949585, "learning_rate": 2.9943407091341835e-05, "loss": 0.0821, "step": 9610 }, { "epoch": 0.5689950907907967, "grad_norm": 0.49208757281303406, "learning_rate": 2.9943157446364097e-05, "loss": 0.0976, "step": 9620 }, { "epoch": 0.5695865617791447, "grad_norm": 0.4888491630554199, "learning_rate": 2.9942907253020624e-05, "loss": 0.0947, "step": 9630 }, { "epoch": 0.5701780327674928, "grad_norm": 0.6191413402557373, "learning_rate": 2.9942656511320593e-05, "loss": 0.085, "step": 9640 }, { "epoch": 0.5707695037558408, "grad_norm": 0.4919918179512024, "learning_rate": 2.994240522127321e-05, "loss": 0.0702, "step": 9650 }, { "epoch": 0.5713609747441888, "grad_norm": 0.6132060885429382, "learning_rate": 2.9942153382887702e-05, "loss": 0.0958, "step": 9660 }, { "epoch": 0.5719524457325368, "grad_norm": 0.4209537208080292, "learning_rate": 2.9941900996173303e-05, "loss": 0.0908, "step": 9670 }, { "epoch": 0.5725439167208849, "grad_norm": 0.41234278678894043, "learning_rate": 2.9941648061139275e-05, "loss": 0.0888, "step": 9680 }, { "epoch": 0.5731353877092329, "grad_norm": 0.2798830568790436, "learning_rate": 2.9941394577794903e-05, "loss": 0.0726, "step": 9690 }, { "epoch": 0.5737268586975809, "grad_norm": 0.422971248626709, "learning_rate": 2.9941140546149494e-05, "loss": 0.0691, "step": 9700 }, { "epoch": 0.5743183296859289, "grad_norm": 0.5670852661132812, "learning_rate": 2.994088596621236e-05, "loss": 0.1024, "step": 9710 }, { "epoch": 0.5749098006742769, "grad_norm": 0.42728370428085327, "learning_rate": 2.9940630837992844e-05, "loss": 0.098, "step": 9720 }, { "epoch": 0.575501271662625, "grad_norm": 0.49966275691986084, "learning_rate": 2.994037516150032e-05, "loss": 0.1008, "step": 9730 }, { "epoch": 0.576092742650973, "grad_norm": 0.4244346022605896, "learning_rate": 2.9940118936744156e-05, "loss": 0.0754, "step": 9740 }, { "epoch": 0.576684213639321, "grad_norm": 0.2790745794773102, "learning_rate": 2.9939862163733763e-05, "loss": 0.0697, "step": 9750 }, { "epoch": 0.577275684627669, "grad_norm": 0.38304123282432556, "learning_rate": 2.9939604842478564e-05, "loss": 0.0937, "step": 9760 }, { "epoch": 0.577867155616017, "grad_norm": 0.6226797699928284, "learning_rate": 2.9939346972987996e-05, "loss": 0.1098, "step": 9770 }, { "epoch": 0.5784586266043651, "grad_norm": 0.23942483961582184, "learning_rate": 2.993908855527153e-05, "loss": 0.0824, "step": 9780 }, { "epoch": 0.5790500975927131, "grad_norm": 0.4292865991592407, "learning_rate": 2.9938829589338638e-05, "loss": 0.081, "step": 9790 }, { "epoch": 0.5796415685810611, "grad_norm": 0.3851962387561798, "learning_rate": 2.993857007519884e-05, "loss": 0.0615, "step": 9800 }, { "epoch": 0.5802330395694091, "grad_norm": 0.34196674823760986, "learning_rate": 2.993831001286164e-05, "loss": 0.1078, "step": 9810 }, { "epoch": 0.5808245105577572, "grad_norm": 0.330931156873703, "learning_rate": 2.9938049402336595e-05, "loss": 0.0881, "step": 9820 }, { "epoch": 0.5814159815461052, "grad_norm": 0.4396262466907501, "learning_rate": 2.9937788243633266e-05, "loss": 0.0951, "step": 9830 }, { "epoch": 0.5820074525344532, "grad_norm": 0.29935500025749207, "learning_rate": 2.993752653676123e-05, "loss": 0.0822, "step": 9840 }, { "epoch": 0.5825989235228012, "grad_norm": 0.40181565284729004, "learning_rate": 2.99372642817301e-05, "loss": 0.0746, "step": 9850 }, { "epoch": 0.5831903945111492, "grad_norm": 0.4010077714920044, "learning_rate": 2.9937001478549496e-05, "loss": 0.0997, "step": 9860 }, { "epoch": 0.5837818654994973, "grad_norm": 0.223511204123497, "learning_rate": 2.993673812722906e-05, "loss": 0.0925, "step": 9870 }, { "epoch": 0.5843733364878453, "grad_norm": 0.3683392107486725, "learning_rate": 2.993647422777846e-05, "loss": 0.0839, "step": 9880 }, { "epoch": 0.5849648074761933, "grad_norm": 0.3258034884929657, "learning_rate": 2.9936209780207375e-05, "loss": 0.0876, "step": 9890 }, { "epoch": 0.5855562784645413, "grad_norm": 0.3112740218639374, "learning_rate": 2.9935944784525517e-05, "loss": 0.0881, "step": 9900 }, { "epoch": 0.5861477494528894, "grad_norm": 0.42282265424728394, "learning_rate": 2.99356792407426e-05, "loss": 0.0963, "step": 9910 }, { "epoch": 0.5867392204412374, "grad_norm": 0.49202409386634827, "learning_rate": 2.993541314886838e-05, "loss": 0.1004, "step": 9920 }, { "epoch": 0.5873306914295854, "grad_norm": 0.8990455865859985, "learning_rate": 2.9935146508912617e-05, "loss": 0.0912, "step": 9930 }, { "epoch": 0.5879221624179334, "grad_norm": 0.35118013620376587, "learning_rate": 2.993487932088509e-05, "loss": 0.0804, "step": 9940 }, { "epoch": 0.5885136334062814, "grad_norm": 0.2363854944705963, "learning_rate": 2.9934611584795612e-05, "loss": 0.0726, "step": 9950 }, { "epoch": 0.5891051043946295, "grad_norm": 0.31286293268203735, "learning_rate": 2.9934343300654006e-05, "loss": 0.1036, "step": 9960 }, { "epoch": 0.5896965753829775, "grad_norm": 0.3893478512763977, "learning_rate": 2.9934074468470112e-05, "loss": 0.0999, "step": 9970 }, { "epoch": 0.5902880463713255, "grad_norm": 0.5940829515457153, "learning_rate": 2.9933805088253804e-05, "loss": 0.0921, "step": 9980 }, { "epoch": 0.5908795173596735, "grad_norm": 0.494723379611969, "learning_rate": 2.9933535160014966e-05, "loss": 0.0798, "step": 9990 }, { "epoch": 0.5914709883480215, "grad_norm": 0.29406529664993286, "learning_rate": 2.9933264683763496e-05, "loss": 0.0781, "step": 10000 }, { "epoch": 0.5920624593363696, "grad_norm": 0.31615546345710754, "learning_rate": 2.9932993659509323e-05, "loss": 0.0981, "step": 10010 }, { "epoch": 0.5926539303247176, "grad_norm": 0.3438585698604584, "learning_rate": 2.9932722087262394e-05, "loss": 0.0955, "step": 10020 }, { "epoch": 0.5932454013130656, "grad_norm": 0.4173542261123657, "learning_rate": 2.9932449967032676e-05, "loss": 0.0873, "step": 10030 }, { "epoch": 0.5938368723014136, "grad_norm": 0.25617480278015137, "learning_rate": 2.9932177298830156e-05, "loss": 0.0756, "step": 10040 }, { "epoch": 0.5944283432897617, "grad_norm": 0.27974605560302734, "learning_rate": 2.9931904082664833e-05, "loss": 0.0657, "step": 10050 }, { "epoch": 0.5950198142781097, "grad_norm": 0.28745776414871216, "learning_rate": 2.9931630318546745e-05, "loss": 0.0947, "step": 10060 }, { "epoch": 0.5956112852664577, "grad_norm": 0.30080464482307434, "learning_rate": 2.9931356006485927e-05, "loss": 0.0929, "step": 10070 }, { "epoch": 0.5962027562548057, "grad_norm": 0.4940321743488312, "learning_rate": 2.9931081146492448e-05, "loss": 0.083, "step": 10080 }, { "epoch": 0.5967942272431537, "grad_norm": 0.3386019766330719, "learning_rate": 2.9930805738576394e-05, "loss": 0.0715, "step": 10090 }, { "epoch": 0.5973856982315018, "grad_norm": 0.5200408101081848, "learning_rate": 2.993052978274788e-05, "loss": 0.0748, "step": 10100 }, { "epoch": 0.5979771692198498, "grad_norm": 0.396281898021698, "learning_rate": 2.9930253279017022e-05, "loss": 0.1079, "step": 10110 }, { "epoch": 0.5985686402081978, "grad_norm": 0.27457818388938904, "learning_rate": 2.992997622739397e-05, "loss": 0.0908, "step": 10120 }, { "epoch": 0.5991601111965458, "grad_norm": 0.32418546080589294, "learning_rate": 2.9929698627888895e-05, "loss": 0.0827, "step": 10130 }, { "epoch": 0.5997515821848939, "grad_norm": 0.28666257858276367, "learning_rate": 2.992942048051198e-05, "loss": 0.0781, "step": 10140 }, { "epoch": 0.6003430531732419, "grad_norm": 0.3333970308303833, "learning_rate": 2.992914178527343e-05, "loss": 0.0902, "step": 10150 }, { "epoch": 0.6009345241615899, "grad_norm": 0.3733903467655182, "learning_rate": 2.992886254218348e-05, "loss": 0.1041, "step": 10160 }, { "epoch": 0.6015259951499379, "grad_norm": 0.38411688804626465, "learning_rate": 2.992858275125237e-05, "loss": 0.1001, "step": 10170 }, { "epoch": 0.6021174661382859, "grad_norm": 0.389826238155365, "learning_rate": 2.9928302412490373e-05, "loss": 0.092, "step": 10180 }, { "epoch": 0.602708937126634, "grad_norm": 0.25488677620887756, "learning_rate": 2.9928021525907764e-05, "loss": 0.0884, "step": 10190 }, { "epoch": 0.603300408114982, "grad_norm": 0.36875757575035095, "learning_rate": 2.9927740091514867e-05, "loss": 0.0729, "step": 10200 }, { "epoch": 0.60389187910333, "grad_norm": 0.7485021352767944, "learning_rate": 2.9927458109322005e-05, "loss": 0.113, "step": 10210 }, { "epoch": 0.604483350091678, "grad_norm": 0.3715158700942993, "learning_rate": 2.9927175579339523e-05, "loss": 0.0925, "step": 10220 }, { "epoch": 0.605074821080026, "grad_norm": 0.3176770806312561, "learning_rate": 2.9926892501577784e-05, "loss": 0.085, "step": 10230 }, { "epoch": 0.6056662920683741, "grad_norm": 0.31546550989151, "learning_rate": 2.992660887604719e-05, "loss": 0.0873, "step": 10240 }, { "epoch": 0.6062577630567221, "grad_norm": 0.2989206612110138, "learning_rate": 2.9926324702758138e-05, "loss": 0.0776, "step": 10250 }, { "epoch": 0.6068492340450701, "grad_norm": 0.6224656701087952, "learning_rate": 2.9926039981721057e-05, "loss": 0.0987, "step": 10260 }, { "epoch": 0.6074407050334181, "grad_norm": 0.2896079421043396, "learning_rate": 2.9925754712946393e-05, "loss": 0.0951, "step": 10270 }, { "epoch": 0.6080321760217662, "grad_norm": 0.40822938084602356, "learning_rate": 2.9925468896444627e-05, "loss": 0.0843, "step": 10280 }, { "epoch": 0.6086236470101142, "grad_norm": 1.138526439666748, "learning_rate": 2.9925182532226237e-05, "loss": 0.0912, "step": 10290 }, { "epoch": 0.6092151179984622, "grad_norm": 0.2818152904510498, "learning_rate": 2.992489562030173e-05, "loss": 0.0776, "step": 10300 }, { "epoch": 0.6098065889868102, "grad_norm": 0.5309779047966003, "learning_rate": 2.9924608160681642e-05, "loss": 0.1019, "step": 10310 }, { "epoch": 0.6103980599751582, "grad_norm": 0.3539980351924896, "learning_rate": 2.9924320153376524e-05, "loss": 0.0953, "step": 10320 }, { "epoch": 0.6109895309635063, "grad_norm": 0.32185977697372437, "learning_rate": 2.9924031598396934e-05, "loss": 0.097, "step": 10330 }, { "epoch": 0.6115810019518543, "grad_norm": 0.5363276600837708, "learning_rate": 2.9923742495753467e-05, "loss": 0.086, "step": 10340 }, { "epoch": 0.6121724729402023, "grad_norm": 0.2652864158153534, "learning_rate": 2.992345284545673e-05, "loss": 0.0734, "step": 10350 }, { "epoch": 0.6127639439285503, "grad_norm": 0.2597825229167938, "learning_rate": 2.9923162647517356e-05, "loss": 0.0898, "step": 10360 }, { "epoch": 0.6133554149168984, "grad_norm": 0.2903847396373749, "learning_rate": 2.9922871901945993e-05, "loss": 0.0865, "step": 10370 }, { "epoch": 0.6139468859052464, "grad_norm": 0.3159567713737488, "learning_rate": 2.9922580608753312e-05, "loss": 0.0914, "step": 10380 }, { "epoch": 0.6145383568935944, "grad_norm": 0.30943670868873596, "learning_rate": 2.9922288767949994e-05, "loss": 0.0709, "step": 10390 }, { "epoch": 0.6151298278819424, "grad_norm": 0.2695314884185791, "learning_rate": 2.9921996379546758e-05, "loss": 0.0813, "step": 10400 }, { "epoch": 0.6157212988702904, "grad_norm": 0.34180453419685364, "learning_rate": 2.9921703443554332e-05, "loss": 0.1002, "step": 10410 }, { "epoch": 0.6163127698586385, "grad_norm": 0.4421316087245941, "learning_rate": 2.9921409959983462e-05, "loss": 0.0847, "step": 10420 }, { "epoch": 0.6169042408469865, "grad_norm": 0.30742698907852173, "learning_rate": 2.992111592884492e-05, "loss": 0.0854, "step": 10430 }, { "epoch": 0.6174957118353345, "grad_norm": 0.33670249581336975, "learning_rate": 2.99208213501495e-05, "loss": 0.0834, "step": 10440 }, { "epoch": 0.6180871828236825, "grad_norm": 0.49088653922080994, "learning_rate": 2.9920526223908003e-05, "loss": 0.0742, "step": 10450 }, { "epoch": 0.6186786538120305, "grad_norm": 0.41962161660194397, "learning_rate": 2.9920230550131268e-05, "loss": 0.1015, "step": 10460 }, { "epoch": 0.6192701248003786, "grad_norm": 0.2799878716468811, "learning_rate": 2.9919934328830144e-05, "loss": 0.0849, "step": 10470 }, { "epoch": 0.6198615957887266, "grad_norm": 0.8722606301307678, "learning_rate": 2.99196375600155e-05, "loss": 0.1014, "step": 10480 }, { "epoch": 0.6204530667770746, "grad_norm": 0.30293646454811096, "learning_rate": 2.991934024369822e-05, "loss": 0.0895, "step": 10490 }, { "epoch": 0.6210445377654226, "grad_norm": 0.42362096905708313, "learning_rate": 2.9919042379889223e-05, "loss": 0.0811, "step": 10500 }, { "epoch": 0.6216360087537707, "grad_norm": 0.2366364449262619, "learning_rate": 2.9918743968599437e-05, "loss": 0.0985, "step": 10510 }, { "epoch": 0.6222274797421187, "grad_norm": 0.3197803497314453, "learning_rate": 2.991844500983981e-05, "loss": 0.0891, "step": 10520 }, { "epoch": 0.6228189507304667, "grad_norm": 0.3368244469165802, "learning_rate": 2.9918145503621322e-05, "loss": 0.079, "step": 10530 }, { "epoch": 0.6234104217188147, "grad_norm": 0.3867165744304657, "learning_rate": 2.9917845449954947e-05, "loss": 0.08, "step": 10540 }, { "epoch": 0.6240018927071627, "grad_norm": 0.47061219811439514, "learning_rate": 2.9917544848851717e-05, "loss": 0.0896, "step": 10550 }, { "epoch": 0.6245933636955108, "grad_norm": 0.32438725233078003, "learning_rate": 2.9917243700322646e-05, "loss": 0.1043, "step": 10560 }, { "epoch": 0.6251848346838588, "grad_norm": 0.22107170522212982, "learning_rate": 2.9916942004378794e-05, "loss": 0.0984, "step": 10570 }, { "epoch": 0.6257763056722068, "grad_norm": 0.22954751551151276, "learning_rate": 2.9916639761031226e-05, "loss": 0.0887, "step": 10580 }, { "epoch": 0.6263677766605548, "grad_norm": 0.41756436228752136, "learning_rate": 2.9916336970291042e-05, "loss": 0.0905, "step": 10590 }, { "epoch": 0.6269592476489029, "grad_norm": 0.46846309304237366, "learning_rate": 2.991603363216935e-05, "loss": 0.0821, "step": 10600 }, { "epoch": 0.6275507186372509, "grad_norm": 0.5057905316352844, "learning_rate": 2.9915729746677275e-05, "loss": 0.0902, "step": 10610 }, { "epoch": 0.6281421896255989, "grad_norm": 0.3200319707393646, "learning_rate": 2.9915425313825978e-05, "loss": 0.0923, "step": 10620 }, { "epoch": 0.6287336606139469, "grad_norm": 0.29852205514907837, "learning_rate": 2.9915120333626626e-05, "loss": 0.0939, "step": 10630 }, { "epoch": 0.6293251316022949, "grad_norm": 0.30838149785995483, "learning_rate": 2.991481480609041e-05, "loss": 0.0833, "step": 10640 }, { "epoch": 0.629916602590643, "grad_norm": 0.3163038194179535, "learning_rate": 2.991450873122854e-05, "loss": 0.0649, "step": 10650 }, { "epoch": 0.630508073578991, "grad_norm": 0.36003321409225464, "learning_rate": 2.9914202109052257e-05, "loss": 0.0949, "step": 10660 }, { "epoch": 0.631099544567339, "grad_norm": 0.3561979830265045, "learning_rate": 2.9913894939572806e-05, "loss": 0.1018, "step": 10670 }, { "epoch": 0.631691015555687, "grad_norm": 0.26810982823371887, "learning_rate": 2.9913587222801455e-05, "loss": 0.0869, "step": 10680 }, { "epoch": 0.632282486544035, "grad_norm": 0.33971288800239563, "learning_rate": 2.991327895874951e-05, "loss": 0.077, "step": 10690 }, { "epoch": 0.6328739575323831, "grad_norm": 0.651029646396637, "learning_rate": 2.9912970147428267e-05, "loss": 0.0795, "step": 10700 }, { "epoch": 0.6334654285207311, "grad_norm": 0.38723355531692505, "learning_rate": 2.9912660788849068e-05, "loss": 0.1111, "step": 10710 }, { "epoch": 0.6340568995090791, "grad_norm": 0.32248109579086304, "learning_rate": 2.9912350883023267e-05, "loss": 0.1025, "step": 10720 }, { "epoch": 0.6346483704974271, "grad_norm": 0.34935611486434937, "learning_rate": 2.9912040429962235e-05, "loss": 0.0951, "step": 10730 }, { "epoch": 0.6352398414857752, "grad_norm": 0.5009417533874512, "learning_rate": 2.9911729429677357e-05, "loss": 0.0834, "step": 10740 }, { "epoch": 0.6358313124741232, "grad_norm": 0.3891516625881195, "learning_rate": 2.9911417882180057e-05, "loss": 0.0757, "step": 10750 }, { "epoch": 0.6364227834624712, "grad_norm": 0.43632370233535767, "learning_rate": 2.991110578748176e-05, "loss": 0.0954, "step": 10760 }, { "epoch": 0.6370142544508192, "grad_norm": 0.28197354078292847, "learning_rate": 2.991079314559392e-05, "loss": 0.1025, "step": 10770 }, { "epoch": 0.6376057254391672, "grad_norm": 0.2991298735141754, "learning_rate": 2.9910479956528012e-05, "loss": 0.089, "step": 10780 }, { "epoch": 0.6381971964275153, "grad_norm": 0.28788381814956665, "learning_rate": 2.9910166220295527e-05, "loss": 0.0954, "step": 10790 }, { "epoch": 0.6387886674158633, "grad_norm": 0.25542184710502625, "learning_rate": 2.990985193690798e-05, "loss": 0.0815, "step": 10800 }, { "epoch": 0.6393801384042113, "grad_norm": 0.36090949177742004, "learning_rate": 2.9909537106376906e-05, "loss": 0.1048, "step": 10810 }, { "epoch": 0.6399716093925593, "grad_norm": 0.35333752632141113, "learning_rate": 2.9909221728713855e-05, "loss": 0.1071, "step": 10820 }, { "epoch": 0.6405630803809074, "grad_norm": 0.2561059594154358, "learning_rate": 2.9908905803930396e-05, "loss": 0.0897, "step": 10830 }, { "epoch": 0.6411545513692554, "grad_norm": 0.2575996518135071, "learning_rate": 2.9908589332038135e-05, "loss": 0.0927, "step": 10840 }, { "epoch": 0.6417460223576034, "grad_norm": 0.2566252648830414, "learning_rate": 2.9908272313048672e-05, "loss": 0.0727, "step": 10850 }, { "epoch": 0.6423374933459514, "grad_norm": 0.3183698058128357, "learning_rate": 2.990795474697365e-05, "loss": 0.0942, "step": 10860 }, { "epoch": 0.6429289643342994, "grad_norm": 0.38642609119415283, "learning_rate": 2.9907636633824715e-05, "loss": 0.0893, "step": 10870 }, { "epoch": 0.6435204353226475, "grad_norm": 0.2761015295982361, "learning_rate": 2.990731797361355e-05, "loss": 0.0791, "step": 10880 }, { "epoch": 0.6441119063109955, "grad_norm": 0.38166481256484985, "learning_rate": 2.990699876635184e-05, "loss": 0.0844, "step": 10890 }, { "epoch": 0.6447033772993435, "grad_norm": 0.3992398977279663, "learning_rate": 2.990667901205131e-05, "loss": 0.0751, "step": 10900 }, { "epoch": 0.6452948482876915, "grad_norm": 0.3203184902667999, "learning_rate": 2.990635871072368e-05, "loss": 0.1026, "step": 10910 }, { "epoch": 0.6458863192760395, "grad_norm": 0.2907930016517639, "learning_rate": 2.9906037862380712e-05, "loss": 0.1019, "step": 10920 }, { "epoch": 0.6464777902643876, "grad_norm": 0.3328760862350464, "learning_rate": 2.9905716467034178e-05, "loss": 0.0887, "step": 10930 }, { "epoch": 0.6470692612527356, "grad_norm": 0.35013410449028015, "learning_rate": 2.9905394524695877e-05, "loss": 0.0817, "step": 10940 }, { "epoch": 0.6476607322410836, "grad_norm": 0.3336506187915802, "learning_rate": 2.9905072035377615e-05, "loss": 0.0739, "step": 10950 }, { "epoch": 0.6482522032294316, "grad_norm": 0.8287002444267273, "learning_rate": 2.990474899909123e-05, "loss": 0.1048, "step": 10960 }, { "epoch": 0.6488436742177797, "grad_norm": 0.26342493295669556, "learning_rate": 2.990442541584858e-05, "loss": 0.0882, "step": 10970 }, { "epoch": 0.6494351452061277, "grad_norm": 0.2847461998462677, "learning_rate": 2.9904101285661536e-05, "loss": 0.0941, "step": 10980 }, { "epoch": 0.6500266161944757, "grad_norm": 0.30033835768699646, "learning_rate": 2.990377660854199e-05, "loss": 0.0732, "step": 10990 }, { "epoch": 0.6506180871828237, "grad_norm": 0.5474840402603149, "learning_rate": 2.990345138450186e-05, "loss": 0.0815, "step": 11000 }, { "epoch": 0.6512095581711717, "grad_norm": 0.2934534251689911, "learning_rate": 2.9903125613553083e-05, "loss": 0.1077, "step": 11010 }, { "epoch": 0.6518010291595198, "grad_norm": 0.27554166316986084, "learning_rate": 2.990279929570761e-05, "loss": 0.0902, "step": 11020 }, { "epoch": 0.6523925001478678, "grad_norm": 0.25672203302383423, "learning_rate": 2.9902472430977418e-05, "loss": 0.0882, "step": 11030 }, { "epoch": 0.6529839711362158, "grad_norm": 0.30681583285331726, "learning_rate": 2.9902145019374492e-05, "loss": 0.0726, "step": 11040 }, { "epoch": 0.6535754421245638, "grad_norm": 0.5912954211235046, "learning_rate": 2.9901817060910866e-05, "loss": 0.0746, "step": 11050 }, { "epoch": 0.6541669131129119, "grad_norm": 0.2697179317474365, "learning_rate": 2.9901488555598562e-05, "loss": 0.0953, "step": 11060 }, { "epoch": 0.6547583841012599, "grad_norm": 0.3276585042476654, "learning_rate": 2.9901159503449633e-05, "loss": 0.0882, "step": 11070 }, { "epoch": 0.6553498550896079, "grad_norm": 0.39535462856292725, "learning_rate": 2.9900829904476166e-05, "loss": 0.0858, "step": 11080 }, { "epoch": 0.6559413260779559, "grad_norm": 0.3186829388141632, "learning_rate": 2.9900499758690243e-05, "loss": 0.0815, "step": 11090 }, { "epoch": 0.6565327970663039, "grad_norm": 0.41758865118026733, "learning_rate": 2.9900169066103987e-05, "loss": 0.0797, "step": 11100 }, { "epoch": 0.657124268054652, "grad_norm": 0.36694350838661194, "learning_rate": 2.9899837826729533e-05, "loss": 0.1061, "step": 11110 }, { "epoch": 0.657715739043, "grad_norm": 0.2719023525714874, "learning_rate": 2.9899506040579035e-05, "loss": 0.0889, "step": 11120 }, { "epoch": 0.658307210031348, "grad_norm": 0.32084453105926514, "learning_rate": 2.989917370766467e-05, "loss": 0.0954, "step": 11130 }, { "epoch": 0.658898681019696, "grad_norm": 0.2593802511692047, "learning_rate": 2.9898840827998628e-05, "loss": 0.0762, "step": 11140 }, { "epoch": 0.659490152008044, "grad_norm": 0.3657616078853607, "learning_rate": 2.9898507401593134e-05, "loss": 0.0719, "step": 11150 }, { "epoch": 0.6600816229963921, "grad_norm": 0.43748655915260315, "learning_rate": 2.9898173428460412e-05, "loss": 0.1036, "step": 11160 }, { "epoch": 0.6606730939847401, "grad_norm": 0.32723233103752136, "learning_rate": 2.9897838908612728e-05, "loss": 0.0949, "step": 11170 }, { "epoch": 0.661264564973088, "grad_norm": 0.2595810890197754, "learning_rate": 2.9897503842062356e-05, "loss": 0.0938, "step": 11180 }, { "epoch": 0.661856035961436, "grad_norm": 0.32318422198295593, "learning_rate": 2.9897168228821585e-05, "loss": 0.0895, "step": 11190 }, { "epoch": 0.6624475069497842, "grad_norm": 0.29712820053100586, "learning_rate": 2.9896832068902738e-05, "loss": 0.0701, "step": 11200 }, { "epoch": 0.6630389779381322, "grad_norm": 0.2595883309841156, "learning_rate": 2.989649536231815e-05, "loss": 0.0984, "step": 11210 }, { "epoch": 0.6636304489264802, "grad_norm": 0.3787139058113098, "learning_rate": 2.9896158109080173e-05, "loss": 0.0923, "step": 11220 }, { "epoch": 0.6642219199148282, "grad_norm": 0.2658247649669647, "learning_rate": 2.989582030920119e-05, "loss": 0.0981, "step": 11230 }, { "epoch": 0.6648133909031761, "grad_norm": 0.2244441658258438, "learning_rate": 2.989548196269359e-05, "loss": 0.0843, "step": 11240 }, { "epoch": 0.6654048618915243, "grad_norm": 0.4877835810184479, "learning_rate": 2.989514306956979e-05, "loss": 0.0809, "step": 11250 }, { "epoch": 0.6659963328798723, "grad_norm": 0.4567145109176636, "learning_rate": 2.9894803629842233e-05, "loss": 0.0979, "step": 11260 }, { "epoch": 0.6665878038682203, "grad_norm": 0.43773752450942993, "learning_rate": 2.989446364352337e-05, "loss": 0.0952, "step": 11270 }, { "epoch": 0.6671792748565682, "grad_norm": 0.27425840497016907, "learning_rate": 2.9894123110625676e-05, "loss": 0.0926, "step": 11280 }, { "epoch": 0.6677707458449164, "grad_norm": 0.3325795829296112, "learning_rate": 2.9893782031161653e-05, "loss": 0.0744, "step": 11290 }, { "epoch": 0.6683622168332644, "grad_norm": 0.4094293713569641, "learning_rate": 2.9893440405143814e-05, "loss": 0.0773, "step": 11300 }, { "epoch": 0.6689536878216124, "grad_norm": 0.2614808678627014, "learning_rate": 2.989309823258469e-05, "loss": 0.1077, "step": 11310 }, { "epoch": 0.6695451588099604, "grad_norm": 0.3743084967136383, "learning_rate": 2.989275551349685e-05, "loss": 0.1011, "step": 11320 }, { "epoch": 0.6701366297983083, "grad_norm": 0.32871973514556885, "learning_rate": 2.989241224789286e-05, "loss": 0.0878, "step": 11330 }, { "epoch": 0.6707281007866565, "grad_norm": 0.31493186950683594, "learning_rate": 2.9892068435785326e-05, "loss": 0.086, "step": 11340 }, { "epoch": 0.6713195717750045, "grad_norm": 0.2289353460073471, "learning_rate": 2.9891724077186856e-05, "loss": 0.0652, "step": 11350 }, { "epoch": 0.6719110427633525, "grad_norm": 0.45655593276023865, "learning_rate": 2.989137917211009e-05, "loss": 0.1045, "step": 11360 }, { "epoch": 0.6725025137517004, "grad_norm": 0.26754650473594666, "learning_rate": 2.989103372056769e-05, "loss": 0.0926, "step": 11370 }, { "epoch": 0.6730939847400484, "grad_norm": 0.596095621585846, "learning_rate": 2.9890687722572324e-05, "loss": 0.0823, "step": 11380 }, { "epoch": 0.6736854557283966, "grad_norm": 0.3252650797367096, "learning_rate": 2.9890341178136697e-05, "loss": 0.0829, "step": 11390 }, { "epoch": 0.6742769267167446, "grad_norm": 0.4384790062904358, "learning_rate": 2.988999408727352e-05, "loss": 0.0702, "step": 11400 }, { "epoch": 0.6748683977050925, "grad_norm": 0.5699938535690308, "learning_rate": 2.9889646449995532e-05, "loss": 0.0869, "step": 11410 }, { "epoch": 0.6754598686934405, "grad_norm": 0.29993510246276855, "learning_rate": 2.988929826631549e-05, "loss": 0.0951, "step": 11420 }, { "epoch": 0.6760513396817887, "grad_norm": 0.3188250958919525, "learning_rate": 2.988894953624617e-05, "loss": 0.0802, "step": 11430 }, { "epoch": 0.6766428106701367, "grad_norm": 0.3355191946029663, "learning_rate": 2.9888600259800377e-05, "loss": 0.0845, "step": 11440 }, { "epoch": 0.6772342816584846, "grad_norm": 0.3795005679130554, "learning_rate": 2.988825043699092e-05, "loss": 0.0763, "step": 11450 }, { "epoch": 0.6778257526468326, "grad_norm": 1.1514251232147217, "learning_rate": 2.988790006783064e-05, "loss": 0.0998, "step": 11460 }, { "epoch": 0.6784172236351806, "grad_norm": 0.2580040991306305, "learning_rate": 2.9887549152332393e-05, "loss": 0.0932, "step": 11470 }, { "epoch": 0.6790086946235288, "grad_norm": 0.41304513812065125, "learning_rate": 2.9887197690509057e-05, "loss": 0.0899, "step": 11480 }, { "epoch": 0.6796001656118767, "grad_norm": 0.3571810722351074, "learning_rate": 2.988684568237353e-05, "loss": 0.0861, "step": 11490 }, { "epoch": 0.6801916366002247, "grad_norm": 0.409320592880249, "learning_rate": 2.9886493127938726e-05, "loss": 0.0763, "step": 11500 }, { "epoch": 0.6807831075885727, "grad_norm": 0.5272565484046936, "learning_rate": 2.988614002721759e-05, "loss": 0.1116, "step": 11510 }, { "epoch": 0.6813745785769209, "grad_norm": 0.30267980694770813, "learning_rate": 2.988578638022307e-05, "loss": 0.106, "step": 11520 }, { "epoch": 0.6819660495652689, "grad_norm": 0.27288180589675903, "learning_rate": 2.9885432186968155e-05, "loss": 0.0825, "step": 11530 }, { "epoch": 0.6825575205536168, "grad_norm": 0.4776425063610077, "learning_rate": 2.9885077447465836e-05, "loss": 0.0836, "step": 11540 }, { "epoch": 0.6831489915419648, "grad_norm": 0.3362545371055603, "learning_rate": 2.9884722161729122e-05, "loss": 0.0758, "step": 11550 }, { "epoch": 0.6837404625303128, "grad_norm": 0.28165891766548157, "learning_rate": 2.988436632977107e-05, "loss": 0.108, "step": 11560 }, { "epoch": 0.684331933518661, "grad_norm": 0.21505916118621826, "learning_rate": 2.9884009951604726e-05, "loss": 0.0928, "step": 11570 }, { "epoch": 0.684923404507009, "grad_norm": 0.44760581851005554, "learning_rate": 2.9883653027243166e-05, "loss": 0.1016, "step": 11580 }, { "epoch": 0.685514875495357, "grad_norm": 0.2832064628601074, "learning_rate": 2.9883295556699498e-05, "loss": 0.0923, "step": 11590 }, { "epoch": 0.6861063464837049, "grad_norm": 0.24620947241783142, "learning_rate": 2.9882937539986834e-05, "loss": 0.0787, "step": 11600 }, { "epoch": 0.6866978174720529, "grad_norm": 0.298103392124176, "learning_rate": 2.988257897711831e-05, "loss": 0.0997, "step": 11610 }, { "epoch": 0.687289288460401, "grad_norm": 0.24683618545532227, "learning_rate": 2.988221986810708e-05, "loss": 0.0981, "step": 11620 }, { "epoch": 0.687880759448749, "grad_norm": 0.25583863258361816, "learning_rate": 2.988186021296634e-05, "loss": 0.0751, "step": 11630 }, { "epoch": 0.688472230437097, "grad_norm": 0.377777099609375, "learning_rate": 2.988150001170927e-05, "loss": 0.0759, "step": 11640 }, { "epoch": 0.689063701425445, "grad_norm": 0.38316184282302856, "learning_rate": 2.9881139264349094e-05, "loss": 0.0807, "step": 11650 }, { "epoch": 0.6896551724137931, "grad_norm": 0.4298362731933594, "learning_rate": 2.9880777970899055e-05, "loss": 0.0963, "step": 11660 }, { "epoch": 0.6902466434021411, "grad_norm": 0.4884297549724579, "learning_rate": 2.9880416131372404e-05, "loss": 0.0929, "step": 11670 }, { "epoch": 0.6908381143904891, "grad_norm": 0.25943416357040405, "learning_rate": 2.9880053745782424e-05, "loss": 0.092, "step": 11680 }, { "epoch": 0.6914295853788371, "grad_norm": 0.22880136966705322, "learning_rate": 2.987969081414241e-05, "loss": 0.0854, "step": 11690 }, { "epoch": 0.6920210563671851, "grad_norm": 0.3117683529853821, "learning_rate": 2.9879327336465686e-05, "loss": 0.0758, "step": 11700 }, { "epoch": 0.6926125273555332, "grad_norm": 0.4262247383594513, "learning_rate": 2.9878963312765586e-05, "loss": 0.1095, "step": 11710 }, { "epoch": 0.6932039983438812, "grad_norm": 0.28359678387641907, "learning_rate": 2.987859874305547e-05, "loss": 0.0866, "step": 11720 }, { "epoch": 0.6937954693322292, "grad_norm": 0.7036268711090088, "learning_rate": 2.9878233627348713e-05, "loss": 0.088, "step": 11730 }, { "epoch": 0.6943869403205772, "grad_norm": 0.29166319966316223, "learning_rate": 2.987786796565872e-05, "loss": 0.0882, "step": 11740 }, { "epoch": 0.6949784113089253, "grad_norm": 0.5486705899238586, "learning_rate": 2.9877501757998908e-05, "loss": 0.0717, "step": 11750 }, { "epoch": 0.6955698822972733, "grad_norm": 1.5386556386947632, "learning_rate": 2.987713500438271e-05, "loss": 0.0926, "step": 11760 }, { "epoch": 0.6961613532856213, "grad_norm": 0.8348132371902466, "learning_rate": 2.987676770482359e-05, "loss": 0.1014, "step": 11770 }, { "epoch": 0.6967528242739693, "grad_norm": 0.34573131799697876, "learning_rate": 2.9876399859335025e-05, "loss": 0.0878, "step": 11780 }, { "epoch": 0.6973442952623173, "grad_norm": 0.3745811879634857, "learning_rate": 2.9876031467930515e-05, "loss": 0.0771, "step": 11790 }, { "epoch": 0.6979357662506654, "grad_norm": 0.4268810749053955, "learning_rate": 2.987566253062358e-05, "loss": 0.0655, "step": 11800 }, { "epoch": 0.6985272372390134, "grad_norm": 0.24492980539798737, "learning_rate": 2.987529304742775e-05, "loss": 0.0995, "step": 11810 }, { "epoch": 0.6991187082273614, "grad_norm": 0.35192152857780457, "learning_rate": 2.9874923018356594e-05, "loss": 0.0866, "step": 11820 }, { "epoch": 0.6997101792157094, "grad_norm": 0.2349397987127304, "learning_rate": 2.9874552443423685e-05, "loss": 0.0817, "step": 11830 }, { "epoch": 0.7003016502040575, "grad_norm": 0.45812639594078064, "learning_rate": 2.987418132264263e-05, "loss": 0.0875, "step": 11840 }, { "epoch": 0.7008931211924055, "grad_norm": 0.308860719203949, "learning_rate": 2.9873809656027034e-05, "loss": 0.0767, "step": 11850 }, { "epoch": 0.7014845921807535, "grad_norm": 0.28422534465789795, "learning_rate": 2.987343744359055e-05, "loss": 0.0974, "step": 11860 }, { "epoch": 0.7020760631691015, "grad_norm": 0.3477625846862793, "learning_rate": 2.9873064685346828e-05, "loss": 0.0903, "step": 11870 }, { "epoch": 0.7026675341574495, "grad_norm": 0.4712865948677063, "learning_rate": 2.9872691381309552e-05, "loss": 0.0875, "step": 11880 }, { "epoch": 0.7032590051457976, "grad_norm": 0.21796923875808716, "learning_rate": 2.987231753149242e-05, "loss": 0.0653, "step": 11890 }, { "epoch": 0.7038504761341456, "grad_norm": 1.1445534229278564, "learning_rate": 2.987194313590915e-05, "loss": 0.0808, "step": 11900 }, { "epoch": 0.7044419471224936, "grad_norm": 0.41337600350379944, "learning_rate": 2.987156819457348e-05, "loss": 0.0929, "step": 11910 }, { "epoch": 0.7050334181108416, "grad_norm": 2.010338544845581, "learning_rate": 2.987119270749917e-05, "loss": 0.0881, "step": 11920 }, { "epoch": 0.7056248890991896, "grad_norm": 0.2787507474422455, "learning_rate": 2.98708166747e-05, "loss": 0.0863, "step": 11930 }, { "epoch": 0.7062163600875377, "grad_norm": 0.24534118175506592, "learning_rate": 2.987044009618977e-05, "loss": 0.0803, "step": 11940 }, { "epoch": 0.7068078310758857, "grad_norm": 0.31736212968826294, "learning_rate": 2.9870062971982297e-05, "loss": 0.0705, "step": 11950 }, { "epoch": 0.7073993020642337, "grad_norm": 0.3646486699581146, "learning_rate": 2.9869685302091423e-05, "loss": 0.1113, "step": 11960 }, { "epoch": 0.7079907730525817, "grad_norm": 0.8245124816894531, "learning_rate": 2.9869307086531007e-05, "loss": 0.0848, "step": 11970 }, { "epoch": 0.7085822440409298, "grad_norm": 0.30753424763679504, "learning_rate": 2.986892832531492e-05, "loss": 0.0957, "step": 11980 }, { "epoch": 0.7091737150292778, "grad_norm": 0.3957972526550293, "learning_rate": 2.9868549018457073e-05, "loss": 0.0699, "step": 11990 }, { "epoch": 0.7097651860176258, "grad_norm": 0.2646888792514801, "learning_rate": 2.986816916597138e-05, "loss": 0.0759, "step": 12000 }, { "epoch": 0.7103566570059738, "grad_norm": 0.26201337575912476, "learning_rate": 2.9867788767871782e-05, "loss": 0.0944, "step": 12010 }, { "epoch": 0.7109481279943218, "grad_norm": 0.26167750358581543, "learning_rate": 2.9867407824172233e-05, "loss": 0.0977, "step": 12020 }, { "epoch": 0.7115395989826699, "grad_norm": 0.24881511926651, "learning_rate": 2.9867026334886722e-05, "loss": 0.0802, "step": 12030 }, { "epoch": 0.7121310699710179, "grad_norm": 0.37105175852775574, "learning_rate": 2.9866644300029244e-05, "loss": 0.0997, "step": 12040 }, { "epoch": 0.7127225409593659, "grad_norm": 1.0464709997177124, "learning_rate": 2.9866261719613817e-05, "loss": 0.0772, "step": 12050 }, { "epoch": 0.7133140119477139, "grad_norm": 2.3640549182891846, "learning_rate": 2.9865878593654477e-05, "loss": 0.1013, "step": 12060 }, { "epoch": 0.713905482936062, "grad_norm": 0.43731045722961426, "learning_rate": 2.986549492216529e-05, "loss": 0.0944, "step": 12070 }, { "epoch": 0.71449695392441, "grad_norm": 0.29333367943763733, "learning_rate": 2.986511070516033e-05, "loss": 0.0894, "step": 12080 }, { "epoch": 0.715088424912758, "grad_norm": 0.3198452591896057, "learning_rate": 2.9864725942653704e-05, "loss": 0.0884, "step": 12090 }, { "epoch": 0.715679895901106, "grad_norm": 0.24759267270565033, "learning_rate": 2.9864340634659525e-05, "loss": 0.0778, "step": 12100 }, { "epoch": 0.716271366889454, "grad_norm": 1.9039976596832275, "learning_rate": 2.9863954781191936e-05, "loss": 0.089, "step": 12110 }, { "epoch": 0.7168628378778021, "grad_norm": 0.685551106929779, "learning_rate": 2.9863568382265094e-05, "loss": 0.0996, "step": 12120 }, { "epoch": 0.7174543088661501, "grad_norm": 0.2940368354320526, "learning_rate": 2.9863181437893178e-05, "loss": 0.0975, "step": 12130 }, { "epoch": 0.7180457798544981, "grad_norm": 0.2861166298389435, "learning_rate": 2.9862793948090394e-05, "loss": 0.0854, "step": 12140 }, { "epoch": 0.7186372508428461, "grad_norm": 0.20230960845947266, "learning_rate": 2.9862405912870952e-05, "loss": 0.0683, "step": 12150 }, { "epoch": 0.7192287218311941, "grad_norm": 0.5340808033943176, "learning_rate": 2.98620173322491e-05, "loss": 0.0972, "step": 12160 }, { "epoch": 0.7198201928195422, "grad_norm": 0.5474090576171875, "learning_rate": 2.9861628206239094e-05, "loss": 0.0952, "step": 12170 }, { "epoch": 0.7204116638078902, "grad_norm": 0.3530290126800537, "learning_rate": 2.9861238534855213e-05, "loss": 0.0906, "step": 12180 }, { "epoch": 0.7210031347962382, "grad_norm": 0.24072228372097015, "learning_rate": 2.986084831811176e-05, "loss": 0.066, "step": 12190 }, { "epoch": 0.7215946057845862, "grad_norm": 0.5090463757514954, "learning_rate": 2.9860457556023054e-05, "loss": 0.072, "step": 12200 }, { "epoch": 0.7221860767729343, "grad_norm": 0.3207811415195465, "learning_rate": 2.9860066248603427e-05, "loss": 0.1014, "step": 12210 }, { "epoch": 0.7227775477612823, "grad_norm": 0.2935757040977478, "learning_rate": 2.985967439586725e-05, "loss": 0.0985, "step": 12220 }, { "epoch": 0.7233690187496303, "grad_norm": 0.42206332087516785, "learning_rate": 2.9859281997828893e-05, "loss": 0.0906, "step": 12230 }, { "epoch": 0.7239604897379783, "grad_norm": 0.3151499927043915, "learning_rate": 2.9858889054502765e-05, "loss": 0.0844, "step": 12240 }, { "epoch": 0.7245519607263263, "grad_norm": 0.4058063328266144, "learning_rate": 2.9858495565903276e-05, "loss": 0.0735, "step": 12250 }, { "epoch": 0.7251434317146744, "grad_norm": 0.3654170036315918, "learning_rate": 2.9858101532044875e-05, "loss": 0.0838, "step": 12260 }, { "epoch": 0.7257349027030224, "grad_norm": 0.28188443183898926, "learning_rate": 2.9857706952942017e-05, "loss": 0.1016, "step": 12270 }, { "epoch": 0.7263263736913704, "grad_norm": 0.27893078327178955, "learning_rate": 2.9857311828609184e-05, "loss": 0.0907, "step": 12280 }, { "epoch": 0.7269178446797184, "grad_norm": 0.5286269187927246, "learning_rate": 2.985691615906087e-05, "loss": 0.0915, "step": 12290 }, { "epoch": 0.7275093156680665, "grad_norm": 0.3224281370639801, "learning_rate": 2.98565199443116e-05, "loss": 0.0661, "step": 12300 }, { "epoch": 0.7281007866564145, "grad_norm": 0.3735649287700653, "learning_rate": 2.9856123184375918e-05, "loss": 0.097, "step": 12310 }, { "epoch": 0.7286922576447625, "grad_norm": 0.23711223900318146, "learning_rate": 2.9855725879268373e-05, "loss": 0.0803, "step": 12320 }, { "epoch": 0.7292837286331105, "grad_norm": 0.4189877212047577, "learning_rate": 2.9855328029003552e-05, "loss": 0.0895, "step": 12330 }, { "epoch": 0.7298751996214585, "grad_norm": 0.2918144762516022, "learning_rate": 2.9854929633596054e-05, "loss": 0.084, "step": 12340 }, { "epoch": 0.7304666706098066, "grad_norm": 0.6429993510246277, "learning_rate": 2.98545306930605e-05, "loss": 0.0816, "step": 12350 }, { "epoch": 0.7310581415981546, "grad_norm": 0.31851524114608765, "learning_rate": 2.9854131207411522e-05, "loss": 0.09, "step": 12360 }, { "epoch": 0.7316496125865026, "grad_norm": 0.33354780077934265, "learning_rate": 2.985373117666379e-05, "loss": 0.0877, "step": 12370 }, { "epoch": 0.7322410835748506, "grad_norm": 0.2886171042919159, "learning_rate": 2.9853330600831982e-05, "loss": 0.0839, "step": 12380 }, { "epoch": 0.7328325545631986, "grad_norm": 0.4005396366119385, "learning_rate": 2.985292947993079e-05, "loss": 0.0814, "step": 12390 }, { "epoch": 0.7334240255515467, "grad_norm": 0.38587436079978943, "learning_rate": 2.9852527813974945e-05, "loss": 0.0778, "step": 12400 }, { "epoch": 0.7340154965398947, "grad_norm": 0.32461845874786377, "learning_rate": 2.9852125602979174e-05, "loss": 0.1025, "step": 12410 }, { "epoch": 0.7346069675282427, "grad_norm": 1.9205691814422607, "learning_rate": 2.9851722846958247e-05, "loss": 0.1011, "step": 12420 }, { "epoch": 0.7351984385165907, "grad_norm": 0.3059195280075073, "learning_rate": 2.9851319545926943e-05, "loss": 0.0863, "step": 12430 }, { "epoch": 0.7357899095049388, "grad_norm": 0.21128615736961365, "learning_rate": 2.9850915699900055e-05, "loss": 0.0859, "step": 12440 }, { "epoch": 0.7363813804932868, "grad_norm": 0.3509645462036133, "learning_rate": 2.9850511308892416e-05, "loss": 0.0769, "step": 12450 }, { "epoch": 0.7369728514816348, "grad_norm": 0.273505300283432, "learning_rate": 2.985010637291885e-05, "loss": 0.0996, "step": 12460 }, { "epoch": 0.7375643224699828, "grad_norm": 0.4372437000274658, "learning_rate": 2.984970089199423e-05, "loss": 0.0976, "step": 12470 }, { "epoch": 0.7381557934583308, "grad_norm": 0.3266538381576538, "learning_rate": 2.9849294866133424e-05, "loss": 0.0893, "step": 12480 }, { "epoch": 0.7387472644466789, "grad_norm": 0.2518570125102997, "learning_rate": 2.984888829535134e-05, "loss": 0.0744, "step": 12490 }, { "epoch": 0.7393387354350269, "grad_norm": 0.8435744643211365, "learning_rate": 2.98484811796629e-05, "loss": 0.0789, "step": 12500 }, { "epoch": 0.7399302064233749, "grad_norm": 0.3686636686325073, "learning_rate": 2.9848073519083033e-05, "loss": 0.1043, "step": 12510 }, { "epoch": 0.7405216774117229, "grad_norm": 0.511618971824646, "learning_rate": 2.9847665313626708e-05, "loss": 0.1063, "step": 12520 }, { "epoch": 0.741113148400071, "grad_norm": 0.4587442874908447, "learning_rate": 2.98472565633089e-05, "loss": 0.0983, "step": 12530 }, { "epoch": 0.741704619388419, "grad_norm": 0.4443434178829193, "learning_rate": 2.9846847268144616e-05, "loss": 0.0773, "step": 12540 }, { "epoch": 0.742296090376767, "grad_norm": 0.6467982530593872, "learning_rate": 2.9846437428148867e-05, "loss": 0.0752, "step": 12550 }, { "epoch": 0.742887561365115, "grad_norm": 0.4975701570510864, "learning_rate": 2.98460270433367e-05, "loss": 0.1044, "step": 12560 }, { "epoch": 0.743479032353463, "grad_norm": 0.2505386471748352, "learning_rate": 2.9845616113723167e-05, "loss": 0.0855, "step": 12570 }, { "epoch": 0.7440705033418111, "grad_norm": 0.41679060459136963, "learning_rate": 2.984520463932336e-05, "loss": 0.082, "step": 12580 }, { "epoch": 0.7446619743301591, "grad_norm": 0.6668739318847656, "learning_rate": 2.9844792620152364e-05, "loss": 0.0888, "step": 12590 }, { "epoch": 0.7452534453185071, "grad_norm": 0.38528838753700256, "learning_rate": 2.9844380056225305e-05, "loss": 0.072, "step": 12600 }, { "epoch": 0.7458449163068551, "grad_norm": 0.3763620853424072, "learning_rate": 2.9843966947557328e-05, "loss": 0.0957, "step": 12610 }, { "epoch": 0.7464363872952031, "grad_norm": 0.3889075219631195, "learning_rate": 2.9843553294163583e-05, "loss": 0.088, "step": 12620 }, { "epoch": 0.7470278582835512, "grad_norm": 0.3001166880130768, "learning_rate": 2.984313909605926e-05, "loss": 0.0907, "step": 12630 }, { "epoch": 0.7476193292718992, "grad_norm": 0.28330886363983154, "learning_rate": 2.9842724353259557e-05, "loss": 0.0777, "step": 12640 }, { "epoch": 0.7482108002602472, "grad_norm": 0.5447905659675598, "learning_rate": 2.9842309065779683e-05, "loss": 0.078, "step": 12650 }, { "epoch": 0.7488022712485952, "grad_norm": 0.4370589256286621, "learning_rate": 2.984189323363489e-05, "loss": 0.0954, "step": 12660 }, { "epoch": 0.7493937422369433, "grad_norm": 0.30857786536216736, "learning_rate": 2.9841476856840433e-05, "loss": 0.1093, "step": 12670 }, { "epoch": 0.7499852132252913, "grad_norm": 0.28249841928482056, "learning_rate": 2.9841059935411585e-05, "loss": 0.0972, "step": 12680 }, { "epoch": 0.7505766842136393, "grad_norm": 0.5655917525291443, "learning_rate": 2.9840642469363662e-05, "loss": 0.0927, "step": 12690 }, { "epoch": 0.7511681552019873, "grad_norm": 1.6921757459640503, "learning_rate": 2.9840224458711968e-05, "loss": 0.0804, "step": 12700 }, { "epoch": 0.7517596261903353, "grad_norm": 0.3293375074863434, "learning_rate": 2.983980590347185e-05, "loss": 0.1116, "step": 12710 }, { "epoch": 0.7523510971786834, "grad_norm": 1.132238507270813, "learning_rate": 2.983938680365867e-05, "loss": 0.0914, "step": 12720 }, { "epoch": 0.7529425681670314, "grad_norm": 0.7722944021224976, "learning_rate": 2.9838967159287795e-05, "loss": 0.0846, "step": 12730 }, { "epoch": 0.7535340391553794, "grad_norm": 0.4200759828090668, "learning_rate": 2.9838546970374637e-05, "loss": 0.0937, "step": 12740 }, { "epoch": 0.7541255101437274, "grad_norm": 1.7878185510635376, "learning_rate": 2.9838126236934617e-05, "loss": 0.0748, "step": 12750 }, { "epoch": 0.7547169811320755, "grad_norm": 0.5759223103523254, "learning_rate": 2.9837704958983166e-05, "loss": 0.1088, "step": 12760 }, { "epoch": 0.7553084521204235, "grad_norm": 0.40230458974838257, "learning_rate": 2.9837283136535748e-05, "loss": 0.0914, "step": 12770 }, { "epoch": 0.7558999231087715, "grad_norm": 0.5083870887756348, "learning_rate": 2.9836860769607844e-05, "loss": 0.086, "step": 12780 }, { "epoch": 0.7564913940971195, "grad_norm": 0.6145972609519958, "learning_rate": 2.9836437858214943e-05, "loss": 0.0845, "step": 12790 }, { "epoch": 0.7570828650854675, "grad_norm": 0.7405126094818115, "learning_rate": 2.983601440237258e-05, "loss": 0.0772, "step": 12800 }, { "epoch": 0.7576743360738156, "grad_norm": 0.7130064368247986, "learning_rate": 2.9835590402096283e-05, "loss": 0.1136, "step": 12810 }, { "epoch": 0.7582658070621636, "grad_norm": 0.3803260326385498, "learning_rate": 2.9835165857401623e-05, "loss": 0.0889, "step": 12820 }, { "epoch": 0.7588572780505116, "grad_norm": 0.31797465682029724, "learning_rate": 2.9834740768304166e-05, "loss": 0.0969, "step": 12830 }, { "epoch": 0.7594487490388596, "grad_norm": 0.3443801999092102, "learning_rate": 2.9834315134819518e-05, "loss": 0.0944, "step": 12840 }, { "epoch": 0.7600402200272076, "grad_norm": 2.023477554321289, "learning_rate": 2.98338889569633e-05, "loss": 0.0734, "step": 12850 }, { "epoch": 0.7606316910155557, "grad_norm": 0.25107207894325256, "learning_rate": 2.9833462234751147e-05, "loss": 0.1069, "step": 12860 }, { "epoch": 0.7612231620039037, "grad_norm": 0.3578993082046509, "learning_rate": 2.983303496819872e-05, "loss": 0.0943, "step": 12870 }, { "epoch": 0.7618146329922517, "grad_norm": 0.317683607339859, "learning_rate": 2.9832607157321698e-05, "loss": 0.0987, "step": 12880 }, { "epoch": 0.7624061039805997, "grad_norm": 0.37252476811408997, "learning_rate": 2.9832178802135785e-05, "loss": 0.0903, "step": 12890 }, { "epoch": 0.7629975749689478, "grad_norm": 0.45283013582229614, "learning_rate": 2.983174990265669e-05, "loss": 0.0831, "step": 12900 }, { "epoch": 0.7635890459572958, "grad_norm": 0.5238233208656311, "learning_rate": 2.9831320458900162e-05, "loss": 0.1035, "step": 12910 }, { "epoch": 0.7641805169456438, "grad_norm": 0.36557215452194214, "learning_rate": 2.9830890470881958e-05, "loss": 0.0992, "step": 12920 }, { "epoch": 0.7647719879339918, "grad_norm": 0.4904554784297943, "learning_rate": 2.983045993861786e-05, "loss": 0.0937, "step": 12930 }, { "epoch": 0.7653634589223398, "grad_norm": 0.25248825550079346, "learning_rate": 2.9830028862123656e-05, "loss": 0.0817, "step": 12940 }, { "epoch": 0.7659549299106879, "grad_norm": 0.5732356905937195, "learning_rate": 2.9829597241415173e-05, "loss": 0.0754, "step": 12950 }, { "epoch": 0.7665464008990359, "grad_norm": 0.29634422063827515, "learning_rate": 2.9829165076508247e-05, "loss": 0.0932, "step": 12960 }, { "epoch": 0.7671378718873839, "grad_norm": 0.3393080234527588, "learning_rate": 2.982873236741875e-05, "loss": 0.0971, "step": 12970 }, { "epoch": 0.7677293428757319, "grad_norm": 0.29851317405700684, "learning_rate": 2.982829911416254e-05, "loss": 0.0803, "step": 12980 }, { "epoch": 0.76832081386408, "grad_norm": 0.5608127117156982, "learning_rate": 2.982786531675553e-05, "loss": 0.0743, "step": 12990 }, { "epoch": 0.768912284852428, "grad_norm": 4.0223469734191895, "learning_rate": 2.9827430975213637e-05, "loss": 0.087, "step": 13000 }, { "epoch": 0.769503755840776, "grad_norm": 0.3172115981578827, "learning_rate": 2.9826996089552795e-05, "loss": 0.1096, "step": 13010 }, { "epoch": 0.770095226829124, "grad_norm": 0.4315185248851776, "learning_rate": 2.982656065978897e-05, "loss": 0.0978, "step": 13020 }, { "epoch": 0.770686697817472, "grad_norm": 0.38713639974594116, "learning_rate": 2.9826124685938134e-05, "loss": 0.0915, "step": 13030 }, { "epoch": 0.7712781688058201, "grad_norm": 0.3993960916996002, "learning_rate": 2.9825688168016285e-05, "loss": 0.0858, "step": 13040 }, { "epoch": 0.7718696397941681, "grad_norm": 0.4508276879787445, "learning_rate": 2.982525110603945e-05, "loss": 0.0782, "step": 13050 }, { "epoch": 0.7724611107825161, "grad_norm": 0.3396393954753876, "learning_rate": 2.9824813500023666e-05, "loss": 0.0914, "step": 13060 }, { "epoch": 0.7730525817708641, "grad_norm": 0.34203964471817017, "learning_rate": 2.9824375349984988e-05, "loss": 0.0963, "step": 13070 }, { "epoch": 0.7736440527592121, "grad_norm": 0.5328509211540222, "learning_rate": 2.9823936655939495e-05, "loss": 0.0977, "step": 13080 }, { "epoch": 0.7742355237475602, "grad_norm": 0.4563915729522705, "learning_rate": 2.9823497417903283e-05, "loss": 0.0819, "step": 13090 }, { "epoch": 0.7748269947359082, "grad_norm": 0.33608871698379517, "learning_rate": 2.982305763589248e-05, "loss": 0.0661, "step": 13100 }, { "epoch": 0.7754184657242562, "grad_norm": 0.3509613871574402, "learning_rate": 2.9822617309923214e-05, "loss": 0.102, "step": 13110 }, { "epoch": 0.7760099367126042, "grad_norm": 0.33111199736595154, "learning_rate": 2.9822176440011652e-05, "loss": 0.0865, "step": 13120 }, { "epoch": 0.7766014077009523, "grad_norm": 0.33350345492362976, "learning_rate": 2.9821735026173967e-05, "loss": 0.0805, "step": 13130 }, { "epoch": 0.7771928786893003, "grad_norm": 0.21321989595890045, "learning_rate": 2.982129306842636e-05, "loss": 0.082, "step": 13140 }, { "epoch": 0.7777843496776483, "grad_norm": 0.5107712745666504, "learning_rate": 2.9820850566785044e-05, "loss": 0.0732, "step": 13150 }, { "epoch": 0.7783758206659963, "grad_norm": 0.33789125084877014, "learning_rate": 2.9820407521266267e-05, "loss": 0.094, "step": 13160 }, { "epoch": 0.7789672916543443, "grad_norm": 0.4934857487678528, "learning_rate": 2.9819963931886283e-05, "loss": 0.0981, "step": 13170 }, { "epoch": 0.7795587626426924, "grad_norm": 1.695789098739624, "learning_rate": 2.981951979866137e-05, "loss": 0.096, "step": 13180 }, { "epoch": 0.7801502336310404, "grad_norm": 0.3156895637512207, "learning_rate": 2.981907512160782e-05, "loss": 0.0825, "step": 13190 }, { "epoch": 0.7807417046193884, "grad_norm": 0.48421138525009155, "learning_rate": 2.9818629900741963e-05, "loss": 0.0733, "step": 13200 }, { "epoch": 0.7813331756077364, "grad_norm": 0.932627260684967, "learning_rate": 2.981818413608013e-05, "loss": 0.0894, "step": 13210 }, { "epoch": 0.7819246465960845, "grad_norm": 0.3360190987586975, "learning_rate": 2.981773782763868e-05, "loss": 0.0894, "step": 13220 }, { "epoch": 0.7825161175844325, "grad_norm": 0.3028421998023987, "learning_rate": 2.9817290975433993e-05, "loss": 0.0878, "step": 13230 }, { "epoch": 0.7831075885727805, "grad_norm": 0.48906025290489197, "learning_rate": 2.9816843579482463e-05, "loss": 0.0917, "step": 13240 }, { "epoch": 0.7836990595611285, "grad_norm": 0.5520796775817871, "learning_rate": 2.9816395639800514e-05, "loss": 0.0651, "step": 13250 }, { "epoch": 0.7842905305494765, "grad_norm": 0.361269474029541, "learning_rate": 2.9815947156404575e-05, "loss": 0.107, "step": 13260 }, { "epoch": 0.7848820015378246, "grad_norm": 1.1876903772354126, "learning_rate": 2.9815498129311113e-05, "loss": 0.0929, "step": 13270 }, { "epoch": 0.7854734725261726, "grad_norm": 0.3364367187023163, "learning_rate": 2.98150485585366e-05, "loss": 0.0885, "step": 13280 }, { "epoch": 0.7860649435145206, "grad_norm": 0.29096996784210205, "learning_rate": 2.9814598444097543e-05, "loss": 0.0846, "step": 13290 }, { "epoch": 0.7866564145028686, "grad_norm": 0.7503502368927002, "learning_rate": 2.9814147786010446e-05, "loss": 0.0779, "step": 13300 }, { "epoch": 0.7872478854912166, "grad_norm": 0.5518102645874023, "learning_rate": 2.9813696584291856e-05, "loss": 0.0895, "step": 13310 }, { "epoch": 0.7878393564795647, "grad_norm": 0.2333541214466095, "learning_rate": 2.981324483895833e-05, "loss": 0.0842, "step": 13320 }, { "epoch": 0.7884308274679127, "grad_norm": 0.4378890097141266, "learning_rate": 2.981279255002644e-05, "loss": 0.0923, "step": 13330 }, { "epoch": 0.7890222984562607, "grad_norm": 0.3336389660835266, "learning_rate": 2.981233971751279e-05, "loss": 0.0802, "step": 13340 }, { "epoch": 0.7896137694446087, "grad_norm": 0.5928786993026733, "learning_rate": 2.981188634143399e-05, "loss": 0.0658, "step": 13350 }, { "epoch": 0.7902052404329568, "grad_norm": 0.47597411274909973, "learning_rate": 2.9811432421806688e-05, "loss": 0.0846, "step": 13360 }, { "epoch": 0.7907967114213048, "grad_norm": 0.27026069164276123, "learning_rate": 2.9810977958647533e-05, "loss": 0.1069, "step": 13370 }, { "epoch": 0.7913881824096528, "grad_norm": 0.2846652865409851, "learning_rate": 2.9810522951973206e-05, "loss": 0.0953, "step": 13380 }, { "epoch": 0.7919796533980008, "grad_norm": 0.6654115915298462, "learning_rate": 2.9810067401800403e-05, "loss": 0.0934, "step": 13390 }, { "epoch": 0.7925711243863488, "grad_norm": 0.3949263095855713, "learning_rate": 2.980961130814584e-05, "loss": 0.0822, "step": 13400 }, { "epoch": 0.7931625953746969, "grad_norm": 0.3673669099807739, "learning_rate": 2.9809154671026257e-05, "loss": 0.1006, "step": 13410 }, { "epoch": 0.7937540663630449, "grad_norm": 0.3966699242591858, "learning_rate": 2.980869749045841e-05, "loss": 0.1011, "step": 13420 }, { "epoch": 0.7943455373513929, "grad_norm": 0.28322598338127136, "learning_rate": 2.9808239766459074e-05, "loss": 0.0909, "step": 13430 }, { "epoch": 0.7949370083397409, "grad_norm": 0.2288777083158493, "learning_rate": 2.980778149904505e-05, "loss": 0.0698, "step": 13440 }, { "epoch": 0.795528479328089, "grad_norm": 0.45226573944091797, "learning_rate": 2.980732268823315e-05, "loss": 0.0718, "step": 13450 }, { "epoch": 0.796119950316437, "grad_norm": 0.273052453994751, "learning_rate": 2.9806863334040216e-05, "loss": 0.1022, "step": 13460 }, { "epoch": 0.796711421304785, "grad_norm": 0.34088313579559326, "learning_rate": 2.9806403436483103e-05, "loss": 0.0804, "step": 13470 }, { "epoch": 0.797302892293133, "grad_norm": 0.2702556550502777, "learning_rate": 2.9805942995578684e-05, "loss": 0.0759, "step": 13480 }, { "epoch": 0.797894363281481, "grad_norm": 0.6529324054718018, "learning_rate": 2.980548201134386e-05, "loss": 0.0837, "step": 13490 }, { "epoch": 0.7984858342698291, "grad_norm": 0.9305248856544495, "learning_rate": 2.9805020483795548e-05, "loss": 0.0692, "step": 13500 }, { "epoch": 0.7990773052581771, "grad_norm": 0.3876625895500183, "learning_rate": 2.980455841295068e-05, "loss": 0.0965, "step": 13510 }, { "epoch": 0.7996687762465251, "grad_norm": 0.3762448728084564, "learning_rate": 2.980409579882622e-05, "loss": 0.1043, "step": 13520 }, { "epoch": 0.8002602472348731, "grad_norm": 0.28309690952301025, "learning_rate": 2.980363264143914e-05, "loss": 0.0809, "step": 13530 }, { "epoch": 0.8008517182232211, "grad_norm": 0.24442128837108612, "learning_rate": 2.9803168940806432e-05, "loss": 0.0725, "step": 13540 }, { "epoch": 0.8014431892115692, "grad_norm": 0.7695870399475098, "learning_rate": 2.9802704696945122e-05, "loss": 0.0819, "step": 13550 }, { "epoch": 0.8020346601999172, "grad_norm": 0.32524189352989197, "learning_rate": 2.980223990987224e-05, "loss": 0.0938, "step": 13560 }, { "epoch": 0.8026261311882652, "grad_norm": 0.37729793787002563, "learning_rate": 2.9801774579604837e-05, "loss": 0.0863, "step": 13570 }, { "epoch": 0.8032176021766132, "grad_norm": 0.21958045661449432, "learning_rate": 2.9801308706160005e-05, "loss": 0.0738, "step": 13580 }, { "epoch": 0.8038090731649613, "grad_norm": 0.34680676460266113, "learning_rate": 2.9800842289554828e-05, "loss": 0.0843, "step": 13590 }, { "epoch": 0.8044005441533093, "grad_norm": 0.5532814860343933, "learning_rate": 2.980037532980642e-05, "loss": 0.0795, "step": 13600 }, { "epoch": 0.8049920151416573, "grad_norm": 0.3468475341796875, "learning_rate": 2.9799907826931922e-05, "loss": 0.1021, "step": 13610 }, { "epoch": 0.8055834861300053, "grad_norm": 0.4738484025001526, "learning_rate": 2.979943978094849e-05, "loss": 0.097, "step": 13620 }, { "epoch": 0.8061749571183533, "grad_norm": 0.34599167108535767, "learning_rate": 2.97989711918733e-05, "loss": 0.0957, "step": 13630 }, { "epoch": 0.8067664281067014, "grad_norm": 0.275051087141037, "learning_rate": 2.9798502059723546e-05, "loss": 0.0861, "step": 13640 }, { "epoch": 0.8073578990950494, "grad_norm": 1.235427737236023, "learning_rate": 2.9798032384516443e-05, "loss": 0.0772, "step": 13650 }, { "epoch": 0.8079493700833974, "grad_norm": 0.33678457140922546, "learning_rate": 2.979756216626923e-05, "loss": 0.0914, "step": 13660 }, { "epoch": 0.8085408410717454, "grad_norm": 0.3089011311531067, "learning_rate": 2.979709140499916e-05, "loss": 0.1092, "step": 13670 }, { "epoch": 0.8091323120600935, "grad_norm": 0.29656746983528137, "learning_rate": 2.9796620100723507e-05, "loss": 0.0872, "step": 13680 }, { "epoch": 0.8097237830484415, "grad_norm": 0.5326535701751709, "learning_rate": 2.9796148253459565e-05, "loss": 0.078, "step": 13690 }, { "epoch": 0.8103152540367895, "grad_norm": 0.45826584100723267, "learning_rate": 2.9795675863224658e-05, "loss": 0.0718, "step": 13700 }, { "epoch": 0.8109067250251375, "grad_norm": 0.2724880576133728, "learning_rate": 2.9795202930036116e-05, "loss": 0.0923, "step": 13710 }, { "epoch": 0.8114981960134855, "grad_norm": 0.5629162192344666, "learning_rate": 2.9794729453911293e-05, "loss": 0.0863, "step": 13720 }, { "epoch": 0.8120896670018336, "grad_norm": 0.4399127960205078, "learning_rate": 2.9794255434867563e-05, "loss": 0.0813, "step": 13730 }, { "epoch": 0.8126811379901816, "grad_norm": 0.2951563894748688, "learning_rate": 2.979378087292232e-05, "loss": 0.0917, "step": 13740 }, { "epoch": 0.8132726089785296, "grad_norm": 8.1080322265625, "learning_rate": 2.9793305768092985e-05, "loss": 0.0663, "step": 13750 }, { "epoch": 0.8138640799668776, "grad_norm": 0.7212384939193726, "learning_rate": 2.979283012039699e-05, "loss": 0.0928, "step": 13760 }, { "epoch": 0.8144555509552256, "grad_norm": 0.5483909845352173, "learning_rate": 2.979235392985179e-05, "loss": 0.0751, "step": 13770 }, { "epoch": 0.8150470219435737, "grad_norm": 0.36493828892707825, "learning_rate": 2.9791877196474854e-05, "loss": 0.1004, "step": 13780 }, { "epoch": 0.8156384929319217, "grad_norm": 0.3167257308959961, "learning_rate": 2.9791399920283687e-05, "loss": 0.101, "step": 13790 }, { "epoch": 0.8162299639202697, "grad_norm": 2.3264503479003906, "learning_rate": 2.9790922101295796e-05, "loss": 0.0814, "step": 13800 }, { "epoch": 0.8168214349086177, "grad_norm": 0.30411410331726074, "learning_rate": 2.9790443739528714e-05, "loss": 0.0852, "step": 13810 }, { "epoch": 0.8174129058969658, "grad_norm": 0.3149624466896057, "learning_rate": 2.9789964835000003e-05, "loss": 0.0949, "step": 13820 }, { "epoch": 0.8180043768853138, "grad_norm": 0.22981634736061096, "learning_rate": 2.978948538772723e-05, "loss": 0.0825, "step": 13830 }, { "epoch": 0.8185958478736618, "grad_norm": 0.45279407501220703, "learning_rate": 2.9789005397727996e-05, "loss": 0.084, "step": 13840 }, { "epoch": 0.8191873188620098, "grad_norm": 0.46744123101234436, "learning_rate": 2.9788524865019905e-05, "loss": 0.0804, "step": 13850 }, { "epoch": 0.8197787898503578, "grad_norm": 0.3238426446914673, "learning_rate": 2.97880437896206e-05, "loss": 0.0968, "step": 13860 }, { "epoch": 0.8203702608387059, "grad_norm": 0.30540838837623596, "learning_rate": 2.9787562171547738e-05, "loss": 0.0928, "step": 13870 }, { "epoch": 0.8209617318270539, "grad_norm": 0.26100340485572815, "learning_rate": 2.9787080010818983e-05, "loss": 0.0715, "step": 13880 }, { "epoch": 0.8215532028154019, "grad_norm": 0.4226730465888977, "learning_rate": 2.9786597307452027e-05, "loss": 0.1016, "step": 13890 }, { "epoch": 0.8221446738037499, "grad_norm": 0.4876885414123535, "learning_rate": 2.9786114061464594e-05, "loss": 0.083, "step": 13900 }, { "epoch": 0.822736144792098, "grad_norm": 0.25160783529281616, "learning_rate": 2.9785630272874412e-05, "loss": 0.1069, "step": 13910 }, { "epoch": 0.823327615780446, "grad_norm": 0.7545141577720642, "learning_rate": 2.9785145941699237e-05, "loss": 0.0976, "step": 13920 }, { "epoch": 0.823919086768794, "grad_norm": 0.61112380027771, "learning_rate": 2.978466106795684e-05, "loss": 0.0855, "step": 13930 }, { "epoch": 0.824510557757142, "grad_norm": 0.37406644225120544, "learning_rate": 2.9784175651665015e-05, "loss": 0.0734, "step": 13940 }, { "epoch": 0.82510202874549, "grad_norm": 0.36684519052505493, "learning_rate": 2.9783689692841577e-05, "loss": 0.0781, "step": 13950 }, { "epoch": 0.8256934997338381, "grad_norm": 0.33311736583709717, "learning_rate": 2.9783203191504352e-05, "loss": 0.1106, "step": 13960 }, { "epoch": 0.8262849707221861, "grad_norm": 0.6469634771347046, "learning_rate": 2.97827161476712e-05, "loss": 0.0991, "step": 13970 }, { "epoch": 0.8268764417105341, "grad_norm": 0.3834339380264282, "learning_rate": 2.9782228561359997e-05, "loss": 0.0849, "step": 13980 }, { "epoch": 0.8274679126988821, "grad_norm": 0.5868173241615295, "learning_rate": 2.9781740432588626e-05, "loss": 0.0884, "step": 13990 }, { "epoch": 0.8280593836872301, "grad_norm": 0.3544082045555115, "learning_rate": 2.978125176137501e-05, "loss": 0.082, "step": 14000 }, { "epoch": 0.8286508546755782, "grad_norm": 0.31641414761543274, "learning_rate": 2.978076254773707e-05, "loss": 0.1025, "step": 14010 }, { "epoch": 0.8292423256639262, "grad_norm": 0.28708720207214355, "learning_rate": 2.978027279169277e-05, "loss": 0.0865, "step": 14020 }, { "epoch": 0.8298337966522742, "grad_norm": 0.4068717956542969, "learning_rate": 2.977978249326008e-05, "loss": 0.0854, "step": 14030 }, { "epoch": 0.8304252676406222, "grad_norm": 0.25783872604370117, "learning_rate": 2.9779291652456987e-05, "loss": 0.0776, "step": 14040 }, { "epoch": 0.8310167386289703, "grad_norm": 0.7573058009147644, "learning_rate": 2.9778800269301505e-05, "loss": 0.0832, "step": 14050 }, { "epoch": 0.8316082096173183, "grad_norm": 0.4542487561702728, "learning_rate": 2.977830834381167e-05, "loss": 0.0963, "step": 14060 }, { "epoch": 0.8321996806056663, "grad_norm": 0.27101388573646545, "learning_rate": 2.9777815876005532e-05, "loss": 0.089, "step": 14070 }, { "epoch": 0.8327911515940143, "grad_norm": 0.5542360544204712, "learning_rate": 2.977732286590116e-05, "loss": 0.0897, "step": 14080 }, { "epoch": 0.8333826225823623, "grad_norm": 0.32935553789138794, "learning_rate": 2.977682931351665e-05, "loss": 0.0843, "step": 14090 }, { "epoch": 0.8339740935707104, "grad_norm": 0.819449245929718, "learning_rate": 2.9776335218870112e-05, "loss": 0.0779, "step": 14100 }, { "epoch": 0.8345655645590584, "grad_norm": 0.4425503611564636, "learning_rate": 2.9775840581979675e-05, "loss": 0.102, "step": 14110 }, { "epoch": 0.8351570355474064, "grad_norm": 0.3773064613342285, "learning_rate": 2.9775345402863495e-05, "loss": 0.09, "step": 14120 }, { "epoch": 0.8357485065357544, "grad_norm": 0.17350701987743378, "learning_rate": 2.9774849681539747e-05, "loss": 0.0753, "step": 14130 }, { "epoch": 0.8363399775241025, "grad_norm": 0.48782604932785034, "learning_rate": 2.977435341802661e-05, "loss": 0.0886, "step": 14140 }, { "epoch": 0.8369314485124505, "grad_norm": 0.36269208788871765, "learning_rate": 2.977385661234231e-05, "loss": 0.0761, "step": 14150 }, { "epoch": 0.8375229195007985, "grad_norm": 0.31839871406555176, "learning_rate": 2.9773359264505065e-05, "loss": 0.0966, "step": 14160 }, { "epoch": 0.8381143904891465, "grad_norm": 0.2523324489593506, "learning_rate": 2.9772861374533135e-05, "loss": 0.0969, "step": 14170 }, { "epoch": 0.8387058614774945, "grad_norm": 0.35008543729782104, "learning_rate": 2.9772362942444784e-05, "loss": 0.0786, "step": 14180 }, { "epoch": 0.8392973324658426, "grad_norm": 0.39622488617897034, "learning_rate": 2.9771863968258308e-05, "loss": 0.0809, "step": 14190 }, { "epoch": 0.8398888034541906, "grad_norm": 0.4629196524620056, "learning_rate": 2.977136445199202e-05, "loss": 0.0725, "step": 14200 }, { "epoch": 0.8404802744425386, "grad_norm": 0.39642035961151123, "learning_rate": 2.9770864393664246e-05, "loss": 0.0979, "step": 14210 }, { "epoch": 0.8410717454308866, "grad_norm": 0.5970670580863953, "learning_rate": 2.9770363793293336e-05, "loss": 0.0923, "step": 14220 }, { "epoch": 0.8416632164192346, "grad_norm": 0.6056641340255737, "learning_rate": 2.976986265089766e-05, "loss": 0.0935, "step": 14230 }, { "epoch": 0.8422546874075827, "grad_norm": 0.8016699552536011, "learning_rate": 2.9769360966495615e-05, "loss": 0.0784, "step": 14240 }, { "epoch": 0.8428461583959307, "grad_norm": 0.36010685563087463, "learning_rate": 2.9768858740105603e-05, "loss": 0.0749, "step": 14250 }, { "epoch": 0.8434376293842787, "grad_norm": 0.5262669920921326, "learning_rate": 2.9768355971746055e-05, "loss": 0.0974, "step": 14260 }, { "epoch": 0.8440291003726267, "grad_norm": 0.2964012920856476, "learning_rate": 2.976785266143543e-05, "loss": 0.0915, "step": 14270 }, { "epoch": 0.8446205713609748, "grad_norm": 0.5375957489013672, "learning_rate": 2.9767348809192187e-05, "loss": 0.1044, "step": 14280 }, { "epoch": 0.8452120423493228, "grad_norm": 0.49140939116477966, "learning_rate": 2.9766844415034822e-05, "loss": 0.0876, "step": 14290 }, { "epoch": 0.8458035133376708, "grad_norm": 0.5515789985656738, "learning_rate": 2.976633947898184e-05, "loss": 0.0768, "step": 14300 }, { "epoch": 0.8463949843260188, "grad_norm": 0.3619690537452698, "learning_rate": 2.976583400105178e-05, "loss": 0.1038, "step": 14310 }, { "epoch": 0.8469864553143668, "grad_norm": 0.24014824628829956, "learning_rate": 2.9765327981263178e-05, "loss": 0.092, "step": 14320 }, { "epoch": 0.8475779263027149, "grad_norm": 0.37947890162467957, "learning_rate": 2.976482141963461e-05, "loss": 0.0984, "step": 14330 }, { "epoch": 0.8481693972910629, "grad_norm": 0.34102949500083923, "learning_rate": 2.9764314316184674e-05, "loss": 0.0737, "step": 14340 }, { "epoch": 0.8487608682794109, "grad_norm": 0.7298864126205444, "learning_rate": 2.976380667093196e-05, "loss": 0.071, "step": 14350 }, { "epoch": 0.8493523392677589, "grad_norm": 0.37882182002067566, "learning_rate": 2.9763298483895115e-05, "loss": 0.1027, "step": 14360 }, { "epoch": 0.849943810256107, "grad_norm": 0.45949587225914, "learning_rate": 2.9762789755092775e-05, "loss": 0.0944, "step": 14370 }, { "epoch": 0.850535281244455, "grad_norm": 0.37264150381088257, "learning_rate": 2.9762280484543615e-05, "loss": 0.0914, "step": 14380 }, { "epoch": 0.851126752232803, "grad_norm": 0.28296536207199097, "learning_rate": 2.9761770672266325e-05, "loss": 0.074, "step": 14390 }, { "epoch": 0.851718223221151, "grad_norm": 0.44830265641212463, "learning_rate": 2.9761260318279606e-05, "loss": 0.0758, "step": 14400 }, { "epoch": 0.852309694209499, "grad_norm": 0.3245091438293457, "learning_rate": 2.976074942260219e-05, "loss": 0.0927, "step": 14410 }, { "epoch": 0.8529011651978471, "grad_norm": 0.27142414450645447, "learning_rate": 2.9760237985252832e-05, "loss": 0.0994, "step": 14420 }, { "epoch": 0.8534926361861951, "grad_norm": 0.42020997405052185, "learning_rate": 2.975972600625029e-05, "loss": 0.0871, "step": 14430 }, { "epoch": 0.8540841071745431, "grad_norm": 0.3311450183391571, "learning_rate": 2.975921348561336e-05, "loss": 0.0849, "step": 14440 }, { "epoch": 0.8546755781628911, "grad_norm": 0.5350584387779236, "learning_rate": 2.9758700423360847e-05, "loss": 0.0818, "step": 14450 }, { "epoch": 0.8552670491512391, "grad_norm": 0.2504926919937134, "learning_rate": 2.9758186819511577e-05, "loss": 0.1021, "step": 14460 }, { "epoch": 0.8558585201395872, "grad_norm": 0.43662840127944946, "learning_rate": 2.9757672674084403e-05, "loss": 0.0944, "step": 14470 }, { "epoch": 0.8564499911279352, "grad_norm": 0.38557472825050354, "learning_rate": 2.975715798709818e-05, "loss": 0.0765, "step": 14480 }, { "epoch": 0.8570414621162832, "grad_norm": 0.2793623208999634, "learning_rate": 2.975664275857181e-05, "loss": 0.0893, "step": 14490 }, { "epoch": 0.8576329331046312, "grad_norm": 0.7109217643737793, "learning_rate": 2.9756126988524186e-05, "loss": 0.0787, "step": 14500 }, { "epoch": 0.8582244040929793, "grad_norm": 0.26603010296821594, "learning_rate": 2.975561067697425e-05, "loss": 0.0811, "step": 14510 }, { "epoch": 0.8588158750813273, "grad_norm": 0.30548202991485596, "learning_rate": 2.975509382394094e-05, "loss": 0.0923, "step": 14520 }, { "epoch": 0.8594073460696753, "grad_norm": 0.32947057485580444, "learning_rate": 2.9754576429443223e-05, "loss": 0.085, "step": 14530 }, { "epoch": 0.8599988170580233, "grad_norm": 0.4050820767879486, "learning_rate": 2.975405849350009e-05, "loss": 0.0838, "step": 14540 }, { "epoch": 0.8605902880463713, "grad_norm": 0.8083544969558716, "learning_rate": 2.975354001613054e-05, "loss": 0.0801, "step": 14550 }, { "epoch": 0.8611817590347194, "grad_norm": 0.4371320307254791, "learning_rate": 2.975302099735361e-05, "loss": 0.098, "step": 14560 }, { "epoch": 0.8617732300230674, "grad_norm": 0.3875666558742523, "learning_rate": 2.975250143718834e-05, "loss": 0.0862, "step": 14570 }, { "epoch": 0.8623647010114154, "grad_norm": 0.31792882084846497, "learning_rate": 2.9751981335653794e-05, "loss": 0.0843, "step": 14580 }, { "epoch": 0.8629561719997634, "grad_norm": 0.3179303705692291, "learning_rate": 2.9751460692769063e-05, "loss": 0.0736, "step": 14590 }, { "epoch": 0.8635476429881115, "grad_norm": 0.4364507794380188, "learning_rate": 2.975093950855325e-05, "loss": 0.0773, "step": 14600 }, { "epoch": 0.8641391139764595, "grad_norm": 0.2329847663640976, "learning_rate": 2.9750417783025482e-05, "loss": 0.0972, "step": 14610 }, { "epoch": 0.8647305849648075, "grad_norm": 0.2940913140773773, "learning_rate": 2.9749895516204905e-05, "loss": 0.0891, "step": 14620 }, { "epoch": 0.8653220559531555, "grad_norm": 0.3385302424430847, "learning_rate": 2.9749372708110684e-05, "loss": 0.0897, "step": 14630 }, { "epoch": 0.8659135269415035, "grad_norm": 0.31890565156936646, "learning_rate": 2.9748849358761997e-05, "loss": 0.0857, "step": 14640 }, { "epoch": 0.8665049979298516, "grad_norm": 0.48810046911239624, "learning_rate": 2.9748325468178067e-05, "loss": 0.0721, "step": 14650 }, { "epoch": 0.8670964689181996, "grad_norm": 0.3175092339515686, "learning_rate": 2.9747801036378103e-05, "loss": 0.1008, "step": 14660 }, { "epoch": 0.8676879399065476, "grad_norm": 2.0456316471099854, "learning_rate": 2.9747276063381358e-05, "loss": 0.0854, "step": 14670 }, { "epoch": 0.8682794108948956, "grad_norm": 2.124727725982666, "learning_rate": 2.9746750549207094e-05, "loss": 0.0947, "step": 14680 }, { "epoch": 0.8688708818832436, "grad_norm": 0.3708743453025818, "learning_rate": 2.9746224493874593e-05, "loss": 0.0786, "step": 14690 }, { "epoch": 0.8694623528715917, "grad_norm": 2.0503218173980713, "learning_rate": 2.9745697897403165e-05, "loss": 0.0756, "step": 14700 }, { "epoch": 0.8700538238599397, "grad_norm": 0.3926321268081665, "learning_rate": 2.9745170759812134e-05, "loss": 0.1006, "step": 14710 }, { "epoch": 0.8706452948482877, "grad_norm": 0.5340031981468201, "learning_rate": 2.9744643081120837e-05, "loss": 0.0882, "step": 14720 }, { "epoch": 0.8712367658366357, "grad_norm": 0.3883570432662964, "learning_rate": 2.9744114861348645e-05, "loss": 0.0854, "step": 14730 }, { "epoch": 0.8718282368249838, "grad_norm": 0.30995944142341614, "learning_rate": 2.974358610051494e-05, "loss": 0.0802, "step": 14740 }, { "epoch": 0.8724197078133318, "grad_norm": 0.41064998507499695, "learning_rate": 2.974305679863913e-05, "loss": 0.0716, "step": 14750 }, { "epoch": 0.8730111788016798, "grad_norm": 0.3474482297897339, "learning_rate": 2.974252695574063e-05, "loss": 0.0887, "step": 14760 }, { "epoch": 0.8736026497900278, "grad_norm": 0.41512617468833923, "learning_rate": 2.9741996571838888e-05, "loss": 0.0958, "step": 14770 }, { "epoch": 0.8741941207783758, "grad_norm": 0.39237460494041443, "learning_rate": 2.974146564695337e-05, "loss": 0.1093, "step": 14780 }, { "epoch": 0.8747855917667239, "grad_norm": 0.3388891816139221, "learning_rate": 2.9740934181103557e-05, "loss": 0.0842, "step": 14790 }, { "epoch": 0.8753770627550719, "grad_norm": 0.36844006180763245, "learning_rate": 2.974040217430895e-05, "loss": 0.0825, "step": 14800 }, { "epoch": 0.8759685337434199, "grad_norm": 0.2869773209095001, "learning_rate": 2.9739869626589077e-05, "loss": 0.1043, "step": 14810 }, { "epoch": 0.8765600047317679, "grad_norm": 0.24903786182403564, "learning_rate": 2.9739336537963473e-05, "loss": 0.0877, "step": 14820 }, { "epoch": 0.877151475720116, "grad_norm": 0.48662057518959045, "learning_rate": 2.9738802908451712e-05, "loss": 0.0858, "step": 14830 }, { "epoch": 0.877742946708464, "grad_norm": 2.979245185852051, "learning_rate": 2.9738268738073365e-05, "loss": 0.0754, "step": 14840 }, { "epoch": 0.878334417696812, "grad_norm": 3.104799747467041, "learning_rate": 2.973773402684804e-05, "loss": 0.137, "step": 14850 }, { "epoch": 0.87892588868516, "grad_norm": 5.693235874176025, "learning_rate": 2.973719877479536e-05, "loss": 0.2191, "step": 14860 }, { "epoch": 0.879517359673508, "grad_norm": 2.3017232418060303, "learning_rate": 2.973666298193496e-05, "loss": 0.2135, "step": 14870 }, { "epoch": 0.8801088306618561, "grad_norm": 1.447304129600525, "learning_rate": 2.973612664828651e-05, "loss": 0.1998, "step": 14880 }, { "epoch": 0.8807003016502041, "grad_norm": 2.841717004776001, "learning_rate": 2.973558977386969e-05, "loss": 0.1792, "step": 14890 }, { "epoch": 0.8812917726385521, "grad_norm": 2.418792486190796, "learning_rate": 2.97350523587042e-05, "loss": 0.1608, "step": 14900 }, { "epoch": 0.8818832436269001, "grad_norm": 2.2788898944854736, "learning_rate": 2.9734514402809764e-05, "loss": 0.1177, "step": 14910 }, { "epoch": 0.8824747146152481, "grad_norm": 0.8657085299491882, "learning_rate": 2.9733975906206115e-05, "loss": 0.1546, "step": 14920 }, { "epoch": 0.8830661856035962, "grad_norm": 1.2277467250823975, "learning_rate": 2.973343686891303e-05, "loss": 0.1682, "step": 14930 }, { "epoch": 0.8836576565919442, "grad_norm": 0.2650769352912903, "learning_rate": 2.973289729095027e-05, "loss": 0.1076, "step": 14940 }, { "epoch": 0.8842491275802922, "grad_norm": 0.5999905467033386, "learning_rate": 2.9732357172337655e-05, "loss": 0.0747, "step": 14950 }, { "epoch": 0.8848405985686402, "grad_norm": 0.38549524545669556, "learning_rate": 2.973181651309499e-05, "loss": 0.0984, "step": 14960 }, { "epoch": 0.8854320695569883, "grad_norm": 0.2434990257024765, "learning_rate": 2.9731275313242124e-05, "loss": 0.0944, "step": 14970 }, { "epoch": 0.8860235405453363, "grad_norm": 0.33561989665031433, "learning_rate": 2.9730733572798916e-05, "loss": 0.0883, "step": 14980 }, { "epoch": 0.8866150115336843, "grad_norm": 0.2185223400592804, "learning_rate": 2.9730191291785243e-05, "loss": 0.0761, "step": 14990 }, { "epoch": 0.8872064825220323, "grad_norm": 1.7384274005889893, "learning_rate": 2.9729648470221012e-05, "loss": 0.0735, "step": 15000 }, { "epoch": 0.8877979535103803, "grad_norm": 0.631160318851471, "learning_rate": 2.9729105108126134e-05, "loss": 0.0959, "step": 15010 }, { "epoch": 0.8883894244987284, "grad_norm": 0.2747505307197571, "learning_rate": 2.9728561205520553e-05, "loss": 0.0954, "step": 15020 }, { "epoch": 0.8889808954870764, "grad_norm": 0.24308983981609344, "learning_rate": 2.972801676242423e-05, "loss": 0.0759, "step": 15030 }, { "epoch": 0.8895723664754244, "grad_norm": 0.2429323047399521, "learning_rate": 2.9727471778857143e-05, "loss": 0.0699, "step": 15040 }, { "epoch": 0.8901638374637724, "grad_norm": 0.566961407661438, "learning_rate": 2.9726926254839287e-05, "loss": 0.0637, "step": 15050 }, { "epoch": 0.8907553084521205, "grad_norm": 0.2557506561279297, "learning_rate": 2.9726380190390688e-05, "loss": 0.0845, "step": 15060 }, { "epoch": 0.8913467794404685, "grad_norm": 0.4596206247806549, "learning_rate": 2.972583358553138e-05, "loss": 0.0926, "step": 15070 }, { "epoch": 0.8919382504288165, "grad_norm": 0.3352380394935608, "learning_rate": 2.9725286440281424e-05, "loss": 0.0818, "step": 15080 }, { "epoch": 0.8925297214171645, "grad_norm": 0.3517421782016754, "learning_rate": 2.9724738754660895e-05, "loss": 0.0838, "step": 15090 }, { "epoch": 0.8931211924055125, "grad_norm": 0.3267073333263397, "learning_rate": 2.97241905286899e-05, "loss": 0.0742, "step": 15100 }, { "epoch": 0.8937126633938606, "grad_norm": 0.3496324121952057, "learning_rate": 2.972364176238855e-05, "loss": 0.1147, "step": 15110 }, { "epoch": 0.8943041343822086, "grad_norm": 0.2357374131679535, "learning_rate": 2.972309245577698e-05, "loss": 0.0867, "step": 15120 }, { "epoch": 0.8948956053705566, "grad_norm": 0.2470654547214508, "learning_rate": 2.972254260887535e-05, "loss": 0.0885, "step": 15130 }, { "epoch": 0.8954870763589046, "grad_norm": 0.2228875309228897, "learning_rate": 2.9721992221703843e-05, "loss": 0.0734, "step": 15140 }, { "epoch": 0.8960785473472526, "grad_norm": 0.32470694184303284, "learning_rate": 2.9721441294282653e-05, "loss": 0.0767, "step": 15150 }, { "epoch": 0.8966700183356007, "grad_norm": 0.40284162759780884, "learning_rate": 2.9720889826631995e-05, "loss": 0.0938, "step": 15160 }, { "epoch": 0.8972614893239487, "grad_norm": 0.32294195890426636, "learning_rate": 2.972033781877211e-05, "loss": 0.0983, "step": 15170 }, { "epoch": 0.8978529603122967, "grad_norm": 0.2692215144634247, "learning_rate": 2.971978527072325e-05, "loss": 0.0853, "step": 15180 }, { "epoch": 0.8984444313006447, "grad_norm": 0.2963016629219055, "learning_rate": 2.97192321825057e-05, "loss": 0.0697, "step": 15190 }, { "epoch": 0.8990359022889928, "grad_norm": 0.770630419254303, "learning_rate": 2.971867855413975e-05, "loss": 0.0702, "step": 15200 }, { "epoch": 0.8996273732773408, "grad_norm": 0.2837944030761719, "learning_rate": 2.9718124385645717e-05, "loss": 0.0942, "step": 15210 }, { "epoch": 0.9002188442656888, "grad_norm": 0.35707059502601624, "learning_rate": 2.9717569677043938e-05, "loss": 0.0839, "step": 15220 }, { "epoch": 0.9008103152540368, "grad_norm": 0.27628156542778015, "learning_rate": 2.971701442835477e-05, "loss": 0.0824, "step": 15230 }, { "epoch": 0.9014017862423848, "grad_norm": 0.294998437166214, "learning_rate": 2.9716458639598584e-05, "loss": 0.0943, "step": 15240 }, { "epoch": 0.9019932572307329, "grad_norm": 0.3922577202320099, "learning_rate": 2.9715902310795782e-05, "loss": 0.0685, "step": 15250 }, { "epoch": 0.9025847282190809, "grad_norm": 0.21817399561405182, "learning_rate": 2.9715345441966775e-05, "loss": 0.0947, "step": 15260 }, { "epoch": 0.9031761992074289, "grad_norm": 0.3238697052001953, "learning_rate": 2.9714788033132005e-05, "loss": 0.1064, "step": 15270 }, { "epoch": 0.9037676701957769, "grad_norm": 0.28602108359336853, "learning_rate": 2.9714230084311918e-05, "loss": 0.08, "step": 15280 }, { "epoch": 0.904359141184125, "grad_norm": 0.28182291984558105, "learning_rate": 2.9713671595526995e-05, "loss": 0.0818, "step": 15290 }, { "epoch": 0.904950612172473, "grad_norm": 0.3718523681163788, "learning_rate": 2.971311256679773e-05, "loss": 0.0753, "step": 15300 }, { "epoch": 0.905542083160821, "grad_norm": 0.35081106424331665, "learning_rate": 2.9712552998144634e-05, "loss": 0.0909, "step": 15310 }, { "epoch": 0.906133554149169, "grad_norm": 0.363253116607666, "learning_rate": 2.9711992889588246e-05, "loss": 0.0827, "step": 15320 }, { "epoch": 0.906725025137517, "grad_norm": 0.2995121479034424, "learning_rate": 2.9711432241149117e-05, "loss": 0.0845, "step": 15330 }, { "epoch": 0.9073164961258651, "grad_norm": 0.17994001507759094, "learning_rate": 2.9710871052847823e-05, "loss": 0.0837, "step": 15340 }, { "epoch": 0.9079079671142131, "grad_norm": 0.2789979875087738, "learning_rate": 2.9710309324704955e-05, "loss": 0.0653, "step": 15350 }, { "epoch": 0.9084994381025611, "grad_norm": 0.2967322766780853, "learning_rate": 2.9709747056741128e-05, "loss": 0.0949, "step": 15360 }, { "epoch": 0.9090909090909091, "grad_norm": 0.24569687247276306, "learning_rate": 2.970918424897698e-05, "loss": 0.0912, "step": 15370 }, { "epoch": 0.9096823800792571, "grad_norm": 0.26315900683403015, "learning_rate": 2.9708620901433154e-05, "loss": 0.0867, "step": 15380 }, { "epoch": 0.9102738510676052, "grad_norm": 0.2086511105298996, "learning_rate": 2.9708057014130336e-05, "loss": 0.0763, "step": 15390 }, { "epoch": 0.9108653220559532, "grad_norm": 0.454714834690094, "learning_rate": 2.9707492587089206e-05, "loss": 0.0708, "step": 15400 }, { "epoch": 0.9114567930443012, "grad_norm": 0.5836544036865234, "learning_rate": 2.9706927620330487e-05, "loss": 0.1017, "step": 15410 }, { "epoch": 0.9120482640326492, "grad_norm": 0.3164951205253601, "learning_rate": 2.9706362113874906e-05, "loss": 0.0923, "step": 15420 }, { "epoch": 0.9126397350209973, "grad_norm": 0.21801935136318207, "learning_rate": 2.9705796067743217e-05, "loss": 0.0853, "step": 15430 }, { "epoch": 0.9132312060093453, "grad_norm": 0.18368077278137207, "learning_rate": 2.970522948195619e-05, "loss": 0.0778, "step": 15440 }, { "epoch": 0.9138226769976933, "grad_norm": 0.2635590732097626, "learning_rate": 2.970466235653462e-05, "loss": 0.0629, "step": 15450 }, { "epoch": 0.9144141479860413, "grad_norm": 0.2920669913291931, "learning_rate": 2.9704094691499313e-05, "loss": 0.1005, "step": 15460 }, { "epoch": 0.9150056189743893, "grad_norm": 0.3646514117717743, "learning_rate": 2.9703526486871108e-05, "loss": 0.0967, "step": 15470 }, { "epoch": 0.9155970899627374, "grad_norm": 0.32024431228637695, "learning_rate": 2.9702957742670854e-05, "loss": 0.0841, "step": 15480 }, { "epoch": 0.9161885609510854, "grad_norm": 0.24406811594963074, "learning_rate": 2.9702388458919418e-05, "loss": 0.076, "step": 15490 }, { "epoch": 0.9167800319394334, "grad_norm": 0.5381653904914856, "learning_rate": 2.9701818635637695e-05, "loss": 0.0765, "step": 15500 }, { "epoch": 0.9173715029277814, "grad_norm": 0.43756312131881714, "learning_rate": 2.9701248272846596e-05, "loss": 0.0932, "step": 15510 }, { "epoch": 0.9179629739161295, "grad_norm": 0.3177310824394226, "learning_rate": 2.9700677370567047e-05, "loss": 0.0957, "step": 15520 }, { "epoch": 0.9185544449044775, "grad_norm": 0.47775325179100037, "learning_rate": 2.9700105928820003e-05, "loss": 0.0893, "step": 15530 }, { "epoch": 0.9191459158928255, "grad_norm": 0.38157081604003906, "learning_rate": 2.9699533947626434e-05, "loss": 0.0955, "step": 15540 }, { "epoch": 0.9197373868811735, "grad_norm": 0.28308337926864624, "learning_rate": 2.9698961427007325e-05, "loss": 0.0638, "step": 15550 }, { "epoch": 0.9203288578695215, "grad_norm": 0.25611573457717896, "learning_rate": 2.9698388366983686e-05, "loss": 0.0913, "step": 15560 }, { "epoch": 0.9209203288578696, "grad_norm": 0.26076480746269226, "learning_rate": 2.9697814767576555e-05, "loss": 0.0828, "step": 15570 }, { "epoch": 0.9215117998462176, "grad_norm": 0.193716898560524, "learning_rate": 2.9697240628806974e-05, "loss": 0.0799, "step": 15580 }, { "epoch": 0.9221032708345656, "grad_norm": 0.44423773884773254, "learning_rate": 2.969666595069601e-05, "loss": 0.0838, "step": 15590 }, { "epoch": 0.9226947418229136, "grad_norm": 0.8228429555892944, "learning_rate": 2.969609073326476e-05, "loss": 0.084, "step": 15600 }, { "epoch": 0.9232862128112616, "grad_norm": 0.31759175658226013, "learning_rate": 2.9695514976534326e-05, "loss": 0.1181, "step": 15610 }, { "epoch": 0.9238776837996097, "grad_norm": 0.26686879992485046, "learning_rate": 2.969493868052584e-05, "loss": 0.0956, "step": 15620 }, { "epoch": 0.9244691547879577, "grad_norm": 0.34230855107307434, "learning_rate": 2.9694361845260448e-05, "loss": 0.0988, "step": 15630 }, { "epoch": 0.9250606257763057, "grad_norm": 0.40530312061309814, "learning_rate": 2.9693784470759316e-05, "loss": 0.0868, "step": 15640 }, { "epoch": 0.9256520967646537, "grad_norm": 0.3631957471370697, "learning_rate": 2.9693206557043637e-05, "loss": 0.0743, "step": 15650 }, { "epoch": 0.9262435677530018, "grad_norm": 0.2552196681499481, "learning_rate": 2.9692628104134618e-05, "loss": 0.0932, "step": 15660 }, { "epoch": 0.9268350387413498, "grad_norm": 0.3616819679737091, "learning_rate": 2.9692049112053477e-05, "loss": 0.0944, "step": 15670 }, { "epoch": 0.9274265097296978, "grad_norm": 0.26803603768348694, "learning_rate": 2.9691469580821477e-05, "loss": 0.0779, "step": 15680 }, { "epoch": 0.9280179807180458, "grad_norm": 0.22110195457935333, "learning_rate": 2.969088951045987e-05, "loss": 0.0741, "step": 15690 }, { "epoch": 0.9286094517063938, "grad_norm": 0.31091389060020447, "learning_rate": 2.9690308900989957e-05, "loss": 0.0776, "step": 15700 }, { "epoch": 0.9292009226947419, "grad_norm": 0.26942211389541626, "learning_rate": 2.9689727752433023e-05, "loss": 0.0846, "step": 15710 }, { "epoch": 0.9297923936830899, "grad_norm": 0.3888183534145355, "learning_rate": 2.968914606481042e-05, "loss": 0.0968, "step": 15720 }, { "epoch": 0.9303838646714379, "grad_norm": 0.6223667860031128, "learning_rate": 2.9688563838143477e-05, "loss": 0.09, "step": 15730 }, { "epoch": 0.9309753356597859, "grad_norm": 0.30812063813209534, "learning_rate": 2.9687981072453564e-05, "loss": 0.0851, "step": 15740 }, { "epoch": 0.931566806648134, "grad_norm": 0.5528847575187683, "learning_rate": 2.968739776776207e-05, "loss": 0.0775, "step": 15750 }, { "epoch": 0.932158277636482, "grad_norm": 0.2383166402578354, "learning_rate": 2.96868139240904e-05, "loss": 0.0831, "step": 15760 }, { "epoch": 0.93274974862483, "grad_norm": 0.31954437494277954, "learning_rate": 2.9686229541459974e-05, "loss": 0.0909, "step": 15770 }, { "epoch": 0.933341219613178, "grad_norm": 0.35306504368782043, "learning_rate": 2.968564461989224e-05, "loss": 0.0931, "step": 15780 }, { "epoch": 0.933932690601526, "grad_norm": 0.26876962184906006, "learning_rate": 2.968505915940866e-05, "loss": 0.0794, "step": 15790 }, { "epoch": 0.9345241615898741, "grad_norm": 0.19640454649925232, "learning_rate": 2.9684473160030724e-05, "loss": 0.0723, "step": 15800 }, { "epoch": 0.9351156325782221, "grad_norm": 0.3621252179145813, "learning_rate": 2.9683886621779938e-05, "loss": 0.0896, "step": 15810 }, { "epoch": 0.93570710356657, "grad_norm": 0.31867483258247375, "learning_rate": 2.9683299544677816e-05, "loss": 0.0931, "step": 15820 }, { "epoch": 0.936298574554918, "grad_norm": 0.22268182039260864, "learning_rate": 2.9682711928745907e-05, "loss": 0.0889, "step": 15830 }, { "epoch": 0.936890045543266, "grad_norm": 0.42104026675224304, "learning_rate": 2.9682123774005776e-05, "loss": 0.0749, "step": 15840 }, { "epoch": 0.9374815165316142, "grad_norm": 0.3210522532463074, "learning_rate": 2.968153508047901e-05, "loss": 0.0648, "step": 15850 }, { "epoch": 0.9380729875199622, "grad_norm": 0.34925293922424316, "learning_rate": 2.96809458481872e-05, "loss": 0.1036, "step": 15860 }, { "epoch": 0.9386644585083102, "grad_norm": 0.3454207479953766, "learning_rate": 2.968035607715198e-05, "loss": 0.0879, "step": 15870 }, { "epoch": 0.9392559294966581, "grad_norm": 0.20312580466270447, "learning_rate": 2.9679765767394992e-05, "loss": 0.095, "step": 15880 }, { "epoch": 0.9398474004850063, "grad_norm": 0.23739632964134216, "learning_rate": 2.967917491893789e-05, "loss": 0.0795, "step": 15890 }, { "epoch": 0.9404388714733543, "grad_norm": 0.3681986331939697, "learning_rate": 2.967858353180237e-05, "loss": 0.0847, "step": 15900 }, { "epoch": 0.9410303424617023, "grad_norm": 0.3488328754901886, "learning_rate": 2.9677991606010117e-05, "loss": 0.0993, "step": 15910 }, { "epoch": 0.9416218134500502, "grad_norm": 0.3367461562156677, "learning_rate": 2.9677399141582864e-05, "loss": 0.0952, "step": 15920 }, { "epoch": 0.9422132844383982, "grad_norm": 0.24016602337360382, "learning_rate": 2.967680613854235e-05, "loss": 0.0831, "step": 15930 }, { "epoch": 0.9428047554267464, "grad_norm": 0.2071119099855423, "learning_rate": 2.967621259691034e-05, "loss": 0.0774, "step": 15940 }, { "epoch": 0.9433962264150944, "grad_norm": 0.1801297664642334, "learning_rate": 2.9675618516708607e-05, "loss": 0.0864, "step": 15950 }, { "epoch": 0.9439876974034423, "grad_norm": 0.37007465958595276, "learning_rate": 2.9675023897958963e-05, "loss": 0.0873, "step": 15960 }, { "epoch": 0.9445791683917903, "grad_norm": 0.24327093362808228, "learning_rate": 2.9674428740683216e-05, "loss": 0.0879, "step": 15970 }, { "epoch": 0.9451706393801385, "grad_norm": 0.21850304305553436, "learning_rate": 2.9673833044903216e-05, "loss": 0.0816, "step": 15980 }, { "epoch": 0.9457621103684865, "grad_norm": 28.03598403930664, "learning_rate": 2.9673236810640815e-05, "loss": 0.0792, "step": 15990 }, { "epoch": 0.9463535813568345, "grad_norm": 0.3002605438232422, "learning_rate": 2.96726400379179e-05, "loss": 0.0689, "step": 16000 }, { "epoch": 0.9469450523451824, "grad_norm": 0.5247543454170227, "learning_rate": 2.967204272675637e-05, "loss": 0.0891, "step": 16010 }, { "epoch": 0.9475365233335304, "grad_norm": 0.27450984716415405, "learning_rate": 2.967144487717814e-05, "loss": 0.0911, "step": 16020 }, { "epoch": 0.9481279943218786, "grad_norm": 0.3501132130622864, "learning_rate": 2.9670846489205152e-05, "loss": 0.0876, "step": 16030 }, { "epoch": 0.9487194653102266, "grad_norm": 0.22981517016887665, "learning_rate": 2.967024756285936e-05, "loss": 0.071, "step": 16040 }, { "epoch": 0.9493109362985745, "grad_norm": 0.3691391348838806, "learning_rate": 2.9669648098162753e-05, "loss": 0.0802, "step": 16050 }, { "epoch": 0.9499024072869225, "grad_norm": 0.33414024114608765, "learning_rate": 2.9669048095137322e-05, "loss": 0.0912, "step": 16060 }, { "epoch": 0.9504938782752705, "grad_norm": 0.4192345142364502, "learning_rate": 2.9668447553805087e-05, "loss": 0.0773, "step": 16070 }, { "epoch": 0.9510853492636187, "grad_norm": 0.4886787533760071, "learning_rate": 2.966784647418809e-05, "loss": 0.079, "step": 16080 }, { "epoch": 0.9516768202519666, "grad_norm": 0.24452680349349976, "learning_rate": 2.9667244856308375e-05, "loss": 0.0801, "step": 16090 }, { "epoch": 0.9522682912403146, "grad_norm": 0.32086899876594543, "learning_rate": 2.9666642700188032e-05, "loss": 0.0782, "step": 16100 }, { "epoch": 0.9528597622286626, "grad_norm": 0.3940134644508362, "learning_rate": 2.966604000584916e-05, "loss": 0.0918, "step": 16110 }, { "epoch": 0.9534512332170108, "grad_norm": 0.2743614912033081, "learning_rate": 2.9665436773313865e-05, "loss": 0.0807, "step": 16120 }, { "epoch": 0.9540427042053587, "grad_norm": 0.5194355249404907, "learning_rate": 2.9664833002604292e-05, "loss": 0.0813, "step": 16130 }, { "epoch": 0.9546341751937067, "grad_norm": 0.2562077045440674, "learning_rate": 2.9664228693742596e-05, "loss": 0.0726, "step": 16140 }, { "epoch": 0.9552256461820547, "grad_norm": 0.3376345634460449, "learning_rate": 2.9663623846750948e-05, "loss": 0.0685, "step": 16150 }, { "epoch": 0.9558171171704027, "grad_norm": 0.20224232971668243, "learning_rate": 2.9663018461651553e-05, "loss": 0.1001, "step": 16160 }, { "epoch": 0.9564085881587508, "grad_norm": 0.3213803470134735, "learning_rate": 2.966241253846662e-05, "loss": 0.0853, "step": 16170 }, { "epoch": 0.9570000591470988, "grad_norm": 0.27701568603515625, "learning_rate": 2.9661806077218384e-05, "loss": 0.0921, "step": 16180 }, { "epoch": 0.9575915301354468, "grad_norm": 0.3409293293952942, "learning_rate": 2.9661199077929102e-05, "loss": 0.0846, "step": 16190 }, { "epoch": 0.9581830011237948, "grad_norm": 0.24152979254722595, "learning_rate": 2.9660591540621053e-05, "loss": 0.0746, "step": 16200 }, { "epoch": 0.958774472112143, "grad_norm": 0.21405795216560364, "learning_rate": 2.965998346531653e-05, "loss": 0.0869, "step": 16210 }, { "epoch": 0.959365943100491, "grad_norm": 0.462589830160141, "learning_rate": 2.965937485203784e-05, "loss": 0.0819, "step": 16220 }, { "epoch": 0.959957414088839, "grad_norm": 0.29476287961006165, "learning_rate": 2.9658765700807324e-05, "loss": 0.0884, "step": 16230 }, { "epoch": 0.9605488850771869, "grad_norm": 0.27643194794654846, "learning_rate": 2.9658156011647333e-05, "loss": 0.076, "step": 16240 }, { "epoch": 0.9611403560655349, "grad_norm": 0.19322308897972107, "learning_rate": 2.9657545784580244e-05, "loss": 0.0637, "step": 16250 }, { "epoch": 0.961731827053883, "grad_norm": 0.23862324655056, "learning_rate": 2.965693501962845e-05, "loss": 0.0927, "step": 16260 }, { "epoch": 0.962323298042231, "grad_norm": 0.25705254077911377, "learning_rate": 2.965632371681436e-05, "loss": 0.0895, "step": 16270 }, { "epoch": 0.962914769030579, "grad_norm": 0.4542146623134613, "learning_rate": 2.9655711876160413e-05, "loss": 0.0865, "step": 16280 }, { "epoch": 0.963506240018927, "grad_norm": 0.22849732637405396, "learning_rate": 2.9655099497689052e-05, "loss": 0.074, "step": 16290 }, { "epoch": 0.964097711007275, "grad_norm": 0.6494603753089905, "learning_rate": 2.9654486581422758e-05, "loss": 0.0725, "step": 16300 }, { "epoch": 0.9646891819956231, "grad_norm": 0.21200565993785858, "learning_rate": 2.9653873127384025e-05, "loss": 0.097, "step": 16310 }, { "epoch": 0.9652806529839711, "grad_norm": 0.30805283784866333, "learning_rate": 2.9653259135595356e-05, "loss": 0.0881, "step": 16320 }, { "epoch": 0.9658721239723191, "grad_norm": 0.22168399393558502, "learning_rate": 2.9652644606079286e-05, "loss": 0.0853, "step": 16330 }, { "epoch": 0.9664635949606671, "grad_norm": 0.23300854861736298, "learning_rate": 2.9652029538858366e-05, "loss": 0.0677, "step": 16340 }, { "epoch": 0.9670550659490152, "grad_norm": 0.28344836831092834, "learning_rate": 2.965141393395517e-05, "loss": 0.0632, "step": 16350 }, { "epoch": 0.9676465369373632, "grad_norm": 0.4148576855659485, "learning_rate": 2.9650797791392287e-05, "loss": 0.1031, "step": 16360 }, { "epoch": 0.9682380079257112, "grad_norm": 0.24495205283164978, "learning_rate": 2.9650181111192327e-05, "loss": 0.0984, "step": 16370 }, { "epoch": 0.9688294789140592, "grad_norm": 0.2204698622226715, "learning_rate": 2.9649563893377918e-05, "loss": 0.0926, "step": 16380 }, { "epoch": 0.9694209499024072, "grad_norm": 0.2821146845817566, "learning_rate": 2.9648946137971715e-05, "loss": 0.0802, "step": 16390 }, { "epoch": 0.9700124208907553, "grad_norm": 0.3368695080280304, "learning_rate": 2.9648327844996385e-05, "loss": 0.0814, "step": 16400 }, { "epoch": 0.9706038918791033, "grad_norm": 0.2466294914484024, "learning_rate": 2.9647709014474613e-05, "loss": 0.0947, "step": 16410 }, { "epoch": 0.9711953628674513, "grad_norm": 0.4389761686325073, "learning_rate": 2.9647089646429114e-05, "loss": 0.0879, "step": 16420 }, { "epoch": 0.9717868338557993, "grad_norm": 0.16201452910900116, "learning_rate": 2.964646974088262e-05, "loss": 0.073, "step": 16430 }, { "epoch": 0.9723783048441474, "grad_norm": 0.20987403392791748, "learning_rate": 2.9645849297857866e-05, "loss": 0.085, "step": 16440 }, { "epoch": 0.9729697758324954, "grad_norm": 0.6072264909744263, "learning_rate": 2.964522831737763e-05, "loss": 0.084, "step": 16450 }, { "epoch": 0.9735612468208434, "grad_norm": 0.34316617250442505, "learning_rate": 2.9644606799464702e-05, "loss": 0.1117, "step": 16460 }, { "epoch": 0.9741527178091914, "grad_norm": 0.3072223961353302, "learning_rate": 2.964398474414189e-05, "loss": 0.1084, "step": 16470 }, { "epoch": 0.9747441887975394, "grad_norm": 0.29986873269081116, "learning_rate": 2.9643362151432012e-05, "loss": 0.0935, "step": 16480 }, { "epoch": 0.9753356597858875, "grad_norm": 0.25069767236709595, "learning_rate": 2.9642739021357922e-05, "loss": 0.0814, "step": 16490 }, { "epoch": 0.9759271307742355, "grad_norm": 0.2861212491989136, "learning_rate": 2.9642115353942488e-05, "loss": 0.0659, "step": 16500 }, { "epoch": 0.9765186017625835, "grad_norm": 0.26804113388061523, "learning_rate": 2.9641491149208594e-05, "loss": 0.0928, "step": 16510 }, { "epoch": 0.9771100727509315, "grad_norm": 0.4552845358848572, "learning_rate": 2.964086640717915e-05, "loss": 0.096, "step": 16520 }, { "epoch": 0.9777015437392796, "grad_norm": 0.2577589154243469, "learning_rate": 2.9640241127877072e-05, "loss": 0.0917, "step": 16530 }, { "epoch": 0.9782930147276276, "grad_norm": 0.22550149261951447, "learning_rate": 2.9639615311325314e-05, "loss": 0.0734, "step": 16540 }, { "epoch": 0.9788844857159756, "grad_norm": 0.3499772548675537, "learning_rate": 2.9638988957546843e-05, "loss": 0.0826, "step": 16550 }, { "epoch": 0.9794759567043236, "grad_norm": 0.3152664303779602, "learning_rate": 2.9638362066564643e-05, "loss": 0.0934, "step": 16560 }, { "epoch": 0.9800674276926716, "grad_norm": 0.28695136308670044, "learning_rate": 2.963773463840171e-05, "loss": 0.0927, "step": 16570 }, { "epoch": 0.9806588986810197, "grad_norm": 0.2380322962999344, "learning_rate": 2.9637106673081082e-05, "loss": 0.0939, "step": 16580 }, { "epoch": 0.9812503696693677, "grad_norm": 0.2392168641090393, "learning_rate": 2.9636478170625795e-05, "loss": 0.0841, "step": 16590 }, { "epoch": 0.9818418406577157, "grad_norm": 0.2662017047405243, "learning_rate": 2.9635849131058913e-05, "loss": 0.0662, "step": 16600 }, { "epoch": 0.9824333116460637, "grad_norm": 0.2979264259338379, "learning_rate": 2.9635219554403524e-05, "loss": 0.1084, "step": 16610 }, { "epoch": 0.9830247826344117, "grad_norm": 0.2892182469367981, "learning_rate": 2.963458944068273e-05, "loss": 0.0952, "step": 16620 }, { "epoch": 0.9836162536227598, "grad_norm": 0.29324063658714294, "learning_rate": 2.9633958789919653e-05, "loss": 0.0798, "step": 16630 }, { "epoch": 0.9842077246111078, "grad_norm": 0.23308831453323364, "learning_rate": 2.9633327602137433e-05, "loss": 0.0677, "step": 16640 }, { "epoch": 0.9847991955994558, "grad_norm": 0.33434855937957764, "learning_rate": 2.963269587735924e-05, "loss": 0.0876, "step": 16650 }, { "epoch": 0.9853906665878038, "grad_norm": 0.28692811727523804, "learning_rate": 2.9632063615608248e-05, "loss": 0.0933, "step": 16660 }, { "epoch": 0.9859821375761519, "grad_norm": 0.25616034865379333, "learning_rate": 2.9631430816907662e-05, "loss": 0.0975, "step": 16670 }, { "epoch": 0.9865736085644999, "grad_norm": 0.373921275138855, "learning_rate": 2.963079748128071e-05, "loss": 0.0911, "step": 16680 }, { "epoch": 0.9871650795528479, "grad_norm": 0.19814570248126984, "learning_rate": 2.9630163608750624e-05, "loss": 0.0813, "step": 16690 }, { "epoch": 0.9877565505411959, "grad_norm": 0.30185142159461975, "learning_rate": 2.962952919934067e-05, "loss": 0.071, "step": 16700 }, { "epoch": 0.9883480215295439, "grad_norm": 0.2776850461959839, "learning_rate": 2.9628894253074133e-05, "loss": 0.1024, "step": 16710 }, { "epoch": 0.988939492517892, "grad_norm": 0.26387250423431396, "learning_rate": 2.9628258769974302e-05, "loss": 0.0905, "step": 16720 }, { "epoch": 0.98953096350624, "grad_norm": 0.34088224172592163, "learning_rate": 2.9627622750064505e-05, "loss": 0.0975, "step": 16730 }, { "epoch": 0.990122434494588, "grad_norm": 0.2585127353668213, "learning_rate": 2.962698619336808e-05, "loss": 0.0707, "step": 16740 }, { "epoch": 0.990713905482936, "grad_norm": 0.38319113850593567, "learning_rate": 2.9626349099908385e-05, "loss": 0.0765, "step": 16750 }, { "epoch": 0.9913053764712841, "grad_norm": 0.2967897355556488, "learning_rate": 2.9625711469708806e-05, "loss": 0.0944, "step": 16760 }, { "epoch": 0.9918968474596321, "grad_norm": 0.23643963038921356, "learning_rate": 2.9625073302792735e-05, "loss": 0.0773, "step": 16770 }, { "epoch": 0.9924883184479801, "grad_norm": 0.24326668679714203, "learning_rate": 2.962443459918359e-05, "loss": 0.0802, "step": 16780 }, { "epoch": 0.9930797894363281, "grad_norm": 0.3119269013404846, "learning_rate": 2.962379535890482e-05, "loss": 0.0907, "step": 16790 }, { "epoch": 0.9936712604246761, "grad_norm": 0.38082417845726013, "learning_rate": 2.962315558197987e-05, "loss": 0.0775, "step": 16800 }, { "epoch": 0.9942627314130242, "grad_norm": 0.24004894495010376, "learning_rate": 2.962251526843222e-05, "loss": 0.0875, "step": 16810 }, { "epoch": 0.9948542024013722, "grad_norm": 0.28469404578208923, "learning_rate": 2.9621874418285375e-05, "loss": 0.0845, "step": 16820 }, { "epoch": 0.9954456733897202, "grad_norm": 0.20995399355888367, "learning_rate": 2.9621233031562847e-05, "loss": 0.0735, "step": 16830 }, { "epoch": 0.9960371443780682, "grad_norm": 0.4911569356918335, "learning_rate": 2.9620591108288173e-05, "loss": 0.0754, "step": 16840 }, { "epoch": 0.9966286153664162, "grad_norm": 0.24788491427898407, "learning_rate": 2.961994864848491e-05, "loss": 0.0677, "step": 16850 }, { "epoch": 0.9972200863547643, "grad_norm": 0.3711113929748535, "learning_rate": 2.9619305652176633e-05, "loss": 0.0956, "step": 16860 }, { "epoch": 0.9978115573431123, "grad_norm": 0.3531755805015564, "learning_rate": 2.961866211938694e-05, "loss": 0.0832, "step": 16870 }, { "epoch": 0.9984030283314603, "grad_norm": 0.18299508094787598, "learning_rate": 2.9618018050139447e-05, "loss": 0.0742, "step": 16880 }, { "epoch": 0.9989944993198083, "grad_norm": 0.3496200442314148, "learning_rate": 2.9617373444457786e-05, "loss": 0.0775, "step": 16890 }, { "epoch": 0.9995859703081564, "grad_norm": 0.21856151521205902, "learning_rate": 2.9616728302365616e-05, "loss": 0.0661, "step": 16900 }, { "epoch": 1.0, "eval_accuracy": 0.6523937851415643, "eval_animal_abuse/accuracy": 0.9943440795821273, "eval_animal_abuse/f1": 0.7635605006954103, "eval_animal_abuse/fpr": 0.0033823578904856404, "eval_animal_abuse/precision": 0.732, "eval_animal_abuse/recall": 0.7979651162790697, "eval_animal_abuse/threshold": 0.2991051971912384, "eval_child_abuse/accuracy": 0.9964733672688558, "eval_child_abuse/f1": 0.6697819314641744, "eval_child_abuse/fpr": 0.0015724059483782447, "eval_child_abuse/precision": 0.6957928802588996, "eval_child_abuse/recall": 0.6456456456456456, "eval_child_abuse/threshold": 0.49900051951408386, "eval_controversial_topics,politics/accuracy": 0.9667797850750242, "eval_controversial_topics,politics/f1": 0.4950695322376738, "eval_controversial_topics,politics/fpr": 0.019460461285008203, "eval_controversial_topics,politics/precision": 0.4633222905821107, "eval_controversial_topics,politics/recall": 0.5314875135722041, "eval_controversial_topics,politics/threshold": 0.4073334038257599, "eval_discrimination,stereotype,injustice/accuracy": 0.9488638253984097, "eval_discrimination,stereotype,injustice/f1": 0.706847224871257, "eval_discrimination,stereotype,injustice/fpr": 0.036144142841652446, "eval_discrimination,stereotype,injustice/precision": 0.6494917630564319, "eval_discrimination,stereotype,injustice/recall": 0.7753138075313808, "eval_discrimination,stereotype,injustice/threshold": 0.23231014609336853, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9726685963336328, "eval_drug_abuse,weapons,banned_substance/f1": 0.7636311322111926, "eval_drug_abuse,weapons,banned_substance/fpr": 0.01605908898603861, "eval_drug_abuse,weapons,banned_substance/precision": 0.7444600280504908, "eval_drug_abuse,weapons,banned_substance/recall": 0.7838157117542823, "eval_drug_abuse,weapons,banned_substance/threshold": 0.47962552309036255, "eval_financial_crime,property_crime,theft/accuracy": 0.9579299331270585, "eval_financial_crime,property_crime,theft/f1": 0.7946740277665015, "eval_financial_crime,property_crime,theft/fpr": 0.028970016401599564, "eval_financial_crime,property_crime,theft/precision": 0.7568821527992576, "eval_financial_crime,property_crime,theft/recall": 0.8364382156896257, "eval_financial_crime,property_crime,theft/threshold": 0.36206647753715515, "eval_flagged/accuracy": 0.8380576903882623, "eval_flagged/aucpr": 0.891947458956023, "eval_flagged/f1": 0.858220584594323, "eval_flagged/fpr": 0.2155796422008018, "eval_flagged/precision": 0.8367601953879359, "eval_flagged/recall": 0.8808107380945264, "eval_hate_speech,offensive_language/accuracy": 0.9481485178161493, "eval_hate_speech,offensive_language/f1": 0.692997143701369, "eval_hate_speech,offensive_language/fpr": 0.022857664900420203, "eval_hate_speech,offensive_language/precision": 0.7376808555252673, "eval_hate_speech,offensive_language/recall": 0.6534175334323923, "eval_hate_speech,offensive_language/threshold": 0.3711366057395935, "eval_loss": 0.08566619455814362, "eval_macro_f1": 0.6585203378827451, "eval_macro_precision": 0.6442850192339123, "eval_macro_recall": 0.6802716311190036, "eval_micro_f1": 0.7396906541974544, "eval_micro_precision": 0.7260412385258255, "eval_micro_recall": 0.7538631150310294, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9761785940047244, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.20532741398446172, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.0149200949766768, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.17273576097105509, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.253077975376197, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.16132023930549622, "eval_non_violent_unethical_behavior/accuracy": 0.8791296536580497, "eval_non_violent_unethical_behavior/f1": 0.6854817764695698, "eval_non_violent_unethical_behavior/fpr": 0.06722305264906148, "eval_non_violent_unethical_behavior/precision": 0.7097525994980279, "eval_non_violent_unethical_behavior/recall": 0.6628160053574418, "eval_non_violent_unethical_behavior/threshold": 0.3951341509819031, "eval_privacy_violation/accuracy": 0.97945570083508, "eval_privacy_violation/f1": 0.8035003977724742, "eval_privacy_violation/fpr": 0.013893749562539347, "eval_privacy_violation/precision": 0.7607713166616451, "eval_privacy_violation/recall": 0.8513149022252191, "eval_privacy_violation/threshold": 0.4525359272956848, "eval_runtime": 90.3218, "eval_samples_per_second": 665.554, "eval_self_harm/accuracy": 0.9965731776291712, "eval_self_harm/f1": 0.726063829787234, "eval_self_harm/fpr": 0.0011557014605386555, "eval_self_harm/precision": 0.7982456140350878, "eval_self_harm/recall": 0.6658536585365854, "eval_self_harm/threshold": 0.6766199469566345, "eval_sexually_explicit,adult_content/accuracy": 0.9831653192268024, "eval_sexually_explicit,adult_content/f1": 0.6673241288625904, "eval_sexually_explicit,adult_content/fpr": 0.009886307464162119, "eval_sexually_explicit,adult_content/precision": 0.6363636363636364, "eval_sexually_explicit,adult_content/recall": 0.7014512785072564, "eval_sexually_explicit,adult_content/threshold": 0.28378137946128845, "eval_steps_per_second": 41.607, "eval_terrorism,organized_crime/accuracy": 0.9878397711015737, "eval_terrorism,organized_crime/f1": 0.4003281378178835, "eval_terrorism,organized_crime/fpr": 0.008284003823386366, "eval_terrorism,organized_crime/precision": 0.33062330623306235, "eval_terrorism,organized_crime/recall": 0.5072765072765073, "eval_terrorism,organized_crime/threshold": 0.2030746191740036, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9160761220348006, "eval_violence,aiding_and_abetting,incitement/f1": 0.8446975527166385, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06284846561805887, "eval_violence,aiding_and_abetting,incitement/precision": 0.8318680652397987, "eval_violence,aiding_and_abetting,incitement/recall": 0.8579289644822411, "eval_violence,aiding_and_abetting,incitement/threshold": 0.4030968248844147, "step": 16907 }, { "epoch": 1.0001774412965043, "grad_norm": 0.24168743193149567, "learning_rate": 2.961608262388661e-05, "loss": 0.0869, "step": 16910 }, { "epoch": 1.0007689122848524, "grad_norm": 0.3728097379207611, "learning_rate": 2.9615436409044458e-05, "loss": 0.0913, "step": 16920 }, { "epoch": 1.0013603832732005, "grad_norm": 0.4404839277267456, "learning_rate": 2.961478965786288e-05, "loss": 0.0855, "step": 16930 }, { "epoch": 1.0019518542615484, "grad_norm": 0.30489930510520935, "learning_rate": 2.9614142370365607e-05, "loss": 0.0802, "step": 16940 }, { "epoch": 1.0025433252498965, "grad_norm": 0.30829858779907227, "learning_rate": 2.9613494546576396e-05, "loss": 0.0757, "step": 16950 }, { "epoch": 1.0031347962382444, "grad_norm": 1.0674141645431519, "learning_rate": 2.961284618651901e-05, "loss": 0.066, "step": 16960 }, { "epoch": 1.0037262672265925, "grad_norm": 0.2531295716762543, "learning_rate": 2.9612197290217255e-05, "loss": 0.0941, "step": 16970 }, { "epoch": 1.0043177382149406, "grad_norm": 0.28287559747695923, "learning_rate": 2.9611547857694935e-05, "loss": 0.0894, "step": 16980 }, { "epoch": 1.0049092092032885, "grad_norm": 0.29789769649505615, "learning_rate": 2.9610897888975884e-05, "loss": 0.0754, "step": 16990 }, { "epoch": 1.0055006801916366, "grad_norm": 0.4699673652648926, "learning_rate": 2.9610247384083954e-05, "loss": 0.0759, "step": 17000 }, { "epoch": 1.0060921511799845, "grad_norm": 0.3208353519439697, "learning_rate": 2.960959634304302e-05, "loss": 0.0771, "step": 17010 }, { "epoch": 1.0066836221683326, "grad_norm": 0.28807389736175537, "learning_rate": 2.9608944765876963e-05, "loss": 0.1007, "step": 17020 }, { "epoch": 1.0072750931566807, "grad_norm": 0.2635231614112854, "learning_rate": 2.9608292652609704e-05, "loss": 0.0822, "step": 17030 }, { "epoch": 1.0078665641450286, "grad_norm": 0.33043619990348816, "learning_rate": 2.960764000326517e-05, "loss": 0.0786, "step": 17040 }, { "epoch": 1.0084580351333767, "grad_norm": 0.30759310722351074, "learning_rate": 2.9606986817867304e-05, "loss": 0.0758, "step": 17050 }, { "epoch": 1.0090495061217248, "grad_norm": 0.6840531826019287, "learning_rate": 2.960633309644009e-05, "loss": 0.0735, "step": 17060 }, { "epoch": 1.0096409771100727, "grad_norm": 0.3298848867416382, "learning_rate": 2.960567883900751e-05, "loss": 0.0908, "step": 17070 }, { "epoch": 1.0102324480984208, "grad_norm": 0.24331288039684296, "learning_rate": 2.9605024045593572e-05, "loss": 0.0836, "step": 17080 }, { "epoch": 1.0108239190867687, "grad_norm": 0.3902689516544342, "learning_rate": 2.9604368716222305e-05, "loss": 0.0798, "step": 17090 }, { "epoch": 1.0114153900751168, "grad_norm": 0.32320040464401245, "learning_rate": 2.9603712850917757e-05, "loss": 0.077, "step": 17100 }, { "epoch": 1.012006861063465, "grad_norm": 0.5285111665725708, "learning_rate": 2.9603056449704002e-05, "loss": 0.0755, "step": 17110 }, { "epoch": 1.0125983320518128, "grad_norm": 0.4833056330680847, "learning_rate": 2.960239951260512e-05, "loss": 0.0795, "step": 17120 }, { "epoch": 1.013189803040161, "grad_norm": 0.3415476679801941, "learning_rate": 2.9601742039645224e-05, "loss": 0.0848, "step": 17130 }, { "epoch": 1.0137812740285088, "grad_norm": 0.3362158238887787, "learning_rate": 2.9601084030848438e-05, "loss": 0.084, "step": 17140 }, { "epoch": 1.014372745016857, "grad_norm": 0.2126520425081253, "learning_rate": 2.960042548623891e-05, "loss": 0.0739, "step": 17150 }, { "epoch": 1.014964216005205, "grad_norm": 0.31921565532684326, "learning_rate": 2.9599766405840805e-05, "loss": 0.0825, "step": 17160 }, { "epoch": 1.015555686993553, "grad_norm": 0.3327826261520386, "learning_rate": 2.9599106789678312e-05, "loss": 0.0886, "step": 17170 }, { "epoch": 1.016147157981901, "grad_norm": 2.309460163116455, "learning_rate": 2.9598446637775636e-05, "loss": 0.0851, "step": 17180 }, { "epoch": 1.016738628970249, "grad_norm": 0.21135446429252625, "learning_rate": 2.9597785950157e-05, "loss": 0.0783, "step": 17190 }, { "epoch": 1.017330099958597, "grad_norm": 0.3058203160762787, "learning_rate": 2.9597124726846654e-05, "loss": 0.0786, "step": 17200 }, { "epoch": 1.0179215709469451, "grad_norm": 0.3206031322479248, "learning_rate": 2.9596462967868858e-05, "loss": 0.07, "step": 17210 }, { "epoch": 1.018513041935293, "grad_norm": 0.367458313703537, "learning_rate": 2.9595800673247894e-05, "loss": 0.097, "step": 17220 }, { "epoch": 1.0191045129236411, "grad_norm": 0.29557329416275024, "learning_rate": 2.9595137843008072e-05, "loss": 0.0911, "step": 17230 }, { "epoch": 1.019695983911989, "grad_norm": 0.27824878692626953, "learning_rate": 2.9594474477173715e-05, "loss": 0.0788, "step": 17240 }, { "epoch": 1.0202874549003371, "grad_norm": 0.257636159658432, "learning_rate": 2.9593810575769167e-05, "loss": 0.0769, "step": 17250 }, { "epoch": 1.0208789258886852, "grad_norm": 0.2907682955265045, "learning_rate": 2.9593146138818792e-05, "loss": 0.0672, "step": 17260 }, { "epoch": 1.0214703968770331, "grad_norm": 0.4218895733356476, "learning_rate": 2.9592481166346964e-05, "loss": 0.0917, "step": 17270 }, { "epoch": 1.0220618678653812, "grad_norm": 0.24341516196727753, "learning_rate": 2.959181565837809e-05, "loss": 0.0851, "step": 17280 }, { "epoch": 1.0226533388537293, "grad_norm": 0.2598972022533417, "learning_rate": 2.9591149614936598e-05, "loss": 0.0764, "step": 17290 }, { "epoch": 1.0232448098420772, "grad_norm": 0.4207022190093994, "learning_rate": 2.9590483036046925e-05, "loss": 0.0711, "step": 17300 }, { "epoch": 1.0238362808304253, "grad_norm": 0.31123048067092896, "learning_rate": 2.958981592173353e-05, "loss": 0.0838, "step": 17310 }, { "epoch": 1.0244277518187732, "grad_norm": 0.2937064468860626, "learning_rate": 2.9589148272020893e-05, "loss": 0.0819, "step": 17320 }, { "epoch": 1.0250192228071213, "grad_norm": 0.3269467055797577, "learning_rate": 2.9588480086933524e-05, "loss": 0.0769, "step": 17330 }, { "epoch": 1.0256106937954694, "grad_norm": 0.26060816645622253, "learning_rate": 2.9587811366495935e-05, "loss": 0.0802, "step": 17340 }, { "epoch": 1.0262021647838173, "grad_norm": 0.20454570651054382, "learning_rate": 2.9587142110732666e-05, "loss": 0.0695, "step": 17350 }, { "epoch": 1.0267936357721654, "grad_norm": 0.4391438066959381, "learning_rate": 2.9586472319668278e-05, "loss": 0.0766, "step": 17360 }, { "epoch": 1.0273851067605133, "grad_norm": 0.22391819953918457, "learning_rate": 2.9585801993327355e-05, "loss": 0.078, "step": 17370 }, { "epoch": 1.0279765777488614, "grad_norm": 0.24345768988132477, "learning_rate": 2.9585131131734488e-05, "loss": 0.0822, "step": 17380 }, { "epoch": 1.0285680487372095, "grad_norm": 0.3054044246673584, "learning_rate": 2.95844597349143e-05, "loss": 0.0788, "step": 17390 }, { "epoch": 1.0291595197255574, "grad_norm": 0.2870088815689087, "learning_rate": 2.958378780289143e-05, "loss": 0.0746, "step": 17400 }, { "epoch": 1.0297509907139055, "grad_norm": 0.271018385887146, "learning_rate": 2.9583115335690527e-05, "loss": 0.0756, "step": 17410 }, { "epoch": 1.0303424617022534, "grad_norm": 0.2636096179485321, "learning_rate": 2.958244233333628e-05, "loss": 0.0836, "step": 17420 }, { "epoch": 1.0309339326906015, "grad_norm": 0.3050462007522583, "learning_rate": 2.9581768795853378e-05, "loss": 0.0792, "step": 17430 }, { "epoch": 1.0315254036789496, "grad_norm": 0.2521854639053345, "learning_rate": 2.9581094723266547e-05, "loss": 0.0879, "step": 17440 }, { "epoch": 1.0321168746672975, "grad_norm": 0.2751416563987732, "learning_rate": 2.9580420115600516e-05, "loss": 0.0718, "step": 17450 }, { "epoch": 1.0327083456556456, "grad_norm": 0.2972453832626343, "learning_rate": 2.957974497288004e-05, "loss": 0.072, "step": 17460 }, { "epoch": 1.0332998166439935, "grad_norm": 0.24716466665267944, "learning_rate": 2.9579069295129895e-05, "loss": 0.0754, "step": 17470 }, { "epoch": 1.0338912876323416, "grad_norm": 0.35440191626548767, "learning_rate": 2.9578393082374878e-05, "loss": 0.084, "step": 17480 }, { "epoch": 1.0344827586206897, "grad_norm": 0.24264678359031677, "learning_rate": 2.9577716334639804e-05, "loss": 0.0718, "step": 17490 }, { "epoch": 1.0350742296090376, "grad_norm": 0.2470598667860031, "learning_rate": 2.9577039051949508e-05, "loss": 0.0709, "step": 17500 }, { "epoch": 1.0356657005973857, "grad_norm": 0.2971654236316681, "learning_rate": 2.957636123432884e-05, "loss": 0.0737, "step": 17510 }, { "epoch": 1.0362571715857338, "grad_norm": 0.28784722089767456, "learning_rate": 2.957568288180268e-05, "loss": 0.0886, "step": 17520 }, { "epoch": 1.0368486425740817, "grad_norm": 0.3560084104537964, "learning_rate": 2.9575003994395916e-05, "loss": 0.0888, "step": 17530 }, { "epoch": 1.0374401135624298, "grad_norm": 0.22591233253479004, "learning_rate": 2.9574324572133464e-05, "loss": 0.0844, "step": 17540 }, { "epoch": 1.0380315845507777, "grad_norm": 0.31691503524780273, "learning_rate": 2.957364461504026e-05, "loss": 0.0673, "step": 17550 }, { "epoch": 1.0386230555391258, "grad_norm": 0.3441419303417206, "learning_rate": 2.9572964123141248e-05, "loss": 0.0767, "step": 17560 }, { "epoch": 1.039214526527474, "grad_norm": 0.2866358458995819, "learning_rate": 2.95722830964614e-05, "loss": 0.0977, "step": 17570 }, { "epoch": 1.0398059975158218, "grad_norm": 0.25115692615509033, "learning_rate": 2.9571601535025718e-05, "loss": 0.0803, "step": 17580 }, { "epoch": 1.04039746850417, "grad_norm": 0.24948909878730774, "learning_rate": 2.9570919438859205e-05, "loss": 0.07, "step": 17590 }, { "epoch": 1.0409889394925178, "grad_norm": 0.3040098547935486, "learning_rate": 2.9570236807986895e-05, "loss": 0.0703, "step": 17600 }, { "epoch": 1.041580410480866, "grad_norm": 0.3188563585281372, "learning_rate": 2.9569553642433833e-05, "loss": 0.0744, "step": 17610 }, { "epoch": 1.042171881469214, "grad_norm": 0.29475969076156616, "learning_rate": 2.956886994222509e-05, "loss": 0.0962, "step": 17620 }, { "epoch": 1.042763352457562, "grad_norm": 0.34358662366867065, "learning_rate": 2.9568185707385764e-05, "loss": 0.0898, "step": 17630 }, { "epoch": 1.04335482344591, "grad_norm": 0.27744197845458984, "learning_rate": 2.9567500937940956e-05, "loss": 0.077, "step": 17640 }, { "epoch": 1.043946294434258, "grad_norm": 0.22696976363658905, "learning_rate": 2.95668156339158e-05, "loss": 0.0722, "step": 17650 }, { "epoch": 1.044537765422606, "grad_norm": 0.3735019564628601, "learning_rate": 2.9566129795335437e-05, "loss": 0.0807, "step": 17660 }, { "epoch": 1.0451292364109541, "grad_norm": 0.2343236207962036, "learning_rate": 2.956544342222504e-05, "loss": 0.0907, "step": 17670 }, { "epoch": 1.045720707399302, "grad_norm": 0.5092349648475647, "learning_rate": 2.95647565146098e-05, "loss": 0.0806, "step": 17680 }, { "epoch": 1.0463121783876501, "grad_norm": 0.2368614822626114, "learning_rate": 2.956406907251492e-05, "loss": 0.0763, "step": 17690 }, { "epoch": 1.046903649375998, "grad_norm": 0.23518724739551544, "learning_rate": 2.9563381095965628e-05, "loss": 0.0691, "step": 17700 }, { "epoch": 1.047495120364346, "grad_norm": 0.3757396340370178, "learning_rate": 2.9562692584987166e-05, "loss": 0.0812, "step": 17710 }, { "epoch": 1.0480865913526942, "grad_norm": 0.31954076886177063, "learning_rate": 2.9562003539604806e-05, "loss": 0.0896, "step": 17720 }, { "epoch": 1.048678062341042, "grad_norm": 0.33757755160331726, "learning_rate": 2.956131395984383e-05, "loss": 0.0836, "step": 17730 }, { "epoch": 1.0492695333293902, "grad_norm": 0.25755926966667175, "learning_rate": 2.9560623845729552e-05, "loss": 0.0782, "step": 17740 }, { "epoch": 1.0498610043177383, "grad_norm": 0.2267039716243744, "learning_rate": 2.9559933197287287e-05, "loss": 0.0726, "step": 17750 }, { "epoch": 1.0504524753060862, "grad_norm": 0.31417131423950195, "learning_rate": 2.955924201454238e-05, "loss": 0.0792, "step": 17760 }, { "epoch": 1.0510439462944343, "grad_norm": 0.3243836462497711, "learning_rate": 2.9558550297520203e-05, "loss": 0.0946, "step": 17770 }, { "epoch": 1.0516354172827822, "grad_norm": 0.24112512171268463, "learning_rate": 2.955785804624613e-05, "loss": 0.0954, "step": 17780 }, { "epoch": 1.0522268882711303, "grad_norm": 0.186975359916687, "learning_rate": 2.955716526074557e-05, "loss": 0.0768, "step": 17790 }, { "epoch": 1.0528183592594784, "grad_norm": 0.3758186995983124, "learning_rate": 2.955647194104395e-05, "loss": 0.074, "step": 17800 }, { "epoch": 1.0534098302478263, "grad_norm": 0.2612709403038025, "learning_rate": 2.9555778087166706e-05, "loss": 0.0724, "step": 17810 }, { "epoch": 1.0540013012361744, "grad_norm": 0.45295244455337524, "learning_rate": 2.95550836991393e-05, "loss": 0.0827, "step": 17820 }, { "epoch": 1.0545927722245223, "grad_norm": 0.2633380591869354, "learning_rate": 2.9554388776987215e-05, "loss": 0.0939, "step": 17830 }, { "epoch": 1.0551842432128704, "grad_norm": 0.4371947944164276, "learning_rate": 2.9553693320735956e-05, "loss": 0.0829, "step": 17840 }, { "epoch": 1.0557757142012185, "grad_norm": 0.2949948012828827, "learning_rate": 2.955299733041104e-05, "loss": 0.079, "step": 17850 }, { "epoch": 1.0563671851895664, "grad_norm": 1.1749624013900757, "learning_rate": 2.9552300806038008e-05, "loss": 0.0654, "step": 17860 }, { "epoch": 1.0569586561779145, "grad_norm": 0.2524203360080719, "learning_rate": 2.9551603747642425e-05, "loss": 0.0884, "step": 17870 }, { "epoch": 1.0575501271662624, "grad_norm": 0.3762047290802002, "learning_rate": 2.9550906155249862e-05, "loss": 0.0742, "step": 17880 }, { "epoch": 1.0581415981546105, "grad_norm": 0.2624726891517639, "learning_rate": 2.9550208028885923e-05, "loss": 0.0812, "step": 17890 }, { "epoch": 1.0587330691429586, "grad_norm": 0.2884615361690521, "learning_rate": 2.9549509368576233e-05, "loss": 0.075, "step": 17900 }, { "epoch": 1.0593245401313065, "grad_norm": 0.31584182381629944, "learning_rate": 2.9548810174346423e-05, "loss": 0.076, "step": 17910 }, { "epoch": 1.0599160111196546, "grad_norm": 0.3648366332054138, "learning_rate": 2.9548110446222147e-05, "loss": 0.0944, "step": 17920 }, { "epoch": 1.0605074821080025, "grad_norm": 0.22742106020450592, "learning_rate": 2.9547410184229092e-05, "loss": 0.0924, "step": 17930 }, { "epoch": 1.0610989530963506, "grad_norm": 0.28838610649108887, "learning_rate": 2.9546709388392956e-05, "loss": 0.0929, "step": 17940 }, { "epoch": 1.0616904240846987, "grad_norm": 0.357505202293396, "learning_rate": 2.9546008058739448e-05, "loss": 0.0735, "step": 17950 }, { "epoch": 1.0622818950730466, "grad_norm": 0.3551344871520996, "learning_rate": 2.954530619529431e-05, "loss": 0.0752, "step": 17960 }, { "epoch": 1.0628733660613947, "grad_norm": 0.25288185477256775, "learning_rate": 2.9544603798083296e-05, "loss": 0.0937, "step": 17970 }, { "epoch": 1.0634648370497426, "grad_norm": 0.5625773072242737, "learning_rate": 2.9543900867132182e-05, "loss": 0.0815, "step": 17980 }, { "epoch": 1.0640563080380907, "grad_norm": 0.32866019010543823, "learning_rate": 2.9543197402466763e-05, "loss": 0.0853, "step": 17990 }, { "epoch": 1.0646477790264388, "grad_norm": 0.17845532298088074, "learning_rate": 2.9542493404112858e-05, "loss": 0.0757, "step": 18000 }, { "epoch": 1.0652392500147867, "grad_norm": 0.5615264773368835, "learning_rate": 2.95417888720963e-05, "loss": 0.0887, "step": 18010 }, { "epoch": 1.0658307210031348, "grad_norm": 0.301735520362854, "learning_rate": 2.9541083806442936e-05, "loss": 0.0901, "step": 18020 }, { "epoch": 1.066422191991483, "grad_norm": 0.34561067819595337, "learning_rate": 2.9540378207178647e-05, "loss": 0.095, "step": 18030 }, { "epoch": 1.0670136629798308, "grad_norm": 0.23869717121124268, "learning_rate": 2.953967207432932e-05, "loss": 0.0843, "step": 18040 }, { "epoch": 1.067605133968179, "grad_norm": 0.28174829483032227, "learning_rate": 2.9538965407920876e-05, "loss": 0.0648, "step": 18050 }, { "epoch": 1.0681966049565268, "grad_norm": 0.33542466163635254, "learning_rate": 2.9538258207979243e-05, "loss": 0.0693, "step": 18060 }, { "epoch": 1.068788075944875, "grad_norm": 0.2768996059894562, "learning_rate": 2.9537550474530374e-05, "loss": 0.0871, "step": 18070 }, { "epoch": 1.069379546933223, "grad_norm": 0.3773219883441925, "learning_rate": 2.953684220760024e-05, "loss": 0.0823, "step": 18080 }, { "epoch": 1.069971017921571, "grad_norm": 0.3396288752555847, "learning_rate": 2.9536133407214827e-05, "loss": 0.0698, "step": 18090 }, { "epoch": 1.070562488909919, "grad_norm": 0.26451870799064636, "learning_rate": 2.9535424073400156e-05, "loss": 0.0679, "step": 18100 }, { "epoch": 1.071153959898267, "grad_norm": 0.29691019654273987, "learning_rate": 2.9534714206182248e-05, "loss": 0.0824, "step": 18110 }, { "epoch": 1.071745430886615, "grad_norm": 0.29345738887786865, "learning_rate": 2.9534003805587158e-05, "loss": 0.1, "step": 18120 }, { "epoch": 1.072336901874963, "grad_norm": 0.24920059740543365, "learning_rate": 2.9533292871640957e-05, "loss": 0.0843, "step": 18130 }, { "epoch": 1.072928372863311, "grad_norm": 0.20463615655899048, "learning_rate": 2.9532581404369728e-05, "loss": 0.0788, "step": 18140 }, { "epoch": 1.073519843851659, "grad_norm": 0.2397138476371765, "learning_rate": 2.9531869403799582e-05, "loss": 0.0802, "step": 18150 }, { "epoch": 1.0741113148400072, "grad_norm": 0.4536987841129303, "learning_rate": 2.953115686995665e-05, "loss": 0.073, "step": 18160 }, { "epoch": 1.074702785828355, "grad_norm": 0.39645707607269287, "learning_rate": 2.9530443802867076e-05, "loss": 0.0767, "step": 18170 }, { "epoch": 1.0752942568167032, "grad_norm": 0.4084077775478363, "learning_rate": 2.9529730202557025e-05, "loss": 0.076, "step": 18180 }, { "epoch": 1.075885727805051, "grad_norm": 0.3456519544124603, "learning_rate": 2.9529016069052696e-05, "loss": 0.0835, "step": 18190 }, { "epoch": 1.0764771987933992, "grad_norm": 0.21205629408359528, "learning_rate": 2.952830140238028e-05, "loss": 0.0713, "step": 18200 }, { "epoch": 1.077068669781747, "grad_norm": 0.31428778171539307, "learning_rate": 2.9527586202566008e-05, "loss": 0.0714, "step": 18210 }, { "epoch": 1.0776601407700952, "grad_norm": 0.34709611535072327, "learning_rate": 2.952687046963613e-05, "loss": 0.0892, "step": 18220 }, { "epoch": 1.0782516117584433, "grad_norm": 0.3639043867588043, "learning_rate": 2.952615420361691e-05, "loss": 0.0824, "step": 18230 }, { "epoch": 1.0788430827467912, "grad_norm": 0.35111531615257263, "learning_rate": 2.9525437404534633e-05, "loss": 0.0826, "step": 18240 }, { "epoch": 1.0794345537351393, "grad_norm": 0.31536129117012024, "learning_rate": 2.95247200724156e-05, "loss": 0.0826, "step": 18250 }, { "epoch": 1.0800260247234874, "grad_norm": 0.2637863755226135, "learning_rate": 2.9524002207286133e-05, "loss": 0.0812, "step": 18260 }, { "epoch": 1.0806174957118353, "grad_norm": 0.26015734672546387, "learning_rate": 2.9523283809172583e-05, "loss": 0.0781, "step": 18270 }, { "epoch": 1.0812089667001834, "grad_norm": 0.32121431827545166, "learning_rate": 2.9522564878101305e-05, "loss": 0.0953, "step": 18280 }, { "epoch": 1.0818004376885313, "grad_norm": 0.28728213906288147, "learning_rate": 2.9521845414098686e-05, "loss": 0.0737, "step": 18290 }, { "epoch": 1.0823919086768794, "grad_norm": 0.27093178033828735, "learning_rate": 2.9521125417191126e-05, "loss": 0.079, "step": 18300 }, { "epoch": 1.0829833796652275, "grad_norm": 0.4742007553577423, "learning_rate": 2.9520404887405052e-05, "loss": 0.087, "step": 18310 }, { "epoch": 1.0835748506535754, "grad_norm": 0.41651350259780884, "learning_rate": 2.9519683824766892e-05, "loss": 0.078, "step": 18320 }, { "epoch": 1.0841663216419235, "grad_norm": 0.26944640278816223, "learning_rate": 2.9518962229303124e-05, "loss": 0.0968, "step": 18330 }, { "epoch": 1.0847577926302714, "grad_norm": 0.251997172832489, "learning_rate": 2.951824010104021e-05, "loss": 0.0945, "step": 18340 }, { "epoch": 1.0853492636186195, "grad_norm": 0.20624178647994995, "learning_rate": 2.9517517440004668e-05, "loss": 0.0692, "step": 18350 }, { "epoch": 1.0859407346069676, "grad_norm": 0.32897311449050903, "learning_rate": 2.9516794246223003e-05, "loss": 0.0854, "step": 18360 }, { "epoch": 1.0865322055953155, "grad_norm": 0.22849713265895844, "learning_rate": 2.9516070519721762e-05, "loss": 0.0862, "step": 18370 }, { "epoch": 1.0871236765836636, "grad_norm": 0.25902485847473145, "learning_rate": 2.9515346260527498e-05, "loss": 0.0866, "step": 18380 }, { "epoch": 1.0877151475720117, "grad_norm": 0.2932555079460144, "learning_rate": 2.9514621468666797e-05, "loss": 0.0869, "step": 18390 }, { "epoch": 1.0883066185603596, "grad_norm": 0.4053296446800232, "learning_rate": 2.951389614416625e-05, "loss": 0.0713, "step": 18400 }, { "epoch": 1.0888980895487077, "grad_norm": 0.25874555110931396, "learning_rate": 2.9513170287052475e-05, "loss": 0.07, "step": 18410 }, { "epoch": 1.0894895605370556, "grad_norm": 0.2749444246292114, "learning_rate": 2.9512443897352105e-05, "loss": 0.1026, "step": 18420 }, { "epoch": 1.0900810315254037, "grad_norm": 0.2403583526611328, "learning_rate": 2.9511716975091804e-05, "loss": 0.0774, "step": 18430 }, { "epoch": 1.0906725025137516, "grad_norm": 0.253406286239624, "learning_rate": 2.951098952029824e-05, "loss": 0.0748, "step": 18440 }, { "epoch": 1.0912639735020997, "grad_norm": 0.2992539703845978, "learning_rate": 2.9510261532998116e-05, "loss": 0.0651, "step": 18450 }, { "epoch": 1.0918554444904478, "grad_norm": 0.3170943558216095, "learning_rate": 2.9509533013218145e-05, "loss": 0.0826, "step": 18460 }, { "epoch": 1.0924469154787957, "grad_norm": 0.28898730874061584, "learning_rate": 2.9508803960985055e-05, "loss": 0.0871, "step": 18470 }, { "epoch": 1.0930383864671438, "grad_norm": 0.37102609872817993, "learning_rate": 2.9508074376325607e-05, "loss": 0.0863, "step": 18480 }, { "epoch": 1.093629857455492, "grad_norm": 0.22643573582172394, "learning_rate": 2.9507344259266568e-05, "loss": 0.0866, "step": 18490 }, { "epoch": 1.0942213284438398, "grad_norm": 0.22442157566547394, "learning_rate": 2.9506613609834735e-05, "loss": 0.0882, "step": 18500 }, { "epoch": 1.094812799432188, "grad_norm": 0.3355563282966614, "learning_rate": 2.950588242805692e-05, "loss": 0.0762, "step": 18510 }, { "epoch": 1.0954042704205358, "grad_norm": 0.31044912338256836, "learning_rate": 2.950515071395996e-05, "loss": 0.0888, "step": 18520 }, { "epoch": 1.095995741408884, "grad_norm": 0.27234524488449097, "learning_rate": 2.95044184675707e-05, "loss": 0.099, "step": 18530 }, { "epoch": 1.096587212397232, "grad_norm": 0.2376391887664795, "learning_rate": 2.950368568891601e-05, "loss": 0.0815, "step": 18540 }, { "epoch": 1.09717868338558, "grad_norm": 0.34280920028686523, "learning_rate": 2.9502952378022782e-05, "loss": 0.0693, "step": 18550 }, { "epoch": 1.097770154373928, "grad_norm": 0.2206336408853531, "learning_rate": 2.9502218534917928e-05, "loss": 0.0794, "step": 18560 }, { "epoch": 1.0983616253622759, "grad_norm": 0.3568260371685028, "learning_rate": 2.950148415962838e-05, "loss": 0.1065, "step": 18570 }, { "epoch": 1.098953096350624, "grad_norm": 0.34200170636177063, "learning_rate": 2.950074925218108e-05, "loss": 0.0802, "step": 18580 }, { "epoch": 1.099544567338972, "grad_norm": 0.32180315256118774, "learning_rate": 2.9500013812603004e-05, "loss": 0.0827, "step": 18590 }, { "epoch": 1.10013603832732, "grad_norm": 0.3073802590370178, "learning_rate": 2.9499277840921138e-05, "loss": 0.0647, "step": 18600 }, { "epoch": 1.100727509315668, "grad_norm": 0.6482457518577576, "learning_rate": 2.9498541337162486e-05, "loss": 0.0725, "step": 18610 }, { "epoch": 1.1013189803040162, "grad_norm": 0.4288886785507202, "learning_rate": 2.949780430135408e-05, "loss": 0.0975, "step": 18620 }, { "epoch": 1.101910451292364, "grad_norm": 0.2440129518508911, "learning_rate": 2.9497066733522966e-05, "loss": 0.085, "step": 18630 }, { "epoch": 1.1025019222807122, "grad_norm": 0.29600560665130615, "learning_rate": 2.9496328633696208e-05, "loss": 0.0799, "step": 18640 }, { "epoch": 1.10309339326906, "grad_norm": 0.23407679796218872, "learning_rate": 2.9495590001900896e-05, "loss": 0.0691, "step": 18650 }, { "epoch": 1.1036848642574082, "grad_norm": 0.26205095648765564, "learning_rate": 2.949485083816413e-05, "loss": 0.0709, "step": 18660 }, { "epoch": 1.104276335245756, "grad_norm": 0.30578285455703735, "learning_rate": 2.9494111142513042e-05, "loss": 0.0842, "step": 18670 }, { "epoch": 1.1048678062341042, "grad_norm": 0.6085702776908875, "learning_rate": 2.9493370914974767e-05, "loss": 0.0917, "step": 18680 }, { "epoch": 1.1054592772224523, "grad_norm": 0.2269701361656189, "learning_rate": 2.9492630155576478e-05, "loss": 0.0771, "step": 18690 }, { "epoch": 1.1060507482108002, "grad_norm": 0.3264181315898895, "learning_rate": 2.949188886434535e-05, "loss": 0.0737, "step": 18700 }, { "epoch": 1.1066422191991483, "grad_norm": 0.3293164372444153, "learning_rate": 2.9491147041308596e-05, "loss": 0.0774, "step": 18710 }, { "epoch": 1.1072336901874964, "grad_norm": 0.2535981237888336, "learning_rate": 2.9490404686493433e-05, "loss": 0.0902, "step": 18720 }, { "epoch": 1.1078251611758443, "grad_norm": 0.2912566661834717, "learning_rate": 2.94896617999271e-05, "loss": 0.0881, "step": 18730 }, { "epoch": 1.1084166321641924, "grad_norm": 0.22365647554397583, "learning_rate": 2.948891838163686e-05, "loss": 0.0784, "step": 18740 }, { "epoch": 1.1090081031525403, "grad_norm": 0.2858092784881592, "learning_rate": 2.948817443165e-05, "loss": 0.0793, "step": 18750 }, { "epoch": 1.1095995741408884, "grad_norm": 0.2574170231819153, "learning_rate": 2.9487429949993818e-05, "loss": 0.0696, "step": 18760 }, { "epoch": 1.1101910451292365, "grad_norm": 0.3013181686401367, "learning_rate": 2.948668493669563e-05, "loss": 0.1025, "step": 18770 }, { "epoch": 1.1107825161175844, "grad_norm": 0.28765666484832764, "learning_rate": 2.948593939178278e-05, "loss": 0.0847, "step": 18780 }, { "epoch": 1.1113739871059325, "grad_norm": 0.21391737461090088, "learning_rate": 2.9485193315282618e-05, "loss": 0.0802, "step": 18790 }, { "epoch": 1.1119654580942804, "grad_norm": 0.22935307025909424, "learning_rate": 2.9484446707222537e-05, "loss": 0.078, "step": 18800 }, { "epoch": 1.1125569290826285, "grad_norm": 0.2604447901248932, "learning_rate": 2.9483699567629925e-05, "loss": 0.0761, "step": 18810 }, { "epoch": 1.1131484000709766, "grad_norm": 0.42835554480552673, "learning_rate": 2.9482951896532202e-05, "loss": 0.1039, "step": 18820 }, { "epoch": 1.1137398710593245, "grad_norm": 0.23095324635505676, "learning_rate": 2.9482203693956806e-05, "loss": 0.0835, "step": 18830 }, { "epoch": 1.1143313420476726, "grad_norm": 0.25063470005989075, "learning_rate": 2.9481454959931197e-05, "loss": 0.0803, "step": 18840 }, { "epoch": 1.1149228130360207, "grad_norm": 0.25460442900657654, "learning_rate": 2.9480705694482843e-05, "loss": 0.0729, "step": 18850 }, { "epoch": 1.1155142840243686, "grad_norm": 0.36690911650657654, "learning_rate": 2.947995589763925e-05, "loss": 0.0752, "step": 18860 }, { "epoch": 1.1161057550127167, "grad_norm": 0.22972306609153748, "learning_rate": 2.947920556942792e-05, "loss": 0.0934, "step": 18870 }, { "epoch": 1.1166972260010646, "grad_norm": 0.24749106168746948, "learning_rate": 2.9478454709876395e-05, "loss": 0.0754, "step": 18880 }, { "epoch": 1.1172886969894127, "grad_norm": 1.5667240619659424, "learning_rate": 2.947770331901223e-05, "loss": 0.0798, "step": 18890 }, { "epoch": 1.1178801679777606, "grad_norm": 0.16485442221164703, "learning_rate": 2.9476951396863e-05, "loss": 0.0712, "step": 18900 }, { "epoch": 1.1184716389661087, "grad_norm": 0.2982946038246155, "learning_rate": 2.9476198943456293e-05, "loss": 0.0725, "step": 18910 }, { "epoch": 1.1190631099544568, "grad_norm": 0.2800109386444092, "learning_rate": 2.9475445958819726e-05, "loss": 0.0813, "step": 18920 }, { "epoch": 1.1196545809428047, "grad_norm": 0.2767331302165985, "learning_rate": 2.947469244298093e-05, "loss": 0.0882, "step": 18930 }, { "epoch": 1.1202460519311528, "grad_norm": 0.19506707787513733, "learning_rate": 2.9473938395967557e-05, "loss": 0.0828, "step": 18940 }, { "epoch": 1.120837522919501, "grad_norm": 0.37685051560401917, "learning_rate": 2.9473183817807276e-05, "loss": 0.0849, "step": 18950 }, { "epoch": 1.1214289939078488, "grad_norm": 0.29225748777389526, "learning_rate": 2.9472428708527772e-05, "loss": 0.0786, "step": 18960 }, { "epoch": 1.122020464896197, "grad_norm": 0.41102051734924316, "learning_rate": 2.9471673068156768e-05, "loss": 0.0827, "step": 18970 }, { "epoch": 1.1226119358845448, "grad_norm": 0.33024293184280396, "learning_rate": 2.9470916896721987e-05, "loss": 0.0904, "step": 18980 }, { "epoch": 1.1232034068728929, "grad_norm": 0.20335519313812256, "learning_rate": 2.9470160194251175e-05, "loss": 0.0846, "step": 18990 }, { "epoch": 1.123794877861241, "grad_norm": 0.2262912094593048, "learning_rate": 2.946940296077211e-05, "loss": 0.0671, "step": 19000 }, { "epoch": 1.1243863488495889, "grad_norm": 0.3475838601589203, "learning_rate": 2.9468645196312568e-05, "loss": 0.0768, "step": 19010 }, { "epoch": 1.124977819837937, "grad_norm": 0.40413224697113037, "learning_rate": 2.9467886900900368e-05, "loss": 0.0938, "step": 19020 }, { "epoch": 1.1255692908262849, "grad_norm": 0.2467261701822281, "learning_rate": 2.946712807456333e-05, "loss": 0.0905, "step": 19030 }, { "epoch": 1.126160761814633, "grad_norm": 0.25036922097206116, "learning_rate": 2.9466368717329298e-05, "loss": 0.083, "step": 19040 }, { "epoch": 1.126752232802981, "grad_norm": 0.5117222666740417, "learning_rate": 2.9465608829226143e-05, "loss": 0.0783, "step": 19050 }, { "epoch": 1.127343703791329, "grad_norm": 0.3967338800430298, "learning_rate": 2.946484841028175e-05, "loss": 0.0786, "step": 19060 }, { "epoch": 1.127935174779677, "grad_norm": 0.28305119276046753, "learning_rate": 2.9464087460524027e-05, "loss": 0.0914, "step": 19070 }, { "epoch": 1.1285266457680252, "grad_norm": 0.2797723710536957, "learning_rate": 2.946332597998089e-05, "loss": 0.078, "step": 19080 }, { "epoch": 1.129118116756373, "grad_norm": 0.29865747690200806, "learning_rate": 2.946256396868029e-05, "loss": 0.0748, "step": 19090 }, { "epoch": 1.1297095877447212, "grad_norm": 0.4044674038887024, "learning_rate": 2.9461801426650183e-05, "loss": 0.0642, "step": 19100 }, { "epoch": 1.130301058733069, "grad_norm": 0.2541804611682892, "learning_rate": 2.946103835391856e-05, "loss": 0.0632, "step": 19110 }, { "epoch": 1.1308925297214172, "grad_norm": 0.3425607681274414, "learning_rate": 2.9460274750513423e-05, "loss": 0.087, "step": 19120 }, { "epoch": 1.131484000709765, "grad_norm": 0.20441533625125885, "learning_rate": 2.945951061646279e-05, "loss": 0.0821, "step": 19130 }, { "epoch": 1.1320754716981132, "grad_norm": 0.21668636798858643, "learning_rate": 2.94587459517947e-05, "loss": 0.0688, "step": 19140 }, { "epoch": 1.1326669426864613, "grad_norm": 0.3512345254421234, "learning_rate": 2.9457980756537218e-05, "loss": 0.0771, "step": 19150 }, { "epoch": 1.1332584136748092, "grad_norm": 0.44390869140625, "learning_rate": 2.9457215030718427e-05, "loss": 0.0851, "step": 19160 }, { "epoch": 1.1338498846631573, "grad_norm": 0.2769230008125305, "learning_rate": 2.9456448774366417e-05, "loss": 0.0885, "step": 19170 }, { "epoch": 1.1344413556515054, "grad_norm": 0.24628321826457977, "learning_rate": 2.9455681987509314e-05, "loss": 0.0832, "step": 19180 }, { "epoch": 1.1350328266398533, "grad_norm": 0.27229800820350647, "learning_rate": 2.9454914670175256e-05, "loss": 0.0776, "step": 19190 }, { "epoch": 1.1356242976282014, "grad_norm": 0.21947835385799408, "learning_rate": 2.94541468223924e-05, "loss": 0.0651, "step": 19200 }, { "epoch": 1.1362157686165493, "grad_norm": 0.42406949400901794, "learning_rate": 2.9453378444188924e-05, "loss": 0.0715, "step": 19210 }, { "epoch": 1.1368072396048974, "grad_norm": 0.2637139558792114, "learning_rate": 2.945260953559303e-05, "loss": 0.0888, "step": 19220 }, { "epoch": 1.1373987105932455, "grad_norm": 0.300957053899765, "learning_rate": 2.945184009663292e-05, "loss": 0.0847, "step": 19230 }, { "epoch": 1.1379901815815934, "grad_norm": 0.1882699877023697, "learning_rate": 2.9451070127336846e-05, "loss": 0.0872, "step": 19240 }, { "epoch": 1.1385816525699415, "grad_norm": 0.37506914138793945, "learning_rate": 2.9450299627733053e-05, "loss": 0.0717, "step": 19250 }, { "epoch": 1.1391731235582894, "grad_norm": 0.2614683210849762, "learning_rate": 2.9449528597849823e-05, "loss": 0.0838, "step": 19260 }, { "epoch": 1.1397645945466375, "grad_norm": 0.25017234683036804, "learning_rate": 2.9448757037715447e-05, "loss": 0.0947, "step": 19270 }, { "epoch": 1.1403560655349856, "grad_norm": 0.28026047348976135, "learning_rate": 2.9447984947358236e-05, "loss": 0.0913, "step": 19280 }, { "epoch": 1.1409475365233335, "grad_norm": 0.23484675586223602, "learning_rate": 2.944721232680653e-05, "loss": 0.0858, "step": 19290 }, { "epoch": 1.1415390075116816, "grad_norm": 0.2874893546104431, "learning_rate": 2.9446439176088673e-05, "loss": 0.0711, "step": 19300 }, { "epoch": 1.1421304785000297, "grad_norm": 0.4646609425544739, "learning_rate": 2.9445665495233045e-05, "loss": 0.0712, "step": 19310 }, { "epoch": 1.1427219494883776, "grad_norm": 0.2827105224132538, "learning_rate": 2.9444891284268033e-05, "loss": 0.1008, "step": 19320 }, { "epoch": 1.1433134204767257, "grad_norm": 0.27582818269729614, "learning_rate": 2.944411654322205e-05, "loss": 0.0933, "step": 19330 }, { "epoch": 1.1439048914650736, "grad_norm": 0.2946337163448334, "learning_rate": 2.944334127212353e-05, "loss": 0.0787, "step": 19340 }, { "epoch": 1.1444963624534217, "grad_norm": 0.30958837270736694, "learning_rate": 2.9442565471000913e-05, "loss": 0.0607, "step": 19350 }, { "epoch": 1.1450878334417696, "grad_norm": 0.35441091656684875, "learning_rate": 2.9441789139882677e-05, "loss": 0.0874, "step": 19360 }, { "epoch": 1.1456793044301177, "grad_norm": 0.2503030002117157, "learning_rate": 2.9441012278797308e-05, "loss": 0.0888, "step": 19370 }, { "epoch": 1.1462707754184658, "grad_norm": 0.35739222168922424, "learning_rate": 2.9440234887773314e-05, "loss": 0.0746, "step": 19380 }, { "epoch": 1.1468622464068137, "grad_norm": 0.2842925488948822, "learning_rate": 2.943945696683922e-05, "loss": 0.0794, "step": 19390 }, { "epoch": 1.1474537173951618, "grad_norm": 0.25880104303359985, "learning_rate": 2.9438678516023584e-05, "loss": 0.0752, "step": 19400 }, { "epoch": 1.1480451883835099, "grad_norm": 0.31884342432022095, "learning_rate": 2.943789953535496e-05, "loss": 0.0881, "step": 19410 }, { "epoch": 1.1486366593718578, "grad_norm": 0.2781723439693451, "learning_rate": 2.943712002486194e-05, "loss": 0.0877, "step": 19420 }, { "epoch": 1.1492281303602059, "grad_norm": 0.2303922027349472, "learning_rate": 2.9436339984573125e-05, "loss": 0.088, "step": 19430 }, { "epoch": 1.1498196013485538, "grad_norm": 0.23736636340618134, "learning_rate": 2.9435559414517146e-05, "loss": 0.0815, "step": 19440 }, { "epoch": 1.1504110723369019, "grad_norm": 0.3581428527832031, "learning_rate": 2.943477831472265e-05, "loss": 0.0662, "step": 19450 }, { "epoch": 1.15100254332525, "grad_norm": 0.3535332679748535, "learning_rate": 2.9433996685218286e-05, "loss": 0.072, "step": 19460 }, { "epoch": 1.1515940143135979, "grad_norm": 0.17115099728107452, "learning_rate": 2.9433214526032756e-05, "loss": 0.0822, "step": 19470 }, { "epoch": 1.152185485301946, "grad_norm": 0.2822836935520172, "learning_rate": 2.9432431837194752e-05, "loss": 0.0736, "step": 19480 }, { "epoch": 1.1527769562902939, "grad_norm": 0.2778540551662445, "learning_rate": 2.9431648618732993e-05, "loss": 0.0866, "step": 19490 }, { "epoch": 1.153368427278642, "grad_norm": 0.26194414496421814, "learning_rate": 2.9430864870676233e-05, "loss": 0.0722, "step": 19500 }, { "epoch": 1.15395989826699, "grad_norm": 0.25432345271110535, "learning_rate": 2.943008059305322e-05, "loss": 0.0773, "step": 19510 }, { "epoch": 1.154551369255338, "grad_norm": 0.27568602561950684, "learning_rate": 2.9429295785892743e-05, "loss": 0.0787, "step": 19520 }, { "epoch": 1.155142840243686, "grad_norm": 0.23271173238754272, "learning_rate": 2.94285104492236e-05, "loss": 0.0811, "step": 19530 }, { "epoch": 1.1557343112320342, "grad_norm": 0.2543984353542328, "learning_rate": 2.9427724583074606e-05, "loss": 0.0785, "step": 19540 }, { "epoch": 1.156325782220382, "grad_norm": 0.21092550456523895, "learning_rate": 2.9426938187474607e-05, "loss": 0.0704, "step": 19550 }, { "epoch": 1.1569172532087302, "grad_norm": 0.3793754279613495, "learning_rate": 2.9426151262452453e-05, "loss": 0.0774, "step": 19560 }, { "epoch": 1.157508724197078, "grad_norm": 0.324491024017334, "learning_rate": 2.9425363808037028e-05, "loss": 0.0801, "step": 19570 }, { "epoch": 1.1581001951854262, "grad_norm": 0.34007367491722107, "learning_rate": 2.942457582425723e-05, "loss": 0.0884, "step": 19580 }, { "epoch": 1.158691666173774, "grad_norm": 0.38925471901893616, "learning_rate": 2.9423787311141967e-05, "loss": 0.0846, "step": 19590 }, { "epoch": 1.1592831371621222, "grad_norm": 0.33066996932029724, "learning_rate": 2.942299826872018e-05, "loss": 0.0765, "step": 19600 }, { "epoch": 1.1598746081504703, "grad_norm": 0.29261693358421326, "learning_rate": 2.942220869702083e-05, "loss": 0.0837, "step": 19610 }, { "epoch": 1.1604660791388182, "grad_norm": 0.3981401324272156, "learning_rate": 2.9421418596072882e-05, "loss": 0.1011, "step": 19620 }, { "epoch": 1.1610575501271663, "grad_norm": 0.23289784789085388, "learning_rate": 2.9420627965905332e-05, "loss": 0.0761, "step": 19630 }, { "epoch": 1.1616490211155144, "grad_norm": 0.37498483061790466, "learning_rate": 2.9419836806547203e-05, "loss": 0.0784, "step": 19640 }, { "epoch": 1.1622404921038623, "grad_norm": 0.23568488657474518, "learning_rate": 2.941904511802752e-05, "loss": 0.0649, "step": 19650 }, { "epoch": 1.1628319630922104, "grad_norm": 0.3235750198364258, "learning_rate": 2.9418252900375333e-05, "loss": 0.0735, "step": 19660 }, { "epoch": 1.1634234340805583, "grad_norm": 0.49463319778442383, "learning_rate": 2.941746015361972e-05, "loss": 0.0925, "step": 19670 }, { "epoch": 1.1640149050689064, "grad_norm": 0.2914793789386749, "learning_rate": 2.941666687778977e-05, "loss": 0.0709, "step": 19680 }, { "epoch": 1.1646063760572545, "grad_norm": 0.2106831967830658, "learning_rate": 2.941587307291459e-05, "loss": 0.0813, "step": 19690 }, { "epoch": 1.1651978470456024, "grad_norm": 0.18272194266319275, "learning_rate": 2.9415078739023313e-05, "loss": 0.0723, "step": 19700 }, { "epoch": 1.1657893180339505, "grad_norm": 0.25717276334762573, "learning_rate": 2.941428387614509e-05, "loss": 0.0752, "step": 19710 }, { "epoch": 1.1663807890222984, "grad_norm": 0.33264732360839844, "learning_rate": 2.941348848430909e-05, "loss": 0.0828, "step": 19720 }, { "epoch": 1.1669722600106465, "grad_norm": 0.36206746101379395, "learning_rate": 2.94126925635445e-05, "loss": 0.0767, "step": 19730 }, { "epoch": 1.1675637309989946, "grad_norm": 0.2839219272136688, "learning_rate": 2.9411896113880525e-05, "loss": 0.0823, "step": 19740 }, { "epoch": 1.1681552019873425, "grad_norm": 0.37777432799339294, "learning_rate": 2.9411099135346396e-05, "loss": 0.0819, "step": 19750 }, { "epoch": 1.1687466729756906, "grad_norm": 0.24454940855503082, "learning_rate": 2.9410301627971367e-05, "loss": 0.0745, "step": 19760 }, { "epoch": 1.1693381439640387, "grad_norm": 0.19247059524059296, "learning_rate": 2.9409503591784686e-05, "loss": 0.0812, "step": 19770 }, { "epoch": 1.1699296149523866, "grad_norm": 0.2910480797290802, "learning_rate": 2.940870502681565e-05, "loss": 0.0896, "step": 19780 }, { "epoch": 1.1705210859407347, "grad_norm": 0.24662050604820251, "learning_rate": 2.9407905933093562e-05, "loss": 0.0747, "step": 19790 }, { "epoch": 1.1711125569290826, "grad_norm": 0.26801398396492004, "learning_rate": 2.9407106310647746e-05, "loss": 0.0833, "step": 19800 }, { "epoch": 1.1717040279174307, "grad_norm": 0.2985973358154297, "learning_rate": 2.940630615950754e-05, "loss": 0.0696, "step": 19810 }, { "epoch": 1.1722954989057786, "grad_norm": 0.4201258718967438, "learning_rate": 2.940550547970232e-05, "loss": 0.0887, "step": 19820 }, { "epoch": 1.1728869698941267, "grad_norm": 0.3056704103946686, "learning_rate": 2.9404704271261455e-05, "loss": 0.0834, "step": 19830 }, { "epoch": 1.1734784408824748, "grad_norm": 0.21569810807704926, "learning_rate": 2.9403902534214353e-05, "loss": 0.0771, "step": 19840 }, { "epoch": 1.1740699118708227, "grad_norm": 0.23975586891174316, "learning_rate": 2.9403100268590437e-05, "loss": 0.0828, "step": 19850 }, { "epoch": 1.1746613828591708, "grad_norm": 0.2811067998409271, "learning_rate": 2.9402297474419145e-05, "loss": 0.0797, "step": 19860 }, { "epoch": 1.1752528538475189, "grad_norm": 0.3170258104801178, "learning_rate": 2.9401494151729937e-05, "loss": 0.0919, "step": 19870 }, { "epoch": 1.1758443248358668, "grad_norm": 0.20965993404388428, "learning_rate": 2.940069030055229e-05, "loss": 0.0796, "step": 19880 }, { "epoch": 1.1764357958242149, "grad_norm": 0.25766000151634216, "learning_rate": 2.9399885920915706e-05, "loss": 0.0879, "step": 19890 }, { "epoch": 1.1770272668125628, "grad_norm": 0.2328053116798401, "learning_rate": 2.9399081012849702e-05, "loss": 0.0644, "step": 19900 }, { "epoch": 1.1776187378009109, "grad_norm": 0.4502614438533783, "learning_rate": 2.939827557638382e-05, "loss": 0.0853, "step": 19910 }, { "epoch": 1.178210208789259, "grad_norm": 0.3198412358760834, "learning_rate": 2.939746961154761e-05, "loss": 0.0961, "step": 19920 }, { "epoch": 1.1788016797776069, "grad_norm": 0.22751423716545105, "learning_rate": 2.9396663118370647e-05, "loss": 0.0821, "step": 19930 }, { "epoch": 1.179393150765955, "grad_norm": 0.2675306797027588, "learning_rate": 2.9395856096882538e-05, "loss": 0.0785, "step": 19940 }, { "epoch": 1.1799846217543029, "grad_norm": 0.25209662318229675, "learning_rate": 2.9395048547112882e-05, "loss": 0.084, "step": 19950 }, { "epoch": 1.180576092742651, "grad_norm": 0.27330052852630615, "learning_rate": 2.9394240469091332e-05, "loss": 0.0831, "step": 19960 }, { "epoch": 1.181167563730999, "grad_norm": 0.4576662480831146, "learning_rate": 2.9393431862847527e-05, "loss": 0.0923, "step": 19970 }, { "epoch": 1.181759034719347, "grad_norm": 0.21485188603401184, "learning_rate": 2.9392622728411144e-05, "loss": 0.0881, "step": 19980 }, { "epoch": 1.182350505707695, "grad_norm": 0.3671990931034088, "learning_rate": 2.9391813065811883e-05, "loss": 0.0815, "step": 19990 }, { "epoch": 1.1829419766960432, "grad_norm": 0.25003668665885925, "learning_rate": 2.9391002875079444e-05, "loss": 0.0676, "step": 20000 }, { "epoch": 1.183533447684391, "grad_norm": 0.3711579740047455, "learning_rate": 2.939019215624357e-05, "loss": 0.0757, "step": 20010 }, { "epoch": 1.1841249186727392, "grad_norm": 0.3646019399166107, "learning_rate": 2.9389380909334e-05, "loss": 0.0751, "step": 20020 }, { "epoch": 1.184716389661087, "grad_norm": 0.389131635427475, "learning_rate": 2.938856913438051e-05, "loss": 0.0829, "step": 20030 }, { "epoch": 1.1853078606494352, "grad_norm": 0.28194671869277954, "learning_rate": 2.9387756831412894e-05, "loss": 0.0729, "step": 20040 }, { "epoch": 1.185899331637783, "grad_norm": 0.29723018407821655, "learning_rate": 2.938694400046096e-05, "loss": 0.0665, "step": 20050 }, { "epoch": 1.1864908026261312, "grad_norm": 0.5553014278411865, "learning_rate": 2.938613064155453e-05, "loss": 0.064, "step": 20060 }, { "epoch": 1.1870822736144793, "grad_norm": 0.36046260595321655, "learning_rate": 2.938531675472345e-05, "loss": 0.0895, "step": 20070 }, { "epoch": 1.1876737446028272, "grad_norm": 0.5858955383300781, "learning_rate": 2.9384502339997597e-05, "loss": 0.0861, "step": 20080 }, { "epoch": 1.1882652155911753, "grad_norm": 0.23918431997299194, "learning_rate": 2.938368739740685e-05, "loss": 0.0833, "step": 20090 }, { "epoch": 1.1888566865795234, "grad_norm": 0.256050169467926, "learning_rate": 2.9382871926981118e-05, "loss": 0.0733, "step": 20100 }, { "epoch": 1.1894481575678713, "grad_norm": 0.3898162245750427, "learning_rate": 2.9382055928750327e-05, "loss": 0.0865, "step": 20110 }, { "epoch": 1.1900396285562194, "grad_norm": 0.2794979214668274, "learning_rate": 2.9381239402744416e-05, "loss": 0.0954, "step": 20120 }, { "epoch": 1.1906310995445673, "grad_norm": 0.19899627566337585, "learning_rate": 2.9380422348993354e-05, "loss": 0.0884, "step": 20130 }, { "epoch": 1.1912225705329154, "grad_norm": 0.37368425726890564, "learning_rate": 2.937960476752712e-05, "loss": 0.0659, "step": 20140 }, { "epoch": 1.1918140415212635, "grad_norm": 0.20688028633594513, "learning_rate": 2.9378786658375727e-05, "loss": 0.0726, "step": 20150 }, { "epoch": 1.1924055125096114, "grad_norm": 0.2508821189403534, "learning_rate": 2.9377968021569184e-05, "loss": 0.0663, "step": 20160 }, { "epoch": 1.1929969834979595, "grad_norm": 0.2476595789194107, "learning_rate": 2.9377148857137537e-05, "loss": 0.0909, "step": 20170 }, { "epoch": 1.1935884544863073, "grad_norm": 0.24759793281555176, "learning_rate": 2.9376329165110846e-05, "loss": 0.0955, "step": 20180 }, { "epoch": 1.1941799254746555, "grad_norm": 0.28663524985313416, "learning_rate": 2.9375508945519195e-05, "loss": 0.0728, "step": 20190 }, { "epoch": 1.1947713964630036, "grad_norm": 0.42192205786705017, "learning_rate": 2.937468819839268e-05, "loss": 0.0769, "step": 20200 }, { "epoch": 1.1953628674513515, "grad_norm": 0.4048394560813904, "learning_rate": 2.937386692376142e-05, "loss": 0.0777, "step": 20210 }, { "epoch": 1.1959543384396996, "grad_norm": 0.27875176072120667, "learning_rate": 2.9373045121655556e-05, "loss": 0.095, "step": 20220 }, { "epoch": 1.1965458094280477, "grad_norm": 0.26196807622909546, "learning_rate": 2.9372222792105242e-05, "loss": 0.0921, "step": 20230 }, { "epoch": 1.1971372804163956, "grad_norm": 0.3077414333820343, "learning_rate": 2.9371399935140656e-05, "loss": 0.0904, "step": 20240 }, { "epoch": 1.1977287514047437, "grad_norm": 0.27920275926589966, "learning_rate": 2.937057655079199e-05, "loss": 0.0772, "step": 20250 }, { "epoch": 1.1983202223930915, "grad_norm": 0.3282419741153717, "learning_rate": 2.936975263908947e-05, "loss": 0.0785, "step": 20260 }, { "epoch": 1.1989116933814397, "grad_norm": 0.21491001546382904, "learning_rate": 2.936892820006332e-05, "loss": 0.0791, "step": 20270 }, { "epoch": 1.1995031643697875, "grad_norm": 0.306201308965683, "learning_rate": 2.93681032337438e-05, "loss": 0.0767, "step": 20280 }, { "epoch": 1.2000946353581357, "grad_norm": 0.25892603397369385, "learning_rate": 2.9367277740161184e-05, "loss": 0.0722, "step": 20290 }, { "epoch": 1.2006861063464838, "grad_norm": 0.3208106756210327, "learning_rate": 2.9366451719345763e-05, "loss": 0.072, "step": 20300 }, { "epoch": 1.2012775773348316, "grad_norm": 0.30485400557518005, "learning_rate": 2.936562517132785e-05, "loss": 0.0652, "step": 20310 }, { "epoch": 1.2018690483231798, "grad_norm": 0.24308043718338013, "learning_rate": 2.9364798096137774e-05, "loss": 0.0925, "step": 20320 }, { "epoch": 1.2024605193115279, "grad_norm": 0.27552589774131775, "learning_rate": 2.9363970493805893e-05, "loss": 0.0892, "step": 20330 }, { "epoch": 1.2030519902998758, "grad_norm": 0.391776442527771, "learning_rate": 2.936314236436257e-05, "loss": 0.0761, "step": 20340 }, { "epoch": 1.2036434612882239, "grad_norm": 0.23775440454483032, "learning_rate": 2.9362313707838197e-05, "loss": 0.0751, "step": 20350 }, { "epoch": 1.2042349322765717, "grad_norm": 0.24946856498718262, "learning_rate": 2.9361484524263182e-05, "loss": 0.0699, "step": 20360 }, { "epoch": 1.2048264032649199, "grad_norm": 0.3208564519882202, "learning_rate": 2.936065481366796e-05, "loss": 0.086, "step": 20370 }, { "epoch": 1.205417874253268, "grad_norm": 0.4304056763648987, "learning_rate": 2.9359824576082973e-05, "loss": 0.0934, "step": 20380 }, { "epoch": 1.2060093452416158, "grad_norm": 0.22509275376796722, "learning_rate": 2.935899381153868e-05, "loss": 0.0832, "step": 20390 }, { "epoch": 1.206600816229964, "grad_norm": 0.2640911638736725, "learning_rate": 2.9358162520065584e-05, "loss": 0.0658, "step": 20400 }, { "epoch": 1.2071922872183118, "grad_norm": 0.3969402313232422, "learning_rate": 2.9357330701694182e-05, "loss": 0.0798, "step": 20410 }, { "epoch": 1.20778375820666, "grad_norm": 0.24805422127246857, "learning_rate": 2.9356498356455e-05, "loss": 0.0857, "step": 20420 }, { "epoch": 1.208375229195008, "grad_norm": 0.2152995765209198, "learning_rate": 2.9355665484378578e-05, "loss": 0.074, "step": 20430 }, { "epoch": 1.208966700183356, "grad_norm": 0.22731104493141174, "learning_rate": 2.9354832085495487e-05, "loss": 0.0794, "step": 20440 }, { "epoch": 1.209558171171704, "grad_norm": 0.2551000416278839, "learning_rate": 2.9353998159836308e-05, "loss": 0.0659, "step": 20450 }, { "epoch": 1.2101496421600522, "grad_norm": 0.48358863592147827, "learning_rate": 2.935316370743164e-05, "loss": 0.0902, "step": 20460 }, { "epoch": 1.2107411131484, "grad_norm": 0.3361690938472748, "learning_rate": 2.9352328728312108e-05, "loss": 0.0814, "step": 20470 }, { "epoch": 1.2113325841367482, "grad_norm": 0.3948379158973694, "learning_rate": 2.935149322250835e-05, "loss": 0.0886, "step": 20480 }, { "epoch": 1.211924055125096, "grad_norm": 0.22896696627140045, "learning_rate": 2.935065719005103e-05, "loss": 0.075, "step": 20490 }, { "epoch": 1.2125155261134442, "grad_norm": 0.27328401803970337, "learning_rate": 2.9349820630970827e-05, "loss": 0.0651, "step": 20500 }, { "epoch": 1.213106997101792, "grad_norm": 0.38891804218292236, "learning_rate": 2.934898354529844e-05, "loss": 0.0785, "step": 20510 }, { "epoch": 1.2136984680901401, "grad_norm": 0.2755261957645416, "learning_rate": 2.9348145933064584e-05, "loss": 0.0772, "step": 20520 }, { "epoch": 1.2142899390784883, "grad_norm": 0.24131572246551514, "learning_rate": 2.93473077943e-05, "loss": 0.0886, "step": 20530 }, { "epoch": 1.2148814100668361, "grad_norm": 0.39333492517471313, "learning_rate": 2.9346469129035444e-05, "loss": 0.0741, "step": 20540 }, { "epoch": 1.2154728810551843, "grad_norm": 0.2469291388988495, "learning_rate": 2.934562993730169e-05, "loss": 0.0687, "step": 20550 }, { "epoch": 1.2160643520435324, "grad_norm": 0.3304382264614105, "learning_rate": 2.9344790219129538e-05, "loss": 0.0824, "step": 20560 }, { "epoch": 1.2166558230318802, "grad_norm": 0.3828884959220886, "learning_rate": 2.9343949974549803e-05, "loss": 0.1012, "step": 20570 }, { "epoch": 1.2172472940202284, "grad_norm": 0.29157042503356934, "learning_rate": 2.9343109203593315e-05, "loss": 0.0903, "step": 20580 }, { "epoch": 1.2178387650085762, "grad_norm": 0.3177265226840973, "learning_rate": 2.934226790629093e-05, "loss": 0.0865, "step": 20590 }, { "epoch": 1.2184302359969243, "grad_norm": 0.210539773106575, "learning_rate": 2.9341426082673524e-05, "loss": 0.0752, "step": 20600 }, { "epoch": 1.2190217069852725, "grad_norm": 0.2705287039279938, "learning_rate": 2.9340583732771986e-05, "loss": 0.0803, "step": 20610 }, { "epoch": 1.2196131779736203, "grad_norm": 0.32092994451522827, "learning_rate": 2.9339740856617226e-05, "loss": 0.097, "step": 20620 }, { "epoch": 1.2202046489619685, "grad_norm": 0.2790767550468445, "learning_rate": 2.9338897454240174e-05, "loss": 0.0883, "step": 20630 }, { "epoch": 1.2207961199503163, "grad_norm": 0.2540844678878784, "learning_rate": 2.9338053525671782e-05, "loss": 0.0873, "step": 20640 }, { "epoch": 1.2213875909386644, "grad_norm": 0.15430191159248352, "learning_rate": 2.9337209070943027e-05, "loss": 0.0744, "step": 20650 }, { "epoch": 1.2219790619270126, "grad_norm": 0.28234246373176575, "learning_rate": 2.9336364090084882e-05, "loss": 0.0742, "step": 20660 }, { "epoch": 1.2225705329153604, "grad_norm": 0.449108749628067, "learning_rate": 2.9335518583128367e-05, "loss": 0.0921, "step": 20670 }, { "epoch": 1.2231620039037086, "grad_norm": 0.22715707123279572, "learning_rate": 2.933467255010451e-05, "loss": 0.0849, "step": 20680 }, { "epoch": 1.2237534748920567, "grad_norm": 0.24758711457252502, "learning_rate": 2.933382599104435e-05, "loss": 0.0853, "step": 20690 }, { "epoch": 1.2243449458804045, "grad_norm": 0.15920619666576385, "learning_rate": 2.9332978905978958e-05, "loss": 0.0764, "step": 20700 }, { "epoch": 1.2249364168687527, "grad_norm": 0.252042293548584, "learning_rate": 2.9332131294939415e-05, "loss": 0.0799, "step": 20710 }, { "epoch": 1.2255278878571005, "grad_norm": 0.36647486686706543, "learning_rate": 2.9331283157956837e-05, "loss": 0.0944, "step": 20720 }, { "epoch": 1.2261193588454486, "grad_norm": 0.38669320940971375, "learning_rate": 2.933043449506233e-05, "loss": 0.0876, "step": 20730 }, { "epoch": 1.2267108298337965, "grad_norm": 0.33968669176101685, "learning_rate": 2.9329585306287053e-05, "loss": 0.0856, "step": 20740 }, { "epoch": 1.2273023008221446, "grad_norm": 0.20202665030956268, "learning_rate": 2.932873559166216e-05, "loss": 0.074, "step": 20750 }, { "epoch": 1.2278937718104928, "grad_norm": 0.41714924573898315, "learning_rate": 2.9327885351218837e-05, "loss": 0.0713, "step": 20760 }, { "epoch": 1.2284852427988406, "grad_norm": 0.3663497269153595, "learning_rate": 2.932703458498828e-05, "loss": 0.0906, "step": 20770 }, { "epoch": 1.2290767137871887, "grad_norm": 0.25420475006103516, "learning_rate": 2.9326183293001718e-05, "loss": 0.085, "step": 20780 }, { "epoch": 1.2296681847755369, "grad_norm": 0.26633211970329285, "learning_rate": 2.9325331475290377e-05, "loss": 0.0842, "step": 20790 }, { "epoch": 1.2302596557638847, "grad_norm": 0.24461530148983002, "learning_rate": 2.9324479131885533e-05, "loss": 0.0843, "step": 20800 }, { "epoch": 1.2308511267522328, "grad_norm": 0.23560373485088348, "learning_rate": 2.9323626262818453e-05, "loss": 0.074, "step": 20810 }, { "epoch": 1.2314425977405807, "grad_norm": 0.29467764496803284, "learning_rate": 2.9322772868120432e-05, "loss": 0.0896, "step": 20820 }, { "epoch": 1.2320340687289288, "grad_norm": 0.16699965298175812, "learning_rate": 2.9321918947822797e-05, "loss": 0.0806, "step": 20830 }, { "epoch": 1.232625539717277, "grad_norm": 0.22940713167190552, "learning_rate": 2.932106450195688e-05, "loss": 0.0768, "step": 20840 }, { "epoch": 1.2332170107056248, "grad_norm": 0.2752224802970886, "learning_rate": 2.932020953055403e-05, "loss": 0.0815, "step": 20850 }, { "epoch": 1.233808481693973, "grad_norm": 0.3283916413784027, "learning_rate": 2.931935403364563e-05, "loss": 0.0726, "step": 20860 }, { "epoch": 1.2343999526823208, "grad_norm": 0.23677848279476166, "learning_rate": 2.931849801126307e-05, "loss": 0.0889, "step": 20870 }, { "epoch": 1.234991423670669, "grad_norm": 0.2839066684246063, "learning_rate": 2.931764146343777e-05, "loss": 0.0852, "step": 20880 }, { "epoch": 1.235582894659017, "grad_norm": 0.24223487079143524, "learning_rate": 2.9316784390201146e-05, "loss": 0.0785, "step": 20890 }, { "epoch": 1.236174365647365, "grad_norm": 0.22278349101543427, "learning_rate": 2.9315926791584672e-05, "loss": 0.0661, "step": 20900 }, { "epoch": 1.236765836635713, "grad_norm": 0.33210816979408264, "learning_rate": 2.9315068667619804e-05, "loss": 0.0872, "step": 20910 }, { "epoch": 1.2373573076240612, "grad_norm": 0.2762989401817322, "learning_rate": 2.9314210018338035e-05, "loss": 0.0982, "step": 20920 }, { "epoch": 1.237948778612409, "grad_norm": 0.3192777931690216, "learning_rate": 2.9313350843770874e-05, "loss": 0.0866, "step": 20930 }, { "epoch": 1.2385402496007571, "grad_norm": 0.18164126574993134, "learning_rate": 2.9312491143949854e-05, "loss": 0.0772, "step": 20940 }, { "epoch": 1.239131720589105, "grad_norm": 0.23046953976154327, "learning_rate": 2.9311630918906523e-05, "loss": 0.0844, "step": 20950 }, { "epoch": 1.2397231915774531, "grad_norm": 0.19877156615257263, "learning_rate": 2.9310770168672444e-05, "loss": 0.0769, "step": 20960 }, { "epoch": 1.240314662565801, "grad_norm": 0.24802422523498535, "learning_rate": 2.930990889327921e-05, "loss": 0.0811, "step": 20970 }, { "epoch": 1.2409061335541491, "grad_norm": 0.3128206729888916, "learning_rate": 2.9309047092758417e-05, "loss": 0.088, "step": 20980 }, { "epoch": 1.2414976045424972, "grad_norm": 0.24826304614543915, "learning_rate": 2.93081847671417e-05, "loss": 0.0843, "step": 20990 }, { "epoch": 1.2420890755308451, "grad_norm": 0.2904874384403229, "learning_rate": 2.9307321916460698e-05, "loss": 0.0793, "step": 21000 }, { "epoch": 1.2426805465191932, "grad_norm": 0.2979776859283447, "learning_rate": 2.9306458540747084e-05, "loss": 0.0869, "step": 21010 }, { "epoch": 1.2432720175075413, "grad_norm": 0.24323789775371552, "learning_rate": 2.9305594640032525e-05, "loss": 0.0899, "step": 21020 }, { "epoch": 1.2438634884958892, "grad_norm": 0.37783217430114746, "learning_rate": 2.9304730214348738e-05, "loss": 0.0903, "step": 21030 }, { "epoch": 1.2444549594842373, "grad_norm": 0.27388548851013184, "learning_rate": 2.930386526372744e-05, "loss": 0.0878, "step": 21040 }, { "epoch": 1.2450464304725852, "grad_norm": 0.3358969986438751, "learning_rate": 2.9302999788200367e-05, "loss": 0.0805, "step": 21050 }, { "epoch": 1.2456379014609333, "grad_norm": 0.3015676736831665, "learning_rate": 2.9302133787799285e-05, "loss": 0.0771, "step": 21060 }, { "epoch": 1.2462293724492814, "grad_norm": 0.2542024850845337, "learning_rate": 2.930126726255597e-05, "loss": 0.0943, "step": 21070 }, { "epoch": 1.2468208434376293, "grad_norm": 0.39418360590934753, "learning_rate": 2.9300400212502225e-05, "loss": 0.086, "step": 21080 }, { "epoch": 1.2474123144259774, "grad_norm": 0.24463015794754028, "learning_rate": 2.929953263766987e-05, "loss": 0.08, "step": 21090 }, { "epoch": 1.2480037854143253, "grad_norm": 0.26904770731925964, "learning_rate": 2.9298664538090727e-05, "loss": 0.0798, "step": 21100 }, { "epoch": 1.2485952564026734, "grad_norm": 0.27346354722976685, "learning_rate": 2.929779591379667e-05, "loss": 0.0715, "step": 21110 }, { "epoch": 1.2491867273910215, "grad_norm": 0.24264125525951385, "learning_rate": 2.9296926764819565e-05, "loss": 0.0931, "step": 21120 }, { "epoch": 1.2497781983793694, "grad_norm": 0.30334052443504333, "learning_rate": 2.929605709119131e-05, "loss": 0.0895, "step": 21130 }, { "epoch": 1.2503696693677175, "grad_norm": 0.22954250872135162, "learning_rate": 2.929518689294382e-05, "loss": 0.0743, "step": 21140 }, { "epoch": 1.2509611403560656, "grad_norm": 0.2561001777648926, "learning_rate": 2.9294316170109026e-05, "loss": 0.0722, "step": 21150 }, { "epoch": 1.2515526113444135, "grad_norm": 0.3823665678501129, "learning_rate": 2.929344492271888e-05, "loss": 0.0816, "step": 21160 }, { "epoch": 1.2521440823327616, "grad_norm": 0.22664594650268555, "learning_rate": 2.9292573150805357e-05, "loss": 0.0961, "step": 21170 }, { "epoch": 1.2527355533211095, "grad_norm": 0.23325590789318085, "learning_rate": 2.929170085440045e-05, "loss": 0.084, "step": 21180 }, { "epoch": 1.2533270243094576, "grad_norm": 0.3455829918384552, "learning_rate": 2.9290828033536163e-05, "loss": 0.0782, "step": 21190 }, { "epoch": 1.2539184952978055, "grad_norm": 0.3483183979988098, "learning_rate": 2.9289954688244536e-05, "loss": 0.0769, "step": 21200 }, { "epoch": 1.2545099662861536, "grad_norm": 0.4314415156841278, "learning_rate": 2.9289080818557606e-05, "loss": 0.0705, "step": 21210 }, { "epoch": 1.2551014372745017, "grad_norm": 0.21312116086483002, "learning_rate": 2.9288206424507446e-05, "loss": 0.0874, "step": 21220 }, { "epoch": 1.2556929082628496, "grad_norm": 0.25624531507492065, "learning_rate": 2.9287331506126145e-05, "loss": 0.0834, "step": 21230 }, { "epoch": 1.2562843792511977, "grad_norm": 0.21749022603034973, "learning_rate": 2.928645606344581e-05, "loss": 0.0733, "step": 21240 }, { "epoch": 1.2568758502395458, "grad_norm": 0.36586377024650574, "learning_rate": 2.9285580096498562e-05, "loss": 0.076, "step": 21250 }, { "epoch": 1.2574673212278937, "grad_norm": 0.560189962387085, "learning_rate": 2.9284703605316557e-05, "loss": 0.0736, "step": 21260 }, { "epoch": 1.2580587922162418, "grad_norm": 0.3558400273323059, "learning_rate": 2.928382658993195e-05, "loss": 0.083, "step": 21270 }, { "epoch": 1.25865026320459, "grad_norm": 0.23025022447109222, "learning_rate": 2.928294905037692e-05, "loss": 0.0811, "step": 21280 }, { "epoch": 1.2592417341929378, "grad_norm": 0.236318439245224, "learning_rate": 2.9282070986683686e-05, "loss": 0.0818, "step": 21290 }, { "epoch": 1.2598332051812857, "grad_norm": 0.20606474578380585, "learning_rate": 2.9281192398884452e-05, "loss": 0.0712, "step": 21300 }, { "epoch": 1.2604246761696338, "grad_norm": 0.323366641998291, "learning_rate": 2.9280313287011473e-05, "loss": 0.0843, "step": 21310 }, { "epoch": 1.261016147157982, "grad_norm": 0.5441957712173462, "learning_rate": 2.9279433651097005e-05, "loss": 0.0874, "step": 21320 }, { "epoch": 1.2616076181463298, "grad_norm": 0.4490743577480316, "learning_rate": 2.9278553491173328e-05, "loss": 0.08, "step": 21330 }, { "epoch": 1.262199089134678, "grad_norm": 0.29009920358657837, "learning_rate": 2.9277672807272736e-05, "loss": 0.0801, "step": 21340 }, { "epoch": 1.262790560123026, "grad_norm": 0.2919093370437622, "learning_rate": 2.9276791599427554e-05, "loss": 0.0725, "step": 21350 }, { "epoch": 1.263382031111374, "grad_norm": 0.43508628010749817, "learning_rate": 2.9275909867670122e-05, "loss": 0.0742, "step": 21360 }, { "epoch": 1.263973502099722, "grad_norm": 0.29329726099967957, "learning_rate": 2.9275027612032788e-05, "loss": 0.091, "step": 21370 }, { "epoch": 1.2645649730880701, "grad_norm": 0.3024733364582062, "learning_rate": 2.927414483254793e-05, "loss": 0.0793, "step": 21380 }, { "epoch": 1.265156444076418, "grad_norm": 0.2626588046550751, "learning_rate": 2.9273261529247947e-05, "loss": 0.0849, "step": 21390 }, { "epoch": 1.2657479150647661, "grad_norm": 0.2891884744167328, "learning_rate": 2.9272377702165253e-05, "loss": 0.0616, "step": 21400 }, { "epoch": 1.266339386053114, "grad_norm": 0.3896363377571106, "learning_rate": 2.927149335133228e-05, "loss": 0.0808, "step": 21410 }, { "epoch": 1.2669308570414621, "grad_norm": 0.39382031559944153, "learning_rate": 2.9270608476781476e-05, "loss": 0.0837, "step": 21420 }, { "epoch": 1.26752232802981, "grad_norm": 0.25524744391441345, "learning_rate": 2.9269723078545323e-05, "loss": 0.0802, "step": 21430 }, { "epoch": 1.2681137990181581, "grad_norm": 0.21598152816295624, "learning_rate": 2.9268837156656305e-05, "loss": 0.0822, "step": 21440 }, { "epoch": 1.2687052700065062, "grad_norm": 0.23708386719226837, "learning_rate": 2.9267950711146936e-05, "loss": 0.0674, "step": 21450 }, { "epoch": 1.2692967409948541, "grad_norm": 0.2627578675746918, "learning_rate": 2.9267063742049748e-05, "loss": 0.0749, "step": 21460 }, { "epoch": 1.2698882119832022, "grad_norm": 0.2072814404964447, "learning_rate": 2.926617624939728e-05, "loss": 0.0957, "step": 21470 }, { "epoch": 1.2704796829715503, "grad_norm": 0.33153653144836426, "learning_rate": 2.9265288233222112e-05, "loss": 0.0885, "step": 21480 }, { "epoch": 1.2710711539598982, "grad_norm": 0.23076030611991882, "learning_rate": 2.926439969355682e-05, "loss": 0.0739, "step": 21490 }, { "epoch": 1.2716626249482463, "grad_norm": 0.28019610047340393, "learning_rate": 2.9263510630434018e-05, "loss": 0.0758, "step": 21500 }, { "epoch": 1.2722540959365944, "grad_norm": 0.3180229961872101, "learning_rate": 2.9262621043886333e-05, "loss": 0.0761, "step": 21510 }, { "epoch": 1.2728455669249423, "grad_norm": 0.28313642740249634, "learning_rate": 2.9261730933946405e-05, "loss": 0.0848, "step": 21520 }, { "epoch": 1.2734370379132902, "grad_norm": 0.2657395303249359, "learning_rate": 2.92608403006469e-05, "loss": 0.0791, "step": 21530 }, { "epoch": 1.2740285089016383, "grad_norm": 0.28778067231178284, "learning_rate": 2.9259949144020502e-05, "loss": 0.0866, "step": 21540 }, { "epoch": 1.2746199798899864, "grad_norm": 0.26010453701019287, "learning_rate": 2.9259057464099914e-05, "loss": 0.0707, "step": 21550 }, { "epoch": 1.2752114508783343, "grad_norm": 0.3895326554775238, "learning_rate": 2.9258165260917855e-05, "loss": 0.0768, "step": 21560 }, { "epoch": 1.2758029218666824, "grad_norm": 0.3349877893924713, "learning_rate": 2.9257272534507072e-05, "loss": 0.1055, "step": 21570 }, { "epoch": 1.2763943928550305, "grad_norm": 0.2711276412010193, "learning_rate": 2.9256379284900317e-05, "loss": 0.0849, "step": 21580 }, { "epoch": 1.2769858638433784, "grad_norm": 0.41593995690345764, "learning_rate": 2.9255485512130374e-05, "loss": 0.0883, "step": 21590 }, { "epoch": 1.2775773348317265, "grad_norm": 0.2516036033630371, "learning_rate": 2.9254591216230044e-05, "loss": 0.0775, "step": 21600 }, { "epoch": 1.2781688058200746, "grad_norm": 0.3309241831302643, "learning_rate": 2.9253696397232136e-05, "loss": 0.0871, "step": 21610 }, { "epoch": 1.2787602768084225, "grad_norm": 0.27369722723960876, "learning_rate": 2.92528010551695e-05, "loss": 0.0817, "step": 21620 }, { "epoch": 1.2793517477967706, "grad_norm": 0.2956319749355316, "learning_rate": 2.925190519007498e-05, "loss": 0.0829, "step": 21630 }, { "epoch": 1.2799432187851185, "grad_norm": 0.3379030227661133, "learning_rate": 2.9251008801981457e-05, "loss": 0.0692, "step": 21640 }, { "epoch": 1.2805346897734666, "grad_norm": 0.32336750626564026, "learning_rate": 2.9250111890921824e-05, "loss": 0.073, "step": 21650 }, { "epoch": 1.2811261607618145, "grad_norm": 0.22586104273796082, "learning_rate": 2.9249214456929e-05, "loss": 0.0787, "step": 21660 }, { "epoch": 1.2817176317501626, "grad_norm": 0.3281586170196533, "learning_rate": 2.924831650003591e-05, "loss": 0.0788, "step": 21670 }, { "epoch": 1.2823091027385107, "grad_norm": 0.4255973994731903, "learning_rate": 2.9247418020275512e-05, "loss": 0.0826, "step": 21680 }, { "epoch": 1.2829005737268586, "grad_norm": 0.3311843276023865, "learning_rate": 2.924651901768077e-05, "loss": 0.0867, "step": 21690 }, { "epoch": 1.2834920447152067, "grad_norm": 0.23109906911849976, "learning_rate": 2.924561949228469e-05, "loss": 0.0718, "step": 21700 }, { "epoch": 1.2840835157035548, "grad_norm": 0.3229973316192627, "learning_rate": 2.924471944412026e-05, "loss": 0.084, "step": 21710 }, { "epoch": 1.2846749866919027, "grad_norm": 0.3422892093658447, "learning_rate": 2.924381887322052e-05, "loss": 0.0927, "step": 21720 }, { "epoch": 1.2852664576802508, "grad_norm": 1.336753249168396, "learning_rate": 2.9242917779618527e-05, "loss": 0.0839, "step": 21730 }, { "epoch": 1.285857928668599, "grad_norm": 0.28827324509620667, "learning_rate": 2.9242016163347335e-05, "loss": 0.0773, "step": 21740 }, { "epoch": 1.2864493996569468, "grad_norm": 0.55047208070755, "learning_rate": 2.9241114024440033e-05, "loss": 0.0648, "step": 21750 }, { "epoch": 1.2870408706452947, "grad_norm": 0.2722717225551605, "learning_rate": 2.924021136292973e-05, "loss": 0.0667, "step": 21760 }, { "epoch": 1.2876323416336428, "grad_norm": 0.3907991051673889, "learning_rate": 2.923930817884955e-05, "loss": 0.0891, "step": 21770 }, { "epoch": 1.288223812621991, "grad_norm": 0.28837621212005615, "learning_rate": 2.9238404472232637e-05, "loss": 0.0752, "step": 21780 }, { "epoch": 1.2888152836103388, "grad_norm": 0.3020334243774414, "learning_rate": 2.9237500243112153e-05, "loss": 0.0867, "step": 21790 }, { "epoch": 1.289406754598687, "grad_norm": 0.24361446499824524, "learning_rate": 2.923659549152128e-05, "loss": 0.0734, "step": 21800 }, { "epoch": 1.289998225587035, "grad_norm": 0.2887633740901947, "learning_rate": 2.9235690217493217e-05, "loss": 0.0916, "step": 21810 }, { "epoch": 1.290589696575383, "grad_norm": 0.18337130546569824, "learning_rate": 2.923478442106119e-05, "loss": 0.0798, "step": 21820 }, { "epoch": 1.291181167563731, "grad_norm": 0.20508386194705963, "learning_rate": 2.9233878102258433e-05, "loss": 0.0821, "step": 21830 }, { "epoch": 1.2917726385520791, "grad_norm": 0.18352572619915009, "learning_rate": 2.923297126111821e-05, "loss": 0.0682, "step": 21840 }, { "epoch": 1.292364109540427, "grad_norm": 0.27331387996673584, "learning_rate": 2.9232063897673804e-05, "loss": 0.0656, "step": 21850 }, { "epoch": 1.2929555805287751, "grad_norm": 0.3849876821041107, "learning_rate": 2.9231156011958498e-05, "loss": 0.0669, "step": 21860 }, { "epoch": 1.293547051517123, "grad_norm": 0.34906113147735596, "learning_rate": 2.9230247604005617e-05, "loss": 0.0834, "step": 21870 }, { "epoch": 1.2941385225054711, "grad_norm": 0.27434855699539185, "learning_rate": 2.92293386738485e-05, "loss": 0.0892, "step": 21880 }, { "epoch": 1.294729993493819, "grad_norm": 0.26754748821258545, "learning_rate": 2.9228429221520497e-05, "loss": 0.0794, "step": 21890 }, { "epoch": 1.2953214644821671, "grad_norm": 0.2330232560634613, "learning_rate": 2.9227519247054984e-05, "loss": 0.064, "step": 21900 }, { "epoch": 1.2959129354705152, "grad_norm": 0.36616024374961853, "learning_rate": 2.9226608750485348e-05, "loss": 0.0953, "step": 21910 }, { "epoch": 1.2965044064588631, "grad_norm": 0.2532069981098175, "learning_rate": 2.9225697731845007e-05, "loss": 0.0825, "step": 21920 }, { "epoch": 1.2970958774472112, "grad_norm": 0.2812981903553009, "learning_rate": 2.9224786191167395e-05, "loss": 0.0873, "step": 21930 }, { "epoch": 1.2976873484355593, "grad_norm": 0.34394609928131104, "learning_rate": 2.922387412848596e-05, "loss": 0.0842, "step": 21940 }, { "epoch": 1.2982788194239072, "grad_norm": 0.2937251329421997, "learning_rate": 2.9222961543834168e-05, "loss": 0.0707, "step": 21950 }, { "epoch": 1.2988702904122553, "grad_norm": 0.3248669505119324, "learning_rate": 2.9222048437245513e-05, "loss": 0.0713, "step": 21960 }, { "epoch": 1.2994617614006034, "grad_norm": 0.3074268400669098, "learning_rate": 2.9221134808753496e-05, "loss": 0.0865, "step": 21970 }, { "epoch": 1.3000532323889513, "grad_norm": 0.18960897624492645, "learning_rate": 2.9220220658391652e-05, "loss": 0.0774, "step": 21980 }, { "epoch": 1.3006447033772992, "grad_norm": 0.24076619744300842, "learning_rate": 2.9219305986193525e-05, "loss": 0.0774, "step": 21990 }, { "epoch": 1.3012361743656473, "grad_norm": 0.2616594731807709, "learning_rate": 2.9218390792192684e-05, "loss": 0.0689, "step": 22000 }, { "epoch": 1.3018276453539954, "grad_norm": 0.3906490206718445, "learning_rate": 2.9217475076422705e-05, "loss": 0.0673, "step": 22010 }, { "epoch": 1.3024191163423433, "grad_norm": 0.28497427701950073, "learning_rate": 2.9216558838917195e-05, "loss": 0.1018, "step": 22020 }, { "epoch": 1.3030105873306914, "grad_norm": 0.324411004781723, "learning_rate": 2.9215642079709785e-05, "loss": 0.0911, "step": 22030 }, { "epoch": 1.3036020583190395, "grad_norm": 0.28426289558410645, "learning_rate": 2.9214724798834105e-05, "loss": 0.0896, "step": 22040 }, { "epoch": 1.3041935293073874, "grad_norm": 0.207442507147789, "learning_rate": 2.9213806996323827e-05, "loss": 0.0681, "step": 22050 }, { "epoch": 1.3047850002957355, "grad_norm": 0.27853384613990784, "learning_rate": 2.921288867221262e-05, "loss": 0.0759, "step": 22060 }, { "epoch": 1.3053764712840836, "grad_norm": 0.2529630661010742, "learning_rate": 2.9211969826534194e-05, "loss": 0.0857, "step": 22070 }, { "epoch": 1.3059679422724315, "grad_norm": 0.27132290601730347, "learning_rate": 2.9211050459322263e-05, "loss": 0.0953, "step": 22080 }, { "epoch": 1.3065594132607796, "grad_norm": 0.21091541647911072, "learning_rate": 2.9210130570610564e-05, "loss": 0.0863, "step": 22090 }, { "epoch": 1.3071508842491275, "grad_norm": 0.3435342609882355, "learning_rate": 2.9209210160432857e-05, "loss": 0.0679, "step": 22100 }, { "epoch": 1.3077423552374756, "grad_norm": 0.26723024249076843, "learning_rate": 2.9208289228822918e-05, "loss": 0.0831, "step": 22110 }, { "epoch": 1.3083338262258235, "grad_norm": 0.3549343943595886, "learning_rate": 2.9207367775814536e-05, "loss": 0.0937, "step": 22120 }, { "epoch": 1.3089252972141716, "grad_norm": 0.1855117678642273, "learning_rate": 2.9206445801441533e-05, "loss": 0.1038, "step": 22130 }, { "epoch": 1.3095167682025197, "grad_norm": 0.21145416796207428, "learning_rate": 2.9205523305737738e-05, "loss": 0.0793, "step": 22140 }, { "epoch": 1.3101082391908676, "grad_norm": 0.1863199770450592, "learning_rate": 2.9204600288737006e-05, "loss": 0.0693, "step": 22150 }, { "epoch": 1.3106997101792157, "grad_norm": 0.32054775953292847, "learning_rate": 2.9203676750473208e-05, "loss": 0.0819, "step": 22160 }, { "epoch": 1.3112911811675638, "grad_norm": 0.36954885721206665, "learning_rate": 2.9202752690980235e-05, "loss": 0.0909, "step": 22170 }, { "epoch": 1.3118826521559117, "grad_norm": 0.4393022358417511, "learning_rate": 2.9201828110291996e-05, "loss": 0.0946, "step": 22180 }, { "epoch": 1.3124741231442598, "grad_norm": 0.22962810099124908, "learning_rate": 2.9200903008442423e-05, "loss": 0.073, "step": 22190 }, { "epoch": 1.313065594132608, "grad_norm": 0.23853257298469543, "learning_rate": 2.919997738546546e-05, "loss": 0.0706, "step": 22200 }, { "epoch": 1.3136570651209558, "grad_norm": 0.3615471124649048, "learning_rate": 2.9199051241395075e-05, "loss": 0.0756, "step": 22210 }, { "epoch": 1.3142485361093037, "grad_norm": 0.3072773218154907, "learning_rate": 2.919812457626526e-05, "loss": 0.0795, "step": 22220 }, { "epoch": 1.3148400070976518, "grad_norm": 0.3055395781993866, "learning_rate": 2.9197197390110013e-05, "loss": 0.0886, "step": 22230 }, { "epoch": 1.315431478086, "grad_norm": 0.29420092701911926, "learning_rate": 2.9196269682963362e-05, "loss": 0.0864, "step": 22240 }, { "epoch": 1.3160229490743478, "grad_norm": 0.23795561492443085, "learning_rate": 2.919534145485935e-05, "loss": 0.0787, "step": 22250 }, { "epoch": 1.316614420062696, "grad_norm": 0.40031588077545166, "learning_rate": 2.9194412705832046e-05, "loss": 0.077, "step": 22260 }, { "epoch": 1.317205891051044, "grad_norm": 0.24321489036083221, "learning_rate": 2.9193483435915526e-05, "loss": 0.0922, "step": 22270 }, { "epoch": 1.317797362039392, "grad_norm": 0.22535431385040283, "learning_rate": 2.9192553645143895e-05, "loss": 0.0795, "step": 22280 }, { "epoch": 1.31838883302774, "grad_norm": 0.21220308542251587, "learning_rate": 2.9191623333551264e-05, "loss": 0.0757, "step": 22290 }, { "epoch": 1.3189803040160881, "grad_norm": 0.34057313203811646, "learning_rate": 2.9190692501171783e-05, "loss": 0.0722, "step": 22300 }, { "epoch": 1.319571775004436, "grad_norm": 0.3472253382205963, "learning_rate": 2.9189761148039607e-05, "loss": 0.0847, "step": 22310 }, { "epoch": 1.3201632459927841, "grad_norm": 0.24485459923744202, "learning_rate": 2.9188829274188912e-05, "loss": 0.1033, "step": 22320 }, { "epoch": 1.320754716981132, "grad_norm": 0.29137709736824036, "learning_rate": 2.91878968796539e-05, "loss": 0.0852, "step": 22330 }, { "epoch": 1.3213461879694801, "grad_norm": 0.3172985017299652, "learning_rate": 2.918696396446878e-05, "loss": 0.0893, "step": 22340 }, { "epoch": 1.321937658957828, "grad_norm": 0.19637930393218994, "learning_rate": 2.918603052866779e-05, "loss": 0.0836, "step": 22350 }, { "epoch": 1.322529129946176, "grad_norm": 0.2576524615287781, "learning_rate": 2.9185096572285182e-05, "loss": 0.0703, "step": 22360 }, { "epoch": 1.3231206009345242, "grad_norm": 0.26654699444770813, "learning_rate": 2.9184162095355236e-05, "loss": 0.0912, "step": 22370 }, { "epoch": 1.323712071922872, "grad_norm": 0.24477635324001312, "learning_rate": 2.9183227097912237e-05, "loss": 0.0895, "step": 22380 }, { "epoch": 1.3243035429112202, "grad_norm": 0.27838197350502014, "learning_rate": 2.91822915799905e-05, "loss": 0.0691, "step": 22390 }, { "epoch": 1.3248950138995683, "grad_norm": 0.2614823877811432, "learning_rate": 2.9181355541624354e-05, "loss": 0.0695, "step": 22400 }, { "epoch": 1.3254864848879162, "grad_norm": 0.3041316866874695, "learning_rate": 2.918041898284815e-05, "loss": 0.0728, "step": 22410 }, { "epoch": 1.3260779558762643, "grad_norm": 0.35545647144317627, "learning_rate": 2.9179481903696255e-05, "loss": 0.0988, "step": 22420 }, { "epoch": 1.3266694268646124, "grad_norm": 0.4133315980434418, "learning_rate": 2.917854430420306e-05, "loss": 0.0798, "step": 22430 }, { "epoch": 1.3272608978529603, "grad_norm": 0.20991329848766327, "learning_rate": 2.9177606184402965e-05, "loss": 0.0732, "step": 22440 }, { "epoch": 1.3278523688413084, "grad_norm": 0.2603638768196106, "learning_rate": 2.9176667544330404e-05, "loss": 0.0864, "step": 22450 }, { "epoch": 1.3284438398296563, "grad_norm": 0.3072459101676941, "learning_rate": 2.9175728384019818e-05, "loss": 0.0702, "step": 22460 }, { "epoch": 1.3290353108180044, "grad_norm": 0.27183201909065247, "learning_rate": 2.9174788703505672e-05, "loss": 0.0935, "step": 22470 }, { "epoch": 1.3296267818063523, "grad_norm": 0.4419783651828766, "learning_rate": 2.917384850282245e-05, "loss": 0.0903, "step": 22480 }, { "epoch": 1.3302182527947004, "grad_norm": 0.3345002830028534, "learning_rate": 2.9172907782004648e-05, "loss": 0.0833, "step": 22490 }, { "epoch": 1.3308097237830485, "grad_norm": 0.1996384859085083, "learning_rate": 2.91719665410868e-05, "loss": 0.0664, "step": 22500 }, { "epoch": 1.3314011947713964, "grad_norm": 0.2663660943508148, "learning_rate": 2.9171024780103434e-05, "loss": 0.0721, "step": 22510 }, { "epoch": 1.3319926657597445, "grad_norm": 0.34870555996894836, "learning_rate": 2.9170082499089117e-05, "loss": 0.086, "step": 22520 }, { "epoch": 1.3325841367480926, "grad_norm": 0.2871217131614685, "learning_rate": 2.9169139698078424e-05, "loss": 0.0854, "step": 22530 }, { "epoch": 1.3331756077364405, "grad_norm": 0.2635180950164795, "learning_rate": 2.9168196377105955e-05, "loss": 0.0962, "step": 22540 }, { "epoch": 1.3337670787247886, "grad_norm": 0.23301883041858673, "learning_rate": 2.9167252536206326e-05, "loss": 0.0779, "step": 22550 }, { "epoch": 1.3343585497131365, "grad_norm": 0.2403072565793991, "learning_rate": 2.9166308175414175e-05, "loss": 0.0806, "step": 22560 }, { "epoch": 1.3349500207014846, "grad_norm": 0.41287466883659363, "learning_rate": 2.916536329476415e-05, "loss": 0.0937, "step": 22570 }, { "epoch": 1.3355414916898325, "grad_norm": 0.2873689830303192, "learning_rate": 2.9164417894290936e-05, "loss": 0.087, "step": 22580 }, { "epoch": 1.3361329626781806, "grad_norm": 0.23839373886585236, "learning_rate": 2.9163471974029214e-05, "loss": 0.0801, "step": 22590 }, { "epoch": 1.3367244336665287, "grad_norm": 0.33009073138237, "learning_rate": 2.9162525534013706e-05, "loss": 0.0796, "step": 22600 }, { "epoch": 1.3373159046548766, "grad_norm": 0.3074011504650116, "learning_rate": 2.9161578574279138e-05, "loss": 0.0715, "step": 22610 }, { "epoch": 1.3379073756432247, "grad_norm": 0.2656671404838562, "learning_rate": 2.9160631094860268e-05, "loss": 0.0921, "step": 22620 }, { "epoch": 1.3384988466315728, "grad_norm": 0.2100301831960678, "learning_rate": 2.9159683095791855e-05, "loss": 0.0966, "step": 22630 }, { "epoch": 1.3390903176199207, "grad_norm": 0.23908312618732452, "learning_rate": 2.9158734577108696e-05, "loss": 0.0827, "step": 22640 }, { "epoch": 1.3396817886082688, "grad_norm": 0.35226577520370483, "learning_rate": 2.915778553884559e-05, "loss": 0.0712, "step": 22650 }, { "epoch": 1.340273259596617, "grad_norm": 0.507988452911377, "learning_rate": 2.915683598103737e-05, "loss": 0.0905, "step": 22660 }, { "epoch": 1.3408647305849648, "grad_norm": 0.2682386040687561, "learning_rate": 2.9155885903718878e-05, "loss": 0.0869, "step": 22670 }, { "epoch": 1.341456201573313, "grad_norm": 0.3229106068611145, "learning_rate": 2.915493530692498e-05, "loss": 0.0891, "step": 22680 }, { "epoch": 1.3420476725616608, "grad_norm": 0.4209747314453125, "learning_rate": 2.9153984190690564e-05, "loss": 0.0799, "step": 22690 }, { "epoch": 1.342639143550009, "grad_norm": 0.4753105044364929, "learning_rate": 2.915303255505053e-05, "loss": 0.0736, "step": 22700 }, { "epoch": 1.3432306145383568, "grad_norm": 0.4175238311290741, "learning_rate": 2.9152080400039797e-05, "loss": 0.0791, "step": 22710 }, { "epoch": 1.343822085526705, "grad_norm": 0.27466076612472534, "learning_rate": 2.915112772569331e-05, "loss": 0.1013, "step": 22720 }, { "epoch": 1.344413556515053, "grad_norm": 0.2286314070224762, "learning_rate": 2.9150174532046026e-05, "loss": 0.0837, "step": 22730 }, { "epoch": 1.345005027503401, "grad_norm": 0.3145163357257843, "learning_rate": 2.9149220819132925e-05, "loss": 0.0878, "step": 22740 }, { "epoch": 1.345596498491749, "grad_norm": 0.26326292753219604, "learning_rate": 2.914826658698901e-05, "loss": 0.0753, "step": 22750 }, { "epoch": 1.3461879694800971, "grad_norm": 0.22757557034492493, "learning_rate": 2.914731183564929e-05, "loss": 0.0805, "step": 22760 }, { "epoch": 1.346779440468445, "grad_norm": 0.2653678059577942, "learning_rate": 2.9146356565148808e-05, "loss": 0.0954, "step": 22770 }, { "epoch": 1.347370911456793, "grad_norm": 0.40158361196517944, "learning_rate": 2.9145400775522618e-05, "loss": 0.0888, "step": 22780 }, { "epoch": 1.347962382445141, "grad_norm": 0.19501306116580963, "learning_rate": 2.9144444466805792e-05, "loss": 0.0795, "step": 22790 }, { "epoch": 1.348553853433489, "grad_norm": 0.2360030859708786, "learning_rate": 2.9143487639033423e-05, "loss": 0.0664, "step": 22800 }, { "epoch": 1.349145324421837, "grad_norm": 0.39113089442253113, "learning_rate": 2.9142530292240627e-05, "loss": 0.074, "step": 22810 }, { "epoch": 1.349736795410185, "grad_norm": 0.267241507768631, "learning_rate": 2.914157242646253e-05, "loss": 0.0834, "step": 22820 }, { "epoch": 1.3503282663985332, "grad_norm": 0.2754008173942566, "learning_rate": 2.9140614041734294e-05, "loss": 0.0864, "step": 22830 }, { "epoch": 1.350919737386881, "grad_norm": 0.2691642940044403, "learning_rate": 2.913965513809108e-05, "loss": 0.0987, "step": 22840 }, { "epoch": 1.3515112083752292, "grad_norm": 0.2318229377269745, "learning_rate": 2.913869571556807e-05, "loss": 0.0622, "step": 22850 }, { "epoch": 1.3521026793635773, "grad_norm": 0.3408474326133728, "learning_rate": 2.9137735774200483e-05, "loss": 0.076, "step": 22860 }, { "epoch": 1.3526941503519252, "grad_norm": 0.36699235439300537, "learning_rate": 2.9136775314023544e-05, "loss": 0.0885, "step": 22870 }, { "epoch": 1.3532856213402733, "grad_norm": 0.31899940967559814, "learning_rate": 2.9135814335072497e-05, "loss": 0.0851, "step": 22880 }, { "epoch": 1.3538770923286214, "grad_norm": 0.17593620717525482, "learning_rate": 2.9134852837382603e-05, "loss": 0.0848, "step": 22890 }, { "epoch": 1.3544685633169693, "grad_norm": 0.25284650921821594, "learning_rate": 2.9133890820989152e-05, "loss": 0.0684, "step": 22900 }, { "epoch": 1.3550600343053174, "grad_norm": 0.29678091406822205, "learning_rate": 2.9132928285927444e-05, "loss": 0.0781, "step": 22910 }, { "epoch": 1.3556515052936653, "grad_norm": 0.3187030851840973, "learning_rate": 2.91319652322328e-05, "loss": 0.0881, "step": 22920 }, { "epoch": 1.3562429762820134, "grad_norm": 0.33558425307273865, "learning_rate": 2.913100165994057e-05, "loss": 0.0804, "step": 22930 }, { "epoch": 1.3568344472703613, "grad_norm": 0.23844169080257416, "learning_rate": 2.9130037569086094e-05, "loss": 0.0821, "step": 22940 }, { "epoch": 1.3574259182587094, "grad_norm": 0.2821294963359833, "learning_rate": 2.912907295970477e-05, "loss": 0.0661, "step": 22950 }, { "epoch": 1.3580173892470575, "grad_norm": 0.325705349445343, "learning_rate": 2.912810783183199e-05, "loss": 0.0845, "step": 22960 }, { "epoch": 1.3586088602354054, "grad_norm": 0.27407771348953247, "learning_rate": 2.912714218550317e-05, "loss": 0.0903, "step": 22970 }, { "epoch": 1.3592003312237535, "grad_norm": 0.22974953055381775, "learning_rate": 2.9126176020753747e-05, "loss": 0.0896, "step": 22980 }, { "epoch": 1.3597918022121016, "grad_norm": 0.3038245737552643, "learning_rate": 2.9125209337619176e-05, "loss": 0.0921, "step": 22990 }, { "epoch": 1.3603832732004495, "grad_norm": 0.30204445123672485, "learning_rate": 2.9124242136134928e-05, "loss": 0.0799, "step": 23000 }, { "epoch": 1.3609747441887976, "grad_norm": 0.22251738607883453, "learning_rate": 2.9123274416336506e-05, "loss": 0.0825, "step": 23010 }, { "epoch": 1.3615662151771455, "grad_norm": 0.32170993089675903, "learning_rate": 2.9122306178259406e-05, "loss": 0.099, "step": 23020 }, { "epoch": 1.3621576861654936, "grad_norm": 0.24463686347007751, "learning_rate": 2.9121337421939173e-05, "loss": 0.0856, "step": 23030 }, { "epoch": 1.3627491571538415, "grad_norm": 0.2763751447200775, "learning_rate": 2.9120368147411355e-05, "loss": 0.0734, "step": 23040 }, { "epoch": 1.3633406281421896, "grad_norm": 0.18502481281757355, "learning_rate": 2.9119398354711515e-05, "loss": 0.0747, "step": 23050 }, { "epoch": 1.3639320991305377, "grad_norm": 0.2492811381816864, "learning_rate": 2.911842804387525e-05, "loss": 0.0772, "step": 23060 }, { "epoch": 1.3645235701188856, "grad_norm": 0.3575737774372101, "learning_rate": 2.9117457214938157e-05, "loss": 0.0874, "step": 23070 }, { "epoch": 1.3651150411072337, "grad_norm": 0.29859188199043274, "learning_rate": 2.911648586793587e-05, "loss": 0.081, "step": 23080 }, { "epoch": 1.3657065120955818, "grad_norm": 0.16729451715946198, "learning_rate": 2.9115514002904036e-05, "loss": 0.0797, "step": 23090 }, { "epoch": 1.3662979830839297, "grad_norm": 0.22657711803913116, "learning_rate": 2.911454161987831e-05, "loss": 0.0742, "step": 23100 }, { "epoch": 1.3668894540722778, "grad_norm": 0.3024768531322479, "learning_rate": 2.9113568718894383e-05, "loss": 0.077, "step": 23110 }, { "epoch": 1.367480925060626, "grad_norm": 0.36197978258132935, "learning_rate": 2.9112595299987956e-05, "loss": 0.0957, "step": 23120 }, { "epoch": 1.3680723960489738, "grad_norm": 0.48049652576446533, "learning_rate": 2.911162136319475e-05, "loss": 0.0883, "step": 23130 }, { "epoch": 1.368663867037322, "grad_norm": 0.22693835198879242, "learning_rate": 2.9110646908550502e-05, "loss": 0.0858, "step": 23140 }, { "epoch": 1.3692553380256698, "grad_norm": 0.3059140145778656, "learning_rate": 2.910967193609098e-05, "loss": 0.0934, "step": 23150 }, { "epoch": 1.369846809014018, "grad_norm": 0.3193203806877136, "learning_rate": 2.9108696445851947e-05, "loss": 0.072, "step": 23160 }, { "epoch": 1.3704382800023658, "grad_norm": 0.29991045594215393, "learning_rate": 2.910772043786922e-05, "loss": 0.0936, "step": 23170 }, { "epoch": 1.371029750990714, "grad_norm": 0.32271626591682434, "learning_rate": 2.9106743912178598e-05, "loss": 0.095, "step": 23180 }, { "epoch": 1.371621221979062, "grad_norm": 0.23675476014614105, "learning_rate": 2.910576686881593e-05, "loss": 0.0651, "step": 23190 }, { "epoch": 1.3722126929674099, "grad_norm": 0.18949252367019653, "learning_rate": 2.9104789307817058e-05, "loss": 0.0634, "step": 23200 }, { "epoch": 1.372804163955758, "grad_norm": 0.2776506245136261, "learning_rate": 2.910381122921786e-05, "loss": 0.0759, "step": 23210 }, { "epoch": 1.373395634944106, "grad_norm": 0.27336469292640686, "learning_rate": 2.9102832633054237e-05, "loss": 0.0931, "step": 23220 }, { "epoch": 1.373987105932454, "grad_norm": 0.17465288937091827, "learning_rate": 2.910185351936209e-05, "loss": 0.0889, "step": 23230 }, { "epoch": 1.374578576920802, "grad_norm": 0.2795037627220154, "learning_rate": 2.910087388817735e-05, "loss": 0.0729, "step": 23240 }, { "epoch": 1.37517004790915, "grad_norm": 0.4954911172389984, "learning_rate": 2.9099893739535965e-05, "loss": 0.0729, "step": 23250 }, { "epoch": 1.375761518897498, "grad_norm": 0.513812243938446, "learning_rate": 2.9098913073473917e-05, "loss": 0.0843, "step": 23260 }, { "epoch": 1.376352989885846, "grad_norm": 0.2534746527671814, "learning_rate": 2.9097931890027174e-05, "loss": 0.0842, "step": 23270 }, { "epoch": 1.376944460874194, "grad_norm": 0.2504163086414337, "learning_rate": 2.9096950189231757e-05, "loss": 0.084, "step": 23280 }, { "epoch": 1.3775359318625422, "grad_norm": 0.2822531461715698, "learning_rate": 2.909596797112368e-05, "loss": 0.088, "step": 23290 }, { "epoch": 1.37812740285089, "grad_norm": 0.30269113183021545, "learning_rate": 2.9094985235739e-05, "loss": 0.0857, "step": 23300 }, { "epoch": 1.3787188738392382, "grad_norm": 0.2717697322368622, "learning_rate": 2.9094001983113766e-05, "loss": 0.0833, "step": 23310 }, { "epoch": 1.3793103448275863, "grad_norm": 0.2133883535861969, "learning_rate": 2.9093018213284075e-05, "loss": 0.0974, "step": 23320 }, { "epoch": 1.3799018158159342, "grad_norm": 0.22112144529819489, "learning_rate": 2.9092033926286014e-05, "loss": 0.0835, "step": 23330 }, { "epoch": 1.3804932868042823, "grad_norm": 0.30217593908309937, "learning_rate": 2.909104912215571e-05, "loss": 0.0978, "step": 23340 }, { "epoch": 1.3810847577926304, "grad_norm": 0.2720113694667816, "learning_rate": 2.9090063800929306e-05, "loss": 0.0861, "step": 23350 }, { "epoch": 1.3816762287809783, "grad_norm": 0.3004179000854492, "learning_rate": 2.9089077962642956e-05, "loss": 0.0814, "step": 23360 }, { "epoch": 1.3822676997693264, "grad_norm": 0.3403264284133911, "learning_rate": 2.9088091607332834e-05, "loss": 0.0987, "step": 23370 }, { "epoch": 1.3828591707576743, "grad_norm": 0.29011252522468567, "learning_rate": 2.908710473503514e-05, "loss": 0.0932, "step": 23380 }, { "epoch": 1.3834506417460224, "grad_norm": 0.2832266390323639, "learning_rate": 2.9086117345786085e-05, "loss": 0.0859, "step": 23390 }, { "epoch": 1.3840421127343703, "grad_norm": 0.35785990953445435, "learning_rate": 2.908512943962191e-05, "loss": 0.0701, "step": 23400 }, { "epoch": 1.3846335837227184, "grad_norm": 0.3094034194946289, "learning_rate": 2.908414101657886e-05, "loss": 0.0844, "step": 23410 }, { "epoch": 1.3852250547110665, "grad_norm": 0.3175751566886902, "learning_rate": 2.9083152076693218e-05, "loss": 0.078, "step": 23420 }, { "epoch": 1.3858165256994144, "grad_norm": 0.4987969398498535, "learning_rate": 2.9082162620001263e-05, "loss": 0.1067, "step": 23430 }, { "epoch": 1.3864079966877625, "grad_norm": 0.2950889766216278, "learning_rate": 2.9081172646539312e-05, "loss": 0.0797, "step": 23440 }, { "epoch": 1.3869994676761106, "grad_norm": 0.2327333390712738, "learning_rate": 2.9080182156343692e-05, "loss": 0.0758, "step": 23450 }, { "epoch": 1.3875909386644585, "grad_norm": 0.3496612012386322, "learning_rate": 2.9079191149450747e-05, "loss": 0.0823, "step": 23460 }, { "epoch": 1.3881824096528066, "grad_norm": 0.30393901467323303, "learning_rate": 2.907819962589685e-05, "loss": 0.0817, "step": 23470 }, { "epoch": 1.3887738806411545, "grad_norm": 0.328605979681015, "learning_rate": 2.9077207585718383e-05, "loss": 0.0753, "step": 23480 }, { "epoch": 1.3893653516295026, "grad_norm": 0.24817006289958954, "learning_rate": 2.907621502895175e-05, "loss": 0.0729, "step": 23490 }, { "epoch": 1.3899568226178505, "grad_norm": 0.20940034091472626, "learning_rate": 2.907522195563338e-05, "loss": 0.0679, "step": 23500 }, { "epoch": 1.3905482936061986, "grad_norm": 0.32009151577949524, "learning_rate": 2.9074228365799706e-05, "loss": 0.0728, "step": 23510 }, { "epoch": 1.3911397645945467, "grad_norm": 0.22112223505973816, "learning_rate": 2.90732342594872e-05, "loss": 0.0947, "step": 23520 }, { "epoch": 1.3917312355828946, "grad_norm": 0.29622411727905273, "learning_rate": 2.9072239636732338e-05, "loss": 0.0784, "step": 23530 }, { "epoch": 1.3923227065712427, "grad_norm": 0.2310027927160263, "learning_rate": 2.907124449757162e-05, "loss": 0.09, "step": 23540 }, { "epoch": 1.3929141775595908, "grad_norm": 0.1826559603214264, "learning_rate": 2.9070248842041558e-05, "loss": 0.0709, "step": 23550 }, { "epoch": 1.3935056485479387, "grad_norm": 0.25800254940986633, "learning_rate": 2.90692526701787e-05, "loss": 0.0735, "step": 23560 }, { "epoch": 1.3940971195362868, "grad_norm": 0.28597453236579895, "learning_rate": 2.90682559820196e-05, "loss": 0.0847, "step": 23570 }, { "epoch": 1.394688590524635, "grad_norm": 0.28089994192123413, "learning_rate": 2.9067258777600824e-05, "loss": 0.0931, "step": 23580 }, { "epoch": 1.3952800615129828, "grad_norm": 0.24727469682693481, "learning_rate": 2.9066261056958974e-05, "loss": 0.0837, "step": 23590 }, { "epoch": 1.395871532501331, "grad_norm": 1.9808515310287476, "learning_rate": 2.9065262820130663e-05, "loss": 0.0749, "step": 23600 }, { "epoch": 1.3964630034896788, "grad_norm": 0.2612461745738983, "learning_rate": 2.9064264067152525e-05, "loss": 0.0764, "step": 23610 }, { "epoch": 1.3970544744780269, "grad_norm": 0.23766689002513885, "learning_rate": 2.9063264798061207e-05, "loss": 0.1003, "step": 23620 }, { "epoch": 1.3976459454663748, "grad_norm": 0.25766539573669434, "learning_rate": 2.9062265012893375e-05, "loss": 0.0768, "step": 23630 }, { "epoch": 1.3982374164547229, "grad_norm": 0.185215026140213, "learning_rate": 2.906126471168573e-05, "loss": 0.0824, "step": 23640 }, { "epoch": 1.398828887443071, "grad_norm": 0.2767346203327179, "learning_rate": 2.906026389447497e-05, "loss": 0.0667, "step": 23650 }, { "epoch": 1.3994203584314189, "grad_norm": 0.4220616817474365, "learning_rate": 2.905926256129782e-05, "loss": 0.0821, "step": 23660 }, { "epoch": 1.400011829419767, "grad_norm": 0.2590365409851074, "learning_rate": 2.9058260712191035e-05, "loss": 0.0817, "step": 23670 }, { "epoch": 1.400603300408115, "grad_norm": 0.19240209460258484, "learning_rate": 2.9057258347191374e-05, "loss": 0.0803, "step": 23680 }, { "epoch": 1.401194771396463, "grad_norm": 0.2880755066871643, "learning_rate": 2.905625546633562e-05, "loss": 0.0825, "step": 23690 }, { "epoch": 1.401786242384811, "grad_norm": 0.19638900458812714, "learning_rate": 2.905525206966058e-05, "loss": 0.0769, "step": 23700 }, { "epoch": 1.402377713373159, "grad_norm": 0.36756008863449097, "learning_rate": 2.905424815720307e-05, "loss": 0.0796, "step": 23710 }, { "epoch": 1.402969184361507, "grad_norm": 0.3500494360923767, "learning_rate": 2.9053243728999935e-05, "loss": 0.0992, "step": 23720 }, { "epoch": 1.403560655349855, "grad_norm": 0.2764095664024353, "learning_rate": 2.905223878508803e-05, "loss": 0.0819, "step": 23730 }, { "epoch": 1.404152126338203, "grad_norm": 0.20463374257087708, "learning_rate": 2.905123332550423e-05, "loss": 0.0711, "step": 23740 }, { "epoch": 1.4047435973265512, "grad_norm": 0.20663319528102875, "learning_rate": 2.9050227350285443e-05, "loss": 0.0675, "step": 23750 }, { "epoch": 1.405335068314899, "grad_norm": 0.34086737036705017, "learning_rate": 2.9049220859468578e-05, "loss": 0.0825, "step": 23760 }, { "epoch": 1.4059265393032472, "grad_norm": 0.36889517307281494, "learning_rate": 2.9048213853090574e-05, "loss": 0.0996, "step": 23770 }, { "epoch": 1.4065180102915953, "grad_norm": 0.3235318660736084, "learning_rate": 2.904720633118838e-05, "loss": 0.089, "step": 23780 }, { "epoch": 1.4071094812799432, "grad_norm": 0.3583332896232605, "learning_rate": 2.904619829379897e-05, "loss": 0.084, "step": 23790 }, { "epoch": 1.4077009522682913, "grad_norm": 0.27394190430641174, "learning_rate": 2.9045189740959337e-05, "loss": 0.0709, "step": 23800 }, { "epoch": 1.4082924232566394, "grad_norm": 0.3436741828918457, "learning_rate": 2.9044180672706493e-05, "loss": 0.0859, "step": 23810 }, { "epoch": 1.4088838942449873, "grad_norm": 0.2860000431537628, "learning_rate": 2.9043171089077465e-05, "loss": 0.0924, "step": 23820 }, { "epoch": 1.4094753652333354, "grad_norm": 0.25949347019195557, "learning_rate": 2.9042160990109303e-05, "loss": 0.0994, "step": 23830 }, { "epoch": 1.4100668362216833, "grad_norm": 0.26031145453453064, "learning_rate": 2.9041150375839075e-05, "loss": 0.088, "step": 23840 }, { "epoch": 1.4106583072100314, "grad_norm": 0.17178063094615936, "learning_rate": 2.904013924630386e-05, "loss": 0.079, "step": 23850 }, { "epoch": 1.4112497781983793, "grad_norm": 0.3879677355289459, "learning_rate": 2.9039127601540775e-05, "loss": 0.0836, "step": 23860 }, { "epoch": 1.4118412491867274, "grad_norm": 0.3190739154815674, "learning_rate": 2.9038115441586936e-05, "loss": 0.1236, "step": 23870 }, { "epoch": 1.4124327201750755, "grad_norm": 0.19187705218791962, "learning_rate": 2.9037102766479486e-05, "loss": 0.0786, "step": 23880 }, { "epoch": 1.4130241911634234, "grad_norm": 0.18643411993980408, "learning_rate": 2.9036089576255594e-05, "loss": 0.0781, "step": 23890 }, { "epoch": 1.4136156621517715, "grad_norm": 0.24650780856609344, "learning_rate": 2.9035075870952433e-05, "loss": 0.058, "step": 23900 }, { "epoch": 1.4142071331401196, "grad_norm": 0.24776890873908997, "learning_rate": 2.9034061650607207e-05, "loss": 0.0811, "step": 23910 }, { "epoch": 1.4147986041284675, "grad_norm": 0.17962007224559784, "learning_rate": 2.9033046915257132e-05, "loss": 0.0714, "step": 23920 }, { "epoch": 1.4153900751168156, "grad_norm": 0.2940641939640045, "learning_rate": 2.903203166493945e-05, "loss": 0.0914, "step": 23930 }, { "epoch": 1.4159815461051635, "grad_norm": 4.546075344085693, "learning_rate": 2.903101589969141e-05, "loss": 0.095, "step": 23940 }, { "epoch": 1.4165730170935116, "grad_norm": 0.19622208178043365, "learning_rate": 2.9029999619550295e-05, "loss": 0.0736, "step": 23950 }, { "epoch": 1.4171644880818595, "grad_norm": 0.25399908423423767, "learning_rate": 2.9028982824553398e-05, "loss": 0.08, "step": 23960 }, { "epoch": 1.4177559590702076, "grad_norm": 0.3059501647949219, "learning_rate": 2.9027965514738027e-05, "loss": 0.0924, "step": 23970 }, { "epoch": 1.4183474300585557, "grad_norm": 0.3289657235145569, "learning_rate": 2.9026947690141517e-05, "loss": 0.083, "step": 23980 }, { "epoch": 1.4189389010469036, "grad_norm": 0.23651061952114105, "learning_rate": 2.9025929350801218e-05, "loss": 0.0839, "step": 23990 }, { "epoch": 1.4195303720352517, "grad_norm": 0.22535105049610138, "learning_rate": 2.90249104967545e-05, "loss": 0.0832, "step": 24000 }, { "epoch": 1.4201218430235998, "grad_norm": 0.26407191157341003, "learning_rate": 2.9023891128038754e-05, "loss": 0.0763, "step": 24010 }, { "epoch": 1.4207133140119477, "grad_norm": 0.36456432938575745, "learning_rate": 2.9022871244691386e-05, "loss": 0.0968, "step": 24020 }, { "epoch": 1.4213047850002958, "grad_norm": 0.2294393628835678, "learning_rate": 2.9021850846749825e-05, "loss": 0.0843, "step": 24030 }, { "epoch": 1.4218962559886439, "grad_norm": 0.21623428165912628, "learning_rate": 2.9020829934251507e-05, "loss": 0.0884, "step": 24040 }, { "epoch": 1.4224877269769918, "grad_norm": 0.8087053298950195, "learning_rate": 2.9019808507233905e-05, "loss": 0.074, "step": 24050 }, { "epoch": 1.4230791979653399, "grad_norm": 0.37782397866249084, "learning_rate": 2.90187865657345e-05, "loss": 0.0716, "step": 24060 }, { "epoch": 1.4236706689536878, "grad_norm": 0.18069672584533691, "learning_rate": 2.9017764109790798e-05, "loss": 0.0945, "step": 24070 }, { "epoch": 1.4242621399420359, "grad_norm": 1.7422423362731934, "learning_rate": 2.901674113944031e-05, "loss": 0.0886, "step": 24080 }, { "epoch": 1.4248536109303838, "grad_norm": 0.20915158092975616, "learning_rate": 2.9015717654720585e-05, "loss": 0.082, "step": 24090 }, { "epoch": 1.4254450819187319, "grad_norm": 0.23701195418834686, "learning_rate": 2.901469365566918e-05, "loss": 0.081, "step": 24100 }, { "epoch": 1.42603655290708, "grad_norm": 0.29659730195999146, "learning_rate": 2.9013669142323668e-05, "loss": 0.0625, "step": 24110 }, { "epoch": 1.4266280238954279, "grad_norm": 0.2895767092704773, "learning_rate": 2.9012644114721642e-05, "loss": 0.0937, "step": 24120 }, { "epoch": 1.427219494883776, "grad_norm": 0.34154126048088074, "learning_rate": 2.901161857290073e-05, "loss": 0.0761, "step": 24130 }, { "epoch": 1.427810965872124, "grad_norm": 0.20248724520206451, "learning_rate": 2.9010592516898554e-05, "loss": 0.08, "step": 24140 }, { "epoch": 1.428402436860472, "grad_norm": 0.2956783175468445, "learning_rate": 2.900956594675278e-05, "loss": 0.0775, "step": 24150 }, { "epoch": 1.42899390784882, "grad_norm": 0.3450738489627838, "learning_rate": 2.9008538862501062e-05, "loss": 0.0793, "step": 24160 }, { "epoch": 1.429585378837168, "grad_norm": 0.30057528614997864, "learning_rate": 2.900751126418111e-05, "loss": 0.0888, "step": 24170 }, { "epoch": 1.430176849825516, "grad_norm": 0.36087965965270996, "learning_rate": 2.9006483151830613e-05, "loss": 0.0893, "step": 24180 }, { "epoch": 1.430768320813864, "grad_norm": 0.27264660596847534, "learning_rate": 2.9005454525487317e-05, "loss": 0.0843, "step": 24190 }, { "epoch": 1.431359791802212, "grad_norm": 0.2545437514781952, "learning_rate": 2.9004425385188963e-05, "loss": 0.0823, "step": 24200 }, { "epoch": 1.4319512627905602, "grad_norm": 0.2809264361858368, "learning_rate": 2.900339573097332e-05, "loss": 0.0667, "step": 24210 }, { "epoch": 1.432542733778908, "grad_norm": 0.28703922033309937, "learning_rate": 2.9002365562878167e-05, "loss": 0.0982, "step": 24220 }, { "epoch": 1.4331342047672562, "grad_norm": 0.21560266613960266, "learning_rate": 2.900133488094131e-05, "loss": 0.077, "step": 24230 }, { "epoch": 1.4337256757556043, "grad_norm": 0.3359575569629669, "learning_rate": 2.900030368520057e-05, "loss": 0.0704, "step": 24240 }, { "epoch": 1.4343171467439522, "grad_norm": 0.1947898417711258, "learning_rate": 2.8999271975693792e-05, "loss": 0.0724, "step": 24250 }, { "epoch": 1.4349086177323003, "grad_norm": 0.33292466402053833, "learning_rate": 2.899823975245884e-05, "loss": 0.0809, "step": 24260 }, { "epoch": 1.4355000887206484, "grad_norm": 0.18848590552806854, "learning_rate": 2.8997207015533585e-05, "loss": 0.092, "step": 24270 }, { "epoch": 1.4360915597089963, "grad_norm": 0.20416322350502014, "learning_rate": 2.899617376495593e-05, "loss": 0.091, "step": 24280 }, { "epoch": 1.4366830306973444, "grad_norm": 0.2685832381248474, "learning_rate": 2.8995140000763786e-05, "loss": 0.0834, "step": 24290 }, { "epoch": 1.4372745016856923, "grad_norm": 0.24769675731658936, "learning_rate": 2.8994105722995097e-05, "loss": 0.0712, "step": 24300 }, { "epoch": 1.4378659726740404, "grad_norm": 0.3723134994506836, "learning_rate": 2.8993070931687815e-05, "loss": 0.0886, "step": 24310 }, { "epoch": 1.4384574436623883, "grad_norm": 0.301455557346344, "learning_rate": 2.899203562687991e-05, "loss": 0.0868, "step": 24320 }, { "epoch": 1.4390489146507364, "grad_norm": 0.2522510886192322, "learning_rate": 2.8990999808609383e-05, "loss": 0.0877, "step": 24330 }, { "epoch": 1.4396403856390845, "grad_norm": 0.2616371810436249, "learning_rate": 2.8989963476914238e-05, "loss": 0.0861, "step": 24340 }, { "epoch": 1.4402318566274324, "grad_norm": 0.13928189873695374, "learning_rate": 2.89889266318325e-05, "loss": 0.0658, "step": 24350 }, { "epoch": 1.4408233276157805, "grad_norm": 0.28191640973091125, "learning_rate": 2.898788927340223e-05, "loss": 0.082, "step": 24360 }, { "epoch": 1.4414147986041286, "grad_norm": 0.22441254556179047, "learning_rate": 2.8986851401661487e-05, "loss": 0.0835, "step": 24370 }, { "epoch": 1.4420062695924765, "grad_norm": 0.21616429090499878, "learning_rate": 2.898581301664836e-05, "loss": 0.0786, "step": 24380 }, { "epoch": 1.4425977405808246, "grad_norm": 0.2360030859708786, "learning_rate": 2.898477411840096e-05, "loss": 0.0913, "step": 24390 }, { "epoch": 1.4431892115691725, "grad_norm": 0.17373469471931458, "learning_rate": 2.89837347069574e-05, "loss": 0.0738, "step": 24400 }, { "epoch": 1.4437806825575206, "grad_norm": 0.2542229890823364, "learning_rate": 2.8982694782355832e-05, "loss": 0.0715, "step": 24410 }, { "epoch": 1.4443721535458685, "grad_norm": 0.250963032245636, "learning_rate": 2.8981654344634414e-05, "loss": 0.0894, "step": 24420 }, { "epoch": 1.4449636245342166, "grad_norm": 0.22512248158454895, "learning_rate": 2.8980613393831327e-05, "loss": 0.0881, "step": 24430 }, { "epoch": 1.4455550955225647, "grad_norm": 0.2522774934768677, "learning_rate": 2.897957192998477e-05, "loss": 0.0817, "step": 24440 }, { "epoch": 1.4461465665109126, "grad_norm": 0.2726389169692993, "learning_rate": 2.8978529953132968e-05, "loss": 0.0705, "step": 24450 }, { "epoch": 1.4467380374992607, "grad_norm": 0.23936805129051208, "learning_rate": 2.8977487463314147e-05, "loss": 0.0734, "step": 24460 }, { "epoch": 1.4473295084876088, "grad_norm": 0.25148290395736694, "learning_rate": 2.8976444460566572e-05, "loss": 0.0945, "step": 24470 }, { "epoch": 1.4479209794759567, "grad_norm": 0.317205548286438, "learning_rate": 2.8975400944928516e-05, "loss": 0.0753, "step": 24480 }, { "epoch": 1.4485124504643048, "grad_norm": 0.3132873773574829, "learning_rate": 2.897435691643827e-05, "loss": 0.09, "step": 24490 }, { "epoch": 1.4491039214526529, "grad_norm": 0.2174491137266159, "learning_rate": 2.8973312375134147e-05, "loss": 0.0689, "step": 24500 }, { "epoch": 1.4496953924410008, "grad_norm": 0.2698325514793396, "learning_rate": 2.897226732105448e-05, "loss": 0.0733, "step": 24510 }, { "epoch": 1.4502868634293489, "grad_norm": 0.289106547832489, "learning_rate": 2.8971221754237614e-05, "loss": 0.0839, "step": 24520 }, { "epoch": 1.4508783344176968, "grad_norm": 0.33012062311172485, "learning_rate": 2.8970175674721927e-05, "loss": 0.0895, "step": 24530 }, { "epoch": 1.4514698054060449, "grad_norm": 0.20294640958309174, "learning_rate": 2.89691290825458e-05, "loss": 0.0766, "step": 24540 }, { "epoch": 1.4520612763943928, "grad_norm": 0.3956138789653778, "learning_rate": 2.8968081977747644e-05, "loss": 0.0677, "step": 24550 }, { "epoch": 1.4526527473827409, "grad_norm": 0.26099687814712524, "learning_rate": 2.896703436036588e-05, "loss": 0.0765, "step": 24560 }, { "epoch": 1.453244218371089, "grad_norm": 0.22580376267433167, "learning_rate": 2.896598623043895e-05, "loss": 0.0788, "step": 24570 }, { "epoch": 1.4538356893594369, "grad_norm": 0.31483158469200134, "learning_rate": 2.8964937588005328e-05, "loss": 0.078, "step": 24580 }, { "epoch": 1.454427160347785, "grad_norm": 0.3252531886100769, "learning_rate": 2.8963888433103487e-05, "loss": 0.0832, "step": 24590 }, { "epoch": 1.455018631336133, "grad_norm": 0.282911092042923, "learning_rate": 2.8962838765771927e-05, "loss": 0.0753, "step": 24600 }, { "epoch": 1.455610102324481, "grad_norm": 0.1887507438659668, "learning_rate": 2.896178858604917e-05, "loss": 0.0709, "step": 24610 }, { "epoch": 1.456201573312829, "grad_norm": 0.20157001912593842, "learning_rate": 2.896073789397375e-05, "loss": 0.0798, "step": 24620 }, { "epoch": 1.456793044301177, "grad_norm": 0.37894460558891296, "learning_rate": 2.895968668958424e-05, "loss": 0.0949, "step": 24630 }, { "epoch": 1.457384515289525, "grad_norm": 0.279159814119339, "learning_rate": 2.8958634972919194e-05, "loss": 0.0807, "step": 24640 }, { "epoch": 1.457975986277873, "grad_norm": 0.18518033623695374, "learning_rate": 2.8957582744017216e-05, "loss": 0.0705, "step": 24650 }, { "epoch": 1.458567457266221, "grad_norm": 0.24535031616687775, "learning_rate": 2.8956530002916926e-05, "loss": 0.0759, "step": 24660 }, { "epoch": 1.4591589282545692, "grad_norm": 0.3250593841075897, "learning_rate": 2.8955476749656947e-05, "loss": 0.0888, "step": 24670 }, { "epoch": 1.459750399242917, "grad_norm": 0.2268904149532318, "learning_rate": 2.8954422984275936e-05, "loss": 0.0894, "step": 24680 }, { "epoch": 1.4603418702312652, "grad_norm": 0.23000016808509827, "learning_rate": 2.8953368706812554e-05, "loss": 0.0832, "step": 24690 }, { "epoch": 1.4609333412196133, "grad_norm": 0.2462313175201416, "learning_rate": 2.89523139173055e-05, "loss": 0.0803, "step": 24700 }, { "epoch": 1.4615248122079612, "grad_norm": 0.25295957922935486, "learning_rate": 2.8951258615793478e-05, "loss": 0.0799, "step": 24710 }, { "epoch": 1.4621162831963093, "grad_norm": 0.19940926134586334, "learning_rate": 2.8950202802315206e-05, "loss": 0.0921, "step": 24720 }, { "epoch": 1.4627077541846574, "grad_norm": 0.25467416644096375, "learning_rate": 2.8949146476909442e-05, "loss": 0.0873, "step": 24730 }, { "epoch": 1.4632992251730053, "grad_norm": 0.539389967918396, "learning_rate": 2.894808963961494e-05, "loss": 0.09, "step": 24740 }, { "epoch": 1.4638906961613534, "grad_norm": 0.21207265555858612, "learning_rate": 2.8947032290470488e-05, "loss": 0.0762, "step": 24750 }, { "epoch": 1.4644821671497013, "grad_norm": 0.24505196511745453, "learning_rate": 2.894597442951489e-05, "loss": 0.083, "step": 24760 }, { "epoch": 1.4650736381380494, "grad_norm": 0.28688478469848633, "learning_rate": 2.8944916056786957e-05, "loss": 0.0872, "step": 24770 }, { "epoch": 1.4656651091263972, "grad_norm": 0.25698918104171753, "learning_rate": 2.894385717232553e-05, "loss": 0.0903, "step": 24780 }, { "epoch": 1.4662565801147454, "grad_norm": 0.29510873556137085, "learning_rate": 2.894279777616947e-05, "loss": 0.0866, "step": 24790 }, { "epoch": 1.4668480511030935, "grad_norm": 1.8017829656600952, "learning_rate": 2.8941737868357658e-05, "loss": 0.0746, "step": 24800 }, { "epoch": 1.4674395220914414, "grad_norm": 0.22172819077968597, "learning_rate": 2.894067744892898e-05, "loss": 0.0781, "step": 24810 }, { "epoch": 1.4680309930797895, "grad_norm": 0.38896676898002625, "learning_rate": 2.893961651792235e-05, "loss": 0.0866, "step": 24820 }, { "epoch": 1.4686224640681376, "grad_norm": 0.2756063640117645, "learning_rate": 2.8938555075376715e-05, "loss": 0.101, "step": 24830 }, { "epoch": 1.4692139350564855, "grad_norm": 0.17455819249153137, "learning_rate": 2.8937493121331008e-05, "loss": 0.0824, "step": 24840 }, { "epoch": 1.4698054060448336, "grad_norm": 0.17877046763896942, "learning_rate": 2.8936430655824206e-05, "loss": 0.0705, "step": 24850 }, { "epoch": 1.4703968770331814, "grad_norm": 0.35156285762786865, "learning_rate": 2.8935367678895305e-05, "loss": 0.0824, "step": 24860 }, { "epoch": 1.4709883480215296, "grad_norm": 0.30160465836524963, "learning_rate": 2.8934304190583304e-05, "loss": 0.0846, "step": 24870 }, { "epoch": 1.4715798190098774, "grad_norm": 0.38168251514434814, "learning_rate": 2.8933240190927232e-05, "loss": 0.0985, "step": 24880 }, { "epoch": 1.4721712899982256, "grad_norm": 0.2823927402496338, "learning_rate": 2.8932175679966136e-05, "loss": 0.0877, "step": 24890 }, { "epoch": 1.4727627609865737, "grad_norm": 0.30881232023239136, "learning_rate": 2.893111065773908e-05, "loss": 0.0841, "step": 24900 }, { "epoch": 1.4733542319749215, "grad_norm": 0.2710370123386383, "learning_rate": 2.8930045124285147e-05, "loss": 0.0827, "step": 24910 }, { "epoch": 1.4739457029632697, "grad_norm": 0.24227558076381683, "learning_rate": 2.8928979079643438e-05, "loss": 0.0919, "step": 24920 }, { "epoch": 1.4745371739516178, "grad_norm": 0.22660906612873077, "learning_rate": 2.892791252385307e-05, "loss": 0.0778, "step": 24930 }, { "epoch": 1.4751286449399656, "grad_norm": 10.437958717346191, "learning_rate": 2.8926845456953186e-05, "loss": 0.0729, "step": 24940 }, { "epoch": 1.4757201159283138, "grad_norm": 0.3503447473049164, "learning_rate": 2.8925777878982946e-05, "loss": 0.0787, "step": 24950 }, { "epoch": 1.4763115869166619, "grad_norm": 0.2755274772644043, "learning_rate": 2.8924709789981523e-05, "loss": 0.0672, "step": 24960 }, { "epoch": 1.4769030579050098, "grad_norm": 0.33493316173553467, "learning_rate": 2.8923641189988112e-05, "loss": 0.0865, "step": 24970 }, { "epoch": 1.4774945288933579, "grad_norm": 0.21875622868537903, "learning_rate": 2.892257207904193e-05, "loss": 0.0796, "step": 24980 }, { "epoch": 1.4780859998817057, "grad_norm": 0.30097121000289917, "learning_rate": 2.8921502457182208e-05, "loss": 0.0735, "step": 24990 }, { "epoch": 1.4786774708700539, "grad_norm": 0.36131998896598816, "learning_rate": 2.8920432324448197e-05, "loss": 0.0638, "step": 25000 }, { "epoch": 1.4792689418584017, "grad_norm": 0.38696184754371643, "learning_rate": 2.891936168087917e-05, "loss": 0.0774, "step": 25010 }, { "epoch": 1.4798604128467499, "grad_norm": 0.28108328580856323, "learning_rate": 2.8918290526514413e-05, "loss": 0.0874, "step": 25020 }, { "epoch": 1.480451883835098, "grad_norm": 0.21860097348690033, "learning_rate": 2.8917218861393236e-05, "loss": 0.0944, "step": 25030 }, { "epoch": 1.4810433548234458, "grad_norm": 0.23821933567523956, "learning_rate": 2.891614668555497e-05, "loss": 0.0792, "step": 25040 }, { "epoch": 1.481634825811794, "grad_norm": 0.24258559942245483, "learning_rate": 2.891507399903895e-05, "loss": 0.0768, "step": 25050 }, { "epoch": 1.482226296800142, "grad_norm": 0.21447689831256866, "learning_rate": 2.8914000801884548e-05, "loss": 0.0734, "step": 25060 }, { "epoch": 1.48281776778849, "grad_norm": 0.2917690575122833, "learning_rate": 2.8912927094131144e-05, "loss": 0.1002, "step": 25070 }, { "epoch": 1.483409238776838, "grad_norm": 0.4488290548324585, "learning_rate": 2.8911852875818144e-05, "loss": 0.0867, "step": 25080 }, { "epoch": 1.484000709765186, "grad_norm": 0.2777525782585144, "learning_rate": 2.8910778146984957e-05, "loss": 0.073, "step": 25090 }, { "epoch": 1.484592180753534, "grad_norm": 0.25133609771728516, "learning_rate": 2.8909702907671037e-05, "loss": 0.0691, "step": 25100 }, { "epoch": 1.485183651741882, "grad_norm": 0.25835084915161133, "learning_rate": 2.8908627157915832e-05, "loss": 0.0727, "step": 25110 }, { "epoch": 1.48577512273023, "grad_norm": 0.234394833445549, "learning_rate": 2.8907550897758817e-05, "loss": 0.083, "step": 25120 }, { "epoch": 1.4863665937185782, "grad_norm": 0.2965814471244812, "learning_rate": 2.8906474127239498e-05, "loss": 0.0878, "step": 25130 }, { "epoch": 1.486958064706926, "grad_norm": 0.17419470846652985, "learning_rate": 2.8905396846397383e-05, "loss": 0.0758, "step": 25140 }, { "epoch": 1.4875495356952741, "grad_norm": 0.17521622776985168, "learning_rate": 2.8904319055272e-05, "loss": 0.0694, "step": 25150 }, { "epoch": 1.4881410066836223, "grad_norm": 0.2575691342353821, "learning_rate": 2.8903240753902905e-05, "loss": 0.0776, "step": 25160 }, { "epoch": 1.4887324776719701, "grad_norm": 0.32859188318252563, "learning_rate": 2.890216194232967e-05, "loss": 0.0965, "step": 25170 }, { "epoch": 1.4893239486603183, "grad_norm": 0.23825177550315857, "learning_rate": 2.890108262059188e-05, "loss": 0.0844, "step": 25180 }, { "epoch": 1.4899154196486664, "grad_norm": 0.2501539885997772, "learning_rate": 2.8900002788729142e-05, "loss": 0.0773, "step": 25190 }, { "epoch": 1.4905068906370142, "grad_norm": 0.26883506774902344, "learning_rate": 2.889892244678109e-05, "loss": 0.0838, "step": 25200 }, { "epoch": 1.4910983616253624, "grad_norm": 0.25209516286849976, "learning_rate": 2.889784159478736e-05, "loss": 0.0724, "step": 25210 }, { "epoch": 1.4916898326137102, "grad_norm": 0.25017935037612915, "learning_rate": 2.8896760232787624e-05, "loss": 0.0904, "step": 25220 }, { "epoch": 1.4922813036020584, "grad_norm": 0.35468435287475586, "learning_rate": 2.8895678360821556e-05, "loss": 0.082, "step": 25230 }, { "epoch": 1.4928727745904062, "grad_norm": 0.1857665777206421, "learning_rate": 2.8894595978928865e-05, "loss": 0.0831, "step": 25240 }, { "epoch": 1.4934642455787543, "grad_norm": 0.19951242208480835, "learning_rate": 2.8893513087149264e-05, "loss": 0.0772, "step": 25250 }, { "epoch": 1.4940557165671025, "grad_norm": 0.3285885155200958, "learning_rate": 2.889242968552249e-05, "loss": 0.0706, "step": 25260 }, { "epoch": 1.4946471875554503, "grad_norm": 0.24961206316947937, "learning_rate": 2.8891345774088312e-05, "loss": 0.078, "step": 25270 }, { "epoch": 1.4952386585437984, "grad_norm": 0.20167319476604462, "learning_rate": 2.8890261352886503e-05, "loss": 0.0752, "step": 25280 }, { "epoch": 1.4958301295321466, "grad_norm": 0.2106693983078003, "learning_rate": 2.8889176421956848e-05, "loss": 0.0853, "step": 25290 }, { "epoch": 1.4964216005204944, "grad_norm": 0.23926286399364471, "learning_rate": 2.8888090981339167e-05, "loss": 0.0715, "step": 25300 }, { "epoch": 1.4970130715088426, "grad_norm": 0.2556246817111969, "learning_rate": 2.888700503107329e-05, "loss": 0.0707, "step": 25310 }, { "epoch": 1.4976045424971904, "grad_norm": 0.2691529095172882, "learning_rate": 2.888591857119907e-05, "loss": 0.0881, "step": 25320 }, { "epoch": 1.4981960134855385, "grad_norm": 0.32988426089286804, "learning_rate": 2.888483160175638e-05, "loss": 0.0794, "step": 25330 }, { "epoch": 1.4987874844738864, "grad_norm": 0.3034648001194, "learning_rate": 2.88837441227851e-05, "loss": 0.0954, "step": 25340 }, { "epoch": 1.4993789554622345, "grad_norm": 0.25944504141807556, "learning_rate": 2.8882656134325145e-05, "loss": 0.0737, "step": 25350 }, { "epoch": 1.4999704264505827, "grad_norm": 0.30176156759262085, "learning_rate": 2.8881567636416434e-05, "loss": 0.0762, "step": 25360 }, { "epoch": 1.5005618974389305, "grad_norm": 0.28882089257240295, "learning_rate": 2.8880478629098918e-05, "loss": 0.0963, "step": 25370 }, { "epoch": 1.5011533684272786, "grad_norm": 0.2960374355316162, "learning_rate": 2.8879389112412554e-05, "loss": 0.0916, "step": 25380 }, { "epoch": 1.5017448394156268, "grad_norm": 0.25397223234176636, "learning_rate": 2.8878299086397325e-05, "loss": 0.0948, "step": 25390 }, { "epoch": 1.5023363104039746, "grad_norm": 0.25824278593063354, "learning_rate": 2.887720855109324e-05, "loss": 0.0694, "step": 25400 }, { "epoch": 1.5029277813923227, "grad_norm": 0.19423234462738037, "learning_rate": 2.88761175065403e-05, "loss": 0.0712, "step": 25410 }, { "epoch": 1.5035192523806709, "grad_norm": 0.2979271709918976, "learning_rate": 2.8875025952778562e-05, "loss": 0.082, "step": 25420 }, { "epoch": 1.5041107233690187, "grad_norm": 0.221957728266716, "learning_rate": 2.8873933889848072e-05, "loss": 0.0823, "step": 25430 }, { "epoch": 1.5047021943573666, "grad_norm": 0.23225772380828857, "learning_rate": 2.8872841317788904e-05, "loss": 0.0697, "step": 25440 }, { "epoch": 1.5052936653457147, "grad_norm": 0.20314069092273712, "learning_rate": 2.887174823664116e-05, "loss": 0.0782, "step": 25450 }, { "epoch": 1.5058851363340628, "grad_norm": 0.28477638959884644, "learning_rate": 2.8870654646444946e-05, "loss": 0.0667, "step": 25460 }, { "epoch": 1.5064766073224107, "grad_norm": 0.2893887162208557, "learning_rate": 2.886956054724039e-05, "loss": 0.0886, "step": 25470 }, { "epoch": 1.5070680783107588, "grad_norm": 0.22578442096710205, "learning_rate": 2.8868465939067655e-05, "loss": 0.0727, "step": 25480 }, { "epoch": 1.507659549299107, "grad_norm": 0.2739262580871582, "learning_rate": 2.8867370821966896e-05, "loss": 0.0902, "step": 25490 }, { "epoch": 1.5082510202874548, "grad_norm": 0.33137065172195435, "learning_rate": 2.8866275195978308e-05, "loss": 0.075, "step": 25500 }, { "epoch": 1.508842491275803, "grad_norm": 0.34751516580581665, "learning_rate": 2.8865179061142093e-05, "loss": 0.0711, "step": 25510 }, { "epoch": 1.509433962264151, "grad_norm": 0.48016417026519775, "learning_rate": 2.886408241749848e-05, "loss": 0.0872, "step": 25520 }, { "epoch": 1.510025433252499, "grad_norm": 0.21668638288974762, "learning_rate": 2.886298526508771e-05, "loss": 0.0792, "step": 25530 }, { "epoch": 1.5106169042408468, "grad_norm": 0.330017626285553, "learning_rate": 2.886188760395004e-05, "loss": 0.0715, "step": 25540 }, { "epoch": 1.5112083752291952, "grad_norm": 0.2145836502313614, "learning_rate": 2.8860789434125758e-05, "loss": 0.0788, "step": 25550 }, { "epoch": 1.511799846217543, "grad_norm": 0.3094559609889984, "learning_rate": 2.8859690755655162e-05, "loss": 0.0751, "step": 25560 }, { "epoch": 1.512391317205891, "grad_norm": 0.27866923809051514, "learning_rate": 2.8858591568578564e-05, "loss": 0.0943, "step": 25570 }, { "epoch": 1.512982788194239, "grad_norm": 0.30330196022987366, "learning_rate": 2.8857491872936307e-05, "loss": 0.0837, "step": 25580 }, { "epoch": 1.5135742591825871, "grad_norm": 0.22278471291065216, "learning_rate": 2.8856391668768744e-05, "loss": 0.0766, "step": 25590 }, { "epoch": 1.514165730170935, "grad_norm": 0.19442130625247955, "learning_rate": 2.8855290956116253e-05, "loss": 0.0713, "step": 25600 }, { "epoch": 1.5147572011592831, "grad_norm": 0.38117706775665283, "learning_rate": 2.8854189735019222e-05, "loss": 0.0824, "step": 25610 }, { "epoch": 1.5153486721476312, "grad_norm": 0.4601345360279083, "learning_rate": 2.885308800551806e-05, "loss": 0.0925, "step": 25620 }, { "epoch": 1.5159401431359791, "grad_norm": 0.2863464653491974, "learning_rate": 2.8851985767653204e-05, "loss": 0.083, "step": 25630 }, { "epoch": 1.5165316141243272, "grad_norm": 0.2643550634384155, "learning_rate": 2.8850883021465094e-05, "loss": 0.0728, "step": 25640 }, { "epoch": 1.5171230851126754, "grad_norm": 0.20228776335716248, "learning_rate": 2.8849779766994204e-05, "loss": 0.0683, "step": 25650 }, { "epoch": 1.5177145561010232, "grad_norm": 0.5430436134338379, "learning_rate": 2.884867600428102e-05, "loss": 0.0715, "step": 25660 }, { "epoch": 1.5183060270893711, "grad_norm": 0.27862808108329773, "learning_rate": 2.884757173336604e-05, "loss": 0.0875, "step": 25670 }, { "epoch": 1.5188974980777192, "grad_norm": 0.268477201461792, "learning_rate": 2.8846466954289798e-05, "loss": 0.0814, "step": 25680 }, { "epoch": 1.5194889690660673, "grad_norm": 0.2043159306049347, "learning_rate": 2.8845361667092822e-05, "loss": 0.0798, "step": 25690 }, { "epoch": 1.5200804400544152, "grad_norm": 0.3362095057964325, "learning_rate": 2.8844255871815682e-05, "loss": 0.0894, "step": 25700 }, { "epoch": 1.5206719110427633, "grad_norm": 0.24881064891815186, "learning_rate": 2.8843149568498962e-05, "loss": 0.0876, "step": 25710 }, { "epoch": 1.5212633820311114, "grad_norm": 0.22972163558006287, "learning_rate": 2.8842042757183244e-05, "loss": 0.0829, "step": 25720 }, { "epoch": 1.5218548530194593, "grad_norm": 0.2812427878379822, "learning_rate": 2.8840935437909155e-05, "loss": 0.0737, "step": 25730 }, { "epoch": 1.5224463240078074, "grad_norm": 0.19959071278572083, "learning_rate": 2.8839827610717333e-05, "loss": 0.0797, "step": 25740 }, { "epoch": 1.5230377949961555, "grad_norm": 0.34675133228302, "learning_rate": 2.8838719275648422e-05, "loss": 0.0688, "step": 25750 }, { "epoch": 1.5236292659845034, "grad_norm": 0.279104620218277, "learning_rate": 2.8837610432743102e-05, "loss": 0.067, "step": 25760 }, { "epoch": 1.5242207369728513, "grad_norm": 0.3460127115249634, "learning_rate": 2.883650108204206e-05, "loss": 0.09, "step": 25770 }, { "epoch": 1.5248122079611997, "grad_norm": 0.22155936062335968, "learning_rate": 2.8835391223586002e-05, "loss": 0.0902, "step": 25780 }, { "epoch": 1.5254036789495475, "grad_norm": 0.19669297337532043, "learning_rate": 2.883428085741567e-05, "loss": 0.0771, "step": 25790 }, { "epoch": 1.5259951499378954, "grad_norm": 0.24947574734687805, "learning_rate": 2.8833169983571794e-05, "loss": 0.0683, "step": 25800 }, { "epoch": 1.5265866209262435, "grad_norm": 0.34101223945617676, "learning_rate": 2.8832058602095152e-05, "loss": 0.091, "step": 25810 }, { "epoch": 1.5271780919145916, "grad_norm": 0.2508315145969391, "learning_rate": 2.8830946713026522e-05, "loss": 0.0973, "step": 25820 }, { "epoch": 1.5277695629029395, "grad_norm": 0.30286213755607605, "learning_rate": 2.8829834316406707e-05, "loss": 0.087, "step": 25830 }, { "epoch": 1.5283610338912876, "grad_norm": 0.18652908504009247, "learning_rate": 2.8828721412276534e-05, "loss": 0.0852, "step": 25840 }, { "epoch": 1.5289525048796357, "grad_norm": 0.2862434983253479, "learning_rate": 2.8827608000676834e-05, "loss": 0.0693, "step": 25850 }, { "epoch": 1.5295439758679836, "grad_norm": 0.99066162109375, "learning_rate": 2.882649408164847e-05, "loss": 0.0818, "step": 25860 }, { "epoch": 1.5301354468563317, "grad_norm": 0.2878056764602661, "learning_rate": 2.8825379655232324e-05, "loss": 0.0936, "step": 25870 }, { "epoch": 1.5307269178446798, "grad_norm": 0.9740822911262512, "learning_rate": 2.8824264721469284e-05, "loss": 0.0958, "step": 25880 }, { "epoch": 1.5313183888330277, "grad_norm": 0.20788346230983734, "learning_rate": 2.8823149280400265e-05, "loss": 0.0827, "step": 25890 }, { "epoch": 1.5319098598213756, "grad_norm": 0.26112043857574463, "learning_rate": 2.882203333206621e-05, "loss": 0.076, "step": 25900 }, { "epoch": 1.532501330809724, "grad_norm": 0.2624869644641876, "learning_rate": 2.8820916876508058e-05, "loss": 0.07, "step": 25910 }, { "epoch": 1.5330928017980718, "grad_norm": 0.26642489433288574, "learning_rate": 2.881979991376679e-05, "loss": 0.0822, "step": 25920 }, { "epoch": 1.5336842727864197, "grad_norm": 0.23014599084854126, "learning_rate": 2.881868244388338e-05, "loss": 0.084, "step": 25930 }, { "epoch": 1.5342757437747678, "grad_norm": 0.5993940234184265, "learning_rate": 2.8817564466898853e-05, "loss": 0.0712, "step": 25940 }, { "epoch": 1.534867214763116, "grad_norm": 0.2774715721607208, "learning_rate": 2.8816445982854227e-05, "loss": 0.0708, "step": 25950 }, { "epoch": 1.5354586857514638, "grad_norm": 0.35026293992996216, "learning_rate": 2.881532699179055e-05, "loss": 0.0773, "step": 25960 }, { "epoch": 1.536050156739812, "grad_norm": 0.27871862053871155, "learning_rate": 2.8814207493748876e-05, "loss": 0.0953, "step": 25970 }, { "epoch": 1.53664162772816, "grad_norm": 0.25329703092575073, "learning_rate": 2.88130874887703e-05, "loss": 0.0815, "step": 25980 }, { "epoch": 1.537233098716508, "grad_norm": 0.23667258024215698, "learning_rate": 2.881196697689591e-05, "loss": 0.0917, "step": 25990 }, { "epoch": 1.5378245697048558, "grad_norm": 0.23568439483642578, "learning_rate": 2.8810845958166836e-05, "loss": 0.0688, "step": 26000 }, { "epoch": 1.5384160406932041, "grad_norm": 0.3685014247894287, "learning_rate": 2.8809724432624208e-05, "loss": 0.0754, "step": 26010 }, { "epoch": 1.539007511681552, "grad_norm": 0.24632401764392853, "learning_rate": 2.8808602400309187e-05, "loss": 0.0875, "step": 26020 }, { "epoch": 1.5395989826699, "grad_norm": 0.2708096504211426, "learning_rate": 2.880747986126295e-05, "loss": 0.0846, "step": 26030 }, { "epoch": 1.540190453658248, "grad_norm": 5.202524185180664, "learning_rate": 2.8806356815526685e-05, "loss": 0.0844, "step": 26040 }, { "epoch": 1.5407819246465961, "grad_norm": 0.22329317033290863, "learning_rate": 2.88052332631416e-05, "loss": 0.0628, "step": 26050 }, { "epoch": 1.541373395634944, "grad_norm": 0.3066757321357727, "learning_rate": 2.880410920414894e-05, "loss": 0.0806, "step": 26060 }, { "epoch": 1.5419648666232921, "grad_norm": 0.22780433297157288, "learning_rate": 2.880298463858994e-05, "loss": 0.0864, "step": 26070 }, { "epoch": 1.5425563376116402, "grad_norm": 0.28007641434669495, "learning_rate": 2.8801859566505878e-05, "loss": 0.0808, "step": 26080 }, { "epoch": 1.5431478085999881, "grad_norm": 0.28541135787963867, "learning_rate": 2.8800733987938037e-05, "loss": 0.0837, "step": 26090 }, { "epoch": 1.5437392795883362, "grad_norm": 0.3992471396923065, "learning_rate": 2.8799607902927723e-05, "loss": 0.0827, "step": 26100 }, { "epoch": 1.5443307505766843, "grad_norm": 0.29137539863586426, "learning_rate": 2.8798481311516255e-05, "loss": 0.0771, "step": 26110 }, { "epoch": 1.5449222215650322, "grad_norm": 0.32866477966308594, "learning_rate": 2.879735421374498e-05, "loss": 0.086, "step": 26120 }, { "epoch": 1.5455136925533801, "grad_norm": 0.2312266230583191, "learning_rate": 2.8796226609655262e-05, "loss": 0.0877, "step": 26130 }, { "epoch": 1.5461051635417284, "grad_norm": 0.22062306106090546, "learning_rate": 2.8795098499288473e-05, "loss": 0.0739, "step": 26140 }, { "epoch": 1.5466966345300763, "grad_norm": 0.29537996649742126, "learning_rate": 2.8793969882686012e-05, "loss": 0.0834, "step": 26150 }, { "epoch": 1.5472881055184242, "grad_norm": 0.2168600857257843, "learning_rate": 2.87928407598893e-05, "loss": 0.0816, "step": 26160 }, { "epoch": 1.5478795765067723, "grad_norm": 0.2883399724960327, "learning_rate": 2.8791711130939768e-05, "loss": 0.0962, "step": 26170 }, { "epoch": 1.5484710474951204, "grad_norm": 0.274944931268692, "learning_rate": 2.8790580995878876e-05, "loss": 0.0843, "step": 26180 }, { "epoch": 1.5490625184834683, "grad_norm": 0.28298524022102356, "learning_rate": 2.8789450354748087e-05, "loss": 0.0883, "step": 26190 }, { "epoch": 1.5496539894718164, "grad_norm": 0.19047197699546814, "learning_rate": 2.87883192075889e-05, "loss": 0.067, "step": 26200 }, { "epoch": 1.5502454604601645, "grad_norm": 0.28505972027778625, "learning_rate": 2.8787187554442816e-05, "loss": 0.0727, "step": 26210 }, { "epoch": 1.5508369314485124, "grad_norm": 0.29495692253112793, "learning_rate": 2.8786055395351373e-05, "loss": 0.0907, "step": 26220 }, { "epoch": 1.5514284024368603, "grad_norm": 0.2588426172733307, "learning_rate": 2.8784922730356113e-05, "loss": 0.0778, "step": 26230 }, { "epoch": 1.5520198734252086, "grad_norm": 0.23878620564937592, "learning_rate": 2.8783789559498605e-05, "loss": 0.081, "step": 26240 }, { "epoch": 1.5526113444135565, "grad_norm": 0.21636812388896942, "learning_rate": 2.8782655882820423e-05, "loss": 0.0564, "step": 26250 }, { "epoch": 1.5532028154019044, "grad_norm": 0.22310784459114075, "learning_rate": 2.8781521700363175e-05, "loss": 0.0764, "step": 26260 }, { "epoch": 1.5537942863902525, "grad_norm": 0.3720957338809967, "learning_rate": 2.8780387012168483e-05, "loss": 0.0819, "step": 26270 }, { "epoch": 1.5543857573786006, "grad_norm": 0.5286603569984436, "learning_rate": 2.8779251818277985e-05, "loss": 0.091, "step": 26280 }, { "epoch": 1.5549772283669485, "grad_norm": 0.3768579959869385, "learning_rate": 2.877811611873334e-05, "loss": 0.0717, "step": 26290 }, { "epoch": 1.5555686993552966, "grad_norm": 0.20370739698410034, "learning_rate": 2.8776979913576223e-05, "loss": 0.0731, "step": 26300 }, { "epoch": 1.5561601703436447, "grad_norm": 0.26866206526756287, "learning_rate": 2.8775843202848332e-05, "loss": 0.074, "step": 26310 }, { "epoch": 1.5567516413319926, "grad_norm": 0.23890383541584015, "learning_rate": 2.8774705986591378e-05, "loss": 0.0826, "step": 26320 }, { "epoch": 1.5573431123203407, "grad_norm": 0.28255531191825867, "learning_rate": 2.8773568264847095e-05, "loss": 0.0881, "step": 26330 }, { "epoch": 1.5579345833086888, "grad_norm": 0.23922079801559448, "learning_rate": 2.8772430037657226e-05, "loss": 0.086, "step": 26340 }, { "epoch": 1.5585260542970367, "grad_norm": 0.17053008079528809, "learning_rate": 2.8771291305063553e-05, "loss": 0.0809, "step": 26350 }, { "epoch": 1.5591175252853846, "grad_norm": 0.30190473794937134, "learning_rate": 2.8770152067107855e-05, "loss": 0.076, "step": 26360 }, { "epoch": 1.559708996273733, "grad_norm": 0.18806904554367065, "learning_rate": 2.8769012323831946e-05, "loss": 0.0896, "step": 26370 }, { "epoch": 1.5603004672620808, "grad_norm": 0.2535829246044159, "learning_rate": 2.8767872075277637e-05, "loss": 0.0786, "step": 26380 }, { "epoch": 1.5608919382504287, "grad_norm": 0.5682618618011475, "learning_rate": 2.8766731321486786e-05, "loss": 0.0844, "step": 26390 }, { "epoch": 1.5614834092387768, "grad_norm": 0.23776757717132568, "learning_rate": 2.8765590062501247e-05, "loss": 0.0692, "step": 26400 }, { "epoch": 1.562074880227125, "grad_norm": 0.2377249151468277, "learning_rate": 2.8764448298362904e-05, "loss": 0.0754, "step": 26410 }, { "epoch": 1.5626663512154728, "grad_norm": 0.30376845598220825, "learning_rate": 2.8763306029113653e-05, "loss": 0.0904, "step": 26420 }, { "epoch": 1.563257822203821, "grad_norm": 0.26991599798202515, "learning_rate": 2.8762163254795418e-05, "loss": 0.0865, "step": 26430 }, { "epoch": 1.563849293192169, "grad_norm": 0.2260972559452057, "learning_rate": 2.8761019975450125e-05, "loss": 0.0862, "step": 26440 }, { "epoch": 1.564440764180517, "grad_norm": 0.21196646988391876, "learning_rate": 2.8759876191119737e-05, "loss": 0.063, "step": 26450 }, { "epoch": 1.5650322351688648, "grad_norm": 0.30477088689804077, "learning_rate": 2.875873190184622e-05, "loss": 0.0675, "step": 26460 }, { "epoch": 1.5656237061572131, "grad_norm": 0.3242291510105133, "learning_rate": 2.8757587107671577e-05, "loss": 0.0817, "step": 26470 }, { "epoch": 1.566215177145561, "grad_norm": 0.2766665518283844, "learning_rate": 2.8756441808637804e-05, "loss": 0.0803, "step": 26480 }, { "epoch": 1.566806648133909, "grad_norm": 0.2363434135913849, "learning_rate": 2.8755296004786947e-05, "loss": 0.095, "step": 26490 }, { "epoch": 1.567398119122257, "grad_norm": 0.2948712408542633, "learning_rate": 2.8754149696161035e-05, "loss": 0.0731, "step": 26500 }, { "epoch": 1.5679895901106051, "grad_norm": 0.26518958806991577, "learning_rate": 2.8753002882802145e-05, "loss": 0.0744, "step": 26510 }, { "epoch": 1.568581061098953, "grad_norm": 0.35065773129463196, "learning_rate": 2.875185556475236e-05, "loss": 0.0892, "step": 26520 }, { "epoch": 1.5691725320873011, "grad_norm": 0.2734394967556, "learning_rate": 2.8750707742053783e-05, "loss": 0.0706, "step": 26530 }, { "epoch": 1.5697640030756492, "grad_norm": 0.23829077184200287, "learning_rate": 2.874955941474853e-05, "loss": 0.0874, "step": 26540 }, { "epoch": 1.5703554740639971, "grad_norm": 0.2101343423128128, "learning_rate": 2.874841058287874e-05, "loss": 0.0795, "step": 26550 }, { "epoch": 1.5709469450523452, "grad_norm": 0.37033194303512573, "learning_rate": 2.8747261246486586e-05, "loss": 0.0859, "step": 26560 }, { "epoch": 1.5715384160406933, "grad_norm": 0.25218653678894043, "learning_rate": 2.874611140561423e-05, "loss": 0.0898, "step": 26570 }, { "epoch": 1.5721298870290412, "grad_norm": 0.2643648087978363, "learning_rate": 2.8744961060303877e-05, "loss": 0.0929, "step": 26580 }, { "epoch": 1.572721358017389, "grad_norm": 0.2065151482820511, "learning_rate": 2.8743810210597732e-05, "loss": 0.0804, "step": 26590 }, { "epoch": 1.5733128290057374, "grad_norm": 0.29339462518692017, "learning_rate": 2.8742658856538033e-05, "loss": 0.077, "step": 26600 }, { "epoch": 1.5739042999940853, "grad_norm": 0.32331642508506775, "learning_rate": 2.8741506998167033e-05, "loss": 0.0721, "step": 26610 }, { "epoch": 1.5744957709824332, "grad_norm": 0.2547270357608795, "learning_rate": 2.8740354635527e-05, "loss": 0.0788, "step": 26620 }, { "epoch": 1.5750872419707813, "grad_norm": 0.37323158979415894, "learning_rate": 2.873920176866021e-05, "loss": 0.0885, "step": 26630 }, { "epoch": 1.5756787129591294, "grad_norm": 0.26701459288597107, "learning_rate": 2.8738048397608995e-05, "loss": 0.0889, "step": 26640 }, { "epoch": 1.5762701839474773, "grad_norm": 0.27092495560646057, "learning_rate": 2.8736894522415654e-05, "loss": 0.0774, "step": 26650 }, { "epoch": 1.5768616549358254, "grad_norm": 0.2353277951478958, "learning_rate": 2.8735740143122543e-05, "loss": 0.0729, "step": 26660 }, { "epoch": 1.5774531259241735, "grad_norm": 0.2040995955467224, "learning_rate": 2.8734585259772025e-05, "loss": 0.0871, "step": 26670 }, { "epoch": 1.5780445969125214, "grad_norm": 0.23545150458812714, "learning_rate": 2.8733429872406478e-05, "loss": 0.0878, "step": 26680 }, { "epoch": 1.5786360679008693, "grad_norm": 0.2848670184612274, "learning_rate": 2.87322739810683e-05, "loss": 0.0745, "step": 26690 }, { "epoch": 1.5792275388892176, "grad_norm": 0.21129831671714783, "learning_rate": 2.8731117585799912e-05, "loss": 0.0758, "step": 26700 }, { "epoch": 1.5798190098775655, "grad_norm": 0.2422870695590973, "learning_rate": 2.8729960686643743e-05, "loss": 0.0811, "step": 26710 }, { "epoch": 1.5804104808659134, "grad_norm": 0.32673415541648865, "learning_rate": 2.8728803283642257e-05, "loss": 0.0868, "step": 26720 }, { "epoch": 1.5810019518542615, "grad_norm": 0.20843687653541565, "learning_rate": 2.872764537683792e-05, "loss": 0.0835, "step": 26730 }, { "epoch": 1.5815934228426096, "grad_norm": 0.29568907618522644, "learning_rate": 2.8726486966273226e-05, "loss": 0.0798, "step": 26740 }, { "epoch": 1.5821848938309575, "grad_norm": 0.24375396966934204, "learning_rate": 2.8725328051990687e-05, "loss": 0.0701, "step": 26750 }, { "epoch": 1.5827763648193056, "grad_norm": 0.3115358352661133, "learning_rate": 2.8724168634032822e-05, "loss": 0.0703, "step": 26760 }, { "epoch": 1.5833678358076537, "grad_norm": 0.49175745248794556, "learning_rate": 2.8723008712442188e-05, "loss": 0.0962, "step": 26770 }, { "epoch": 1.5839593067960016, "grad_norm": 0.20081643760204315, "learning_rate": 2.872184828726135e-05, "loss": 0.0942, "step": 26780 }, { "epoch": 1.5845507777843497, "grad_norm": 0.25259608030319214, "learning_rate": 2.8720687358532887e-05, "loss": 0.0717, "step": 26790 }, { "epoch": 1.5851422487726978, "grad_norm": 0.34213241934776306, "learning_rate": 2.87195259262994e-05, "loss": 0.0863, "step": 26800 }, { "epoch": 1.5857337197610457, "grad_norm": 0.34115734696388245, "learning_rate": 2.8718363990603512e-05, "loss": 0.0711, "step": 26810 }, { "epoch": 1.5863251907493936, "grad_norm": 0.18234825134277344, "learning_rate": 2.871720155148787e-05, "loss": 0.0951, "step": 26820 }, { "epoch": 1.586916661737742, "grad_norm": 0.23068365454673767, "learning_rate": 2.8716038608995122e-05, "loss": 0.0904, "step": 26830 }, { "epoch": 1.5875081327260898, "grad_norm": 0.20634141564369202, "learning_rate": 2.871487516316795e-05, "loss": 0.0789, "step": 26840 }, { "epoch": 1.5880996037144377, "grad_norm": 0.28957489132881165, "learning_rate": 2.8713711214049043e-05, "loss": 0.0804, "step": 26850 }, { "epoch": 1.5886910747027858, "grad_norm": 0.25642824172973633, "learning_rate": 2.871254676168112e-05, "loss": 0.0783, "step": 26860 }, { "epoch": 1.589282545691134, "grad_norm": 0.24537068605422974, "learning_rate": 2.8711381806106906e-05, "loss": 0.0881, "step": 26870 }, { "epoch": 1.5898740166794818, "grad_norm": 0.16702991724014282, "learning_rate": 2.8710216347369156e-05, "loss": 0.088, "step": 26880 }, { "epoch": 1.59046548766783, "grad_norm": 0.26245784759521484, "learning_rate": 2.870905038551064e-05, "loss": 0.0816, "step": 26890 }, { "epoch": 1.591056958656178, "grad_norm": 0.4139139652252197, "learning_rate": 2.870788392057414e-05, "loss": 0.0719, "step": 26900 }, { "epoch": 1.591648429644526, "grad_norm": 0.3063182234764099, "learning_rate": 2.870671695260247e-05, "loss": 0.0747, "step": 26910 }, { "epoch": 1.5922399006328738, "grad_norm": 0.33485734462738037, "learning_rate": 2.870554948163844e-05, "loss": 0.0786, "step": 26920 }, { "epoch": 1.5928313716212221, "grad_norm": 0.2821700870990753, "learning_rate": 2.870438150772491e-05, "loss": 0.1001, "step": 26930 }, { "epoch": 1.59342284260957, "grad_norm": 0.2430308759212494, "learning_rate": 2.870321303090472e-05, "loss": 0.0697, "step": 26940 }, { "epoch": 1.594014313597918, "grad_norm": 0.20129749178886414, "learning_rate": 2.8702044051220768e-05, "loss": 0.0661, "step": 26950 }, { "epoch": 1.594605784586266, "grad_norm": 0.4300387501716614, "learning_rate": 2.8700874568715946e-05, "loss": 0.071, "step": 26960 }, { "epoch": 1.5951972555746141, "grad_norm": 0.28593721985816956, "learning_rate": 2.8699704583433163e-05, "loss": 0.0834, "step": 26970 }, { "epoch": 1.595788726562962, "grad_norm": 0.27885702252388, "learning_rate": 2.8698534095415364e-05, "loss": 0.0807, "step": 26980 }, { "epoch": 1.59638019755131, "grad_norm": 0.24704399704933167, "learning_rate": 2.8697363104705496e-05, "loss": 0.0761, "step": 26990 }, { "epoch": 1.5969716685396582, "grad_norm": 0.27243608236312866, "learning_rate": 2.8696191611346535e-05, "loss": 0.0773, "step": 27000 }, { "epoch": 1.597563139528006, "grad_norm": 0.28430721163749695, "learning_rate": 2.8695019615381465e-05, "loss": 0.0772, "step": 27010 }, { "epoch": 1.5981546105163542, "grad_norm": 0.3201001286506653, "learning_rate": 2.86938471168533e-05, "loss": 0.0862, "step": 27020 }, { "epoch": 1.5987460815047023, "grad_norm": 0.2623547315597534, "learning_rate": 2.8692674115805063e-05, "loss": 0.0847, "step": 27030 }, { "epoch": 1.5993375524930502, "grad_norm": 0.2105882316827774, "learning_rate": 2.86915006122798e-05, "loss": 0.0859, "step": 27040 }, { "epoch": 1.599929023481398, "grad_norm": 0.1913120597600937, "learning_rate": 2.869032660632058e-05, "loss": 0.0675, "step": 27050 }, { "epoch": 1.6005204944697464, "grad_norm": 0.3710069954395294, "learning_rate": 2.868915209797048e-05, "loss": 0.0866, "step": 27060 }, { "epoch": 1.6011119654580943, "grad_norm": 0.25049740076065063, "learning_rate": 2.8687977087272597e-05, "loss": 0.0872, "step": 27070 }, { "epoch": 1.6017034364464422, "grad_norm": 0.1929461658000946, "learning_rate": 2.868680157427006e-05, "loss": 0.0809, "step": 27080 }, { "epoch": 1.6022949074347903, "grad_norm": 0.1834689825773239, "learning_rate": 2.8685625559005997e-05, "loss": 0.0705, "step": 27090 }, { "epoch": 1.6028863784231384, "grad_norm": 0.2719268202781677, "learning_rate": 2.868444904152357e-05, "loss": 0.0742, "step": 27100 }, { "epoch": 1.6034778494114863, "grad_norm": 0.32168906927108765, "learning_rate": 2.8683272021865956e-05, "loss": 0.0722, "step": 27110 }, { "epoch": 1.6040693203998344, "grad_norm": 0.2472773790359497, "learning_rate": 2.868209450007634e-05, "loss": 0.0795, "step": 27120 }, { "epoch": 1.6046607913881825, "grad_norm": 0.26132848858833313, "learning_rate": 2.8680916476197937e-05, "loss": 0.0822, "step": 27130 }, { "epoch": 1.6052522623765304, "grad_norm": 0.21841923892498016, "learning_rate": 2.8679737950273975e-05, "loss": 0.0862, "step": 27140 }, { "epoch": 1.6058437333648783, "grad_norm": 0.22288987040519714, "learning_rate": 2.8678558922347707e-05, "loss": 0.0776, "step": 27150 }, { "epoch": 1.6064352043532266, "grad_norm": 0.18772469460964203, "learning_rate": 2.8677379392462393e-05, "loss": 0.0744, "step": 27160 }, { "epoch": 1.6070266753415745, "grad_norm": 0.25267085433006287, "learning_rate": 2.8676199360661325e-05, "loss": 0.0936, "step": 27170 }, { "epoch": 1.6076181463299224, "grad_norm": 0.19280335307121277, "learning_rate": 2.86750188269878e-05, "loss": 0.0916, "step": 27180 }, { "epoch": 1.6082096173182705, "grad_norm": 0.2149762213230133, "learning_rate": 2.867383779148514e-05, "loss": 0.071, "step": 27190 }, { "epoch": 1.6088010883066186, "grad_norm": 0.13483110070228577, "learning_rate": 2.8672656254196692e-05, "loss": 0.0744, "step": 27200 }, { "epoch": 1.6093925592949665, "grad_norm": 0.21005165576934814, "learning_rate": 2.867147421516581e-05, "loss": 0.0711, "step": 27210 }, { "epoch": 1.6099840302833146, "grad_norm": 0.206457257270813, "learning_rate": 2.8670291674435864e-05, "loss": 0.0978, "step": 27220 }, { "epoch": 1.6105755012716627, "grad_norm": 0.15895533561706543, "learning_rate": 2.8669108632050264e-05, "loss": 0.0826, "step": 27230 }, { "epoch": 1.6111669722600106, "grad_norm": 0.37785735726356506, "learning_rate": 2.8667925088052414e-05, "loss": 0.0699, "step": 27240 }, { "epoch": 1.6117584432483587, "grad_norm": 0.25529780983924866, "learning_rate": 2.866674104248575e-05, "loss": 0.0745, "step": 27250 }, { "epoch": 1.6123499142367068, "grad_norm": 0.3624706268310547, "learning_rate": 2.8665556495393716e-05, "loss": 0.0715, "step": 27260 }, { "epoch": 1.6129413852250547, "grad_norm": 0.2907574474811554, "learning_rate": 2.866437144681979e-05, "loss": 0.0877, "step": 27270 }, { "epoch": 1.6135328562134026, "grad_norm": 0.19026079773902893, "learning_rate": 2.866318589680746e-05, "loss": 0.0753, "step": 27280 }, { "epoch": 1.614124327201751, "grad_norm": 0.28851139545440674, "learning_rate": 2.8661999845400222e-05, "loss": 0.0748, "step": 27290 }, { "epoch": 1.6147157981900988, "grad_norm": 0.29167309403419495, "learning_rate": 2.8660813292641607e-05, "loss": 0.0842, "step": 27300 }, { "epoch": 1.6153072691784467, "grad_norm": 0.3406606614589691, "learning_rate": 2.8659626238575158e-05, "loss": 0.0799, "step": 27310 }, { "epoch": 1.6158987401667948, "grad_norm": 0.25799140334129333, "learning_rate": 2.8658438683244433e-05, "loss": 0.0894, "step": 27320 }, { "epoch": 1.616490211155143, "grad_norm": 0.31370025873184204, "learning_rate": 2.8657250626693014e-05, "loss": 0.0845, "step": 27330 }, { "epoch": 1.6170816821434908, "grad_norm": 0.21448130905628204, "learning_rate": 2.86560620689645e-05, "loss": 0.0792, "step": 27340 }, { "epoch": 1.617673153131839, "grad_norm": 0.20627625286579132, "learning_rate": 2.8654873010102503e-05, "loss": 0.0692, "step": 27350 }, { "epoch": 1.618264624120187, "grad_norm": 0.24298259615898132, "learning_rate": 2.865368345015066e-05, "loss": 0.0735, "step": 27360 }, { "epoch": 1.618856095108535, "grad_norm": 0.221207857131958, "learning_rate": 2.8652493389152623e-05, "loss": 0.0944, "step": 27370 }, { "epoch": 1.6194475660968828, "grad_norm": 0.21571588516235352, "learning_rate": 2.8651302827152064e-05, "loss": 0.0802, "step": 27380 }, { "epoch": 1.6200390370852311, "grad_norm": 1.3408998250961304, "learning_rate": 2.8650111764192677e-05, "loss": 0.0986, "step": 27390 }, { "epoch": 1.620630508073579, "grad_norm": 0.18730662763118744, "learning_rate": 2.864892020031816e-05, "loss": 0.0806, "step": 27400 }, { "epoch": 1.6212219790619269, "grad_norm": 0.3052735924720764, "learning_rate": 2.864772813557225e-05, "loss": 0.0753, "step": 27410 }, { "epoch": 1.621813450050275, "grad_norm": 0.25599515438079834, "learning_rate": 2.8646535569998686e-05, "loss": 0.0867, "step": 27420 }, { "epoch": 1.622404921038623, "grad_norm": 0.24874895811080933, "learning_rate": 2.8645342503641237e-05, "loss": 0.0835, "step": 27430 }, { "epoch": 1.622996392026971, "grad_norm": 0.21728980541229248, "learning_rate": 2.8644148936543676e-05, "loss": 0.0852, "step": 27440 }, { "epoch": 1.623587863015319, "grad_norm": 0.2609628736972809, "learning_rate": 2.864295486874981e-05, "loss": 0.085, "step": 27450 }, { "epoch": 1.6241793340036672, "grad_norm": 0.2718333899974823, "learning_rate": 2.864176030030345e-05, "loss": 0.0756, "step": 27460 }, { "epoch": 1.624770804992015, "grad_norm": 0.185034841299057, "learning_rate": 2.8640565231248447e-05, "loss": 0.0794, "step": 27470 }, { "epoch": 1.6253622759803632, "grad_norm": 0.2869969606399536, "learning_rate": 2.8639369661628644e-05, "loss": 0.0885, "step": 27480 }, { "epoch": 1.6259537469687113, "grad_norm": 0.2572740912437439, "learning_rate": 2.8638173591487914e-05, "loss": 0.0772, "step": 27490 }, { "epoch": 1.6265452179570592, "grad_norm": 0.2404574453830719, "learning_rate": 2.8636977020870157e-05, "loss": 0.0709, "step": 27500 }, { "epoch": 1.627136688945407, "grad_norm": 0.32702645659446716, "learning_rate": 2.8635779949819278e-05, "loss": 0.0746, "step": 27510 }, { "epoch": 1.6277281599337554, "grad_norm": 0.2722541391849518, "learning_rate": 2.8634582378379205e-05, "loss": 0.0948, "step": 27520 }, { "epoch": 1.6283196309221033, "grad_norm": 0.34183356165885925, "learning_rate": 2.8633384306593888e-05, "loss": 0.0904, "step": 27530 }, { "epoch": 1.6289111019104512, "grad_norm": 0.32095685601234436, "learning_rate": 2.8632185734507292e-05, "loss": 0.0797, "step": 27540 }, { "epoch": 1.6295025728987993, "grad_norm": 0.2335154414176941, "learning_rate": 2.8630986662163398e-05, "loss": 0.0622, "step": 27550 }, { "epoch": 1.6300940438871474, "grad_norm": 0.23347744345664978, "learning_rate": 2.862978708960621e-05, "loss": 0.0804, "step": 27560 }, { "epoch": 1.6306855148754953, "grad_norm": 0.26494619250297546, "learning_rate": 2.862858701687975e-05, "loss": 0.0893, "step": 27570 }, { "epoch": 1.6312769858638434, "grad_norm": 0.4631834924221039, "learning_rate": 2.8627386444028052e-05, "loss": 0.0804, "step": 27580 }, { "epoch": 1.6318684568521915, "grad_norm": 0.25629428029060364, "learning_rate": 2.862618537109518e-05, "loss": 0.1006, "step": 27590 }, { "epoch": 1.6324599278405394, "grad_norm": 0.2614514231681824, "learning_rate": 2.8624983798125205e-05, "loss": 0.0566, "step": 27600 }, { "epoch": 1.6330513988288873, "grad_norm": 0.2833921015262604, "learning_rate": 2.862378172516222e-05, "loss": 0.0709, "step": 27610 }, { "epoch": 1.6336428698172356, "grad_norm": 0.2472497820854187, "learning_rate": 2.8622579152250335e-05, "loss": 0.1023, "step": 27620 }, { "epoch": 1.6342343408055835, "grad_norm": 0.2121846228837967, "learning_rate": 2.862137607943369e-05, "loss": 0.0866, "step": 27630 }, { "epoch": 1.6348258117939314, "grad_norm": 0.2620795667171478, "learning_rate": 2.8620172506756425e-05, "loss": 0.0884, "step": 27640 }, { "epoch": 1.6354172827822795, "grad_norm": 0.20599666237831116, "learning_rate": 2.8618968434262717e-05, "loss": 0.0671, "step": 27650 }, { "epoch": 1.6360087537706276, "grad_norm": 0.30769702792167664, "learning_rate": 2.8617763861996734e-05, "loss": 0.0755, "step": 27660 }, { "epoch": 1.6366002247589755, "grad_norm": 0.2724991738796234, "learning_rate": 2.86165587900027e-05, "loss": 0.0788, "step": 27670 }, { "epoch": 1.6371916957473236, "grad_norm": 0.29996824264526367, "learning_rate": 2.8615353218324828e-05, "loss": 0.0927, "step": 27680 }, { "epoch": 1.6377831667356717, "grad_norm": 0.23494388163089752, "learning_rate": 2.8614147147007355e-05, "loss": 0.0955, "step": 27690 }, { "epoch": 1.6383746377240196, "grad_norm": 0.19357731938362122, "learning_rate": 2.8612940576094542e-05, "loss": 0.0829, "step": 27700 }, { "epoch": 1.6389661087123677, "grad_norm": 0.23210641741752625, "learning_rate": 2.8611733505630676e-05, "loss": 0.0714, "step": 27710 }, { "epoch": 1.6395575797007158, "grad_norm": 0.2064623236656189, "learning_rate": 2.861052593566004e-05, "loss": 0.0884, "step": 27720 }, { "epoch": 1.6401490506890637, "grad_norm": 0.2211700826883316, "learning_rate": 2.8609317866226955e-05, "loss": 0.0935, "step": 27730 }, { "epoch": 1.6407405216774116, "grad_norm": 0.25506213307380676, "learning_rate": 2.8608109297375744e-05, "loss": 0.0876, "step": 27740 }, { "epoch": 1.64133199266576, "grad_norm": 0.14909663796424866, "learning_rate": 2.8606900229150773e-05, "loss": 0.065, "step": 27750 }, { "epoch": 1.6419234636541078, "grad_norm": 0.24952657520771027, "learning_rate": 2.8605690661596394e-05, "loss": 0.0737, "step": 27760 }, { "epoch": 1.6425149346424557, "grad_norm": 0.27413663268089294, "learning_rate": 2.860448059475701e-05, "loss": 0.1013, "step": 27770 }, { "epoch": 1.6431064056308038, "grad_norm": 0.2158968597650528, "learning_rate": 2.8603270028677014e-05, "loss": 0.0858, "step": 27780 }, { "epoch": 1.643697876619152, "grad_norm": 0.2828664481639862, "learning_rate": 2.8602058963400838e-05, "loss": 0.0876, "step": 27790 }, { "epoch": 1.6442893476074998, "grad_norm": 0.30203309655189514, "learning_rate": 2.860084739897292e-05, "loss": 0.0768, "step": 27800 }, { "epoch": 1.644880818595848, "grad_norm": 0.28379103541374207, "learning_rate": 2.859963533543772e-05, "loss": 0.0875, "step": 27810 }, { "epoch": 1.645472289584196, "grad_norm": 0.18509817123413086, "learning_rate": 2.8598422772839726e-05, "loss": 0.0867, "step": 27820 }, { "epoch": 1.6460637605725439, "grad_norm": 0.2734489440917969, "learning_rate": 2.8597209711223417e-05, "loss": 0.0823, "step": 27830 }, { "epoch": 1.6466552315608918, "grad_norm": 0.28734925389289856, "learning_rate": 2.859599615063333e-05, "loss": 0.0934, "step": 27840 }, { "epoch": 1.64724670254924, "grad_norm": 0.21547721326351166, "learning_rate": 2.859478209111398e-05, "loss": 0.0735, "step": 27850 }, { "epoch": 1.647838173537588, "grad_norm": 0.21174709498882294, "learning_rate": 2.859356753270993e-05, "loss": 0.0632, "step": 27860 }, { "epoch": 1.6484296445259359, "grad_norm": 0.35802051424980164, "learning_rate": 2.8592352475465742e-05, "loss": 0.0921, "step": 27870 }, { "epoch": 1.649021115514284, "grad_norm": 0.3373861014842987, "learning_rate": 2.8591136919426017e-05, "loss": 0.0832, "step": 27880 }, { "epoch": 1.649612586502632, "grad_norm": 0.3085138499736786, "learning_rate": 2.8589920864635356e-05, "loss": 0.0758, "step": 27890 }, { "epoch": 1.65020405749098, "grad_norm": 0.15509644150733948, "learning_rate": 2.8588704311138377e-05, "loss": 0.0686, "step": 27900 }, { "epoch": 1.650795528479328, "grad_norm": 0.28569573163986206, "learning_rate": 2.8587487258979736e-05, "loss": 0.0851, "step": 27910 }, { "epoch": 1.6513869994676762, "grad_norm": 0.25356391072273254, "learning_rate": 2.858626970820408e-05, "loss": 0.0951, "step": 27920 }, { "epoch": 1.651978470456024, "grad_norm": 0.2038189023733139, "learning_rate": 2.8585051658856105e-05, "loss": 0.0745, "step": 27930 }, { "epoch": 1.6525699414443722, "grad_norm": 0.18826451897621155, "learning_rate": 2.85838331109805e-05, "loss": 0.0811, "step": 27940 }, { "epoch": 1.6531614124327203, "grad_norm": 0.276580274105072, "learning_rate": 2.8582614064621988e-05, "loss": 0.0745, "step": 27950 }, { "epoch": 1.6537528834210682, "grad_norm": 0.24759763479232788, "learning_rate": 2.85813945198253e-05, "loss": 0.0738, "step": 27960 }, { "epoch": 1.654344354409416, "grad_norm": 0.18928121030330658, "learning_rate": 2.8580174476635184e-05, "loss": 0.0773, "step": 27970 }, { "epoch": 1.6549358253977644, "grad_norm": 0.27623695135116577, "learning_rate": 2.857895393509642e-05, "loss": 0.0939, "step": 27980 }, { "epoch": 1.6555272963861123, "grad_norm": 0.3382868468761444, "learning_rate": 2.8577732895253798e-05, "loss": 0.0749, "step": 27990 }, { "epoch": 1.6561187673744602, "grad_norm": 0.25431618094444275, "learning_rate": 2.857651135715212e-05, "loss": 0.0728, "step": 28000 }, { "epoch": 1.6567102383628083, "grad_norm": 0.2629346251487732, "learning_rate": 2.857528932083622e-05, "loss": 0.0679, "step": 28010 }, { "epoch": 1.6573017093511564, "grad_norm": 0.2973002791404724, "learning_rate": 2.8574066786350933e-05, "loss": 0.0956, "step": 28020 }, { "epoch": 1.6578931803395043, "grad_norm": 0.30630794167518616, "learning_rate": 2.8572843753741134e-05, "loss": 0.0874, "step": 28030 }, { "epoch": 1.6584846513278524, "grad_norm": 0.2076866328716278, "learning_rate": 2.8571620223051694e-05, "loss": 0.0831, "step": 28040 }, { "epoch": 1.6590761223162005, "grad_norm": 0.29183465242385864, "learning_rate": 2.8570396194327518e-05, "loss": 0.0795, "step": 28050 }, { "epoch": 1.6596675933045484, "grad_norm": 0.2965888977050781, "learning_rate": 2.8569171667613522e-05, "loss": 0.083, "step": 28060 }, { "epoch": 1.6602590642928963, "grad_norm": 0.24139857292175293, "learning_rate": 2.8567946642954647e-05, "loss": 0.0791, "step": 28070 }, { "epoch": 1.6608505352812446, "grad_norm": 0.27297908067703247, "learning_rate": 2.856672112039584e-05, "loss": 0.0797, "step": 28080 }, { "epoch": 1.6614420062695925, "grad_norm": 0.2804509699344635, "learning_rate": 2.8565495099982078e-05, "loss": 0.082, "step": 28090 }, { "epoch": 1.6620334772579404, "grad_norm": 1.4729059934616089, "learning_rate": 2.856426858175835e-05, "loss": 0.0686, "step": 28100 }, { "epoch": 1.6626249482462885, "grad_norm": 0.3302995264530182, "learning_rate": 2.856304156576967e-05, "loss": 0.0766, "step": 28110 }, { "epoch": 1.6632164192346366, "grad_norm": 0.26263171434402466, "learning_rate": 2.856181405206106e-05, "loss": 0.1015, "step": 28120 }, { "epoch": 1.6638078902229845, "grad_norm": 0.256700336933136, "learning_rate": 2.8560586040677565e-05, "loss": 0.0832, "step": 28130 }, { "epoch": 1.6643993612113326, "grad_norm": 0.19560439884662628, "learning_rate": 2.8559357531664253e-05, "loss": 0.0758, "step": 28140 }, { "epoch": 1.6649908321996807, "grad_norm": 0.18962416052818298, "learning_rate": 2.855812852506621e-05, "loss": 0.0628, "step": 28150 }, { "epoch": 1.6655823031880286, "grad_norm": 0.2611166536808014, "learning_rate": 2.8556899020928526e-05, "loss": 0.0783, "step": 28160 }, { "epoch": 1.6661737741763767, "grad_norm": 0.33898890018463135, "learning_rate": 2.855566901929633e-05, "loss": 0.0906, "step": 28170 }, { "epoch": 1.6667652451647248, "grad_norm": 0.5789848566055298, "learning_rate": 2.8554438520214747e-05, "loss": 0.0781, "step": 28180 }, { "epoch": 1.6673567161530727, "grad_norm": 0.26490798592567444, "learning_rate": 2.8553207523728947e-05, "loss": 0.0797, "step": 28190 }, { "epoch": 1.6679481871414206, "grad_norm": 0.21104596555233002, "learning_rate": 2.8551976029884096e-05, "loss": 0.0733, "step": 28200 }, { "epoch": 1.668539658129769, "grad_norm": 0.2859245240688324, "learning_rate": 2.8550744038725385e-05, "loss": 0.076, "step": 28210 }, { "epoch": 1.6691311291181168, "grad_norm": 0.28913983702659607, "learning_rate": 2.8549511550298026e-05, "loss": 0.0951, "step": 28220 }, { "epoch": 1.6697226001064647, "grad_norm": 0.2669130861759186, "learning_rate": 2.8548278564647245e-05, "loss": 0.0855, "step": 28230 }, { "epoch": 1.6703140710948128, "grad_norm": 0.3092068135738373, "learning_rate": 2.8547045081818293e-05, "loss": 0.0874, "step": 28240 }, { "epoch": 1.6709055420831609, "grad_norm": 0.2030395269393921, "learning_rate": 2.8545811101856434e-05, "loss": 0.0631, "step": 28250 }, { "epoch": 1.6714970130715088, "grad_norm": 0.2705625295639038, "learning_rate": 2.8544576624806947e-05, "loss": 0.0797, "step": 28260 }, { "epoch": 1.6720884840598569, "grad_norm": 0.29380014538764954, "learning_rate": 2.8543341650715136e-05, "loss": 0.0894, "step": 28270 }, { "epoch": 1.672679955048205, "grad_norm": 0.2461223006248474, "learning_rate": 2.854210617962632e-05, "loss": 0.0778, "step": 28280 }, { "epoch": 1.6732714260365529, "grad_norm": 0.29434847831726074, "learning_rate": 2.854087021158584e-05, "loss": 0.0857, "step": 28290 }, { "epoch": 1.6738628970249008, "grad_norm": 0.21710096299648285, "learning_rate": 2.853963374663905e-05, "loss": 0.0653, "step": 28300 }, { "epoch": 1.674454368013249, "grad_norm": 0.26138168573379517, "learning_rate": 2.8538396784831317e-05, "loss": 0.0755, "step": 28310 }, { "epoch": 1.675045839001597, "grad_norm": 0.2175125628709793, "learning_rate": 2.853715932620804e-05, "loss": 0.0941, "step": 28320 }, { "epoch": 1.6756373099899449, "grad_norm": 0.24279813468456268, "learning_rate": 2.853592137081464e-05, "loss": 0.0824, "step": 28330 }, { "epoch": 1.676228780978293, "grad_norm": 0.32920417189598083, "learning_rate": 2.853468291869653e-05, "loss": 0.0854, "step": 28340 }, { "epoch": 1.676820251966641, "grad_norm": 0.151876300573349, "learning_rate": 2.853344396989916e-05, "loss": 0.072, "step": 28350 }, { "epoch": 1.677411722954989, "grad_norm": 0.29124194383621216, "learning_rate": 2.8532204524467997e-05, "loss": 0.0759, "step": 28360 }, { "epoch": 1.678003193943337, "grad_norm": 0.2896119952201843, "learning_rate": 2.853096458244853e-05, "loss": 0.0942, "step": 28370 }, { "epoch": 1.6785946649316852, "grad_norm": 0.2086765617132187, "learning_rate": 2.8529724143886257e-05, "loss": 0.0818, "step": 28380 }, { "epoch": 1.679186135920033, "grad_norm": 0.24530771374702454, "learning_rate": 2.85284832088267e-05, "loss": 0.0697, "step": 28390 }, { "epoch": 1.6797776069083812, "grad_norm": 0.21338436007499695, "learning_rate": 2.8527241777315392e-05, "loss": 0.0765, "step": 28400 }, { "epoch": 1.6803690778967293, "grad_norm": 0.31387728452682495, "learning_rate": 2.8525999849397892e-05, "loss": 0.0829, "step": 28410 }, { "epoch": 1.6809605488850772, "grad_norm": 0.30949658155441284, "learning_rate": 2.852475742511978e-05, "loss": 0.0934, "step": 28420 }, { "epoch": 1.681552019873425, "grad_norm": 0.2825029492378235, "learning_rate": 2.852351450452664e-05, "loss": 0.0862, "step": 28430 }, { "epoch": 1.6821434908617734, "grad_norm": 0.19122019410133362, "learning_rate": 2.8522271087664092e-05, "loss": 0.0861, "step": 28440 }, { "epoch": 1.6827349618501213, "grad_norm": 0.15829670429229736, "learning_rate": 2.8521027174577757e-05, "loss": 0.0693, "step": 28450 }, { "epoch": 1.6833264328384692, "grad_norm": 0.5372774600982666, "learning_rate": 2.851978276531329e-05, "loss": 0.0797, "step": 28460 }, { "epoch": 1.6839179038268173, "grad_norm": 0.22070683538913727, "learning_rate": 2.851853785991635e-05, "loss": 0.0821, "step": 28470 }, { "epoch": 1.6845093748151654, "grad_norm": 0.24470604956150055, "learning_rate": 2.851729245843263e-05, "loss": 0.088, "step": 28480 }, { "epoch": 1.6851008458035133, "grad_norm": 0.3253689706325531, "learning_rate": 2.8516046560907825e-05, "loss": 0.0767, "step": 28490 }, { "epoch": 1.6856923167918614, "grad_norm": 0.20680132508277893, "learning_rate": 2.8514800167387653e-05, "loss": 0.0778, "step": 28500 }, { "epoch": 1.6862837877802095, "grad_norm": 0.2744502127170563, "learning_rate": 2.8513553277917862e-05, "loss": 0.0785, "step": 28510 }, { "epoch": 1.6868752587685574, "grad_norm": 0.2665365934371948, "learning_rate": 2.8512305892544202e-05, "loss": 0.0829, "step": 28520 }, { "epoch": 1.6874667297569053, "grad_norm": 0.31432339549064636, "learning_rate": 2.851105801131245e-05, "loss": 0.0866, "step": 28530 }, { "epoch": 1.6880582007452536, "grad_norm": 0.3200651705265045, "learning_rate": 2.85098096342684e-05, "loss": 0.0884, "step": 28540 }, { "epoch": 1.6886496717336015, "grad_norm": 0.4139779210090637, "learning_rate": 2.850856076145786e-05, "loss": 0.0881, "step": 28550 }, { "epoch": 1.6892411427219494, "grad_norm": 0.229922816157341, "learning_rate": 2.8507311392926662e-05, "loss": 0.0903, "step": 28560 }, { "epoch": 1.6898326137102975, "grad_norm": 0.18361631035804749, "learning_rate": 2.8506061528720654e-05, "loss": 0.0881, "step": 28570 }, { "epoch": 1.6904240846986456, "grad_norm": 0.21821536123752594, "learning_rate": 2.85048111688857e-05, "loss": 0.0871, "step": 28580 }, { "epoch": 1.6910155556869935, "grad_norm": 0.1960626095533371, "learning_rate": 2.850356031346769e-05, "loss": 0.0846, "step": 28590 }, { "epoch": 1.6916070266753416, "grad_norm": 0.21215000748634338, "learning_rate": 2.850230896251252e-05, "loss": 0.0665, "step": 28600 }, { "epoch": 1.6921984976636897, "grad_norm": 0.2733882665634155, "learning_rate": 2.8501057116066116e-05, "loss": 0.0726, "step": 28610 }, { "epoch": 1.6927899686520376, "grad_norm": 0.23502619564533234, "learning_rate": 2.8499804774174407e-05, "loss": 0.0981, "step": 28620 }, { "epoch": 1.6933814396403857, "grad_norm": 0.289266973733902, "learning_rate": 2.8498551936883364e-05, "loss": 0.0778, "step": 28630 }, { "epoch": 1.6939729106287338, "grad_norm": 0.3269972503185272, "learning_rate": 2.849729860423895e-05, "loss": 0.0901, "step": 28640 }, { "epoch": 1.6945643816170817, "grad_norm": 0.21155452728271484, "learning_rate": 2.8496044776287163e-05, "loss": 0.0744, "step": 28650 }, { "epoch": 1.6951558526054296, "grad_norm": 0.24481725692749023, "learning_rate": 2.8494790453074017e-05, "loss": 0.0773, "step": 28660 }, { "epoch": 1.6957473235937779, "grad_norm": 0.3700588345527649, "learning_rate": 2.8493535634645538e-05, "loss": 0.0841, "step": 28670 }, { "epoch": 1.6963387945821258, "grad_norm": 0.26650214195251465, "learning_rate": 2.849228032104777e-05, "loss": 0.0891, "step": 28680 }, { "epoch": 1.6969302655704737, "grad_norm": 0.16735830903053284, "learning_rate": 2.8491024512326788e-05, "loss": 0.0778, "step": 28690 }, { "epoch": 1.6975217365588218, "grad_norm": 0.34529122710227966, "learning_rate": 2.848976820852867e-05, "loss": 0.0703, "step": 28700 }, { "epoch": 1.6981132075471699, "grad_norm": 0.30869725346565247, "learning_rate": 2.8488511409699523e-05, "loss": 0.0876, "step": 28710 }, { "epoch": 1.6987046785355178, "grad_norm": 0.31604284048080444, "learning_rate": 2.8487254115885466e-05, "loss": 0.0819, "step": 28720 }, { "epoch": 1.6992961495238659, "grad_norm": 0.24138414859771729, "learning_rate": 2.8485996327132632e-05, "loss": 0.0799, "step": 28730 }, { "epoch": 1.699887620512214, "grad_norm": 0.20988720655441284, "learning_rate": 2.8484738043487182e-05, "loss": 0.0753, "step": 28740 }, { "epoch": 1.7004790915005619, "grad_norm": 0.1991497278213501, "learning_rate": 2.8483479264995293e-05, "loss": 0.0618, "step": 28750 }, { "epoch": 1.7010705624889098, "grad_norm": 0.243675097823143, "learning_rate": 2.8482219991703154e-05, "loss": 0.0858, "step": 28760 }, { "epoch": 1.701662033477258, "grad_norm": 0.2541062831878662, "learning_rate": 2.848096022365698e-05, "loss": 0.1022, "step": 28770 }, { "epoch": 1.702253504465606, "grad_norm": 0.2887207567691803, "learning_rate": 2.8479699960903e-05, "loss": 0.0876, "step": 28780 }, { "epoch": 1.7028449754539539, "grad_norm": 0.1775137037038803, "learning_rate": 2.8478439203487456e-05, "loss": 0.0823, "step": 28790 }, { "epoch": 1.703436446442302, "grad_norm": 0.2978924512863159, "learning_rate": 2.8477177951456622e-05, "loss": 0.0735, "step": 28800 }, { "epoch": 1.70402791743065, "grad_norm": 0.26284149289131165, "learning_rate": 2.847591620485678e-05, "loss": 0.0693, "step": 28810 }, { "epoch": 1.704619388418998, "grad_norm": 0.25961193442344666, "learning_rate": 2.8474653963734228e-05, "loss": 0.0909, "step": 28820 }, { "epoch": 1.705210859407346, "grad_norm": 0.1876889169216156, "learning_rate": 2.8473391228135283e-05, "loss": 0.0709, "step": 28830 }, { "epoch": 1.7058023303956942, "grad_norm": 0.25210508704185486, "learning_rate": 2.8472127998106293e-05, "loss": 0.0791, "step": 28840 }, { "epoch": 1.706393801384042, "grad_norm": 0.3581472635269165, "learning_rate": 2.8470864273693608e-05, "loss": 0.0661, "step": 28850 }, { "epoch": 1.7069852723723902, "grad_norm": 0.31352394819259644, "learning_rate": 2.846960005494361e-05, "loss": 0.0731, "step": 28860 }, { "epoch": 1.7075767433607383, "grad_norm": 1.9150136709213257, "learning_rate": 2.8468335341902683e-05, "loss": 0.0936, "step": 28870 }, { "epoch": 1.7081682143490862, "grad_norm": 0.2855890691280365, "learning_rate": 2.846707013461724e-05, "loss": 0.083, "step": 28880 }, { "epoch": 1.708759685337434, "grad_norm": 0.23345257341861725, "learning_rate": 2.846580443313371e-05, "loss": 0.0814, "step": 28890 }, { "epoch": 1.7093511563257824, "grad_norm": 0.2051374763250351, "learning_rate": 2.8464538237498547e-05, "loss": 0.0717, "step": 28900 }, { "epoch": 1.7099426273141303, "grad_norm": 0.19974899291992188, "learning_rate": 2.8463271547758206e-05, "loss": 0.0718, "step": 28910 }, { "epoch": 1.7105340983024782, "grad_norm": 0.26394811272621155, "learning_rate": 2.846200436395918e-05, "loss": 0.0892, "step": 28920 }, { "epoch": 1.7111255692908263, "grad_norm": 0.360980749130249, "learning_rate": 2.8460736686147962e-05, "loss": 0.0925, "step": 28930 }, { "epoch": 1.7117170402791744, "grad_norm": 0.41588619351387024, "learning_rate": 2.8459468514371074e-05, "loss": 0.0703, "step": 28940 }, { "epoch": 1.7123085112675223, "grad_norm": 0.2874605357646942, "learning_rate": 2.8458199848675058e-05, "loss": 0.0824, "step": 28950 }, { "epoch": 1.7128999822558704, "grad_norm": 0.2859974503517151, "learning_rate": 2.8456930689106467e-05, "loss": 0.0779, "step": 28960 }, { "epoch": 1.7134914532442185, "grad_norm": 0.21746009588241577, "learning_rate": 2.8455661035711874e-05, "loss": 0.0748, "step": 28970 }, { "epoch": 1.7140829242325664, "grad_norm": 0.22897332906723022, "learning_rate": 2.8454390888537873e-05, "loss": 0.0813, "step": 28980 }, { "epoch": 1.7146743952209142, "grad_norm": 0.2616842985153198, "learning_rate": 2.8453120247631073e-05, "loss": 0.0677, "step": 28990 }, { "epoch": 1.7152658662092626, "grad_norm": 0.28277525305747986, "learning_rate": 2.8451849113038107e-05, "loss": 0.0774, "step": 29000 }, { "epoch": 1.7158573371976105, "grad_norm": 0.26406702399253845, "learning_rate": 2.8450577484805615e-05, "loss": 0.0669, "step": 29010 }, { "epoch": 1.7164488081859584, "grad_norm": 0.20659536123275757, "learning_rate": 2.844930536298026e-05, "loss": 0.0882, "step": 29020 }, { "epoch": 1.7170402791743065, "grad_norm": 0.2023080736398697, "learning_rate": 2.8448032747608735e-05, "loss": 0.0941, "step": 29030 }, { "epoch": 1.7176317501626546, "grad_norm": 0.3368518352508545, "learning_rate": 2.8446759638737735e-05, "loss": 0.0867, "step": 29040 }, { "epoch": 1.7182232211510025, "grad_norm": 0.19312727451324463, "learning_rate": 2.844548603641398e-05, "loss": 0.0709, "step": 29050 }, { "epoch": 1.7188146921393506, "grad_norm": 0.20198990404605865, "learning_rate": 2.84442119406842e-05, "loss": 0.0675, "step": 29060 }, { "epoch": 1.7194061631276987, "grad_norm": 0.19405516982078552, "learning_rate": 2.8442937351595163e-05, "loss": 0.0885, "step": 29070 }, { "epoch": 1.7199976341160466, "grad_norm": 0.38162142038345337, "learning_rate": 2.8441662269193632e-05, "loss": 0.0924, "step": 29080 }, { "epoch": 1.7205891051043947, "grad_norm": 0.2835167646408081, "learning_rate": 2.84403866935264e-05, "loss": 0.0804, "step": 29090 }, { "epoch": 1.7211805760927428, "grad_norm": 0.23531118035316467, "learning_rate": 2.8439110624640284e-05, "loss": 0.0719, "step": 29100 }, { "epoch": 1.7217720470810907, "grad_norm": 0.32492080330848694, "learning_rate": 2.8437834062582108e-05, "loss": 0.0798, "step": 29110 }, { "epoch": 1.7223635180694385, "grad_norm": 0.3041568696498871, "learning_rate": 2.8436557007398708e-05, "loss": 0.0957, "step": 29120 }, { "epoch": 1.7229549890577869, "grad_norm": 0.2127840369939804, "learning_rate": 2.8435279459136958e-05, "loss": 0.0874, "step": 29130 }, { "epoch": 1.7235464600461348, "grad_norm": 0.24665023386478424, "learning_rate": 2.8434001417843743e-05, "loss": 0.0822, "step": 29140 }, { "epoch": 1.7241379310344827, "grad_norm": 0.2295551896095276, "learning_rate": 2.8432722883565952e-05, "loss": 0.0734, "step": 29150 }, { "epoch": 1.7247294020228308, "grad_norm": 0.20535042881965637, "learning_rate": 2.843144385635051e-05, "loss": 0.0822, "step": 29160 }, { "epoch": 1.7253208730111789, "grad_norm": 0.26248008012771606, "learning_rate": 2.8430164336244354e-05, "loss": 0.0915, "step": 29170 }, { "epoch": 1.7259123439995268, "grad_norm": 0.290780246257782, "learning_rate": 2.8428884323294432e-05, "loss": 0.0894, "step": 29180 }, { "epoch": 1.7265038149878749, "grad_norm": 0.21824878454208374, "learning_rate": 2.8427603817547727e-05, "loss": 0.0778, "step": 29190 }, { "epoch": 1.727095285976223, "grad_norm": 0.2567332684993744, "learning_rate": 2.8426322819051217e-05, "loss": 0.0726, "step": 29200 }, { "epoch": 1.7276867569645709, "grad_norm": 0.30957046151161194, "learning_rate": 2.842504132785192e-05, "loss": 0.0751, "step": 29210 }, { "epoch": 1.7282782279529187, "grad_norm": 0.24021567404270172, "learning_rate": 2.842375934399686e-05, "loss": 0.0861, "step": 29220 }, { "epoch": 1.728869698941267, "grad_norm": 0.17245757579803467, "learning_rate": 2.8422476867533074e-05, "loss": 0.0812, "step": 29230 }, { "epoch": 1.729461169929615, "grad_norm": 0.24036145210266113, "learning_rate": 2.8421193898507637e-05, "loss": 0.0921, "step": 29240 }, { "epoch": 1.7300526409179628, "grad_norm": 0.2659378945827484, "learning_rate": 2.8419910436967627e-05, "loss": 0.069, "step": 29250 }, { "epoch": 1.730644111906311, "grad_norm": 0.2855517566204071, "learning_rate": 2.841862648296014e-05, "loss": 0.075, "step": 29260 }, { "epoch": 1.731235582894659, "grad_norm": 0.24238331615924835, "learning_rate": 2.8417342036532285e-05, "loss": 0.0908, "step": 29270 }, { "epoch": 1.731827053883007, "grad_norm": 0.29205191135406494, "learning_rate": 2.8416057097731213e-05, "loss": 0.0844, "step": 29280 }, { "epoch": 1.732418524871355, "grad_norm": 0.22629576921463013, "learning_rate": 2.841477166660407e-05, "loss": 0.0892, "step": 29290 }, { "epoch": 1.7330099958597032, "grad_norm": 0.2037062644958496, "learning_rate": 2.841348574319802e-05, "loss": 0.0586, "step": 29300 }, { "epoch": 1.733601466848051, "grad_norm": 0.27505484223365784, "learning_rate": 2.8412199327560264e-05, "loss": 0.077, "step": 29310 }, { "epoch": 1.7341929378363992, "grad_norm": 0.2938869893550873, "learning_rate": 2.8410912419738e-05, "loss": 0.0912, "step": 29320 }, { "epoch": 1.7347844088247473, "grad_norm": 0.1825495809316635, "learning_rate": 2.840962501977846e-05, "loss": 0.099, "step": 29330 }, { "epoch": 1.7353758798130952, "grad_norm": 0.26828739047050476, "learning_rate": 2.8408337127728887e-05, "loss": 0.0856, "step": 29340 }, { "epoch": 1.735967350801443, "grad_norm": 0.23940999805927277, "learning_rate": 2.8407048743636535e-05, "loss": 0.0833, "step": 29350 }, { "epoch": 1.7365588217897914, "grad_norm": 0.2954493761062622, "learning_rate": 2.8405759867548695e-05, "loss": 0.0789, "step": 29360 }, { "epoch": 1.7371502927781393, "grad_norm": 0.26081934571266174, "learning_rate": 2.8404470499512653e-05, "loss": 0.0911, "step": 29370 }, { "epoch": 1.7377417637664871, "grad_norm": 0.291718065738678, "learning_rate": 2.8403180639575735e-05, "loss": 0.0926, "step": 29380 }, { "epoch": 1.7383332347548353, "grad_norm": 0.1970929056406021, "learning_rate": 2.840189028778527e-05, "loss": 0.0892, "step": 29390 }, { "epoch": 1.7389247057431834, "grad_norm": 0.18504412472248077, "learning_rate": 2.840059944418861e-05, "loss": 0.075, "step": 29400 }, { "epoch": 1.7395161767315312, "grad_norm": 0.2807154357433319, "learning_rate": 2.839930810883312e-05, "loss": 0.0725, "step": 29410 }, { "epoch": 1.7401076477198794, "grad_norm": 0.22152866423130035, "learning_rate": 2.8398016281766193e-05, "loss": 0.0768, "step": 29420 }, { "epoch": 1.7406991187082275, "grad_norm": 0.2232944369316101, "learning_rate": 2.839672396303524e-05, "loss": 0.0868, "step": 29430 }, { "epoch": 1.7412905896965754, "grad_norm": 0.1744961142539978, "learning_rate": 2.8395431152687674e-05, "loss": 0.0725, "step": 29440 }, { "epoch": 1.7418820606849232, "grad_norm": 0.22927170991897583, "learning_rate": 2.8394137850770942e-05, "loss": 0.0812, "step": 29450 }, { "epoch": 1.7424735316732716, "grad_norm": 0.33292677998542786, "learning_rate": 2.8392844057332507e-05, "loss": 0.0705, "step": 29460 }, { "epoch": 1.7430650026616195, "grad_norm": 0.2525402009487152, "learning_rate": 2.839154977241984e-05, "loss": 0.0732, "step": 29470 }, { "epoch": 1.7436564736499673, "grad_norm": 0.3557959794998169, "learning_rate": 2.839025499608044e-05, "loss": 0.0925, "step": 29480 }, { "epoch": 1.7442479446383155, "grad_norm": 0.21435587108135223, "learning_rate": 2.8388959728361832e-05, "loss": 0.0815, "step": 29490 }, { "epoch": 1.7448394156266636, "grad_norm": 0.22469905018806458, "learning_rate": 2.838766396931153e-05, "loss": 0.0696, "step": 29500 }, { "epoch": 1.7454308866150114, "grad_norm": 0.20161843299865723, "learning_rate": 2.8386367718977096e-05, "loss": 0.0619, "step": 29510 }, { "epoch": 1.7460223576033596, "grad_norm": 0.3178854286670685, "learning_rate": 2.83850709774061e-05, "loss": 0.0891, "step": 29520 }, { "epoch": 1.7466138285917077, "grad_norm": 0.3124752342700958, "learning_rate": 2.8383773744646112e-05, "loss": 0.0841, "step": 29530 }, { "epoch": 1.7472052995800555, "grad_norm": 0.20332744717597961, "learning_rate": 2.8382476020744754e-05, "loss": 0.0879, "step": 29540 }, { "epoch": 1.7477967705684037, "grad_norm": 0.311849445104599, "learning_rate": 2.8381177805749645e-05, "loss": 0.075, "step": 29550 }, { "epoch": 1.7483882415567518, "grad_norm": 0.32740238308906555, "learning_rate": 2.8379879099708414e-05, "loss": 0.0663, "step": 29560 }, { "epoch": 1.7489797125450997, "grad_norm": 0.2126862108707428, "learning_rate": 2.837857990266873e-05, "loss": 0.0785, "step": 29570 }, { "epoch": 1.7495711835334475, "grad_norm": 0.29318419098854065, "learning_rate": 2.8377280214678266e-05, "loss": 0.078, "step": 29580 }, { "epoch": 1.7501626545217959, "grad_norm": 0.31421536207199097, "learning_rate": 2.8375980035784722e-05, "loss": 0.0839, "step": 29590 }, { "epoch": 1.7507541255101438, "grad_norm": 0.207491934299469, "learning_rate": 2.8374679366035802e-05, "loss": 0.0733, "step": 29600 }, { "epoch": 1.7513455964984916, "grad_norm": 0.2920909523963928, "learning_rate": 2.837337820547924e-05, "loss": 0.0748, "step": 29610 }, { "epoch": 1.7519370674868397, "grad_norm": 1.1799206733703613, "learning_rate": 2.8372076554162785e-05, "loss": 0.091, "step": 29620 }, { "epoch": 1.7525285384751879, "grad_norm": 0.24941182136535645, "learning_rate": 2.8370774412134202e-05, "loss": 0.0851, "step": 29630 }, { "epoch": 1.7531200094635357, "grad_norm": 0.27492019534111023, "learning_rate": 2.8369471779441277e-05, "loss": 0.0802, "step": 29640 }, { "epoch": 1.7537114804518839, "grad_norm": 0.14831943809986115, "learning_rate": 2.8368168656131812e-05, "loss": 0.0709, "step": 29650 }, { "epoch": 1.754302951440232, "grad_norm": 0.2776950001716614, "learning_rate": 2.8366865042253624e-05, "loss": 0.0644, "step": 29660 }, { "epoch": 1.7548944224285798, "grad_norm": 0.32490086555480957, "learning_rate": 2.8365560937854556e-05, "loss": 0.0881, "step": 29670 }, { "epoch": 1.7554858934169277, "grad_norm": 0.25508105754852295, "learning_rate": 2.8364256342982463e-05, "loss": 0.0916, "step": 29680 }, { "epoch": 1.756077364405276, "grad_norm": 0.3109520673751831, "learning_rate": 2.836295125768522e-05, "loss": 0.0671, "step": 29690 }, { "epoch": 1.756668835393624, "grad_norm": 0.24325019121170044, "learning_rate": 2.836164568201072e-05, "loss": 0.0684, "step": 29700 }, { "epoch": 1.7572603063819718, "grad_norm": 0.2618434727191925, "learning_rate": 2.8360339616006866e-05, "loss": 0.0923, "step": 29710 }, { "epoch": 1.75785177737032, "grad_norm": 0.22394289076328278, "learning_rate": 2.8359033059721594e-05, "loss": 0.0849, "step": 29720 }, { "epoch": 1.758443248358668, "grad_norm": 0.222235769033432, "learning_rate": 2.8357726013202854e-05, "loss": 0.0887, "step": 29730 }, { "epoch": 1.759034719347016, "grad_norm": 0.24052773416042328, "learning_rate": 2.8356418476498602e-05, "loss": 0.0843, "step": 29740 }, { "epoch": 1.759626190335364, "grad_norm": 0.1393558531999588, "learning_rate": 2.8355110449656825e-05, "loss": 0.0658, "step": 29750 }, { "epoch": 1.7602176613237122, "grad_norm": 0.48704004287719727, "learning_rate": 2.835380193272552e-05, "loss": 0.0779, "step": 29760 }, { "epoch": 1.76080913231206, "grad_norm": 0.2190777063369751, "learning_rate": 2.835249292575271e-05, "loss": 0.0901, "step": 29770 }, { "epoch": 1.7614006033004082, "grad_norm": 0.22614970803260803, "learning_rate": 2.835118342878643e-05, "loss": 0.0782, "step": 29780 }, { "epoch": 1.7619920742887563, "grad_norm": 0.1698099970817566, "learning_rate": 2.8349873441874727e-05, "loss": 0.0789, "step": 29790 }, { "epoch": 1.7625835452771041, "grad_norm": 0.23573243618011475, "learning_rate": 2.834856296506568e-05, "loss": 0.081, "step": 29800 }, { "epoch": 1.763175016265452, "grad_norm": 0.2786996364593506, "learning_rate": 2.8347251998407386e-05, "loss": 0.0805, "step": 29810 }, { "epoch": 1.7637664872538004, "grad_norm": 0.25901493430137634, "learning_rate": 2.834594054194794e-05, "loss": 0.0755, "step": 29820 }, { "epoch": 1.7643579582421482, "grad_norm": 0.2517664432525635, "learning_rate": 2.8344628595735473e-05, "loss": 0.0871, "step": 29830 }, { "epoch": 1.7649494292304961, "grad_norm": 0.31491461396217346, "learning_rate": 2.8343316159818136e-05, "loss": 0.08, "step": 29840 }, { "epoch": 1.7655409002188442, "grad_norm": 1.5678975582122803, "learning_rate": 2.834200323424408e-05, "loss": 0.064, "step": 29850 }, { "epoch": 1.7661323712071924, "grad_norm": 0.20695938169956207, "learning_rate": 2.834068981906149e-05, "loss": 0.0702, "step": 29860 }, { "epoch": 1.7667238421955402, "grad_norm": 0.30171340703964233, "learning_rate": 2.833937591431857e-05, "loss": 0.0938, "step": 29870 }, { "epoch": 1.7673153131838883, "grad_norm": 0.2238880693912506, "learning_rate": 2.8338061520063527e-05, "loss": 0.0852, "step": 29880 }, { "epoch": 1.7679067841722365, "grad_norm": 0.2965969741344452, "learning_rate": 2.8336746636344603e-05, "loss": 0.0885, "step": 29890 }, { "epoch": 1.7684982551605843, "grad_norm": 0.2452205866575241, "learning_rate": 2.833543126321004e-05, "loss": 0.0752, "step": 29900 }, { "epoch": 1.7690897261489322, "grad_norm": 0.6340268850326538, "learning_rate": 2.8334115400708117e-05, "loss": 0.0609, "step": 29910 }, { "epoch": 1.7696811971372806, "grad_norm": 0.2749587893486023, "learning_rate": 2.8332799048887118e-05, "loss": 0.0921, "step": 29920 }, { "epoch": 1.7702726681256284, "grad_norm": 0.24834755063056946, "learning_rate": 2.833148220779535e-05, "loss": 0.0823, "step": 29930 }, { "epoch": 1.7708641391139763, "grad_norm": 0.4620300829410553, "learning_rate": 2.8330164877481136e-05, "loss": 0.0788, "step": 29940 }, { "epoch": 1.7714556101023244, "grad_norm": 0.2605662941932678, "learning_rate": 2.832884705799282e-05, "loss": 0.0737, "step": 29950 }, { "epoch": 1.7720470810906725, "grad_norm": 0.38702988624572754, "learning_rate": 2.8327528749378755e-05, "loss": 0.0713, "step": 29960 }, { "epoch": 1.7726385520790204, "grad_norm": 0.2501676380634308, "learning_rate": 2.832620995168733e-05, "loss": 0.0879, "step": 29970 }, { "epoch": 1.7732300230673685, "grad_norm": 0.22414378821849823, "learning_rate": 2.8324890664966932e-05, "loss": 0.1031, "step": 29980 }, { "epoch": 1.7738214940557167, "grad_norm": 0.1977868378162384, "learning_rate": 2.832357088926597e-05, "loss": 0.0772, "step": 29990 }, { "epoch": 1.7744129650440645, "grad_norm": 0.2812778055667877, "learning_rate": 2.832225062463289e-05, "loss": 0.0732, "step": 30000 }, { "epoch": 1.7750044360324126, "grad_norm": 0.273708313703537, "learning_rate": 2.832092987111613e-05, "loss": 0.0744, "step": 30010 }, { "epoch": 1.7755959070207608, "grad_norm": 0.2682749032974243, "learning_rate": 2.8319608628764165e-05, "loss": 0.0827, "step": 30020 }, { "epoch": 1.7761873780091086, "grad_norm": 0.27001267671585083, "learning_rate": 2.8318286897625478e-05, "loss": 0.0843, "step": 30030 }, { "epoch": 1.7767788489974565, "grad_norm": 0.2164471596479416, "learning_rate": 2.8316964677748563e-05, "loss": 0.0749, "step": 30040 }, { "epoch": 1.7773703199858049, "grad_norm": 0.19958384335041046, "learning_rate": 2.8315641969181955e-05, "loss": 0.0824, "step": 30050 }, { "epoch": 1.7779617909741527, "grad_norm": 0.22371554374694824, "learning_rate": 2.8314318771974183e-05, "loss": 0.091, "step": 30060 }, { "epoch": 1.7785532619625006, "grad_norm": 0.21416473388671875, "learning_rate": 2.831299508617381e-05, "loss": 0.0887, "step": 30070 }, { "epoch": 1.7791447329508487, "grad_norm": 0.22287233173847198, "learning_rate": 2.8311670911829413e-05, "loss": 0.0951, "step": 30080 }, { "epoch": 1.7797362039391968, "grad_norm": 0.21131780743598938, "learning_rate": 2.8310346248989575e-05, "loss": 0.09, "step": 30090 }, { "epoch": 1.7803276749275447, "grad_norm": 0.22969521582126617, "learning_rate": 2.8309021097702916e-05, "loss": 0.0731, "step": 30100 }, { "epoch": 1.7809191459158928, "grad_norm": 0.3029368221759796, "learning_rate": 2.830769545801806e-05, "loss": 0.0815, "step": 30110 }, { "epoch": 1.781510616904241, "grad_norm": 0.26853859424591064, "learning_rate": 2.8306369329983656e-05, "loss": 0.0926, "step": 30120 }, { "epoch": 1.7821020878925888, "grad_norm": 0.3078211843967438, "learning_rate": 2.830504271364837e-05, "loss": 0.0849, "step": 30130 }, { "epoch": 1.7826935588809367, "grad_norm": 0.4090209901332855, "learning_rate": 2.8303715609060883e-05, "loss": 0.0782, "step": 30140 }, { "epoch": 1.783285029869285, "grad_norm": 0.14139211177825928, "learning_rate": 2.8302388016269893e-05, "loss": 0.0629, "step": 30150 }, { "epoch": 1.783876500857633, "grad_norm": 0.32040712237358093, "learning_rate": 2.830105993532412e-05, "loss": 0.0745, "step": 30160 }, { "epoch": 1.7844679718459808, "grad_norm": 0.2403516322374344, "learning_rate": 2.8299731366272304e-05, "loss": 0.0864, "step": 30170 }, { "epoch": 1.785059442834329, "grad_norm": 0.27556753158569336, "learning_rate": 2.8298402309163193e-05, "loss": 0.0804, "step": 30180 }, { "epoch": 1.785650913822677, "grad_norm": 0.3021678626537323, "learning_rate": 2.8297072764045562e-05, "loss": 0.0851, "step": 30190 }, { "epoch": 1.786242384811025, "grad_norm": 0.12489450722932816, "learning_rate": 2.8295742730968202e-05, "loss": 0.0797, "step": 30200 }, { "epoch": 1.786833855799373, "grad_norm": 0.33536073565483093, "learning_rate": 2.829441220997992e-05, "loss": 0.0699, "step": 30210 }, { "epoch": 1.7874253267877211, "grad_norm": 0.27955538034439087, "learning_rate": 2.8293081201129542e-05, "loss": 0.0912, "step": 30220 }, { "epoch": 1.788016797776069, "grad_norm": 0.28755971789360046, "learning_rate": 2.829174970446591e-05, "loss": 0.0806, "step": 30230 }, { "epoch": 1.7886082687644171, "grad_norm": 0.25836828351020813, "learning_rate": 2.8290417720037893e-05, "loss": 0.0813, "step": 30240 }, { "epoch": 1.7891997397527653, "grad_norm": 0.21935155987739563, "learning_rate": 2.828908524789436e-05, "loss": 0.0771, "step": 30250 }, { "epoch": 1.7897912107411131, "grad_norm": 0.28685855865478516, "learning_rate": 2.8287752288084212e-05, "loss": 0.079, "step": 30260 }, { "epoch": 1.790382681729461, "grad_norm": 0.23329295217990875, "learning_rate": 2.8286418840656368e-05, "loss": 0.0984, "step": 30270 }, { "epoch": 1.7909741527178094, "grad_norm": 0.23117569088935852, "learning_rate": 2.828508490565976e-05, "loss": 0.0988, "step": 30280 }, { "epoch": 1.7915656237061572, "grad_norm": 0.21687456965446472, "learning_rate": 2.8283750483143333e-05, "loss": 0.0864, "step": 30290 }, { "epoch": 1.7921570946945051, "grad_norm": 0.16400647163391113, "learning_rate": 2.8282415573156065e-05, "loss": 0.072, "step": 30300 }, { "epoch": 1.7927485656828532, "grad_norm": 0.23398926854133606, "learning_rate": 2.8281080175746937e-05, "loss": 0.0688, "step": 30310 }, { "epoch": 1.7933400366712013, "grad_norm": 0.25359785556793213, "learning_rate": 2.827974429096496e-05, "loss": 0.0875, "step": 30320 }, { "epoch": 1.7939315076595492, "grad_norm": 0.2117186337709427, "learning_rate": 2.8278407918859145e-05, "loss": 0.0888, "step": 30330 }, { "epoch": 1.7945229786478973, "grad_norm": 0.2515004873275757, "learning_rate": 2.8277071059478547e-05, "loss": 0.0758, "step": 30340 }, { "epoch": 1.7951144496362454, "grad_norm": 0.2191869020462036, "learning_rate": 2.827573371287221e-05, "loss": 0.0693, "step": 30350 }, { "epoch": 1.7957059206245933, "grad_norm": 0.1889289766550064, "learning_rate": 2.8274395879089222e-05, "loss": 0.0653, "step": 30360 }, { "epoch": 1.7962973916129412, "grad_norm": 0.24781669676303864, "learning_rate": 2.827305755817867e-05, "loss": 0.0812, "step": 30370 }, { "epoch": 1.7968888626012895, "grad_norm": 0.2662566900253296, "learning_rate": 2.827171875018967e-05, "loss": 0.0909, "step": 30380 }, { "epoch": 1.7974803335896374, "grad_norm": 0.1819058209657669, "learning_rate": 2.8270379455171354e-05, "loss": 0.0814, "step": 30390 }, { "epoch": 1.7980718045779853, "grad_norm": 0.2691464126110077, "learning_rate": 2.8269039673172865e-05, "loss": 0.076, "step": 30400 }, { "epoch": 1.7986632755663334, "grad_norm": 0.319846510887146, "learning_rate": 2.8267699404243365e-05, "loss": 0.0903, "step": 30410 }, { "epoch": 1.7992547465546815, "grad_norm": 0.22045710682868958, "learning_rate": 2.826635864843205e-05, "loss": 0.0994, "step": 30420 }, { "epoch": 1.7998462175430294, "grad_norm": 0.17943499982357025, "learning_rate": 2.826501740578811e-05, "loss": 0.0806, "step": 30430 }, { "epoch": 1.8004376885313775, "grad_norm": 0.24657957255840302, "learning_rate": 2.826367567636077e-05, "loss": 0.0794, "step": 30440 }, { "epoch": 1.8010291595197256, "grad_norm": 0.16820724308490753, "learning_rate": 2.8262333460199264e-05, "loss": 0.0755, "step": 30450 }, { "epoch": 1.8016206305080735, "grad_norm": 0.22593681514263153, "learning_rate": 2.826099075735285e-05, "loss": 0.0629, "step": 30460 }, { "epoch": 1.8022121014964216, "grad_norm": 0.41453197598457336, "learning_rate": 2.8259647567870798e-05, "loss": 0.0902, "step": 30470 }, { "epoch": 1.8028035724847697, "grad_norm": 0.2696337103843689, "learning_rate": 2.82583038918024e-05, "loss": 0.0859, "step": 30480 }, { "epoch": 1.8033950434731176, "grad_norm": 0.2602221369743347, "learning_rate": 2.8256959729196968e-05, "loss": 0.0875, "step": 30490 }, { "epoch": 1.8039865144614655, "grad_norm": 0.2543225586414337, "learning_rate": 2.8255615080103823e-05, "loss": 0.0743, "step": 30500 }, { "epoch": 1.8045779854498138, "grad_norm": 0.26292863488197327, "learning_rate": 2.8254269944572315e-05, "loss": 0.064, "step": 30510 }, { "epoch": 1.8051694564381617, "grad_norm": 0.18395119905471802, "learning_rate": 2.8252924322651798e-05, "loss": 0.0819, "step": 30520 }, { "epoch": 1.8057609274265096, "grad_norm": 0.2898954749107361, "learning_rate": 2.825157821439166e-05, "loss": 0.0891, "step": 30530 }, { "epoch": 1.8063523984148577, "grad_norm": 0.2323627918958664, "learning_rate": 2.8250231619841293e-05, "loss": 0.0841, "step": 30540 }, { "epoch": 1.8069438694032058, "grad_norm": 0.20633867383003235, "learning_rate": 2.8248884539050117e-05, "loss": 0.0745, "step": 30550 }, { "epoch": 1.8075353403915537, "grad_norm": 0.27257412672042847, "learning_rate": 2.8247536972067562e-05, "loss": 0.0719, "step": 30560 }, { "epoch": 1.8081268113799018, "grad_norm": 0.189887136220932, "learning_rate": 2.8246188918943084e-05, "loss": 0.0859, "step": 30570 }, { "epoch": 1.80871828236825, "grad_norm": 0.22008103132247925, "learning_rate": 2.8244840379726145e-05, "loss": 0.0863, "step": 30580 }, { "epoch": 1.8093097533565978, "grad_norm": 0.1679900735616684, "learning_rate": 2.824349135446624e-05, "loss": 0.074, "step": 30590 }, { "epoch": 1.809901224344946, "grad_norm": 0.43903401494026184, "learning_rate": 2.8242141843212867e-05, "loss": 0.07, "step": 30600 }, { "epoch": 1.810492695333294, "grad_norm": 2.2700905799865723, "learning_rate": 2.8240791846015553e-05, "loss": 0.0711, "step": 30610 }, { "epoch": 1.811084166321642, "grad_norm": 0.24474476277828217, "learning_rate": 2.8239441362923838e-05, "loss": 0.0914, "step": 30620 }, { "epoch": 1.8116756373099898, "grad_norm": 0.2807188630104065, "learning_rate": 2.823809039398728e-05, "loss": 0.0899, "step": 30630 }, { "epoch": 1.812267108298338, "grad_norm": 0.285859614610672, "learning_rate": 2.8236738939255453e-05, "loss": 0.071, "step": 30640 }, { "epoch": 1.812858579286686, "grad_norm": 0.2187737673521042, "learning_rate": 2.823538699877796e-05, "loss": 0.0861, "step": 30650 }, { "epoch": 1.813450050275034, "grad_norm": 0.21331432461738586, "learning_rate": 2.82340345726044e-05, "loss": 0.0771, "step": 30660 }, { "epoch": 1.814041521263382, "grad_norm": 0.32551470398902893, "learning_rate": 2.8232681660784408e-05, "loss": 0.0921, "step": 30670 }, { "epoch": 1.8146329922517301, "grad_norm": 0.30382344126701355, "learning_rate": 2.8231328263367638e-05, "loss": 0.0858, "step": 30680 }, { "epoch": 1.815224463240078, "grad_norm": 0.24752295017242432, "learning_rate": 2.8229974380403745e-05, "loss": 0.0785, "step": 30690 }, { "epoch": 1.8158159342284261, "grad_norm": 0.1954280585050583, "learning_rate": 2.8228620011942413e-05, "loss": 0.0741, "step": 30700 }, { "epoch": 1.8164074052167742, "grad_norm": 0.2982083261013031, "learning_rate": 2.8227265158033354e-05, "loss": 0.0776, "step": 30710 }, { "epoch": 1.8169988762051221, "grad_norm": 0.2689540684223175, "learning_rate": 2.8225909818726273e-05, "loss": 0.0921, "step": 30720 }, { "epoch": 1.81759034719347, "grad_norm": 0.22872553765773773, "learning_rate": 2.8224553994070913e-05, "loss": 0.0801, "step": 30730 }, { "epoch": 1.8181818181818183, "grad_norm": 0.2386481761932373, "learning_rate": 2.8223197684117032e-05, "loss": 0.0735, "step": 30740 }, { "epoch": 1.8187732891701662, "grad_norm": 0.20304542779922485, "learning_rate": 2.82218408889144e-05, "loss": 0.0787, "step": 30750 }, { "epoch": 1.8193647601585141, "grad_norm": 0.25796636939048767, "learning_rate": 2.82204836085128e-05, "loss": 0.0809, "step": 30760 }, { "epoch": 1.8199562311468622, "grad_norm": 0.2870256304740906, "learning_rate": 2.821912584296205e-05, "loss": 0.0812, "step": 30770 }, { "epoch": 1.8205477021352103, "grad_norm": 0.26264333724975586, "learning_rate": 2.821776759231197e-05, "loss": 0.0966, "step": 30780 }, { "epoch": 1.8211391731235582, "grad_norm": 0.22098562121391296, "learning_rate": 2.8216408856612404e-05, "loss": 0.08, "step": 30790 }, { "epoch": 1.8217306441119063, "grad_norm": 0.2144012302160263, "learning_rate": 2.8215049635913215e-05, "loss": 0.081, "step": 30800 }, { "epoch": 1.8223221151002544, "grad_norm": 0.20110011100769043, "learning_rate": 2.8213689930264275e-05, "loss": 0.0795, "step": 30810 }, { "epoch": 1.8229135860886023, "grad_norm": 0.28109562397003174, "learning_rate": 2.8212329739715493e-05, "loss": 0.0897, "step": 30820 }, { "epoch": 1.8235050570769504, "grad_norm": 0.22887635231018066, "learning_rate": 2.8210969064316777e-05, "loss": 0.0817, "step": 30830 }, { "epoch": 1.8240965280652985, "grad_norm": 0.2786496579647064, "learning_rate": 2.8209607904118056e-05, "loss": 0.0651, "step": 30840 }, { "epoch": 1.8246879990536464, "grad_norm": 0.17248857021331787, "learning_rate": 2.8208246259169284e-05, "loss": 0.0646, "step": 30850 }, { "epoch": 1.8252794700419943, "grad_norm": 0.2839887738227844, "learning_rate": 2.820688412952043e-05, "loss": 0.081, "step": 30860 }, { "epoch": 1.8258709410303424, "grad_norm": 0.257275253534317, "learning_rate": 2.820552151522148e-05, "loss": 0.0817, "step": 30870 }, { "epoch": 1.8264624120186905, "grad_norm": 0.1834273636341095, "learning_rate": 2.820415841632243e-05, "loss": 0.0829, "step": 30880 }, { "epoch": 1.8270538830070384, "grad_norm": 0.2943112552165985, "learning_rate": 2.8202794832873315e-05, "loss": 0.0832, "step": 30890 }, { "epoch": 1.8276453539953865, "grad_norm": 0.17442844808101654, "learning_rate": 2.8201430764924165e-05, "loss": 0.0747, "step": 30900 }, { "epoch": 1.8282368249837346, "grad_norm": 0.17094875872135162, "learning_rate": 2.820006621252503e-05, "loss": 0.0605, "step": 30910 }, { "epoch": 1.8288282959720825, "grad_norm": 0.3936079144477844, "learning_rate": 2.8198701175726002e-05, "loss": 0.0825, "step": 30920 }, { "epoch": 1.8294197669604306, "grad_norm": 0.20798274874687195, "learning_rate": 2.819733565457716e-05, "loss": 0.0769, "step": 30930 }, { "epoch": 1.8300112379487787, "grad_norm": 0.3646204173564911, "learning_rate": 2.8195969649128623e-05, "loss": 0.0809, "step": 30940 }, { "epoch": 1.8306027089371266, "grad_norm": 0.1950586587190628, "learning_rate": 2.8194603159430507e-05, "loss": 0.069, "step": 30950 }, { "epoch": 1.8311941799254745, "grad_norm": 0.2626189589500427, "learning_rate": 2.819323618553297e-05, "loss": 0.0859, "step": 30960 }, { "epoch": 1.8317856509138228, "grad_norm": 0.3152479827404022, "learning_rate": 2.8191868727486176e-05, "loss": 0.0903, "step": 30970 }, { "epoch": 1.8323771219021707, "grad_norm": 0.23450665175914764, "learning_rate": 2.8190500785340296e-05, "loss": 0.0769, "step": 30980 }, { "epoch": 1.8329685928905186, "grad_norm": 0.19824464619159698, "learning_rate": 2.8189132359145536e-05, "loss": 0.0714, "step": 30990 }, { "epoch": 1.8335600638788667, "grad_norm": 0.25948160886764526, "learning_rate": 2.818776344895211e-05, "loss": 0.0741, "step": 31000 }, { "epoch": 1.8341515348672148, "grad_norm": 0.2790810167789459, "learning_rate": 2.8186394054810254e-05, "loss": 0.0678, "step": 31010 }, { "epoch": 1.8347430058555627, "grad_norm": 0.34995904564857483, "learning_rate": 2.818502417677022e-05, "loss": 0.0897, "step": 31020 }, { "epoch": 1.8353344768439108, "grad_norm": 0.2530444264411926, "learning_rate": 2.8183653814882274e-05, "loss": 0.0822, "step": 31030 }, { "epoch": 1.835925947832259, "grad_norm": 0.23686310648918152, "learning_rate": 2.8182282969196715e-05, "loss": 0.0805, "step": 31040 }, { "epoch": 1.8365174188206068, "grad_norm": 0.22448155283927917, "learning_rate": 2.818091163976384e-05, "loss": 0.0823, "step": 31050 }, { "epoch": 1.837108889808955, "grad_norm": 0.20877547562122345, "learning_rate": 2.817953982663397e-05, "loss": 0.0773, "step": 31060 }, { "epoch": 1.837700360797303, "grad_norm": 0.2491547167301178, "learning_rate": 2.8178167529857456e-05, "loss": 0.0853, "step": 31070 }, { "epoch": 1.838291831785651, "grad_norm": 0.1949806809425354, "learning_rate": 2.817679474948465e-05, "loss": 0.0799, "step": 31080 }, { "epoch": 1.8388833027739988, "grad_norm": 0.3479134142398834, "learning_rate": 2.817542148556593e-05, "loss": 0.0871, "step": 31090 }, { "epoch": 1.839474773762347, "grad_norm": 0.24711720645427704, "learning_rate": 2.8174047738151687e-05, "loss": 0.0714, "step": 31100 }, { "epoch": 1.840066244750695, "grad_norm": 0.3661980628967285, "learning_rate": 2.817267350729234e-05, "loss": 0.0838, "step": 31110 }, { "epoch": 1.840657715739043, "grad_norm": 0.28557562828063965, "learning_rate": 2.817129879303831e-05, "loss": 0.094, "step": 31120 }, { "epoch": 1.841249186727391, "grad_norm": 0.22968101501464844, "learning_rate": 2.8169923595440054e-05, "loss": 0.1047, "step": 31130 }, { "epoch": 1.8418406577157391, "grad_norm": 0.1809111088514328, "learning_rate": 2.8168547914548027e-05, "loss": 0.0812, "step": 31140 }, { "epoch": 1.842432128704087, "grad_norm": 0.20231647789478302, "learning_rate": 2.8167171750412727e-05, "loss": 0.0702, "step": 31150 }, { "epoch": 1.8430235996924351, "grad_norm": 0.2591203451156616, "learning_rate": 2.8165795103084642e-05, "loss": 0.071, "step": 31160 }, { "epoch": 1.8436150706807832, "grad_norm": 0.42372405529022217, "learning_rate": 2.816441797261429e-05, "loss": 0.0887, "step": 31170 }, { "epoch": 1.8442065416691311, "grad_norm": 0.25331756472587585, "learning_rate": 2.8163040359052215e-05, "loss": 0.0996, "step": 31180 }, { "epoch": 1.844798012657479, "grad_norm": 0.19477489590644836, "learning_rate": 2.8161662262448972e-05, "loss": 0.0718, "step": 31190 }, { "epoch": 1.8453894836458273, "grad_norm": 0.20152048766613007, "learning_rate": 2.816028368285512e-05, "loss": 0.0753, "step": 31200 }, { "epoch": 1.8459809546341752, "grad_norm": 0.32267239689826965, "learning_rate": 2.8158904620321262e-05, "loss": 0.0795, "step": 31210 }, { "epoch": 1.846572425622523, "grad_norm": 0.19644010066986084, "learning_rate": 2.8157525074898e-05, "loss": 0.0864, "step": 31220 }, { "epoch": 1.8471638966108712, "grad_norm": 0.21545056998729706, "learning_rate": 2.8156145046635953e-05, "loss": 0.0791, "step": 31230 }, { "epoch": 1.8477553675992193, "grad_norm": 0.18588368594646454, "learning_rate": 2.8154764535585775e-05, "loss": 0.0735, "step": 31240 }, { "epoch": 1.8483468385875672, "grad_norm": 0.36803314089775085, "learning_rate": 2.815338354179812e-05, "loss": 0.0688, "step": 31250 }, { "epoch": 1.8489383095759153, "grad_norm": 0.2929094135761261, "learning_rate": 2.815200206532366e-05, "loss": 0.0759, "step": 31260 }, { "epoch": 1.8495297805642634, "grad_norm": 0.23765796422958374, "learning_rate": 2.8150620106213106e-05, "loss": 0.0906, "step": 31270 }, { "epoch": 1.8501212515526113, "grad_norm": 0.29193368554115295, "learning_rate": 2.814923766451716e-05, "loss": 0.0902, "step": 31280 }, { "epoch": 1.8507127225409594, "grad_norm": 0.49423912167549133, "learning_rate": 2.8147854740286555e-05, "loss": 0.0783, "step": 31290 }, { "epoch": 1.8513041935293075, "grad_norm": 0.28525498509407043, "learning_rate": 2.8146471333572044e-05, "loss": 0.0793, "step": 31300 }, { "epoch": 1.8518956645176554, "grad_norm": 0.2831168472766876, "learning_rate": 2.8145087444424388e-05, "loss": 0.0786, "step": 31310 }, { "epoch": 1.8524871355060033, "grad_norm": 0.1887740045785904, "learning_rate": 2.8143703072894372e-05, "loss": 0.0832, "step": 31320 }, { "epoch": 1.8530786064943514, "grad_norm": 0.3354043960571289, "learning_rate": 2.8142318219032802e-05, "loss": 0.0846, "step": 31330 }, { "epoch": 1.8536700774826995, "grad_norm": 0.2095494121313095, "learning_rate": 2.8140932882890496e-05, "loss": 0.0781, "step": 31340 }, { "epoch": 1.8542615484710474, "grad_norm": 0.17088592052459717, "learning_rate": 2.813954706451829e-05, "loss": 0.0811, "step": 31350 }, { "epoch": 1.8548530194593955, "grad_norm": 0.24626773595809937, "learning_rate": 2.8138160763967037e-05, "loss": 0.0713, "step": 31360 }, { "epoch": 1.8554444904477436, "grad_norm": 0.2550351023674011, "learning_rate": 2.8136773981287613e-05, "loss": 0.1028, "step": 31370 }, { "epoch": 1.8560359614360915, "grad_norm": 0.28781744837760925, "learning_rate": 2.813538671653091e-05, "loss": 0.0887, "step": 31380 }, { "epoch": 1.8566274324244396, "grad_norm": 0.2616073787212372, "learning_rate": 2.8133998969747835e-05, "loss": 0.0738, "step": 31390 }, { "epoch": 1.8572189034127877, "grad_norm": 0.22233663499355316, "learning_rate": 2.813261074098931e-05, "loss": 0.0778, "step": 31400 }, { "epoch": 1.8578103744011356, "grad_norm": 0.2545117437839508, "learning_rate": 2.8131222030306283e-05, "loss": 0.0613, "step": 31410 }, { "epoch": 1.8584018453894835, "grad_norm": 0.2742786109447479, "learning_rate": 2.8129832837749714e-05, "loss": 0.0906, "step": 31420 }, { "epoch": 1.8589933163778318, "grad_norm": 0.21986903250217438, "learning_rate": 2.812844316337058e-05, "loss": 0.0753, "step": 31430 }, { "epoch": 1.8595847873661797, "grad_norm": 0.21106329560279846, "learning_rate": 2.8127053007219884e-05, "loss": 0.0814, "step": 31440 }, { "epoch": 1.8601762583545276, "grad_norm": 0.19232907891273499, "learning_rate": 2.812566236934863e-05, "loss": 0.0613, "step": 31450 }, { "epoch": 1.8607677293428757, "grad_norm": 0.34337571263313293, "learning_rate": 2.8124271249807856e-05, "loss": 0.0769, "step": 31460 }, { "epoch": 1.8613592003312238, "grad_norm": 0.3260633945465088, "learning_rate": 2.8122879648648616e-05, "loss": 0.0967, "step": 31470 }, { "epoch": 1.8619506713195717, "grad_norm": 0.2818242311477661, "learning_rate": 2.8121487565921966e-05, "loss": 0.0898, "step": 31480 }, { "epoch": 1.8625421423079198, "grad_norm": 0.22625431418418884, "learning_rate": 2.8120095001679e-05, "loss": 0.0853, "step": 31490 }, { "epoch": 1.863133613296268, "grad_norm": 0.5179388523101807, "learning_rate": 2.811870195597082e-05, "loss": 0.0716, "step": 31500 }, { "epoch": 1.8637250842846158, "grad_norm": 0.21773073077201843, "learning_rate": 2.811730842884854e-05, "loss": 0.0723, "step": 31510 }, { "epoch": 1.864316555272964, "grad_norm": 0.21425776183605194, "learning_rate": 2.8115914420363303e-05, "loss": 0.0885, "step": 31520 }, { "epoch": 1.864908026261312, "grad_norm": 0.23381304740905762, "learning_rate": 2.811451993056626e-05, "loss": 0.0828, "step": 31530 }, { "epoch": 1.86549949724966, "grad_norm": 0.2151535451412201, "learning_rate": 2.8113124959508595e-05, "loss": 0.088, "step": 31540 }, { "epoch": 1.8660909682380078, "grad_norm": 0.2119532972574234, "learning_rate": 2.811172950724149e-05, "loss": 0.0817, "step": 31550 }, { "epoch": 1.866682439226356, "grad_norm": 0.26467275619506836, "learning_rate": 2.8110333573816156e-05, "loss": 0.0727, "step": 31560 }, { "epoch": 1.867273910214704, "grad_norm": 0.2668033838272095, "learning_rate": 2.810893715928382e-05, "loss": 0.0904, "step": 31570 }, { "epoch": 1.867865381203052, "grad_norm": 0.2731895446777344, "learning_rate": 2.8107540263695716e-05, "loss": 0.0959, "step": 31580 }, { "epoch": 1.8684568521914, "grad_norm": 0.4006766974925995, "learning_rate": 2.8106142887103125e-05, "loss": 0.0869, "step": 31590 }, { "epoch": 1.8690483231797481, "grad_norm": 0.22106832265853882, "learning_rate": 2.810474502955731e-05, "loss": 0.0747, "step": 31600 }, { "epoch": 1.869639794168096, "grad_norm": 0.23422002792358398, "learning_rate": 2.810334669110957e-05, "loss": 0.0822, "step": 31610 }, { "epoch": 1.8702312651564441, "grad_norm": 0.25434938073158264, "learning_rate": 2.810194787181123e-05, "loss": 0.0924, "step": 31620 }, { "epoch": 1.8708227361447922, "grad_norm": 0.2944895029067993, "learning_rate": 2.810054857171361e-05, "loss": 0.0851, "step": 31630 }, { "epoch": 1.87141420713314, "grad_norm": 0.27997344732284546, "learning_rate": 2.809914879086807e-05, "loss": 0.0823, "step": 31640 }, { "epoch": 1.872005678121488, "grad_norm": 0.1529153734445572, "learning_rate": 2.8097748529325968e-05, "loss": 0.0643, "step": 31650 }, { "epoch": 1.8725971491098363, "grad_norm": 0.35458752512931824, "learning_rate": 2.809634778713869e-05, "loss": 0.0732, "step": 31660 }, { "epoch": 1.8731886200981842, "grad_norm": 0.2640478312969208, "learning_rate": 2.809494656435765e-05, "loss": 0.0869, "step": 31670 }, { "epoch": 1.873780091086532, "grad_norm": 0.23238149285316467, "learning_rate": 2.8093544861034254e-05, "loss": 0.0729, "step": 31680 }, { "epoch": 1.8743715620748802, "grad_norm": 0.288754940032959, "learning_rate": 2.809214267721995e-05, "loss": 0.0756, "step": 31690 }, { "epoch": 1.8749630330632283, "grad_norm": 0.21728385984897614, "learning_rate": 2.809074001296619e-05, "loss": 0.074, "step": 31700 }, { "epoch": 1.8755545040515762, "grad_norm": 0.2720482051372528, "learning_rate": 2.8089336868324446e-05, "loss": 0.0709, "step": 31710 }, { "epoch": 1.8761459750399243, "grad_norm": 0.2570580542087555, "learning_rate": 2.8087933243346212e-05, "loss": 0.0832, "step": 31720 }, { "epoch": 1.8767374460282724, "grad_norm": 0.20284238457679749, "learning_rate": 2.8086529138082995e-05, "loss": 0.0875, "step": 31730 }, { "epoch": 1.8773289170166203, "grad_norm": 0.281078964471817, "learning_rate": 2.8085124552586316e-05, "loss": 0.0924, "step": 31740 }, { "epoch": 1.8779203880049684, "grad_norm": 0.2698196470737457, "learning_rate": 2.808371948690773e-05, "loss": 0.079, "step": 31750 }, { "epoch": 1.8785118589933165, "grad_norm": 0.25665584206581116, "learning_rate": 2.8082313941098788e-05, "loss": 0.0682, "step": 31760 }, { "epoch": 1.8791033299816644, "grad_norm": 0.22195088863372803, "learning_rate": 2.8080907915211073e-05, "loss": 0.0901, "step": 31770 }, { "epoch": 1.8796948009700123, "grad_norm": 0.266935259103775, "learning_rate": 2.8079501409296183e-05, "loss": 0.0849, "step": 31780 }, { "epoch": 1.8802862719583604, "grad_norm": 0.22991105914115906, "learning_rate": 2.807809442340573e-05, "loss": 0.0736, "step": 31790 }, { "epoch": 1.8808777429467085, "grad_norm": 0.31136876344680786, "learning_rate": 2.8076686957591347e-05, "loss": 0.0753, "step": 31800 }, { "epoch": 1.8814692139350564, "grad_norm": 0.5241884589195251, "learning_rate": 2.8075279011904683e-05, "loss": 0.0758, "step": 31810 }, { "epoch": 1.8820606849234045, "grad_norm": 0.2394225001335144, "learning_rate": 2.8073870586397406e-05, "loss": 0.0922, "step": 31820 }, { "epoch": 1.8826521559117526, "grad_norm": 0.22825446724891663, "learning_rate": 2.80724616811212e-05, "loss": 0.0882, "step": 31830 }, { "epoch": 1.8832436269001005, "grad_norm": 0.21964378654956818, "learning_rate": 2.8071052296127765e-05, "loss": 0.0845, "step": 31840 }, { "epoch": 1.8838350978884486, "grad_norm": 0.21377815306186676, "learning_rate": 2.8069642431468827e-05, "loss": 0.0684, "step": 31850 }, { "epoch": 1.8844265688767967, "grad_norm": 0.27950286865234375, "learning_rate": 2.8068232087196114e-05, "loss": 0.0763, "step": 31860 }, { "epoch": 1.8850180398651446, "grad_norm": 0.9327775835990906, "learning_rate": 2.806682126336139e-05, "loss": 0.0787, "step": 31870 }, { "epoch": 1.8856095108534925, "grad_norm": 0.23541386425495148, "learning_rate": 2.8065409960016428e-05, "loss": 0.069, "step": 31880 }, { "epoch": 1.8862009818418408, "grad_norm": 0.21006172895431519, "learning_rate": 2.8063998177213007e-05, "loss": 0.0768, "step": 31890 }, { "epoch": 1.8867924528301887, "grad_norm": 0.2601150870323181, "learning_rate": 2.806258591500295e-05, "loss": 0.0704, "step": 31900 }, { "epoch": 1.8873839238185366, "grad_norm": 0.2564570903778076, "learning_rate": 2.806117317343807e-05, "loss": 0.0822, "step": 31910 }, { "epoch": 1.8879753948068847, "grad_norm": 0.2143567055463791, "learning_rate": 2.8059759952570213e-05, "loss": 0.0814, "step": 31920 }, { "epoch": 1.8885668657952328, "grad_norm": 0.2224491983652115, "learning_rate": 2.805834625245125e-05, "loss": 0.0759, "step": 31930 }, { "epoch": 1.8891583367835807, "grad_norm": 0.20841029286384583, "learning_rate": 2.8056932073133045e-05, "loss": 0.0707, "step": 31940 }, { "epoch": 1.8897498077719288, "grad_norm": 1.201770305633545, "learning_rate": 2.80555174146675e-05, "loss": 0.0741, "step": 31950 }, { "epoch": 1.890341278760277, "grad_norm": 0.4207596778869629, "learning_rate": 2.805410227710653e-05, "loss": 0.0718, "step": 31960 }, { "epoch": 1.8909327497486248, "grad_norm": 0.23242387175559998, "learning_rate": 2.805268666050206e-05, "loss": 0.0952, "step": 31970 }, { "epoch": 1.891524220736973, "grad_norm": 0.23833099007606506, "learning_rate": 2.8051270564906047e-05, "loss": 0.0877, "step": 31980 }, { "epoch": 1.892115691725321, "grad_norm": 0.20872916281223297, "learning_rate": 2.8049853990370453e-05, "loss": 0.0743, "step": 31990 }, { "epoch": 1.892707162713669, "grad_norm": 0.2642126679420471, "learning_rate": 2.804843693694726e-05, "loss": 0.0849, "step": 32000 }, { "epoch": 1.8932986337020168, "grad_norm": 0.4927157163619995, "learning_rate": 2.8047019404688472e-05, "loss": 0.0698, "step": 32010 }, { "epoch": 1.893890104690365, "grad_norm": 0.24387937784194946, "learning_rate": 2.8045601393646105e-05, "loss": 0.0923, "step": 32020 }, { "epoch": 1.894481575678713, "grad_norm": 0.24022908508777618, "learning_rate": 2.8044182903872198e-05, "loss": 0.0893, "step": 32030 }, { "epoch": 1.895073046667061, "grad_norm": 0.20068368315696716, "learning_rate": 2.8042763935418807e-05, "loss": 0.0713, "step": 32040 }, { "epoch": 1.895664517655409, "grad_norm": 0.19078713655471802, "learning_rate": 2.8041344488338e-05, "loss": 0.063, "step": 32050 }, { "epoch": 1.896255988643757, "grad_norm": 0.32068493962287903, "learning_rate": 2.8039924562681864e-05, "loss": 0.0744, "step": 32060 }, { "epoch": 1.896847459632105, "grad_norm": 0.20932389795780182, "learning_rate": 2.8038504158502508e-05, "loss": 0.0821, "step": 32070 }, { "epoch": 1.897438930620453, "grad_norm": 0.21828347444534302, "learning_rate": 2.8037083275852058e-05, "loss": 0.0753, "step": 32080 }, { "epoch": 1.8980304016088012, "grad_norm": 0.20566125214099884, "learning_rate": 2.8035661914782654e-05, "loss": 0.0779, "step": 32090 }, { "epoch": 1.898621872597149, "grad_norm": 0.22495123744010925, "learning_rate": 2.803424007534646e-05, "loss": 0.0826, "step": 32100 }, { "epoch": 1.899213343585497, "grad_norm": 0.19142025709152222, "learning_rate": 2.8032817757595647e-05, "loss": 0.0753, "step": 32110 }, { "epoch": 1.8998048145738453, "grad_norm": 0.2743467688560486, "learning_rate": 2.803139496158241e-05, "loss": 0.0979, "step": 32120 }, { "epoch": 1.9003962855621932, "grad_norm": 0.2812301516532898, "learning_rate": 2.8029971687358965e-05, "loss": 0.0817, "step": 32130 }, { "epoch": 1.900987756550541, "grad_norm": 0.2214442938566208, "learning_rate": 2.8028547934977535e-05, "loss": 0.0878, "step": 32140 }, { "epoch": 1.9015792275388892, "grad_norm": 0.17671862244606018, "learning_rate": 2.8027123704490375e-05, "loss": 0.0755, "step": 32150 }, { "epoch": 1.9021706985272373, "grad_norm": 0.47600024938583374, "learning_rate": 2.8025698995949748e-05, "loss": 0.079, "step": 32160 }, { "epoch": 1.9027621695155852, "grad_norm": 0.3295838236808777, "learning_rate": 2.8024273809407925e-05, "loss": 0.0846, "step": 32170 }, { "epoch": 1.9033536405039333, "grad_norm": 0.2647828161716461, "learning_rate": 2.8022848144917224e-05, "loss": 0.0829, "step": 32180 }, { "epoch": 1.9039451114922814, "grad_norm": 0.25552263855934143, "learning_rate": 2.802142200252995e-05, "loss": 0.0849, "step": 32190 }, { "epoch": 1.9045365824806293, "grad_norm": 0.34581056237220764, "learning_rate": 2.8019995382298437e-05, "loss": 0.08, "step": 32200 }, { "epoch": 1.9051280534689774, "grad_norm": 0.21570026874542236, "learning_rate": 2.8018568284275047e-05, "loss": 0.0873, "step": 32210 }, { "epoch": 1.9057195244573255, "grad_norm": 0.1958141326904297, "learning_rate": 2.801714070851214e-05, "loss": 0.0928, "step": 32220 }, { "epoch": 1.9063109954456734, "grad_norm": 0.4220956861972809, "learning_rate": 2.801571265506211e-05, "loss": 0.0927, "step": 32230 }, { "epoch": 1.9069024664340213, "grad_norm": 0.17636364698410034, "learning_rate": 2.8014284123977363e-05, "loss": 0.0817, "step": 32240 }, { "epoch": 1.9074939374223694, "grad_norm": 0.21833212673664093, "learning_rate": 2.8012855115310313e-05, "loss": 0.0698, "step": 32250 }, { "epoch": 1.9080854084107175, "grad_norm": 0.3404870629310608, "learning_rate": 2.801142562911341e-05, "loss": 0.0816, "step": 32260 }, { "epoch": 1.9086768793990654, "grad_norm": 0.2710515558719635, "learning_rate": 2.8009995665439103e-05, "loss": 0.0814, "step": 32270 }, { "epoch": 1.9092683503874135, "grad_norm": 0.3287609815597534, "learning_rate": 2.800856522433987e-05, "loss": 0.0923, "step": 32280 }, { "epoch": 1.9098598213757616, "grad_norm": 0.2726863920688629, "learning_rate": 2.8007134305868203e-05, "loss": 0.0793, "step": 32290 }, { "epoch": 1.9104512923641095, "grad_norm": 0.2152744084596634, "learning_rate": 2.8005702910076617e-05, "loss": 0.0679, "step": 32300 }, { "epoch": 1.9110427633524576, "grad_norm": 0.2665676176548004, "learning_rate": 2.8004271037017636e-05, "loss": 0.0768, "step": 32310 }, { "epoch": 1.9116342343408057, "grad_norm": 0.24649165570735931, "learning_rate": 2.80028386867438e-05, "loss": 0.0866, "step": 32320 }, { "epoch": 1.9122257053291536, "grad_norm": 0.2325955331325531, "learning_rate": 2.8001405859307684e-05, "loss": 0.0815, "step": 32330 }, { "epoch": 1.9128171763175015, "grad_norm": 0.22711457312107086, "learning_rate": 2.799997255476186e-05, "loss": 0.0776, "step": 32340 }, { "epoch": 1.9134086473058498, "grad_norm": 0.21032023429870605, "learning_rate": 2.799853877315892e-05, "loss": 0.057, "step": 32350 }, { "epoch": 1.9140001182941977, "grad_norm": 0.2925427258014679, "learning_rate": 2.7997104514551493e-05, "loss": 0.0701, "step": 32360 }, { "epoch": 1.9145915892825456, "grad_norm": 0.3511250913143158, "learning_rate": 2.79956697789922e-05, "loss": 0.0865, "step": 32370 }, { "epoch": 1.9151830602708937, "grad_norm": 0.1505398452281952, "learning_rate": 2.7994234566533697e-05, "loss": 0.0866, "step": 32380 }, { "epoch": 1.9157745312592418, "grad_norm": 0.20352868735790253, "learning_rate": 2.799279887722865e-05, "loss": 0.0852, "step": 32390 }, { "epoch": 1.9163660022475897, "grad_norm": 0.1406812220811844, "learning_rate": 2.7991362711129747e-05, "loss": 0.0731, "step": 32400 }, { "epoch": 1.9169574732359378, "grad_norm": 0.26730239391326904, "learning_rate": 2.7989926068289682e-05, "loss": 0.0818, "step": 32410 }, { "epoch": 1.917548944224286, "grad_norm": 0.26420944929122925, "learning_rate": 2.798848894876119e-05, "loss": 0.0789, "step": 32420 }, { "epoch": 1.9181404152126338, "grad_norm": 0.2500435411930084, "learning_rate": 2.798705135259699e-05, "loss": 0.0819, "step": 32430 }, { "epoch": 1.918731886200982, "grad_norm": 0.22269798815250397, "learning_rate": 2.7985613279849854e-05, "loss": 0.0702, "step": 32440 }, { "epoch": 1.91932335718933, "grad_norm": 0.16615238785743713, "learning_rate": 2.798417473057255e-05, "loss": 0.0676, "step": 32450 }, { "epoch": 1.919914828177678, "grad_norm": 0.33316996693611145, "learning_rate": 2.7982735704817858e-05, "loss": 0.0704, "step": 32460 }, { "epoch": 1.9205062991660258, "grad_norm": 0.4328831136226654, "learning_rate": 2.7981296202638597e-05, "loss": 0.0949, "step": 32470 }, { "epoch": 1.9210977701543739, "grad_norm": 0.45011693239212036, "learning_rate": 2.7979856224087587e-05, "loss": 0.0958, "step": 32480 }, { "epoch": 1.921689241142722, "grad_norm": 0.27826157212257385, "learning_rate": 2.7978415769217674e-05, "loss": 0.0915, "step": 32490 }, { "epoch": 1.9222807121310699, "grad_norm": 0.19748994708061218, "learning_rate": 2.797697483808172e-05, "loss": 0.0627, "step": 32500 }, { "epoch": 1.922872183119418, "grad_norm": 0.25902923941612244, "learning_rate": 2.7975533430732588e-05, "loss": 0.0663, "step": 32510 }, { "epoch": 1.923463654107766, "grad_norm": 0.32259947061538696, "learning_rate": 2.7974091547223192e-05, "loss": 0.0875, "step": 32520 }, { "epoch": 1.924055125096114, "grad_norm": 0.2177666574716568, "learning_rate": 2.7972649187606435e-05, "loss": 0.0734, "step": 32530 }, { "epoch": 1.924646596084462, "grad_norm": 0.24915629625320435, "learning_rate": 2.797120635193525e-05, "loss": 0.0757, "step": 32540 }, { "epoch": 1.9252380670728102, "grad_norm": 0.24355275928974152, "learning_rate": 2.796976304026258e-05, "loss": 0.0803, "step": 32550 }, { "epoch": 1.925829538061158, "grad_norm": 0.25485727190971375, "learning_rate": 2.796831925264139e-05, "loss": 0.0804, "step": 32560 }, { "epoch": 1.926421009049506, "grad_norm": 0.17906315624713898, "learning_rate": 2.796687498912467e-05, "loss": 0.0868, "step": 32570 }, { "epoch": 1.9270124800378543, "grad_norm": 0.22153976559638977, "learning_rate": 2.7965430249765412e-05, "loss": 0.0918, "step": 32580 }, { "epoch": 1.9276039510262022, "grad_norm": 0.20794345438480377, "learning_rate": 2.796398503461663e-05, "loss": 0.0906, "step": 32590 }, { "epoch": 1.92819542201455, "grad_norm": 0.4973081052303314, "learning_rate": 2.7962539343731376e-05, "loss": 0.067, "step": 32600 }, { "epoch": 1.9287868930028982, "grad_norm": 0.2240123748779297, "learning_rate": 2.7961093177162684e-05, "loss": 0.0721, "step": 32610 }, { "epoch": 1.9293783639912463, "grad_norm": 0.22059756517410278, "learning_rate": 2.7959646534963633e-05, "loss": 0.0865, "step": 32620 }, { "epoch": 1.9299698349795942, "grad_norm": 0.2886395752429962, "learning_rate": 2.7958199417187305e-05, "loss": 0.0872, "step": 32630 }, { "epoch": 1.9305613059679423, "grad_norm": 0.23753120005130768, "learning_rate": 2.795675182388681e-05, "loss": 0.0851, "step": 32640 }, { "epoch": 1.9311527769562904, "grad_norm": 0.2968668043613434, "learning_rate": 2.7955303755115268e-05, "loss": 0.0721, "step": 32650 }, { "epoch": 1.9317442479446383, "grad_norm": 0.3604702055454254, "learning_rate": 2.7953855210925814e-05, "loss": 0.0789, "step": 32660 }, { "epoch": 1.9323357189329864, "grad_norm": 0.2268953025341034, "learning_rate": 2.795240619137161e-05, "loss": 0.0957, "step": 32670 }, { "epoch": 1.9329271899213345, "grad_norm": 0.2738328278064728, "learning_rate": 2.7950956696505828e-05, "loss": 0.0794, "step": 32680 }, { "epoch": 1.9335186609096824, "grad_norm": 0.35266929864883423, "learning_rate": 2.7949506726381663e-05, "loss": 0.0695, "step": 32690 }, { "epoch": 1.9341101318980303, "grad_norm": 0.12914714217185974, "learning_rate": 2.794805628105232e-05, "loss": 0.0657, "step": 32700 }, { "epoch": 1.9347016028863784, "grad_norm": 0.6734729409217834, "learning_rate": 2.7946605360571024e-05, "loss": 0.0818, "step": 32710 }, { "epoch": 1.9352930738747265, "grad_norm": 0.2861590087413788, "learning_rate": 2.794515396499103e-05, "loss": 0.0839, "step": 32720 }, { "epoch": 1.9358845448630744, "grad_norm": 0.20242413878440857, "learning_rate": 2.794370209436559e-05, "loss": 0.0887, "step": 32730 }, { "epoch": 1.9364760158514225, "grad_norm": 0.26270490884780884, "learning_rate": 2.7942249748747983e-05, "loss": 0.0707, "step": 32740 }, { "epoch": 1.9370674868397706, "grad_norm": 0.1809258908033371, "learning_rate": 2.7940796928191508e-05, "loss": 0.0729, "step": 32750 }, { "epoch": 1.9376589578281185, "grad_norm": 0.2461245208978653, "learning_rate": 2.793934363274948e-05, "loss": 0.0818, "step": 32760 }, { "epoch": 1.9382504288164666, "grad_norm": 0.23200541734695435, "learning_rate": 2.7937889862475227e-05, "loss": 0.0912, "step": 32770 }, { "epoch": 1.9388418998048147, "grad_norm": 0.2617327868938446, "learning_rate": 2.7936435617422103e-05, "loss": 0.0921, "step": 32780 }, { "epoch": 1.9394333707931626, "grad_norm": 0.3361920416355133, "learning_rate": 2.7934980897643465e-05, "loss": 0.0775, "step": 32790 }, { "epoch": 1.9400248417815105, "grad_norm": 0.2668512761592865, "learning_rate": 2.7933525703192707e-05, "loss": 0.0693, "step": 32800 }, { "epoch": 1.9406163127698588, "grad_norm": 0.318674772977829, "learning_rate": 2.793207003412322e-05, "loss": 0.0876, "step": 32810 }, { "epoch": 1.9412077837582067, "grad_norm": 0.2986931800842285, "learning_rate": 2.7930613890488433e-05, "loss": 0.0933, "step": 32820 }, { "epoch": 1.9417992547465546, "grad_norm": 0.30040717124938965, "learning_rate": 2.7929157272341773e-05, "loss": 0.0872, "step": 32830 }, { "epoch": 1.9423907257349027, "grad_norm": 0.25898224115371704, "learning_rate": 2.7927700179736692e-05, "loss": 0.082, "step": 32840 }, { "epoch": 1.9429821967232508, "grad_norm": 0.19209370017051697, "learning_rate": 2.792624261272667e-05, "loss": 0.072, "step": 32850 }, { "epoch": 1.9435736677115987, "grad_norm": 0.3811415135860443, "learning_rate": 2.792478457136519e-05, "loss": 0.0871, "step": 32860 }, { "epoch": 1.9441651386999468, "grad_norm": 0.29509055614471436, "learning_rate": 2.7923326055705757e-05, "loss": 0.0922, "step": 32870 }, { "epoch": 1.944756609688295, "grad_norm": 0.2637777030467987, "learning_rate": 2.7921867065801894e-05, "loss": 0.0959, "step": 32880 }, { "epoch": 1.9453480806766428, "grad_norm": 0.29825201630592346, "learning_rate": 2.7920407601707144e-05, "loss": 0.0788, "step": 32890 }, { "epoch": 1.9459395516649909, "grad_norm": 0.2673318684101105, "learning_rate": 2.7918947663475056e-05, "loss": 0.0683, "step": 32900 }, { "epoch": 1.946531022653339, "grad_norm": 0.47716039419174194, "learning_rate": 2.7917487251159214e-05, "loss": 0.08, "step": 32910 }, { "epoch": 1.9471224936416869, "grad_norm": 0.38047635555267334, "learning_rate": 2.7916026364813207e-05, "loss": 0.0719, "step": 32920 }, { "epoch": 1.9477139646300348, "grad_norm": 0.3043347895145416, "learning_rate": 2.7914565004490646e-05, "loss": 0.0754, "step": 32930 }, { "epoch": 1.9483054356183829, "grad_norm": 0.23030027747154236, "learning_rate": 2.791310317024516e-05, "loss": 0.0791, "step": 32940 }, { "epoch": 1.948896906606731, "grad_norm": 0.18391168117523193, "learning_rate": 2.7911640862130386e-05, "loss": 0.0713, "step": 32950 }, { "epoch": 1.9494883775950789, "grad_norm": 0.28797388076782227, "learning_rate": 2.7910178080199996e-05, "loss": 0.073, "step": 32960 }, { "epoch": 1.950079848583427, "grad_norm": 0.3047351539134979, "learning_rate": 2.7908714824507662e-05, "loss": 0.1059, "step": 32970 }, { "epoch": 1.950671319571775, "grad_norm": 0.19717153906822205, "learning_rate": 2.790725109510708e-05, "loss": 0.0891, "step": 32980 }, { "epoch": 1.951262790560123, "grad_norm": 0.29880183935165405, "learning_rate": 2.7905786892051974e-05, "loss": 0.0719, "step": 32990 }, { "epoch": 1.951854261548471, "grad_norm": 0.13701768219470978, "learning_rate": 2.7904322215396064e-05, "loss": 0.0695, "step": 33000 }, { "epoch": 1.9524457325368192, "grad_norm": 0.26398685574531555, "learning_rate": 2.790285706519311e-05, "loss": 0.0727, "step": 33010 }, { "epoch": 1.953037203525167, "grad_norm": 0.16707783937454224, "learning_rate": 2.7901391441496864e-05, "loss": 0.0802, "step": 33020 }, { "epoch": 1.953628674513515, "grad_norm": 0.3397873044013977, "learning_rate": 2.789992534436112e-05, "loss": 0.0844, "step": 33030 }, { "epoch": 1.9542201455018633, "grad_norm": 0.23688843846321106, "learning_rate": 2.789845877383968e-05, "loss": 0.074, "step": 33040 }, { "epoch": 1.9548116164902112, "grad_norm": 0.4102155268192291, "learning_rate": 2.789699172998636e-05, "loss": 0.0846, "step": 33050 }, { "epoch": 1.955403087478559, "grad_norm": 0.38548967242240906, "learning_rate": 2.7895524212854992e-05, "loss": 0.0669, "step": 33060 }, { "epoch": 1.9559945584669072, "grad_norm": 0.2436135858297348, "learning_rate": 2.7894056222499435e-05, "loss": 0.0878, "step": 33070 }, { "epoch": 1.9565860294552553, "grad_norm": 0.29189926385879517, "learning_rate": 2.789258775897355e-05, "loss": 0.0861, "step": 33080 }, { "epoch": 1.9571775004436032, "grad_norm": 0.15111416578292847, "learning_rate": 2.789111882233124e-05, "loss": 0.067, "step": 33090 }, { "epoch": 1.9577689714319513, "grad_norm": 0.1914990395307541, "learning_rate": 2.7889649412626397e-05, "loss": 0.0834, "step": 33100 }, { "epoch": 1.9583604424202994, "grad_norm": 0.19412317872047424, "learning_rate": 2.788817952991295e-05, "loss": 0.0833, "step": 33110 }, { "epoch": 1.9589519134086473, "grad_norm": 0.3096545338630676, "learning_rate": 2.7886709174244838e-05, "loss": 0.0986, "step": 33120 }, { "epoch": 1.9595433843969954, "grad_norm": 0.1870594322681427, "learning_rate": 2.7885238345676015e-05, "loss": 0.0858, "step": 33130 }, { "epoch": 1.9601348553853435, "grad_norm": 0.3586656451225281, "learning_rate": 2.7883767044260462e-05, "loss": 0.0885, "step": 33140 }, { "epoch": 1.9607263263736914, "grad_norm": 0.21367691457271576, "learning_rate": 2.7882295270052168e-05, "loss": 0.0719, "step": 33150 }, { "epoch": 1.9613177973620393, "grad_norm": 0.21324028074741364, "learning_rate": 2.788082302310514e-05, "loss": 0.0713, "step": 33160 }, { "epoch": 1.9619092683503874, "grad_norm": 0.22886621952056885, "learning_rate": 2.7879350303473402e-05, "loss": 0.093, "step": 33170 }, { "epoch": 1.9625007393387355, "grad_norm": 0.16942475736141205, "learning_rate": 2.787787711121101e-05, "loss": 0.0896, "step": 33180 }, { "epoch": 1.9630922103270834, "grad_norm": 0.24372942745685577, "learning_rate": 2.7876403446372012e-05, "loss": 0.0809, "step": 33190 }, { "epoch": 1.9636836813154315, "grad_norm": 0.24979716539382935, "learning_rate": 2.78749293090105e-05, "loss": 0.0732, "step": 33200 }, { "epoch": 1.9642751523037796, "grad_norm": 0.2874504327774048, "learning_rate": 2.7873454699180555e-05, "loss": 0.0765, "step": 33210 }, { "epoch": 1.9648666232921275, "grad_norm": 0.3667215406894684, "learning_rate": 2.7871979616936304e-05, "loss": 0.0799, "step": 33220 }, { "epoch": 1.9654580942804756, "grad_norm": 0.21619708836078644, "learning_rate": 2.7870504062331868e-05, "loss": 0.0789, "step": 33230 }, { "epoch": 1.9660495652688237, "grad_norm": 0.23574019968509674, "learning_rate": 2.78690280354214e-05, "loss": 0.0774, "step": 33240 }, { "epoch": 1.9666410362571716, "grad_norm": 0.3446825444698334, "learning_rate": 2.7867551536259066e-05, "loss": 0.0745, "step": 33250 }, { "epoch": 1.9672325072455195, "grad_norm": 0.341897189617157, "learning_rate": 2.786607456489905e-05, "loss": 0.0756, "step": 33260 }, { "epoch": 1.9678239782338678, "grad_norm": 0.23940177261829376, "learning_rate": 2.7864597121395545e-05, "loss": 0.0873, "step": 33270 }, { "epoch": 1.9684154492222157, "grad_norm": 0.25616922974586487, "learning_rate": 2.786311920580277e-05, "loss": 0.0899, "step": 33280 }, { "epoch": 1.9690069202105636, "grad_norm": 0.30487877130508423, "learning_rate": 2.786164081817497e-05, "loss": 0.0833, "step": 33290 }, { "epoch": 1.9695983911989117, "grad_norm": 0.3209149241447449, "learning_rate": 2.7860161958566388e-05, "loss": 0.0644, "step": 33300 }, { "epoch": 1.9701898621872598, "grad_norm": 0.31980201601982117, "learning_rate": 2.785868262703129e-05, "loss": 0.0815, "step": 33310 }, { "epoch": 1.9707813331756077, "grad_norm": 0.2574011981487274, "learning_rate": 2.7857202823623972e-05, "loss": 0.0916, "step": 33320 }, { "epoch": 1.9713728041639558, "grad_norm": 0.1882166713476181, "learning_rate": 2.7855722548398734e-05, "loss": 0.0769, "step": 33330 }, { "epoch": 1.9719642751523039, "grad_norm": 0.2927606403827667, "learning_rate": 2.7854241801409897e-05, "loss": 0.0777, "step": 33340 }, { "epoch": 1.9725557461406518, "grad_norm": 0.2126402109861374, "learning_rate": 2.7852760582711796e-05, "loss": 0.0872, "step": 33350 }, { "epoch": 1.9731472171289999, "grad_norm": 0.6173895597457886, "learning_rate": 2.785127889235879e-05, "loss": 0.0747, "step": 33360 }, { "epoch": 1.973738688117348, "grad_norm": 0.22618620097637177, "learning_rate": 2.784979673040526e-05, "loss": 0.0868, "step": 33370 }, { "epoch": 1.9743301591056959, "grad_norm": 0.19562262296676636, "learning_rate": 2.7848314096905588e-05, "loss": 0.0892, "step": 33380 }, { "epoch": 1.9749216300940438, "grad_norm": 0.18309426307678223, "learning_rate": 2.784683099191418e-05, "loss": 0.0833, "step": 33390 }, { "epoch": 1.9755131010823919, "grad_norm": 0.21669237315654755, "learning_rate": 2.7845347415485464e-05, "loss": 0.0682, "step": 33400 }, { "epoch": 1.97610457207074, "grad_norm": 0.37141457200050354, "learning_rate": 2.7843863367673887e-05, "loss": 0.0787, "step": 33410 }, { "epoch": 1.9766960430590879, "grad_norm": 0.21680474281311035, "learning_rate": 2.7842378848533903e-05, "loss": 0.0856, "step": 33420 }, { "epoch": 1.977287514047436, "grad_norm": 0.22960738837718964, "learning_rate": 2.7840893858119986e-05, "loss": 0.0883, "step": 33430 }, { "epoch": 1.977878985035784, "grad_norm": 0.16422978043556213, "learning_rate": 2.783940839648664e-05, "loss": 0.0791, "step": 33440 }, { "epoch": 1.978470456024132, "grad_norm": 0.2761060893535614, "learning_rate": 2.7837922463688373e-05, "loss": 0.0721, "step": 33450 }, { "epoch": 1.97906192701248, "grad_norm": 0.2304811328649521, "learning_rate": 2.783643605977971e-05, "loss": 0.0794, "step": 33460 }, { "epoch": 1.9796533980008282, "grad_norm": 1.4546939134597778, "learning_rate": 2.7834949184815204e-05, "loss": 0.0915, "step": 33470 }, { "epoch": 1.980244868989176, "grad_norm": 0.2024153620004654, "learning_rate": 2.783346183884941e-05, "loss": 0.0809, "step": 33480 }, { "epoch": 1.980836339977524, "grad_norm": 0.31442761421203613, "learning_rate": 2.7831974021936916e-05, "loss": 0.0843, "step": 33490 }, { "epoch": 1.9814278109658723, "grad_norm": 0.2743093967437744, "learning_rate": 2.7830485734132315e-05, "loss": 0.0645, "step": 33500 }, { "epoch": 1.9820192819542202, "grad_norm": 0.37202948331832886, "learning_rate": 2.782899697549023e-05, "loss": 0.0741, "step": 33510 }, { "epoch": 1.982610752942568, "grad_norm": 0.21858836710453033, "learning_rate": 2.7827507746065285e-05, "loss": 0.0923, "step": 33520 }, { "epoch": 1.9832022239309162, "grad_norm": 0.25853127241134644, "learning_rate": 2.782601804591213e-05, "loss": 0.0826, "step": 33530 }, { "epoch": 1.9837936949192643, "grad_norm": 0.26256033778190613, "learning_rate": 2.7824527875085443e-05, "loss": 0.0747, "step": 33540 }, { "epoch": 1.9843851659076122, "grad_norm": 0.36239710450172424, "learning_rate": 2.78230372336399e-05, "loss": 0.072, "step": 33550 }, { "epoch": 1.9849766368959603, "grad_norm": 0.3194473087787628, "learning_rate": 2.7821546121630198e-05, "loss": 0.0827, "step": 33560 }, { "epoch": 1.9855681078843084, "grad_norm": 0.29258689284324646, "learning_rate": 2.7820054539111066e-05, "loss": 0.0968, "step": 33570 }, { "epoch": 1.9861595788726563, "grad_norm": 0.245907723903656, "learning_rate": 2.7818562486137234e-05, "loss": 0.0796, "step": 33580 }, { "epoch": 1.9867510498610044, "grad_norm": 0.2012362778186798, "learning_rate": 2.781706996276346e-05, "loss": 0.0917, "step": 33590 }, { "epoch": 1.9873425208493525, "grad_norm": 0.31298181414604187, "learning_rate": 2.781557696904451e-05, "loss": 0.0777, "step": 33600 }, { "epoch": 1.9879339918377004, "grad_norm": 0.3697013556957245, "learning_rate": 2.7814083505035174e-05, "loss": 0.0731, "step": 33610 }, { "epoch": 1.9885254628260483, "grad_norm": 0.205588698387146, "learning_rate": 2.781258957079026e-05, "loss": 0.0854, "step": 33620 }, { "epoch": 1.9891169338143964, "grad_norm": 0.28467312455177307, "learning_rate": 2.781109516636459e-05, "loss": 0.0865, "step": 33630 }, { "epoch": 1.9897084048027445, "grad_norm": 0.18742351233959198, "learning_rate": 2.7809600291813e-05, "loss": 0.0784, "step": 33640 }, { "epoch": 1.9902998757910924, "grad_norm": 0.15212039649486542, "learning_rate": 2.7808104947190348e-05, "loss": 0.0588, "step": 33650 }, { "epoch": 1.9908913467794405, "grad_norm": 0.24170315265655518, "learning_rate": 2.7806609132551512e-05, "loss": 0.0722, "step": 33660 }, { "epoch": 1.9914828177677886, "grad_norm": 0.32029587030410767, "learning_rate": 2.780511284795138e-05, "loss": 0.0908, "step": 33670 }, { "epoch": 1.9920742887561365, "grad_norm": 0.21325314044952393, "learning_rate": 2.7803616093444864e-05, "loss": 0.0927, "step": 33680 }, { "epoch": 1.9926657597444846, "grad_norm": 0.19892625510692596, "learning_rate": 2.7802118869086882e-05, "loss": 0.0719, "step": 33690 }, { "epoch": 1.9932572307328327, "grad_norm": 5.815086841583252, "learning_rate": 2.780062117493239e-05, "loss": 0.0706, "step": 33700 }, { "epoch": 1.9938487017211806, "grad_norm": 0.3760562539100647, "learning_rate": 2.779912301103634e-05, "loss": 0.08, "step": 33710 }, { "epoch": 1.9944401727095284, "grad_norm": 0.2234477698802948, "learning_rate": 2.779762437745371e-05, "loss": 0.0939, "step": 33720 }, { "epoch": 1.9950316436978768, "grad_norm": 0.30731672048568726, "learning_rate": 2.7796125274239498e-05, "loss": 0.0841, "step": 33730 }, { "epoch": 1.9956231146862247, "grad_norm": 0.2563929259777069, "learning_rate": 2.7794625701448717e-05, "loss": 0.0932, "step": 33740 }, { "epoch": 1.9962145856745726, "grad_norm": 0.21441523730754852, "learning_rate": 2.7793125659136393e-05, "loss": 0.0725, "step": 33750 }, { "epoch": 1.9968060566629207, "grad_norm": 0.3214682638645172, "learning_rate": 2.779162514735757e-05, "loss": 0.0846, "step": 33760 }, { "epoch": 1.9973975276512688, "grad_norm": 0.2176462560892105, "learning_rate": 2.7790124166167324e-05, "loss": 0.0855, "step": 33770 }, { "epoch": 1.9979889986396167, "grad_norm": 0.23106130957603455, "learning_rate": 2.7788622715620722e-05, "loss": 0.0823, "step": 33780 }, { "epoch": 1.9985804696279648, "grad_norm": 0.21134227514266968, "learning_rate": 2.7787120795772877e-05, "loss": 0.0708, "step": 33790 }, { "epoch": 1.9991719406163129, "grad_norm": 0.2240174114704132, "learning_rate": 2.7785618406678888e-05, "loss": 0.0799, "step": 33800 }, { "epoch": 1.9997634116046608, "grad_norm": 0.23130792379379272, "learning_rate": 2.77841155483939e-05, "loss": 0.0742, "step": 33810 }, { "epoch": 2.0, "eval_accuracy": 0.6607612203480054, "eval_animal_abuse/accuracy": 0.9945936054829158, "eval_animal_abuse/f1": 0.7709654686398872, "eval_animal_abuse/fpr": 0.003096287820146059, "eval_animal_abuse/precision": 0.7482900136798906, "eval_animal_abuse/recall": 0.7950581395348837, "eval_animal_abuse/threshold": 0.2735743820667267, "eval_child_abuse/accuracy": 0.9968227035299597, "eval_child_abuse/f1": 0.6904376012965965, "eval_child_abuse/fpr": 0.0011876683227112273, "eval_child_abuse/precision": 0.75, "eval_child_abuse/recall": 0.6396396396396397, "eval_child_abuse/threshold": 0.3747906982898712, "eval_controversial_topics,politics/accuracy": 0.9679276042186512, "eval_controversial_topics,politics/f1": 0.5167919799498747, "eval_controversial_topics,politics/fpr": 0.01916872597473912, "eval_controversial_topics,politics/precision": 0.47998137802607077, "eval_controversial_topics,politics/recall": 0.5597176981541803, "eval_controversial_topics,politics/threshold": 0.28219619393348694, "eval_discrimination,stereotype,injustice/accuracy": 0.9497621186412483, "eval_discrimination,stereotype,injustice/f1": 0.7109494640122511, "eval_discrimination,stereotype,injustice/fpr": 0.03531282755629444, "eval_discrimination,stereotype,injustice/precision": 0.655257586450247, "eval_discrimination,stereotype,injustice/recall": 0.7769874476987447, "eval_discrimination,stereotype,injustice/threshold": 0.18713268637657166, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9727517716338956, "eval_drug_abuse,weapons,banned_substance/f1": 0.7689051918735892, "eval_drug_abuse,weapons,banned_substance/fpr": 0.01722253560851781, "eval_drug_abuse,weapons,banned_substance/precision": 0.736088600756348, "eval_drug_abuse,weapons,banned_substance/recall": 0.8047844063792085, "eval_drug_abuse,weapons,banned_substance/threshold": 0.34775859117507935, "eval_financial_crime,property_crime,theft/accuracy": 0.959260737931264, "eval_financial_crime,property_crime,theft/f1": 0.803624408628017, "eval_financial_crime,property_crime,theft/fpr": 0.029651880655326777, "eval_financial_crime,property_crime,theft/precision": 0.7569486404833837, "eval_financial_crime,property_crime,theft/recall": 0.8564347974705179, "eval_financial_crime,property_crime,theft/threshold": 0.3830641508102417, "eval_flagged/accuracy": 0.8492697208636923, "eval_flagged/aucpr": 0.899063650578452, "eval_flagged/f1": 0.8679212279346384, "eval_flagged/fpr": 0.20181524959681882, "eval_flagged/precision": 0.8469219390077378, "eval_flagged/recall": 0.8899883411557203, "eval_hate_speech,offensive_language/accuracy": 0.9504275210433509, "eval_hate_speech,offensive_language/f1": 0.7013429544998998, "eval_hate_speech,offensive_language/fpr": 0.02000730860588339, "eval_hate_speech,offensive_language/precision": 0.761645624727906, "eval_hate_speech,offensive_language/recall": 0.649888558692422, "eval_hate_speech,offensive_language/threshold": 0.3886180520057678, "eval_loss": 0.08257276564836502, "eval_macro_f1": 0.6702854759353558, "eval_macro_precision": 0.6571975451925889, "eval_macro_recall": 0.6909492470772027, "eval_micro_f1": 0.7482216106755283, "eval_micro_precision": 0.728796129271541, "eval_micro_recall": 0.7687109909845831, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9791562697541338, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.22606547251389747, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.011872084603337635, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.20608108108108109, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.2503419972640219, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.18713268637657166, "eval_non_violent_unethical_behavior/accuracy": 0.8782979006554213, "eval_non_violent_unethical_behavior/f1": 0.6907338518769023, "eval_non_violent_unethical_behavior/fpr": 0.0734927752864971, "eval_non_violent_unethical_behavior/precision": 0.6976942783945346, "eval_non_violent_unethical_behavior/recall": 0.683910932529717, "eval_non_violent_unethical_behavior/threshold": 0.34775859117507935, "eval_privacy_violation/accuracy": 0.9805203446784443, "eval_privacy_violation/f1": 0.8086913902957034, "eval_privacy_violation/fpr": 0.011898929096381305, "eval_privacy_violation/precision": 0.7844690966719493, "eval_privacy_violation/recall": 0.8344571813890762, "eval_privacy_violation/threshold": 0.34687307476997375, "eval_runtime": 83.6609, "eval_samples_per_second": 718.544, "eval_self_harm/accuracy": 0.996623082809329, "eval_self_harm/f1": 0.729693741677763, "eval_self_harm/fpr": 0.001122202867479564, "eval_self_harm/precision": 0.8035190615835777, "eval_self_harm/recall": 0.6682926829268293, "eval_self_harm/threshold": 0.48785635828971863, "eval_sexually_explicit,adult_content/accuracy": 0.983448115247696, "eval_sexually_explicit,adult_content/f1": 0.6822101564995209, "eval_sexually_explicit,adult_content/fpr": 0.010499940341248043, "eval_sexually_explicit,adult_content/precision": 0.6342042755344418, "eval_sexually_explicit,adult_content/recall": 0.7380787836903939, "eval_sexually_explicit,adult_content/threshold": 0.3123700022697449, "eval_steps_per_second": 44.919, "eval_terrorism,organized_crime/accuracy": 0.9887713344645174, "eval_terrorism,organized_crime/f1": 0.4332493702770781, "eval_terrorism,organized_crime/fpr": 0.007579695806013436, "eval_terrorism,organized_crime/precision": 0.36338028169014086, "eval_terrorism,organized_crime/recall": 0.5363825363825364, "eval_terrorism,organized_crime/threshold": 0.17328819632530212, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9176564527397943, "eval_violence,aiding_and_abetting,incitement/f1": 0.8503356110540001, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06844657993744602, "eval_violence,aiding_and_abetting,incitement/precision": 0.8232057136166725, "eval_violence,aiding_and_abetting,incitement/recall": 0.8793146573286643, "eval_violence,aiding_and_abetting,incitement/threshold": 0.43014734983444214, "step": 33814 }, { "epoch": 2.0003548825930086, "grad_norm": 0.23188789188861847, "learning_rate": 2.7782612220973058e-05, "loss": 0.0814, "step": 33820 }, { "epoch": 2.000946353581357, "grad_norm": 0.6500804424285889, "learning_rate": 2.7781108424471526e-05, "loss": 0.0752, "step": 33830 }, { "epoch": 2.001537824569705, "grad_norm": 0.19501632452011108, "learning_rate": 2.77796041589445e-05, "loss": 0.0782, "step": 33840 }, { "epoch": 2.0021292955580527, "grad_norm": 0.3436525762081146, "learning_rate": 2.7778099424447175e-05, "loss": 0.0761, "step": 33850 }, { "epoch": 2.002720766546401, "grad_norm": 0.2373393028974533, "learning_rate": 2.7776594221034763e-05, "loss": 0.0694, "step": 33860 }, { "epoch": 2.003312237534749, "grad_norm": 0.25522229075431824, "learning_rate": 2.7775088548762514e-05, "loss": 0.0773, "step": 33870 }, { "epoch": 2.003903708523097, "grad_norm": 0.24835404753684998, "learning_rate": 2.777358240768567e-05, "loss": 0.0923, "step": 33880 }, { "epoch": 2.004495179511445, "grad_norm": 0.20451675355434418, "learning_rate": 2.777207579785951e-05, "loss": 0.0762, "step": 33890 }, { "epoch": 2.005086650499793, "grad_norm": 0.20223581790924072, "learning_rate": 2.7770568719339312e-05, "loss": 0.0655, "step": 33900 }, { "epoch": 2.005678121488141, "grad_norm": 0.22411231696605682, "learning_rate": 2.7769061172180387e-05, "loss": 0.0786, "step": 33910 }, { "epoch": 2.006269592476489, "grad_norm": 0.28326767683029175, "learning_rate": 2.7767553156438057e-05, "loss": 0.0709, "step": 33920 }, { "epoch": 2.006861063464837, "grad_norm": 0.1835663616657257, "learning_rate": 2.776604467216766e-05, "loss": 0.0865, "step": 33930 }, { "epoch": 2.007452534453185, "grad_norm": 0.3143583834171295, "learning_rate": 2.7764535719424554e-05, "loss": 0.0693, "step": 33940 }, { "epoch": 2.008044005441533, "grad_norm": 0.44684386253356934, "learning_rate": 2.7763026298264115e-05, "loss": 0.0695, "step": 33950 }, { "epoch": 2.0086354764298813, "grad_norm": 0.18165352940559387, "learning_rate": 2.776151640874173e-05, "loss": 0.0708, "step": 33960 }, { "epoch": 2.009226947418229, "grad_norm": 0.3087877333164215, "learning_rate": 2.7760006050912804e-05, "loss": 0.0818, "step": 33970 }, { "epoch": 2.009818418406577, "grad_norm": 0.2709878385066986, "learning_rate": 2.7758495224832768e-05, "loss": 0.0742, "step": 33980 }, { "epoch": 2.0104098893949254, "grad_norm": 0.24648648500442505, "learning_rate": 2.7756983930557066e-05, "loss": 0.0859, "step": 33990 }, { "epoch": 2.0110013603832733, "grad_norm": 0.19217729568481445, "learning_rate": 2.7755472168141148e-05, "loss": 0.0796, "step": 34000 }, { "epoch": 2.011592831371621, "grad_norm": 0.19126592576503754, "learning_rate": 2.77539599376405e-05, "loss": 0.0587, "step": 34010 }, { "epoch": 2.012184302359969, "grad_norm": 0.21343013644218445, "learning_rate": 2.7752447239110615e-05, "loss": 0.0825, "step": 34020 }, { "epoch": 2.0127757733483174, "grad_norm": 0.3255603313446045, "learning_rate": 2.7750934072607e-05, "loss": 0.0808, "step": 34030 }, { "epoch": 2.0133672443366653, "grad_norm": 0.2769628167152405, "learning_rate": 2.7749420438185184e-05, "loss": 0.0837, "step": 34040 }, { "epoch": 2.013958715325013, "grad_norm": 0.22503836452960968, "learning_rate": 2.774790633590072e-05, "loss": 0.0668, "step": 34050 }, { "epoch": 2.0145501863133615, "grad_norm": 0.20989282429218292, "learning_rate": 2.7746391765809157e-05, "loss": 0.0591, "step": 34060 }, { "epoch": 2.0151416573017094, "grad_norm": 0.2182508409023285, "learning_rate": 2.774487672796609e-05, "loss": 0.0774, "step": 34070 }, { "epoch": 2.0157331282900572, "grad_norm": 0.3571649193763733, "learning_rate": 2.77433612224271e-05, "loss": 0.0795, "step": 34080 }, { "epoch": 2.0163245992784056, "grad_norm": 0.522011935710907, "learning_rate": 2.7741845249247816e-05, "loss": 0.0787, "step": 34090 }, { "epoch": 2.0169160702667535, "grad_norm": 0.15803849697113037, "learning_rate": 2.7740328808483864e-05, "loss": 0.0738, "step": 34100 }, { "epoch": 2.0175075412551013, "grad_norm": 0.22571659088134766, "learning_rate": 2.7738811900190888e-05, "loss": 0.0564, "step": 34110 }, { "epoch": 2.0180990122434497, "grad_norm": 0.24101731181144714, "learning_rate": 2.7737294524424555e-05, "loss": 0.0748, "step": 34120 }, { "epoch": 2.0186904832317976, "grad_norm": 0.2488047480583191, "learning_rate": 2.7735776681240553e-05, "loss": 0.0749, "step": 34130 }, { "epoch": 2.0192819542201454, "grad_norm": 0.22856900095939636, "learning_rate": 2.7734258370694582e-05, "loss": 0.0748, "step": 34140 }, { "epoch": 2.0198734252084933, "grad_norm": 0.30317720770835876, "learning_rate": 2.7732739592842354e-05, "loss": 0.07, "step": 34150 }, { "epoch": 2.0204648961968417, "grad_norm": 0.24288305640220642, "learning_rate": 2.7731220347739606e-05, "loss": 0.053, "step": 34160 }, { "epoch": 2.0210563671851896, "grad_norm": 0.7175596356391907, "learning_rate": 2.7729700635442084e-05, "loss": 0.0708, "step": 34170 }, { "epoch": 2.0216478381735374, "grad_norm": 0.3340443968772888, "learning_rate": 2.7728180456005563e-05, "loss": 0.0748, "step": 34180 }, { "epoch": 2.0222393091618858, "grad_norm": 0.2356317788362503, "learning_rate": 2.7726659809485828e-05, "loss": 0.0783, "step": 34190 }, { "epoch": 2.0228307801502337, "grad_norm": 0.2337334156036377, "learning_rate": 2.7725138695938685e-05, "loss": 0.0707, "step": 34200 }, { "epoch": 2.0234222511385815, "grad_norm": 0.24006560444831848, "learning_rate": 2.7723617115419945e-05, "loss": 0.0614, "step": 34210 }, { "epoch": 2.02401372212693, "grad_norm": 0.27765095233917236, "learning_rate": 2.772209506798545e-05, "loss": 0.0732, "step": 34220 }, { "epoch": 2.0246051931152778, "grad_norm": 0.21305255591869354, "learning_rate": 2.772057255369106e-05, "loss": 0.0782, "step": 34230 }, { "epoch": 2.0251966641036256, "grad_norm": 0.26502811908721924, "learning_rate": 2.771904957259263e-05, "loss": 0.08, "step": 34240 }, { "epoch": 2.0257881350919735, "grad_norm": 0.3100709915161133, "learning_rate": 2.7717526124746064e-05, "loss": 0.0771, "step": 34250 }, { "epoch": 2.026379606080322, "grad_norm": 0.2511020004749298, "learning_rate": 2.7716002210207264e-05, "loss": 0.0638, "step": 34260 }, { "epoch": 2.0269710770686697, "grad_norm": 0.2999921441078186, "learning_rate": 2.771447782903215e-05, "loss": 0.0798, "step": 34270 }, { "epoch": 2.0275625480570176, "grad_norm": 0.263761430978775, "learning_rate": 2.7712952981276664e-05, "loss": 0.0734, "step": 34280 }, { "epoch": 2.028154019045366, "grad_norm": 0.24385713040828705, "learning_rate": 2.7711427666996763e-05, "loss": 0.0732, "step": 34290 }, { "epoch": 2.028745490033714, "grad_norm": 0.24898099899291992, "learning_rate": 2.770990188624842e-05, "loss": 0.076, "step": 34300 }, { "epoch": 2.0293369610220617, "grad_norm": 0.438127726316452, "learning_rate": 2.7708375639087625e-05, "loss": 0.0662, "step": 34310 }, { "epoch": 2.02992843201041, "grad_norm": 0.2760345935821533, "learning_rate": 2.7706848925570386e-05, "loss": 0.0727, "step": 34320 }, { "epoch": 2.030519902998758, "grad_norm": 0.3509216904640198, "learning_rate": 2.7705321745752735e-05, "loss": 0.0754, "step": 34330 }, { "epoch": 2.031111373987106, "grad_norm": 0.21928465366363525, "learning_rate": 2.7703794099690707e-05, "loss": 0.0724, "step": 34340 }, { "epoch": 2.031702844975454, "grad_norm": 0.2792761027812958, "learning_rate": 2.7702265987440366e-05, "loss": 0.0666, "step": 34350 }, { "epoch": 2.032294315963802, "grad_norm": 0.2760184705257416, "learning_rate": 2.770073740905779e-05, "loss": 0.0574, "step": 34360 }, { "epoch": 2.03288578695215, "grad_norm": 0.32641366124153137, "learning_rate": 2.7699208364599067e-05, "loss": 0.0724, "step": 34370 }, { "epoch": 2.033477257940498, "grad_norm": 0.3330936133861542, "learning_rate": 2.7697678854120313e-05, "loss": 0.0761, "step": 34380 }, { "epoch": 2.034068728928846, "grad_norm": 0.23426678776741028, "learning_rate": 2.769614887767765e-05, "loss": 0.0691, "step": 34390 }, { "epoch": 2.034660199917194, "grad_norm": 0.23894928395748138, "learning_rate": 2.769461843532723e-05, "loss": 0.0641, "step": 34400 }, { "epoch": 2.035251670905542, "grad_norm": 0.24092766642570496, "learning_rate": 2.7693087527125217e-05, "loss": 0.0597, "step": 34410 }, { "epoch": 2.0358431418938903, "grad_norm": 0.2816120386123657, "learning_rate": 2.7691556153127782e-05, "loss": 0.0896, "step": 34420 }, { "epoch": 2.036434612882238, "grad_norm": 0.2986956536769867, "learning_rate": 2.7690024313391128e-05, "loss": 0.0866, "step": 34430 }, { "epoch": 2.037026083870586, "grad_norm": 0.27061983942985535, "learning_rate": 2.7688492007971467e-05, "loss": 0.0724, "step": 34440 }, { "epoch": 2.0376175548589344, "grad_norm": 0.27262642979621887, "learning_rate": 2.7686959236925028e-05, "loss": 0.0695, "step": 34450 }, { "epoch": 2.0382090258472823, "grad_norm": 0.28608766198158264, "learning_rate": 2.7685426000308058e-05, "loss": 0.0698, "step": 34460 }, { "epoch": 2.03880049683563, "grad_norm": 0.2664140462875366, "learning_rate": 2.7683892298176827e-05, "loss": 0.0802, "step": 34470 }, { "epoch": 2.039391967823978, "grad_norm": 0.36842668056488037, "learning_rate": 2.7682358130587608e-05, "loss": 0.0815, "step": 34480 }, { "epoch": 2.0399834388123264, "grad_norm": 0.16882917284965515, "learning_rate": 2.768082349759671e-05, "loss": 0.0744, "step": 34490 }, { "epoch": 2.0405749098006742, "grad_norm": 0.2666638195514679, "learning_rate": 2.7679288399260447e-05, "loss": 0.0749, "step": 34500 }, { "epoch": 2.041166380789022, "grad_norm": 0.14035171270370483, "learning_rate": 2.767775283563515e-05, "loss": 0.0583, "step": 34510 }, { "epoch": 2.0417578517773705, "grad_norm": 0.5335877537727356, "learning_rate": 2.7676216806777166e-05, "loss": 0.0739, "step": 34520 }, { "epoch": 2.0423493227657183, "grad_norm": 0.4131584167480469, "learning_rate": 2.7674680312742863e-05, "loss": 0.0788, "step": 34530 }, { "epoch": 2.0429407937540662, "grad_norm": 0.27057182788848877, "learning_rate": 2.767314335358863e-05, "loss": 0.0826, "step": 34540 }, { "epoch": 2.0435322647424146, "grad_norm": 0.22804118692874908, "learning_rate": 2.767160592937087e-05, "loss": 0.0653, "step": 34550 }, { "epoch": 2.0441237357307624, "grad_norm": 0.24701447784900665, "learning_rate": 2.7670068040145994e-05, "loss": 0.0622, "step": 34560 }, { "epoch": 2.0447152067191103, "grad_norm": 0.2412748634815216, "learning_rate": 2.7668529685970443e-05, "loss": 0.0688, "step": 34570 }, { "epoch": 2.0453066777074587, "grad_norm": 0.25668609142303467, "learning_rate": 2.7666990866900668e-05, "loss": 0.0811, "step": 34580 }, { "epoch": 2.0458981486958066, "grad_norm": 0.24900344014167786, "learning_rate": 2.7665451582993137e-05, "loss": 0.0711, "step": 34590 }, { "epoch": 2.0464896196841544, "grad_norm": 0.30013781785964966, "learning_rate": 2.766391183430434e-05, "loss": 0.0662, "step": 34600 }, { "epoch": 2.0470810906725023, "grad_norm": 0.166748508810997, "learning_rate": 2.766237162089078e-05, "loss": 0.0669, "step": 34610 }, { "epoch": 2.0476725616608507, "grad_norm": 0.2777673006057739, "learning_rate": 2.7660830942808975e-05, "loss": 0.076, "step": 34620 }, { "epoch": 2.0482640326491985, "grad_norm": 0.29202356934547424, "learning_rate": 2.7659289800115465e-05, "loss": 0.0878, "step": 34630 }, { "epoch": 2.0488555036375464, "grad_norm": 0.2716125249862671, "learning_rate": 2.7657748192866806e-05, "loss": 0.0788, "step": 34640 }, { "epoch": 2.0494469746258948, "grad_norm": 0.21064263582229614, "learning_rate": 2.7656206121119573e-05, "loss": 0.0702, "step": 34650 }, { "epoch": 2.0500384456142426, "grad_norm": 0.28706735372543335, "learning_rate": 2.7654663584930345e-05, "loss": 0.0723, "step": 34660 }, { "epoch": 2.0506299166025905, "grad_norm": 0.2678033709526062, "learning_rate": 2.7653120584355737e-05, "loss": 0.0765, "step": 34670 }, { "epoch": 2.051221387590939, "grad_norm": 0.457154244184494, "learning_rate": 2.7651577119452366e-05, "loss": 0.075, "step": 34680 }, { "epoch": 2.0518128585792867, "grad_norm": 0.2274620234966278, "learning_rate": 2.765003319027688e-05, "loss": 0.0737, "step": 34690 }, { "epoch": 2.0524043295676346, "grad_norm": 0.3537290692329407, "learning_rate": 2.7648488796885932e-05, "loss": 0.0718, "step": 34700 }, { "epoch": 2.0529958005559825, "grad_norm": 0.28974276781082153, "learning_rate": 2.7646943939336196e-05, "loss": 0.0596, "step": 34710 }, { "epoch": 2.053587271544331, "grad_norm": 0.48527011275291443, "learning_rate": 2.764539861768436e-05, "loss": 0.0766, "step": 34720 }, { "epoch": 2.0541787425326787, "grad_norm": 0.29510965943336487, "learning_rate": 2.764385283198714e-05, "loss": 0.0858, "step": 34730 }, { "epoch": 2.0547702135210266, "grad_norm": 0.24046389758586884, "learning_rate": 2.7642306582301254e-05, "loss": 0.0785, "step": 34740 }, { "epoch": 2.055361684509375, "grad_norm": 0.23310011625289917, "learning_rate": 2.764075986868345e-05, "loss": 0.0675, "step": 34750 }, { "epoch": 2.055953155497723, "grad_norm": 0.4268381595611572, "learning_rate": 2.763921269119048e-05, "loss": 0.069, "step": 34760 }, { "epoch": 2.0565446264860707, "grad_norm": 0.14477601647377014, "learning_rate": 2.763766504987913e-05, "loss": 0.0684, "step": 34770 }, { "epoch": 2.057136097474419, "grad_norm": 0.24592351913452148, "learning_rate": 2.7636116944806186e-05, "loss": 0.0725, "step": 34780 }, { "epoch": 2.057727568462767, "grad_norm": 0.2680130898952484, "learning_rate": 2.7634568376028463e-05, "loss": 0.0801, "step": 34790 }, { "epoch": 2.058319039451115, "grad_norm": 0.25434410572052, "learning_rate": 2.7633019343602788e-05, "loss": 0.0728, "step": 34800 }, { "epoch": 2.058910510439463, "grad_norm": 0.33596181869506836, "learning_rate": 2.7631469847586e-05, "loss": 0.0583, "step": 34810 }, { "epoch": 2.059501981427811, "grad_norm": 0.4223988950252533, "learning_rate": 2.7629919888034966e-05, "loss": 0.0958, "step": 34820 }, { "epoch": 2.060093452416159, "grad_norm": 0.25609445571899414, "learning_rate": 2.7628369465006563e-05, "loss": 0.0826, "step": 34830 }, { "epoch": 2.060684923404507, "grad_norm": 0.2761741876602173, "learning_rate": 2.7626818578557693e-05, "loss": 0.0709, "step": 34840 }, { "epoch": 2.061276394392855, "grad_norm": 0.31300246715545654, "learning_rate": 2.7625267228745253e-05, "loss": 0.0649, "step": 34850 }, { "epoch": 2.061867865381203, "grad_norm": 0.26112252473831177, "learning_rate": 2.762371541562619e-05, "loss": 0.0745, "step": 34860 }, { "epoch": 2.062459336369551, "grad_norm": 0.27056562900543213, "learning_rate": 2.7622163139257438e-05, "loss": 0.0727, "step": 34870 }, { "epoch": 2.0630508073578993, "grad_norm": 0.1917833387851715, "learning_rate": 2.7620610399695964e-05, "loss": 0.0698, "step": 34880 }, { "epoch": 2.063642278346247, "grad_norm": 0.21204181015491486, "learning_rate": 2.7619057196998752e-05, "loss": 0.0767, "step": 34890 }, { "epoch": 2.064233749334595, "grad_norm": 0.31301161646842957, "learning_rate": 2.7617503531222794e-05, "loss": 0.0736, "step": 34900 }, { "epoch": 2.0648252203229434, "grad_norm": 0.2749568223953247, "learning_rate": 2.7615949402425114e-05, "loss": 0.0601, "step": 34910 }, { "epoch": 2.0654166913112912, "grad_norm": 0.20587383210659027, "learning_rate": 2.761439481066273e-05, "loss": 0.0726, "step": 34920 }, { "epoch": 2.066008162299639, "grad_norm": 0.20000414550304413, "learning_rate": 2.7612839755992702e-05, "loss": 0.0759, "step": 34930 }, { "epoch": 2.066599633287987, "grad_norm": 0.2850423753261566, "learning_rate": 2.7611284238472088e-05, "loss": 0.0798, "step": 34940 }, { "epoch": 2.0671911042763353, "grad_norm": 0.2262641340494156, "learning_rate": 2.7609728258157974e-05, "loss": 0.0778, "step": 34950 }, { "epoch": 2.0677825752646832, "grad_norm": 0.25647592544555664, "learning_rate": 2.760817181510746e-05, "loss": 0.0653, "step": 34960 }, { "epoch": 2.068374046253031, "grad_norm": 0.24745623767375946, "learning_rate": 2.760661490937766e-05, "loss": 0.0768, "step": 34970 }, { "epoch": 2.0689655172413794, "grad_norm": 0.24240407347679138, "learning_rate": 2.760505754102571e-05, "loss": 0.0792, "step": 34980 }, { "epoch": 2.0695569882297273, "grad_norm": 0.3092505931854248, "learning_rate": 2.7603499710108758e-05, "loss": 0.0737, "step": 34990 }, { "epoch": 2.070148459218075, "grad_norm": 0.2704700827598572, "learning_rate": 2.7601941416683973e-05, "loss": 0.0759, "step": 35000 }, { "epoch": 2.0707399302064236, "grad_norm": 0.2623562812805176, "learning_rate": 2.7600382660808537e-05, "loss": 0.0707, "step": 35010 }, { "epoch": 2.0713314011947714, "grad_norm": 0.3822818994522095, "learning_rate": 2.759882344253966e-05, "loss": 0.0686, "step": 35020 }, { "epoch": 2.0719228721831193, "grad_norm": 0.34677767753601074, "learning_rate": 2.7597263761934545e-05, "loss": 0.0773, "step": 35030 }, { "epoch": 2.0725143431714677, "grad_norm": 0.2376457005739212, "learning_rate": 2.7595703619050444e-05, "loss": 0.0725, "step": 35040 }, { "epoch": 2.0731058141598155, "grad_norm": 0.20213457942008972, "learning_rate": 2.7594143013944597e-05, "loss": 0.063, "step": 35050 }, { "epoch": 2.0736972851481634, "grad_norm": 0.24487438797950745, "learning_rate": 2.7592581946674277e-05, "loss": 0.0674, "step": 35060 }, { "epoch": 2.0742887561365113, "grad_norm": 0.2594928741455078, "learning_rate": 2.7591020417296774e-05, "loss": 0.075, "step": 35070 }, { "epoch": 2.0748802271248596, "grad_norm": 0.21570642292499542, "learning_rate": 2.758945842586938e-05, "loss": 0.0729, "step": 35080 }, { "epoch": 2.0754716981132075, "grad_norm": 0.8361179232597351, "learning_rate": 2.758789597244943e-05, "loss": 0.0753, "step": 35090 }, { "epoch": 2.0760631691015554, "grad_norm": 0.2601967751979828, "learning_rate": 2.758633305709425e-05, "loss": 0.0679, "step": 35100 }, { "epoch": 2.0766546400899037, "grad_norm": 0.20211568474769592, "learning_rate": 2.75847696798612e-05, "loss": 0.0563, "step": 35110 }, { "epoch": 2.0772461110782516, "grad_norm": 0.2966079115867615, "learning_rate": 2.7583205840807646e-05, "loss": 0.0681, "step": 35120 }, { "epoch": 2.0778375820665995, "grad_norm": 0.2161778211593628, "learning_rate": 2.7581641539990977e-05, "loss": 0.0769, "step": 35130 }, { "epoch": 2.078429053054948, "grad_norm": 0.22200877964496613, "learning_rate": 2.75800767774686e-05, "loss": 0.0875, "step": 35140 }, { "epoch": 2.0790205240432957, "grad_norm": 0.1878858059644699, "learning_rate": 2.7578511553297937e-05, "loss": 0.0656, "step": 35150 }, { "epoch": 2.0796119950316436, "grad_norm": 0.22677908837795258, "learning_rate": 2.7576945867536423e-05, "loss": 0.0586, "step": 35160 }, { "epoch": 2.0802034660199915, "grad_norm": 0.3179025948047638, "learning_rate": 2.7575379720241517e-05, "loss": 0.0746, "step": 35170 }, { "epoch": 2.08079493700834, "grad_norm": 0.24207715690135956, "learning_rate": 2.7573813111470697e-05, "loss": 0.079, "step": 35180 }, { "epoch": 2.0813864079966877, "grad_norm": 0.2643544673919678, "learning_rate": 2.7572246041281437e-05, "loss": 0.0774, "step": 35190 }, { "epoch": 2.0819778789850356, "grad_norm": 0.1953153908252716, "learning_rate": 2.7570678509731258e-05, "loss": 0.0656, "step": 35200 }, { "epoch": 2.082569349973384, "grad_norm": 0.15158244967460632, "learning_rate": 2.756911051687768e-05, "loss": 0.0633, "step": 35210 }, { "epoch": 2.083160820961732, "grad_norm": 0.4635500907897949, "learning_rate": 2.756754206277823e-05, "loss": 0.0829, "step": 35220 }, { "epoch": 2.0837522919500797, "grad_norm": 0.2925766408443451, "learning_rate": 2.7565973147490486e-05, "loss": 0.0787, "step": 35230 }, { "epoch": 2.084343762938428, "grad_norm": 0.258266806602478, "learning_rate": 2.7564403771072013e-05, "loss": 0.0711, "step": 35240 }, { "epoch": 2.084935233926776, "grad_norm": 0.3289991617202759, "learning_rate": 2.75628339335804e-05, "loss": 0.0818, "step": 35250 }, { "epoch": 2.085526704915124, "grad_norm": 0.20565329492092133, "learning_rate": 2.7561263635073254e-05, "loss": 0.066, "step": 35260 }, { "epoch": 2.086118175903472, "grad_norm": 0.3299778699874878, "learning_rate": 2.7559692875608205e-05, "loss": 0.0796, "step": 35270 }, { "epoch": 2.08670964689182, "grad_norm": 0.25482067465782166, "learning_rate": 2.755812165524289e-05, "loss": 0.0753, "step": 35280 }, { "epoch": 2.087301117880168, "grad_norm": 0.40642330050468445, "learning_rate": 2.7556549974034974e-05, "loss": 0.0773, "step": 35290 }, { "epoch": 2.087892588868516, "grad_norm": 0.35866111516952515, "learning_rate": 2.7554977832042128e-05, "loss": 0.068, "step": 35300 }, { "epoch": 2.088484059856864, "grad_norm": 0.35133498907089233, "learning_rate": 2.755340522932204e-05, "loss": 0.0608, "step": 35310 }, { "epoch": 2.089075530845212, "grad_norm": 0.35826513171195984, "learning_rate": 2.755183216593243e-05, "loss": 0.0685, "step": 35320 }, { "epoch": 2.08966700183356, "grad_norm": 0.34083425998687744, "learning_rate": 2.7550258641931015e-05, "loss": 0.083, "step": 35330 }, { "epoch": 2.0902584728219082, "grad_norm": 0.24826161563396454, "learning_rate": 2.7548684657375547e-05, "loss": 0.0722, "step": 35340 }, { "epoch": 2.090849943810256, "grad_norm": 0.3479858636856079, "learning_rate": 2.7547110212323777e-05, "loss": 0.0745, "step": 35350 }, { "epoch": 2.091441414798604, "grad_norm": 0.2598996162414551, "learning_rate": 2.7545535306833492e-05, "loss": 0.063, "step": 35360 }, { "epoch": 2.0920328857869523, "grad_norm": 0.2337484359741211, "learning_rate": 2.7543959940962478e-05, "loss": 0.0688, "step": 35370 }, { "epoch": 2.0926243567753002, "grad_norm": 0.44539111852645874, "learning_rate": 2.7542384114768545e-05, "loss": 0.0898, "step": 35380 }, { "epoch": 2.093215827763648, "grad_norm": 0.3223032057285309, "learning_rate": 2.754080782830953e-05, "loss": 0.0863, "step": 35390 }, { "epoch": 2.093807298751996, "grad_norm": 0.24792693555355072, "learning_rate": 2.7539231081643267e-05, "loss": 0.0693, "step": 35400 }, { "epoch": 2.0943987697403443, "grad_norm": 0.2048162966966629, "learning_rate": 2.7537653874827622e-05, "loss": 0.0555, "step": 35410 }, { "epoch": 2.094990240728692, "grad_norm": 0.23264804482460022, "learning_rate": 2.7536076207920473e-05, "loss": 0.0755, "step": 35420 }, { "epoch": 2.09558171171704, "grad_norm": 0.2856682240962982, "learning_rate": 2.753449808097972e-05, "loss": 0.0828, "step": 35430 }, { "epoch": 2.0961731827053884, "grad_norm": 0.25490686297416687, "learning_rate": 2.753291949406327e-05, "loss": 0.0712, "step": 35440 }, { "epoch": 2.0967646536937363, "grad_norm": 0.26113349199295044, "learning_rate": 2.753134044722905e-05, "loss": 0.073, "step": 35450 }, { "epoch": 2.097356124682084, "grad_norm": 0.20487166941165924, "learning_rate": 2.7529760940535013e-05, "loss": 0.0605, "step": 35460 }, { "epoch": 2.0979475956704325, "grad_norm": 1.6542637348175049, "learning_rate": 2.7528180974039118e-05, "loss": 0.0751, "step": 35470 }, { "epoch": 2.0985390666587804, "grad_norm": 0.23817075788974762, "learning_rate": 2.7526600547799343e-05, "loss": 0.0804, "step": 35480 }, { "epoch": 2.0991305376471283, "grad_norm": 0.2655485272407532, "learning_rate": 2.7525019661873683e-05, "loss": 0.0795, "step": 35490 }, { "epoch": 2.0997220086354766, "grad_norm": 0.45790162682533264, "learning_rate": 2.752343831632016e-05, "loss": 0.082, "step": 35500 }, { "epoch": 2.1003134796238245, "grad_norm": 0.26030462980270386, "learning_rate": 2.7521856511196796e-05, "loss": 0.0711, "step": 35510 }, { "epoch": 2.1009049506121724, "grad_norm": 0.37906038761138916, "learning_rate": 2.7520274246561642e-05, "loss": 0.0742, "step": 35520 }, { "epoch": 2.1014964216005203, "grad_norm": 0.3550862967967987, "learning_rate": 2.7518691522472758e-05, "loss": 0.076, "step": 35530 }, { "epoch": 2.1020878925888686, "grad_norm": 0.19926269352436066, "learning_rate": 2.7517108338988233e-05, "loss": 0.0727, "step": 35540 }, { "epoch": 2.1026793635772165, "grad_norm": 0.29401421546936035, "learning_rate": 2.7515524696166157e-05, "loss": 0.0741, "step": 35550 }, { "epoch": 2.1032708345655644, "grad_norm": 0.24450641870498657, "learning_rate": 2.7513940594064647e-05, "loss": 0.068, "step": 35560 }, { "epoch": 2.1038623055539127, "grad_norm": 0.29928091168403625, "learning_rate": 2.7512356032741836e-05, "loss": 0.067, "step": 35570 }, { "epoch": 2.1044537765422606, "grad_norm": 0.28134679794311523, "learning_rate": 2.7510771012255867e-05, "loss": 0.0821, "step": 35580 }, { "epoch": 2.1050452475306085, "grad_norm": 0.43435025215148926, "learning_rate": 2.7509185532664912e-05, "loss": 0.0765, "step": 35590 }, { "epoch": 2.105636718518957, "grad_norm": 0.24787119030952454, "learning_rate": 2.750759959402715e-05, "loss": 0.0787, "step": 35600 }, { "epoch": 2.1062281895073047, "grad_norm": 0.26081371307373047, "learning_rate": 2.750601319640078e-05, "loss": 0.066, "step": 35610 }, { "epoch": 2.1068196604956526, "grad_norm": 0.27083656191825867, "learning_rate": 2.7504426339844016e-05, "loss": 0.0719, "step": 35620 }, { "epoch": 2.1074111314840005, "grad_norm": 0.2496936023235321, "learning_rate": 2.7502839024415095e-05, "loss": 0.0807, "step": 35630 }, { "epoch": 2.108002602472349, "grad_norm": 0.22347913682460785, "learning_rate": 2.7501251250172265e-05, "loss": 0.0833, "step": 35640 }, { "epoch": 2.1085940734606967, "grad_norm": 0.33888670802116394, "learning_rate": 2.7499663017173785e-05, "loss": 0.0829, "step": 35650 }, { "epoch": 2.1091855444490446, "grad_norm": 0.23358848690986633, "learning_rate": 2.749807432547795e-05, "loss": 0.0582, "step": 35660 }, { "epoch": 2.109777015437393, "grad_norm": 0.21683163940906525, "learning_rate": 2.7496485175143045e-05, "loss": 0.0808, "step": 35670 }, { "epoch": 2.110368486425741, "grad_norm": 0.2119876593351364, "learning_rate": 2.74948955662274e-05, "loss": 0.0755, "step": 35680 }, { "epoch": 2.1109599574140887, "grad_norm": 0.2540484368801117, "learning_rate": 2.749330549878935e-05, "loss": 0.0698, "step": 35690 }, { "epoch": 2.111551428402437, "grad_norm": 0.3679427206516266, "learning_rate": 2.7491714972887228e-05, "loss": 0.0715, "step": 35700 }, { "epoch": 2.112142899390785, "grad_norm": 0.27210474014282227, "learning_rate": 2.749012398857942e-05, "loss": 0.0607, "step": 35710 }, { "epoch": 2.112734370379133, "grad_norm": 0.464223712682724, "learning_rate": 2.74885325459243e-05, "loss": 0.082, "step": 35720 }, { "epoch": 2.113325841367481, "grad_norm": 0.26611998677253723, "learning_rate": 2.748694064498027e-05, "loss": 0.0808, "step": 35730 }, { "epoch": 2.113917312355829, "grad_norm": 0.3281119763851166, "learning_rate": 2.748534828580575e-05, "loss": 0.0753, "step": 35740 }, { "epoch": 2.114508783344177, "grad_norm": 0.4127328097820282, "learning_rate": 2.7483755468459174e-05, "loss": 0.0817, "step": 35750 }, { "epoch": 2.115100254332525, "grad_norm": 0.4419535994529724, "learning_rate": 2.748216219299899e-05, "loss": 0.0552, "step": 35760 }, { "epoch": 2.115691725320873, "grad_norm": 0.3553488850593567, "learning_rate": 2.748056845948367e-05, "loss": 0.0736, "step": 35770 }, { "epoch": 2.116283196309221, "grad_norm": 0.3071654140949249, "learning_rate": 2.7478974267971698e-05, "loss": 0.0809, "step": 35780 }, { "epoch": 2.116874667297569, "grad_norm": 0.32357609272003174, "learning_rate": 2.7477379618521576e-05, "loss": 0.0793, "step": 35790 }, { "epoch": 2.1174661382859172, "grad_norm": 0.3528458774089813, "learning_rate": 2.7475784511191822e-05, "loss": 0.0826, "step": 35800 }, { "epoch": 2.118057609274265, "grad_norm": 0.21058857440948486, "learning_rate": 2.7474188946040967e-05, "loss": 0.0623, "step": 35810 }, { "epoch": 2.118649080262613, "grad_norm": 0.2257361263036728, "learning_rate": 2.7472592923127568e-05, "loss": 0.0667, "step": 35820 }, { "epoch": 2.1192405512509613, "grad_norm": 0.3337410092353821, "learning_rate": 2.7470996442510196e-05, "loss": 0.0894, "step": 35830 }, { "epoch": 2.119832022239309, "grad_norm": 0.37757235765457153, "learning_rate": 2.7469399504247433e-05, "loss": 0.0735, "step": 35840 }, { "epoch": 2.120423493227657, "grad_norm": 0.2059958577156067, "learning_rate": 2.7467802108397883e-05, "loss": 0.0645, "step": 35850 }, { "epoch": 2.121014964216005, "grad_norm": 0.31103405356407166, "learning_rate": 2.7466204255020162e-05, "loss": 0.0683, "step": 35860 }, { "epoch": 2.1216064352043533, "grad_norm": 0.4473499357700348, "learning_rate": 2.7464605944172907e-05, "loss": 0.0797, "step": 35870 }, { "epoch": 2.122197906192701, "grad_norm": 0.32119330763816833, "learning_rate": 2.7463007175914778e-05, "loss": 0.0722, "step": 35880 }, { "epoch": 2.122789377181049, "grad_norm": 0.30261966586112976, "learning_rate": 2.7461407950304434e-05, "loss": 0.0787, "step": 35890 }, { "epoch": 2.1233808481693974, "grad_norm": 0.24644248187541962, "learning_rate": 2.7459808267400566e-05, "loss": 0.0681, "step": 35900 }, { "epoch": 2.1239723191577453, "grad_norm": 0.28463613986968994, "learning_rate": 2.7458208127261882e-05, "loss": 0.0737, "step": 35910 }, { "epoch": 2.124563790146093, "grad_norm": 0.21722403168678284, "learning_rate": 2.7456607529947095e-05, "loss": 0.0668, "step": 35920 }, { "epoch": 2.1251552611344415, "grad_norm": 0.24301616847515106, "learning_rate": 2.7455006475514946e-05, "loss": 0.0747, "step": 35930 }, { "epoch": 2.1257467321227894, "grad_norm": 0.35665377974510193, "learning_rate": 2.7453404964024184e-05, "loss": 0.0715, "step": 35940 }, { "epoch": 2.1263382031111373, "grad_norm": 0.21592436730861664, "learning_rate": 2.7451802995533584e-05, "loss": 0.0619, "step": 35950 }, { "epoch": 2.126929674099485, "grad_norm": 0.24491268396377563, "learning_rate": 2.745020057010193e-05, "loss": 0.0661, "step": 35960 }, { "epoch": 2.1275211450878335, "grad_norm": 0.2676364481449127, "learning_rate": 2.744859768778803e-05, "loss": 0.0784, "step": 35970 }, { "epoch": 2.1281126160761814, "grad_norm": 0.37015214562416077, "learning_rate": 2.74469943486507e-05, "loss": 0.0847, "step": 35980 }, { "epoch": 2.1287040870645293, "grad_norm": 0.27970460057258606, "learning_rate": 2.7445390552748777e-05, "loss": 0.0721, "step": 35990 }, { "epoch": 2.1292955580528776, "grad_norm": 0.29455873370170593, "learning_rate": 2.7443786300141122e-05, "loss": 0.0689, "step": 36000 }, { "epoch": 2.1298870290412255, "grad_norm": 0.2921854555606842, "learning_rate": 2.7442181590886597e-05, "loss": 0.0596, "step": 36010 }, { "epoch": 2.1304785000295734, "grad_norm": 0.30388781428337097, "learning_rate": 2.7440576425044095e-05, "loss": 0.0706, "step": 36020 }, { "epoch": 2.1310699710179217, "grad_norm": 0.36866435408592224, "learning_rate": 2.743897080267252e-05, "loss": 0.0817, "step": 36030 }, { "epoch": 2.1316614420062696, "grad_norm": 0.504880428314209, "learning_rate": 2.7437364723830793e-05, "loss": 0.0765, "step": 36040 }, { "epoch": 2.1322529129946175, "grad_norm": 1.584774136543274, "learning_rate": 2.743575818857785e-05, "loss": 0.073, "step": 36050 }, { "epoch": 2.132844383982966, "grad_norm": 0.2049465924501419, "learning_rate": 2.7434151196972648e-05, "loss": 0.0613, "step": 36060 }, { "epoch": 2.1334358549713137, "grad_norm": 0.41227954626083374, "learning_rate": 2.7432543749074156e-05, "loss": 0.0739, "step": 36070 }, { "epoch": 2.1340273259596616, "grad_norm": 0.9695149064064026, "learning_rate": 2.743093584494137e-05, "loss": 0.0709, "step": 36080 }, { "epoch": 2.1346187969480095, "grad_norm": 0.9167152047157288, "learning_rate": 2.742932748463328e-05, "loss": 0.1097, "step": 36090 }, { "epoch": 2.135210267936358, "grad_norm": 0.35767897963523865, "learning_rate": 2.742771866820892e-05, "loss": 0.0895, "step": 36100 }, { "epoch": 2.1358017389247057, "grad_norm": 0.2661340832710266, "learning_rate": 2.7426109395727325e-05, "loss": 0.0707, "step": 36110 }, { "epoch": 2.1363932099130536, "grad_norm": 0.4975190758705139, "learning_rate": 2.742449966724755e-05, "loss": 0.0802, "step": 36120 }, { "epoch": 2.136984680901402, "grad_norm": 0.22878694534301758, "learning_rate": 2.7422889482828663e-05, "loss": 0.0885, "step": 36130 }, { "epoch": 2.13757615188975, "grad_norm": 0.2801564633846283, "learning_rate": 2.742127884252976e-05, "loss": 0.0957, "step": 36140 }, { "epoch": 2.1381676228780977, "grad_norm": 0.195699080824852, "learning_rate": 2.7419667746409937e-05, "loss": 0.0822, "step": 36150 }, { "epoch": 2.138759093866446, "grad_norm": 0.23833036422729492, "learning_rate": 2.741805619452833e-05, "loss": 0.061, "step": 36160 }, { "epoch": 2.139350564854794, "grad_norm": 0.2584175765514374, "learning_rate": 2.7416444186944062e-05, "loss": 0.0773, "step": 36170 }, { "epoch": 2.139942035843142, "grad_norm": 0.6535046696662903, "learning_rate": 2.74148317237163e-05, "loss": 0.0824, "step": 36180 }, { "epoch": 2.14053350683149, "grad_norm": 0.5517720580101013, "learning_rate": 2.7413218804904208e-05, "loss": 0.0801, "step": 36190 }, { "epoch": 2.141124977819838, "grad_norm": 0.39170774817466736, "learning_rate": 2.741160543056698e-05, "loss": 0.0697, "step": 36200 }, { "epoch": 2.141716448808186, "grad_norm": 0.2615944445133209, "learning_rate": 2.740999160076382e-05, "loss": 0.0702, "step": 36210 }, { "epoch": 2.142307919796534, "grad_norm": 0.24508745968341827, "learning_rate": 2.7408377315553953e-05, "loss": 0.0763, "step": 36220 }, { "epoch": 2.142899390784882, "grad_norm": 0.25516611337661743, "learning_rate": 2.7406762574996616e-05, "loss": 0.0951, "step": 36230 }, { "epoch": 2.14349086177323, "grad_norm": 0.3125692307949066, "learning_rate": 2.7405147379151065e-05, "loss": 0.0793, "step": 36240 }, { "epoch": 2.144082332761578, "grad_norm": 0.21976424753665924, "learning_rate": 2.740353172807657e-05, "loss": 0.0694, "step": 36250 }, { "epoch": 2.144673803749926, "grad_norm": 1.965863823890686, "learning_rate": 2.740191562183242e-05, "loss": 0.0713, "step": 36260 }, { "epoch": 2.145265274738274, "grad_norm": 0.29789915680885315, "learning_rate": 2.740029906047793e-05, "loss": 0.0662, "step": 36270 }, { "epoch": 2.145856745726622, "grad_norm": 0.3091645836830139, "learning_rate": 2.7398682044072408e-05, "loss": 0.0896, "step": 36280 }, { "epoch": 2.1464482167149703, "grad_norm": 0.2237364947795868, "learning_rate": 2.739706457267521e-05, "loss": 0.0722, "step": 36290 }, { "epoch": 2.147039687703318, "grad_norm": 0.3571000397205353, "learning_rate": 2.7395446646345673e-05, "loss": 0.076, "step": 36300 }, { "epoch": 2.147631158691666, "grad_norm": 0.29845449328422546, "learning_rate": 2.7393828265143185e-05, "loss": 0.0656, "step": 36310 }, { "epoch": 2.1482226296800144, "grad_norm": 0.3451295495033264, "learning_rate": 2.7392209429127128e-05, "loss": 0.0847, "step": 36320 }, { "epoch": 2.1488141006683623, "grad_norm": 0.23783986270427704, "learning_rate": 2.7390590138356917e-05, "loss": 0.0753, "step": 36330 }, { "epoch": 2.14940557165671, "grad_norm": 0.2604544460773468, "learning_rate": 2.738897039289196e-05, "loss": 0.0778, "step": 36340 }, { "epoch": 2.149997042645058, "grad_norm": 0.33122628927230835, "learning_rate": 2.7387350192791707e-05, "loss": 0.0774, "step": 36350 }, { "epoch": 2.1505885136334064, "grad_norm": 0.20490416884422302, "learning_rate": 2.7385729538115614e-05, "loss": 0.0702, "step": 36360 }, { "epoch": 2.1511799846217543, "grad_norm": 0.29258784651756287, "learning_rate": 2.738410842892315e-05, "loss": 0.0779, "step": 36370 }, { "epoch": 2.151771455610102, "grad_norm": 0.20393416285514832, "learning_rate": 2.7382486865273804e-05, "loss": 0.0848, "step": 36380 }, { "epoch": 2.1523629265984505, "grad_norm": 0.21863456070423126, "learning_rate": 2.7380864847227086e-05, "loss": 0.0761, "step": 36390 }, { "epoch": 2.1529543975867984, "grad_norm": 0.2604093551635742, "learning_rate": 2.7379242374842513e-05, "loss": 0.064, "step": 36400 }, { "epoch": 2.1535458685751463, "grad_norm": 0.2923860251903534, "learning_rate": 2.7377619448179632e-05, "loss": 0.0603, "step": 36410 }, { "epoch": 2.154137339563494, "grad_norm": 0.23792335391044617, "learning_rate": 2.7375996067297997e-05, "loss": 0.0815, "step": 36420 }, { "epoch": 2.1547288105518425, "grad_norm": 0.2623823881149292, "learning_rate": 2.7374372232257173e-05, "loss": 0.0807, "step": 36430 }, { "epoch": 2.1553202815401904, "grad_norm": 0.40070629119873047, "learning_rate": 2.7372747943116765e-05, "loss": 0.0715, "step": 36440 }, { "epoch": 2.1559117525285383, "grad_norm": 0.22734875977039337, "learning_rate": 2.7371123199936363e-05, "loss": 0.062, "step": 36450 }, { "epoch": 2.1565032235168866, "grad_norm": 0.21944743394851685, "learning_rate": 2.7369498002775602e-05, "loss": 0.0617, "step": 36460 }, { "epoch": 2.1570946945052345, "grad_norm": 0.6223885416984558, "learning_rate": 2.7367872351694113e-05, "loss": 0.0787, "step": 36470 }, { "epoch": 2.1576861654935824, "grad_norm": 0.22666053473949432, "learning_rate": 2.7366246246751555e-05, "loss": 0.0757, "step": 36480 }, { "epoch": 2.1582776364819307, "grad_norm": 0.36751407384872437, "learning_rate": 2.7364619688007598e-05, "loss": 0.0831, "step": 36490 }, { "epoch": 2.1588691074702786, "grad_norm": 0.3441159725189209, "learning_rate": 2.7362992675521942e-05, "loss": 0.0745, "step": 36500 }, { "epoch": 2.1594605784586265, "grad_norm": 0.26863229274749756, "learning_rate": 2.7361365209354282e-05, "loss": 0.0592, "step": 36510 }, { "epoch": 2.160052049446975, "grad_norm": 0.2622426152229309, "learning_rate": 2.7359737289564345e-05, "loss": 0.0712, "step": 36520 }, { "epoch": 2.1606435204353227, "grad_norm": 0.2171238660812378, "learning_rate": 2.7358108916211873e-05, "loss": 0.0757, "step": 36530 }, { "epoch": 2.1612349914236706, "grad_norm": 0.28753823041915894, "learning_rate": 2.7356480089356616e-05, "loss": 0.0723, "step": 36540 }, { "epoch": 2.1618264624120185, "grad_norm": 0.2651236057281494, "learning_rate": 2.735485080905835e-05, "loss": 0.0811, "step": 36550 }, { "epoch": 2.162417933400367, "grad_norm": 0.34593334794044495, "learning_rate": 2.7353221075376864e-05, "loss": 0.0677, "step": 36560 }, { "epoch": 2.1630094043887147, "grad_norm": 0.33934471011161804, "learning_rate": 2.7351590888371963e-05, "loss": 0.0804, "step": 36570 }, { "epoch": 2.1636008753770626, "grad_norm": 0.30046164989471436, "learning_rate": 2.7349960248103477e-05, "loss": 0.0744, "step": 36580 }, { "epoch": 2.164192346365411, "grad_norm": 0.17748357355594635, "learning_rate": 2.7348329154631234e-05, "loss": 0.0745, "step": 36590 }, { "epoch": 2.164783817353759, "grad_norm": 0.30593234300613403, "learning_rate": 2.7346697608015095e-05, "loss": 0.0662, "step": 36600 }, { "epoch": 2.1653752883421067, "grad_norm": 0.2696385085582733, "learning_rate": 2.7345065608314934e-05, "loss": 0.0573, "step": 36610 }, { "epoch": 2.165966759330455, "grad_norm": 0.30768439173698425, "learning_rate": 2.734343315559064e-05, "loss": 0.0815, "step": 36620 }, { "epoch": 2.166558230318803, "grad_norm": 0.2516109347343445, "learning_rate": 2.7341800249902112e-05, "loss": 0.0773, "step": 36630 }, { "epoch": 2.167149701307151, "grad_norm": 0.4589599370956421, "learning_rate": 2.7340166891309284e-05, "loss": 0.0723, "step": 36640 }, { "epoch": 2.167741172295499, "grad_norm": 0.3953612446784973, "learning_rate": 2.7338533079872086e-05, "loss": 0.0704, "step": 36650 }, { "epoch": 2.168332643283847, "grad_norm": 0.2848329246044159, "learning_rate": 2.7336898815650475e-05, "loss": 0.0678, "step": 36660 }, { "epoch": 2.168924114272195, "grad_norm": 0.41109251976013184, "learning_rate": 2.7335264098704425e-05, "loss": 0.0764, "step": 36670 }, { "epoch": 2.169515585260543, "grad_norm": 0.7269043326377869, "learning_rate": 2.7333628929093925e-05, "loss": 0.082, "step": 36680 }, { "epoch": 2.170107056248891, "grad_norm": 0.22979988157749176, "learning_rate": 2.7331993306878976e-05, "loss": 0.0735, "step": 36690 }, { "epoch": 2.170698527237239, "grad_norm": 0.2577757239341736, "learning_rate": 2.7330357232119612e-05, "loss": 0.0618, "step": 36700 }, { "epoch": 2.171289998225587, "grad_norm": 0.30467134714126587, "learning_rate": 2.7328720704875858e-05, "loss": 0.0719, "step": 36710 }, { "epoch": 2.171881469213935, "grad_norm": 0.2875248193740845, "learning_rate": 2.7327083725207774e-05, "loss": 0.0925, "step": 36720 }, { "epoch": 2.172472940202283, "grad_norm": 0.35269999504089355, "learning_rate": 2.7325446293175433e-05, "loss": 0.0769, "step": 36730 }, { "epoch": 2.173064411190631, "grad_norm": 0.2791959345340729, "learning_rate": 2.7323808408838927e-05, "loss": 0.0722, "step": 36740 }, { "epoch": 2.1736558821789793, "grad_norm": 0.30945783853530884, "learning_rate": 2.7322170072258353e-05, "loss": 0.0728, "step": 36750 }, { "epoch": 2.174247353167327, "grad_norm": 0.5243887305259705, "learning_rate": 2.7320531283493842e-05, "loss": 0.0634, "step": 36760 }, { "epoch": 2.174838824155675, "grad_norm": 0.2775687277317047, "learning_rate": 2.7318892042605522e-05, "loss": 0.0759, "step": 36770 }, { "epoch": 2.1754302951440234, "grad_norm": 0.44470155239105225, "learning_rate": 2.7317252349653556e-05, "loss": 0.0745, "step": 36780 }, { "epoch": 2.1760217661323713, "grad_norm": 0.2431807667016983, "learning_rate": 2.731561220469811e-05, "loss": 0.0839, "step": 36790 }, { "epoch": 2.176613237120719, "grad_norm": 0.32309749722480774, "learning_rate": 2.731397160779938e-05, "loss": 0.0605, "step": 36800 }, { "epoch": 2.177204708109067, "grad_norm": 0.42987319827079773, "learning_rate": 2.731233055901756e-05, "loss": 0.0584, "step": 36810 }, { "epoch": 2.1777961790974154, "grad_norm": 0.3567282557487488, "learning_rate": 2.731068905841288e-05, "loss": 0.0823, "step": 36820 }, { "epoch": 2.1783876500857633, "grad_norm": 0.24787995219230652, "learning_rate": 2.7309047106045568e-05, "loss": 0.0902, "step": 36830 }, { "epoch": 2.178979121074111, "grad_norm": 0.5208701491355896, "learning_rate": 2.730740470197589e-05, "loss": 0.0725, "step": 36840 }, { "epoch": 2.1795705920624595, "grad_norm": 0.21790924668312073, "learning_rate": 2.730576184626411e-05, "loss": 0.0704, "step": 36850 }, { "epoch": 2.1801620630508074, "grad_norm": 0.20224367082118988, "learning_rate": 2.7304118538970518e-05, "loss": 0.0612, "step": 36860 }, { "epoch": 2.1807535340391553, "grad_norm": 0.658391535282135, "learning_rate": 2.7302474780155414e-05, "loss": 0.073, "step": 36870 }, { "epoch": 2.181345005027503, "grad_norm": 0.3838525712490082, "learning_rate": 2.7300830569879125e-05, "loss": 0.0764, "step": 36880 }, { "epoch": 2.1819364760158515, "grad_norm": 0.19882987439632416, "learning_rate": 2.7299185908201984e-05, "loss": 0.0795, "step": 36890 }, { "epoch": 2.1825279470041994, "grad_norm": 0.21372182667255402, "learning_rate": 2.7297540795184346e-05, "loss": 0.0763, "step": 36900 }, { "epoch": 2.1831194179925473, "grad_norm": 0.1848401576280594, "learning_rate": 2.729589523088658e-05, "loss": 0.0711, "step": 36910 }, { "epoch": 2.1837108889808956, "grad_norm": 0.36913397908210754, "learning_rate": 2.7294249215369075e-05, "loss": 0.0758, "step": 36920 }, { "epoch": 2.1843023599692435, "grad_norm": 0.2912278175354004, "learning_rate": 2.729260274869223e-05, "loss": 0.0771, "step": 36930 }, { "epoch": 2.1848938309575914, "grad_norm": 0.2660139501094818, "learning_rate": 2.7290955830916475e-05, "loss": 0.0609, "step": 36940 }, { "epoch": 2.1854853019459397, "grad_norm": 0.234347403049469, "learning_rate": 2.7289308462102235e-05, "loss": 0.069, "step": 36950 }, { "epoch": 2.1860767729342876, "grad_norm": 0.1820228099822998, "learning_rate": 2.728766064230997e-05, "loss": 0.0568, "step": 36960 }, { "epoch": 2.1866682439226355, "grad_norm": 0.32836106419563293, "learning_rate": 2.7286012371600143e-05, "loss": 0.0801, "step": 36970 }, { "epoch": 2.187259714910984, "grad_norm": 0.2209366261959076, "learning_rate": 2.728436365003325e-05, "loss": 0.0729, "step": 36980 }, { "epoch": 2.1878511858993317, "grad_norm": 0.3054406940937042, "learning_rate": 2.728271447766979e-05, "loss": 0.0773, "step": 36990 }, { "epoch": 2.1884426568876796, "grad_norm": 0.24690639972686768, "learning_rate": 2.728106485457028e-05, "loss": 0.0814, "step": 37000 }, { "epoch": 2.1890341278760275, "grad_norm": 0.3428565561771393, "learning_rate": 2.727941478079526e-05, "loss": 0.0642, "step": 37010 }, { "epoch": 2.189625598864376, "grad_norm": 0.25601688027381897, "learning_rate": 2.7277764256405276e-05, "loss": 0.0712, "step": 37020 }, { "epoch": 2.1902170698527237, "grad_norm": 0.4545801281929016, "learning_rate": 2.72761132814609e-05, "loss": 0.0792, "step": 37030 }, { "epoch": 2.1908085408410716, "grad_norm": 0.30256885290145874, "learning_rate": 2.7274461856022716e-05, "loss": 0.0756, "step": 37040 }, { "epoch": 2.19140001182942, "grad_norm": 0.29100683331489563, "learning_rate": 2.7272809980151333e-05, "loss": 0.0705, "step": 37050 }, { "epoch": 2.191991482817768, "grad_norm": 0.27853843569755554, "learning_rate": 2.7271157653907358e-05, "loss": 0.0578, "step": 37060 }, { "epoch": 2.1925829538061157, "grad_norm": 0.2955076992511749, "learning_rate": 2.7269504877351436e-05, "loss": 0.0792, "step": 37070 }, { "epoch": 2.193174424794464, "grad_norm": 0.23436091840267181, "learning_rate": 2.7267851650544217e-05, "loss": 0.0763, "step": 37080 }, { "epoch": 2.193765895782812, "grad_norm": 0.23637790977954865, "learning_rate": 2.7266197973546363e-05, "loss": 0.0621, "step": 37090 }, { "epoch": 2.19435736677116, "grad_norm": 0.2222813367843628, "learning_rate": 2.726454384641856e-05, "loss": 0.0743, "step": 37100 }, { "epoch": 2.194948837759508, "grad_norm": 0.9356229305267334, "learning_rate": 2.726288926922152e-05, "loss": 0.0643, "step": 37110 }, { "epoch": 2.195540308747856, "grad_norm": 0.29877954721450806, "learning_rate": 2.7261234242015947e-05, "loss": 0.0749, "step": 37120 }, { "epoch": 2.196131779736204, "grad_norm": 0.34004801511764526, "learning_rate": 2.725957876486258e-05, "loss": 0.0844, "step": 37130 }, { "epoch": 2.1967232507245518, "grad_norm": 0.40531125664711, "learning_rate": 2.7257922837822174e-05, "loss": 0.0875, "step": 37140 }, { "epoch": 2.1973147217129, "grad_norm": 0.2674142122268677, "learning_rate": 2.725626646095549e-05, "loss": 0.0776, "step": 37150 }, { "epoch": 2.197906192701248, "grad_norm": 0.2513258755207062, "learning_rate": 2.7254609634323316e-05, "loss": 0.0587, "step": 37160 }, { "epoch": 2.198497663689596, "grad_norm": 0.24910545349121094, "learning_rate": 2.725295235798645e-05, "loss": 0.071, "step": 37170 }, { "epoch": 2.199089134677944, "grad_norm": 0.2903410494327545, "learning_rate": 2.7251294632005707e-05, "loss": 0.0671, "step": 37180 }, { "epoch": 2.199680605666292, "grad_norm": 0.26335451006889343, "learning_rate": 2.7249636456441924e-05, "loss": 0.0801, "step": 37190 }, { "epoch": 2.20027207665464, "grad_norm": 0.16062591969966888, "learning_rate": 2.7247977831355947e-05, "loss": 0.0644, "step": 37200 }, { "epoch": 2.2008635476429883, "grad_norm": 0.3792581260204315, "learning_rate": 2.7246318756808647e-05, "loss": 0.0658, "step": 37210 }, { "epoch": 2.201455018631336, "grad_norm": 0.30694061517715454, "learning_rate": 2.7244659232860905e-05, "loss": 0.079, "step": 37220 }, { "epoch": 2.202046489619684, "grad_norm": 0.26408836245536804, "learning_rate": 2.724299925957361e-05, "loss": 0.0833, "step": 37230 }, { "epoch": 2.2026379606080324, "grad_norm": 0.2334756702184677, "learning_rate": 2.7241338837007696e-05, "loss": 0.0696, "step": 37240 }, { "epoch": 2.2032294315963803, "grad_norm": 0.38697677850723267, "learning_rate": 2.7239677965224082e-05, "loss": 0.0687, "step": 37250 }, { "epoch": 2.203820902584728, "grad_norm": 0.7499844431877136, "learning_rate": 2.7238016644283722e-05, "loss": 0.0585, "step": 37260 }, { "epoch": 2.204412373573076, "grad_norm": 0.25349289178848267, "learning_rate": 2.723635487424758e-05, "loss": 0.0712, "step": 37270 }, { "epoch": 2.2050038445614244, "grad_norm": 0.18322475254535675, "learning_rate": 2.7234692655176637e-05, "loss": 0.0725, "step": 37280 }, { "epoch": 2.2055953155497723, "grad_norm": 0.2852199375629425, "learning_rate": 2.723302998713189e-05, "loss": 0.0851, "step": 37290 }, { "epoch": 2.20618678653812, "grad_norm": 0.32972317934036255, "learning_rate": 2.7231366870174357e-05, "loss": 0.0693, "step": 37300 }, { "epoch": 2.2067782575264685, "grad_norm": 0.24409162998199463, "learning_rate": 2.7229703304365067e-05, "loss": 0.0657, "step": 37310 }, { "epoch": 2.2073697285148164, "grad_norm": 0.3164711594581604, "learning_rate": 2.7228039289765063e-05, "loss": 0.0722, "step": 37320 }, { "epoch": 2.2079611995031643, "grad_norm": 0.3256780207157135, "learning_rate": 2.7226374826435416e-05, "loss": 0.0807, "step": 37330 }, { "epoch": 2.208552670491512, "grad_norm": 0.41082119941711426, "learning_rate": 2.7224709914437205e-05, "loss": 0.0764, "step": 37340 }, { "epoch": 2.2091441414798605, "grad_norm": 0.23844102025032043, "learning_rate": 2.7223044553831524e-05, "loss": 0.0632, "step": 37350 }, { "epoch": 2.2097356124682084, "grad_norm": 0.20227478444576263, "learning_rate": 2.722137874467949e-05, "loss": 0.0526, "step": 37360 }, { "epoch": 2.2103270834565563, "grad_norm": 0.2502842843532562, "learning_rate": 2.7219712487042233e-05, "loss": 0.0776, "step": 37370 }, { "epoch": 2.2109185544449046, "grad_norm": 0.23311921954154968, "learning_rate": 2.7218045780980894e-05, "loss": 0.0704, "step": 37380 }, { "epoch": 2.2115100254332525, "grad_norm": 0.43423911929130554, "learning_rate": 2.7216378626556636e-05, "loss": 0.0791, "step": 37390 }, { "epoch": 2.2121014964216004, "grad_norm": 0.24314239621162415, "learning_rate": 2.7214711023830647e-05, "loss": 0.0751, "step": 37400 }, { "epoch": 2.2126929674099487, "grad_norm": 0.23176825046539307, "learning_rate": 2.7213042972864113e-05, "loss": 0.0733, "step": 37410 }, { "epoch": 2.2132844383982966, "grad_norm": 0.2964625358581543, "learning_rate": 2.721137447371825e-05, "loss": 0.0822, "step": 37420 }, { "epoch": 2.2138759093866445, "grad_norm": 0.37530168890953064, "learning_rate": 2.7209705526454285e-05, "loss": 0.083, "step": 37430 }, { "epoch": 2.214467380374993, "grad_norm": 0.2879180312156677, "learning_rate": 2.720803613113347e-05, "loss": 0.0814, "step": 37440 }, { "epoch": 2.2150588513633407, "grad_norm": 0.30108457803726196, "learning_rate": 2.720636628781706e-05, "loss": 0.0809, "step": 37450 }, { "epoch": 2.2156503223516886, "grad_norm": 0.3730691969394684, "learning_rate": 2.7204695996566328e-05, "loss": 0.0624, "step": 37460 }, { "epoch": 2.2162417933400365, "grad_norm": 0.24777236580848694, "learning_rate": 2.720302525744258e-05, "loss": 0.0749, "step": 37470 }, { "epoch": 2.216833264328385, "grad_norm": 0.2719033360481262, "learning_rate": 2.7201354070507115e-05, "loss": 0.0885, "step": 37480 }, { "epoch": 2.2174247353167327, "grad_norm": 0.30870017409324646, "learning_rate": 2.719968243582127e-05, "loss": 0.07, "step": 37490 }, { "epoch": 2.2180162063050806, "grad_norm": 0.2729548215866089, "learning_rate": 2.7198010353446385e-05, "loss": 0.0726, "step": 37500 }, { "epoch": 2.218607677293429, "grad_norm": 0.15803508460521698, "learning_rate": 2.7196337823443822e-05, "loss": 0.0576, "step": 37510 }, { "epoch": 2.219199148281777, "grad_norm": 0.33429834246635437, "learning_rate": 2.719466484587495e-05, "loss": 0.0753, "step": 37520 }, { "epoch": 2.2197906192701247, "grad_norm": 0.30622994899749756, "learning_rate": 2.7192991420801168e-05, "loss": 0.0742, "step": 37530 }, { "epoch": 2.220382090258473, "grad_norm": 0.293546199798584, "learning_rate": 2.7191317548283886e-05, "loss": 0.0707, "step": 37540 }, { "epoch": 2.220973561246821, "grad_norm": 0.2625015079975128, "learning_rate": 2.7189643228384527e-05, "loss": 0.0743, "step": 37550 }, { "epoch": 2.2215650322351688, "grad_norm": 0.37001481652259827, "learning_rate": 2.7187968461164534e-05, "loss": 0.0782, "step": 37560 }, { "epoch": 2.222156503223517, "grad_norm": 0.3297373950481415, "learning_rate": 2.7186293246685364e-05, "loss": 0.0713, "step": 37570 }, { "epoch": 2.222747974211865, "grad_norm": 0.4331996738910675, "learning_rate": 2.7184617585008496e-05, "loss": 0.0777, "step": 37580 }, { "epoch": 2.223339445200213, "grad_norm": 0.18302270770072937, "learning_rate": 2.718294147619542e-05, "loss": 0.0786, "step": 37590 }, { "epoch": 2.2239309161885608, "grad_norm": 0.2961925268173218, "learning_rate": 2.718126492030764e-05, "loss": 0.0761, "step": 37600 }, { "epoch": 2.224522387176909, "grad_norm": 0.1812025010585785, "learning_rate": 2.717958791740668e-05, "loss": 0.0546, "step": 37610 }, { "epoch": 2.225113858165257, "grad_norm": 0.44559383392333984, "learning_rate": 2.7177910467554096e-05, "loss": 0.095, "step": 37620 }, { "epoch": 2.225705329153605, "grad_norm": 0.20496827363967896, "learning_rate": 2.717623257081142e-05, "loss": 0.0704, "step": 37630 }, { "epoch": 2.226296800141953, "grad_norm": 0.22350656986236572, "learning_rate": 2.7174554227240247e-05, "loss": 0.0775, "step": 37640 }, { "epoch": 2.226888271130301, "grad_norm": 0.2932031452655792, "learning_rate": 2.717287543690215e-05, "loss": 0.0726, "step": 37650 }, { "epoch": 2.227479742118649, "grad_norm": 0.2140844464302063, "learning_rate": 2.717119619985875e-05, "loss": 0.0655, "step": 37660 }, { "epoch": 2.2280712131069973, "grad_norm": 0.35172775387763977, "learning_rate": 2.716951651617166e-05, "loss": 0.0918, "step": 37670 }, { "epoch": 2.228662684095345, "grad_norm": 0.2763568162918091, "learning_rate": 2.7167836385902518e-05, "loss": 0.0842, "step": 37680 }, { "epoch": 2.229254155083693, "grad_norm": 0.19439639151096344, "learning_rate": 2.7166155809112984e-05, "loss": 0.0707, "step": 37690 }, { "epoch": 2.2298456260720414, "grad_norm": 0.2400418072938919, "learning_rate": 2.7164474785864733e-05, "loss": 0.0674, "step": 37700 }, { "epoch": 2.2304370970603893, "grad_norm": 0.2656238079071045, "learning_rate": 2.716279331621945e-05, "loss": 0.0681, "step": 37710 }, { "epoch": 2.231028568048737, "grad_norm": 0.2664846181869507, "learning_rate": 2.7161111400238833e-05, "loss": 0.0764, "step": 37720 }, { "epoch": 2.231620039037085, "grad_norm": 0.2526686191558838, "learning_rate": 2.715942903798461e-05, "loss": 0.0807, "step": 37730 }, { "epoch": 2.2322115100254334, "grad_norm": 0.2626025974750519, "learning_rate": 2.7157746229518517e-05, "loss": 0.0787, "step": 37740 }, { "epoch": 2.2328029810137813, "grad_norm": 0.23869651556015015, "learning_rate": 2.7156062974902307e-05, "loss": 0.0731, "step": 37750 }, { "epoch": 2.233394452002129, "grad_norm": 0.21615718305110931, "learning_rate": 2.7154379274197753e-05, "loss": 0.0738, "step": 37760 }, { "epoch": 2.2339859229904775, "grad_norm": 0.35794979333877563, "learning_rate": 2.7152695127466635e-05, "loss": 0.0723, "step": 37770 }, { "epoch": 2.2345773939788254, "grad_norm": 0.22767722606658936, "learning_rate": 2.715101053477076e-05, "loss": 0.08, "step": 37780 }, { "epoch": 2.2351688649671733, "grad_norm": 0.22096598148345947, "learning_rate": 2.7149325496171947e-05, "loss": 0.0743, "step": 37790 }, { "epoch": 2.235760335955521, "grad_norm": 0.18992961943149567, "learning_rate": 2.7147640011732033e-05, "loss": 0.0615, "step": 37800 }, { "epoch": 2.2363518069438695, "grad_norm": 0.15232056379318237, "learning_rate": 2.7145954081512865e-05, "loss": 0.067, "step": 37810 }, { "epoch": 2.2369432779322174, "grad_norm": 0.3522980213165283, "learning_rate": 2.7144267705576314e-05, "loss": 0.0776, "step": 37820 }, { "epoch": 2.2375347489205653, "grad_norm": 0.23591844737529755, "learning_rate": 2.7142580883984267e-05, "loss": 0.0844, "step": 37830 }, { "epoch": 2.2381262199089136, "grad_norm": 0.30151480436325073, "learning_rate": 2.7140893616798622e-05, "loss": 0.0726, "step": 37840 }, { "epoch": 2.2387176908972615, "grad_norm": 0.2307496964931488, "learning_rate": 2.7139205904081294e-05, "loss": 0.08, "step": 37850 }, { "epoch": 2.2393091618856094, "grad_norm": 0.35498160123825073, "learning_rate": 2.7137517745894224e-05, "loss": 0.0779, "step": 37860 }, { "epoch": 2.2399006328739577, "grad_norm": 0.3250002861022949, "learning_rate": 2.7135829142299354e-05, "loss": 0.0739, "step": 37870 }, { "epoch": 2.2404921038623056, "grad_norm": 0.341960608959198, "learning_rate": 2.7134140093358658e-05, "loss": 0.0763, "step": 37880 }, { "epoch": 2.2410835748506535, "grad_norm": 0.21703967452049255, "learning_rate": 2.713245059913411e-05, "loss": 0.0743, "step": 37890 }, { "epoch": 2.241675045839002, "grad_norm": 0.2187587171792984, "learning_rate": 2.713076065968771e-05, "loss": 0.054, "step": 37900 }, { "epoch": 2.2422665168273497, "grad_norm": 0.3450455069541931, "learning_rate": 2.7129070275081484e-05, "loss": 0.0572, "step": 37910 }, { "epoch": 2.2428579878156976, "grad_norm": 0.31378617882728577, "learning_rate": 2.7127379445377452e-05, "loss": 0.0777, "step": 37920 }, { "epoch": 2.2434494588040454, "grad_norm": 0.5924904346466064, "learning_rate": 2.7125688170637668e-05, "loss": 0.0818, "step": 37930 }, { "epoch": 2.244040929792394, "grad_norm": 0.24270780384540558, "learning_rate": 2.71239964509242e-05, "loss": 0.0741, "step": 37940 }, { "epoch": 2.2446324007807417, "grad_norm": 0.25001341104507446, "learning_rate": 2.7122304286299115e-05, "loss": 0.0772, "step": 37950 }, { "epoch": 2.2452238717690896, "grad_norm": 0.20175443589687347, "learning_rate": 2.7120611676824522e-05, "loss": 0.0669, "step": 37960 }, { "epoch": 2.245815342757438, "grad_norm": 0.3303905129432678, "learning_rate": 2.711891862256253e-05, "loss": 0.086, "step": 37970 }, { "epoch": 2.2464068137457858, "grad_norm": 0.2728011906147003, "learning_rate": 2.7117225123575267e-05, "loss": 0.0779, "step": 37980 }, { "epoch": 2.2469982847341337, "grad_norm": 0.4008581042289734, "learning_rate": 2.7115531179924885e-05, "loss": 0.0739, "step": 37990 }, { "epoch": 2.247589755722482, "grad_norm": 0.32954931259155273, "learning_rate": 2.711383679167354e-05, "loss": 0.0723, "step": 38000 }, { "epoch": 2.24818122671083, "grad_norm": 0.35465487837791443, "learning_rate": 2.7112141958883414e-05, "loss": 0.0618, "step": 38010 }, { "epoch": 2.2487726976991778, "grad_norm": 0.22977440059185028, "learning_rate": 2.71104466816167e-05, "loss": 0.0696, "step": 38020 }, { "epoch": 2.249364168687526, "grad_norm": 0.31575125455856323, "learning_rate": 2.7108750959935616e-05, "loss": 0.0808, "step": 38030 }, { "epoch": 2.249955639675874, "grad_norm": 0.2889094352722168, "learning_rate": 2.710705479390238e-05, "loss": 0.0766, "step": 38040 }, { "epoch": 2.250547110664222, "grad_norm": 0.2106386423110962, "learning_rate": 2.7105358183579238e-05, "loss": 0.077, "step": 38050 }, { "epoch": 2.2511385816525697, "grad_norm": 0.5413414835929871, "learning_rate": 2.7103661129028454e-05, "loss": 0.0633, "step": 38060 }, { "epoch": 2.251730052640918, "grad_norm": 0.2311921864748001, "learning_rate": 2.7101963630312298e-05, "loss": 0.0761, "step": 38070 }, { "epoch": 2.252321523629266, "grad_norm": 0.25516486167907715, "learning_rate": 2.710026568749307e-05, "loss": 0.0742, "step": 38080 }, { "epoch": 2.252912994617614, "grad_norm": 0.20909439027309418, "learning_rate": 2.7098567300633075e-05, "loss": 0.0632, "step": 38090 }, { "epoch": 2.253504465605962, "grad_norm": 0.2684396207332611, "learning_rate": 2.7096868469794644e-05, "loss": 0.0677, "step": 38100 }, { "epoch": 2.25409593659431, "grad_norm": 0.18223819136619568, "learning_rate": 2.7095169195040107e-05, "loss": 0.0657, "step": 38110 }, { "epoch": 2.254687407582658, "grad_norm": 0.445408433675766, "learning_rate": 2.709346947643183e-05, "loss": 0.0765, "step": 38120 }, { "epoch": 2.2552788785710063, "grad_norm": 0.23565718531608582, "learning_rate": 2.709176931403219e-05, "loss": 0.0756, "step": 38130 }, { "epoch": 2.255870349559354, "grad_norm": 0.5329472422599792, "learning_rate": 2.709006870790357e-05, "loss": 0.0756, "step": 38140 }, { "epoch": 2.256461820547702, "grad_norm": 0.3265543580055237, "learning_rate": 2.7088367658108387e-05, "loss": 0.0735, "step": 38150 }, { "epoch": 2.2570532915360504, "grad_norm": 0.20542387664318085, "learning_rate": 2.708666616470905e-05, "loss": 0.0575, "step": 38160 }, { "epoch": 2.2576447625243983, "grad_norm": 0.31883516907691956, "learning_rate": 2.7084964227768004e-05, "loss": 0.0697, "step": 38170 }, { "epoch": 2.258236233512746, "grad_norm": 0.26377737522125244, "learning_rate": 2.7083261847347714e-05, "loss": 0.0812, "step": 38180 }, { "epoch": 2.258827704501094, "grad_norm": 0.29453369975090027, "learning_rate": 2.708155902351064e-05, "loss": 0.0847, "step": 38190 }, { "epoch": 2.2594191754894424, "grad_norm": 0.33363741636276245, "learning_rate": 2.707985575631927e-05, "loss": 0.0771, "step": 38200 }, { "epoch": 2.2600106464777903, "grad_norm": 0.4209578335285187, "learning_rate": 2.7078152045836117e-05, "loss": 0.071, "step": 38210 }, { "epoch": 2.260602117466138, "grad_norm": 0.3668021857738495, "learning_rate": 2.70764478921237e-05, "loss": 0.0881, "step": 38220 }, { "epoch": 2.2611935884544865, "grad_norm": 0.223727747797966, "learning_rate": 2.707474329524455e-05, "loss": 0.0759, "step": 38230 }, { "epoch": 2.2617850594428344, "grad_norm": 0.20635779201984406, "learning_rate": 2.7073038255261223e-05, "loss": 0.0624, "step": 38240 }, { "epoch": 2.2623765304311823, "grad_norm": 0.17119941115379333, "learning_rate": 2.707133277223629e-05, "loss": 0.0599, "step": 38250 }, { "epoch": 2.26296800141953, "grad_norm": 0.2572097182273865, "learning_rate": 2.7069626846232332e-05, "loss": 0.0709, "step": 38260 }, { "epoch": 2.2635594724078785, "grad_norm": 0.25038477778434753, "learning_rate": 2.706792047731196e-05, "loss": 0.0725, "step": 38270 }, { "epoch": 2.2641509433962264, "grad_norm": 0.2864813506603241, "learning_rate": 2.7066213665537786e-05, "loss": 0.0845, "step": 38280 }, { "epoch": 2.2647424143845742, "grad_norm": 0.2443036139011383, "learning_rate": 2.7064506410972445e-05, "loss": 0.0829, "step": 38290 }, { "epoch": 2.2653338853729226, "grad_norm": 0.3704104423522949, "learning_rate": 2.7062798713678586e-05, "loss": 0.079, "step": 38300 }, { "epoch": 2.2659253563612705, "grad_norm": 0.30270034074783325, "learning_rate": 2.7061090573718878e-05, "loss": 0.0677, "step": 38310 }, { "epoch": 2.2665168273496183, "grad_norm": 0.22173014283180237, "learning_rate": 2.7059381991156008e-05, "loss": 0.0742, "step": 38320 }, { "epoch": 2.2671082983379667, "grad_norm": 0.3244498372077942, "learning_rate": 2.7057672966052667e-05, "loss": 0.0862, "step": 38330 }, { "epoch": 2.2676997693263146, "grad_norm": 0.23792847990989685, "learning_rate": 2.7055963498471583e-05, "loss": 0.0866, "step": 38340 }, { "epoch": 2.2682912403146624, "grad_norm": 0.31139102578163147, "learning_rate": 2.7054253588475477e-05, "loss": 0.072, "step": 38350 }, { "epoch": 2.268882711303011, "grad_norm": 0.34200698137283325, "learning_rate": 2.70525432361271e-05, "loss": 0.0591, "step": 38360 }, { "epoch": 2.2694741822913587, "grad_norm": 0.31849950551986694, "learning_rate": 2.7050832441489215e-05, "loss": 0.0786, "step": 38370 }, { "epoch": 2.2700656532797066, "grad_norm": 0.38518065214157104, "learning_rate": 2.704912120462461e-05, "loss": 0.0785, "step": 38380 }, { "epoch": 2.2706571242680544, "grad_norm": 0.16469204425811768, "learning_rate": 2.704740952559607e-05, "loss": 0.078, "step": 38390 }, { "epoch": 2.2712485952564028, "grad_norm": 0.22767549753189087, "learning_rate": 2.704569740446642e-05, "loss": 0.0686, "step": 38400 }, { "epoch": 2.2718400662447507, "grad_norm": 0.2281775176525116, "learning_rate": 2.7043984841298487e-05, "loss": 0.0665, "step": 38410 }, { "epoch": 2.2724315372330985, "grad_norm": 0.21098807454109192, "learning_rate": 2.7042271836155107e-05, "loss": 0.0766, "step": 38420 }, { "epoch": 2.273023008221447, "grad_norm": 0.21277029812335968, "learning_rate": 2.7040558389099148e-05, "loss": 0.0868, "step": 38430 }, { "epoch": 2.2736144792097948, "grad_norm": 0.2861267030239105, "learning_rate": 2.703884450019349e-05, "loss": 0.083, "step": 38440 }, { "epoch": 2.2742059501981426, "grad_norm": 0.24404913187026978, "learning_rate": 2.7037130169501028e-05, "loss": 0.0668, "step": 38450 }, { "epoch": 2.274797421186491, "grad_norm": 1.1289016008377075, "learning_rate": 2.703541539708467e-05, "loss": 0.0659, "step": 38460 }, { "epoch": 2.275388892174839, "grad_norm": 0.181890606880188, "learning_rate": 2.703370018300734e-05, "loss": 0.0713, "step": 38470 }, { "epoch": 2.2759803631631867, "grad_norm": 0.24722044169902802, "learning_rate": 2.7031984527331987e-05, "loss": 0.0777, "step": 38480 }, { "epoch": 2.276571834151535, "grad_norm": 0.9479984641075134, "learning_rate": 2.7030268430121566e-05, "loss": 0.0731, "step": 38490 }, { "epoch": 2.277163305139883, "grad_norm": 0.2380731999874115, "learning_rate": 2.7028551891439046e-05, "loss": 0.0777, "step": 38500 }, { "epoch": 2.277754776128231, "grad_norm": 0.15304678678512573, "learning_rate": 2.7026834911347434e-05, "loss": 0.0744, "step": 38510 }, { "epoch": 2.2783462471165787, "grad_norm": 0.3424718976020813, "learning_rate": 2.7025117489909724e-05, "loss": 0.0863, "step": 38520 }, { "epoch": 2.278937718104927, "grad_norm": 0.3185748755931854, "learning_rate": 2.7023399627188948e-05, "loss": 0.0853, "step": 38530 }, { "epoch": 2.279529189093275, "grad_norm": 0.2563779652118683, "learning_rate": 2.702168132324814e-05, "loss": 0.0834, "step": 38540 }, { "epoch": 2.280120660081623, "grad_norm": 0.2809531092643738, "learning_rate": 2.7019962578150356e-05, "loss": 0.077, "step": 38550 }, { "epoch": 2.280712131069971, "grad_norm": 0.14969602227210999, "learning_rate": 2.7018243391958678e-05, "loss": 0.0613, "step": 38560 }, { "epoch": 2.281303602058319, "grad_norm": 0.2659991681575775, "learning_rate": 2.7016523764736186e-05, "loss": 0.071, "step": 38570 }, { "epoch": 2.281895073046667, "grad_norm": 0.3962456285953522, "learning_rate": 2.7014803696545983e-05, "loss": 0.0906, "step": 38580 }, { "epoch": 2.2824865440350153, "grad_norm": 0.23918688297271729, "learning_rate": 2.7013083187451197e-05, "loss": 0.0822, "step": 38590 }, { "epoch": 2.283078015023363, "grad_norm": 0.21186165511608124, "learning_rate": 2.701136223751496e-05, "loss": 0.0672, "step": 38600 }, { "epoch": 2.283669486011711, "grad_norm": 0.18772625923156738, "learning_rate": 2.700964084680043e-05, "loss": 0.0651, "step": 38610 }, { "epoch": 2.2842609570000594, "grad_norm": 0.21623879671096802, "learning_rate": 2.700791901537077e-05, "loss": 0.0728, "step": 38620 }, { "epoch": 2.2848524279884073, "grad_norm": 0.15799367427825928, "learning_rate": 2.700619674328917e-05, "loss": 0.0853, "step": 38630 }, { "epoch": 2.285443898976755, "grad_norm": 0.3478884696960449, "learning_rate": 2.7004474030618836e-05, "loss": 0.0833, "step": 38640 }, { "epoch": 2.286035369965103, "grad_norm": 0.27201223373413086, "learning_rate": 2.7002750877422977e-05, "loss": 0.0746, "step": 38650 }, { "epoch": 2.2866268409534514, "grad_norm": 0.19965794682502747, "learning_rate": 2.700102728376483e-05, "loss": 0.0561, "step": 38660 }, { "epoch": 2.2872183119417993, "grad_norm": 0.25584715604782104, "learning_rate": 2.699930324970765e-05, "loss": 0.0825, "step": 38670 }, { "epoch": 2.287809782930147, "grad_norm": 0.4808101952075958, "learning_rate": 2.6997578775314695e-05, "loss": 0.082, "step": 38680 }, { "epoch": 2.2884012539184955, "grad_norm": 0.3358575403690338, "learning_rate": 2.6995853860649258e-05, "loss": 0.0848, "step": 38690 }, { "epoch": 2.2889927249068434, "grad_norm": 0.7943382859230042, "learning_rate": 2.699412850577463e-05, "loss": 0.0681, "step": 38700 }, { "epoch": 2.2895841958951912, "grad_norm": 0.24317410588264465, "learning_rate": 2.699240271075413e-05, "loss": 0.0602, "step": 38710 }, { "epoch": 2.290175666883539, "grad_norm": 0.313153475522995, "learning_rate": 2.6990676475651087e-05, "loss": 0.0767, "step": 38720 }, { "epoch": 2.2907671378718875, "grad_norm": 0.2307540476322174, "learning_rate": 2.6988949800528845e-05, "loss": 0.0718, "step": 38730 }, { "epoch": 2.2913586088602353, "grad_norm": 0.169331893324852, "learning_rate": 2.698722268545078e-05, "loss": 0.0744, "step": 38740 }, { "epoch": 2.2919500798485837, "grad_norm": 0.18648645281791687, "learning_rate": 2.6985495130480256e-05, "loss": 0.0608, "step": 38750 }, { "epoch": 2.2925415508369316, "grad_norm": 0.25810492038726807, "learning_rate": 2.698376713568068e-05, "loss": 0.0577, "step": 38760 }, { "epoch": 2.2931330218252794, "grad_norm": 0.5534600019454956, "learning_rate": 2.6982038701115456e-05, "loss": 0.0792, "step": 38770 }, { "epoch": 2.2937244928136273, "grad_norm": 0.21694576740264893, "learning_rate": 2.6980309826848018e-05, "loss": 0.0735, "step": 38780 }, { "epoch": 2.2943159638019757, "grad_norm": 0.28000885248184204, "learning_rate": 2.6978580512941802e-05, "loss": 0.075, "step": 38790 }, { "epoch": 2.2949074347903236, "grad_norm": 0.20739790797233582, "learning_rate": 2.697685075946028e-05, "loss": 0.0852, "step": 38800 }, { "epoch": 2.2954989057786714, "grad_norm": 0.3235076069831848, "learning_rate": 2.697512056646692e-05, "loss": 0.0579, "step": 38810 }, { "epoch": 2.2960903767670198, "grad_norm": 0.27575254440307617, "learning_rate": 2.6973389934025218e-05, "loss": 0.0754, "step": 38820 }, { "epoch": 2.2966818477553677, "grad_norm": 0.2264142483472824, "learning_rate": 2.697165886219868e-05, "loss": 0.0886, "step": 38830 }, { "epoch": 2.2972733187437155, "grad_norm": 0.4441242516040802, "learning_rate": 2.696992735105083e-05, "loss": 0.0836, "step": 38840 }, { "epoch": 2.2978647897320634, "grad_norm": 0.347817599773407, "learning_rate": 2.6968195400645215e-05, "loss": 0.0656, "step": 38850 }, { "epoch": 2.2984562607204118, "grad_norm": 0.18984340131282806, "learning_rate": 2.6966463011045386e-05, "loss": 0.0662, "step": 38860 }, { "epoch": 2.2990477317087596, "grad_norm": 0.20624710619449615, "learning_rate": 2.696473018231492e-05, "loss": 0.0745, "step": 38870 }, { "epoch": 2.2996392026971075, "grad_norm": 0.3297380805015564, "learning_rate": 2.69629969145174e-05, "loss": 0.0863, "step": 38880 }, { "epoch": 2.300230673685456, "grad_norm": 0.14531248807907104, "learning_rate": 2.6961263207716437e-05, "loss": 0.0779, "step": 38890 }, { "epoch": 2.3008221446738037, "grad_norm": 0.24709321558475494, "learning_rate": 2.6959529061975653e-05, "loss": 0.0704, "step": 38900 }, { "epoch": 2.3014136156621516, "grad_norm": 0.24293270707130432, "learning_rate": 2.6957794477358684e-05, "loss": 0.0688, "step": 38910 }, { "epoch": 2.3020050866505, "grad_norm": 0.32211834192276, "learning_rate": 2.6956059453929184e-05, "loss": 0.0809, "step": 38920 }, { "epoch": 2.302596557638848, "grad_norm": 0.3136073052883148, "learning_rate": 2.6954323991750817e-05, "loss": 0.0771, "step": 38930 }, { "epoch": 2.3031880286271957, "grad_norm": 0.27235642075538635, "learning_rate": 2.695258809088728e-05, "loss": 0.0944, "step": 38940 }, { "epoch": 2.303779499615544, "grad_norm": 0.16962893307209015, "learning_rate": 2.6950851751402266e-05, "loss": 0.0771, "step": 38950 }, { "epoch": 2.304370970603892, "grad_norm": 0.20865239202976227, "learning_rate": 2.6949114973359496e-05, "loss": 0.0601, "step": 38960 }, { "epoch": 2.30496244159224, "grad_norm": 0.21170762181282043, "learning_rate": 2.6947377756822708e-05, "loss": 0.0757, "step": 38970 }, { "epoch": 2.3055539125805877, "grad_norm": 0.3089379370212555, "learning_rate": 2.6945640101855645e-05, "loss": 0.0837, "step": 38980 }, { "epoch": 2.306145383568936, "grad_norm": 0.2679968476295471, "learning_rate": 2.6943902008522073e-05, "loss": 0.0797, "step": 38990 }, { "epoch": 2.306736854557284, "grad_norm": 0.30713382363319397, "learning_rate": 2.6942163476885783e-05, "loss": 0.0848, "step": 39000 }, { "epoch": 2.307328325545632, "grad_norm": 0.18123817443847656, "learning_rate": 2.6940424507010567e-05, "loss": 0.0662, "step": 39010 }, { "epoch": 2.30791979653398, "grad_norm": 0.23424990475177765, "learning_rate": 2.693868509896024e-05, "loss": 0.0766, "step": 39020 }, { "epoch": 2.308511267522328, "grad_norm": 0.2148856669664383, "learning_rate": 2.693694525279864e-05, "loss": 0.081, "step": 39030 }, { "epoch": 2.309102738510676, "grad_norm": 0.23139645159244537, "learning_rate": 2.69352049685896e-05, "loss": 0.0792, "step": 39040 }, { "epoch": 2.3096942094990243, "grad_norm": 0.44708383083343506, "learning_rate": 2.6933464246396996e-05, "loss": 0.0796, "step": 39050 }, { "epoch": 2.310285680487372, "grad_norm": 0.17485760152339935, "learning_rate": 2.69317230862847e-05, "loss": 0.0558, "step": 39060 }, { "epoch": 2.31087715147572, "grad_norm": 0.3691437542438507, "learning_rate": 2.6929981488316608e-05, "loss": 0.0693, "step": 39070 }, { "epoch": 2.3114686224640684, "grad_norm": 0.24708475172519684, "learning_rate": 2.6928239452556633e-05, "loss": 0.0792, "step": 39080 }, { "epoch": 2.3120600934524163, "grad_norm": 0.3004644811153412, "learning_rate": 2.6926496979068697e-05, "loss": 0.0851, "step": 39090 }, { "epoch": 2.312651564440764, "grad_norm": 0.225742906332016, "learning_rate": 2.692475406791675e-05, "loss": 0.0774, "step": 39100 }, { "epoch": 2.313243035429112, "grad_norm": 0.2543518543243408, "learning_rate": 2.6923010719164742e-05, "loss": 0.0601, "step": 39110 }, { "epoch": 2.3138345064174604, "grad_norm": 0.24059675633907318, "learning_rate": 2.6921266932876663e-05, "loss": 0.0737, "step": 39120 }, { "epoch": 2.3144259774058082, "grad_norm": 0.3145280182361603, "learning_rate": 2.691952270911649e-05, "loss": 0.0735, "step": 39130 }, { "epoch": 2.315017448394156, "grad_norm": 2.8728466033935547, "learning_rate": 2.6917778047948236e-05, "loss": 0.0878, "step": 39140 }, { "epoch": 2.3156089193825045, "grad_norm": 0.26752394437789917, "learning_rate": 2.6916032949435926e-05, "loss": 0.0638, "step": 39150 }, { "epoch": 2.3162003903708523, "grad_norm": 0.25884559750556946, "learning_rate": 2.69142874136436e-05, "loss": 0.0688, "step": 39160 }, { "epoch": 2.3167918613592002, "grad_norm": 0.20620672404766083, "learning_rate": 2.6912541440635307e-05, "loss": 0.0831, "step": 39170 }, { "epoch": 2.317383332347548, "grad_norm": 0.2911711037158966, "learning_rate": 2.6910795030475124e-05, "loss": 0.0905, "step": 39180 }, { "epoch": 2.3179748033358964, "grad_norm": 0.16319561004638672, "learning_rate": 2.6909048183227135e-05, "loss": 0.0716, "step": 39190 }, { "epoch": 2.3185662743242443, "grad_norm": 0.3008088767528534, "learning_rate": 2.6907300898955453e-05, "loss": 0.065, "step": 39200 }, { "epoch": 2.3191577453125927, "grad_norm": 0.6821818351745605, "learning_rate": 2.690555317772419e-05, "loss": 0.0566, "step": 39210 }, { "epoch": 2.3197492163009406, "grad_norm": 0.2608467936515808, "learning_rate": 2.6903805019597478e-05, "loss": 0.0763, "step": 39220 }, { "epoch": 2.3203406872892884, "grad_norm": 0.22508038580417633, "learning_rate": 2.6902056424639477e-05, "loss": 0.078, "step": 39230 }, { "epoch": 2.3209321582776363, "grad_norm": 0.1956833153963089, "learning_rate": 2.6900307392914352e-05, "loss": 0.0756, "step": 39240 }, { "epoch": 2.3215236292659847, "grad_norm": 0.1783047467470169, "learning_rate": 2.689855792448628e-05, "loss": 0.0693, "step": 39250 }, { "epoch": 2.3221151002543325, "grad_norm": 0.2220461368560791, "learning_rate": 2.6896808019419472e-05, "loss": 0.0672, "step": 39260 }, { "epoch": 2.3227065712426804, "grad_norm": 0.32944241166114807, "learning_rate": 2.6895057677778138e-05, "loss": 0.0747, "step": 39270 }, { "epoch": 2.3232980422310288, "grad_norm": 0.24663564562797546, "learning_rate": 2.6893306899626512e-05, "loss": 0.081, "step": 39280 }, { "epoch": 2.3238895132193766, "grad_norm": 0.3418060541152954, "learning_rate": 2.6891555685028842e-05, "loss": 0.0703, "step": 39290 }, { "epoch": 2.3244809842077245, "grad_norm": 0.3519322872161865, "learning_rate": 2.6889804034049385e-05, "loss": 0.0668, "step": 39300 }, { "epoch": 2.3250724551960724, "grad_norm": 0.26106688380241394, "learning_rate": 2.688805194675243e-05, "loss": 0.0635, "step": 39310 }, { "epoch": 2.3256639261844207, "grad_norm": 0.3929954469203949, "learning_rate": 2.688629942320227e-05, "loss": 0.0797, "step": 39320 }, { "epoch": 2.3262553971727686, "grad_norm": 0.28056490421295166, "learning_rate": 2.6884546463463217e-05, "loss": 0.09, "step": 39330 }, { "epoch": 2.3268468681611165, "grad_norm": 0.23964697122573853, "learning_rate": 2.6882793067599596e-05, "loss": 0.0665, "step": 39340 }, { "epoch": 2.327438339149465, "grad_norm": 0.6039820313453674, "learning_rate": 2.6881039235675753e-05, "loss": 0.0652, "step": 39350 }, { "epoch": 2.3280298101378127, "grad_norm": 1.001520037651062, "learning_rate": 2.6879284967756052e-05, "loss": 0.0638, "step": 39360 }, { "epoch": 2.3286212811261606, "grad_norm": 0.2546440660953522, "learning_rate": 2.6877530263904865e-05, "loss": 0.0717, "step": 39370 }, { "epoch": 2.329212752114509, "grad_norm": 0.2555360794067383, "learning_rate": 2.687577512418658e-05, "loss": 0.0764, "step": 39380 }, { "epoch": 2.329804223102857, "grad_norm": 0.19859707355499268, "learning_rate": 2.687401954866561e-05, "loss": 0.0757, "step": 39390 }, { "epoch": 2.3303956940912047, "grad_norm": 0.2444983422756195, "learning_rate": 2.687226353740638e-05, "loss": 0.0701, "step": 39400 }, { "epoch": 2.330987165079553, "grad_norm": 0.20022106170654297, "learning_rate": 2.687050709047333e-05, "loss": 0.066, "step": 39410 }, { "epoch": 2.331578636067901, "grad_norm": 0.2595757842063904, "learning_rate": 2.6868750207930918e-05, "loss": 0.0698, "step": 39420 }, { "epoch": 2.332170107056249, "grad_norm": 0.27539733052253723, "learning_rate": 2.6866992889843602e-05, "loss": 0.0756, "step": 39430 }, { "epoch": 2.3327615780445967, "grad_norm": 0.23342947661876678, "learning_rate": 2.686523513627589e-05, "loss": 0.0765, "step": 39440 }, { "epoch": 2.333353049032945, "grad_norm": 0.27297982573509216, "learning_rate": 2.686347694729227e-05, "loss": 0.0701, "step": 39450 }, { "epoch": 2.333944520021293, "grad_norm": 0.27359870076179504, "learning_rate": 2.6861718322957268e-05, "loss": 0.0581, "step": 39460 }, { "epoch": 2.334535991009641, "grad_norm": 0.2839185297489166, "learning_rate": 2.685995926333542e-05, "loss": 0.0859, "step": 39470 }, { "epoch": 2.335127461997989, "grad_norm": 0.2226552963256836, "learning_rate": 2.6858199768491275e-05, "loss": 0.0819, "step": 39480 }, { "epoch": 2.335718932986337, "grad_norm": 0.23184406757354736, "learning_rate": 2.685643983848941e-05, "loss": 0.0926, "step": 39490 }, { "epoch": 2.336310403974685, "grad_norm": 0.24150829017162323, "learning_rate": 2.6854679473394396e-05, "loss": 0.0676, "step": 39500 }, { "epoch": 2.3369018749630333, "grad_norm": 0.18258993327617645, "learning_rate": 2.6852918673270844e-05, "loss": 0.0647, "step": 39510 }, { "epoch": 2.337493345951381, "grad_norm": 0.2630647122859955, "learning_rate": 2.685115743818336e-05, "loss": 0.0745, "step": 39520 }, { "epoch": 2.338084816939729, "grad_norm": 0.26644113659858704, "learning_rate": 2.6849395768196583e-05, "loss": 0.0781, "step": 39530 }, { "epoch": 2.3386762879280774, "grad_norm": 0.22776491940021515, "learning_rate": 2.6847633663375154e-05, "loss": 0.0835, "step": 39540 }, { "epoch": 2.3392677589164252, "grad_norm": 0.2757827639579773, "learning_rate": 2.6845871123783742e-05, "loss": 0.0632, "step": 39550 }, { "epoch": 2.339859229904773, "grad_norm": 0.30542564392089844, "learning_rate": 2.684410814948703e-05, "loss": 0.0543, "step": 39560 }, { "epoch": 2.340450700893121, "grad_norm": 0.33210325241088867, "learning_rate": 2.6842344740549704e-05, "loss": 0.0781, "step": 39570 }, { "epoch": 2.3410421718814693, "grad_norm": 0.2950170934200287, "learning_rate": 2.6840580897036474e-05, "loss": 0.092, "step": 39580 }, { "epoch": 2.3416336428698172, "grad_norm": 0.23428918421268463, "learning_rate": 2.683881661901208e-05, "loss": 0.0733, "step": 39590 }, { "epoch": 2.342225113858165, "grad_norm": 0.3115044832229614, "learning_rate": 2.683705190654126e-05, "loss": 0.0801, "step": 39600 }, { "epoch": 2.3428165848465135, "grad_norm": 0.27959397435188293, "learning_rate": 2.683528675968877e-05, "loss": 0.0613, "step": 39610 }, { "epoch": 2.3434080558348613, "grad_norm": 0.23413562774658203, "learning_rate": 2.6833521178519384e-05, "loss": 0.0697, "step": 39620 }, { "epoch": 2.3439995268232092, "grad_norm": 0.2046659141778946, "learning_rate": 2.6831755163097898e-05, "loss": 0.0772, "step": 39630 }, { "epoch": 2.344590997811557, "grad_norm": 0.2535099685192108, "learning_rate": 2.682998871348912e-05, "loss": 0.0766, "step": 39640 }, { "epoch": 2.3451824687999054, "grad_norm": 0.4044240713119507, "learning_rate": 2.6828221829757862e-05, "loss": 0.0699, "step": 39650 }, { "epoch": 2.3457739397882533, "grad_norm": 0.24525873363018036, "learning_rate": 2.682645451196898e-05, "loss": 0.0657, "step": 39660 }, { "epoch": 2.3463654107766017, "grad_norm": 0.2785421907901764, "learning_rate": 2.6824686760187312e-05, "loss": 0.0773, "step": 39670 }, { "epoch": 2.3469568817649495, "grad_norm": 0.4772386848926544, "learning_rate": 2.682291857447774e-05, "loss": 0.0828, "step": 39680 }, { "epoch": 2.3475483527532974, "grad_norm": 0.20973297953605652, "learning_rate": 2.682114995490515e-05, "loss": 0.0784, "step": 39690 }, { "epoch": 2.3481398237416453, "grad_norm": 0.2743854820728302, "learning_rate": 2.6819380901534437e-05, "loss": 0.0746, "step": 39700 }, { "epoch": 2.3487312947299936, "grad_norm": 0.2579994201660156, "learning_rate": 2.681761141443053e-05, "loss": 0.0632, "step": 39710 }, { "epoch": 2.3493227657183415, "grad_norm": 0.27801713347435, "learning_rate": 2.6815841493658358e-05, "loss": 0.0749, "step": 39720 }, { "epoch": 2.3499142367066894, "grad_norm": 0.3164948523044586, "learning_rate": 2.681407113928287e-05, "loss": 0.0852, "step": 39730 }, { "epoch": 2.3505057076950377, "grad_norm": 0.34830763936042786, "learning_rate": 2.6812300351369027e-05, "loss": 0.0793, "step": 39740 }, { "epoch": 2.3510971786833856, "grad_norm": 0.13908717036247253, "learning_rate": 2.681052912998183e-05, "loss": 0.0688, "step": 39750 }, { "epoch": 2.3516886496717335, "grad_norm": 0.28207018971443176, "learning_rate": 2.6808757475186255e-05, "loss": 0.0653, "step": 39760 }, { "epoch": 2.3522801206600814, "grad_norm": 0.27666306495666504, "learning_rate": 2.6806985387047333e-05, "loss": 0.0776, "step": 39770 }, { "epoch": 2.3528715916484297, "grad_norm": 0.20678985118865967, "learning_rate": 2.6805212865630082e-05, "loss": 0.0906, "step": 39780 }, { "epoch": 2.3534630626367776, "grad_norm": 0.25511568784713745, "learning_rate": 2.6803439910999556e-05, "loss": 0.0785, "step": 39790 }, { "epoch": 2.3540545336251255, "grad_norm": 0.17124611139297485, "learning_rate": 2.6801666523220815e-05, "loss": 0.0697, "step": 39800 }, { "epoch": 2.354646004613474, "grad_norm": 0.18933486938476562, "learning_rate": 2.6799892702358933e-05, "loss": 0.0662, "step": 39810 }, { "epoch": 2.3552374756018217, "grad_norm": 0.7272133231163025, "learning_rate": 2.6798118448479005e-05, "loss": 0.0694, "step": 39820 }, { "epoch": 2.3558289465901696, "grad_norm": 0.2953447103500366, "learning_rate": 2.679634376164614e-05, "loss": 0.079, "step": 39830 }, { "epoch": 2.356420417578518, "grad_norm": 0.27742907404899597, "learning_rate": 2.679456864192547e-05, "loss": 0.0769, "step": 39840 }, { "epoch": 2.357011888566866, "grad_norm": 0.4949093461036682, "learning_rate": 2.6792793089382125e-05, "loss": 0.083, "step": 39850 }, { "epoch": 2.3576033595552137, "grad_norm": 0.2878926396369934, "learning_rate": 2.6791017104081272e-05, "loss": 0.057, "step": 39860 }, { "epoch": 2.358194830543562, "grad_norm": 0.24683928489685059, "learning_rate": 2.6789240686088075e-05, "loss": 0.0724, "step": 39870 }, { "epoch": 2.35878630153191, "grad_norm": 0.30309173464775085, "learning_rate": 2.6787463835467728e-05, "loss": 0.0852, "step": 39880 }, { "epoch": 2.359377772520258, "grad_norm": 0.23982690274715424, "learning_rate": 2.6785686552285436e-05, "loss": 0.0722, "step": 39890 }, { "epoch": 2.3599692435086057, "grad_norm": 0.17430318892002106, "learning_rate": 2.678390883660642e-05, "loss": 0.0755, "step": 39900 }, { "epoch": 2.360560714496954, "grad_norm": 0.2135031372308731, "learning_rate": 2.6782130688495916e-05, "loss": 0.0615, "step": 39910 }, { "epoch": 2.361152185485302, "grad_norm": 0.2644331753253937, "learning_rate": 2.6780352108019178e-05, "loss": 0.0815, "step": 39920 }, { "epoch": 2.36174365647365, "grad_norm": 0.2282927930355072, "learning_rate": 2.6778573095241466e-05, "loss": 0.0735, "step": 39930 }, { "epoch": 2.362335127461998, "grad_norm": 0.309355765581131, "learning_rate": 2.6776793650228073e-05, "loss": 0.0842, "step": 39940 }, { "epoch": 2.362926598450346, "grad_norm": 0.1253838986158371, "learning_rate": 2.6775013773044293e-05, "loss": 0.0724, "step": 39950 }, { "epoch": 2.363518069438694, "grad_norm": 0.257951021194458, "learning_rate": 2.6773233463755445e-05, "loss": 0.0677, "step": 39960 }, { "epoch": 2.3641095404270422, "grad_norm": 0.23855766654014587, "learning_rate": 2.677145272242686e-05, "loss": 0.0771, "step": 39970 }, { "epoch": 2.36470101141539, "grad_norm": 0.28499796986579895, "learning_rate": 2.6769671549123887e-05, "loss": 0.0868, "step": 39980 }, { "epoch": 2.365292482403738, "grad_norm": 0.2676307260990143, "learning_rate": 2.6767889943911888e-05, "loss": 0.0818, "step": 39990 }, { "epoch": 2.3658839533920863, "grad_norm": 0.2943953573703766, "learning_rate": 2.676610790685624e-05, "loss": 0.0791, "step": 40000 }, { "epoch": 2.3664754243804342, "grad_norm": 0.2631187438964844, "learning_rate": 2.676432543802234e-05, "loss": 0.062, "step": 40010 }, { "epoch": 2.367066895368782, "grad_norm": 0.28907299041748047, "learning_rate": 2.6762542537475597e-05, "loss": 0.0772, "step": 40020 }, { "epoch": 2.36765836635713, "grad_norm": 0.16524738073349, "learning_rate": 2.676075920528144e-05, "loss": 0.0778, "step": 40030 }, { "epoch": 2.3682498373454783, "grad_norm": 0.2779484987258911, "learning_rate": 2.6758975441505313e-05, "loss": 0.0744, "step": 40040 }, { "epoch": 2.3688413083338262, "grad_norm": 0.3103736639022827, "learning_rate": 2.675719124621267e-05, "loss": 0.0698, "step": 40050 }, { "epoch": 2.369432779322174, "grad_norm": 0.46230778098106384, "learning_rate": 2.6755406619468992e-05, "loss": 0.0691, "step": 40060 }, { "epoch": 2.3700242503105224, "grad_norm": 0.3021988570690155, "learning_rate": 2.6753621561339764e-05, "loss": 0.0663, "step": 40070 }, { "epoch": 2.3706157212988703, "grad_norm": 0.36752405762672424, "learning_rate": 2.675183607189049e-05, "loss": 0.0795, "step": 40080 }, { "epoch": 2.371207192287218, "grad_norm": 0.24246838688850403, "learning_rate": 2.6750050151186694e-05, "loss": 0.0795, "step": 40090 }, { "epoch": 2.371798663275566, "grad_norm": 0.1745341718196869, "learning_rate": 2.6748263799293914e-05, "loss": 0.0646, "step": 40100 }, { "epoch": 2.3723901342639144, "grad_norm": 0.19389665126800537, "learning_rate": 2.6746477016277702e-05, "loss": 0.0713, "step": 40110 }, { "epoch": 2.3729816052522623, "grad_norm": 0.2702147364616394, "learning_rate": 2.6744689802203632e-05, "loss": 0.0746, "step": 40120 }, { "epoch": 2.3735730762406106, "grad_norm": 0.30269303917884827, "learning_rate": 2.674290215713728e-05, "loss": 0.0809, "step": 40130 }, { "epoch": 2.3741645472289585, "grad_norm": 0.25317466259002686, "learning_rate": 2.6741114081144258e-05, "loss": 0.0773, "step": 40140 }, { "epoch": 2.3747560182173064, "grad_norm": 0.1674255132675171, "learning_rate": 2.6739325574290173e-05, "loss": 0.0769, "step": 40150 }, { "epoch": 2.3753474892056543, "grad_norm": 0.1775890439748764, "learning_rate": 2.6737536636640664e-05, "loss": 0.0659, "step": 40160 }, { "epoch": 2.3759389601940026, "grad_norm": 0.21198303997516632, "learning_rate": 2.6735747268261376e-05, "loss": 0.075, "step": 40170 }, { "epoch": 2.3765304311823505, "grad_norm": 0.1887165606021881, "learning_rate": 2.673395746921797e-05, "loss": 0.0819, "step": 40180 }, { "epoch": 2.3771219021706984, "grad_norm": 0.23739440739154816, "learning_rate": 2.6732167239576135e-05, "loss": 0.0765, "step": 40190 }, { "epoch": 2.3777133731590467, "grad_norm": 0.19740553200244904, "learning_rate": 2.6730376579401556e-05, "loss": 0.07, "step": 40200 }, { "epoch": 2.3783048441473946, "grad_norm": 0.23151017725467682, "learning_rate": 2.672858548875995e-05, "loss": 0.0582, "step": 40210 }, { "epoch": 2.3788963151357425, "grad_norm": 0.3677885830402374, "learning_rate": 2.6726793967717047e-05, "loss": 0.0801, "step": 40220 }, { "epoch": 2.3794877861240904, "grad_norm": 0.23326925933361053, "learning_rate": 2.6725002016338586e-05, "loss": 0.0844, "step": 40230 }, { "epoch": 2.3800792571124387, "grad_norm": 0.2128097116947174, "learning_rate": 2.6723209634690325e-05, "loss": 0.0689, "step": 40240 }, { "epoch": 2.3806707281007866, "grad_norm": 0.19877108931541443, "learning_rate": 2.672141682283804e-05, "loss": 0.0739, "step": 40250 }, { "epoch": 2.3812621990891345, "grad_norm": 0.37866899371147156, "learning_rate": 2.6719623580847525e-05, "loss": 0.0624, "step": 40260 }, { "epoch": 2.381853670077483, "grad_norm": 0.2981448769569397, "learning_rate": 2.671782990878458e-05, "loss": 0.0732, "step": 40270 }, { "epoch": 2.3824451410658307, "grad_norm": 0.2802310883998871, "learning_rate": 2.6716035806715028e-05, "loss": 0.0693, "step": 40280 }, { "epoch": 2.3830366120541786, "grad_norm": 0.3318893015384674, "learning_rate": 2.6714241274704712e-05, "loss": 0.0716, "step": 40290 }, { "epoch": 2.383628083042527, "grad_norm": 0.31890690326690674, "learning_rate": 2.6712446312819484e-05, "loss": 0.0767, "step": 40300 }, { "epoch": 2.384219554030875, "grad_norm": 0.1615903675556183, "learning_rate": 2.6710650921125208e-05, "loss": 0.0612, "step": 40310 }, { "epoch": 2.3848110250192227, "grad_norm": 0.22492802143096924, "learning_rate": 2.6708855099687773e-05, "loss": 0.0708, "step": 40320 }, { "epoch": 2.385402496007571, "grad_norm": 1.8180094957351685, "learning_rate": 2.670705884857308e-05, "loss": 0.0828, "step": 40330 }, { "epoch": 2.385993966995919, "grad_norm": 0.2425127625465393, "learning_rate": 2.6705262167847045e-05, "loss": 0.0833, "step": 40340 }, { "epoch": 2.386585437984267, "grad_norm": 0.18908506631851196, "learning_rate": 2.67034650575756e-05, "loss": 0.0674, "step": 40350 }, { "epoch": 2.3871769089726147, "grad_norm": 0.3616631329059601, "learning_rate": 2.6701667517824694e-05, "loss": 0.0631, "step": 40360 }, { "epoch": 2.387768379960963, "grad_norm": 0.6358438730239868, "learning_rate": 2.6699869548660293e-05, "loss": 0.0801, "step": 40370 }, { "epoch": 2.388359850949311, "grad_norm": 0.33529290556907654, "learning_rate": 2.6698071150148372e-05, "loss": 0.0767, "step": 40380 }, { "epoch": 2.388951321937659, "grad_norm": 0.24547825753688812, "learning_rate": 2.6696272322354926e-05, "loss": 0.0721, "step": 40390 }, { "epoch": 2.389542792926007, "grad_norm": 0.3144250214099884, "learning_rate": 2.6694473065345976e-05, "loss": 0.0844, "step": 40400 }, { "epoch": 2.390134263914355, "grad_norm": 0.2691476345062256, "learning_rate": 2.6692673379187535e-05, "loss": 0.0639, "step": 40410 }, { "epoch": 2.390725734902703, "grad_norm": 0.24730069935321808, "learning_rate": 2.6690873263945653e-05, "loss": 0.0783, "step": 40420 }, { "epoch": 2.3913172058910512, "grad_norm": 0.3023394048213959, "learning_rate": 2.668907271968639e-05, "loss": 0.0743, "step": 40430 }, { "epoch": 2.391908676879399, "grad_norm": 0.26464733481407166, "learning_rate": 2.6687271746475817e-05, "loss": 0.0729, "step": 40440 }, { "epoch": 2.392500147867747, "grad_norm": 0.27252933382987976, "learning_rate": 2.668547034438003e-05, "loss": 0.067, "step": 40450 }, { "epoch": 2.3930916188560953, "grad_norm": 0.23775574564933777, "learning_rate": 2.668366851346512e-05, "loss": 0.0629, "step": 40460 }, { "epoch": 2.3936830898444432, "grad_norm": 0.3005264401435852, "learning_rate": 2.6681866253797227e-05, "loss": 0.0866, "step": 40470 }, { "epoch": 2.394274560832791, "grad_norm": 0.2925126850605011, "learning_rate": 2.6680063565442476e-05, "loss": 0.0865, "step": 40480 }, { "epoch": 2.394866031821139, "grad_norm": 0.20050930976867676, "learning_rate": 2.667826044846702e-05, "loss": 0.0784, "step": 40490 }, { "epoch": 2.3954575028094873, "grad_norm": 0.1928286999464035, "learning_rate": 2.667645690293703e-05, "loss": 0.0744, "step": 40500 }, { "epoch": 2.396048973797835, "grad_norm": 0.26381656527519226, "learning_rate": 2.6674652928918698e-05, "loss": 0.0716, "step": 40510 }, { "epoch": 2.396640444786183, "grad_norm": 0.2953284680843353, "learning_rate": 2.6672848526478215e-05, "loss": 0.078, "step": 40520 }, { "epoch": 2.3972319157745314, "grad_norm": 0.44961458444595337, "learning_rate": 2.6671043695681794e-05, "loss": 0.0774, "step": 40530 }, { "epoch": 2.3978233867628793, "grad_norm": 0.2578798830509186, "learning_rate": 2.6669238436595675e-05, "loss": 0.0737, "step": 40540 }, { "epoch": 2.398414857751227, "grad_norm": 0.28383195400238037, "learning_rate": 2.66674327492861e-05, "loss": 0.072, "step": 40550 }, { "epoch": 2.399006328739575, "grad_norm": 0.2621991038322449, "learning_rate": 2.6665626633819334e-05, "loss": 0.0607, "step": 40560 }, { "epoch": 2.3995977997279234, "grad_norm": 0.264138400554657, "learning_rate": 2.6663820090261655e-05, "loss": 0.0769, "step": 40570 }, { "epoch": 2.4001892707162713, "grad_norm": 0.2632836699485779, "learning_rate": 2.6662013118679354e-05, "loss": 0.0794, "step": 40580 }, { "epoch": 2.4007807417046196, "grad_norm": 0.30214470624923706, "learning_rate": 2.6660205719138748e-05, "loss": 0.0834, "step": 40590 }, { "epoch": 2.4013722126929675, "grad_norm": 0.298843115568161, "learning_rate": 2.665839789170616e-05, "loss": 0.084, "step": 40600 }, { "epoch": 2.4019636836813154, "grad_norm": 0.19669941067695618, "learning_rate": 2.6656589636447924e-05, "loss": 0.0661, "step": 40610 }, { "epoch": 2.4025551546696633, "grad_norm": 0.37783193588256836, "learning_rate": 2.665478095343041e-05, "loss": 0.0821, "step": 40620 }, { "epoch": 2.4031466256580116, "grad_norm": 0.30438145995140076, "learning_rate": 2.6652971842719982e-05, "loss": 0.0711, "step": 40630 }, { "epoch": 2.4037380966463595, "grad_norm": 0.2195587456226349, "learning_rate": 2.665116230438303e-05, "loss": 0.0716, "step": 40640 }, { "epoch": 2.4043295676347074, "grad_norm": 0.35212671756744385, "learning_rate": 2.6649352338485965e-05, "loss": 0.0765, "step": 40650 }, { "epoch": 2.4049210386230557, "grad_norm": 0.28146132826805115, "learning_rate": 2.6647541945095192e-05, "loss": 0.0704, "step": 40660 }, { "epoch": 2.4055125096114036, "grad_norm": 0.33408689498901367, "learning_rate": 2.664573112427716e-05, "loss": 0.081, "step": 40670 }, { "epoch": 2.4061039805997515, "grad_norm": 0.2580247223377228, "learning_rate": 2.6643919876098316e-05, "loss": 0.0882, "step": 40680 }, { "epoch": 2.4066954515880994, "grad_norm": 0.2352454960346222, "learning_rate": 2.664210820062513e-05, "loss": 0.0827, "step": 40690 }, { "epoch": 2.4072869225764477, "grad_norm": 0.23082613945007324, "learning_rate": 2.664029609792408e-05, "loss": 0.0706, "step": 40700 }, { "epoch": 2.4078783935647956, "grad_norm": 0.17350301146507263, "learning_rate": 2.6638483568061663e-05, "loss": 0.0665, "step": 40710 }, { "epoch": 2.4084698645531435, "grad_norm": 0.5501158237457275, "learning_rate": 2.6636670611104395e-05, "loss": 0.0817, "step": 40720 }, { "epoch": 2.409061335541492, "grad_norm": 0.3090137243270874, "learning_rate": 2.663485722711881e-05, "loss": 0.081, "step": 40730 }, { "epoch": 2.4096528065298397, "grad_norm": 0.36134880781173706, "learning_rate": 2.663304341617145e-05, "loss": 0.0892, "step": 40740 }, { "epoch": 2.4102442775181876, "grad_norm": 0.3425292670726776, "learning_rate": 2.663122917832887e-05, "loss": 0.0651, "step": 40750 }, { "epoch": 2.410835748506536, "grad_norm": 0.29257476329803467, "learning_rate": 2.6629414513657663e-05, "loss": 0.0531, "step": 40760 }, { "epoch": 2.411427219494884, "grad_norm": 0.3050496578216553, "learning_rate": 2.662759942222441e-05, "loss": 0.0786, "step": 40770 }, { "epoch": 2.4120186904832317, "grad_norm": 0.27878907322883606, "learning_rate": 2.6625783904095712e-05, "loss": 0.0794, "step": 40780 }, { "epoch": 2.41261016147158, "grad_norm": 0.21333123743534088, "learning_rate": 2.6623967959338204e-05, "loss": 0.0761, "step": 40790 }, { "epoch": 2.413201632459928, "grad_norm": 0.2691073715686798, "learning_rate": 2.6622151588018525e-05, "loss": 0.0722, "step": 40800 }, { "epoch": 2.413793103448276, "grad_norm": 0.2649717926979065, "learning_rate": 2.6620334790203324e-05, "loss": 0.0703, "step": 40810 }, { "epoch": 2.4143845744366237, "grad_norm": 0.3094264268875122, "learning_rate": 2.6618517565959276e-05, "loss": 0.0723, "step": 40820 }, { "epoch": 2.414976045424972, "grad_norm": 0.28360503911972046, "learning_rate": 2.6616699915353062e-05, "loss": 0.085, "step": 40830 }, { "epoch": 2.41556751641332, "grad_norm": 0.24577435851097107, "learning_rate": 2.6614881838451395e-05, "loss": 0.0823, "step": 40840 }, { "epoch": 2.416158987401668, "grad_norm": 0.2580578923225403, "learning_rate": 2.6613063335320977e-05, "loss": 0.0721, "step": 40850 }, { "epoch": 2.416750458390016, "grad_norm": 0.25996172428131104, "learning_rate": 2.6611244406028557e-05, "loss": 0.0711, "step": 40860 }, { "epoch": 2.417341929378364, "grad_norm": 0.8787683844566345, "learning_rate": 2.6609425050640872e-05, "loss": 0.0781, "step": 40870 }, { "epoch": 2.417933400366712, "grad_norm": 0.3903643786907196, "learning_rate": 2.6607605269224695e-05, "loss": 0.0773, "step": 40880 }, { "epoch": 2.4185248713550602, "grad_norm": 0.2588331699371338, "learning_rate": 2.66057850618468e-05, "loss": 0.0763, "step": 40890 }, { "epoch": 2.419116342343408, "grad_norm": 0.12824323773384094, "learning_rate": 2.6603964428573985e-05, "loss": 0.0723, "step": 40900 }, { "epoch": 2.419707813331756, "grad_norm": 0.20927371084690094, "learning_rate": 2.6602143369473056e-05, "loss": 0.0637, "step": 40910 }, { "epoch": 2.4202992843201043, "grad_norm": 0.33453768491744995, "learning_rate": 2.660032188461085e-05, "loss": 0.0865, "step": 40920 }, { "epoch": 2.420890755308452, "grad_norm": 0.2527134120464325, "learning_rate": 2.6598499974054208e-05, "loss": 0.0748, "step": 40930 }, { "epoch": 2.4214822262968, "grad_norm": 0.26128968596458435, "learning_rate": 2.6596677637869978e-05, "loss": 0.0713, "step": 40940 }, { "epoch": 2.422073697285148, "grad_norm": 0.21824052929878235, "learning_rate": 2.6594854876125045e-05, "loss": 0.0679, "step": 40950 }, { "epoch": 2.4226651682734963, "grad_norm": 0.27969852089881897, "learning_rate": 2.659303168888629e-05, "loss": 0.0694, "step": 40960 }, { "epoch": 2.423256639261844, "grad_norm": 0.2627663016319275, "learning_rate": 2.659120807622063e-05, "loss": 0.0757, "step": 40970 }, { "epoch": 2.423848110250192, "grad_norm": 0.30139273405075073, "learning_rate": 2.6589384038194975e-05, "loss": 0.077, "step": 40980 }, { "epoch": 2.4244395812385404, "grad_norm": 0.26936107873916626, "learning_rate": 2.658755957487626e-05, "loss": 0.0725, "step": 40990 }, { "epoch": 2.4250310522268883, "grad_norm": 0.3026929497718811, "learning_rate": 2.658573468633145e-05, "loss": 0.0739, "step": 41000 }, { "epoch": 2.425622523215236, "grad_norm": 0.3338640034198761, "learning_rate": 2.6583909372627496e-05, "loss": 0.0647, "step": 41010 }, { "epoch": 2.426213994203584, "grad_norm": 0.23431776463985443, "learning_rate": 2.6582083633831394e-05, "loss": 0.0808, "step": 41020 }, { "epoch": 2.4268054651919324, "grad_norm": 0.21546149253845215, "learning_rate": 2.6580257470010135e-05, "loss": 0.0847, "step": 41030 }, { "epoch": 2.4273969361802803, "grad_norm": 0.3444022238254547, "learning_rate": 2.657843088123074e-05, "loss": 0.083, "step": 41040 }, { "epoch": 2.4279884071686286, "grad_norm": 0.21109916269779205, "learning_rate": 2.6576603867560232e-05, "loss": 0.07, "step": 41050 }, { "epoch": 2.4285798781569765, "grad_norm": 0.2264404445886612, "learning_rate": 2.657477642906566e-05, "loss": 0.0609, "step": 41060 }, { "epoch": 2.4291713491453244, "grad_norm": 0.2523472309112549, "learning_rate": 2.657294856581408e-05, "loss": 0.0703, "step": 41070 }, { "epoch": 2.4297628201336723, "grad_norm": 0.28377190232276917, "learning_rate": 2.657112027787258e-05, "loss": 0.0838, "step": 41080 }, { "epoch": 2.4303542911220206, "grad_norm": 0.2032395899295807, "learning_rate": 2.6569291565308243e-05, "loss": 0.0772, "step": 41090 }, { "epoch": 2.4309457621103685, "grad_norm": 0.21096746623516083, "learning_rate": 2.656746242818818e-05, "loss": 0.0706, "step": 41100 }, { "epoch": 2.4315372330987164, "grad_norm": 0.1712249517440796, "learning_rate": 2.6565632866579516e-05, "loss": 0.0612, "step": 41110 }, { "epoch": 2.4321287040870647, "grad_norm": 0.304663747549057, "learning_rate": 2.6563802880549387e-05, "loss": 0.0786, "step": 41120 }, { "epoch": 2.4327201750754126, "grad_norm": 0.2172831892967224, "learning_rate": 2.6561972470164947e-05, "loss": 0.0828, "step": 41130 }, { "epoch": 2.4333116460637605, "grad_norm": 0.23229072988033295, "learning_rate": 2.6560141635493366e-05, "loss": 0.0742, "step": 41140 }, { "epoch": 2.4339031170521084, "grad_norm": 0.26561298966407776, "learning_rate": 2.655831037660184e-05, "loss": 0.0794, "step": 41150 }, { "epoch": 2.4344945880404567, "grad_norm": 0.2776474952697754, "learning_rate": 2.6556478693557552e-05, "loss": 0.0649, "step": 41160 }, { "epoch": 2.4350860590288046, "grad_norm": 0.2971775233745575, "learning_rate": 2.6554646586427734e-05, "loss": 0.0731, "step": 41170 }, { "epoch": 2.4356775300171525, "grad_norm": 0.20176908373832703, "learning_rate": 2.6552814055279612e-05, "loss": 0.071, "step": 41180 }, { "epoch": 2.436269001005501, "grad_norm": 0.2361716479063034, "learning_rate": 2.655098110018043e-05, "loss": 0.0748, "step": 41190 }, { "epoch": 2.4368604719938487, "grad_norm": 0.26726746559143066, "learning_rate": 2.6549147721197467e-05, "loss": 0.0844, "step": 41200 }, { "epoch": 2.4374519429821966, "grad_norm": 0.2654077708721161, "learning_rate": 2.6547313918397985e-05, "loss": 0.0697, "step": 41210 }, { "epoch": 2.438043413970545, "grad_norm": 0.2808907926082611, "learning_rate": 2.6545479691849282e-05, "loss": 0.0677, "step": 41220 }, { "epoch": 2.438634884958893, "grad_norm": 0.5860393047332764, "learning_rate": 2.654364504161868e-05, "loss": 0.0911, "step": 41230 }, { "epoch": 2.4392263559472407, "grad_norm": 0.2244025617837906, "learning_rate": 2.654180996777349e-05, "loss": 0.0743, "step": 41240 }, { "epoch": 2.439817826935589, "grad_norm": 0.1867539882659912, "learning_rate": 2.6539974470381064e-05, "loss": 0.0691, "step": 41250 }, { "epoch": 2.440409297923937, "grad_norm": 0.2897874414920807, "learning_rate": 2.6538138549508752e-05, "loss": 0.0541, "step": 41260 }, { "epoch": 2.441000768912285, "grad_norm": 0.35181358456611633, "learning_rate": 2.6536302205223927e-05, "loss": 0.0652, "step": 41270 }, { "epoch": 2.4415922399006327, "grad_norm": 0.19202551245689392, "learning_rate": 2.653446543759398e-05, "loss": 0.0842, "step": 41280 }, { "epoch": 2.442183710888981, "grad_norm": 0.3214339017868042, "learning_rate": 2.6532628246686315e-05, "loss": 0.0769, "step": 41290 }, { "epoch": 2.442775181877329, "grad_norm": 0.22841446101665497, "learning_rate": 2.653079063256835e-05, "loss": 0.0809, "step": 41300 }, { "epoch": 2.443366652865677, "grad_norm": 0.4121863842010498, "learning_rate": 2.6528952595307517e-05, "loss": 0.0667, "step": 41310 }, { "epoch": 2.443958123854025, "grad_norm": 0.26157647371292114, "learning_rate": 2.6527114134971265e-05, "loss": 0.0742, "step": 41320 }, { "epoch": 2.444549594842373, "grad_norm": 0.2401769608259201, "learning_rate": 2.6525275251627068e-05, "loss": 0.0789, "step": 41330 }, { "epoch": 2.445141065830721, "grad_norm": 0.3285743296146393, "learning_rate": 2.6523435945342396e-05, "loss": 0.0845, "step": 41340 }, { "epoch": 2.445732536819069, "grad_norm": 0.2883967459201813, "learning_rate": 2.6521596216184752e-05, "loss": 0.0801, "step": 41350 }, { "epoch": 2.446324007807417, "grad_norm": 0.29080405831336975, "learning_rate": 2.651975606422165e-05, "loss": 0.0536, "step": 41360 }, { "epoch": 2.446915478795765, "grad_norm": 0.32612553238868713, "learning_rate": 2.6517915489520618e-05, "loss": 0.0757, "step": 41370 }, { "epoch": 2.4475069497841133, "grad_norm": 0.17159679532051086, "learning_rate": 2.6516074492149186e-05, "loss": 0.0731, "step": 41380 }, { "epoch": 2.448098420772461, "grad_norm": 0.217193141579628, "learning_rate": 2.651423307217493e-05, "loss": 0.0783, "step": 41390 }, { "epoch": 2.448689891760809, "grad_norm": 0.32894301414489746, "learning_rate": 2.6512391229665417e-05, "loss": 0.0756, "step": 41400 }, { "epoch": 2.449281362749157, "grad_norm": 0.3127390742301941, "learning_rate": 2.6510548964688234e-05, "loss": 0.0675, "step": 41410 }, { "epoch": 2.4498728337375053, "grad_norm": 0.18390129506587982, "learning_rate": 2.6508706277310986e-05, "loss": 0.0744, "step": 41420 }, { "epoch": 2.450464304725853, "grad_norm": 0.2775457203388214, "learning_rate": 2.6506863167601302e-05, "loss": 0.0876, "step": 41430 }, { "epoch": 2.451055775714201, "grad_norm": 0.2800723910331726, "learning_rate": 2.650501963562681e-05, "loss": 0.0712, "step": 41440 }, { "epoch": 2.4516472467025494, "grad_norm": 0.24599936604499817, "learning_rate": 2.6503175681455162e-05, "loss": 0.0701, "step": 41450 }, { "epoch": 2.4522387176908973, "grad_norm": 0.209570050239563, "learning_rate": 2.6501331305154032e-05, "loss": 0.0625, "step": 41460 }, { "epoch": 2.452830188679245, "grad_norm": 0.21063558757305145, "learning_rate": 2.64994865067911e-05, "loss": 0.0791, "step": 41470 }, { "epoch": 2.453421659667593, "grad_norm": 0.25992661714553833, "learning_rate": 2.6497641286434055e-05, "loss": 0.0798, "step": 41480 }, { "epoch": 2.4540131306559414, "grad_norm": 0.2282334864139557, "learning_rate": 2.6495795644150624e-05, "loss": 0.0797, "step": 41490 }, { "epoch": 2.4546046016442893, "grad_norm": 0.2587922215461731, "learning_rate": 2.6493949580008526e-05, "loss": 0.0697, "step": 41500 }, { "epoch": 2.4551960726326376, "grad_norm": 0.28546807169914246, "learning_rate": 2.649210309407551e-05, "loss": 0.0627, "step": 41510 }, { "epoch": 2.4557875436209855, "grad_norm": 0.2837957739830017, "learning_rate": 2.649025618641934e-05, "loss": 0.0859, "step": 41520 }, { "epoch": 2.4563790146093334, "grad_norm": 0.4548133909702301, "learning_rate": 2.648840885710778e-05, "loss": 0.0929, "step": 41530 }, { "epoch": 2.4569704855976813, "grad_norm": 0.24580880999565125, "learning_rate": 2.6486561106208636e-05, "loss": 0.0777, "step": 41540 }, { "epoch": 2.4575619565860296, "grad_norm": 0.37814274430274963, "learning_rate": 2.6484712933789697e-05, "loss": 0.0872, "step": 41550 }, { "epoch": 2.4581534275743775, "grad_norm": 0.18447467684745789, "learning_rate": 2.6482864339918806e-05, "loss": 0.0648, "step": 41560 }, { "epoch": 2.4587448985627254, "grad_norm": 0.2727169394493103, "learning_rate": 2.6481015324663782e-05, "loss": 0.0706, "step": 41570 }, { "epoch": 2.4593363695510737, "grad_norm": 0.28988009691238403, "learning_rate": 2.647916588809249e-05, "loss": 0.078, "step": 41580 }, { "epoch": 2.4599278405394216, "grad_norm": 0.24181248247623444, "learning_rate": 2.647731603027279e-05, "loss": 0.0842, "step": 41590 }, { "epoch": 2.4605193115277695, "grad_norm": 0.3323458731174469, "learning_rate": 2.647546575127257e-05, "loss": 0.0605, "step": 41600 }, { "epoch": 2.4611107825161174, "grad_norm": 0.30886390805244446, "learning_rate": 2.6473615051159727e-05, "loss": 0.0747, "step": 41610 }, { "epoch": 2.4617022535044657, "grad_norm": 0.2473755031824112, "learning_rate": 2.647176393000218e-05, "loss": 0.0839, "step": 41620 }, { "epoch": 2.4622937244928136, "grad_norm": 0.2568899095058441, "learning_rate": 2.6469912387867854e-05, "loss": 0.0827, "step": 41630 }, { "epoch": 2.4628851954811615, "grad_norm": 0.34124791622161865, "learning_rate": 2.64680604248247e-05, "loss": 0.0794, "step": 41640 }, { "epoch": 2.46347666646951, "grad_norm": 0.17837944626808167, "learning_rate": 2.646620804094068e-05, "loss": 0.0704, "step": 41650 }, { "epoch": 2.4640681374578577, "grad_norm": 0.2092302292585373, "learning_rate": 2.6464355236283758e-05, "loss": 0.0694, "step": 41660 }, { "epoch": 2.4646596084462056, "grad_norm": 0.3000149428844452, "learning_rate": 2.646250201092194e-05, "loss": 0.0824, "step": 41670 }, { "epoch": 2.465251079434554, "grad_norm": 0.3462270498275757, "learning_rate": 2.6460648364923227e-05, "loss": 0.0771, "step": 41680 }, { "epoch": 2.465842550422902, "grad_norm": 0.312503457069397, "learning_rate": 2.645879429835564e-05, "loss": 0.0763, "step": 41690 }, { "epoch": 2.4664340214112497, "grad_norm": 0.200402170419693, "learning_rate": 2.6456939811287223e-05, "loss": 0.0637, "step": 41700 }, { "epoch": 2.467025492399598, "grad_norm": 0.26477888226509094, "learning_rate": 2.6455084903786028e-05, "loss": 0.0593, "step": 41710 }, { "epoch": 2.467616963387946, "grad_norm": 0.3068704605102539, "learning_rate": 2.6453229575920122e-05, "loss": 0.076, "step": 41720 }, { "epoch": 2.468208434376294, "grad_norm": 0.27245673537254333, "learning_rate": 2.6451373827757593e-05, "loss": 0.085, "step": 41730 }, { "epoch": 2.4687999053646417, "grad_norm": 0.4673888683319092, "learning_rate": 2.6449517659366534e-05, "loss": 0.071, "step": 41740 }, { "epoch": 2.46939137635299, "grad_norm": 0.2922459542751312, "learning_rate": 2.644766107081507e-05, "loss": 0.0782, "step": 41750 }, { "epoch": 2.469982847341338, "grad_norm": 0.16664950549602509, "learning_rate": 2.644580406217132e-05, "loss": 0.0575, "step": 41760 }, { "epoch": 2.4705743183296858, "grad_norm": 0.2511938214302063, "learning_rate": 2.6443946633503444e-05, "loss": 0.0723, "step": 41770 }, { "epoch": 2.471165789318034, "grad_norm": 0.3682669699192047, "learning_rate": 2.6442088784879593e-05, "loss": 0.0818, "step": 41780 }, { "epoch": 2.471757260306382, "grad_norm": 0.22270546853542328, "learning_rate": 2.6440230516367948e-05, "loss": 0.0728, "step": 41790 }, { "epoch": 2.47234873129473, "grad_norm": 0.2537131905555725, "learning_rate": 2.64383718280367e-05, "loss": 0.0835, "step": 41800 }, { "epoch": 2.472940202283078, "grad_norm": 0.22482994198799133, "learning_rate": 2.643651271995406e-05, "loss": 0.0727, "step": 41810 }, { "epoch": 2.473531673271426, "grad_norm": 0.2948078513145447, "learning_rate": 2.6434653192188253e-05, "loss": 0.0794, "step": 41820 }, { "epoch": 2.474123144259774, "grad_norm": 0.23529113829135895, "learning_rate": 2.6432793244807507e-05, "loss": 0.0846, "step": 41830 }, { "epoch": 2.4747146152481223, "grad_norm": 0.16720904409885406, "learning_rate": 2.6430932877880084e-05, "loss": 0.0671, "step": 41840 }, { "epoch": 2.47530608623647, "grad_norm": 0.2390902042388916, "learning_rate": 2.6429072091474257e-05, "loss": 0.0766, "step": 41850 }, { "epoch": 2.475897557224818, "grad_norm": 0.3140603303909302, "learning_rate": 2.6427210885658304e-05, "loss": 0.0674, "step": 41860 }, { "epoch": 2.476489028213166, "grad_norm": 0.24091054499149323, "learning_rate": 2.6425349260500527e-05, "loss": 0.0793, "step": 41870 }, { "epoch": 2.4770804992015143, "grad_norm": 0.2769945561885834, "learning_rate": 2.6423487216069245e-05, "loss": 0.0873, "step": 41880 }, { "epoch": 2.477671970189862, "grad_norm": 0.18219617009162903, "learning_rate": 2.6421624752432784e-05, "loss": 0.0824, "step": 41890 }, { "epoch": 2.47826344117821, "grad_norm": 0.25219178199768066, "learning_rate": 2.6419761869659495e-05, "loss": 0.0624, "step": 41900 }, { "epoch": 2.4788549121665584, "grad_norm": 0.23631954193115234, "learning_rate": 2.6417898567817736e-05, "loss": 0.0499, "step": 41910 }, { "epoch": 2.4794463831549063, "grad_norm": 0.31383800506591797, "learning_rate": 2.6416034846975887e-05, "loss": 0.0778, "step": 41920 }, { "epoch": 2.480037854143254, "grad_norm": 0.322535902261734, "learning_rate": 2.6414170707202336e-05, "loss": 0.0793, "step": 41930 }, { "epoch": 2.480629325131602, "grad_norm": 0.20464324951171875, "learning_rate": 2.6412306148565497e-05, "loss": 0.0758, "step": 41940 }, { "epoch": 2.4812207961199504, "grad_norm": 0.35691097378730774, "learning_rate": 2.641044117113379e-05, "loss": 0.0714, "step": 41950 }, { "epoch": 2.4818122671082983, "grad_norm": 0.28027620911598206, "learning_rate": 2.6408575774975653e-05, "loss": 0.0643, "step": 41960 }, { "epoch": 2.4824037380966466, "grad_norm": 0.2746596336364746, "learning_rate": 2.6406709960159544e-05, "loss": 0.0841, "step": 41970 }, { "epoch": 2.4829952090849945, "grad_norm": 0.3758169114589691, "learning_rate": 2.640484372675393e-05, "loss": 0.0796, "step": 41980 }, { "epoch": 2.4835866800733424, "grad_norm": 0.15910932421684265, "learning_rate": 2.6402977074827292e-05, "loss": 0.0683, "step": 41990 }, { "epoch": 2.4841781510616903, "grad_norm": 0.33291691541671753, "learning_rate": 2.6401110004448134e-05, "loss": 0.0728, "step": 42000 }, { "epoch": 2.4847696220500386, "grad_norm": 0.22012479603290558, "learning_rate": 2.6399242515684976e-05, "loss": 0.0772, "step": 42010 }, { "epoch": 2.4853610930383865, "grad_norm": 0.39314040541648865, "learning_rate": 2.639737460860634e-05, "loss": 0.0744, "step": 42020 }, { "epoch": 2.4859525640267344, "grad_norm": 0.29292765259742737, "learning_rate": 2.6395506283280775e-05, "loss": 0.0839, "step": 42030 }, { "epoch": 2.4865440350150827, "grad_norm": 0.2702873647212982, "learning_rate": 2.6393637539776843e-05, "loss": 0.0779, "step": 42040 }, { "epoch": 2.4871355060034306, "grad_norm": 0.2775570750236511, "learning_rate": 2.639176837816312e-05, "loss": 0.0688, "step": 42050 }, { "epoch": 2.4877269769917785, "grad_norm": 0.2513512670993805, "learning_rate": 2.6389898798508206e-05, "loss": 0.0676, "step": 42060 }, { "epoch": 2.4883184479801264, "grad_norm": 0.36302614212036133, "learning_rate": 2.63880288008807e-05, "loss": 0.0618, "step": 42070 }, { "epoch": 2.4889099189684747, "grad_norm": 0.2840184271335602, "learning_rate": 2.6386158385349223e-05, "loss": 0.091, "step": 42080 }, { "epoch": 2.4895013899568226, "grad_norm": 0.27509012818336487, "learning_rate": 2.638428755198242e-05, "loss": 0.0771, "step": 42090 }, { "epoch": 2.4900928609451705, "grad_norm": 0.3434343636035919, "learning_rate": 2.638241630084894e-05, "loss": 0.0747, "step": 42100 }, { "epoch": 2.490684331933519, "grad_norm": 0.1596093475818634, "learning_rate": 2.6380544632017455e-05, "loss": 0.06, "step": 42110 }, { "epoch": 2.4912758029218667, "grad_norm": 0.3794292211532593, "learning_rate": 2.6378672545556647e-05, "loss": 0.0719, "step": 42120 }, { "epoch": 2.4918672739102146, "grad_norm": 0.26453351974487305, "learning_rate": 2.637680004153522e-05, "loss": 0.0817, "step": 42130 }, { "epoch": 2.492458744898563, "grad_norm": 0.4048343598842621, "learning_rate": 2.637492712002188e-05, "loss": 0.0783, "step": 42140 }, { "epoch": 2.493050215886911, "grad_norm": 0.1899815797805786, "learning_rate": 2.6373053781085366e-05, "loss": 0.0555, "step": 42150 }, { "epoch": 2.4936416868752587, "grad_norm": 0.2604844868183136, "learning_rate": 2.6371180024794417e-05, "loss": 0.0643, "step": 42160 }, { "epoch": 2.494233157863607, "grad_norm": 0.38879504799842834, "learning_rate": 2.63693058512178e-05, "loss": 0.0859, "step": 42170 }, { "epoch": 2.494824628851955, "grad_norm": 0.27604812383651733, "learning_rate": 2.6367431260424287e-05, "loss": 0.0788, "step": 42180 }, { "epoch": 2.4954160998403028, "grad_norm": 0.20528577268123627, "learning_rate": 2.636555625248267e-05, "loss": 0.0784, "step": 42190 }, { "epoch": 2.4960075708286507, "grad_norm": 0.2500089108943939, "learning_rate": 2.6363680827461755e-05, "loss": 0.0764, "step": 42200 }, { "epoch": 2.496599041816999, "grad_norm": 0.3252667784690857, "learning_rate": 2.636180498543037e-05, "loss": 0.0744, "step": 42210 }, { "epoch": 2.497190512805347, "grad_norm": 0.2469581961631775, "learning_rate": 2.635992872645734e-05, "loss": 0.0836, "step": 42220 }, { "epoch": 2.4977819837936948, "grad_norm": 0.23770004510879517, "learning_rate": 2.635805205061153e-05, "loss": 0.0782, "step": 42230 }, { "epoch": 2.498373454782043, "grad_norm": 0.3118543028831482, "learning_rate": 2.6356174957961804e-05, "loss": 0.0916, "step": 42240 }, { "epoch": 2.498964925770391, "grad_norm": 0.20697429776191711, "learning_rate": 2.6354297448577042e-05, "loss": 0.0767, "step": 42250 }, { "epoch": 2.499556396758739, "grad_norm": 0.26701101660728455, "learning_rate": 2.6352419522526145e-05, "loss": 0.0633, "step": 42260 }, { "epoch": 2.5001478677470867, "grad_norm": 0.49323439598083496, "learning_rate": 2.6350541179878028e-05, "loss": 0.0673, "step": 42270 }, { "epoch": 2.500739338735435, "grad_norm": 0.3049086630344391, "learning_rate": 2.6348662420701614e-05, "loss": 0.0885, "step": 42280 }, { "epoch": 2.501330809723783, "grad_norm": 0.32198014855384827, "learning_rate": 2.6346783245065863e-05, "loss": 0.0836, "step": 42290 }, { "epoch": 2.5019222807121313, "grad_norm": 0.2757326662540436, "learning_rate": 2.6344903653039713e-05, "loss": 0.082, "step": 42300 }, { "epoch": 2.502513751700479, "grad_norm": 0.2871796190738678, "learning_rate": 2.6343023644692156e-05, "loss": 0.0616, "step": 42310 }, { "epoch": 2.503105222688827, "grad_norm": 0.20750926434993744, "learning_rate": 2.6341143220092176e-05, "loss": 0.0756, "step": 42320 }, { "epoch": 2.503696693677175, "grad_norm": 0.2718091607093811, "learning_rate": 2.6339262379308777e-05, "loss": 0.082, "step": 42330 }, { "epoch": 2.5042881646655233, "grad_norm": 0.1701480895280838, "learning_rate": 2.6337381122410983e-05, "loss": 0.0769, "step": 42340 }, { "epoch": 2.504879635653871, "grad_norm": 0.15201810002326965, "learning_rate": 2.6335499449467828e-05, "loss": 0.0764, "step": 42350 }, { "epoch": 2.505471106642219, "grad_norm": 0.17689037322998047, "learning_rate": 2.633361736054836e-05, "loss": 0.0581, "step": 42360 }, { "epoch": 2.5060625776305674, "grad_norm": 0.2808118164539337, "learning_rate": 2.6331734855721656e-05, "loss": 0.0805, "step": 42370 }, { "epoch": 2.5066540486189153, "grad_norm": 0.2729872763156891, "learning_rate": 2.6329851935056787e-05, "loss": 0.0951, "step": 42380 }, { "epoch": 2.507245519607263, "grad_norm": 0.32438287138938904, "learning_rate": 2.6327968598622858e-05, "loss": 0.0718, "step": 42390 }, { "epoch": 2.507836990595611, "grad_norm": 0.2912069857120514, "learning_rate": 2.6326084846488974e-05, "loss": 0.072, "step": 42400 }, { "epoch": 2.5084284615839594, "grad_norm": 0.1953236311674118, "learning_rate": 2.632420067872427e-05, "loss": 0.0495, "step": 42410 }, { "epoch": 2.5090199325723073, "grad_norm": 0.25447461009025574, "learning_rate": 2.6322316095397883e-05, "loss": 0.0724, "step": 42420 }, { "epoch": 2.5096114035606556, "grad_norm": 0.2526516020298004, "learning_rate": 2.6320431096578976e-05, "loss": 0.0811, "step": 42430 }, { "epoch": 2.5102028745490035, "grad_norm": 0.30781635642051697, "learning_rate": 2.6318545682336717e-05, "loss": 0.0936, "step": 42440 }, { "epoch": 2.5107943455373514, "grad_norm": 0.2571191191673279, "learning_rate": 2.6316659852740302e-05, "loss": 0.0805, "step": 42450 }, { "epoch": 2.5113858165256993, "grad_norm": 0.22951500117778778, "learning_rate": 2.6314773607858928e-05, "loss": 0.0557, "step": 42460 }, { "epoch": 2.5119772875140476, "grad_norm": 0.284130722284317, "learning_rate": 2.6312886947761812e-05, "loss": 0.082, "step": 42470 }, { "epoch": 2.5125687585023955, "grad_norm": 0.29201555252075195, "learning_rate": 2.63109998725182e-05, "loss": 0.084, "step": 42480 }, { "epoch": 2.5131602294907434, "grad_norm": 0.2502705752849579, "learning_rate": 2.630911238219733e-05, "loss": 0.0822, "step": 42490 }, { "epoch": 2.5137517004790917, "grad_norm": 0.18746919929981232, "learning_rate": 2.6307224476868474e-05, "loss": 0.0628, "step": 42500 }, { "epoch": 2.5143431714674396, "grad_norm": 0.3292168974876404, "learning_rate": 2.6305336156600907e-05, "loss": 0.0755, "step": 42510 }, { "epoch": 2.5149346424557875, "grad_norm": 0.2576083242893219, "learning_rate": 2.6303447421463928e-05, "loss": 0.0752, "step": 42520 }, { "epoch": 2.5155261134441353, "grad_norm": 0.3170403242111206, "learning_rate": 2.6301558271526844e-05, "loss": 0.0719, "step": 42530 }, { "epoch": 2.5161175844324837, "grad_norm": 0.3052998185157776, "learning_rate": 2.6299668706858987e-05, "loss": 0.0792, "step": 42540 }, { "epoch": 2.5167090554208316, "grad_norm": 0.3575524389743805, "learning_rate": 2.6297778727529687e-05, "loss": 0.0695, "step": 42550 }, { "epoch": 2.51730052640918, "grad_norm": 0.24090559780597687, "learning_rate": 2.6295888333608315e-05, "loss": 0.0603, "step": 42560 }, { "epoch": 2.517891997397528, "grad_norm": 0.2754235565662384, "learning_rate": 2.6293997525164233e-05, "loss": 0.0768, "step": 42570 }, { "epoch": 2.5184834683858757, "grad_norm": 0.24079322814941406, "learning_rate": 2.6292106302266826e-05, "loss": 0.0712, "step": 42580 }, { "epoch": 2.5190749393742236, "grad_norm": 0.18826699256896973, "learning_rate": 2.62902146649855e-05, "loss": 0.0766, "step": 42590 }, { "epoch": 2.5196664103625714, "grad_norm": 0.3549033999443054, "learning_rate": 2.6288322613389667e-05, "loss": 0.0802, "step": 42600 }, { "epoch": 2.5202578813509198, "grad_norm": 0.2425244152545929, "learning_rate": 2.6286430147548767e-05, "loss": 0.0632, "step": 42610 }, { "epoch": 2.5208493523392677, "grad_norm": 0.24633009731769562, "learning_rate": 2.628453726753224e-05, "loss": 0.0801, "step": 42620 }, { "epoch": 2.521440823327616, "grad_norm": 0.38022148609161377, "learning_rate": 2.6282643973409554e-05, "loss": 0.0816, "step": 42630 }, { "epoch": 2.522032294315964, "grad_norm": 0.28085857629776, "learning_rate": 2.6280750265250184e-05, "loss": 0.0719, "step": 42640 }, { "epoch": 2.5226237653043118, "grad_norm": 0.22314944863319397, "learning_rate": 2.6278856143123628e-05, "loss": 0.0714, "step": 42650 }, { "epoch": 2.5232152362926596, "grad_norm": 0.2048136442899704, "learning_rate": 2.6276961607099382e-05, "loss": 0.0574, "step": 42660 }, { "epoch": 2.523806707281008, "grad_norm": 0.3169582188129425, "learning_rate": 2.627506665724698e-05, "loss": 0.0763, "step": 42670 }, { "epoch": 2.524398178269356, "grad_norm": 0.3440268337726593, "learning_rate": 2.627317129363596e-05, "loss": 0.0867, "step": 42680 }, { "epoch": 2.5249896492577037, "grad_norm": 0.35202398896217346, "learning_rate": 2.627127551633587e-05, "loss": 0.0786, "step": 42690 }, { "epoch": 2.525581120246052, "grad_norm": 0.30685901641845703, "learning_rate": 2.6269379325416288e-05, "loss": 0.0758, "step": 42700 }, { "epoch": 2.5261725912344, "grad_norm": 0.2339339256286621, "learning_rate": 2.6267482720946787e-05, "loss": 0.0598, "step": 42710 }, { "epoch": 2.526764062222748, "grad_norm": 0.26399070024490356, "learning_rate": 2.6265585702996974e-05, "loss": 0.0723, "step": 42720 }, { "epoch": 2.5273555332110957, "grad_norm": 0.28078433871269226, "learning_rate": 2.626368827163646e-05, "loss": 0.0772, "step": 42730 }, { "epoch": 2.527947004199444, "grad_norm": 0.23549801111221313, "learning_rate": 2.6261790426934878e-05, "loss": 0.0683, "step": 42740 }, { "epoch": 2.528538475187792, "grad_norm": 0.19851718842983246, "learning_rate": 2.6259892168961867e-05, "loss": 0.0617, "step": 42750 }, { "epoch": 2.5291299461761403, "grad_norm": 0.4800650179386139, "learning_rate": 2.6257993497787096e-05, "loss": 0.0622, "step": 42760 }, { "epoch": 2.529721417164488, "grad_norm": 0.21601779758930206, "learning_rate": 2.6256094413480232e-05, "loss": 0.0762, "step": 42770 }, { "epoch": 2.530312888152836, "grad_norm": 0.24222072958946228, "learning_rate": 2.6254194916110965e-05, "loss": 0.0808, "step": 42780 }, { "epoch": 2.530904359141184, "grad_norm": 0.1762920320034027, "learning_rate": 2.6252295005749008e-05, "loss": 0.0775, "step": 42790 }, { "epoch": 2.5314958301295323, "grad_norm": 0.23716303706169128, "learning_rate": 2.625039468246407e-05, "loss": 0.0772, "step": 42800 }, { "epoch": 2.53208730111788, "grad_norm": 0.19930070638656616, "learning_rate": 2.6248493946325902e-05, "loss": 0.0629, "step": 42810 }, { "epoch": 2.532678772106228, "grad_norm": 0.28131118416786194, "learning_rate": 2.6246592797404245e-05, "loss": 0.0797, "step": 42820 }, { "epoch": 2.5332702430945764, "grad_norm": 0.23869208991527557, "learning_rate": 2.6244691235768865e-05, "loss": 0.0932, "step": 42830 }, { "epoch": 2.5338617140829243, "grad_norm": 0.2099369615316391, "learning_rate": 2.6242789261489546e-05, "loss": 0.0758, "step": 42840 }, { "epoch": 2.534453185071272, "grad_norm": 0.11212150007486343, "learning_rate": 2.6240886874636077e-05, "loss": 0.0698, "step": 42850 }, { "epoch": 2.53504465605962, "grad_norm": 0.28618893027305603, "learning_rate": 2.623898407527828e-05, "loss": 0.0592, "step": 42860 }, { "epoch": 2.5356361270479684, "grad_norm": 0.2830834686756134, "learning_rate": 2.6237080863485978e-05, "loss": 0.0819, "step": 42870 }, { "epoch": 2.5362275980363163, "grad_norm": 0.3222261965274811, "learning_rate": 2.6235177239329013e-05, "loss": 0.0786, "step": 42880 }, { "epoch": 2.5368190690246646, "grad_norm": 0.20361045002937317, "learning_rate": 2.623327320287724e-05, "loss": 0.0727, "step": 42890 }, { "epoch": 2.5374105400130125, "grad_norm": 0.19862814247608185, "learning_rate": 2.6231368754200532e-05, "loss": 0.0718, "step": 42900 }, { "epoch": 2.5380020110013604, "grad_norm": 0.16234982013702393, "learning_rate": 2.6229463893368776e-05, "loss": 0.0621, "step": 42910 }, { "epoch": 2.5385934819897082, "grad_norm": 0.2662702798843384, "learning_rate": 2.6227558620451872e-05, "loss": 0.0841, "step": 42920 }, { "epoch": 2.5391849529780566, "grad_norm": 0.3215915262699127, "learning_rate": 2.6225652935519742e-05, "loss": 0.0632, "step": 42930 }, { "epoch": 2.5397764239664045, "grad_norm": 0.22692811489105225, "learning_rate": 2.6223746838642314e-05, "loss": 0.0839, "step": 42940 }, { "epoch": 2.5403678949547523, "grad_norm": 0.2267572432756424, "learning_rate": 2.6221840329889537e-05, "loss": 0.0745, "step": 42950 }, { "epoch": 2.5409593659431007, "grad_norm": 0.17404790222644806, "learning_rate": 2.621993340933138e-05, "loss": 0.0626, "step": 42960 }, { "epoch": 2.5415508369314486, "grad_norm": 0.3437452018260956, "learning_rate": 2.6218026077037812e-05, "loss": 0.0806, "step": 42970 }, { "epoch": 2.5421423079197965, "grad_norm": 0.3015662729740143, "learning_rate": 2.6216118333078827e-05, "loss": 0.0779, "step": 42980 }, { "epoch": 2.5427337789081443, "grad_norm": 0.2813277542591095, "learning_rate": 2.6214210177524436e-05, "loss": 0.0797, "step": 42990 }, { "epoch": 2.5433252498964927, "grad_norm": 0.363493412733078, "learning_rate": 2.6212301610444664e-05, "loss": 0.0694, "step": 43000 }, { "epoch": 2.5439167208848406, "grad_norm": 0.18019935488700867, "learning_rate": 2.6210392631909544e-05, "loss": 0.063, "step": 43010 }, { "epoch": 2.544508191873189, "grad_norm": 0.23182670772075653, "learning_rate": 2.6208483241989136e-05, "loss": 0.0645, "step": 43020 }, { "epoch": 2.5450996628615368, "grad_norm": 0.2938601076602936, "learning_rate": 2.62065734407535e-05, "loss": 0.085, "step": 43030 }, { "epoch": 2.5456911338498847, "grad_norm": 0.208881214261055, "learning_rate": 2.620466322827273e-05, "loss": 0.0775, "step": 43040 }, { "epoch": 2.5462826048382325, "grad_norm": 0.22324922680854797, "learning_rate": 2.6202752604616913e-05, "loss": 0.0831, "step": 43050 }, { "epoch": 2.5468740758265804, "grad_norm": 0.2024829089641571, "learning_rate": 2.6200841569856168e-05, "loss": 0.0585, "step": 43060 }, { "epoch": 2.5474655468149288, "grad_norm": 0.27613916993141174, "learning_rate": 2.619893012406063e-05, "loss": 0.0731, "step": 43070 }, { "epoch": 2.5480570178032766, "grad_norm": 0.2121340036392212, "learning_rate": 2.6197018267300433e-05, "loss": 0.0802, "step": 43080 }, { "epoch": 2.548648488791625, "grad_norm": 0.2782824635505676, "learning_rate": 2.619510599964574e-05, "loss": 0.0828, "step": 43090 }, { "epoch": 2.549239959779973, "grad_norm": 0.1711290031671524, "learning_rate": 2.6193193321166724e-05, "loss": 0.0616, "step": 43100 }, { "epoch": 2.5498314307683208, "grad_norm": 0.25994589924812317, "learning_rate": 2.619128023193358e-05, "loss": 0.0665, "step": 43110 }, { "epoch": 2.5504229017566686, "grad_norm": 0.3430980443954468, "learning_rate": 2.6189366732016506e-05, "loss": 0.0654, "step": 43120 }, { "epoch": 2.551014372745017, "grad_norm": 0.3432604670524597, "learning_rate": 2.618745282148572e-05, "loss": 0.092, "step": 43130 }, { "epoch": 2.551605843733365, "grad_norm": 0.22747917473316193, "learning_rate": 2.6185538500411462e-05, "loss": 0.0672, "step": 43140 }, { "epoch": 2.5521973147217127, "grad_norm": 0.22151151299476624, "learning_rate": 2.6183623768863977e-05, "loss": 0.0774, "step": 43150 }, { "epoch": 2.552788785710061, "grad_norm": 0.2546790838241577, "learning_rate": 2.6181708626913532e-05, "loss": 0.0707, "step": 43160 }, { "epoch": 2.553380256698409, "grad_norm": 0.2773752212524414, "learning_rate": 2.6179793074630407e-05, "loss": 0.0877, "step": 43170 }, { "epoch": 2.553971727686757, "grad_norm": 0.2854421138763428, "learning_rate": 2.617787711208489e-05, "loss": 0.0818, "step": 43180 }, { "epoch": 2.5545631986751047, "grad_norm": 0.24338744580745697, "learning_rate": 2.6175960739347303e-05, "loss": 0.0787, "step": 43190 }, { "epoch": 2.555154669663453, "grad_norm": 0.20836426317691803, "learning_rate": 2.617404395648796e-05, "loss": 0.065, "step": 43200 }, { "epoch": 2.555746140651801, "grad_norm": 0.14596711099147797, "learning_rate": 2.6172126763577204e-05, "loss": 0.0704, "step": 43210 }, { "epoch": 2.5563376116401493, "grad_norm": 0.31084108352661133, "learning_rate": 2.6170209160685393e-05, "loss": 0.0747, "step": 43220 }, { "epoch": 2.556929082628497, "grad_norm": 0.31297796964645386, "learning_rate": 2.6168291147882893e-05, "loss": 0.0844, "step": 43230 }, { "epoch": 2.557520553616845, "grad_norm": 0.3145076632499695, "learning_rate": 2.616637272524009e-05, "loss": 0.0759, "step": 43240 }, { "epoch": 2.558112024605193, "grad_norm": 0.2961950898170471, "learning_rate": 2.6164453892827382e-05, "loss": 0.0706, "step": 43250 }, { "epoch": 2.5587034955935413, "grad_norm": 0.205470010638237, "learning_rate": 2.6162534650715185e-05, "loss": 0.0629, "step": 43260 }, { "epoch": 2.559294966581889, "grad_norm": 14.085007667541504, "learning_rate": 2.616061499897393e-05, "loss": 0.076, "step": 43270 }, { "epoch": 2.559886437570237, "grad_norm": 0.2278136909008026, "learning_rate": 2.6158694937674063e-05, "loss": 0.0896, "step": 43280 }, { "epoch": 2.5604779085585854, "grad_norm": 0.2691895365715027, "learning_rate": 2.6156774466886043e-05, "loss": 0.0748, "step": 43290 }, { "epoch": 2.5610693795469333, "grad_norm": 0.27500420808792114, "learning_rate": 2.6154853586680346e-05, "loss": 0.0729, "step": 43300 }, { "epoch": 2.561660850535281, "grad_norm": 0.26902833580970764, "learning_rate": 2.615293229712746e-05, "loss": 0.0629, "step": 43310 }, { "epoch": 2.562252321523629, "grad_norm": 0.22494381666183472, "learning_rate": 2.6151010598297892e-05, "loss": 0.0714, "step": 43320 }, { "epoch": 2.5628437925119774, "grad_norm": 0.1985783874988556, "learning_rate": 2.614908849026216e-05, "loss": 0.0744, "step": 43330 }, { "epoch": 2.5634352635003252, "grad_norm": 0.38035842776298523, "learning_rate": 2.6147165973090803e-05, "loss": 0.0799, "step": 43340 }, { "epoch": 2.5640267344886736, "grad_norm": 0.2559462785720825, "learning_rate": 2.6145243046854365e-05, "loss": 0.0689, "step": 43350 }, { "epoch": 2.5646182054770215, "grad_norm": 0.31456899642944336, "learning_rate": 2.6143319711623417e-05, "loss": 0.0769, "step": 43360 }, { "epoch": 2.5652096764653693, "grad_norm": 0.23653040826320648, "learning_rate": 2.614139596746854e-05, "loss": 0.074, "step": 43370 }, { "epoch": 2.5658011474537172, "grad_norm": 0.29824209213256836, "learning_rate": 2.613947181446032e-05, "loss": 0.0813, "step": 43380 }, { "epoch": 2.5663926184420656, "grad_norm": 0.29182907938957214, "learning_rate": 2.613754725266938e-05, "loss": 0.0824, "step": 43390 }, { "epoch": 2.5669840894304135, "grad_norm": 0.305999219417572, "learning_rate": 2.6135622282166337e-05, "loss": 0.0839, "step": 43400 }, { "epoch": 2.5675755604187613, "grad_norm": 0.32202619314193726, "learning_rate": 2.613369690302183e-05, "loss": 0.0703, "step": 43410 }, { "epoch": 2.5681670314071097, "grad_norm": 0.2852182984352112, "learning_rate": 2.613177111530652e-05, "loss": 0.0739, "step": 43420 }, { "epoch": 2.5687585023954576, "grad_norm": 0.22314396500587463, "learning_rate": 2.6129844919091072e-05, "loss": 0.0815, "step": 43430 }, { "epoch": 2.5693499733838054, "grad_norm": 0.2142597883939743, "learning_rate": 2.6127918314446176e-05, "loss": 0.0735, "step": 43440 }, { "epoch": 2.5699414443721533, "grad_norm": 0.3333527743816376, "learning_rate": 2.6125991301442528e-05, "loss": 0.0781, "step": 43450 }, { "epoch": 2.5705329153605017, "grad_norm": 0.464089572429657, "learning_rate": 2.612406388015085e-05, "loss": 0.0743, "step": 43460 }, { "epoch": 2.5711243863488495, "grad_norm": 0.24109119176864624, "learning_rate": 2.612213605064186e-05, "loss": 0.0771, "step": 43470 }, { "epoch": 2.571715857337198, "grad_norm": 0.2678265869617462, "learning_rate": 2.6120207812986316e-05, "loss": 0.0816, "step": 43480 }, { "epoch": 2.5723073283255458, "grad_norm": 0.5044927000999451, "learning_rate": 2.611827916725497e-05, "loss": 0.0829, "step": 43490 }, { "epoch": 2.5728987993138936, "grad_norm": 0.24744528532028198, "learning_rate": 2.61163501135186e-05, "loss": 0.0754, "step": 43500 }, { "epoch": 2.5734902703022415, "grad_norm": 0.3059263527393341, "learning_rate": 2.6114420651847994e-05, "loss": 0.0651, "step": 43510 }, { "epoch": 2.5740817412905894, "grad_norm": 0.3848341703414917, "learning_rate": 2.611249078231396e-05, "loss": 0.0746, "step": 43520 }, { "epoch": 2.5746732122789378, "grad_norm": 0.2096925973892212, "learning_rate": 2.6110560504987312e-05, "loss": 0.0731, "step": 43530 }, { "epoch": 2.5752646832672856, "grad_norm": 0.23394954204559326, "learning_rate": 2.61086298199389e-05, "loss": 0.0751, "step": 43540 }, { "epoch": 2.575856154255634, "grad_norm": 0.18813957273960114, "learning_rate": 2.610669872723955e-05, "loss": 0.0699, "step": 43550 }, { "epoch": 2.576447625243982, "grad_norm": 0.3085828125476837, "learning_rate": 2.610476722696015e-05, "loss": 0.0706, "step": 43560 }, { "epoch": 2.5770390962323297, "grad_norm": 0.4016071856021881, "learning_rate": 2.610283531917157e-05, "loss": 0.072, "step": 43570 }, { "epoch": 2.5776305672206776, "grad_norm": 0.3100753426551819, "learning_rate": 2.6100903003944698e-05, "loss": 0.0831, "step": 43580 }, { "epoch": 2.578222038209026, "grad_norm": 0.21692544221878052, "learning_rate": 2.6098970281350457e-05, "loss": 0.0769, "step": 43590 }, { "epoch": 2.578813509197374, "grad_norm": 0.2196241319179535, "learning_rate": 2.6097037151459763e-05, "loss": 0.0743, "step": 43600 }, { "epoch": 2.5794049801857217, "grad_norm": 0.2347879260778427, "learning_rate": 2.609510361434356e-05, "loss": 0.0671, "step": 43610 }, { "epoch": 2.57999645117407, "grad_norm": 0.25999146699905396, "learning_rate": 2.6093169670072797e-05, "loss": 0.0729, "step": 43620 }, { "epoch": 2.580587922162418, "grad_norm": 0.2523012161254883, "learning_rate": 2.6091235318718448e-05, "loss": 0.0799, "step": 43630 }, { "epoch": 2.581179393150766, "grad_norm": 0.23374685645103455, "learning_rate": 2.60893005603515e-05, "loss": 0.078, "step": 43640 }, { "epoch": 2.5817708641391137, "grad_norm": 0.20576931536197662, "learning_rate": 2.6087365395042947e-05, "loss": 0.0665, "step": 43650 }, { "epoch": 2.582362335127462, "grad_norm": 0.5026184320449829, "learning_rate": 2.6085429822863806e-05, "loss": 0.0755, "step": 43660 }, { "epoch": 2.58295380611581, "grad_norm": 0.3708222508430481, "learning_rate": 2.6083493843885105e-05, "loss": 0.0752, "step": 43670 }, { "epoch": 2.5835452771041583, "grad_norm": 0.2372686266899109, "learning_rate": 2.6081557458177885e-05, "loss": 0.08, "step": 43680 }, { "epoch": 2.584136748092506, "grad_norm": 0.22214511036872864, "learning_rate": 2.6079620665813214e-05, "loss": 0.0827, "step": 43690 }, { "epoch": 2.584728219080854, "grad_norm": 0.22767861187458038, "learning_rate": 2.6077683466862158e-05, "loss": 0.0666, "step": 43700 }, { "epoch": 2.585319690069202, "grad_norm": 0.30193987488746643, "learning_rate": 2.6075745861395812e-05, "loss": 0.0683, "step": 43710 }, { "epoch": 2.5859111610575503, "grad_norm": 0.21817606687545776, "learning_rate": 2.6073807849485274e-05, "loss": 0.0752, "step": 43720 }, { "epoch": 2.586502632045898, "grad_norm": 0.3093623220920563, "learning_rate": 2.6071869431201667e-05, "loss": 0.0827, "step": 43730 }, { "epoch": 2.587094103034246, "grad_norm": 0.2693406939506531, "learning_rate": 2.606993060661612e-05, "loss": 0.0747, "step": 43740 }, { "epoch": 2.5876855740225944, "grad_norm": 0.2551145553588867, "learning_rate": 2.6067991375799788e-05, "loss": 0.074, "step": 43750 }, { "epoch": 2.5882770450109422, "grad_norm": 0.2508329153060913, "learning_rate": 2.606605173882383e-05, "loss": 0.0591, "step": 43760 }, { "epoch": 2.58886851599929, "grad_norm": 0.2261354625225067, "learning_rate": 2.606411169575943e-05, "loss": 0.0694, "step": 43770 }, { "epoch": 2.589459986987638, "grad_norm": 0.2496955394744873, "learning_rate": 2.6062171246677773e-05, "loss": 0.0831, "step": 43780 }, { "epoch": 2.5900514579759863, "grad_norm": 0.2077709287405014, "learning_rate": 2.606023039165007e-05, "loss": 0.0822, "step": 43790 }, { "epoch": 2.5906429289643342, "grad_norm": 0.2726019024848938, "learning_rate": 2.605828913074755e-05, "loss": 0.0756, "step": 43800 }, { "epoch": 2.5912343999526826, "grad_norm": 0.2528131902217865, "learning_rate": 2.6056347464041442e-05, "loss": 0.0716, "step": 43810 }, { "epoch": 2.5918258709410305, "grad_norm": 0.3781578540802002, "learning_rate": 2.6054405391603006e-05, "loss": 0.0794, "step": 43820 }, { "epoch": 2.5924173419293783, "grad_norm": 0.2914809286594391, "learning_rate": 2.6052462913503507e-05, "loss": 0.076, "step": 43830 }, { "epoch": 2.5930088129177262, "grad_norm": 0.2829378545284271, "learning_rate": 2.6050520029814227e-05, "loss": 0.0757, "step": 43840 }, { "epoch": 2.5936002839060746, "grad_norm": 0.27090907096862793, "learning_rate": 2.6048576740606467e-05, "loss": 0.0704, "step": 43850 }, { "epoch": 2.5941917548944224, "grad_norm": 0.3985308110713959, "learning_rate": 2.6046633045951537e-05, "loss": 0.0704, "step": 43860 }, { "epoch": 2.5947832258827703, "grad_norm": 0.25558575987815857, "learning_rate": 2.604468894592076e-05, "loss": 0.0702, "step": 43870 }, { "epoch": 2.5953746968711187, "grad_norm": 0.2025875598192215, "learning_rate": 2.6042744440585486e-05, "loss": 0.076, "step": 43880 }, { "epoch": 2.5959661678594665, "grad_norm": 0.24643415212631226, "learning_rate": 2.6040799530017067e-05, "loss": 0.0714, "step": 43890 }, { "epoch": 2.5965576388478144, "grad_norm": 0.21249890327453613, "learning_rate": 2.6038854214286877e-05, "loss": 0.0692, "step": 43900 }, { "epoch": 2.5971491098361623, "grad_norm": 0.35141265392303467, "learning_rate": 2.60369084934663e-05, "loss": 0.0737, "step": 43910 }, { "epoch": 2.5977405808245106, "grad_norm": 0.218539297580719, "learning_rate": 2.6034962367626746e-05, "loss": 0.0675, "step": 43920 }, { "epoch": 2.5983320518128585, "grad_norm": 0.2509163022041321, "learning_rate": 2.603301583683962e-05, "loss": 0.086, "step": 43930 }, { "epoch": 2.598923522801207, "grad_norm": 0.2328100949525833, "learning_rate": 2.6031068901176362e-05, "loss": 0.087, "step": 43940 }, { "epoch": 2.5995149937895548, "grad_norm": 0.31049007177352905, "learning_rate": 2.6029121560708413e-05, "loss": 0.077, "step": 43950 }, { "epoch": 2.6001064647779026, "grad_norm": 0.22851276397705078, "learning_rate": 2.6027173815507238e-05, "loss": 0.0627, "step": 43960 }, { "epoch": 2.6006979357662505, "grad_norm": 0.2537268400192261, "learning_rate": 2.602522566564431e-05, "loss": 0.0765, "step": 43970 }, { "epoch": 2.6012894067545984, "grad_norm": 0.2439136505126953, "learning_rate": 2.6023277111191125e-05, "loss": 0.0819, "step": 43980 }, { "epoch": 2.6018808777429467, "grad_norm": 0.23198746144771576, "learning_rate": 2.6021328152219184e-05, "loss": 0.0784, "step": 43990 }, { "epoch": 2.6024723487312946, "grad_norm": 0.26327505707740784, "learning_rate": 2.6019378788800005e-05, "loss": 0.0804, "step": 44000 }, { "epoch": 2.603063819719643, "grad_norm": 0.2568318843841553, "learning_rate": 2.6017429021005128e-05, "loss": 0.0644, "step": 44010 }, { "epoch": 2.603655290707991, "grad_norm": 0.29773545265197754, "learning_rate": 2.60154788489061e-05, "loss": 0.0766, "step": 44020 }, { "epoch": 2.6042467616963387, "grad_norm": 0.2996233403682709, "learning_rate": 2.6013528272574492e-05, "loss": 0.0776, "step": 44030 }, { "epoch": 2.6048382326846866, "grad_norm": 0.21741946041584015, "learning_rate": 2.6011577292081877e-05, "loss": 0.0772, "step": 44040 }, { "epoch": 2.605429703673035, "grad_norm": 0.2855459153652191, "learning_rate": 2.6009625907499852e-05, "loss": 0.0648, "step": 44050 }, { "epoch": 2.606021174661383, "grad_norm": 0.3524601459503174, "learning_rate": 2.6007674118900028e-05, "loss": 0.0706, "step": 44060 }, { "epoch": 2.6066126456497307, "grad_norm": 0.48485949635505676, "learning_rate": 2.600572192635403e-05, "loss": 0.0719, "step": 44070 }, { "epoch": 2.607204116638079, "grad_norm": 0.3931770622730255, "learning_rate": 2.6003769329933494e-05, "loss": 0.0915, "step": 44080 }, { "epoch": 2.607795587626427, "grad_norm": 0.21697570383548737, "learning_rate": 2.6001816329710078e-05, "loss": 0.0717, "step": 44090 }, { "epoch": 2.608387058614775, "grad_norm": 0.2572587728500366, "learning_rate": 2.5999862925755445e-05, "loss": 0.0646, "step": 44100 }, { "epoch": 2.6089785296031227, "grad_norm": 0.206429123878479, "learning_rate": 2.5997909118141284e-05, "loss": 0.0564, "step": 44110 }, { "epoch": 2.609570000591471, "grad_norm": 0.2220039814710617, "learning_rate": 2.599595490693929e-05, "loss": 0.0913, "step": 44120 }, { "epoch": 2.610161471579819, "grad_norm": 0.1879478245973587, "learning_rate": 2.599400029222118e-05, "loss": 0.0736, "step": 44130 }, { "epoch": 2.6107529425681673, "grad_norm": 0.2734776437282562, "learning_rate": 2.5992045274058677e-05, "loss": 0.0751, "step": 44140 }, { "epoch": 2.611344413556515, "grad_norm": 0.3170112073421478, "learning_rate": 2.5990089852523527e-05, "loss": 0.0789, "step": 44150 }, { "epoch": 2.611935884544863, "grad_norm": 0.19874295592308044, "learning_rate": 2.598813402768749e-05, "loss": 0.0643, "step": 44160 }, { "epoch": 2.612527355533211, "grad_norm": 0.4306972920894623, "learning_rate": 2.5986177799622334e-05, "loss": 0.0891, "step": 44170 }, { "epoch": 2.6131188265215592, "grad_norm": 0.20880214869976044, "learning_rate": 2.598422116839985e-05, "loss": 0.0809, "step": 44180 }, { "epoch": 2.613710297509907, "grad_norm": 0.24130482971668243, "learning_rate": 2.5982264134091836e-05, "loss": 0.0747, "step": 44190 }, { "epoch": 2.614301768498255, "grad_norm": 0.27670732140541077, "learning_rate": 2.598030669677011e-05, "loss": 0.0745, "step": 44200 }, { "epoch": 2.6148932394866033, "grad_norm": 0.23386231064796448, "learning_rate": 2.5978348856506506e-05, "loss": 0.0641, "step": 44210 }, { "epoch": 2.6154847104749512, "grad_norm": 0.25303342938423157, "learning_rate": 2.5976390613372868e-05, "loss": 0.076, "step": 44220 }, { "epoch": 2.616076181463299, "grad_norm": 0.31107842922210693, "learning_rate": 2.597443196744106e-05, "loss": 0.0799, "step": 44230 }, { "epoch": 2.616667652451647, "grad_norm": 0.2475108653306961, "learning_rate": 2.5972472918782956e-05, "loss": 0.0708, "step": 44240 }, { "epoch": 2.6172591234399953, "grad_norm": 0.27122923731803894, "learning_rate": 2.5970513467470443e-05, "loss": 0.0741, "step": 44250 }, { "epoch": 2.6178505944283432, "grad_norm": 0.297466516494751, "learning_rate": 2.5968553613575433e-05, "loss": 0.0648, "step": 44260 }, { "epoch": 2.6184420654166916, "grad_norm": 0.284341424703598, "learning_rate": 2.596659335716985e-05, "loss": 0.0895, "step": 44270 }, { "epoch": 2.6190335364050394, "grad_norm": 0.20621508359909058, "learning_rate": 2.5964632698325617e-05, "loss": 0.0853, "step": 44280 }, { "epoch": 2.6196250073933873, "grad_norm": 0.40776577591896057, "learning_rate": 2.596267163711469e-05, "loss": 0.0904, "step": 44290 }, { "epoch": 2.620216478381735, "grad_norm": 0.23733507096767426, "learning_rate": 2.5960710173609033e-05, "loss": 0.0687, "step": 44300 }, { "epoch": 2.6208079493700835, "grad_norm": 0.1915244460105896, "learning_rate": 2.5958748307880627e-05, "loss": 0.0691, "step": 44310 }, { "epoch": 2.6213994203584314, "grad_norm": 0.23129712045192719, "learning_rate": 2.5956786040001467e-05, "loss": 0.067, "step": 44320 }, { "epoch": 2.6219908913467793, "grad_norm": 0.2787461280822754, "learning_rate": 2.5954823370043555e-05, "loss": 0.0759, "step": 44330 }, { "epoch": 2.6225823623351276, "grad_norm": 0.2127467393875122, "learning_rate": 2.5952860298078925e-05, "loss": 0.0903, "step": 44340 }, { "epoch": 2.6231738333234755, "grad_norm": 0.3183678686618805, "learning_rate": 2.5950896824179607e-05, "loss": 0.0757, "step": 44350 }, { "epoch": 2.6237653043118234, "grad_norm": 0.20652534067630768, "learning_rate": 2.5948932948417657e-05, "loss": 0.0677, "step": 44360 }, { "epoch": 2.6243567753001713, "grad_norm": 0.23570892214775085, "learning_rate": 2.5946968670865144e-05, "loss": 0.0745, "step": 44370 }, { "epoch": 2.6249482462885196, "grad_norm": 0.2306399941444397, "learning_rate": 2.594500399159415e-05, "loss": 0.0822, "step": 44380 }, { "epoch": 2.6255397172768675, "grad_norm": 0.3045675456523895, "learning_rate": 2.5943038910676773e-05, "loss": 0.0768, "step": 44390 }, { "epoch": 2.626131188265216, "grad_norm": 0.2711852490901947, "learning_rate": 2.594107342818512e-05, "loss": 0.0758, "step": 44400 }, { "epoch": 2.6267226592535637, "grad_norm": 0.2164936661720276, "learning_rate": 2.5939107544191326e-05, "loss": 0.0584, "step": 44410 }, { "epoch": 2.6273141302419116, "grad_norm": 0.20308132469654083, "learning_rate": 2.593714125876753e-05, "loss": 0.0643, "step": 44420 }, { "epoch": 2.6279056012302595, "grad_norm": 0.39634740352630615, "learning_rate": 2.5935174571985883e-05, "loss": 0.0944, "step": 44430 }, { "epoch": 2.6284970722186074, "grad_norm": 0.2797413170337677, "learning_rate": 2.5933207483918566e-05, "loss": 0.0706, "step": 44440 }, { "epoch": 2.6290885432069557, "grad_norm": 0.22732189297676086, "learning_rate": 2.5931239994637752e-05, "loss": 0.0685, "step": 44450 }, { "epoch": 2.6296800141953036, "grad_norm": 0.977605938911438, "learning_rate": 2.5929272104215657e-05, "loss": 0.0678, "step": 44460 }, { "epoch": 2.630271485183652, "grad_norm": 0.25790882110595703, "learning_rate": 2.5927303812724477e-05, "loss": 0.073, "step": 44470 }, { "epoch": 2.630862956172, "grad_norm": 0.21642598509788513, "learning_rate": 2.592533512023646e-05, "loss": 0.0811, "step": 44480 }, { "epoch": 2.6314544271603477, "grad_norm": 0.2483031004667282, "learning_rate": 2.592336602682384e-05, "loss": 0.0779, "step": 44490 }, { "epoch": 2.6320458981486956, "grad_norm": 0.20516441762447357, "learning_rate": 2.5921396532558885e-05, "loss": 0.0758, "step": 44500 }, { "epoch": 2.632637369137044, "grad_norm": 0.23274962604045868, "learning_rate": 2.5919426637513858e-05, "loss": 0.0617, "step": 44510 }, { "epoch": 2.633228840125392, "grad_norm": 0.2892996668815613, "learning_rate": 2.5917456341761057e-05, "loss": 0.0771, "step": 44520 }, { "epoch": 2.6338203111137397, "grad_norm": 0.4754289388656616, "learning_rate": 2.591548564537278e-05, "loss": 0.0767, "step": 44530 }, { "epoch": 2.634411782102088, "grad_norm": 0.23381979763507843, "learning_rate": 2.5913514548421348e-05, "loss": 0.0845, "step": 44540 }, { "epoch": 2.635003253090436, "grad_norm": 0.2245001345872879, "learning_rate": 2.5911543050979097e-05, "loss": 0.0852, "step": 44550 }, { "epoch": 2.635594724078784, "grad_norm": 0.2547067403793335, "learning_rate": 2.5909571153118367e-05, "loss": 0.0654, "step": 44560 }, { "epoch": 2.6361861950671317, "grad_norm": 0.28615930676460266, "learning_rate": 2.5907598854911523e-05, "loss": 0.081, "step": 44570 }, { "epoch": 2.63677766605548, "grad_norm": 0.2982221245765686, "learning_rate": 2.5905626156430945e-05, "loss": 0.0916, "step": 44580 }, { "epoch": 2.637369137043828, "grad_norm": 0.20458008348941803, "learning_rate": 2.5903653057749025e-05, "loss": 0.0782, "step": 44590 }, { "epoch": 2.6379606080321762, "grad_norm": 0.19919316470623016, "learning_rate": 2.5901679558938162e-05, "loss": 0.0697, "step": 44600 }, { "epoch": 2.638552079020524, "grad_norm": 0.32033365964889526, "learning_rate": 2.5899705660070782e-05, "loss": 0.0616, "step": 44610 }, { "epoch": 2.639143550008872, "grad_norm": 0.2546304166316986, "learning_rate": 2.5897731361219327e-05, "loss": 0.0765, "step": 44620 }, { "epoch": 2.63973502099722, "grad_norm": 0.33057230710983276, "learning_rate": 2.5895756662456238e-05, "loss": 0.0856, "step": 44630 }, { "epoch": 2.6403264919855682, "grad_norm": 0.22377455234527588, "learning_rate": 2.589378156385398e-05, "loss": 0.085, "step": 44640 }, { "epoch": 2.640917962973916, "grad_norm": 0.24744053184986115, "learning_rate": 2.5891806065485047e-05, "loss": 0.0642, "step": 44650 }, { "epoch": 2.641509433962264, "grad_norm": 0.2783753275871277, "learning_rate": 2.5889830167421913e-05, "loss": 0.0703, "step": 44660 }, { "epoch": 2.6421009049506123, "grad_norm": 0.3269871175289154, "learning_rate": 2.5887853869737098e-05, "loss": 0.0711, "step": 44670 }, { "epoch": 2.6426923759389602, "grad_norm": 0.24129489064216614, "learning_rate": 2.5885877172503128e-05, "loss": 0.0789, "step": 44680 }, { "epoch": 2.643283846927308, "grad_norm": 0.2592705488204956, "learning_rate": 2.5883900075792537e-05, "loss": 0.068, "step": 44690 }, { "epoch": 2.643875317915656, "grad_norm": 0.24317887425422668, "learning_rate": 2.5881922579677874e-05, "loss": 0.0731, "step": 44700 }, { "epoch": 2.6444667889040043, "grad_norm": 0.23162049055099487, "learning_rate": 2.5879944684231717e-05, "loss": 0.0619, "step": 44710 }, { "epoch": 2.645058259892352, "grad_norm": 0.3395942449569702, "learning_rate": 2.5877966389526643e-05, "loss": 0.0745, "step": 44720 }, { "epoch": 2.6456497308807005, "grad_norm": 0.22639667987823486, "learning_rate": 2.587598769563525e-05, "loss": 0.0856, "step": 44730 }, { "epoch": 2.6462412018690484, "grad_norm": 0.2819026708602905, "learning_rate": 2.5874008602630144e-05, "loss": 0.0825, "step": 44740 }, { "epoch": 2.6468326728573963, "grad_norm": 0.273825079202652, "learning_rate": 2.5872029110583958e-05, "loss": 0.0635, "step": 44750 }, { "epoch": 2.647424143845744, "grad_norm": 0.2831593155860901, "learning_rate": 2.5870049219569336e-05, "loss": 0.0601, "step": 44760 }, { "epoch": 2.6480156148340925, "grad_norm": 0.4028322994709015, "learning_rate": 2.5868068929658924e-05, "loss": 0.0793, "step": 44770 }, { "epoch": 2.6486070858224404, "grad_norm": 0.22598396241664886, "learning_rate": 2.5866088240925396e-05, "loss": 0.0857, "step": 44780 }, { "epoch": 2.6491985568107883, "grad_norm": 0.2694484293460846, "learning_rate": 2.5864107153441442e-05, "loss": 0.075, "step": 44790 }, { "epoch": 2.6497900277991366, "grad_norm": 0.2566628158092499, "learning_rate": 2.586212566727975e-05, "loss": 0.0804, "step": 44800 }, { "epoch": 2.6503814987874845, "grad_norm": 0.1780455857515335, "learning_rate": 2.5860143782513053e-05, "loss": 0.0535, "step": 44810 }, { "epoch": 2.6509729697758324, "grad_norm": 0.26202648878097534, "learning_rate": 2.585816149921406e-05, "loss": 0.077, "step": 44820 }, { "epoch": 2.6515644407641803, "grad_norm": 0.24388407170772552, "learning_rate": 2.5856178817455527e-05, "loss": 0.0761, "step": 44830 }, { "epoch": 2.6521559117525286, "grad_norm": 0.2727460265159607, "learning_rate": 2.5854195737310204e-05, "loss": 0.0856, "step": 44840 }, { "epoch": 2.6527473827408765, "grad_norm": 0.16467943787574768, "learning_rate": 2.5852212258850872e-05, "loss": 0.0773, "step": 44850 }, { "epoch": 2.653338853729225, "grad_norm": 0.20467965304851532, "learning_rate": 2.5850228382150314e-05, "loss": 0.0663, "step": 44860 }, { "epoch": 2.6539303247175727, "grad_norm": 1.4689408540725708, "learning_rate": 2.584824410728133e-05, "loss": 0.087, "step": 44870 }, { "epoch": 2.6545217957059206, "grad_norm": 0.39400747418403625, "learning_rate": 2.5846259434316735e-05, "loss": 0.0886, "step": 44880 }, { "epoch": 2.6551132666942685, "grad_norm": 0.3035984933376312, "learning_rate": 2.5844274363329363e-05, "loss": 0.0676, "step": 44890 }, { "epoch": 2.655704737682617, "grad_norm": 0.15178732573986053, "learning_rate": 2.5842288894392066e-05, "loss": 0.0741, "step": 44900 }, { "epoch": 2.6562962086709647, "grad_norm": 0.23671898245811462, "learning_rate": 2.5840303027577697e-05, "loss": 0.0621, "step": 44910 }, { "epoch": 2.6568876796593126, "grad_norm": 0.32082220911979675, "learning_rate": 2.5838316762959127e-05, "loss": 0.0819, "step": 44920 }, { "epoch": 2.657479150647661, "grad_norm": 0.2785528302192688, "learning_rate": 2.5836330100609253e-05, "loss": 0.0796, "step": 44930 }, { "epoch": 2.658070621636009, "grad_norm": 0.2775447964668274, "learning_rate": 2.5834343040600978e-05, "loss": 0.0842, "step": 44940 }, { "epoch": 2.6586620926243567, "grad_norm": 0.3106387257575989, "learning_rate": 2.5832355583007218e-05, "loss": 0.0749, "step": 44950 }, { "epoch": 2.6592535636127046, "grad_norm": 0.21418128907680511, "learning_rate": 2.583036772790091e-05, "loss": 0.0627, "step": 44960 }, { "epoch": 2.659845034601053, "grad_norm": 0.1986611932516098, "learning_rate": 2.5828379475355e-05, "loss": 0.0837, "step": 44970 }, { "epoch": 2.660436505589401, "grad_norm": 0.2590869069099426, "learning_rate": 2.582639082544245e-05, "loss": 0.0737, "step": 44980 }, { "epoch": 2.6610279765777487, "grad_norm": 0.29159459471702576, "learning_rate": 2.5824401778236236e-05, "loss": 0.0837, "step": 44990 }, { "epoch": 2.661619447566097, "grad_norm": 0.20831064879894257, "learning_rate": 2.582241233380935e-05, "loss": 0.0761, "step": 45000 }, { "epoch": 2.662210918554445, "grad_norm": 0.20557430386543274, "learning_rate": 2.5820422492234798e-05, "loss": 0.0684, "step": 45010 }, { "epoch": 2.662802389542793, "grad_norm": 0.235710009932518, "learning_rate": 2.5818432253585606e-05, "loss": 0.0659, "step": 45020 }, { "epoch": 2.6633938605311407, "grad_norm": 0.2705872654914856, "learning_rate": 2.5816441617934802e-05, "loss": 0.0839, "step": 45030 }, { "epoch": 2.663985331519489, "grad_norm": 0.2834615409374237, "learning_rate": 2.5814450585355442e-05, "loss": 0.0801, "step": 45040 }, { "epoch": 2.664576802507837, "grad_norm": 0.4313344359397888, "learning_rate": 2.5812459155920583e-05, "loss": 0.0828, "step": 45050 }, { "epoch": 2.6651682734961852, "grad_norm": 0.2632099688053131, "learning_rate": 2.5810467329703312e-05, "loss": 0.0676, "step": 45060 }, { "epoch": 2.665759744484533, "grad_norm": 0.21938368678092957, "learning_rate": 2.5808475106776716e-05, "loss": 0.0709, "step": 45070 }, { "epoch": 2.666351215472881, "grad_norm": 0.39359384775161743, "learning_rate": 2.5806482487213912e-05, "loss": 0.0771, "step": 45080 }, { "epoch": 2.666942686461229, "grad_norm": 0.1752823144197464, "learning_rate": 2.5804489471088012e-05, "loss": 0.0832, "step": 45090 }, { "epoch": 2.6675341574495772, "grad_norm": 0.17220647633075714, "learning_rate": 2.5802496058472163e-05, "loss": 0.0684, "step": 45100 }, { "epoch": 2.668125628437925, "grad_norm": 0.3344656825065613, "learning_rate": 2.5800502249439507e-05, "loss": 0.0726, "step": 45110 }, { "epoch": 2.668717099426273, "grad_norm": 0.31540608406066895, "learning_rate": 2.5798508044063215e-05, "loss": 0.0743, "step": 45120 }, { "epoch": 2.6693085704146213, "grad_norm": 0.2285296767950058, "learning_rate": 2.5796513442416474e-05, "loss": 0.0815, "step": 45130 }, { "epoch": 2.669900041402969, "grad_norm": 0.2399052232503891, "learning_rate": 2.579451844457247e-05, "loss": 0.0728, "step": 45140 }, { "epoch": 2.670491512391317, "grad_norm": 0.26031413674354553, "learning_rate": 2.579252305060442e-05, "loss": 0.0628, "step": 45150 }, { "epoch": 2.671082983379665, "grad_norm": 0.22087857127189636, "learning_rate": 2.5790527260585543e-05, "loss": 0.0689, "step": 45160 }, { "epoch": 2.6716744543680133, "grad_norm": 0.37823840975761414, "learning_rate": 2.578853107458908e-05, "loss": 0.0867, "step": 45170 }, { "epoch": 2.672265925356361, "grad_norm": 0.24667467176914215, "learning_rate": 2.578653449268829e-05, "loss": 0.0869, "step": 45180 }, { "epoch": 2.6728573963447095, "grad_norm": 0.18703751266002655, "learning_rate": 2.578453751495643e-05, "loss": 0.0818, "step": 45190 }, { "epoch": 2.6734488673330574, "grad_norm": 0.19802823662757874, "learning_rate": 2.5782540141466793e-05, "loss": 0.0708, "step": 45200 }, { "epoch": 2.6740403383214053, "grad_norm": 0.23923592269420624, "learning_rate": 2.5780542372292668e-05, "loss": 0.0704, "step": 45210 }, { "epoch": 2.674631809309753, "grad_norm": 0.2591167092323303, "learning_rate": 2.577854420750737e-05, "loss": 0.0793, "step": 45220 }, { "epoch": 2.6752232802981015, "grad_norm": 0.18164940178394318, "learning_rate": 2.577654564718423e-05, "loss": 0.0692, "step": 45230 }, { "epoch": 2.6758147512864494, "grad_norm": 0.21727970242500305, "learning_rate": 2.5774546691396587e-05, "loss": 0.0856, "step": 45240 }, { "epoch": 2.6764062222747973, "grad_norm": 0.2618764638900757, "learning_rate": 2.577254734021779e-05, "loss": 0.0699, "step": 45250 }, { "epoch": 2.6769976932631456, "grad_norm": 0.34287789463996887, "learning_rate": 2.577054759372121e-05, "loss": 0.0698, "step": 45260 }, { "epoch": 2.6775891642514935, "grad_norm": 0.5774725079536438, "learning_rate": 2.5768547451980234e-05, "loss": 0.0747, "step": 45270 }, { "epoch": 2.6781806352398414, "grad_norm": 0.27107661962509155, "learning_rate": 2.5766546915068267e-05, "loss": 0.0855, "step": 45280 }, { "epoch": 2.6787721062281893, "grad_norm": 0.2057570368051529, "learning_rate": 2.576454598305871e-05, "loss": 0.0738, "step": 45290 }, { "epoch": 2.6793635772165376, "grad_norm": 0.20232224464416504, "learning_rate": 2.5762544656025e-05, "loss": 0.0779, "step": 45300 }, { "epoch": 2.6799550482048855, "grad_norm": 0.3557865619659424, "learning_rate": 2.5760542934040578e-05, "loss": 0.0677, "step": 45310 }, { "epoch": 2.680546519193234, "grad_norm": 0.273611843585968, "learning_rate": 2.5758540817178895e-05, "loss": 0.0761, "step": 45320 }, { "epoch": 2.6811379901815817, "grad_norm": 0.22678878903388977, "learning_rate": 2.5756538305513427e-05, "loss": 0.0948, "step": 45330 }, { "epoch": 2.6817294611699296, "grad_norm": 0.2672193646430969, "learning_rate": 2.5754535399117657e-05, "loss": 0.0679, "step": 45340 }, { "epoch": 2.6823209321582775, "grad_norm": 0.2622309923171997, "learning_rate": 2.575253209806509e-05, "loss": 0.0682, "step": 45350 }, { "epoch": 2.682912403146626, "grad_norm": 0.20012830197811127, "learning_rate": 2.5750528402429233e-05, "loss": 0.0642, "step": 45360 }, { "epoch": 2.6835038741349737, "grad_norm": 0.23377133905887604, "learning_rate": 2.574852431228362e-05, "loss": 0.0754, "step": 45370 }, { "epoch": 2.6840953451233216, "grad_norm": 0.31324487924575806, "learning_rate": 2.57465198277018e-05, "loss": 0.0915, "step": 45380 }, { "epoch": 2.68468681611167, "grad_norm": 0.29874953627586365, "learning_rate": 2.574451494875732e-05, "loss": 0.0819, "step": 45390 }, { "epoch": 2.685278287100018, "grad_norm": 0.5048360228538513, "learning_rate": 2.5742509675523762e-05, "loss": 0.0714, "step": 45400 }, { "epoch": 2.6858697580883657, "grad_norm": 0.285978227853775, "learning_rate": 2.5740504008074707e-05, "loss": 0.0624, "step": 45410 }, { "epoch": 2.6864612290767136, "grad_norm": 0.2871914803981781, "learning_rate": 2.573849794648376e-05, "loss": 0.0909, "step": 45420 }, { "epoch": 2.687052700065062, "grad_norm": 0.2358093112707138, "learning_rate": 2.5736491490824536e-05, "loss": 0.0737, "step": 45430 }, { "epoch": 2.68764417105341, "grad_norm": 0.2232852727174759, "learning_rate": 2.573448464117066e-05, "loss": 0.0766, "step": 45440 }, { "epoch": 2.6882356420417577, "grad_norm": 0.193142831325531, "learning_rate": 2.5732477397595787e-05, "loss": 0.083, "step": 45450 }, { "epoch": 2.688827113030106, "grad_norm": 0.3302657902240753, "learning_rate": 2.5730469760173573e-05, "loss": 0.062, "step": 45460 }, { "epoch": 2.689418584018454, "grad_norm": 0.22027587890625, "learning_rate": 2.5728461728977686e-05, "loss": 0.0697, "step": 45470 }, { "epoch": 2.690010055006802, "grad_norm": 0.2580655813217163, "learning_rate": 2.5726453304081823e-05, "loss": 0.0805, "step": 45480 }, { "epoch": 2.6906015259951497, "grad_norm": 0.23526498675346375, "learning_rate": 2.5724444485559676e-05, "loss": 0.0811, "step": 45490 }, { "epoch": 2.691192996983498, "grad_norm": 0.25317323207855225, "learning_rate": 2.5722435273484975e-05, "loss": 0.0703, "step": 45500 }, { "epoch": 2.691784467971846, "grad_norm": 0.19459182024002075, "learning_rate": 2.5720425667931443e-05, "loss": 0.0633, "step": 45510 }, { "epoch": 2.6923759389601942, "grad_norm": 0.2925008237361908, "learning_rate": 2.5718415668972824e-05, "loss": 0.079, "step": 45520 }, { "epoch": 2.692967409948542, "grad_norm": 0.26565900444984436, "learning_rate": 2.5716405276682886e-05, "loss": 0.0808, "step": 45530 }, { "epoch": 2.69355888093689, "grad_norm": 0.44790589809417725, "learning_rate": 2.5714394491135398e-05, "loss": 0.0854, "step": 45540 }, { "epoch": 2.694150351925238, "grad_norm": 0.17854736745357513, "learning_rate": 2.5712383312404157e-05, "loss": 0.0714, "step": 45550 }, { "epoch": 2.694741822913586, "grad_norm": 0.27532994747161865, "learning_rate": 2.571037174056296e-05, "loss": 0.0577, "step": 45560 }, { "epoch": 2.695333293901934, "grad_norm": 0.18220485746860504, "learning_rate": 2.5708359775685625e-05, "loss": 0.0758, "step": 45570 }, { "epoch": 2.695924764890282, "grad_norm": 0.289846271276474, "learning_rate": 2.5706347417845985e-05, "loss": 0.0792, "step": 45580 }, { "epoch": 2.6965162358786303, "grad_norm": 0.3699118494987488, "learning_rate": 2.570433466711789e-05, "loss": 0.0777, "step": 45590 }, { "epoch": 2.697107706866978, "grad_norm": 0.2821970582008362, "learning_rate": 2.57023215235752e-05, "loss": 0.0711, "step": 45600 }, { "epoch": 2.697699177855326, "grad_norm": 0.32123398780822754, "learning_rate": 2.5700307987291792e-05, "loss": 0.0689, "step": 45610 }, { "epoch": 2.698290648843674, "grad_norm": 0.2721620798110962, "learning_rate": 2.569829405834155e-05, "loss": 0.0781, "step": 45620 }, { "epoch": 2.6988821198320223, "grad_norm": 0.22089920938014984, "learning_rate": 2.5696279736798395e-05, "loss": 0.0774, "step": 45630 }, { "epoch": 2.69947359082037, "grad_norm": 0.24699769914150238, "learning_rate": 2.5694265022736224e-05, "loss": 0.0668, "step": 45640 }, { "epoch": 2.7000650618087185, "grad_norm": 0.3601049780845642, "learning_rate": 2.5692249916228988e-05, "loss": 0.0808, "step": 45650 }, { "epoch": 2.7006565327970664, "grad_norm": 0.2532736361026764, "learning_rate": 2.5690234417350626e-05, "loss": 0.0727, "step": 45660 }, { "epoch": 2.7012480037854143, "grad_norm": 0.24696844816207886, "learning_rate": 2.56882185261751e-05, "loss": 0.0669, "step": 45670 }, { "epoch": 2.701839474773762, "grad_norm": 0.2549728453159332, "learning_rate": 2.5686202242776394e-05, "loss": 0.0829, "step": 45680 }, { "epoch": 2.7024309457621105, "grad_norm": 0.27340835332870483, "learning_rate": 2.5684185567228494e-05, "loss": 0.0787, "step": 45690 }, { "epoch": 2.7030224167504584, "grad_norm": 0.20491476356983185, "learning_rate": 2.5682168499605405e-05, "loss": 0.0747, "step": 45700 }, { "epoch": 2.7036138877388063, "grad_norm": 0.3124412000179291, "learning_rate": 2.5680151039981146e-05, "loss": 0.0734, "step": 45710 }, { "epoch": 2.7042053587271546, "grad_norm": 0.3016975522041321, "learning_rate": 2.5678133188429754e-05, "loss": 0.0741, "step": 45720 }, { "epoch": 2.7047968297155025, "grad_norm": 0.19618210196495056, "learning_rate": 2.567611494502528e-05, "loss": 0.0811, "step": 45730 }, { "epoch": 2.7053883007038504, "grad_norm": 0.2697261571884155, "learning_rate": 2.567409630984178e-05, "loss": 0.0795, "step": 45740 }, { "epoch": 2.7059797716921983, "grad_norm": 0.4369853734970093, "learning_rate": 2.5672077282953336e-05, "loss": 0.0813, "step": 45750 }, { "epoch": 2.7065712426805466, "grad_norm": 0.39208826422691345, "learning_rate": 2.5670057864434042e-05, "loss": 0.0632, "step": 45760 }, { "epoch": 2.7071627136688945, "grad_norm": 0.29478225111961365, "learning_rate": 2.5668038054357997e-05, "loss": 0.07, "step": 45770 }, { "epoch": 2.707754184657243, "grad_norm": 0.28630033135414124, "learning_rate": 2.566601785279933e-05, "loss": 0.087, "step": 45780 }, { "epoch": 2.7083456556455907, "grad_norm": 0.2878042757511139, "learning_rate": 2.5663997259832166e-05, "loss": 0.0907, "step": 45790 }, { "epoch": 2.7089371266339386, "grad_norm": 0.17714187502861023, "learning_rate": 2.5661976275530663e-05, "loss": 0.068, "step": 45800 }, { "epoch": 2.7095285976222865, "grad_norm": 0.17311742901802063, "learning_rate": 2.5659954899968982e-05, "loss": 0.0571, "step": 45810 }, { "epoch": 2.710120068610635, "grad_norm": 0.21206505596637726, "learning_rate": 2.5657933133221302e-05, "loss": 0.0793, "step": 45820 }, { "epoch": 2.7107115395989827, "grad_norm": 0.2341279536485672, "learning_rate": 2.565591097536181e-05, "loss": 0.0817, "step": 45830 }, { "epoch": 2.7113030105873306, "grad_norm": 0.2527892291545868, "learning_rate": 2.565388842646472e-05, "loss": 0.089, "step": 45840 }, { "epoch": 2.711894481575679, "grad_norm": 0.17729325592517853, "learning_rate": 2.5651865486604245e-05, "loss": 0.0735, "step": 45850 }, { "epoch": 2.712485952564027, "grad_norm": 0.47951018810272217, "learning_rate": 2.564984215585463e-05, "loss": 0.0726, "step": 45860 }, { "epoch": 2.7130774235523747, "grad_norm": 0.2759694457054138, "learning_rate": 2.5647818434290117e-05, "loss": 0.0763, "step": 45870 }, { "epoch": 2.7136688945407226, "grad_norm": 0.26548048853874207, "learning_rate": 2.5645794321984973e-05, "loss": 0.0906, "step": 45880 }, { "epoch": 2.714260365529071, "grad_norm": 0.19597193598747253, "learning_rate": 2.5643769819013475e-05, "loss": 0.077, "step": 45890 }, { "epoch": 2.714851836517419, "grad_norm": 0.20652413368225098, "learning_rate": 2.564174492544992e-05, "loss": 0.0692, "step": 45900 }, { "epoch": 2.7154433075057667, "grad_norm": 0.21854528784751892, "learning_rate": 2.5639719641368607e-05, "loss": 0.0667, "step": 45910 }, { "epoch": 2.716034778494115, "grad_norm": 0.3047170042991638, "learning_rate": 2.5637693966843865e-05, "loss": 0.081, "step": 45920 }, { "epoch": 2.716626249482463, "grad_norm": 0.21573135256767273, "learning_rate": 2.563566790195003e-05, "loss": 0.0778, "step": 45930 }, { "epoch": 2.717217720470811, "grad_norm": 0.22410711646080017, "learning_rate": 2.5633641446761443e-05, "loss": 0.0648, "step": 45940 }, { "epoch": 2.7178091914591587, "grad_norm": 0.206963449716568, "learning_rate": 2.5631614601352476e-05, "loss": 0.072, "step": 45950 }, { "epoch": 2.718400662447507, "grad_norm": 0.23753273487091064, "learning_rate": 2.5629587365797508e-05, "loss": 0.0622, "step": 45960 }, { "epoch": 2.718992133435855, "grad_norm": 0.3965107500553131, "learning_rate": 2.5627559740170928e-05, "loss": 0.0869, "step": 45970 }, { "epoch": 2.719583604424203, "grad_norm": 0.19314508140087128, "learning_rate": 2.562553172454715e-05, "loss": 0.08, "step": 45980 }, { "epoch": 2.720175075412551, "grad_norm": 0.2955799698829651, "learning_rate": 2.5623503319000588e-05, "loss": 0.0792, "step": 45990 }, { "epoch": 2.720766546400899, "grad_norm": 0.21426241099834442, "learning_rate": 2.562147452360568e-05, "loss": 0.0732, "step": 46000 }, { "epoch": 2.721358017389247, "grad_norm": 0.2433563470840454, "learning_rate": 2.5619445338436877e-05, "loss": 0.0588, "step": 46010 }, { "epoch": 2.721949488377595, "grad_norm": 0.21991248428821564, "learning_rate": 2.5617415763568642e-05, "loss": 0.0691, "step": 46020 }, { "epoch": 2.722540959365943, "grad_norm": 0.2816930413246155, "learning_rate": 2.5615385799075464e-05, "loss": 0.0854, "step": 46030 }, { "epoch": 2.723132430354291, "grad_norm": 0.23984229564666748, "learning_rate": 2.561335544503182e-05, "loss": 0.0913, "step": 46040 }, { "epoch": 2.7237239013426393, "grad_norm": 0.2323431372642517, "learning_rate": 2.561132470151223e-05, "loss": 0.0729, "step": 46050 }, { "epoch": 2.724315372330987, "grad_norm": 0.2679961025714874, "learning_rate": 2.5609293568591208e-05, "loss": 0.0629, "step": 46060 }, { "epoch": 2.724906843319335, "grad_norm": 0.3247837424278259, "learning_rate": 2.5607262046343294e-05, "loss": 0.0807, "step": 46070 }, { "epoch": 2.725498314307683, "grad_norm": 0.39570382237434387, "learning_rate": 2.5605230134843033e-05, "loss": 0.0809, "step": 46080 }, { "epoch": 2.7260897852960313, "grad_norm": 0.1917399764060974, "learning_rate": 2.5603197834165003e-05, "loss": 0.0759, "step": 46090 }, { "epoch": 2.726681256284379, "grad_norm": 0.2802713215351105, "learning_rate": 2.5601165144383768e-05, "loss": 0.0671, "step": 46100 }, { "epoch": 2.7272727272727275, "grad_norm": 0.2217477262020111, "learning_rate": 2.5599132065573928e-05, "loss": 0.0646, "step": 46110 }, { "epoch": 2.7278641982610754, "grad_norm": 0.2074282318353653, "learning_rate": 2.5597098597810088e-05, "loss": 0.0733, "step": 46120 }, { "epoch": 2.7284556692494233, "grad_norm": 0.2055998146533966, "learning_rate": 2.5595064741166873e-05, "loss": 0.0743, "step": 46130 }, { "epoch": 2.729047140237771, "grad_norm": 0.18626992404460907, "learning_rate": 2.5593030495718916e-05, "loss": 0.0836, "step": 46140 }, { "epoch": 2.7296386112261195, "grad_norm": 0.23525382578372955, "learning_rate": 2.5590995861540872e-05, "loss": 0.074, "step": 46150 }, { "epoch": 2.7302300822144674, "grad_norm": 0.2552281320095062, "learning_rate": 2.5588960838707402e-05, "loss": 0.0667, "step": 46160 }, { "epoch": 2.7308215532028153, "grad_norm": 0.4027305245399475, "learning_rate": 2.558692542729318e-05, "loss": 0.0694, "step": 46170 }, { "epoch": 2.7314130241911636, "grad_norm": 0.24021759629249573, "learning_rate": 2.558488962737291e-05, "loss": 0.0875, "step": 46180 }, { "epoch": 2.7320044951795115, "grad_norm": 0.21888771653175354, "learning_rate": 2.5582853439021286e-05, "loss": 0.0867, "step": 46190 }, { "epoch": 2.7325959661678594, "grad_norm": 0.2895667254924774, "learning_rate": 2.558081686231304e-05, "loss": 0.0727, "step": 46200 }, { "epoch": 2.7331874371562073, "grad_norm": 0.3527485430240631, "learning_rate": 2.5578779897322905e-05, "loss": 0.0605, "step": 46210 }, { "epoch": 2.7337789081445556, "grad_norm": 0.3040667772293091, "learning_rate": 2.557674254412563e-05, "loss": 0.0712, "step": 46220 }, { "epoch": 2.7343703791329035, "grad_norm": 0.261437326669693, "learning_rate": 2.557470480279598e-05, "loss": 0.0859, "step": 46230 }, { "epoch": 2.734961850121252, "grad_norm": 0.23257125914096832, "learning_rate": 2.5572666673408734e-05, "loss": 0.0666, "step": 46240 }, { "epoch": 2.7355533211095997, "grad_norm": 0.18090975284576416, "learning_rate": 2.5570628156038687e-05, "loss": 0.0742, "step": 46250 }, { "epoch": 2.7361447920979476, "grad_norm": 0.25055837631225586, "learning_rate": 2.5568589250760636e-05, "loss": 0.0695, "step": 46260 }, { "epoch": 2.7367362630862955, "grad_norm": 0.315912663936615, "learning_rate": 2.556654995764941e-05, "loss": 0.0867, "step": 46270 }, { "epoch": 2.737327734074644, "grad_norm": 0.2673095762729645, "learning_rate": 2.5564510276779846e-05, "loss": 0.0988, "step": 46280 }, { "epoch": 2.7379192050629917, "grad_norm": 0.22399145364761353, "learning_rate": 2.5562470208226796e-05, "loss": 0.0826, "step": 46290 }, { "epoch": 2.7385106760513396, "grad_norm": 0.19677536189556122, "learning_rate": 2.5560429752065117e-05, "loss": 0.0752, "step": 46300 }, { "epoch": 2.739102147039688, "grad_norm": 0.1760648638010025, "learning_rate": 2.5558388908369686e-05, "loss": 0.0685, "step": 46310 }, { "epoch": 2.739693618028036, "grad_norm": 0.27788594365119934, "learning_rate": 2.5556347677215404e-05, "loss": 0.0795, "step": 46320 }, { "epoch": 2.7402850890163837, "grad_norm": 0.22467556595802307, "learning_rate": 2.5554306058677174e-05, "loss": 0.0912, "step": 46330 }, { "epoch": 2.7408765600047316, "grad_norm": 0.18720953166484833, "learning_rate": 2.5552264052829907e-05, "loss": 0.0766, "step": 46340 }, { "epoch": 2.74146803099308, "grad_norm": 0.2377718687057495, "learning_rate": 2.5550221659748554e-05, "loss": 0.078, "step": 46350 }, { "epoch": 2.742059501981428, "grad_norm": 0.18679428100585938, "learning_rate": 2.5548178879508055e-05, "loss": 0.0637, "step": 46360 }, { "epoch": 2.7426509729697757, "grad_norm": 0.40754860639572144, "learning_rate": 2.5546135712183373e-05, "loss": 0.0804, "step": 46370 }, { "epoch": 2.743242443958124, "grad_norm": 0.28740358352661133, "learning_rate": 2.554409215784949e-05, "loss": 0.0935, "step": 46380 }, { "epoch": 2.743833914946472, "grad_norm": 0.18045496940612793, "learning_rate": 2.5542048216581393e-05, "loss": 0.0791, "step": 46390 }, { "epoch": 2.7444253859348198, "grad_norm": 0.19612689316272736, "learning_rate": 2.5540003888454093e-05, "loss": 0.0907, "step": 46400 }, { "epoch": 2.7450168569231677, "grad_norm": 0.20709101855754852, "learning_rate": 2.553795917354261e-05, "loss": 0.0697, "step": 46410 }, { "epoch": 2.745608327911516, "grad_norm": 0.3146284520626068, "learning_rate": 2.5535914071921976e-05, "loss": 0.0792, "step": 46420 }, { "epoch": 2.746199798899864, "grad_norm": 0.18816989660263062, "learning_rate": 2.5533868583667238e-05, "loss": 0.0843, "step": 46430 }, { "epoch": 2.746791269888212, "grad_norm": 0.2321249544620514, "learning_rate": 2.5531822708853463e-05, "loss": 0.0743, "step": 46440 }, { "epoch": 2.74738274087656, "grad_norm": 0.2432987540960312, "learning_rate": 2.5529776447555724e-05, "loss": 0.0797, "step": 46450 }, { "epoch": 2.747974211864908, "grad_norm": 0.18266639113426208, "learning_rate": 2.5527729799849118e-05, "loss": 0.0643, "step": 46460 }, { "epoch": 2.748565682853256, "grad_norm": 0.20835937559604645, "learning_rate": 2.552568276580874e-05, "loss": 0.0736, "step": 46470 }, { "epoch": 2.749157153841604, "grad_norm": 0.20955325663089752, "learning_rate": 2.5523635345509723e-05, "loss": 0.0851, "step": 46480 }, { "epoch": 2.749748624829952, "grad_norm": 0.5501386523246765, "learning_rate": 2.552158753902719e-05, "loss": 0.0778, "step": 46490 }, { "epoch": 2.7503400958183, "grad_norm": 0.3308993875980377, "learning_rate": 2.5519539346436294e-05, "loss": 0.0721, "step": 46500 }, { "epoch": 2.7509315668066483, "grad_norm": 0.20173344016075134, "learning_rate": 2.5517490767812195e-05, "loss": 0.066, "step": 46510 }, { "epoch": 2.751523037794996, "grad_norm": 0.3048115372657776, "learning_rate": 2.5515441803230077e-05, "loss": 0.0668, "step": 46520 }, { "epoch": 2.752114508783344, "grad_norm": 0.24563106894493103, "learning_rate": 2.5513392452765114e-05, "loss": 0.0759, "step": 46530 }, { "epoch": 2.752705979771692, "grad_norm": 0.5661080479621887, "learning_rate": 2.551134271649252e-05, "loss": 0.0743, "step": 46540 }, { "epoch": 2.7532974507600403, "grad_norm": 0.22711612284183502, "learning_rate": 2.550929259448752e-05, "loss": 0.0615, "step": 46550 }, { "epoch": 2.753888921748388, "grad_norm": 0.1632077395915985, "learning_rate": 2.550724208682534e-05, "loss": 0.0606, "step": 46560 }, { "epoch": 2.7544803927367365, "grad_norm": 0.20458900928497314, "learning_rate": 2.5505191193581225e-05, "loss": 0.0713, "step": 46570 }, { "epoch": 2.7550718637250844, "grad_norm": 0.29493069648742676, "learning_rate": 2.5503139914830437e-05, "loss": 0.0781, "step": 46580 }, { "epoch": 2.7556633347134323, "grad_norm": 0.2443927377462387, "learning_rate": 2.5501088250648256e-05, "loss": 0.074, "step": 46590 }, { "epoch": 2.75625480570178, "grad_norm": 0.271885484457016, "learning_rate": 2.5499036201109965e-05, "loss": 0.0772, "step": 46600 }, { "epoch": 2.7568462766901285, "grad_norm": 0.1509919911623001, "learning_rate": 2.549698376629088e-05, "loss": 0.0673, "step": 46610 }, { "epoch": 2.7574377476784764, "grad_norm": 0.25114431977272034, "learning_rate": 2.5494930946266304e-05, "loss": 0.0761, "step": 46620 }, { "epoch": 2.7580292186668243, "grad_norm": 0.3342428505420685, "learning_rate": 2.5492877741111574e-05, "loss": 0.0869, "step": 46630 }, { "epoch": 2.7586206896551726, "grad_norm": 0.35655277967453003, "learning_rate": 2.5490824150902042e-05, "loss": 0.0826, "step": 46640 }, { "epoch": 2.7592121606435205, "grad_norm": 0.39825716614723206, "learning_rate": 2.5488770175713055e-05, "loss": 0.0691, "step": 46650 }, { "epoch": 2.7598036316318684, "grad_norm": 0.2005738914012909, "learning_rate": 2.548671581562e-05, "loss": 0.0618, "step": 46660 }, { "epoch": 2.7603951026202163, "grad_norm": 0.28669610619544983, "learning_rate": 2.5484661070698262e-05, "loss": 0.0764, "step": 46670 }, { "epoch": 2.7609865736085646, "grad_norm": 0.3196412920951843, "learning_rate": 2.548260594102324e-05, "loss": 0.0796, "step": 46680 }, { "epoch": 2.7615780445969125, "grad_norm": 0.20503279566764832, "learning_rate": 2.548055042667035e-05, "loss": 0.0839, "step": 46690 }, { "epoch": 2.762169515585261, "grad_norm": 0.19036805629730225, "learning_rate": 2.547849452771503e-05, "loss": 0.0738, "step": 46700 }, { "epoch": 2.7627609865736087, "grad_norm": 0.2564232647418976, "learning_rate": 2.5476438244232724e-05, "loss": 0.0528, "step": 46710 }, { "epoch": 2.7633524575619566, "grad_norm": 0.30253106355667114, "learning_rate": 2.547438157629888e-05, "loss": 0.0729, "step": 46720 }, { "epoch": 2.7639439285503045, "grad_norm": 0.27851033210754395, "learning_rate": 2.547232452398898e-05, "loss": 0.0875, "step": 46730 }, { "epoch": 2.764535399538653, "grad_norm": 0.22970053553581238, "learning_rate": 2.5470267087378515e-05, "loss": 0.0917, "step": 46740 }, { "epoch": 2.7651268705270007, "grad_norm": 0.1787286251783371, "learning_rate": 2.5468209266542975e-05, "loss": 0.076, "step": 46750 }, { "epoch": 2.7657183415153486, "grad_norm": 0.18171299993991852, "learning_rate": 2.5466151061557882e-05, "loss": 0.0694, "step": 46760 }, { "epoch": 2.766309812503697, "grad_norm": 0.27388012409210205, "learning_rate": 2.5464092472498767e-05, "loss": 0.0766, "step": 46770 }, { "epoch": 2.766901283492045, "grad_norm": 0.24683816730976105, "learning_rate": 2.546203349944117e-05, "loss": 0.081, "step": 46780 }, { "epoch": 2.7674927544803927, "grad_norm": 0.2775369882583618, "learning_rate": 2.545997414246065e-05, "loss": 0.077, "step": 46790 }, { "epoch": 2.7680842254687406, "grad_norm": 0.4035634696483612, "learning_rate": 2.5457914401632782e-05, "loss": 0.0648, "step": 46800 }, { "epoch": 2.768675696457089, "grad_norm": 0.3846067786216736, "learning_rate": 2.545585427703315e-05, "loss": 0.0562, "step": 46810 }, { "epoch": 2.7692671674454368, "grad_norm": 0.28463953733444214, "learning_rate": 2.545379376873735e-05, "loss": 0.0757, "step": 46820 }, { "epoch": 2.7698586384337847, "grad_norm": 0.9917224645614624, "learning_rate": 2.5451732876820995e-05, "loss": 0.0817, "step": 46830 }, { "epoch": 2.770450109422133, "grad_norm": 0.27432167530059814, "learning_rate": 2.544967160135972e-05, "loss": 0.0762, "step": 46840 }, { "epoch": 2.771041580410481, "grad_norm": 0.23067840933799744, "learning_rate": 2.5447609942429167e-05, "loss": 0.0771, "step": 46850 }, { "epoch": 2.7716330513988288, "grad_norm": 0.23156629502773285, "learning_rate": 2.5445547900104987e-05, "loss": 0.0792, "step": 46860 }, { "epoch": 2.7722245223871766, "grad_norm": 0.2718466818332672, "learning_rate": 2.5443485474462853e-05, "loss": 0.0903, "step": 46870 }, { "epoch": 2.772815993375525, "grad_norm": 0.3043084144592285, "learning_rate": 2.5441422665578446e-05, "loss": 0.0851, "step": 46880 }, { "epoch": 2.773407464363873, "grad_norm": 0.2339135706424713, "learning_rate": 2.5439359473527467e-05, "loss": 0.0823, "step": 46890 }, { "epoch": 2.773998935352221, "grad_norm": 0.2362414002418518, "learning_rate": 2.5437295898385632e-05, "loss": 0.0675, "step": 46900 }, { "epoch": 2.774590406340569, "grad_norm": 0.16230733692646027, "learning_rate": 2.5435231940228667e-05, "loss": 0.0586, "step": 46910 }, { "epoch": 2.775181877328917, "grad_norm": 0.4054580330848694, "learning_rate": 2.5433167599132305e-05, "loss": 0.0815, "step": 46920 }, { "epoch": 2.775773348317265, "grad_norm": 0.2600560188293457, "learning_rate": 2.5431102875172307e-05, "loss": 0.0773, "step": 46930 }, { "epoch": 2.776364819305613, "grad_norm": 0.2506859600543976, "learning_rate": 2.542903776842444e-05, "loss": 0.0765, "step": 46940 }, { "epoch": 2.776956290293961, "grad_norm": 0.3317354917526245, "learning_rate": 2.542697227896449e-05, "loss": 0.0742, "step": 46950 }, { "epoch": 2.777547761282309, "grad_norm": 0.26621899008750916, "learning_rate": 2.5424906406868245e-05, "loss": 0.0673, "step": 46960 }, { "epoch": 2.7781392322706573, "grad_norm": 0.2704770863056183, "learning_rate": 2.542284015221153e-05, "loss": 0.0928, "step": 46970 }, { "epoch": 2.778730703259005, "grad_norm": 0.2402925044298172, "learning_rate": 2.542077351507016e-05, "loss": 0.0871, "step": 46980 }, { "epoch": 2.779322174247353, "grad_norm": 0.26857930421829224, "learning_rate": 2.5418706495519974e-05, "loss": 0.0751, "step": 46990 }, { "epoch": 2.779913645235701, "grad_norm": 0.202592134475708, "learning_rate": 2.5416639093636826e-05, "loss": 0.0702, "step": 47000 }, { "epoch": 2.7805051162240493, "grad_norm": 0.15842676162719727, "learning_rate": 2.541457130949658e-05, "loss": 0.0526, "step": 47010 }, { "epoch": 2.781096587212397, "grad_norm": 0.2404882162809372, "learning_rate": 2.5412503143175124e-05, "loss": 0.0746, "step": 47020 }, { "epoch": 2.7816880582007455, "grad_norm": 0.46227994561195374, "learning_rate": 2.5410434594748347e-05, "loss": 0.076, "step": 47030 }, { "epoch": 2.7822795291890934, "grad_norm": 0.29190516471862793, "learning_rate": 2.5408365664292165e-05, "loss": 0.0782, "step": 47040 }, { "epoch": 2.7828710001774413, "grad_norm": 0.28898969292640686, "learning_rate": 2.5406296351882498e-05, "loss": 0.0602, "step": 47050 }, { "epoch": 2.783462471165789, "grad_norm": 0.19607481360435486, "learning_rate": 2.540422665759528e-05, "loss": 0.0604, "step": 47060 }, { "epoch": 2.7840539421541375, "grad_norm": 0.28793570399284363, "learning_rate": 2.540215658150646e-05, "loss": 0.074, "step": 47070 }, { "epoch": 2.7846454131424854, "grad_norm": 0.31041979789733887, "learning_rate": 2.5400086123692014e-05, "loss": 0.0848, "step": 47080 }, { "epoch": 2.7852368841308333, "grad_norm": 0.232753187417984, "learning_rate": 2.539801528422791e-05, "loss": 0.0828, "step": 47090 }, { "epoch": 2.7858283551191816, "grad_norm": 0.2248464822769165, "learning_rate": 2.539594406319015e-05, "loss": 0.0816, "step": 47100 }, { "epoch": 2.7864198261075295, "grad_norm": 0.24645383656024933, "learning_rate": 2.539387246065473e-05, "loss": 0.0738, "step": 47110 }, { "epoch": 2.7870112970958774, "grad_norm": 0.2641263008117676, "learning_rate": 2.539180047669768e-05, "loss": 0.0752, "step": 47120 }, { "epoch": 2.7876027680842252, "grad_norm": 0.24658380448818207, "learning_rate": 2.5389728111395036e-05, "loss": 0.0848, "step": 47130 }, { "epoch": 2.7881942390725736, "grad_norm": 0.32730910181999207, "learning_rate": 2.5387655364822838e-05, "loss": 0.0788, "step": 47140 }, { "epoch": 2.7887857100609215, "grad_norm": 0.1992596983909607, "learning_rate": 2.538558223705716e-05, "loss": 0.078, "step": 47150 }, { "epoch": 2.78937718104927, "grad_norm": 0.3293907642364502, "learning_rate": 2.5383508728174073e-05, "loss": 0.0643, "step": 47160 }, { "epoch": 2.7899686520376177, "grad_norm": 0.25771158933639526, "learning_rate": 2.538143483824967e-05, "loss": 0.0728, "step": 47170 }, { "epoch": 2.7905601230259656, "grad_norm": 0.3015732765197754, "learning_rate": 2.5379360567360053e-05, "loss": 0.0868, "step": 47180 }, { "epoch": 2.7911515940143135, "grad_norm": 0.23222659528255463, "learning_rate": 2.5377285915581346e-05, "loss": 0.0716, "step": 47190 }, { "epoch": 2.791743065002662, "grad_norm": 0.21951580047607422, "learning_rate": 2.537521088298968e-05, "loss": 0.0701, "step": 47200 }, { "epoch": 2.7923345359910097, "grad_norm": 0.23532342910766602, "learning_rate": 2.53731354696612e-05, "loss": 0.0777, "step": 47210 }, { "epoch": 2.7929260069793576, "grad_norm": 0.29383617639541626, "learning_rate": 2.5371059675672066e-05, "loss": 0.0703, "step": 47220 }, { "epoch": 2.793517477967706, "grad_norm": 0.30154547095298767, "learning_rate": 2.5368983501098458e-05, "loss": 0.0735, "step": 47230 }, { "epoch": 2.7941089489560538, "grad_norm": 0.2864249646663666, "learning_rate": 2.5366906946016563e-05, "loss": 0.0887, "step": 47240 }, { "epoch": 2.7947004199444017, "grad_norm": 0.246367946267128, "learning_rate": 2.536483001050258e-05, "loss": 0.0712, "step": 47250 }, { "epoch": 2.7952918909327495, "grad_norm": 0.16797472536563873, "learning_rate": 2.5362752694632732e-05, "loss": 0.073, "step": 47260 }, { "epoch": 2.795883361921098, "grad_norm": 0.35722190141677856, "learning_rate": 2.5360674998483246e-05, "loss": 0.0873, "step": 47270 }, { "epoch": 2.7964748329094458, "grad_norm": 0.3298204243183136, "learning_rate": 2.535859692213037e-05, "loss": 0.09, "step": 47280 }, { "epoch": 2.7970663038977936, "grad_norm": 0.41503778100013733, "learning_rate": 2.5356518465650355e-05, "loss": 0.084, "step": 47290 }, { "epoch": 2.797657774886142, "grad_norm": 0.23741711676120758, "learning_rate": 2.5354439629119484e-05, "loss": 0.0812, "step": 47300 }, { "epoch": 2.79824924587449, "grad_norm": 0.17608273029327393, "learning_rate": 2.5352360412614037e-05, "loss": 0.0625, "step": 47310 }, { "epoch": 2.7988407168628378, "grad_norm": 0.3060406744480133, "learning_rate": 2.535028081621031e-05, "loss": 0.0885, "step": 47320 }, { "epoch": 2.7994321878511856, "grad_norm": 0.22165516018867493, "learning_rate": 2.5348200839984636e-05, "loss": 0.0845, "step": 47330 }, { "epoch": 2.800023658839534, "grad_norm": 0.2447560429573059, "learning_rate": 2.5346120484013324e-05, "loss": 0.0712, "step": 47340 }, { "epoch": 2.800615129827882, "grad_norm": 0.27404630184173584, "learning_rate": 2.534403974837272e-05, "loss": 0.0694, "step": 47350 }, { "epoch": 2.80120660081623, "grad_norm": 0.25356525182724, "learning_rate": 2.5341958633139192e-05, "loss": 0.0669, "step": 47360 }, { "epoch": 2.801798071804578, "grad_norm": 0.24101147055625916, "learning_rate": 2.5339877138389095e-05, "loss": 0.0764, "step": 47370 }, { "epoch": 2.802389542792926, "grad_norm": 0.30216100811958313, "learning_rate": 2.5337795264198822e-05, "loss": 0.0958, "step": 47380 }, { "epoch": 2.802981013781274, "grad_norm": 0.24021676182746887, "learning_rate": 2.5335713010644772e-05, "loss": 0.0799, "step": 47390 }, { "epoch": 2.803572484769622, "grad_norm": 0.1831078976392746, "learning_rate": 2.5333630377803352e-05, "loss": 0.0744, "step": 47400 }, { "epoch": 2.80416395575797, "grad_norm": 0.18344448506832123, "learning_rate": 2.5331547365750986e-05, "loss": 0.0685, "step": 47410 }, { "epoch": 2.804755426746318, "grad_norm": 0.3441561460494995, "learning_rate": 2.5329463974564126e-05, "loss": 0.0741, "step": 47420 }, { "epoch": 2.8053468977346663, "grad_norm": 0.2571943700313568, "learning_rate": 2.532738020431921e-05, "loss": 0.081, "step": 47430 }, { "epoch": 2.805938368723014, "grad_norm": 0.27553999423980713, "learning_rate": 2.5325296055092716e-05, "loss": 0.0663, "step": 47440 }, { "epoch": 2.806529839711362, "grad_norm": 0.14605967700481415, "learning_rate": 2.5323211526961123e-05, "loss": 0.0656, "step": 47450 }, { "epoch": 2.80712131069971, "grad_norm": 0.22525426745414734, "learning_rate": 2.5321126620000926e-05, "loss": 0.0703, "step": 47460 }, { "epoch": 2.8077127816880583, "grad_norm": 0.2681531608104706, "learning_rate": 2.531904133428864e-05, "loss": 0.0691, "step": 47470 }, { "epoch": 2.808304252676406, "grad_norm": 0.3051014542579651, "learning_rate": 2.5316955669900776e-05, "loss": 0.0799, "step": 47480 }, { "epoch": 2.8088957236647545, "grad_norm": 0.22886180877685547, "learning_rate": 2.5314869626913884e-05, "loss": 0.07, "step": 47490 }, { "epoch": 2.8094871946531024, "grad_norm": 0.3060985803604126, "learning_rate": 2.5312783205404503e-05, "loss": 0.0749, "step": 47500 }, { "epoch": 2.8100786656414503, "grad_norm": 0.28952547907829285, "learning_rate": 2.5310696405449204e-05, "loss": 0.0655, "step": 47510 }, { "epoch": 2.810670136629798, "grad_norm": 0.22860831022262573, "learning_rate": 2.530860922712457e-05, "loss": 0.0674, "step": 47520 }, { "epoch": 2.8112616076181465, "grad_norm": 0.3372999131679535, "learning_rate": 2.530652167050719e-05, "loss": 0.0808, "step": 47530 }, { "epoch": 2.8118530786064944, "grad_norm": 0.3608091473579407, "learning_rate": 2.530443373567367e-05, "loss": 0.0791, "step": 47540 }, { "epoch": 2.8124445495948422, "grad_norm": 0.30017194151878357, "learning_rate": 2.5302345422700628e-05, "loss": 0.0704, "step": 47550 }, { "epoch": 2.8130360205831906, "grad_norm": 0.22282861173152924, "learning_rate": 2.5300256731664704e-05, "loss": 0.066, "step": 47560 }, { "epoch": 2.8136274915715385, "grad_norm": 0.22871609032154083, "learning_rate": 2.5298167662642542e-05, "loss": 0.0771, "step": 47570 }, { "epoch": 2.8142189625598864, "grad_norm": 0.2948770523071289, "learning_rate": 2.52960782157108e-05, "loss": 0.09, "step": 47580 }, { "epoch": 2.8148104335482342, "grad_norm": 0.2222415953874588, "learning_rate": 2.529398839094617e-05, "loss": 0.0796, "step": 47590 }, { "epoch": 2.8154019045365826, "grad_norm": 0.18343183398246765, "learning_rate": 2.529189818842532e-05, "loss": 0.0783, "step": 47600 }, { "epoch": 2.8159933755249305, "grad_norm": 0.3036390244960785, "learning_rate": 2.5289807608224976e-05, "loss": 0.0647, "step": 47610 }, { "epoch": 2.816584846513279, "grad_norm": 0.4154324233531952, "learning_rate": 2.528771665042184e-05, "loss": 0.0843, "step": 47620 }, { "epoch": 2.8171763175016267, "grad_norm": 0.29556775093078613, "learning_rate": 2.5285625315092647e-05, "loss": 0.0879, "step": 47630 }, { "epoch": 2.8177677884899746, "grad_norm": 0.3121691942214966, "learning_rate": 2.528353360231414e-05, "loss": 0.0881, "step": 47640 }, { "epoch": 2.8183592594783224, "grad_norm": 0.20391850173473358, "learning_rate": 2.528144151216309e-05, "loss": 0.0848, "step": 47650 }, { "epoch": 2.8189507304666708, "grad_norm": 0.23748892545700073, "learning_rate": 2.5279349044716253e-05, "loss": 0.0688, "step": 47660 }, { "epoch": 2.8195422014550187, "grad_norm": 0.21499799191951752, "learning_rate": 2.5277256200050428e-05, "loss": 0.0731, "step": 47670 }, { "epoch": 2.8201336724433665, "grad_norm": 0.2863518297672272, "learning_rate": 2.527516297824241e-05, "loss": 0.0843, "step": 47680 }, { "epoch": 2.820725143431715, "grad_norm": 0.22892868518829346, "learning_rate": 2.5273069379369017e-05, "loss": 0.081, "step": 47690 }, { "epoch": 2.8213166144200628, "grad_norm": 0.21844562888145447, "learning_rate": 2.5270975403507073e-05, "loss": 0.0719, "step": 47700 }, { "epoch": 2.8219080854084106, "grad_norm": 0.18262428045272827, "learning_rate": 2.5268881050733424e-05, "loss": 0.0638, "step": 47710 }, { "epoch": 2.8224995563967585, "grad_norm": 0.42386218905448914, "learning_rate": 2.5266786321124927e-05, "loss": 0.0772, "step": 47720 }, { "epoch": 2.823091027385107, "grad_norm": 0.23881395161151886, "learning_rate": 2.526469121475845e-05, "loss": 0.0839, "step": 47730 }, { "epoch": 2.8236824983734548, "grad_norm": 0.2339208722114563, "learning_rate": 2.5262595731710872e-05, "loss": 0.075, "step": 47740 }, { "epoch": 2.8242739693618026, "grad_norm": 0.1339050531387329, "learning_rate": 2.52604998720591e-05, "loss": 0.0701, "step": 47750 }, { "epoch": 2.824865440350151, "grad_norm": 0.2520085275173187, "learning_rate": 2.5258403635880038e-05, "loss": 0.0686, "step": 47760 }, { "epoch": 2.825456911338499, "grad_norm": 0.21549415588378906, "learning_rate": 2.5256307023250607e-05, "loss": 0.0877, "step": 47770 }, { "epoch": 2.8260483823268467, "grad_norm": 0.3688305616378784, "learning_rate": 2.5254210034247758e-05, "loss": 0.0795, "step": 47780 }, { "epoch": 2.8266398533151946, "grad_norm": 0.24031813442707062, "learning_rate": 2.525211266894844e-05, "loss": 0.076, "step": 47790 }, { "epoch": 2.827231324303543, "grad_norm": 0.4106099009513855, "learning_rate": 2.525001492742961e-05, "loss": 0.0764, "step": 47800 }, { "epoch": 2.827822795291891, "grad_norm": 0.2039157897233963, "learning_rate": 2.524791680976826e-05, "loss": 0.0706, "step": 47810 }, { "epoch": 2.828414266280239, "grad_norm": 0.2772786319255829, "learning_rate": 2.524581831604138e-05, "loss": 0.0727, "step": 47820 }, { "epoch": 2.829005737268587, "grad_norm": 0.24255774915218353, "learning_rate": 2.524371944632598e-05, "loss": 0.085, "step": 47830 }, { "epoch": 2.829597208256935, "grad_norm": 0.16180072724819183, "learning_rate": 2.5241620200699077e-05, "loss": 0.0764, "step": 47840 }, { "epoch": 2.830188679245283, "grad_norm": 0.2625848948955536, "learning_rate": 2.523952057923771e-05, "loss": 0.07, "step": 47850 }, { "epoch": 2.830780150233631, "grad_norm": 0.19451937079429626, "learning_rate": 2.5237420582018925e-05, "loss": 0.075, "step": 47860 }, { "epoch": 2.831371621221979, "grad_norm": 0.20673932135105133, "learning_rate": 2.523532020911979e-05, "loss": 0.0737, "step": 47870 }, { "epoch": 2.831963092210327, "grad_norm": 0.43867287039756775, "learning_rate": 2.5233219460617377e-05, "loss": 0.078, "step": 47880 }, { "epoch": 2.8325545631986753, "grad_norm": 0.17412856221199036, "learning_rate": 2.523111833658878e-05, "loss": 0.0676, "step": 47890 }, { "epoch": 2.833146034187023, "grad_norm": 0.16312189400196075, "learning_rate": 2.5229016837111104e-05, "loss": 0.0742, "step": 47900 }, { "epoch": 2.833737505175371, "grad_norm": 1.0387628078460693, "learning_rate": 2.5226914962261465e-05, "loss": 0.0744, "step": 47910 }, { "epoch": 2.834328976163719, "grad_norm": 0.2762399911880493, "learning_rate": 2.5224812712117e-05, "loss": 0.0747, "step": 47920 }, { "epoch": 2.8349204471520673, "grad_norm": 0.30393821001052856, "learning_rate": 2.5222710086754848e-05, "loss": 0.0885, "step": 47930 }, { "epoch": 2.835511918140415, "grad_norm": 0.25632694363594055, "learning_rate": 2.5220607086252165e-05, "loss": 0.0758, "step": 47940 }, { "epoch": 2.8361033891287635, "grad_norm": 0.26205313205718994, "learning_rate": 2.5218503710686143e-05, "loss": 0.0764, "step": 47950 }, { "epoch": 2.8366948601171114, "grad_norm": 0.25642499327659607, "learning_rate": 2.521639996013395e-05, "loss": 0.0726, "step": 47960 }, { "epoch": 2.8372863311054592, "grad_norm": 0.2924336791038513, "learning_rate": 2.521429583467279e-05, "loss": 0.0797, "step": 47970 }, { "epoch": 2.837877802093807, "grad_norm": 0.2655504643917084, "learning_rate": 2.5212191334379887e-05, "loss": 0.0889, "step": 47980 }, { "epoch": 2.8384692730821555, "grad_norm": 0.18655462563037872, "learning_rate": 2.521008645933246e-05, "loss": 0.0715, "step": 47990 }, { "epoch": 2.8390607440705034, "grad_norm": 0.2720717787742615, "learning_rate": 2.520798120960776e-05, "loss": 0.0666, "step": 48000 }, { "epoch": 2.8396522150588512, "grad_norm": 0.1949460655450821, "learning_rate": 2.5205875585283033e-05, "loss": 0.0645, "step": 48010 }, { "epoch": 2.8402436860471996, "grad_norm": 0.22050270438194275, "learning_rate": 2.5203769586435556e-05, "loss": 0.0767, "step": 48020 }, { "epoch": 2.8408351570355475, "grad_norm": 0.20323626697063446, "learning_rate": 2.520166321314261e-05, "loss": 0.0758, "step": 48030 }, { "epoch": 2.8414266280238953, "grad_norm": 0.3502785563468933, "learning_rate": 2.5199556465481482e-05, "loss": 0.0815, "step": 48040 }, { "epoch": 2.8420180990122432, "grad_norm": 0.19688430428504944, "learning_rate": 2.5197449343529507e-05, "loss": 0.075, "step": 48050 }, { "epoch": 2.8426095700005916, "grad_norm": 0.4801066517829895, "learning_rate": 2.5195341847363986e-05, "loss": 0.0692, "step": 48060 }, { "epoch": 2.8432010409889394, "grad_norm": 0.39126378297805786, "learning_rate": 2.519323397706227e-05, "loss": 0.0789, "step": 48070 }, { "epoch": 2.8437925119772878, "grad_norm": 0.2631698548793793, "learning_rate": 2.5191125732701712e-05, "loss": 0.079, "step": 48080 }, { "epoch": 2.8443839829656357, "grad_norm": 0.27036115527153015, "learning_rate": 2.518901711435967e-05, "loss": 0.0742, "step": 48090 }, { "epoch": 2.8449754539539835, "grad_norm": 0.1683962345123291, "learning_rate": 2.518690812211352e-05, "loss": 0.0655, "step": 48100 }, { "epoch": 2.8455669249423314, "grad_norm": 0.21635784208774567, "learning_rate": 2.518479875604067e-05, "loss": 0.0678, "step": 48110 }, { "epoch": 2.8461583959306798, "grad_norm": 0.3262924551963806, "learning_rate": 2.5182689016218518e-05, "loss": 0.0764, "step": 48120 }, { "epoch": 2.8467498669190276, "grad_norm": 0.31572097539901733, "learning_rate": 2.5180578902724483e-05, "loss": 0.0807, "step": 48130 }, { "epoch": 2.8473413379073755, "grad_norm": 0.4038466513156891, "learning_rate": 2.5178468415636003e-05, "loss": 0.0787, "step": 48140 }, { "epoch": 2.847932808895724, "grad_norm": 0.3659064471721649, "learning_rate": 2.517635755503053e-05, "loss": 0.0801, "step": 48150 }, { "epoch": 2.8485242798840718, "grad_norm": 0.19580966234207153, "learning_rate": 2.5174246320985513e-05, "loss": 0.0736, "step": 48160 }, { "epoch": 2.8491157508724196, "grad_norm": 0.20497548580169678, "learning_rate": 2.5172134713578443e-05, "loss": 0.0766, "step": 48170 }, { "epoch": 2.8497072218607675, "grad_norm": 0.2933996617794037, "learning_rate": 2.51700227328868e-05, "loss": 0.0819, "step": 48180 }, { "epoch": 2.850298692849116, "grad_norm": 0.20137441158294678, "learning_rate": 2.516791037898808e-05, "loss": 0.0831, "step": 48190 }, { "epoch": 2.8508901638374637, "grad_norm": 0.24004969000816345, "learning_rate": 2.5165797651959817e-05, "loss": 0.0683, "step": 48200 }, { "epoch": 2.8514816348258116, "grad_norm": 0.2307700663805008, "learning_rate": 2.5163684551879533e-05, "loss": 0.0677, "step": 48210 }, { "epoch": 2.85207310581416, "grad_norm": 0.3239348828792572, "learning_rate": 2.5161571078824768e-05, "loss": 0.0714, "step": 48220 }, { "epoch": 2.852664576802508, "grad_norm": 0.27984994649887085, "learning_rate": 2.5159457232873085e-05, "loss": 0.0842, "step": 48230 }, { "epoch": 2.8532560477908557, "grad_norm": 0.2402133345603943, "learning_rate": 2.515734301410205e-05, "loss": 0.0841, "step": 48240 }, { "epoch": 2.8538475187792036, "grad_norm": 0.2864617705345154, "learning_rate": 2.5155228422589253e-05, "loss": 0.0796, "step": 48250 }, { "epoch": 2.854438989767552, "grad_norm": 0.24144382774829865, "learning_rate": 2.5153113458412287e-05, "loss": 0.0643, "step": 48260 }, { "epoch": 2.8550304607559, "grad_norm": 0.1777723729610443, "learning_rate": 2.5150998121648776e-05, "loss": 0.0856, "step": 48270 }, { "epoch": 2.855621931744248, "grad_norm": 0.18647298216819763, "learning_rate": 2.5148882412376333e-05, "loss": 0.0831, "step": 48280 }, { "epoch": 2.856213402732596, "grad_norm": 0.17385172843933105, "learning_rate": 2.5146766330672608e-05, "loss": 0.066, "step": 48290 }, { "epoch": 2.856804873720944, "grad_norm": 0.3253816068172455, "learning_rate": 2.5144649876615245e-05, "loss": 0.0723, "step": 48300 }, { "epoch": 2.857396344709292, "grad_norm": 0.24629051983356476, "learning_rate": 2.5142533050281918e-05, "loss": 0.063, "step": 48310 }, { "epoch": 2.85798781569764, "grad_norm": 0.2550038993358612, "learning_rate": 2.5140415851750306e-05, "loss": 0.0825, "step": 48320 }, { "epoch": 2.858579286685988, "grad_norm": 0.2567945122718811, "learning_rate": 2.5138298281098096e-05, "loss": 0.073, "step": 48330 }, { "epoch": 2.859170757674336, "grad_norm": 0.25070488452911377, "learning_rate": 2.5136180338403012e-05, "loss": 0.0837, "step": 48340 }, { "epoch": 2.8597622286626843, "grad_norm": 0.33903220295906067, "learning_rate": 2.5134062023742764e-05, "loss": 0.0681, "step": 48350 }, { "epoch": 2.860353699651032, "grad_norm": 0.20092879235744476, "learning_rate": 2.5131943337195082e-05, "loss": 0.0639, "step": 48360 }, { "epoch": 2.86094517063938, "grad_norm": 0.3105347454547882, "learning_rate": 2.5129824278837734e-05, "loss": 0.0752, "step": 48370 }, { "epoch": 2.861536641627728, "grad_norm": 0.2094162404537201, "learning_rate": 2.5127704848748464e-05, "loss": 0.0757, "step": 48380 }, { "epoch": 2.8621281126160762, "grad_norm": 0.22241833806037903, "learning_rate": 2.5125585047005058e-05, "loss": 0.0857, "step": 48390 }, { "epoch": 2.862719583604424, "grad_norm": 0.2927614450454712, "learning_rate": 2.5123464873685306e-05, "loss": 0.0734, "step": 48400 }, { "epoch": 2.8633110545927725, "grad_norm": 0.3663642704486847, "learning_rate": 2.5121344328867e-05, "loss": 0.0747, "step": 48410 }, { "epoch": 2.8639025255811204, "grad_norm": 0.20122095942497253, "learning_rate": 2.511922341262797e-05, "loss": 0.078, "step": 48420 }, { "epoch": 2.8644939965694682, "grad_norm": 0.3111271858215332, "learning_rate": 2.5117102125046047e-05, "loss": 0.0857, "step": 48430 }, { "epoch": 2.865085467557816, "grad_norm": 0.22517845034599304, "learning_rate": 2.5114980466199068e-05, "loss": 0.0699, "step": 48440 }, { "epoch": 2.8656769385461645, "grad_norm": 0.1559632271528244, "learning_rate": 2.5112858436164898e-05, "loss": 0.0685, "step": 48450 }, { "epoch": 2.8662684095345123, "grad_norm": 0.20848923921585083, "learning_rate": 2.51107360350214e-05, "loss": 0.0596, "step": 48460 }, { "epoch": 2.8668598805228602, "grad_norm": 0.21868553757667542, "learning_rate": 2.5108613262846468e-05, "loss": 0.0748, "step": 48470 }, { "epoch": 2.8674513515112086, "grad_norm": 0.3153384029865265, "learning_rate": 2.5106490119717997e-05, "loss": 0.0753, "step": 48480 }, { "epoch": 2.8680428224995564, "grad_norm": 0.27015602588653564, "learning_rate": 2.5104366605713898e-05, "loss": 0.0679, "step": 48490 }, { "epoch": 2.8686342934879043, "grad_norm": 0.3101634383201599, "learning_rate": 2.51022427209121e-05, "loss": 0.0804, "step": 48500 }, { "epoch": 2.869225764476252, "grad_norm": 0.26402294635772705, "learning_rate": 2.5100118465390536e-05, "loss": 0.0649, "step": 48510 }, { "epoch": 2.8698172354646005, "grad_norm": 0.29495465755462646, "learning_rate": 2.5097993839227176e-05, "loss": 0.0755, "step": 48520 }, { "epoch": 2.8704087064529484, "grad_norm": 0.2801480293273926, "learning_rate": 2.5095868842499966e-05, "loss": 0.0812, "step": 48530 }, { "epoch": 2.8710001774412968, "grad_norm": 0.25349417328834534, "learning_rate": 2.5093743475286905e-05, "loss": 0.0756, "step": 48540 }, { "epoch": 2.8715916484296446, "grad_norm": 0.25435110926628113, "learning_rate": 2.509161773766597e-05, "loss": 0.0716, "step": 48550 }, { "epoch": 2.8721831194179925, "grad_norm": 0.22753190994262695, "learning_rate": 2.508949162971518e-05, "loss": 0.064, "step": 48560 }, { "epoch": 2.8727745904063404, "grad_norm": 0.23927567899227142, "learning_rate": 2.5087365151512555e-05, "loss": 0.0626, "step": 48570 }, { "epoch": 2.8733660613946888, "grad_norm": 0.5278388261795044, "learning_rate": 2.508523830313613e-05, "loss": 0.0839, "step": 48580 }, { "epoch": 2.8739575323830366, "grad_norm": 0.1621789187192917, "learning_rate": 2.508311108466395e-05, "loss": 0.0633, "step": 48590 }, { "epoch": 2.8745490033713845, "grad_norm": 0.26297035813331604, "learning_rate": 2.508098349617408e-05, "loss": 0.0757, "step": 48600 }, { "epoch": 2.875140474359733, "grad_norm": 0.3840123414993286, "learning_rate": 2.507885553774459e-05, "loss": 0.0608, "step": 48610 }, { "epoch": 2.8757319453480807, "grad_norm": 0.2966090440750122, "learning_rate": 2.507672720945358e-05, "loss": 0.0717, "step": 48620 }, { "epoch": 2.8763234163364286, "grad_norm": 0.24133741855621338, "learning_rate": 2.507459851137914e-05, "loss": 0.0826, "step": 48630 }, { "epoch": 2.8769148873247765, "grad_norm": 0.3305496275424957, "learning_rate": 2.5072469443599397e-05, "loss": 0.0814, "step": 48640 }, { "epoch": 2.877506358313125, "grad_norm": 0.3100024461746216, "learning_rate": 2.5070340006192477e-05, "loss": 0.0646, "step": 48650 }, { "epoch": 2.8780978293014727, "grad_norm": 0.7891737818717957, "learning_rate": 2.5068210199236527e-05, "loss": 0.0696, "step": 48660 }, { "epoch": 2.8786893002898206, "grad_norm": 0.4677433371543884, "learning_rate": 2.5066080022809695e-05, "loss": 0.0785, "step": 48670 }, { "epoch": 2.879280771278169, "grad_norm": 0.2678983807563782, "learning_rate": 2.5063949476990158e-05, "loss": 0.0719, "step": 48680 }, { "epoch": 2.879872242266517, "grad_norm": 0.4366995692253113, "learning_rate": 2.5061818561856104e-05, "loss": 0.0762, "step": 48690 }, { "epoch": 2.8804637132548647, "grad_norm": 0.22798535227775574, "learning_rate": 2.505968727748572e-05, "loss": 0.0768, "step": 48700 }, { "epoch": 2.8810551842432126, "grad_norm": 0.18661554157733917, "learning_rate": 2.505755562395722e-05, "loss": 0.0696, "step": 48710 }, { "epoch": 2.881646655231561, "grad_norm": 0.2297562211751938, "learning_rate": 2.5055423601348842e-05, "loss": 0.0729, "step": 48720 }, { "epoch": 2.882238126219909, "grad_norm": 0.1945757120847702, "learning_rate": 2.505329120973881e-05, "loss": 0.0804, "step": 48730 }, { "epoch": 2.882829597208257, "grad_norm": 0.18128035962581635, "learning_rate": 2.5051158449205384e-05, "loss": 0.0806, "step": 48740 }, { "epoch": 2.883421068196605, "grad_norm": 0.2595556378364563, "learning_rate": 2.504902531982682e-05, "loss": 0.0659, "step": 48750 }, { "epoch": 2.884012539184953, "grad_norm": 0.22539164125919342, "learning_rate": 2.5046891821681405e-05, "loss": 0.0558, "step": 48760 }, { "epoch": 2.884604010173301, "grad_norm": 0.289772093296051, "learning_rate": 2.5044757954847424e-05, "loss": 0.0697, "step": 48770 }, { "epoch": 2.885195481161649, "grad_norm": 0.27399009466171265, "learning_rate": 2.5042623719403194e-05, "loss": 0.0723, "step": 48780 }, { "epoch": 2.885786952149997, "grad_norm": 0.28560671210289, "learning_rate": 2.504048911542703e-05, "loss": 0.0942, "step": 48790 }, { "epoch": 2.886378423138345, "grad_norm": 0.21624864637851715, "learning_rate": 2.5038354142997262e-05, "loss": 0.077, "step": 48800 }, { "epoch": 2.8869698941266932, "grad_norm": 0.19834721088409424, "learning_rate": 2.5036218802192238e-05, "loss": 0.071, "step": 48810 }, { "epoch": 2.887561365115041, "grad_norm": 0.276924729347229, "learning_rate": 2.5034083093090315e-05, "loss": 0.0858, "step": 48820 }, { "epoch": 2.888152836103389, "grad_norm": 0.23109589517116547, "learning_rate": 2.503194701576988e-05, "loss": 0.0807, "step": 48830 }, { "epoch": 2.888744307091737, "grad_norm": 0.1637505739927292, "learning_rate": 2.5029810570309302e-05, "loss": 0.0742, "step": 48840 }, { "epoch": 2.8893357780800852, "grad_norm": 0.22774530947208405, "learning_rate": 2.5027673756786998e-05, "loss": 0.063, "step": 48850 }, { "epoch": 2.889927249068433, "grad_norm": 0.23052553832530975, "learning_rate": 2.502553657528137e-05, "loss": 0.0547, "step": 48860 }, { "epoch": 2.8905187200567815, "grad_norm": 0.222826287150383, "learning_rate": 2.5023399025870848e-05, "loss": 0.0713, "step": 48870 }, { "epoch": 2.8911101910451293, "grad_norm": 0.20628008246421814, "learning_rate": 2.5021261108633877e-05, "loss": 0.0711, "step": 48880 }, { "epoch": 2.8917016620334772, "grad_norm": 0.23139439523220062, "learning_rate": 2.5019122823648908e-05, "loss": 0.0864, "step": 48890 }, { "epoch": 2.892293133021825, "grad_norm": 0.3469775915145874, "learning_rate": 2.501698417099441e-05, "loss": 0.0738, "step": 48900 }, { "epoch": 2.8928846040101734, "grad_norm": 0.13978783786296844, "learning_rate": 2.501484515074887e-05, "loss": 0.075, "step": 48910 }, { "epoch": 2.8934760749985213, "grad_norm": 0.22882360219955444, "learning_rate": 2.501270576299078e-05, "loss": 0.0714, "step": 48920 }, { "epoch": 2.894067545986869, "grad_norm": 0.27096059918403625, "learning_rate": 2.501056600779864e-05, "loss": 0.0857, "step": 48930 }, { "epoch": 2.8946590169752175, "grad_norm": 0.23223842680454254, "learning_rate": 2.5008425885250983e-05, "loss": 0.08, "step": 48940 }, { "epoch": 2.8952504879635654, "grad_norm": 0.23612412810325623, "learning_rate": 2.5006285395426344e-05, "loss": 0.0677, "step": 48950 }, { "epoch": 2.8958419589519133, "grad_norm": 0.1493513435125351, "learning_rate": 2.500414453840327e-05, "loss": 0.0646, "step": 48960 }, { "epoch": 2.896433429940261, "grad_norm": 0.26083239912986755, "learning_rate": 2.5002003314260317e-05, "loss": 0.0781, "step": 48970 }, { "epoch": 2.8970249009286095, "grad_norm": 0.3211793899536133, "learning_rate": 2.4999861723076073e-05, "loss": 0.0834, "step": 48980 }, { "epoch": 2.8976163719169574, "grad_norm": 0.2311321347951889, "learning_rate": 2.4997719764929117e-05, "loss": 0.0762, "step": 48990 }, { "epoch": 2.8982078429053058, "grad_norm": 0.2060532569885254, "learning_rate": 2.4995577439898056e-05, "loss": 0.0701, "step": 49000 }, { "epoch": 2.8987993138936536, "grad_norm": 0.1787044107913971, "learning_rate": 2.499343474806151e-05, "loss": 0.0737, "step": 49010 }, { "epoch": 2.8993907848820015, "grad_norm": 0.3251761496067047, "learning_rate": 2.499129168949811e-05, "loss": 0.0791, "step": 49020 }, { "epoch": 2.8999822558703494, "grad_norm": 0.2821826934814453, "learning_rate": 2.4989148264286486e-05, "loss": 0.0766, "step": 49030 }, { "epoch": 2.9005737268586977, "grad_norm": 0.28222405910491943, "learning_rate": 2.4987004472505306e-05, "loss": 0.0835, "step": 49040 }, { "epoch": 2.9011651978470456, "grad_norm": 0.19193662703037262, "learning_rate": 2.498486031423324e-05, "loss": 0.0802, "step": 49050 }, { "epoch": 2.9017566688353935, "grad_norm": 0.2026618868112564, "learning_rate": 2.4982715789548966e-05, "loss": 0.0744, "step": 49060 }, { "epoch": 2.902348139823742, "grad_norm": 0.24518506228923798, "learning_rate": 2.498057089853119e-05, "loss": 0.0748, "step": 49070 }, { "epoch": 2.9029396108120897, "grad_norm": 0.27158060669898987, "learning_rate": 2.497842564125861e-05, "loss": 0.0682, "step": 49080 }, { "epoch": 2.9035310818004376, "grad_norm": 0.20569056272506714, "learning_rate": 2.4976280017809968e-05, "loss": 0.0809, "step": 49090 }, { "epoch": 2.9041225527887855, "grad_norm": 0.24069836735725403, "learning_rate": 2.4974134028263983e-05, "loss": 0.0749, "step": 49100 }, { "epoch": 2.904714023777134, "grad_norm": 0.2381853610277176, "learning_rate": 2.4971987672699413e-05, "loss": 0.067, "step": 49110 }, { "epoch": 2.9053054947654817, "grad_norm": 0.26636120676994324, "learning_rate": 2.496984095119503e-05, "loss": 0.0723, "step": 49120 }, { "epoch": 2.9058969657538296, "grad_norm": 0.29326388239860535, "learning_rate": 2.4967693863829603e-05, "loss": 0.0763, "step": 49130 }, { "epoch": 2.906488436742178, "grad_norm": 0.19037672877311707, "learning_rate": 2.4965546410681925e-05, "loss": 0.0701, "step": 49140 }, { "epoch": 2.907079907730526, "grad_norm": 0.40112271904945374, "learning_rate": 2.4963398591830797e-05, "loss": 0.0751, "step": 49150 }, { "epoch": 2.9076713787188737, "grad_norm": 0.21724386513233185, "learning_rate": 2.4961250407355042e-05, "loss": 0.062, "step": 49160 }, { "epoch": 2.9082628497072216, "grad_norm": 0.2550131380558014, "learning_rate": 2.495910185733349e-05, "loss": 0.0658, "step": 49170 }, { "epoch": 2.90885432069557, "grad_norm": 0.21128523349761963, "learning_rate": 2.495695294184499e-05, "loss": 0.0789, "step": 49180 }, { "epoch": 2.909445791683918, "grad_norm": 0.28181007504463196, "learning_rate": 2.495480366096839e-05, "loss": 0.0751, "step": 49190 }, { "epoch": 2.910037262672266, "grad_norm": 0.21242931485176086, "learning_rate": 2.4952654014782575e-05, "loss": 0.0676, "step": 49200 }, { "epoch": 2.910628733660614, "grad_norm": 0.1878039389848709, "learning_rate": 2.495050400336642e-05, "loss": 0.0765, "step": 49210 }, { "epoch": 2.911220204648962, "grad_norm": 0.21209585666656494, "learning_rate": 2.494835362679883e-05, "loss": 0.0849, "step": 49220 }, { "epoch": 2.91181167563731, "grad_norm": 0.7152859568595886, "learning_rate": 2.4946202885158704e-05, "loss": 0.0799, "step": 49230 }, { "epoch": 2.912403146625658, "grad_norm": 0.24405090510845184, "learning_rate": 2.4944051778524987e-05, "loss": 0.0777, "step": 49240 }, { "epoch": 2.912994617614006, "grad_norm": 0.16502921283245087, "learning_rate": 2.49419003069766e-05, "loss": 0.0833, "step": 49250 }, { "epoch": 2.913586088602354, "grad_norm": 0.23323746025562286, "learning_rate": 2.493974847059251e-05, "loss": 0.0679, "step": 49260 }, { "epoch": 2.9141775595907022, "grad_norm": 0.22975020110607147, "learning_rate": 2.4937596269451666e-05, "loss": 0.0769, "step": 49270 }, { "epoch": 2.91476903057905, "grad_norm": 0.18769672513008118, "learning_rate": 2.4935443703633064e-05, "loss": 0.0792, "step": 49280 }, { "epoch": 2.915360501567398, "grad_norm": 0.3475551903247833, "learning_rate": 2.4933290773215686e-05, "loss": 0.0913, "step": 49290 }, { "epoch": 2.915951972555746, "grad_norm": 0.21753858029842377, "learning_rate": 2.4931137478278536e-05, "loss": 0.0744, "step": 49300 }, { "epoch": 2.9165434435440942, "grad_norm": 0.15747222304344177, "learning_rate": 2.492898381890064e-05, "loss": 0.0637, "step": 49310 }, { "epoch": 2.917134914532442, "grad_norm": 0.2585150897502899, "learning_rate": 2.492682979516103e-05, "loss": 0.0778, "step": 49320 }, { "epoch": 2.9177263855207904, "grad_norm": 0.18931323289871216, "learning_rate": 2.4924675407138743e-05, "loss": 0.0795, "step": 49330 }, { "epoch": 2.9183178565091383, "grad_norm": 0.26358675956726074, "learning_rate": 2.4922520654912843e-05, "loss": 0.0883, "step": 49340 }, { "epoch": 2.918909327497486, "grad_norm": 0.2829856872558594, "learning_rate": 2.492036553856241e-05, "loss": 0.0774, "step": 49350 }, { "epoch": 2.919500798485834, "grad_norm": 0.18083500862121582, "learning_rate": 2.491821005816652e-05, "loss": 0.0654, "step": 49360 }, { "epoch": 2.9200922694741824, "grad_norm": 0.19369329512119293, "learning_rate": 2.4916054213804273e-05, "loss": 0.0751, "step": 49370 }, { "epoch": 2.9206837404625303, "grad_norm": 0.24598056077957153, "learning_rate": 2.4913898005554788e-05, "loss": 0.0806, "step": 49380 }, { "epoch": 2.921275211450878, "grad_norm": 0.4119557738304138, "learning_rate": 2.4911741433497188e-05, "loss": 0.0813, "step": 49390 }, { "epoch": 2.9218666824392265, "grad_norm": 0.6385931372642517, "learning_rate": 2.4909584497710606e-05, "loss": 0.0694, "step": 49400 }, { "epoch": 2.9224581534275744, "grad_norm": 0.23811964690685272, "learning_rate": 2.4907427198274203e-05, "loss": 0.066, "step": 49410 }, { "epoch": 2.9230496244159223, "grad_norm": 0.3130665123462677, "learning_rate": 2.4905269535267142e-05, "loss": 0.0854, "step": 49420 }, { "epoch": 2.92364109540427, "grad_norm": 1.0040373802185059, "learning_rate": 2.4903111508768594e-05, "loss": 0.0803, "step": 49430 }, { "epoch": 2.9242325663926185, "grad_norm": 0.30684709548950195, "learning_rate": 2.4900953118857766e-05, "loss": 0.0819, "step": 49440 }, { "epoch": 2.9248240373809664, "grad_norm": 0.31223928928375244, "learning_rate": 2.489879436561386e-05, "loss": 0.0764, "step": 49450 }, { "epoch": 2.9254155083693147, "grad_norm": 0.2330486923456192, "learning_rate": 2.489663524911609e-05, "loss": 0.0594, "step": 49460 }, { "epoch": 2.9260069793576626, "grad_norm": 0.2074413001537323, "learning_rate": 2.489447576944369e-05, "loss": 0.0783, "step": 49470 }, { "epoch": 2.9265984503460105, "grad_norm": 0.2560372054576874, "learning_rate": 2.489231592667591e-05, "loss": 0.0753, "step": 49480 }, { "epoch": 2.9271899213343584, "grad_norm": 0.3583814799785614, "learning_rate": 2.4890155720892006e-05, "loss": 0.0812, "step": 49490 }, { "epoch": 2.9277813923227067, "grad_norm": 0.6976001858711243, "learning_rate": 2.488799515217125e-05, "loss": 0.0781, "step": 49500 }, { "epoch": 2.9283728633110546, "grad_norm": 0.238274946808815, "learning_rate": 2.488583422059293e-05, "loss": 0.0657, "step": 49510 }, { "epoch": 2.9289643342994025, "grad_norm": 0.24038559198379517, "learning_rate": 2.4883672926236346e-05, "loss": 0.0844, "step": 49520 }, { "epoch": 2.929555805287751, "grad_norm": 0.397865355014801, "learning_rate": 2.4881511269180805e-05, "loss": 0.0722, "step": 49530 }, { "epoch": 2.9301472762760987, "grad_norm": 0.22036971151828766, "learning_rate": 2.4879349249505638e-05, "loss": 0.067, "step": 49540 }, { "epoch": 2.9307387472644466, "grad_norm": 0.36041006445884705, "learning_rate": 2.4877186867290182e-05, "loss": 0.0801, "step": 49550 }, { "epoch": 2.9313302182527945, "grad_norm": 0.1923438459634781, "learning_rate": 2.4875024122613793e-05, "loss": 0.0709, "step": 49560 }, { "epoch": 2.931921689241143, "grad_norm": 0.41845494508743286, "learning_rate": 2.487286101555583e-05, "loss": 0.0715, "step": 49570 }, { "epoch": 2.9325131602294907, "grad_norm": 0.2992112338542938, "learning_rate": 2.4870697546195678e-05, "loss": 0.0801, "step": 49580 }, { "epoch": 2.933104631217839, "grad_norm": 0.2425754964351654, "learning_rate": 2.486853371461273e-05, "loss": 0.0757, "step": 49590 }, { "epoch": 2.933696102206187, "grad_norm": 0.1337866187095642, "learning_rate": 2.4866369520886385e-05, "loss": 0.0672, "step": 49600 }, { "epoch": 2.934287573194535, "grad_norm": 0.16781064867973328, "learning_rate": 2.4864204965096068e-05, "loss": 0.06, "step": 49610 }, { "epoch": 2.9348790441828827, "grad_norm": 0.21596947312355042, "learning_rate": 2.4862040047321213e-05, "loss": 0.0805, "step": 49620 }, { "epoch": 2.9354705151712306, "grad_norm": 0.1371227651834488, "learning_rate": 2.485987476764126e-05, "loss": 0.0697, "step": 49630 }, { "epoch": 2.936061986159579, "grad_norm": 0.26247653365135193, "learning_rate": 2.4857709126135666e-05, "loss": 0.0745, "step": 49640 }, { "epoch": 2.936653457147927, "grad_norm": 0.21418337523937225, "learning_rate": 2.485554312288391e-05, "loss": 0.0764, "step": 49650 }, { "epoch": 2.937244928136275, "grad_norm": 0.12316597998142242, "learning_rate": 2.4853376757965474e-05, "loss": 0.0722, "step": 49660 }, { "epoch": 2.937836399124623, "grad_norm": 0.3248337209224701, "learning_rate": 2.4851210031459855e-05, "loss": 0.0804, "step": 49670 }, { "epoch": 2.938427870112971, "grad_norm": 0.2613685727119446, "learning_rate": 2.4849042943446568e-05, "loss": 0.0756, "step": 49680 }, { "epoch": 2.939019341101319, "grad_norm": 0.21569406986236572, "learning_rate": 2.4846875494005135e-05, "loss": 0.0748, "step": 49690 }, { "epoch": 2.939610812089667, "grad_norm": 0.25530219078063965, "learning_rate": 2.4844707683215093e-05, "loss": 0.0726, "step": 49700 }, { "epoch": 2.940202283078015, "grad_norm": 0.19646203517913818, "learning_rate": 2.4842539511156003e-05, "loss": 0.0575, "step": 49710 }, { "epoch": 2.940793754066363, "grad_norm": 0.2667638063430786, "learning_rate": 2.4840370977907423e-05, "loss": 0.0771, "step": 49720 }, { "epoch": 2.9413852250547112, "grad_norm": 0.24874912202358246, "learning_rate": 2.4838202083548925e-05, "loss": 0.0785, "step": 49730 }, { "epoch": 2.941976696043059, "grad_norm": 0.26943439245224, "learning_rate": 2.4836032828160113e-05, "loss": 0.083, "step": 49740 }, { "epoch": 2.942568167031407, "grad_norm": 0.21057887375354767, "learning_rate": 2.4833863211820587e-05, "loss": 0.0711, "step": 49750 }, { "epoch": 2.943159638019755, "grad_norm": 0.15980477631092072, "learning_rate": 2.483169323460996e-05, "loss": 0.0625, "step": 49760 }, { "epoch": 2.943751109008103, "grad_norm": 0.2000371366739273, "learning_rate": 2.482952289660787e-05, "loss": 0.0684, "step": 49770 }, { "epoch": 2.944342579996451, "grad_norm": 0.2766992449760437, "learning_rate": 2.482735219789396e-05, "loss": 0.0767, "step": 49780 }, { "epoch": 2.9449340509847994, "grad_norm": 0.27224117517471313, "learning_rate": 2.4825181138547884e-05, "loss": 0.0599, "step": 49790 }, { "epoch": 2.9455255219731473, "grad_norm": 0.23234067857265472, "learning_rate": 2.4823009718649313e-05, "loss": 0.0752, "step": 49800 }, { "epoch": 2.946116992961495, "grad_norm": 0.653385579586029, "learning_rate": 2.4820837938277938e-05, "loss": 0.0717, "step": 49810 }, { "epoch": 2.946708463949843, "grad_norm": 0.3001931607723236, "learning_rate": 2.4818665797513447e-05, "loss": 0.072, "step": 49820 }, { "epoch": 2.9472999349381914, "grad_norm": 0.3160836100578308, "learning_rate": 2.4816493296435554e-05, "loss": 0.0718, "step": 49830 }, { "epoch": 2.9478914059265393, "grad_norm": 0.2301233410835266, "learning_rate": 2.4814320435123988e-05, "loss": 0.0734, "step": 49840 }, { "epoch": 2.948482876914887, "grad_norm": 0.37070170044898987, "learning_rate": 2.481214721365848e-05, "loss": 0.0783, "step": 49850 }, { "epoch": 2.9490743479032355, "grad_norm": 0.19679854810237885, "learning_rate": 2.480997363211878e-05, "loss": 0.0677, "step": 49860 }, { "epoch": 2.9496658188915834, "grad_norm": 0.3013223111629486, "learning_rate": 2.4807799690584658e-05, "loss": 0.0742, "step": 49870 }, { "epoch": 2.9502572898799313, "grad_norm": 0.19687768816947937, "learning_rate": 2.4805625389135884e-05, "loss": 0.0737, "step": 49880 }, { "epoch": 2.950848760868279, "grad_norm": 0.24892400205135345, "learning_rate": 2.4803450727852248e-05, "loss": 0.0696, "step": 49890 }, { "epoch": 2.9514402318566275, "grad_norm": 0.26080232858657837, "learning_rate": 2.480127570681356e-05, "loss": 0.0725, "step": 49900 }, { "epoch": 2.9520317028449754, "grad_norm": 0.18043825030326843, "learning_rate": 2.4799100326099625e-05, "loss": 0.0539, "step": 49910 }, { "epoch": 2.9526231738333237, "grad_norm": 0.2726351022720337, "learning_rate": 2.4796924585790285e-05, "loss": 0.0797, "step": 49920 }, { "epoch": 2.9532146448216716, "grad_norm": 0.26227518916130066, "learning_rate": 2.4794748485965373e-05, "loss": 0.0774, "step": 49930 }, { "epoch": 2.9538061158100195, "grad_norm": 0.4942541718482971, "learning_rate": 2.4792572026704744e-05, "loss": 0.0758, "step": 49940 }, { "epoch": 2.9543975867983674, "grad_norm": 0.16356457769870758, "learning_rate": 2.4790395208088275e-05, "loss": 0.0833, "step": 49950 }, { "epoch": 2.9549890577867157, "grad_norm": 0.18415279686450958, "learning_rate": 2.4788218030195847e-05, "loss": 0.0703, "step": 49960 }, { "epoch": 2.9555805287750636, "grad_norm": 0.24273653328418732, "learning_rate": 2.4786040493107354e-05, "loss": 0.0818, "step": 49970 }, { "epoch": 2.9561719997634115, "grad_norm": 0.2502584755420685, "learning_rate": 2.4783862596902703e-05, "loss": 0.0865, "step": 49980 }, { "epoch": 2.95676347075176, "grad_norm": 0.14881078898906708, "learning_rate": 2.4781684341661815e-05, "loss": 0.077, "step": 49990 }, { "epoch": 2.9573549417401077, "grad_norm": 0.2598876953125, "learning_rate": 2.4779505727464624e-05, "loss": 0.0668, "step": 50000 }, { "epoch": 2.9579464127284556, "grad_norm": 0.2793751060962677, "learning_rate": 2.4777326754391082e-05, "loss": 0.0689, "step": 50010 }, { "epoch": 2.9585378837168035, "grad_norm": 0.40701866149902344, "learning_rate": 2.4775147422521147e-05, "loss": 0.0639, "step": 50020 }, { "epoch": 2.959129354705152, "grad_norm": 0.242240309715271, "learning_rate": 2.47729677319348e-05, "loss": 0.0849, "step": 50030 }, { "epoch": 2.9597208256934997, "grad_norm": 0.3254956901073456, "learning_rate": 2.477078768271202e-05, "loss": 0.0837, "step": 50040 }, { "epoch": 2.960312296681848, "grad_norm": 0.2585713267326355, "learning_rate": 2.4768607274932815e-05, "loss": 0.07, "step": 50050 }, { "epoch": 2.960903767670196, "grad_norm": 7.77785587310791, "learning_rate": 2.4766426508677195e-05, "loss": 0.0615, "step": 50060 }, { "epoch": 2.961495238658544, "grad_norm": 0.21214480698108673, "learning_rate": 2.4764245384025184e-05, "loss": 0.0764, "step": 50070 }, { "epoch": 2.9620867096468917, "grad_norm": 0.3293885290622711, "learning_rate": 2.4762063901056824e-05, "loss": 0.0831, "step": 50080 }, { "epoch": 2.9626781806352396, "grad_norm": 0.24002254009246826, "learning_rate": 2.4759882059852175e-05, "loss": 0.0812, "step": 50090 }, { "epoch": 2.963269651623588, "grad_norm": 0.15059901773929596, "learning_rate": 2.4757699860491297e-05, "loss": 0.0683, "step": 50100 }, { "epoch": 2.963861122611936, "grad_norm": 0.21212832629680634, "learning_rate": 2.4755517303054274e-05, "loss": 0.0589, "step": 50110 }, { "epoch": 2.964452593600284, "grad_norm": 0.8485557436943054, "learning_rate": 2.4753334387621195e-05, "loss": 0.077, "step": 50120 }, { "epoch": 2.965044064588632, "grad_norm": 0.6849534511566162, "learning_rate": 2.475115111427217e-05, "loss": 0.0938, "step": 50130 }, { "epoch": 2.96563553557698, "grad_norm": 0.42792850732803345, "learning_rate": 2.474896748308731e-05, "loss": 0.0891, "step": 50140 }, { "epoch": 2.966227006565328, "grad_norm": 0.19615229964256287, "learning_rate": 2.4746783494146755e-05, "loss": 0.0668, "step": 50150 }, { "epoch": 2.966818477553676, "grad_norm": 0.22825178503990173, "learning_rate": 2.474459914753065e-05, "loss": 0.0637, "step": 50160 }, { "epoch": 2.967409948542024, "grad_norm": 0.45108887553215027, "learning_rate": 2.474241444331915e-05, "loss": 0.0767, "step": 50170 }, { "epoch": 2.968001419530372, "grad_norm": 0.2653070390224457, "learning_rate": 2.474022938159243e-05, "loss": 0.0936, "step": 50180 }, { "epoch": 2.96859289051872, "grad_norm": 0.3925186097621918, "learning_rate": 2.473804396243067e-05, "loss": 0.0711, "step": 50190 }, { "epoch": 2.969184361507068, "grad_norm": 0.2704251706600189, "learning_rate": 2.4735858185914065e-05, "loss": 0.0763, "step": 50200 }, { "epoch": 2.969775832495416, "grad_norm": 0.22965455055236816, "learning_rate": 2.473367205212284e-05, "loss": 0.0593, "step": 50210 }, { "epoch": 2.970367303483764, "grad_norm": 0.31042715907096863, "learning_rate": 2.4731485561137207e-05, "loss": 0.0733, "step": 50220 }, { "epoch": 2.970958774472112, "grad_norm": 0.3359450399875641, "learning_rate": 2.4729298713037405e-05, "loss": 0.0777, "step": 50230 }, { "epoch": 2.97155024546046, "grad_norm": 0.18117231130599976, "learning_rate": 2.4727111507903692e-05, "loss": 0.0749, "step": 50240 }, { "epoch": 2.9721417164488084, "grad_norm": 0.24925221502780914, "learning_rate": 2.4724923945816326e-05, "loss": 0.0756, "step": 50250 }, { "epoch": 2.9727331874371563, "grad_norm": 0.23891757428646088, "learning_rate": 2.4722736026855577e-05, "loss": 0.0694, "step": 50260 }, { "epoch": 2.973324658425504, "grad_norm": 0.27576887607574463, "learning_rate": 2.4720547751101746e-05, "loss": 0.0765, "step": 50270 }, { "epoch": 2.973916129413852, "grad_norm": 0.2774089276790619, "learning_rate": 2.4718359118635124e-05, "loss": 0.0821, "step": 50280 }, { "epoch": 2.9745076004022004, "grad_norm": 0.2914014756679535, "learning_rate": 2.4716170129536037e-05, "loss": 0.0754, "step": 50290 }, { "epoch": 2.9750990713905483, "grad_norm": 0.2330569177865982, "learning_rate": 2.471398078388481e-05, "loss": 0.074, "step": 50300 }, { "epoch": 2.975690542378896, "grad_norm": 0.2799699306488037, "learning_rate": 2.4711791081761785e-05, "loss": 0.0634, "step": 50310 }, { "epoch": 2.9762820133672445, "grad_norm": 0.22327011823654175, "learning_rate": 2.4709601023247315e-05, "loss": 0.0722, "step": 50320 }, { "epoch": 2.9768734843555924, "grad_norm": 0.4355412721633911, "learning_rate": 2.470741060842177e-05, "loss": 0.0766, "step": 50330 }, { "epoch": 2.9774649553439403, "grad_norm": 0.22963528335094452, "learning_rate": 2.4705219837365534e-05, "loss": 0.075, "step": 50340 }, { "epoch": 2.978056426332288, "grad_norm": 0.342792809009552, "learning_rate": 2.4703028710158993e-05, "loss": 0.0715, "step": 50350 }, { "epoch": 2.9786478973206365, "grad_norm": 0.23175881803035736, "learning_rate": 2.4700837226882566e-05, "loss": 0.0753, "step": 50360 }, { "epoch": 2.9792393683089844, "grad_norm": 0.16878055036067963, "learning_rate": 2.469864538761666e-05, "loss": 0.0719, "step": 50370 }, { "epoch": 2.9798308392973327, "grad_norm": 0.2693006992340088, "learning_rate": 2.469645319244172e-05, "loss": 0.0925, "step": 50380 }, { "epoch": 2.9804223102856806, "grad_norm": 0.24487754702568054, "learning_rate": 2.469426064143819e-05, "loss": 0.073, "step": 50390 }, { "epoch": 2.9810137812740285, "grad_norm": 0.18677391111850739, "learning_rate": 2.469206773468652e-05, "loss": 0.0756, "step": 50400 }, { "epoch": 2.9816052522623764, "grad_norm": 0.1623191088438034, "learning_rate": 2.46898744722672e-05, "loss": 0.0591, "step": 50410 }, { "epoch": 2.9821967232507247, "grad_norm": 0.24058088660240173, "learning_rate": 2.4687680854260696e-05, "loss": 0.0776, "step": 50420 }, { "epoch": 2.9827881942390726, "grad_norm": 0.4111918807029724, "learning_rate": 2.468548688074752e-05, "loss": 0.0827, "step": 50430 }, { "epoch": 2.9833796652274205, "grad_norm": 0.3431392014026642, "learning_rate": 2.4683292551808178e-05, "loss": 0.0844, "step": 50440 }, { "epoch": 2.983971136215769, "grad_norm": 0.28122401237487793, "learning_rate": 2.46810978675232e-05, "loss": 0.0717, "step": 50450 }, { "epoch": 2.9845626072041167, "grad_norm": 0.15829753875732422, "learning_rate": 2.467890282797312e-05, "loss": 0.0664, "step": 50460 }, { "epoch": 2.9851540781924646, "grad_norm": 0.38097134232521057, "learning_rate": 2.4676707433238494e-05, "loss": 0.0721, "step": 50470 }, { "epoch": 2.9857455491808125, "grad_norm": 0.24402837455272675, "learning_rate": 2.467451168339988e-05, "loss": 0.0783, "step": 50480 }, { "epoch": 2.986337020169161, "grad_norm": 0.27508029341697693, "learning_rate": 2.4672315578537854e-05, "loss": 0.0805, "step": 50490 }, { "epoch": 2.9869284911575087, "grad_norm": 0.31720736622810364, "learning_rate": 2.4670119118733004e-05, "loss": 0.0771, "step": 50500 }, { "epoch": 2.987519962145857, "grad_norm": 0.25408485531806946, "learning_rate": 2.4667922304065944e-05, "loss": 0.082, "step": 50510 }, { "epoch": 2.988111433134205, "grad_norm": 0.2537490129470825, "learning_rate": 2.4665725134617284e-05, "loss": 0.0757, "step": 50520 }, { "epoch": 2.988702904122553, "grad_norm": 0.24201206862926483, "learning_rate": 2.4663527610467652e-05, "loss": 0.0887, "step": 50530 }, { "epoch": 2.9892943751109007, "grad_norm": 0.15652969479560852, "learning_rate": 2.466132973169769e-05, "loss": 0.0745, "step": 50540 }, { "epoch": 2.9898858460992486, "grad_norm": 0.2396858185529709, "learning_rate": 2.4659131498388054e-05, "loss": 0.0735, "step": 50550 }, { "epoch": 2.990477317087597, "grad_norm": 0.23407725989818573, "learning_rate": 2.465693291061941e-05, "loss": 0.065, "step": 50560 }, { "epoch": 2.991068788075945, "grad_norm": 0.267800897359848, "learning_rate": 2.4654733968472444e-05, "loss": 0.0718, "step": 50570 }, { "epoch": 2.991660259064293, "grad_norm": 0.23324161767959595, "learning_rate": 2.465253467202784e-05, "loss": 0.0811, "step": 50580 }, { "epoch": 2.992251730052641, "grad_norm": 0.30993327498435974, "learning_rate": 2.465033502136632e-05, "loss": 0.0805, "step": 50590 }, { "epoch": 2.992843201040989, "grad_norm": 0.22815024852752686, "learning_rate": 2.4648135016568592e-05, "loss": 0.0751, "step": 50600 }, { "epoch": 2.9934346720293368, "grad_norm": 0.17975936830043793, "learning_rate": 2.4645934657715395e-05, "loss": 0.0662, "step": 50610 }, { "epoch": 2.994026143017685, "grad_norm": 0.31314557790756226, "learning_rate": 2.4643733944887475e-05, "loss": 0.0703, "step": 50620 }, { "epoch": 2.994617614006033, "grad_norm": 0.29363253712654114, "learning_rate": 2.4641532878165584e-05, "loss": 0.077, "step": 50630 }, { "epoch": 2.995209084994381, "grad_norm": 0.2875217795372009, "learning_rate": 2.4639331457630503e-05, "loss": 0.0691, "step": 50640 }, { "epoch": 2.995800555982729, "grad_norm": 0.2560643255710602, "learning_rate": 2.4637129683363015e-05, "loss": 0.0813, "step": 50650 }, { "epoch": 2.996392026971077, "grad_norm": 0.24261713027954102, "learning_rate": 2.4634927555443916e-05, "loss": 0.0664, "step": 50660 }, { "epoch": 2.996983497959425, "grad_norm": 0.3038952648639679, "learning_rate": 2.4632725073954017e-05, "loss": 0.0834, "step": 50670 }, { "epoch": 2.997574968947773, "grad_norm": 0.21095554530620575, "learning_rate": 2.463052223897414e-05, "loss": 0.0959, "step": 50680 }, { "epoch": 2.998166439936121, "grad_norm": 0.2305067479610443, "learning_rate": 2.4628319050585127e-05, "loss": 0.0757, "step": 50690 }, { "epoch": 2.998757910924469, "grad_norm": 0.8786062598228455, "learning_rate": 2.4626115508867822e-05, "loss": 0.0687, "step": 50700 }, { "epoch": 2.9993493819128174, "grad_norm": 0.18398495018482208, "learning_rate": 2.46239116139031e-05, "loss": 0.0644, "step": 50710 }, { "epoch": 2.9999408529011653, "grad_norm": 0.1763872504234314, "learning_rate": 2.462170736577182e-05, "loss": 0.0601, "step": 50720 }, { "epoch": 3.0, "eval_accuracy": 0.6650863359616728, "eval_animal_abuse/accuracy": 0.9944106198223376, "eval_animal_abuse/f1": 0.7613636363636364, "eval_animal_abuse/fpr": 0.003096287820146059, "eval_animal_abuse/precision": 0.7444444444444445, "eval_animal_abuse/recall": 0.7790697674418605, "eval_animal_abuse/threshold": 0.3747906982898712, "eval_child_abuse/accuracy": 0.9964900023289084, "eval_child_abuse/f1": 0.6645468998410174, "eval_child_abuse/fpr": 0.0014553118883926307, "eval_child_abuse/precision": 0.706081081081081, "eval_child_abuse/recall": 0.6276276276276276, "eval_child_abuse/threshold": 0.3557748794555664, "eval_controversial_topics,politics/accuracy": 0.9654988854509765, "eval_controversial_topics,politics/f1": 0.5160989267382174, "eval_controversial_topics,politics/fpr": 0.022961285008237194, "eval_controversial_topics,politics/precision": 0.4525368248772504, "eval_controversial_topics,politics/recall": 0.6004343105320304, "eval_controversial_topics,politics/threshold": 0.22000709176063538, "eval_discrimination,stereotype,injustice/accuracy": 0.9544698406361247, "eval_discrimination,stereotype,injustice/f1": 0.7148067104303428, "eval_discrimination,stereotype,injustice/fpr": 0.02506596306068597, "eval_discrimination,stereotype,injustice/precision": 0.7120614490346688, "eval_discrimination,stereotype,injustice/recall": 0.7175732217573222, "eval_discrimination,stereotype,injustice/threshold": 0.3486451208591461, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9732175533153675, "eval_drug_abuse,weapons,banned_substance/f1": 0.7692748638578389, "eval_drug_abuse,weapons,banned_substance/fpr": 0.016006205048653195, "eval_drug_abuse,weapons,banned_substance/precision": 0.7472160356347439, "eval_drug_abuse,weapons,banned_substance/recall": 0.7926757235676314, "eval_drug_abuse,weapons,banned_substance/threshold": 0.41679665446281433, "eval_financial_crime,property_crime,theft/accuracy": 0.9592940080513691, "eval_financial_crime,property_crime,theft/f1": 0.7995412468255919, "eval_financial_crime,property_crime,theft/fpr": 0.02720085509463165, "eval_financial_crime,property_crime,theft/precision": 0.7677784770295784, "eval_financial_crime,property_crime,theft/recall": 0.8340454623141343, "eval_financial_crime,property_crime,theft/threshold": 0.4163219630718231, "eval_flagged/accuracy": 0.8492031806234821, "eval_flagged/aucpr": 0.9001094054669765, "eval_flagged/f1": 0.8666264510718437, "eval_flagged/fpr": 0.189963619997749, "eval_flagged/precision": 0.8532564607718159, "eval_flagged/recall": 0.8804221099518699, "eval_hate_speech,offensive_language/accuracy": 0.9487141098579366, "eval_hate_speech,offensive_language/f1": 0.6981298345246255, "eval_hate_speech,offensive_language/fpr": 0.023095194591631604, "eval_hate_speech,offensive_language/precision": 0.7382480844895424, "eval_hate_speech,offensive_language/recall": 0.662147102526003, "eval_hate_speech,offensive_language/threshold": 0.43878522515296936, "eval_loss": 0.08145993947982788, "eval_macro_f1": 0.6684733321139996, "eval_macro_precision": 0.6603979539624963, "eval_macro_recall": 0.6827775765181805, "eval_micro_f1": 0.7476251067612557, "eval_micro_precision": 0.7331339201777626, "eval_micro_recall": 0.7627007130536835, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9809861263599161, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.23647294589178355, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.009918663590589882, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.2310704960835509, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.2421340629274966, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.12421301752328873, "eval_non_violent_unethical_behavior/accuracy": 0.8776824034334764, "eval_non_violent_unethical_behavior/f1": 0.6919693351765741, "eval_non_violent_unethical_behavior/fpr": 0.07610861983059276, "eval_non_violent_unethical_behavior/precision": 0.6925786163522013, "eval_non_violent_unethical_behavior/recall": 0.6913611250627825, "eval_non_violent_unethical_behavior/threshold": 0.3380771279335022, "eval_privacy_violation/accuracy": 0.9813354626210201, "eval_privacy_violation/f1": 0.8087282645755199, "eval_privacy_violation/fpr": 0.009239168474837248, "eval_privacy_violation/precision": 0.8179310344827586, "eval_privacy_violation/recall": 0.7997302764666218, "eval_privacy_violation/threshold": 0.46611642837524414, "eval_runtime": 83.7577, "eval_samples_per_second": 717.713, "eval_self_harm/accuracy": 0.9967894334098546, "eval_self_harm/f1": 0.7416331994645248, "eval_self_harm/fpr": 0.0010049577917727438, "eval_self_harm/precision": 0.8219584569732937, "eval_self_harm/recall": 0.675609756097561, "eval_self_harm/threshold": 0.5338835716247559, "eval_sexually_explicit,adult_content/accuracy": 0.98256645706491, "eval_sexually_explicit,adult_content/f1": 0.6662420382165605, "eval_sexually_explicit,adult_content/fpr": 0.011028346429849812, "eval_sexually_explicit,adult_content/precision": 0.6178381571175429, "eval_sexually_explicit,adult_content/recall": 0.7228749136143746, "eval_sexually_explicit,adult_content/threshold": 0.30074557662010193, "eval_steps_per_second": 44.868, "eval_terrorism,organized_crime/accuracy": 0.9891206707256213, "eval_terrorism,organized_crime/f1": 0.4391080617495712, "eval_terrorism,organized_crime/fpr": 0.007194003320309212, "eval_terrorism,organized_crime/precision": 0.37372262773722625, "eval_terrorism,organized_crime/recall": 0.5322245322245323, "eval_terrorism,organized_crime/threshold": 0.33111974596977234, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9177063579199521, "eval_violence,aiding_and_abetting,incitement/f1": 0.8507106859401877, "eval_violence,aiding_and_abetting,incitement/fpr": 0.0691265128507319, "eval_violence,aiding_and_abetting,incitement/precision": 0.8221055701370662, "eval_violence,aiding_and_abetting,incitement/recall": 0.8813781890945472, "eval_violence,aiding_and_abetting,incitement/threshold": 0.44890937209129333, "step": 50721 }, { "epoch": 3.000532323889513, "grad_norm": 0.26428717374801636, "learning_rate": 2.4619502764554882e-05, "loss": 0.0824, "step": 50730 }, { "epoch": 3.001123794877861, "grad_norm": 0.25030970573425293, "learning_rate": 2.4617297810333182e-05, "loss": 0.0668, "step": 50740 }, { "epoch": 3.0017152658662094, "grad_norm": 0.22125805914402008, "learning_rate": 2.461509250318764e-05, "loss": 0.0612, "step": 50750 }, { "epoch": 3.0023067368545573, "grad_norm": 0.36565831303596497, "learning_rate": 2.461288684319918e-05, "loss": 0.0686, "step": 50760 }, { "epoch": 3.002898207842905, "grad_norm": 0.11091470718383789, "learning_rate": 2.4610680830448747e-05, "loss": 0.0549, "step": 50770 }, { "epoch": 3.0034896788312535, "grad_norm": 0.4012264907360077, "learning_rate": 2.4608474465017285e-05, "loss": 0.073, "step": 50780 }, { "epoch": 3.0040811498196014, "grad_norm": 0.2787577509880066, "learning_rate": 2.460626774698577e-05, "loss": 0.0775, "step": 50790 }, { "epoch": 3.0046726208079493, "grad_norm": 0.2979797124862671, "learning_rate": 2.460406067643518e-05, "loss": 0.0688, "step": 50800 }, { "epoch": 3.0052640917962976, "grad_norm": 0.16019071638584137, "learning_rate": 2.4601853253446505e-05, "loss": 0.0592, "step": 50810 }, { "epoch": 3.0058555627846455, "grad_norm": 0.21194925904273987, "learning_rate": 2.4599645478100748e-05, "loss": 0.05, "step": 50820 }, { "epoch": 3.0064470337729934, "grad_norm": 0.7862281203269958, "learning_rate": 2.459743735047893e-05, "loss": 0.0762, "step": 50830 }, { "epoch": 3.0070385047613413, "grad_norm": 0.2908197343349457, "learning_rate": 2.4595228870662084e-05, "loss": 0.0724, "step": 50840 }, { "epoch": 3.0076299757496896, "grad_norm": 0.23472429811954498, "learning_rate": 2.4593020038731254e-05, "loss": 0.0586, "step": 50850 }, { "epoch": 3.0082214467380375, "grad_norm": 0.18853576481342316, "learning_rate": 2.4590810854767492e-05, "loss": 0.0579, "step": 50860 }, { "epoch": 3.0088129177263854, "grad_norm": 0.2972956895828247, "learning_rate": 2.4588601318851875e-05, "loss": 0.0512, "step": 50870 }, { "epoch": 3.0094043887147337, "grad_norm": 0.24920864403247833, "learning_rate": 2.4586391431065483e-05, "loss": 0.0733, "step": 50880 }, { "epoch": 3.0099958597030816, "grad_norm": 0.2798340618610382, "learning_rate": 2.4584181191489408e-05, "loss": 0.069, "step": 50890 }, { "epoch": 3.0105873306914295, "grad_norm": 0.23897887766361237, "learning_rate": 2.458197060020476e-05, "loss": 0.0673, "step": 50900 }, { "epoch": 3.011178801679778, "grad_norm": 0.207307830452919, "learning_rate": 2.4579759657292664e-05, "loss": 0.06, "step": 50910 }, { "epoch": 3.0117702726681257, "grad_norm": 0.18763300776481628, "learning_rate": 2.457754836283425e-05, "loss": 0.0526, "step": 50920 }, { "epoch": 3.0123617436564736, "grad_norm": 0.33131980895996094, "learning_rate": 2.4575336716910672e-05, "loss": 0.0702, "step": 50930 }, { "epoch": 3.0129532146448215, "grad_norm": 0.2434694766998291, "learning_rate": 2.457312471960309e-05, "loss": 0.0715, "step": 50940 }, { "epoch": 3.01354468563317, "grad_norm": 0.2280663251876831, "learning_rate": 2.4570912370992667e-05, "loss": 0.0678, "step": 50950 }, { "epoch": 3.0141361566215177, "grad_norm": 0.2232215404510498, "learning_rate": 2.4568699671160594e-05, "loss": 0.0602, "step": 50960 }, { "epoch": 3.0147276276098656, "grad_norm": 0.5476029515266418, "learning_rate": 2.456648662018808e-05, "loss": 0.0582, "step": 50970 }, { "epoch": 3.015319098598214, "grad_norm": 0.3415350019931793, "learning_rate": 2.4564273218156318e-05, "loss": 0.0821, "step": 50980 }, { "epoch": 3.015910569586562, "grad_norm": 0.3499416708946228, "learning_rate": 2.456205946514655e-05, "loss": 0.0778, "step": 50990 }, { "epoch": 3.0165020405749097, "grad_norm": 0.2536623179912567, "learning_rate": 2.4559845361240004e-05, "loss": 0.0643, "step": 51000 }, { "epoch": 3.017093511563258, "grad_norm": 0.2110520452260971, "learning_rate": 2.4557630906517935e-05, "loss": 0.0625, "step": 51010 }, { "epoch": 3.017684982551606, "grad_norm": 1.1391783952713013, "learning_rate": 2.4555416101061604e-05, "loss": 0.0515, "step": 51020 }, { "epoch": 3.0182764535399538, "grad_norm": 0.2603330910205841, "learning_rate": 2.4553200944952284e-05, "loss": 0.0762, "step": 51030 }, { "epoch": 3.018867924528302, "grad_norm": 0.22125062346458435, "learning_rate": 2.4550985438271278e-05, "loss": 0.0763, "step": 51040 }, { "epoch": 3.01945939551665, "grad_norm": 0.21334604918956757, "learning_rate": 2.454876958109987e-05, "loss": 0.0693, "step": 51050 }, { "epoch": 3.020050866504998, "grad_norm": 0.25633490085601807, "learning_rate": 2.4546553373519383e-05, "loss": 0.0571, "step": 51060 }, { "epoch": 3.0206423374933458, "grad_norm": 0.18487338721752167, "learning_rate": 2.4544336815611145e-05, "loss": 0.0473, "step": 51070 }, { "epoch": 3.021233808481694, "grad_norm": 0.30539175868034363, "learning_rate": 2.4542119907456492e-05, "loss": 0.0633, "step": 51080 }, { "epoch": 3.021825279470042, "grad_norm": 0.3055707514286041, "learning_rate": 2.4539902649136788e-05, "loss": 0.0704, "step": 51090 }, { "epoch": 3.02241675045839, "grad_norm": 0.22695477306842804, "learning_rate": 2.4537685040733387e-05, "loss": 0.067, "step": 51100 }, { "epoch": 3.023008221446738, "grad_norm": 0.24642635881900787, "learning_rate": 2.4535467082327678e-05, "loss": 0.065, "step": 51110 }, { "epoch": 3.023599692435086, "grad_norm": 0.4798355996608734, "learning_rate": 2.453324877400105e-05, "loss": 0.0563, "step": 51120 }, { "epoch": 3.024191163423434, "grad_norm": 0.28574544191360474, "learning_rate": 2.4531030115834898e-05, "loss": 0.0706, "step": 51130 }, { "epoch": 3.0247826344117823, "grad_norm": 0.3102929890155792, "learning_rate": 2.4528811107910654e-05, "loss": 0.0733, "step": 51140 }, { "epoch": 3.02537410540013, "grad_norm": 0.39437639713287354, "learning_rate": 2.4526591750309742e-05, "loss": 0.0718, "step": 51150 }, { "epoch": 3.025965576388478, "grad_norm": 0.3233456313610077, "learning_rate": 2.4524372043113603e-05, "loss": 0.0596, "step": 51160 }, { "epoch": 3.026557047376826, "grad_norm": 0.2463153600692749, "learning_rate": 2.45221519864037e-05, "loss": 0.0602, "step": 51170 }, { "epoch": 3.0271485183651743, "grad_norm": 0.1984703093767166, "learning_rate": 2.4519931580261493e-05, "loss": 0.0672, "step": 51180 }, { "epoch": 3.027739989353522, "grad_norm": 0.3202812969684601, "learning_rate": 2.451771082476847e-05, "loss": 0.0795, "step": 51190 }, { "epoch": 3.02833146034187, "grad_norm": 0.3239227831363678, "learning_rate": 2.4515489720006126e-05, "loss": 0.0696, "step": 51200 }, { "epoch": 3.0289229313302184, "grad_norm": 0.23815585672855377, "learning_rate": 2.4513268266055963e-05, "loss": 0.0633, "step": 51210 }, { "epoch": 3.0295144023185663, "grad_norm": 0.31699687242507935, "learning_rate": 2.4511046462999507e-05, "loss": 0.0502, "step": 51220 }, { "epoch": 3.030105873306914, "grad_norm": 0.21697133779525757, "learning_rate": 2.4508824310918284e-05, "loss": 0.0815, "step": 51230 }, { "epoch": 3.0306973442952625, "grad_norm": 0.2782089412212372, "learning_rate": 2.4506601809893852e-05, "loss": 0.0778, "step": 51240 }, { "epoch": 3.0312888152836104, "grad_norm": 0.26025015115737915, "learning_rate": 2.4504378960007754e-05, "loss": 0.0636, "step": 51250 }, { "epoch": 3.0318802862719583, "grad_norm": 0.22185713052749634, "learning_rate": 2.4502155761341573e-05, "loss": 0.0615, "step": 51260 }, { "epoch": 3.0324717572603066, "grad_norm": 0.23852844536304474, "learning_rate": 2.4499932213976894e-05, "loss": 0.0681, "step": 51270 }, { "epoch": 3.0330632282486545, "grad_norm": 0.336933434009552, "learning_rate": 2.4497708317995304e-05, "loss": 0.0809, "step": 51280 }, { "epoch": 3.0336546992370024, "grad_norm": 0.32426175475120544, "learning_rate": 2.449548407347842e-05, "loss": 0.0657, "step": 51290 }, { "epoch": 3.0342461702253503, "grad_norm": 0.3510187268257141, "learning_rate": 2.449325948050786e-05, "loss": 0.0608, "step": 51300 }, { "epoch": 3.0348376412136986, "grad_norm": 0.21470306813716888, "learning_rate": 2.449103453916527e-05, "loss": 0.0626, "step": 51310 }, { "epoch": 3.0354291122020465, "grad_norm": 0.2311384677886963, "learning_rate": 2.4488809249532284e-05, "loss": 0.0499, "step": 51320 }, { "epoch": 3.0360205831903944, "grad_norm": 0.31500595808029175, "learning_rate": 2.4486583611690573e-05, "loss": 0.0777, "step": 51330 }, { "epoch": 3.0366120541787427, "grad_norm": 0.2424517273902893, "learning_rate": 2.4484357625721806e-05, "loss": 0.0682, "step": 51340 }, { "epoch": 3.0372035251670906, "grad_norm": 0.27501341700553894, "learning_rate": 2.4482131291707666e-05, "loss": 0.0671, "step": 51350 }, { "epoch": 3.0377949961554385, "grad_norm": 0.1739862710237503, "learning_rate": 2.4479904609729867e-05, "loss": 0.0573, "step": 51360 }, { "epoch": 3.038386467143787, "grad_norm": 0.27985313534736633, "learning_rate": 2.4477677579870104e-05, "loss": 0.0604, "step": 51370 }, { "epoch": 3.0389779381321347, "grad_norm": 0.5604745149612427, "learning_rate": 2.4475450202210112e-05, "loss": 0.0669, "step": 51380 }, { "epoch": 3.0395694091204826, "grad_norm": 0.3672141134738922, "learning_rate": 2.4473222476831622e-05, "loss": 0.0717, "step": 51390 }, { "epoch": 3.0401608801088305, "grad_norm": 0.22847267985343933, "learning_rate": 2.4470994403816395e-05, "loss": 0.065, "step": 51400 }, { "epoch": 3.040752351097179, "grad_norm": 0.3064451515674591, "learning_rate": 2.4468765983246182e-05, "loss": 0.058, "step": 51410 }, { "epoch": 3.0413438220855267, "grad_norm": 0.25168943405151367, "learning_rate": 2.4466537215202766e-05, "loss": 0.0633, "step": 51420 }, { "epoch": 3.0419352930738746, "grad_norm": 0.35627874732017517, "learning_rate": 2.4464308099767932e-05, "loss": 0.0754, "step": 51430 }, { "epoch": 3.042526764062223, "grad_norm": 0.2302217334508896, "learning_rate": 2.4462078637023485e-05, "loss": 0.0699, "step": 51440 }, { "epoch": 3.0431182350505708, "grad_norm": 0.20402826368808746, "learning_rate": 2.4459848827051234e-05, "loss": 0.073, "step": 51450 }, { "epoch": 3.0437097060389187, "grad_norm": 0.1582670360803604, "learning_rate": 2.445761866993301e-05, "loss": 0.0583, "step": 51460 }, { "epoch": 3.044301177027267, "grad_norm": 0.21345260739326477, "learning_rate": 2.4455388165750646e-05, "loss": 0.0653, "step": 51470 }, { "epoch": 3.044892648015615, "grad_norm": 0.6346406936645508, "learning_rate": 2.445315731458601e-05, "loss": 0.0763, "step": 51480 }, { "epoch": 3.0454841190039628, "grad_norm": 0.20172423124313354, "learning_rate": 2.445092611652095e-05, "loss": 0.0701, "step": 51490 }, { "epoch": 3.046075589992311, "grad_norm": 0.1645033210515976, "learning_rate": 2.4448694571637354e-05, "loss": 0.0607, "step": 51500 }, { "epoch": 3.046667060980659, "grad_norm": 0.2306830883026123, "learning_rate": 2.4446462680017105e-05, "loss": 0.0617, "step": 51510 }, { "epoch": 3.047258531969007, "grad_norm": 0.1421327441930771, "learning_rate": 2.4444230441742116e-05, "loss": 0.0461, "step": 51520 }, { "epoch": 3.0478500029573548, "grad_norm": 0.27381518483161926, "learning_rate": 2.4441997856894294e-05, "loss": 0.0643, "step": 51530 }, { "epoch": 3.048441473945703, "grad_norm": 0.3113805055618286, "learning_rate": 2.443976492555557e-05, "loss": 0.0738, "step": 51540 }, { "epoch": 3.049032944934051, "grad_norm": 1.0599582195281982, "learning_rate": 2.4437531647807883e-05, "loss": 0.0647, "step": 51550 }, { "epoch": 3.049624415922399, "grad_norm": 0.26678466796875, "learning_rate": 2.44352980237332e-05, "loss": 0.0609, "step": 51560 }, { "epoch": 3.050215886910747, "grad_norm": 0.2777632176876068, "learning_rate": 2.443306405341347e-05, "loss": 0.0523, "step": 51570 }, { "epoch": 3.050807357899095, "grad_norm": 0.33425408601760864, "learning_rate": 2.4430829736930684e-05, "loss": 0.0711, "step": 51580 }, { "epoch": 3.051398828887443, "grad_norm": 0.3494829535484314, "learning_rate": 2.442859507436683e-05, "loss": 0.0676, "step": 51590 }, { "epoch": 3.0519902998757913, "grad_norm": 0.44991910457611084, "learning_rate": 2.4426360065803914e-05, "loss": 0.0624, "step": 51600 }, { "epoch": 3.052581770864139, "grad_norm": 0.30008596181869507, "learning_rate": 2.4424124711323955e-05, "loss": 0.0614, "step": 51610 }, { "epoch": 3.053173241852487, "grad_norm": 0.4228046238422394, "learning_rate": 2.442188901100898e-05, "loss": 0.0449, "step": 51620 }, { "epoch": 3.053764712840835, "grad_norm": 0.6289417743682861, "learning_rate": 2.4419652964941036e-05, "loss": 0.0762, "step": 51630 }, { "epoch": 3.0543561838291833, "grad_norm": 0.24594338238239288, "learning_rate": 2.4417416573202176e-05, "loss": 0.0763, "step": 51640 }, { "epoch": 3.054947654817531, "grad_norm": 0.32697099447250366, "learning_rate": 2.4415179835874466e-05, "loss": 0.067, "step": 51650 }, { "epoch": 3.055539125805879, "grad_norm": 0.24807670712471008, "learning_rate": 2.441294275303999e-05, "loss": 0.0486, "step": 51660 }, { "epoch": 3.0561305967942274, "grad_norm": 0.25788241624832153, "learning_rate": 2.4410705324780845e-05, "loss": 0.0554, "step": 51670 }, { "epoch": 3.0567220677825753, "grad_norm": 3.4894559383392334, "learning_rate": 2.440846755117913e-05, "loss": 0.0726, "step": 51680 }, { "epoch": 3.057313538770923, "grad_norm": 0.4255462884902954, "learning_rate": 2.4406229432316973e-05, "loss": 0.0818, "step": 51690 }, { "epoch": 3.0579050097592715, "grad_norm": 0.271758109331131, "learning_rate": 2.4403990968276496e-05, "loss": 0.0635, "step": 51700 }, { "epoch": 3.0584964807476194, "grad_norm": 0.18952128291130066, "learning_rate": 2.4401752159139848e-05, "loss": 0.0678, "step": 51710 }, { "epoch": 3.0590879517359673, "grad_norm": 0.3952663838863373, "learning_rate": 2.4399513004989195e-05, "loss": 0.058, "step": 51720 }, { "epoch": 3.0596794227243156, "grad_norm": 0.400855153799057, "learning_rate": 2.4397273505906688e-05, "loss": 0.0745, "step": 51730 }, { "epoch": 3.0602708937126635, "grad_norm": 0.2641051709651947, "learning_rate": 2.4395033661974527e-05, "loss": 0.0731, "step": 51740 }, { "epoch": 3.0608623647010114, "grad_norm": 0.447699636220932, "learning_rate": 2.439279347327489e-05, "loss": 0.062, "step": 51750 }, { "epoch": 3.0614538356893592, "grad_norm": 0.3237744867801666, "learning_rate": 2.4390552939890003e-05, "loss": 0.0677, "step": 51760 }, { "epoch": 3.0620453066777076, "grad_norm": 0.22880731523036957, "learning_rate": 2.438831206190207e-05, "loss": 0.0577, "step": 51770 }, { "epoch": 3.0626367776660555, "grad_norm": 0.39410197734832764, "learning_rate": 2.438607083939334e-05, "loss": 0.0708, "step": 51780 }, { "epoch": 3.0632282486544034, "grad_norm": 0.24236002564430237, "learning_rate": 2.4383829272446048e-05, "loss": 0.0748, "step": 51790 }, { "epoch": 3.0638197196427517, "grad_norm": 0.27232277393341064, "learning_rate": 2.438158736114245e-05, "loss": 0.0671, "step": 51800 }, { "epoch": 3.0644111906310996, "grad_norm": 0.22454331815242767, "learning_rate": 2.4379345105564824e-05, "loss": 0.0633, "step": 51810 }, { "epoch": 3.0650026616194475, "grad_norm": 0.2469848096370697, "learning_rate": 2.4377102505795455e-05, "loss": 0.0593, "step": 51820 }, { "epoch": 3.065594132607796, "grad_norm": 0.6042790412902832, "learning_rate": 2.4374859561916633e-05, "loss": 0.0677, "step": 51830 }, { "epoch": 3.0661856035961437, "grad_norm": 0.2754485607147217, "learning_rate": 2.4372616274010664e-05, "loss": 0.0745, "step": 51840 }, { "epoch": 3.0667770745844916, "grad_norm": 0.8051297068595886, "learning_rate": 2.437037264215988e-05, "loss": 0.066, "step": 51850 }, { "epoch": 3.0673685455728394, "grad_norm": 0.27134716510772705, "learning_rate": 2.436812866644661e-05, "loss": 0.0625, "step": 51860 }, { "epoch": 3.0679600165611878, "grad_norm": 0.20600903034210205, "learning_rate": 2.4365884346953196e-05, "loss": 0.0562, "step": 51870 }, { "epoch": 3.0685514875495357, "grad_norm": 0.4473828077316284, "learning_rate": 2.4363639683762002e-05, "loss": 0.0762, "step": 51880 }, { "epoch": 3.0691429585378835, "grad_norm": 0.19748499989509583, "learning_rate": 2.4361394676955407e-05, "loss": 0.066, "step": 51890 }, { "epoch": 3.069734429526232, "grad_norm": 0.30558812618255615, "learning_rate": 2.4359149326615775e-05, "loss": 0.0655, "step": 51900 }, { "epoch": 3.0703259005145798, "grad_norm": 0.32843899726867676, "learning_rate": 2.4356903632825524e-05, "loss": 0.0572, "step": 51910 }, { "epoch": 3.0709173715029277, "grad_norm": 0.21740534901618958, "learning_rate": 2.4354657595667055e-05, "loss": 0.0462, "step": 51920 }, { "epoch": 3.071508842491276, "grad_norm": 0.3403121531009674, "learning_rate": 2.435241121522279e-05, "loss": 0.0745, "step": 51930 }, { "epoch": 3.072100313479624, "grad_norm": 0.37398087978363037, "learning_rate": 2.435016449157517e-05, "loss": 0.0709, "step": 51940 }, { "epoch": 3.0726917844679718, "grad_norm": 0.28226643800735474, "learning_rate": 2.4347917424806635e-05, "loss": 0.0718, "step": 51950 }, { "epoch": 3.07328325545632, "grad_norm": 0.32134827971458435, "learning_rate": 2.4345670014999643e-05, "loss": 0.0618, "step": 51960 }, { "epoch": 3.073874726444668, "grad_norm": 0.30178162455558777, "learning_rate": 2.4343422262236676e-05, "loss": 0.0487, "step": 51970 }, { "epoch": 3.074466197433016, "grad_norm": 0.4077136516571045, "learning_rate": 2.434117416660021e-05, "loss": 0.066, "step": 51980 }, { "epoch": 3.0750576684213637, "grad_norm": 0.2507302761077881, "learning_rate": 2.4338925728172754e-05, "loss": 0.0701, "step": 51990 }, { "epoch": 3.075649139409712, "grad_norm": 0.3068912923336029, "learning_rate": 2.4336676947036806e-05, "loss": 0.0639, "step": 52000 }, { "epoch": 3.07624061039806, "grad_norm": 0.25174233317375183, "learning_rate": 2.43344278232749e-05, "loss": 0.0667, "step": 52010 }, { "epoch": 3.076832081386408, "grad_norm": 0.3530505299568176, "learning_rate": 2.4332178356969558e-05, "loss": 0.0514, "step": 52020 }, { "epoch": 3.077423552374756, "grad_norm": 0.2639806270599365, "learning_rate": 2.4329928548203348e-05, "loss": 0.0728, "step": 52030 }, { "epoch": 3.078015023363104, "grad_norm": 2.086588144302368, "learning_rate": 2.432767839705881e-05, "loss": 0.0714, "step": 52040 }, { "epoch": 3.078606494351452, "grad_norm": 0.39129117131233215, "learning_rate": 2.4325427903618536e-05, "loss": 0.0638, "step": 52050 }, { "epoch": 3.0791979653398003, "grad_norm": 0.34262701869010925, "learning_rate": 2.4323177067965095e-05, "loss": 0.0646, "step": 52060 }, { "epoch": 3.079789436328148, "grad_norm": 0.20679286122322083, "learning_rate": 2.4320925890181095e-05, "loss": 0.0552, "step": 52070 }, { "epoch": 3.080380907316496, "grad_norm": 0.32295653223991394, "learning_rate": 2.4318674370349148e-05, "loss": 0.0757, "step": 52080 }, { "epoch": 3.080972378304844, "grad_norm": 0.3187139928340912, "learning_rate": 2.4316422508551873e-05, "loss": 0.0703, "step": 52090 }, { "epoch": 3.0815638492931923, "grad_norm": 0.2781730592250824, "learning_rate": 2.4314170304871904e-05, "loss": 0.0728, "step": 52100 }, { "epoch": 3.08215532028154, "grad_norm": 0.15651842951774597, "learning_rate": 2.4311917759391898e-05, "loss": 0.065, "step": 52110 }, { "epoch": 3.082746791269888, "grad_norm": 0.38467466831207275, "learning_rate": 2.430966487219451e-05, "loss": 0.0556, "step": 52120 }, { "epoch": 3.0833382622582364, "grad_norm": 0.26292505860328674, "learning_rate": 2.4307411643362412e-05, "loss": 0.0746, "step": 52130 }, { "epoch": 3.0839297332465843, "grad_norm": 0.2010863721370697, "learning_rate": 2.4305158072978296e-05, "loss": 0.0712, "step": 52140 }, { "epoch": 3.084521204234932, "grad_norm": 0.3719455301761627, "learning_rate": 2.430290416112486e-05, "loss": 0.0682, "step": 52150 }, { "epoch": 3.0851126752232805, "grad_norm": 0.2540028691291809, "learning_rate": 2.430064990788481e-05, "loss": 0.0504, "step": 52160 }, { "epoch": 3.0857041462116284, "grad_norm": 0.2770560681819916, "learning_rate": 2.4298395313340873e-05, "loss": 0.0524, "step": 52170 }, { "epoch": 3.0862956171999762, "grad_norm": 0.27895933389663696, "learning_rate": 2.429614037757579e-05, "loss": 0.0831, "step": 52180 }, { "epoch": 3.086887088188324, "grad_norm": 0.32190045714378357, "learning_rate": 2.4293885100672305e-05, "loss": 0.0725, "step": 52190 }, { "epoch": 3.0874785591766725, "grad_norm": 0.34459182620048523, "learning_rate": 2.4291629482713173e-05, "loss": 0.0631, "step": 52200 }, { "epoch": 3.0880700301650204, "grad_norm": 0.17621487379074097, "learning_rate": 2.428937352378118e-05, "loss": 0.0583, "step": 52210 }, { "epoch": 3.0886615011533682, "grad_norm": 0.2582513988018036, "learning_rate": 2.4287117223959106e-05, "loss": 0.0546, "step": 52220 }, { "epoch": 3.0892529721417166, "grad_norm": 0.30140575766563416, "learning_rate": 2.4284860583329753e-05, "loss": 0.0753, "step": 52230 }, { "epoch": 3.0898444431300645, "grad_norm": 0.4671994149684906, "learning_rate": 2.428260360197593e-05, "loss": 0.0652, "step": 52240 }, { "epoch": 3.0904359141184123, "grad_norm": 0.33789077401161194, "learning_rate": 2.428034627998046e-05, "loss": 0.0764, "step": 52250 }, { "epoch": 3.0910273851067607, "grad_norm": 0.22340425848960876, "learning_rate": 2.4278088617426182e-05, "loss": 0.05, "step": 52260 }, { "epoch": 3.0916188560951086, "grad_norm": 0.20699775218963623, "learning_rate": 2.4275830614395942e-05, "loss": 0.0593, "step": 52270 }, { "epoch": 3.0922103270834564, "grad_norm": 0.3506063222885132, "learning_rate": 2.4273572270972607e-05, "loss": 0.0659, "step": 52280 }, { "epoch": 3.0928017980718048, "grad_norm": 0.30139896273612976, "learning_rate": 2.4271313587239047e-05, "loss": 0.0693, "step": 52290 }, { "epoch": 3.0933932690601527, "grad_norm": 0.2567168176174164, "learning_rate": 2.4269054563278145e-05, "loss": 0.0668, "step": 52300 }, { "epoch": 3.0939847400485005, "grad_norm": 0.3178446888923645, "learning_rate": 2.426679519917281e-05, "loss": 0.0701, "step": 52310 }, { "epoch": 3.0945762110368484, "grad_norm": 0.22572338581085205, "learning_rate": 2.4264535495005936e-05, "loss": 0.0557, "step": 52320 }, { "epoch": 3.0951676820251968, "grad_norm": 0.36177340149879456, "learning_rate": 2.4262275450860467e-05, "loss": 0.0678, "step": 52330 }, { "epoch": 3.0957591530135447, "grad_norm": 0.7019818425178528, "learning_rate": 2.4260015066819325e-05, "loss": 0.0762, "step": 52340 }, { "epoch": 3.0963506240018925, "grad_norm": 0.2551574110984802, "learning_rate": 2.4257754342965467e-05, "loss": 0.067, "step": 52350 }, { "epoch": 3.096942094990241, "grad_norm": 0.33333277702331543, "learning_rate": 2.4255493279381853e-05, "loss": 0.0659, "step": 52360 }, { "epoch": 3.0975335659785888, "grad_norm": 0.28679734468460083, "learning_rate": 2.4253231876151455e-05, "loss": 0.0575, "step": 52370 }, { "epoch": 3.0981250369669366, "grad_norm": 0.35738229751586914, "learning_rate": 2.4250970133357257e-05, "loss": 0.076, "step": 52380 }, { "epoch": 3.098716507955285, "grad_norm": 0.32250216603279114, "learning_rate": 2.424870805108226e-05, "loss": 0.0721, "step": 52390 }, { "epoch": 3.099307978943633, "grad_norm": 0.2149754762649536, "learning_rate": 2.424644562940947e-05, "loss": 0.0693, "step": 52400 }, { "epoch": 3.0998994499319807, "grad_norm": 0.7547304034233093, "learning_rate": 2.4244182868421927e-05, "loss": 0.0668, "step": 52410 }, { "epoch": 3.100490920920329, "grad_norm": 0.321157842874527, "learning_rate": 2.4241919768202645e-05, "loss": 0.0546, "step": 52420 }, { "epoch": 3.101082391908677, "grad_norm": 0.35171449184417725, "learning_rate": 2.423965632883469e-05, "loss": 0.0875, "step": 52430 }, { "epoch": 3.101673862897025, "grad_norm": 0.5778587460517883, "learning_rate": 2.4237392550401116e-05, "loss": 0.0751, "step": 52440 }, { "epoch": 3.1022653338853727, "grad_norm": 0.1932615041732788, "learning_rate": 2.423512843298499e-05, "loss": 0.0683, "step": 52450 }, { "epoch": 3.102856804873721, "grad_norm": 0.47317391633987427, "learning_rate": 2.423286397666941e-05, "loss": 0.064, "step": 52460 }, { "epoch": 3.103448275862069, "grad_norm": 0.20357303321361542, "learning_rate": 2.4230599181537465e-05, "loss": 0.0496, "step": 52470 }, { "epoch": 3.104039746850417, "grad_norm": 0.2942262589931488, "learning_rate": 2.4228334047672274e-05, "loss": 0.0848, "step": 52480 }, { "epoch": 3.104631217838765, "grad_norm": 0.3402283489704132, "learning_rate": 2.422606857515695e-05, "loss": 0.0701, "step": 52490 }, { "epoch": 3.105222688827113, "grad_norm": 0.31462621688842773, "learning_rate": 2.4223802764074634e-05, "loss": 0.0736, "step": 52500 }, { "epoch": 3.105814159815461, "grad_norm": 0.3616654574871063, "learning_rate": 2.4221536614508478e-05, "loss": 0.064, "step": 52510 }, { "epoch": 3.1064056308038093, "grad_norm": 0.2409081757068634, "learning_rate": 2.4219270126541635e-05, "loss": 0.0586, "step": 52520 }, { "epoch": 3.106997101792157, "grad_norm": 0.3729875981807709, "learning_rate": 2.4217003300257286e-05, "loss": 0.0654, "step": 52530 }, { "epoch": 3.107588572780505, "grad_norm": 0.2692134380340576, "learning_rate": 2.4214736135738604e-05, "loss": 0.0707, "step": 52540 }, { "epoch": 3.108180043768853, "grad_norm": 0.29861709475517273, "learning_rate": 2.42124686330688e-05, "loss": 0.0613, "step": 52550 }, { "epoch": 3.1087715147572013, "grad_norm": 0.36965906620025635, "learning_rate": 2.4210200792331076e-05, "loss": 0.0569, "step": 52560 }, { "epoch": 3.109362985745549, "grad_norm": 0.3635241687297821, "learning_rate": 2.4207932613608655e-05, "loss": 0.0604, "step": 52570 }, { "epoch": 3.109954456733897, "grad_norm": 0.34818366169929504, "learning_rate": 2.4205664096984777e-05, "loss": 0.0838, "step": 52580 }, { "epoch": 3.1105459277222454, "grad_norm": 0.22303278744220734, "learning_rate": 2.420339524254268e-05, "loss": 0.0789, "step": 52590 }, { "epoch": 3.1111373987105932, "grad_norm": 0.3104505240917206, "learning_rate": 2.420112605036563e-05, "loss": 0.0763, "step": 52600 }, { "epoch": 3.111728869698941, "grad_norm": 0.22188255190849304, "learning_rate": 2.4198856520536904e-05, "loss": 0.0599, "step": 52610 }, { "epoch": 3.1123203406872895, "grad_norm": 0.21212057769298553, "learning_rate": 2.4196586653139773e-05, "loss": 0.054, "step": 52620 }, { "epoch": 3.1129118116756374, "grad_norm": 0.34225162863731384, "learning_rate": 2.4194316448257544e-05, "loss": 0.0763, "step": 52630 }, { "epoch": 3.1135032826639852, "grad_norm": 0.4040735065937042, "learning_rate": 2.4192045905973523e-05, "loss": 0.0798, "step": 52640 }, { "epoch": 3.114094753652333, "grad_norm": 0.25734248757362366, "learning_rate": 2.4189775026371034e-05, "loss": 0.0692, "step": 52650 }, { "epoch": 3.1146862246406815, "grad_norm": 0.1950569599866867, "learning_rate": 2.418750380953341e-05, "loss": 0.0617, "step": 52660 }, { "epoch": 3.1152776956290293, "grad_norm": 0.3186948895454407, "learning_rate": 2.4185232255544e-05, "loss": 0.0595, "step": 52670 }, { "epoch": 3.1158691666173772, "grad_norm": 0.48759397864341736, "learning_rate": 2.418296036448615e-05, "loss": 0.091, "step": 52680 }, { "epoch": 3.1164606376057256, "grad_norm": 0.2809197008609772, "learning_rate": 2.4180688136443243e-05, "loss": 0.0691, "step": 52690 }, { "epoch": 3.1170521085940734, "grad_norm": 0.24146489799022675, "learning_rate": 2.4178415571498666e-05, "loss": 0.0612, "step": 52700 }, { "epoch": 3.1176435795824213, "grad_norm": 0.2941409647464752, "learning_rate": 2.41761426697358e-05, "loss": 0.068, "step": 52710 }, { "epoch": 3.1182350505707697, "grad_norm": 0.3090708553791046, "learning_rate": 2.4173869431238068e-05, "loss": 0.0671, "step": 52720 }, { "epoch": 3.1188265215591175, "grad_norm": 0.2670879662036896, "learning_rate": 2.417159585608888e-05, "loss": 0.0729, "step": 52730 }, { "epoch": 3.1194179925474654, "grad_norm": 0.3378868103027344, "learning_rate": 2.416932194437168e-05, "loss": 0.0702, "step": 52740 }, { "epoch": 3.1200094635358138, "grad_norm": 0.18548588454723358, "learning_rate": 2.4167047696169897e-05, "loss": 0.0698, "step": 52750 }, { "epoch": 3.1206009345241617, "grad_norm": 0.30512022972106934, "learning_rate": 2.4164773111567e-05, "loss": 0.0617, "step": 52760 }, { "epoch": 3.1211924055125095, "grad_norm": 0.21257857978343964, "learning_rate": 2.4162498190646468e-05, "loss": 0.0491, "step": 52770 }, { "epoch": 3.1217838765008574, "grad_norm": 0.2839519679546356, "learning_rate": 2.416022293349176e-05, "loss": 0.0734, "step": 52780 }, { "epoch": 3.1223753474892058, "grad_norm": 0.3086227476596832, "learning_rate": 2.415794734018639e-05, "loss": 0.0738, "step": 52790 }, { "epoch": 3.1229668184775536, "grad_norm": 0.22158344089984894, "learning_rate": 2.415567141081385e-05, "loss": 0.0658, "step": 52800 }, { "epoch": 3.1235582894659015, "grad_norm": 0.42694777250289917, "learning_rate": 2.4153395145457673e-05, "loss": 0.0641, "step": 52810 }, { "epoch": 3.12414976045425, "grad_norm": 1.8585562705993652, "learning_rate": 2.415111854420138e-05, "loss": 0.0645, "step": 52820 }, { "epoch": 3.1247412314425977, "grad_norm": 1.879040002822876, "learning_rate": 2.4148841607128528e-05, "loss": 0.0775, "step": 52830 }, { "epoch": 3.1253327024309456, "grad_norm": 0.27072831988334656, "learning_rate": 2.414656433432266e-05, "loss": 0.0735, "step": 52840 }, { "epoch": 3.125924173419294, "grad_norm": 0.2457980066537857, "learning_rate": 2.414428672586734e-05, "loss": 0.0706, "step": 52850 }, { "epoch": 3.126515644407642, "grad_norm": 0.2630576193332672, "learning_rate": 2.414200878184617e-05, "loss": 0.0587, "step": 52860 }, { "epoch": 3.1271071153959897, "grad_norm": 0.21032574772834778, "learning_rate": 2.413973050234273e-05, "loss": 0.0532, "step": 52870 }, { "epoch": 3.127698586384338, "grad_norm": 1.346584439277649, "learning_rate": 2.4137451887440627e-05, "loss": 0.0648, "step": 52880 }, { "epoch": 3.128290057372686, "grad_norm": 0.27324607968330383, "learning_rate": 2.4135172937223476e-05, "loss": 0.0696, "step": 52890 }, { "epoch": 3.128881528361034, "grad_norm": 0.297845721244812, "learning_rate": 2.413289365177491e-05, "loss": 0.0645, "step": 52900 }, { "epoch": 3.1294729993493817, "grad_norm": 0.2651025056838989, "learning_rate": 2.4130614031178572e-05, "loss": 0.0692, "step": 52910 }, { "epoch": 3.13006447033773, "grad_norm": 0.195501908659935, "learning_rate": 2.4128334075518117e-05, "loss": 0.0513, "step": 52920 }, { "epoch": 3.130655941326078, "grad_norm": 0.3183828294277191, "learning_rate": 2.4126053784877213e-05, "loss": 0.0857, "step": 52930 }, { "epoch": 3.131247412314426, "grad_norm": 0.3141893148422241, "learning_rate": 2.4123773159339532e-05, "loss": 0.0773, "step": 52940 }, { "epoch": 3.131838883302774, "grad_norm": 0.214707612991333, "learning_rate": 2.4121492198988776e-05, "loss": 0.0666, "step": 52950 }, { "epoch": 3.132430354291122, "grad_norm": 0.2936789393424988, "learning_rate": 2.411921090390864e-05, "loss": 0.0618, "step": 52960 }, { "epoch": 3.13302182527947, "grad_norm": 0.33161452412605286, "learning_rate": 2.4116929274182846e-05, "loss": 0.0551, "step": 52970 }, { "epoch": 3.1336132962678183, "grad_norm": 0.30141979455947876, "learning_rate": 2.411464730989512e-05, "loss": 0.0759, "step": 52980 }, { "epoch": 3.134204767256166, "grad_norm": 0.34045279026031494, "learning_rate": 2.4112365011129204e-05, "loss": 0.081, "step": 52990 }, { "epoch": 3.134796238244514, "grad_norm": 0.20764616131782532, "learning_rate": 2.4110082377968848e-05, "loss": 0.0704, "step": 53000 }, { "epoch": 3.1353877092328624, "grad_norm": 0.2681753635406494, "learning_rate": 2.4107799410497825e-05, "loss": 0.0695, "step": 53010 }, { "epoch": 3.1359791802212102, "grad_norm": 0.25967222452163696, "learning_rate": 2.41055161087999e-05, "loss": 0.0574, "step": 53020 }, { "epoch": 3.136570651209558, "grad_norm": 0.3404262959957123, "learning_rate": 2.4103232472958882e-05, "loss": 0.0802, "step": 53030 }, { "epoch": 3.137162122197906, "grad_norm": 0.21549588441848755, "learning_rate": 2.410094850305855e-05, "loss": 0.0629, "step": 53040 }, { "epoch": 3.1377535931862544, "grad_norm": 0.2736116647720337, "learning_rate": 2.409866419918273e-05, "loss": 0.0633, "step": 53050 }, { "epoch": 3.1383450641746022, "grad_norm": 0.3338108956813812, "learning_rate": 2.4096379561415255e-05, "loss": 0.0679, "step": 53060 }, { "epoch": 3.13893653516295, "grad_norm": 0.3166361153125763, "learning_rate": 2.4094094589839952e-05, "loss": 0.0528, "step": 53070 }, { "epoch": 3.1395280061512985, "grad_norm": 0.2830023467540741, "learning_rate": 2.4091809284540674e-05, "loss": 0.0784, "step": 53080 }, { "epoch": 3.1401194771396463, "grad_norm": 0.3633255362510681, "learning_rate": 2.4089523645601292e-05, "loss": 0.0705, "step": 53090 }, { "epoch": 3.1407109481279942, "grad_norm": 0.2885197103023529, "learning_rate": 2.4087237673105675e-05, "loss": 0.0685, "step": 53100 }, { "epoch": 3.141302419116342, "grad_norm": 0.22423449158668518, "learning_rate": 2.4084951367137717e-05, "loss": 0.0598, "step": 53110 }, { "epoch": 3.1418938901046904, "grad_norm": 0.6736089587211609, "learning_rate": 2.4082664727781304e-05, "loss": 0.062, "step": 53120 }, { "epoch": 3.1424853610930383, "grad_norm": 0.38119134306907654, "learning_rate": 2.4080377755120367e-05, "loss": 0.0679, "step": 53130 }, { "epoch": 3.143076832081386, "grad_norm": 0.20114581286907196, "learning_rate": 2.407809044923882e-05, "loss": 0.0723, "step": 53140 }, { "epoch": 3.1436683030697345, "grad_norm": 0.3819166123867035, "learning_rate": 2.4075802810220597e-05, "loss": 0.0695, "step": 53150 }, { "epoch": 3.1442597740580824, "grad_norm": 0.33529677987098694, "learning_rate": 2.4073514838149656e-05, "loss": 0.0567, "step": 53160 }, { "epoch": 3.1448512450464303, "grad_norm": 0.21879087388515472, "learning_rate": 2.4071226533109952e-05, "loss": 0.0571, "step": 53170 }, { "epoch": 3.1454427160347787, "grad_norm": 0.3126198351383209, "learning_rate": 2.4068937895185456e-05, "loss": 0.0746, "step": 53180 }, { "epoch": 3.1460341870231265, "grad_norm": 0.32871466875076294, "learning_rate": 2.406664892446016e-05, "loss": 0.0807, "step": 53190 }, { "epoch": 3.1466256580114744, "grad_norm": 0.3245089650154114, "learning_rate": 2.4064359621018066e-05, "loss": 0.0614, "step": 53200 }, { "epoch": 3.1472171289998228, "grad_norm": 0.3937760591506958, "learning_rate": 2.406206998494317e-05, "loss": 0.0636, "step": 53210 }, { "epoch": 3.1478085999881706, "grad_norm": 0.26020827889442444, "learning_rate": 2.4059780016319504e-05, "loss": 0.0513, "step": 53220 }, { "epoch": 3.1484000709765185, "grad_norm": 0.24382169544696808, "learning_rate": 2.40574897152311e-05, "loss": 0.0787, "step": 53230 }, { "epoch": 3.1489915419648664, "grad_norm": 0.24321861565113068, "learning_rate": 2.4055199081762004e-05, "loss": 0.0749, "step": 53240 }, { "epoch": 3.1495830129532147, "grad_norm": 0.28173813223838806, "learning_rate": 2.4052908115996276e-05, "loss": 0.062, "step": 53250 }, { "epoch": 3.1501744839415626, "grad_norm": 0.23556962609291077, "learning_rate": 2.4050616818017993e-05, "loss": 0.0661, "step": 53260 }, { "epoch": 3.1507659549299105, "grad_norm": 0.1686282902956009, "learning_rate": 2.4048325187911223e-05, "loss": 0.0566, "step": 53270 }, { "epoch": 3.151357425918259, "grad_norm": 0.438043475151062, "learning_rate": 2.4046033225760074e-05, "loss": 0.0751, "step": 53280 }, { "epoch": 3.1519488969066067, "grad_norm": 0.3283003270626068, "learning_rate": 2.4043740931648653e-05, "loss": 0.0754, "step": 53290 }, { "epoch": 3.1525403678949546, "grad_norm": 0.24636635184288025, "learning_rate": 2.4041448305661073e-05, "loss": 0.0685, "step": 53300 }, { "epoch": 3.153131838883303, "grad_norm": 0.3401607275009155, "learning_rate": 2.4039155347881468e-05, "loss": 0.064, "step": 53310 }, { "epoch": 3.153723309871651, "grad_norm": 0.2686995565891266, "learning_rate": 2.403686205839399e-05, "loss": 0.0614, "step": 53320 }, { "epoch": 3.1543147808599987, "grad_norm": 0.30774030089378357, "learning_rate": 2.403456843728279e-05, "loss": 0.0842, "step": 53330 }, { "epoch": 3.154906251848347, "grad_norm": 0.3229697644710541, "learning_rate": 2.4032274484632028e-05, "loss": 0.069, "step": 53340 }, { "epoch": 3.155497722836695, "grad_norm": 0.280492901802063, "learning_rate": 2.40299802005259e-05, "loss": 0.0693, "step": 53350 }, { "epoch": 3.156089193825043, "grad_norm": 0.37666019797325134, "learning_rate": 2.4027685585048587e-05, "loss": 0.0624, "step": 53360 }, { "epoch": 3.1566806648133907, "grad_norm": 0.18529656529426575, "learning_rate": 2.4025390638284298e-05, "loss": 0.0454, "step": 53370 }, { "epoch": 3.157272135801739, "grad_norm": 0.2512343227863312, "learning_rate": 2.4023095360317255e-05, "loss": 0.0763, "step": 53380 }, { "epoch": 3.157863606790087, "grad_norm": 0.2861296534538269, "learning_rate": 2.402079975123168e-05, "loss": 0.0774, "step": 53390 }, { "epoch": 3.158455077778435, "grad_norm": 0.3045980930328369, "learning_rate": 2.401850381111182e-05, "loss": 0.0743, "step": 53400 }, { "epoch": 3.159046548766783, "grad_norm": 0.25382643938064575, "learning_rate": 2.4016207540041923e-05, "loss": 0.0625, "step": 53410 }, { "epoch": 3.159638019755131, "grad_norm": 0.2520484924316406, "learning_rate": 2.401391093810626e-05, "loss": 0.056, "step": 53420 }, { "epoch": 3.160229490743479, "grad_norm": 0.22031886875629425, "learning_rate": 2.4011614005389105e-05, "loss": 0.0726, "step": 53430 }, { "epoch": 3.1608209617318272, "grad_norm": 0.3155290484428406, "learning_rate": 2.400931674197475e-05, "loss": 0.0837, "step": 53440 }, { "epoch": 3.161412432720175, "grad_norm": 0.267925888299942, "learning_rate": 2.40070191479475e-05, "loss": 0.0688, "step": 53450 }, { "epoch": 3.162003903708523, "grad_norm": 0.6356840133666992, "learning_rate": 2.400472122339166e-05, "loss": 0.0673, "step": 53460 }, { "epoch": 3.1625953746968714, "grad_norm": 0.23914793133735657, "learning_rate": 2.4002422968391565e-05, "loss": 0.0512, "step": 53470 }, { "epoch": 3.1631868456852192, "grad_norm": 0.24778881669044495, "learning_rate": 2.4000124383031555e-05, "loss": 0.0841, "step": 53480 }, { "epoch": 3.163778316673567, "grad_norm": 0.26924657821655273, "learning_rate": 2.3997825467395973e-05, "loss": 0.0671, "step": 53490 }, { "epoch": 3.164369787661915, "grad_norm": 0.2772112190723419, "learning_rate": 2.399552622156918e-05, "loss": 0.0698, "step": 53500 }, { "epoch": 3.1649612586502633, "grad_norm": 0.23510275781154633, "learning_rate": 2.3993226645635567e-05, "loss": 0.0628, "step": 53510 }, { "epoch": 3.1655527296386112, "grad_norm": 0.1949782520532608, "learning_rate": 2.3990926739679506e-05, "loss": 0.0564, "step": 53520 }, { "epoch": 3.166144200626959, "grad_norm": 0.2875596880912781, "learning_rate": 2.39886265037854e-05, "loss": 0.068, "step": 53530 }, { "epoch": 3.1667356716153074, "grad_norm": 0.3077613413333893, "learning_rate": 2.3986325938037662e-05, "loss": 0.0724, "step": 53540 }, { "epoch": 3.1673271426036553, "grad_norm": 0.23052120208740234, "learning_rate": 2.3984025042520713e-05, "loss": 0.0652, "step": 53550 }, { "epoch": 3.167918613592003, "grad_norm": 0.22753475606441498, "learning_rate": 2.398172381731899e-05, "loss": 0.066, "step": 53560 }, { "epoch": 3.168510084580351, "grad_norm": 0.29081082344055176, "learning_rate": 2.397942226251694e-05, "loss": 0.0542, "step": 53570 }, { "epoch": 3.1691015555686994, "grad_norm": 0.23962120711803436, "learning_rate": 2.3977120378199028e-05, "loss": 0.0777, "step": 53580 }, { "epoch": 3.1696930265570473, "grad_norm": 0.28926655650138855, "learning_rate": 2.3974818164449714e-05, "loss": 0.0722, "step": 53590 }, { "epoch": 3.170284497545395, "grad_norm": 0.4123786687850952, "learning_rate": 2.397251562135349e-05, "loss": 0.0743, "step": 53600 }, { "epoch": 3.1708759685337435, "grad_norm": 0.3178366720676422, "learning_rate": 2.3970212748994853e-05, "loss": 0.0728, "step": 53610 }, { "epoch": 3.1714674395220914, "grad_norm": 0.18073870241641998, "learning_rate": 2.3967909547458305e-05, "loss": 0.051, "step": 53620 }, { "epoch": 3.1720589105104393, "grad_norm": 0.2837464511394501, "learning_rate": 2.3965606016828372e-05, "loss": 0.0768, "step": 53630 }, { "epoch": 3.1726503814987876, "grad_norm": 0.2802674472332001, "learning_rate": 2.3963302157189583e-05, "loss": 0.0737, "step": 53640 }, { "epoch": 3.1732418524871355, "grad_norm": 0.24866187572479248, "learning_rate": 2.396099796862648e-05, "loss": 0.0757, "step": 53650 }, { "epoch": 3.1738333234754834, "grad_norm": 0.23850663006305695, "learning_rate": 2.395869345122362e-05, "loss": 0.0717, "step": 53660 }, { "epoch": 3.1744247944638317, "grad_norm": 0.23324789106845856, "learning_rate": 2.395638860506558e-05, "loss": 0.0579, "step": 53670 }, { "epoch": 3.1750162654521796, "grad_norm": 0.39138102531433105, "learning_rate": 2.3954083430236936e-05, "loss": 0.0755, "step": 53680 }, { "epoch": 3.1756077364405275, "grad_norm": 0.2196347564458847, "learning_rate": 2.3951777926822272e-05, "loss": 0.0732, "step": 53690 }, { "epoch": 3.1761992074288754, "grad_norm": 0.28577831387519836, "learning_rate": 2.39494720949062e-05, "loss": 0.0748, "step": 53700 }, { "epoch": 3.1767906784172237, "grad_norm": 0.26994770765304565, "learning_rate": 2.394716593457334e-05, "loss": 0.0662, "step": 53710 }, { "epoch": 3.1773821494055716, "grad_norm": 0.2780914306640625, "learning_rate": 2.3944859445908308e-05, "loss": 0.0562, "step": 53720 }, { "epoch": 3.1779736203939195, "grad_norm": 0.7561632394790649, "learning_rate": 2.394255262899576e-05, "loss": 0.073, "step": 53730 }, { "epoch": 3.178565091382268, "grad_norm": 0.31493499875068665, "learning_rate": 2.394024548392034e-05, "loss": 0.0745, "step": 53740 }, { "epoch": 3.1791565623706157, "grad_norm": 0.21772153675556183, "learning_rate": 2.3937938010766714e-05, "loss": 0.0632, "step": 53750 }, { "epoch": 3.1797480333589636, "grad_norm": 0.29410403966903687, "learning_rate": 2.393563020961956e-05, "loss": 0.0663, "step": 53760 }, { "epoch": 3.180339504347312, "grad_norm": 0.33348768949508667, "learning_rate": 2.3933322080563564e-05, "loss": 0.0578, "step": 53770 }, { "epoch": 3.18093097533566, "grad_norm": 0.2799113094806671, "learning_rate": 2.393101362368343e-05, "loss": 0.0818, "step": 53780 }, { "epoch": 3.1815224463240077, "grad_norm": 0.324078768491745, "learning_rate": 2.392870483906387e-05, "loss": 0.0818, "step": 53790 }, { "epoch": 3.182113917312356, "grad_norm": 0.2335120588541031, "learning_rate": 2.3926395726789612e-05, "loss": 0.0646, "step": 53800 }, { "epoch": 3.182705388300704, "grad_norm": 0.25261929631233215, "learning_rate": 2.3924086286945386e-05, "loss": 0.0605, "step": 53810 }, { "epoch": 3.183296859289052, "grad_norm": 0.7106374502182007, "learning_rate": 2.3921776519615946e-05, "loss": 0.0617, "step": 53820 }, { "epoch": 3.1838883302773997, "grad_norm": 0.21020656824111938, "learning_rate": 2.3919466424886052e-05, "loss": 0.0698, "step": 53830 }, { "epoch": 3.184479801265748, "grad_norm": 0.4287221431732178, "learning_rate": 2.3917156002840476e-05, "loss": 0.0736, "step": 53840 }, { "epoch": 3.185071272254096, "grad_norm": 0.2901339530944824, "learning_rate": 2.3914845253564005e-05, "loss": 0.0756, "step": 53850 }, { "epoch": 3.185662743242444, "grad_norm": 0.1969919502735138, "learning_rate": 2.391253417714144e-05, "loss": 0.0645, "step": 53860 }, { "epoch": 3.186254214230792, "grad_norm": 0.2254004329442978, "learning_rate": 2.391022277365758e-05, "loss": 0.0589, "step": 53870 }, { "epoch": 3.18684568521914, "grad_norm": 0.4160335063934326, "learning_rate": 2.390791104319725e-05, "loss": 0.0684, "step": 53880 }, { "epoch": 3.187437156207488, "grad_norm": 0.3453950583934784, "learning_rate": 2.3905598985845284e-05, "loss": 0.0778, "step": 53890 }, { "epoch": 3.1880286271958362, "grad_norm": 0.38070234656333923, "learning_rate": 2.390328660168653e-05, "loss": 0.0659, "step": 53900 }, { "epoch": 3.188620098184184, "grad_norm": 0.2258126437664032, "learning_rate": 2.3900973890805838e-05, "loss": 0.0699, "step": 53910 }, { "epoch": 3.189211569172532, "grad_norm": 0.22162677347660065, "learning_rate": 2.3898660853288085e-05, "loss": 0.0488, "step": 53920 }, { "epoch": 3.1898030401608803, "grad_norm": 0.2704688012599945, "learning_rate": 2.389634748921815e-05, "loss": 0.0705, "step": 53930 }, { "epoch": 3.1903945111492282, "grad_norm": 0.3158307671546936, "learning_rate": 2.3894033798680924e-05, "loss": 0.0733, "step": 53940 }, { "epoch": 3.190985982137576, "grad_norm": 0.22324447333812714, "learning_rate": 2.3891719781761312e-05, "loss": 0.0706, "step": 53950 }, { "epoch": 3.191577453125924, "grad_norm": 0.21770168840885162, "learning_rate": 2.3889405438544233e-05, "loss": 0.054, "step": 53960 }, { "epoch": 3.1921689241142723, "grad_norm": 0.35470908880233765, "learning_rate": 2.388709076911461e-05, "loss": 0.055, "step": 53970 }, { "epoch": 3.19276039510262, "grad_norm": 0.3380069434642792, "learning_rate": 2.3884775773557386e-05, "loss": 0.0781, "step": 53980 }, { "epoch": 3.193351866090968, "grad_norm": 0.2248462438583374, "learning_rate": 2.388246045195752e-05, "loss": 0.0681, "step": 53990 }, { "epoch": 3.1939433370793164, "grad_norm": 0.3032626211643219, "learning_rate": 2.3880144804399972e-05, "loss": 0.065, "step": 54000 }, { "epoch": 3.1945348080676643, "grad_norm": 0.266975462436676, "learning_rate": 2.3877828830969727e-05, "loss": 0.0616, "step": 54010 }, { "epoch": 3.195126279056012, "grad_norm": 0.3458475172519684, "learning_rate": 2.3875512531751755e-05, "loss": 0.059, "step": 54020 }, { "epoch": 3.19571775004436, "grad_norm": 0.209462970495224, "learning_rate": 2.3873195906831075e-05, "loss": 0.0727, "step": 54030 }, { "epoch": 3.1963092210327084, "grad_norm": 0.22675706446170807, "learning_rate": 2.387087895629269e-05, "loss": 0.0694, "step": 54040 }, { "epoch": 3.1969006920210563, "grad_norm": 0.2630390226840973, "learning_rate": 2.386856168022163e-05, "loss": 0.0655, "step": 54050 }, { "epoch": 3.197492163009404, "grad_norm": 0.35981231927871704, "learning_rate": 2.386624407870293e-05, "loss": 0.0632, "step": 54060 }, { "epoch": 3.1980836339977525, "grad_norm": 0.2902248799800873, "learning_rate": 2.3863926151821638e-05, "loss": 0.061, "step": 54070 }, { "epoch": 3.1986751049861004, "grad_norm": 0.29207733273506165, "learning_rate": 2.386160789966281e-05, "loss": 0.0685, "step": 54080 }, { "epoch": 3.1992665759744483, "grad_norm": 0.25533249974250793, "learning_rate": 2.385928932231152e-05, "loss": 0.0688, "step": 54090 }, { "epoch": 3.1998580469627966, "grad_norm": 0.29362940788269043, "learning_rate": 2.3856970419852863e-05, "loss": 0.0687, "step": 54100 }, { "epoch": 3.2004495179511445, "grad_norm": 0.2591809630393982, "learning_rate": 2.3854651192371922e-05, "loss": 0.0624, "step": 54110 }, { "epoch": 3.2010409889394924, "grad_norm": 0.24161329865455627, "learning_rate": 2.3852331639953812e-05, "loss": 0.0564, "step": 54120 }, { "epoch": 3.2016324599278407, "grad_norm": 0.47037121653556824, "learning_rate": 2.3850011762683645e-05, "loss": 0.0769, "step": 54130 }, { "epoch": 3.2022239309161886, "grad_norm": 0.44197186827659607, "learning_rate": 2.384769156064657e-05, "loss": 0.0742, "step": 54140 }, { "epoch": 3.2028154019045365, "grad_norm": 0.18699391186237335, "learning_rate": 2.3845371033927713e-05, "loss": 0.0679, "step": 54150 }, { "epoch": 3.2034068728928844, "grad_norm": 0.23201517760753632, "learning_rate": 2.384305018261224e-05, "loss": 0.0566, "step": 54160 }, { "epoch": 3.2039983438812327, "grad_norm": 0.11439193040132523, "learning_rate": 2.3840729006785313e-05, "loss": 0.0604, "step": 54170 }, { "epoch": 3.2045898148695806, "grad_norm": 0.4170629382133484, "learning_rate": 2.3838407506532117e-05, "loss": 0.0788, "step": 54180 }, { "epoch": 3.2051812858579285, "grad_norm": 0.2382897436618805, "learning_rate": 2.383608568193784e-05, "loss": 0.0702, "step": 54190 }, { "epoch": 3.205772756846277, "grad_norm": 0.22828860580921173, "learning_rate": 2.3833763533087688e-05, "loss": 0.0722, "step": 54200 }, { "epoch": 3.2063642278346247, "grad_norm": 0.30413296818733215, "learning_rate": 2.3831441060066876e-05, "loss": 0.0633, "step": 54210 }, { "epoch": 3.2069556988229726, "grad_norm": 0.1978887915611267, "learning_rate": 2.3829118262960624e-05, "loss": 0.0561, "step": 54220 }, { "epoch": 3.207547169811321, "grad_norm": 0.28523510694503784, "learning_rate": 2.3826795141854186e-05, "loss": 0.0758, "step": 54230 }, { "epoch": 3.208138640799669, "grad_norm": 0.2433222383260727, "learning_rate": 2.38244716968328e-05, "loss": 0.0724, "step": 54240 }, { "epoch": 3.2087301117880167, "grad_norm": 0.30036863684654236, "learning_rate": 2.3822147927981733e-05, "loss": 0.0652, "step": 54250 }, { "epoch": 3.209321582776365, "grad_norm": 0.22361835837364197, "learning_rate": 2.381982383538626e-05, "loss": 0.0641, "step": 54260 }, { "epoch": 3.209913053764713, "grad_norm": 0.24581168591976166, "learning_rate": 2.3817499419131673e-05, "loss": 0.0508, "step": 54270 }, { "epoch": 3.210504524753061, "grad_norm": 0.30424410104751587, "learning_rate": 2.3815174679303257e-05, "loss": 0.0753, "step": 54280 }, { "epoch": 3.2110959957414087, "grad_norm": 0.3424946963787079, "learning_rate": 2.3812849615986334e-05, "loss": 0.0848, "step": 54290 }, { "epoch": 3.211687466729757, "grad_norm": 0.4516679048538208, "learning_rate": 2.3810524229266226e-05, "loss": 0.0922, "step": 54300 }, { "epoch": 3.212278937718105, "grad_norm": 0.2771846055984497, "learning_rate": 2.3808198519228267e-05, "loss": 0.0662, "step": 54310 }, { "epoch": 3.212870408706453, "grad_norm": 0.24118655920028687, "learning_rate": 2.3805872485957793e-05, "loss": 0.0677, "step": 54320 }, { "epoch": 3.213461879694801, "grad_norm": 0.26580238342285156, "learning_rate": 2.3803546129540173e-05, "loss": 0.07, "step": 54330 }, { "epoch": 3.214053350683149, "grad_norm": 0.25900065898895264, "learning_rate": 2.3801219450060772e-05, "loss": 0.0721, "step": 54340 }, { "epoch": 3.214644821671497, "grad_norm": 0.33028215169906616, "learning_rate": 2.379889244760498e-05, "loss": 0.0698, "step": 54350 }, { "epoch": 3.2152362926598452, "grad_norm": 0.3582227826118469, "learning_rate": 2.379656512225818e-05, "loss": 0.0618, "step": 54360 }, { "epoch": 3.215827763648193, "grad_norm": 0.20752385258674622, "learning_rate": 2.379423747410578e-05, "loss": 0.061, "step": 54370 }, { "epoch": 3.216419234636541, "grad_norm": 0.3374687135219574, "learning_rate": 2.3791909503233194e-05, "loss": 0.0733, "step": 54380 }, { "epoch": 3.2170107056248893, "grad_norm": 0.18826815485954285, "learning_rate": 2.378958120972586e-05, "loss": 0.0587, "step": 54390 }, { "epoch": 3.217602176613237, "grad_norm": 0.25722238421440125, "learning_rate": 2.378725259366921e-05, "loss": 0.0717, "step": 54400 }, { "epoch": 3.218193647601585, "grad_norm": 0.23555637896060944, "learning_rate": 2.3784923655148705e-05, "loss": 0.0569, "step": 54410 }, { "epoch": 3.218785118589933, "grad_norm": 0.27687713503837585, "learning_rate": 2.37825943942498e-05, "loss": 0.048, "step": 54420 }, { "epoch": 3.2193765895782813, "grad_norm": 0.7064971327781677, "learning_rate": 2.3780264811057982e-05, "loss": 0.0714, "step": 54430 }, { "epoch": 3.219968060566629, "grad_norm": 0.3972127139568329, "learning_rate": 2.3777934905658728e-05, "loss": 0.0789, "step": 54440 }, { "epoch": 3.220559531554977, "grad_norm": 0.21676336228847504, "learning_rate": 2.3775604678137546e-05, "loss": 0.0703, "step": 54450 }, { "epoch": 3.2211510025433254, "grad_norm": 0.2231106460094452, "learning_rate": 2.377327412857995e-05, "loss": 0.0682, "step": 54460 }, { "epoch": 3.2217424735316733, "grad_norm": 0.21732227504253387, "learning_rate": 2.3770943257071454e-05, "loss": 0.0545, "step": 54470 }, { "epoch": 3.222333944520021, "grad_norm": 0.30430877208709717, "learning_rate": 2.37686120636976e-05, "loss": 0.0768, "step": 54480 }, { "epoch": 3.222925415508369, "grad_norm": 0.36383214592933655, "learning_rate": 2.3766280548543936e-05, "loss": 0.0632, "step": 54490 }, { "epoch": 3.2235168864967174, "grad_norm": 0.2536865472793579, "learning_rate": 2.3763948711696018e-05, "loss": 0.0634, "step": 54500 }, { "epoch": 3.2241083574850653, "grad_norm": 0.2646339237689972, "learning_rate": 2.3761616553239417e-05, "loss": 0.0739, "step": 54510 }, { "epoch": 3.224699828473413, "grad_norm": 0.2419830709695816, "learning_rate": 2.3759284073259718e-05, "loss": 0.0586, "step": 54520 }, { "epoch": 3.2252912994617615, "grad_norm": 0.17413753271102905, "learning_rate": 2.3756951271842515e-05, "loss": 0.0756, "step": 54530 }, { "epoch": 3.2258827704501094, "grad_norm": 0.3134021461009979, "learning_rate": 2.375461814907341e-05, "loss": 0.0751, "step": 54540 }, { "epoch": 3.2264742414384573, "grad_norm": 0.2611316442489624, "learning_rate": 2.3752284705038024e-05, "loss": 0.0646, "step": 54550 }, { "epoch": 3.2270657124268056, "grad_norm": 0.2348349541425705, "learning_rate": 2.374995093982199e-05, "loss": 0.0729, "step": 54560 }, { "epoch": 3.2276571834151535, "grad_norm": 0.22626115381717682, "learning_rate": 2.3747616853510947e-05, "loss": 0.0474, "step": 54570 }, { "epoch": 3.2282486544035014, "grad_norm": 0.2225128412246704, "learning_rate": 2.3745282446190548e-05, "loss": 0.0757, "step": 54580 }, { "epoch": 3.2288401253918497, "grad_norm": 0.2144184559583664, "learning_rate": 2.3742947717946462e-05, "loss": 0.0645, "step": 54590 }, { "epoch": 3.2294315963801976, "grad_norm": 0.5779950618743896, "learning_rate": 2.3740612668864358e-05, "loss": 0.0689, "step": 54600 }, { "epoch": 3.2300230673685455, "grad_norm": 0.2622970938682556, "learning_rate": 2.3738277299029935e-05, "loss": 0.0599, "step": 54610 }, { "epoch": 3.2306145383568934, "grad_norm": 0.2843441069126129, "learning_rate": 2.3735941608528886e-05, "loss": 0.0578, "step": 54620 }, { "epoch": 3.2312060093452417, "grad_norm": 0.32057851552963257, "learning_rate": 2.373360559744692e-05, "loss": 0.0713, "step": 54630 }, { "epoch": 3.2317974803335896, "grad_norm": 0.3333110213279724, "learning_rate": 2.3731269265869774e-05, "loss": 0.0758, "step": 54640 }, { "epoch": 3.2323889513219375, "grad_norm": 0.21298569440841675, "learning_rate": 2.372893261388317e-05, "loss": 0.0771, "step": 54650 }, { "epoch": 3.232980422310286, "grad_norm": 0.3640173077583313, "learning_rate": 2.372659564157287e-05, "loss": 0.0606, "step": 54660 }, { "epoch": 3.2335718932986337, "grad_norm": 0.24030165374279022, "learning_rate": 2.372425834902462e-05, "loss": 0.0595, "step": 54670 }, { "epoch": 3.2341633642869816, "grad_norm": 0.3276984691619873, "learning_rate": 2.3721920736324195e-05, "loss": 0.0795, "step": 54680 }, { "epoch": 3.23475483527533, "grad_norm": 0.2046201527118683, "learning_rate": 2.3719582803557382e-05, "loss": 0.0738, "step": 54690 }, { "epoch": 3.235346306263678, "grad_norm": 0.38023680448532104, "learning_rate": 2.371724455080997e-05, "loss": 0.0643, "step": 54700 }, { "epoch": 3.2359377772520257, "grad_norm": 0.24305787682533264, "learning_rate": 2.3714905978167772e-05, "loss": 0.0671, "step": 54710 }, { "epoch": 3.236529248240374, "grad_norm": 0.3471966087818146, "learning_rate": 2.37125670857166e-05, "loss": 0.0706, "step": 54720 }, { "epoch": 3.237120719228722, "grad_norm": 0.24028123915195465, "learning_rate": 2.371022787354229e-05, "loss": 0.0804, "step": 54730 }, { "epoch": 3.23771219021707, "grad_norm": 0.32224321365356445, "learning_rate": 2.3707888341730673e-05, "loss": 0.0763, "step": 54740 }, { "epoch": 3.2383036612054177, "grad_norm": 0.2676287293434143, "learning_rate": 2.3705548490367617e-05, "loss": 0.0708, "step": 54750 }, { "epoch": 3.238895132193766, "grad_norm": 0.24476931989192963, "learning_rate": 2.370320831953897e-05, "loss": 0.0603, "step": 54760 }, { "epoch": 3.239486603182114, "grad_norm": 0.18841753900051117, "learning_rate": 2.370086782933062e-05, "loss": 0.0616, "step": 54770 }, { "epoch": 3.240078074170462, "grad_norm": 0.30466222763061523, "learning_rate": 2.369852701982846e-05, "loss": 0.0821, "step": 54780 }, { "epoch": 3.24066954515881, "grad_norm": 0.30774497985839844, "learning_rate": 2.3696185891118377e-05, "loss": 0.0712, "step": 54790 }, { "epoch": 3.241261016147158, "grad_norm": 0.23425737023353577, "learning_rate": 2.369384444328629e-05, "loss": 0.0637, "step": 54800 }, { "epoch": 3.241852487135506, "grad_norm": 0.21024344861507416, "learning_rate": 2.3691502676418123e-05, "loss": 0.0494, "step": 54810 }, { "epoch": 3.242443958123854, "grad_norm": 0.2060362696647644, "learning_rate": 2.368916059059981e-05, "loss": 0.0619, "step": 54820 }, { "epoch": 3.243035429112202, "grad_norm": 0.238154336810112, "learning_rate": 2.3686818185917302e-05, "loss": 0.0733, "step": 54830 }, { "epoch": 3.24362690010055, "grad_norm": 0.30010560154914856, "learning_rate": 2.368447546245654e-05, "loss": 0.0676, "step": 54840 }, { "epoch": 3.2442183710888983, "grad_norm": 0.26899006962776184, "learning_rate": 2.3682132420303528e-05, "loss": 0.0716, "step": 54850 }, { "epoch": 3.244809842077246, "grad_norm": 0.2674799859523773, "learning_rate": 2.3679789059544214e-05, "loss": 0.0649, "step": 54860 }, { "epoch": 3.245401313065594, "grad_norm": 0.19851289689540863, "learning_rate": 2.3677445380264608e-05, "loss": 0.0503, "step": 54870 }, { "epoch": 3.245992784053942, "grad_norm": 0.27190056443214417, "learning_rate": 2.367510138255072e-05, "loss": 0.0768, "step": 54880 }, { "epoch": 3.2465842550422903, "grad_norm": 0.25922203063964844, "learning_rate": 2.3672757066488553e-05, "loss": 0.0639, "step": 54890 }, { "epoch": 3.247175726030638, "grad_norm": 0.354257196187973, "learning_rate": 2.3670412432164147e-05, "loss": 0.0778, "step": 54900 }, { "epoch": 3.247767197018986, "grad_norm": 0.1591290980577469, "learning_rate": 2.3668067479663538e-05, "loss": 0.057, "step": 54910 }, { "epoch": 3.2483586680073344, "grad_norm": 0.18159203231334686, "learning_rate": 2.3665722209072783e-05, "loss": 0.0585, "step": 54920 }, { "epoch": 3.2489501389956823, "grad_norm": 0.3201538920402527, "learning_rate": 2.3663376620477938e-05, "loss": 0.077, "step": 54930 }, { "epoch": 3.24954160998403, "grad_norm": 0.2977270185947418, "learning_rate": 2.3661030713965082e-05, "loss": 0.0743, "step": 54940 }, { "epoch": 3.250133080972378, "grad_norm": 0.29676949977874756, "learning_rate": 2.365868448962031e-05, "loss": 0.0701, "step": 54950 }, { "epoch": 3.2507245519607264, "grad_norm": 0.1769467443227768, "learning_rate": 2.3656337947529707e-05, "loss": 0.0625, "step": 54960 }, { "epoch": 3.2513160229490743, "grad_norm": 0.37505894899368286, "learning_rate": 2.3653991087779393e-05, "loss": 0.0623, "step": 54970 }, { "epoch": 3.251907493937422, "grad_norm": 0.22130617499351501, "learning_rate": 2.3651643910455484e-05, "loss": 0.0839, "step": 54980 }, { "epoch": 3.2524989649257705, "grad_norm": 0.3155069053173065, "learning_rate": 2.3649296415644125e-05, "loss": 0.0688, "step": 54990 }, { "epoch": 3.2530904359141184, "grad_norm": 0.3583519458770752, "learning_rate": 2.3646948603431445e-05, "loss": 0.071, "step": 55000 }, { "epoch": 3.2536819069024663, "grad_norm": 0.3026479482650757, "learning_rate": 2.3644600473903615e-05, "loss": 0.0593, "step": 55010 }, { "epoch": 3.2542733778908146, "grad_norm": 0.39991119503974915, "learning_rate": 2.36422520271468e-05, "loss": 0.065, "step": 55020 }, { "epoch": 3.2548648488791625, "grad_norm": 0.21950456500053406, "learning_rate": 2.3639903263247176e-05, "loss": 0.0788, "step": 55030 }, { "epoch": 3.2554563198675104, "grad_norm": 0.41473904252052307, "learning_rate": 2.363755418229094e-05, "loss": 0.0825, "step": 55040 }, { "epoch": 3.2560477908558587, "grad_norm": 0.23805676400661469, "learning_rate": 2.3635204784364294e-05, "loss": 0.0655, "step": 55050 }, { "epoch": 3.2566392618442066, "grad_norm": 0.3118124306201935, "learning_rate": 2.3632855069553452e-05, "loss": 0.0508, "step": 55060 }, { "epoch": 3.2572307328325545, "grad_norm": 0.2209084928035736, "learning_rate": 2.363050503794464e-05, "loss": 0.0581, "step": 55070 }, { "epoch": 3.2578222038209024, "grad_norm": 0.36786964535713196, "learning_rate": 2.3628154689624108e-05, "loss": 0.0924, "step": 55080 }, { "epoch": 3.2584136748092507, "grad_norm": 0.3091426193714142, "learning_rate": 2.3625804024678086e-05, "loss": 0.068, "step": 55090 }, { "epoch": 3.2590051457975986, "grad_norm": 0.2595883011817932, "learning_rate": 2.3623453043192853e-05, "loss": 0.0725, "step": 55100 }, { "epoch": 3.2595966167859465, "grad_norm": 0.25896593928337097, "learning_rate": 2.3621101745254677e-05, "loss": 0.0691, "step": 55110 }, { "epoch": 3.260188087774295, "grad_norm": 0.25016531348228455, "learning_rate": 2.3618750130949838e-05, "loss": 0.0612, "step": 55120 }, { "epoch": 3.2607795587626427, "grad_norm": 0.27234113216400146, "learning_rate": 2.3616398200364642e-05, "loss": 0.0722, "step": 55130 }, { "epoch": 3.2613710297509906, "grad_norm": 0.2442077249288559, "learning_rate": 2.3614045953585387e-05, "loss": 0.0798, "step": 55140 }, { "epoch": 3.261962500739339, "grad_norm": 0.23006390035152435, "learning_rate": 2.3611693390698406e-05, "loss": 0.077, "step": 55150 }, { "epoch": 3.262553971727687, "grad_norm": 0.28582534193992615, "learning_rate": 2.3609340511790016e-05, "loss": 0.0698, "step": 55160 }, { "epoch": 3.2631454427160347, "grad_norm": 0.15524055063724518, "learning_rate": 2.360698731694657e-05, "loss": 0.0524, "step": 55170 }, { "epoch": 3.263736913704383, "grad_norm": 0.3152953088283539, "learning_rate": 2.3604633806254418e-05, "loss": 0.0798, "step": 55180 }, { "epoch": 3.264328384692731, "grad_norm": 0.2751936912536621, "learning_rate": 2.360227997979993e-05, "loss": 0.0698, "step": 55190 }, { "epoch": 3.264919855681079, "grad_norm": 0.21918001770973206, "learning_rate": 2.359992583766948e-05, "loss": 0.0622, "step": 55200 }, { "epoch": 3.2655113266694267, "grad_norm": 0.33682572841644287, "learning_rate": 2.359757137994946e-05, "loss": 0.0641, "step": 55210 }, { "epoch": 3.266102797657775, "grad_norm": 0.21547679603099823, "learning_rate": 2.359521660672627e-05, "loss": 0.0567, "step": 55220 }, { "epoch": 3.266694268646123, "grad_norm": 0.44988059997558594, "learning_rate": 2.359286151808632e-05, "loss": 0.0761, "step": 55230 }, { "epoch": 3.2672857396344708, "grad_norm": 0.27099478244781494, "learning_rate": 2.359050611411604e-05, "loss": 0.0743, "step": 55240 }, { "epoch": 3.267877210622819, "grad_norm": 0.26745033264160156, "learning_rate": 2.358815039490186e-05, "loss": 0.071, "step": 55250 }, { "epoch": 3.268468681611167, "grad_norm": 0.220061793923378, "learning_rate": 2.3585794360530227e-05, "loss": 0.0545, "step": 55260 }, { "epoch": 3.269060152599515, "grad_norm": 0.47135332226753235, "learning_rate": 2.358343801108761e-05, "loss": 0.0526, "step": 55270 }, { "epoch": 3.269651623587863, "grad_norm": 0.3234444558620453, "learning_rate": 2.358108134666047e-05, "loss": 0.0738, "step": 55280 }, { "epoch": 3.270243094576211, "grad_norm": 0.2848387658596039, "learning_rate": 2.357872436733529e-05, "loss": 0.0727, "step": 55290 }, { "epoch": 3.270834565564559, "grad_norm": 0.26846879720687866, "learning_rate": 2.357636707319856e-05, "loss": 0.0707, "step": 55300 }, { "epoch": 3.2714260365529073, "grad_norm": 0.2949610948562622, "learning_rate": 2.3574009464336797e-05, "loss": 0.064, "step": 55310 }, { "epoch": 3.272017507541255, "grad_norm": 0.2659359276294708, "learning_rate": 2.357165154083651e-05, "loss": 0.055, "step": 55320 }, { "epoch": 3.272608978529603, "grad_norm": 0.32329946756362915, "learning_rate": 2.356929330278422e-05, "loss": 0.0778, "step": 55330 }, { "epoch": 3.273200449517951, "grad_norm": 0.25088033080101013, "learning_rate": 2.3566934750266486e-05, "loss": 0.0664, "step": 55340 }, { "epoch": 3.2737919205062993, "grad_norm": 0.23783057928085327, "learning_rate": 2.356457588336984e-05, "loss": 0.0586, "step": 55350 }, { "epoch": 3.274383391494647, "grad_norm": 0.23781508207321167, "learning_rate": 2.3562216702180854e-05, "loss": 0.0598, "step": 55360 }, { "epoch": 3.274974862482995, "grad_norm": 0.19962182641029358, "learning_rate": 2.35598572067861e-05, "loss": 0.0612, "step": 55370 }, { "epoch": 3.2755663334713434, "grad_norm": 0.40783455967903137, "learning_rate": 2.355749739727217e-05, "loss": 0.0833, "step": 55380 }, { "epoch": 3.2761578044596913, "grad_norm": 0.21507735550403595, "learning_rate": 2.3555137273725645e-05, "loss": 0.0664, "step": 55390 }, { "epoch": 3.276749275448039, "grad_norm": 0.3595885932445526, "learning_rate": 2.3552776836233152e-05, "loss": 0.0723, "step": 55400 }, { "epoch": 3.277340746436387, "grad_norm": 0.19637736678123474, "learning_rate": 2.3550416084881305e-05, "loss": 0.0619, "step": 55410 }, { "epoch": 3.2779322174247354, "grad_norm": 0.281281054019928, "learning_rate": 2.3548055019756734e-05, "loss": 0.0637, "step": 55420 }, { "epoch": 3.2785236884130833, "grad_norm": 0.2948489189147949, "learning_rate": 2.3545693640946082e-05, "loss": 0.0731, "step": 55430 }, { "epoch": 3.279115159401431, "grad_norm": 0.21855171024799347, "learning_rate": 2.3543331948536012e-05, "loss": 0.0668, "step": 55440 }, { "epoch": 3.2797066303897795, "grad_norm": 0.24931351840496063, "learning_rate": 2.354096994261318e-05, "loss": 0.0744, "step": 55450 }, { "epoch": 3.2802981013781274, "grad_norm": 0.2847099304199219, "learning_rate": 2.353860762326427e-05, "loss": 0.0817, "step": 55460 }, { "epoch": 3.2808895723664753, "grad_norm": 0.2084139883518219, "learning_rate": 2.3536244990575965e-05, "loss": 0.0559, "step": 55470 }, { "epoch": 3.2814810433548236, "grad_norm": 0.3302476704120636, "learning_rate": 2.3533882044634976e-05, "loss": 0.0681, "step": 55480 }, { "epoch": 3.2820725143431715, "grad_norm": 0.32305872440338135, "learning_rate": 2.353151878552801e-05, "loss": 0.074, "step": 55490 }, { "epoch": 3.2826639853315194, "grad_norm": 0.25687238574028015, "learning_rate": 2.3529155213341794e-05, "loss": 0.0583, "step": 55500 }, { "epoch": 3.2832554563198677, "grad_norm": 0.28989702463150024, "learning_rate": 2.3526791328163053e-05, "loss": 0.0691, "step": 55510 }, { "epoch": 3.2838469273082156, "grad_norm": 0.3021659553050995, "learning_rate": 2.3524427130078547e-05, "loss": 0.0529, "step": 55520 }, { "epoch": 3.2844383982965635, "grad_norm": 0.35478460788726807, "learning_rate": 2.352206261917503e-05, "loss": 0.0732, "step": 55530 }, { "epoch": 3.2850298692849114, "grad_norm": 0.3796676695346832, "learning_rate": 2.3519697795539273e-05, "loss": 0.0749, "step": 55540 }, { "epoch": 3.2856213402732597, "grad_norm": 0.23426400125026703, "learning_rate": 2.351733265925805e-05, "loss": 0.0727, "step": 55550 }, { "epoch": 3.2862128112616076, "grad_norm": 0.45584774017333984, "learning_rate": 2.3514967210418164e-05, "loss": 0.0599, "step": 55560 }, { "epoch": 3.2868042822499555, "grad_norm": 0.22836463153362274, "learning_rate": 2.3512601449106415e-05, "loss": 0.0572, "step": 55570 }, { "epoch": 3.287395753238304, "grad_norm": 0.29350554943084717, "learning_rate": 2.3510235375409617e-05, "loss": 0.0739, "step": 55580 }, { "epoch": 3.2879872242266517, "grad_norm": 0.30355626344680786, "learning_rate": 2.3507868989414596e-05, "loss": 0.0719, "step": 55590 }, { "epoch": 3.2885786952149996, "grad_norm": 0.22662216424942017, "learning_rate": 2.35055022912082e-05, "loss": 0.0691, "step": 55600 }, { "epoch": 3.289170166203348, "grad_norm": 0.2145370990037918, "learning_rate": 2.3503135280877268e-05, "loss": 0.0598, "step": 55610 }, { "epoch": 3.289761637191696, "grad_norm": 0.4064503014087677, "learning_rate": 2.350076795850867e-05, "loss": 0.0593, "step": 55620 }, { "epoch": 3.2903531081800437, "grad_norm": 0.3139859735965729, "learning_rate": 2.3498400324189276e-05, "loss": 0.0696, "step": 55630 }, { "epoch": 3.290944579168392, "grad_norm": 0.3105955719947815, "learning_rate": 2.3496032378005967e-05, "loss": 0.0568, "step": 55640 }, { "epoch": 3.29153605015674, "grad_norm": 0.2817492187023163, "learning_rate": 2.3493664120045643e-05, "loss": 0.0754, "step": 55650 }, { "epoch": 3.2921275211450878, "grad_norm": 0.21360693871974945, "learning_rate": 2.349129555039521e-05, "loss": 0.0577, "step": 55660 }, { "epoch": 3.2927189921334357, "grad_norm": 0.24687866866588593, "learning_rate": 2.348892666914159e-05, "loss": 0.0557, "step": 55670 }, { "epoch": 3.293310463121784, "grad_norm": 0.2604109048843384, "learning_rate": 2.348655747637171e-05, "loss": 0.0837, "step": 55680 }, { "epoch": 3.293901934110132, "grad_norm": 0.25851699709892273, "learning_rate": 2.3484187972172513e-05, "loss": 0.0728, "step": 55690 }, { "epoch": 3.2944934050984798, "grad_norm": 0.20215526223182678, "learning_rate": 2.348181815663095e-05, "loss": 0.0681, "step": 55700 }, { "epoch": 3.295084876086828, "grad_norm": 0.34471237659454346, "learning_rate": 2.347944802983399e-05, "loss": 0.0585, "step": 55710 }, { "epoch": 3.295676347075176, "grad_norm": 0.3512226939201355, "learning_rate": 2.34770775918686e-05, "loss": 0.0616, "step": 55720 }, { "epoch": 3.296267818063524, "grad_norm": 0.2922793924808502, "learning_rate": 2.3474706842821785e-05, "loss": 0.0855, "step": 55730 }, { "epoch": 3.296859289051872, "grad_norm": 0.2733229398727417, "learning_rate": 2.3472335782780526e-05, "loss": 0.0779, "step": 55740 }, { "epoch": 3.29745076004022, "grad_norm": 0.34944966435432434, "learning_rate": 2.3469964411831843e-05, "loss": 0.0686, "step": 55750 }, { "epoch": 3.298042231028568, "grad_norm": 0.17611435055732727, "learning_rate": 2.3467592730062753e-05, "loss": 0.0629, "step": 55760 }, { "epoch": 3.2986337020169163, "grad_norm": 0.3235274851322174, "learning_rate": 2.3465220737560286e-05, "loss": 0.059, "step": 55770 }, { "epoch": 3.299225173005264, "grad_norm": 0.3184773623943329, "learning_rate": 2.3462848434411496e-05, "loss": 0.0716, "step": 55780 }, { "epoch": 3.299816643993612, "grad_norm": 0.24328801035881042, "learning_rate": 2.3460475820703435e-05, "loss": 0.0661, "step": 55790 }, { "epoch": 3.30040811498196, "grad_norm": 0.17414002120494843, "learning_rate": 2.345810289652317e-05, "loss": 0.0612, "step": 55800 }, { "epoch": 3.3009995859703083, "grad_norm": 0.24618323147296906, "learning_rate": 2.345572966195777e-05, "loss": 0.0581, "step": 55810 }, { "epoch": 3.301591056958656, "grad_norm": 0.34359124302864075, "learning_rate": 2.345335611709434e-05, "loss": 0.0583, "step": 55820 }, { "epoch": 3.302182527947004, "grad_norm": 0.40780654549598694, "learning_rate": 2.345098226201998e-05, "loss": 0.0792, "step": 55830 }, { "epoch": 3.3027739989353524, "grad_norm": 0.28789862990379333, "learning_rate": 2.3448608096821795e-05, "loss": 0.0746, "step": 55840 }, { "epoch": 3.3033654699237003, "grad_norm": 0.25850069522857666, "learning_rate": 2.344623362158691e-05, "loss": 0.0632, "step": 55850 }, { "epoch": 3.303956940912048, "grad_norm": 0.16011136770248413, "learning_rate": 2.344385883640247e-05, "loss": 0.0589, "step": 55860 }, { "epoch": 3.304548411900396, "grad_norm": 0.17869877815246582, "learning_rate": 2.3441483741355608e-05, "loss": 0.0472, "step": 55870 }, { "epoch": 3.3051398828887444, "grad_norm": 0.33179301023483276, "learning_rate": 2.3439108336533494e-05, "loss": 0.0831, "step": 55880 }, { "epoch": 3.3057313538770923, "grad_norm": 0.22358812391757965, "learning_rate": 2.3436732622023293e-05, "loss": 0.0716, "step": 55890 }, { "epoch": 3.30632282486544, "grad_norm": 0.2657735347747803, "learning_rate": 2.3434356597912182e-05, "loss": 0.069, "step": 55900 }, { "epoch": 3.3069142958537885, "grad_norm": 0.3219412565231323, "learning_rate": 2.3431980264287362e-05, "loss": 0.0654, "step": 55910 }, { "epoch": 3.3075057668421364, "grad_norm": 0.2232288122177124, "learning_rate": 2.3429603621236025e-05, "loss": 0.0537, "step": 55920 }, { "epoch": 3.3080972378304843, "grad_norm": 0.34084728360176086, "learning_rate": 2.3427226668845404e-05, "loss": 0.0849, "step": 55930 }, { "epoch": 3.3086887088188326, "grad_norm": 0.27688777446746826, "learning_rate": 2.342484940720271e-05, "loss": 0.0754, "step": 55940 }, { "epoch": 3.3092801798071805, "grad_norm": 0.21054717898368835, "learning_rate": 2.3422471836395188e-05, "loss": 0.0723, "step": 55950 }, { "epoch": 3.3098716507955284, "grad_norm": 0.1795228272676468, "learning_rate": 2.3420093956510086e-05, "loss": 0.057, "step": 55960 }, { "epoch": 3.3104631217838767, "grad_norm": 0.2183246910572052, "learning_rate": 2.341771576763466e-05, "loss": 0.0564, "step": 55970 }, { "epoch": 3.3110545927722246, "grad_norm": 0.312862753868103, "learning_rate": 2.3415337269856183e-05, "loss": 0.0698, "step": 55980 }, { "epoch": 3.3116460637605725, "grad_norm": 0.37468257546424866, "learning_rate": 2.341295846326195e-05, "loss": 0.0666, "step": 55990 }, { "epoch": 3.3122375347489204, "grad_norm": 0.3046177625656128, "learning_rate": 2.341057934793924e-05, "loss": 0.0739, "step": 56000 }, { "epoch": 3.3128290057372687, "grad_norm": 0.24479295313358307, "learning_rate": 2.340819992397536e-05, "loss": 0.0645, "step": 56010 }, { "epoch": 3.3134204767256166, "grad_norm": 0.214075967669487, "learning_rate": 2.3405820191457646e-05, "loss": 0.0458, "step": 56020 }, { "epoch": 3.3140119477139645, "grad_norm": 0.2933814525604248, "learning_rate": 2.3403440150473404e-05, "loss": 0.0716, "step": 56030 }, { "epoch": 3.314603418702313, "grad_norm": 0.3456864356994629, "learning_rate": 2.3401059801109982e-05, "loss": 0.0719, "step": 56040 }, { "epoch": 3.3151948896906607, "grad_norm": 0.38491201400756836, "learning_rate": 2.3398679143454736e-05, "loss": 0.0666, "step": 56050 }, { "epoch": 3.3157863606790086, "grad_norm": 0.24662712216377258, "learning_rate": 2.339629817759502e-05, "loss": 0.0689, "step": 56060 }, { "epoch": 3.316377831667357, "grad_norm": 0.27154064178466797, "learning_rate": 2.3393916903618214e-05, "loss": 0.0648, "step": 56070 }, { "epoch": 3.3169693026557048, "grad_norm": 0.378583163022995, "learning_rate": 2.3391535321611702e-05, "loss": 0.0914, "step": 56080 }, { "epoch": 3.3175607736440527, "grad_norm": 0.22422757744789124, "learning_rate": 2.3389153431662882e-05, "loss": 0.077, "step": 56090 }, { "epoch": 3.318152244632401, "grad_norm": 0.30172762274742126, "learning_rate": 2.338677123385915e-05, "loss": 0.0677, "step": 56100 }, { "epoch": 3.318743715620749, "grad_norm": 0.23536942899227142, "learning_rate": 2.3384388728287944e-05, "loss": 0.0708, "step": 56110 }, { "epoch": 3.3193351866090968, "grad_norm": 0.220445916056633, "learning_rate": 2.3382005915036684e-05, "loss": 0.054, "step": 56120 }, { "epoch": 3.3199266575974447, "grad_norm": 3.1893935203552246, "learning_rate": 2.337962279419281e-05, "loss": 0.0825, "step": 56130 }, { "epoch": 3.320518128585793, "grad_norm": 0.24221481382846832, "learning_rate": 2.3377239365843773e-05, "loss": 0.0732, "step": 56140 }, { "epoch": 3.321109599574141, "grad_norm": 0.19879117608070374, "learning_rate": 2.3374855630077044e-05, "loss": 0.0692, "step": 56150 }, { "epoch": 3.3217010705624888, "grad_norm": 5.9246039390563965, "learning_rate": 2.3372471586980098e-05, "loss": 0.0686, "step": 56160 }, { "epoch": 3.322292541550837, "grad_norm": 0.5334119200706482, "learning_rate": 2.3370087236640415e-05, "loss": 0.0623, "step": 56170 }, { "epoch": 3.322884012539185, "grad_norm": 0.40443724393844604, "learning_rate": 2.33677025791455e-05, "loss": 0.0789, "step": 56180 }, { "epoch": 3.323475483527533, "grad_norm": 0.39035752415657043, "learning_rate": 2.336531761458286e-05, "loss": 0.0715, "step": 56190 }, { "epoch": 3.324066954515881, "grad_norm": 0.2634991705417633, "learning_rate": 2.3362932343040013e-05, "loss": 0.0759, "step": 56200 }, { "epoch": 3.324658425504229, "grad_norm": 0.21822525560855865, "learning_rate": 2.3360546764604492e-05, "loss": 0.0691, "step": 56210 }, { "epoch": 3.325249896492577, "grad_norm": 0.29439815878868103, "learning_rate": 2.3358160879363842e-05, "loss": 0.0497, "step": 56220 }, { "epoch": 3.3258413674809253, "grad_norm": 0.2878245413303375, "learning_rate": 2.3355774687405617e-05, "loss": 0.0761, "step": 56230 }, { "epoch": 3.326432838469273, "grad_norm": 0.2782304584980011, "learning_rate": 2.3353388188817376e-05, "loss": 0.0704, "step": 56240 }, { "epoch": 3.327024309457621, "grad_norm": 0.3196043372154236, "learning_rate": 2.335100138368671e-05, "loss": 0.0613, "step": 56250 }, { "epoch": 3.327615780445969, "grad_norm": 0.2238444834947586, "learning_rate": 2.3348614272101196e-05, "loss": 0.062, "step": 56260 }, { "epoch": 3.3282072514343173, "grad_norm": 0.24451257288455963, "learning_rate": 2.3346226854148432e-05, "loss": 0.0589, "step": 56270 }, { "epoch": 3.328798722422665, "grad_norm": 0.29529425501823425, "learning_rate": 2.3343839129916038e-05, "loss": 0.0758, "step": 56280 }, { "epoch": 3.329390193411013, "grad_norm": 0.19926735758781433, "learning_rate": 2.3341451099491625e-05, "loss": 0.0748, "step": 56290 }, { "epoch": 3.3299816643993614, "grad_norm": 0.28852444887161255, "learning_rate": 2.3339062762962834e-05, "loss": 0.0673, "step": 56300 }, { "epoch": 3.3305731353877093, "grad_norm": 0.24516096711158752, "learning_rate": 2.3336674120417302e-05, "loss": 0.0631, "step": 56310 }, { "epoch": 3.331164606376057, "grad_norm": 0.27052614092826843, "learning_rate": 2.3334285171942694e-05, "loss": 0.0556, "step": 56320 }, { "epoch": 3.331756077364405, "grad_norm": 0.30242031812667847, "learning_rate": 2.3331895917626668e-05, "loss": 0.0809, "step": 56330 }, { "epoch": 3.3323475483527534, "grad_norm": 0.25975218415260315, "learning_rate": 2.332950635755691e-05, "loss": 0.0689, "step": 56340 }, { "epoch": 3.3329390193411013, "grad_norm": 0.2227345108985901, "learning_rate": 2.3327116491821105e-05, "loss": 0.0665, "step": 56350 }, { "epoch": 3.333530490329449, "grad_norm": 0.33750253915786743, "learning_rate": 2.332472632050695e-05, "loss": 0.069, "step": 56360 }, { "epoch": 3.3341219613177975, "grad_norm": 0.1911894530057907, "learning_rate": 2.3322335843702157e-05, "loss": 0.0569, "step": 56370 }, { "epoch": 3.3347134323061454, "grad_norm": 0.5361012816429138, "learning_rate": 2.331994506149446e-05, "loss": 0.0835, "step": 56380 }, { "epoch": 3.3353049032944933, "grad_norm": 0.24751421809196472, "learning_rate": 2.3317553973971575e-05, "loss": 0.0745, "step": 56390 }, { "epoch": 3.3358963742828416, "grad_norm": 0.18504227697849274, "learning_rate": 2.331516258122126e-05, "loss": 0.0615, "step": 56400 }, { "epoch": 3.3364878452711895, "grad_norm": 0.1480644941329956, "learning_rate": 2.3312770883331266e-05, "loss": 0.0566, "step": 56410 }, { "epoch": 3.3370793162595374, "grad_norm": 0.2592058479785919, "learning_rate": 2.3310378880389372e-05, "loss": 0.0649, "step": 56420 }, { "epoch": 3.3376707872478857, "grad_norm": 0.22261187434196472, "learning_rate": 2.330798657248334e-05, "loss": 0.0819, "step": 56430 }, { "epoch": 3.3382622582362336, "grad_norm": 0.217985600233078, "learning_rate": 2.3305593959700964e-05, "loss": 0.0734, "step": 56440 }, { "epoch": 3.3388537292245815, "grad_norm": 0.2509158253669739, "learning_rate": 2.3303201042130057e-05, "loss": 0.0611, "step": 56450 }, { "epoch": 3.3394452002129293, "grad_norm": 0.1932864636182785, "learning_rate": 2.3300807819858417e-05, "loss": 0.0552, "step": 56460 }, { "epoch": 3.3400366712012777, "grad_norm": 0.19126522541046143, "learning_rate": 2.3298414292973875e-05, "loss": 0.0469, "step": 56470 }, { "epoch": 3.3406281421896256, "grad_norm": 0.31000709533691406, "learning_rate": 2.329602046156427e-05, "loss": 0.0807, "step": 56480 }, { "epoch": 3.3412196131779734, "grad_norm": 0.2516923248767853, "learning_rate": 2.3293626325717434e-05, "loss": 0.0723, "step": 56490 }, { "epoch": 3.3418110841663218, "grad_norm": 0.23980526626110077, "learning_rate": 2.3291231885521236e-05, "loss": 0.0719, "step": 56500 }, { "epoch": 3.3424025551546697, "grad_norm": 0.22531017661094666, "learning_rate": 2.328883714106354e-05, "loss": 0.0624, "step": 56510 }, { "epoch": 3.3429940261430175, "grad_norm": 0.31401684880256653, "learning_rate": 2.3286442092432226e-05, "loss": 0.0525, "step": 56520 }, { "epoch": 3.343585497131366, "grad_norm": 0.264597624540329, "learning_rate": 2.3284046739715186e-05, "loss": 0.0698, "step": 56530 }, { "epoch": 3.3441769681197138, "grad_norm": 0.2957179844379425, "learning_rate": 2.3281651083000315e-05, "loss": 0.0707, "step": 56540 }, { "epoch": 3.3447684391080617, "grad_norm": 0.2893736660480499, "learning_rate": 2.3279255122375535e-05, "loss": 0.0638, "step": 56550 }, { "epoch": 3.34535991009641, "grad_norm": 0.21781525015830994, "learning_rate": 2.3276858857928765e-05, "loss": 0.0717, "step": 56560 }, { "epoch": 3.345951381084758, "grad_norm": 0.3845846354961395, "learning_rate": 2.3274462289747943e-05, "loss": 0.0545, "step": 56570 }, { "epoch": 3.3465428520731058, "grad_norm": 0.3850856423377991, "learning_rate": 2.3272065417921016e-05, "loss": 0.0849, "step": 56580 }, { "epoch": 3.3471343230614536, "grad_norm": 0.4330119788646698, "learning_rate": 2.3269668242535935e-05, "loss": 0.0716, "step": 56590 }, { "epoch": 3.347725794049802, "grad_norm": 0.2220381796360016, "learning_rate": 2.326727076368067e-05, "loss": 0.0674, "step": 56600 }, { "epoch": 3.34831726503815, "grad_norm": 0.22610688209533691, "learning_rate": 2.326487298144321e-05, "loss": 0.0615, "step": 56610 }, { "epoch": 3.3489087360264977, "grad_norm": 0.1593916416168213, "learning_rate": 2.3262474895911538e-05, "loss": 0.0576, "step": 56620 }, { "epoch": 3.349500207014846, "grad_norm": 0.24929630756378174, "learning_rate": 2.3260076507173654e-05, "loss": 0.0717, "step": 56630 }, { "epoch": 3.350091678003194, "grad_norm": 0.2906995117664337, "learning_rate": 2.3257677815317577e-05, "loss": 0.074, "step": 56640 }, { "epoch": 3.350683148991542, "grad_norm": 0.2477165013551712, "learning_rate": 2.3255278820431326e-05, "loss": 0.0725, "step": 56650 }, { "epoch": 3.35127461997989, "grad_norm": 0.23989064991474152, "learning_rate": 2.325287952260294e-05, "loss": 0.0662, "step": 56660 }, { "epoch": 3.351866090968238, "grad_norm": 0.32827234268188477, "learning_rate": 2.3250479921920468e-05, "loss": 0.0567, "step": 56670 }, { "epoch": 3.352457561956586, "grad_norm": 0.2714281380176544, "learning_rate": 2.3248080018471963e-05, "loss": 0.0705, "step": 56680 }, { "epoch": 3.3530490329449343, "grad_norm": 0.248392716050148, "learning_rate": 2.3245679812345494e-05, "loss": 0.0726, "step": 56690 }, { "epoch": 3.353640503933282, "grad_norm": 0.2635292410850525, "learning_rate": 2.3243279303629142e-05, "loss": 0.061, "step": 56700 }, { "epoch": 3.35423197492163, "grad_norm": 0.28301694989204407, "learning_rate": 2.3240878492411002e-05, "loss": 0.059, "step": 56710 }, { "epoch": 3.354823445909978, "grad_norm": 0.32304126024246216, "learning_rate": 2.3238477378779166e-05, "loss": 0.0552, "step": 56720 }, { "epoch": 3.3554149168983263, "grad_norm": 0.34824687242507935, "learning_rate": 2.3236075962821752e-05, "loss": 0.0782, "step": 56730 }, { "epoch": 3.356006387886674, "grad_norm": 0.20772428810596466, "learning_rate": 2.323367424462689e-05, "loss": 0.081, "step": 56740 }, { "epoch": 3.356597858875022, "grad_norm": 0.22392839193344116, "learning_rate": 2.323127222428271e-05, "loss": 0.0666, "step": 56750 }, { "epoch": 3.3571893298633704, "grad_norm": 0.21737904846668243, "learning_rate": 2.3228869901877354e-05, "loss": 0.0647, "step": 56760 }, { "epoch": 3.3577808008517183, "grad_norm": 0.2910986840724945, "learning_rate": 2.3226467277498993e-05, "loss": 0.0442, "step": 56770 }, { "epoch": 3.358372271840066, "grad_norm": 0.24712185561656952, "learning_rate": 2.322406435123578e-05, "loss": 0.0745, "step": 56780 }, { "epoch": 3.358963742828414, "grad_norm": 0.19129426777362823, "learning_rate": 2.32216611231759e-05, "loss": 0.0781, "step": 56790 }, { "epoch": 3.3595552138167624, "grad_norm": 0.2507435083389282, "learning_rate": 2.321925759340755e-05, "loss": 0.0794, "step": 56800 }, { "epoch": 3.3601466848051103, "grad_norm": 0.32444390654563904, "learning_rate": 2.3216853762018926e-05, "loss": 0.0629, "step": 56810 }, { "epoch": 3.360738155793458, "grad_norm": 0.15617775917053223, "learning_rate": 2.321444962909824e-05, "loss": 0.0471, "step": 56820 }, { "epoch": 3.3613296267818065, "grad_norm": 0.2856794595718384, "learning_rate": 2.3212045194733725e-05, "loss": 0.0717, "step": 56830 }, { "epoch": 3.3619210977701544, "grad_norm": 0.36769524216651917, "learning_rate": 2.3209640459013603e-05, "loss": 0.0835, "step": 56840 }, { "epoch": 3.3625125687585022, "grad_norm": 0.18108046054840088, "learning_rate": 2.3207235422026122e-05, "loss": 0.0717, "step": 56850 }, { "epoch": 3.3631040397468506, "grad_norm": 0.248825341463089, "learning_rate": 2.3204830083859552e-05, "loss": 0.0626, "step": 56860 }, { "epoch": 3.3636955107351985, "grad_norm": 0.25311848521232605, "learning_rate": 2.3202424444602148e-05, "loss": 0.0561, "step": 56870 }, { "epoch": 3.3642869817235463, "grad_norm": 0.2776171863079071, "learning_rate": 2.320001850434219e-05, "loss": 0.0781, "step": 56880 }, { "epoch": 3.3648784527118947, "grad_norm": 0.27097469568252563, "learning_rate": 2.3197612263167976e-05, "loss": 0.0735, "step": 56890 }, { "epoch": 3.3654699237002426, "grad_norm": 0.3063564598560333, "learning_rate": 2.3195205721167803e-05, "loss": 0.0766, "step": 56900 }, { "epoch": 3.3660613946885904, "grad_norm": 0.3661107122898102, "learning_rate": 2.3192798878429986e-05, "loss": 0.0564, "step": 56910 }, { "epoch": 3.3666528656769383, "grad_norm": 0.2343021184206009, "learning_rate": 2.319039173504284e-05, "loss": 0.0466, "step": 56920 }, { "epoch": 3.3672443366652867, "grad_norm": 0.27948513627052307, "learning_rate": 2.318798429109471e-05, "loss": 0.0677, "step": 56930 }, { "epoch": 3.3678358076536346, "grad_norm": 0.24737614393234253, "learning_rate": 2.3185576546673934e-05, "loss": 0.0798, "step": 56940 }, { "epoch": 3.3684272786419824, "grad_norm": 0.2774636745452881, "learning_rate": 2.318316850186887e-05, "loss": 0.0626, "step": 56950 }, { "epoch": 3.3690187496303308, "grad_norm": 0.2587146461009979, "learning_rate": 2.3180760156767883e-05, "loss": 0.0679, "step": 56960 }, { "epoch": 3.3696102206186787, "grad_norm": 0.2325674593448639, "learning_rate": 2.3178351511459365e-05, "loss": 0.0532, "step": 56970 }, { "epoch": 3.3702016916070265, "grad_norm": 0.3207871615886688, "learning_rate": 2.3175942566031693e-05, "loss": 0.0757, "step": 56980 }, { "epoch": 3.370793162595375, "grad_norm": 0.22408607602119446, "learning_rate": 2.3173533320573264e-05, "loss": 0.0714, "step": 56990 }, { "epoch": 3.3713846335837228, "grad_norm": 0.2335038185119629, "learning_rate": 2.3171123775172497e-05, "loss": 0.0704, "step": 57000 }, { "epoch": 3.3719761045720706, "grad_norm": 0.21960006654262543, "learning_rate": 2.3168713929917818e-05, "loss": 0.0601, "step": 57010 }, { "epoch": 3.372567575560419, "grad_norm": 0.23208077251911163, "learning_rate": 2.316630378489765e-05, "loss": 0.0512, "step": 57020 }, { "epoch": 3.373159046548767, "grad_norm": 0.3268280625343323, "learning_rate": 2.3163893340200447e-05, "loss": 0.0754, "step": 57030 }, { "epoch": 3.3737505175371147, "grad_norm": 0.28629031777381897, "learning_rate": 2.3161482595914664e-05, "loss": 0.0695, "step": 57040 }, { "epoch": 3.3743419885254626, "grad_norm": 0.24317032098770142, "learning_rate": 2.315907155212876e-05, "loss": 0.0589, "step": 57050 }, { "epoch": 3.374933459513811, "grad_norm": 0.2600618600845337, "learning_rate": 2.3156660208931216e-05, "loss": 0.0641, "step": 57060 }, { "epoch": 3.375524930502159, "grad_norm": 0.5114952921867371, "learning_rate": 2.3154248566410526e-05, "loss": 0.0507, "step": 57070 }, { "epoch": 3.3761164014905067, "grad_norm": 0.2203395664691925, "learning_rate": 2.315183662465518e-05, "loss": 0.0865, "step": 57080 }, { "epoch": 3.376707872478855, "grad_norm": 0.2887316644191742, "learning_rate": 2.3149424383753696e-05, "loss": 0.0703, "step": 57090 }, { "epoch": 3.377299343467203, "grad_norm": 0.1968720406293869, "learning_rate": 2.3147011843794595e-05, "loss": 0.0675, "step": 57100 }, { "epoch": 3.377890814455551, "grad_norm": 0.20393237471580505, "learning_rate": 2.3144599004866403e-05, "loss": 0.0584, "step": 57110 }, { "epoch": 3.378482285443899, "grad_norm": 0.28929686546325684, "learning_rate": 2.314218586705767e-05, "loss": 0.0631, "step": 57120 }, { "epoch": 3.379073756432247, "grad_norm": 0.313576877117157, "learning_rate": 2.3139772430456948e-05, "loss": 0.0753, "step": 57130 }, { "epoch": 3.379665227420595, "grad_norm": 0.23291856050491333, "learning_rate": 2.3137358695152803e-05, "loss": 0.0797, "step": 57140 }, { "epoch": 3.3802566984089433, "grad_norm": 0.19430576264858246, "learning_rate": 2.313494466123381e-05, "loss": 0.0769, "step": 57150 }, { "epoch": 3.380848169397291, "grad_norm": 0.21063002943992615, "learning_rate": 2.313253032878856e-05, "loss": 0.0667, "step": 57160 }, { "epoch": 3.381439640385639, "grad_norm": 0.27716851234436035, "learning_rate": 2.3130115697905643e-05, "loss": 0.0629, "step": 57170 }, { "epoch": 3.382031111373987, "grad_norm": 0.22432033717632294, "learning_rate": 2.3127700768673674e-05, "loss": 0.0839, "step": 57180 }, { "epoch": 3.3826225823623353, "grad_norm": 0.2453380674123764, "learning_rate": 2.312528554118128e-05, "loss": 0.0755, "step": 57190 }, { "epoch": 3.383214053350683, "grad_norm": 0.2864881455898285, "learning_rate": 2.3122870015517078e-05, "loss": 0.0692, "step": 57200 }, { "epoch": 3.383805524339031, "grad_norm": 0.30516332387924194, "learning_rate": 2.312045419176972e-05, "loss": 0.0808, "step": 57210 }, { "epoch": 3.3843969953273794, "grad_norm": 0.1874382346868515, "learning_rate": 2.3118038070027848e-05, "loss": 0.0586, "step": 57220 }, { "epoch": 3.3849884663157273, "grad_norm": 0.26655372977256775, "learning_rate": 2.3115621650380144e-05, "loss": 0.0874, "step": 57230 }, { "epoch": 3.385579937304075, "grad_norm": 0.2773149907588959, "learning_rate": 2.311320493291527e-05, "loss": 0.0772, "step": 57240 }, { "epoch": 3.386171408292423, "grad_norm": 0.2663726210594177, "learning_rate": 2.311078791772191e-05, "loss": 0.0612, "step": 57250 }, { "epoch": 3.3867628792807714, "grad_norm": 0.31309399008750916, "learning_rate": 2.3108370604888767e-05, "loss": 0.0695, "step": 57260 }, { "epoch": 3.3873543502691192, "grad_norm": 0.2138657420873642, "learning_rate": 2.310595299450455e-05, "loss": 0.0569, "step": 57270 }, { "epoch": 3.387945821257467, "grad_norm": 0.28580334782600403, "learning_rate": 2.3103535086657965e-05, "loss": 0.069, "step": 57280 }, { "epoch": 3.3885372922458155, "grad_norm": 0.22132959961891174, "learning_rate": 2.3101116881437757e-05, "loss": 0.0703, "step": 57290 }, { "epoch": 3.3891287632341633, "grad_norm": 0.27603572607040405, "learning_rate": 2.3098698378932658e-05, "loss": 0.0764, "step": 57300 }, { "epoch": 3.3897202342225112, "grad_norm": 0.3550662100315094, "learning_rate": 2.309627957923142e-05, "loss": 0.0708, "step": 57310 }, { "epoch": 3.3903117052108596, "grad_norm": 0.2137317657470703, "learning_rate": 2.3093860482422804e-05, "loss": 0.0567, "step": 57320 }, { "epoch": 3.3909031761992074, "grad_norm": 0.2697364389896393, "learning_rate": 2.3091441088595593e-05, "loss": 0.0795, "step": 57330 }, { "epoch": 3.3914946471875553, "grad_norm": 0.22462518513202667, "learning_rate": 2.3089021397838557e-05, "loss": 0.0716, "step": 57340 }, { "epoch": 3.3920861181759037, "grad_norm": 0.31531256437301636, "learning_rate": 2.30866014102405e-05, "loss": 0.0785, "step": 57350 }, { "epoch": 3.3926775891642516, "grad_norm": 0.3994135558605194, "learning_rate": 2.3084181125890224e-05, "loss": 0.0645, "step": 57360 }, { "epoch": 3.3932690601525994, "grad_norm": 0.1787116825580597, "learning_rate": 2.3081760544876543e-05, "loss": 0.0484, "step": 57370 }, { "epoch": 3.3938605311409473, "grad_norm": 0.250339150428772, "learning_rate": 2.3079339667288288e-05, "loss": 0.0789, "step": 57380 }, { "epoch": 3.3944520021292957, "grad_norm": 0.2658472955226898, "learning_rate": 2.3076918493214304e-05, "loss": 0.0661, "step": 57390 }, { "epoch": 3.3950434731176435, "grad_norm": 0.16378487646579742, "learning_rate": 2.3074497022743427e-05, "loss": 0.0637, "step": 57400 }, { "epoch": 3.3956349441059914, "grad_norm": 0.24855421483516693, "learning_rate": 2.3072075255964524e-05, "loss": 0.068, "step": 57410 }, { "epoch": 3.3962264150943398, "grad_norm": 0.2512270510196686, "learning_rate": 2.3069653192966467e-05, "loss": 0.061, "step": 57420 }, { "epoch": 3.3968178860826876, "grad_norm": 0.32534530758857727, "learning_rate": 2.3067230833838143e-05, "loss": 0.0768, "step": 57430 }, { "epoch": 3.3974093570710355, "grad_norm": 0.34040239453315735, "learning_rate": 2.3064808178668425e-05, "loss": 0.0803, "step": 57440 }, { "epoch": 3.398000828059384, "grad_norm": 0.2903355360031128, "learning_rate": 2.306238522754624e-05, "loss": 0.0684, "step": 57450 }, { "epoch": 3.3985922990477317, "grad_norm": 0.17368072271347046, "learning_rate": 2.305996198056049e-05, "loss": 0.0549, "step": 57460 }, { "epoch": 3.3991837700360796, "grad_norm": 0.16233080625534058, "learning_rate": 2.3057538437800107e-05, "loss": 0.0511, "step": 57470 }, { "epoch": 3.399775241024428, "grad_norm": 0.46111416816711426, "learning_rate": 2.3055114599354012e-05, "loss": 0.0695, "step": 57480 }, { "epoch": 3.400366712012776, "grad_norm": 0.3060557246208191, "learning_rate": 2.3052690465311173e-05, "loss": 0.0684, "step": 57490 }, { "epoch": 3.4009581830011237, "grad_norm": 0.17740675806999207, "learning_rate": 2.3050266035760537e-05, "loss": 0.0719, "step": 57500 }, { "epoch": 3.4015496539894716, "grad_norm": 0.2968410849571228, "learning_rate": 2.304784131079107e-05, "loss": 0.0644, "step": 57510 }, { "epoch": 3.40214112497782, "grad_norm": 0.17095279693603516, "learning_rate": 2.304541629049176e-05, "loss": 0.0616, "step": 57520 }, { "epoch": 3.402732595966168, "grad_norm": 0.26864689588546753, "learning_rate": 2.304299097495159e-05, "loss": 0.0833, "step": 57530 }, { "epoch": 3.4033240669545157, "grad_norm": 0.2580094635486603, "learning_rate": 2.304056536425956e-05, "loss": 0.073, "step": 57540 }, { "epoch": 3.403915537942864, "grad_norm": 0.2635140120983124, "learning_rate": 2.3038139458504693e-05, "loss": 0.0698, "step": 57550 }, { "epoch": 3.404507008931212, "grad_norm": 0.18277722597122192, "learning_rate": 2.303571325777601e-05, "loss": 0.0589, "step": 57560 }, { "epoch": 3.40509847991956, "grad_norm": 0.3110537528991699, "learning_rate": 2.303328676216253e-05, "loss": 0.0552, "step": 57570 }, { "epoch": 3.405689950907908, "grad_norm": 0.22942157089710236, "learning_rate": 2.303085997175331e-05, "loss": 0.0744, "step": 57580 }, { "epoch": 3.406281421896256, "grad_norm": 0.31911805272102356, "learning_rate": 2.30284328866374e-05, "loss": 0.0704, "step": 57590 }, { "epoch": 3.406872892884604, "grad_norm": 0.3608364462852478, "learning_rate": 2.3026005506903875e-05, "loss": 0.0703, "step": 57600 }, { "epoch": 3.4074643638729523, "grad_norm": 0.2889629304409027, "learning_rate": 2.3023577832641805e-05, "loss": 0.0656, "step": 57610 }, { "epoch": 3.4080558348613, "grad_norm": 0.23562091588974, "learning_rate": 2.3021149863940277e-05, "loss": 0.0583, "step": 57620 }, { "epoch": 3.408647305849648, "grad_norm": 0.2903328239917755, "learning_rate": 2.3018721600888393e-05, "loss": 0.0753, "step": 57630 }, { "epoch": 3.409238776837996, "grad_norm": 0.2772645354270935, "learning_rate": 2.3016293043575257e-05, "loss": 0.075, "step": 57640 }, { "epoch": 3.4098302478263443, "grad_norm": 0.3455328345298767, "learning_rate": 2.3013864192089998e-05, "loss": 0.0786, "step": 57650 }, { "epoch": 3.410421718814692, "grad_norm": 0.3158470392227173, "learning_rate": 2.301143504652174e-05, "loss": 0.071, "step": 57660 }, { "epoch": 3.41101318980304, "grad_norm": 0.23748068511486053, "learning_rate": 2.300900560695963e-05, "loss": 0.0578, "step": 57670 }, { "epoch": 3.4116046607913884, "grad_norm": 0.31029030680656433, "learning_rate": 2.300657587349281e-05, "loss": 0.0702, "step": 57680 }, { "epoch": 3.4121961317797362, "grad_norm": 0.2702336311340332, "learning_rate": 2.300414584621046e-05, "loss": 0.0789, "step": 57690 }, { "epoch": 3.412787602768084, "grad_norm": 0.33194220066070557, "learning_rate": 2.300171552520174e-05, "loss": 0.0708, "step": 57700 }, { "epoch": 3.413379073756432, "grad_norm": 0.15211793780326843, "learning_rate": 2.299928491055584e-05, "loss": 0.0636, "step": 57710 }, { "epoch": 3.4139705447447803, "grad_norm": 0.2670438885688782, "learning_rate": 2.299685400236196e-05, "loss": 0.0542, "step": 57720 }, { "epoch": 3.4145620157331282, "grad_norm": 0.339582234621048, "learning_rate": 2.2994422800709303e-05, "loss": 0.0688, "step": 57730 }, { "epoch": 3.4151534867214766, "grad_norm": 0.2607167363166809, "learning_rate": 2.299199130568708e-05, "loss": 0.0668, "step": 57740 }, { "epoch": 3.4157449577098244, "grad_norm": 0.1963626593351364, "learning_rate": 2.298955951738453e-05, "loss": 0.0683, "step": 57750 }, { "epoch": 3.4163364286981723, "grad_norm": 0.32886427640914917, "learning_rate": 2.2987127435890882e-05, "loss": 0.0717, "step": 57760 }, { "epoch": 3.41692789968652, "grad_norm": 0.20713983476161957, "learning_rate": 2.2984695061295394e-05, "loss": 0.0513, "step": 57770 }, { "epoch": 3.4175193706748686, "grad_norm": 0.22875526547431946, "learning_rate": 2.298226239368732e-05, "loss": 0.0804, "step": 57780 }, { "epoch": 3.4181108416632164, "grad_norm": 0.3550453186035156, "learning_rate": 2.297982943315593e-05, "loss": 0.065, "step": 57790 }, { "epoch": 3.4187023126515643, "grad_norm": 0.2121259570121765, "learning_rate": 2.2977396179790516e-05, "loss": 0.0617, "step": 57800 }, { "epoch": 3.4192937836399127, "grad_norm": 0.27920523285865784, "learning_rate": 2.2974962633680356e-05, "loss": 0.0583, "step": 57810 }, { "epoch": 3.4198852546282605, "grad_norm": 0.2740841507911682, "learning_rate": 2.297252879491477e-05, "loss": 0.0553, "step": 57820 }, { "epoch": 3.4204767256166084, "grad_norm": 0.3020816147327423, "learning_rate": 2.2970094663583062e-05, "loss": 0.0754, "step": 57830 }, { "epoch": 3.4210681966049563, "grad_norm": 0.23608793318271637, "learning_rate": 2.296766023977455e-05, "loss": 0.0838, "step": 57840 }, { "epoch": 3.4216596675933046, "grad_norm": 0.33642762899398804, "learning_rate": 2.2965225523578583e-05, "loss": 0.0699, "step": 57850 }, { "epoch": 3.4222511385816525, "grad_norm": 0.2581881284713745, "learning_rate": 2.2962790515084502e-05, "loss": 0.0647, "step": 57860 }, { "epoch": 3.4228426095700004, "grad_norm": 0.17887184023857117, "learning_rate": 2.2960355214381662e-05, "loss": 0.0552, "step": 57870 }, { "epoch": 3.4234340805583487, "grad_norm": 0.26994526386260986, "learning_rate": 2.2957919621559432e-05, "loss": 0.0775, "step": 57880 }, { "epoch": 3.4240255515466966, "grad_norm": 0.3035241663455963, "learning_rate": 2.2955483736707195e-05, "loss": 0.0752, "step": 57890 }, { "epoch": 3.4246170225350445, "grad_norm": 0.2539523243904114, "learning_rate": 2.2953047559914332e-05, "loss": 0.0696, "step": 57900 }, { "epoch": 3.425208493523393, "grad_norm": 0.2651298940181732, "learning_rate": 2.2950611091270247e-05, "loss": 0.0694, "step": 57910 }, { "epoch": 3.4257999645117407, "grad_norm": 0.2687346339225769, "learning_rate": 2.2948174330864348e-05, "loss": 0.0512, "step": 57920 }, { "epoch": 3.4263914355000886, "grad_norm": 0.2663552463054657, "learning_rate": 2.2945737278786066e-05, "loss": 0.0826, "step": 57930 }, { "epoch": 3.426982906488437, "grad_norm": 0.2449176013469696, "learning_rate": 2.2943299935124816e-05, "loss": 0.0733, "step": 57940 }, { "epoch": 3.427574377476785, "grad_norm": 0.2327168583869934, "learning_rate": 2.2940862299970056e-05, "loss": 0.0649, "step": 57950 }, { "epoch": 3.4281658484651327, "grad_norm": 0.24302883446216583, "learning_rate": 2.2938424373411235e-05, "loss": 0.0663, "step": 57960 }, { "epoch": 3.4287573194534806, "grad_norm": 0.19643819332122803, "learning_rate": 2.2935986155537808e-05, "loss": 0.0559, "step": 57970 }, { "epoch": 3.429348790441829, "grad_norm": 0.2835114896297455, "learning_rate": 2.2933547646439264e-05, "loss": 0.0737, "step": 57980 }, { "epoch": 3.429940261430177, "grad_norm": 0.3113783299922943, "learning_rate": 2.2931108846205078e-05, "loss": 0.0685, "step": 57990 }, { "epoch": 3.4305317324185247, "grad_norm": 0.24609756469726562, "learning_rate": 2.2928669754924753e-05, "loss": 0.0733, "step": 58000 }, { "epoch": 3.431123203406873, "grad_norm": 0.3635719120502472, "learning_rate": 2.2926230372687795e-05, "loss": 0.0649, "step": 58010 }, { "epoch": 3.431714674395221, "grad_norm": 0.34007778763771057, "learning_rate": 2.2923790699583713e-05, "loss": 0.0578, "step": 58020 }, { "epoch": 3.432306145383569, "grad_norm": 0.23369617760181427, "learning_rate": 2.2921350735702044e-05, "loss": 0.0657, "step": 58030 }, { "epoch": 3.432897616371917, "grad_norm": 0.3354840576648712, "learning_rate": 2.291891048113232e-05, "loss": 0.0782, "step": 58040 }, { "epoch": 3.433489087360265, "grad_norm": 0.3225891590118408, "learning_rate": 2.2916469935964097e-05, "loss": 0.0718, "step": 58050 }, { "epoch": 3.434080558348613, "grad_norm": 0.2160460352897644, "learning_rate": 2.2914029100286936e-05, "loss": 0.0665, "step": 58060 }, { "epoch": 3.4346720293369613, "grad_norm": 0.18101108074188232, "learning_rate": 2.29115879741904e-05, "loss": 0.0562, "step": 58070 }, { "epoch": 3.435263500325309, "grad_norm": 0.20836301147937775, "learning_rate": 2.290914655776408e-05, "loss": 0.0732, "step": 58080 }, { "epoch": 3.435854971313657, "grad_norm": 0.2875462472438812, "learning_rate": 2.2906704851097557e-05, "loss": 0.078, "step": 58090 }, { "epoch": 3.436446442302005, "grad_norm": 0.6525830030441284, "learning_rate": 2.2904262854280445e-05, "loss": 0.0687, "step": 58100 }, { "epoch": 3.4370379132903532, "grad_norm": 0.20684456825256348, "learning_rate": 2.290182056740235e-05, "loss": 0.0719, "step": 58110 }, { "epoch": 3.437629384278701, "grad_norm": 0.4857803285121918, "learning_rate": 2.28993779905529e-05, "loss": 0.0617, "step": 58120 }, { "epoch": 3.438220855267049, "grad_norm": 0.2913428544998169, "learning_rate": 2.2896935123821726e-05, "loss": 0.0803, "step": 58130 }, { "epoch": 3.4388123262553973, "grad_norm": 0.21098461747169495, "learning_rate": 2.2894491967298478e-05, "loss": 0.0797, "step": 58140 }, { "epoch": 3.4394037972437452, "grad_norm": 0.3133944571018219, "learning_rate": 2.2892048521072804e-05, "loss": 0.0719, "step": 58150 }, { "epoch": 3.439995268232093, "grad_norm": 0.19400596618652344, "learning_rate": 2.288960478523438e-05, "loss": 0.0521, "step": 58160 }, { "epoch": 3.440586739220441, "grad_norm": 0.35929006338119507, "learning_rate": 2.2887160759872878e-05, "loss": 0.0552, "step": 58170 }, { "epoch": 3.4411782102087893, "grad_norm": 0.24698345363140106, "learning_rate": 2.2884716445077987e-05, "loss": 0.0765, "step": 58180 }, { "epoch": 3.441769681197137, "grad_norm": 0.28593751788139343, "learning_rate": 2.2882271840939403e-05, "loss": 0.0716, "step": 58190 }, { "epoch": 3.4423611521854856, "grad_norm": 0.25646424293518066, "learning_rate": 2.287982694754684e-05, "loss": 0.0688, "step": 58200 }, { "epoch": 3.4429526231738334, "grad_norm": 0.28132718801498413, "learning_rate": 2.2877381764990017e-05, "loss": 0.0688, "step": 58210 }, { "epoch": 3.4435440941621813, "grad_norm": 0.31467729806900024, "learning_rate": 2.2874936293358655e-05, "loss": 0.0613, "step": 58220 }, { "epoch": 3.444135565150529, "grad_norm": 0.42679065465927124, "learning_rate": 2.287249053274251e-05, "loss": 0.0852, "step": 58230 }, { "epoch": 3.4447270361388775, "grad_norm": 0.28342968225479126, "learning_rate": 2.2870044483231327e-05, "loss": 0.0731, "step": 58240 }, { "epoch": 3.4453185071272254, "grad_norm": 0.29935136437416077, "learning_rate": 2.2867598144914864e-05, "loss": 0.0612, "step": 58250 }, { "epoch": 3.4459099781155733, "grad_norm": 0.1963024139404297, "learning_rate": 2.28651515178829e-05, "loss": 0.0612, "step": 58260 }, { "epoch": 3.4465014491039216, "grad_norm": 0.19546885788440704, "learning_rate": 2.2862704602225214e-05, "loss": 0.0533, "step": 58270 }, { "epoch": 3.4470929200922695, "grad_norm": 0.2710597515106201, "learning_rate": 2.2860257398031598e-05, "loss": 0.0804, "step": 58280 }, { "epoch": 3.4476843910806174, "grad_norm": 0.2965071499347687, "learning_rate": 2.285780990539186e-05, "loss": 0.0805, "step": 58290 }, { "epoch": 3.4482758620689653, "grad_norm": 0.302456259727478, "learning_rate": 2.2855362124395822e-05, "loss": 0.0645, "step": 58300 }, { "epoch": 3.4488673330573136, "grad_norm": 0.19610056281089783, "learning_rate": 2.285291405513329e-05, "loss": 0.0625, "step": 58310 }, { "epoch": 3.4494588040456615, "grad_norm": 0.2343490570783615, "learning_rate": 2.2850465697694122e-05, "loss": 0.0689, "step": 58320 }, { "epoch": 3.4500502750340094, "grad_norm": 0.2528317868709564, "learning_rate": 2.2848017052168155e-05, "loss": 0.0758, "step": 58330 }, { "epoch": 3.4506417460223577, "grad_norm": 0.2643369734287262, "learning_rate": 2.2845568118645243e-05, "loss": 0.0828, "step": 58340 }, { "epoch": 3.4512332170107056, "grad_norm": 0.4310593008995056, "learning_rate": 2.2843118897215263e-05, "loss": 0.0711, "step": 58350 }, { "epoch": 3.4518246879990535, "grad_norm": 0.2642076015472412, "learning_rate": 2.2840669387968085e-05, "loss": 0.0643, "step": 58360 }, { "epoch": 3.452416158987402, "grad_norm": 0.2598249912261963, "learning_rate": 2.2838219590993603e-05, "loss": 0.0658, "step": 58370 }, { "epoch": 3.4530076299757497, "grad_norm": 0.25490519404411316, "learning_rate": 2.283576950638171e-05, "loss": 0.0752, "step": 58380 }, { "epoch": 3.4535991009640976, "grad_norm": 0.2607930898666382, "learning_rate": 2.2833319134222327e-05, "loss": 0.0697, "step": 58390 }, { "epoch": 3.454190571952446, "grad_norm": 0.2857826352119446, "learning_rate": 2.283086847460537e-05, "loss": 0.0685, "step": 58400 }, { "epoch": 3.454782042940794, "grad_norm": 0.15977224707603455, "learning_rate": 2.282841752762077e-05, "loss": 0.0617, "step": 58410 }, { "epoch": 3.4553735139291417, "grad_norm": 0.37385278940200806, "learning_rate": 2.282596629335846e-05, "loss": 0.0691, "step": 58420 }, { "epoch": 3.4559649849174896, "grad_norm": 0.2319481521844864, "learning_rate": 2.2823514771908407e-05, "loss": 0.0844, "step": 58430 }, { "epoch": 3.456556455905838, "grad_norm": 0.25332027673721313, "learning_rate": 2.282106296336057e-05, "loss": 0.0749, "step": 58440 }, { "epoch": 3.457147926894186, "grad_norm": 0.19836145639419556, "learning_rate": 2.2818610867804915e-05, "loss": 0.068, "step": 58450 }, { "epoch": 3.4577393978825337, "grad_norm": 0.27004265785217285, "learning_rate": 2.2816158485331436e-05, "loss": 0.0712, "step": 58460 }, { "epoch": 3.458330868870882, "grad_norm": 0.20447489619255066, "learning_rate": 2.281370581603012e-05, "loss": 0.0584, "step": 58470 }, { "epoch": 3.45892233985923, "grad_norm": 0.2779751420021057, "learning_rate": 2.281125285999097e-05, "loss": 0.0894, "step": 58480 }, { "epoch": 3.459513810847578, "grad_norm": 0.2668973207473755, "learning_rate": 2.2808799617304014e-05, "loss": 0.0669, "step": 58490 }, { "epoch": 3.460105281835926, "grad_norm": 0.2148611694574356, "learning_rate": 2.280634608805927e-05, "loss": 0.0654, "step": 58500 }, { "epoch": 3.460696752824274, "grad_norm": 0.2834460437297821, "learning_rate": 2.2803892272346773e-05, "loss": 0.0679, "step": 58510 }, { "epoch": 3.461288223812622, "grad_norm": 0.22971194982528687, "learning_rate": 2.2801438170256572e-05, "loss": 0.0589, "step": 58520 }, { "epoch": 3.4618796948009702, "grad_norm": 0.22062619030475616, "learning_rate": 2.279898378187873e-05, "loss": 0.0797, "step": 58530 }, { "epoch": 3.462471165789318, "grad_norm": 0.19633857905864716, "learning_rate": 2.27965291073033e-05, "loss": 0.0655, "step": 58540 }, { "epoch": 3.463062636777666, "grad_norm": 0.18875467777252197, "learning_rate": 2.279407414662037e-05, "loss": 0.0735, "step": 58550 }, { "epoch": 3.463654107766014, "grad_norm": 0.30111223459243774, "learning_rate": 2.279161889992004e-05, "loss": 0.0576, "step": 58560 }, { "epoch": 3.4642455787543622, "grad_norm": 0.23130834102630615, "learning_rate": 2.2789163367292396e-05, "loss": 0.0567, "step": 58570 }, { "epoch": 3.46483704974271, "grad_norm": 0.2732314169406891, "learning_rate": 2.278670754882755e-05, "loss": 0.078, "step": 58580 }, { "epoch": 3.465428520731058, "grad_norm": 0.2708621025085449, "learning_rate": 2.278425144461562e-05, "loss": 0.0644, "step": 58590 }, { "epoch": 3.4660199917194063, "grad_norm": 0.19976045191287994, "learning_rate": 2.278179505474675e-05, "loss": 0.0688, "step": 58600 }, { "epoch": 3.466611462707754, "grad_norm": 0.15967105329036713, "learning_rate": 2.277933837931107e-05, "loss": 0.0714, "step": 58610 }, { "epoch": 3.467202933696102, "grad_norm": 0.25961804389953613, "learning_rate": 2.277688141839873e-05, "loss": 0.0675, "step": 58620 }, { "epoch": 3.46779440468445, "grad_norm": 0.36011242866516113, "learning_rate": 2.277442417209991e-05, "loss": 0.0667, "step": 58630 }, { "epoch": 3.4683858756727983, "grad_norm": 0.22202156484127045, "learning_rate": 2.2771966640504758e-05, "loss": 0.0711, "step": 58640 }, { "epoch": 3.468977346661146, "grad_norm": 0.23148079216480255, "learning_rate": 2.276950882370347e-05, "loss": 0.0748, "step": 58650 }, { "epoch": 3.4695688176494945, "grad_norm": 0.2192564308643341, "learning_rate": 2.2767050721786245e-05, "loss": 0.0704, "step": 58660 }, { "epoch": 3.4701602886378424, "grad_norm": 0.2268427461385727, "learning_rate": 2.276459233484328e-05, "loss": 0.0664, "step": 58670 }, { "epoch": 3.4707517596261903, "grad_norm": 0.2776515483856201, "learning_rate": 2.2762133662964794e-05, "loss": 0.0748, "step": 58680 }, { "epoch": 3.471343230614538, "grad_norm": 0.35928231477737427, "learning_rate": 2.275967470624101e-05, "loss": 0.07, "step": 58690 }, { "epoch": 3.4719347016028865, "grad_norm": 0.3130253553390503, "learning_rate": 2.2757215464762168e-05, "loss": 0.0718, "step": 58700 }, { "epoch": 3.4725261725912344, "grad_norm": 0.20125623047351837, "learning_rate": 2.2754755938618508e-05, "loss": 0.0565, "step": 58710 }, { "epoch": 3.4731176435795823, "grad_norm": 0.32938581705093384, "learning_rate": 2.2752296127900282e-05, "loss": 0.0553, "step": 58720 }, { "epoch": 3.4737091145679306, "grad_norm": 0.2575124204158783, "learning_rate": 2.2749836032697778e-05, "loss": 0.07, "step": 58730 }, { "epoch": 3.4743005855562785, "grad_norm": 0.3022269010543823, "learning_rate": 2.274737565310125e-05, "loss": 0.0763, "step": 58740 }, { "epoch": 3.4748920565446264, "grad_norm": 0.2571289837360382, "learning_rate": 2.2744914989201e-05, "loss": 0.0695, "step": 58750 }, { "epoch": 3.4754835275329743, "grad_norm": 0.7117968797683716, "learning_rate": 2.274245404108732e-05, "loss": 0.0611, "step": 58760 }, { "epoch": 3.4760749985213226, "grad_norm": 0.27872851490974426, "learning_rate": 2.2739992808850525e-05, "loss": 0.0543, "step": 58770 }, { "epoch": 3.4766664695096705, "grad_norm": 0.17450273036956787, "learning_rate": 2.2737531292580927e-05, "loss": 0.0738, "step": 58780 }, { "epoch": 3.4772579404980184, "grad_norm": 0.4845382273197174, "learning_rate": 2.2735069492368862e-05, "loss": 0.0765, "step": 58790 }, { "epoch": 3.4778494114863667, "grad_norm": 0.2578427791595459, "learning_rate": 2.273260740830467e-05, "loss": 0.0758, "step": 58800 }, { "epoch": 3.4784408824747146, "grad_norm": 0.3577041029930115, "learning_rate": 2.2730145040478692e-05, "loss": 0.064, "step": 58810 }, { "epoch": 3.4790323534630625, "grad_norm": 0.14608485996723175, "learning_rate": 2.2727682388981297e-05, "loss": 0.0505, "step": 58820 }, { "epoch": 3.479623824451411, "grad_norm": 0.2791447043418884, "learning_rate": 2.2725219453902865e-05, "loss": 0.0674, "step": 58830 }, { "epoch": 3.4802152954397587, "grad_norm": 0.4431314468383789, "learning_rate": 2.2722756235333757e-05, "loss": 0.0735, "step": 58840 }, { "epoch": 3.4808067664281066, "grad_norm": 0.25809210538864136, "learning_rate": 2.272029273336438e-05, "loss": 0.0834, "step": 58850 }, { "epoch": 3.481398237416455, "grad_norm": 0.18422839045524597, "learning_rate": 2.2717828948085134e-05, "loss": 0.0668, "step": 58860 }, { "epoch": 3.481989708404803, "grad_norm": 0.26859867572784424, "learning_rate": 2.2715364879586434e-05, "loss": 0.0582, "step": 58870 }, { "epoch": 3.4825811793931507, "grad_norm": 0.24848033487796783, "learning_rate": 2.2712900527958693e-05, "loss": 0.068, "step": 58880 }, { "epoch": 3.4831726503814986, "grad_norm": 0.20461387932300568, "learning_rate": 2.271043589329236e-05, "loss": 0.0608, "step": 58890 }, { "epoch": 3.483764121369847, "grad_norm": 0.3935926854610443, "learning_rate": 2.2707970975677866e-05, "loss": 0.0783, "step": 58900 }, { "epoch": 3.484355592358195, "grad_norm": 0.19076453149318695, "learning_rate": 2.270550577520567e-05, "loss": 0.0568, "step": 58910 }, { "epoch": 3.4849470633465427, "grad_norm": 0.24551984667778015, "learning_rate": 2.2703040291966242e-05, "loss": 0.0632, "step": 58920 }, { "epoch": 3.485538534334891, "grad_norm": 0.3530275821685791, "learning_rate": 2.270057452605005e-05, "loss": 0.0619, "step": 58930 }, { "epoch": 3.486130005323239, "grad_norm": 0.36002156138420105, "learning_rate": 2.2698108477547584e-05, "loss": 0.0667, "step": 58940 }, { "epoch": 3.486721476311587, "grad_norm": 0.17630375921726227, "learning_rate": 2.2695642146549332e-05, "loss": 0.0649, "step": 58950 }, { "epoch": 3.487312947299935, "grad_norm": 0.29244351387023926, "learning_rate": 2.2693175533145813e-05, "loss": 0.064, "step": 58960 }, { "epoch": 3.487904418288283, "grad_norm": 0.2537522614002228, "learning_rate": 2.269070863742754e-05, "loss": 0.0731, "step": 58970 }, { "epoch": 3.488495889276631, "grad_norm": 0.25457999110221863, "learning_rate": 2.2688241459485033e-05, "loss": 0.0741, "step": 58980 }, { "epoch": 3.4890873602649792, "grad_norm": 0.21036018431186676, "learning_rate": 2.268577399940884e-05, "loss": 0.078, "step": 58990 }, { "epoch": 3.489678831253327, "grad_norm": 0.23138651251792908, "learning_rate": 2.2683306257289503e-05, "loss": 0.0773, "step": 59000 }, { "epoch": 3.490270302241675, "grad_norm": 0.19478170573711395, "learning_rate": 2.2680838233217575e-05, "loss": 0.0657, "step": 59010 }, { "epoch": 3.490861773230023, "grad_norm": 0.19298779964447021, "learning_rate": 2.2678369927283632e-05, "loss": 0.0555, "step": 59020 }, { "epoch": 3.491453244218371, "grad_norm": 0.3098237216472626, "learning_rate": 2.2675901339578257e-05, "loss": 0.0779, "step": 59030 }, { "epoch": 3.492044715206719, "grad_norm": 0.32567670941352844, "learning_rate": 2.2673432470192024e-05, "loss": 0.0814, "step": 59040 }, { "epoch": 3.492636186195067, "grad_norm": 0.2757243514060974, "learning_rate": 2.2670963319215546e-05, "loss": 0.0698, "step": 59050 }, { "epoch": 3.4932276571834153, "grad_norm": 0.2678086459636688, "learning_rate": 2.266849388673943e-05, "loss": 0.0637, "step": 59060 }, { "epoch": 3.493819128171763, "grad_norm": 0.24353010952472687, "learning_rate": 2.2666024172854286e-05, "loss": 0.0561, "step": 59070 }, { "epoch": 3.494410599160111, "grad_norm": 0.2514934241771698, "learning_rate": 2.2663554177650763e-05, "loss": 0.0735, "step": 59080 }, { "epoch": 3.495002070148459, "grad_norm": 0.30266666412353516, "learning_rate": 2.2661083901219487e-05, "loss": 0.0718, "step": 59090 }, { "epoch": 3.4955935411368073, "grad_norm": 0.22208337485790253, "learning_rate": 2.265861334365112e-05, "loss": 0.0694, "step": 59100 }, { "epoch": 3.496185012125155, "grad_norm": 0.24249084293842316, "learning_rate": 2.265614250503631e-05, "loss": 0.0591, "step": 59110 }, { "epoch": 3.4967764831135035, "grad_norm": 0.22425048053264618, "learning_rate": 2.265367138546574e-05, "loss": 0.0575, "step": 59120 }, { "epoch": 3.4973679541018514, "grad_norm": 0.31766676902770996, "learning_rate": 2.2651199985030093e-05, "loss": 0.0704, "step": 59130 }, { "epoch": 3.4979594250901993, "grad_norm": 0.2654212415218353, "learning_rate": 2.264872830382005e-05, "loss": 0.0842, "step": 59140 }, { "epoch": 3.498550896078547, "grad_norm": 0.18132786452770233, "learning_rate": 2.264625634192633e-05, "loss": 0.0664, "step": 59150 }, { "epoch": 3.4991423670668955, "grad_norm": 0.2306559830904007, "learning_rate": 2.264378409943963e-05, "loss": 0.0626, "step": 59160 }, { "epoch": 3.4997338380552434, "grad_norm": 0.1661803424358368, "learning_rate": 2.264131157645068e-05, "loss": 0.0579, "step": 59170 }, { "epoch": 3.5003253090435913, "grad_norm": 0.27146250009536743, "learning_rate": 2.2638838773050218e-05, "loss": 0.08, "step": 59180 }, { "epoch": 3.5009167800319396, "grad_norm": 0.3077813684940338, "learning_rate": 2.2636365689328982e-05, "loss": 0.0778, "step": 59190 }, { "epoch": 3.5015082510202875, "grad_norm": 0.40117037296295166, "learning_rate": 2.2633892325377727e-05, "loss": 0.0739, "step": 59200 }, { "epoch": 3.5020997220086354, "grad_norm": 0.21534466743469238, "learning_rate": 2.2631418681287218e-05, "loss": 0.0687, "step": 59210 }, { "epoch": 3.5026911929969833, "grad_norm": 0.21515803039073944, "learning_rate": 2.2628944757148233e-05, "loss": 0.0513, "step": 59220 }, { "epoch": 3.5032826639853316, "grad_norm": 0.3409246504306793, "learning_rate": 2.2626470553051554e-05, "loss": 0.0851, "step": 59230 }, { "epoch": 3.5038741349736795, "grad_norm": 0.22750601172447205, "learning_rate": 2.2623996069087976e-05, "loss": 0.0656, "step": 59240 }, { "epoch": 3.504465605962028, "grad_norm": 0.30306684970855713, "learning_rate": 2.2621521305348304e-05, "loss": 0.0702, "step": 59250 }, { "epoch": 3.5050570769503757, "grad_norm": 0.20101703703403473, "learning_rate": 2.2619046261923354e-05, "loss": 0.0694, "step": 59260 }, { "epoch": 3.5056485479387236, "grad_norm": 0.2861050069332123, "learning_rate": 2.2616570938903954e-05, "loss": 0.0562, "step": 59270 }, { "epoch": 3.5062400189270715, "grad_norm": 0.30420929193496704, "learning_rate": 2.2614095336380945e-05, "loss": 0.0806, "step": 59280 }, { "epoch": 3.5068314899154194, "grad_norm": 0.43913453817367554, "learning_rate": 2.2611619454445164e-05, "loss": 0.0786, "step": 59290 }, { "epoch": 3.5074229609037677, "grad_norm": 0.23556561768054962, "learning_rate": 2.2609143293187472e-05, "loss": 0.0692, "step": 59300 }, { "epoch": 3.5080144318921156, "grad_norm": 0.20835639536380768, "learning_rate": 2.260666685269874e-05, "loss": 0.0618, "step": 59310 }, { "epoch": 3.508605902880464, "grad_norm": 0.3342490792274475, "learning_rate": 2.2604190133069837e-05, "loss": 0.0591, "step": 59320 }, { "epoch": 3.509197373868812, "grad_norm": 0.2793823182582855, "learning_rate": 2.2601713134391657e-05, "loss": 0.0892, "step": 59330 }, { "epoch": 3.5097888448571597, "grad_norm": 0.21282990276813507, "learning_rate": 2.2599235856755098e-05, "loss": 0.0677, "step": 59340 }, { "epoch": 3.5103803158455076, "grad_norm": 0.2531382143497467, "learning_rate": 2.259675830025106e-05, "loss": 0.0703, "step": 59350 }, { "epoch": 3.510971786833856, "grad_norm": 0.30429577827453613, "learning_rate": 2.2594280464970473e-05, "loss": 0.0682, "step": 59360 }, { "epoch": 3.511563257822204, "grad_norm": 0.2681003510951996, "learning_rate": 2.259180235100426e-05, "loss": 0.0585, "step": 59370 }, { "epoch": 3.5121547288105517, "grad_norm": 0.1818714290857315, "learning_rate": 2.2589323958443357e-05, "loss": 0.0733, "step": 59380 }, { "epoch": 3.5127461997989, "grad_norm": 0.21616142988204956, "learning_rate": 2.2586845287378718e-05, "loss": 0.0822, "step": 59390 }, { "epoch": 3.513337670787248, "grad_norm": 0.28461626172065735, "learning_rate": 2.25843663379013e-05, "loss": 0.0704, "step": 59400 }, { "epoch": 3.513929141775596, "grad_norm": 0.29345494508743286, "learning_rate": 2.2581887110102075e-05, "loss": 0.0654, "step": 59410 }, { "epoch": 3.5145206127639437, "grad_norm": 0.19221185147762299, "learning_rate": 2.257940760407202e-05, "loss": 0.0552, "step": 59420 }, { "epoch": 3.515112083752292, "grad_norm": 0.2643568813800812, "learning_rate": 2.2576927819902124e-05, "loss": 0.0747, "step": 59430 }, { "epoch": 3.51570355474064, "grad_norm": 0.26785698533058167, "learning_rate": 2.257444775768339e-05, "loss": 0.0705, "step": 59440 }, { "epoch": 3.516295025728988, "grad_norm": 0.2197382152080536, "learning_rate": 2.2571967417506822e-05, "loss": 0.0697, "step": 59450 }, { "epoch": 3.516886496717336, "grad_norm": 0.2675495147705078, "learning_rate": 2.2569486799463444e-05, "loss": 0.0742, "step": 59460 }, { "epoch": 3.517477967705684, "grad_norm": 0.18756087124347687, "learning_rate": 2.2567005903644292e-05, "loss": 0.0621, "step": 59470 }, { "epoch": 3.518069438694032, "grad_norm": 0.17693226039409637, "learning_rate": 2.2564524730140404e-05, "loss": 0.0729, "step": 59480 }, { "epoch": 3.51866090968238, "grad_norm": 0.25869300961494446, "learning_rate": 2.256204327904283e-05, "loss": 0.0671, "step": 59490 }, { "epoch": 3.519252380670728, "grad_norm": 0.23151895403862, "learning_rate": 2.2559561550442633e-05, "loss": 0.0736, "step": 59500 }, { "epoch": 3.519843851659076, "grad_norm": 0.44366663694381714, "learning_rate": 2.2557079544430878e-05, "loss": 0.0618, "step": 59510 }, { "epoch": 3.5204353226474243, "grad_norm": 0.2488214671611786, "learning_rate": 2.2554597261098652e-05, "loss": 0.0627, "step": 59520 }, { "epoch": 3.521026793635772, "grad_norm": 0.2677406966686249, "learning_rate": 2.2552114700537046e-05, "loss": 0.0835, "step": 59530 }, { "epoch": 3.52161826462412, "grad_norm": 0.3193245530128479, "learning_rate": 2.2549631862837162e-05, "loss": 0.0782, "step": 59540 }, { "epoch": 3.522209735612468, "grad_norm": 0.27423807978630066, "learning_rate": 2.2547148748090115e-05, "loss": 0.0789, "step": 59550 }, { "epoch": 3.5228012066008163, "grad_norm": 0.29701149463653564, "learning_rate": 2.2544665356387014e-05, "loss": 0.0589, "step": 59560 }, { "epoch": 3.523392677589164, "grad_norm": 0.22368581593036652, "learning_rate": 2.2542181687819014e-05, "loss": 0.0545, "step": 59570 }, { "epoch": 3.5239841485775125, "grad_norm": 0.21792113780975342, "learning_rate": 2.253969774247724e-05, "loss": 0.0787, "step": 59580 }, { "epoch": 3.5245756195658604, "grad_norm": 0.2629070580005646, "learning_rate": 2.253721352045285e-05, "loss": 0.0731, "step": 59590 }, { "epoch": 3.5251670905542083, "grad_norm": 0.2774624526500702, "learning_rate": 2.253472902183701e-05, "loss": 0.0705, "step": 59600 }, { "epoch": 3.525758561542556, "grad_norm": 0.15511058270931244, "learning_rate": 2.253224424672089e-05, "loss": 0.0602, "step": 59610 }, { "epoch": 3.5263500325309045, "grad_norm": 0.18929727375507355, "learning_rate": 2.252975919519567e-05, "loss": 0.0524, "step": 59620 }, { "epoch": 3.5269415035192524, "grad_norm": 0.24850428104400635, "learning_rate": 2.252727386735255e-05, "loss": 0.0712, "step": 59630 }, { "epoch": 3.5275329745076003, "grad_norm": 0.26056039333343506, "learning_rate": 2.252478826328273e-05, "loss": 0.0711, "step": 59640 }, { "epoch": 3.5281244454959486, "grad_norm": 0.22454339265823364, "learning_rate": 2.252230238307742e-05, "loss": 0.0621, "step": 59650 }, { "epoch": 3.5287159164842965, "grad_norm": 0.2362855076789856, "learning_rate": 2.2519816226827852e-05, "loss": 0.0684, "step": 59660 }, { "epoch": 3.5293073874726444, "grad_norm": 0.2904996871948242, "learning_rate": 2.2517329794625262e-05, "loss": 0.0575, "step": 59670 }, { "epoch": 3.5298988584609923, "grad_norm": 0.25972306728363037, "learning_rate": 2.2514843086560876e-05, "loss": 0.0827, "step": 59680 }, { "epoch": 3.5304903294493406, "grad_norm": 0.2513984143733978, "learning_rate": 2.2512356102725965e-05, "loss": 0.0743, "step": 59690 }, { "epoch": 3.5310818004376885, "grad_norm": 0.3055368959903717, "learning_rate": 2.2509868843211794e-05, "loss": 0.0603, "step": 59700 }, { "epoch": 3.531673271426037, "grad_norm": 0.28974658250808716, "learning_rate": 2.2507381308109622e-05, "loss": 0.0672, "step": 59710 }, { "epoch": 3.5322647424143847, "grad_norm": 0.33850690722465515, "learning_rate": 2.250489349751075e-05, "loss": 0.062, "step": 59720 }, { "epoch": 3.5328562134027326, "grad_norm": 0.256155788898468, "learning_rate": 2.250240541150646e-05, "loss": 0.0748, "step": 59730 }, { "epoch": 3.5334476843910805, "grad_norm": 0.22104662656784058, "learning_rate": 2.2499917050188072e-05, "loss": 0.0827, "step": 59740 }, { "epoch": 3.5340391553794284, "grad_norm": 0.21473455429077148, "learning_rate": 2.2497428413646882e-05, "loss": 0.065, "step": 59750 }, { "epoch": 3.5346306263677767, "grad_norm": 0.208973228931427, "learning_rate": 2.249493950197423e-05, "loss": 0.0579, "step": 59760 }, { "epoch": 3.5352220973561246, "grad_norm": 0.20454779267311096, "learning_rate": 2.2492450315261443e-05, "loss": 0.0669, "step": 59770 }, { "epoch": 3.535813568344473, "grad_norm": 0.3872264623641968, "learning_rate": 2.248996085359987e-05, "loss": 0.0766, "step": 59780 }, { "epoch": 3.536405039332821, "grad_norm": 0.3948296904563904, "learning_rate": 2.2487471117080863e-05, "loss": 0.0724, "step": 59790 }, { "epoch": 3.5369965103211687, "grad_norm": 0.32949909567832947, "learning_rate": 2.2484981105795794e-05, "loss": 0.0791, "step": 59800 }, { "epoch": 3.5375879813095166, "grad_norm": 0.2180435210466385, "learning_rate": 2.248249081983603e-05, "loss": 0.069, "step": 59810 }, { "epoch": 3.538179452297865, "grad_norm": 0.24947422742843628, "learning_rate": 2.2480000259292955e-05, "loss": 0.0621, "step": 59820 }, { "epoch": 3.538770923286213, "grad_norm": 0.2557392120361328, "learning_rate": 2.2477509424257977e-05, "loss": 0.0783, "step": 59830 }, { "epoch": 3.5393623942745607, "grad_norm": 0.30716437101364136, "learning_rate": 2.2475018314822495e-05, "loss": 0.073, "step": 59840 }, { "epoch": 3.539953865262909, "grad_norm": 0.24185393750667572, "learning_rate": 2.2472526931077918e-05, "loss": 0.0719, "step": 59850 }, { "epoch": 3.540545336251257, "grad_norm": 0.1462240070104599, "learning_rate": 2.247003527311568e-05, "loss": 0.0707, "step": 59860 }, { "epoch": 3.5411368072396048, "grad_norm": 0.20009714365005493, "learning_rate": 2.2467543341027217e-05, "loss": 0.0655, "step": 59870 }, { "epoch": 3.5417282782279527, "grad_norm": 0.36748552322387695, "learning_rate": 2.2465051134903968e-05, "loss": 0.0753, "step": 59880 }, { "epoch": 3.542319749216301, "grad_norm": 0.24965906143188477, "learning_rate": 2.2462558654837396e-05, "loss": 0.0647, "step": 59890 }, { "epoch": 3.542911220204649, "grad_norm": 0.3313955068588257, "learning_rate": 2.2460065900918967e-05, "loss": 0.0677, "step": 59900 }, { "epoch": 3.543502691192997, "grad_norm": 0.25083252787590027, "learning_rate": 2.245757287324015e-05, "loss": 0.0548, "step": 59910 }, { "epoch": 3.544094162181345, "grad_norm": 0.19498567283153534, "learning_rate": 2.2455079571892435e-05, "loss": 0.0587, "step": 59920 }, { "epoch": 3.544685633169693, "grad_norm": 0.413817435503006, "learning_rate": 2.245258599696732e-05, "loss": 0.0782, "step": 59930 }, { "epoch": 3.545277104158041, "grad_norm": 0.17462480068206787, "learning_rate": 2.2450092148556313e-05, "loss": 0.0707, "step": 59940 }, { "epoch": 3.545868575146389, "grad_norm": 0.2273186296224594, "learning_rate": 2.244759802675092e-05, "loss": 0.0672, "step": 59950 }, { "epoch": 3.546460046134737, "grad_norm": 0.21299971640110016, "learning_rate": 2.244510363164268e-05, "loss": 0.0589, "step": 59960 }, { "epoch": 3.547051517123085, "grad_norm": 0.1939006745815277, "learning_rate": 2.244260896332312e-05, "loss": 0.0626, "step": 59970 }, { "epoch": 3.5476429881114333, "grad_norm": 0.20604632794857025, "learning_rate": 2.2440114021883788e-05, "loss": 0.0753, "step": 59980 }, { "epoch": 3.548234459099781, "grad_norm": 0.42090949416160583, "learning_rate": 2.2437618807416243e-05, "loss": 0.0735, "step": 59990 }, { "epoch": 3.548825930088129, "grad_norm": 0.308079332113266, "learning_rate": 2.2435123320012057e-05, "loss": 0.0821, "step": 60000 }, { "epoch": 3.549417401076477, "grad_norm": 0.4138772785663605, "learning_rate": 2.243262755976279e-05, "loss": 0.0746, "step": 60010 }, { "epoch": 3.5500088720648253, "grad_norm": 0.2794824540615082, "learning_rate": 2.2430131526760044e-05, "loss": 0.0553, "step": 60020 }, { "epoch": 3.550600343053173, "grad_norm": 0.28858014941215515, "learning_rate": 2.242763522109541e-05, "loss": 0.0859, "step": 60030 }, { "epoch": 3.5511918140415215, "grad_norm": 0.2803875505924225, "learning_rate": 2.242513864286049e-05, "loss": 0.0792, "step": 60040 }, { "epoch": 3.5517832850298694, "grad_norm": 0.22494617104530334, "learning_rate": 2.2422641792146906e-05, "loss": 0.0727, "step": 60050 }, { "epoch": 3.5523747560182173, "grad_norm": 0.26938676834106445, "learning_rate": 2.2420144669046283e-05, "loss": 0.0632, "step": 60060 }, { "epoch": 3.552966227006565, "grad_norm": 0.22485986351966858, "learning_rate": 2.241764727365026e-05, "loss": 0.0568, "step": 60070 }, { "epoch": 3.5535576979949135, "grad_norm": 0.2500336170196533, "learning_rate": 2.241514960605047e-05, "loss": 0.0683, "step": 60080 }, { "epoch": 3.5541491689832614, "grad_norm": 0.20979081094264984, "learning_rate": 2.241265166633859e-05, "loss": 0.0798, "step": 60090 }, { "epoch": 3.5547406399716093, "grad_norm": 0.29819077253341675, "learning_rate": 2.241015345460627e-05, "loss": 0.0671, "step": 60100 }, { "epoch": 3.5553321109599576, "grad_norm": 0.3860347270965576, "learning_rate": 2.2407654970945197e-05, "loss": 0.0595, "step": 60110 }, { "epoch": 3.5559235819483055, "grad_norm": 0.33308568596839905, "learning_rate": 2.2405156215447045e-05, "loss": 0.0599, "step": 60120 }, { "epoch": 3.5565150529366534, "grad_norm": 0.24190610647201538, "learning_rate": 2.2402657188203525e-05, "loss": 0.0751, "step": 60130 }, { "epoch": 3.5571065239250013, "grad_norm": 0.2630539834499359, "learning_rate": 2.2400157889306334e-05, "loss": 0.0727, "step": 60140 }, { "epoch": 3.5576979949133496, "grad_norm": 0.26586171984672546, "learning_rate": 2.2397658318847192e-05, "loss": 0.0642, "step": 60150 }, { "epoch": 3.5582894659016975, "grad_norm": 0.2579649090766907, "learning_rate": 2.2395158476917822e-05, "loss": 0.0629, "step": 60160 }, { "epoch": 3.558880936890046, "grad_norm": 0.44353294372558594, "learning_rate": 2.2392658363609962e-05, "loss": 0.0597, "step": 60170 }, { "epoch": 3.5594724078783937, "grad_norm": 0.3332165479660034, "learning_rate": 2.2390157979015355e-05, "loss": 0.0713, "step": 60180 }, { "epoch": 3.5600638788667416, "grad_norm": 0.3077091574668884, "learning_rate": 2.238765732322576e-05, "loss": 0.0703, "step": 60190 }, { "epoch": 3.5606553498550895, "grad_norm": 0.2023640125989914, "learning_rate": 2.2385156396332947e-05, "loss": 0.0722, "step": 60200 }, { "epoch": 3.5612468208434374, "grad_norm": 0.2700214385986328, "learning_rate": 2.2382655198428685e-05, "loss": 0.0569, "step": 60210 }, { "epoch": 3.5618382918317857, "grad_norm": 0.3298155665397644, "learning_rate": 2.238015372960476e-05, "loss": 0.0614, "step": 60220 }, { "epoch": 3.5624297628201336, "grad_norm": 0.23845484852790833, "learning_rate": 2.2377651989952976e-05, "loss": 0.0772, "step": 60230 }, { "epoch": 3.563021233808482, "grad_norm": 0.27367645502090454, "learning_rate": 2.2375149979565132e-05, "loss": 0.0659, "step": 60240 }, { "epoch": 3.56361270479683, "grad_norm": 0.239519402384758, "learning_rate": 2.237264769853304e-05, "loss": 0.0711, "step": 60250 }, { "epoch": 3.5642041757851777, "grad_norm": 0.3230857253074646, "learning_rate": 2.237014514694854e-05, "loss": 0.0741, "step": 60260 }, { "epoch": 3.5647956467735256, "grad_norm": 0.22555752098560333, "learning_rate": 2.2367642324903455e-05, "loss": 0.0578, "step": 60270 }, { "epoch": 3.565387117761874, "grad_norm": 0.3979608118534088, "learning_rate": 2.2365139232489626e-05, "loss": 0.0798, "step": 60280 }, { "epoch": 3.565978588750222, "grad_norm": 0.2541196644306183, "learning_rate": 2.2362635869798924e-05, "loss": 0.0848, "step": 60290 }, { "epoch": 3.5665700597385697, "grad_norm": 0.2032260149717331, "learning_rate": 2.2360132236923207e-05, "loss": 0.0777, "step": 60300 }, { "epoch": 3.567161530726918, "grad_norm": 0.20832222700119019, "learning_rate": 2.2357628333954345e-05, "loss": 0.0716, "step": 60310 }, { "epoch": 3.567753001715266, "grad_norm": 0.24472740292549133, "learning_rate": 2.235512416098423e-05, "loss": 0.0574, "step": 60320 }, { "epoch": 3.5683444727036138, "grad_norm": 0.20993666350841522, "learning_rate": 2.2352619718104757e-05, "loss": 0.0857, "step": 60330 }, { "epoch": 3.5689359436919617, "grad_norm": 0.21660064160823822, "learning_rate": 2.2350115005407827e-05, "loss": 0.07, "step": 60340 }, { "epoch": 3.56952741468031, "grad_norm": 0.22931864857673645, "learning_rate": 2.2347610022985357e-05, "loss": 0.0698, "step": 60350 }, { "epoch": 3.570118885668658, "grad_norm": 0.24382300674915314, "learning_rate": 2.234510477092927e-05, "loss": 0.0656, "step": 60360 }, { "epoch": 3.570710356657006, "grad_norm": 0.2694830298423767, "learning_rate": 2.234259924933151e-05, "loss": 0.0444, "step": 60370 }, { "epoch": 3.571301827645354, "grad_norm": 0.2921294867992401, "learning_rate": 2.2340093458284002e-05, "loss": 0.0779, "step": 60380 }, { "epoch": 3.571893298633702, "grad_norm": 0.21052700281143188, "learning_rate": 2.233758739787872e-05, "loss": 0.0805, "step": 60390 }, { "epoch": 3.57248476962205, "grad_norm": 0.32304784655570984, "learning_rate": 2.233508106820762e-05, "loss": 0.0769, "step": 60400 }, { "epoch": 3.573076240610398, "grad_norm": 0.28677427768707275, "learning_rate": 2.2332574469362677e-05, "loss": 0.0585, "step": 60410 }, { "epoch": 3.573667711598746, "grad_norm": 0.18534690141677856, "learning_rate": 2.2330067601435874e-05, "loss": 0.0585, "step": 60420 }, { "epoch": 3.574259182587094, "grad_norm": 0.21426115930080414, "learning_rate": 2.2327560464519204e-05, "loss": 0.0803, "step": 60430 }, { "epoch": 3.5748506535754423, "grad_norm": 0.20629866421222687, "learning_rate": 2.2325053058704678e-05, "loss": 0.0758, "step": 60440 }, { "epoch": 3.57544212456379, "grad_norm": 0.25979286432266235, "learning_rate": 2.23225453840843e-05, "loss": 0.0662, "step": 60450 }, { "epoch": 3.576033595552138, "grad_norm": 0.36203935742378235, "learning_rate": 2.2320037440750102e-05, "loss": 0.0664, "step": 60460 }, { "epoch": 3.576625066540486, "grad_norm": 0.19506196677684784, "learning_rate": 2.2317529228794113e-05, "loss": 0.051, "step": 60470 }, { "epoch": 3.5772165375288343, "grad_norm": 0.203768789768219, "learning_rate": 2.231502074830837e-05, "loss": 0.0684, "step": 60480 }, { "epoch": 3.577808008517182, "grad_norm": 0.2824991047382355, "learning_rate": 2.2312511999384942e-05, "loss": 0.0824, "step": 60490 }, { "epoch": 3.5783994795055305, "grad_norm": 0.264977365732193, "learning_rate": 2.231000298211588e-05, "loss": 0.0761, "step": 60500 }, { "epoch": 3.5789909504938784, "grad_norm": 0.2002137452363968, "learning_rate": 2.2307493696593262e-05, "loss": 0.0571, "step": 60510 }, { "epoch": 3.5795824214822263, "grad_norm": 0.5469442009925842, "learning_rate": 2.230498414290917e-05, "loss": 0.0568, "step": 60520 }, { "epoch": 3.580173892470574, "grad_norm": 0.32548123598098755, "learning_rate": 2.2302474321155687e-05, "loss": 0.0829, "step": 60530 }, { "epoch": 3.5807653634589225, "grad_norm": 0.1982114017009735, "learning_rate": 2.2299964231424927e-05, "loss": 0.0701, "step": 60540 }, { "epoch": 3.5813568344472704, "grad_norm": 0.31245285272598267, "learning_rate": 2.2297453873809003e-05, "loss": 0.0801, "step": 60550 }, { "epoch": 3.5819483054356183, "grad_norm": 0.20446626842021942, "learning_rate": 2.229494324840003e-05, "loss": 0.0602, "step": 60560 }, { "epoch": 3.5825397764239666, "grad_norm": 0.17150558531284332, "learning_rate": 2.2292432355290143e-05, "loss": 0.0637, "step": 60570 }, { "epoch": 3.5831312474123145, "grad_norm": 0.22144359350204468, "learning_rate": 2.2289921194571483e-05, "loss": 0.0629, "step": 60580 }, { "epoch": 3.5837227184006624, "grad_norm": 0.3862224519252777, "learning_rate": 2.22874097663362e-05, "loss": 0.0761, "step": 60590 }, { "epoch": 3.5843141893890103, "grad_norm": 0.23152858018875122, "learning_rate": 2.228489807067646e-05, "loss": 0.068, "step": 60600 }, { "epoch": 3.5849056603773586, "grad_norm": 0.32055628299713135, "learning_rate": 2.228238610768443e-05, "loss": 0.0607, "step": 60610 }, { "epoch": 3.5854971313657065, "grad_norm": 0.19801002740859985, "learning_rate": 2.2279873877452292e-05, "loss": 0.0608, "step": 60620 }, { "epoch": 3.586088602354055, "grad_norm": 0.23092128336429596, "learning_rate": 2.227736138007224e-05, "loss": 0.0725, "step": 60630 }, { "epoch": 3.5866800733424027, "grad_norm": 0.4872131943702698, "learning_rate": 2.227484861563647e-05, "loss": 0.0805, "step": 60640 }, { "epoch": 3.5872715443307506, "grad_norm": 0.29179295897483826, "learning_rate": 2.227233558423719e-05, "loss": 0.0773, "step": 60650 }, { "epoch": 3.5878630153190985, "grad_norm": 0.2148386389017105, "learning_rate": 2.2269822285966626e-05, "loss": 0.0694, "step": 60660 }, { "epoch": 3.5884544863074463, "grad_norm": 0.28540167212486267, "learning_rate": 2.226730872091701e-05, "loss": 0.0628, "step": 60670 }, { "epoch": 3.5890459572957947, "grad_norm": 0.31509947776794434, "learning_rate": 2.2264794889180576e-05, "loss": 0.078, "step": 60680 }, { "epoch": 3.5896374282841426, "grad_norm": 0.1804039031267166, "learning_rate": 2.226228079084957e-05, "loss": 0.0668, "step": 60690 }, { "epoch": 3.590228899272491, "grad_norm": 0.26125746965408325, "learning_rate": 2.225976642601626e-05, "loss": 0.0634, "step": 60700 }, { "epoch": 3.590820370260839, "grad_norm": 0.271543025970459, "learning_rate": 2.2257251794772916e-05, "loss": 0.0683, "step": 60710 }, { "epoch": 3.5914118412491867, "grad_norm": 0.23187248408794403, "learning_rate": 2.2254736897211807e-05, "loss": 0.0668, "step": 60720 }, { "epoch": 3.5920033122375346, "grad_norm": 0.21362583339214325, "learning_rate": 2.225222173342523e-05, "loss": 0.0814, "step": 60730 }, { "epoch": 3.592594783225883, "grad_norm": 0.2766668498516083, "learning_rate": 2.224970630350548e-05, "loss": 0.074, "step": 60740 }, { "epoch": 3.5931862542142308, "grad_norm": 0.2827211320400238, "learning_rate": 2.2247190607544866e-05, "loss": 0.0602, "step": 60750 }, { "epoch": 3.5937777252025787, "grad_norm": 0.21572522819042206, "learning_rate": 2.2244674645635705e-05, "loss": 0.065, "step": 60760 }, { "epoch": 3.594369196190927, "grad_norm": 0.29310333728790283, "learning_rate": 2.224215841787033e-05, "loss": 0.0538, "step": 60770 }, { "epoch": 3.594960667179275, "grad_norm": 0.2522077262401581, "learning_rate": 2.2239641924341067e-05, "loss": 0.0791, "step": 60780 }, { "epoch": 3.5955521381676228, "grad_norm": 0.30288293957710266, "learning_rate": 2.223712516514027e-05, "loss": 0.0844, "step": 60790 }, { "epoch": 3.5961436091559706, "grad_norm": 0.28091150522232056, "learning_rate": 2.22346081403603e-05, "loss": 0.0743, "step": 60800 }, { "epoch": 3.596735080144319, "grad_norm": 0.21678823232650757, "learning_rate": 2.223209085009352e-05, "loss": 0.0665, "step": 60810 }, { "epoch": 3.597326551132667, "grad_norm": 0.3631535768508911, "learning_rate": 2.2229573294432302e-05, "loss": 0.0602, "step": 60820 }, { "epoch": 3.597918022121015, "grad_norm": 0.3127500116825104, "learning_rate": 2.222705547346904e-05, "loss": 0.0784, "step": 60830 }, { "epoch": 3.598509493109363, "grad_norm": 0.287783145904541, "learning_rate": 2.222453738729613e-05, "loss": 0.0737, "step": 60840 }, { "epoch": 3.599100964097711, "grad_norm": 0.3034447431564331, "learning_rate": 2.2222019036005965e-05, "loss": 0.0675, "step": 60850 }, { "epoch": 3.599692435086059, "grad_norm": 0.24448587000370026, "learning_rate": 2.2219500419690974e-05, "loss": 0.0677, "step": 60860 }, { "epoch": 3.600283906074407, "grad_norm": 0.16017676889896393, "learning_rate": 2.2216981538443577e-05, "loss": 0.0519, "step": 60870 }, { "epoch": 3.600875377062755, "grad_norm": 0.32404232025146484, "learning_rate": 2.221446239235621e-05, "loss": 0.0776, "step": 60880 }, { "epoch": 3.601466848051103, "grad_norm": 0.3601243793964386, "learning_rate": 2.2211942981521313e-05, "loss": 0.0741, "step": 60890 }, { "epoch": 3.6020583190394513, "grad_norm": 0.178043395280838, "learning_rate": 2.220942330603135e-05, "loss": 0.0623, "step": 60900 }, { "epoch": 3.602649790027799, "grad_norm": 0.39042195677757263, "learning_rate": 2.2206903365978776e-05, "loss": 0.0746, "step": 60910 }, { "epoch": 3.603241261016147, "grad_norm": 0.2784009873867035, "learning_rate": 2.2204383161456068e-05, "loss": 0.0574, "step": 60920 }, { "epoch": 3.603832732004495, "grad_norm": 0.27470752596855164, "learning_rate": 2.2201862692555704e-05, "loss": 0.0778, "step": 60930 }, { "epoch": 3.6044242029928433, "grad_norm": 0.17401157319545746, "learning_rate": 2.2199341959370195e-05, "loss": 0.0743, "step": 60940 }, { "epoch": 3.605015673981191, "grad_norm": 0.4549206793308258, "learning_rate": 2.219682096199202e-05, "loss": 0.0868, "step": 60950 }, { "epoch": 3.6056071449695395, "grad_norm": 0.20158012211322784, "learning_rate": 2.2194299700513702e-05, "loss": 0.055, "step": 60960 }, { "epoch": 3.6061986159578874, "grad_norm": 0.21977607905864716, "learning_rate": 2.219177817502777e-05, "loss": 0.0719, "step": 60970 }, { "epoch": 3.6067900869462353, "grad_norm": 0.2642035186290741, "learning_rate": 2.2189256385626746e-05, "loss": 0.0767, "step": 60980 }, { "epoch": 3.607381557934583, "grad_norm": 0.19986052811145782, "learning_rate": 2.2186734332403174e-05, "loss": 0.0752, "step": 60990 }, { "epoch": 3.6079730289229315, "grad_norm": 0.2594001889228821, "learning_rate": 2.218421201544961e-05, "loss": 0.0712, "step": 61000 }, { "epoch": 3.6085644999112794, "grad_norm": 0.1962708979845047, "learning_rate": 2.218168943485861e-05, "loss": 0.0666, "step": 61010 }, { "epoch": 3.6091559708996273, "grad_norm": 0.22460965812206268, "learning_rate": 2.2179166590722742e-05, "loss": 0.0534, "step": 61020 }, { "epoch": 3.6097474418879756, "grad_norm": 0.332396000623703, "learning_rate": 2.2176643483134594e-05, "loss": 0.0799, "step": 61030 }, { "epoch": 3.6103389128763235, "grad_norm": 0.4792465567588806, "learning_rate": 2.2174120112186752e-05, "loss": 0.0727, "step": 61040 }, { "epoch": 3.6109303838646714, "grad_norm": 0.317129909992218, "learning_rate": 2.217159647797181e-05, "loss": 0.0695, "step": 61050 }, { "epoch": 3.6115218548530192, "grad_norm": 0.20831605792045593, "learning_rate": 2.2169072580582387e-05, "loss": 0.065, "step": 61060 }, { "epoch": 3.6121133258413676, "grad_norm": 0.19702428579330444, "learning_rate": 2.2166548420111103e-05, "loss": 0.0505, "step": 61070 }, { "epoch": 3.6127047968297155, "grad_norm": 0.2690291106700897, "learning_rate": 2.216402399665057e-05, "loss": 0.0744, "step": 61080 }, { "epoch": 3.613296267818064, "grad_norm": 0.23296214640140533, "learning_rate": 2.2161499310293442e-05, "loss": 0.0775, "step": 61090 }, { "epoch": 3.6138877388064117, "grad_norm": 0.2183951884508133, "learning_rate": 2.2158974361132366e-05, "loss": 0.0666, "step": 61100 }, { "epoch": 3.6144792097947596, "grad_norm": 0.3117885887622833, "learning_rate": 2.2156449149259993e-05, "loss": 0.0679, "step": 61110 }, { "epoch": 3.6150706807831074, "grad_norm": 0.20863474905490875, "learning_rate": 2.215392367476899e-05, "loss": 0.0631, "step": 61120 }, { "epoch": 3.6156621517714553, "grad_norm": 0.2993501126766205, "learning_rate": 2.2151397937752044e-05, "loss": 0.0792, "step": 61130 }, { "epoch": 3.6162536227598037, "grad_norm": 0.2286103069782257, "learning_rate": 2.2148871938301826e-05, "loss": 0.0683, "step": 61140 }, { "epoch": 3.6168450937481516, "grad_norm": 0.19987398386001587, "learning_rate": 2.2146345676511044e-05, "loss": 0.0589, "step": 61150 }, { "epoch": 3.6174365647365, "grad_norm": 0.21442891657352448, "learning_rate": 2.21438191524724e-05, "loss": 0.062, "step": 61160 }, { "epoch": 3.6180280357248478, "grad_norm": 0.23245052993297577, "learning_rate": 2.214129236627861e-05, "loss": 0.0543, "step": 61170 }, { "epoch": 3.6186195067131957, "grad_norm": 0.3406074345111847, "learning_rate": 2.2138765318022396e-05, "loss": 0.0832, "step": 61180 }, { "epoch": 3.6192109777015435, "grad_norm": 0.21748006343841553, "learning_rate": 2.213623800779649e-05, "loss": 0.0658, "step": 61190 }, { "epoch": 3.619802448689892, "grad_norm": 0.2622928023338318, "learning_rate": 2.2133710435693648e-05, "loss": 0.0644, "step": 61200 }, { "epoch": 3.6203939196782398, "grad_norm": 0.3167169392108917, "learning_rate": 2.2131182601806616e-05, "loss": 0.0643, "step": 61210 }, { "epoch": 3.6209853906665876, "grad_norm": 0.3612213432788849, "learning_rate": 2.2128654506228158e-05, "loss": 0.0608, "step": 61220 }, { "epoch": 3.621576861654936, "grad_norm": 0.28210946917533875, "learning_rate": 2.2126126149051045e-05, "loss": 0.0674, "step": 61230 }, { "epoch": 3.622168332643284, "grad_norm": 0.24346742033958435, "learning_rate": 2.212359753036806e-05, "loss": 0.0781, "step": 61240 }, { "epoch": 3.6227598036316317, "grad_norm": 0.36825671792030334, "learning_rate": 2.2121068650271998e-05, "loss": 0.0761, "step": 61250 }, { "epoch": 3.6233512746199796, "grad_norm": 0.22608652710914612, "learning_rate": 2.211853950885565e-05, "loss": 0.0594, "step": 61260 }, { "epoch": 3.623942745608328, "grad_norm": 0.22785736620426178, "learning_rate": 2.2116010106211854e-05, "loss": 0.0523, "step": 61270 }, { "epoch": 3.624534216596676, "grad_norm": 0.31666573882102966, "learning_rate": 2.21134804424334e-05, "loss": 0.0698, "step": 61280 }, { "epoch": 3.625125687585024, "grad_norm": 0.2438500076532364, "learning_rate": 2.2110950517613135e-05, "loss": 0.0716, "step": 61290 }, { "epoch": 3.625717158573372, "grad_norm": 0.2498377114534378, "learning_rate": 2.21084203318439e-05, "loss": 0.0638, "step": 61300 }, { "epoch": 3.62630862956172, "grad_norm": 0.26318827271461487, "learning_rate": 2.2105889885218542e-05, "loss": 0.0689, "step": 61310 }, { "epoch": 3.626900100550068, "grad_norm": 0.21816833317279816, "learning_rate": 2.2103359177829912e-05, "loss": 0.0622, "step": 61320 }, { "epoch": 3.627491571538416, "grad_norm": 0.23005694150924683, "learning_rate": 2.2100828209770888e-05, "loss": 0.0733, "step": 61330 }, { "epoch": 3.628083042526764, "grad_norm": 0.3906135559082031, "learning_rate": 2.209829698113435e-05, "loss": 0.0786, "step": 61340 }, { "epoch": 3.628674513515112, "grad_norm": 0.27205613255500793, "learning_rate": 2.209576549201318e-05, "loss": 0.0774, "step": 61350 }, { "epoch": 3.6292659845034603, "grad_norm": 0.18961140513420105, "learning_rate": 2.2093233742500273e-05, "loss": 0.0666, "step": 61360 }, { "epoch": 3.629857455491808, "grad_norm": 0.26037535071372986, "learning_rate": 2.209070173268855e-05, "loss": 0.0523, "step": 61370 }, { "epoch": 3.630448926480156, "grad_norm": 0.27055349946022034, "learning_rate": 2.2088169462670912e-05, "loss": 0.0816, "step": 61380 }, { "epoch": 3.631040397468504, "grad_norm": 0.28929004073143005, "learning_rate": 2.208563693254029e-05, "loss": 0.0813, "step": 61390 }, { "epoch": 3.6316318684568523, "grad_norm": 0.38387665152549744, "learning_rate": 2.2083104142389626e-05, "loss": 0.0717, "step": 61400 }, { "epoch": 3.6322233394452, "grad_norm": 0.2561796009540558, "learning_rate": 2.2080571092311857e-05, "loss": 0.0635, "step": 61410 }, { "epoch": 3.6328148104335485, "grad_norm": 0.2469596415758133, "learning_rate": 2.207803778239994e-05, "loss": 0.0579, "step": 61420 }, { "epoch": 3.6334062814218964, "grad_norm": 0.25463029742240906, "learning_rate": 2.2075504212746846e-05, "loss": 0.0705, "step": 61430 }, { "epoch": 3.6339977524102443, "grad_norm": 0.4266403615474701, "learning_rate": 2.207297038344554e-05, "loss": 0.0837, "step": 61440 }, { "epoch": 3.634589223398592, "grad_norm": 0.2787102162837982, "learning_rate": 2.2070436294589006e-05, "loss": 0.0739, "step": 61450 }, { "epoch": 3.6351806943869405, "grad_norm": 0.2006814032793045, "learning_rate": 2.2067901946270242e-05, "loss": 0.0605, "step": 61460 }, { "epoch": 3.6357721653752884, "grad_norm": 0.22094090282917023, "learning_rate": 2.206536733858225e-05, "loss": 0.0609, "step": 61470 }, { "epoch": 3.6363636363636362, "grad_norm": 0.23673319816589355, "learning_rate": 2.2062832471618034e-05, "loss": 0.0757, "step": 61480 }, { "epoch": 3.6369551073519846, "grad_norm": 0.2715672254562378, "learning_rate": 2.2060297345470625e-05, "loss": 0.0799, "step": 61490 }, { "epoch": 3.6375465783403325, "grad_norm": 0.2301141619682312, "learning_rate": 2.205776196023305e-05, "loss": 0.0592, "step": 61500 }, { "epoch": 3.6381380493286803, "grad_norm": 0.25271862745285034, "learning_rate": 2.2055226315998353e-05, "loss": 0.0657, "step": 61510 }, { "epoch": 3.6387295203170282, "grad_norm": 0.28308337926864624, "learning_rate": 2.2052690412859575e-05, "loss": 0.0554, "step": 61520 }, { "epoch": 3.6393209913053766, "grad_norm": 0.23726312816143036, "learning_rate": 2.205015425090979e-05, "loss": 0.0843, "step": 61530 }, { "epoch": 3.6399124622937244, "grad_norm": 0.262703001499176, "learning_rate": 2.204761783024205e-05, "loss": 0.073, "step": 61540 }, { "epoch": 3.640503933282073, "grad_norm": 0.30628886818885803, "learning_rate": 2.2045081150949445e-05, "loss": 0.0732, "step": 61550 }, { "epoch": 3.6410954042704207, "grad_norm": 0.27714964747428894, "learning_rate": 2.2042544213125055e-05, "loss": 0.0614, "step": 61560 }, { "epoch": 3.6416868752587686, "grad_norm": 0.28511327505111694, "learning_rate": 2.2040007016861988e-05, "loss": 0.0706, "step": 61570 }, { "epoch": 3.6422783462471164, "grad_norm": 0.22825849056243896, "learning_rate": 2.203746956225334e-05, "loss": 0.0714, "step": 61580 }, { "epoch": 3.6428698172354643, "grad_norm": 0.25508594512939453, "learning_rate": 2.203493184939224e-05, "loss": 0.0736, "step": 61590 }, { "epoch": 3.6434612882238127, "grad_norm": 0.21649231016635895, "learning_rate": 2.2032393878371798e-05, "loss": 0.0703, "step": 61600 }, { "epoch": 3.6440527592121605, "grad_norm": 0.28854572772979736, "learning_rate": 2.202985564928516e-05, "loss": 0.0654, "step": 61610 }, { "epoch": 3.644644230200509, "grad_norm": 0.16577975451946259, "learning_rate": 2.202731716222547e-05, "loss": 0.0548, "step": 61620 }, { "epoch": 3.6452357011888568, "grad_norm": 0.31568121910095215, "learning_rate": 2.202477841728588e-05, "loss": 0.0748, "step": 61630 }, { "epoch": 3.6458271721772046, "grad_norm": 0.5175712704658508, "learning_rate": 2.2022239414559558e-05, "loss": 0.0763, "step": 61640 }, { "epoch": 3.6464186431655525, "grad_norm": 0.17973312735557556, "learning_rate": 2.201970015413967e-05, "loss": 0.0777, "step": 61650 }, { "epoch": 3.647010114153901, "grad_norm": 0.30234479904174805, "learning_rate": 2.2017160636119403e-05, "loss": 0.066, "step": 61660 }, { "epoch": 3.6476015851422487, "grad_norm": 0.2489735335111618, "learning_rate": 2.2014620860591948e-05, "loss": 0.0516, "step": 61670 }, { "epoch": 3.6481930561305966, "grad_norm": 0.3041554093360901, "learning_rate": 2.2012080827650507e-05, "loss": 0.0761, "step": 61680 }, { "epoch": 3.648784527118945, "grad_norm": 0.18139268457889557, "learning_rate": 2.2009540537388293e-05, "loss": 0.0696, "step": 61690 }, { "epoch": 3.649375998107293, "grad_norm": 0.23845025897026062, "learning_rate": 2.2006999989898523e-05, "loss": 0.0672, "step": 61700 }, { "epoch": 3.6499674690956407, "grad_norm": 0.3495868444442749, "learning_rate": 2.200445918527443e-05, "loss": 0.0708, "step": 61710 }, { "epoch": 3.6505589400839886, "grad_norm": 0.3810141980648041, "learning_rate": 2.2001918123609252e-05, "loss": 0.0575, "step": 61720 }, { "epoch": 3.651150411072337, "grad_norm": 0.25922051072120667, "learning_rate": 2.1999376804996233e-05, "loss": 0.077, "step": 61730 }, { "epoch": 3.651741882060685, "grad_norm": 0.34002572298049927, "learning_rate": 2.199683522952864e-05, "loss": 0.0709, "step": 61740 }, { "epoch": 3.652333353049033, "grad_norm": 0.2459460347890854, "learning_rate": 2.1994293397299735e-05, "loss": 0.0688, "step": 61750 }, { "epoch": 3.652924824037381, "grad_norm": 0.2741642892360687, "learning_rate": 2.19917513084028e-05, "loss": 0.057, "step": 61760 }, { "epoch": 3.653516295025729, "grad_norm": 0.16378143429756165, "learning_rate": 2.1989208962931115e-05, "loss": 0.0552, "step": 61770 }, { "epoch": 3.654107766014077, "grad_norm": 0.21589046716690063, "learning_rate": 2.198666636097798e-05, "loss": 0.0742, "step": 61780 }, { "epoch": 3.654699237002425, "grad_norm": 0.2679607570171356, "learning_rate": 2.1984123502636698e-05, "loss": 0.0777, "step": 61790 }, { "epoch": 3.655290707990773, "grad_norm": 0.33066534996032715, "learning_rate": 2.1981580388000584e-05, "loss": 0.078, "step": 61800 }, { "epoch": 3.655882178979121, "grad_norm": 0.17241603136062622, "learning_rate": 2.1979037017162966e-05, "loss": 0.0674, "step": 61810 }, { "epoch": 3.6564736499674693, "grad_norm": 0.27808433771133423, "learning_rate": 2.1976493390217175e-05, "loss": 0.0657, "step": 61820 }, { "epoch": 3.657065120955817, "grad_norm": 0.19643966853618622, "learning_rate": 2.197394950725655e-05, "loss": 0.0884, "step": 61830 }, { "epoch": 3.657656591944165, "grad_norm": 0.27742037177085876, "learning_rate": 2.197140536837445e-05, "loss": 0.0636, "step": 61840 }, { "epoch": 3.658248062932513, "grad_norm": 0.2577517330646515, "learning_rate": 2.1968860973664234e-05, "loss": 0.07, "step": 61850 }, { "epoch": 3.6588395339208613, "grad_norm": 0.24153223633766174, "learning_rate": 2.196631632321927e-05, "loss": 0.0722, "step": 61860 }, { "epoch": 3.659431004909209, "grad_norm": 0.24970322847366333, "learning_rate": 2.1963771417132937e-05, "loss": 0.0599, "step": 61870 }, { "epoch": 3.6600224758975575, "grad_norm": 0.22995494306087494, "learning_rate": 2.1961226255498636e-05, "loss": 0.0774, "step": 61880 }, { "epoch": 3.6606139468859054, "grad_norm": 0.47656458616256714, "learning_rate": 2.1958680838409758e-05, "loss": 0.0687, "step": 61890 }, { "epoch": 3.6612054178742532, "grad_norm": 0.1934625804424286, "learning_rate": 2.1956135165959708e-05, "loss": 0.071, "step": 61900 }, { "epoch": 3.661796888862601, "grad_norm": 0.19671660661697388, "learning_rate": 2.1953589238241914e-05, "loss": 0.0543, "step": 61910 }, { "epoch": 3.6623883598509495, "grad_norm": 0.36295682191848755, "learning_rate": 2.1951043055349796e-05, "loss": 0.0618, "step": 61920 }, { "epoch": 3.6629798308392973, "grad_norm": 0.2571692168712616, "learning_rate": 2.194849661737679e-05, "loss": 0.0792, "step": 61930 }, { "epoch": 3.6635713018276452, "grad_norm": 0.250510573387146, "learning_rate": 2.194594992441635e-05, "loss": 0.0714, "step": 61940 }, { "epoch": 3.6641627728159936, "grad_norm": 0.20066231489181519, "learning_rate": 2.1943402976561926e-05, "loss": 0.0788, "step": 61950 }, { "epoch": 3.6647542438043414, "grad_norm": 0.27648085355758667, "learning_rate": 2.1940855773906976e-05, "loss": 0.0636, "step": 61960 }, { "epoch": 3.6653457147926893, "grad_norm": 0.19213417172431946, "learning_rate": 2.193830831654499e-05, "loss": 0.0609, "step": 61970 }, { "epoch": 3.665937185781037, "grad_norm": 0.26499900221824646, "learning_rate": 2.193576060456944e-05, "loss": 0.0761, "step": 61980 }, { "epoch": 3.6665286567693856, "grad_norm": 0.18545491993427277, "learning_rate": 2.1933212638073817e-05, "loss": 0.0746, "step": 61990 }, { "epoch": 3.6671201277577334, "grad_norm": 0.22346341609954834, "learning_rate": 2.193066441715163e-05, "loss": 0.0692, "step": 62000 }, { "epoch": 3.6677115987460818, "grad_norm": 0.23489467799663544, "learning_rate": 2.192811594189639e-05, "loss": 0.0596, "step": 62010 }, { "epoch": 3.6683030697344297, "grad_norm": 0.28867828845977783, "learning_rate": 2.1925567212401615e-05, "loss": 0.0565, "step": 62020 }, { "epoch": 3.6688945407227775, "grad_norm": 0.6276708245277405, "learning_rate": 2.1923018228760833e-05, "loss": 0.0763, "step": 62030 }, { "epoch": 3.6694860117111254, "grad_norm": 0.23981741070747375, "learning_rate": 2.1920468991067594e-05, "loss": 0.0821, "step": 62040 }, { "epoch": 3.6700774826994733, "grad_norm": 0.2749853730201721, "learning_rate": 2.1917919499415434e-05, "loss": 0.0741, "step": 62050 }, { "epoch": 3.6706689536878216, "grad_norm": 0.2828308939933777, "learning_rate": 2.191536975389792e-05, "loss": 0.0719, "step": 62060 }, { "epoch": 3.6712604246761695, "grad_norm": 0.2282838523387909, "learning_rate": 2.1912819754608612e-05, "loss": 0.059, "step": 62070 }, { "epoch": 3.671851895664518, "grad_norm": 0.20980100333690643, "learning_rate": 2.191026950164109e-05, "loss": 0.0699, "step": 62080 }, { "epoch": 3.6724433666528657, "grad_norm": 0.2994769215583801, "learning_rate": 2.1907718995088946e-05, "loss": 0.074, "step": 62090 }, { "epoch": 3.6730348376412136, "grad_norm": 0.20609985291957855, "learning_rate": 2.190516823504577e-05, "loss": 0.0683, "step": 62100 }, { "epoch": 3.6736263086295615, "grad_norm": 0.2702770531177521, "learning_rate": 2.1902617221605163e-05, "loss": 0.0645, "step": 62110 }, { "epoch": 3.67421777961791, "grad_norm": 0.23107455670833588, "learning_rate": 2.190006595486074e-05, "loss": 0.0625, "step": 62120 }, { "epoch": 3.6748092506062577, "grad_norm": 0.339653879404068, "learning_rate": 2.189751443490613e-05, "loss": 0.0852, "step": 62130 }, { "epoch": 3.6754007215946056, "grad_norm": 0.32758814096450806, "learning_rate": 2.189496266183497e-05, "loss": 0.076, "step": 62140 }, { "epoch": 3.675992192582954, "grad_norm": 0.22866372764110565, "learning_rate": 2.1892410635740885e-05, "loss": 0.0654, "step": 62150 }, { "epoch": 3.676583663571302, "grad_norm": 0.32705679535865784, "learning_rate": 2.1889858356717538e-05, "loss": 0.0704, "step": 62160 }, { "epoch": 3.6771751345596497, "grad_norm": 0.28788936138153076, "learning_rate": 2.188730582485859e-05, "loss": 0.0568, "step": 62170 }, { "epoch": 3.6777666055479976, "grad_norm": 0.28715598583221436, "learning_rate": 2.1884753040257706e-05, "loss": 0.0706, "step": 62180 }, { "epoch": 3.678358076536346, "grad_norm": 0.2292279750108719, "learning_rate": 2.1882200003008565e-05, "loss": 0.0696, "step": 62190 }, { "epoch": 3.678949547524694, "grad_norm": 0.1925470232963562, "learning_rate": 2.1879646713204858e-05, "loss": 0.0691, "step": 62200 }, { "epoch": 3.679541018513042, "grad_norm": 0.29641368985176086, "learning_rate": 2.1877093170940287e-05, "loss": 0.0676, "step": 62210 }, { "epoch": 3.68013248950139, "grad_norm": 0.38703569769859314, "learning_rate": 2.1874539376308545e-05, "loss": 0.0678, "step": 62220 }, { "epoch": 3.680723960489738, "grad_norm": 0.19678464531898499, "learning_rate": 2.1871985329403357e-05, "loss": 0.0742, "step": 62230 }, { "epoch": 3.681315431478086, "grad_norm": 0.19784782826900482, "learning_rate": 2.1869431030318453e-05, "loss": 0.0748, "step": 62240 }, { "epoch": 3.681906902466434, "grad_norm": 0.30738505721092224, "learning_rate": 2.1866876479147558e-05, "loss": 0.0618, "step": 62250 }, { "epoch": 3.682498373454782, "grad_norm": 0.32217007875442505, "learning_rate": 2.186432167598442e-05, "loss": 0.06, "step": 62260 }, { "epoch": 3.68308984444313, "grad_norm": 0.20235323905944824, "learning_rate": 2.1861766620922793e-05, "loss": 0.0568, "step": 62270 }, { "epoch": 3.6836813154314783, "grad_norm": 0.28982850909233093, "learning_rate": 2.185921131405644e-05, "loss": 0.0763, "step": 62280 }, { "epoch": 3.684272786419826, "grad_norm": 0.2998345196247101, "learning_rate": 2.185665575547913e-05, "loss": 0.0681, "step": 62290 }, { "epoch": 3.684864257408174, "grad_norm": 0.29462912678718567, "learning_rate": 2.1854099945284644e-05, "loss": 0.0657, "step": 62300 }, { "epoch": 3.685455728396522, "grad_norm": 0.28120580315589905, "learning_rate": 2.1851543883566773e-05, "loss": 0.0668, "step": 62310 }, { "epoch": 3.6860471993848702, "grad_norm": 0.2794836461544037, "learning_rate": 2.1848987570419313e-05, "loss": 0.0584, "step": 62320 }, { "epoch": 3.686638670373218, "grad_norm": 0.26495563983917236, "learning_rate": 2.1846431005936078e-05, "loss": 0.0773, "step": 62330 }, { "epoch": 3.6872301413615665, "grad_norm": 0.20056284964084625, "learning_rate": 2.1843874190210886e-05, "loss": 0.0769, "step": 62340 }, { "epoch": 3.6878216123499143, "grad_norm": 0.23462992906570435, "learning_rate": 2.1841317123337555e-05, "loss": 0.0687, "step": 62350 }, { "epoch": 3.6884130833382622, "grad_norm": 0.21409542858600616, "learning_rate": 2.1838759805409927e-05, "loss": 0.0596, "step": 62360 }, { "epoch": 3.68900455432661, "grad_norm": 0.28001412749290466, "learning_rate": 2.183620223652185e-05, "loss": 0.0506, "step": 62370 }, { "epoch": 3.6895960253149584, "grad_norm": 0.49125176668167114, "learning_rate": 2.1833644416767177e-05, "loss": 0.0814, "step": 62380 }, { "epoch": 3.6901874963033063, "grad_norm": 0.2823297679424286, "learning_rate": 2.1831086346239767e-05, "loss": 0.0822, "step": 62390 }, { "epoch": 3.6907789672916542, "grad_norm": 0.20956048369407654, "learning_rate": 2.18285280250335e-05, "loss": 0.0625, "step": 62400 }, { "epoch": 3.6913704382800026, "grad_norm": 0.24558883905410767, "learning_rate": 2.1825969453242254e-05, "loss": 0.0744, "step": 62410 }, { "epoch": 3.6919619092683504, "grad_norm": 0.21287479996681213, "learning_rate": 2.1823410630959917e-05, "loss": 0.0588, "step": 62420 }, { "epoch": 3.6925533802566983, "grad_norm": 0.28029629588127136, "learning_rate": 2.1820851558280397e-05, "loss": 0.0655, "step": 62430 }, { "epoch": 3.693144851245046, "grad_norm": 0.21910078823566437, "learning_rate": 2.1818292235297603e-05, "loss": 0.0728, "step": 62440 }, { "epoch": 3.6937363222333945, "grad_norm": 0.4007279872894287, "learning_rate": 2.1815732662105447e-05, "loss": 0.0747, "step": 62450 }, { "epoch": 3.6943277932217424, "grad_norm": 0.31149372458457947, "learning_rate": 2.181317283879786e-05, "loss": 0.0612, "step": 62460 }, { "epoch": 3.6949192642100908, "grad_norm": 0.2457403540611267, "learning_rate": 2.1810612765468784e-05, "loss": 0.0699, "step": 62470 }, { "epoch": 3.6955107351984386, "grad_norm": 0.2128959596157074, "learning_rate": 2.1808052442212162e-05, "loss": 0.0688, "step": 62480 }, { "epoch": 3.6961022061867865, "grad_norm": 0.24054042994976044, "learning_rate": 2.1805491869121946e-05, "loss": 0.0711, "step": 62490 }, { "epoch": 3.6966936771751344, "grad_norm": 0.16105344891548157, "learning_rate": 2.1802931046292108e-05, "loss": 0.0744, "step": 62500 }, { "epoch": 3.6972851481634823, "grad_norm": 0.30918800830841064, "learning_rate": 2.1800369973816616e-05, "loss": 0.0616, "step": 62510 }, { "epoch": 3.6978766191518306, "grad_norm": 0.23107531666755676, "learning_rate": 2.1797808651789456e-05, "loss": 0.0556, "step": 62520 }, { "epoch": 3.6984680901401785, "grad_norm": 0.5927944779396057, "learning_rate": 2.1795247080304618e-05, "loss": 0.0753, "step": 62530 }, { "epoch": 3.699059561128527, "grad_norm": 0.2981753647327423, "learning_rate": 2.179268525945611e-05, "loss": 0.0728, "step": 62540 }, { "epoch": 3.6996510321168747, "grad_norm": 0.2981685400009155, "learning_rate": 2.1790123189337935e-05, "loss": 0.0659, "step": 62550 }, { "epoch": 3.7002425031052226, "grad_norm": 0.24546770751476288, "learning_rate": 2.1787560870044114e-05, "loss": 0.0647, "step": 62560 }, { "epoch": 3.7008339740935705, "grad_norm": 0.25060904026031494, "learning_rate": 2.1784998301668677e-05, "loss": 0.0598, "step": 62570 }, { "epoch": 3.701425445081919, "grad_norm": 0.2608087658882141, "learning_rate": 2.1782435484305666e-05, "loss": 0.0813, "step": 62580 }, { "epoch": 3.7020169160702667, "grad_norm": 0.24506491422653198, "learning_rate": 2.1779872418049118e-05, "loss": 0.0747, "step": 62590 }, { "epoch": 3.7026083870586146, "grad_norm": 0.24028533697128296, "learning_rate": 2.17773091029931e-05, "loss": 0.068, "step": 62600 }, { "epoch": 3.703199858046963, "grad_norm": 0.18917258083820343, "learning_rate": 2.1774745539231677e-05, "loss": 0.0629, "step": 62610 }, { "epoch": 3.703791329035311, "grad_norm": 0.20181237161159515, "learning_rate": 2.1772181726858914e-05, "loss": 0.0561, "step": 62620 }, { "epoch": 3.7043828000236587, "grad_norm": 0.2920893430709839, "learning_rate": 2.17696176659689e-05, "loss": 0.0714, "step": 62630 }, { "epoch": 3.7049742710120066, "grad_norm": 0.295653373003006, "learning_rate": 2.1767053356655737e-05, "loss": 0.0698, "step": 62640 }, { "epoch": 3.705565742000355, "grad_norm": 0.34584492444992065, "learning_rate": 2.1764488799013506e-05, "loss": 0.0869, "step": 62650 }, { "epoch": 3.706157212988703, "grad_norm": 0.20920847356319427, "learning_rate": 2.176192399313633e-05, "loss": 0.0676, "step": 62660 }, { "epoch": 3.706748683977051, "grad_norm": 0.19452059268951416, "learning_rate": 2.1759358939118336e-05, "loss": 0.0564, "step": 62670 }, { "epoch": 3.707340154965399, "grad_norm": 0.23940414190292358, "learning_rate": 2.1756793637053645e-05, "loss": 0.081, "step": 62680 }, { "epoch": 3.707931625953747, "grad_norm": 0.22894896566867828, "learning_rate": 2.1754228087036395e-05, "loss": 0.0696, "step": 62690 }, { "epoch": 3.708523096942095, "grad_norm": 0.38256898522377014, "learning_rate": 2.1751662289160738e-05, "loss": 0.0701, "step": 62700 }, { "epoch": 3.709114567930443, "grad_norm": 0.3547435998916626, "learning_rate": 2.1749096243520824e-05, "loss": 0.0617, "step": 62710 }, { "epoch": 3.709706038918791, "grad_norm": 0.28482234477996826, "learning_rate": 2.1746529950210822e-05, "loss": 0.0564, "step": 62720 }, { "epoch": 3.710297509907139, "grad_norm": 0.2007933109998703, "learning_rate": 2.1743963409324907e-05, "loss": 0.0758, "step": 62730 }, { "epoch": 3.7108889808954872, "grad_norm": 0.29377198219299316, "learning_rate": 2.174139662095727e-05, "loss": 0.0798, "step": 62740 }, { "epoch": 3.711480451883835, "grad_norm": 0.28249216079711914, "learning_rate": 2.1738829585202085e-05, "loss": 0.0756, "step": 62750 }, { "epoch": 3.712071922872183, "grad_norm": 0.20473287999629974, "learning_rate": 2.173626230215357e-05, "loss": 0.0657, "step": 62760 }, { "epoch": 3.712663393860531, "grad_norm": 0.18094758689403534, "learning_rate": 2.173369477190593e-05, "loss": 0.0572, "step": 62770 }, { "epoch": 3.7132548648488792, "grad_norm": 0.19340883195400238, "learning_rate": 2.173112699455339e-05, "loss": 0.0746, "step": 62780 }, { "epoch": 3.713846335837227, "grad_norm": 0.3559933304786682, "learning_rate": 2.1728558970190175e-05, "loss": 0.0624, "step": 62790 }, { "epoch": 3.7144378068255754, "grad_norm": 0.20025795698165894, "learning_rate": 2.172599069891052e-05, "loss": 0.0738, "step": 62800 }, { "epoch": 3.7150292778139233, "grad_norm": 0.18814417719841003, "learning_rate": 2.1723422180808682e-05, "loss": 0.0683, "step": 62810 }, { "epoch": 3.7156207488022712, "grad_norm": 0.25604161620140076, "learning_rate": 2.1720853415978906e-05, "loss": 0.0515, "step": 62820 }, { "epoch": 3.716212219790619, "grad_norm": 0.2189248502254486, "learning_rate": 2.1718284404515466e-05, "loss": 0.0785, "step": 62830 }, { "epoch": 3.7168036907789674, "grad_norm": 0.2352142035961151, "learning_rate": 2.1715715146512634e-05, "loss": 0.0722, "step": 62840 }, { "epoch": 3.7173951617673153, "grad_norm": 0.30785393714904785, "learning_rate": 2.171314564206469e-05, "loss": 0.0712, "step": 62850 }, { "epoch": 3.717986632755663, "grad_norm": 0.2703291177749634, "learning_rate": 2.1710575891265933e-05, "loss": 0.0778, "step": 62860 }, { "epoch": 3.7185781037440115, "grad_norm": 0.16879059374332428, "learning_rate": 2.1708005894210656e-05, "loss": 0.0649, "step": 62870 }, { "epoch": 3.7191695747323594, "grad_norm": 0.290712833404541, "learning_rate": 2.170543565099318e-05, "loss": 0.0898, "step": 62880 }, { "epoch": 3.7197610457207073, "grad_norm": 0.26966097950935364, "learning_rate": 2.1702865161707815e-05, "loss": 0.0747, "step": 62890 }, { "epoch": 3.720352516709055, "grad_norm": 0.22546175122261047, "learning_rate": 2.1700294426448898e-05, "loss": 0.0715, "step": 62900 }, { "epoch": 3.7209439876974035, "grad_norm": 0.23949959874153137, "learning_rate": 2.169772344531076e-05, "loss": 0.0631, "step": 62910 }, { "epoch": 3.7215354586857514, "grad_norm": 0.17589643597602844, "learning_rate": 2.1695152218387747e-05, "loss": 0.0473, "step": 62920 }, { "epoch": 3.7221269296740997, "grad_norm": 0.3499659597873688, "learning_rate": 2.1692580745774222e-05, "loss": 0.0786, "step": 62930 }, { "epoch": 3.7227184006624476, "grad_norm": 0.20293442904949188, "learning_rate": 2.1690009027564545e-05, "loss": 0.076, "step": 62940 }, { "epoch": 3.7233098716507955, "grad_norm": 0.21437683701515198, "learning_rate": 2.1687437063853094e-05, "loss": 0.0679, "step": 62950 }, { "epoch": 3.7239013426391434, "grad_norm": 0.21931342780590057, "learning_rate": 2.1684864854734247e-05, "loss": 0.0704, "step": 62960 }, { "epoch": 3.7244928136274913, "grad_norm": 0.20578354597091675, "learning_rate": 2.1682292400302393e-05, "loss": 0.0581, "step": 62970 }, { "epoch": 3.7250842846158396, "grad_norm": 0.3262272775173187, "learning_rate": 2.167971970065194e-05, "loss": 0.0833, "step": 62980 }, { "epoch": 3.7256757556041875, "grad_norm": 0.356090784072876, "learning_rate": 2.1677146755877296e-05, "loss": 0.0758, "step": 62990 }, { "epoch": 3.726267226592536, "grad_norm": 0.20212040841579437, "learning_rate": 2.167457356607287e-05, "loss": 0.0608, "step": 63000 }, { "epoch": 3.7268586975808837, "grad_norm": 0.2545827329158783, "learning_rate": 2.167200013133311e-05, "loss": 0.0706, "step": 63010 }, { "epoch": 3.7274501685692316, "grad_norm": 0.1964626908302307, "learning_rate": 2.166942645175243e-05, "loss": 0.0601, "step": 63020 }, { "epoch": 3.7280416395575795, "grad_norm": 0.24582751095294952, "learning_rate": 2.1666852527425295e-05, "loss": 0.0742, "step": 63030 }, { "epoch": 3.728633110545928, "grad_norm": 0.32064056396484375, "learning_rate": 2.166427835844615e-05, "loss": 0.0842, "step": 63040 }, { "epoch": 3.7292245815342757, "grad_norm": 0.1914915293455124, "learning_rate": 2.1661703944909455e-05, "loss": 0.0714, "step": 63050 }, { "epoch": 3.7298160525226236, "grad_norm": 0.23648925125598907, "learning_rate": 2.1659129286909697e-05, "loss": 0.0662, "step": 63060 }, { "epoch": 3.730407523510972, "grad_norm": 1.4573936462402344, "learning_rate": 2.165655438454134e-05, "loss": 0.062, "step": 63070 }, { "epoch": 3.73099899449932, "grad_norm": 0.18769501149654388, "learning_rate": 2.165397923789889e-05, "loss": 0.0662, "step": 63080 }, { "epoch": 3.7315904654876677, "grad_norm": 0.23864683508872986, "learning_rate": 2.1651403847076836e-05, "loss": 0.0793, "step": 63090 }, { "epoch": 3.7321819364760156, "grad_norm": 0.3260466158390045, "learning_rate": 2.1648828212169693e-05, "loss": 0.0686, "step": 63100 }, { "epoch": 3.732773407464364, "grad_norm": 0.24570316076278687, "learning_rate": 2.1646252333271972e-05, "loss": 0.0591, "step": 63110 }, { "epoch": 3.733364878452712, "grad_norm": 0.1977892965078354, "learning_rate": 2.1643676210478206e-05, "loss": 0.0616, "step": 63120 }, { "epoch": 3.73395634944106, "grad_norm": 0.20417439937591553, "learning_rate": 2.1641099843882926e-05, "loss": 0.0805, "step": 63130 }, { "epoch": 3.734547820429408, "grad_norm": 0.20969070494174957, "learning_rate": 2.163852323358068e-05, "loss": 0.065, "step": 63140 }, { "epoch": 3.735139291417756, "grad_norm": 0.29822835326194763, "learning_rate": 2.1635946379666018e-05, "loss": 0.0748, "step": 63150 }, { "epoch": 3.735730762406104, "grad_norm": 0.19025565683841705, "learning_rate": 2.1633369282233506e-05, "loss": 0.0641, "step": 63160 }, { "epoch": 3.736322233394452, "grad_norm": 0.3490945100784302, "learning_rate": 2.1630791941377708e-05, "loss": 0.0731, "step": 63170 }, { "epoch": 3.7369137043828, "grad_norm": 0.2766999900341034, "learning_rate": 2.1628214357193213e-05, "loss": 0.073, "step": 63180 }, { "epoch": 3.737505175371148, "grad_norm": 0.2743195593357086, "learning_rate": 2.1625636529774606e-05, "loss": 0.0786, "step": 63190 }, { "epoch": 3.7380966463594962, "grad_norm": 0.26982244849205017, "learning_rate": 2.162305845921648e-05, "loss": 0.0726, "step": 63200 }, { "epoch": 3.738688117347844, "grad_norm": 0.16882792115211487, "learning_rate": 2.1620480145613454e-05, "loss": 0.0675, "step": 63210 }, { "epoch": 3.739279588336192, "grad_norm": 0.14260153472423553, "learning_rate": 2.1617901589060133e-05, "loss": 0.0636, "step": 63220 }, { "epoch": 3.73987105932454, "grad_norm": 0.4789895713329315, "learning_rate": 2.1615322789651144e-05, "loss": 0.0821, "step": 63230 }, { "epoch": 3.7404625303128882, "grad_norm": 0.20080408453941345, "learning_rate": 2.161274374748112e-05, "loss": 0.0769, "step": 63240 }, { "epoch": 3.741054001301236, "grad_norm": 0.23547552525997162, "learning_rate": 2.161016446264471e-05, "loss": 0.0637, "step": 63250 }, { "epoch": 3.7416454722895844, "grad_norm": 0.25505998730659485, "learning_rate": 2.160758493523656e-05, "loss": 0.0643, "step": 63260 }, { "epoch": 3.7422369432779323, "grad_norm": 0.40290331840515137, "learning_rate": 2.160500516535133e-05, "loss": 0.0611, "step": 63270 }, { "epoch": 3.74282841426628, "grad_norm": 0.24242012202739716, "learning_rate": 2.160242515308369e-05, "loss": 0.0748, "step": 63280 }, { "epoch": 3.743419885254628, "grad_norm": 0.21080054342746735, "learning_rate": 2.1599844898528322e-05, "loss": 0.0727, "step": 63290 }, { "epoch": 3.7440113562429764, "grad_norm": 0.30979883670806885, "learning_rate": 2.1597264401779903e-05, "loss": 0.0736, "step": 63300 }, { "epoch": 3.7446028272313243, "grad_norm": 0.32784563302993774, "learning_rate": 2.159468366293314e-05, "loss": 0.0729, "step": 63310 }, { "epoch": 3.745194298219672, "grad_norm": 0.2721738815307617, "learning_rate": 2.1592102682082735e-05, "loss": 0.0625, "step": 63320 }, { "epoch": 3.7457857692080205, "grad_norm": 0.21143126487731934, "learning_rate": 2.1589521459323397e-05, "loss": 0.0837, "step": 63330 }, { "epoch": 3.7463772401963684, "grad_norm": 0.19462823867797852, "learning_rate": 2.1586939994749854e-05, "loss": 0.0799, "step": 63340 }, { "epoch": 3.7469687111847163, "grad_norm": 0.2773265838623047, "learning_rate": 2.158435828845683e-05, "loss": 0.0755, "step": 63350 }, { "epoch": 3.747560182173064, "grad_norm": 0.31886211037635803, "learning_rate": 2.1581776340539075e-05, "loss": 0.0679, "step": 63360 }, { "epoch": 3.7481516531614125, "grad_norm": 0.1888505220413208, "learning_rate": 2.1579194151091334e-05, "loss": 0.0585, "step": 63370 }, { "epoch": 3.7487431241497604, "grad_norm": 0.23099400103092194, "learning_rate": 2.1576611720208364e-05, "loss": 0.0693, "step": 63380 }, { "epoch": 3.7493345951381087, "grad_norm": 0.2603781819343567, "learning_rate": 2.1574029047984928e-05, "loss": 0.0816, "step": 63390 }, { "epoch": 3.7499260661264566, "grad_norm": 0.22038297355175018, "learning_rate": 2.1571446134515805e-05, "loss": 0.0678, "step": 63400 }, { "epoch": 3.7505175371148045, "grad_norm": 0.23739929497241974, "learning_rate": 2.1568862979895786e-05, "loss": 0.0588, "step": 63410 }, { "epoch": 3.7511090081031524, "grad_norm": 0.24189382791519165, "learning_rate": 2.1566279584219664e-05, "loss": 0.0638, "step": 63420 }, { "epoch": 3.7517004790915007, "grad_norm": 0.28299862146377563, "learning_rate": 2.1563695947582227e-05, "loss": 0.0857, "step": 63430 }, { "epoch": 3.7522919500798486, "grad_norm": 0.25641679763793945, "learning_rate": 2.15611120700783e-05, "loss": 0.0668, "step": 63440 }, { "epoch": 3.7528834210681965, "grad_norm": 0.3220362067222595, "learning_rate": 2.15585279518027e-05, "loss": 0.0759, "step": 63450 }, { "epoch": 3.753474892056545, "grad_norm": 0.24178647994995117, "learning_rate": 2.155594359285025e-05, "loss": 0.0766, "step": 63460 }, { "epoch": 3.7540663630448927, "grad_norm": 0.5120018124580383, "learning_rate": 2.1553358993315796e-05, "loss": 0.0666, "step": 63470 }, { "epoch": 3.7546578340332406, "grad_norm": 0.29007360339164734, "learning_rate": 2.1550774153294182e-05, "loss": 0.0669, "step": 63480 }, { "epoch": 3.7552493050215885, "grad_norm": 0.35150399804115295, "learning_rate": 2.1548189072880258e-05, "loss": 0.0794, "step": 63490 }, { "epoch": 3.755840776009937, "grad_norm": 0.3262287974357605, "learning_rate": 2.154560375216889e-05, "loss": 0.0798, "step": 63500 }, { "epoch": 3.7564322469982847, "grad_norm": 0.2761794924736023, "learning_rate": 2.1543018191254957e-05, "loss": 0.0624, "step": 63510 }, { "epoch": 3.7570237179866326, "grad_norm": 0.20165112614631653, "learning_rate": 2.154043239023334e-05, "loss": 0.0523, "step": 63520 }, { "epoch": 3.757615188974981, "grad_norm": 0.647806704044342, "learning_rate": 2.153784634919892e-05, "loss": 0.0836, "step": 63530 }, { "epoch": 3.758206659963329, "grad_norm": 0.21218077838420868, "learning_rate": 2.15352600682466e-05, "loss": 0.0754, "step": 63540 }, { "epoch": 3.7587981309516767, "grad_norm": 0.2233307957649231, "learning_rate": 2.1532673547471296e-05, "loss": 0.0693, "step": 63550 }, { "epoch": 3.7593896019400246, "grad_norm": 0.2860598862171173, "learning_rate": 2.153008678696792e-05, "loss": 0.0689, "step": 63560 }, { "epoch": 3.759981072928373, "grad_norm": 0.22785960137844086, "learning_rate": 2.15274997868314e-05, "loss": 0.056, "step": 63570 }, { "epoch": 3.760572543916721, "grad_norm": 0.37576237320899963, "learning_rate": 2.1524912547156664e-05, "loss": 0.0755, "step": 63580 }, { "epoch": 3.761164014905069, "grad_norm": 0.2366967648267746, "learning_rate": 2.1522325068038657e-05, "loss": 0.086, "step": 63590 }, { "epoch": 3.761755485893417, "grad_norm": 0.1954130232334137, "learning_rate": 2.1519737349572338e-05, "loss": 0.0575, "step": 63600 }, { "epoch": 3.762346956881765, "grad_norm": 0.2200961709022522, "learning_rate": 2.1517149391852663e-05, "loss": 0.0599, "step": 63610 }, { "epoch": 3.762938427870113, "grad_norm": 0.186214417219162, "learning_rate": 2.1514561194974603e-05, "loss": 0.0572, "step": 63620 }, { "epoch": 3.763529898858461, "grad_norm": 0.2324269711971283, "learning_rate": 2.1511972759033133e-05, "loss": 0.082, "step": 63630 }, { "epoch": 3.764121369846809, "grad_norm": 0.24336491525173187, "learning_rate": 2.1509384084123247e-05, "loss": 0.0802, "step": 63640 }, { "epoch": 3.764712840835157, "grad_norm": 0.3159898519515991, "learning_rate": 2.1506795170339937e-05, "loss": 0.0782, "step": 63650 }, { "epoch": 3.7653043118235052, "grad_norm": 0.1766372174024582, "learning_rate": 2.15042060177782e-05, "loss": 0.0761, "step": 63660 }, { "epoch": 3.765895782811853, "grad_norm": 0.2673202157020569, "learning_rate": 2.1501616626533064e-05, "loss": 0.0609, "step": 63670 }, { "epoch": 3.766487253800201, "grad_norm": 0.274201899766922, "learning_rate": 2.1499026996699545e-05, "loss": 0.079, "step": 63680 }, { "epoch": 3.767078724788549, "grad_norm": 1.6858642101287842, "learning_rate": 2.1496437128372672e-05, "loss": 0.0713, "step": 63690 }, { "epoch": 3.767670195776897, "grad_norm": 0.18036741018295288, "learning_rate": 2.1493847021647485e-05, "loss": 0.0685, "step": 63700 }, { "epoch": 3.768261666765245, "grad_norm": 0.7717205882072449, "learning_rate": 2.1491256676619044e-05, "loss": 0.0658, "step": 63710 }, { "epoch": 3.7688531377535934, "grad_norm": 0.21494510769844055, "learning_rate": 2.1488666093382387e-05, "loss": 0.0634, "step": 63720 }, { "epoch": 3.7694446087419413, "grad_norm": 0.2699291706085205, "learning_rate": 2.148607527203259e-05, "loss": 0.0672, "step": 63730 }, { "epoch": 3.770036079730289, "grad_norm": 0.20477965474128723, "learning_rate": 2.148348421266473e-05, "loss": 0.0715, "step": 63740 }, { "epoch": 3.770627550718637, "grad_norm": 0.30082371830940247, "learning_rate": 2.1480892915373887e-05, "loss": 0.078, "step": 63750 }, { "epoch": 3.7712190217069854, "grad_norm": 0.3154044449329376, "learning_rate": 2.1478301380255154e-05, "loss": 0.0739, "step": 63760 }, { "epoch": 3.7718104926953333, "grad_norm": 0.19327197968959808, "learning_rate": 2.147570960740363e-05, "loss": 0.0557, "step": 63770 }, { "epoch": 3.772401963683681, "grad_norm": 0.23210398852825165, "learning_rate": 2.1473117596914434e-05, "loss": 0.0694, "step": 63780 }, { "epoch": 3.7729934346720295, "grad_norm": 0.22050051391124725, "learning_rate": 2.1470525348882668e-05, "loss": 0.0762, "step": 63790 }, { "epoch": 3.7735849056603774, "grad_norm": 1.2634254693984985, "learning_rate": 2.146793286340347e-05, "loss": 0.07, "step": 63800 }, { "epoch": 3.7741763766487253, "grad_norm": 0.24066458642482758, "learning_rate": 2.1465340140571984e-05, "loss": 0.0591, "step": 63810 }, { "epoch": 3.774767847637073, "grad_norm": 0.15481841564178467, "learning_rate": 2.1462747180483333e-05, "loss": 0.0591, "step": 63820 }, { "epoch": 3.7753593186254215, "grad_norm": 0.2548755705356598, "learning_rate": 2.1460153983232687e-05, "loss": 0.0728, "step": 63830 }, { "epoch": 3.7759507896137694, "grad_norm": 0.2895848751068115, "learning_rate": 2.1457560548915205e-05, "loss": 0.0659, "step": 63840 }, { "epoch": 3.7765422606021177, "grad_norm": 0.28132712841033936, "learning_rate": 2.1454966877626057e-05, "loss": 0.072, "step": 63850 }, { "epoch": 3.7771337315904656, "grad_norm": 0.23334898054599762, "learning_rate": 2.1452372969460414e-05, "loss": 0.0597, "step": 63860 }, { "epoch": 3.7777252025788135, "grad_norm": 0.34914013743400574, "learning_rate": 2.1449778824513478e-05, "loss": 0.0551, "step": 63870 }, { "epoch": 3.7783166735671614, "grad_norm": 0.2960018813610077, "learning_rate": 2.144718444288044e-05, "loss": 0.0907, "step": 63880 }, { "epoch": 3.7789081445555097, "grad_norm": 0.23534439504146576, "learning_rate": 2.1444589824656496e-05, "loss": 0.0699, "step": 63890 }, { "epoch": 3.7794996155438576, "grad_norm": 0.3210392892360687, "learning_rate": 2.144199496993687e-05, "loss": 0.0699, "step": 63900 }, { "epoch": 3.7800910865322055, "grad_norm": 0.2804014980792999, "learning_rate": 2.1439399878816792e-05, "loss": 0.0711, "step": 63910 }, { "epoch": 3.780682557520554, "grad_norm": 0.2757982611656189, "learning_rate": 2.143680455139148e-05, "loss": 0.0555, "step": 63920 }, { "epoch": 3.7812740285089017, "grad_norm": 0.2377144694328308, "learning_rate": 2.143420898775618e-05, "loss": 0.0706, "step": 63930 }, { "epoch": 3.7818654994972496, "grad_norm": 0.17772722244262695, "learning_rate": 2.143161318800614e-05, "loss": 0.0697, "step": 63940 }, { "epoch": 3.7824569704855975, "grad_norm": 0.16239339113235474, "learning_rate": 2.1429017152236616e-05, "loss": 0.066, "step": 63950 }, { "epoch": 3.783048441473946, "grad_norm": 0.26508304476737976, "learning_rate": 2.1426420880542875e-05, "loss": 0.0618, "step": 63960 }, { "epoch": 3.7836399124622937, "grad_norm": 0.2879500985145569, "learning_rate": 2.1423824373020193e-05, "loss": 0.0563, "step": 63970 }, { "epoch": 3.7842313834506416, "grad_norm": 0.30781620740890503, "learning_rate": 2.1421227629763858e-05, "loss": 0.0794, "step": 63980 }, { "epoch": 3.78482285443899, "grad_norm": 0.2605809271335602, "learning_rate": 2.1418630650869147e-05, "loss": 0.0727, "step": 63990 }, { "epoch": 3.785414325427338, "grad_norm": 0.2381453514099121, "learning_rate": 2.1416033436431378e-05, "loss": 0.0722, "step": 64000 }, { "epoch": 3.7860057964156857, "grad_norm": 0.4007439613342285, "learning_rate": 2.1413435986545853e-05, "loss": 0.0627, "step": 64010 }, { "epoch": 3.7865972674040336, "grad_norm": 0.22122621536254883, "learning_rate": 2.1410838301307884e-05, "loss": 0.0647, "step": 64020 }, { "epoch": 3.787188738392382, "grad_norm": 0.2947361469268799, "learning_rate": 2.140824038081281e-05, "loss": 0.0799, "step": 64030 }, { "epoch": 3.78778020938073, "grad_norm": 0.24338263273239136, "learning_rate": 2.1405642225155956e-05, "loss": 0.0736, "step": 64040 }, { "epoch": 3.788371680369078, "grad_norm": 0.27076900005340576, "learning_rate": 2.1403043834432674e-05, "loss": 0.0687, "step": 64050 }, { "epoch": 3.788963151357426, "grad_norm": 0.24690818786621094, "learning_rate": 2.140044520873831e-05, "loss": 0.0572, "step": 64060 }, { "epoch": 3.789554622345774, "grad_norm": 0.425102174282074, "learning_rate": 2.139784634816823e-05, "loss": 0.0628, "step": 64070 }, { "epoch": 3.790146093334122, "grad_norm": 0.1960875242948532, "learning_rate": 2.1395247252817803e-05, "loss": 0.081, "step": 64080 }, { "epoch": 3.79073756432247, "grad_norm": 0.21403539180755615, "learning_rate": 2.13926479227824e-05, "loss": 0.0733, "step": 64090 }, { "epoch": 3.791329035310818, "grad_norm": 0.2608579993247986, "learning_rate": 2.1390048358157417e-05, "loss": 0.0823, "step": 64100 }, { "epoch": 3.791920506299166, "grad_norm": 0.23252856731414795, "learning_rate": 2.138744855903825e-05, "loss": 0.0728, "step": 64110 }, { "epoch": 3.792511977287514, "grad_norm": 0.22878877818584442, "learning_rate": 2.1384848525520295e-05, "loss": 0.0579, "step": 64120 }, { "epoch": 3.793103448275862, "grad_norm": 0.1994292438030243, "learning_rate": 2.138224825769897e-05, "loss": 0.0839, "step": 64130 }, { "epoch": 3.79369491926421, "grad_norm": 0.32918670773506165, "learning_rate": 2.1379647755669696e-05, "loss": 0.0775, "step": 64140 }, { "epoch": 3.794286390252558, "grad_norm": 0.5312496423721313, "learning_rate": 2.1377047019527908e-05, "loss": 0.0697, "step": 64150 }, { "epoch": 3.794877861240906, "grad_norm": 0.32001879811286926, "learning_rate": 2.1374446049369037e-05, "loss": 0.0738, "step": 64160 }, { "epoch": 3.795469332229254, "grad_norm": 0.313335657119751, "learning_rate": 2.137184484528853e-05, "loss": 0.0616, "step": 64170 }, { "epoch": 3.7960608032176024, "grad_norm": 0.4979078471660614, "learning_rate": 2.1369243407381854e-05, "loss": 0.0784, "step": 64180 }, { "epoch": 3.7966522742059503, "grad_norm": 0.22955772280693054, "learning_rate": 2.136664173574446e-05, "loss": 0.0756, "step": 64190 }, { "epoch": 3.797243745194298, "grad_norm": 0.18120184540748596, "learning_rate": 2.1364039830471828e-05, "loss": 0.0642, "step": 64200 }, { "epoch": 3.797835216182646, "grad_norm": 0.20832939445972443, "learning_rate": 2.136143769165944e-05, "loss": 0.0651, "step": 64210 }, { "epoch": 3.7984266871709944, "grad_norm": 0.22112974524497986, "learning_rate": 2.1358835319402777e-05, "loss": 0.0561, "step": 64220 }, { "epoch": 3.7990181581593423, "grad_norm": 0.23851710557937622, "learning_rate": 2.1356232713797353e-05, "loss": 0.0741, "step": 64230 }, { "epoch": 3.79960962914769, "grad_norm": 0.28373780846595764, "learning_rate": 2.1353629874938662e-05, "loss": 0.0746, "step": 64240 }, { "epoch": 3.8002011001360385, "grad_norm": 0.21401335299015045, "learning_rate": 2.1351026802922226e-05, "loss": 0.0672, "step": 64250 }, { "epoch": 3.8007925711243864, "grad_norm": 0.18171276152133942, "learning_rate": 2.1348423497843567e-05, "loss": 0.0596, "step": 64260 }, { "epoch": 3.8013840421127343, "grad_norm": 0.261526882648468, "learning_rate": 2.1345819959798217e-05, "loss": 0.0509, "step": 64270 }, { "epoch": 3.801975513101082, "grad_norm": 0.30473795533180237, "learning_rate": 2.1343216188881723e-05, "loss": 0.0643, "step": 64280 }, { "epoch": 3.8025669840894305, "grad_norm": 0.26950785517692566, "learning_rate": 2.134061218518963e-05, "loss": 0.0838, "step": 64290 }, { "epoch": 3.8031584550777784, "grad_norm": 0.19257262349128723, "learning_rate": 2.13380079488175e-05, "loss": 0.0684, "step": 64300 }, { "epoch": 3.8037499260661267, "grad_norm": 0.27125492691993713, "learning_rate": 2.13354034798609e-05, "loss": 0.0721, "step": 64310 }, { "epoch": 3.8043413970544746, "grad_norm": 0.2683470547199249, "learning_rate": 2.13327987784154e-05, "loss": 0.0524, "step": 64320 }, { "epoch": 3.8049328680428225, "grad_norm": 0.3437110483646393, "learning_rate": 2.1330193844576588e-05, "loss": 0.0729, "step": 64330 }, { "epoch": 3.8055243390311704, "grad_norm": 0.34637531638145447, "learning_rate": 2.1327588678440058e-05, "loss": 0.0762, "step": 64340 }, { "epoch": 3.8061158100195187, "grad_norm": 0.35167384147644043, "learning_rate": 2.1324983280101405e-05, "loss": 0.0682, "step": 64350 }, { "epoch": 3.8067072810078666, "grad_norm": 0.24775245785713196, "learning_rate": 2.132237764965625e-05, "loss": 0.0714, "step": 64360 }, { "epoch": 3.8072987519962145, "grad_norm": 0.2238556146621704, "learning_rate": 2.13197717872002e-05, "loss": 0.0608, "step": 64370 }, { "epoch": 3.807890222984563, "grad_norm": 0.24395489692687988, "learning_rate": 2.131716569282889e-05, "loss": 0.0747, "step": 64380 }, { "epoch": 3.8084816939729107, "grad_norm": 0.2793270945549011, "learning_rate": 2.131455936663795e-05, "loss": 0.0732, "step": 64390 }, { "epoch": 3.8090731649612586, "grad_norm": 0.2783493101596832, "learning_rate": 2.1311952808723023e-05, "loss": 0.0618, "step": 64400 }, { "epoch": 3.8096646359496065, "grad_norm": 0.1538156419992447, "learning_rate": 2.130934601917977e-05, "loss": 0.0552, "step": 64410 }, { "epoch": 3.810256106937955, "grad_norm": 0.1720696985721588, "learning_rate": 2.1306738998103836e-05, "loss": 0.0538, "step": 64420 }, { "epoch": 3.8108475779263027, "grad_norm": 0.24749766290187836, "learning_rate": 2.130413174559091e-05, "loss": 0.0752, "step": 64430 }, { "epoch": 3.8114390489146506, "grad_norm": 0.23056212067604065, "learning_rate": 2.1301524261736655e-05, "loss": 0.0794, "step": 64440 }, { "epoch": 3.812030519902999, "grad_norm": 0.2768828868865967, "learning_rate": 2.129891654663676e-05, "loss": 0.0698, "step": 64450 }, { "epoch": 3.812621990891347, "grad_norm": 0.243658185005188, "learning_rate": 2.1296308600386924e-05, "loss": 0.0639, "step": 64460 }, { "epoch": 3.8132134618796947, "grad_norm": 0.19607380032539368, "learning_rate": 2.1293700423082846e-05, "loss": 0.0601, "step": 64470 }, { "epoch": 3.8138049328680426, "grad_norm": 0.25299131870269775, "learning_rate": 2.1291092014820242e-05, "loss": 0.0683, "step": 64480 }, { "epoch": 3.814396403856391, "grad_norm": 0.22214533388614655, "learning_rate": 2.128848337569483e-05, "loss": 0.0718, "step": 64490 }, { "epoch": 3.814987874844739, "grad_norm": 0.18954673409461975, "learning_rate": 2.1285874505802337e-05, "loss": 0.0689, "step": 64500 }, { "epoch": 3.815579345833087, "grad_norm": 0.2543029189109802, "learning_rate": 2.12832654052385e-05, "loss": 0.0726, "step": 64510 }, { "epoch": 3.816170816821435, "grad_norm": 0.201517254114151, "learning_rate": 2.128065607409907e-05, "loss": 0.061, "step": 64520 }, { "epoch": 3.816762287809783, "grad_norm": 0.6927186846733093, "learning_rate": 2.1278046512479796e-05, "loss": 0.0718, "step": 64530 }, { "epoch": 3.8173537587981308, "grad_norm": 0.22608432173728943, "learning_rate": 2.127543672047644e-05, "loss": 0.0746, "step": 64540 }, { "epoch": 3.817945229786479, "grad_norm": 0.2443782538175583, "learning_rate": 2.1272826698184774e-05, "loss": 0.0574, "step": 64550 }, { "epoch": 3.818536700774827, "grad_norm": 0.2500171661376953, "learning_rate": 2.127021644570058e-05, "loss": 0.0726, "step": 64560 }, { "epoch": 3.819128171763175, "grad_norm": 0.22444359958171844, "learning_rate": 2.1267605963119643e-05, "loss": 0.0518, "step": 64570 }, { "epoch": 3.819719642751523, "grad_norm": 0.3562575876712799, "learning_rate": 2.126499525053776e-05, "loss": 0.0812, "step": 64580 }, { "epoch": 3.820311113739871, "grad_norm": 0.2829638719558716, "learning_rate": 2.1262384308050736e-05, "loss": 0.0642, "step": 64590 }, { "epoch": 3.820902584728219, "grad_norm": 0.3366457521915436, "learning_rate": 2.1259773135754383e-05, "loss": 0.0718, "step": 64600 }, { "epoch": 3.821494055716567, "grad_norm": 0.24861739575862885, "learning_rate": 2.1257161733744524e-05, "loss": 0.0633, "step": 64610 }, { "epoch": 3.822085526704915, "grad_norm": 0.22691696882247925, "learning_rate": 2.125455010211699e-05, "loss": 0.0515, "step": 64620 }, { "epoch": 3.822676997693263, "grad_norm": 0.3191843628883362, "learning_rate": 2.1251938240967615e-05, "loss": 0.0754, "step": 64630 }, { "epoch": 3.8232684686816114, "grad_norm": 0.2371244579553604, "learning_rate": 2.1249326150392256e-05, "loss": 0.0713, "step": 64640 }, { "epoch": 3.8238599396699593, "grad_norm": 0.19440971314907074, "learning_rate": 2.1246713830486758e-05, "loss": 0.0653, "step": 64650 }, { "epoch": 3.824451410658307, "grad_norm": 0.2795215845108032, "learning_rate": 2.1244101281346986e-05, "loss": 0.0687, "step": 64660 }, { "epoch": 3.825042881646655, "grad_norm": 0.24997767806053162, "learning_rate": 2.1241488503068812e-05, "loss": 0.0696, "step": 64670 }, { "epoch": 3.8256343526350034, "grad_norm": 0.2604724168777466, "learning_rate": 2.1238875495748126e-05, "loss": 0.0828, "step": 64680 }, { "epoch": 3.8262258236233513, "grad_norm": 0.27836719155311584, "learning_rate": 2.1236262259480806e-05, "loss": 0.0767, "step": 64690 }, { "epoch": 3.826817294611699, "grad_norm": 0.3011128604412079, "learning_rate": 2.1233648794362752e-05, "loss": 0.0758, "step": 64700 }, { "epoch": 3.8274087656000475, "grad_norm": 0.2315540909767151, "learning_rate": 2.123103510048987e-05, "loss": 0.0702, "step": 64710 }, { "epoch": 3.8280002365883954, "grad_norm": 0.2830379605293274, "learning_rate": 2.1228421177958077e-05, "loss": 0.0608, "step": 64720 }, { "epoch": 3.8285917075767433, "grad_norm": 0.4276424050331116, "learning_rate": 2.1225807026863293e-05, "loss": 0.0853, "step": 64730 }, { "epoch": 3.829183178565091, "grad_norm": 0.33570367097854614, "learning_rate": 2.1223192647301452e-05, "loss": 0.074, "step": 64740 }, { "epoch": 3.8297746495534395, "grad_norm": 0.2538841664791107, "learning_rate": 2.122057803936849e-05, "loss": 0.0739, "step": 64750 }, { "epoch": 3.8303661205417874, "grad_norm": 0.33998405933380127, "learning_rate": 2.121796320316035e-05, "loss": 0.0757, "step": 64760 }, { "epoch": 3.8309575915301357, "grad_norm": 0.2997061610221863, "learning_rate": 2.1215348138772994e-05, "loss": 0.0546, "step": 64770 }, { "epoch": 3.8315490625184836, "grad_norm": 0.265749454498291, "learning_rate": 2.121273284630239e-05, "loss": 0.0738, "step": 64780 }, { "epoch": 3.8321405335068315, "grad_norm": 0.2275078296661377, "learning_rate": 2.121011732584451e-05, "loss": 0.0671, "step": 64790 }, { "epoch": 3.8327320044951794, "grad_norm": 0.40002819895744324, "learning_rate": 2.1207501577495326e-05, "loss": 0.0759, "step": 64800 }, { "epoch": 3.8333234754835277, "grad_norm": 0.3239554464817047, "learning_rate": 2.1204885601350837e-05, "loss": 0.0717, "step": 64810 }, { "epoch": 3.8339149464718756, "grad_norm": 0.17629894614219666, "learning_rate": 2.1202269397507037e-05, "loss": 0.0508, "step": 64820 }, { "epoch": 3.8345064174602235, "grad_norm": 0.2622445523738861, "learning_rate": 2.119965296605993e-05, "loss": 0.073, "step": 64830 }, { "epoch": 3.835097888448572, "grad_norm": 0.25863128900527954, "learning_rate": 2.1197036307105536e-05, "loss": 0.0709, "step": 64840 }, { "epoch": 3.8356893594369197, "grad_norm": 0.2236400842666626, "learning_rate": 2.1194419420739873e-05, "loss": 0.0744, "step": 64850 }, { "epoch": 3.8362808304252676, "grad_norm": 0.22137081623077393, "learning_rate": 2.1191802307058972e-05, "loss": 0.0552, "step": 64860 }, { "epoch": 3.8368723014136155, "grad_norm": 0.2444639503955841, "learning_rate": 2.118918496615888e-05, "loss": 0.0585, "step": 64870 }, { "epoch": 3.837463772401964, "grad_norm": 0.4595615863800049, "learning_rate": 2.118656739813564e-05, "loss": 0.0876, "step": 64880 }, { "epoch": 3.8380552433903117, "grad_norm": 0.2581709325313568, "learning_rate": 2.1183949603085307e-05, "loss": 0.0825, "step": 64890 }, { "epoch": 3.8386467143786596, "grad_norm": 0.21132990717887878, "learning_rate": 2.1181331581103946e-05, "loss": 0.0617, "step": 64900 }, { "epoch": 3.839238185367008, "grad_norm": 0.22572308778762817, "learning_rate": 2.117871333228763e-05, "loss": 0.0535, "step": 64910 }, { "epoch": 3.839829656355356, "grad_norm": 0.26207366585731506, "learning_rate": 2.1176094856732446e-05, "loss": 0.0563, "step": 64920 }, { "epoch": 3.8404211273437037, "grad_norm": 0.9074767231941223, "learning_rate": 2.1173476154534474e-05, "loss": 0.0852, "step": 64930 }, { "epoch": 3.8410125983320516, "grad_norm": 0.3089481592178345, "learning_rate": 2.117085722578982e-05, "loss": 0.0739, "step": 64940 }, { "epoch": 3.8416040693204, "grad_norm": 0.275326132774353, "learning_rate": 2.1168238070594584e-05, "loss": 0.0683, "step": 64950 }, { "epoch": 3.8421955403087478, "grad_norm": 0.1945675164461136, "learning_rate": 2.1165618689044886e-05, "loss": 0.0595, "step": 64960 }, { "epoch": 3.842787011297096, "grad_norm": 0.20304711163043976, "learning_rate": 2.1162999081236847e-05, "loss": 0.0568, "step": 64970 }, { "epoch": 3.843378482285444, "grad_norm": 0.2852213680744171, "learning_rate": 2.11603792472666e-05, "loss": 0.0841, "step": 64980 }, { "epoch": 3.843969953273792, "grad_norm": 0.32307979464530945, "learning_rate": 2.1157759187230277e-05, "loss": 0.0721, "step": 64990 }, { "epoch": 3.8445614242621398, "grad_norm": 0.5863808393478394, "learning_rate": 2.1155138901224032e-05, "loss": 0.0635, "step": 65000 }, { "epoch": 3.845152895250488, "grad_norm": 0.22122113406658173, "learning_rate": 2.115251838934402e-05, "loss": 0.0652, "step": 65010 }, { "epoch": 3.845744366238836, "grad_norm": 0.2426154911518097, "learning_rate": 2.1149897651686408e-05, "loss": 0.0632, "step": 65020 }, { "epoch": 3.846335837227184, "grad_norm": 0.34315991401672363, "learning_rate": 2.1147276688347363e-05, "loss": 0.072, "step": 65030 }, { "epoch": 3.846927308215532, "grad_norm": 0.3055356442928314, "learning_rate": 2.114465549942307e-05, "loss": 0.073, "step": 65040 }, { "epoch": 3.84751877920388, "grad_norm": 0.24016669392585754, "learning_rate": 2.114203408500972e-05, "loss": 0.0711, "step": 65050 }, { "epoch": 3.848110250192228, "grad_norm": 0.25051677227020264, "learning_rate": 2.11394124452035e-05, "loss": 0.0673, "step": 65060 }, { "epoch": 3.848701721180576, "grad_norm": 0.21586255729198456, "learning_rate": 2.1136790580100627e-05, "loss": 0.0531, "step": 65070 }, { "epoch": 3.849293192168924, "grad_norm": 0.2067125290632248, "learning_rate": 2.1134168489797318e-05, "loss": 0.0758, "step": 65080 }, { "epoch": 3.849884663157272, "grad_norm": 0.20519743859767914, "learning_rate": 2.113154617438978e-05, "loss": 0.0771, "step": 65090 }, { "epoch": 3.8504761341456204, "grad_norm": 0.24298231303691864, "learning_rate": 2.112892363397425e-05, "loss": 0.0695, "step": 65100 }, { "epoch": 3.8510676051339683, "grad_norm": 0.34905773401260376, "learning_rate": 2.1126300868646976e-05, "loss": 0.0673, "step": 65110 }, { "epoch": 3.851659076122316, "grad_norm": 0.2461731880903244, "learning_rate": 2.11236778785042e-05, "loss": 0.0506, "step": 65120 }, { "epoch": 3.852250547110664, "grad_norm": 0.2061251550912857, "learning_rate": 2.1121054663642167e-05, "loss": 0.0647, "step": 65130 }, { "epoch": 3.8528420180990124, "grad_norm": 0.2936934232711792, "learning_rate": 2.111843122415715e-05, "loss": 0.0718, "step": 65140 }, { "epoch": 3.8534334890873603, "grad_norm": 0.21419866383075714, "learning_rate": 2.1115807560145424e-05, "loss": 0.0641, "step": 65150 }, { "epoch": 3.854024960075708, "grad_norm": 0.32433632016181946, "learning_rate": 2.111318367170326e-05, "loss": 0.0645, "step": 65160 }, { "epoch": 3.8546164310640565, "grad_norm": 0.20806702971458435, "learning_rate": 2.111055955892695e-05, "loss": 0.0596, "step": 65170 }, { "epoch": 3.8552079020524044, "grad_norm": 0.296994686126709, "learning_rate": 2.11079352219128e-05, "loss": 0.0846, "step": 65180 }, { "epoch": 3.8557993730407523, "grad_norm": 0.22754783928394318, "learning_rate": 2.1105310660757097e-05, "loss": 0.0697, "step": 65190 }, { "epoch": 3.8563908440291, "grad_norm": 0.20269489288330078, "learning_rate": 2.1102685875556164e-05, "loss": 0.0689, "step": 65200 }, { "epoch": 3.8569823150174485, "grad_norm": 0.2813214957714081, "learning_rate": 2.1100060866406324e-05, "loss": 0.0648, "step": 65210 }, { "epoch": 3.8575737860057964, "grad_norm": 0.240859255194664, "learning_rate": 2.1097435633403904e-05, "loss": 0.0544, "step": 65220 }, { "epoch": 3.8581652569941447, "grad_norm": 0.26871269941329956, "learning_rate": 2.109481017664524e-05, "loss": 0.075, "step": 65230 }, { "epoch": 3.8587567279824926, "grad_norm": 0.22623758018016815, "learning_rate": 2.1092184496226674e-05, "loss": 0.0743, "step": 65240 }, { "epoch": 3.8593481989708405, "grad_norm": 0.1929170787334442, "learning_rate": 2.1089558592244573e-05, "loss": 0.0668, "step": 65250 }, { "epoch": 3.8599396699591884, "grad_norm": 0.20628389716148376, "learning_rate": 2.1086932464795283e-05, "loss": 0.0665, "step": 65260 }, { "epoch": 3.8605311409475367, "grad_norm": 0.17941322922706604, "learning_rate": 2.108430611397519e-05, "loss": 0.0613, "step": 65270 }, { "epoch": 3.8611226119358846, "grad_norm": 0.2660290598869324, "learning_rate": 2.1081679539880658e-05, "loss": 0.0792, "step": 65280 }, { "epoch": 3.8617140829242325, "grad_norm": 0.26082465052604675, "learning_rate": 2.1079052742608084e-05, "loss": 0.0697, "step": 65290 }, { "epoch": 3.862305553912581, "grad_norm": 0.305105596780777, "learning_rate": 2.107642572225386e-05, "loss": 0.0742, "step": 65300 }, { "epoch": 3.8628970249009287, "grad_norm": 0.3124620020389557, "learning_rate": 2.107379847891439e-05, "loss": 0.0693, "step": 65310 }, { "epoch": 3.8634884958892766, "grad_norm": 0.4111160337924957, "learning_rate": 2.1071171012686083e-05, "loss": 0.0711, "step": 65320 }, { "epoch": 3.8640799668776245, "grad_norm": 0.4673391282558441, "learning_rate": 2.1068543323665358e-05, "loss": 0.09, "step": 65330 }, { "epoch": 3.864671437865973, "grad_norm": 0.3263487219810486, "learning_rate": 2.106591541194865e-05, "loss": 0.0823, "step": 65340 }, { "epoch": 3.8652629088543207, "grad_norm": 0.30772697925567627, "learning_rate": 2.106328727763239e-05, "loss": 0.0722, "step": 65350 }, { "epoch": 3.8658543798426686, "grad_norm": 0.3129044771194458, "learning_rate": 2.1060658920813012e-05, "loss": 0.0629, "step": 65360 }, { "epoch": 3.866445850831017, "grad_norm": 0.32415929436683655, "learning_rate": 2.1058030341586984e-05, "loss": 0.0664, "step": 65370 }, { "epoch": 3.8670373218193648, "grad_norm": 0.2095683068037033, "learning_rate": 2.1055401540050763e-05, "loss": 0.0723, "step": 65380 }, { "epoch": 3.8676287928077127, "grad_norm": 0.31088879704475403, "learning_rate": 2.1052772516300808e-05, "loss": 0.0693, "step": 65390 }, { "epoch": 3.8682202637960605, "grad_norm": 0.2147226780653, "learning_rate": 2.105014327043361e-05, "loss": 0.0705, "step": 65400 }, { "epoch": 3.868811734784409, "grad_norm": 0.213846817612648, "learning_rate": 2.1047513802545642e-05, "loss": 0.0567, "step": 65410 }, { "epoch": 3.8694032057727568, "grad_norm": 0.25029224157333374, "learning_rate": 2.1044884112733406e-05, "loss": 0.0616, "step": 65420 }, { "epoch": 3.869994676761105, "grad_norm": 0.3534281849861145, "learning_rate": 2.1042254201093388e-05, "loss": 0.0801, "step": 65430 }, { "epoch": 3.870586147749453, "grad_norm": 0.1991526484489441, "learning_rate": 2.1039624067722117e-05, "loss": 0.0861, "step": 65440 }, { "epoch": 3.871177618737801, "grad_norm": 0.17734092473983765, "learning_rate": 2.1036993712716103e-05, "loss": 0.0736, "step": 65450 }, { "epoch": 3.8717690897261487, "grad_norm": 0.1626678705215454, "learning_rate": 2.103436313617186e-05, "loss": 0.0591, "step": 65460 }, { "epoch": 3.872360560714497, "grad_norm": 0.19771645963191986, "learning_rate": 2.103173233818594e-05, "loss": 0.0633, "step": 65470 }, { "epoch": 3.872952031702845, "grad_norm": 0.26948872208595276, "learning_rate": 2.102910131885487e-05, "loss": 0.0696, "step": 65480 }, { "epoch": 3.873543502691193, "grad_norm": 0.26339662075042725, "learning_rate": 2.1026470078275213e-05, "loss": 0.0766, "step": 65490 }, { "epoch": 3.874134973679541, "grad_norm": 0.2735792100429535, "learning_rate": 2.1023838616543516e-05, "loss": 0.0694, "step": 65500 }, { "epoch": 3.874726444667889, "grad_norm": 0.20825667679309845, "learning_rate": 2.1021206933756352e-05, "loss": 0.0661, "step": 65510 }, { "epoch": 3.875317915656237, "grad_norm": 0.23494525253772736, "learning_rate": 2.1018575030010286e-05, "loss": 0.0545, "step": 65520 }, { "epoch": 3.875909386644585, "grad_norm": 0.29693078994750977, "learning_rate": 2.1015942905401915e-05, "loss": 0.0853, "step": 65530 }, { "epoch": 3.876500857632933, "grad_norm": 0.23521152138710022, "learning_rate": 2.1013310560027815e-05, "loss": 0.0693, "step": 65540 }, { "epoch": 3.877092328621281, "grad_norm": 0.35304710268974304, "learning_rate": 2.1010677993984597e-05, "loss": 0.0687, "step": 65550 }, { "epoch": 3.8776837996096294, "grad_norm": 0.2586324214935303, "learning_rate": 2.1008045207368857e-05, "loss": 0.0641, "step": 65560 }, { "epoch": 3.8782752705979773, "grad_norm": 0.210626021027565, "learning_rate": 2.1005412200277217e-05, "loss": 0.0527, "step": 65570 }, { "epoch": 3.878866741586325, "grad_norm": 0.3372817933559418, "learning_rate": 2.10027789728063e-05, "loss": 0.0795, "step": 65580 }, { "epoch": 3.879458212574673, "grad_norm": 0.3141489326953888, "learning_rate": 2.1000145525052728e-05, "loss": 0.0714, "step": 65590 }, { "epoch": 3.8800496835630214, "grad_norm": 0.16981101036071777, "learning_rate": 2.099751185711315e-05, "loss": 0.065, "step": 65600 }, { "epoch": 3.8806411545513693, "grad_norm": 0.2386074811220169, "learning_rate": 2.0994877969084213e-05, "loss": 0.0671, "step": 65610 }, { "epoch": 3.881232625539717, "grad_norm": 0.30751821398735046, "learning_rate": 2.0992243861062564e-05, "loss": 0.0563, "step": 65620 }, { "epoch": 3.8818240965280655, "grad_norm": 0.30161911249160767, "learning_rate": 2.0989609533144876e-05, "loss": 0.0697, "step": 65630 }, { "epoch": 3.8824155675164134, "grad_norm": 0.266002893447876, "learning_rate": 2.0986974985427807e-05, "loss": 0.0683, "step": 65640 }, { "epoch": 3.8830070385047613, "grad_norm": 0.19302010536193848, "learning_rate": 2.0984340218008054e-05, "loss": 0.0745, "step": 65650 }, { "epoch": 3.883598509493109, "grad_norm": 0.20562180876731873, "learning_rate": 2.0981705230982293e-05, "loss": 0.0648, "step": 65660 }, { "epoch": 3.8841899804814575, "grad_norm": 0.18829233944416046, "learning_rate": 2.0979070024447216e-05, "loss": 0.0576, "step": 65670 }, { "epoch": 3.8847814514698054, "grad_norm": 0.36115533113479614, "learning_rate": 2.097643459849954e-05, "loss": 0.0866, "step": 65680 }, { "epoch": 3.8853729224581537, "grad_norm": 0.2083430141210556, "learning_rate": 2.0973798953235964e-05, "loss": 0.0714, "step": 65690 }, { "epoch": 3.8859643934465016, "grad_norm": 0.22493578493595123, "learning_rate": 2.097116308875322e-05, "loss": 0.0711, "step": 65700 }, { "epoch": 3.8865558644348495, "grad_norm": 0.17710520327091217, "learning_rate": 2.096852700514802e-05, "loss": 0.0681, "step": 65710 }, { "epoch": 3.8871473354231973, "grad_norm": 0.2473973035812378, "learning_rate": 2.0965890702517113e-05, "loss": 0.0626, "step": 65720 }, { "epoch": 3.8877388064115457, "grad_norm": 0.26430022716522217, "learning_rate": 2.0963254180957236e-05, "loss": 0.0774, "step": 65730 }, { "epoch": 3.8883302773998936, "grad_norm": 0.1915067732334137, "learning_rate": 2.0960617440565143e-05, "loss": 0.064, "step": 65740 }, { "epoch": 3.8889217483882415, "grad_norm": 0.3544027507305145, "learning_rate": 2.0957980481437593e-05, "loss": 0.0688, "step": 65750 }, { "epoch": 3.88951321937659, "grad_norm": 0.28570225834846497, "learning_rate": 2.095534330367136e-05, "loss": 0.0623, "step": 65760 }, { "epoch": 3.8901046903649377, "grad_norm": 0.20553217828273773, "learning_rate": 2.0952705907363208e-05, "loss": 0.0609, "step": 65770 }, { "epoch": 3.8906961613532856, "grad_norm": 0.3101552724838257, "learning_rate": 2.095006829260993e-05, "loss": 0.0755, "step": 65780 }, { "epoch": 3.8912876323416334, "grad_norm": 0.24017967283725739, "learning_rate": 2.0947430459508324e-05, "loss": 0.0816, "step": 65790 }, { "epoch": 3.8918791033299818, "grad_norm": 0.222295343875885, "learning_rate": 2.094479240815517e-05, "loss": 0.0775, "step": 65800 }, { "epoch": 3.8924705743183297, "grad_norm": 0.3045780062675476, "learning_rate": 2.094215413864729e-05, "loss": 0.0733, "step": 65810 }, { "epoch": 3.8930620453066775, "grad_norm": 0.1848149597644806, "learning_rate": 2.09395156510815e-05, "loss": 0.0632, "step": 65820 }, { "epoch": 3.893653516295026, "grad_norm": 0.2965365946292877, "learning_rate": 2.0936876945554623e-05, "loss": 0.0813, "step": 65830 }, { "epoch": 3.8942449872833738, "grad_norm": 0.27895140647888184, "learning_rate": 2.0934238022163485e-05, "loss": 0.0771, "step": 65840 }, { "epoch": 3.8948364582717216, "grad_norm": 0.208653524518013, "learning_rate": 2.0931598881004934e-05, "loss": 0.0786, "step": 65850 }, { "epoch": 3.8954279292600695, "grad_norm": 0.20450332760810852, "learning_rate": 2.0928959522175815e-05, "loss": 0.0655, "step": 65860 }, { "epoch": 3.896019400248418, "grad_norm": 0.18443447351455688, "learning_rate": 2.092631994577298e-05, "loss": 0.0549, "step": 65870 }, { "epoch": 3.8966108712367657, "grad_norm": 0.31327494978904724, "learning_rate": 2.09236801518933e-05, "loss": 0.0791, "step": 65880 }, { "epoch": 3.897202342225114, "grad_norm": 0.3314499855041504, "learning_rate": 2.092104014063364e-05, "loss": 0.0638, "step": 65890 }, { "epoch": 3.897793813213462, "grad_norm": 0.32407450675964355, "learning_rate": 2.0918399912090885e-05, "loss": 0.0689, "step": 65900 }, { "epoch": 3.89838528420181, "grad_norm": 0.3767249286174774, "learning_rate": 2.0915759466361923e-05, "loss": 0.067, "step": 65910 }, { "epoch": 3.8989767551901577, "grad_norm": 0.17987526953220367, "learning_rate": 2.0913118803543644e-05, "loss": 0.0523, "step": 65920 }, { "epoch": 3.899568226178506, "grad_norm": 0.29902195930480957, "learning_rate": 2.091047792373296e-05, "loss": 0.0707, "step": 65930 }, { "epoch": 3.900159697166854, "grad_norm": 0.29753485321998596, "learning_rate": 2.0907836827026773e-05, "loss": 0.0706, "step": 65940 }, { "epoch": 3.900751168155202, "grad_norm": 0.25893324613571167, "learning_rate": 2.0905195513522012e-05, "loss": 0.0775, "step": 65950 }, { "epoch": 3.90134263914355, "grad_norm": 0.19125695526599884, "learning_rate": 2.0902553983315605e-05, "loss": 0.0642, "step": 65960 }, { "epoch": 3.901934110131898, "grad_norm": 0.237985298037529, "learning_rate": 2.0899912236504476e-05, "loss": 0.0647, "step": 65970 }, { "epoch": 3.902525581120246, "grad_norm": 0.23815812170505524, "learning_rate": 2.0897270273185583e-05, "loss": 0.0777, "step": 65980 }, { "epoch": 3.903117052108594, "grad_norm": 0.2923356592655182, "learning_rate": 2.0894628093455867e-05, "loss": 0.0754, "step": 65990 }, { "epoch": 3.903708523096942, "grad_norm": 0.29542645812034607, "learning_rate": 2.089198569741229e-05, "loss": 0.0793, "step": 66000 }, { "epoch": 3.90429999408529, "grad_norm": 0.16065479815006256, "learning_rate": 2.0889343085151823e-05, "loss": 0.0603, "step": 66010 }, { "epoch": 3.9048914650736384, "grad_norm": 0.28118640184402466, "learning_rate": 2.088670025677144e-05, "loss": 0.0557, "step": 66020 }, { "epoch": 3.9054829360619863, "grad_norm": 0.31541791558265686, "learning_rate": 2.0884057212368122e-05, "loss": 0.0705, "step": 66030 }, { "epoch": 3.906074407050334, "grad_norm": 0.3329617977142334, "learning_rate": 2.0881413952038858e-05, "loss": 0.0731, "step": 66040 }, { "epoch": 3.906665878038682, "grad_norm": 0.2941540777683258, "learning_rate": 2.0878770475880656e-05, "loss": 0.0742, "step": 66050 }, { "epoch": 3.9072573490270304, "grad_norm": 0.30528223514556885, "learning_rate": 2.0876126783990517e-05, "loss": 0.0676, "step": 66060 }, { "epoch": 3.9078488200153783, "grad_norm": 0.18354372680187225, "learning_rate": 2.087348287646545e-05, "loss": 0.0534, "step": 66070 }, { "epoch": 3.908440291003726, "grad_norm": 0.3917810320854187, "learning_rate": 2.0870838753402495e-05, "loss": 0.0819, "step": 66080 }, { "epoch": 3.9090317619920745, "grad_norm": 0.22146831452846527, "learning_rate": 2.086819441489867e-05, "loss": 0.0666, "step": 66090 }, { "epoch": 3.9096232329804224, "grad_norm": 0.35392552614212036, "learning_rate": 2.0865549861051013e-05, "loss": 0.0861, "step": 66100 }, { "epoch": 3.9102147039687702, "grad_norm": 0.21558359265327454, "learning_rate": 2.0862905091956577e-05, "loss": 0.069, "step": 66110 }, { "epoch": 3.910806174957118, "grad_norm": 0.2742803394794464, "learning_rate": 2.086026010771241e-05, "loss": 0.0532, "step": 66120 }, { "epoch": 3.9113976459454665, "grad_norm": 0.39695432782173157, "learning_rate": 2.085761490841558e-05, "loss": 0.0775, "step": 66130 }, { "epoch": 3.9119891169338143, "grad_norm": 0.21444222331047058, "learning_rate": 2.085496949416315e-05, "loss": 0.0743, "step": 66140 }, { "epoch": 3.9125805879221627, "grad_norm": 0.308633029460907, "learning_rate": 2.085232386505221e-05, "loss": 0.0637, "step": 66150 }, { "epoch": 3.9131720589105106, "grad_norm": 0.28289151191711426, "learning_rate": 2.0849678021179833e-05, "loss": 0.0737, "step": 66160 }, { "epoch": 3.9137635298988585, "grad_norm": 0.2342022806406021, "learning_rate": 2.0847031962643123e-05, "loss": 0.0493, "step": 66170 }, { "epoch": 3.9143550008872063, "grad_norm": 0.3027864098548889, "learning_rate": 2.0844385689539177e-05, "loss": 0.0833, "step": 66180 }, { "epoch": 3.9149464718755547, "grad_norm": 0.2310299277305603, "learning_rate": 2.084173920196511e-05, "loss": 0.0787, "step": 66190 }, { "epoch": 3.9155379428639026, "grad_norm": 0.37598931789398193, "learning_rate": 2.0839092500018026e-05, "loss": 0.0682, "step": 66200 }, { "epoch": 3.9161294138522504, "grad_norm": 0.26870495080947876, "learning_rate": 2.0836445583795066e-05, "loss": 0.0745, "step": 66210 }, { "epoch": 3.9167208848405988, "grad_norm": 0.25085383653640747, "learning_rate": 2.0833798453393357e-05, "loss": 0.0548, "step": 66220 }, { "epoch": 3.9173123558289467, "grad_norm": 0.24978363513946533, "learning_rate": 2.0831151108910037e-05, "loss": 0.0703, "step": 66230 }, { "epoch": 3.9179038268172945, "grad_norm": 0.248252734541893, "learning_rate": 2.0828503550442257e-05, "loss": 0.0736, "step": 66240 }, { "epoch": 3.9184952978056424, "grad_norm": 0.18309995532035828, "learning_rate": 2.0825855778087183e-05, "loss": 0.0614, "step": 66250 }, { "epoch": 3.9190867687939908, "grad_norm": 0.1854497492313385, "learning_rate": 2.0823207791941965e-05, "loss": 0.0586, "step": 66260 }, { "epoch": 3.9196782397823386, "grad_norm": 0.2144472599029541, "learning_rate": 2.0820559592103782e-05, "loss": 0.0559, "step": 66270 }, { "epoch": 3.9202697107706865, "grad_norm": 0.3568789064884186, "learning_rate": 2.0817911178669816e-05, "loss": 0.0698, "step": 66280 }, { "epoch": 3.920861181759035, "grad_norm": 0.2828274369239807, "learning_rate": 2.0815262551737258e-05, "loss": 0.074, "step": 66290 }, { "epoch": 3.9214526527473827, "grad_norm": 0.2765793204307556, "learning_rate": 2.0812613711403294e-05, "loss": 0.0716, "step": 66300 }, { "epoch": 3.9220441237357306, "grad_norm": 0.1750188022851944, "learning_rate": 2.0809964657765133e-05, "loss": 0.0587, "step": 66310 }, { "epoch": 3.9226355947240785, "grad_norm": 0.20416942238807678, "learning_rate": 2.0807315390919994e-05, "loss": 0.0585, "step": 66320 }, { "epoch": 3.923227065712427, "grad_norm": 0.19627468287944794, "learning_rate": 2.0804665910965085e-05, "loss": 0.072, "step": 66330 }, { "epoch": 3.9238185367007747, "grad_norm": 0.3675816059112549, "learning_rate": 2.080201621799764e-05, "loss": 0.078, "step": 66340 }, { "epoch": 3.924410007689123, "grad_norm": 0.22820928692817688, "learning_rate": 2.0799366312114897e-05, "loss": 0.0692, "step": 66350 }, { "epoch": 3.925001478677471, "grad_norm": 0.22373361885547638, "learning_rate": 2.079671619341409e-05, "loss": 0.0711, "step": 66360 }, { "epoch": 3.925592949665819, "grad_norm": 0.2053881585597992, "learning_rate": 2.0794065861992473e-05, "loss": 0.0493, "step": 66370 }, { "epoch": 3.9261844206541667, "grad_norm": 0.5949210524559021, "learning_rate": 2.0791415317947308e-05, "loss": 0.0764, "step": 66380 }, { "epoch": 3.926775891642515, "grad_norm": 0.19620198011398315, "learning_rate": 2.078876456137586e-05, "loss": 0.0656, "step": 66390 }, { "epoch": 3.927367362630863, "grad_norm": 0.22809258103370667, "learning_rate": 2.0786113592375405e-05, "loss": 0.0769, "step": 66400 }, { "epoch": 3.927958833619211, "grad_norm": 0.3042246997356415, "learning_rate": 2.0783462411043224e-05, "loss": 0.0737, "step": 66410 }, { "epoch": 3.928550304607559, "grad_norm": 0.3629584312438965, "learning_rate": 2.0780811017476603e-05, "loss": 0.0621, "step": 66420 }, { "epoch": 3.929141775595907, "grad_norm": 0.27229711413383484, "learning_rate": 2.0778159411772838e-05, "loss": 0.0733, "step": 66430 }, { "epoch": 3.929733246584255, "grad_norm": 0.2951166033744812, "learning_rate": 2.077550759402924e-05, "loss": 0.0727, "step": 66440 }, { "epoch": 3.930324717572603, "grad_norm": 0.31022441387176514, "learning_rate": 2.077285556434313e-05, "loss": 0.0784, "step": 66450 }, { "epoch": 3.930916188560951, "grad_norm": 0.5360049605369568, "learning_rate": 2.0770203322811813e-05, "loss": 0.0659, "step": 66460 }, { "epoch": 3.931507659549299, "grad_norm": 0.17195287346839905, "learning_rate": 2.076755086953262e-05, "loss": 0.0681, "step": 66470 }, { "epoch": 3.9320991305376474, "grad_norm": 0.2273092120885849, "learning_rate": 2.0764898204602902e-05, "loss": 0.0668, "step": 66480 }, { "epoch": 3.9326906015259953, "grad_norm": 0.19054925441741943, "learning_rate": 2.0762245328119987e-05, "loss": 0.0773, "step": 66490 }, { "epoch": 3.933282072514343, "grad_norm": 0.1974085420370102, "learning_rate": 2.0759592240181235e-05, "loss": 0.0787, "step": 66500 }, { "epoch": 3.933873543502691, "grad_norm": 0.2397196888923645, "learning_rate": 2.0756938940884003e-05, "loss": 0.062, "step": 66510 }, { "epoch": 3.9344650144910394, "grad_norm": 0.2689460515975952, "learning_rate": 2.075428543032566e-05, "loss": 0.0554, "step": 66520 }, { "epoch": 3.9350564854793872, "grad_norm": 0.33778688311576843, "learning_rate": 2.075163170860358e-05, "loss": 0.0732, "step": 66530 }, { "epoch": 3.935647956467735, "grad_norm": 0.3708600699901581, "learning_rate": 2.074897777581515e-05, "loss": 0.0831, "step": 66540 }, { "epoch": 3.9362394274560835, "grad_norm": 0.21802349388599396, "learning_rate": 2.0746323632057755e-05, "loss": 0.06, "step": 66550 }, { "epoch": 3.9368308984444313, "grad_norm": 0.24901986122131348, "learning_rate": 2.0743669277428797e-05, "loss": 0.0685, "step": 66560 }, { "epoch": 3.9374223694327792, "grad_norm": 0.2304365634918213, "learning_rate": 2.074101471202568e-05, "loss": 0.0537, "step": 66570 }, { "epoch": 3.938013840421127, "grad_norm": 0.3002395033836365, "learning_rate": 2.0738359935945822e-05, "loss": 0.0691, "step": 66580 }, { "epoch": 3.9386053114094755, "grad_norm": 0.24480144679546356, "learning_rate": 2.0735704949286642e-05, "loss": 0.07, "step": 66590 }, { "epoch": 3.9391967823978233, "grad_norm": 0.2460169494152069, "learning_rate": 2.0733049752145566e-05, "loss": 0.0797, "step": 66600 }, { "epoch": 3.9397882533861717, "grad_norm": 0.1799476146697998, "learning_rate": 2.073039434462004e-05, "loss": 0.0605, "step": 66610 }, { "epoch": 3.9403797243745196, "grad_norm": 0.22519990801811218, "learning_rate": 2.0727738726807502e-05, "loss": 0.0506, "step": 66620 }, { "epoch": 3.9409711953628674, "grad_norm": 0.3082312345504761, "learning_rate": 2.0725082898805403e-05, "loss": 0.0676, "step": 66630 }, { "epoch": 3.9415626663512153, "grad_norm": 0.4158672094345093, "learning_rate": 2.0722426860711213e-05, "loss": 0.0883, "step": 66640 }, { "epoch": 3.9421541373395637, "grad_norm": 0.2311057597398758, "learning_rate": 2.0719770612622392e-05, "loss": 0.0703, "step": 66650 }, { "epoch": 3.9427456083279115, "grad_norm": 0.3433012068271637, "learning_rate": 2.0717114154636415e-05, "loss": 0.0634, "step": 66660 }, { "epoch": 3.9433370793162594, "grad_norm": 0.22340723872184753, "learning_rate": 2.0714457486850766e-05, "loss": 0.0606, "step": 66670 }, { "epoch": 3.9439285503046078, "grad_norm": 0.4836719036102295, "learning_rate": 2.0711800609362943e-05, "loss": 0.0842, "step": 66680 }, { "epoch": 3.9445200212929556, "grad_norm": 0.21799252927303314, "learning_rate": 2.0709143522270435e-05, "loss": 0.0838, "step": 66690 }, { "epoch": 3.9451114922813035, "grad_norm": 0.1853848397731781, "learning_rate": 2.0706486225670754e-05, "loss": 0.0668, "step": 66700 }, { "epoch": 3.9457029632696514, "grad_norm": 0.2606491446495056, "learning_rate": 2.0703828719661418e-05, "loss": 0.0716, "step": 66710 }, { "epoch": 3.9462944342579998, "grad_norm": 0.2591151297092438, "learning_rate": 2.0701171004339944e-05, "loss": 0.0553, "step": 66720 }, { "epoch": 3.9468859052463476, "grad_norm": 0.20804354548454285, "learning_rate": 2.069851307980386e-05, "loss": 0.0703, "step": 66730 }, { "epoch": 3.9474773762346955, "grad_norm": 0.31611159443855286, "learning_rate": 2.0695854946150707e-05, "loss": 0.0771, "step": 66740 }, { "epoch": 3.948068847223044, "grad_norm": 0.20183971524238586, "learning_rate": 2.0693196603478028e-05, "loss": 0.0648, "step": 66750 }, { "epoch": 3.9486603182113917, "grad_norm": 0.26572364568710327, "learning_rate": 2.0690538051883375e-05, "loss": 0.0712, "step": 66760 }, { "epoch": 3.9492517891997396, "grad_norm": 0.20849750936031342, "learning_rate": 2.068787929146431e-05, "loss": 0.067, "step": 66770 }, { "epoch": 3.9498432601880875, "grad_norm": 0.3417236804962158, "learning_rate": 2.06852203223184e-05, "loss": 0.081, "step": 66780 }, { "epoch": 3.950434731176436, "grad_norm": 0.17640353739261627, "learning_rate": 2.068256114454322e-05, "loss": 0.0725, "step": 66790 }, { "epoch": 3.9510262021647837, "grad_norm": 0.24047638475894928, "learning_rate": 2.0679901758236357e-05, "loss": 0.0688, "step": 66800 }, { "epoch": 3.951617673153132, "grad_norm": 0.27715474367141724, "learning_rate": 2.0677242163495395e-05, "loss": 0.0598, "step": 66810 }, { "epoch": 3.95220914414148, "grad_norm": 0.2814379930496216, "learning_rate": 2.0674582360417938e-05, "loss": 0.0558, "step": 66820 }, { "epoch": 3.952800615129828, "grad_norm": 0.2383410781621933, "learning_rate": 2.067192234910159e-05, "loss": 0.0737, "step": 66830 }, { "epoch": 3.9533920861181757, "grad_norm": 0.31023022532463074, "learning_rate": 2.0669262129643968e-05, "loss": 0.0755, "step": 66840 }, { "epoch": 3.953983557106524, "grad_norm": 0.3497777581214905, "learning_rate": 2.066660170214269e-05, "loss": 0.0743, "step": 66850 }, { "epoch": 3.954575028094872, "grad_norm": 0.20843593776226044, "learning_rate": 2.0663941066695387e-05, "loss": 0.0668, "step": 66860 }, { "epoch": 3.95516649908322, "grad_norm": 0.2657051980495453, "learning_rate": 2.0661280223399695e-05, "loss": 0.0611, "step": 66870 }, { "epoch": 3.955757970071568, "grad_norm": 0.32302168011665344, "learning_rate": 2.0658619172353255e-05, "loss": 0.086, "step": 66880 }, { "epoch": 3.956349441059916, "grad_norm": 0.1797727793455124, "learning_rate": 2.065595791365372e-05, "loss": 0.0852, "step": 66890 }, { "epoch": 3.956940912048264, "grad_norm": 0.3000468909740448, "learning_rate": 2.0653296447398764e-05, "loss": 0.0752, "step": 66900 }, { "epoch": 3.957532383036612, "grad_norm": 0.21793487668037415, "learning_rate": 2.065063477368603e-05, "loss": 0.0643, "step": 66910 }, { "epoch": 3.95812385402496, "grad_norm": 0.19377155601978302, "learning_rate": 2.0647972892613213e-05, "loss": 0.0667, "step": 66920 }, { "epoch": 3.958715325013308, "grad_norm": 0.326870858669281, "learning_rate": 2.0645310804277985e-05, "loss": 0.0755, "step": 66930 }, { "epoch": 3.9593067960016564, "grad_norm": 0.3284071683883667, "learning_rate": 2.0642648508778037e-05, "loss": 0.0876, "step": 66940 }, { "epoch": 3.9598982669900042, "grad_norm": 0.24568049609661102, "learning_rate": 2.063998600621107e-05, "loss": 0.0739, "step": 66950 }, { "epoch": 3.960489737978352, "grad_norm": 0.19710500538349152, "learning_rate": 2.0637323296674783e-05, "loss": 0.0514, "step": 66960 }, { "epoch": 3.9610812089667, "grad_norm": 0.1436520516872406, "learning_rate": 2.0634660380266898e-05, "loss": 0.0502, "step": 66970 }, { "epoch": 3.9616726799550483, "grad_norm": 0.3517662286758423, "learning_rate": 2.063199725708513e-05, "loss": 0.0819, "step": 66980 }, { "epoch": 3.9622641509433962, "grad_norm": 0.4691433310508728, "learning_rate": 2.062933392722721e-05, "loss": 0.0786, "step": 66990 }, { "epoch": 3.962855621931744, "grad_norm": 0.3514903485774994, "learning_rate": 2.0626670390790866e-05, "loss": 0.0726, "step": 67000 }, { "epoch": 3.9634470929200925, "grad_norm": 0.2273167222738266, "learning_rate": 2.0624006647873852e-05, "loss": 0.0635, "step": 67010 }, { "epoch": 3.9640385639084403, "grad_norm": 0.25631436705589294, "learning_rate": 2.0621342698573913e-05, "loss": 0.0658, "step": 67020 }, { "epoch": 3.9646300348967882, "grad_norm": 0.22709672152996063, "learning_rate": 2.0618678542988803e-05, "loss": 0.0712, "step": 67030 }, { "epoch": 3.965221505885136, "grad_norm": 0.24888058006763458, "learning_rate": 2.0616014181216298e-05, "loss": 0.0742, "step": 67040 }, { "epoch": 3.9658129768734844, "grad_norm": 0.24208030104637146, "learning_rate": 2.061334961335416e-05, "loss": 0.074, "step": 67050 }, { "epoch": 3.9664044478618323, "grad_norm": 0.6117071509361267, "learning_rate": 2.0610684839500184e-05, "loss": 0.0755, "step": 67060 }, { "epoch": 3.9669959188501807, "grad_norm": 0.30996114015579224, "learning_rate": 2.0608019859752145e-05, "loss": 0.0643, "step": 67070 }, { "epoch": 3.9675873898385285, "grad_norm": 0.3400537669658661, "learning_rate": 2.060535467420785e-05, "loss": 0.0808, "step": 67080 }, { "epoch": 3.9681788608268764, "grad_norm": 0.1903315633535385, "learning_rate": 2.0602689282965093e-05, "loss": 0.0688, "step": 67090 }, { "epoch": 3.9687703318152243, "grad_norm": 0.2759948968887329, "learning_rate": 2.0600023686121694e-05, "loss": 0.0695, "step": 67100 }, { "epoch": 3.9693618028035726, "grad_norm": 0.2175978571176529, "learning_rate": 2.0597357883775464e-05, "loss": 0.0686, "step": 67110 }, { "epoch": 3.9699532737919205, "grad_norm": 0.19037754833698273, "learning_rate": 2.0594691876024235e-05, "loss": 0.0551, "step": 67120 }, { "epoch": 3.9705447447802684, "grad_norm": 0.3163779079914093, "learning_rate": 2.0592025662965844e-05, "loss": 0.0701, "step": 67130 }, { "epoch": 3.9711362157686168, "grad_norm": 0.27763235569000244, "learning_rate": 2.058935924469812e-05, "loss": 0.0728, "step": 67140 }, { "epoch": 3.9717276867569646, "grad_norm": 0.25856998562812805, "learning_rate": 2.0586692621318923e-05, "loss": 0.067, "step": 67150 }, { "epoch": 3.9723191577453125, "grad_norm": 0.23075351119041443, "learning_rate": 2.0584025792926105e-05, "loss": 0.0679, "step": 67160 }, { "epoch": 3.9729106287336604, "grad_norm": 0.24087706208229065, "learning_rate": 2.0581358759617532e-05, "loss": 0.0559, "step": 67170 }, { "epoch": 3.9735020997220087, "grad_norm": 0.26864972710609436, "learning_rate": 2.0578691521491076e-05, "loss": 0.0743, "step": 67180 }, { "epoch": 3.9740935707103566, "grad_norm": 0.32264137268066406, "learning_rate": 2.057602407864461e-05, "loss": 0.0792, "step": 67190 }, { "epoch": 3.9746850416987045, "grad_norm": 0.16661494970321655, "learning_rate": 2.057335643117603e-05, "loss": 0.0724, "step": 67200 }, { "epoch": 3.975276512687053, "grad_norm": 0.20816193521022797, "learning_rate": 2.0570688579183223e-05, "loss": 0.0581, "step": 67210 }, { "epoch": 3.9758679836754007, "grad_norm": 0.41586124897003174, "learning_rate": 2.0568020522764095e-05, "loss": 0.0598, "step": 67220 }, { "epoch": 3.9764594546637486, "grad_norm": 0.32408878207206726, "learning_rate": 2.056535226201655e-05, "loss": 0.0776, "step": 67230 }, { "epoch": 3.9770509256520965, "grad_norm": 0.2743518054485321, "learning_rate": 2.0562683797038507e-05, "loss": 0.075, "step": 67240 }, { "epoch": 3.977642396640445, "grad_norm": 0.31966790556907654, "learning_rate": 2.056001512792789e-05, "loss": 0.077, "step": 67250 }, { "epoch": 3.9782338676287927, "grad_norm": 0.27595221996307373, "learning_rate": 2.0557346254782638e-05, "loss": 0.062, "step": 67260 }, { "epoch": 3.978825338617141, "grad_norm": 0.2028777003288269, "learning_rate": 2.0554677177700677e-05, "loss": 0.0632, "step": 67270 }, { "epoch": 3.979416809605489, "grad_norm": 0.3508329391479492, "learning_rate": 2.055200789677996e-05, "loss": 0.0778, "step": 67280 }, { "epoch": 3.980008280593837, "grad_norm": 0.4085163176059723, "learning_rate": 2.0549338412118443e-05, "loss": 0.08, "step": 67290 }, { "epoch": 3.9805997515821847, "grad_norm": 0.2381708323955536, "learning_rate": 2.0546668723814082e-05, "loss": 0.0754, "step": 67300 }, { "epoch": 3.981191222570533, "grad_norm": 0.17471179366111755, "learning_rate": 2.0543998831964847e-05, "loss": 0.0648, "step": 67310 }, { "epoch": 3.981782693558881, "grad_norm": 0.32988306879997253, "learning_rate": 2.0541328736668724e-05, "loss": 0.069, "step": 67320 }, { "epoch": 3.982374164547229, "grad_norm": 0.3276417851448059, "learning_rate": 2.0538658438023687e-05, "loss": 0.0832, "step": 67330 }, { "epoch": 3.982965635535577, "grad_norm": 0.21835924685001373, "learning_rate": 2.0535987936127727e-05, "loss": 0.0722, "step": 67340 }, { "epoch": 3.983557106523925, "grad_norm": 0.24846981465816498, "learning_rate": 2.0533317231078847e-05, "loss": 0.073, "step": 67350 }, { "epoch": 3.984148577512273, "grad_norm": 0.2443518340587616, "learning_rate": 2.0530646322975054e-05, "loss": 0.0653, "step": 67360 }, { "epoch": 3.984740048500621, "grad_norm": 0.17175117135047913, "learning_rate": 2.0527975211914354e-05, "loss": 0.0552, "step": 67370 }, { "epoch": 3.985331519488969, "grad_norm": 0.33770495653152466, "learning_rate": 2.052530389799478e-05, "loss": 0.0808, "step": 67380 }, { "epoch": 3.985922990477317, "grad_norm": 0.24053901433944702, "learning_rate": 2.052263238131435e-05, "loss": 0.0756, "step": 67390 }, { "epoch": 3.9865144614656653, "grad_norm": 0.2623962461948395, "learning_rate": 2.0519960661971106e-05, "loss": 0.0737, "step": 67400 }, { "epoch": 3.9871059324540132, "grad_norm": 0.2616909146308899, "learning_rate": 2.051728874006309e-05, "loss": 0.0797, "step": 67410 }, { "epoch": 3.987697403442361, "grad_norm": 0.2587584853172302, "learning_rate": 2.0514616615688356e-05, "loss": 0.0501, "step": 67420 }, { "epoch": 3.988288874430709, "grad_norm": 0.36121177673339844, "learning_rate": 2.0511944288944954e-05, "loss": 0.0859, "step": 67430 }, { "epoch": 3.9888803454190573, "grad_norm": 0.18993344902992249, "learning_rate": 2.0509271759930954e-05, "loss": 0.0745, "step": 67440 }, { "epoch": 3.9894718164074052, "grad_norm": 0.17973075807094574, "learning_rate": 2.0506599028744434e-05, "loss": 0.0789, "step": 67450 }, { "epoch": 3.990063287395753, "grad_norm": 0.5757108330726624, "learning_rate": 2.0503926095483473e-05, "loss": 0.0642, "step": 67460 }, { "epoch": 3.9906547583841014, "grad_norm": 0.24590879678726196, "learning_rate": 2.050125296024615e-05, "loss": 0.06, "step": 67470 }, { "epoch": 3.9912462293724493, "grad_norm": 0.22724728286266327, "learning_rate": 2.0498579623130572e-05, "loss": 0.0762, "step": 67480 }, { "epoch": 3.991837700360797, "grad_norm": 0.2850435972213745, "learning_rate": 2.049590608423484e-05, "loss": 0.0795, "step": 67490 }, { "epoch": 3.992429171349145, "grad_norm": 0.20783567428588867, "learning_rate": 2.0493232343657055e-05, "loss": 0.0718, "step": 67500 }, { "epoch": 3.9930206423374934, "grad_norm": 0.22608113288879395, "learning_rate": 2.0490558401495347e-05, "loss": 0.059, "step": 67510 }, { "epoch": 3.9936121133258413, "grad_norm": 0.1949906349182129, "learning_rate": 2.0487884257847834e-05, "loss": 0.0559, "step": 67520 }, { "epoch": 3.9942035843141896, "grad_norm": 0.312238484621048, "learning_rate": 2.0485209912812645e-05, "loss": 0.0675, "step": 67530 }, { "epoch": 3.9947950553025375, "grad_norm": 0.22550924122333527, "learning_rate": 2.0482535366487925e-05, "loss": 0.0767, "step": 67540 }, { "epoch": 3.9953865262908854, "grad_norm": 0.15453879535198212, "learning_rate": 2.0479860618971827e-05, "loss": 0.0666, "step": 67550 }, { "epoch": 3.9959779972792333, "grad_norm": 0.21600617468357086, "learning_rate": 2.0477185670362495e-05, "loss": 0.0706, "step": 67560 }, { "epoch": 3.9965694682675816, "grad_norm": 0.4262576401233673, "learning_rate": 2.047451052075809e-05, "loss": 0.0677, "step": 67570 }, { "epoch": 3.9971609392559295, "grad_norm": 0.31514865159988403, "learning_rate": 2.0471835170256796e-05, "loss": 0.0736, "step": 67580 }, { "epoch": 3.9977524102442774, "grad_norm": 0.20225808024406433, "learning_rate": 2.0469159618956778e-05, "loss": 0.0718, "step": 67590 }, { "epoch": 3.9983438812326257, "grad_norm": 0.1538747251033783, "learning_rate": 2.046648386695622e-05, "loss": 0.0774, "step": 67600 }, { "epoch": 3.9989353522209736, "grad_norm": 0.22352659702301025, "learning_rate": 2.0463807914353318e-05, "loss": 0.0599, "step": 67610 }, { "epoch": 3.9995268232093215, "grad_norm": 0.28754520416259766, "learning_rate": 2.0461131761246268e-05, "loss": 0.0628, "step": 67620 }, { "epoch": 4.0, "eval_accuracy": 0.6579665302591743, "eval_animal_abuse/accuracy": 0.9947266859633364, "eval_animal_abuse/f1": 0.7721063982746226, "eval_animal_abuse/fpr": 0.0027933900986100314, "eval_animal_abuse/precision": 0.7638691322901849, "eval_animal_abuse/recall": 0.7805232558139535, "eval_animal_abuse/threshold": 0.32082128524780273, "eval_child_abuse/accuracy": 0.9961406660678045, "eval_child_abuse/f1": 0.672316384180791, "eval_child_abuse/fpr": 0.002291698031147016, "eval_child_abuse/precision": 0.6346666666666667, "eval_child_abuse/recall": 0.7147147147147147, "eval_child_abuse/threshold": 0.3757064640522003, "eval_controversial_topics,politics/accuracy": 0.9672122966363909, "eval_controversial_topics,politics/f1": 0.5039013340045306, "eval_controversial_topics,politics/fpr": 0.019391817682591947, "eval_controversial_topics,politics/precision": 0.4697325199436884, "eval_controversial_topics,politics/recall": 0.5434310532030402, "eval_controversial_topics,politics/threshold": 0.3363310694694519, "eval_discrimination,stereotype,injustice/accuracy": 0.9524237282496589, "eval_discrimination,stereotype,injustice/f1": 0.7101743007701662, "eval_discrimination,stereotype,injustice/fpr": 0.028626161130588735, "eval_discrimination,stereotype,injustice/precision": 0.6886792452830188, "eval_discrimination,stereotype,injustice/recall": 0.7330543933054393, "eval_discrimination,stereotype,injustice/threshold": 0.36840569972991943, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9716538576704262, "eval_drug_abuse,weapons,banned_substance/f1": 0.7631359466221852, "eval_drug_abuse,weapons,banned_substance/fpr": 0.018738541813566458, "eval_drug_abuse,weapons,banned_substance/precision": 0.7208508403361344, "eval_drug_abuse,weapons,banned_substance/recall": 0.8106910809214413, "eval_drug_abuse,weapons,banned_substance/threshold": 0.42823341488838196, "eval_financial_crime,property_crime,theft/accuracy": 0.9577136773463752, "eval_financial_crime,property_crime,theft/f1": 0.7959544068068711, "eval_financial_crime,property_crime,theft/fpr": 0.030389031199896742, "eval_financial_crime,property_crime,theft/precision": 0.7504162252156803, "eval_financial_crime,property_crime,theft/recall": 0.8473765168347291, "eval_financial_crime,property_crime,theft/threshold": 0.41869693994522095, "eval_flagged/accuracy": 0.8472568785973318, "eval_flagged/aucpr": 0.8982613922766323, "eval_flagged/f1": 0.8656069787184216, "eval_flagged/fpr": 0.19881483704009228, "eval_flagged/precision": 0.8479825643084512, "eval_flagged/recall": 0.8839795521808018, "eval_hate_speech,offensive_language/accuracy": 0.9466513624114183, "eval_hate_speech,offensive_language/f1": 0.6948330002854696, "eval_hate_speech,offensive_language/fpr": 0.026932212680431156, "eval_hate_speech,offensive_language/precision": 0.712390243902439, "eval_hate_speech,offensive_language/recall": 0.6781203566121843, "eval_hate_speech,offensive_language/threshold": 0.4205995798110962, "eval_loss": 0.08220886439085007, "eval_macro_f1": 0.6634524601915766, "eval_macro_precision": 0.639662268976558, "eval_macro_recall": 0.6938826515092299, "eval_micro_f1": 0.7416191325272005, "eval_micro_precision": 0.7225014335345904, "eval_micro_recall": 0.7617760549104682, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9754632864224639, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.21750663129973474, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.015981004664634632, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.17764298093587522, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.280437756497948, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.12168575078248978, "eval_non_violent_unethical_behavior/accuracy": 0.874737997804172, "eval_non_violent_unethical_behavior/f1": 0.6852533021233908, "eval_non_violent_unethical_behavior/fpr": 0.07849609699385468, "eval_non_violent_unethical_behavior/precision": 0.684337952913675, "eval_non_violent_unethical_behavior/recall": 0.6861711032981751, "eval_non_violent_unethical_behavior/threshold": 0.3900110721588135, "eval_privacy_violation/accuracy": 0.9811691120204944, "eval_privacy_violation/f1": 0.8110814419225634, "eval_privacy_violation/fpr": 0.010429061384475379, "eval_privacy_violation/precision": 0.8030403172504957, "eval_privacy_violation/recall": 0.8192852326365475, "eval_privacy_violation/threshold": 0.5583270192146301, "eval_runtime": 85.5002, "eval_samples_per_second": 703.086, "eval_self_harm/accuracy": 0.9964068270286456, "eval_self_harm/f1": 0.7265822784810126, "eval_self_harm/fpr": 0.001557684577247753, "eval_self_harm/precision": 0.7552631578947369, "eval_self_harm/recall": 0.7, "eval_self_harm/threshold": 0.3407045006752014, "eval_sexually_explicit,adult_content/accuracy": 0.9820008650231228, "eval_sexually_explicit,adult_content/f1": 0.668301655426119, "eval_sexually_explicit,adult_content/fpr": 0.012357884330202648, "eval_sexually_explicit,adult_content/precision": 0.6005509641873278, "eval_sexually_explicit,adult_content/recall": 0.7532826537664132, "eval_sexually_explicit,adult_content/threshold": 0.2393493503332138, "eval_steps_per_second": 43.953, "eval_terrorism,organized_crime/accuracy": 0.9887214292843597, "eval_terrorism,organized_crime/f1": 0.41852487135506006, "eval_terrorism,organized_crime/fpr": 0.007395234182415764, "eval_terrorism,organized_crime/precision": 0.3562043795620438, "eval_terrorism,organized_crime/recall": 0.5072765072765073, "eval_terrorism,organized_crime/threshold": 0.24077458679676056, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9184050304421599, "eval_violence,aiding_and_abetting,incitement/f1": 0.8486624911295548, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06042337156067254, "eval_violence,aiding_and_abetting,incitement/precision": 0.8376271392898471, "eval_violence,aiding_and_abetting,incitement/recall": 0.859992496248124, "eval_violence,aiding_and_abetting,incitement/threshold": 0.4756053388118744, "step": 67628 }, { "epoch": 4.0, "step": 67628, "total_flos": 5.663805429544462e+17, "train_loss": 0.08102998742395807, "train_runtime": 8859.5268, "train_samples_per_second": 610.665, "train_steps_per_second": 19.083 } ], "logging_steps": 10, "max_steps": 169070, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.663805429544462e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }