{ "best_global_step": 101442, "best_metric": 0.6790032184713165, "best_model_checkpoint": "output/QA-DeBERTa-v3-large-6970/checkpoint-101442", "epoch": 5.0, "eval_steps": 500, "global_step": 169070, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002957354941740108, "grad_norm": 1.709465742111206, "learning_rate": 1.0261883434089335e-09, "loss": 0.7357, "step": 10 }, { "epoch": 0.0005914709883480216, "grad_norm": 1.6622246503829956, "learning_rate": 2.166397613863304e-09, "loss": 0.7335, "step": 20 }, { "epoch": 0.0008872064825220323, "grad_norm": 1.6779954433441162, "learning_rate": 3.306606884317674e-09, "loss": 0.7332, "step": 30 }, { "epoch": 0.0011829419766960431, "grad_norm": 1.614998459815979, "learning_rate": 4.446816154772045e-09, "loss": 0.7346, "step": 40 }, { "epoch": 0.0014786774708700537, "grad_norm": 1.7388017177581787, "learning_rate": 5.587025425226415e-09, "loss": 0.7283, "step": 50 }, { "epoch": 0.0017744129650440646, "grad_norm": 1.5931695699691772, "learning_rate": 6.7272346956807854e-09, "loss": 0.7321, "step": 60 }, { "epoch": 0.0020701484592180754, "grad_norm": 1.694272518157959, "learning_rate": 7.867443966135156e-09, "loss": 0.7352, "step": 70 }, { "epoch": 0.0023658839533920862, "grad_norm": 1.7007029056549072, "learning_rate": 9.007653236589528e-09, "loss": 0.7299, "step": 80 }, { "epoch": 0.002661619447566097, "grad_norm": 1.7571079730987549, "learning_rate": 1.0147862507043898e-08, "loss": 0.7364, "step": 90 }, { "epoch": 0.0029573549417401075, "grad_norm": 1.8397973775863647, "learning_rate": 1.1288071777498267e-08, "loss": 0.7335, "step": 100 }, { "epoch": 0.0032530904359141183, "grad_norm": 1.701013207435608, "learning_rate": 1.2428281047952637e-08, "loss": 0.7339, "step": 110 }, { "epoch": 0.003548825930088129, "grad_norm": 1.6931296586990356, "learning_rate": 1.3568490318407009e-08, "loss": 0.7307, "step": 120 }, { "epoch": 0.00384456142426214, "grad_norm": 1.6810343265533447, "learning_rate": 1.470869958886138e-08, "loss": 0.7355, "step": 130 }, { "epoch": 0.004140296918436151, "grad_norm": 1.6985174417495728, "learning_rate": 1.584890885931575e-08, "loss": 0.7309, "step": 140 }, { "epoch": 0.004436032412610162, "grad_norm": 1.8060656785964966, "learning_rate": 1.698911812977012e-08, "loss": 0.7278, "step": 150 }, { "epoch": 0.0047317679067841725, "grad_norm": 1.6703805923461914, "learning_rate": 1.8129327400224492e-08, "loss": 0.7361, "step": 160 }, { "epoch": 0.005027503400958183, "grad_norm": 1.751887559890747, "learning_rate": 1.9269536670678862e-08, "loss": 0.7308, "step": 170 }, { "epoch": 0.005323238895132194, "grad_norm": 1.7764558792114258, "learning_rate": 2.0409745941133232e-08, "loss": 0.733, "step": 180 }, { "epoch": 0.005618974389306204, "grad_norm": 1.6577351093292236, "learning_rate": 2.15499552115876e-08, "loss": 0.7314, "step": 190 }, { "epoch": 0.005914709883480215, "grad_norm": 1.7951151132583618, "learning_rate": 2.269016448204197e-08, "loss": 0.7288, "step": 200 }, { "epoch": 0.006210445377654226, "grad_norm": 1.737445592880249, "learning_rate": 2.383037375249634e-08, "loss": 0.7309, "step": 210 }, { "epoch": 0.006506180871828237, "grad_norm": 1.7721706628799438, "learning_rate": 2.4970583022950714e-08, "loss": 0.7343, "step": 220 }, { "epoch": 0.0068019163660022475, "grad_norm": 1.7921390533447266, "learning_rate": 2.6110792293405084e-08, "loss": 0.7273, "step": 230 }, { "epoch": 0.007097651860176258, "grad_norm": 1.7564938068389893, "learning_rate": 2.7251001563859454e-08, "loss": 0.734, "step": 240 }, { "epoch": 0.007393387354350269, "grad_norm": 1.721591830253601, "learning_rate": 2.8391210834313824e-08, "loss": 0.7298, "step": 250 }, { "epoch": 0.00768912284852428, "grad_norm": 1.7067829370498657, "learning_rate": 2.9531420104768195e-08, "loss": 0.7292, "step": 260 }, { "epoch": 0.00798485834269829, "grad_norm": 1.7408744096755981, "learning_rate": 3.067162937522256e-08, "loss": 0.7313, "step": 270 }, { "epoch": 0.008280593836872302, "grad_norm": 1.6406103372573853, "learning_rate": 3.181183864567693e-08, "loss": 0.7313, "step": 280 }, { "epoch": 0.008576329331046312, "grad_norm": 1.8123146295547485, "learning_rate": 3.29520479161313e-08, "loss": 0.7326, "step": 290 }, { "epoch": 0.008872064825220323, "grad_norm": 1.8259111642837524, "learning_rate": 3.409225718658567e-08, "loss": 0.728, "step": 300 }, { "epoch": 0.009167800319394334, "grad_norm": 1.7917215824127197, "learning_rate": 3.523246645704004e-08, "loss": 0.7299, "step": 310 }, { "epoch": 0.009463535813568345, "grad_norm": 1.7372608184814453, "learning_rate": 3.637267572749441e-08, "loss": 0.7301, "step": 320 }, { "epoch": 0.009759271307742356, "grad_norm": 1.69910728931427, "learning_rate": 3.7512884997948784e-08, "loss": 0.7308, "step": 330 }, { "epoch": 0.010055006801916367, "grad_norm": 1.6954227685928345, "learning_rate": 3.8653094268403154e-08, "loss": 0.7304, "step": 340 }, { "epoch": 0.010350742296090377, "grad_norm": 1.9026210308074951, "learning_rate": 3.979330353885753e-08, "loss": 0.7289, "step": 350 }, { "epoch": 0.010646477790264388, "grad_norm": 1.6355699300765991, "learning_rate": 4.09335128093119e-08, "loss": 0.73, "step": 360 }, { "epoch": 0.0109422132844384, "grad_norm": 1.7803531885147095, "learning_rate": 4.207372207976627e-08, "loss": 0.729, "step": 370 }, { "epoch": 0.011237948778612408, "grad_norm": 1.6233320236206055, "learning_rate": 4.321393135022064e-08, "loss": 0.7292, "step": 380 }, { "epoch": 0.011533684272786419, "grad_norm": 1.7863165140151978, "learning_rate": 4.435414062067501e-08, "loss": 0.7309, "step": 390 }, { "epoch": 0.01182941976696043, "grad_norm": 1.7575761079788208, "learning_rate": 4.549434989112938e-08, "loss": 0.7265, "step": 400 }, { "epoch": 0.01212515526113444, "grad_norm": 1.7641853094100952, "learning_rate": 4.663455916158375e-08, "loss": 0.7304, "step": 410 }, { "epoch": 0.012420890755308452, "grad_norm": 1.7101752758026123, "learning_rate": 4.777476843203812e-08, "loss": 0.729, "step": 420 }, { "epoch": 0.012716626249482462, "grad_norm": 1.6976078748703003, "learning_rate": 4.891497770249249e-08, "loss": 0.7292, "step": 430 }, { "epoch": 0.013012361743656473, "grad_norm": 1.7430704832077026, "learning_rate": 5.0055186972946863e-08, "loss": 0.7292, "step": 440 }, { "epoch": 0.013308097237830484, "grad_norm": 1.8025147914886475, "learning_rate": 5.1195396243401234e-08, "loss": 0.7261, "step": 450 }, { "epoch": 0.013603832732004495, "grad_norm": 1.6511156558990479, "learning_rate": 5.2335605513855604e-08, "loss": 0.7317, "step": 460 }, { "epoch": 0.013899568226178506, "grad_norm": 1.647619366645813, "learning_rate": 5.3475814784309974e-08, "loss": 0.7292, "step": 470 }, { "epoch": 0.014195303720352517, "grad_norm": 1.5958315134048462, "learning_rate": 5.4616024054764345e-08, "loss": 0.7266, "step": 480 }, { "epoch": 0.014491039214526527, "grad_norm": 1.721347451210022, "learning_rate": 5.5756233325218715e-08, "loss": 0.7273, "step": 490 }, { "epoch": 0.014786774708700538, "grad_norm": 1.7272248268127441, "learning_rate": 5.689644259567308e-08, "loss": 0.7252, "step": 500 }, { "epoch": 0.015082510202874549, "grad_norm": 1.7107535600662231, "learning_rate": 5.8036651866127456e-08, "loss": 0.728, "step": 510 }, { "epoch": 0.01537824569704856, "grad_norm": 1.6954700946807861, "learning_rate": 5.9176861136581826e-08, "loss": 0.726, "step": 520 }, { "epoch": 0.01567398119122257, "grad_norm": 1.6942987442016602, "learning_rate": 6.03170704070362e-08, "loss": 0.7277, "step": 530 }, { "epoch": 0.01596971668539658, "grad_norm": 1.6703391075134277, "learning_rate": 6.145727967749057e-08, "loss": 0.7253, "step": 540 }, { "epoch": 0.01626545217957059, "grad_norm": 1.7487692832946777, "learning_rate": 6.259748894794494e-08, "loss": 0.7209, "step": 550 }, { "epoch": 0.016561187673744603, "grad_norm": 1.7379446029663086, "learning_rate": 6.373769821839931e-08, "loss": 0.725, "step": 560 }, { "epoch": 0.016856923167918612, "grad_norm": 1.749861478805542, "learning_rate": 6.487790748885368e-08, "loss": 0.7233, "step": 570 }, { "epoch": 0.017152658662092625, "grad_norm": 1.7595373392105103, "learning_rate": 6.601811675930805e-08, "loss": 0.7247, "step": 580 }, { "epoch": 0.017448394156266634, "grad_norm": 1.6377027034759521, "learning_rate": 6.715832602976242e-08, "loss": 0.724, "step": 590 }, { "epoch": 0.017744129650440647, "grad_norm": 1.6689585447311401, "learning_rate": 6.829853530021679e-08, "loss": 0.7216, "step": 600 }, { "epoch": 0.018039865144614656, "grad_norm": 1.6695555448532104, "learning_rate": 6.943874457067117e-08, "loss": 0.7224, "step": 610 }, { "epoch": 0.01833560063878867, "grad_norm": 1.835863709449768, "learning_rate": 7.057895384112553e-08, "loss": 0.7229, "step": 620 }, { "epoch": 0.018631336132962677, "grad_norm": 1.6894605159759521, "learning_rate": 7.171916311157989e-08, "loss": 0.7232, "step": 630 }, { "epoch": 0.01892707162713669, "grad_norm": 1.6923770904541016, "learning_rate": 7.285937238203427e-08, "loss": 0.7225, "step": 640 }, { "epoch": 0.0192228071213107, "grad_norm": 1.8584328889846802, "learning_rate": 7.399958165248863e-08, "loss": 0.7187, "step": 650 }, { "epoch": 0.01951854261548471, "grad_norm": 1.591746211051941, "learning_rate": 7.513979092294301e-08, "loss": 0.7228, "step": 660 }, { "epoch": 0.01981427810965872, "grad_norm": 1.7615572214126587, "learning_rate": 7.628000019339737e-08, "loss": 0.7207, "step": 670 }, { "epoch": 0.020110013603832733, "grad_norm": 1.7336446046829224, "learning_rate": 7.742020946385175e-08, "loss": 0.7208, "step": 680 }, { "epoch": 0.020405749098006742, "grad_norm": 1.7799566984176636, "learning_rate": 7.856041873430611e-08, "loss": 0.718, "step": 690 }, { "epoch": 0.020701484592180755, "grad_norm": 1.793912410736084, "learning_rate": 7.970062800476049e-08, "loss": 0.7134, "step": 700 }, { "epoch": 0.020997220086354764, "grad_norm": 1.714246392250061, "learning_rate": 8.084083727521485e-08, "loss": 0.7195, "step": 710 }, { "epoch": 0.021292955580528777, "grad_norm": 1.577118992805481, "learning_rate": 8.198104654566923e-08, "loss": 0.7157, "step": 720 }, { "epoch": 0.021588691074702786, "grad_norm": 1.7615697383880615, "learning_rate": 8.31212558161236e-08, "loss": 0.7179, "step": 730 }, { "epoch": 0.0218844265688768, "grad_norm": 1.7381099462509155, "learning_rate": 8.426146508657797e-08, "loss": 0.7122, "step": 740 }, { "epoch": 0.022180162063050807, "grad_norm": 1.8543745279312134, "learning_rate": 8.540167435703235e-08, "loss": 0.7055, "step": 750 }, { "epoch": 0.022475897557224817, "grad_norm": 1.6972157955169678, "learning_rate": 8.654188362748673e-08, "loss": 0.7175, "step": 760 }, { "epoch": 0.02277163305139883, "grad_norm": 1.6592943668365479, "learning_rate": 8.768209289794109e-08, "loss": 0.7123, "step": 770 }, { "epoch": 0.023067368545572838, "grad_norm": 1.7514054775238037, "learning_rate": 8.882230216839547e-08, "loss": 0.7079, "step": 780 }, { "epoch": 0.02336310403974685, "grad_norm": 1.84046471118927, "learning_rate": 8.996251143884983e-08, "loss": 0.7013, "step": 790 }, { "epoch": 0.02365883953392086, "grad_norm": 1.9763678312301636, "learning_rate": 9.110272070930421e-08, "loss": 0.6906, "step": 800 }, { "epoch": 0.023954575028094872, "grad_norm": 1.8099839687347412, "learning_rate": 9.224292997975857e-08, "loss": 0.7041, "step": 810 }, { "epoch": 0.02425031052226888, "grad_norm": 2.4356284141540527, "learning_rate": 9.338313925021295e-08, "loss": 0.687, "step": 820 }, { "epoch": 0.024546046016442894, "grad_norm": 2.5997345447540283, "learning_rate": 9.452334852066731e-08, "loss": 0.6625, "step": 830 }, { "epoch": 0.024841781510616903, "grad_norm": 2.1418192386627197, "learning_rate": 9.566355779112169e-08, "loss": 0.6372, "step": 840 }, { "epoch": 0.025137517004790916, "grad_norm": 2.002599000930786, "learning_rate": 9.680376706157605e-08, "loss": 0.6245, "step": 850 }, { "epoch": 0.025433252498964925, "grad_norm": 1.7634913921356201, "learning_rate": 9.794397633203043e-08, "loss": 0.6043, "step": 860 }, { "epoch": 0.025728987993138937, "grad_norm": 1.8870930671691895, "learning_rate": 9.908418560248479e-08, "loss": 0.5911, "step": 870 }, { "epoch": 0.026024723487312947, "grad_norm": 1.798341155052185, "learning_rate": 1.0022439487293916e-07, "loss": 0.5843, "step": 880 }, { "epoch": 0.02632045898148696, "grad_norm": 1.8085323572158813, "learning_rate": 1.0136460414339353e-07, "loss": 0.5716, "step": 890 }, { "epoch": 0.026616194475660968, "grad_norm": 2.0352492332458496, "learning_rate": 1.025048134138479e-07, "loss": 0.5777, "step": 900 }, { "epoch": 0.02691192996983498, "grad_norm": 1.9059211015701294, "learning_rate": 1.0364502268430227e-07, "loss": 0.5613, "step": 910 }, { "epoch": 0.02720766546400899, "grad_norm": 1.6911944150924683, "learning_rate": 1.0478523195475664e-07, "loss": 0.5496, "step": 920 }, { "epoch": 0.027503400958183002, "grad_norm": 1.6515424251556396, "learning_rate": 1.0592544122521101e-07, "loss": 0.5481, "step": 930 }, { "epoch": 0.02779913645235701, "grad_norm": 1.8169242143630981, "learning_rate": 1.0706565049566538e-07, "loss": 0.5376, "step": 940 }, { "epoch": 0.028094871946531024, "grad_norm": 2.159796953201294, "learning_rate": 1.0820585976611976e-07, "loss": 0.5457, "step": 950 }, { "epoch": 0.028390607440705033, "grad_norm": 1.727889060974121, "learning_rate": 1.0934606903657412e-07, "loss": 0.5192, "step": 960 }, { "epoch": 0.028686342934879046, "grad_norm": 1.6756137609481812, "learning_rate": 1.104862783070285e-07, "loss": 0.5159, "step": 970 }, { "epoch": 0.028982078429053055, "grad_norm": 1.654195785522461, "learning_rate": 1.1162648757748286e-07, "loss": 0.5154, "step": 980 }, { "epoch": 0.029277813923227064, "grad_norm": 1.8217642307281494, "learning_rate": 1.1276669684793724e-07, "loss": 0.5093, "step": 990 }, { "epoch": 0.029573549417401077, "grad_norm": 1.8683708906173706, "learning_rate": 1.139069061183916e-07, "loss": 0.517, "step": 1000 }, { "epoch": 0.029869284911575086, "grad_norm": 1.4954562187194824, "learning_rate": 1.1504711538884598e-07, "loss": 0.4992, "step": 1010 }, { "epoch": 0.030165020405749098, "grad_norm": 1.5850608348846436, "learning_rate": 1.1618732465930034e-07, "loss": 0.4906, "step": 1020 }, { "epoch": 0.030460755899923107, "grad_norm": 1.7356573343276978, "learning_rate": 1.1732753392975472e-07, "loss": 0.4875, "step": 1030 }, { "epoch": 0.03075649139409712, "grad_norm": 1.6977765560150146, "learning_rate": 1.1846774320020908e-07, "loss": 0.4822, "step": 1040 }, { "epoch": 0.03105222688827113, "grad_norm": 1.8330339193344116, "learning_rate": 1.1960795247066347e-07, "loss": 0.4876, "step": 1050 }, { "epoch": 0.03134796238244514, "grad_norm": 1.574069857597351, "learning_rate": 1.2074816174111782e-07, "loss": 0.4729, "step": 1060 }, { "epoch": 0.03164369787661915, "grad_norm": 1.6496306657791138, "learning_rate": 1.218883710115722e-07, "loss": 0.4639, "step": 1070 }, { "epoch": 0.03193943337079316, "grad_norm": 1.6527975797653198, "learning_rate": 1.2302858028202658e-07, "loss": 0.4687, "step": 1080 }, { "epoch": 0.032235168864967176, "grad_norm": 1.6008166074752808, "learning_rate": 1.2416878955248093e-07, "loss": 0.466, "step": 1090 }, { "epoch": 0.03253090435914118, "grad_norm": 2.2343826293945312, "learning_rate": 1.253089988229353e-07, "loss": 0.4638, "step": 1100 }, { "epoch": 0.032826639853315194, "grad_norm": 1.579683780670166, "learning_rate": 1.2644920809338968e-07, "loss": 0.4459, "step": 1110 }, { "epoch": 0.03312237534748921, "grad_norm": 1.4403713941574097, "learning_rate": 1.2758941736384403e-07, "loss": 0.4401, "step": 1120 }, { "epoch": 0.03341811084166322, "grad_norm": 1.5065929889678955, "learning_rate": 1.287296266342984e-07, "loss": 0.4327, "step": 1130 }, { "epoch": 0.033713846335837225, "grad_norm": 1.4875298738479614, "learning_rate": 1.2986983590475278e-07, "loss": 0.4302, "step": 1140 }, { "epoch": 0.03400958183001124, "grad_norm": 1.9383023977279663, "learning_rate": 1.3101004517520716e-07, "loss": 0.4395, "step": 1150 }, { "epoch": 0.03430531732418525, "grad_norm": 1.5228129625320435, "learning_rate": 1.321502544456615e-07, "loss": 0.4262, "step": 1160 }, { "epoch": 0.03460105281835926, "grad_norm": 1.5186738967895508, "learning_rate": 1.332904637161159e-07, "loss": 0.412, "step": 1170 }, { "epoch": 0.03489678831253327, "grad_norm": 1.4809647798538208, "learning_rate": 1.3443067298657027e-07, "loss": 0.419, "step": 1180 }, { "epoch": 0.03519252380670728, "grad_norm": 1.6139271259307861, "learning_rate": 1.3557088225702464e-07, "loss": 0.4145, "step": 1190 }, { "epoch": 0.03548825930088129, "grad_norm": 2.0180470943450928, "learning_rate": 1.36711091527479e-07, "loss": 0.4205, "step": 1200 }, { "epoch": 0.035783994795055306, "grad_norm": 1.3685154914855957, "learning_rate": 1.3785130079793337e-07, "loss": 0.4019, "step": 1210 }, { "epoch": 0.03607973028922931, "grad_norm": 1.2216851711273193, "learning_rate": 1.3899151006838775e-07, "loss": 0.4067, "step": 1220 }, { "epoch": 0.036375465783403324, "grad_norm": 1.487379550933838, "learning_rate": 1.4013171933884215e-07, "loss": 0.3984, "step": 1230 }, { "epoch": 0.03667120127757734, "grad_norm": 1.346665382385254, "learning_rate": 1.412719286092965e-07, "loss": 0.3844, "step": 1240 }, { "epoch": 0.03696693677175134, "grad_norm": 1.8970292806625366, "learning_rate": 1.4241213787975088e-07, "loss": 0.3997, "step": 1250 }, { "epoch": 0.037262672265925355, "grad_norm": 1.4261651039123535, "learning_rate": 1.4355234715020525e-07, "loss": 0.3928, "step": 1260 }, { "epoch": 0.03755840776009937, "grad_norm": 1.50173020362854, "learning_rate": 1.4469255642065963e-07, "loss": 0.3861, "step": 1270 }, { "epoch": 0.03785414325427338, "grad_norm": 1.5046886205673218, "learning_rate": 1.4583276569111398e-07, "loss": 0.3816, "step": 1280 }, { "epoch": 0.038149878748447386, "grad_norm": 1.2889490127563477, "learning_rate": 1.4697297496156836e-07, "loss": 0.3687, "step": 1290 }, { "epoch": 0.0384456142426214, "grad_norm": 1.7095342874526978, "learning_rate": 1.4811318423202274e-07, "loss": 0.3893, "step": 1300 }, { "epoch": 0.03874134973679541, "grad_norm": 1.2248581647872925, "learning_rate": 1.492533935024771e-07, "loss": 0.376, "step": 1310 }, { "epoch": 0.03903708523096942, "grad_norm": 1.2851862907409668, "learning_rate": 1.5039360277293146e-07, "loss": 0.3737, "step": 1320 }, { "epoch": 0.03933282072514343, "grad_norm": 1.2324334383010864, "learning_rate": 1.5153381204338584e-07, "loss": 0.3604, "step": 1330 }, { "epoch": 0.03962855621931744, "grad_norm": 1.368546962738037, "learning_rate": 1.5267402131384022e-07, "loss": 0.3527, "step": 1340 }, { "epoch": 0.039924291713491454, "grad_norm": 1.7137869596481323, "learning_rate": 1.538142305842946e-07, "loss": 0.3779, "step": 1350 }, { "epoch": 0.04022002720766547, "grad_norm": 1.322043776512146, "learning_rate": 1.5495443985474894e-07, "loss": 0.367, "step": 1360 }, { "epoch": 0.04051576270183947, "grad_norm": 1.309154748916626, "learning_rate": 1.5609464912520332e-07, "loss": 0.3549, "step": 1370 }, { "epoch": 0.040811498196013485, "grad_norm": 1.37051260471344, "learning_rate": 1.572348583956577e-07, "loss": 0.3519, "step": 1380 }, { "epoch": 0.0411072336901875, "grad_norm": 1.3086919784545898, "learning_rate": 1.5837506766611205e-07, "loss": 0.3474, "step": 1390 }, { "epoch": 0.04140296918436151, "grad_norm": 1.7435247898101807, "learning_rate": 1.5951527693656643e-07, "loss": 0.3498, "step": 1400 }, { "epoch": 0.041698704678535516, "grad_norm": 1.2765594720840454, "learning_rate": 1.606554862070208e-07, "loss": 0.3546, "step": 1410 }, { "epoch": 0.04199444017270953, "grad_norm": 1.2396811246871948, "learning_rate": 1.6179569547747518e-07, "loss": 0.3357, "step": 1420 }, { "epoch": 0.04229017566688354, "grad_norm": 1.0452631711959839, "learning_rate": 1.6293590474792953e-07, "loss": 0.3454, "step": 1430 }, { "epoch": 0.04258591116105755, "grad_norm": 1.3331849575042725, "learning_rate": 1.640761140183839e-07, "loss": 0.3378, "step": 1440 }, { "epoch": 0.04288164665523156, "grad_norm": 1.6230995655059814, "learning_rate": 1.6521632328883828e-07, "loss": 0.3435, "step": 1450 }, { "epoch": 0.04317738214940557, "grad_norm": 1.345110297203064, "learning_rate": 1.6635653255929266e-07, "loss": 0.3548, "step": 1460 }, { "epoch": 0.043473117643579584, "grad_norm": 1.0651381015777588, "learning_rate": 1.67496741829747e-07, "loss": 0.3328, "step": 1470 }, { "epoch": 0.0437688531377536, "grad_norm": 1.178055763244629, "learning_rate": 1.686369511002014e-07, "loss": 0.3146, "step": 1480 }, { "epoch": 0.0440645886319276, "grad_norm": 1.156166672706604, "learning_rate": 1.6977716037065576e-07, "loss": 0.3238, "step": 1490 }, { "epoch": 0.044360324126101615, "grad_norm": 1.6424639225006104, "learning_rate": 1.7091736964111014e-07, "loss": 0.3396, "step": 1500 }, { "epoch": 0.04465605962027563, "grad_norm": 1.1074872016906738, "learning_rate": 1.720575789115645e-07, "loss": 0.3402, "step": 1510 }, { "epoch": 0.04495179511444963, "grad_norm": 1.0109299421310425, "learning_rate": 1.7319778818201887e-07, "loss": 0.3157, "step": 1520 }, { "epoch": 0.045247530608623646, "grad_norm": 1.1749744415283203, "learning_rate": 1.7433799745247325e-07, "loss": 0.3117, "step": 1530 }, { "epoch": 0.04554326610279766, "grad_norm": 1.2801694869995117, "learning_rate": 1.7547820672292762e-07, "loss": 0.3171, "step": 1540 }, { "epoch": 0.04583900159697167, "grad_norm": 1.5139989852905273, "learning_rate": 1.7661841599338197e-07, "loss": 0.3177, "step": 1550 }, { "epoch": 0.046134737091145676, "grad_norm": 1.1582587957382202, "learning_rate": 1.7775862526383635e-07, "loss": 0.316, "step": 1560 }, { "epoch": 0.04643047258531969, "grad_norm": 1.046334981918335, "learning_rate": 1.7889883453429073e-07, "loss": 0.3149, "step": 1570 }, { "epoch": 0.0467262080794937, "grad_norm": 1.2721556425094604, "learning_rate": 1.800390438047451e-07, "loss": 0.3166, "step": 1580 }, { "epoch": 0.047021943573667714, "grad_norm": 1.1450589895248413, "learning_rate": 1.8117925307519945e-07, "loss": 0.3087, "step": 1590 }, { "epoch": 0.04731767906784172, "grad_norm": 1.7198151350021362, "learning_rate": 1.8231946234565383e-07, "loss": 0.3157, "step": 1600 }, { "epoch": 0.04761341456201573, "grad_norm": 1.0773652791976929, "learning_rate": 1.834596716161082e-07, "loss": 0.3116, "step": 1610 }, { "epoch": 0.047909150056189745, "grad_norm": 1.2116484642028809, "learning_rate": 1.8459988088656256e-07, "loss": 0.306, "step": 1620 }, { "epoch": 0.04820488555036376, "grad_norm": 0.9546124339103699, "learning_rate": 1.8574009015701694e-07, "loss": 0.298, "step": 1630 }, { "epoch": 0.04850062104453776, "grad_norm": 1.001686453819275, "learning_rate": 1.868802994274713e-07, "loss": 0.3029, "step": 1640 }, { "epoch": 0.048796356538711776, "grad_norm": 1.3703161478042603, "learning_rate": 1.880205086979257e-07, "loss": 0.3055, "step": 1650 }, { "epoch": 0.04909209203288579, "grad_norm": 1.0186856985092163, "learning_rate": 1.8916071796838004e-07, "loss": 0.3007, "step": 1660 }, { "epoch": 0.0493878275270598, "grad_norm": 1.085618257522583, "learning_rate": 1.9030092723883442e-07, "loss": 0.2893, "step": 1670 }, { "epoch": 0.049683563021233806, "grad_norm": 1.0103199481964111, "learning_rate": 1.914411365092888e-07, "loss": 0.2917, "step": 1680 }, { "epoch": 0.04997929851540782, "grad_norm": 0.9628229141235352, "learning_rate": 1.9258134577974317e-07, "loss": 0.2982, "step": 1690 }, { "epoch": 0.05027503400958183, "grad_norm": 2.26137375831604, "learning_rate": 1.9372155505019752e-07, "loss": 0.2847, "step": 1700 }, { "epoch": 0.050570769503755844, "grad_norm": 1.048456072807312, "learning_rate": 1.948617643206519e-07, "loss": 0.3029, "step": 1710 }, { "epoch": 0.05086650499792985, "grad_norm": 1.0507831573486328, "learning_rate": 1.9600197359110628e-07, "loss": 0.2821, "step": 1720 }, { "epoch": 0.05116224049210386, "grad_norm": 0.9926427006721497, "learning_rate": 1.9714218286156065e-07, "loss": 0.2905, "step": 1730 }, { "epoch": 0.051457975986277875, "grad_norm": 0.961312472820282, "learning_rate": 1.98282392132015e-07, "loss": 0.2931, "step": 1740 }, { "epoch": 0.05175371148045188, "grad_norm": 1.3986531496047974, "learning_rate": 1.9942260140246938e-07, "loss": 0.2887, "step": 1750 }, { "epoch": 0.05204944697462589, "grad_norm": 1.2100545167922974, "learning_rate": 2.0056281067292376e-07, "loss": 0.2983, "step": 1760 }, { "epoch": 0.052345182468799906, "grad_norm": 0.8898706436157227, "learning_rate": 2.0170301994337813e-07, "loss": 0.2829, "step": 1770 }, { "epoch": 0.05264091796297392, "grad_norm": 1.064134955406189, "learning_rate": 2.0284322921383248e-07, "loss": 0.2711, "step": 1780 }, { "epoch": 0.052936653457147924, "grad_norm": 1.0034550428390503, "learning_rate": 2.0398343848428686e-07, "loss": 0.2711, "step": 1790 }, { "epoch": 0.053232388951321936, "grad_norm": 1.514204978942871, "learning_rate": 2.0512364775474124e-07, "loss": 0.2764, "step": 1800 }, { "epoch": 0.05352812444549595, "grad_norm": 0.9785981178283691, "learning_rate": 2.0626385702519561e-07, "loss": 0.286, "step": 1810 }, { "epoch": 0.05382385993966996, "grad_norm": 0.8925842642784119, "learning_rate": 2.0740406629564996e-07, "loss": 0.2703, "step": 1820 }, { "epoch": 0.05411959543384397, "grad_norm": 0.9838966131210327, "learning_rate": 2.0854427556610434e-07, "loss": 0.2788, "step": 1830 }, { "epoch": 0.05441533092801798, "grad_norm": 0.9489375948905945, "learning_rate": 2.0968448483655872e-07, "loss": 0.2597, "step": 1840 }, { "epoch": 0.05471106642219199, "grad_norm": 1.1493977308273315, "learning_rate": 2.108246941070131e-07, "loss": 0.2694, "step": 1850 }, { "epoch": 0.055006801916366005, "grad_norm": 0.8477813601493835, "learning_rate": 2.1196490337746745e-07, "loss": 0.2944, "step": 1860 }, { "epoch": 0.05530253741054001, "grad_norm": 0.8030583262443542, "learning_rate": 2.1310511264792182e-07, "loss": 0.2841, "step": 1870 }, { "epoch": 0.05559827290471402, "grad_norm": 0.9682376980781555, "learning_rate": 2.1424532191837623e-07, "loss": 0.2623, "step": 1880 }, { "epoch": 0.055894008398888036, "grad_norm": 0.8637194037437439, "learning_rate": 2.1538553118883058e-07, "loss": 0.2704, "step": 1890 }, { "epoch": 0.05618974389306205, "grad_norm": 1.1537015438079834, "learning_rate": 2.1652574045928495e-07, "loss": 0.2626, "step": 1900 }, { "epoch": 0.056485479387236054, "grad_norm": 0.8400982022285461, "learning_rate": 2.1766594972973933e-07, "loss": 0.2749, "step": 1910 }, { "epoch": 0.056781214881410066, "grad_norm": 1.0229967832565308, "learning_rate": 2.188061590001937e-07, "loss": 0.2706, "step": 1920 }, { "epoch": 0.05707695037558408, "grad_norm": 0.9763805270195007, "learning_rate": 2.1994636827064806e-07, "loss": 0.2711, "step": 1930 }, { "epoch": 0.05737268586975809, "grad_norm": 0.9212294220924377, "learning_rate": 2.2108657754110243e-07, "loss": 0.2569, "step": 1940 }, { "epoch": 0.0576684213639321, "grad_norm": 1.142306923866272, "learning_rate": 2.222267868115568e-07, "loss": 0.2725, "step": 1950 }, { "epoch": 0.05796415685810611, "grad_norm": 1.0344904661178589, "learning_rate": 2.233669960820112e-07, "loss": 0.2736, "step": 1960 }, { "epoch": 0.05825989235228012, "grad_norm": 0.975245475769043, "learning_rate": 2.2450720535246554e-07, "loss": 0.2596, "step": 1970 }, { "epoch": 0.05855562784645413, "grad_norm": 0.7780627012252808, "learning_rate": 2.2564741462291992e-07, "loss": 0.264, "step": 1980 }, { "epoch": 0.05885136334062814, "grad_norm": 0.8894855976104736, "learning_rate": 2.267876238933743e-07, "loss": 0.2617, "step": 1990 }, { "epoch": 0.05914709883480215, "grad_norm": 1.1250720024108887, "learning_rate": 2.2792783316382867e-07, "loss": 0.2443, "step": 2000 }, { "epoch": 0.059442834328976166, "grad_norm": 0.8554569482803345, "learning_rate": 2.2906804243428302e-07, "loss": 0.265, "step": 2010 }, { "epoch": 0.05973856982315017, "grad_norm": 0.8245639204978943, "learning_rate": 2.302082517047374e-07, "loss": 0.2701, "step": 2020 }, { "epoch": 0.060034305317324184, "grad_norm": 0.9389355182647705, "learning_rate": 2.3134846097519177e-07, "loss": 0.2518, "step": 2030 }, { "epoch": 0.060330040811498196, "grad_norm": 0.8395287990570068, "learning_rate": 2.3248867024564615e-07, "loss": 0.2548, "step": 2040 }, { "epoch": 0.06062577630567221, "grad_norm": 1.1951408386230469, "learning_rate": 2.336288795161005e-07, "loss": 0.255, "step": 2050 }, { "epoch": 0.060921511799846215, "grad_norm": 0.8304075002670288, "learning_rate": 2.3476908878655488e-07, "loss": 0.2805, "step": 2060 }, { "epoch": 0.06121724729402023, "grad_norm": 0.8882930278778076, "learning_rate": 2.3590929805700926e-07, "loss": 0.26, "step": 2070 }, { "epoch": 0.06151298278819424, "grad_norm": 0.7711049914360046, "learning_rate": 2.3704950732746363e-07, "loss": 0.2491, "step": 2080 }, { "epoch": 0.06180871828236825, "grad_norm": 1.0065109729766846, "learning_rate": 2.3818971659791798e-07, "loss": 0.2366, "step": 2090 }, { "epoch": 0.06210445377654226, "grad_norm": 1.0590455532073975, "learning_rate": 2.3932992586837236e-07, "loss": 0.252, "step": 2100 }, { "epoch": 0.06240018927071627, "grad_norm": 0.9652573466300964, "learning_rate": 2.4047013513882674e-07, "loss": 0.2687, "step": 2110 }, { "epoch": 0.06269592476489028, "grad_norm": 0.7605579495429993, "learning_rate": 2.416103444092811e-07, "loss": 0.2443, "step": 2120 }, { "epoch": 0.06299166025906429, "grad_norm": 0.7835307717323303, "learning_rate": 2.427505536797355e-07, "loss": 0.2617, "step": 2130 }, { "epoch": 0.0632873957532383, "grad_norm": 0.9672801494598389, "learning_rate": 2.438907629501898e-07, "loss": 0.2374, "step": 2140 }, { "epoch": 0.06358313124741231, "grad_norm": 1.1148545742034912, "learning_rate": 2.450309722206442e-07, "loss": 0.245, "step": 2150 }, { "epoch": 0.06387886674158633, "grad_norm": 0.8052548170089722, "learning_rate": 2.4617118149109857e-07, "loss": 0.2496, "step": 2160 }, { "epoch": 0.06417460223576034, "grad_norm": 0.7678813338279724, "learning_rate": 2.4731139076155295e-07, "loss": 0.2527, "step": 2170 }, { "epoch": 0.06447033772993435, "grad_norm": 0.796437680721283, "learning_rate": 2.484516000320073e-07, "loss": 0.2517, "step": 2180 }, { "epoch": 0.06476607322410836, "grad_norm": 0.9278870224952698, "learning_rate": 2.495918093024617e-07, "loss": 0.2459, "step": 2190 }, { "epoch": 0.06506180871828236, "grad_norm": 0.9970981478691101, "learning_rate": 2.507320185729161e-07, "loss": 0.2532, "step": 2200 }, { "epoch": 0.06535754421245638, "grad_norm": 1.1818450689315796, "learning_rate": 2.5187222784337045e-07, "loss": 0.2662, "step": 2210 }, { "epoch": 0.06565327970663039, "grad_norm": 0.8658540844917297, "learning_rate": 2.530124371138248e-07, "loss": 0.257, "step": 2220 }, { "epoch": 0.0659490152008044, "grad_norm": 0.7723778486251831, "learning_rate": 2.5415264638427915e-07, "loss": 0.235, "step": 2230 }, { "epoch": 0.06624475069497841, "grad_norm": 0.8949558138847351, "learning_rate": 2.5529285565473353e-07, "loss": 0.2337, "step": 2240 }, { "epoch": 0.06654048618915243, "grad_norm": 2.2082109451293945, "learning_rate": 2.564330649251879e-07, "loss": 0.242, "step": 2250 }, { "epoch": 0.06683622168332644, "grad_norm": 0.8367722034454346, "learning_rate": 2.575732741956423e-07, "loss": 0.2395, "step": 2260 }, { "epoch": 0.06713195717750044, "grad_norm": 0.7070799469947815, "learning_rate": 2.5871348346609666e-07, "loss": 0.2435, "step": 2270 }, { "epoch": 0.06742769267167445, "grad_norm": 0.7697556614875793, "learning_rate": 2.5985369273655104e-07, "loss": 0.2457, "step": 2280 }, { "epoch": 0.06772342816584846, "grad_norm": 0.8202248811721802, "learning_rate": 2.609939020070054e-07, "loss": 0.2373, "step": 2290 }, { "epoch": 0.06801916366002247, "grad_norm": 0.974967896938324, "learning_rate": 2.6213411127745974e-07, "loss": 0.2344, "step": 2300 }, { "epoch": 0.06831489915419649, "grad_norm": 0.7534894943237305, "learning_rate": 2.632743205479141e-07, "loss": 0.2587, "step": 2310 }, { "epoch": 0.0686106346483705, "grad_norm": 0.8835882544517517, "learning_rate": 2.644145298183685e-07, "loss": 0.2453, "step": 2320 }, { "epoch": 0.06890637014254451, "grad_norm": 0.732071042060852, "learning_rate": 2.6555473908882287e-07, "loss": 0.2435, "step": 2330 }, { "epoch": 0.06920210563671852, "grad_norm": 0.8298642635345459, "learning_rate": 2.6669494835927725e-07, "loss": 0.2322, "step": 2340 }, { "epoch": 0.06949784113089252, "grad_norm": 1.018329381942749, "learning_rate": 2.678351576297316e-07, "loss": 0.2206, "step": 2350 }, { "epoch": 0.06979357662506654, "grad_norm": 1.1124054193496704, "learning_rate": 2.68975366900186e-07, "loss": 0.26, "step": 2360 }, { "epoch": 0.07008931211924055, "grad_norm": 0.6875130534172058, "learning_rate": 2.701155761706403e-07, "loss": 0.2333, "step": 2370 }, { "epoch": 0.07038504761341456, "grad_norm": 0.8054696917533875, "learning_rate": 2.712557854410947e-07, "loss": 0.2521, "step": 2380 }, { "epoch": 0.07068078310758857, "grad_norm": 0.7228703498840332, "learning_rate": 2.723959947115491e-07, "loss": 0.2154, "step": 2390 }, { "epoch": 0.07097651860176259, "grad_norm": 0.9331116676330566, "learning_rate": 2.7353620398200346e-07, "loss": 0.2224, "step": 2400 }, { "epoch": 0.0712722540959366, "grad_norm": 1.0203100442886353, "learning_rate": 2.7467641325245783e-07, "loss": 0.2499, "step": 2410 }, { "epoch": 0.07156798959011061, "grad_norm": 0.6952072978019714, "learning_rate": 2.758166225229122e-07, "loss": 0.2195, "step": 2420 }, { "epoch": 0.07186372508428461, "grad_norm": 0.7872620224952698, "learning_rate": 2.769568317933666e-07, "loss": 0.2297, "step": 2430 }, { "epoch": 0.07215946057845862, "grad_norm": 0.7986066937446594, "learning_rate": 2.7809704106382096e-07, "loss": 0.2123, "step": 2440 }, { "epoch": 0.07245519607263264, "grad_norm": 0.9740349650382996, "learning_rate": 2.792372503342753e-07, "loss": 0.2366, "step": 2450 }, { "epoch": 0.07275093156680665, "grad_norm": 1.0025030374526978, "learning_rate": 2.8037745960472966e-07, "loss": 0.2435, "step": 2460 }, { "epoch": 0.07304666706098066, "grad_norm": 0.7022868990898132, "learning_rate": 2.8151766887518404e-07, "loss": 0.2435, "step": 2470 }, { "epoch": 0.07334240255515467, "grad_norm": 0.7379316687583923, "learning_rate": 2.826578781456384e-07, "loss": 0.2337, "step": 2480 }, { "epoch": 0.07363813804932869, "grad_norm": 0.6916519999504089, "learning_rate": 2.837980874160928e-07, "loss": 0.2204, "step": 2490 }, { "epoch": 0.07393387354350268, "grad_norm": 1.0350033044815063, "learning_rate": 2.8493829668654717e-07, "loss": 0.2227, "step": 2500 }, { "epoch": 0.0742296090376767, "grad_norm": 0.6371175050735474, "learning_rate": 2.8607850595700155e-07, "loss": 0.2277, "step": 2510 }, { "epoch": 0.07452534453185071, "grad_norm": 0.7224276065826416, "learning_rate": 2.872187152274559e-07, "loss": 0.2271, "step": 2520 }, { "epoch": 0.07482108002602472, "grad_norm": 0.7966628670692444, "learning_rate": 2.8835892449791025e-07, "loss": 0.2337, "step": 2530 }, { "epoch": 0.07511681552019873, "grad_norm": 0.8453613519668579, "learning_rate": 2.894991337683646e-07, "loss": 0.2307, "step": 2540 }, { "epoch": 0.07541255101437275, "grad_norm": 1.0081897974014282, "learning_rate": 2.90639343038819e-07, "loss": 0.2291, "step": 2550 }, { "epoch": 0.07570828650854676, "grad_norm": 1.348480463027954, "learning_rate": 2.917795523092734e-07, "loss": 0.2233, "step": 2560 }, { "epoch": 0.07600402200272077, "grad_norm": 1.0657858848571777, "learning_rate": 2.9291976157972776e-07, "loss": 0.2359, "step": 2570 }, { "epoch": 0.07629975749689477, "grad_norm": 0.8241943717002869, "learning_rate": 2.9405997085018213e-07, "loss": 0.2274, "step": 2580 }, { "epoch": 0.07659549299106878, "grad_norm": 0.731627881526947, "learning_rate": 2.952001801206365e-07, "loss": 0.2244, "step": 2590 }, { "epoch": 0.0768912284852428, "grad_norm": 1.0012497901916504, "learning_rate": 2.9634038939109084e-07, "loss": 0.2143, "step": 2600 }, { "epoch": 0.07718696397941681, "grad_norm": 0.7709453105926514, "learning_rate": 2.974805986615452e-07, "loss": 0.2182, "step": 2610 }, { "epoch": 0.07748269947359082, "grad_norm": 1.0061562061309814, "learning_rate": 2.986208079319996e-07, "loss": 0.2255, "step": 2620 }, { "epoch": 0.07777843496776483, "grad_norm": 1.0573067665100098, "learning_rate": 2.9976101720245397e-07, "loss": 0.2251, "step": 2630 }, { "epoch": 0.07807417046193885, "grad_norm": 0.8312135338783264, "learning_rate": 3.0090122647290834e-07, "loss": 0.2193, "step": 2640 }, { "epoch": 0.07836990595611286, "grad_norm": 1.1266212463378906, "learning_rate": 3.020414357433627e-07, "loss": 0.235, "step": 2650 }, { "epoch": 0.07866564145028686, "grad_norm": 0.7989665269851685, "learning_rate": 3.031816450138171e-07, "loss": 0.2355, "step": 2660 }, { "epoch": 0.07896137694446087, "grad_norm": 0.9280983805656433, "learning_rate": 3.0432185428427147e-07, "loss": 0.198, "step": 2670 }, { "epoch": 0.07925711243863488, "grad_norm": 0.7494238615036011, "learning_rate": 3.054620635547258e-07, "loss": 0.2162, "step": 2680 }, { "epoch": 0.0795528479328089, "grad_norm": 0.8003157377243042, "learning_rate": 3.066022728251802e-07, "loss": 0.1995, "step": 2690 }, { "epoch": 0.07984858342698291, "grad_norm": 5.02930212020874, "learning_rate": 3.0774248209563455e-07, "loss": 0.219, "step": 2700 }, { "epoch": 0.08014431892115692, "grad_norm": 0.9650903940200806, "learning_rate": 3.0888269136608893e-07, "loss": 0.2344, "step": 2710 }, { "epoch": 0.08044005441533093, "grad_norm": 0.988069474697113, "learning_rate": 3.100229006365433e-07, "loss": 0.2177, "step": 2720 }, { "epoch": 0.08073578990950493, "grad_norm": 0.6847904920578003, "learning_rate": 3.111631099069977e-07, "loss": 0.2145, "step": 2730 }, { "epoch": 0.08103152540367894, "grad_norm": 0.7783904671669006, "learning_rate": 3.1230331917745206e-07, "loss": 0.2315, "step": 2740 }, { "epoch": 0.08132726089785296, "grad_norm": 0.9015799164772034, "learning_rate": 3.1344352844790644e-07, "loss": 0.2159, "step": 2750 }, { "epoch": 0.08162299639202697, "grad_norm": 0.7346140146255493, "learning_rate": 3.1458373771836076e-07, "loss": 0.2086, "step": 2760 }, { "epoch": 0.08191873188620098, "grad_norm": 1.2412540912628174, "learning_rate": 3.1572394698881514e-07, "loss": 0.2063, "step": 2770 }, { "epoch": 0.082214467380375, "grad_norm": 0.8172998428344727, "learning_rate": 3.168641562592695e-07, "loss": 0.2154, "step": 2780 }, { "epoch": 0.08251020287454901, "grad_norm": 1.1663490533828735, "learning_rate": 3.180043655297239e-07, "loss": 0.2205, "step": 2790 }, { "epoch": 0.08280593836872302, "grad_norm": 0.8815448880195618, "learning_rate": 3.1914457480017827e-07, "loss": 0.2091, "step": 2800 }, { "epoch": 0.08310167386289702, "grad_norm": 0.7380381226539612, "learning_rate": 3.2028478407063264e-07, "loss": 0.2182, "step": 2810 }, { "epoch": 0.08339740935707103, "grad_norm": 1.119476318359375, "learning_rate": 3.21424993341087e-07, "loss": 0.217, "step": 2820 }, { "epoch": 0.08369314485124504, "grad_norm": 0.7959821224212646, "learning_rate": 3.225652026115414e-07, "loss": 0.2239, "step": 2830 }, { "epoch": 0.08398888034541906, "grad_norm": 0.783059298992157, "learning_rate": 3.237054118819957e-07, "loss": 0.2029, "step": 2840 }, { "epoch": 0.08428461583959307, "grad_norm": 0.8605380654335022, "learning_rate": 3.2484562115245015e-07, "loss": 0.1986, "step": 2850 }, { "epoch": 0.08458035133376708, "grad_norm": 0.8439873456954956, "learning_rate": 3.2598583042290453e-07, "loss": 0.2128, "step": 2860 }, { "epoch": 0.0848760868279411, "grad_norm": 0.7933504581451416, "learning_rate": 3.271260396933589e-07, "loss": 0.2173, "step": 2870 }, { "epoch": 0.0851718223221151, "grad_norm": 0.7570623159408569, "learning_rate": 3.282662489638133e-07, "loss": 0.2116, "step": 2880 }, { "epoch": 0.0854675578162891, "grad_norm": 0.8449259996414185, "learning_rate": 3.2940645823426766e-07, "loss": 0.2124, "step": 2890 }, { "epoch": 0.08576329331046312, "grad_norm": 0.9369310140609741, "learning_rate": 3.3054666750472204e-07, "loss": 0.2023, "step": 2900 }, { "epoch": 0.08605902880463713, "grad_norm": 0.710782527923584, "learning_rate": 3.3168687677517636e-07, "loss": 0.223, "step": 2910 }, { "epoch": 0.08635476429881114, "grad_norm": 0.9766827821731567, "learning_rate": 3.3282708604563074e-07, "loss": 0.2112, "step": 2920 }, { "epoch": 0.08665049979298516, "grad_norm": 0.9084515571594238, "learning_rate": 3.339672953160851e-07, "loss": 0.2082, "step": 2930 }, { "epoch": 0.08694623528715917, "grad_norm": 0.889503002166748, "learning_rate": 3.351075045865395e-07, "loss": 0.1836, "step": 2940 }, { "epoch": 0.08724197078133318, "grad_norm": 0.9436101913452148, "learning_rate": 3.3624771385699387e-07, "loss": 0.206, "step": 2950 }, { "epoch": 0.0875377062755072, "grad_norm": 0.8048279285430908, "learning_rate": 3.3738792312744825e-07, "loss": 0.2011, "step": 2960 }, { "epoch": 0.08783344176968119, "grad_norm": 1.018816351890564, "learning_rate": 3.385281323979026e-07, "loss": 0.2078, "step": 2970 }, { "epoch": 0.0881291772638552, "grad_norm": 0.8326031565666199, "learning_rate": 3.39668341668357e-07, "loss": 0.1964, "step": 2980 }, { "epoch": 0.08842491275802922, "grad_norm": 0.7581288814544678, "learning_rate": 3.408085509388113e-07, "loss": 0.2206, "step": 2990 }, { "epoch": 0.08872064825220323, "grad_norm": 0.9168630242347717, "learning_rate": 3.419487602092657e-07, "loss": 0.2083, "step": 3000 }, { "epoch": 0.08901638374637724, "grad_norm": 1.0765575170516968, "learning_rate": 3.430889694797201e-07, "loss": 0.2089, "step": 3010 }, { "epoch": 0.08931211924055125, "grad_norm": 0.7700571417808533, "learning_rate": 3.4422917875017445e-07, "loss": 0.2226, "step": 3020 }, { "epoch": 0.08960785473472527, "grad_norm": 0.9922499656677246, "learning_rate": 3.4536938802062883e-07, "loss": 0.1905, "step": 3030 }, { "epoch": 0.08990359022889927, "grad_norm": 1.1877827644348145, "learning_rate": 3.465095972910832e-07, "loss": 0.196, "step": 3040 }, { "epoch": 0.09019932572307328, "grad_norm": 1.353184461593628, "learning_rate": 3.476498065615376e-07, "loss": 0.1968, "step": 3050 }, { "epoch": 0.09049506121724729, "grad_norm": 0.7973546385765076, "learning_rate": 3.4879001583199196e-07, "loss": 0.2203, "step": 3060 }, { "epoch": 0.0907907967114213, "grad_norm": 1.0356428623199463, "learning_rate": 3.499302251024463e-07, "loss": 0.1946, "step": 3070 }, { "epoch": 0.09108653220559532, "grad_norm": 1.284353256225586, "learning_rate": 3.5107043437290066e-07, "loss": 0.195, "step": 3080 }, { "epoch": 0.09138226769976933, "grad_norm": 0.9190360307693481, "learning_rate": 3.5221064364335504e-07, "loss": 0.1861, "step": 3090 }, { "epoch": 0.09167800319394334, "grad_norm": 0.7476867437362671, "learning_rate": 3.533508529138094e-07, "loss": 0.1958, "step": 3100 }, { "epoch": 0.09197373868811735, "grad_norm": 1.0298032760620117, "learning_rate": 3.544910621842638e-07, "loss": 0.2067, "step": 3110 }, { "epoch": 0.09226947418229135, "grad_norm": 1.6532825231552124, "learning_rate": 3.5563127145471817e-07, "loss": 0.2052, "step": 3120 }, { "epoch": 0.09256520967646537, "grad_norm": 0.9032609462738037, "learning_rate": 3.5677148072517255e-07, "loss": 0.1917, "step": 3130 }, { "epoch": 0.09286094517063938, "grad_norm": 1.201010823249817, "learning_rate": 3.5791168999562687e-07, "loss": 0.1884, "step": 3140 }, { "epoch": 0.09315668066481339, "grad_norm": 0.9735716581344604, "learning_rate": 3.5905189926608125e-07, "loss": 0.1961, "step": 3150 }, { "epoch": 0.0934524161589874, "grad_norm": 0.9768320918083191, "learning_rate": 3.601921085365356e-07, "loss": 0.2002, "step": 3160 }, { "epoch": 0.09374815165316142, "grad_norm": 0.7265749573707581, "learning_rate": 3.6133231780699e-07, "loss": 0.186, "step": 3170 }, { "epoch": 0.09404388714733543, "grad_norm": 0.7993773221969604, "learning_rate": 3.624725270774444e-07, "loss": 0.2042, "step": 3180 }, { "epoch": 0.09433962264150944, "grad_norm": 0.7845146656036377, "learning_rate": 3.6361273634789876e-07, "loss": 0.1897, "step": 3190 }, { "epoch": 0.09463535813568344, "grad_norm": 0.9060968160629272, "learning_rate": 3.6475294561835313e-07, "loss": 0.194, "step": 3200 }, { "epoch": 0.09493109362985745, "grad_norm": 0.957564115524292, "learning_rate": 3.658931548888075e-07, "loss": 0.2082, "step": 3210 }, { "epoch": 0.09522682912403146, "grad_norm": 0.8583465814590454, "learning_rate": 3.6703336415926183e-07, "loss": 0.2115, "step": 3220 }, { "epoch": 0.09552256461820548, "grad_norm": 0.9374297261238098, "learning_rate": 3.681735734297162e-07, "loss": 0.2015, "step": 3230 }, { "epoch": 0.09581830011237949, "grad_norm": 0.6756753325462341, "learning_rate": 3.693137827001706e-07, "loss": 0.1853, "step": 3240 }, { "epoch": 0.0961140356065535, "grad_norm": 0.7928134202957153, "learning_rate": 3.7045399197062496e-07, "loss": 0.1936, "step": 3250 }, { "epoch": 0.09640977110072751, "grad_norm": 1.371414065361023, "learning_rate": 3.7159420124107934e-07, "loss": 0.1984, "step": 3260 }, { "epoch": 0.09670550659490151, "grad_norm": 0.9331231117248535, "learning_rate": 3.727344105115337e-07, "loss": 0.1931, "step": 3270 }, { "epoch": 0.09700124208907553, "grad_norm": 1.3457688093185425, "learning_rate": 3.738746197819881e-07, "loss": 0.1908, "step": 3280 }, { "epoch": 0.09729697758324954, "grad_norm": 0.8464360237121582, "learning_rate": 3.7501482905244247e-07, "loss": 0.1911, "step": 3290 }, { "epoch": 0.09759271307742355, "grad_norm": 0.888022243976593, "learning_rate": 3.761550383228968e-07, "loss": 0.1818, "step": 3300 }, { "epoch": 0.09788844857159756, "grad_norm": 0.8853899836540222, "learning_rate": 3.7729524759335117e-07, "loss": 0.205, "step": 3310 }, { "epoch": 0.09818418406577158, "grad_norm": 0.760688304901123, "learning_rate": 3.7843545686380555e-07, "loss": 0.1885, "step": 3320 }, { "epoch": 0.09847991955994559, "grad_norm": 1.1640435457229614, "learning_rate": 3.7957566613425993e-07, "loss": 0.1959, "step": 3330 }, { "epoch": 0.0987756550541196, "grad_norm": 0.6932031512260437, "learning_rate": 3.807158754047143e-07, "loss": 0.1872, "step": 3340 }, { "epoch": 0.0990713905482936, "grad_norm": 1.9168181419372559, "learning_rate": 3.818560846751687e-07, "loss": 0.1809, "step": 3350 }, { "epoch": 0.09936712604246761, "grad_norm": 1.1497617959976196, "learning_rate": 3.8299629394562306e-07, "loss": 0.203, "step": 3360 }, { "epoch": 0.09966286153664163, "grad_norm": 1.0869232416152954, "learning_rate": 3.841365032160774e-07, "loss": 0.2049, "step": 3370 }, { "epoch": 0.09995859703081564, "grad_norm": 0.79445880651474, "learning_rate": 3.8527671248653176e-07, "loss": 0.1996, "step": 3380 }, { "epoch": 0.10025433252498965, "grad_norm": 0.9192619323730469, "learning_rate": 3.8641692175698614e-07, "loss": 0.1915, "step": 3390 }, { "epoch": 0.10055006801916366, "grad_norm": 0.9329776167869568, "learning_rate": 3.875571310274405e-07, "loss": 0.1743, "step": 3400 }, { "epoch": 0.10084580351333768, "grad_norm": 1.669707179069519, "learning_rate": 3.886973402978949e-07, "loss": 0.1757, "step": 3410 }, { "epoch": 0.10114153900751169, "grad_norm": 0.7481911182403564, "learning_rate": 3.8983754956834927e-07, "loss": 0.193, "step": 3420 }, { "epoch": 0.10143727450168569, "grad_norm": 0.7281387448310852, "learning_rate": 3.9097775883880364e-07, "loss": 0.1884, "step": 3430 }, { "epoch": 0.1017330099958597, "grad_norm": 0.8089117407798767, "learning_rate": 3.92117968109258e-07, "loss": 0.1904, "step": 3440 }, { "epoch": 0.10202874549003371, "grad_norm": 1.0906902551651, "learning_rate": 3.9325817737971234e-07, "loss": 0.1794, "step": 3450 }, { "epoch": 0.10232448098420772, "grad_norm": 1.693984031677246, "learning_rate": 3.943983866501667e-07, "loss": 0.1917, "step": 3460 }, { "epoch": 0.10262021647838174, "grad_norm": 0.7959063053131104, "learning_rate": 3.955385959206211e-07, "loss": 0.1795, "step": 3470 }, { "epoch": 0.10291595197255575, "grad_norm": 0.8528328537940979, "learning_rate": 3.966788051910755e-07, "loss": 0.1793, "step": 3480 }, { "epoch": 0.10321168746672976, "grad_norm": 0.815780758857727, "learning_rate": 3.9781901446152985e-07, "loss": 0.1845, "step": 3490 }, { "epoch": 0.10350742296090376, "grad_norm": 0.7441211342811584, "learning_rate": 3.9895922373198423e-07, "loss": 0.176, "step": 3500 }, { "epoch": 0.10380315845507777, "grad_norm": 1.5726280212402344, "learning_rate": 4.000994330024386e-07, "loss": 0.1907, "step": 3510 }, { "epoch": 0.10409889394925179, "grad_norm": 1.96705961227417, "learning_rate": 4.01239642272893e-07, "loss": 0.1918, "step": 3520 }, { "epoch": 0.1043946294434258, "grad_norm": 1.024606704711914, "learning_rate": 4.023798515433473e-07, "loss": 0.1834, "step": 3530 }, { "epoch": 0.10469036493759981, "grad_norm": 1.0886914730072021, "learning_rate": 4.035200608138017e-07, "loss": 0.1911, "step": 3540 }, { "epoch": 0.10498610043177382, "grad_norm": 1.1397162675857544, "learning_rate": 4.0466027008425606e-07, "loss": 0.1939, "step": 3550 }, { "epoch": 0.10528183592594784, "grad_norm": 0.8662410378456116, "learning_rate": 4.0580047935471044e-07, "loss": 0.1863, "step": 3560 }, { "epoch": 0.10557757142012185, "grad_norm": 1.04170560836792, "learning_rate": 4.069406886251648e-07, "loss": 0.1755, "step": 3570 }, { "epoch": 0.10587330691429585, "grad_norm": 1.2554603815078735, "learning_rate": 4.080808978956192e-07, "loss": 0.189, "step": 3580 }, { "epoch": 0.10616904240846986, "grad_norm": 0.7069886922836304, "learning_rate": 4.0922110716607357e-07, "loss": 0.1679, "step": 3590 }, { "epoch": 0.10646477790264387, "grad_norm": 0.8312535285949707, "learning_rate": 4.103613164365279e-07, "loss": 0.1652, "step": 3600 }, { "epoch": 0.10676051339681789, "grad_norm": 0.7432570457458496, "learning_rate": 4.1150152570698227e-07, "loss": 0.1767, "step": 3610 }, { "epoch": 0.1070562488909919, "grad_norm": 1.092429518699646, "learning_rate": 4.1264173497743665e-07, "loss": 0.1893, "step": 3620 }, { "epoch": 0.10735198438516591, "grad_norm": 0.8506819605827332, "learning_rate": 4.13781944247891e-07, "loss": 0.1839, "step": 3630 }, { "epoch": 0.10764771987933992, "grad_norm": 0.8514913320541382, "learning_rate": 4.149221535183454e-07, "loss": 0.1723, "step": 3640 }, { "epoch": 0.10794345537351394, "grad_norm": 0.9371907711029053, "learning_rate": 4.160623627887998e-07, "loss": 0.1755, "step": 3650 }, { "epoch": 0.10823919086768793, "grad_norm": 0.7904424071311951, "learning_rate": 4.1720257205925415e-07, "loss": 0.175, "step": 3660 }, { "epoch": 0.10853492636186195, "grad_norm": 0.968573808670044, "learning_rate": 4.1834278132970853e-07, "loss": 0.1754, "step": 3670 }, { "epoch": 0.10883066185603596, "grad_norm": 1.1503961086273193, "learning_rate": 4.1948299060016285e-07, "loss": 0.181, "step": 3680 }, { "epoch": 0.10912639735020997, "grad_norm": 1.2629114389419556, "learning_rate": 4.2062319987061723e-07, "loss": 0.1623, "step": 3690 }, { "epoch": 0.10942213284438398, "grad_norm": 0.9026176929473877, "learning_rate": 4.217634091410716e-07, "loss": 0.1783, "step": 3700 }, { "epoch": 0.109717868338558, "grad_norm": 1.159447431564331, "learning_rate": 4.22903618411526e-07, "loss": 0.1811, "step": 3710 }, { "epoch": 0.11001360383273201, "grad_norm": 0.9779772758483887, "learning_rate": 4.2404382768198036e-07, "loss": 0.1778, "step": 3720 }, { "epoch": 0.11030933932690601, "grad_norm": 0.7582942247390747, "learning_rate": 4.2518403695243474e-07, "loss": 0.182, "step": 3730 }, { "epoch": 0.11060507482108002, "grad_norm": 1.251184105873108, "learning_rate": 4.263242462228891e-07, "loss": 0.1737, "step": 3740 }, { "epoch": 0.11090081031525403, "grad_norm": 1.6455368995666504, "learning_rate": 4.274644554933435e-07, "loss": 0.1746, "step": 3750 }, { "epoch": 0.11119654580942805, "grad_norm": 1.0907518863677979, "learning_rate": 4.286046647637978e-07, "loss": 0.1796, "step": 3760 }, { "epoch": 0.11149228130360206, "grad_norm": 0.7198912501335144, "learning_rate": 4.297448740342522e-07, "loss": 0.1833, "step": 3770 }, { "epoch": 0.11178801679777607, "grad_norm": 0.92348313331604, "learning_rate": 4.3088508330470657e-07, "loss": 0.173, "step": 3780 }, { "epoch": 0.11208375229195008, "grad_norm": 0.800690233707428, "learning_rate": 4.3202529257516095e-07, "loss": 0.1714, "step": 3790 }, { "epoch": 0.1123794877861241, "grad_norm": 0.7649712562561035, "learning_rate": 4.331655018456153e-07, "loss": 0.1686, "step": 3800 }, { "epoch": 0.1126752232802981, "grad_norm": 0.8802704215049744, "learning_rate": 4.343057111160697e-07, "loss": 0.1847, "step": 3810 }, { "epoch": 0.11297095877447211, "grad_norm": 0.8901293873786926, "learning_rate": 4.354459203865241e-07, "loss": 0.1853, "step": 3820 }, { "epoch": 0.11326669426864612, "grad_norm": 1.1534088850021362, "learning_rate": 4.365861296569784e-07, "loss": 0.1694, "step": 3830 }, { "epoch": 0.11356242976282013, "grad_norm": 0.8373908400535583, "learning_rate": 4.377263389274328e-07, "loss": 0.1551, "step": 3840 }, { "epoch": 0.11385816525699415, "grad_norm": 0.7703737020492554, "learning_rate": 4.3886654819788716e-07, "loss": 0.1623, "step": 3850 }, { "epoch": 0.11415390075116816, "grad_norm": 0.8062789440155029, "learning_rate": 4.4000675746834153e-07, "loss": 0.1791, "step": 3860 }, { "epoch": 0.11444963624534217, "grad_norm": 2.6152310371398926, "learning_rate": 4.411469667387959e-07, "loss": 0.1716, "step": 3870 }, { "epoch": 0.11474537173951618, "grad_norm": 1.7593400478363037, "learning_rate": 4.422871760092503e-07, "loss": 0.1663, "step": 3880 }, { "epoch": 0.11504110723369018, "grad_norm": 0.8064714074134827, "learning_rate": 4.4342738527970466e-07, "loss": 0.1413, "step": 3890 }, { "epoch": 0.1153368427278642, "grad_norm": 0.871707022190094, "learning_rate": 4.4456759455015904e-07, "loss": 0.1593, "step": 3900 }, { "epoch": 0.11563257822203821, "grad_norm": 1.3113367557525635, "learning_rate": 4.4570780382061336e-07, "loss": 0.1645, "step": 3910 }, { "epoch": 0.11592831371621222, "grad_norm": 2.2077383995056152, "learning_rate": 4.4684801309106774e-07, "loss": 0.1892, "step": 3920 }, { "epoch": 0.11622404921038623, "grad_norm": 1.092988133430481, "learning_rate": 4.479882223615221e-07, "loss": 0.1597, "step": 3930 }, { "epoch": 0.11651978470456024, "grad_norm": 0.8871197700500488, "learning_rate": 4.491284316319765e-07, "loss": 0.1717, "step": 3940 }, { "epoch": 0.11681552019873426, "grad_norm": 1.1042609214782715, "learning_rate": 4.5026864090243087e-07, "loss": 0.1647, "step": 3950 }, { "epoch": 0.11711125569290826, "grad_norm": 2.5034332275390625, "learning_rate": 4.5140885017288525e-07, "loss": 0.1896, "step": 3960 }, { "epoch": 0.11740699118708227, "grad_norm": 1.0521044731140137, "learning_rate": 4.525490594433396e-07, "loss": 0.1743, "step": 3970 }, { "epoch": 0.11770272668125628, "grad_norm": 0.7264440059661865, "learning_rate": 4.53689268713794e-07, "loss": 0.1723, "step": 3980 }, { "epoch": 0.1179984621754303, "grad_norm": 0.8355633020401001, "learning_rate": 4.5482947798424833e-07, "loss": 0.1826, "step": 3990 }, { "epoch": 0.1182941976696043, "grad_norm": 0.8670969009399414, "learning_rate": 4.559696872547027e-07, "loss": 0.1672, "step": 4000 }, { "epoch": 0.11858993316377832, "grad_norm": 1.7595386505126953, "learning_rate": 4.571098965251571e-07, "loss": 0.1808, "step": 4010 }, { "epoch": 0.11888566865795233, "grad_norm": 2.0964224338531494, "learning_rate": 4.5825010579561146e-07, "loss": 0.1685, "step": 4020 }, { "epoch": 0.11918140415212634, "grad_norm": 1.499770998954773, "learning_rate": 4.5939031506606583e-07, "loss": 0.1694, "step": 4030 }, { "epoch": 0.11947713964630034, "grad_norm": 0.8621537685394287, "learning_rate": 4.605305243365202e-07, "loss": 0.18, "step": 4040 }, { "epoch": 0.11977287514047436, "grad_norm": 1.5447982549667358, "learning_rate": 4.616707336069746e-07, "loss": 0.1646, "step": 4050 }, { "epoch": 0.12006861063464837, "grad_norm": 1.6895644664764404, "learning_rate": 4.6281094287742897e-07, "loss": 0.1825, "step": 4060 }, { "epoch": 0.12036434612882238, "grad_norm": 1.147007942199707, "learning_rate": 4.639511521478833e-07, "loss": 0.1669, "step": 4070 }, { "epoch": 0.12066008162299639, "grad_norm": 0.816000759601593, "learning_rate": 4.6509136141833767e-07, "loss": 0.1618, "step": 4080 }, { "epoch": 0.1209558171171704, "grad_norm": 1.0932070016860962, "learning_rate": 4.6623157068879204e-07, "loss": 0.1445, "step": 4090 }, { "epoch": 0.12125155261134442, "grad_norm": 0.9056412577629089, "learning_rate": 4.673717799592464e-07, "loss": 0.1576, "step": 4100 }, { "epoch": 0.12154728810551843, "grad_norm": 0.9703043699264526, "learning_rate": 4.685119892297008e-07, "loss": 0.191, "step": 4110 }, { "epoch": 0.12184302359969243, "grad_norm": 0.8492308259010315, "learning_rate": 4.696521985001552e-07, "loss": 0.1644, "step": 4120 }, { "epoch": 0.12213875909386644, "grad_norm": 2.1098666191101074, "learning_rate": 4.707924077706096e-07, "loss": 0.1746, "step": 4130 }, { "epoch": 0.12243449458804045, "grad_norm": 1.2255566120147705, "learning_rate": 4.7193261704106393e-07, "loss": 0.1699, "step": 4140 }, { "epoch": 0.12273023008221447, "grad_norm": 1.1556434631347656, "learning_rate": 4.730728263115183e-07, "loss": 0.1504, "step": 4150 }, { "epoch": 0.12302596557638848, "grad_norm": 1.8246527910232544, "learning_rate": 4.742130355819727e-07, "loss": 0.1666, "step": 4160 }, { "epoch": 0.12332170107056249, "grad_norm": 0.8078717589378357, "learning_rate": 4.7535324485242706e-07, "loss": 0.1733, "step": 4170 }, { "epoch": 0.1236174365647365, "grad_norm": 1.6326240301132202, "learning_rate": 4.7649345412288144e-07, "loss": 0.1633, "step": 4180 }, { "epoch": 0.12391317205891052, "grad_norm": 1.1373589038848877, "learning_rate": 4.776336633933358e-07, "loss": 0.1595, "step": 4190 }, { "epoch": 0.12420890755308452, "grad_norm": 1.0175880193710327, "learning_rate": 4.787738726637901e-07, "loss": 0.1551, "step": 4200 }, { "epoch": 0.12450464304725853, "grad_norm": 0.8900519013404846, "learning_rate": 4.799140819342445e-07, "loss": 0.1745, "step": 4210 }, { "epoch": 0.12480037854143254, "grad_norm": 1.7357769012451172, "learning_rate": 4.810542912046989e-07, "loss": 0.1621, "step": 4220 }, { "epoch": 0.12509611403560655, "grad_norm": 1.095513105392456, "learning_rate": 4.821945004751533e-07, "loss": 0.1642, "step": 4230 }, { "epoch": 0.12539184952978055, "grad_norm": 1.1329355239868164, "learning_rate": 4.833347097456076e-07, "loss": 0.1603, "step": 4240 }, { "epoch": 0.12568758502395458, "grad_norm": 1.012614369392395, "learning_rate": 4.84474919016062e-07, "loss": 0.1555, "step": 4250 }, { "epoch": 0.12598332051812858, "grad_norm": 1.0598886013031006, "learning_rate": 4.856151282865163e-07, "loss": 0.1931, "step": 4260 }, { "epoch": 0.1262790560123026, "grad_norm": 1.5407586097717285, "learning_rate": 4.867553375569708e-07, "loss": 0.1728, "step": 4270 }, { "epoch": 0.1265747915064766, "grad_norm": 1.4832807779312134, "learning_rate": 4.87895546827425e-07, "loss": 0.1865, "step": 4280 }, { "epoch": 0.12687052700065063, "grad_norm": 1.169555902481079, "learning_rate": 4.890357560978795e-07, "loss": 0.1631, "step": 4290 }, { "epoch": 0.12716626249482463, "grad_norm": 0.7912857532501221, "learning_rate": 4.901759653683338e-07, "loss": 0.1377, "step": 4300 }, { "epoch": 0.12746199798899863, "grad_norm": 0.868920624256134, "learning_rate": 4.913161746387883e-07, "loss": 0.1716, "step": 4310 }, { "epoch": 0.12775773348317265, "grad_norm": 0.9974510669708252, "learning_rate": 4.924563839092426e-07, "loss": 0.171, "step": 4320 }, { "epoch": 0.12805346897734665, "grad_norm": 1.1904189586639404, "learning_rate": 4.935965931796969e-07, "loss": 0.1888, "step": 4330 }, { "epoch": 0.12834920447152068, "grad_norm": 1.679579734802246, "learning_rate": 4.947368024501513e-07, "loss": 0.1559, "step": 4340 }, { "epoch": 0.12864493996569468, "grad_norm": 1.4915651082992554, "learning_rate": 4.958770117206057e-07, "loss": 0.1627, "step": 4350 }, { "epoch": 0.1289406754598687, "grad_norm": 1.606441855430603, "learning_rate": 4.970172209910601e-07, "loss": 0.164, "step": 4360 }, { "epoch": 0.1292364109540427, "grad_norm": 1.7868328094482422, "learning_rate": 4.981574302615144e-07, "loss": 0.1594, "step": 4370 }, { "epoch": 0.12953214644821673, "grad_norm": 0.922819972038269, "learning_rate": 4.992976395319688e-07, "loss": 0.1551, "step": 4380 }, { "epoch": 0.12982788194239073, "grad_norm": 1.2711663246154785, "learning_rate": 5.004378488024232e-07, "loss": 0.16, "step": 4390 }, { "epoch": 0.13012361743656473, "grad_norm": 2.0443687438964844, "learning_rate": 5.015780580728776e-07, "loss": 0.1508, "step": 4400 }, { "epoch": 0.13041935293073875, "grad_norm": 1.207940936088562, "learning_rate": 5.027182673433319e-07, "loss": 0.1784, "step": 4410 }, { "epoch": 0.13071508842491275, "grad_norm": 1.1327625513076782, "learning_rate": 5.038584766137862e-07, "loss": 0.1585, "step": 4420 }, { "epoch": 0.13101082391908678, "grad_norm": 2.089137077331543, "learning_rate": 5.049986858842407e-07, "loss": 0.1703, "step": 4430 }, { "epoch": 0.13130655941326078, "grad_norm": 0.933725118637085, "learning_rate": 5.06138895154695e-07, "loss": 0.1445, "step": 4440 }, { "epoch": 0.1316022949074348, "grad_norm": 1.344003438949585, "learning_rate": 5.072791044251495e-07, "loss": 0.1494, "step": 4450 }, { "epoch": 0.1318980304016088, "grad_norm": 0.8819056153297424, "learning_rate": 5.084193136956037e-07, "loss": 0.1673, "step": 4460 }, { "epoch": 0.1321937658957828, "grad_norm": 0.8414877653121948, "learning_rate": 5.095595229660582e-07, "loss": 0.1568, "step": 4470 }, { "epoch": 0.13248950138995683, "grad_norm": 1.0812221765518188, "learning_rate": 5.106997322365125e-07, "loss": 0.1613, "step": 4480 }, { "epoch": 0.13278523688413083, "grad_norm": 1.3247190713882446, "learning_rate": 5.118399415069669e-07, "loss": 0.1603, "step": 4490 }, { "epoch": 0.13308097237830485, "grad_norm": 1.0262242555618286, "learning_rate": 5.129801507774212e-07, "loss": 0.1511, "step": 4500 }, { "epoch": 0.13337670787247885, "grad_norm": 0.8682311773300171, "learning_rate": 5.141203600478756e-07, "loss": 0.1739, "step": 4510 }, { "epoch": 0.13367244336665288, "grad_norm": 0.7635599970817566, "learning_rate": 5.1526056931833e-07, "loss": 0.1596, "step": 4520 }, { "epoch": 0.13396817886082688, "grad_norm": 0.8671090006828308, "learning_rate": 5.164007785887844e-07, "loss": 0.1552, "step": 4530 }, { "epoch": 0.13426391435500087, "grad_norm": 0.8990280032157898, "learning_rate": 5.175409878592387e-07, "loss": 0.158, "step": 4540 }, { "epoch": 0.1345596498491749, "grad_norm": 1.414419174194336, "learning_rate": 5.186811971296931e-07, "loss": 0.1371, "step": 4550 }, { "epoch": 0.1348553853433489, "grad_norm": 2.1548197269439697, "learning_rate": 5.198214064001475e-07, "loss": 0.1687, "step": 4560 }, { "epoch": 0.13515112083752293, "grad_norm": 0.6965479254722595, "learning_rate": 5.209616156706019e-07, "loss": 0.1639, "step": 4570 }, { "epoch": 0.13544685633169692, "grad_norm": 1.298904538154602, "learning_rate": 5.221018249410561e-07, "loss": 0.1558, "step": 4580 }, { "epoch": 0.13574259182587095, "grad_norm": 0.9815796613693237, "learning_rate": 5.232420342115106e-07, "loss": 0.1486, "step": 4590 }, { "epoch": 0.13603832732004495, "grad_norm": 0.9483675360679626, "learning_rate": 5.243822434819649e-07, "loss": 0.1421, "step": 4600 }, { "epoch": 0.13633406281421898, "grad_norm": 1.1267789602279663, "learning_rate": 5.255224527524194e-07, "loss": 0.1651, "step": 4610 }, { "epoch": 0.13662979830839297, "grad_norm": 0.9176404476165771, "learning_rate": 5.266626620228736e-07, "loss": 0.1517, "step": 4620 }, { "epoch": 0.13692553380256697, "grad_norm": 0.7991282343864441, "learning_rate": 5.278028712933281e-07, "loss": 0.1539, "step": 4630 }, { "epoch": 0.137221269296741, "grad_norm": 0.9026876091957092, "learning_rate": 5.289430805637824e-07, "loss": 0.1417, "step": 4640 }, { "epoch": 0.137517004790915, "grad_norm": 0.9285138845443726, "learning_rate": 5.300832898342368e-07, "loss": 0.1384, "step": 4650 }, { "epoch": 0.13781274028508902, "grad_norm": 1.1380730867385864, "learning_rate": 5.312234991046912e-07, "loss": 0.1724, "step": 4660 }, { "epoch": 0.13810847577926302, "grad_norm": 1.7909901142120361, "learning_rate": 5.323637083751455e-07, "loss": 0.1654, "step": 4670 }, { "epoch": 0.13840421127343705, "grad_norm": 1.5058112144470215, "learning_rate": 5.335039176455999e-07, "loss": 0.1567, "step": 4680 }, { "epoch": 0.13869994676761105, "grad_norm": 1.0768080949783325, "learning_rate": 5.346441269160543e-07, "loss": 0.1417, "step": 4690 }, { "epoch": 0.13899568226178505, "grad_norm": 1.0593005418777466, "learning_rate": 5.357843361865087e-07, "loss": 0.1399, "step": 4700 }, { "epoch": 0.13929141775595907, "grad_norm": 3.1843345165252686, "learning_rate": 5.36924545456963e-07, "loss": 0.182, "step": 4710 }, { "epoch": 0.13958715325013307, "grad_norm": 1.6038559675216675, "learning_rate": 5.380647547274173e-07, "loss": 0.1647, "step": 4720 }, { "epoch": 0.1398828887443071, "grad_norm": 1.0366215705871582, "learning_rate": 5.392049639978718e-07, "loss": 0.1546, "step": 4730 }, { "epoch": 0.1401786242384811, "grad_norm": 1.2964016199111938, "learning_rate": 5.403451732683261e-07, "loss": 0.1514, "step": 4740 }, { "epoch": 0.14047435973265512, "grad_norm": 0.8407158255577087, "learning_rate": 5.414853825387805e-07, "loss": 0.1374, "step": 4750 }, { "epoch": 0.14077009522682912, "grad_norm": 1.2161564826965332, "learning_rate": 5.426255918092348e-07, "loss": 0.1782, "step": 4760 }, { "epoch": 0.14106583072100312, "grad_norm": 1.1149433851242065, "learning_rate": 5.437658010796893e-07, "loss": 0.1594, "step": 4770 }, { "epoch": 0.14136156621517715, "grad_norm": 1.4779002666473389, "learning_rate": 5.449060103501437e-07, "loss": 0.1655, "step": 4780 }, { "epoch": 0.14165730170935115, "grad_norm": 1.442403793334961, "learning_rate": 5.460462196205981e-07, "loss": 0.134, "step": 4790 }, { "epoch": 0.14195303720352517, "grad_norm": 1.392819881439209, "learning_rate": 5.471864288910524e-07, "loss": 0.154, "step": 4800 }, { "epoch": 0.14224877269769917, "grad_norm": 0.9770697951316833, "learning_rate": 5.483266381615067e-07, "loss": 0.1706, "step": 4810 }, { "epoch": 0.1425445081918732, "grad_norm": 1.3332463502883911, "learning_rate": 5.494668474319612e-07, "loss": 0.1546, "step": 4820 }, { "epoch": 0.1428402436860472, "grad_norm": 2.356201648712158, "learning_rate": 5.506070567024155e-07, "loss": 0.1618, "step": 4830 }, { "epoch": 0.14313597918022122, "grad_norm": 0.9512386918067932, "learning_rate": 5.517472659728699e-07, "loss": 0.1459, "step": 4840 }, { "epoch": 0.14343171467439522, "grad_norm": 2.437241315841675, "learning_rate": 5.528874752433242e-07, "loss": 0.1455, "step": 4850 }, { "epoch": 0.14372745016856922, "grad_norm": 1.3744603395462036, "learning_rate": 5.540276845137787e-07, "loss": 0.1624, "step": 4860 }, { "epoch": 0.14402318566274325, "grad_norm": 1.4248965978622437, "learning_rate": 5.55167893784233e-07, "loss": 0.15, "step": 4870 }, { "epoch": 0.14431892115691725, "grad_norm": 1.0170553922653198, "learning_rate": 5.563081030546873e-07, "loss": 0.1525, "step": 4880 }, { "epoch": 0.14461465665109127, "grad_norm": 1.0061490535736084, "learning_rate": 5.574483123251417e-07, "loss": 0.1498, "step": 4890 }, { "epoch": 0.14491039214526527, "grad_norm": 1.4794628620147705, "learning_rate": 5.585885215955961e-07, "loss": 0.1431, "step": 4900 }, { "epoch": 0.1452061276394393, "grad_norm": 1.370209813117981, "learning_rate": 5.597287308660505e-07, "loss": 0.1816, "step": 4910 }, { "epoch": 0.1455018631336133, "grad_norm": 1.0248627662658691, "learning_rate": 5.608689401365049e-07, "loss": 0.1515, "step": 4920 }, { "epoch": 0.1457975986277873, "grad_norm": 2.763394832611084, "learning_rate": 5.620091494069592e-07, "loss": 0.1501, "step": 4930 }, { "epoch": 0.14609333412196132, "grad_norm": 0.7450131177902222, "learning_rate": 5.631493586774136e-07, "loss": 0.1493, "step": 4940 }, { "epoch": 0.14638906961613532, "grad_norm": 1.2428886890411377, "learning_rate": 5.642895679478679e-07, "loss": 0.1409, "step": 4950 }, { "epoch": 0.14668480511030935, "grad_norm": 1.0189340114593506, "learning_rate": 5.654297772183224e-07, "loss": 0.1679, "step": 4960 }, { "epoch": 0.14698054060448335, "grad_norm": 1.4848960638046265, "learning_rate": 5.665699864887766e-07, "loss": 0.1792, "step": 4970 }, { "epoch": 0.14727627609865737, "grad_norm": 1.0593063831329346, "learning_rate": 5.677101957592311e-07, "loss": 0.1585, "step": 4980 }, { "epoch": 0.14757201159283137, "grad_norm": 1.073595643043518, "learning_rate": 5.688504050296854e-07, "loss": 0.1428, "step": 4990 }, { "epoch": 0.14786774708700537, "grad_norm": 0.9091858267784119, "learning_rate": 5.699906143001399e-07, "loss": 0.1385, "step": 5000 }, { "epoch": 0.1481634825811794, "grad_norm": 0.8420538902282715, "learning_rate": 5.711308235705941e-07, "loss": 0.1486, "step": 5010 }, { "epoch": 0.1484592180753534, "grad_norm": 1.553512692451477, "learning_rate": 5.722710328410486e-07, "loss": 0.1681, "step": 5020 }, { "epoch": 0.14875495356952742, "grad_norm": 1.7290226221084595, "learning_rate": 5.734112421115029e-07, "loss": 0.1475, "step": 5030 }, { "epoch": 0.14905068906370142, "grad_norm": 1.472606897354126, "learning_rate": 5.745514513819573e-07, "loss": 0.1522, "step": 5040 }, { "epoch": 0.14934642455787545, "grad_norm": 1.1150485277175903, "learning_rate": 5.756916606524116e-07, "loss": 0.14, "step": 5050 }, { "epoch": 0.14964216005204944, "grad_norm": 2.433480978012085, "learning_rate": 5.76831869922866e-07, "loss": 0.1589, "step": 5060 }, { "epoch": 0.14993789554622347, "grad_norm": 0.8885801434516907, "learning_rate": 5.779720791933204e-07, "loss": 0.1448, "step": 5070 }, { "epoch": 0.15023363104039747, "grad_norm": 1.0064514875411987, "learning_rate": 5.791122884637748e-07, "loss": 0.148, "step": 5080 }, { "epoch": 0.15052936653457147, "grad_norm": 1.5247752666473389, "learning_rate": 5.802524977342292e-07, "loss": 0.1485, "step": 5090 }, { "epoch": 0.1508251020287455, "grad_norm": 2.0005075931549072, "learning_rate": 5.813927070046835e-07, "loss": 0.1396, "step": 5100 }, { "epoch": 0.1511208375229195, "grad_norm": 1.030068039894104, "learning_rate": 5.825329162751378e-07, "loss": 0.172, "step": 5110 }, { "epoch": 0.15141657301709352, "grad_norm": 0.9804148077964783, "learning_rate": 5.836731255455923e-07, "loss": 0.1644, "step": 5120 }, { "epoch": 0.15171230851126752, "grad_norm": 1.054550290107727, "learning_rate": 5.848133348160466e-07, "loss": 0.1523, "step": 5130 }, { "epoch": 0.15200804400544154, "grad_norm": 0.9367437958717346, "learning_rate": 5.85953544086501e-07, "loss": 0.147, "step": 5140 }, { "epoch": 0.15230377949961554, "grad_norm": 1.3910589218139648, "learning_rate": 5.870937533569553e-07, "loss": 0.1327, "step": 5150 }, { "epoch": 0.15259951499378954, "grad_norm": 1.5173667669296265, "learning_rate": 5.882339626274098e-07, "loss": 0.1579, "step": 5160 }, { "epoch": 0.15289525048796357, "grad_norm": 2.001143217086792, "learning_rate": 5.893741718978641e-07, "loss": 0.1587, "step": 5170 }, { "epoch": 0.15319098598213757, "grad_norm": 1.9634987115859985, "learning_rate": 5.905143811683184e-07, "loss": 0.1484, "step": 5180 }, { "epoch": 0.1534867214763116, "grad_norm": 0.995820939540863, "learning_rate": 5.916545904387728e-07, "loss": 0.1354, "step": 5190 }, { "epoch": 0.1537824569704856, "grad_norm": 1.1883596181869507, "learning_rate": 5.927947997092272e-07, "loss": 0.1468, "step": 5200 }, { "epoch": 0.15407819246465962, "grad_norm": 1.8942376375198364, "learning_rate": 5.939350089796816e-07, "loss": 0.1758, "step": 5210 }, { "epoch": 0.15437392795883362, "grad_norm": 2.087040662765503, "learning_rate": 5.950752182501359e-07, "loss": 0.1747, "step": 5220 }, { "epoch": 0.15466966345300762, "grad_norm": 1.4112640619277954, "learning_rate": 5.962154275205903e-07, "loss": 0.1716, "step": 5230 }, { "epoch": 0.15496539894718164, "grad_norm": 1.394203543663025, "learning_rate": 5.973556367910447e-07, "loss": 0.1492, "step": 5240 }, { "epoch": 0.15526113444135564, "grad_norm": 2.3591346740722656, "learning_rate": 5.984958460614991e-07, "loss": 0.1428, "step": 5250 }, { "epoch": 0.15555686993552967, "grad_norm": 2.444478988647461, "learning_rate": 5.996360553319535e-07, "loss": 0.1819, "step": 5260 }, { "epoch": 0.15585260542970367, "grad_norm": 1.7984718084335327, "learning_rate": 6.007762646024077e-07, "loss": 0.1627, "step": 5270 }, { "epoch": 0.1561483409238777, "grad_norm": 1.4150094985961914, "learning_rate": 6.019164738728622e-07, "loss": 0.1417, "step": 5280 }, { "epoch": 0.1564440764180517, "grad_norm": 0.8409788012504578, "learning_rate": 6.030566831433165e-07, "loss": 0.1289, "step": 5290 }, { "epoch": 0.15673981191222572, "grad_norm": 1.2381224632263184, "learning_rate": 6.04196892413771e-07, "loss": 0.1356, "step": 5300 }, { "epoch": 0.15703554740639972, "grad_norm": 1.283227801322937, "learning_rate": 6.053371016842252e-07, "loss": 0.1359, "step": 5310 }, { "epoch": 0.15733128290057372, "grad_norm": 0.8771860003471375, "learning_rate": 6.064773109546797e-07, "loss": 0.1407, "step": 5320 }, { "epoch": 0.15762701839474774, "grad_norm": 1.296636939048767, "learning_rate": 6.07617520225134e-07, "loss": 0.144, "step": 5330 }, { "epoch": 0.15792275388892174, "grad_norm": 1.1587884426116943, "learning_rate": 6.087577294955884e-07, "loss": 0.1307, "step": 5340 }, { "epoch": 0.15821848938309577, "grad_norm": 1.0065251588821411, "learning_rate": 6.098979387660427e-07, "loss": 0.1464, "step": 5350 }, { "epoch": 0.15851422487726977, "grad_norm": 1.5373650789260864, "learning_rate": 6.110381480364971e-07, "loss": 0.1707, "step": 5360 }, { "epoch": 0.1588099603714438, "grad_norm": 2.1894962787628174, "learning_rate": 6.121783573069515e-07, "loss": 0.1496, "step": 5370 }, { "epoch": 0.1591056958656178, "grad_norm": 1.628888487815857, "learning_rate": 6.133185665774059e-07, "loss": 0.146, "step": 5380 }, { "epoch": 0.1594014313597918, "grad_norm": 1.5682462453842163, "learning_rate": 6.144587758478602e-07, "loss": 0.1374, "step": 5390 }, { "epoch": 0.15969716685396582, "grad_norm": 1.036299228668213, "learning_rate": 6.155989851183146e-07, "loss": 0.1221, "step": 5400 }, { "epoch": 0.15999290234813981, "grad_norm": 1.1619404554367065, "learning_rate": 6.167391943887689e-07, "loss": 0.1625, "step": 5410 }, { "epoch": 0.16028863784231384, "grad_norm": 1.658787488937378, "learning_rate": 6.178794036592234e-07, "loss": 0.1543, "step": 5420 }, { "epoch": 0.16058437333648784, "grad_norm": 1.2527300119400024, "learning_rate": 6.190196129296776e-07, "loss": 0.1469, "step": 5430 }, { "epoch": 0.16088010883066187, "grad_norm": 1.0315215587615967, "learning_rate": 6.201598222001321e-07, "loss": 0.1495, "step": 5440 }, { "epoch": 0.16117584432483587, "grad_norm": 1.1975300312042236, "learning_rate": 6.213000314705864e-07, "loss": 0.1318, "step": 5450 }, { "epoch": 0.16147157981900986, "grad_norm": 2.0197160243988037, "learning_rate": 6.224402407410409e-07, "loss": 0.1656, "step": 5460 }, { "epoch": 0.1617673153131839, "grad_norm": 1.504667043685913, "learning_rate": 6.235804500114952e-07, "loss": 0.1423, "step": 5470 }, { "epoch": 0.1620630508073579, "grad_norm": 1.6819727420806885, "learning_rate": 6.247206592819496e-07, "loss": 0.1459, "step": 5480 }, { "epoch": 0.16235878630153192, "grad_norm": 0.8747183680534363, "learning_rate": 6.258608685524039e-07, "loss": 0.1387, "step": 5490 }, { "epoch": 0.16265452179570591, "grad_norm": 0.8225200176239014, "learning_rate": 6.270010778228583e-07, "loss": 0.1307, "step": 5500 }, { "epoch": 0.16295025728987994, "grad_norm": 1.2582677602767944, "learning_rate": 6.281412870933127e-07, "loss": 0.1596, "step": 5510 }, { "epoch": 0.16324599278405394, "grad_norm": 1.2760018110275269, "learning_rate": 6.29281496363767e-07, "loss": 0.1505, "step": 5520 }, { "epoch": 0.16354172827822797, "grad_norm": 1.298547387123108, "learning_rate": 6.304217056342214e-07, "loss": 0.1619, "step": 5530 }, { "epoch": 0.16383746377240196, "grad_norm": 1.6607297658920288, "learning_rate": 6.315619149046758e-07, "loss": 0.1427, "step": 5540 }, { "epoch": 0.16413319926657596, "grad_norm": 1.16042959690094, "learning_rate": 6.327021241751302e-07, "loss": 0.1233, "step": 5550 }, { "epoch": 0.16442893476075, "grad_norm": 1.3264251947402954, "learning_rate": 6.338423334455845e-07, "loss": 0.1519, "step": 5560 }, { "epoch": 0.164724670254924, "grad_norm": 2.258291721343994, "learning_rate": 6.349825427160388e-07, "loss": 0.1462, "step": 5570 }, { "epoch": 0.16502040574909801, "grad_norm": 1.0035686492919922, "learning_rate": 6.361227519864933e-07, "loss": 0.1291, "step": 5580 }, { "epoch": 0.165316141243272, "grad_norm": 1.776299238204956, "learning_rate": 6.372629612569476e-07, "loss": 0.1409, "step": 5590 }, { "epoch": 0.16561187673744604, "grad_norm": 1.7199896574020386, "learning_rate": 6.384031705274021e-07, "loss": 0.1174, "step": 5600 }, { "epoch": 0.16590761223162004, "grad_norm": 1.897477388381958, "learning_rate": 6.395433797978563e-07, "loss": 0.1443, "step": 5610 }, { "epoch": 0.16620334772579404, "grad_norm": 1.1611740589141846, "learning_rate": 6.406835890683108e-07, "loss": 0.1446, "step": 5620 }, { "epoch": 0.16649908321996806, "grad_norm": 1.4311306476593018, "learning_rate": 6.418237983387651e-07, "loss": 0.1574, "step": 5630 }, { "epoch": 0.16679481871414206, "grad_norm": 0.9748489856719971, "learning_rate": 6.429640076092195e-07, "loss": 0.1357, "step": 5640 }, { "epoch": 0.1670905542083161, "grad_norm": 1.2844891548156738, "learning_rate": 6.441042168796738e-07, "loss": 0.1186, "step": 5650 }, { "epoch": 0.1673862897024901, "grad_norm": 1.088683843612671, "learning_rate": 6.452444261501282e-07, "loss": 0.1628, "step": 5660 }, { "epoch": 0.16768202519666411, "grad_norm": 1.2170013189315796, "learning_rate": 6.463846354205826e-07, "loss": 0.1408, "step": 5670 }, { "epoch": 0.1679777606908381, "grad_norm": 0.8470882773399353, "learning_rate": 6.47524844691037e-07, "loss": 0.1395, "step": 5680 }, { "epoch": 0.1682734961850121, "grad_norm": 0.8847981691360474, "learning_rate": 6.486650539614913e-07, "loss": 0.1336, "step": 5690 }, { "epoch": 0.16856923167918614, "grad_norm": 1.5014325380325317, "learning_rate": 6.498052632319457e-07, "loss": 0.1325, "step": 5700 }, { "epoch": 0.16886496717336014, "grad_norm": 2.359663486480713, "learning_rate": 6.509454725024001e-07, "loss": 0.1459, "step": 5710 }, { "epoch": 0.16916070266753416, "grad_norm": 1.4772907495498657, "learning_rate": 6.520856817728545e-07, "loss": 0.1479, "step": 5720 }, { "epoch": 0.16945643816170816, "grad_norm": 1.1501659154891968, "learning_rate": 6.532258910433087e-07, "loss": 0.1621, "step": 5730 }, { "epoch": 0.1697521736558822, "grad_norm": 0.7903907895088196, "learning_rate": 6.543661003137632e-07, "loss": 0.1402, "step": 5740 }, { "epoch": 0.1700479091500562, "grad_norm": 1.1188033819198608, "learning_rate": 6.555063095842175e-07, "loss": 0.1161, "step": 5750 }, { "epoch": 0.1703436446442302, "grad_norm": 1.772444248199463, "learning_rate": 6.56646518854672e-07, "loss": 0.1527, "step": 5760 }, { "epoch": 0.1706393801384042, "grad_norm": 0.8298978209495544, "learning_rate": 6.577867281251263e-07, "loss": 0.1523, "step": 5770 }, { "epoch": 0.1709351156325782, "grad_norm": 1.640597939491272, "learning_rate": 6.589269373955807e-07, "loss": 0.1576, "step": 5780 }, { "epoch": 0.17123085112675224, "grad_norm": 0.9461125731468201, "learning_rate": 6.60067146666035e-07, "loss": 0.1428, "step": 5790 }, { "epoch": 0.17152658662092624, "grad_norm": 1.0845670700073242, "learning_rate": 6.612073559364894e-07, "loss": 0.1191, "step": 5800 }, { "epoch": 0.17182232211510026, "grad_norm": 1.7876170873641968, "learning_rate": 6.623475652069438e-07, "loss": 0.1672, "step": 5810 }, { "epoch": 0.17211805760927426, "grad_norm": 3.0108423233032227, "learning_rate": 6.634877744773981e-07, "loss": 0.159, "step": 5820 }, { "epoch": 0.1724137931034483, "grad_norm": 1.1886152029037476, "learning_rate": 6.646279837478525e-07, "loss": 0.1334, "step": 5830 }, { "epoch": 0.17270952859762229, "grad_norm": 1.1471554040908813, "learning_rate": 6.657681930183069e-07, "loss": 0.153, "step": 5840 }, { "epoch": 0.17300526409179628, "grad_norm": 1.3881226778030396, "learning_rate": 6.669084022887613e-07, "loss": 0.1347, "step": 5850 }, { "epoch": 0.1733009995859703, "grad_norm": 1.5917078256607056, "learning_rate": 6.680486115592156e-07, "loss": 0.1528, "step": 5860 }, { "epoch": 0.1735967350801443, "grad_norm": 2.9927778244018555, "learning_rate": 6.691888208296699e-07, "loss": 0.1475, "step": 5870 }, { "epoch": 0.17389247057431834, "grad_norm": 2.0300230979919434, "learning_rate": 6.703290301001244e-07, "loss": 0.1334, "step": 5880 }, { "epoch": 0.17418820606849234, "grad_norm": 0.9340474009513855, "learning_rate": 6.714692393705787e-07, "loss": 0.1426, "step": 5890 }, { "epoch": 0.17448394156266636, "grad_norm": 0.6913407444953918, "learning_rate": 6.726094486410332e-07, "loss": 0.1199, "step": 5900 }, { "epoch": 0.17477967705684036, "grad_norm": 1.328738808631897, "learning_rate": 6.737496579114874e-07, "loss": 0.1502, "step": 5910 }, { "epoch": 0.1750754125510144, "grad_norm": 1.7121084928512573, "learning_rate": 6.748898671819419e-07, "loss": 0.1481, "step": 5920 }, { "epoch": 0.17537114804518839, "grad_norm": 1.2023617029190063, "learning_rate": 6.760300764523962e-07, "loss": 0.1476, "step": 5930 }, { "epoch": 0.17566688353936238, "grad_norm": 1.5367422103881836, "learning_rate": 6.771702857228507e-07, "loss": 0.147, "step": 5940 }, { "epoch": 0.1759626190335364, "grad_norm": 1.2594980001449585, "learning_rate": 6.783104949933049e-07, "loss": 0.1156, "step": 5950 }, { "epoch": 0.1762583545277104, "grad_norm": 1.3906440734863281, "learning_rate": 6.794507042637593e-07, "loss": 0.1328, "step": 5960 }, { "epoch": 0.17655409002188444, "grad_norm": 2.131770610809326, "learning_rate": 6.805909135342137e-07, "loss": 0.1502, "step": 5970 }, { "epoch": 0.17684982551605843, "grad_norm": 1.0433008670806885, "learning_rate": 6.817311228046681e-07, "loss": 0.1297, "step": 5980 }, { "epoch": 0.17714556101023246, "grad_norm": 1.3774688243865967, "learning_rate": 6.828713320751224e-07, "loss": 0.1388, "step": 5990 }, { "epoch": 0.17744129650440646, "grad_norm": 0.9477415680885315, "learning_rate": 6.840115413455768e-07, "loss": 0.1359, "step": 6000 }, { "epoch": 0.17773703199858046, "grad_norm": 2.268477439880371, "learning_rate": 6.851517506160312e-07, "loss": 0.1537, "step": 6010 }, { "epoch": 0.17803276749275448, "grad_norm": 1.3220103979110718, "learning_rate": 6.862919598864856e-07, "loss": 0.1501, "step": 6020 }, { "epoch": 0.17832850298692848, "grad_norm": 1.481237530708313, "learning_rate": 6.874321691569398e-07, "loss": 0.1296, "step": 6030 }, { "epoch": 0.1786242384811025, "grad_norm": 2.724547863006592, "learning_rate": 6.885723784273943e-07, "loss": 0.1371, "step": 6040 }, { "epoch": 0.1789199739752765, "grad_norm": 1.575825810432434, "learning_rate": 6.897125876978486e-07, "loss": 0.1258, "step": 6050 }, { "epoch": 0.17921570946945053, "grad_norm": 1.3033298254013062, "learning_rate": 6.908527969683031e-07, "loss": 0.147, "step": 6060 }, { "epoch": 0.17951144496362453, "grad_norm": 1.9762905836105347, "learning_rate": 6.919930062387573e-07, "loss": 0.1581, "step": 6070 }, { "epoch": 0.17980718045779853, "grad_norm": 1.3871872425079346, "learning_rate": 6.931332155092118e-07, "loss": 0.1486, "step": 6080 }, { "epoch": 0.18010291595197256, "grad_norm": 0.8554514646530151, "learning_rate": 6.942734247796662e-07, "loss": 0.1373, "step": 6090 }, { "epoch": 0.18039865144614656, "grad_norm": 0.8202893137931824, "learning_rate": 6.954136340501205e-07, "loss": 0.1219, "step": 6100 }, { "epoch": 0.18069438694032058, "grad_norm": 2.0091145038604736, "learning_rate": 6.96553843320575e-07, "loss": 0.1732, "step": 6110 }, { "epoch": 0.18099012243449458, "grad_norm": 1.80440092086792, "learning_rate": 6.976940525910292e-07, "loss": 0.1317, "step": 6120 }, { "epoch": 0.1812858579286686, "grad_norm": 1.65416419506073, "learning_rate": 6.988342618614837e-07, "loss": 0.1443, "step": 6130 }, { "epoch": 0.1815815934228426, "grad_norm": 1.054284691810608, "learning_rate": 6.99974471131938e-07, "loss": 0.1384, "step": 6140 }, { "epoch": 0.18187732891701663, "grad_norm": 1.1343986988067627, "learning_rate": 7.011146804023925e-07, "loss": 0.1481, "step": 6150 }, { "epoch": 0.18217306441119063, "grad_norm": 1.3499277830123901, "learning_rate": 7.022548896728467e-07, "loss": 0.1435, "step": 6160 }, { "epoch": 0.18246879990536463, "grad_norm": 1.9603607654571533, "learning_rate": 7.033950989433012e-07, "loss": 0.1481, "step": 6170 }, { "epoch": 0.18276453539953866, "grad_norm": 1.280803918838501, "learning_rate": 7.045353082137555e-07, "loss": 0.1462, "step": 6180 }, { "epoch": 0.18306027089371266, "grad_norm": 1.2117339372634888, "learning_rate": 7.056755174842099e-07, "loss": 0.1232, "step": 6190 }, { "epoch": 0.18335600638788668, "grad_norm": 1.114945411682129, "learning_rate": 7.068157267546642e-07, "loss": 0.1319, "step": 6200 }, { "epoch": 0.18365174188206068, "grad_norm": 1.5443572998046875, "learning_rate": 7.079559360251186e-07, "loss": 0.1491, "step": 6210 }, { "epoch": 0.1839474773762347, "grad_norm": 0.8957818150520325, "learning_rate": 7.09096145295573e-07, "loss": 0.1368, "step": 6220 }, { "epoch": 0.1842432128704087, "grad_norm": 1.5410077571868896, "learning_rate": 7.102363545660274e-07, "loss": 0.1465, "step": 6230 }, { "epoch": 0.1845389483645827, "grad_norm": 1.2427817583084106, "learning_rate": 7.113765638364818e-07, "loss": 0.1432, "step": 6240 }, { "epoch": 0.18483468385875673, "grad_norm": 1.0342696905136108, "learning_rate": 7.125167731069361e-07, "loss": 0.1218, "step": 6250 }, { "epoch": 0.18513041935293073, "grad_norm": 2.459137439727783, "learning_rate": 7.136569823773904e-07, "loss": 0.1558, "step": 6260 }, { "epoch": 0.18542615484710476, "grad_norm": 1.364339828491211, "learning_rate": 7.147971916478449e-07, "loss": 0.1552, "step": 6270 }, { "epoch": 0.18572189034127876, "grad_norm": 1.7394558191299438, "learning_rate": 7.159374009182992e-07, "loss": 0.1485, "step": 6280 }, { "epoch": 0.18601762583545278, "grad_norm": 0.9138115644454956, "learning_rate": 7.170776101887536e-07, "loss": 0.1156, "step": 6290 }, { "epoch": 0.18631336132962678, "grad_norm": 1.6907052993774414, "learning_rate": 7.182178194592079e-07, "loss": 0.1053, "step": 6300 }, { "epoch": 0.18660909682380078, "grad_norm": 1.023397445678711, "learning_rate": 7.193580287296624e-07, "loss": 0.1485, "step": 6310 }, { "epoch": 0.1869048323179748, "grad_norm": 1.4928621053695679, "learning_rate": 7.204982380001167e-07, "loss": 0.1503, "step": 6320 }, { "epoch": 0.1872005678121488, "grad_norm": 1.225299596786499, "learning_rate": 7.21638447270571e-07, "loss": 0.1437, "step": 6330 }, { "epoch": 0.18749630330632283, "grad_norm": 1.0719597339630127, "learning_rate": 7.227786565410254e-07, "loss": 0.1291, "step": 6340 }, { "epoch": 0.18779203880049683, "grad_norm": 1.1839184761047363, "learning_rate": 7.239188658114798e-07, "loss": 0.1156, "step": 6350 }, { "epoch": 0.18808777429467086, "grad_norm": 1.191484808921814, "learning_rate": 7.250590750819342e-07, "loss": 0.1385, "step": 6360 }, { "epoch": 0.18838350978884486, "grad_norm": 1.3959529399871826, "learning_rate": 7.261992843523886e-07, "loss": 0.1526, "step": 6370 }, { "epoch": 0.18867924528301888, "grad_norm": 1.6001911163330078, "learning_rate": 7.273394936228429e-07, "loss": 0.1612, "step": 6380 }, { "epoch": 0.18897498077719288, "grad_norm": 1.001173973083496, "learning_rate": 7.284797028932973e-07, "loss": 0.1185, "step": 6390 }, { "epoch": 0.18927071627136688, "grad_norm": 0.9825140833854675, "learning_rate": 7.296199121637517e-07, "loss": 0.1129, "step": 6400 }, { "epoch": 0.1895664517655409, "grad_norm": 1.1056257486343384, "learning_rate": 7.307601214342061e-07, "loss": 0.1542, "step": 6410 }, { "epoch": 0.1898621872597149, "grad_norm": 1.2399442195892334, "learning_rate": 7.319003307046603e-07, "loss": 0.1481, "step": 6420 }, { "epoch": 0.19015792275388893, "grad_norm": 1.3106173276901245, "learning_rate": 7.330405399751148e-07, "loss": 0.154, "step": 6430 }, { "epoch": 0.19045365824806293, "grad_norm": 1.8025455474853516, "learning_rate": 7.341807492455691e-07, "loss": 0.1331, "step": 6440 }, { "epoch": 0.19074939374223696, "grad_norm": 1.368379831314087, "learning_rate": 7.353209585160236e-07, "loss": 0.1341, "step": 6450 }, { "epoch": 0.19104512923641095, "grad_norm": 1.4774243831634521, "learning_rate": 7.364611677864778e-07, "loss": 0.1356, "step": 6460 }, { "epoch": 0.19134086473058495, "grad_norm": 0.7318703532218933, "learning_rate": 7.376013770569323e-07, "loss": 0.1519, "step": 6470 }, { "epoch": 0.19163660022475898, "grad_norm": 1.2509485483169556, "learning_rate": 7.387415863273866e-07, "loss": 0.1236, "step": 6480 }, { "epoch": 0.19193233571893298, "grad_norm": 1.2583553791046143, "learning_rate": 7.39881795597841e-07, "loss": 0.1421, "step": 6490 }, { "epoch": 0.192228071213107, "grad_norm": 1.1174026727676392, "learning_rate": 7.410220048682953e-07, "loss": 0.1213, "step": 6500 }, { "epoch": 0.192523806707281, "grad_norm": 2.2761952877044678, "learning_rate": 7.421622141387497e-07, "loss": 0.1397, "step": 6510 }, { "epoch": 0.19281954220145503, "grad_norm": 1.1519792079925537, "learning_rate": 7.433024234092041e-07, "loss": 0.1366, "step": 6520 }, { "epoch": 0.19311527769562903, "grad_norm": 1.8464876413345337, "learning_rate": 7.444426326796585e-07, "loss": 0.1293, "step": 6530 }, { "epoch": 0.19341101318980303, "grad_norm": 1.2872123718261719, "learning_rate": 7.455828419501129e-07, "loss": 0.1272, "step": 6540 }, { "epoch": 0.19370674868397705, "grad_norm": 1.532799482345581, "learning_rate": 7.467230512205672e-07, "loss": 0.1163, "step": 6550 }, { "epoch": 0.19400248417815105, "grad_norm": 1.7630906105041504, "learning_rate": 7.478632604910215e-07, "loss": 0.1671, "step": 6560 }, { "epoch": 0.19429821967232508, "grad_norm": 0.8536851406097412, "learning_rate": 7.49003469761476e-07, "loss": 0.1311, "step": 6570 }, { "epoch": 0.19459395516649908, "grad_norm": 2.388028860092163, "learning_rate": 7.501436790319303e-07, "loss": 0.146, "step": 6580 }, { "epoch": 0.1948896906606731, "grad_norm": 1.9139297008514404, "learning_rate": 7.512838883023847e-07, "loss": 0.1094, "step": 6590 }, { "epoch": 0.1951854261548471, "grad_norm": 1.5247507095336914, "learning_rate": 7.52424097572839e-07, "loss": 0.1275, "step": 6600 }, { "epoch": 0.19548116164902113, "grad_norm": 1.9543218612670898, "learning_rate": 7.535643068432935e-07, "loss": 0.1527, "step": 6610 }, { "epoch": 0.19577689714319513, "grad_norm": 1.5427926778793335, "learning_rate": 7.547045161137478e-07, "loss": 0.1359, "step": 6620 }, { "epoch": 0.19607263263736913, "grad_norm": 2.245392322540283, "learning_rate": 7.558447253842022e-07, "loss": 0.1433, "step": 6630 }, { "epoch": 0.19636836813154315, "grad_norm": 1.3030879497528076, "learning_rate": 7.569849346546565e-07, "loss": 0.133, "step": 6640 }, { "epoch": 0.19666410362571715, "grad_norm": 1.8450733423233032, "learning_rate": 7.581251439251109e-07, "loss": 0.1211, "step": 6650 }, { "epoch": 0.19695983911989118, "grad_norm": 1.7222923040390015, "learning_rate": 7.592653531955653e-07, "loss": 0.1497, "step": 6660 }, { "epoch": 0.19725557461406518, "grad_norm": 2.06691575050354, "learning_rate": 7.604055624660196e-07, "loss": 0.1536, "step": 6670 }, { "epoch": 0.1975513101082392, "grad_norm": 1.4978983402252197, "learning_rate": 7.61545771736474e-07, "loss": 0.1464, "step": 6680 }, { "epoch": 0.1978470456024132, "grad_norm": 1.1046346426010132, "learning_rate": 7.626859810069284e-07, "loss": 0.1142, "step": 6690 }, { "epoch": 0.1981427810965872, "grad_norm": 1.430648922920227, "learning_rate": 7.638261902773828e-07, "loss": 0.1041, "step": 6700 }, { "epoch": 0.19843851659076123, "grad_norm": 1.7914355993270874, "learning_rate": 7.649663995478372e-07, "loss": 0.1542, "step": 6710 }, { "epoch": 0.19873425208493523, "grad_norm": 1.2829047441482544, "learning_rate": 7.661066088182914e-07, "loss": 0.1221, "step": 6720 }, { "epoch": 0.19902998757910925, "grad_norm": 1.6639484167099, "learning_rate": 7.672468180887459e-07, "loss": 0.1283, "step": 6730 }, { "epoch": 0.19932572307328325, "grad_norm": 1.2000651359558105, "learning_rate": 7.683870273592002e-07, "loss": 0.1246, "step": 6740 }, { "epoch": 0.19962145856745728, "grad_norm": 2.1768412590026855, "learning_rate": 7.695272366296547e-07, "loss": 0.1433, "step": 6750 }, { "epoch": 0.19991719406163128, "grad_norm": 1.2174913883209229, "learning_rate": 7.706674459001089e-07, "loss": 0.1459, "step": 6760 }, { "epoch": 0.20021292955580527, "grad_norm": 1.430829644203186, "learning_rate": 7.718076551705634e-07, "loss": 0.1372, "step": 6770 }, { "epoch": 0.2005086650499793, "grad_norm": 1.6399954557418823, "learning_rate": 7.729478644410177e-07, "loss": 0.1211, "step": 6780 }, { "epoch": 0.2008044005441533, "grad_norm": 1.4942022562026978, "learning_rate": 7.740880737114721e-07, "loss": 0.1338, "step": 6790 }, { "epoch": 0.20110013603832733, "grad_norm": 0.8188782930374146, "learning_rate": 7.752282829819264e-07, "loss": 0.1061, "step": 6800 }, { "epoch": 0.20139587153250132, "grad_norm": 2.4200401306152344, "learning_rate": 7.763684922523808e-07, "loss": 0.1361, "step": 6810 }, { "epoch": 0.20169160702667535, "grad_norm": 1.736870527267456, "learning_rate": 7.775087015228352e-07, "loss": 0.1367, "step": 6820 }, { "epoch": 0.20198734252084935, "grad_norm": 1.2615869045257568, "learning_rate": 7.786489107932896e-07, "loss": 0.1428, "step": 6830 }, { "epoch": 0.20228307801502338, "grad_norm": 1.985583782196045, "learning_rate": 7.79789120063744e-07, "loss": 0.1262, "step": 6840 }, { "epoch": 0.20257881350919738, "grad_norm": 1.095902681350708, "learning_rate": 7.809293293341983e-07, "loss": 0.1163, "step": 6850 }, { "epoch": 0.20287454900337137, "grad_norm": 2.0978920459747314, "learning_rate": 7.820695386046527e-07, "loss": 0.1553, "step": 6860 }, { "epoch": 0.2031702844975454, "grad_norm": 1.4026120901107788, "learning_rate": 7.832097478751071e-07, "loss": 0.1428, "step": 6870 }, { "epoch": 0.2034660199917194, "grad_norm": 1.9312500953674316, "learning_rate": 7.843499571455613e-07, "loss": 0.143, "step": 6880 }, { "epoch": 0.20376175548589343, "grad_norm": 1.6838899850845337, "learning_rate": 7.854901664160158e-07, "loss": 0.1376, "step": 6890 }, { "epoch": 0.20405749098006742, "grad_norm": 1.9161322116851807, "learning_rate": 7.866303756864701e-07, "loss": 0.1115, "step": 6900 }, { "epoch": 0.20435322647424145, "grad_norm": 1.6456317901611328, "learning_rate": 7.877705849569246e-07, "loss": 0.1429, "step": 6910 }, { "epoch": 0.20464896196841545, "grad_norm": 2.212069272994995, "learning_rate": 7.889107942273789e-07, "loss": 0.1301, "step": 6920 }, { "epoch": 0.20494469746258945, "grad_norm": 1.4865992069244385, "learning_rate": 7.900510034978333e-07, "loss": 0.1339, "step": 6930 }, { "epoch": 0.20524043295676347, "grad_norm": 1.5325331687927246, "learning_rate": 7.911912127682876e-07, "loss": 0.1275, "step": 6940 }, { "epoch": 0.20553616845093747, "grad_norm": 1.2484568357467651, "learning_rate": 7.92331422038742e-07, "loss": 0.127, "step": 6950 }, { "epoch": 0.2058319039451115, "grad_norm": 2.1086292266845703, "learning_rate": 7.934716313091964e-07, "loss": 0.1409, "step": 6960 }, { "epoch": 0.2061276394392855, "grad_norm": 1.4028568267822266, "learning_rate": 7.946118405796507e-07, "loss": 0.134, "step": 6970 }, { "epoch": 0.20642337493345952, "grad_norm": 2.035126209259033, "learning_rate": 7.957520498501051e-07, "loss": 0.1335, "step": 6980 }, { "epoch": 0.20671911042763352, "grad_norm": 1.4748485088348389, "learning_rate": 7.968922591205595e-07, "loss": 0.1308, "step": 6990 }, { "epoch": 0.20701484592180752, "grad_norm": 1.4076032638549805, "learning_rate": 7.980324683910139e-07, "loss": 0.1127, "step": 7000 }, { "epoch": 0.20731058141598155, "grad_norm": 1.3056514263153076, "learning_rate": 7.991726776614682e-07, "loss": 0.1419, "step": 7010 }, { "epoch": 0.20760631691015555, "grad_norm": 0.814172625541687, "learning_rate": 8.003128869319226e-07, "loss": 0.1361, "step": 7020 }, { "epoch": 0.20790205240432957, "grad_norm": 0.8338677287101746, "learning_rate": 8.01453096202377e-07, "loss": 0.136, "step": 7030 }, { "epoch": 0.20819778789850357, "grad_norm": 3.5791430473327637, "learning_rate": 8.025933054728313e-07, "loss": 0.1183, "step": 7040 }, { "epoch": 0.2084935233926776, "grad_norm": 1.6046466827392578, "learning_rate": 8.037335147432858e-07, "loss": 0.1275, "step": 7050 }, { "epoch": 0.2087892588868516, "grad_norm": 1.9606467485427856, "learning_rate": 8.0487372401374e-07, "loss": 0.1599, "step": 7060 }, { "epoch": 0.20908499438102562, "grad_norm": 1.888303279876709, "learning_rate": 8.060139332841945e-07, "loss": 0.1498, "step": 7070 }, { "epoch": 0.20938072987519962, "grad_norm": 0.9637253880500793, "learning_rate": 8.071541425546488e-07, "loss": 0.143, "step": 7080 }, { "epoch": 0.20967646536937362, "grad_norm": 1.9349642992019653, "learning_rate": 8.082943518251033e-07, "loss": 0.1146, "step": 7090 }, { "epoch": 0.20997220086354765, "grad_norm": 1.7787822484970093, "learning_rate": 8.094345610955575e-07, "loss": 0.1137, "step": 7100 }, { "epoch": 0.21026793635772165, "grad_norm": 1.9680302143096924, "learning_rate": 8.105747703660119e-07, "loss": 0.153, "step": 7110 }, { "epoch": 0.21056367185189567, "grad_norm": 1.2216789722442627, "learning_rate": 8.117149796364663e-07, "loss": 0.1496, "step": 7120 }, { "epoch": 0.21085940734606967, "grad_norm": 1.3003416061401367, "learning_rate": 8.128551889069207e-07, "loss": 0.1294, "step": 7130 }, { "epoch": 0.2111551428402437, "grad_norm": 1.6643766164779663, "learning_rate": 8.13995398177375e-07, "loss": 0.1473, "step": 7140 }, { "epoch": 0.2114508783344177, "grad_norm": 1.792424201965332, "learning_rate": 8.151356074478294e-07, "loss": 0.1043, "step": 7150 }, { "epoch": 0.2117466138285917, "grad_norm": 2.026991605758667, "learning_rate": 8.162758167182838e-07, "loss": 0.1479, "step": 7160 }, { "epoch": 0.21204234932276572, "grad_norm": 1.1932443380355835, "learning_rate": 8.174160259887382e-07, "loss": 0.1304, "step": 7170 }, { "epoch": 0.21233808481693972, "grad_norm": 1.0517022609710693, "learning_rate": 8.185562352591924e-07, "loss": 0.1242, "step": 7180 }, { "epoch": 0.21263382031111375, "grad_norm": 1.6938865184783936, "learning_rate": 8.196964445296469e-07, "loss": 0.1399, "step": 7190 }, { "epoch": 0.21292955580528775, "grad_norm": 1.1221535205841064, "learning_rate": 8.208366538001012e-07, "loss": 0.1115, "step": 7200 }, { "epoch": 0.21322529129946177, "grad_norm": 2.272653102874756, "learning_rate": 8.219768630705557e-07, "loss": 0.1406, "step": 7210 }, { "epoch": 0.21352102679363577, "grad_norm": 0.8148337602615356, "learning_rate": 8.2311707234101e-07, "loss": 0.1278, "step": 7220 }, { "epoch": 0.21381676228780977, "grad_norm": 1.3478806018829346, "learning_rate": 8.242572816114644e-07, "loss": 0.1472, "step": 7230 }, { "epoch": 0.2141124977819838, "grad_norm": 1.2515833377838135, "learning_rate": 8.253974908819187e-07, "loss": 0.1176, "step": 7240 }, { "epoch": 0.2144082332761578, "grad_norm": 2.4278624057769775, "learning_rate": 8.265377001523732e-07, "loss": 0.122, "step": 7250 }, { "epoch": 0.21470396877033182, "grad_norm": 1.6319884061813354, "learning_rate": 8.276779094228275e-07, "loss": 0.1271, "step": 7260 }, { "epoch": 0.21499970426450582, "grad_norm": 1.4480379819869995, "learning_rate": 8.288181186932818e-07, "loss": 0.1409, "step": 7270 }, { "epoch": 0.21529543975867985, "grad_norm": 1.3128684759140015, "learning_rate": 8.299583279637362e-07, "loss": 0.1313, "step": 7280 }, { "epoch": 0.21559117525285384, "grad_norm": 1.4230341911315918, "learning_rate": 8.310985372341906e-07, "loss": 0.1068, "step": 7290 }, { "epoch": 0.21588691074702787, "grad_norm": 1.181982159614563, "learning_rate": 8.32238746504645e-07, "loss": 0.1092, "step": 7300 }, { "epoch": 0.21618264624120187, "grad_norm": 1.4338572025299072, "learning_rate": 8.333789557750993e-07, "loss": 0.1305, "step": 7310 }, { "epoch": 0.21647838173537587, "grad_norm": 2.2080602645874023, "learning_rate": 8.345191650455537e-07, "loss": 0.1371, "step": 7320 }, { "epoch": 0.2167741172295499, "grad_norm": 1.32375168800354, "learning_rate": 8.356593743160081e-07, "loss": 0.1269, "step": 7330 }, { "epoch": 0.2170698527237239, "grad_norm": 1.0531314611434937, "learning_rate": 8.367995835864624e-07, "loss": 0.1302, "step": 7340 }, { "epoch": 0.21736558821789792, "grad_norm": 1.7876640558242798, "learning_rate": 8.379397928569169e-07, "loss": 0.1071, "step": 7350 }, { "epoch": 0.21766132371207192, "grad_norm": 1.5347601175308228, "learning_rate": 8.390800021273711e-07, "loss": 0.1388, "step": 7360 }, { "epoch": 0.21795705920624595, "grad_norm": 1.4226131439208984, "learning_rate": 8.402202113978256e-07, "loss": 0.1272, "step": 7370 }, { "epoch": 0.21825279470041994, "grad_norm": 2.1696348190307617, "learning_rate": 8.4136042066828e-07, "loss": 0.1446, "step": 7380 }, { "epoch": 0.21854853019459394, "grad_norm": 0.9628176689147949, "learning_rate": 8.425006299387344e-07, "loss": 0.1068, "step": 7390 }, { "epoch": 0.21884426568876797, "grad_norm": 1.7338874340057373, "learning_rate": 8.436408392091887e-07, "loss": 0.1131, "step": 7400 }, { "epoch": 0.21914000118294197, "grad_norm": 1.632494568824768, "learning_rate": 8.44781048479643e-07, "loss": 0.1347, "step": 7410 }, { "epoch": 0.219435736677116, "grad_norm": 1.368071436882019, "learning_rate": 8.459212577500975e-07, "loss": 0.1391, "step": 7420 }, { "epoch": 0.21973147217129, "grad_norm": 2.098879337310791, "learning_rate": 8.470614670205518e-07, "loss": 0.1213, "step": 7430 }, { "epoch": 0.22002720766546402, "grad_norm": 1.7152029275894165, "learning_rate": 8.482016762910062e-07, "loss": 0.1174, "step": 7440 }, { "epoch": 0.22032294315963802, "grad_norm": 1.0671861171722412, "learning_rate": 8.493418855614605e-07, "loss": 0.118, "step": 7450 }, { "epoch": 0.22061867865381202, "grad_norm": 1.9450438022613525, "learning_rate": 8.50482094831915e-07, "loss": 0.133, "step": 7460 }, { "epoch": 0.22091441414798604, "grad_norm": 1.4092713594436646, "learning_rate": 8.516223041023693e-07, "loss": 0.1307, "step": 7470 }, { "epoch": 0.22121014964216004, "grad_norm": 1.6616206169128418, "learning_rate": 8.527625133728238e-07, "loss": 0.1218, "step": 7480 }, { "epoch": 0.22150588513633407, "grad_norm": 0.8613747358322144, "learning_rate": 8.53902722643278e-07, "loss": 0.1367, "step": 7490 }, { "epoch": 0.22180162063050807, "grad_norm": 2.1613974571228027, "learning_rate": 8.550429319137324e-07, "loss": 0.1113, "step": 7500 }, { "epoch": 0.2220973561246821, "grad_norm": 2.3061294555664062, "learning_rate": 8.561831411841868e-07, "loss": 0.1413, "step": 7510 }, { "epoch": 0.2223930916188561, "grad_norm": 2.220860242843628, "learning_rate": 8.573233504546412e-07, "loss": 0.1343, "step": 7520 }, { "epoch": 0.22268882711303012, "grad_norm": 1.4612038135528564, "learning_rate": 8.584635597250955e-07, "loss": 0.1195, "step": 7530 }, { "epoch": 0.22298456260720412, "grad_norm": 1.588279366493225, "learning_rate": 8.596037689955499e-07, "loss": 0.115, "step": 7540 }, { "epoch": 0.22328029810137812, "grad_norm": 2.2256174087524414, "learning_rate": 8.607439782660043e-07, "loss": 0.1044, "step": 7550 }, { "epoch": 0.22357603359555214, "grad_norm": 1.292157530784607, "learning_rate": 8.618841875364587e-07, "loss": 0.1413, "step": 7560 }, { "epoch": 0.22387176908972614, "grad_norm": 1.246085286140442, "learning_rate": 8.630243968069129e-07, "loss": 0.1475, "step": 7570 }, { "epoch": 0.22416750458390017, "grad_norm": 2.363421678543091, "learning_rate": 8.641646060773674e-07, "loss": 0.1144, "step": 7580 }, { "epoch": 0.22446324007807417, "grad_norm": 1.0514531135559082, "learning_rate": 8.653048153478217e-07, "loss": 0.107, "step": 7590 }, { "epoch": 0.2247589755722482, "grad_norm": 1.0833351612091064, "learning_rate": 8.664450246182762e-07, "loss": 0.1249, "step": 7600 }, { "epoch": 0.2250547110664222, "grad_norm": 1.9848440885543823, "learning_rate": 8.675852338887304e-07, "loss": 0.1453, "step": 7610 }, { "epoch": 0.2253504465605962, "grad_norm": 1.1370034217834473, "learning_rate": 8.687254431591849e-07, "loss": 0.1201, "step": 7620 }, { "epoch": 0.22564618205477022, "grad_norm": 3.69091796875, "learning_rate": 8.698656524296392e-07, "loss": 0.1297, "step": 7630 }, { "epoch": 0.22594191754894422, "grad_norm": 1.3632687330245972, "learning_rate": 8.710058617000936e-07, "loss": 0.1197, "step": 7640 }, { "epoch": 0.22623765304311824, "grad_norm": 2.526790142059326, "learning_rate": 8.72146070970548e-07, "loss": 0.1145, "step": 7650 }, { "epoch": 0.22653338853729224, "grad_norm": 1.2337329387664795, "learning_rate": 8.732862802410023e-07, "loss": 0.1465, "step": 7660 }, { "epoch": 0.22682912403146627, "grad_norm": 1.4442672729492188, "learning_rate": 8.744264895114567e-07, "loss": 0.1328, "step": 7670 }, { "epoch": 0.22712485952564027, "grad_norm": 1.3283988237380981, "learning_rate": 8.755666987819111e-07, "loss": 0.1102, "step": 7680 }, { "epoch": 0.22742059501981426, "grad_norm": 1.6577017307281494, "learning_rate": 8.767069080523655e-07, "loss": 0.132, "step": 7690 }, { "epoch": 0.2277163305139883, "grad_norm": 0.9297200441360474, "learning_rate": 8.778471173228198e-07, "loss": 0.1149, "step": 7700 }, { "epoch": 0.2280120660081623, "grad_norm": 0.7492542266845703, "learning_rate": 8.789873265932742e-07, "loss": 0.1243, "step": 7710 }, { "epoch": 0.22830780150233632, "grad_norm": 1.9753133058547974, "learning_rate": 8.801275358637286e-07, "loss": 0.1419, "step": 7720 }, { "epoch": 0.22860353699651031, "grad_norm": 1.547074317932129, "learning_rate": 8.812677451341829e-07, "loss": 0.1385, "step": 7730 }, { "epoch": 0.22889927249068434, "grad_norm": 1.9651073217391968, "learning_rate": 8.824079544046373e-07, "loss": 0.1229, "step": 7740 }, { "epoch": 0.22919500798485834, "grad_norm": 2.282989263534546, "learning_rate": 8.835481636750916e-07, "loss": 0.1191, "step": 7750 }, { "epoch": 0.22949074347903237, "grad_norm": 1.749074101448059, "learning_rate": 8.846883729455461e-07, "loss": 0.1389, "step": 7760 }, { "epoch": 0.22978647897320637, "grad_norm": 2.0893077850341797, "learning_rate": 8.858285822160004e-07, "loss": 0.1321, "step": 7770 }, { "epoch": 0.23008221446738036, "grad_norm": 1.427294373512268, "learning_rate": 8.869687914864548e-07, "loss": 0.1213, "step": 7780 }, { "epoch": 0.2303779499615544, "grad_norm": 1.4131604433059692, "learning_rate": 8.881090007569091e-07, "loss": 0.1144, "step": 7790 }, { "epoch": 0.2306736854557284, "grad_norm": 1.2595775127410889, "learning_rate": 8.892492100273635e-07, "loss": 0.1267, "step": 7800 }, { "epoch": 0.23096942094990242, "grad_norm": 2.5150020122528076, "learning_rate": 8.903894192978179e-07, "loss": 0.1287, "step": 7810 }, { "epoch": 0.23126515644407641, "grad_norm": 1.7936137914657593, "learning_rate": 8.915296285682723e-07, "loss": 0.1241, "step": 7820 }, { "epoch": 0.23156089193825044, "grad_norm": 2.0709800720214844, "learning_rate": 8.926698378387266e-07, "loss": 0.1398, "step": 7830 }, { "epoch": 0.23185662743242444, "grad_norm": 2.3739335536956787, "learning_rate": 8.93810047109181e-07, "loss": 0.1322, "step": 7840 }, { "epoch": 0.23215236292659844, "grad_norm": 1.9060245752334595, "learning_rate": 8.949502563796354e-07, "loss": 0.1098, "step": 7850 }, { "epoch": 0.23244809842077246, "grad_norm": 1.7308197021484375, "learning_rate": 8.960904656500898e-07, "loss": 0.1369, "step": 7860 }, { "epoch": 0.23274383391494646, "grad_norm": 1.7192291021347046, "learning_rate": 8.97230674920544e-07, "loss": 0.1288, "step": 7870 }, { "epoch": 0.2330395694091205, "grad_norm": 1.5656139850616455, "learning_rate": 8.983708841909985e-07, "loss": 0.1147, "step": 7880 }, { "epoch": 0.2333353049032945, "grad_norm": 1.7208844423294067, "learning_rate": 8.995110934614528e-07, "loss": 0.1154, "step": 7890 }, { "epoch": 0.23363104039746851, "grad_norm": 1.0556387901306152, "learning_rate": 9.006513027319073e-07, "loss": 0.099, "step": 7900 }, { "epoch": 0.2339267758916425, "grad_norm": 1.6001899242401123, "learning_rate": 9.017915120023615e-07, "loss": 0.1357, "step": 7910 }, { "epoch": 0.2342225113858165, "grad_norm": 1.526942491531372, "learning_rate": 9.02931721272816e-07, "loss": 0.1295, "step": 7920 }, { "epoch": 0.23451824687999054, "grad_norm": 1.0318199396133423, "learning_rate": 9.040719305432703e-07, "loss": 0.1102, "step": 7930 }, { "epoch": 0.23481398237416454, "grad_norm": 2.1449286937713623, "learning_rate": 9.052121398137248e-07, "loss": 0.1112, "step": 7940 }, { "epoch": 0.23510971786833856, "grad_norm": 1.370117425918579, "learning_rate": 9.06352349084179e-07, "loss": 0.1096, "step": 7950 }, { "epoch": 0.23540545336251256, "grad_norm": 1.0474743843078613, "learning_rate": 9.074925583546334e-07, "loss": 0.1162, "step": 7960 }, { "epoch": 0.2357011888566866, "grad_norm": 2.378169059753418, "learning_rate": 9.086327676250878e-07, "loss": 0.1351, "step": 7970 }, { "epoch": 0.2359969243508606, "grad_norm": 1.4412800073623657, "learning_rate": 9.097729768955422e-07, "loss": 0.1074, "step": 7980 }, { "epoch": 0.2362926598450346, "grad_norm": 1.0916314125061035, "learning_rate": 9.109131861659966e-07, "loss": 0.1027, "step": 7990 }, { "epoch": 0.2365883953392086, "grad_norm": 0.993654727935791, "learning_rate": 9.120533954364509e-07, "loss": 0.0946, "step": 8000 }, { "epoch": 0.2368841308333826, "grad_norm": 2.466583251953125, "learning_rate": 9.131936047069053e-07, "loss": 0.1472, "step": 8010 }, { "epoch": 0.23717986632755664, "grad_norm": 1.637739658355713, "learning_rate": 9.143338139773597e-07, "loss": 0.1345, "step": 8020 }, { "epoch": 0.23747560182173064, "grad_norm": 1.4979522228240967, "learning_rate": 9.15474023247814e-07, "loss": 0.1354, "step": 8030 }, { "epoch": 0.23777133731590466, "grad_norm": 1.3456367254257202, "learning_rate": 9.166142325182684e-07, "loss": 0.1204, "step": 8040 }, { "epoch": 0.23806707281007866, "grad_norm": 1.3733314275741577, "learning_rate": 9.177544417887227e-07, "loss": 0.0904, "step": 8050 }, { "epoch": 0.2383628083042527, "grad_norm": 1.2211251258850098, "learning_rate": 9.188946510591772e-07, "loss": 0.1199, "step": 8060 }, { "epoch": 0.2386585437984267, "grad_norm": 2.6305086612701416, "learning_rate": 9.200348603296315e-07, "loss": 0.1307, "step": 8070 }, { "epoch": 0.23895427929260069, "grad_norm": 1.4602367877960205, "learning_rate": 9.211750696000859e-07, "loss": 0.133, "step": 8080 }, { "epoch": 0.2392500147867747, "grad_norm": 1.5474802255630493, "learning_rate": 9.223152788705402e-07, "loss": 0.1222, "step": 8090 }, { "epoch": 0.2395457502809487, "grad_norm": 1.6191998720169067, "learning_rate": 9.234554881409946e-07, "loss": 0.1033, "step": 8100 }, { "epoch": 0.23984148577512274, "grad_norm": 1.2839703559875488, "learning_rate": 9.24595697411449e-07, "loss": 0.1294, "step": 8110 }, { "epoch": 0.24013722126929674, "grad_norm": 1.7171077728271484, "learning_rate": 9.257359066819033e-07, "loss": 0.1232, "step": 8120 }, { "epoch": 0.24043295676347076, "grad_norm": 1.398793339729309, "learning_rate": 9.268761159523577e-07, "loss": 0.111, "step": 8130 }, { "epoch": 0.24072869225764476, "grad_norm": 1.7937556505203247, "learning_rate": 9.280163252228121e-07, "loss": 0.1159, "step": 8140 }, { "epoch": 0.24102442775181876, "grad_norm": 2.106186866760254, "learning_rate": 9.291565344932665e-07, "loss": 0.0979, "step": 8150 }, { "epoch": 0.24132016324599279, "grad_norm": 2.798097610473633, "learning_rate": 9.302967437637209e-07, "loss": 0.1308, "step": 8160 }, { "epoch": 0.24161589874016678, "grad_norm": 1.408564805984497, "learning_rate": 9.314369530341752e-07, "loss": 0.1238, "step": 8170 }, { "epoch": 0.2419116342343408, "grad_norm": 2.1372342109680176, "learning_rate": 9.325771623046296e-07, "loss": 0.1185, "step": 8180 }, { "epoch": 0.2422073697285148, "grad_norm": 1.5709203481674194, "learning_rate": 9.337173715750839e-07, "loss": 0.1079, "step": 8190 }, { "epoch": 0.24250310522268884, "grad_norm": 2.1187212467193604, "learning_rate": 9.348575808455384e-07, "loss": 0.0964, "step": 8200 }, { "epoch": 0.24279884071686283, "grad_norm": 2.4217634201049805, "learning_rate": 9.359977901159926e-07, "loss": 0.13, "step": 8210 }, { "epoch": 0.24309457621103686, "grad_norm": 1.3927943706512451, "learning_rate": 9.371379993864471e-07, "loss": 0.1165, "step": 8220 }, { "epoch": 0.24339031170521086, "grad_norm": 1.951878547668457, "learning_rate": 9.382782086569014e-07, "loss": 0.1226, "step": 8230 }, { "epoch": 0.24368604719938486, "grad_norm": 1.8467808961868286, "learning_rate": 9.394184179273559e-07, "loss": 0.115, "step": 8240 }, { "epoch": 0.24398178269355889, "grad_norm": 1.329264521598816, "learning_rate": 9.405586271978101e-07, "loss": 0.098, "step": 8250 }, { "epoch": 0.24427751818773288, "grad_norm": 1.9512758255004883, "learning_rate": 9.416988364682645e-07, "loss": 0.1285, "step": 8260 }, { "epoch": 0.2445732536819069, "grad_norm": 1.9040907621383667, "learning_rate": 9.428390457387189e-07, "loss": 0.1199, "step": 8270 }, { "epoch": 0.2448689891760809, "grad_norm": 1.5763345956802368, "learning_rate": 9.439792550091733e-07, "loss": 0.1133, "step": 8280 }, { "epoch": 0.24516472467025494, "grad_norm": 1.8056238889694214, "learning_rate": 9.451194642796276e-07, "loss": 0.1205, "step": 8290 }, { "epoch": 0.24546046016442893, "grad_norm": 1.109251618385315, "learning_rate": 9.46259673550082e-07, "loss": 0.102, "step": 8300 }, { "epoch": 0.24575619565860293, "grad_norm": 2.360128402709961, "learning_rate": 9.473998828205364e-07, "loss": 0.1384, "step": 8310 }, { "epoch": 0.24605193115277696, "grad_norm": 1.7751072645187378, "learning_rate": 9.485400920909908e-07, "loss": 0.1065, "step": 8320 }, { "epoch": 0.24634766664695096, "grad_norm": 1.2390172481536865, "learning_rate": 9.49680301361445e-07, "loss": 0.1097, "step": 8330 }, { "epoch": 0.24664340214112498, "grad_norm": 1.7429379224777222, "learning_rate": 9.508205106318995e-07, "loss": 0.1073, "step": 8340 }, { "epoch": 0.24693913763529898, "grad_norm": 1.6657350063323975, "learning_rate": 9.519607199023538e-07, "loss": 0.1031, "step": 8350 }, { "epoch": 0.247234873129473, "grad_norm": 2.4265825748443604, "learning_rate": 9.531009291728083e-07, "loss": 0.1254, "step": 8360 }, { "epoch": 0.247530608623647, "grad_norm": 2.900251865386963, "learning_rate": 9.542411384432626e-07, "loss": 0.1386, "step": 8370 }, { "epoch": 0.24782634411782103, "grad_norm": 1.9351693391799927, "learning_rate": 9.55381347713717e-07, "loss": 0.1167, "step": 8380 }, { "epoch": 0.24812207961199503, "grad_norm": 1.3756461143493652, "learning_rate": 9.565215569841713e-07, "loss": 0.0961, "step": 8390 }, { "epoch": 0.24841781510616903, "grad_norm": 1.273219347000122, "learning_rate": 9.576617662546257e-07, "loss": 0.0912, "step": 8400 }, { "epoch": 0.24871355060034306, "grad_norm": 1.5653101205825806, "learning_rate": 9.5880197552508e-07, "loss": 0.1453, "step": 8410 }, { "epoch": 0.24900928609451706, "grad_norm": 1.7667126655578613, "learning_rate": 9.599421847955344e-07, "loss": 0.13, "step": 8420 }, { "epoch": 0.24930502158869108, "grad_norm": 1.7313464879989624, "learning_rate": 9.610823940659888e-07, "loss": 0.1111, "step": 8430 }, { "epoch": 0.24960075708286508, "grad_norm": 1.2562183141708374, "learning_rate": 9.622226033364432e-07, "loss": 0.1192, "step": 8440 }, { "epoch": 0.2498964925770391, "grad_norm": 1.2636770009994507, "learning_rate": 9.633628126068976e-07, "loss": 0.1133, "step": 8450 }, { "epoch": 0.2501922280712131, "grad_norm": 1.2721360921859741, "learning_rate": 9.64503021877352e-07, "loss": 0.1298, "step": 8460 }, { "epoch": 0.2504879635653871, "grad_norm": 2.2799084186553955, "learning_rate": 9.656432311478063e-07, "loss": 0.118, "step": 8470 }, { "epoch": 0.2507836990595611, "grad_norm": 1.344041347503662, "learning_rate": 9.667834404182607e-07, "loss": 0.1213, "step": 8480 }, { "epoch": 0.25107943455373516, "grad_norm": 1.0972809791564941, "learning_rate": 9.67923649688715e-07, "loss": 0.1054, "step": 8490 }, { "epoch": 0.25137517004790916, "grad_norm": 1.6137925386428833, "learning_rate": 9.690638589591695e-07, "loss": 0.1109, "step": 8500 }, { "epoch": 0.25167090554208316, "grad_norm": 1.5264941453933716, "learning_rate": 9.702040682296238e-07, "loss": 0.1286, "step": 8510 }, { "epoch": 0.25196664103625716, "grad_norm": 2.6372604370117188, "learning_rate": 9.71344277500078e-07, "loss": 0.1247, "step": 8520 }, { "epoch": 0.2522623765304312, "grad_norm": 1.2153111696243286, "learning_rate": 9.724844867705326e-07, "loss": 0.0959, "step": 8530 }, { "epoch": 0.2525581120246052, "grad_norm": 1.5969287157058716, "learning_rate": 9.73624696040987e-07, "loss": 0.1355, "step": 8540 }, { "epoch": 0.2528538475187792, "grad_norm": 1.567681074142456, "learning_rate": 9.747649053114413e-07, "loss": 0.0963, "step": 8550 }, { "epoch": 0.2531495830129532, "grad_norm": 1.345101237297058, "learning_rate": 9.759051145818955e-07, "loss": 0.1178, "step": 8560 }, { "epoch": 0.2534453185071272, "grad_norm": 1.3501055240631104, "learning_rate": 9.770453238523499e-07, "loss": 0.1402, "step": 8570 }, { "epoch": 0.25374105400130126, "grad_norm": 1.644583821296692, "learning_rate": 9.781855331228045e-07, "loss": 0.1179, "step": 8580 }, { "epoch": 0.25403678949547526, "grad_norm": 1.6558235883712769, "learning_rate": 9.793257423932588e-07, "loss": 0.0985, "step": 8590 }, { "epoch": 0.25433252498964926, "grad_norm": 1.081748366355896, "learning_rate": 9.80465951663713e-07, "loss": 0.0953, "step": 8600 }, { "epoch": 0.25462826048382325, "grad_norm": 1.5392746925354004, "learning_rate": 9.816061609341674e-07, "loss": 0.1292, "step": 8610 }, { "epoch": 0.25492399597799725, "grad_norm": 1.75062096118927, "learning_rate": 9.82746370204622e-07, "loss": 0.1293, "step": 8620 }, { "epoch": 0.2552197314721713, "grad_norm": 1.341939926147461, "learning_rate": 9.838865794750764e-07, "loss": 0.094, "step": 8630 }, { "epoch": 0.2555154669663453, "grad_norm": 0.7264237403869629, "learning_rate": 9.850267887455305e-07, "loss": 0.1122, "step": 8640 }, { "epoch": 0.2558112024605193, "grad_norm": 1.2896755933761597, "learning_rate": 9.86166998015985e-07, "loss": 0.1092, "step": 8650 }, { "epoch": 0.2561069379546933, "grad_norm": 2.4416139125823975, "learning_rate": 9.873072072864393e-07, "loss": 0.1454, "step": 8660 }, { "epoch": 0.25640267344886736, "grad_norm": 2.2584073543548584, "learning_rate": 9.884474165568939e-07, "loss": 0.1237, "step": 8670 }, { "epoch": 0.25669840894304136, "grad_norm": 1.4084042310714722, "learning_rate": 9.895876258273482e-07, "loss": 0.1185, "step": 8680 }, { "epoch": 0.25699414443721535, "grad_norm": 0.8960620164871216, "learning_rate": 9.907278350978024e-07, "loss": 0.0992, "step": 8690 }, { "epoch": 0.25728987993138935, "grad_norm": 1.7126957178115845, "learning_rate": 9.918680443682568e-07, "loss": 0.1045, "step": 8700 }, { "epoch": 0.25758561542556335, "grad_norm": 1.577599287033081, "learning_rate": 9.930082536387114e-07, "loss": 0.113, "step": 8710 }, { "epoch": 0.2578813509197374, "grad_norm": 1.176878809928894, "learning_rate": 9.941484629091657e-07, "loss": 0.1209, "step": 8720 }, { "epoch": 0.2581770864139114, "grad_norm": 1.728493094444275, "learning_rate": 9.9528867217962e-07, "loss": 0.1243, "step": 8730 }, { "epoch": 0.2584728219080854, "grad_norm": 1.0178942680358887, "learning_rate": 9.964288814500743e-07, "loss": 0.112, "step": 8740 }, { "epoch": 0.2587685574022594, "grad_norm": 2.4175403118133545, "learning_rate": 9.975690907205287e-07, "loss": 0.106, "step": 8750 }, { "epoch": 0.25906429289643346, "grad_norm": 2.4721336364746094, "learning_rate": 9.987092999909833e-07, "loss": 0.1341, "step": 8760 }, { "epoch": 0.25936002839060746, "grad_norm": 1.8364440202713013, "learning_rate": 9.998495092614374e-07, "loss": 0.1436, "step": 8770 }, { "epoch": 0.25965576388478145, "grad_norm": 1.7073832750320435, "learning_rate": 1.0009897185318918e-06, "loss": 0.1437, "step": 8780 }, { "epoch": 0.25995149937895545, "grad_norm": 1.5375618934631348, "learning_rate": 1.0021299278023462e-06, "loss": 0.1062, "step": 8790 }, { "epoch": 0.26024723487312945, "grad_norm": 4.851136207580566, "learning_rate": 1.0032701370728006e-06, "loss": 0.1099, "step": 8800 }, { "epoch": 0.2605429703673035, "grad_norm": 2.8157026767730713, "learning_rate": 1.004410346343255e-06, "loss": 0.127, "step": 8810 }, { "epoch": 0.2608387058614775, "grad_norm": 1.393031120300293, "learning_rate": 1.0055505556137093e-06, "loss": 0.1208, "step": 8820 }, { "epoch": 0.2611344413556515, "grad_norm": 2.007474660873413, "learning_rate": 1.0066907648841637e-06, "loss": 0.1171, "step": 8830 }, { "epoch": 0.2614301768498255, "grad_norm": 1.5132521390914917, "learning_rate": 1.007830974154618e-06, "loss": 0.123, "step": 8840 }, { "epoch": 0.2617259123439995, "grad_norm": 1.780964970588684, "learning_rate": 1.0089711834250724e-06, "loss": 0.1126, "step": 8850 }, { "epoch": 0.26202164783817355, "grad_norm": 2.229393482208252, "learning_rate": 1.0101113926955268e-06, "loss": 0.1271, "step": 8860 }, { "epoch": 0.26231738333234755, "grad_norm": 1.7426493167877197, "learning_rate": 1.0112516019659812e-06, "loss": 0.1283, "step": 8870 }, { "epoch": 0.26261311882652155, "grad_norm": 1.644564151763916, "learning_rate": 1.0123918112364356e-06, "loss": 0.1248, "step": 8880 }, { "epoch": 0.26290885432069555, "grad_norm": 0.9723426699638367, "learning_rate": 1.0135320205068897e-06, "loss": 0.0994, "step": 8890 }, { "epoch": 0.2632045898148696, "grad_norm": 1.2395638227462769, "learning_rate": 1.0146722297773443e-06, "loss": 0.0882, "step": 8900 }, { "epoch": 0.2635003253090436, "grad_norm": 2.1916897296905518, "learning_rate": 1.0158124390477987e-06, "loss": 0.1477, "step": 8910 }, { "epoch": 0.2637960608032176, "grad_norm": 2.2914347648620605, "learning_rate": 1.016952648318253e-06, "loss": 0.1163, "step": 8920 }, { "epoch": 0.2640917962973916, "grad_norm": 2.100735664367676, "learning_rate": 1.0180928575887072e-06, "loss": 0.0988, "step": 8930 }, { "epoch": 0.2643875317915656, "grad_norm": 1.386224627494812, "learning_rate": 1.0192330668591618e-06, "loss": 0.0999, "step": 8940 }, { "epoch": 0.26468326728573965, "grad_norm": 1.041670560836792, "learning_rate": 1.0203732761296162e-06, "loss": 0.0957, "step": 8950 }, { "epoch": 0.26497900277991365, "grad_norm": 1.6406505107879639, "learning_rate": 1.0215134854000706e-06, "loss": 0.1178, "step": 8960 }, { "epoch": 0.26527473827408765, "grad_norm": 2.833711862564087, "learning_rate": 1.0226536946705247e-06, "loss": 0.1099, "step": 8970 }, { "epoch": 0.26557047376826165, "grad_norm": 1.786482334136963, "learning_rate": 1.0237939039409791e-06, "loss": 0.1114, "step": 8980 }, { "epoch": 0.2658662092624357, "grad_norm": 1.7099448442459106, "learning_rate": 1.0249341132114337e-06, "loss": 0.1203, "step": 8990 }, { "epoch": 0.2661619447566097, "grad_norm": 1.181265950202942, "learning_rate": 1.026074322481888e-06, "loss": 0.1052, "step": 9000 }, { "epoch": 0.2664576802507837, "grad_norm": 1.8110594749450684, "learning_rate": 1.0272145317523423e-06, "loss": 0.1182, "step": 9010 }, { "epoch": 0.2667534157449577, "grad_norm": 1.663129210472107, "learning_rate": 1.0283547410227966e-06, "loss": 0.1107, "step": 9020 }, { "epoch": 0.2670491512391317, "grad_norm": 2.0354039669036865, "learning_rate": 1.029494950293251e-06, "loss": 0.1297, "step": 9030 }, { "epoch": 0.26734488673330575, "grad_norm": 1.4972401857376099, "learning_rate": 1.0306351595637056e-06, "loss": 0.1148, "step": 9040 }, { "epoch": 0.26764062222747975, "grad_norm": 1.4794105291366577, "learning_rate": 1.0317753688341598e-06, "loss": 0.1122, "step": 9050 }, { "epoch": 0.26793635772165375, "grad_norm": 2.1181206703186035, "learning_rate": 1.0329155781046141e-06, "loss": 0.1239, "step": 9060 }, { "epoch": 0.26823209321582775, "grad_norm": 2.791184425354004, "learning_rate": 1.0340557873750685e-06, "loss": 0.1259, "step": 9070 }, { "epoch": 0.26852782871000175, "grad_norm": 1.8921953439712524, "learning_rate": 1.0351959966455231e-06, "loss": 0.1146, "step": 9080 }, { "epoch": 0.2688235642041758, "grad_norm": 1.3539599180221558, "learning_rate": 1.0363362059159773e-06, "loss": 0.0976, "step": 9090 }, { "epoch": 0.2691192996983498, "grad_norm": 1.8070311546325684, "learning_rate": 1.0374764151864316e-06, "loss": 0.0933, "step": 9100 }, { "epoch": 0.2694150351925238, "grad_norm": 1.4935170412063599, "learning_rate": 1.038616624456886e-06, "loss": 0.1238, "step": 9110 }, { "epoch": 0.2697107706866978, "grad_norm": 2.027379274368286, "learning_rate": 1.0397568337273404e-06, "loss": 0.1267, "step": 9120 }, { "epoch": 0.27000650618087185, "grad_norm": 1.6726008653640747, "learning_rate": 1.0408970429977948e-06, "loss": 0.1152, "step": 9130 }, { "epoch": 0.27030224167504585, "grad_norm": 1.3369629383087158, "learning_rate": 1.0420372522682492e-06, "loss": 0.0996, "step": 9140 }, { "epoch": 0.27059797716921985, "grad_norm": 2.3934133052825928, "learning_rate": 1.0431774615387035e-06, "loss": 0.0842, "step": 9150 }, { "epoch": 0.27089371266339385, "grad_norm": 1.6732689142227173, "learning_rate": 1.044317670809158e-06, "loss": 0.1206, "step": 9160 }, { "epoch": 0.27118944815756785, "grad_norm": 1.3709901571273804, "learning_rate": 1.0454578800796123e-06, "loss": 0.1237, "step": 9170 }, { "epoch": 0.2714851836517419, "grad_norm": 1.6252622604370117, "learning_rate": 1.0465980893500667e-06, "loss": 0.1073, "step": 9180 }, { "epoch": 0.2717809191459159, "grad_norm": 1.3482799530029297, "learning_rate": 1.047738298620521e-06, "loss": 0.0957, "step": 9190 }, { "epoch": 0.2720766546400899, "grad_norm": 1.4419214725494385, "learning_rate": 1.0488785078909754e-06, "loss": 0.1028, "step": 9200 }, { "epoch": 0.2723723901342639, "grad_norm": 1.7663366794586182, "learning_rate": 1.0500187171614296e-06, "loss": 0.1418, "step": 9210 }, { "epoch": 0.27266812562843795, "grad_norm": 2.067110300064087, "learning_rate": 1.0511589264318842e-06, "loss": 0.1301, "step": 9220 }, { "epoch": 0.27296386112261195, "grad_norm": 1.4111547470092773, "learning_rate": 1.0522991357023385e-06, "loss": 0.106, "step": 9230 }, { "epoch": 0.27325959661678595, "grad_norm": 1.78597891330719, "learning_rate": 1.053439344972793e-06, "loss": 0.0982, "step": 9240 }, { "epoch": 0.27355533211095995, "grad_norm": 2.7170193195343018, "learning_rate": 1.054579554243247e-06, "loss": 0.1045, "step": 9250 }, { "epoch": 0.27385106760513395, "grad_norm": 1.5750195980072021, "learning_rate": 1.0557197635137015e-06, "loss": 0.1252, "step": 9260 }, { "epoch": 0.274146803099308, "grad_norm": 1.997711420059204, "learning_rate": 1.056859972784156e-06, "loss": 0.1245, "step": 9270 }, { "epoch": 0.274442538593482, "grad_norm": 1.7852550745010376, "learning_rate": 1.0580001820546104e-06, "loss": 0.105, "step": 9280 }, { "epoch": 0.274738274087656, "grad_norm": 1.250937819480896, "learning_rate": 1.0591403913250646e-06, "loss": 0.1057, "step": 9290 }, { "epoch": 0.27503400958183, "grad_norm": 1.4832959175109863, "learning_rate": 1.060280600595519e-06, "loss": 0.1035, "step": 9300 }, { "epoch": 0.275329745076004, "grad_norm": 1.5355944633483887, "learning_rate": 1.0614208098659736e-06, "loss": 0.1135, "step": 9310 }, { "epoch": 0.27562548057017805, "grad_norm": 1.422123670578003, "learning_rate": 1.062561019136428e-06, "loss": 0.1104, "step": 9320 }, { "epoch": 0.27592121606435205, "grad_norm": 2.423780679702759, "learning_rate": 1.0637012284068821e-06, "loss": 0.1162, "step": 9330 }, { "epoch": 0.27621695155852605, "grad_norm": 1.7000170946121216, "learning_rate": 1.0648414376773365e-06, "loss": 0.1063, "step": 9340 }, { "epoch": 0.27651268705270005, "grad_norm": 1.1989810466766357, "learning_rate": 1.0659816469477909e-06, "loss": 0.097, "step": 9350 }, { "epoch": 0.2768084225468741, "grad_norm": 1.2710539102554321, "learning_rate": 1.0671218562182454e-06, "loss": 0.1247, "step": 9360 }, { "epoch": 0.2771041580410481, "grad_norm": 1.5208256244659424, "learning_rate": 1.0682620654886996e-06, "loss": 0.1157, "step": 9370 }, { "epoch": 0.2773998935352221, "grad_norm": 1.6715724468231201, "learning_rate": 1.069402274759154e-06, "loss": 0.122, "step": 9380 }, { "epoch": 0.2776956290293961, "grad_norm": 1.0460666418075562, "learning_rate": 1.0705424840296084e-06, "loss": 0.1065, "step": 9390 }, { "epoch": 0.2779913645235701, "grad_norm": 2.4180047512054443, "learning_rate": 1.071682693300063e-06, "loss": 0.1178, "step": 9400 }, { "epoch": 0.27828710001774415, "grad_norm": 1.7701725959777832, "learning_rate": 1.0728229025705171e-06, "loss": 0.1336, "step": 9410 }, { "epoch": 0.27858283551191815, "grad_norm": 1.3880988359451294, "learning_rate": 1.0739631118409715e-06, "loss": 0.1132, "step": 9420 }, { "epoch": 0.27887857100609215, "grad_norm": 1.446181058883667, "learning_rate": 1.0751033211114259e-06, "loss": 0.1172, "step": 9430 }, { "epoch": 0.27917430650026615, "grad_norm": 1.0458664894104004, "learning_rate": 1.0762435303818803e-06, "loss": 0.1146, "step": 9440 }, { "epoch": 0.2794700419944402, "grad_norm": 2.1114187240600586, "learning_rate": 1.0773837396523346e-06, "loss": 0.0962, "step": 9450 }, { "epoch": 0.2797657774886142, "grad_norm": 1.4842356443405151, "learning_rate": 1.078523948922789e-06, "loss": 0.1219, "step": 9460 }, { "epoch": 0.2800615129827882, "grad_norm": 1.551857829093933, "learning_rate": 1.0796641581932434e-06, "loss": 0.1124, "step": 9470 }, { "epoch": 0.2803572484769622, "grad_norm": 1.3487508296966553, "learning_rate": 1.0808043674636978e-06, "loss": 0.1127, "step": 9480 }, { "epoch": 0.2806529839711362, "grad_norm": 1.347826361656189, "learning_rate": 1.081944576734152e-06, "loss": 0.116, "step": 9490 }, { "epoch": 0.28094871946531025, "grad_norm": 1.2970168590545654, "learning_rate": 1.0830847860046065e-06, "loss": 0.1063, "step": 9500 }, { "epoch": 0.28124445495948425, "grad_norm": 1.094598650932312, "learning_rate": 1.0842249952750609e-06, "loss": 0.1182, "step": 9510 }, { "epoch": 0.28154019045365825, "grad_norm": 1.934380054473877, "learning_rate": 1.0853652045455153e-06, "loss": 0.1194, "step": 9520 }, { "epoch": 0.28183592594783224, "grad_norm": 1.9961296319961548, "learning_rate": 1.0865054138159694e-06, "loss": 0.1039, "step": 9530 }, { "epoch": 0.28213166144200624, "grad_norm": 2.0272257328033447, "learning_rate": 1.087645623086424e-06, "loss": 0.1246, "step": 9540 }, { "epoch": 0.2824273969361803, "grad_norm": 1.3669723272323608, "learning_rate": 1.0887858323568784e-06, "loss": 0.0999, "step": 9550 }, { "epoch": 0.2827231324303543, "grad_norm": 2.01397705078125, "learning_rate": 1.0899260416273328e-06, "loss": 0.1167, "step": 9560 }, { "epoch": 0.2830188679245283, "grad_norm": 2.6384072303771973, "learning_rate": 1.091066250897787e-06, "loss": 0.126, "step": 9570 }, { "epoch": 0.2833146034187023, "grad_norm": 1.550790548324585, "learning_rate": 1.0922064601682413e-06, "loss": 0.1133, "step": 9580 }, { "epoch": 0.28361033891287635, "grad_norm": 1.2239848375320435, "learning_rate": 1.093346669438696e-06, "loss": 0.1135, "step": 9590 }, { "epoch": 0.28390607440705035, "grad_norm": 1.2337117195129395, "learning_rate": 1.0944868787091503e-06, "loss": 0.0804, "step": 9600 }, { "epoch": 0.28420180990122434, "grad_norm": 1.362929344177246, "learning_rate": 1.0956270879796044e-06, "loss": 0.1158, "step": 9610 }, { "epoch": 0.28449754539539834, "grad_norm": 2.2618072032928467, "learning_rate": 1.0967672972500588e-06, "loss": 0.1448, "step": 9620 }, { "epoch": 0.28479328088957234, "grad_norm": 1.5657097101211548, "learning_rate": 1.0979075065205134e-06, "loss": 0.122, "step": 9630 }, { "epoch": 0.2850890163837464, "grad_norm": 1.235973596572876, "learning_rate": 1.0990477157909678e-06, "loss": 0.1061, "step": 9640 }, { "epoch": 0.2853847518779204, "grad_norm": 0.8688904643058777, "learning_rate": 1.100187925061422e-06, "loss": 0.1101, "step": 9650 }, { "epoch": 0.2856804873720944, "grad_norm": 1.3054732084274292, "learning_rate": 1.1013281343318763e-06, "loss": 0.1131, "step": 9660 }, { "epoch": 0.2859762228662684, "grad_norm": 1.3709168434143066, "learning_rate": 1.1024683436023307e-06, "loss": 0.1162, "step": 9670 }, { "epoch": 0.28627195836044245, "grad_norm": 1.8477131128311157, "learning_rate": 1.1036085528727853e-06, "loss": 0.1203, "step": 9680 }, { "epoch": 0.28656769385461645, "grad_norm": 1.9844791889190674, "learning_rate": 1.1047487621432395e-06, "loss": 0.1193, "step": 9690 }, { "epoch": 0.28686342934879044, "grad_norm": 1.3622442483901978, "learning_rate": 1.1058889714136938e-06, "loss": 0.0943, "step": 9700 }, { "epoch": 0.28715916484296444, "grad_norm": 2.0615713596343994, "learning_rate": 1.1070291806841482e-06, "loss": 0.1283, "step": 9710 }, { "epoch": 0.28745490033713844, "grad_norm": 1.7549121379852295, "learning_rate": 1.1081693899546026e-06, "loss": 0.1151, "step": 9720 }, { "epoch": 0.2877506358313125, "grad_norm": 3.400960922241211, "learning_rate": 1.109309599225057e-06, "loss": 0.1105, "step": 9730 }, { "epoch": 0.2880463713254865, "grad_norm": 1.4789323806762695, "learning_rate": 1.1104498084955113e-06, "loss": 0.103, "step": 9740 }, { "epoch": 0.2883421068196605, "grad_norm": 1.6019301414489746, "learning_rate": 1.1115900177659657e-06, "loss": 0.106, "step": 9750 }, { "epoch": 0.2886378423138345, "grad_norm": 2.109119176864624, "learning_rate": 1.11273022703642e-06, "loss": 0.1334, "step": 9760 }, { "epoch": 0.2889335778080085, "grad_norm": 0.9909560680389404, "learning_rate": 1.1138704363068745e-06, "loss": 0.1246, "step": 9770 }, { "epoch": 0.28922931330218254, "grad_norm": 0.6157114505767822, "learning_rate": 1.1150106455773289e-06, "loss": 0.114, "step": 9780 }, { "epoch": 0.28952504879635654, "grad_norm": 1.164833664894104, "learning_rate": 1.1161508548477832e-06, "loss": 0.1221, "step": 9790 }, { "epoch": 0.28982078429053054, "grad_norm": 1.1469535827636719, "learning_rate": 1.1172910641182376e-06, "loss": 0.0997, "step": 9800 }, { "epoch": 0.29011651978470454, "grad_norm": 1.0561244487762451, "learning_rate": 1.1184312733886918e-06, "loss": 0.1223, "step": 9810 }, { "epoch": 0.2904122552788786, "grad_norm": 2.368572950363159, "learning_rate": 1.1195714826591464e-06, "loss": 0.1028, "step": 9820 }, { "epoch": 0.2907079907730526, "grad_norm": 2.064131498336792, "learning_rate": 1.1207116919296007e-06, "loss": 0.1093, "step": 9830 }, { "epoch": 0.2910037262672266, "grad_norm": 1.0284605026245117, "learning_rate": 1.1218519012000551e-06, "loss": 0.0922, "step": 9840 }, { "epoch": 0.2912994617614006, "grad_norm": 1.4459513425827026, "learning_rate": 1.1229921104705093e-06, "loss": 0.1157, "step": 9850 }, { "epoch": 0.2915951972555746, "grad_norm": 2.8656933307647705, "learning_rate": 1.1241323197409639e-06, "loss": 0.1246, "step": 9860 }, { "epoch": 0.29189093274974864, "grad_norm": 1.3346049785614014, "learning_rate": 1.1252725290114182e-06, "loss": 0.1107, "step": 9870 }, { "epoch": 0.29218666824392264, "grad_norm": 1.553816795349121, "learning_rate": 1.1264127382818726e-06, "loss": 0.1115, "step": 9880 }, { "epoch": 0.29248240373809664, "grad_norm": 1.1711013317108154, "learning_rate": 1.1275529475523268e-06, "loss": 0.1031, "step": 9890 }, { "epoch": 0.29277813923227064, "grad_norm": 1.953853964805603, "learning_rate": 1.1286931568227812e-06, "loss": 0.1152, "step": 9900 }, { "epoch": 0.2930738747264447, "grad_norm": 1.5275319814682007, "learning_rate": 1.1298333660932358e-06, "loss": 0.1221, "step": 9910 }, { "epoch": 0.2933696102206187, "grad_norm": 1.9555613994598389, "learning_rate": 1.1309735753636901e-06, "loss": 0.1363, "step": 9920 }, { "epoch": 0.2936653457147927, "grad_norm": 1.7758419513702393, "learning_rate": 1.1321137846341443e-06, "loss": 0.1143, "step": 9930 }, { "epoch": 0.2939610812089667, "grad_norm": 2.2353785037994385, "learning_rate": 1.1332539939045987e-06, "loss": 0.0951, "step": 9940 }, { "epoch": 0.2942568167031407, "grad_norm": 1.5622835159301758, "learning_rate": 1.134394203175053e-06, "loss": 0.1011, "step": 9950 }, { "epoch": 0.29455255219731474, "grad_norm": 1.795416235923767, "learning_rate": 1.1355344124455076e-06, "loss": 0.1284, "step": 9960 }, { "epoch": 0.29484828769148874, "grad_norm": 3.122126340866089, "learning_rate": 1.136674621715962e-06, "loss": 0.1258, "step": 9970 }, { "epoch": 0.29514402318566274, "grad_norm": 1.2072231769561768, "learning_rate": 1.1378148309864162e-06, "loss": 0.1177, "step": 9980 }, { "epoch": 0.29543975867983674, "grad_norm": 1.6019700765609741, "learning_rate": 1.1389550402568706e-06, "loss": 0.104, "step": 9990 }, { "epoch": 0.29573549417401074, "grad_norm": 1.3372952938079834, "learning_rate": 1.1400952495273251e-06, "loss": 0.0941, "step": 10000 }, { "epoch": 0.2960312296681848, "grad_norm": 2.3658859729766846, "learning_rate": 1.1412354587977795e-06, "loss": 0.1157, "step": 10010 }, { "epoch": 0.2963269651623588, "grad_norm": 2.151214838027954, "learning_rate": 1.1423756680682337e-06, "loss": 0.1128, "step": 10020 }, { "epoch": 0.2966227006565328, "grad_norm": 1.2463027238845825, "learning_rate": 1.143515877338688e-06, "loss": 0.1042, "step": 10030 }, { "epoch": 0.2969184361507068, "grad_norm": 1.4468214511871338, "learning_rate": 1.1446560866091424e-06, "loss": 0.1043, "step": 10040 }, { "epoch": 0.29721417164488084, "grad_norm": 1.9943915605545044, "learning_rate": 1.145796295879597e-06, "loss": 0.1141, "step": 10050 }, { "epoch": 0.29750990713905484, "grad_norm": 1.3757392168045044, "learning_rate": 1.1469365051500512e-06, "loss": 0.1117, "step": 10060 }, { "epoch": 0.29780564263322884, "grad_norm": 1.4458235502243042, "learning_rate": 1.1480767144205056e-06, "loss": 0.1262, "step": 10070 }, { "epoch": 0.29810137812740284, "grad_norm": 1.326414942741394, "learning_rate": 1.14921692369096e-06, "loss": 0.098, "step": 10080 }, { "epoch": 0.29839711362157684, "grad_norm": 1.0406619310379028, "learning_rate": 1.1503571329614145e-06, "loss": 0.1123, "step": 10090 }, { "epoch": 0.2986928491157509, "grad_norm": 1.2436457872390747, "learning_rate": 1.1514973422318687e-06, "loss": 0.104, "step": 10100 }, { "epoch": 0.2989885846099249, "grad_norm": 1.497986078262329, "learning_rate": 1.152637551502323e-06, "loss": 0.1323, "step": 10110 }, { "epoch": 0.2992843201040989, "grad_norm": 2.0938241481781006, "learning_rate": 1.1537777607727775e-06, "loss": 0.1096, "step": 10120 }, { "epoch": 0.2995800555982729, "grad_norm": 1.4392881393432617, "learning_rate": 1.1549179700432318e-06, "loss": 0.1083, "step": 10130 }, { "epoch": 0.29987579109244694, "grad_norm": 1.2219558954238892, "learning_rate": 1.1560581793136862e-06, "loss": 0.1026, "step": 10140 }, { "epoch": 0.30017152658662094, "grad_norm": 1.6443126201629639, "learning_rate": 1.1571983885841406e-06, "loss": 0.0961, "step": 10150 }, { "epoch": 0.30046726208079494, "grad_norm": 2.451397180557251, "learning_rate": 1.158338597854595e-06, "loss": 0.1198, "step": 10160 }, { "epoch": 0.30076299757496894, "grad_norm": 1.4568135738372803, "learning_rate": 1.1594788071250493e-06, "loss": 0.1147, "step": 10170 }, { "epoch": 0.30105873306914294, "grad_norm": 1.8405513763427734, "learning_rate": 1.1606190163955035e-06, "loss": 0.1208, "step": 10180 }, { "epoch": 0.301354468563317, "grad_norm": 1.8588809967041016, "learning_rate": 1.161759225665958e-06, "loss": 0.1022, "step": 10190 }, { "epoch": 0.301650204057491, "grad_norm": 1.1441929340362549, "learning_rate": 1.1628994349364125e-06, "loss": 0.0967, "step": 10200 }, { "epoch": 0.301945939551665, "grad_norm": 1.832527756690979, "learning_rate": 1.1640396442068669e-06, "loss": 0.1393, "step": 10210 }, { "epoch": 0.302241675045839, "grad_norm": 1.2463804483413696, "learning_rate": 1.165179853477321e-06, "loss": 0.1066, "step": 10220 }, { "epoch": 0.302537410540013, "grad_norm": 1.4473994970321655, "learning_rate": 1.1663200627477756e-06, "loss": 0.1199, "step": 10230 }, { "epoch": 0.30283314603418704, "grad_norm": 1.3036376237869263, "learning_rate": 1.16746027201823e-06, "loss": 0.093, "step": 10240 }, { "epoch": 0.30312888152836104, "grad_norm": 1.7165189981460571, "learning_rate": 1.1686004812886844e-06, "loss": 0.0935, "step": 10250 }, { "epoch": 0.30342461702253504, "grad_norm": 1.5405967235565186, "learning_rate": 1.1697406905591385e-06, "loss": 0.1195, "step": 10260 }, { "epoch": 0.30372035251670904, "grad_norm": 0.8855528831481934, "learning_rate": 1.170880899829593e-06, "loss": 0.1148, "step": 10270 }, { "epoch": 0.3040160880108831, "grad_norm": 1.6471772193908691, "learning_rate": 1.1720211091000475e-06, "loss": 0.102, "step": 10280 }, { "epoch": 0.3043118235050571, "grad_norm": 0.819646954536438, "learning_rate": 1.1731613183705019e-06, "loss": 0.0952, "step": 10290 }, { "epoch": 0.3046075589992311, "grad_norm": 3.021801471710205, "learning_rate": 1.174301527640956e-06, "loss": 0.0802, "step": 10300 }, { "epoch": 0.3049032944934051, "grad_norm": 2.8332037925720215, "learning_rate": 1.1754417369114104e-06, "loss": 0.1192, "step": 10310 }, { "epoch": 0.3051990299875791, "grad_norm": 1.9271395206451416, "learning_rate": 1.176581946181865e-06, "loss": 0.1103, "step": 10320 }, { "epoch": 0.30549476548175314, "grad_norm": 2.2399768829345703, "learning_rate": 1.1777221554523194e-06, "loss": 0.091, "step": 10330 }, { "epoch": 0.30579050097592714, "grad_norm": 1.22642982006073, "learning_rate": 1.1788623647227735e-06, "loss": 0.114, "step": 10340 }, { "epoch": 0.30608623647010114, "grad_norm": 1.1692636013031006, "learning_rate": 1.180002573993228e-06, "loss": 0.0944, "step": 10350 }, { "epoch": 0.30638197196427513, "grad_norm": 1.4063799381256104, "learning_rate": 1.1811427832636823e-06, "loss": 0.1443, "step": 10360 }, { "epoch": 0.3066777074584492, "grad_norm": 3.734407424926758, "learning_rate": 1.1822829925341369e-06, "loss": 0.1483, "step": 10370 }, { "epoch": 0.3069734429526232, "grad_norm": 2.053419828414917, "learning_rate": 1.183423201804591e-06, "loss": 0.1123, "step": 10380 }, { "epoch": 0.3072691784467972, "grad_norm": 2.038557767868042, "learning_rate": 1.1845634110750454e-06, "loss": 0.1077, "step": 10390 }, { "epoch": 0.3075649139409712, "grad_norm": 1.1519049406051636, "learning_rate": 1.1857036203454998e-06, "loss": 0.0931, "step": 10400 }, { "epoch": 0.3078606494351452, "grad_norm": 1.237630009651184, "learning_rate": 1.1868438296159542e-06, "loss": 0.114, "step": 10410 }, { "epoch": 0.30815638492931924, "grad_norm": 1.4593144655227661, "learning_rate": 1.1879840388864086e-06, "loss": 0.1079, "step": 10420 }, { "epoch": 0.30845212042349324, "grad_norm": 1.1950563192367554, "learning_rate": 1.189124248156863e-06, "loss": 0.1062, "step": 10430 }, { "epoch": 0.30874785591766724, "grad_norm": 1.4219180345535278, "learning_rate": 1.1902644574273173e-06, "loss": 0.1045, "step": 10440 }, { "epoch": 0.30904359141184123, "grad_norm": 1.6287249326705933, "learning_rate": 1.1914046666977717e-06, "loss": 0.1019, "step": 10450 }, { "epoch": 0.30933932690601523, "grad_norm": 1.0440698862075806, "learning_rate": 1.192544875968226e-06, "loss": 0.1166, "step": 10460 }, { "epoch": 0.3096350624001893, "grad_norm": 2.122018814086914, "learning_rate": 1.1936850852386804e-06, "loss": 0.1283, "step": 10470 }, { "epoch": 0.3099307978943633, "grad_norm": 2.1038434505462646, "learning_rate": 1.1948252945091348e-06, "loss": 0.0991, "step": 10480 }, { "epoch": 0.3102265333885373, "grad_norm": 1.3110798597335815, "learning_rate": 1.1959655037795892e-06, "loss": 0.1098, "step": 10490 }, { "epoch": 0.3105222688827113, "grad_norm": 1.66493821144104, "learning_rate": 1.1971057130500434e-06, "loss": 0.09, "step": 10500 }, { "epoch": 0.31081800437688534, "grad_norm": 1.5197759866714478, "learning_rate": 1.198245922320498e-06, "loss": 0.1115, "step": 10510 }, { "epoch": 0.31111373987105934, "grad_norm": 2.197845935821533, "learning_rate": 1.1993861315909523e-06, "loss": 0.11, "step": 10520 }, { "epoch": 0.31140947536523333, "grad_norm": 1.8984631299972534, "learning_rate": 1.2005263408614067e-06, "loss": 0.1183, "step": 10530 }, { "epoch": 0.31170521085940733, "grad_norm": 1.6799590587615967, "learning_rate": 1.2016665501318609e-06, "loss": 0.1063, "step": 10540 }, { "epoch": 0.31200094635358133, "grad_norm": 2.4965362548828125, "learning_rate": 1.2028067594023155e-06, "loss": 0.0977, "step": 10550 }, { "epoch": 0.3122966818477554, "grad_norm": 1.316033959388733, "learning_rate": 1.2039469686727698e-06, "loss": 0.1263, "step": 10560 }, { "epoch": 0.3125924173419294, "grad_norm": 2.8242878913879395, "learning_rate": 1.2050871779432242e-06, "loss": 0.1059, "step": 10570 }, { "epoch": 0.3128881528361034, "grad_norm": 1.1694066524505615, "learning_rate": 1.2062273872136784e-06, "loss": 0.1233, "step": 10580 }, { "epoch": 0.3131838883302774, "grad_norm": 1.5666266679763794, "learning_rate": 1.2073675964841328e-06, "loss": 0.1038, "step": 10590 }, { "epoch": 0.31347962382445144, "grad_norm": 0.7725870609283447, "learning_rate": 1.2085078057545873e-06, "loss": 0.0922, "step": 10600 }, { "epoch": 0.31377535931862544, "grad_norm": 2.153752326965332, "learning_rate": 1.2096480150250417e-06, "loss": 0.1069, "step": 10610 }, { "epoch": 0.31407109481279943, "grad_norm": 0.928936779499054, "learning_rate": 1.2107882242954959e-06, "loss": 0.0992, "step": 10620 }, { "epoch": 0.31436683030697343, "grad_norm": 1.3046907186508179, "learning_rate": 1.2119284335659503e-06, "loss": 0.1123, "step": 10630 }, { "epoch": 0.31466256580114743, "grad_norm": 1.9979350566864014, "learning_rate": 1.2130686428364046e-06, "loss": 0.0979, "step": 10640 }, { "epoch": 0.3149583012953215, "grad_norm": 0.8486510515213013, "learning_rate": 1.2142088521068592e-06, "loss": 0.0981, "step": 10650 }, { "epoch": 0.3152540367894955, "grad_norm": 2.4037694931030273, "learning_rate": 1.2153490613773134e-06, "loss": 0.1298, "step": 10660 }, { "epoch": 0.3155497722836695, "grad_norm": 1.498759388923645, "learning_rate": 1.2164892706477678e-06, "loss": 0.1248, "step": 10670 }, { "epoch": 0.3158455077778435, "grad_norm": 1.281671404838562, "learning_rate": 1.2176294799182221e-06, "loss": 0.1085, "step": 10680 }, { "epoch": 0.3161412432720175, "grad_norm": 1.3181220293045044, "learning_rate": 1.2187696891886767e-06, "loss": 0.0982, "step": 10690 }, { "epoch": 0.31643697876619153, "grad_norm": 1.9187276363372803, "learning_rate": 1.219909898459131e-06, "loss": 0.0916, "step": 10700 }, { "epoch": 0.31673271426036553, "grad_norm": 1.6081998348236084, "learning_rate": 1.2210501077295853e-06, "loss": 0.1399, "step": 10710 }, { "epoch": 0.31702844975453953, "grad_norm": 1.7658389806747437, "learning_rate": 1.2221903170000397e-06, "loss": 0.1117, "step": 10720 }, { "epoch": 0.31732418524871353, "grad_norm": 3.0100393295288086, "learning_rate": 1.223330526270494e-06, "loss": 0.1139, "step": 10730 }, { "epoch": 0.3176199207428876, "grad_norm": 1.7563133239746094, "learning_rate": 1.2244707355409484e-06, "loss": 0.1021, "step": 10740 }, { "epoch": 0.3179156562370616, "grad_norm": 1.1981126070022583, "learning_rate": 1.2256109448114028e-06, "loss": 0.0802, "step": 10750 }, { "epoch": 0.3182113917312356, "grad_norm": 2.088627338409424, "learning_rate": 1.2267511540818572e-06, "loss": 0.1402, "step": 10760 }, { "epoch": 0.3185071272254096, "grad_norm": 1.0397636890411377, "learning_rate": 1.2278913633523115e-06, "loss": 0.1085, "step": 10770 }, { "epoch": 0.3188028627195836, "grad_norm": 2.0893611907958984, "learning_rate": 1.229031572622766e-06, "loss": 0.114, "step": 10780 }, { "epoch": 0.31909859821375763, "grad_norm": 1.1896512508392334, "learning_rate": 1.2301717818932203e-06, "loss": 0.1157, "step": 10790 }, { "epoch": 0.31939433370793163, "grad_norm": 0.8805369138717651, "learning_rate": 1.2313119911636747e-06, "loss": 0.0887, "step": 10800 }, { "epoch": 0.31969006920210563, "grad_norm": 1.1779825687408447, "learning_rate": 1.232452200434129e-06, "loss": 0.1112, "step": 10810 }, { "epoch": 0.31998580469627963, "grad_norm": 1.5855553150177002, "learning_rate": 1.2335924097045832e-06, "loss": 0.1135, "step": 10820 }, { "epoch": 0.3202815401904537, "grad_norm": 1.647279977798462, "learning_rate": 1.2347326189750378e-06, "loss": 0.0989, "step": 10830 }, { "epoch": 0.3205772756846277, "grad_norm": 1.2696659564971924, "learning_rate": 1.2358728282454922e-06, "loss": 0.0983, "step": 10840 }, { "epoch": 0.3208730111788017, "grad_norm": 1.5669472217559814, "learning_rate": 1.2370130375159466e-06, "loss": 0.0824, "step": 10850 }, { "epoch": 0.3211687466729757, "grad_norm": 2.1130337715148926, "learning_rate": 1.2381532467864007e-06, "loss": 0.1109, "step": 10860 }, { "epoch": 0.3214644821671497, "grad_norm": 2.061450719833374, "learning_rate": 1.239293456056855e-06, "loss": 0.108, "step": 10870 }, { "epoch": 0.32176021766132373, "grad_norm": 2.5293197631835938, "learning_rate": 1.2404336653273097e-06, "loss": 0.1046, "step": 10880 }, { "epoch": 0.32205595315549773, "grad_norm": 2.0121610164642334, "learning_rate": 1.241573874597764e-06, "loss": 0.0973, "step": 10890 }, { "epoch": 0.32235168864967173, "grad_norm": 1.6021910905838013, "learning_rate": 1.2427140838682182e-06, "loss": 0.1124, "step": 10900 }, { "epoch": 0.32264742414384573, "grad_norm": 1.7405065298080444, "learning_rate": 1.2438542931386726e-06, "loss": 0.1159, "step": 10910 }, { "epoch": 0.32294315963801973, "grad_norm": 1.2573871612548828, "learning_rate": 1.2449945024091272e-06, "loss": 0.1175, "step": 10920 }, { "epoch": 0.3232388951321938, "grad_norm": 1.862111210823059, "learning_rate": 1.2461347116795816e-06, "loss": 0.1024, "step": 10930 }, { "epoch": 0.3235346306263678, "grad_norm": 1.221782922744751, "learning_rate": 1.2472749209500357e-06, "loss": 0.1038, "step": 10940 }, { "epoch": 0.3238303661205418, "grad_norm": 1.2268530130386353, "learning_rate": 1.2484151302204901e-06, "loss": 0.0933, "step": 10950 }, { "epoch": 0.3241261016147158, "grad_norm": 1.8843144178390503, "learning_rate": 1.2495553394909445e-06, "loss": 0.1318, "step": 10960 }, { "epoch": 0.32442183710888983, "grad_norm": 1.408066987991333, "learning_rate": 1.250695548761399e-06, "loss": 0.1149, "step": 10970 }, { "epoch": 0.32471757260306383, "grad_norm": 2.04632306098938, "learning_rate": 1.2518357580318532e-06, "loss": 0.1042, "step": 10980 }, { "epoch": 0.32501330809723783, "grad_norm": 1.047501802444458, "learning_rate": 1.2529759673023076e-06, "loss": 0.0962, "step": 10990 }, { "epoch": 0.32530904359141183, "grad_norm": 2.777326822280884, "learning_rate": 1.254116176572762e-06, "loss": 0.1096, "step": 11000 }, { "epoch": 0.3256047790855858, "grad_norm": 1.0312912464141846, "learning_rate": 1.2552563858432166e-06, "loss": 0.0998, "step": 11010 }, { "epoch": 0.3259005145797599, "grad_norm": 1.3974286317825317, "learning_rate": 1.2563965951136707e-06, "loss": 0.1153, "step": 11020 }, { "epoch": 0.3261962500739339, "grad_norm": 1.7079830169677734, "learning_rate": 1.2575368043841251e-06, "loss": 0.1049, "step": 11030 }, { "epoch": 0.3264919855681079, "grad_norm": 1.384421706199646, "learning_rate": 1.2586770136545795e-06, "loss": 0.1145, "step": 11040 }, { "epoch": 0.3267877210622819, "grad_norm": 2.3230175971984863, "learning_rate": 1.2598172229250339e-06, "loss": 0.1005, "step": 11050 }, { "epoch": 0.32708345655645593, "grad_norm": 2.4177465438842773, "learning_rate": 1.2609574321954883e-06, "loss": 0.13, "step": 11060 }, { "epoch": 0.32737919205062993, "grad_norm": 1.4566057920455933, "learning_rate": 1.2620976414659426e-06, "loss": 0.1158, "step": 11070 }, { "epoch": 0.32767492754480393, "grad_norm": 2.1801087856292725, "learning_rate": 1.263237850736397e-06, "loss": 0.1049, "step": 11080 }, { "epoch": 0.3279706630389779, "grad_norm": 1.6394754648208618, "learning_rate": 1.2643780600068514e-06, "loss": 0.0963, "step": 11090 }, { "epoch": 0.3282663985331519, "grad_norm": 1.9540959596633911, "learning_rate": 1.2655182692773056e-06, "loss": 0.0871, "step": 11100 }, { "epoch": 0.328562134027326, "grad_norm": 2.2672085762023926, "learning_rate": 1.2666584785477601e-06, "loss": 0.1277, "step": 11110 }, { "epoch": 0.3288578695215, "grad_norm": 1.6091519594192505, "learning_rate": 1.2677986878182145e-06, "loss": 0.1141, "step": 11120 }, { "epoch": 0.329153605015674, "grad_norm": 1.8360782861709595, "learning_rate": 1.2689388970886689e-06, "loss": 0.1301, "step": 11130 }, { "epoch": 0.329449340509848, "grad_norm": 0.8078816533088684, "learning_rate": 1.270079106359123e-06, "loss": 0.083, "step": 11140 }, { "epoch": 0.329745076004022, "grad_norm": 0.840643048286438, "learning_rate": 1.2712193156295776e-06, "loss": 0.0873, "step": 11150 }, { "epoch": 0.33004081149819603, "grad_norm": 1.7644106149673462, "learning_rate": 1.272359524900032e-06, "loss": 0.1304, "step": 11160 }, { "epoch": 0.33033654699237003, "grad_norm": 2.6023151874542236, "learning_rate": 1.2734997341704864e-06, "loss": 0.1213, "step": 11170 }, { "epoch": 0.330632282486544, "grad_norm": 1.979844570159912, "learning_rate": 1.2746399434409406e-06, "loss": 0.1326, "step": 11180 }, { "epoch": 0.330928017980718, "grad_norm": 1.8069939613342285, "learning_rate": 1.275780152711395e-06, "loss": 0.1103, "step": 11190 }, { "epoch": 0.3312237534748921, "grad_norm": 1.3159468173980713, "learning_rate": 1.2769203619818495e-06, "loss": 0.1097, "step": 11200 }, { "epoch": 0.3315194889690661, "grad_norm": 3.015388011932373, "learning_rate": 1.278060571252304e-06, "loss": 0.1073, "step": 11210 }, { "epoch": 0.3318152244632401, "grad_norm": 1.9404549598693848, "learning_rate": 1.279200780522758e-06, "loss": 0.1219, "step": 11220 }, { "epoch": 0.3321109599574141, "grad_norm": 1.3471031188964844, "learning_rate": 1.2803409897932125e-06, "loss": 0.1052, "step": 11230 }, { "epoch": 0.3324066954515881, "grad_norm": 1.6730549335479736, "learning_rate": 1.281481199063667e-06, "loss": 0.0916, "step": 11240 }, { "epoch": 0.33270243094576213, "grad_norm": 1.4150209426879883, "learning_rate": 1.2826214083341214e-06, "loss": 0.0845, "step": 11250 }, { "epoch": 0.3329981664399361, "grad_norm": 1.4710625410079956, "learning_rate": 1.2837616176045758e-06, "loss": 0.1145, "step": 11260 }, { "epoch": 0.3332939019341101, "grad_norm": 2.7173986434936523, "learning_rate": 1.28490182687503e-06, "loss": 0.1178, "step": 11270 }, { "epoch": 0.3335896374282841, "grad_norm": 1.4599220752716064, "learning_rate": 1.2860420361454843e-06, "loss": 0.1049, "step": 11280 }, { "epoch": 0.3338853729224582, "grad_norm": 1.1491037607192993, "learning_rate": 1.287182245415939e-06, "loss": 0.1103, "step": 11290 }, { "epoch": 0.3341811084166322, "grad_norm": 1.52297842502594, "learning_rate": 1.2883224546863933e-06, "loss": 0.0907, "step": 11300 }, { "epoch": 0.3344768439108062, "grad_norm": 2.0081522464752197, "learning_rate": 1.2894626639568475e-06, "loss": 0.1144, "step": 11310 }, { "epoch": 0.3347725794049802, "grad_norm": 1.3598159551620483, "learning_rate": 1.2906028732273018e-06, "loss": 0.1225, "step": 11320 }, { "epoch": 0.3350683148991542, "grad_norm": 0.9785370826721191, "learning_rate": 1.2917430824977562e-06, "loss": 0.0979, "step": 11330 }, { "epoch": 0.33536405039332823, "grad_norm": 2.0487914085388184, "learning_rate": 1.2928832917682108e-06, "loss": 0.0902, "step": 11340 }, { "epoch": 0.3356597858875022, "grad_norm": 1.4335976839065552, "learning_rate": 1.294023501038665e-06, "loss": 0.0886, "step": 11350 }, { "epoch": 0.3359555213816762, "grad_norm": 1.7523746490478516, "learning_rate": 1.2951637103091194e-06, "loss": 0.1224, "step": 11360 }, { "epoch": 0.3362512568758502, "grad_norm": 1.0008922815322876, "learning_rate": 1.2963039195795737e-06, "loss": 0.0947, "step": 11370 }, { "epoch": 0.3365469923700242, "grad_norm": 1.5690432786941528, "learning_rate": 1.2974441288500283e-06, "loss": 0.0967, "step": 11380 }, { "epoch": 0.3368427278641983, "grad_norm": 1.3592265844345093, "learning_rate": 1.2985843381204825e-06, "loss": 0.0961, "step": 11390 }, { "epoch": 0.3371384633583723, "grad_norm": 2.0600943565368652, "learning_rate": 1.2997245473909369e-06, "loss": 0.092, "step": 11400 }, { "epoch": 0.3374341988525463, "grad_norm": 1.360729455947876, "learning_rate": 1.3008647566613912e-06, "loss": 0.1172, "step": 11410 }, { "epoch": 0.3377299343467203, "grad_norm": 2.880571126937866, "learning_rate": 1.3020049659318456e-06, "loss": 0.1081, "step": 11420 }, { "epoch": 0.3380256698408943, "grad_norm": 2.158111095428467, "learning_rate": 1.3031451752023e-06, "loss": 0.1019, "step": 11430 }, { "epoch": 0.3383214053350683, "grad_norm": 2.073127269744873, "learning_rate": 1.3042853844727544e-06, "loss": 0.1061, "step": 11440 }, { "epoch": 0.3386171408292423, "grad_norm": 2.566769599914551, "learning_rate": 1.3054255937432087e-06, "loss": 0.1041, "step": 11450 }, { "epoch": 0.3389128763234163, "grad_norm": 1.208232045173645, "learning_rate": 1.3065658030136631e-06, "loss": 0.1182, "step": 11460 }, { "epoch": 0.3392086118175903, "grad_norm": 2.1531271934509277, "learning_rate": 1.3077060122841175e-06, "loss": 0.118, "step": 11470 }, { "epoch": 0.3395043473117644, "grad_norm": 1.7846883535385132, "learning_rate": 1.3088462215545719e-06, "loss": 0.1138, "step": 11480 }, { "epoch": 0.3398000828059384, "grad_norm": 2.2219135761260986, "learning_rate": 1.3099864308250263e-06, "loss": 0.0997, "step": 11490 }, { "epoch": 0.3400958183001124, "grad_norm": 1.2820582389831543, "learning_rate": 1.3111266400954806e-06, "loss": 0.0858, "step": 11500 }, { "epoch": 0.3403915537942864, "grad_norm": 1.8923957347869873, "learning_rate": 1.3122668493659348e-06, "loss": 0.1322, "step": 11510 }, { "epoch": 0.3406872892884604, "grad_norm": 1.6784636974334717, "learning_rate": 1.3134070586363894e-06, "loss": 0.103, "step": 11520 }, { "epoch": 0.3409830247826344, "grad_norm": 2.121670961380005, "learning_rate": 1.3145472679068438e-06, "loss": 0.1021, "step": 11530 }, { "epoch": 0.3412787602768084, "grad_norm": 1.5902663469314575, "learning_rate": 1.3156874771772981e-06, "loss": 0.1163, "step": 11540 }, { "epoch": 0.3415744957709824, "grad_norm": 0.7183923721313477, "learning_rate": 1.3168276864477523e-06, "loss": 0.0725, "step": 11550 }, { "epoch": 0.3418702312651564, "grad_norm": 1.3338887691497803, "learning_rate": 1.3179678957182067e-06, "loss": 0.1206, "step": 11560 }, { "epoch": 0.3421659667593305, "grad_norm": 1.738667607307434, "learning_rate": 1.3191081049886613e-06, "loss": 0.1167, "step": 11570 }, { "epoch": 0.3424617022535045, "grad_norm": 1.8267208337783813, "learning_rate": 1.3202483142591156e-06, "loss": 0.1027, "step": 11580 }, { "epoch": 0.3427574377476785, "grad_norm": 0.9021258354187012, "learning_rate": 1.3213885235295698e-06, "loss": 0.0926, "step": 11590 }, { "epoch": 0.34305317324185247, "grad_norm": 1.229697823524475, "learning_rate": 1.3225287328000242e-06, "loss": 0.0915, "step": 11600 }, { "epoch": 0.34334890873602647, "grad_norm": 1.5423134565353394, "learning_rate": 1.3236689420704788e-06, "loss": 0.1309, "step": 11610 }, { "epoch": 0.3436446442302005, "grad_norm": 1.1463887691497803, "learning_rate": 1.3248091513409332e-06, "loss": 0.1207, "step": 11620 }, { "epoch": 0.3439403797243745, "grad_norm": 1.4592506885528564, "learning_rate": 1.3259493606113873e-06, "loss": 0.109, "step": 11630 }, { "epoch": 0.3442361152185485, "grad_norm": 1.4505058526992798, "learning_rate": 1.3270895698818417e-06, "loss": 0.1017, "step": 11640 }, { "epoch": 0.3445318507127225, "grad_norm": 3.361612319946289, "learning_rate": 1.328229779152296e-06, "loss": 0.0912, "step": 11650 }, { "epoch": 0.3448275862068966, "grad_norm": 2.325409173965454, "learning_rate": 1.3293699884227507e-06, "loss": 0.1235, "step": 11660 }, { "epoch": 0.3451233217010706, "grad_norm": 2.065865993499756, "learning_rate": 1.3305101976932048e-06, "loss": 0.1225, "step": 11670 }, { "epoch": 0.34541905719524457, "grad_norm": 1.4692537784576416, "learning_rate": 1.3316504069636592e-06, "loss": 0.1152, "step": 11680 }, { "epoch": 0.34571479268941857, "grad_norm": 1.3368390798568726, "learning_rate": 1.3327906162341136e-06, "loss": 0.099, "step": 11690 }, { "epoch": 0.34601052818359257, "grad_norm": 1.3383474349975586, "learning_rate": 1.3339308255045682e-06, "loss": 0.0825, "step": 11700 }, { "epoch": 0.3463062636777666, "grad_norm": 2.9789109230041504, "learning_rate": 1.3350710347750223e-06, "loss": 0.1351, "step": 11710 }, { "epoch": 0.3466019991719406, "grad_norm": 1.5941259860992432, "learning_rate": 1.3362112440454767e-06, "loss": 0.1328, "step": 11720 }, { "epoch": 0.3468977346661146, "grad_norm": 1.534295916557312, "learning_rate": 1.337351453315931e-06, "loss": 0.0986, "step": 11730 }, { "epoch": 0.3471934701602886, "grad_norm": 1.5481916666030884, "learning_rate": 1.3384916625863855e-06, "loss": 0.1052, "step": 11740 }, { "epoch": 0.3474892056544627, "grad_norm": 1.355768084526062, "learning_rate": 1.3396318718568398e-06, "loss": 0.096, "step": 11750 }, { "epoch": 0.3477849411486367, "grad_norm": 2.17297625541687, "learning_rate": 1.3407720811272942e-06, "loss": 0.1039, "step": 11760 }, { "epoch": 0.34808067664281067, "grad_norm": 2.094363212585449, "learning_rate": 1.3419122903977486e-06, "loss": 0.1214, "step": 11770 }, { "epoch": 0.34837641213698467, "grad_norm": 1.471405267715454, "learning_rate": 1.343052499668203e-06, "loss": 0.11, "step": 11780 }, { "epoch": 0.34867214763115867, "grad_norm": 1.6442985534667969, "learning_rate": 1.3441927089386571e-06, "loss": 0.0944, "step": 11790 }, { "epoch": 0.3489678831253327, "grad_norm": 1.4286552667617798, "learning_rate": 1.3453329182091117e-06, "loss": 0.0823, "step": 11800 }, { "epoch": 0.3492636186195067, "grad_norm": 1.9377614259719849, "learning_rate": 1.346473127479566e-06, "loss": 0.1268, "step": 11810 }, { "epoch": 0.3495593541136807, "grad_norm": 2.099898338317871, "learning_rate": 1.3476133367500205e-06, "loss": 0.1076, "step": 11820 }, { "epoch": 0.3498550896078547, "grad_norm": 1.5395199060440063, "learning_rate": 1.3487535460204746e-06, "loss": 0.1099, "step": 11830 }, { "epoch": 0.3501508251020288, "grad_norm": 2.112086534500122, "learning_rate": 1.3498937552909292e-06, "loss": 0.0974, "step": 11840 }, { "epoch": 0.35044656059620277, "grad_norm": 1.8782426118850708, "learning_rate": 1.3510339645613836e-06, "loss": 0.0841, "step": 11850 }, { "epoch": 0.35074229609037677, "grad_norm": 1.4485673904418945, "learning_rate": 1.352174173831838e-06, "loss": 0.1159, "step": 11860 }, { "epoch": 0.35103803158455077, "grad_norm": 1.3743534088134766, "learning_rate": 1.3533143831022921e-06, "loss": 0.1029, "step": 11870 }, { "epoch": 0.35133376707872477, "grad_norm": 1.5516916513442993, "learning_rate": 1.3544545923727465e-06, "loss": 0.1003, "step": 11880 }, { "epoch": 0.3516295025728988, "grad_norm": 1.6876963376998901, "learning_rate": 1.3555948016432011e-06, "loss": 0.1035, "step": 11890 }, { "epoch": 0.3519252380670728, "grad_norm": 1.253421425819397, "learning_rate": 1.3567350109136555e-06, "loss": 0.0918, "step": 11900 }, { "epoch": 0.3522209735612468, "grad_norm": 1.2895500659942627, "learning_rate": 1.3578752201841097e-06, "loss": 0.1354, "step": 11910 }, { "epoch": 0.3525167090554208, "grad_norm": 1.7182518243789673, "learning_rate": 1.359015429454564e-06, "loss": 0.125, "step": 11920 }, { "epoch": 0.3528124445495948, "grad_norm": 2.9903554916381836, "learning_rate": 1.3601556387250186e-06, "loss": 0.1133, "step": 11930 }, { "epoch": 0.35310818004376887, "grad_norm": 1.4871692657470703, "learning_rate": 1.361295847995473e-06, "loss": 0.0966, "step": 11940 }, { "epoch": 0.35340391553794287, "grad_norm": 1.815180778503418, "learning_rate": 1.3624360572659272e-06, "loss": 0.0965, "step": 11950 }, { "epoch": 0.35369965103211687, "grad_norm": 2.086777448654175, "learning_rate": 1.3635762665363815e-06, "loss": 0.1127, "step": 11960 }, { "epoch": 0.35399538652629087, "grad_norm": 2.336700201034546, "learning_rate": 1.364716475806836e-06, "loss": 0.1236, "step": 11970 }, { "epoch": 0.3542911220204649, "grad_norm": 1.5455620288848877, "learning_rate": 1.3658566850772905e-06, "loss": 0.1029, "step": 11980 }, { "epoch": 0.3545868575146389, "grad_norm": 1.71649968624115, "learning_rate": 1.3669968943477447e-06, "loss": 0.0889, "step": 11990 }, { "epoch": 0.3548825930088129, "grad_norm": 1.4748475551605225, "learning_rate": 1.368137103618199e-06, "loss": 0.09, "step": 12000 }, { "epoch": 0.3551783285029869, "grad_norm": 1.3285162448883057, "learning_rate": 1.3692773128886534e-06, "loss": 0.1133, "step": 12010 }, { "epoch": 0.3554740639971609, "grad_norm": 1.178979754447937, "learning_rate": 1.3704175221591078e-06, "loss": 0.1109, "step": 12020 }, { "epoch": 0.35576979949133497, "grad_norm": 1.6259969472885132, "learning_rate": 1.3715577314295622e-06, "loss": 0.1001, "step": 12030 }, { "epoch": 0.35606553498550897, "grad_norm": 1.3605866432189941, "learning_rate": 1.3726979407000166e-06, "loss": 0.1014, "step": 12040 }, { "epoch": 0.35636127047968297, "grad_norm": 1.5865929126739502, "learning_rate": 1.373838149970471e-06, "loss": 0.0898, "step": 12050 }, { "epoch": 0.35665700597385697, "grad_norm": 1.4827756881713867, "learning_rate": 1.3749783592409253e-06, "loss": 0.1035, "step": 12060 }, { "epoch": 0.356952741468031, "grad_norm": 1.2708405256271362, "learning_rate": 1.3761185685113797e-06, "loss": 0.1003, "step": 12070 }, { "epoch": 0.357248476962205, "grad_norm": 1.091397762298584, "learning_rate": 1.377258777781834e-06, "loss": 0.1019, "step": 12080 }, { "epoch": 0.357544212456379, "grad_norm": 1.9852370023727417, "learning_rate": 1.3783989870522884e-06, "loss": 0.0936, "step": 12090 }, { "epoch": 0.357839947950553, "grad_norm": 1.0979633331298828, "learning_rate": 1.3795391963227428e-06, "loss": 0.099, "step": 12100 }, { "epoch": 0.358135683444727, "grad_norm": 1.5026066303253174, "learning_rate": 1.380679405593197e-06, "loss": 0.1194, "step": 12110 }, { "epoch": 0.35843141893890107, "grad_norm": 1.0303860902786255, "learning_rate": 1.3818196148636516e-06, "loss": 0.1172, "step": 12120 }, { "epoch": 0.35872715443307507, "grad_norm": 2.01812481880188, "learning_rate": 1.382959824134106e-06, "loss": 0.1103, "step": 12130 }, { "epoch": 0.35902288992724907, "grad_norm": 2.0484392642974854, "learning_rate": 1.3841000334045603e-06, "loss": 0.0859, "step": 12140 }, { "epoch": 0.35931862542142307, "grad_norm": 2.229269027709961, "learning_rate": 1.3852402426750145e-06, "loss": 0.0845, "step": 12150 }, { "epoch": 0.35961436091559706, "grad_norm": 1.730782151222229, "learning_rate": 1.386380451945469e-06, "loss": 0.1247, "step": 12160 }, { "epoch": 0.3599100964097711, "grad_norm": 1.8088401556015015, "learning_rate": 1.3875206612159235e-06, "loss": 0.1053, "step": 12170 }, { "epoch": 0.3602058319039451, "grad_norm": 1.98612642288208, "learning_rate": 1.3886608704863778e-06, "loss": 0.1111, "step": 12180 }, { "epoch": 0.3605015673981191, "grad_norm": 0.7602404952049255, "learning_rate": 1.389801079756832e-06, "loss": 0.0917, "step": 12190 }, { "epoch": 0.3607973028922931, "grad_norm": 1.8742904663085938, "learning_rate": 1.3909412890272864e-06, "loss": 0.0911, "step": 12200 }, { "epoch": 0.36109303838646717, "grad_norm": 2.065394639968872, "learning_rate": 1.392081498297741e-06, "loss": 0.1238, "step": 12210 }, { "epoch": 0.36138877388064117, "grad_norm": 1.036638855934143, "learning_rate": 1.3932217075681953e-06, "loss": 0.1254, "step": 12220 }, { "epoch": 0.36168450937481517, "grad_norm": 1.5131670236587524, "learning_rate": 1.3943619168386495e-06, "loss": 0.1206, "step": 12230 }, { "epoch": 0.36198024486898916, "grad_norm": 1.652126669883728, "learning_rate": 1.3955021261091039e-06, "loss": 0.1088, "step": 12240 }, { "epoch": 0.36227598036316316, "grad_norm": 0.9727309942245483, "learning_rate": 1.3966423353795583e-06, "loss": 0.0974, "step": 12250 }, { "epoch": 0.3625717158573372, "grad_norm": 1.5098453760147095, "learning_rate": 1.3977825446500128e-06, "loss": 0.1176, "step": 12260 }, { "epoch": 0.3628674513515112, "grad_norm": 2.5783286094665527, "learning_rate": 1.398922753920467e-06, "loss": 0.1041, "step": 12270 }, { "epoch": 0.3631631868456852, "grad_norm": 2.2953431606292725, "learning_rate": 1.4000629631909214e-06, "loss": 0.0939, "step": 12280 }, { "epoch": 0.3634589223398592, "grad_norm": 1.934065818786621, "learning_rate": 1.4012031724613758e-06, "loss": 0.0813, "step": 12290 }, { "epoch": 0.36375465783403327, "grad_norm": 2.1621594429016113, "learning_rate": 1.4023433817318304e-06, "loss": 0.1008, "step": 12300 }, { "epoch": 0.36405039332820727, "grad_norm": 1.7922914028167725, "learning_rate": 1.4034835910022845e-06, "loss": 0.1086, "step": 12310 }, { "epoch": 0.36434612882238127, "grad_norm": 1.852483868598938, "learning_rate": 1.404623800272739e-06, "loss": 0.1043, "step": 12320 }, { "epoch": 0.36464186431655526, "grad_norm": 1.3128174543380737, "learning_rate": 1.4057640095431933e-06, "loss": 0.1009, "step": 12330 }, { "epoch": 0.36493759981072926, "grad_norm": 1.9175066947937012, "learning_rate": 1.4069042188136477e-06, "loss": 0.1096, "step": 12340 }, { "epoch": 0.3652333353049033, "grad_norm": 1.1874995231628418, "learning_rate": 1.408044428084102e-06, "loss": 0.0827, "step": 12350 }, { "epoch": 0.3655290707990773, "grad_norm": 1.6400448083877563, "learning_rate": 1.4091846373545564e-06, "loss": 0.1242, "step": 12360 }, { "epoch": 0.3658248062932513, "grad_norm": 1.6642391681671143, "learning_rate": 1.4103248466250108e-06, "loss": 0.1117, "step": 12370 }, { "epoch": 0.3661205417874253, "grad_norm": 1.817617416381836, "learning_rate": 1.4114650558954652e-06, "loss": 0.1136, "step": 12380 }, { "epoch": 0.3664162772815993, "grad_norm": 1.5214864015579224, "learning_rate": 1.4126052651659195e-06, "loss": 0.0907, "step": 12390 }, { "epoch": 0.36671201277577337, "grad_norm": 2.203009843826294, "learning_rate": 1.413745474436374e-06, "loss": 0.1107, "step": 12400 }, { "epoch": 0.36700774826994736, "grad_norm": 2.5136282444000244, "learning_rate": 1.4148856837068283e-06, "loss": 0.108, "step": 12410 }, { "epoch": 0.36730348376412136, "grad_norm": 2.4251763820648193, "learning_rate": 1.4160258929772827e-06, "loss": 0.1237, "step": 12420 }, { "epoch": 0.36759921925829536, "grad_norm": 1.9139866828918457, "learning_rate": 1.4171661022477368e-06, "loss": 0.1151, "step": 12430 }, { "epoch": 0.3678949547524694, "grad_norm": 2.2078776359558105, "learning_rate": 1.4183063115181914e-06, "loss": 0.1042, "step": 12440 }, { "epoch": 0.3681906902466434, "grad_norm": 1.2823902368545532, "learning_rate": 1.4194465207886458e-06, "loss": 0.0789, "step": 12450 }, { "epoch": 0.3684864257408174, "grad_norm": 1.8546974658966064, "learning_rate": 1.4205867300591002e-06, "loss": 0.1411, "step": 12460 }, { "epoch": 0.3687821612349914, "grad_norm": 2.291934013366699, "learning_rate": 1.4217269393295543e-06, "loss": 0.1034, "step": 12470 }, { "epoch": 0.3690778967291654, "grad_norm": 2.1223514080047607, "learning_rate": 1.4228671486000087e-06, "loss": 0.1199, "step": 12480 }, { "epoch": 0.36937363222333947, "grad_norm": 2.1202869415283203, "learning_rate": 1.4240073578704633e-06, "loss": 0.1148, "step": 12490 }, { "epoch": 0.36966936771751346, "grad_norm": 1.978886365890503, "learning_rate": 1.4251475671409177e-06, "loss": 0.0824, "step": 12500 }, { "epoch": 0.36996510321168746, "grad_norm": 1.5637167692184448, "learning_rate": 1.4262877764113718e-06, "loss": 0.1196, "step": 12510 }, { "epoch": 0.37026083870586146, "grad_norm": 1.699711561203003, "learning_rate": 1.4274279856818262e-06, "loss": 0.1088, "step": 12520 }, { "epoch": 0.3705565742000355, "grad_norm": 1.5002537965774536, "learning_rate": 1.4285681949522808e-06, "loss": 0.1087, "step": 12530 }, { "epoch": 0.3708523096942095, "grad_norm": 1.369324803352356, "learning_rate": 1.4297084042227352e-06, "loss": 0.1106, "step": 12540 }, { "epoch": 0.3711480451883835, "grad_norm": 1.6636607646942139, "learning_rate": 1.4308486134931896e-06, "loss": 0.0978, "step": 12550 }, { "epoch": 0.3714437806825575, "grad_norm": 1.3690334558486938, "learning_rate": 1.4319888227636437e-06, "loss": 0.1118, "step": 12560 }, { "epoch": 0.3717395161767315, "grad_norm": 1.1536108255386353, "learning_rate": 1.4331290320340981e-06, "loss": 0.1173, "step": 12570 }, { "epoch": 0.37203525167090556, "grad_norm": 1.5551649332046509, "learning_rate": 1.4342692413045527e-06, "loss": 0.0947, "step": 12580 }, { "epoch": 0.37233098716507956, "grad_norm": 1.2419120073318481, "learning_rate": 1.435409450575007e-06, "loss": 0.0976, "step": 12590 }, { "epoch": 0.37262672265925356, "grad_norm": 1.0975079536437988, "learning_rate": 1.4365496598454612e-06, "loss": 0.0629, "step": 12600 }, { "epoch": 0.37292245815342756, "grad_norm": 1.4928385019302368, "learning_rate": 1.4376898691159156e-06, "loss": 0.1098, "step": 12610 }, { "epoch": 0.37321819364760156, "grad_norm": 1.2241997718811035, "learning_rate": 1.4388300783863702e-06, "loss": 0.127, "step": 12620 }, { "epoch": 0.3735139291417756, "grad_norm": 1.5819145441055298, "learning_rate": 1.4399702876568246e-06, "loss": 0.1075, "step": 12630 }, { "epoch": 0.3738096646359496, "grad_norm": 2.159015655517578, "learning_rate": 1.4411104969272787e-06, "loss": 0.1156, "step": 12640 }, { "epoch": 0.3741054001301236, "grad_norm": 2.4201817512512207, "learning_rate": 1.4422507061977331e-06, "loss": 0.0856, "step": 12650 }, { "epoch": 0.3744011356242976, "grad_norm": 1.2315857410430908, "learning_rate": 1.4433909154681875e-06, "loss": 0.0911, "step": 12660 }, { "epoch": 0.37469687111847166, "grad_norm": 2.5295283794403076, "learning_rate": 1.444531124738642e-06, "loss": 0.1344, "step": 12670 }, { "epoch": 0.37499260661264566, "grad_norm": 1.6975730657577515, "learning_rate": 1.4456713340090963e-06, "loss": 0.1141, "step": 12680 }, { "epoch": 0.37528834210681966, "grad_norm": 1.735579490661621, "learning_rate": 1.4468115432795506e-06, "loss": 0.0914, "step": 12690 }, { "epoch": 0.37558407760099366, "grad_norm": 1.0339109897613525, "learning_rate": 1.447951752550005e-06, "loss": 0.0869, "step": 12700 }, { "epoch": 0.37587981309516766, "grad_norm": 1.33883798122406, "learning_rate": 1.4490919618204596e-06, "loss": 0.1138, "step": 12710 }, { "epoch": 0.3761755485893417, "grad_norm": 1.4076626300811768, "learning_rate": 1.4502321710909138e-06, "loss": 0.1198, "step": 12720 }, { "epoch": 0.3764712840835157, "grad_norm": 1.191977620124817, "learning_rate": 1.4513723803613681e-06, "loss": 0.0941, "step": 12730 }, { "epoch": 0.3767670195776897, "grad_norm": 1.678308129310608, "learning_rate": 1.4525125896318225e-06, "loss": 0.0839, "step": 12740 }, { "epoch": 0.3770627550718637, "grad_norm": 1.3605263233184814, "learning_rate": 1.453652798902277e-06, "loss": 0.0785, "step": 12750 }, { "epoch": 0.37735849056603776, "grad_norm": 1.7450271844863892, "learning_rate": 1.4547930081727313e-06, "loss": 0.1253, "step": 12760 }, { "epoch": 0.37765422606021176, "grad_norm": 1.2543177604675293, "learning_rate": 1.4559332174431856e-06, "loss": 0.1031, "step": 12770 }, { "epoch": 0.37794996155438576, "grad_norm": 1.5700496435165405, "learning_rate": 1.45707342671364e-06, "loss": 0.1199, "step": 12780 }, { "epoch": 0.37824569704855976, "grad_norm": 1.6304247379302979, "learning_rate": 1.4582136359840944e-06, "loss": 0.0857, "step": 12790 }, { "epoch": 0.37854143254273376, "grad_norm": 0.8108463883399963, "learning_rate": 1.4593538452545486e-06, "loss": 0.084, "step": 12800 }, { "epoch": 0.3788371680369078, "grad_norm": 1.7017366886138916, "learning_rate": 1.4604940545250032e-06, "loss": 0.1193, "step": 12810 }, { "epoch": 0.3791329035310818, "grad_norm": 1.5419031381607056, "learning_rate": 1.4616342637954575e-06, "loss": 0.1023, "step": 12820 }, { "epoch": 0.3794286390252558, "grad_norm": 1.028552532196045, "learning_rate": 1.462774473065912e-06, "loss": 0.1119, "step": 12830 }, { "epoch": 0.3797243745194298, "grad_norm": 1.8032877445220947, "learning_rate": 1.463914682336366e-06, "loss": 0.1089, "step": 12840 }, { "epoch": 0.3800201100136038, "grad_norm": 1.9023140668869019, "learning_rate": 1.4650548916068207e-06, "loss": 0.0972, "step": 12850 }, { "epoch": 0.38031584550777786, "grad_norm": 1.272821068763733, "learning_rate": 1.466195100877275e-06, "loss": 0.1133, "step": 12860 }, { "epoch": 0.38061158100195186, "grad_norm": 1.8760757446289062, "learning_rate": 1.4673353101477294e-06, "loss": 0.1009, "step": 12870 }, { "epoch": 0.38090731649612586, "grad_norm": 0.9508220553398132, "learning_rate": 1.4684755194181836e-06, "loss": 0.0995, "step": 12880 }, { "epoch": 0.38120305199029986, "grad_norm": 1.1866039037704468, "learning_rate": 1.469615728688638e-06, "loss": 0.1044, "step": 12890 }, { "epoch": 0.3814987874844739, "grad_norm": 1.577646255493164, "learning_rate": 1.4707559379590925e-06, "loss": 0.0761, "step": 12900 }, { "epoch": 0.3817945229786479, "grad_norm": 2.1709372997283936, "learning_rate": 1.471896147229547e-06, "loss": 0.0982, "step": 12910 }, { "epoch": 0.3820902584728219, "grad_norm": 1.3810793161392212, "learning_rate": 1.473036356500001e-06, "loss": 0.1088, "step": 12920 }, { "epoch": 0.3823859939669959, "grad_norm": 2.1715235710144043, "learning_rate": 1.4741765657704555e-06, "loss": 0.1066, "step": 12930 }, { "epoch": 0.3826817294611699, "grad_norm": 1.1373926401138306, "learning_rate": 1.47531677504091e-06, "loss": 0.0873, "step": 12940 }, { "epoch": 0.38297746495534396, "grad_norm": 1.0635402202606201, "learning_rate": 1.4764569843113644e-06, "loss": 0.0996, "step": 12950 }, { "epoch": 0.38327320044951796, "grad_norm": 1.5814106464385986, "learning_rate": 1.4775971935818186e-06, "loss": 0.1215, "step": 12960 }, { "epoch": 0.38356893594369196, "grad_norm": 1.8922555446624756, "learning_rate": 1.478737402852273e-06, "loss": 0.1033, "step": 12970 }, { "epoch": 0.38386467143786596, "grad_norm": 1.2915146350860596, "learning_rate": 1.4798776121227274e-06, "loss": 0.1095, "step": 12980 }, { "epoch": 0.38416040693204, "grad_norm": 1.579077959060669, "learning_rate": 1.481017821393182e-06, "loss": 0.115, "step": 12990 }, { "epoch": 0.384456142426214, "grad_norm": 1.0292831659317017, "learning_rate": 1.4821580306636361e-06, "loss": 0.0796, "step": 13000 }, { "epoch": 0.384751877920388, "grad_norm": 1.392585277557373, "learning_rate": 1.4832982399340905e-06, "loss": 0.1397, "step": 13010 }, { "epoch": 0.385047613414562, "grad_norm": 1.8355880975723267, "learning_rate": 1.4844384492045449e-06, "loss": 0.086, "step": 13020 }, { "epoch": 0.385343348908736, "grad_norm": 1.293155550956726, "learning_rate": 1.4855786584749992e-06, "loss": 0.0989, "step": 13030 }, { "epoch": 0.38563908440291006, "grad_norm": 1.5422582626342773, "learning_rate": 1.4867188677454536e-06, "loss": 0.1063, "step": 13040 }, { "epoch": 0.38593481989708406, "grad_norm": 2.3367626667022705, "learning_rate": 1.487859077015908e-06, "loss": 0.0966, "step": 13050 }, { "epoch": 0.38623055539125806, "grad_norm": 1.6612327098846436, "learning_rate": 1.4889992862863624e-06, "loss": 0.1177, "step": 13060 }, { "epoch": 0.38652629088543206, "grad_norm": 1.1564992666244507, "learning_rate": 1.4901394955568167e-06, "loss": 0.1132, "step": 13070 }, { "epoch": 0.38682202637960605, "grad_norm": 1.2398871183395386, "learning_rate": 1.4912797048272711e-06, "loss": 0.1123, "step": 13080 }, { "epoch": 0.3871177618737801, "grad_norm": 0.9295517206192017, "learning_rate": 1.4924199140977255e-06, "loss": 0.0863, "step": 13090 }, { "epoch": 0.3874134973679541, "grad_norm": 2.0547149181365967, "learning_rate": 1.4935601233681799e-06, "loss": 0.1038, "step": 13100 }, { "epoch": 0.3877092328621281, "grad_norm": 1.6825827360153198, "learning_rate": 1.4947003326386343e-06, "loss": 0.1317, "step": 13110 }, { "epoch": 0.3880049683563021, "grad_norm": 1.717735767364502, "learning_rate": 1.4958405419090884e-06, "loss": 0.0998, "step": 13120 }, { "epoch": 0.38830070385047616, "grad_norm": 2.1509339809417725, "learning_rate": 1.496980751179543e-06, "loss": 0.0969, "step": 13130 }, { "epoch": 0.38859643934465016, "grad_norm": 1.9025739431381226, "learning_rate": 1.4981209604499974e-06, "loss": 0.1139, "step": 13140 }, { "epoch": 0.38889217483882416, "grad_norm": 1.557443618774414, "learning_rate": 1.4992611697204518e-06, "loss": 0.0757, "step": 13150 }, { "epoch": 0.38918791033299815, "grad_norm": 1.6475660800933838, "learning_rate": 1.500401378990906e-06, "loss": 0.1411, "step": 13160 }, { "epoch": 0.38948364582717215, "grad_norm": 2.4272713661193848, "learning_rate": 1.5015415882613605e-06, "loss": 0.1096, "step": 13170 }, { "epoch": 0.3897793813213462, "grad_norm": 1.3630542755126953, "learning_rate": 1.5026817975318149e-06, "loss": 0.1188, "step": 13180 }, { "epoch": 0.3900751168155202, "grad_norm": 0.993603527545929, "learning_rate": 1.5038220068022693e-06, "loss": 0.0852, "step": 13190 }, { "epoch": 0.3903708523096942, "grad_norm": 1.3175159692764282, "learning_rate": 1.5049622160727234e-06, "loss": 0.0954, "step": 13200 }, { "epoch": 0.3906665878038682, "grad_norm": 1.1811977624893188, "learning_rate": 1.5061024253431778e-06, "loss": 0.1002, "step": 13210 }, { "epoch": 0.39096232329804226, "grad_norm": 0.939294695854187, "learning_rate": 1.5072426346136324e-06, "loss": 0.1171, "step": 13220 }, { "epoch": 0.39125805879221626, "grad_norm": 1.3760366439819336, "learning_rate": 1.5083828438840868e-06, "loss": 0.0929, "step": 13230 }, { "epoch": 0.39155379428639026, "grad_norm": 1.0482072830200195, "learning_rate": 1.509523053154541e-06, "loss": 0.0858, "step": 13240 }, { "epoch": 0.39184952978056425, "grad_norm": 1.2145919799804688, "learning_rate": 1.5106632624249953e-06, "loss": 0.0874, "step": 13250 }, { "epoch": 0.39214526527473825, "grad_norm": 1.7247191667556763, "learning_rate": 1.5118034716954497e-06, "loss": 0.1302, "step": 13260 }, { "epoch": 0.3924410007689123, "grad_norm": 1.5173006057739258, "learning_rate": 1.5129436809659043e-06, "loss": 0.1257, "step": 13270 }, { "epoch": 0.3927367362630863, "grad_norm": 1.2164056301116943, "learning_rate": 1.5140838902363584e-06, "loss": 0.0995, "step": 13280 }, { "epoch": 0.3930324717572603, "grad_norm": 2.2243616580963135, "learning_rate": 1.5152240995068128e-06, "loss": 0.1141, "step": 13290 }, { "epoch": 0.3933282072514343, "grad_norm": 1.2759122848510742, "learning_rate": 1.5163643087772672e-06, "loss": 0.1048, "step": 13300 }, { "epoch": 0.3936239427456083, "grad_norm": 1.3730535507202148, "learning_rate": 1.5175045180477218e-06, "loss": 0.1133, "step": 13310 }, { "epoch": 0.39391967823978236, "grad_norm": 1.122879147529602, "learning_rate": 1.518644727318176e-06, "loss": 0.0966, "step": 13320 }, { "epoch": 0.39421541373395635, "grad_norm": 1.6614294052124023, "learning_rate": 1.5197849365886303e-06, "loss": 0.1015, "step": 13330 }, { "epoch": 0.39451114922813035, "grad_norm": 3.0245678424835205, "learning_rate": 1.5209251458590847e-06, "loss": 0.0916, "step": 13340 }, { "epoch": 0.39480688472230435, "grad_norm": 2.3637170791625977, "learning_rate": 1.522065355129539e-06, "loss": 0.0819, "step": 13350 }, { "epoch": 0.3951026202164784, "grad_norm": 1.6741734743118286, "learning_rate": 1.5232055643999935e-06, "loss": 0.1099, "step": 13360 }, { "epoch": 0.3953983557106524, "grad_norm": 2.1118433475494385, "learning_rate": 1.5243457736704478e-06, "loss": 0.1263, "step": 13370 }, { "epoch": 0.3956940912048264, "grad_norm": 1.1544424295425415, "learning_rate": 1.5254859829409022e-06, "loss": 0.0853, "step": 13380 }, { "epoch": 0.3959898266990004, "grad_norm": 2.204185962677002, "learning_rate": 1.5266261922113566e-06, "loss": 0.0827, "step": 13390 }, { "epoch": 0.3962855621931744, "grad_norm": 2.0394859313964844, "learning_rate": 1.527766401481811e-06, "loss": 0.0922, "step": 13400 }, { "epoch": 0.39658129768734846, "grad_norm": 1.9645936489105225, "learning_rate": 1.5289066107522653e-06, "loss": 0.106, "step": 13410 }, { "epoch": 0.39687703318152245, "grad_norm": 2.2026917934417725, "learning_rate": 1.5300468200227197e-06, "loss": 0.1029, "step": 13420 }, { "epoch": 0.39717276867569645, "grad_norm": 1.8892661333084106, "learning_rate": 1.531187029293174e-06, "loss": 0.1102, "step": 13430 }, { "epoch": 0.39746850416987045, "grad_norm": 1.25981867313385, "learning_rate": 1.5323272385636283e-06, "loss": 0.0772, "step": 13440 }, { "epoch": 0.3977642396640445, "grad_norm": 2.470813035964966, "learning_rate": 1.5334674478340829e-06, "loss": 0.0776, "step": 13450 }, { "epoch": 0.3980599751582185, "grad_norm": 2.2705843448638916, "learning_rate": 1.5346076571045372e-06, "loss": 0.1279, "step": 13460 }, { "epoch": 0.3983557106523925, "grad_norm": 1.2070764303207397, "learning_rate": 1.5357478663749916e-06, "loss": 0.0966, "step": 13470 }, { "epoch": 0.3986514461465665, "grad_norm": 1.590222716331482, "learning_rate": 1.5368880756454458e-06, "loss": 0.1061, "step": 13480 }, { "epoch": 0.3989471816407405, "grad_norm": 1.041343331336975, "learning_rate": 1.5380282849159002e-06, "loss": 0.0877, "step": 13490 }, { "epoch": 0.39924291713491455, "grad_norm": 1.4600833654403687, "learning_rate": 1.5391684941863547e-06, "loss": 0.0982, "step": 13500 }, { "epoch": 0.39953865262908855, "grad_norm": 2.057201385498047, "learning_rate": 1.5403087034568091e-06, "loss": 0.1348, "step": 13510 }, { "epoch": 0.39983438812326255, "grad_norm": 1.189174771308899, "learning_rate": 1.5414489127272633e-06, "loss": 0.0949, "step": 13520 }, { "epoch": 0.40013012361743655, "grad_norm": 1.5060405731201172, "learning_rate": 1.5425891219977177e-06, "loss": 0.0955, "step": 13530 }, { "epoch": 0.40042585911161055, "grad_norm": 1.0203219652175903, "learning_rate": 1.5437293312681722e-06, "loss": 0.0918, "step": 13540 }, { "epoch": 0.4007215946057846, "grad_norm": 2.8973941802978516, "learning_rate": 1.5448695405386266e-06, "loss": 0.0907, "step": 13550 }, { "epoch": 0.4010173300999586, "grad_norm": 1.8374208211898804, "learning_rate": 1.5460097498090808e-06, "loss": 0.1143, "step": 13560 }, { "epoch": 0.4013130655941326, "grad_norm": 1.2620447874069214, "learning_rate": 1.5471499590795352e-06, "loss": 0.1079, "step": 13570 }, { "epoch": 0.4016088010883066, "grad_norm": 1.4617714881896973, "learning_rate": 1.5482901683499895e-06, "loss": 0.0977, "step": 13580 }, { "epoch": 0.40190453658248065, "grad_norm": 2.0164997577667236, "learning_rate": 1.5494303776204441e-06, "loss": 0.1121, "step": 13590 }, { "epoch": 0.40220027207665465, "grad_norm": 1.7049673795700073, "learning_rate": 1.5505705868908983e-06, "loss": 0.0754, "step": 13600 }, { "epoch": 0.40249600757082865, "grad_norm": 1.8990411758422852, "learning_rate": 1.5517107961613527e-06, "loss": 0.123, "step": 13610 }, { "epoch": 0.40279174306500265, "grad_norm": 2.5177459716796875, "learning_rate": 1.552851005431807e-06, "loss": 0.1093, "step": 13620 }, { "epoch": 0.40308747855917665, "grad_norm": 1.190343976020813, "learning_rate": 1.5539912147022616e-06, "loss": 0.0986, "step": 13630 }, { "epoch": 0.4033832140533507, "grad_norm": 2.192295551300049, "learning_rate": 1.5551314239727158e-06, "loss": 0.111, "step": 13640 }, { "epoch": 0.4036789495475247, "grad_norm": 2.061594247817993, "learning_rate": 1.5562716332431702e-06, "loss": 0.0792, "step": 13650 }, { "epoch": 0.4039746850416987, "grad_norm": 1.131645679473877, "learning_rate": 1.5574118425136246e-06, "loss": 0.1144, "step": 13660 }, { "epoch": 0.4042704205358727, "grad_norm": 2.2125415802001953, "learning_rate": 1.558552051784079e-06, "loss": 0.1093, "step": 13670 }, { "epoch": 0.40456615603004675, "grad_norm": 1.4044747352600098, "learning_rate": 1.5596922610545333e-06, "loss": 0.1045, "step": 13680 }, { "epoch": 0.40486189152422075, "grad_norm": 1.2431819438934326, "learning_rate": 1.5608324703249877e-06, "loss": 0.0888, "step": 13690 }, { "epoch": 0.40515762701839475, "grad_norm": 2.0623795986175537, "learning_rate": 1.561972679595442e-06, "loss": 0.0758, "step": 13700 }, { "epoch": 0.40545336251256875, "grad_norm": 2.3771204948425293, "learning_rate": 1.5631128888658964e-06, "loss": 0.1136, "step": 13710 }, { "epoch": 0.40574909800674275, "grad_norm": 1.113709807395935, "learning_rate": 1.5642530981363506e-06, "loss": 0.1065, "step": 13720 }, { "epoch": 0.4060448335009168, "grad_norm": 1.3725769519805908, "learning_rate": 1.5653933074068052e-06, "loss": 0.0912, "step": 13730 }, { "epoch": 0.4063405689950908, "grad_norm": 1.6192538738250732, "learning_rate": 1.5665335166772596e-06, "loss": 0.0889, "step": 13740 }, { "epoch": 0.4066363044892648, "grad_norm": 2.1623241901397705, "learning_rate": 1.567673725947714e-06, "loss": 0.102, "step": 13750 }, { "epoch": 0.4069320399834388, "grad_norm": 2.0738773345947266, "learning_rate": 1.5688139352181681e-06, "loss": 0.133, "step": 13760 }, { "epoch": 0.4072277754776128, "grad_norm": 1.6916600465774536, "learning_rate": 1.5699541444886227e-06, "loss": 0.1106, "step": 13770 }, { "epoch": 0.40752351097178685, "grad_norm": 1.6868699789047241, "learning_rate": 1.571094353759077e-06, "loss": 0.1031, "step": 13780 }, { "epoch": 0.40781924646596085, "grad_norm": 1.5066415071487427, "learning_rate": 1.5722345630295315e-06, "loss": 0.0989, "step": 13790 }, { "epoch": 0.40811498196013485, "grad_norm": 2.2494468688964844, "learning_rate": 1.5733747722999856e-06, "loss": 0.0821, "step": 13800 }, { "epoch": 0.40841071745430885, "grad_norm": 1.7937886714935303, "learning_rate": 1.57451498157044e-06, "loss": 0.1022, "step": 13810 }, { "epoch": 0.4087064529484829, "grad_norm": 1.5324221849441528, "learning_rate": 1.5756551908408946e-06, "loss": 0.109, "step": 13820 }, { "epoch": 0.4090021884426569, "grad_norm": 1.472076416015625, "learning_rate": 1.576795400111349e-06, "loss": 0.1137, "step": 13830 }, { "epoch": 0.4092979239368309, "grad_norm": 1.150400996208191, "learning_rate": 1.5779356093818033e-06, "loss": 0.0734, "step": 13840 }, { "epoch": 0.4095936594310049, "grad_norm": 1.685591459274292, "learning_rate": 1.5790758186522575e-06, "loss": 0.1092, "step": 13850 }, { "epoch": 0.4098893949251789, "grad_norm": 1.3106259107589722, "learning_rate": 1.580216027922712e-06, "loss": 0.1018, "step": 13860 }, { "epoch": 0.41018513041935295, "grad_norm": 1.8495272397994995, "learning_rate": 1.5813562371931665e-06, "loss": 0.1109, "step": 13870 }, { "epoch": 0.41048086591352695, "grad_norm": 1.2985358238220215, "learning_rate": 1.5824964464636209e-06, "loss": 0.1, "step": 13880 }, { "epoch": 0.41077660140770095, "grad_norm": 0.8864536881446838, "learning_rate": 1.583636655734075e-06, "loss": 0.0812, "step": 13890 }, { "epoch": 0.41107233690187495, "grad_norm": 1.1126302480697632, "learning_rate": 1.5847768650045294e-06, "loss": 0.0826, "step": 13900 }, { "epoch": 0.411368072396049, "grad_norm": 1.3102459907531738, "learning_rate": 1.585917074274984e-06, "loss": 0.1078, "step": 13910 }, { "epoch": 0.411663807890223, "grad_norm": 1.6247971057891846, "learning_rate": 1.5870572835454384e-06, "loss": 0.1284, "step": 13920 }, { "epoch": 0.411959543384397, "grad_norm": 1.6600250005722046, "learning_rate": 1.5881974928158925e-06, "loss": 0.0963, "step": 13930 }, { "epoch": 0.412255278878571, "grad_norm": 2.230142831802368, "learning_rate": 1.589337702086347e-06, "loss": 0.1029, "step": 13940 }, { "epoch": 0.412551014372745, "grad_norm": 3.0943098068237305, "learning_rate": 1.5904779113568013e-06, "loss": 0.0886, "step": 13950 }, { "epoch": 0.41284674986691905, "grad_norm": 1.43401300907135, "learning_rate": 1.5916181206272559e-06, "loss": 0.1029, "step": 13960 }, { "epoch": 0.41314248536109305, "grad_norm": 2.1934711933135986, "learning_rate": 1.59275832989771e-06, "loss": 0.1052, "step": 13970 }, { "epoch": 0.41343822085526705, "grad_norm": 1.4150151014328003, "learning_rate": 1.5938985391681644e-06, "loss": 0.1031, "step": 13980 }, { "epoch": 0.41373395634944105, "grad_norm": 2.074497699737549, "learning_rate": 1.5950387484386188e-06, "loss": 0.1003, "step": 13990 }, { "epoch": 0.41402969184361504, "grad_norm": 2.1599743366241455, "learning_rate": 1.5961789577090734e-06, "loss": 0.0848, "step": 14000 }, { "epoch": 0.4143254273377891, "grad_norm": 1.2400720119476318, "learning_rate": 1.5973191669795275e-06, "loss": 0.1011, "step": 14010 }, { "epoch": 0.4146211628319631, "grad_norm": 1.7444519996643066, "learning_rate": 1.598459376249982e-06, "loss": 0.1141, "step": 14020 }, { "epoch": 0.4149168983261371, "grad_norm": 1.8643590211868286, "learning_rate": 1.5995995855204363e-06, "loss": 0.0925, "step": 14030 }, { "epoch": 0.4152126338203111, "grad_norm": 1.2144221067428589, "learning_rate": 1.6007397947908907e-06, "loss": 0.0861, "step": 14040 }, { "epoch": 0.41550836931448515, "grad_norm": 1.337630033493042, "learning_rate": 1.601880004061345e-06, "loss": 0.0813, "step": 14050 }, { "epoch": 0.41580410480865915, "grad_norm": 0.9917430281639099, "learning_rate": 1.6030202133317994e-06, "loss": 0.107, "step": 14060 }, { "epoch": 0.41609984030283315, "grad_norm": 2.106448173522949, "learning_rate": 1.6041604226022538e-06, "loss": 0.1191, "step": 14070 }, { "epoch": 0.41639557579700714, "grad_norm": 1.6323187351226807, "learning_rate": 1.6053006318727082e-06, "loss": 0.0984, "step": 14080 }, { "epoch": 0.41669131129118114, "grad_norm": 1.4990512132644653, "learning_rate": 1.6064408411431626e-06, "loss": 0.0935, "step": 14090 }, { "epoch": 0.4169870467853552, "grad_norm": 2.4445250034332275, "learning_rate": 1.607581050413617e-06, "loss": 0.0859, "step": 14100 }, { "epoch": 0.4172827822795292, "grad_norm": 0.9845823049545288, "learning_rate": 1.6087212596840713e-06, "loss": 0.0885, "step": 14110 }, { "epoch": 0.4175785177737032, "grad_norm": 1.2675386667251587, "learning_rate": 1.6098614689545257e-06, "loss": 0.1125, "step": 14120 }, { "epoch": 0.4178742532678772, "grad_norm": 2.255749225616455, "learning_rate": 1.6110016782249799e-06, "loss": 0.0998, "step": 14130 }, { "epoch": 0.41816998876205125, "grad_norm": 1.844573736190796, "learning_rate": 1.6121418874954344e-06, "loss": 0.0935, "step": 14140 }, { "epoch": 0.41846572425622525, "grad_norm": 1.5247774124145508, "learning_rate": 1.6132820967658888e-06, "loss": 0.0812, "step": 14150 }, { "epoch": 0.41876145975039925, "grad_norm": 2.3174569606781006, "learning_rate": 1.6144223060363432e-06, "loss": 0.1064, "step": 14160 }, { "epoch": 0.41905719524457324, "grad_norm": 2.1457502841949463, "learning_rate": 1.6155625153067974e-06, "loss": 0.0973, "step": 14170 }, { "epoch": 0.41935293073874724, "grad_norm": 1.3715119361877441, "learning_rate": 1.6167027245772517e-06, "loss": 0.1172, "step": 14180 }, { "epoch": 0.4196486662329213, "grad_norm": 2.111337184906006, "learning_rate": 1.6178429338477063e-06, "loss": 0.1232, "step": 14190 }, { "epoch": 0.4199444017270953, "grad_norm": 1.493640422821045, "learning_rate": 1.6189831431181607e-06, "loss": 0.0913, "step": 14200 }, { "epoch": 0.4202401372212693, "grad_norm": 2.8751556873321533, "learning_rate": 1.6201233523886149e-06, "loss": 0.1248, "step": 14210 }, { "epoch": 0.4205358727154433, "grad_norm": 1.6757539510726929, "learning_rate": 1.6212635616590692e-06, "loss": 0.0957, "step": 14220 }, { "epoch": 0.4208316082096173, "grad_norm": 1.2722035646438599, "learning_rate": 1.6224037709295238e-06, "loss": 0.1205, "step": 14230 }, { "epoch": 0.42112734370379135, "grad_norm": 1.8828150033950806, "learning_rate": 1.6235439801999782e-06, "loss": 0.1022, "step": 14240 }, { "epoch": 0.42142307919796534, "grad_norm": 1.721077561378479, "learning_rate": 1.6246841894704324e-06, "loss": 0.0885, "step": 14250 }, { "epoch": 0.42171881469213934, "grad_norm": 2.1132397651672363, "learning_rate": 1.6258243987408868e-06, "loss": 0.125, "step": 14260 }, { "epoch": 0.42201455018631334, "grad_norm": 1.7674754858016968, "learning_rate": 1.6269646080113411e-06, "loss": 0.1093, "step": 14270 }, { "epoch": 0.4223102856804874, "grad_norm": 1.2787511348724365, "learning_rate": 1.6281048172817957e-06, "loss": 0.1188, "step": 14280 }, { "epoch": 0.4226060211746614, "grad_norm": 1.4817914962768555, "learning_rate": 1.6292450265522499e-06, "loss": 0.1003, "step": 14290 }, { "epoch": 0.4229017566688354, "grad_norm": 1.000990867614746, "learning_rate": 1.6303852358227043e-06, "loss": 0.0774, "step": 14300 }, { "epoch": 0.4231974921630094, "grad_norm": 1.7811769247055054, "learning_rate": 1.6315254450931586e-06, "loss": 0.093, "step": 14310 }, { "epoch": 0.4234932276571834, "grad_norm": 1.681936264038086, "learning_rate": 1.6326656543636132e-06, "loss": 0.0939, "step": 14320 }, { "epoch": 0.42378896315135745, "grad_norm": 1.4080016613006592, "learning_rate": 1.6338058636340674e-06, "loss": 0.1239, "step": 14330 }, { "epoch": 0.42408469864553144, "grad_norm": 1.516874074935913, "learning_rate": 1.6349460729045218e-06, "loss": 0.0968, "step": 14340 }, { "epoch": 0.42438043413970544, "grad_norm": 2.257693290710449, "learning_rate": 1.6360862821749761e-06, "loss": 0.0896, "step": 14350 }, { "epoch": 0.42467616963387944, "grad_norm": 1.1637177467346191, "learning_rate": 1.6372264914454305e-06, "loss": 0.1136, "step": 14360 }, { "epoch": 0.4249719051280535, "grad_norm": 1.4871673583984375, "learning_rate": 1.638366700715885e-06, "loss": 0.1282, "step": 14370 }, { "epoch": 0.4252676406222275, "grad_norm": 1.624105453491211, "learning_rate": 1.6395069099863393e-06, "loss": 0.1099, "step": 14380 }, { "epoch": 0.4255633761164015, "grad_norm": 0.9852378964424133, "learning_rate": 1.6406471192567937e-06, "loss": 0.0906, "step": 14390 }, { "epoch": 0.4258591116105755, "grad_norm": 1.8646639585494995, "learning_rate": 1.641787328527248e-06, "loss": 0.0878, "step": 14400 }, { "epoch": 0.4261548471047495, "grad_norm": 1.720249056816101, "learning_rate": 1.6429275377977022e-06, "loss": 0.1225, "step": 14410 }, { "epoch": 0.42645058259892354, "grad_norm": 1.3318713903427124, "learning_rate": 1.6440677470681568e-06, "loss": 0.1119, "step": 14420 }, { "epoch": 0.42674631809309754, "grad_norm": 1.196704626083374, "learning_rate": 1.6452079563386112e-06, "loss": 0.1028, "step": 14430 }, { "epoch": 0.42704205358727154, "grad_norm": 1.0238131284713745, "learning_rate": 1.6463481656090655e-06, "loss": 0.0809, "step": 14440 }, { "epoch": 0.42733778908144554, "grad_norm": 2.8873324394226074, "learning_rate": 1.6474883748795197e-06, "loss": 0.0908, "step": 14450 }, { "epoch": 0.42763352457561954, "grad_norm": 1.7770217657089233, "learning_rate": 1.6486285841499743e-06, "loss": 0.1233, "step": 14460 }, { "epoch": 0.4279292600697936, "grad_norm": 1.853381872177124, "learning_rate": 1.6497687934204287e-06, "loss": 0.1003, "step": 14470 }, { "epoch": 0.4282249955639676, "grad_norm": 3.062204360961914, "learning_rate": 1.650909002690883e-06, "loss": 0.1103, "step": 14480 }, { "epoch": 0.4285207310581416, "grad_norm": 1.6033318042755127, "learning_rate": 1.6520492119613372e-06, "loss": 0.1019, "step": 14490 }, { "epoch": 0.4288164665523156, "grad_norm": 1.6484323740005493, "learning_rate": 1.6531894212317916e-06, "loss": 0.0673, "step": 14500 }, { "epoch": 0.42911220204648964, "grad_norm": 1.7793748378753662, "learning_rate": 1.6543296305022462e-06, "loss": 0.1159, "step": 14510 }, { "epoch": 0.42940793754066364, "grad_norm": 1.3361667394638062, "learning_rate": 1.6554698397727006e-06, "loss": 0.1016, "step": 14520 }, { "epoch": 0.42970367303483764, "grad_norm": 3.413017988204956, "learning_rate": 1.6566100490431547e-06, "loss": 0.0986, "step": 14530 }, { "epoch": 0.42999940852901164, "grad_norm": 1.9667028188705444, "learning_rate": 1.657750258313609e-06, "loss": 0.1006, "step": 14540 }, { "epoch": 0.43029514402318564, "grad_norm": 1.8594187498092651, "learning_rate": 1.6588904675840637e-06, "loss": 0.0965, "step": 14550 }, { "epoch": 0.4305908795173597, "grad_norm": 1.581986904144287, "learning_rate": 1.660030676854518e-06, "loss": 0.1167, "step": 14560 }, { "epoch": 0.4308866150115337, "grad_norm": 1.4040088653564453, "learning_rate": 1.6611708861249722e-06, "loss": 0.0976, "step": 14570 }, { "epoch": 0.4311823505057077, "grad_norm": 2.4150147438049316, "learning_rate": 1.6623110953954266e-06, "loss": 0.0936, "step": 14580 }, { "epoch": 0.4314780859998817, "grad_norm": 2.162872791290283, "learning_rate": 1.663451304665881e-06, "loss": 0.1027, "step": 14590 }, { "epoch": 0.43177382149405574, "grad_norm": 0.6914207339286804, "learning_rate": 1.6645915139363356e-06, "loss": 0.077, "step": 14600 }, { "epoch": 0.43206955698822974, "grad_norm": 1.0457942485809326, "learning_rate": 1.6657317232067897e-06, "loss": 0.1126, "step": 14610 }, { "epoch": 0.43236529248240374, "grad_norm": 1.196035385131836, "learning_rate": 1.6668719324772441e-06, "loss": 0.0991, "step": 14620 }, { "epoch": 0.43266102797657774, "grad_norm": 1.5997215509414673, "learning_rate": 1.6680121417476985e-06, "loss": 0.0926, "step": 14630 }, { "epoch": 0.43295676347075174, "grad_norm": 2.166996479034424, "learning_rate": 1.6691523510181529e-06, "loss": 0.1076, "step": 14640 }, { "epoch": 0.4332524989649258, "grad_norm": 1.5552330017089844, "learning_rate": 1.6702925602886072e-06, "loss": 0.0793, "step": 14650 }, { "epoch": 0.4335482344590998, "grad_norm": 1.4806280136108398, "learning_rate": 1.6714327695590616e-06, "loss": 0.1087, "step": 14660 }, { "epoch": 0.4338439699532738, "grad_norm": 1.3605080842971802, "learning_rate": 1.672572978829516e-06, "loss": 0.1177, "step": 14670 }, { "epoch": 0.4341397054474478, "grad_norm": 1.5266697406768799, "learning_rate": 1.6737131880999704e-06, "loss": 0.1102, "step": 14680 }, { "epoch": 0.4344354409416218, "grad_norm": 1.124599814414978, "learning_rate": 1.6748533973704247e-06, "loss": 0.1008, "step": 14690 }, { "epoch": 0.43473117643579584, "grad_norm": 1.275067687034607, "learning_rate": 1.6759936066408791e-06, "loss": 0.079, "step": 14700 }, { "epoch": 0.43502691192996984, "grad_norm": 1.448210597038269, "learning_rate": 1.6771338159113335e-06, "loss": 0.1199, "step": 14710 }, { "epoch": 0.43532264742414384, "grad_norm": 1.1665111780166626, "learning_rate": 1.6782740251817879e-06, "loss": 0.1034, "step": 14720 }, { "epoch": 0.43561838291831784, "grad_norm": 1.470158338546753, "learning_rate": 1.679414234452242e-06, "loss": 0.107, "step": 14730 }, { "epoch": 0.4359141184124919, "grad_norm": 1.3410892486572266, "learning_rate": 1.6805544437226966e-06, "loss": 0.088, "step": 14740 }, { "epoch": 0.4362098539066659, "grad_norm": 1.3185433149337769, "learning_rate": 1.681694652993151e-06, "loss": 0.0869, "step": 14750 }, { "epoch": 0.4365055894008399, "grad_norm": 1.4088330268859863, "learning_rate": 1.6828348622636054e-06, "loss": 0.1187, "step": 14760 }, { "epoch": 0.4368013248950139, "grad_norm": 1.3496302366256714, "learning_rate": 1.6839750715340595e-06, "loss": 0.1005, "step": 14770 }, { "epoch": 0.4370970603891879, "grad_norm": 1.1090009212493896, "learning_rate": 1.6851152808045141e-06, "loss": 0.0985, "step": 14780 }, { "epoch": 0.43739279588336194, "grad_norm": 1.3477363586425781, "learning_rate": 1.6862554900749685e-06, "loss": 0.095, "step": 14790 }, { "epoch": 0.43768853137753594, "grad_norm": 1.2743030786514282, "learning_rate": 1.6873956993454229e-06, "loss": 0.0902, "step": 14800 }, { "epoch": 0.43798426687170994, "grad_norm": 1.7974388599395752, "learning_rate": 1.688535908615877e-06, "loss": 0.1328, "step": 14810 }, { "epoch": 0.43828000236588394, "grad_norm": 1.982537031173706, "learning_rate": 1.6896761178863314e-06, "loss": 0.0956, "step": 14820 }, { "epoch": 0.438575737860058, "grad_norm": 1.6742899417877197, "learning_rate": 1.690816327156786e-06, "loss": 0.0996, "step": 14830 }, { "epoch": 0.438871473354232, "grad_norm": 1.72418212890625, "learning_rate": 1.6919565364272404e-06, "loss": 0.1071, "step": 14840 }, { "epoch": 0.439167208848406, "grad_norm": 1.599827766418457, "learning_rate": 1.6930967456976946e-06, "loss": 0.1128, "step": 14850 }, { "epoch": 0.43946294434258, "grad_norm": 1.8215532302856445, "learning_rate": 1.694236954968149e-06, "loss": 0.1183, "step": 14860 }, { "epoch": 0.439758679836754, "grad_norm": 0.9032446146011353, "learning_rate": 1.6953771642386033e-06, "loss": 0.1073, "step": 14870 }, { "epoch": 0.44005441533092804, "grad_norm": 1.1105458736419678, "learning_rate": 1.696517373509058e-06, "loss": 0.1066, "step": 14880 }, { "epoch": 0.44035015082510204, "grad_norm": 1.3359798192977905, "learning_rate": 1.697657582779512e-06, "loss": 0.1002, "step": 14890 }, { "epoch": 0.44064588631927604, "grad_norm": 1.2270311117172241, "learning_rate": 1.6987977920499664e-06, "loss": 0.0885, "step": 14900 }, { "epoch": 0.44094162181345004, "grad_norm": 1.199445128440857, "learning_rate": 1.6999380013204208e-06, "loss": 0.1046, "step": 14910 }, { "epoch": 0.44123735730762403, "grad_norm": 1.4864622354507446, "learning_rate": 1.7010782105908754e-06, "loss": 0.1128, "step": 14920 }, { "epoch": 0.4415330928017981, "grad_norm": 1.2270127534866333, "learning_rate": 1.7022184198613296e-06, "loss": 0.0925, "step": 14930 }, { "epoch": 0.4418288282959721, "grad_norm": 1.178824543952942, "learning_rate": 1.703358629131784e-06, "loss": 0.078, "step": 14940 }, { "epoch": 0.4421245637901461, "grad_norm": 1.5670989751815796, "learning_rate": 1.7044988384022383e-06, "loss": 0.0981, "step": 14950 }, { "epoch": 0.4424202992843201, "grad_norm": 1.4483826160430908, "learning_rate": 1.7056390476726927e-06, "loss": 0.1306, "step": 14960 }, { "epoch": 0.44271603477849414, "grad_norm": 1.962291955947876, "learning_rate": 1.706779256943147e-06, "loss": 0.1062, "step": 14970 }, { "epoch": 0.44301177027266814, "grad_norm": 1.6901311874389648, "learning_rate": 1.7079194662136015e-06, "loss": 0.0971, "step": 14980 }, { "epoch": 0.44330750576684214, "grad_norm": 2.5116708278656006, "learning_rate": 1.7090596754840558e-06, "loss": 0.1031, "step": 14990 }, { "epoch": 0.44360324126101613, "grad_norm": 1.3656331300735474, "learning_rate": 1.7101998847545102e-06, "loss": 0.0754, "step": 15000 }, { "epoch": 0.44389897675519013, "grad_norm": 1.2453559637069702, "learning_rate": 1.7113400940249646e-06, "loss": 0.0985, "step": 15010 }, { "epoch": 0.4441947122493642, "grad_norm": 0.9088007807731628, "learning_rate": 1.712480303295419e-06, "loss": 0.0936, "step": 15020 }, { "epoch": 0.4444904477435382, "grad_norm": 1.3231422901153564, "learning_rate": 1.7136205125658733e-06, "loss": 0.1034, "step": 15030 }, { "epoch": 0.4447861832377122, "grad_norm": 1.576674222946167, "learning_rate": 1.7147607218363277e-06, "loss": 0.0966, "step": 15040 }, { "epoch": 0.4450819187318862, "grad_norm": 0.8742958903312683, "learning_rate": 1.7159009311067819e-06, "loss": 0.0695, "step": 15050 }, { "epoch": 0.44537765422606024, "grad_norm": 1.3332836627960205, "learning_rate": 1.7170411403772365e-06, "loss": 0.116, "step": 15060 }, { "epoch": 0.44567338972023424, "grad_norm": 0.9583203792572021, "learning_rate": 1.7181813496476909e-06, "loss": 0.0998, "step": 15070 }, { "epoch": 0.44596912521440824, "grad_norm": 1.6240043640136719, "learning_rate": 1.7193215589181452e-06, "loss": 0.1047, "step": 15080 }, { "epoch": 0.44626486070858223, "grad_norm": 1.0613763332366943, "learning_rate": 1.7204617681885994e-06, "loss": 0.0934, "step": 15090 }, { "epoch": 0.44656059620275623, "grad_norm": 2.763092041015625, "learning_rate": 1.7216019774590538e-06, "loss": 0.089, "step": 15100 }, { "epoch": 0.4468563316969303, "grad_norm": 1.3185728788375854, "learning_rate": 1.7227421867295084e-06, "loss": 0.1001, "step": 15110 }, { "epoch": 0.4471520671911043, "grad_norm": 1.0814741849899292, "learning_rate": 1.7238823959999627e-06, "loss": 0.0945, "step": 15120 }, { "epoch": 0.4474478026852783, "grad_norm": 1.8654561042785645, "learning_rate": 1.7250226052704171e-06, "loss": 0.1107, "step": 15130 }, { "epoch": 0.4477435381794523, "grad_norm": 2.3552112579345703, "learning_rate": 1.7261628145408713e-06, "loss": 0.0948, "step": 15140 }, { "epoch": 0.4480392736736263, "grad_norm": 1.995031714439392, "learning_rate": 1.7273030238113259e-06, "loss": 0.0917, "step": 15150 }, { "epoch": 0.44833500916780034, "grad_norm": 1.6683849096298218, "learning_rate": 1.7284432330817802e-06, "loss": 0.1152, "step": 15160 }, { "epoch": 0.44863074466197433, "grad_norm": 1.8350714445114136, "learning_rate": 1.7295834423522346e-06, "loss": 0.1034, "step": 15170 }, { "epoch": 0.44892648015614833, "grad_norm": 3.0051324367523193, "learning_rate": 1.7307236516226888e-06, "loss": 0.1061, "step": 15180 }, { "epoch": 0.44922221565032233, "grad_norm": 1.7614130973815918, "learning_rate": 1.7318638608931432e-06, "loss": 0.0979, "step": 15190 }, { "epoch": 0.4495179511444964, "grad_norm": 1.5130215883255005, "learning_rate": 1.7330040701635978e-06, "loss": 0.0835, "step": 15200 }, { "epoch": 0.4498136866386704, "grad_norm": 1.895626187324524, "learning_rate": 1.7341442794340521e-06, "loss": 0.1276, "step": 15210 }, { "epoch": 0.4501094221328444, "grad_norm": 2.2441701889038086, "learning_rate": 1.7352844887045063e-06, "loss": 0.1202, "step": 15220 }, { "epoch": 0.4504051576270184, "grad_norm": 1.495266079902649, "learning_rate": 1.7364246979749607e-06, "loss": 0.1073, "step": 15230 }, { "epoch": 0.4507008931211924, "grad_norm": 1.052330493927002, "learning_rate": 1.7375649072454153e-06, "loss": 0.087, "step": 15240 }, { "epoch": 0.45099662861536643, "grad_norm": 1.248314380645752, "learning_rate": 1.7387051165158696e-06, "loss": 0.0826, "step": 15250 }, { "epoch": 0.45129236410954043, "grad_norm": 1.4505504369735718, "learning_rate": 1.7398453257863238e-06, "loss": 0.1241, "step": 15260 }, { "epoch": 0.45158809960371443, "grad_norm": 2.030367374420166, "learning_rate": 1.7409855350567782e-06, "loss": 0.1012, "step": 15270 }, { "epoch": 0.45188383509788843, "grad_norm": 1.8438292741775513, "learning_rate": 1.7421257443272326e-06, "loss": 0.1023, "step": 15280 }, { "epoch": 0.4521795705920625, "grad_norm": 1.5480605363845825, "learning_rate": 1.7432659535976871e-06, "loss": 0.0848, "step": 15290 }, { "epoch": 0.4524753060862365, "grad_norm": 1.184993863105774, "learning_rate": 1.7444061628681413e-06, "loss": 0.0738, "step": 15300 }, { "epoch": 0.4527710415804105, "grad_norm": 2.4232139587402344, "learning_rate": 1.7455463721385957e-06, "loss": 0.0923, "step": 15310 }, { "epoch": 0.4530667770745845, "grad_norm": 1.3227882385253906, "learning_rate": 1.74668658140905e-06, "loss": 0.0999, "step": 15320 }, { "epoch": 0.4533625125687585, "grad_norm": 1.6544902324676514, "learning_rate": 1.7478267906795044e-06, "loss": 0.0956, "step": 15330 }, { "epoch": 0.45365824806293253, "grad_norm": 1.3178915977478027, "learning_rate": 1.7489669999499588e-06, "loss": 0.0845, "step": 15340 }, { "epoch": 0.45395398355710653, "grad_norm": 2.8906188011169434, "learning_rate": 1.7501072092204132e-06, "loss": 0.0903, "step": 15350 }, { "epoch": 0.45424971905128053, "grad_norm": 1.3880689144134521, "learning_rate": 1.7512474184908676e-06, "loss": 0.1195, "step": 15360 }, { "epoch": 0.45454545454545453, "grad_norm": 1.1142820119857788, "learning_rate": 1.752387627761322e-06, "loss": 0.095, "step": 15370 }, { "epoch": 0.45484119003962853, "grad_norm": 0.8727043867111206, "learning_rate": 1.7535278370317763e-06, "loss": 0.0853, "step": 15380 }, { "epoch": 0.4551369255338026, "grad_norm": 2.0255415439605713, "learning_rate": 1.7546680463022307e-06, "loss": 0.0891, "step": 15390 }, { "epoch": 0.4554326610279766, "grad_norm": 2.1131319999694824, "learning_rate": 1.755808255572685e-06, "loss": 0.0775, "step": 15400 }, { "epoch": 0.4557283965221506, "grad_norm": 1.3688504695892334, "learning_rate": 1.7569484648431395e-06, "loss": 0.1371, "step": 15410 }, { "epoch": 0.4560241320163246, "grad_norm": 2.7048003673553467, "learning_rate": 1.7580886741135936e-06, "loss": 0.1238, "step": 15420 }, { "epoch": 0.45631986751049863, "grad_norm": 1.5259966850280762, "learning_rate": 1.7592288833840482e-06, "loss": 0.1098, "step": 15430 }, { "epoch": 0.45661560300467263, "grad_norm": 1.2583725452423096, "learning_rate": 1.7603690926545026e-06, "loss": 0.1115, "step": 15440 }, { "epoch": 0.45691133849884663, "grad_norm": 1.4883477687835693, "learning_rate": 1.761509301924957e-06, "loss": 0.0893, "step": 15450 }, { "epoch": 0.45720707399302063, "grad_norm": 1.2421622276306152, "learning_rate": 1.7626495111954111e-06, "loss": 0.1065, "step": 15460 }, { "epoch": 0.45750280948719463, "grad_norm": 0.8987524509429932, "learning_rate": 1.7637897204658657e-06, "loss": 0.0927, "step": 15470 }, { "epoch": 0.4577985449813687, "grad_norm": 1.7145990133285522, "learning_rate": 1.76492992973632e-06, "loss": 0.1081, "step": 15480 }, { "epoch": 0.4580942804755427, "grad_norm": 1.5299122333526611, "learning_rate": 1.7660701390067745e-06, "loss": 0.115, "step": 15490 }, { "epoch": 0.4583900159697167, "grad_norm": 2.0772323608398438, "learning_rate": 1.7672103482772286e-06, "loss": 0.0789, "step": 15500 }, { "epoch": 0.4586857514638907, "grad_norm": 1.389644742012024, "learning_rate": 1.768350557547683e-06, "loss": 0.1173, "step": 15510 }, { "epoch": 0.45898148695806473, "grad_norm": 0.9115376472473145, "learning_rate": 1.7694907668181376e-06, "loss": 0.1032, "step": 15520 }, { "epoch": 0.45927722245223873, "grad_norm": 1.3254233598709106, "learning_rate": 1.770630976088592e-06, "loss": 0.0907, "step": 15530 }, { "epoch": 0.45957295794641273, "grad_norm": 0.8122871518135071, "learning_rate": 1.7717711853590461e-06, "loss": 0.0955, "step": 15540 }, { "epoch": 0.45986869344058673, "grad_norm": 1.8542428016662598, "learning_rate": 1.7729113946295005e-06, "loss": 0.0904, "step": 15550 }, { "epoch": 0.4601644289347607, "grad_norm": 2.4324724674224854, "learning_rate": 1.774051603899955e-06, "loss": 0.1179, "step": 15560 }, { "epoch": 0.4604601644289348, "grad_norm": 1.2877880334854126, "learning_rate": 1.7751918131704095e-06, "loss": 0.1234, "step": 15570 }, { "epoch": 0.4607558999231088, "grad_norm": 2.424508571624756, "learning_rate": 1.7763320224408637e-06, "loss": 0.0879, "step": 15580 }, { "epoch": 0.4610516354172828, "grad_norm": 0.9768553376197815, "learning_rate": 1.777472231711318e-06, "loss": 0.0917, "step": 15590 }, { "epoch": 0.4613473709114568, "grad_norm": 1.5869271755218506, "learning_rate": 1.7786124409817724e-06, "loss": 0.0747, "step": 15600 }, { "epoch": 0.4616431064056308, "grad_norm": 1.2495722770690918, "learning_rate": 1.779752650252227e-06, "loss": 0.1278, "step": 15610 }, { "epoch": 0.46193884189980483, "grad_norm": 1.371124267578125, "learning_rate": 1.7808928595226812e-06, "loss": 0.1182, "step": 15620 }, { "epoch": 0.46223457739397883, "grad_norm": 0.9637176990509033, "learning_rate": 1.7820330687931355e-06, "loss": 0.0907, "step": 15630 }, { "epoch": 0.46253031288815283, "grad_norm": 1.4149583578109741, "learning_rate": 1.78317327806359e-06, "loss": 0.096, "step": 15640 }, { "epoch": 0.4628260483823268, "grad_norm": 2.17120099067688, "learning_rate": 1.7843134873340443e-06, "loss": 0.1005, "step": 15650 }, { "epoch": 0.4631217838765009, "grad_norm": 1.015961766242981, "learning_rate": 1.7854536966044987e-06, "loss": 0.1135, "step": 15660 }, { "epoch": 0.4634175193706749, "grad_norm": 1.4391266107559204, "learning_rate": 1.786593905874953e-06, "loss": 0.1032, "step": 15670 }, { "epoch": 0.4637132548648489, "grad_norm": 1.199676275253296, "learning_rate": 1.7877341151454074e-06, "loss": 0.0937, "step": 15680 }, { "epoch": 0.4640089903590229, "grad_norm": 0.7059270143508911, "learning_rate": 1.7888743244158618e-06, "loss": 0.088, "step": 15690 }, { "epoch": 0.4643047258531969, "grad_norm": 1.6199325323104858, "learning_rate": 1.7900145336863162e-06, "loss": 0.0756, "step": 15700 }, { "epoch": 0.46460046134737093, "grad_norm": 0.9793443083763123, "learning_rate": 1.7911547429567706e-06, "loss": 0.1179, "step": 15710 }, { "epoch": 0.46489619684154493, "grad_norm": 2.018594264984131, "learning_rate": 1.792294952227225e-06, "loss": 0.1004, "step": 15720 }, { "epoch": 0.4651919323357189, "grad_norm": 1.4385091066360474, "learning_rate": 1.7934351614976793e-06, "loss": 0.1081, "step": 15730 }, { "epoch": 0.4654876678298929, "grad_norm": 1.837762475013733, "learning_rate": 1.7945753707681335e-06, "loss": 0.096, "step": 15740 }, { "epoch": 0.465783403324067, "grad_norm": 1.3862535953521729, "learning_rate": 1.795715580038588e-06, "loss": 0.0883, "step": 15750 }, { "epoch": 0.466079138818241, "grad_norm": 1.7141687870025635, "learning_rate": 1.7968557893090424e-06, "loss": 0.1018, "step": 15760 }, { "epoch": 0.466374874312415, "grad_norm": 2.2587122917175293, "learning_rate": 1.7979959985794968e-06, "loss": 0.0808, "step": 15770 }, { "epoch": 0.466670609806589, "grad_norm": 1.109840750694275, "learning_rate": 1.799136207849951e-06, "loss": 0.1119, "step": 15780 }, { "epoch": 0.466966345300763, "grad_norm": 1.5132421255111694, "learning_rate": 1.8002764171204054e-06, "loss": 0.0721, "step": 15790 }, { "epoch": 0.46726208079493703, "grad_norm": 1.5263831615447998, "learning_rate": 1.80141662639086e-06, "loss": 0.0859, "step": 15800 }, { "epoch": 0.46755781628911103, "grad_norm": 1.5199061632156372, "learning_rate": 1.8025568356613143e-06, "loss": 0.1046, "step": 15810 }, { "epoch": 0.467853551783285, "grad_norm": 1.2218031883239746, "learning_rate": 1.8036970449317685e-06, "loss": 0.1036, "step": 15820 }, { "epoch": 0.468149287277459, "grad_norm": 1.3875409364700317, "learning_rate": 1.8048372542022229e-06, "loss": 0.0877, "step": 15830 }, { "epoch": 0.468445022771633, "grad_norm": 1.3436564207077026, "learning_rate": 1.8059774634726775e-06, "loss": 0.09, "step": 15840 }, { "epoch": 0.4687407582658071, "grad_norm": 0.9027476906776428, "learning_rate": 1.8071176727431318e-06, "loss": 0.0825, "step": 15850 }, { "epoch": 0.4690364937599811, "grad_norm": 1.380326747894287, "learning_rate": 1.808257882013586e-06, "loss": 0.1178, "step": 15860 }, { "epoch": 0.4693322292541551, "grad_norm": 2.2228972911834717, "learning_rate": 1.8093980912840404e-06, "loss": 0.1035, "step": 15870 }, { "epoch": 0.4696279647483291, "grad_norm": 1.3563953638076782, "learning_rate": 1.8105383005544948e-06, "loss": 0.107, "step": 15880 }, { "epoch": 0.46992370024250313, "grad_norm": 0.6095418930053711, "learning_rate": 1.8116785098249493e-06, "loss": 0.0881, "step": 15890 }, { "epoch": 0.4702194357366771, "grad_norm": 1.717577576637268, "learning_rate": 1.8128187190954035e-06, "loss": 0.0867, "step": 15900 }, { "epoch": 0.4705151712308511, "grad_norm": 2.0309629440307617, "learning_rate": 1.8139589283658579e-06, "loss": 0.1167, "step": 15910 }, { "epoch": 0.4708109067250251, "grad_norm": 1.0792832374572754, "learning_rate": 1.8150991376363123e-06, "loss": 0.0933, "step": 15920 }, { "epoch": 0.4711066422191991, "grad_norm": 2.543501138687134, "learning_rate": 1.8162393469067668e-06, "loss": 0.1002, "step": 15930 }, { "epoch": 0.4714023777133732, "grad_norm": 0.6068198084831238, "learning_rate": 1.817379556177221e-06, "loss": 0.0858, "step": 15940 }, { "epoch": 0.4716981132075472, "grad_norm": 2.177518367767334, "learning_rate": 1.8185197654476754e-06, "loss": 0.0772, "step": 15950 }, { "epoch": 0.4719938487017212, "grad_norm": 2.1485133171081543, "learning_rate": 1.8196599747181298e-06, "loss": 0.128, "step": 15960 }, { "epoch": 0.4722895841958952, "grad_norm": 1.2457042932510376, "learning_rate": 1.8208001839885841e-06, "loss": 0.1033, "step": 15970 }, { "epoch": 0.4725853196900692, "grad_norm": 1.845682144165039, "learning_rate": 1.8219403932590385e-06, "loss": 0.0961, "step": 15980 }, { "epoch": 0.4728810551842432, "grad_norm": 0.8448562622070312, "learning_rate": 1.823080602529493e-06, "loss": 0.0939, "step": 15990 }, { "epoch": 0.4731767906784172, "grad_norm": 1.8569284677505493, "learning_rate": 1.8242208117999473e-06, "loss": 0.0948, "step": 16000 }, { "epoch": 0.4734725261725912, "grad_norm": 1.0596803426742554, "learning_rate": 1.8253610210704017e-06, "loss": 0.1193, "step": 16010 }, { "epoch": 0.4737682616667652, "grad_norm": 2.252711534500122, "learning_rate": 1.8265012303408558e-06, "loss": 0.1079, "step": 16020 }, { "epoch": 0.4740639971609393, "grad_norm": 2.262838840484619, "learning_rate": 1.8276414396113104e-06, "loss": 0.0996, "step": 16030 }, { "epoch": 0.4743597326551133, "grad_norm": 1.616984486579895, "learning_rate": 1.8287816488817648e-06, "loss": 0.085, "step": 16040 }, { "epoch": 0.4746554681492873, "grad_norm": 1.8440842628479004, "learning_rate": 1.8299218581522192e-06, "loss": 0.0893, "step": 16050 }, { "epoch": 0.4749512036434613, "grad_norm": 1.6552071571350098, "learning_rate": 1.8310620674226733e-06, "loss": 0.111, "step": 16060 }, { "epoch": 0.47524693913763527, "grad_norm": 1.0425183773040771, "learning_rate": 1.832202276693128e-06, "loss": 0.1039, "step": 16070 }, { "epoch": 0.4755426746318093, "grad_norm": 2.220446825027466, "learning_rate": 1.8333424859635823e-06, "loss": 0.1009, "step": 16080 }, { "epoch": 0.4758384101259833, "grad_norm": 3.2955262660980225, "learning_rate": 1.8344826952340367e-06, "loss": 0.0878, "step": 16090 }, { "epoch": 0.4761341456201573, "grad_norm": 2.2563509941101074, "learning_rate": 1.8356229045044908e-06, "loss": 0.0838, "step": 16100 }, { "epoch": 0.4764298811143313, "grad_norm": 1.795244812965393, "learning_rate": 1.8367631137749452e-06, "loss": 0.1314, "step": 16110 }, { "epoch": 0.4767256166085054, "grad_norm": 1.4603867530822754, "learning_rate": 1.8379033230453998e-06, "loss": 0.0956, "step": 16120 }, { "epoch": 0.4770213521026794, "grad_norm": 1.2645301818847656, "learning_rate": 1.8390435323158542e-06, "loss": 0.0948, "step": 16130 }, { "epoch": 0.4773170875968534, "grad_norm": 1.7259292602539062, "learning_rate": 1.8401837415863083e-06, "loss": 0.078, "step": 16140 }, { "epoch": 0.47761282309102737, "grad_norm": 1.4477611780166626, "learning_rate": 1.8413239508567627e-06, "loss": 0.0958, "step": 16150 }, { "epoch": 0.47790855858520137, "grad_norm": 1.4858007431030273, "learning_rate": 1.8424641601272173e-06, "loss": 0.1073, "step": 16160 }, { "epoch": 0.4782042940793754, "grad_norm": 1.5936002731323242, "learning_rate": 1.8436043693976717e-06, "loss": 0.0974, "step": 16170 }, { "epoch": 0.4785000295735494, "grad_norm": 1.2371666431427002, "learning_rate": 1.8447445786681258e-06, "loss": 0.0985, "step": 16180 }, { "epoch": 0.4787957650677234, "grad_norm": 1.5068687200546265, "learning_rate": 1.8458847879385802e-06, "loss": 0.103, "step": 16190 }, { "epoch": 0.4790915005618974, "grad_norm": 2.2252604961395264, "learning_rate": 1.8470249972090346e-06, "loss": 0.1115, "step": 16200 }, { "epoch": 0.4793872360560715, "grad_norm": 1.3424420356750488, "learning_rate": 1.8481652064794892e-06, "loss": 0.1092, "step": 16210 }, { "epoch": 0.4796829715502455, "grad_norm": 1.7583534717559814, "learning_rate": 1.8493054157499434e-06, "loss": 0.1184, "step": 16220 }, { "epoch": 0.4799787070444195, "grad_norm": 1.3029054403305054, "learning_rate": 1.8504456250203977e-06, "loss": 0.0902, "step": 16230 }, { "epoch": 0.48027444253859347, "grad_norm": 1.2538560628890991, "learning_rate": 1.8515858342908521e-06, "loss": 0.0937, "step": 16240 }, { "epoch": 0.48057017803276747, "grad_norm": 1.2668179273605347, "learning_rate": 1.8527260435613065e-06, "loss": 0.0816, "step": 16250 }, { "epoch": 0.4808659135269415, "grad_norm": 2.1155245304107666, "learning_rate": 1.8538662528317609e-06, "loss": 0.1061, "step": 16260 }, { "epoch": 0.4811616490211155, "grad_norm": 1.217987060546875, "learning_rate": 1.8550064621022152e-06, "loss": 0.1074, "step": 16270 }, { "epoch": 0.4814573845152895, "grad_norm": 1.335490345954895, "learning_rate": 1.8561466713726696e-06, "loss": 0.1083, "step": 16280 }, { "epoch": 0.4817531200094635, "grad_norm": 1.2088149785995483, "learning_rate": 1.857286880643124e-06, "loss": 0.0849, "step": 16290 }, { "epoch": 0.4820488555036375, "grad_norm": 1.0812064409255981, "learning_rate": 1.8584270899135784e-06, "loss": 0.0937, "step": 16300 }, { "epoch": 0.4823445909978116, "grad_norm": 1.0119311809539795, "learning_rate": 1.8595672991840327e-06, "loss": 0.0944, "step": 16310 }, { "epoch": 0.48264032649198557, "grad_norm": 3.0426392555236816, "learning_rate": 1.8607075084544871e-06, "loss": 0.1218, "step": 16320 }, { "epoch": 0.48293606198615957, "grad_norm": 0.9116055965423584, "learning_rate": 1.8618477177249415e-06, "loss": 0.0958, "step": 16330 }, { "epoch": 0.48323179748033357, "grad_norm": 1.617056965827942, "learning_rate": 1.8629879269953957e-06, "loss": 0.0889, "step": 16340 }, { "epoch": 0.4835275329745076, "grad_norm": 3.7052831649780273, "learning_rate": 1.8641281362658503e-06, "loss": 0.0966, "step": 16350 }, { "epoch": 0.4838232684686816, "grad_norm": 1.7466497421264648, "learning_rate": 1.8652683455363046e-06, "loss": 0.097, "step": 16360 }, { "epoch": 0.4841190039628556, "grad_norm": 2.17049503326416, "learning_rate": 1.866408554806759e-06, "loss": 0.0978, "step": 16370 }, { "epoch": 0.4844147394570296, "grad_norm": 1.15659761428833, "learning_rate": 1.8675487640772132e-06, "loss": 0.0898, "step": 16380 }, { "epoch": 0.4847104749512036, "grad_norm": 1.8831685781478882, "learning_rate": 1.8686889733476678e-06, "loss": 0.0891, "step": 16390 }, { "epoch": 0.4850062104453777, "grad_norm": 2.5598626136779785, "learning_rate": 1.8698291826181221e-06, "loss": 0.087, "step": 16400 }, { "epoch": 0.48530194593955167, "grad_norm": 2.002108335494995, "learning_rate": 1.8709693918885765e-06, "loss": 0.1153, "step": 16410 }, { "epoch": 0.48559768143372567, "grad_norm": 1.0794765949249268, "learning_rate": 1.8721096011590309e-06, "loss": 0.1175, "step": 16420 }, { "epoch": 0.48589341692789967, "grad_norm": 1.4972211122512817, "learning_rate": 1.873249810429485e-06, "loss": 0.0993, "step": 16430 }, { "epoch": 0.4861891524220737, "grad_norm": 1.3044214248657227, "learning_rate": 1.8743900196999396e-06, "loss": 0.0785, "step": 16440 }, { "epoch": 0.4864848879162477, "grad_norm": 1.5211788415908813, "learning_rate": 1.875530228970394e-06, "loss": 0.0825, "step": 16450 }, { "epoch": 0.4867806234104217, "grad_norm": 1.197157859802246, "learning_rate": 1.8766704382408484e-06, "loss": 0.1048, "step": 16460 }, { "epoch": 0.4870763589045957, "grad_norm": 1.7032278776168823, "learning_rate": 1.8778106475113026e-06, "loss": 0.1115, "step": 16470 }, { "epoch": 0.4873720943987697, "grad_norm": 1.61810302734375, "learning_rate": 1.878950856781757e-06, "loss": 0.1204, "step": 16480 }, { "epoch": 0.48766782989294377, "grad_norm": 0.5257653594017029, "learning_rate": 1.8800910660522115e-06, "loss": 0.0794, "step": 16490 }, { "epoch": 0.48796356538711777, "grad_norm": 1.5937795639038086, "learning_rate": 1.881231275322666e-06, "loss": 0.0851, "step": 16500 }, { "epoch": 0.48825930088129177, "grad_norm": 1.1323301792144775, "learning_rate": 1.88237148459312e-06, "loss": 0.119, "step": 16510 }, { "epoch": 0.48855503637546577, "grad_norm": 1.4772049188613892, "learning_rate": 1.8835116938635745e-06, "loss": 0.119, "step": 16520 }, { "epoch": 0.4888507718696398, "grad_norm": 1.3112454414367676, "learning_rate": 1.884651903134029e-06, "loss": 0.0948, "step": 16530 }, { "epoch": 0.4891465073638138, "grad_norm": 1.7472079992294312, "learning_rate": 1.8857921124044834e-06, "loss": 0.0864, "step": 16540 }, { "epoch": 0.4894422428579878, "grad_norm": 1.7557804584503174, "learning_rate": 1.8869323216749376e-06, "loss": 0.0703, "step": 16550 }, { "epoch": 0.4897379783521618, "grad_norm": 1.8068791627883911, "learning_rate": 1.888072530945392e-06, "loss": 0.1159, "step": 16560 }, { "epoch": 0.4900337138463358, "grad_norm": 1.8445942401885986, "learning_rate": 1.8892127402158463e-06, "loss": 0.0977, "step": 16570 }, { "epoch": 0.49032944934050987, "grad_norm": 1.918713092803955, "learning_rate": 1.890352949486301e-06, "loss": 0.0965, "step": 16580 }, { "epoch": 0.49062518483468387, "grad_norm": 1.4479951858520508, "learning_rate": 1.891493158756755e-06, "loss": 0.0965, "step": 16590 }, { "epoch": 0.49092092032885787, "grad_norm": 1.3636616468429565, "learning_rate": 1.8926333680272095e-06, "loss": 0.0933, "step": 16600 }, { "epoch": 0.49121665582303187, "grad_norm": 2.7475576400756836, "learning_rate": 1.8937735772976638e-06, "loss": 0.1003, "step": 16610 }, { "epoch": 0.49151239131720587, "grad_norm": 1.1670784950256348, "learning_rate": 1.8949137865681184e-06, "loss": 0.0929, "step": 16620 }, { "epoch": 0.4918081268113799, "grad_norm": 1.2996208667755127, "learning_rate": 1.8960539958385726e-06, "loss": 0.1014, "step": 16630 }, { "epoch": 0.4921038623055539, "grad_norm": 1.6657073497772217, "learning_rate": 1.897194205109027e-06, "loss": 0.0841, "step": 16640 }, { "epoch": 0.4923995977997279, "grad_norm": 1.5630896091461182, "learning_rate": 1.8983344143794814e-06, "loss": 0.0672, "step": 16650 }, { "epoch": 0.4926953332939019, "grad_norm": 1.6647597551345825, "learning_rate": 1.8994746236499357e-06, "loss": 0.1011, "step": 16660 }, { "epoch": 0.49299106878807597, "grad_norm": 2.2349343299865723, "learning_rate": 1.90061483292039e-06, "loss": 0.0838, "step": 16670 }, { "epoch": 0.49328680428224997, "grad_norm": 2.039548873901367, "learning_rate": 1.9017550421908445e-06, "loss": 0.0847, "step": 16680 }, { "epoch": 0.49358253977642397, "grad_norm": 1.773619294166565, "learning_rate": 1.9028952514612989e-06, "loss": 0.0957, "step": 16690 }, { "epoch": 0.49387827527059797, "grad_norm": 1.2894055843353271, "learning_rate": 1.9040354607317532e-06, "loss": 0.073, "step": 16700 }, { "epoch": 0.49417401076477196, "grad_norm": 2.142158269882202, "learning_rate": 1.9051756700022074e-06, "loss": 0.1316, "step": 16710 }, { "epoch": 0.494469746258946, "grad_norm": 1.165408968925476, "learning_rate": 1.906315879272662e-06, "loss": 0.0959, "step": 16720 }, { "epoch": 0.49476548175312, "grad_norm": 1.4742584228515625, "learning_rate": 1.907456088543116e-06, "loss": 0.1099, "step": 16730 }, { "epoch": 0.495061217247294, "grad_norm": 1.382782220840454, "learning_rate": 1.9085962978135707e-06, "loss": 0.0981, "step": 16740 }, { "epoch": 0.495356952741468, "grad_norm": 1.8391563892364502, "learning_rate": 1.909736507084025e-06, "loss": 0.0656, "step": 16750 }, { "epoch": 0.49565268823564207, "grad_norm": 1.0721904039382935, "learning_rate": 1.9108767163544795e-06, "loss": 0.1309, "step": 16760 }, { "epoch": 0.49594842372981607, "grad_norm": 1.429514765739441, "learning_rate": 1.9120169256249337e-06, "loss": 0.0799, "step": 16770 }, { "epoch": 0.49624415922399007, "grad_norm": 1.4317594766616821, "learning_rate": 1.9131571348953883e-06, "loss": 0.1066, "step": 16780 }, { "epoch": 0.49653989471816407, "grad_norm": 1.4637778997421265, "learning_rate": 1.9142973441658424e-06, "loss": 0.0908, "step": 16790 }, { "epoch": 0.49683563021233806, "grad_norm": 1.6011683940887451, "learning_rate": 1.915437553436297e-06, "loss": 0.0799, "step": 16800 }, { "epoch": 0.4971313657065121, "grad_norm": 1.2750312089920044, "learning_rate": 1.916577762706751e-06, "loss": 0.1102, "step": 16810 }, { "epoch": 0.4974271012006861, "grad_norm": 1.5616227388381958, "learning_rate": 1.9177179719772058e-06, "loss": 0.0931, "step": 16820 }, { "epoch": 0.4977228366948601, "grad_norm": 1.0894662141799927, "learning_rate": 1.91885818124766e-06, "loss": 0.1078, "step": 16830 }, { "epoch": 0.4980185721890341, "grad_norm": 1.1919078826904297, "learning_rate": 1.9199983905181145e-06, "loss": 0.0863, "step": 16840 }, { "epoch": 0.4983143076832081, "grad_norm": 2.066477060317993, "learning_rate": 1.9211385997885687e-06, "loss": 0.0942, "step": 16850 }, { "epoch": 0.49861004317738217, "grad_norm": 1.661637783050537, "learning_rate": 1.9222788090590233e-06, "loss": 0.0934, "step": 16860 }, { "epoch": 0.49890577867155617, "grad_norm": 2.0801851749420166, "learning_rate": 1.9234190183294774e-06, "loss": 0.1095, "step": 16870 }, { "epoch": 0.49920151416573016, "grad_norm": 2.0130064487457275, "learning_rate": 1.924559227599932e-06, "loss": 0.0957, "step": 16880 }, { "epoch": 0.49949724965990416, "grad_norm": 1.9957391023635864, "learning_rate": 1.925699436870386e-06, "loss": 0.1128, "step": 16890 }, { "epoch": 0.4997929851540782, "grad_norm": 1.1081006526947021, "learning_rate": 1.9268396461408408e-06, "loss": 0.0693, "step": 16900 }, { "epoch": 0.5000887206482522, "grad_norm": 2.6310136318206787, "learning_rate": 1.927979855411295e-06, "loss": 0.1062, "step": 16910 }, { "epoch": 0.5003844561424262, "grad_norm": 1.868896722793579, "learning_rate": 1.9291200646817495e-06, "loss": 0.1053, "step": 16920 }, { "epoch": 0.5006801916366003, "grad_norm": 1.4395815134048462, "learning_rate": 1.9302602739522037e-06, "loss": 0.1157, "step": 16930 }, { "epoch": 0.5009759271307742, "grad_norm": 0.8926964402198792, "learning_rate": 1.9314004832226583e-06, "loss": 0.0866, "step": 16940 }, { "epoch": 0.5012716626249483, "grad_norm": 0.7791577577590942, "learning_rate": 1.9325406924931124e-06, "loss": 0.0835, "step": 16950 }, { "epoch": 0.5015673981191222, "grad_norm": 1.9963293075561523, "learning_rate": 1.933680901763567e-06, "loss": 0.1003, "step": 16960 }, { "epoch": 0.5018631336132963, "grad_norm": 1.4009112119674683, "learning_rate": 1.934821111034021e-06, "loss": 0.1007, "step": 16970 }, { "epoch": 0.5021588691074703, "grad_norm": 1.576061487197876, "learning_rate": 1.9359613203044754e-06, "loss": 0.1009, "step": 16980 }, { "epoch": 0.5024546046016443, "grad_norm": 2.059969902038574, "learning_rate": 1.93710152957493e-06, "loss": 0.0827, "step": 16990 }, { "epoch": 0.5027503400958183, "grad_norm": 1.6689858436584473, "learning_rate": 1.938241738845384e-06, "loss": 0.0963, "step": 17000 }, { "epoch": 0.5030460755899923, "grad_norm": 1.6383532285690308, "learning_rate": 1.9393819481158387e-06, "loss": 0.1071, "step": 17010 }, { "epoch": 0.5033418110841663, "grad_norm": 1.6756535768508911, "learning_rate": 1.9405221573862933e-06, "loss": 0.0933, "step": 17020 }, { "epoch": 0.5036375465783404, "grad_norm": 1.543223261833191, "learning_rate": 1.9416623666567475e-06, "loss": 0.0986, "step": 17030 }, { "epoch": 0.5039332820725143, "grad_norm": 1.1412113904953003, "learning_rate": 1.942802575927202e-06, "loss": 0.0905, "step": 17040 }, { "epoch": 0.5042290175666884, "grad_norm": 1.5712900161743164, "learning_rate": 1.943942785197656e-06, "loss": 0.0965, "step": 17050 }, { "epoch": 0.5045247530608624, "grad_norm": 1.039743423461914, "learning_rate": 1.9450829944681104e-06, "loss": 0.1069, "step": 17060 }, { "epoch": 0.5048204885550364, "grad_norm": 1.3610103130340576, "learning_rate": 1.946223203738565e-06, "loss": 0.1048, "step": 17070 }, { "epoch": 0.5051162240492104, "grad_norm": 1.9840859174728394, "learning_rate": 1.947363413009019e-06, "loss": 0.109, "step": 17080 }, { "epoch": 0.5054119595433844, "grad_norm": 1.1901952028274536, "learning_rate": 1.9485036222794737e-06, "loss": 0.1054, "step": 17090 }, { "epoch": 0.5057076950375584, "grad_norm": 0.9500211477279663, "learning_rate": 1.949643831549928e-06, "loss": 0.0974, "step": 17100 }, { "epoch": 0.5060034305317325, "grad_norm": 1.5983892679214478, "learning_rate": 1.9507840408203825e-06, "loss": 0.1391, "step": 17110 }, { "epoch": 0.5062991660259064, "grad_norm": 1.065449595451355, "learning_rate": 1.951924250090837e-06, "loss": 0.0785, "step": 17120 }, { "epoch": 0.5065949015200805, "grad_norm": 2.0503804683685303, "learning_rate": 1.953064459361291e-06, "loss": 0.0946, "step": 17130 }, { "epoch": 0.5068906370142544, "grad_norm": 1.192305088043213, "learning_rate": 1.9542046686317454e-06, "loss": 0.09, "step": 17140 }, { "epoch": 0.5071863725084285, "grad_norm": 1.0249571800231934, "learning_rate": 1.9553448779022e-06, "loss": 0.0884, "step": 17150 }, { "epoch": 0.5074821080026025, "grad_norm": 1.6912341117858887, "learning_rate": 1.956485087172654e-06, "loss": 0.0945, "step": 17160 }, { "epoch": 0.5077778434967765, "grad_norm": 1.356631875038147, "learning_rate": 1.9576252964431087e-06, "loss": 0.1217, "step": 17170 }, { "epoch": 0.5080735789909505, "grad_norm": 2.0948522090911865, "learning_rate": 1.958765505713563e-06, "loss": 0.1007, "step": 17180 }, { "epoch": 0.5083693144851245, "grad_norm": 1.612525224685669, "learning_rate": 1.9599057149840175e-06, "loss": 0.0984, "step": 17190 }, { "epoch": 0.5086650499792985, "grad_norm": 1.6700327396392822, "learning_rate": 1.961045924254472e-06, "loss": 0.0777, "step": 17200 }, { "epoch": 0.5089607854734726, "grad_norm": 2.2647652626037598, "learning_rate": 1.962186133524926e-06, "loss": 0.1015, "step": 17210 }, { "epoch": 0.5092565209676465, "grad_norm": 1.9750953912734985, "learning_rate": 1.9633263427953804e-06, "loss": 0.1171, "step": 17220 }, { "epoch": 0.5095522564618206, "grad_norm": 1.166587471961975, "learning_rate": 1.9644665520658346e-06, "loss": 0.0818, "step": 17230 }, { "epoch": 0.5098479919559945, "grad_norm": 0.9745687246322632, "learning_rate": 1.965606761336289e-06, "loss": 0.0886, "step": 17240 }, { "epoch": 0.5101437274501686, "grad_norm": 1.2820062637329102, "learning_rate": 1.9667469706067438e-06, "loss": 0.0855, "step": 17250 }, { "epoch": 0.5104394629443426, "grad_norm": 1.3832368850708008, "learning_rate": 1.967887179877198e-06, "loss": 0.1172, "step": 17260 }, { "epoch": 0.5107351984385166, "grad_norm": 1.6920769214630127, "learning_rate": 1.9690273891476525e-06, "loss": 0.1133, "step": 17270 }, { "epoch": 0.5110309339326906, "grad_norm": 1.857712745666504, "learning_rate": 1.9701675984181067e-06, "loss": 0.0868, "step": 17280 }, { "epoch": 0.5113266694268647, "grad_norm": 1.0718028545379639, "learning_rate": 1.971307807688561e-06, "loss": 0.0843, "step": 17290 }, { "epoch": 0.5116224049210386, "grad_norm": 1.988608717918396, "learning_rate": 1.9724480169590154e-06, "loss": 0.0859, "step": 17300 }, { "epoch": 0.5119181404152127, "grad_norm": 1.4682272672653198, "learning_rate": 1.9735882262294696e-06, "loss": 0.1095, "step": 17310 }, { "epoch": 0.5122138759093866, "grad_norm": 1.0989248752593994, "learning_rate": 1.974728435499924e-06, "loss": 0.0825, "step": 17320 }, { "epoch": 0.5125096114035607, "grad_norm": 1.0837293863296509, "learning_rate": 1.9758686447703783e-06, "loss": 0.0968, "step": 17330 }, { "epoch": 0.5128053468977347, "grad_norm": 1.3481334447860718, "learning_rate": 1.977008854040833e-06, "loss": 0.0945, "step": 17340 }, { "epoch": 0.5131010823919087, "grad_norm": 2.2732598781585693, "learning_rate": 1.9781490633112875e-06, "loss": 0.0842, "step": 17350 }, { "epoch": 0.5133968178860827, "grad_norm": 1.0661756992340088, "learning_rate": 1.9792892725817417e-06, "loss": 0.1098, "step": 17360 }, { "epoch": 0.5136925533802567, "grad_norm": 1.7121026515960693, "learning_rate": 1.980429481852196e-06, "loss": 0.1045, "step": 17370 }, { "epoch": 0.5139882888744307, "grad_norm": 1.8681837320327759, "learning_rate": 1.9815696911226504e-06, "loss": 0.1128, "step": 17380 }, { "epoch": 0.5142840243686048, "grad_norm": 2.0182723999023438, "learning_rate": 1.9827099003931046e-06, "loss": 0.1012, "step": 17390 }, { "epoch": 0.5145797598627787, "grad_norm": 2.344133138656616, "learning_rate": 1.983850109663559e-06, "loss": 0.0875, "step": 17400 }, { "epoch": 0.5148754953569528, "grad_norm": 1.6553906202316284, "learning_rate": 1.9849903189340134e-06, "loss": 0.105, "step": 17410 }, { "epoch": 0.5151712308511267, "grad_norm": 1.4783555269241333, "learning_rate": 1.986130528204468e-06, "loss": 0.0972, "step": 17420 }, { "epoch": 0.5154669663453008, "grad_norm": 1.289200782775879, "learning_rate": 1.9872707374749225e-06, "loss": 0.0968, "step": 17430 }, { "epoch": 0.5157627018394748, "grad_norm": 1.397477149963379, "learning_rate": 1.9884109467453767e-06, "loss": 0.087, "step": 17440 }, { "epoch": 0.5160584373336488, "grad_norm": 1.4826557636260986, "learning_rate": 1.989551156015831e-06, "loss": 0.0769, "step": 17450 }, { "epoch": 0.5163541728278228, "grad_norm": 1.7364211082458496, "learning_rate": 1.990691365286285e-06, "loss": 0.0964, "step": 17460 }, { "epoch": 0.5166499083219968, "grad_norm": 1.3973385095596313, "learning_rate": 1.9918315745567396e-06, "loss": 0.093, "step": 17470 }, { "epoch": 0.5169456438161708, "grad_norm": 1.939770221710205, "learning_rate": 1.9929717838271942e-06, "loss": 0.1082, "step": 17480 }, { "epoch": 0.5172413793103449, "grad_norm": 0.5001053810119629, "learning_rate": 1.9941119930976484e-06, "loss": 0.1092, "step": 17490 }, { "epoch": 0.5175371148045188, "grad_norm": 0.7515036463737488, "learning_rate": 1.995252202368103e-06, "loss": 0.0886, "step": 17500 }, { "epoch": 0.5178328502986929, "grad_norm": 1.4547313451766968, "learning_rate": 1.996392411638557e-06, "loss": 0.111, "step": 17510 }, { "epoch": 0.5181285857928669, "grad_norm": 1.4887827634811401, "learning_rate": 1.9975326209090117e-06, "loss": 0.0892, "step": 17520 }, { "epoch": 0.5184243212870409, "grad_norm": 1.8959040641784668, "learning_rate": 1.998672830179466e-06, "loss": 0.1167, "step": 17530 }, { "epoch": 0.5187200567812149, "grad_norm": 1.3283613920211792, "learning_rate": 1.99981303944992e-06, "loss": 0.0798, "step": 17540 }, { "epoch": 0.5190157922753889, "grad_norm": 1.5916645526885986, "learning_rate": 2.0009532487203746e-06, "loss": 0.0813, "step": 17550 }, { "epoch": 0.5193115277695629, "grad_norm": 2.2790987491607666, "learning_rate": 2.002093457990829e-06, "loss": 0.106, "step": 17560 }, { "epoch": 0.519607263263737, "grad_norm": 1.031839370727539, "learning_rate": 2.0032336672612834e-06, "loss": 0.1052, "step": 17570 }, { "epoch": 0.5199029987579109, "grad_norm": 2.041240692138672, "learning_rate": 2.004373876531738e-06, "loss": 0.0902, "step": 17580 }, { "epoch": 0.520198734252085, "grad_norm": 1.991448998451233, "learning_rate": 2.005514085802192e-06, "loss": 0.0879, "step": 17590 }, { "epoch": 0.5204944697462589, "grad_norm": 1.2981057167053223, "learning_rate": 2.0066542950726467e-06, "loss": 0.0962, "step": 17600 }, { "epoch": 0.520790205240433, "grad_norm": 2.209890127182007, "learning_rate": 2.007794504343101e-06, "loss": 0.1035, "step": 17610 }, { "epoch": 0.521085940734607, "grad_norm": 1.3578513860702515, "learning_rate": 2.008934713613555e-06, "loss": 0.0958, "step": 17620 }, { "epoch": 0.521381676228781, "grad_norm": 0.9718051552772522, "learning_rate": 2.0100749228840097e-06, "loss": 0.0941, "step": 17630 }, { "epoch": 0.521677411722955, "grad_norm": 0.962350070476532, "learning_rate": 2.011215132154464e-06, "loss": 0.0698, "step": 17640 }, { "epoch": 0.521973147217129, "grad_norm": 1.5494030714035034, "learning_rate": 2.0123553414249184e-06, "loss": 0.0725, "step": 17650 }, { "epoch": 0.522268882711303, "grad_norm": 2.523710250854492, "learning_rate": 2.013495550695373e-06, "loss": 0.1023, "step": 17660 }, { "epoch": 0.5225646182054771, "grad_norm": 1.86310875415802, "learning_rate": 2.014635759965827e-06, "loss": 0.114, "step": 17670 }, { "epoch": 0.522860353699651, "grad_norm": 0.7668216228485107, "learning_rate": 2.0157759692362818e-06, "loss": 0.107, "step": 17680 }, { "epoch": 0.5231560891938251, "grad_norm": 1.4458492994308472, "learning_rate": 2.0169161785067355e-06, "loss": 0.0814, "step": 17690 }, { "epoch": 0.523451824687999, "grad_norm": 1.5692284107208252, "learning_rate": 2.01805638777719e-06, "loss": 0.0791, "step": 17700 }, { "epoch": 0.523747560182173, "grad_norm": 1.7481708526611328, "learning_rate": 2.0191965970476447e-06, "loss": 0.103, "step": 17710 }, { "epoch": 0.5240432956763471, "grad_norm": 1.3946908712387085, "learning_rate": 2.020336806318099e-06, "loss": 0.1112, "step": 17720 }, { "epoch": 0.524339031170521, "grad_norm": 1.0877000093460083, "learning_rate": 2.0214770155885534e-06, "loss": 0.1125, "step": 17730 }, { "epoch": 0.5246347666646951, "grad_norm": 1.1624692678451538, "learning_rate": 2.0226172248590076e-06, "loss": 0.0843, "step": 17740 }, { "epoch": 0.5249305021588692, "grad_norm": 1.099245309829712, "learning_rate": 2.023757434129462e-06, "loss": 0.0888, "step": 17750 }, { "epoch": 0.5252262376530431, "grad_norm": 1.4924838542938232, "learning_rate": 2.0248976433999168e-06, "loss": 0.1083, "step": 17760 }, { "epoch": 0.5255219731472172, "grad_norm": 1.2857871055603027, "learning_rate": 2.026037852670371e-06, "loss": 0.0973, "step": 17770 }, { "epoch": 0.5258177086413911, "grad_norm": 1.6881922483444214, "learning_rate": 2.027178061940825e-06, "loss": 0.0946, "step": 17780 }, { "epoch": 0.5261134441355652, "grad_norm": 2.309903621673584, "learning_rate": 2.0283182712112793e-06, "loss": 0.1036, "step": 17790 }, { "epoch": 0.5264091796297392, "grad_norm": 1.587709665298462, "learning_rate": 2.029458480481734e-06, "loss": 0.0777, "step": 17800 }, { "epoch": 0.5267049151239132, "grad_norm": 1.5764254331588745, "learning_rate": 2.0305986897521884e-06, "loss": 0.0946, "step": 17810 }, { "epoch": 0.5270006506180872, "grad_norm": 1.914955735206604, "learning_rate": 2.0317388990226426e-06, "loss": 0.1104, "step": 17820 }, { "epoch": 0.5272963861122612, "grad_norm": 1.637967586517334, "learning_rate": 2.032879108293097e-06, "loss": 0.0872, "step": 17830 }, { "epoch": 0.5275921216064352, "grad_norm": 1.2975105047225952, "learning_rate": 2.0340193175635518e-06, "loss": 0.099, "step": 17840 }, { "epoch": 0.5278878571006093, "grad_norm": 1.6501518487930298, "learning_rate": 2.035159526834006e-06, "loss": 0.0949, "step": 17850 }, { "epoch": 0.5281835925947832, "grad_norm": 1.6511424779891968, "learning_rate": 2.03629973610446e-06, "loss": 0.0942, "step": 17860 }, { "epoch": 0.5284793280889573, "grad_norm": 2.4298107624053955, "learning_rate": 2.0374399453749143e-06, "loss": 0.1111, "step": 17870 }, { "epoch": 0.5287750635831312, "grad_norm": 1.6901087760925293, "learning_rate": 2.038580154645369e-06, "loss": 0.0849, "step": 17880 }, { "epoch": 0.5290707990773053, "grad_norm": 1.35586678981781, "learning_rate": 2.0397203639158235e-06, "loss": 0.0811, "step": 17890 }, { "epoch": 0.5293665345714793, "grad_norm": 1.2927281856536865, "learning_rate": 2.0408605731862776e-06, "loss": 0.0961, "step": 17900 }, { "epoch": 0.5296622700656533, "grad_norm": 1.6364798545837402, "learning_rate": 2.042000782456732e-06, "loss": 0.0866, "step": 17910 }, { "epoch": 0.5299580055598273, "grad_norm": 1.6201329231262207, "learning_rate": 2.0431409917271864e-06, "loss": 0.0969, "step": 17920 }, { "epoch": 0.5302537410540012, "grad_norm": 2.4013428688049316, "learning_rate": 2.044281200997641e-06, "loss": 0.1081, "step": 17930 }, { "epoch": 0.5305494765481753, "grad_norm": 1.3722796440124512, "learning_rate": 2.045421410268095e-06, "loss": 0.0865, "step": 17940 }, { "epoch": 0.5308452120423494, "grad_norm": 1.3896211385726929, "learning_rate": 2.0465616195385493e-06, "loss": 0.0803, "step": 17950 }, { "epoch": 0.5311409475365233, "grad_norm": 1.6400245428085327, "learning_rate": 2.047701828809004e-06, "loss": 0.1111, "step": 17960 }, { "epoch": 0.5314366830306974, "grad_norm": 1.336059331893921, "learning_rate": 2.048842038079458e-06, "loss": 0.0956, "step": 17970 }, { "epoch": 0.5317324185248714, "grad_norm": 1.142477035522461, "learning_rate": 2.0499822473499126e-06, "loss": 0.0941, "step": 17980 }, { "epoch": 0.5320281540190454, "grad_norm": 1.4210506677627563, "learning_rate": 2.0511224566203672e-06, "loss": 0.1092, "step": 17990 }, { "epoch": 0.5323238895132194, "grad_norm": 0.8936936855316162, "learning_rate": 2.0522626658908214e-06, "loss": 0.073, "step": 18000 }, { "epoch": 0.5326196250073933, "grad_norm": 1.6223070621490479, "learning_rate": 2.053402875161276e-06, "loss": 0.1129, "step": 18010 }, { "epoch": 0.5329153605015674, "grad_norm": 1.6491690874099731, "learning_rate": 2.0545430844317297e-06, "loss": 0.0887, "step": 18020 }, { "epoch": 0.5332110959957415, "grad_norm": 0.8225910663604736, "learning_rate": 2.0556832937021843e-06, "loss": 0.1108, "step": 18030 }, { "epoch": 0.5335068314899154, "grad_norm": 1.0849820375442505, "learning_rate": 2.056823502972639e-06, "loss": 0.0882, "step": 18040 }, { "epoch": 0.5338025669840895, "grad_norm": 1.5801661014556885, "learning_rate": 2.057963712243093e-06, "loss": 0.0991, "step": 18050 }, { "epoch": 0.5340983024782634, "grad_norm": 1.1709924936294556, "learning_rate": 2.0591039215135476e-06, "loss": 0.1017, "step": 18060 }, { "epoch": 0.5343940379724375, "grad_norm": 1.8726155757904053, "learning_rate": 2.0602441307840022e-06, "loss": 0.1173, "step": 18070 }, { "epoch": 0.5346897734666115, "grad_norm": 1.249151349067688, "learning_rate": 2.0613843400544564e-06, "loss": 0.1106, "step": 18080 }, { "epoch": 0.5349855089607854, "grad_norm": 1.3656312227249146, "learning_rate": 2.062524549324911e-06, "loss": 0.0803, "step": 18090 }, { "epoch": 0.5352812444549595, "grad_norm": 1.1026639938354492, "learning_rate": 2.0636647585953647e-06, "loss": 0.1035, "step": 18100 }, { "epoch": 0.5355769799491334, "grad_norm": 2.6249797344207764, "learning_rate": 2.0648049678658193e-06, "loss": 0.1211, "step": 18110 }, { "epoch": 0.5358727154433075, "grad_norm": 1.106850028038025, "learning_rate": 2.065945177136274e-06, "loss": 0.0855, "step": 18120 }, { "epoch": 0.5361684509374816, "grad_norm": 1.032565712928772, "learning_rate": 2.067085386406728e-06, "loss": 0.0997, "step": 18130 }, { "epoch": 0.5364641864316555, "grad_norm": 1.2601999044418335, "learning_rate": 2.0682255956771827e-06, "loss": 0.0886, "step": 18140 }, { "epoch": 0.5367599219258296, "grad_norm": 1.2339531183242798, "learning_rate": 2.069365804947637e-06, "loss": 0.0932, "step": 18150 }, { "epoch": 0.5370556574200035, "grad_norm": 1.7705157995224, "learning_rate": 2.0705060142180914e-06, "loss": 0.0912, "step": 18160 }, { "epoch": 0.5373513929141776, "grad_norm": 1.7634367942810059, "learning_rate": 2.071646223488546e-06, "loss": 0.0994, "step": 18170 }, { "epoch": 0.5376471284083516, "grad_norm": 0.9259979128837585, "learning_rate": 2.0727864327589997e-06, "loss": 0.0874, "step": 18180 }, { "epoch": 0.5379428639025255, "grad_norm": 0.7268226742744446, "learning_rate": 2.0739266420294543e-06, "loss": 0.0882, "step": 18190 }, { "epoch": 0.5382385993966996, "grad_norm": 1.294443964958191, "learning_rate": 2.0750668512999085e-06, "loss": 0.0885, "step": 18200 }, { "epoch": 0.5385343348908737, "grad_norm": 1.3144948482513428, "learning_rate": 2.076207060570363e-06, "loss": 0.1228, "step": 18210 }, { "epoch": 0.5388300703850476, "grad_norm": 1.4608262777328491, "learning_rate": 2.0773472698408177e-06, "loss": 0.1129, "step": 18220 }, { "epoch": 0.5391258058792217, "grad_norm": 1.12860906124115, "learning_rate": 2.078487479111272e-06, "loss": 0.1095, "step": 18230 }, { "epoch": 0.5394215413733956, "grad_norm": 1.402301549911499, "learning_rate": 2.0796276883817264e-06, "loss": 0.0885, "step": 18240 }, { "epoch": 0.5397172768675697, "grad_norm": 1.5695854425430298, "learning_rate": 2.0807678976521806e-06, "loss": 0.0804, "step": 18250 }, { "epoch": 0.5400130123617437, "grad_norm": 1.602681279182434, "learning_rate": 2.0819081069226348e-06, "loss": 0.1065, "step": 18260 }, { "epoch": 0.5403087478559176, "grad_norm": 1.2109428644180298, "learning_rate": 2.0830483161930894e-06, "loss": 0.1074, "step": 18270 }, { "epoch": 0.5406044833500917, "grad_norm": 1.618951678276062, "learning_rate": 2.0841885254635435e-06, "loss": 0.0911, "step": 18280 }, { "epoch": 0.5409002188442656, "grad_norm": 1.2837086915969849, "learning_rate": 2.085328734733998e-06, "loss": 0.0907, "step": 18290 }, { "epoch": 0.5411959543384397, "grad_norm": 3.303377389907837, "learning_rate": 2.0864689440044527e-06, "loss": 0.0783, "step": 18300 }, { "epoch": 0.5414916898326138, "grad_norm": 1.0677680969238281, "learning_rate": 2.087609153274907e-06, "loss": 0.1228, "step": 18310 }, { "epoch": 0.5417874253267877, "grad_norm": 1.417771577835083, "learning_rate": 2.0887493625453614e-06, "loss": 0.0951, "step": 18320 }, { "epoch": 0.5420831608209618, "grad_norm": 1.0864380598068237, "learning_rate": 2.0898895718158156e-06, "loss": 0.0994, "step": 18330 }, { "epoch": 0.5423788963151357, "grad_norm": 0.5698200464248657, "learning_rate": 2.0910297810862698e-06, "loss": 0.0857, "step": 18340 }, { "epoch": 0.5426746318093097, "grad_norm": 1.5780315399169922, "learning_rate": 2.0921699903567244e-06, "loss": 0.0883, "step": 18350 }, { "epoch": 0.5429703673034838, "grad_norm": 1.6457182168960571, "learning_rate": 2.0933101996271785e-06, "loss": 0.1068, "step": 18360 }, { "epoch": 0.5432661027976577, "grad_norm": 1.4542841911315918, "learning_rate": 2.094450408897633e-06, "loss": 0.0828, "step": 18370 }, { "epoch": 0.5435618382918318, "grad_norm": 1.700870394706726, "learning_rate": 2.0955906181680873e-06, "loss": 0.0892, "step": 18380 }, { "epoch": 0.5438575737860057, "grad_norm": 1.8086128234863281, "learning_rate": 2.096730827438542e-06, "loss": 0.1006, "step": 18390 }, { "epoch": 0.5441533092801798, "grad_norm": 1.9312633275985718, "learning_rate": 2.0978710367089965e-06, "loss": 0.0857, "step": 18400 }, { "epoch": 0.5444490447743539, "grad_norm": 1.4284257888793945, "learning_rate": 2.0990112459794506e-06, "loss": 0.1034, "step": 18410 }, { "epoch": 0.5447447802685278, "grad_norm": 1.4959027767181396, "learning_rate": 2.100151455249905e-06, "loss": 0.1132, "step": 18420 }, { "epoch": 0.5450405157627018, "grad_norm": 1.3020039796829224, "learning_rate": 2.101291664520359e-06, "loss": 0.0908, "step": 18430 }, { "epoch": 0.5453362512568759, "grad_norm": 0.8586970567703247, "learning_rate": 2.1024318737908135e-06, "loss": 0.1035, "step": 18440 }, { "epoch": 0.5456319867510498, "grad_norm": 1.5839859247207642, "learning_rate": 2.103572083061268e-06, "loss": 0.0761, "step": 18450 }, { "epoch": 0.5459277222452239, "grad_norm": 1.5775835514068604, "learning_rate": 2.1047122923317223e-06, "loss": 0.1143, "step": 18460 }, { "epoch": 0.5462234577393978, "grad_norm": 1.8450723886489868, "learning_rate": 2.105852501602177e-06, "loss": 0.1099, "step": 18470 }, { "epoch": 0.5465191932335719, "grad_norm": 1.0516448020935059, "learning_rate": 2.106992710872631e-06, "loss": 0.1053, "step": 18480 }, { "epoch": 0.546814928727746, "grad_norm": 1.0591957569122314, "learning_rate": 2.1081329201430856e-06, "loss": 0.0857, "step": 18490 }, { "epoch": 0.5471106642219199, "grad_norm": 1.8917497396469116, "learning_rate": 2.10927312941354e-06, "loss": 0.0753, "step": 18500 }, { "epoch": 0.547406399716094, "grad_norm": 0.9918244481086731, "learning_rate": 2.110413338683994e-06, "loss": 0.0959, "step": 18510 }, { "epoch": 0.5477021352102679, "grad_norm": 1.486480474472046, "learning_rate": 2.1115535479544486e-06, "loss": 0.1084, "step": 18520 }, { "epoch": 0.547997870704442, "grad_norm": 0.9539822936058044, "learning_rate": 2.112693757224903e-06, "loss": 0.1005, "step": 18530 }, { "epoch": 0.548293606198616, "grad_norm": 0.877953827381134, "learning_rate": 2.1138339664953573e-06, "loss": 0.0954, "step": 18540 }, { "epoch": 0.54858934169279, "grad_norm": 2.730994462966919, "learning_rate": 2.114974175765812e-06, "loss": 0.0818, "step": 18550 }, { "epoch": 0.548885077186964, "grad_norm": 1.1847995519638062, "learning_rate": 2.116114385036266e-06, "loss": 0.1052, "step": 18560 }, { "epoch": 0.5491808126811379, "grad_norm": 1.8529677391052246, "learning_rate": 2.1172545943067207e-06, "loss": 0.1112, "step": 18570 }, { "epoch": 0.549476548175312, "grad_norm": 1.5897551774978638, "learning_rate": 2.118394803577175e-06, "loss": 0.0941, "step": 18580 }, { "epoch": 0.549772283669486, "grad_norm": 2.095381498336792, "learning_rate": 2.119535012847629e-06, "loss": 0.1088, "step": 18590 }, { "epoch": 0.55006801916366, "grad_norm": 1.4291260242462158, "learning_rate": 2.1206752221180836e-06, "loss": 0.0821, "step": 18600 }, { "epoch": 0.550363754657834, "grad_norm": 1.1781895160675049, "learning_rate": 2.1218154313885377e-06, "loss": 0.1119, "step": 18610 }, { "epoch": 0.550659490152008, "grad_norm": 1.918311357498169, "learning_rate": 2.1229556406589923e-06, "loss": 0.108, "step": 18620 }, { "epoch": 0.550955225646182, "grad_norm": 1.2765759229660034, "learning_rate": 2.124095849929447e-06, "loss": 0.112, "step": 18630 }, { "epoch": 0.5512509611403561, "grad_norm": 1.192980170249939, "learning_rate": 2.125236059199901e-06, "loss": 0.0866, "step": 18640 }, { "epoch": 0.55154669663453, "grad_norm": 1.196394443511963, "learning_rate": 2.1263762684703557e-06, "loss": 0.0748, "step": 18650 }, { "epoch": 0.5518424321287041, "grad_norm": 1.4497296810150146, "learning_rate": 2.1275164777408094e-06, "loss": 0.1003, "step": 18660 }, { "epoch": 0.5521381676228782, "grad_norm": 0.9385738372802734, "learning_rate": 2.128656687011264e-06, "loss": 0.0967, "step": 18670 }, { "epoch": 0.5524339031170521, "grad_norm": 1.7759283781051636, "learning_rate": 2.1297968962817186e-06, "loss": 0.1122, "step": 18680 }, { "epoch": 0.5527296386112261, "grad_norm": 0.7269529104232788, "learning_rate": 2.1309371055521728e-06, "loss": 0.1078, "step": 18690 }, { "epoch": 0.5530253741054001, "grad_norm": 1.0500686168670654, "learning_rate": 2.1320773148226273e-06, "loss": 0.0696, "step": 18700 }, { "epoch": 0.5533211095995741, "grad_norm": 2.043729782104492, "learning_rate": 2.1332175240930815e-06, "loss": 0.0888, "step": 18710 }, { "epoch": 0.5536168450937482, "grad_norm": 0.8105563521385193, "learning_rate": 2.134357733363536e-06, "loss": 0.0997, "step": 18720 }, { "epoch": 0.5539125805879221, "grad_norm": 1.1366374492645264, "learning_rate": 2.1354979426339907e-06, "loss": 0.0881, "step": 18730 }, { "epoch": 0.5542083160820962, "grad_norm": 0.9093496203422546, "learning_rate": 2.1366381519044444e-06, "loss": 0.0994, "step": 18740 }, { "epoch": 0.5545040515762701, "grad_norm": 0.7904778122901917, "learning_rate": 2.137778361174899e-06, "loss": 0.0795, "step": 18750 }, { "epoch": 0.5547997870704442, "grad_norm": 1.721495270729065, "learning_rate": 2.1389185704453536e-06, "loss": 0.1076, "step": 18760 }, { "epoch": 0.5550955225646182, "grad_norm": 1.6756616830825806, "learning_rate": 2.1400587797158078e-06, "loss": 0.0909, "step": 18770 }, { "epoch": 0.5553912580587922, "grad_norm": 2.067561149597168, "learning_rate": 2.1411989889862624e-06, "loss": 0.0868, "step": 18780 }, { "epoch": 0.5556869935529662, "grad_norm": 1.2774875164031982, "learning_rate": 2.1423391982567165e-06, "loss": 0.1005, "step": 18790 }, { "epoch": 0.5559827290471402, "grad_norm": 1.4803926944732666, "learning_rate": 2.143479407527171e-06, "loss": 0.0939, "step": 18800 }, { "epoch": 0.5562784645413142, "grad_norm": 1.5984158515930176, "learning_rate": 2.1446196167976257e-06, "loss": 0.1096, "step": 18810 }, { "epoch": 0.5565742000354883, "grad_norm": 1.7235732078552246, "learning_rate": 2.1457598260680794e-06, "loss": 0.0811, "step": 18820 }, { "epoch": 0.5568699355296622, "grad_norm": 1.0627923011779785, "learning_rate": 2.146900035338534e-06, "loss": 0.0941, "step": 18830 }, { "epoch": 0.5571656710238363, "grad_norm": 0.8756879568099976, "learning_rate": 2.148040244608988e-06, "loss": 0.0974, "step": 18840 }, { "epoch": 0.5574614065180102, "grad_norm": 1.3148666620254517, "learning_rate": 2.149180453879443e-06, "loss": 0.0844, "step": 18850 }, { "epoch": 0.5577571420121843, "grad_norm": 2.0067567825317383, "learning_rate": 2.1503206631498974e-06, "loss": 0.1046, "step": 18860 }, { "epoch": 0.5580528775063583, "grad_norm": 2.019836664199829, "learning_rate": 2.1514608724203515e-06, "loss": 0.0986, "step": 18870 }, { "epoch": 0.5583486130005323, "grad_norm": 1.152525782585144, "learning_rate": 2.152601081690806e-06, "loss": 0.1148, "step": 18880 }, { "epoch": 0.5586443484947063, "grad_norm": 1.2559099197387695, "learning_rate": 2.1537412909612603e-06, "loss": 0.1035, "step": 18890 }, { "epoch": 0.5589400839888804, "grad_norm": 1.2273929119110107, "learning_rate": 2.1548815002317145e-06, "loss": 0.0643, "step": 18900 }, { "epoch": 0.5592358194830543, "grad_norm": 1.129739761352539, "learning_rate": 2.156021709502169e-06, "loss": 0.1084, "step": 18910 }, { "epoch": 0.5595315549772284, "grad_norm": 2.2923429012298584, "learning_rate": 2.1571619187726232e-06, "loss": 0.0921, "step": 18920 }, { "epoch": 0.5598272904714023, "grad_norm": 1.8997101783752441, "learning_rate": 2.158302128043078e-06, "loss": 0.1221, "step": 18930 }, { "epoch": 0.5601230259655764, "grad_norm": 0.7218771576881409, "learning_rate": 2.159442337313532e-06, "loss": 0.0939, "step": 18940 }, { "epoch": 0.5604187614597504, "grad_norm": 1.3985130786895752, "learning_rate": 2.1605825465839866e-06, "loss": 0.092, "step": 18950 }, { "epoch": 0.5607144969539244, "grad_norm": 1.4305757284164429, "learning_rate": 2.161722755854441e-06, "loss": 0.1139, "step": 18960 }, { "epoch": 0.5610102324480984, "grad_norm": 1.1150184869766235, "learning_rate": 2.1628629651248953e-06, "loss": 0.0911, "step": 18970 }, { "epoch": 0.5613059679422724, "grad_norm": 1.4444881677627563, "learning_rate": 2.16400317439535e-06, "loss": 0.1009, "step": 18980 }, { "epoch": 0.5616017034364464, "grad_norm": 1.741866111755371, "learning_rate": 2.165143383665804e-06, "loss": 0.0818, "step": 18990 }, { "epoch": 0.5618974389306205, "grad_norm": 2.058046340942383, "learning_rate": 2.1662835929362582e-06, "loss": 0.0828, "step": 19000 }, { "epoch": 0.5621931744247944, "grad_norm": 1.8391274213790894, "learning_rate": 2.167423802206713e-06, "loss": 0.1113, "step": 19010 }, { "epoch": 0.5624889099189685, "grad_norm": 1.264690637588501, "learning_rate": 2.168564011477167e-06, "loss": 0.093, "step": 19020 }, { "epoch": 0.5627846454131424, "grad_norm": 2.0078530311584473, "learning_rate": 2.1697042207476216e-06, "loss": 0.0985, "step": 19030 }, { "epoch": 0.5630803809073165, "grad_norm": 1.0981534719467163, "learning_rate": 2.170844430018076e-06, "loss": 0.1009, "step": 19040 }, { "epoch": 0.5633761164014905, "grad_norm": 0.8994066119194031, "learning_rate": 2.1719846392885303e-06, "loss": 0.0936, "step": 19050 }, { "epoch": 0.5636718518956645, "grad_norm": 0.9297277331352234, "learning_rate": 2.173124848558985e-06, "loss": 0.0939, "step": 19060 }, { "epoch": 0.5639675873898385, "grad_norm": 0.9995452761650085, "learning_rate": 2.1742650578294387e-06, "loss": 0.0848, "step": 19070 }, { "epoch": 0.5642633228840125, "grad_norm": 1.7393485307693481, "learning_rate": 2.1754052670998932e-06, "loss": 0.0965, "step": 19080 }, { "epoch": 0.5645590583781865, "grad_norm": 1.688352346420288, "learning_rate": 2.176545476370348e-06, "loss": 0.0869, "step": 19090 }, { "epoch": 0.5648547938723606, "grad_norm": 1.0131027698516846, "learning_rate": 2.177685685640802e-06, "loss": 0.0842, "step": 19100 }, { "epoch": 0.5651505293665345, "grad_norm": 2.2623307704925537, "learning_rate": 2.1788258949112566e-06, "loss": 0.1123, "step": 19110 }, { "epoch": 0.5654462648607086, "grad_norm": 1.4323618412017822, "learning_rate": 2.1799661041817108e-06, "loss": 0.0991, "step": 19120 }, { "epoch": 0.5657420003548826, "grad_norm": 0.874683678150177, "learning_rate": 2.1811063134521653e-06, "loss": 0.0853, "step": 19130 }, { "epoch": 0.5660377358490566, "grad_norm": 0.794236421585083, "learning_rate": 2.18224652272262e-06, "loss": 0.0885, "step": 19140 }, { "epoch": 0.5663334713432306, "grad_norm": 0.9964127540588379, "learning_rate": 2.1833867319930737e-06, "loss": 0.0949, "step": 19150 }, { "epoch": 0.5666292068374046, "grad_norm": 1.7170658111572266, "learning_rate": 2.1845269412635283e-06, "loss": 0.1039, "step": 19160 }, { "epoch": 0.5669249423315786, "grad_norm": 1.7687870264053345, "learning_rate": 2.1856671505339824e-06, "loss": 0.1103, "step": 19170 }, { "epoch": 0.5672206778257527, "grad_norm": 1.2027908563613892, "learning_rate": 2.186807359804437e-06, "loss": 0.0831, "step": 19180 }, { "epoch": 0.5675164133199266, "grad_norm": 1.2384350299835205, "learning_rate": 2.1879475690748916e-06, "loss": 0.0719, "step": 19190 }, { "epoch": 0.5678121488141007, "grad_norm": 1.3758068084716797, "learning_rate": 2.1890877783453458e-06, "loss": 0.0768, "step": 19200 }, { "epoch": 0.5681078843082746, "grad_norm": 1.8833595514297485, "learning_rate": 2.1902279876158004e-06, "loss": 0.1056, "step": 19210 }, { "epoch": 0.5684036198024487, "grad_norm": 1.421047568321228, "learning_rate": 2.191368196886255e-06, "loss": 0.0915, "step": 19220 }, { "epoch": 0.5686993552966227, "grad_norm": 1.3670659065246582, "learning_rate": 2.1925084061567087e-06, "loss": 0.0909, "step": 19230 }, { "epoch": 0.5689950907907967, "grad_norm": 2.4995269775390625, "learning_rate": 2.1936486154271633e-06, "loss": 0.1189, "step": 19240 }, { "epoch": 0.5692908262849707, "grad_norm": 0.9997063279151917, "learning_rate": 2.1947888246976174e-06, "loss": 0.0508, "step": 19250 }, { "epoch": 0.5695865617791447, "grad_norm": 1.4573912620544434, "learning_rate": 2.195929033968072e-06, "loss": 0.0905, "step": 19260 }, { "epoch": 0.5698822972733187, "grad_norm": 1.1815694570541382, "learning_rate": 2.1970692432385266e-06, "loss": 0.1126, "step": 19270 }, { "epoch": 0.5701780327674928, "grad_norm": 1.2779428958892822, "learning_rate": 2.1982094525089808e-06, "loss": 0.1068, "step": 19280 }, { "epoch": 0.5704737682616667, "grad_norm": 1.4446903467178345, "learning_rate": 2.1993496617794354e-06, "loss": 0.0913, "step": 19290 }, { "epoch": 0.5707695037558408, "grad_norm": 2.088752031326294, "learning_rate": 2.2004898710498895e-06, "loss": 0.0884, "step": 19300 }, { "epoch": 0.5710652392500147, "grad_norm": 1.1521201133728027, "learning_rate": 2.2016300803203437e-06, "loss": 0.1072, "step": 19310 }, { "epoch": 0.5713609747441888, "grad_norm": 1.240229606628418, "learning_rate": 2.2027702895907983e-06, "loss": 0.1028, "step": 19320 }, { "epoch": 0.5716567102383628, "grad_norm": 1.2739291191101074, "learning_rate": 2.2039104988612525e-06, "loss": 0.1014, "step": 19330 }, { "epoch": 0.5719524457325368, "grad_norm": 1.595457673072815, "learning_rate": 2.205050708131707e-06, "loss": 0.0749, "step": 19340 }, { "epoch": 0.5722481812267108, "grad_norm": 1.5823577642440796, "learning_rate": 2.2061909174021612e-06, "loss": 0.0775, "step": 19350 }, { "epoch": 0.5725439167208849, "grad_norm": 1.6803311109542847, "learning_rate": 2.207331126672616e-06, "loss": 0.0909, "step": 19360 }, { "epoch": 0.5728396522150588, "grad_norm": 1.485538363456726, "learning_rate": 2.2084713359430704e-06, "loss": 0.0939, "step": 19370 }, { "epoch": 0.5731353877092329, "grad_norm": 1.6017603874206543, "learning_rate": 2.2096115452135246e-06, "loss": 0.0925, "step": 19380 }, { "epoch": 0.5734311232034068, "grad_norm": 1.8642455339431763, "learning_rate": 2.2107517544839787e-06, "loss": 0.0781, "step": 19390 }, { "epoch": 0.5737268586975809, "grad_norm": 1.3768730163574219, "learning_rate": 2.211891963754433e-06, "loss": 0.0729, "step": 19400 }, { "epoch": 0.5740225941917549, "grad_norm": 2.8111324310302734, "learning_rate": 2.2130321730248875e-06, "loss": 0.1122, "step": 19410 }, { "epoch": 0.5743183296859289, "grad_norm": 1.5878125429153442, "learning_rate": 2.214172382295342e-06, "loss": 0.1034, "step": 19420 }, { "epoch": 0.5746140651801029, "grad_norm": 0.8796367645263672, "learning_rate": 2.2153125915657962e-06, "loss": 0.1102, "step": 19430 }, { "epoch": 0.5749098006742769, "grad_norm": 1.6697742938995361, "learning_rate": 2.216452800836251e-06, "loss": 0.094, "step": 19440 }, { "epoch": 0.5752055361684509, "grad_norm": 1.2811641693115234, "learning_rate": 2.2175930101067054e-06, "loss": 0.08, "step": 19450 }, { "epoch": 0.575501271662625, "grad_norm": 1.322791576385498, "learning_rate": 2.2187332193771596e-06, "loss": 0.1044, "step": 19460 }, { "epoch": 0.5757970071567989, "grad_norm": 1.3587316274642944, "learning_rate": 2.2198734286476137e-06, "loss": 0.099, "step": 19470 }, { "epoch": 0.576092742650973, "grad_norm": 1.3516618013381958, "learning_rate": 2.221013637918068e-06, "loss": 0.0929, "step": 19480 }, { "epoch": 0.5763884781451469, "grad_norm": 1.4501036405563354, "learning_rate": 2.2221538471885225e-06, "loss": 0.0834, "step": 19490 }, { "epoch": 0.576684213639321, "grad_norm": 1.1902296543121338, "learning_rate": 2.223294056458977e-06, "loss": 0.0715, "step": 19500 }, { "epoch": 0.576979949133495, "grad_norm": 1.180477261543274, "learning_rate": 2.2244342657294312e-06, "loss": 0.0901, "step": 19510 }, { "epoch": 0.577275684627669, "grad_norm": 1.0645878314971924, "learning_rate": 2.225574474999886e-06, "loss": 0.1127, "step": 19520 }, { "epoch": 0.577571420121843, "grad_norm": 1.5441665649414062, "learning_rate": 2.22671468427034e-06, "loss": 0.1033, "step": 19530 }, { "epoch": 0.577867155616017, "grad_norm": 1.1055121421813965, "learning_rate": 2.2278548935407946e-06, "loss": 0.0884, "step": 19540 }, { "epoch": 0.578162891110191, "grad_norm": 1.7423735857009888, "learning_rate": 2.2289951028112488e-06, "loss": 0.0666, "step": 19550 }, { "epoch": 0.5784586266043651, "grad_norm": 1.4266847372055054, "learning_rate": 2.230135312081703e-06, "loss": 0.1069, "step": 19560 }, { "epoch": 0.578754362098539, "grad_norm": 1.9948599338531494, "learning_rate": 2.2312755213521575e-06, "loss": 0.1088, "step": 19570 }, { "epoch": 0.5790500975927131, "grad_norm": 1.4947446584701538, "learning_rate": 2.2324157306226117e-06, "loss": 0.0903, "step": 19580 }, { "epoch": 0.5793458330868871, "grad_norm": 2.54064679145813, "learning_rate": 2.2335559398930663e-06, "loss": 0.0903, "step": 19590 }, { "epoch": 0.5796415685810611, "grad_norm": 1.2463548183441162, "learning_rate": 2.234696149163521e-06, "loss": 0.0655, "step": 19600 }, { "epoch": 0.5799373040752351, "grad_norm": 1.9576417207717896, "learning_rate": 2.235836358433975e-06, "loss": 0.1035, "step": 19610 }, { "epoch": 0.5802330395694091, "grad_norm": 2.188645362854004, "learning_rate": 2.2369765677044296e-06, "loss": 0.0981, "step": 19620 }, { "epoch": 0.5805287750635831, "grad_norm": 1.180881142616272, "learning_rate": 2.2381167769748833e-06, "loss": 0.1009, "step": 19630 }, { "epoch": 0.5808245105577572, "grad_norm": 0.7314088344573975, "learning_rate": 2.239256986245338e-06, "loss": 0.0782, "step": 19640 }, { "epoch": 0.5811202460519311, "grad_norm": 1.5765327215194702, "learning_rate": 2.2403971955157925e-06, "loss": 0.0719, "step": 19650 }, { "epoch": 0.5814159815461052, "grad_norm": 1.1625007390975952, "learning_rate": 2.2415374047862467e-06, "loss": 0.1146, "step": 19660 }, { "epoch": 0.5817117170402791, "grad_norm": 1.52592933177948, "learning_rate": 2.2426776140567013e-06, "loss": 0.0985, "step": 19670 }, { "epoch": 0.5820074525344532, "grad_norm": 1.3959335088729858, "learning_rate": 2.243817823327156e-06, "loss": 0.1098, "step": 19680 }, { "epoch": 0.5823031880286272, "grad_norm": 0.8424413800239563, "learning_rate": 2.24495803259761e-06, "loss": 0.1062, "step": 19690 }, { "epoch": 0.5825989235228012, "grad_norm": 1.522362232208252, "learning_rate": 2.2460982418680646e-06, "loss": 0.0859, "step": 19700 }, { "epoch": 0.5828946590169752, "grad_norm": 1.6905187368392944, "learning_rate": 2.2472384511385184e-06, "loss": 0.1026, "step": 19710 }, { "epoch": 0.5831903945111492, "grad_norm": 1.3497294187545776, "learning_rate": 2.248378660408973e-06, "loss": 0.1033, "step": 19720 }, { "epoch": 0.5834861300053232, "grad_norm": 2.2225353717803955, "learning_rate": 2.2495188696794275e-06, "loss": 0.0972, "step": 19730 }, { "epoch": 0.5837818654994973, "grad_norm": 1.8206552267074585, "learning_rate": 2.2506590789498817e-06, "loss": 0.088, "step": 19740 }, { "epoch": 0.5840776009936712, "grad_norm": 1.8345036506652832, "learning_rate": 2.2517992882203363e-06, "loss": 0.1013, "step": 19750 }, { "epoch": 0.5843733364878453, "grad_norm": 0.9228172898292542, "learning_rate": 2.2529394974907905e-06, "loss": 0.1048, "step": 19760 }, { "epoch": 0.5846690719820192, "grad_norm": 0.9424046277999878, "learning_rate": 2.254079706761245e-06, "loss": 0.0981, "step": 19770 }, { "epoch": 0.5849648074761933, "grad_norm": 1.257042407989502, "learning_rate": 2.2552199160316996e-06, "loss": 0.0873, "step": 19780 }, { "epoch": 0.5852605429703673, "grad_norm": 1.612021803855896, "learning_rate": 2.2563601253021534e-06, "loss": 0.0956, "step": 19790 }, { "epoch": 0.5855562784645413, "grad_norm": 1.0890387296676636, "learning_rate": 2.257500334572608e-06, "loss": 0.0853, "step": 19800 }, { "epoch": 0.5858520139587153, "grad_norm": 1.1195919513702393, "learning_rate": 2.258640543843062e-06, "loss": 0.1001, "step": 19810 }, { "epoch": 0.5861477494528894, "grad_norm": 1.3724563121795654, "learning_rate": 2.2597807531135167e-06, "loss": 0.0942, "step": 19820 }, { "epoch": 0.5864434849470633, "grad_norm": 2.033764600753784, "learning_rate": 2.2609209623839713e-06, "loss": 0.1116, "step": 19830 }, { "epoch": 0.5867392204412374, "grad_norm": 1.8368415832519531, "learning_rate": 2.2620611716544255e-06, "loss": 0.1017, "step": 19840 }, { "epoch": 0.5870349559354113, "grad_norm": 1.6191133260726929, "learning_rate": 2.26320138092488e-06, "loss": 0.0754, "step": 19850 }, { "epoch": 0.5873306914295854, "grad_norm": 1.4324214458465576, "learning_rate": 2.2643415901953346e-06, "loss": 0.1097, "step": 19860 }, { "epoch": 0.5876264269237594, "grad_norm": 1.5505285263061523, "learning_rate": 2.2654817994657884e-06, "loss": 0.1088, "step": 19870 }, { "epoch": 0.5879221624179334, "grad_norm": 1.4606670141220093, "learning_rate": 2.266622008736243e-06, "loss": 0.0953, "step": 19880 }, { "epoch": 0.5882178979121074, "grad_norm": 1.6080447435379028, "learning_rate": 2.267762218006697e-06, "loss": 0.076, "step": 19890 }, { "epoch": 0.5885136334062814, "grad_norm": 1.283351182937622, "learning_rate": 2.2689024272771517e-06, "loss": 0.0793, "step": 19900 }, { "epoch": 0.5888093689004554, "grad_norm": 1.9093475341796875, "learning_rate": 2.2700426365476063e-06, "loss": 0.1123, "step": 19910 }, { "epoch": 0.5891051043946295, "grad_norm": 1.6311122179031372, "learning_rate": 2.2711828458180605e-06, "loss": 0.1008, "step": 19920 }, { "epoch": 0.5894008398888034, "grad_norm": 2.1830661296844482, "learning_rate": 2.272323055088515e-06, "loss": 0.1042, "step": 19930 }, { "epoch": 0.5896965753829775, "grad_norm": 1.1610571146011353, "learning_rate": 2.2734632643589692e-06, "loss": 0.0893, "step": 19940 }, { "epoch": 0.5899923108771514, "grad_norm": 1.4727662801742554, "learning_rate": 2.2746034736294234e-06, "loss": 0.0918, "step": 19950 }, { "epoch": 0.5902880463713255, "grad_norm": 1.3620164394378662, "learning_rate": 2.275743682899878e-06, "loss": 0.1092, "step": 19960 }, { "epoch": 0.5905837818654995, "grad_norm": 1.538587212562561, "learning_rate": 2.276883892170332e-06, "loss": 0.0961, "step": 19970 }, { "epoch": 0.5908795173596735, "grad_norm": 1.5889581441879272, "learning_rate": 2.2780241014407867e-06, "loss": 0.0796, "step": 19980 }, { "epoch": 0.5911752528538475, "grad_norm": 1.1361017227172852, "learning_rate": 2.279164310711241e-06, "loss": 0.0842, "step": 19990 }, { "epoch": 0.5914709883480215, "grad_norm": 2.323786497116089, "learning_rate": 2.2803045199816955e-06, "loss": 0.0924, "step": 20000 }, { "epoch": 0.5917667238421955, "grad_norm": 1.197782039642334, "learning_rate": 2.28144472925215e-06, "loss": 0.109, "step": 20010 }, { "epoch": 0.5920624593363696, "grad_norm": 1.864124059677124, "learning_rate": 2.2825849385226043e-06, "loss": 0.1067, "step": 20020 }, { "epoch": 0.5923581948305435, "grad_norm": 0.7996705174446106, "learning_rate": 2.2837251477930584e-06, "loss": 0.09, "step": 20030 }, { "epoch": 0.5926539303247176, "grad_norm": 1.268884539604187, "learning_rate": 2.2848653570635126e-06, "loss": 0.0835, "step": 20040 }, { "epoch": 0.5929496658188916, "grad_norm": 0.9492567181587219, "learning_rate": 2.286005566333967e-06, "loss": 0.0621, "step": 20050 }, { "epoch": 0.5932454013130656, "grad_norm": 1.4644436836242676, "learning_rate": 2.2871457756044218e-06, "loss": 0.1011, "step": 20060 }, { "epoch": 0.5935411368072396, "grad_norm": 1.237685203552246, "learning_rate": 2.288285984874876e-06, "loss": 0.0913, "step": 20070 }, { "epoch": 0.5938368723014136, "grad_norm": 0.7629734873771667, "learning_rate": 2.2894261941453305e-06, "loss": 0.0995, "step": 20080 }, { "epoch": 0.5941326077955876, "grad_norm": 1.323583722114563, "learning_rate": 2.290566403415785e-06, "loss": 0.0846, "step": 20090 }, { "epoch": 0.5944283432897617, "grad_norm": 1.8202491998672485, "learning_rate": 2.2917066126862393e-06, "loss": 0.0784, "step": 20100 }, { "epoch": 0.5947240787839356, "grad_norm": 1.2104151248931885, "learning_rate": 2.2928468219566934e-06, "loss": 0.1064, "step": 20110 }, { "epoch": 0.5950198142781097, "grad_norm": 1.465638279914856, "learning_rate": 2.2939870312271476e-06, "loss": 0.1119, "step": 20120 }, { "epoch": 0.5953155497722836, "grad_norm": 1.9549446105957031, "learning_rate": 2.295127240497602e-06, "loss": 0.0752, "step": 20130 }, { "epoch": 0.5956112852664577, "grad_norm": 1.1906028985977173, "learning_rate": 2.2962674497680568e-06, "loss": 0.0809, "step": 20140 }, { "epoch": 0.5959070207606317, "grad_norm": 1.1516668796539307, "learning_rate": 2.297407659038511e-06, "loss": 0.0743, "step": 20150 }, { "epoch": 0.5962027562548057, "grad_norm": 1.0516180992126465, "learning_rate": 2.2985478683089655e-06, "loss": 0.0982, "step": 20160 }, { "epoch": 0.5964984917489797, "grad_norm": 1.3924200534820557, "learning_rate": 2.2996880775794197e-06, "loss": 0.0981, "step": 20170 }, { "epoch": 0.5967942272431537, "grad_norm": 1.2587363719940186, "learning_rate": 2.3008282868498743e-06, "loss": 0.0928, "step": 20180 }, { "epoch": 0.5970899627373277, "grad_norm": 1.2845369577407837, "learning_rate": 2.3019684961203285e-06, "loss": 0.0782, "step": 20190 }, { "epoch": 0.5973856982315018, "grad_norm": 1.3370106220245361, "learning_rate": 2.3031087053907826e-06, "loss": 0.079, "step": 20200 }, { "epoch": 0.5976814337256757, "grad_norm": 1.1553319692611694, "learning_rate": 2.304248914661237e-06, "loss": 0.1186, "step": 20210 }, { "epoch": 0.5979771692198498, "grad_norm": 1.034548044204712, "learning_rate": 2.3053891239316914e-06, "loss": 0.0931, "step": 20220 }, { "epoch": 0.5982729047140237, "grad_norm": 1.5628103017807007, "learning_rate": 2.306529333202146e-06, "loss": 0.0889, "step": 20230 }, { "epoch": 0.5985686402081978, "grad_norm": 1.368308663368225, "learning_rate": 2.3076695424726005e-06, "loss": 0.0768, "step": 20240 }, { "epoch": 0.5988643757023718, "grad_norm": 3.2525577545166016, "learning_rate": 2.3088097517430547e-06, "loss": 0.1032, "step": 20250 }, { "epoch": 0.5991601111965458, "grad_norm": 1.3584270477294922, "learning_rate": 2.3099499610135093e-06, "loss": 0.1177, "step": 20260 }, { "epoch": 0.5994558466907198, "grad_norm": 1.2865482568740845, "learning_rate": 2.311090170283963e-06, "loss": 0.101, "step": 20270 }, { "epoch": 0.5997515821848939, "grad_norm": 0.9666823148727417, "learning_rate": 2.3122303795544176e-06, "loss": 0.0925, "step": 20280 }, { "epoch": 0.6000473176790678, "grad_norm": 0.9467524290084839, "learning_rate": 2.3133705888248722e-06, "loss": 0.0795, "step": 20290 }, { "epoch": 0.6003430531732419, "grad_norm": 1.7062208652496338, "learning_rate": 2.3145107980953264e-06, "loss": 0.0887, "step": 20300 }, { "epoch": 0.6006387886674158, "grad_norm": 1.135282278060913, "learning_rate": 2.315651007365781e-06, "loss": 0.1101, "step": 20310 }, { "epoch": 0.6009345241615899, "grad_norm": 0.9881283044815063, "learning_rate": 2.3167912166362356e-06, "loss": 0.0941, "step": 20320 }, { "epoch": 0.6012302596557639, "grad_norm": 1.9528708457946777, "learning_rate": 2.3179314259066897e-06, "loss": 0.0973, "step": 20330 }, { "epoch": 0.6015259951499379, "grad_norm": 1.3096508979797363, "learning_rate": 2.3190716351771443e-06, "loss": 0.0899, "step": 20340 }, { "epoch": 0.6018217306441119, "grad_norm": 1.1100993156433105, "learning_rate": 2.3202118444475985e-06, "loss": 0.0788, "step": 20350 }, { "epoch": 0.6021174661382859, "grad_norm": 1.8942760229110718, "learning_rate": 2.3213520537180526e-06, "loss": 0.112, "step": 20360 }, { "epoch": 0.6024132016324599, "grad_norm": 2.4432833194732666, "learning_rate": 2.3224922629885072e-06, "loss": 0.1287, "step": 20370 }, { "epoch": 0.602708937126634, "grad_norm": 1.1609652042388916, "learning_rate": 2.3236324722589614e-06, "loss": 0.0897, "step": 20380 }, { "epoch": 0.6030046726208079, "grad_norm": 0.628275454044342, "learning_rate": 2.324772681529416e-06, "loss": 0.0906, "step": 20390 }, { "epoch": 0.603300408114982, "grad_norm": 1.3120006322860718, "learning_rate": 2.32591289079987e-06, "loss": 0.0964, "step": 20400 }, { "epoch": 0.6035961436091559, "grad_norm": 1.4828115701675415, "learning_rate": 2.3270531000703247e-06, "loss": 0.122, "step": 20410 }, { "epoch": 0.60389187910333, "grad_norm": 0.7014898061752319, "learning_rate": 2.3281933093407793e-06, "loss": 0.0836, "step": 20420 }, { "epoch": 0.604187614597504, "grad_norm": 1.0527313947677612, "learning_rate": 2.3293335186112335e-06, "loss": 0.0943, "step": 20430 }, { "epoch": 0.604483350091678, "grad_norm": 1.3284902572631836, "learning_rate": 2.3304737278816877e-06, "loss": 0.0926, "step": 20440 }, { "epoch": 0.604779085585852, "grad_norm": 1.37040376663208, "learning_rate": 2.331613937152142e-06, "loss": 0.0852, "step": 20450 }, { "epoch": 0.605074821080026, "grad_norm": 1.864874243736267, "learning_rate": 2.3327541464225964e-06, "loss": 0.1348, "step": 20460 }, { "epoch": 0.6053705565742, "grad_norm": 1.768838882446289, "learning_rate": 2.333894355693051e-06, "loss": 0.109, "step": 20470 }, { "epoch": 0.6056662920683741, "grad_norm": 1.0715794563293457, "learning_rate": 2.335034564963505e-06, "loss": 0.1041, "step": 20480 }, { "epoch": 0.605962027562548, "grad_norm": 1.7145907878875732, "learning_rate": 2.3361747742339598e-06, "loss": 0.0992, "step": 20490 }, { "epoch": 0.6062577630567221, "grad_norm": 1.154641032218933, "learning_rate": 2.337314983504414e-06, "loss": 0.088, "step": 20500 }, { "epoch": 0.6065534985508961, "grad_norm": 1.5328869819641113, "learning_rate": 2.3384551927748685e-06, "loss": 0.0927, "step": 20510 }, { "epoch": 0.6068492340450701, "grad_norm": 1.267462968826294, "learning_rate": 2.3395954020453227e-06, "loss": 0.0889, "step": 20520 }, { "epoch": 0.6071449695392441, "grad_norm": 1.977137565612793, "learning_rate": 2.340735611315777e-06, "loss": 0.0881, "step": 20530 }, { "epoch": 0.6074407050334181, "grad_norm": 1.0680129528045654, "learning_rate": 2.3418758205862314e-06, "loss": 0.0862, "step": 20540 }, { "epoch": 0.6077364405275921, "grad_norm": 1.0591624975204468, "learning_rate": 2.343016029856686e-06, "loss": 0.0851, "step": 20550 }, { "epoch": 0.6080321760217662, "grad_norm": 1.5571324825286865, "learning_rate": 2.34415623912714e-06, "loss": 0.1051, "step": 20560 }, { "epoch": 0.6083279115159401, "grad_norm": 0.8007418513298035, "learning_rate": 2.3452964483975948e-06, "loss": 0.1122, "step": 20570 }, { "epoch": 0.6086236470101142, "grad_norm": 1.5987831354141235, "learning_rate": 2.346436657668049e-06, "loss": 0.0879, "step": 20580 }, { "epoch": 0.6089193825042881, "grad_norm": 0.887593150138855, "learning_rate": 2.3475768669385035e-06, "loss": 0.1148, "step": 20590 }, { "epoch": 0.6092151179984622, "grad_norm": 0.8747503757476807, "learning_rate": 2.3487170762089577e-06, "loss": 0.0881, "step": 20600 }, { "epoch": 0.6095108534926362, "grad_norm": 1.2024519443511963, "learning_rate": 2.349857285479412e-06, "loss": 0.1087, "step": 20610 }, { "epoch": 0.6098065889868102, "grad_norm": 1.466437578201294, "learning_rate": 2.3509974947498664e-06, "loss": 0.1049, "step": 20620 }, { "epoch": 0.6101023244809842, "grad_norm": 2.0733797550201416, "learning_rate": 2.3521377040203206e-06, "loss": 0.1102, "step": 20630 }, { "epoch": 0.6103980599751582, "grad_norm": 1.0833581686019897, "learning_rate": 2.353277913290775e-06, "loss": 0.0832, "step": 20640 }, { "epoch": 0.6106937954693322, "grad_norm": 1.1334306001663208, "learning_rate": 2.35441812256123e-06, "loss": 0.0729, "step": 20650 }, { "epoch": 0.6109895309635063, "grad_norm": 2.5121817588806152, "learning_rate": 2.355558331831684e-06, "loss": 0.1081, "step": 20660 }, { "epoch": 0.6112852664576802, "grad_norm": 1.4832355976104736, "learning_rate": 2.3566985411021385e-06, "loss": 0.0994, "step": 20670 }, { "epoch": 0.6115810019518543, "grad_norm": 1.0255330801010132, "learning_rate": 2.3578387503725923e-06, "loss": 0.0857, "step": 20680 }, { "epoch": 0.6118767374460282, "grad_norm": 2.150773763656616, "learning_rate": 2.358978959643047e-06, "loss": 0.1019, "step": 20690 }, { "epoch": 0.6121724729402023, "grad_norm": 1.4626824855804443, "learning_rate": 2.3601191689135015e-06, "loss": 0.091, "step": 20700 }, { "epoch": 0.6124682084343763, "grad_norm": 1.319991946220398, "learning_rate": 2.3612593781839556e-06, "loss": 0.097, "step": 20710 }, { "epoch": 0.6127639439285503, "grad_norm": 1.0776393413543701, "learning_rate": 2.3623995874544102e-06, "loss": 0.1022, "step": 20720 }, { "epoch": 0.6130596794227243, "grad_norm": 1.134462833404541, "learning_rate": 2.3635397967248644e-06, "loss": 0.0935, "step": 20730 }, { "epoch": 0.6133554149168984, "grad_norm": 0.7326345443725586, "learning_rate": 2.364680005995319e-06, "loss": 0.0746, "step": 20740 }, { "epoch": 0.6136511504110723, "grad_norm": 1.1821668148040771, "learning_rate": 2.3658202152657736e-06, "loss": 0.0863, "step": 20750 }, { "epoch": 0.6139468859052464, "grad_norm": 1.7108557224273682, "learning_rate": 2.3669604245362273e-06, "loss": 0.1007, "step": 20760 }, { "epoch": 0.6142426213994203, "grad_norm": 1.2482423782348633, "learning_rate": 2.368100633806682e-06, "loss": 0.0897, "step": 20770 }, { "epoch": 0.6145383568935944, "grad_norm": 2.018427610397339, "learning_rate": 2.3692408430771365e-06, "loss": 0.0857, "step": 20780 }, { "epoch": 0.6148340923877684, "grad_norm": 1.017331600189209, "learning_rate": 2.3703810523475906e-06, "loss": 0.0995, "step": 20790 }, { "epoch": 0.6151298278819424, "grad_norm": 1.4527872800827026, "learning_rate": 2.3715212616180452e-06, "loss": 0.0829, "step": 20800 }, { "epoch": 0.6154255633761164, "grad_norm": 1.5715242624282837, "learning_rate": 2.3726614708884994e-06, "loss": 0.114, "step": 20810 }, { "epoch": 0.6157212988702904, "grad_norm": 1.7132116556167603, "learning_rate": 2.373801680158954e-06, "loss": 0.0912, "step": 20820 }, { "epoch": 0.6160170343644644, "grad_norm": 1.2982984781265259, "learning_rate": 2.3749418894294086e-06, "loss": 0.0917, "step": 20830 }, { "epoch": 0.6163127698586385, "grad_norm": 0.9467558264732361, "learning_rate": 2.3760820986998623e-06, "loss": 0.0932, "step": 20840 }, { "epoch": 0.6166085053528124, "grad_norm": 1.0489991903305054, "learning_rate": 2.377222307970317e-06, "loss": 0.0967, "step": 20850 }, { "epoch": 0.6169042408469865, "grad_norm": 1.529193639755249, "learning_rate": 2.378362517240771e-06, "loss": 0.0978, "step": 20860 }, { "epoch": 0.6171999763411604, "grad_norm": 1.9268536567687988, "learning_rate": 2.3795027265112257e-06, "loss": 0.0878, "step": 20870 }, { "epoch": 0.6174957118353345, "grad_norm": 1.0695388317108154, "learning_rate": 2.3806429357816802e-06, "loss": 0.0908, "step": 20880 }, { "epoch": 0.6177914473295085, "grad_norm": 1.6690698862075806, "learning_rate": 2.3817831450521344e-06, "loss": 0.0842, "step": 20890 }, { "epoch": 0.6180871828236825, "grad_norm": 1.9679336547851562, "learning_rate": 2.382923354322589e-06, "loss": 0.088, "step": 20900 }, { "epoch": 0.6183829183178565, "grad_norm": 1.8438563346862793, "learning_rate": 2.384063563593043e-06, "loss": 0.1127, "step": 20910 }, { "epoch": 0.6186786538120305, "grad_norm": 1.7861558198928833, "learning_rate": 2.3852037728634973e-06, "loss": 0.0966, "step": 20920 }, { "epoch": 0.6189743893062045, "grad_norm": 1.135846495628357, "learning_rate": 2.386343982133952e-06, "loss": 0.0943, "step": 20930 }, { "epoch": 0.6192701248003786, "grad_norm": 1.5427213907241821, "learning_rate": 2.387484191404406e-06, "loss": 0.0963, "step": 20940 }, { "epoch": 0.6195658602945525, "grad_norm": 1.134124994277954, "learning_rate": 2.3886244006748607e-06, "loss": 0.0913, "step": 20950 }, { "epoch": 0.6198615957887266, "grad_norm": 1.574773907661438, "learning_rate": 2.389764609945315e-06, "loss": 0.093, "step": 20960 }, { "epoch": 0.6201573312829006, "grad_norm": 1.8426318168640137, "learning_rate": 2.3909048192157694e-06, "loss": 0.1002, "step": 20970 }, { "epoch": 0.6204530667770746, "grad_norm": 1.2113763093948364, "learning_rate": 2.392045028486224e-06, "loss": 0.113, "step": 20980 }, { "epoch": 0.6207488022712486, "grad_norm": 1.2810699939727783, "learning_rate": 2.393185237756678e-06, "loss": 0.0943, "step": 20990 }, { "epoch": 0.6210445377654226, "grad_norm": 1.0960906744003296, "learning_rate": 2.3943254470271323e-06, "loss": 0.078, "step": 21000 }, { "epoch": 0.6213402732595966, "grad_norm": 0.7717193365097046, "learning_rate": 2.395465656297587e-06, "loss": 0.1007, "step": 21010 }, { "epoch": 0.6216360087537707, "grad_norm": 1.235219120979309, "learning_rate": 2.396605865568041e-06, "loss": 0.119, "step": 21020 }, { "epoch": 0.6219317442479446, "grad_norm": 1.0888067483901978, "learning_rate": 2.3977460748384957e-06, "loss": 0.0827, "step": 21030 }, { "epoch": 0.6222274797421187, "grad_norm": 1.3895015716552734, "learning_rate": 2.39888628410895e-06, "loss": 0.0796, "step": 21040 }, { "epoch": 0.6225232152362926, "grad_norm": 1.9062349796295166, "learning_rate": 2.4000264933794044e-06, "loss": 0.0876, "step": 21050 }, { "epoch": 0.6228189507304667, "grad_norm": 1.0693373680114746, "learning_rate": 2.401166702649859e-06, "loss": 0.0953, "step": 21060 }, { "epoch": 0.6231146862246407, "grad_norm": 1.5277255773544312, "learning_rate": 2.402306911920313e-06, "loss": 0.0896, "step": 21070 }, { "epoch": 0.6234104217188147, "grad_norm": 1.6092650890350342, "learning_rate": 2.4034471211907674e-06, "loss": 0.0757, "step": 21080 }, { "epoch": 0.6237061572129887, "grad_norm": 0.9804664254188538, "learning_rate": 2.4045873304612215e-06, "loss": 0.0922, "step": 21090 }, { "epoch": 0.6240018927071627, "grad_norm": 1.0088791847229004, "learning_rate": 2.405727539731676e-06, "loss": 0.1073, "step": 21100 }, { "epoch": 0.6242976282013367, "grad_norm": 1.7303740978240967, "learning_rate": 2.4068677490021307e-06, "loss": 0.1024, "step": 21110 }, { "epoch": 0.6245933636955108, "grad_norm": 1.1465234756469727, "learning_rate": 2.408007958272585e-06, "loss": 0.1023, "step": 21120 }, { "epoch": 0.6248890991896847, "grad_norm": 0.7941556572914124, "learning_rate": 2.4091481675430395e-06, "loss": 0.0857, "step": 21130 }, { "epoch": 0.6251848346838588, "grad_norm": 1.1885600090026855, "learning_rate": 2.4102883768134936e-06, "loss": 0.1093, "step": 21140 }, { "epoch": 0.6254805701780327, "grad_norm": 1.5826218128204346, "learning_rate": 2.411428586083948e-06, "loss": 0.0923, "step": 21150 }, { "epoch": 0.6257763056722068, "grad_norm": 1.4768006801605225, "learning_rate": 2.4125687953544024e-06, "loss": 0.098, "step": 21160 }, { "epoch": 0.6260720411663808, "grad_norm": 1.8130203485488892, "learning_rate": 2.4137090046248565e-06, "loss": 0.1121, "step": 21170 }, { "epoch": 0.6263677766605548, "grad_norm": 1.0559029579162598, "learning_rate": 2.414849213895311e-06, "loss": 0.0933, "step": 21180 }, { "epoch": 0.6266635121547288, "grad_norm": 1.9049593210220337, "learning_rate": 2.4159894231657653e-06, "loss": 0.0914, "step": 21190 }, { "epoch": 0.6269592476489029, "grad_norm": 1.146331787109375, "learning_rate": 2.41712963243622e-06, "loss": 0.0799, "step": 21200 }, { "epoch": 0.6272549831430768, "grad_norm": 1.8150924444198608, "learning_rate": 2.4182698417066745e-06, "loss": 0.1135, "step": 21210 }, { "epoch": 0.6275507186372509, "grad_norm": 1.6486459970474243, "learning_rate": 2.4194100509771286e-06, "loss": 0.0998, "step": 21220 }, { "epoch": 0.6278464541314248, "grad_norm": 1.4414088726043701, "learning_rate": 2.4205502602475832e-06, "loss": 0.1014, "step": 21230 }, { "epoch": 0.6281421896255989, "grad_norm": 1.278504729270935, "learning_rate": 2.4216904695180374e-06, "loss": 0.0876, "step": 21240 }, { "epoch": 0.6284379251197729, "grad_norm": 1.1310172080993652, "learning_rate": 2.4228306787884916e-06, "loss": 0.0659, "step": 21250 }, { "epoch": 0.6287336606139469, "grad_norm": 2.242408514022827, "learning_rate": 2.423970888058946e-06, "loss": 0.0942, "step": 21260 }, { "epoch": 0.6290293961081209, "grad_norm": 1.7769070863723755, "learning_rate": 2.4251110973294003e-06, "loss": 0.1038, "step": 21270 }, { "epoch": 0.6293251316022949, "grad_norm": 1.4829185009002686, "learning_rate": 2.426251306599855e-06, "loss": 0.1005, "step": 21280 }, { "epoch": 0.6296208670964689, "grad_norm": 1.2124401330947876, "learning_rate": 2.4273915158703095e-06, "loss": 0.1, "step": 21290 }, { "epoch": 0.629916602590643, "grad_norm": 1.7094838619232178, "learning_rate": 2.4285317251407637e-06, "loss": 0.0688, "step": 21300 }, { "epoch": 0.6302123380848169, "grad_norm": 1.101381540298462, "learning_rate": 2.4296719344112182e-06, "loss": 0.0908, "step": 21310 }, { "epoch": 0.630508073578991, "grad_norm": 1.4692163467407227, "learning_rate": 2.430812143681672e-06, "loss": 0.1067, "step": 21320 }, { "epoch": 0.6308038090731649, "grad_norm": 1.0012657642364502, "learning_rate": 2.4319523529521266e-06, "loss": 0.0885, "step": 21330 }, { "epoch": 0.631099544567339, "grad_norm": 1.3651825189590454, "learning_rate": 2.433092562222581e-06, "loss": 0.0839, "step": 21340 }, { "epoch": 0.631395280061513, "grad_norm": 1.9750335216522217, "learning_rate": 2.4342327714930353e-06, "loss": 0.0782, "step": 21350 }, { "epoch": 0.631691015555687, "grad_norm": 2.0150692462921143, "learning_rate": 2.43537298076349e-06, "loss": 0.1109, "step": 21360 }, { "epoch": 0.631986751049861, "grad_norm": 1.4485379457473755, "learning_rate": 2.436513190033944e-06, "loss": 0.0921, "step": 21370 }, { "epoch": 0.632282486544035, "grad_norm": 2.01166033744812, "learning_rate": 2.4376533993043987e-06, "loss": 0.1081, "step": 21380 }, { "epoch": 0.632578222038209, "grad_norm": 2.2153220176696777, "learning_rate": 2.4387936085748533e-06, "loss": 0.0924, "step": 21390 }, { "epoch": 0.6328739575323831, "grad_norm": 1.2475597858428955, "learning_rate": 2.439933817845307e-06, "loss": 0.0857, "step": 21400 }, { "epoch": 0.633169693026557, "grad_norm": 1.5477672815322876, "learning_rate": 2.4410740271157616e-06, "loss": 0.1326, "step": 21410 }, { "epoch": 0.6334654285207311, "grad_norm": 2.010446548461914, "learning_rate": 2.4422142363862158e-06, "loss": 0.1021, "step": 21420 }, { "epoch": 0.6337611640149051, "grad_norm": 0.9138085842132568, "learning_rate": 2.4433544456566703e-06, "loss": 0.104, "step": 21430 }, { "epoch": 0.6340568995090791, "grad_norm": 2.2045726776123047, "learning_rate": 2.444494654927125e-06, "loss": 0.0909, "step": 21440 }, { "epoch": 0.6343526350032531, "grad_norm": 1.316177487373352, "learning_rate": 2.445634864197579e-06, "loss": 0.0819, "step": 21450 }, { "epoch": 0.6346483704974271, "grad_norm": 1.4815231561660767, "learning_rate": 2.4467750734680337e-06, "loss": 0.0925, "step": 21460 }, { "epoch": 0.6349441059916011, "grad_norm": 1.0918701887130737, "learning_rate": 2.4479152827384883e-06, "loss": 0.0995, "step": 21470 }, { "epoch": 0.6352398414857752, "grad_norm": 1.0533379316329956, "learning_rate": 2.449055492008942e-06, "loss": 0.1062, "step": 21480 }, { "epoch": 0.6355355769799491, "grad_norm": 1.1636584997177124, "learning_rate": 2.4501957012793966e-06, "loss": 0.0915, "step": 21490 }, { "epoch": 0.6358313124741232, "grad_norm": 1.228001594543457, "learning_rate": 2.4513359105498508e-06, "loss": 0.0814, "step": 21500 }, { "epoch": 0.6361270479682971, "grad_norm": 1.71761953830719, "learning_rate": 2.4524761198203054e-06, "loss": 0.0962, "step": 21510 }, { "epoch": 0.6364227834624712, "grad_norm": 1.0392463207244873, "learning_rate": 2.45361632909076e-06, "loss": 0.1032, "step": 21520 }, { "epoch": 0.6367185189566452, "grad_norm": 1.326043963432312, "learning_rate": 2.454756538361214e-06, "loss": 0.0834, "step": 21530 }, { "epoch": 0.6370142544508192, "grad_norm": 1.6277879476547241, "learning_rate": 2.4558967476316687e-06, "loss": 0.0951, "step": 21540 }, { "epoch": 0.6373099899449932, "grad_norm": 1.1310862302780151, "learning_rate": 2.457036956902123e-06, "loss": 0.1079, "step": 21550 }, { "epoch": 0.6376057254391672, "grad_norm": 1.5748999118804932, "learning_rate": 2.4581771661725775e-06, "loss": 0.104, "step": 21560 }, { "epoch": 0.6379014609333412, "grad_norm": 1.316678524017334, "learning_rate": 2.4593173754430316e-06, "loss": 0.1101, "step": 21570 }, { "epoch": 0.6381971964275153, "grad_norm": 1.2254667282104492, "learning_rate": 2.4604575847134858e-06, "loss": 0.0947, "step": 21580 }, { "epoch": 0.6384929319216892, "grad_norm": 1.3845397233963013, "learning_rate": 2.4615977939839404e-06, "loss": 0.095, "step": 21590 }, { "epoch": 0.6387886674158633, "grad_norm": 0.843313992023468, "learning_rate": 2.4627380032543945e-06, "loss": 0.0706, "step": 21600 }, { "epoch": 0.6390844029100372, "grad_norm": 1.7169829607009888, "learning_rate": 2.463878212524849e-06, "loss": 0.1119, "step": 21610 }, { "epoch": 0.6393801384042113, "grad_norm": 2.2376997470855713, "learning_rate": 2.4650184217953037e-06, "loss": 0.1217, "step": 21620 }, { "epoch": 0.6396758738983853, "grad_norm": 1.3732236623764038, "learning_rate": 2.466158631065758e-06, "loss": 0.0835, "step": 21630 }, { "epoch": 0.6399716093925593, "grad_norm": 0.9270585179328918, "learning_rate": 2.4672988403362125e-06, "loss": 0.0885, "step": 21640 }, { "epoch": 0.6402673448867333, "grad_norm": 1.2544268369674683, "learning_rate": 2.468439049606666e-06, "loss": 0.0756, "step": 21650 }, { "epoch": 0.6405630803809074, "grad_norm": 1.6459662914276123, "learning_rate": 2.469579258877121e-06, "loss": 0.1105, "step": 21660 }, { "epoch": 0.6408588158750813, "grad_norm": 1.2285573482513428, "learning_rate": 2.4707194681475754e-06, "loss": 0.1143, "step": 21670 }, { "epoch": 0.6411545513692554, "grad_norm": 1.5738500356674194, "learning_rate": 2.4718596774180296e-06, "loss": 0.0977, "step": 21680 }, { "epoch": 0.6414502868634293, "grad_norm": 0.7395358085632324, "learning_rate": 2.472999886688484e-06, "loss": 0.1055, "step": 21690 }, { "epoch": 0.6417460223576034, "grad_norm": 1.20535409450531, "learning_rate": 2.4741400959589387e-06, "loss": 0.0766, "step": 21700 }, { "epoch": 0.6420417578517774, "grad_norm": 0.9585356712341309, "learning_rate": 2.475280305229393e-06, "loss": 0.1124, "step": 21710 }, { "epoch": 0.6423374933459514, "grad_norm": 0.7632978558540344, "learning_rate": 2.4764205144998475e-06, "loss": 0.0888, "step": 21720 }, { "epoch": 0.6426332288401254, "grad_norm": 1.1964410543441772, "learning_rate": 2.4775607237703012e-06, "loss": 0.0909, "step": 21730 }, { "epoch": 0.6429289643342994, "grad_norm": 1.3047099113464355, "learning_rate": 2.478700933040756e-06, "loss": 0.0982, "step": 21740 }, { "epoch": 0.6432246998284734, "grad_norm": 1.3942344188690186, "learning_rate": 2.4798411423112104e-06, "loss": 0.0794, "step": 21750 }, { "epoch": 0.6435204353226475, "grad_norm": 1.082540512084961, "learning_rate": 2.4809813515816646e-06, "loss": 0.0854, "step": 21760 }, { "epoch": 0.6438161708168214, "grad_norm": 1.9040557146072388, "learning_rate": 2.482121560852119e-06, "loss": 0.0974, "step": 21770 }, { "epoch": 0.6441119063109955, "grad_norm": 1.1763248443603516, "learning_rate": 2.4832617701225733e-06, "loss": 0.0935, "step": 21780 }, { "epoch": 0.6444076418051694, "grad_norm": 1.2140578031539917, "learning_rate": 2.484401979393028e-06, "loss": 0.0797, "step": 21790 }, { "epoch": 0.6447033772993435, "grad_norm": 2.104372262954712, "learning_rate": 2.4855421886634825e-06, "loss": 0.084, "step": 21800 }, { "epoch": 0.6449991127935175, "grad_norm": 1.586033821105957, "learning_rate": 2.4866823979339362e-06, "loss": 0.1263, "step": 21810 }, { "epoch": 0.6452948482876915, "grad_norm": 1.332506537437439, "learning_rate": 2.487822607204391e-06, "loss": 0.1105, "step": 21820 }, { "epoch": 0.6455905837818655, "grad_norm": 1.534038782119751, "learning_rate": 2.488962816474845e-06, "loss": 0.0999, "step": 21830 }, { "epoch": 0.6458863192760395, "grad_norm": 0.4960815906524658, "learning_rate": 2.4901030257452996e-06, "loss": 0.0806, "step": 21840 }, { "epoch": 0.6461820547702135, "grad_norm": 0.8567425608634949, "learning_rate": 2.491243235015754e-06, "loss": 0.0771, "step": 21850 }, { "epoch": 0.6464777902643876, "grad_norm": 1.8514686822891235, "learning_rate": 2.4923834442862083e-06, "loss": 0.103, "step": 21860 }, { "epoch": 0.6467735257585615, "grad_norm": 1.468620777130127, "learning_rate": 2.493523653556663e-06, "loss": 0.1072, "step": 21870 }, { "epoch": 0.6470692612527356, "grad_norm": 1.204437017440796, "learning_rate": 2.494663862827117e-06, "loss": 0.1011, "step": 21880 }, { "epoch": 0.6473649967469096, "grad_norm": 1.1762113571166992, "learning_rate": 2.4958040720975713e-06, "loss": 0.0914, "step": 21890 }, { "epoch": 0.6476607322410836, "grad_norm": 1.7251094579696655, "learning_rate": 2.496944281368026e-06, "loss": 0.0764, "step": 21900 }, { "epoch": 0.6479564677352576, "grad_norm": 1.3366053104400635, "learning_rate": 2.49808449063848e-06, "loss": 0.1072, "step": 21910 }, { "epoch": 0.6482522032294316, "grad_norm": 1.8641401529312134, "learning_rate": 2.4992246999089346e-06, "loss": 0.0919, "step": 21920 }, { "epoch": 0.6485479387236056, "grad_norm": 1.1428282260894775, "learning_rate": 2.500364909179389e-06, "loss": 0.1012, "step": 21930 }, { "epoch": 0.6488436742177797, "grad_norm": 1.608068585395813, "learning_rate": 2.5015051184498434e-06, "loss": 0.0857, "step": 21940 }, { "epoch": 0.6491394097119536, "grad_norm": 2.4283154010772705, "learning_rate": 2.502645327720298e-06, "loss": 0.0781, "step": 21950 }, { "epoch": 0.6494351452061277, "grad_norm": 0.8203814029693604, "learning_rate": 2.503785536990752e-06, "loss": 0.1105, "step": 21960 }, { "epoch": 0.6497308807003016, "grad_norm": 2.1297802925109863, "learning_rate": 2.5049257462612063e-06, "loss": 0.1075, "step": 21970 }, { "epoch": 0.6500266161944757, "grad_norm": 1.0313334465026855, "learning_rate": 2.506065955531661e-06, "loss": 0.0862, "step": 21980 }, { "epoch": 0.6503223516886497, "grad_norm": 0.5800285935401917, "learning_rate": 2.507206164802115e-06, "loss": 0.0814, "step": 21990 }, { "epoch": 0.6506180871828237, "grad_norm": 1.6658902168273926, "learning_rate": 2.5083463740725696e-06, "loss": 0.0838, "step": 22000 }, { "epoch": 0.6509138226769977, "grad_norm": 1.2935575246810913, "learning_rate": 2.5094865833430238e-06, "loss": 0.1073, "step": 22010 }, { "epoch": 0.6512095581711717, "grad_norm": 2.5399727821350098, "learning_rate": 2.5106267926134784e-06, "loss": 0.1111, "step": 22020 }, { "epoch": 0.6515052936653457, "grad_norm": 1.2091116905212402, "learning_rate": 2.511767001883933e-06, "loss": 0.0973, "step": 22030 }, { "epoch": 0.6518010291595198, "grad_norm": 1.120115041732788, "learning_rate": 2.512907211154387e-06, "loss": 0.0773, "step": 22040 }, { "epoch": 0.6520967646536937, "grad_norm": 1.1518853902816772, "learning_rate": 2.5140474204248413e-06, "loss": 0.0768, "step": 22050 }, { "epoch": 0.6523925001478678, "grad_norm": 0.9768638014793396, "learning_rate": 2.5151876296952955e-06, "loss": 0.1142, "step": 22060 }, { "epoch": 0.6526882356420417, "grad_norm": 1.9189682006835938, "learning_rate": 2.51632783896575e-06, "loss": 0.0847, "step": 22070 }, { "epoch": 0.6529839711362158, "grad_norm": 0.7350820302963257, "learning_rate": 2.5174680482362046e-06, "loss": 0.0922, "step": 22080 }, { "epoch": 0.6532797066303898, "grad_norm": 1.3662571907043457, "learning_rate": 2.518608257506659e-06, "loss": 0.0746, "step": 22090 }, { "epoch": 0.6535754421245638, "grad_norm": 1.369637131690979, "learning_rate": 2.5197484667771134e-06, "loss": 0.0846, "step": 22100 }, { "epoch": 0.6538711776187378, "grad_norm": 1.618604302406311, "learning_rate": 2.5208886760475675e-06, "loss": 0.1031, "step": 22110 }, { "epoch": 0.6541669131129119, "grad_norm": 1.2605865001678467, "learning_rate": 2.522028885318022e-06, "loss": 0.0837, "step": 22120 }, { "epoch": 0.6544626486070858, "grad_norm": 0.9908208847045898, "learning_rate": 2.5231690945884763e-06, "loss": 0.088, "step": 22130 }, { "epoch": 0.6547583841012599, "grad_norm": 1.0521365404129028, "learning_rate": 2.5243093038589305e-06, "loss": 0.09, "step": 22140 }, { "epoch": 0.6550541195954338, "grad_norm": 1.5479736328125, "learning_rate": 2.525449513129385e-06, "loss": 0.0737, "step": 22150 }, { "epoch": 0.6553498550896079, "grad_norm": 1.218505859375, "learning_rate": 2.5265897223998396e-06, "loss": 0.0939, "step": 22160 }, { "epoch": 0.6556455905837819, "grad_norm": 0.8252938985824585, "learning_rate": 2.527729931670294e-06, "loss": 0.1035, "step": 22170 }, { "epoch": 0.6559413260779559, "grad_norm": 1.8805818557739258, "learning_rate": 2.5288701409407484e-06, "loss": 0.0946, "step": 22180 }, { "epoch": 0.6562370615721299, "grad_norm": 1.2930494546890259, "learning_rate": 2.5300103502112026e-06, "loss": 0.0795, "step": 22190 }, { "epoch": 0.6565327970663039, "grad_norm": 1.7225382328033447, "learning_rate": 2.531150559481657e-06, "loss": 0.1022, "step": 22200 }, { "epoch": 0.6568285325604779, "grad_norm": 1.3455754518508911, "learning_rate": 2.5322907687521113e-06, "loss": 0.118, "step": 22210 }, { "epoch": 0.657124268054652, "grad_norm": 1.02947199344635, "learning_rate": 2.5334309780225655e-06, "loss": 0.0991, "step": 22220 }, { "epoch": 0.6574200035488259, "grad_norm": 1.4652206897735596, "learning_rate": 2.53457118729302e-06, "loss": 0.0993, "step": 22230 }, { "epoch": 0.657715739043, "grad_norm": 1.182682991027832, "learning_rate": 2.5357113965634742e-06, "loss": 0.063, "step": 22240 }, { "epoch": 0.6580114745371739, "grad_norm": 1.3135122060775757, "learning_rate": 2.536851605833929e-06, "loss": 0.0796, "step": 22250 }, { "epoch": 0.658307210031348, "grad_norm": 1.5421816110610962, "learning_rate": 2.5379918151043834e-06, "loss": 0.1046, "step": 22260 }, { "epoch": 0.658602945525522, "grad_norm": 1.3825229406356812, "learning_rate": 2.5391320243748376e-06, "loss": 0.0856, "step": 22270 }, { "epoch": 0.658898681019696, "grad_norm": 1.2447748184204102, "learning_rate": 2.540272233645292e-06, "loss": 0.0954, "step": 22280 }, { "epoch": 0.65919441651387, "grad_norm": 1.26719331741333, "learning_rate": 2.541412442915746e-06, "loss": 0.0938, "step": 22290 }, { "epoch": 0.659490152008044, "grad_norm": 1.0916777849197388, "learning_rate": 2.5425526521862005e-06, "loss": 0.0756, "step": 22300 }, { "epoch": 0.659785887502218, "grad_norm": 1.5672905445098877, "learning_rate": 2.543692861456655e-06, "loss": 0.0991, "step": 22310 }, { "epoch": 0.6600816229963921, "grad_norm": 1.6072012186050415, "learning_rate": 2.5448330707271093e-06, "loss": 0.1055, "step": 22320 }, { "epoch": 0.660377358490566, "grad_norm": 1.6899058818817139, "learning_rate": 2.545973279997564e-06, "loss": 0.0986, "step": 22330 }, { "epoch": 0.6606730939847401, "grad_norm": 0.7157202959060669, "learning_rate": 2.547113489268018e-06, "loss": 0.1146, "step": 22340 }, { "epoch": 0.6609688294789141, "grad_norm": 0.7474240064620972, "learning_rate": 2.5482536985384726e-06, "loss": 0.0812, "step": 22350 }, { "epoch": 0.661264564973088, "grad_norm": 1.25737726688385, "learning_rate": 2.549393907808927e-06, "loss": 0.0911, "step": 22360 }, { "epoch": 0.6615603004672621, "grad_norm": 1.12069571018219, "learning_rate": 2.550534117079381e-06, "loss": 0.0988, "step": 22370 }, { "epoch": 0.661856035961436, "grad_norm": 1.2407941818237305, "learning_rate": 2.5516743263498355e-06, "loss": 0.103, "step": 22380 }, { "epoch": 0.6621517714556101, "grad_norm": 0.9935185313224792, "learning_rate": 2.55281453562029e-06, "loss": 0.0761, "step": 22390 }, { "epoch": 0.6624475069497842, "grad_norm": 1.1642708778381348, "learning_rate": 2.5539547448907443e-06, "loss": 0.0611, "step": 22400 }, { "epoch": 0.6627432424439581, "grad_norm": 1.4429281949996948, "learning_rate": 2.555094954161199e-06, "loss": 0.092, "step": 22410 }, { "epoch": 0.6630389779381322, "grad_norm": 2.289417028427124, "learning_rate": 2.556235163431653e-06, "loss": 0.0977, "step": 22420 }, { "epoch": 0.6633347134323061, "grad_norm": 1.7439731359481812, "learning_rate": 2.5573753727021076e-06, "loss": 0.0964, "step": 22430 }, { "epoch": 0.6636304489264802, "grad_norm": 1.2070528268814087, "learning_rate": 2.558515581972562e-06, "loss": 0.1009, "step": 22440 }, { "epoch": 0.6639261844206542, "grad_norm": 0.9969165921211243, "learning_rate": 2.559655791243016e-06, "loss": 0.0826, "step": 22450 }, { "epoch": 0.6642219199148282, "grad_norm": 1.1171499490737915, "learning_rate": 2.5607960005134705e-06, "loss": 0.1135, "step": 22460 }, { "epoch": 0.6645176554090022, "grad_norm": 1.2881150245666504, "learning_rate": 2.5619362097839247e-06, "loss": 0.1052, "step": 22470 }, { "epoch": 0.6648133909031761, "grad_norm": 1.2129915952682495, "learning_rate": 2.5630764190543793e-06, "loss": 0.1085, "step": 22480 }, { "epoch": 0.6651091263973502, "grad_norm": 1.1514229774475098, "learning_rate": 2.564216628324834e-06, "loss": 0.0815, "step": 22490 }, { "epoch": 0.6654048618915243, "grad_norm": 1.3532291650772095, "learning_rate": 2.565356837595288e-06, "loss": 0.0879, "step": 22500 }, { "epoch": 0.6657005973856982, "grad_norm": 1.6405538320541382, "learning_rate": 2.5664970468657426e-06, "loss": 0.1037, "step": 22510 }, { "epoch": 0.6659963328798723, "grad_norm": 2.0599820613861084, "learning_rate": 2.567637256136197e-06, "loss": 0.0971, "step": 22520 }, { "epoch": 0.6662920683740462, "grad_norm": 1.5832648277282715, "learning_rate": 2.568777465406651e-06, "loss": 0.099, "step": 22530 }, { "epoch": 0.6665878038682203, "grad_norm": 0.7349275350570679, "learning_rate": 2.5699176746771055e-06, "loss": 0.07, "step": 22540 }, { "epoch": 0.6668835393623943, "grad_norm": 1.9078989028930664, "learning_rate": 2.5710578839475597e-06, "loss": 0.0845, "step": 22550 }, { "epoch": 0.6671792748565682, "grad_norm": 1.609693169593811, "learning_rate": 2.5721980932180143e-06, "loss": 0.1044, "step": 22560 }, { "epoch": 0.6674750103507423, "grad_norm": 0.9307450652122498, "learning_rate": 2.5733383024884685e-06, "loss": 0.0996, "step": 22570 }, { "epoch": 0.6677707458449164, "grad_norm": 1.1012439727783203, "learning_rate": 2.574478511758923e-06, "loss": 0.0897, "step": 22580 }, { "epoch": 0.6680664813390903, "grad_norm": 0.7883257269859314, "learning_rate": 2.5756187210293776e-06, "loss": 0.0777, "step": 22590 }, { "epoch": 0.6683622168332644, "grad_norm": 1.4711374044418335, "learning_rate": 2.576758930299832e-06, "loss": 0.0809, "step": 22600 }, { "epoch": 0.6686579523274383, "grad_norm": 1.1524555683135986, "learning_rate": 2.577899139570286e-06, "loss": 0.1042, "step": 22610 }, { "epoch": 0.6689536878216124, "grad_norm": 1.6055198907852173, "learning_rate": 2.5790393488407406e-06, "loss": 0.0962, "step": 22620 }, { "epoch": 0.6692494233157864, "grad_norm": 1.1999361515045166, "learning_rate": 2.5801795581111947e-06, "loss": 0.102, "step": 22630 }, { "epoch": 0.6695451588099604, "grad_norm": 1.3399434089660645, "learning_rate": 2.5813197673816493e-06, "loss": 0.0771, "step": 22640 }, { "epoch": 0.6698408943041344, "grad_norm": 1.0974019765853882, "learning_rate": 2.5824599766521035e-06, "loss": 0.0668, "step": 22650 }, { "epoch": 0.6701366297983083, "grad_norm": 2.0198400020599365, "learning_rate": 2.583600185922558e-06, "loss": 0.1102, "step": 22660 }, { "epoch": 0.6704323652924824, "grad_norm": 1.5393505096435547, "learning_rate": 2.5847403951930127e-06, "loss": 0.1047, "step": 22670 }, { "epoch": 0.6707281007866565, "grad_norm": 1.7418479919433594, "learning_rate": 2.585880604463467e-06, "loss": 0.0929, "step": 22680 }, { "epoch": 0.6710238362808304, "grad_norm": 1.2525627613067627, "learning_rate": 2.587020813733921e-06, "loss": 0.1043, "step": 22690 }, { "epoch": 0.6713195717750045, "grad_norm": 1.455270528793335, "learning_rate": 2.588161023004375e-06, "loss": 0.0706, "step": 22700 }, { "epoch": 0.6716153072691784, "grad_norm": 1.704574704170227, "learning_rate": 2.5893012322748297e-06, "loss": 0.1019, "step": 22710 }, { "epoch": 0.6719110427633525, "grad_norm": 1.394292950630188, "learning_rate": 2.5904414415452843e-06, "loss": 0.0944, "step": 22720 }, { "epoch": 0.6722067782575265, "grad_norm": 0.7079685926437378, "learning_rate": 2.5915816508157385e-06, "loss": 0.097, "step": 22730 }, { "epoch": 0.6725025137517004, "grad_norm": 1.2725220918655396, "learning_rate": 2.592721860086193e-06, "loss": 0.0867, "step": 22740 }, { "epoch": 0.6727982492458745, "grad_norm": 1.2315826416015625, "learning_rate": 2.5938620693566472e-06, "loss": 0.0551, "step": 22750 }, { "epoch": 0.6730939847400484, "grad_norm": 1.2069274187088013, "learning_rate": 2.595002278627102e-06, "loss": 0.1156, "step": 22760 }, { "epoch": 0.6733897202342225, "grad_norm": 1.5261591672897339, "learning_rate": 2.596142487897556e-06, "loss": 0.0964, "step": 22770 }, { "epoch": 0.6736854557283966, "grad_norm": 1.3270264863967896, "learning_rate": 2.59728269716801e-06, "loss": 0.0874, "step": 22780 }, { "epoch": 0.6739811912225705, "grad_norm": 1.0865862369537354, "learning_rate": 2.5984229064384648e-06, "loss": 0.0852, "step": 22790 }, { "epoch": 0.6742769267167446, "grad_norm": 1.2470893859863281, "learning_rate": 2.599563115708919e-06, "loss": 0.087, "step": 22800 }, { "epoch": 0.6745726622109186, "grad_norm": 0.8439205884933472, "learning_rate": 2.6007033249793735e-06, "loss": 0.081, "step": 22810 }, { "epoch": 0.6748683977050925, "grad_norm": 0.7240766286849976, "learning_rate": 2.601843534249828e-06, "loss": 0.1041, "step": 22820 }, { "epoch": 0.6751641331992666, "grad_norm": 0.8504586815834045, "learning_rate": 2.6029837435202823e-06, "loss": 0.0789, "step": 22830 }, { "epoch": 0.6754598686934405, "grad_norm": 1.5520190000534058, "learning_rate": 2.604123952790737e-06, "loss": 0.1023, "step": 22840 }, { "epoch": 0.6757556041876146, "grad_norm": 1.4483451843261719, "learning_rate": 2.6052641620611914e-06, "loss": 0.0848, "step": 22850 }, { "epoch": 0.6760513396817887, "grad_norm": 1.462743878364563, "learning_rate": 2.606404371331645e-06, "loss": 0.0954, "step": 22860 }, { "epoch": 0.6763470751759626, "grad_norm": 1.1067513227462769, "learning_rate": 2.6075445806020998e-06, "loss": 0.1026, "step": 22870 }, { "epoch": 0.6766428106701367, "grad_norm": 1.0700923204421997, "learning_rate": 2.608684789872554e-06, "loss": 0.0931, "step": 22880 }, { "epoch": 0.6769385461643106, "grad_norm": 1.3110617399215698, "learning_rate": 2.6098249991430085e-06, "loss": 0.089, "step": 22890 }, { "epoch": 0.6772342816584846, "grad_norm": 1.536051630973816, "learning_rate": 2.610965208413463e-06, "loss": 0.0822, "step": 22900 }, { "epoch": 0.6775300171526587, "grad_norm": 1.6095881462097168, "learning_rate": 2.6121054176839173e-06, "loss": 0.105, "step": 22910 }, { "epoch": 0.6778257526468326, "grad_norm": 1.6024351119995117, "learning_rate": 2.613245626954372e-06, "loss": 0.0978, "step": 22920 }, { "epoch": 0.6781214881410067, "grad_norm": 1.3497588634490967, "learning_rate": 2.614385836224826e-06, "loss": 0.0964, "step": 22930 }, { "epoch": 0.6784172236351806, "grad_norm": 1.0021517276763916, "learning_rate": 2.61552604549528e-06, "loss": 0.0793, "step": 22940 }, { "epoch": 0.6787129591293547, "grad_norm": 0.8835341334342957, "learning_rate": 2.6166662547657348e-06, "loss": 0.0914, "step": 22950 }, { "epoch": 0.6790086946235288, "grad_norm": 2.409534454345703, "learning_rate": 2.617806464036189e-06, "loss": 0.1062, "step": 22960 }, { "epoch": 0.6793044301177027, "grad_norm": 1.0202598571777344, "learning_rate": 2.6189466733066435e-06, "loss": 0.0925, "step": 22970 }, { "epoch": 0.6796001656118767, "grad_norm": 1.5727365016937256, "learning_rate": 2.6200868825770977e-06, "loss": 0.0946, "step": 22980 }, { "epoch": 0.6798959011060507, "grad_norm": 1.2245970964431763, "learning_rate": 2.6212270918475523e-06, "loss": 0.0986, "step": 22990 }, { "epoch": 0.6801916366002247, "grad_norm": 2.346649408340454, "learning_rate": 2.622367301118007e-06, "loss": 0.075, "step": 23000 }, { "epoch": 0.6804873720943988, "grad_norm": 1.0852952003479004, "learning_rate": 2.623507510388461e-06, "loss": 0.1171, "step": 23010 }, { "epoch": 0.6807831075885727, "grad_norm": 1.1474746465682983, "learning_rate": 2.6246477196589152e-06, "loss": 0.1212, "step": 23020 }, { "epoch": 0.6810788430827468, "grad_norm": 1.5150220394134521, "learning_rate": 2.6257879289293694e-06, "loss": 0.0807, "step": 23030 }, { "epoch": 0.6813745785769209, "grad_norm": 1.6909805536270142, "learning_rate": 2.626928138199824e-06, "loss": 0.1023, "step": 23040 }, { "epoch": 0.6816703140710948, "grad_norm": 1.258571982383728, "learning_rate": 2.6280683474702786e-06, "loss": 0.0809, "step": 23050 }, { "epoch": 0.6819660495652689, "grad_norm": 1.3400483131408691, "learning_rate": 2.6292085567407327e-06, "loss": 0.1202, "step": 23060 }, { "epoch": 0.6822617850594428, "grad_norm": 1.1887692213058472, "learning_rate": 2.6303487660111873e-06, "loss": 0.1096, "step": 23070 }, { "epoch": 0.6825575205536168, "grad_norm": 1.5218315124511719, "learning_rate": 2.631488975281642e-06, "loss": 0.0951, "step": 23080 }, { "epoch": 0.6828532560477909, "grad_norm": 0.8992134928703308, "learning_rate": 2.632629184552096e-06, "loss": 0.0715, "step": 23090 }, { "epoch": 0.6831489915419648, "grad_norm": 2.3348448276519775, "learning_rate": 2.6337693938225502e-06, "loss": 0.0843, "step": 23100 }, { "epoch": 0.6834447270361389, "grad_norm": 1.3271820545196533, "learning_rate": 2.6349096030930044e-06, "loss": 0.1104, "step": 23110 }, { "epoch": 0.6837404625303128, "grad_norm": 1.60702383518219, "learning_rate": 2.636049812363459e-06, "loss": 0.1091, "step": 23120 }, { "epoch": 0.6840361980244869, "grad_norm": 1.1848803758621216, "learning_rate": 2.6371900216339136e-06, "loss": 0.1151, "step": 23130 }, { "epoch": 0.684331933518661, "grad_norm": 0.905006468296051, "learning_rate": 2.6383302309043677e-06, "loss": 0.1167, "step": 23140 }, { "epoch": 0.6846276690128349, "grad_norm": 0.9034202694892883, "learning_rate": 2.6394704401748223e-06, "loss": 0.0748, "step": 23150 }, { "epoch": 0.684923404507009, "grad_norm": 1.041479468345642, "learning_rate": 2.6406106494452765e-06, "loss": 0.117, "step": 23160 }, { "epoch": 0.6852191400011829, "grad_norm": 0.891294002532959, "learning_rate": 2.641750858715731e-06, "loss": 0.0866, "step": 23170 }, { "epoch": 0.685514875495357, "grad_norm": 1.3536839485168457, "learning_rate": 2.6428910679861852e-06, "loss": 0.0883, "step": 23180 }, { "epoch": 0.685810610989531, "grad_norm": 1.1666523218154907, "learning_rate": 2.6440312772566394e-06, "loss": 0.1049, "step": 23190 }, { "epoch": 0.6861063464837049, "grad_norm": 1.6241375207901, "learning_rate": 2.645171486527094e-06, "loss": 0.0776, "step": 23200 }, { "epoch": 0.686402081977879, "grad_norm": 0.7942156195640564, "learning_rate": 2.646311695797548e-06, "loss": 0.0994, "step": 23210 }, { "epoch": 0.6866978174720529, "grad_norm": 1.5049117803573608, "learning_rate": 2.6474519050680028e-06, "loss": 0.1098, "step": 23220 }, { "epoch": 0.686993552966227, "grad_norm": 1.0538346767425537, "learning_rate": 2.6485921143384573e-06, "loss": 0.0857, "step": 23230 }, { "epoch": 0.687289288460401, "grad_norm": 1.382173776626587, "learning_rate": 2.6497323236089115e-06, "loss": 0.0905, "step": 23240 }, { "epoch": 0.687585023954575, "grad_norm": 1.442427635192871, "learning_rate": 2.650872532879366e-06, "loss": 0.075, "step": 23250 }, { "epoch": 0.687880759448749, "grad_norm": 2.893045663833618, "learning_rate": 2.65201274214982e-06, "loss": 0.1144, "step": 23260 }, { "epoch": 0.6881764949429231, "grad_norm": 1.1186518669128418, "learning_rate": 2.6531529514202744e-06, "loss": 0.085, "step": 23270 }, { "epoch": 0.688472230437097, "grad_norm": 1.5048102140426636, "learning_rate": 2.654293160690729e-06, "loss": 0.0815, "step": 23280 }, { "epoch": 0.6887679659312711, "grad_norm": 1.2397404909133911, "learning_rate": 2.655433369961183e-06, "loss": 0.0772, "step": 23290 }, { "epoch": 0.689063701425445, "grad_norm": 1.6486287117004395, "learning_rate": 2.6565735792316378e-06, "loss": 0.0909, "step": 23300 }, { "epoch": 0.6893594369196191, "grad_norm": 1.8071370124816895, "learning_rate": 2.6577137885020924e-06, "loss": 0.1013, "step": 23310 }, { "epoch": 0.6896551724137931, "grad_norm": 1.2025341987609863, "learning_rate": 2.6588539977725465e-06, "loss": 0.0917, "step": 23320 }, { "epoch": 0.6899509079079671, "grad_norm": 1.3929660320281982, "learning_rate": 2.659994207043001e-06, "loss": 0.0936, "step": 23330 }, { "epoch": 0.6902466434021411, "grad_norm": 1.2760956287384033, "learning_rate": 2.661134416313455e-06, "loss": 0.091, "step": 23340 }, { "epoch": 0.6905423788963151, "grad_norm": 3.49483060836792, "learning_rate": 2.6622746255839094e-06, "loss": 0.0781, "step": 23350 }, { "epoch": 0.6908381143904891, "grad_norm": 1.0919232368469238, "learning_rate": 2.663414834854364e-06, "loss": 0.1008, "step": 23360 }, { "epoch": 0.6911338498846632, "grad_norm": 1.1277590990066528, "learning_rate": 2.664555044124818e-06, "loss": 0.0971, "step": 23370 }, { "epoch": 0.6914295853788371, "grad_norm": 1.1203298568725586, "learning_rate": 2.6656952533952728e-06, "loss": 0.1042, "step": 23380 }, { "epoch": 0.6917253208730112, "grad_norm": 1.244868516921997, "learning_rate": 2.666835462665727e-06, "loss": 0.0827, "step": 23390 }, { "epoch": 0.6920210563671851, "grad_norm": 1.5273033380508423, "learning_rate": 2.6679756719361815e-06, "loss": 0.082, "step": 23400 }, { "epoch": 0.6923167918613592, "grad_norm": 1.495300054550171, "learning_rate": 2.669115881206636e-06, "loss": 0.1287, "step": 23410 }, { "epoch": 0.6926125273555332, "grad_norm": 1.0347561836242676, "learning_rate": 2.67025609047709e-06, "loss": 0.095, "step": 23420 }, { "epoch": 0.6929082628497072, "grad_norm": 1.0184979438781738, "learning_rate": 2.6713962997475445e-06, "loss": 0.1003, "step": 23430 }, { "epoch": 0.6932039983438812, "grad_norm": 1.1742730140686035, "learning_rate": 2.6725365090179986e-06, "loss": 0.0954, "step": 23440 }, { "epoch": 0.6934997338380552, "grad_norm": 1.0063831806182861, "learning_rate": 2.673676718288453e-06, "loss": 0.0667, "step": 23450 }, { "epoch": 0.6937954693322292, "grad_norm": 1.2328897714614868, "learning_rate": 2.674816927558908e-06, "loss": 0.1016, "step": 23460 }, { "epoch": 0.6940912048264033, "grad_norm": 1.5101977586746216, "learning_rate": 2.675957136829362e-06, "loss": 0.0829, "step": 23470 }, { "epoch": 0.6943869403205772, "grad_norm": 2.3903005123138428, "learning_rate": 2.6770973460998166e-06, "loss": 0.0919, "step": 23480 }, { "epoch": 0.6946826758147513, "grad_norm": 1.2816286087036133, "learning_rate": 2.6782375553702707e-06, "loss": 0.0889, "step": 23490 }, { "epoch": 0.6949784113089253, "grad_norm": 1.1974139213562012, "learning_rate": 2.679377764640725e-06, "loss": 0.078, "step": 23500 }, { "epoch": 0.6952741468030993, "grad_norm": 1.1859091520309448, "learning_rate": 2.6805179739111795e-06, "loss": 0.0866, "step": 23510 }, { "epoch": 0.6955698822972733, "grad_norm": 1.3953689336776733, "learning_rate": 2.6816581831816336e-06, "loss": 0.1058, "step": 23520 }, { "epoch": 0.6958656177914473, "grad_norm": 0.65782630443573, "learning_rate": 2.6827983924520882e-06, "loss": 0.1036, "step": 23530 }, { "epoch": 0.6961613532856213, "grad_norm": 1.0151127576828003, "learning_rate": 2.683938601722543e-06, "loss": 0.0779, "step": 23540 }, { "epoch": 0.6964570887797954, "grad_norm": 1.2814488410949707, "learning_rate": 2.685078810992997e-06, "loss": 0.0701, "step": 23550 }, { "epoch": 0.6967528242739693, "grad_norm": 0.9534307718276978, "learning_rate": 2.6862190202634516e-06, "loss": 0.1049, "step": 23560 }, { "epoch": 0.6970485597681434, "grad_norm": 1.8034764528274536, "learning_rate": 2.6873592295339057e-06, "loss": 0.1043, "step": 23570 }, { "epoch": 0.6973442952623173, "grad_norm": 1.7235769033432007, "learning_rate": 2.68849943880436e-06, "loss": 0.0917, "step": 23580 }, { "epoch": 0.6976400307564914, "grad_norm": 0.8634165525436401, "learning_rate": 2.6896396480748145e-06, "loss": 0.0794, "step": 23590 }, { "epoch": 0.6979357662506654, "grad_norm": 1.2819619178771973, "learning_rate": 2.6907798573452687e-06, "loss": 0.0766, "step": 23600 }, { "epoch": 0.6982315017448394, "grad_norm": 0.7924041748046875, "learning_rate": 2.6919200666157232e-06, "loss": 0.1003, "step": 23610 }, { "epoch": 0.6985272372390134, "grad_norm": 1.2122983932495117, "learning_rate": 2.6930602758861774e-06, "loss": 0.0919, "step": 23620 }, { "epoch": 0.6988229727331874, "grad_norm": 1.7616533041000366, "learning_rate": 2.694200485156632e-06, "loss": 0.092, "step": 23630 }, { "epoch": 0.6991187082273614, "grad_norm": 1.9496537446975708, "learning_rate": 2.6953406944270866e-06, "loss": 0.0876, "step": 23640 }, { "epoch": 0.6994144437215355, "grad_norm": 1.4086934328079224, "learning_rate": 2.6964809036975407e-06, "loss": 0.0766, "step": 23650 }, { "epoch": 0.6997101792157094, "grad_norm": 1.1352876424789429, "learning_rate": 2.697621112967995e-06, "loss": 0.1023, "step": 23660 }, { "epoch": 0.7000059147098835, "grad_norm": 1.061160683631897, "learning_rate": 2.698761322238449e-06, "loss": 0.0926, "step": 23670 }, { "epoch": 0.7003016502040575, "grad_norm": 0.9988943338394165, "learning_rate": 2.6999015315089037e-06, "loss": 0.0848, "step": 23680 }, { "epoch": 0.7005973856982315, "grad_norm": 1.501450777053833, "learning_rate": 2.7010417407793583e-06, "loss": 0.0842, "step": 23690 }, { "epoch": 0.7008931211924055, "grad_norm": 1.2661164999008179, "learning_rate": 2.7021819500498124e-06, "loss": 0.0943, "step": 23700 }, { "epoch": 0.7011888566865795, "grad_norm": 1.2219380140304565, "learning_rate": 2.703322159320267e-06, "loss": 0.1043, "step": 23710 }, { "epoch": 0.7014845921807535, "grad_norm": 1.348217487335205, "learning_rate": 2.704462368590721e-06, "loss": 0.0887, "step": 23720 }, { "epoch": 0.7017803276749276, "grad_norm": 1.445313572883606, "learning_rate": 2.7056025778611758e-06, "loss": 0.0899, "step": 23730 }, { "epoch": 0.7020760631691015, "grad_norm": 0.9445814490318298, "learning_rate": 2.70674278713163e-06, "loss": 0.0643, "step": 23740 }, { "epoch": 0.7023717986632756, "grad_norm": 1.2701621055603027, "learning_rate": 2.707882996402084e-06, "loss": 0.0827, "step": 23750 }, { "epoch": 0.7026675341574495, "grad_norm": 1.122534155845642, "learning_rate": 2.7090232056725387e-06, "loss": 0.1001, "step": 23760 }, { "epoch": 0.7029632696516236, "grad_norm": 1.41063392162323, "learning_rate": 2.7101634149429933e-06, "loss": 0.1075, "step": 23770 }, { "epoch": 0.7032590051457976, "grad_norm": 1.80709707736969, "learning_rate": 2.7113036242134474e-06, "loss": 0.1075, "step": 23780 }, { "epoch": 0.7035547406399716, "grad_norm": 0.6884756684303284, "learning_rate": 2.712443833483902e-06, "loss": 0.0717, "step": 23790 }, { "epoch": 0.7038504761341456, "grad_norm": 1.9560043811798096, "learning_rate": 2.713584042754356e-06, "loss": 0.0949, "step": 23800 }, { "epoch": 0.7041462116283196, "grad_norm": 1.0955321788787842, "learning_rate": 2.7147242520248108e-06, "loss": 0.1005, "step": 23810 }, { "epoch": 0.7044419471224936, "grad_norm": 1.6938529014587402, "learning_rate": 2.715864461295265e-06, "loss": 0.0953, "step": 23820 }, { "epoch": 0.7047376826166677, "grad_norm": 1.685395359992981, "learning_rate": 2.717004670565719e-06, "loss": 0.1009, "step": 23830 }, { "epoch": 0.7050334181108416, "grad_norm": 0.8732092976570129, "learning_rate": 2.7181448798361737e-06, "loss": 0.0741, "step": 23840 }, { "epoch": 0.7053291536050157, "grad_norm": 1.4734680652618408, "learning_rate": 2.719285089106628e-06, "loss": 0.0884, "step": 23850 }, { "epoch": 0.7056248890991896, "grad_norm": 1.4608761072158813, "learning_rate": 2.7204252983770825e-06, "loss": 0.097, "step": 23860 }, { "epoch": 0.7059206245933637, "grad_norm": 1.1860668659210205, "learning_rate": 2.721565507647537e-06, "loss": 0.0887, "step": 23870 }, { "epoch": 0.7062163600875377, "grad_norm": 1.3775287866592407, "learning_rate": 2.722705716917991e-06, "loss": 0.0852, "step": 23880 }, { "epoch": 0.7065120955817117, "grad_norm": 1.13843834400177, "learning_rate": 2.723845926188446e-06, "loss": 0.101, "step": 23890 }, { "epoch": 0.7068078310758857, "grad_norm": 0.9824529886245728, "learning_rate": 2.7249861354588995e-06, "loss": 0.0658, "step": 23900 }, { "epoch": 0.7071035665700598, "grad_norm": 0.9684402346611023, "learning_rate": 2.726126344729354e-06, "loss": 0.1211, "step": 23910 }, { "epoch": 0.7073993020642337, "grad_norm": 0.8084964156150818, "learning_rate": 2.7272665539998087e-06, "loss": 0.0823, "step": 23920 }, { "epoch": 0.7076950375584078, "grad_norm": 1.571868896484375, "learning_rate": 2.728406763270263e-06, "loss": 0.0929, "step": 23930 }, { "epoch": 0.7079907730525817, "grad_norm": 2.081064224243164, "learning_rate": 2.7295469725407175e-06, "loss": 0.0836, "step": 23940 }, { "epoch": 0.7082865085467558, "grad_norm": 1.1115031242370605, "learning_rate": 2.7306871818111716e-06, "loss": 0.0851, "step": 23950 }, { "epoch": 0.7085822440409298, "grad_norm": 1.1370114088058472, "learning_rate": 2.7318273910816262e-06, "loss": 0.1128, "step": 23960 }, { "epoch": 0.7088779795351038, "grad_norm": 0.9510841965675354, "learning_rate": 2.732967600352081e-06, "loss": 0.094, "step": 23970 }, { "epoch": 0.7091737150292778, "grad_norm": 0.9213568568229675, "learning_rate": 2.7341078096225345e-06, "loss": 0.1113, "step": 23980 }, { "epoch": 0.7094694505234518, "grad_norm": 1.6634730100631714, "learning_rate": 2.735248018892989e-06, "loss": 0.0761, "step": 23990 }, { "epoch": 0.7097651860176258, "grad_norm": 0.9735066890716553, "learning_rate": 2.7363882281634437e-06, "loss": 0.0637, "step": 24000 }, { "epoch": 0.7100609215117999, "grad_norm": 1.2136363983154297, "learning_rate": 2.737528437433898e-06, "loss": 0.0958, "step": 24010 }, { "epoch": 0.7103566570059738, "grad_norm": 1.5504214763641357, "learning_rate": 2.7386686467043525e-06, "loss": 0.0903, "step": 24020 }, { "epoch": 0.7106523925001479, "grad_norm": 0.6369165778160095, "learning_rate": 2.7398088559748066e-06, "loss": 0.0871, "step": 24030 }, { "epoch": 0.7109481279943218, "grad_norm": 1.466933250427246, "learning_rate": 2.7409490652452612e-06, "loss": 0.0933, "step": 24040 }, { "epoch": 0.7112438634884959, "grad_norm": 0.9235086441040039, "learning_rate": 2.742089274515716e-06, "loss": 0.0862, "step": 24050 }, { "epoch": 0.7115395989826699, "grad_norm": 1.0135095119476318, "learning_rate": 2.7432294837861696e-06, "loss": 0.1018, "step": 24060 }, { "epoch": 0.7118353344768439, "grad_norm": 1.2102148532867432, "learning_rate": 2.744369693056624e-06, "loss": 0.1013, "step": 24070 }, { "epoch": 0.7121310699710179, "grad_norm": 1.2102634906768799, "learning_rate": 2.7455099023270783e-06, "loss": 0.0872, "step": 24080 }, { "epoch": 0.7124268054651919, "grad_norm": 1.5778299570083618, "learning_rate": 2.746650111597533e-06, "loss": 0.1014, "step": 24090 }, { "epoch": 0.7127225409593659, "grad_norm": 1.1201860904693604, "learning_rate": 2.7477903208679875e-06, "loss": 0.0847, "step": 24100 }, { "epoch": 0.71301827645354, "grad_norm": 1.3030601739883423, "learning_rate": 2.7489305301384417e-06, "loss": 0.1123, "step": 24110 }, { "epoch": 0.7133140119477139, "grad_norm": 2.209726333618164, "learning_rate": 2.7500707394088963e-06, "loss": 0.1018, "step": 24120 }, { "epoch": 0.713609747441888, "grad_norm": 1.001601219177246, "learning_rate": 2.7512109486793504e-06, "loss": 0.0986, "step": 24130 }, { "epoch": 0.713905482936062, "grad_norm": 0.45531538128852844, "learning_rate": 2.7523511579498046e-06, "loss": 0.0728, "step": 24140 }, { "epoch": 0.714201218430236, "grad_norm": 1.24623441696167, "learning_rate": 2.753491367220259e-06, "loss": 0.0846, "step": 24150 }, { "epoch": 0.71449695392441, "grad_norm": 1.2789989709854126, "learning_rate": 2.7546315764907133e-06, "loss": 0.1039, "step": 24160 }, { "epoch": 0.714792689418584, "grad_norm": 1.4240500926971436, "learning_rate": 2.755771785761168e-06, "loss": 0.0925, "step": 24170 }, { "epoch": 0.715088424912758, "grad_norm": 1.232435703277588, "learning_rate": 2.756911995031622e-06, "loss": 0.0893, "step": 24180 }, { "epoch": 0.7153841604069321, "grad_norm": 0.6558427214622498, "learning_rate": 2.7580522043020767e-06, "loss": 0.1024, "step": 24190 }, { "epoch": 0.715679895901106, "grad_norm": 1.095934510231018, "learning_rate": 2.7591924135725313e-06, "loss": 0.0896, "step": 24200 }, { "epoch": 0.7159756313952801, "grad_norm": 0.9959419369697571, "learning_rate": 2.7603326228429854e-06, "loss": 0.0895, "step": 24210 }, { "epoch": 0.716271366889454, "grad_norm": 1.5774708986282349, "learning_rate": 2.76147283211344e-06, "loss": 0.1085, "step": 24220 }, { "epoch": 0.7165671023836281, "grad_norm": 1.1627031564712524, "learning_rate": 2.762613041383894e-06, "loss": 0.1052, "step": 24230 }, { "epoch": 0.7168628378778021, "grad_norm": 1.3784029483795166, "learning_rate": 2.7637532506543483e-06, "loss": 0.075, "step": 24240 }, { "epoch": 0.7171585733719761, "grad_norm": 1.6509114503860474, "learning_rate": 2.764893459924803e-06, "loss": 0.0693, "step": 24250 }, { "epoch": 0.7174543088661501, "grad_norm": 1.317009449005127, "learning_rate": 2.766033669195257e-06, "loss": 0.0846, "step": 24260 }, { "epoch": 0.7177500443603241, "grad_norm": 1.3492532968521118, "learning_rate": 2.7671738784657117e-06, "loss": 0.1043, "step": 24270 }, { "epoch": 0.7180457798544981, "grad_norm": 1.1568584442138672, "learning_rate": 2.7683140877361663e-06, "loss": 0.1069, "step": 24280 }, { "epoch": 0.7183415153486722, "grad_norm": 0.9267634749412537, "learning_rate": 2.7694542970066204e-06, "loss": 0.0992, "step": 24290 }, { "epoch": 0.7186372508428461, "grad_norm": 0.7808639407157898, "learning_rate": 2.770594506277075e-06, "loss": 0.0757, "step": 24300 }, { "epoch": 0.7189329863370202, "grad_norm": 1.2195206880569458, "learning_rate": 2.7717347155475288e-06, "loss": 0.1084, "step": 24310 }, { "epoch": 0.7192287218311941, "grad_norm": 1.0014675855636597, "learning_rate": 2.7728749248179834e-06, "loss": 0.0876, "step": 24320 }, { "epoch": 0.7195244573253682, "grad_norm": 1.301316261291504, "learning_rate": 2.774015134088438e-06, "loss": 0.0947, "step": 24330 }, { "epoch": 0.7198201928195422, "grad_norm": 1.371716856956482, "learning_rate": 2.775155343358892e-06, "loss": 0.0646, "step": 24340 }, { "epoch": 0.7201159283137162, "grad_norm": 1.7973592281341553, "learning_rate": 2.7762955526293467e-06, "loss": 0.0908, "step": 24350 }, { "epoch": 0.7204116638078902, "grad_norm": 1.6999304294586182, "learning_rate": 2.777435761899801e-06, "loss": 0.1104, "step": 24360 }, { "epoch": 0.7207073993020643, "grad_norm": 1.6610709428787231, "learning_rate": 2.7785759711702555e-06, "loss": 0.0983, "step": 24370 }, { "epoch": 0.7210031347962382, "grad_norm": 1.0936553478240967, "learning_rate": 2.77971618044071e-06, "loss": 0.0917, "step": 24380 }, { "epoch": 0.7212988702904123, "grad_norm": 1.0841227769851685, "learning_rate": 2.780856389711164e-06, "loss": 0.0795, "step": 24390 }, { "epoch": 0.7215946057845862, "grad_norm": 0.8816017508506775, "learning_rate": 2.7819965989816184e-06, "loss": 0.0693, "step": 24400 }, { "epoch": 0.7218903412787603, "grad_norm": 1.0556942224502563, "learning_rate": 2.7831368082520725e-06, "loss": 0.1088, "step": 24410 }, { "epoch": 0.7221860767729343, "grad_norm": 1.6331357955932617, "learning_rate": 2.784277017522527e-06, "loss": 0.0962, "step": 24420 }, { "epoch": 0.7224818122671083, "grad_norm": 1.123669147491455, "learning_rate": 2.7854172267929817e-06, "loss": 0.0836, "step": 24430 }, { "epoch": 0.7227775477612823, "grad_norm": 1.1580137014389038, "learning_rate": 2.786557436063436e-06, "loss": 0.0915, "step": 24440 }, { "epoch": 0.7230732832554563, "grad_norm": 0.9466308355331421, "learning_rate": 2.7876976453338905e-06, "loss": 0.0819, "step": 24450 }, { "epoch": 0.7233690187496303, "grad_norm": 1.5986449718475342, "learning_rate": 2.788837854604345e-06, "loss": 0.1049, "step": 24460 }, { "epoch": 0.7236647542438044, "grad_norm": 1.6866501569747925, "learning_rate": 2.789978063874799e-06, "loss": 0.1102, "step": 24470 }, { "epoch": 0.7239604897379783, "grad_norm": 1.819139003753662, "learning_rate": 2.7911182731452534e-06, "loss": 0.0992, "step": 24480 }, { "epoch": 0.7242562252321524, "grad_norm": 0.9972324967384338, "learning_rate": 2.7922584824157076e-06, "loss": 0.0952, "step": 24490 }, { "epoch": 0.7245519607263263, "grad_norm": 1.7941335439682007, "learning_rate": 2.793398691686162e-06, "loss": 0.0818, "step": 24500 }, { "epoch": 0.7248476962205004, "grad_norm": 1.7904554605484009, "learning_rate": 2.7945389009566167e-06, "loss": 0.0852, "step": 24510 }, { "epoch": 0.7251434317146744, "grad_norm": 1.4754815101623535, "learning_rate": 2.795679110227071e-06, "loss": 0.1143, "step": 24520 }, { "epoch": 0.7254391672088484, "grad_norm": 1.2339415550231934, "learning_rate": 2.7968193194975255e-06, "loss": 0.0987, "step": 24530 }, { "epoch": 0.7257349027030224, "grad_norm": 1.643710970878601, "learning_rate": 2.7979595287679797e-06, "loss": 0.1011, "step": 24540 }, { "epoch": 0.7260306381971964, "grad_norm": 0.9363278746604919, "learning_rate": 2.799099738038434e-06, "loss": 0.0763, "step": 24550 }, { "epoch": 0.7263263736913704, "grad_norm": 1.3736003637313843, "learning_rate": 2.8002399473088884e-06, "loss": 0.1011, "step": 24560 }, { "epoch": 0.7266221091855445, "grad_norm": 1.0528206825256348, "learning_rate": 2.8013801565793426e-06, "loss": 0.0855, "step": 24570 }, { "epoch": 0.7269178446797184, "grad_norm": 1.0755879878997803, "learning_rate": 2.802520365849797e-06, "loss": 0.094, "step": 24580 }, { "epoch": 0.7272135801738925, "grad_norm": 1.3695952892303467, "learning_rate": 2.8036605751202513e-06, "loss": 0.0812, "step": 24590 }, { "epoch": 0.7275093156680665, "grad_norm": 1.5674622058868408, "learning_rate": 2.804800784390706e-06, "loss": 0.0665, "step": 24600 }, { "epoch": 0.7278050511622405, "grad_norm": 0.9054242372512817, "learning_rate": 2.8059409936611605e-06, "loss": 0.0912, "step": 24610 }, { "epoch": 0.7281007866564145, "grad_norm": 0.9240153431892395, "learning_rate": 2.8070812029316147e-06, "loss": 0.087, "step": 24620 }, { "epoch": 0.7283965221505885, "grad_norm": 1.142526388168335, "learning_rate": 2.808221412202069e-06, "loss": 0.0974, "step": 24630 }, { "epoch": 0.7286922576447625, "grad_norm": 1.1087926626205444, "learning_rate": 2.809361621472523e-06, "loss": 0.1024, "step": 24640 }, { "epoch": 0.7289879931389366, "grad_norm": 2.097843647003174, "learning_rate": 2.8105018307429776e-06, "loss": 0.0985, "step": 24650 }, { "epoch": 0.7292837286331105, "grad_norm": 1.149888277053833, "learning_rate": 2.811642040013432e-06, "loss": 0.1009, "step": 24660 }, { "epoch": 0.7295794641272846, "grad_norm": 1.0015342235565186, "learning_rate": 2.8127822492838863e-06, "loss": 0.0859, "step": 24670 }, { "epoch": 0.7298751996214585, "grad_norm": 0.8552561402320862, "learning_rate": 2.813922458554341e-06, "loss": 0.0801, "step": 24680 }, { "epoch": 0.7301709351156326, "grad_norm": 0.8021762371063232, "learning_rate": 2.8150626678247955e-06, "loss": 0.0882, "step": 24690 }, { "epoch": 0.7304666706098066, "grad_norm": 1.2808499336242676, "learning_rate": 2.8162028770952497e-06, "loss": 0.0731, "step": 24700 }, { "epoch": 0.7307624061039806, "grad_norm": 2.1858139038085938, "learning_rate": 2.817343086365704e-06, "loss": 0.0994, "step": 24710 }, { "epoch": 0.7310581415981546, "grad_norm": 1.3759225606918335, "learning_rate": 2.818483295636158e-06, "loss": 0.086, "step": 24720 }, { "epoch": 0.7313538770923286, "grad_norm": 1.3475329875946045, "learning_rate": 2.8196235049066126e-06, "loss": 0.1018, "step": 24730 }, { "epoch": 0.7316496125865026, "grad_norm": 0.9750885963439941, "learning_rate": 2.820763714177067e-06, "loss": 0.0805, "step": 24740 }, { "epoch": 0.7319453480806767, "grad_norm": 1.316325068473816, "learning_rate": 2.8219039234475214e-06, "loss": 0.0647, "step": 24750 }, { "epoch": 0.7322410835748506, "grad_norm": 1.1603143215179443, "learning_rate": 2.823044132717976e-06, "loss": 0.0922, "step": 24760 }, { "epoch": 0.7325368190690247, "grad_norm": 1.4993501901626587, "learning_rate": 2.82418434198843e-06, "loss": 0.0786, "step": 24770 }, { "epoch": 0.7328325545631986, "grad_norm": 1.5520074367523193, "learning_rate": 2.8253245512588847e-06, "loss": 0.0869, "step": 24780 }, { "epoch": 0.7331282900573727, "grad_norm": 1.2877672910690308, "learning_rate": 2.826464760529339e-06, "loss": 0.098, "step": 24790 }, { "epoch": 0.7334240255515467, "grad_norm": 1.304001808166504, "learning_rate": 2.827604969799793e-06, "loss": 0.0815, "step": 24800 }, { "epoch": 0.7337197610457207, "grad_norm": 1.412631869316101, "learning_rate": 2.8287451790702476e-06, "loss": 0.0963, "step": 24810 }, { "epoch": 0.7340154965398947, "grad_norm": 1.1827056407928467, "learning_rate": 2.8298853883407018e-06, "loss": 0.1139, "step": 24820 }, { "epoch": 0.7343112320340688, "grad_norm": 1.4482265710830688, "learning_rate": 2.8310255976111564e-06, "loss": 0.0816, "step": 24830 }, { "epoch": 0.7346069675282427, "grad_norm": 1.070608139038086, "learning_rate": 2.832165806881611e-06, "loss": 0.089, "step": 24840 }, { "epoch": 0.7349027030224168, "grad_norm": 0.8948710560798645, "learning_rate": 2.833306016152065e-06, "loss": 0.1001, "step": 24850 }, { "epoch": 0.7351984385165907, "grad_norm": 1.130915880203247, "learning_rate": 2.8344462254225197e-06, "loss": 0.1121, "step": 24860 }, { "epoch": 0.7354941740107648, "grad_norm": 0.8922879099845886, "learning_rate": 2.8355864346929735e-06, "loss": 0.0971, "step": 24870 }, { "epoch": 0.7357899095049388, "grad_norm": 1.008288025856018, "learning_rate": 2.836726643963428e-06, "loss": 0.0954, "step": 24880 }, { "epoch": 0.7360856449991128, "grad_norm": 1.2910797595977783, "learning_rate": 2.8378668532338826e-06, "loss": 0.093, "step": 24890 }, { "epoch": 0.7363813804932868, "grad_norm": 1.2950761318206787, "learning_rate": 2.839007062504337e-06, "loss": 0.0715, "step": 24900 }, { "epoch": 0.7366771159874608, "grad_norm": 2.052187442779541, "learning_rate": 2.8401472717747914e-06, "loss": 0.1082, "step": 24910 }, { "epoch": 0.7369728514816348, "grad_norm": 1.125252366065979, "learning_rate": 2.841287481045246e-06, "loss": 0.1017, "step": 24920 }, { "epoch": 0.7372685869758089, "grad_norm": 0.9529716372489929, "learning_rate": 2.8424276903157e-06, "loss": 0.1037, "step": 24930 }, { "epoch": 0.7375643224699828, "grad_norm": 0.925663411617279, "learning_rate": 2.8435678995861547e-06, "loss": 0.094, "step": 24940 }, { "epoch": 0.7378600579641569, "grad_norm": 1.3966381549835205, "learning_rate": 2.8447081088566085e-06, "loss": 0.0884, "step": 24950 }, { "epoch": 0.7381557934583308, "grad_norm": 1.1854156255722046, "learning_rate": 2.845848318127063e-06, "loss": 0.1014, "step": 24960 }, { "epoch": 0.7384515289525049, "grad_norm": 0.9084684252738953, "learning_rate": 2.8469885273975177e-06, "loss": 0.0922, "step": 24970 }, { "epoch": 0.7387472644466789, "grad_norm": 1.1569252014160156, "learning_rate": 2.848128736667972e-06, "loss": 0.0897, "step": 24980 }, { "epoch": 0.7390429999408529, "grad_norm": 0.6160550713539124, "learning_rate": 2.8492689459384264e-06, "loss": 0.073, "step": 24990 }, { "epoch": 0.7393387354350269, "grad_norm": 1.3496237993240356, "learning_rate": 2.8504091552088806e-06, "loss": 0.0797, "step": 25000 }, { "epoch": 0.7396344709292009, "grad_norm": 0.9435704350471497, "learning_rate": 2.851549364479335e-06, "loss": 0.1043, "step": 25010 }, { "epoch": 0.7399302064233749, "grad_norm": 1.490173578262329, "learning_rate": 2.8526895737497897e-06, "loss": 0.1032, "step": 25020 }, { "epoch": 0.740225941917549, "grad_norm": 1.0545930862426758, "learning_rate": 2.8538297830202435e-06, "loss": 0.1117, "step": 25030 }, { "epoch": 0.7405216774117229, "grad_norm": 1.1554844379425049, "learning_rate": 2.854969992290698e-06, "loss": 0.0958, "step": 25040 }, { "epoch": 0.740817412905897, "grad_norm": 0.7837294340133667, "learning_rate": 2.8561102015611522e-06, "loss": 0.0853, "step": 25050 }, { "epoch": 0.741113148400071, "grad_norm": 1.0572500228881836, "learning_rate": 2.857250410831607e-06, "loss": 0.1078, "step": 25060 }, { "epoch": 0.741408883894245, "grad_norm": 0.9203202128410339, "learning_rate": 2.8583906201020614e-06, "loss": 0.1044, "step": 25070 }, { "epoch": 0.741704619388419, "grad_norm": 1.1146682500839233, "learning_rate": 2.8595308293725156e-06, "loss": 0.0967, "step": 25080 }, { "epoch": 0.742000354882593, "grad_norm": 0.8190927505493164, "learning_rate": 2.86067103864297e-06, "loss": 0.0751, "step": 25090 }, { "epoch": 0.742296090376767, "grad_norm": 1.6835379600524902, "learning_rate": 2.8618112479134243e-06, "loss": 0.0772, "step": 25100 }, { "epoch": 0.7425918258709411, "grad_norm": 1.047202467918396, "learning_rate": 2.8629514571838785e-06, "loss": 0.1116, "step": 25110 }, { "epoch": 0.742887561365115, "grad_norm": 0.9684354662895203, "learning_rate": 2.864091666454333e-06, "loss": 0.0764, "step": 25120 }, { "epoch": 0.7431832968592891, "grad_norm": 1.0098894834518433, "learning_rate": 2.8652318757247873e-06, "loss": 0.0775, "step": 25130 }, { "epoch": 0.743479032353463, "grad_norm": 1.8651385307312012, "learning_rate": 2.866372084995242e-06, "loss": 0.097, "step": 25140 }, { "epoch": 0.7437747678476371, "grad_norm": 1.6353555917739868, "learning_rate": 2.8675122942656964e-06, "loss": 0.0799, "step": 25150 }, { "epoch": 0.7440705033418111, "grad_norm": 1.2331206798553467, "learning_rate": 2.8686525035361506e-06, "loss": 0.1053, "step": 25160 }, { "epoch": 0.7443662388359851, "grad_norm": 1.4080547094345093, "learning_rate": 2.869792712806605e-06, "loss": 0.1065, "step": 25170 }, { "epoch": 0.7446619743301591, "grad_norm": 1.2596946954727173, "learning_rate": 2.8709329220770594e-06, "loss": 0.0923, "step": 25180 }, { "epoch": 0.7449577098243331, "grad_norm": 0.8599788546562195, "learning_rate": 2.8720731313475135e-06, "loss": 0.0994, "step": 25190 }, { "epoch": 0.7452534453185071, "grad_norm": 2.820988178253174, "learning_rate": 2.873213340617968e-06, "loss": 0.0827, "step": 25200 }, { "epoch": 0.7455491808126812, "grad_norm": 0.934920072555542, "learning_rate": 2.8743535498884223e-06, "loss": 0.0969, "step": 25210 }, { "epoch": 0.7458449163068551, "grad_norm": 1.2486066818237305, "learning_rate": 2.875493759158877e-06, "loss": 0.0923, "step": 25220 }, { "epoch": 0.7461406518010292, "grad_norm": 1.053541898727417, "learning_rate": 2.876633968429331e-06, "loss": 0.0976, "step": 25230 }, { "epoch": 0.7464363872952031, "grad_norm": 0.9272991418838501, "learning_rate": 2.8777741776997856e-06, "loss": 0.0835, "step": 25240 }, { "epoch": 0.7467321227893772, "grad_norm": 1.33474600315094, "learning_rate": 2.87891438697024e-06, "loss": 0.0817, "step": 25250 }, { "epoch": 0.7470278582835512, "grad_norm": 1.1773601770401, "learning_rate": 2.8800545962406944e-06, "loss": 0.0974, "step": 25260 }, { "epoch": 0.7473235937777252, "grad_norm": 0.7001953125, "learning_rate": 2.8811948055111485e-06, "loss": 0.0955, "step": 25270 }, { "epoch": 0.7476193292718992, "grad_norm": 0.8447529673576355, "learning_rate": 2.8823350147816027e-06, "loss": 0.1006, "step": 25280 }, { "epoch": 0.7479150647660733, "grad_norm": 0.6471728086471558, "learning_rate": 2.8834752240520573e-06, "loss": 0.0753, "step": 25290 }, { "epoch": 0.7482108002602472, "grad_norm": 0.9310383200645447, "learning_rate": 2.884615433322512e-06, "loss": 0.074, "step": 25300 }, { "epoch": 0.7485065357544213, "grad_norm": 1.0681833028793335, "learning_rate": 2.885755642592966e-06, "loss": 0.111, "step": 25310 }, { "epoch": 0.7488022712485952, "grad_norm": 0.7911792397499084, "learning_rate": 2.8868958518634206e-06, "loss": 0.1103, "step": 25320 }, { "epoch": 0.7490980067427693, "grad_norm": 1.1569491624832153, "learning_rate": 2.888036061133875e-06, "loss": 0.1086, "step": 25330 }, { "epoch": 0.7493937422369433, "grad_norm": 1.2394909858703613, "learning_rate": 2.8891762704043294e-06, "loss": 0.0941, "step": 25340 }, { "epoch": 0.7496894777311173, "grad_norm": 1.3976503610610962, "learning_rate": 2.8903164796747836e-06, "loss": 0.0833, "step": 25350 }, { "epoch": 0.7499852132252913, "grad_norm": 0.7612642645835876, "learning_rate": 2.8914566889452377e-06, "loss": 0.0918, "step": 25360 }, { "epoch": 0.7502809487194653, "grad_norm": 0.9746830463409424, "learning_rate": 2.8925968982156923e-06, "loss": 0.1153, "step": 25370 }, { "epoch": 0.7505766842136393, "grad_norm": 0.9375082850456238, "learning_rate": 2.893737107486147e-06, "loss": 0.1074, "step": 25380 }, { "epoch": 0.7508724197078134, "grad_norm": 1.148040771484375, "learning_rate": 2.894877316756601e-06, "loss": 0.0983, "step": 25390 }, { "epoch": 0.7511681552019873, "grad_norm": 1.5275120735168457, "learning_rate": 2.8960175260270556e-06, "loss": 0.0878, "step": 25400 }, { "epoch": 0.7514638906961614, "grad_norm": 1.1930468082427979, "learning_rate": 2.89715773529751e-06, "loss": 0.1201, "step": 25410 }, { "epoch": 0.7517596261903353, "grad_norm": 1.0430949926376343, "learning_rate": 2.8982979445679644e-06, "loss": 0.0962, "step": 25420 }, { "epoch": 0.7520553616845094, "grad_norm": 1.197619915008545, "learning_rate": 2.899438153838419e-06, "loss": 0.0821, "step": 25430 }, { "epoch": 0.7523510971786834, "grad_norm": 0.8453516364097595, "learning_rate": 2.9005783631088727e-06, "loss": 0.0958, "step": 25440 }, { "epoch": 0.7526468326728574, "grad_norm": 1.1505236625671387, "learning_rate": 2.9017185723793273e-06, "loss": 0.0721, "step": 25450 }, { "epoch": 0.7529425681670314, "grad_norm": 1.3016102313995361, "learning_rate": 2.9028587816497815e-06, "loss": 0.1118, "step": 25460 }, { "epoch": 0.7532383036612054, "grad_norm": 1.0277715921401978, "learning_rate": 2.903998990920236e-06, "loss": 0.0887, "step": 25470 }, { "epoch": 0.7535340391553794, "grad_norm": 1.2570692300796509, "learning_rate": 2.9051392001906907e-06, "loss": 0.1062, "step": 25480 }, { "epoch": 0.7538297746495535, "grad_norm": 1.6548289060592651, "learning_rate": 2.906279409461145e-06, "loss": 0.0985, "step": 25490 }, { "epoch": 0.7541255101437274, "grad_norm": 1.744871973991394, "learning_rate": 2.9074196187315994e-06, "loss": 0.0826, "step": 25500 }, { "epoch": 0.7544212456379015, "grad_norm": 1.5844391584396362, "learning_rate": 2.9085598280020536e-06, "loss": 0.1042, "step": 25510 }, { "epoch": 0.7547169811320755, "grad_norm": 0.8991546034812927, "learning_rate": 2.9097000372725077e-06, "loss": 0.1003, "step": 25520 }, { "epoch": 0.7550127166262495, "grad_norm": 1.520158290863037, "learning_rate": 2.9108402465429623e-06, "loss": 0.0856, "step": 25530 }, { "epoch": 0.7553084521204235, "grad_norm": 1.5480539798736572, "learning_rate": 2.9119804558134165e-06, "loss": 0.0917, "step": 25540 }, { "epoch": 0.7556041876145975, "grad_norm": 1.1505800485610962, "learning_rate": 2.913120665083871e-06, "loss": 0.0819, "step": 25550 }, { "epoch": 0.7558999231087715, "grad_norm": 1.860944151878357, "learning_rate": 2.9142608743543253e-06, "loss": 0.097, "step": 25560 }, { "epoch": 0.7561956586029456, "grad_norm": 1.227772831916809, "learning_rate": 2.91540108362478e-06, "loss": 0.0881, "step": 25570 }, { "epoch": 0.7564913940971195, "grad_norm": 1.440264105796814, "learning_rate": 2.9165412928952344e-06, "loss": 0.0932, "step": 25580 }, { "epoch": 0.7567871295912936, "grad_norm": 1.2887834310531616, "learning_rate": 2.9176815021656886e-06, "loss": 0.0921, "step": 25590 }, { "epoch": 0.7570828650854675, "grad_norm": 1.4238213300704956, "learning_rate": 2.9188217114361428e-06, "loss": 0.0805, "step": 25600 }, { "epoch": 0.7573786005796416, "grad_norm": 1.610224962234497, "learning_rate": 2.9199619207065974e-06, "loss": 0.122, "step": 25610 }, { "epoch": 0.7576743360738156, "grad_norm": 0.9776206612586975, "learning_rate": 2.9211021299770515e-06, "loss": 0.0965, "step": 25620 }, { "epoch": 0.7579700715679896, "grad_norm": 1.2370860576629639, "learning_rate": 2.922242339247506e-06, "loss": 0.1012, "step": 25630 }, { "epoch": 0.7582658070621636, "grad_norm": 1.1177644729614258, "learning_rate": 2.9233825485179603e-06, "loss": 0.1004, "step": 25640 }, { "epoch": 0.7585615425563376, "grad_norm": 1.1794970035552979, "learning_rate": 2.924522757788415e-06, "loss": 0.0793, "step": 25650 }, { "epoch": 0.7588572780505116, "grad_norm": 2.1593167781829834, "learning_rate": 2.9256629670588694e-06, "loss": 0.1217, "step": 25660 }, { "epoch": 0.7591530135446857, "grad_norm": 1.0088281631469727, "learning_rate": 2.9268031763293236e-06, "loss": 0.088, "step": 25670 }, { "epoch": 0.7594487490388596, "grad_norm": 1.1076277494430542, "learning_rate": 2.9279433855997778e-06, "loss": 0.0853, "step": 25680 }, { "epoch": 0.7597444845330337, "grad_norm": 1.306689977645874, "learning_rate": 2.929083594870232e-06, "loss": 0.1029, "step": 25690 }, { "epoch": 0.7600402200272076, "grad_norm": 1.4249025583267212, "learning_rate": 2.9302238041406865e-06, "loss": 0.0745, "step": 25700 }, { "epoch": 0.7603359555213817, "grad_norm": 0.930271327495575, "learning_rate": 2.931364013411141e-06, "loss": 0.1131, "step": 25710 }, { "epoch": 0.7606316910155557, "grad_norm": 1.471337080001831, "learning_rate": 2.9325042226815953e-06, "loss": 0.0998, "step": 25720 }, { "epoch": 0.7609274265097297, "grad_norm": 1.3374128341674805, "learning_rate": 2.93364443195205e-06, "loss": 0.1019, "step": 25730 }, { "epoch": 0.7612231620039037, "grad_norm": 1.822015643119812, "learning_rate": 2.934784641222504e-06, "loss": 0.0937, "step": 25740 }, { "epoch": 0.7615188974980778, "grad_norm": 1.319510817527771, "learning_rate": 2.9359248504929586e-06, "loss": 0.0889, "step": 25750 }, { "epoch": 0.7618146329922517, "grad_norm": 1.0160259008407593, "learning_rate": 2.937065059763413e-06, "loss": 0.1189, "step": 25760 }, { "epoch": 0.7621103684864258, "grad_norm": 1.2177740335464478, "learning_rate": 2.938205269033867e-06, "loss": 0.0974, "step": 25770 }, { "epoch": 0.7624061039805997, "grad_norm": 0.8945146799087524, "learning_rate": 2.9393454783043215e-06, "loss": 0.0935, "step": 25780 }, { "epoch": 0.7627018394747738, "grad_norm": 1.3086854219436646, "learning_rate": 2.9404856875747757e-06, "loss": 0.1006, "step": 25790 }, { "epoch": 0.7629975749689478, "grad_norm": 1.362432837486267, "learning_rate": 2.9416258968452303e-06, "loss": 0.0786, "step": 25800 }, { "epoch": 0.7632933104631218, "grad_norm": 0.9976385235786438, "learning_rate": 2.942766106115685e-06, "loss": 0.0922, "step": 25810 }, { "epoch": 0.7635890459572958, "grad_norm": 1.7899974584579468, "learning_rate": 2.943906315386139e-06, "loss": 0.098, "step": 25820 }, { "epoch": 0.7638847814514698, "grad_norm": 0.9098941683769226, "learning_rate": 2.9450465246565936e-06, "loss": 0.1005, "step": 25830 }, { "epoch": 0.7641805169456438, "grad_norm": 0.8672398924827576, "learning_rate": 2.946186733927048e-06, "loss": 0.0796, "step": 25840 }, { "epoch": 0.7644762524398179, "grad_norm": 1.1863280534744263, "learning_rate": 2.947326943197502e-06, "loss": 0.0696, "step": 25850 }, { "epoch": 0.7647719879339918, "grad_norm": 0.877158522605896, "learning_rate": 2.9484671524679566e-06, "loss": 0.1133, "step": 25860 }, { "epoch": 0.7650677234281659, "grad_norm": 1.4138656854629517, "learning_rate": 2.9496073617384107e-06, "loss": 0.1077, "step": 25870 }, { "epoch": 0.7653634589223398, "grad_norm": 0.8308823108673096, "learning_rate": 2.9507475710088653e-06, "loss": 0.0891, "step": 25880 }, { "epoch": 0.7656591944165139, "grad_norm": 0.5036703944206238, "learning_rate": 2.95188778027932e-06, "loss": 0.0942, "step": 25890 }, { "epoch": 0.7659549299106879, "grad_norm": 1.5472996234893799, "learning_rate": 2.953027989549774e-06, "loss": 0.0936, "step": 25900 }, { "epoch": 0.7662506654048619, "grad_norm": 0.8652152419090271, "learning_rate": 2.9541681988202287e-06, "loss": 0.0964, "step": 25910 }, { "epoch": 0.7665464008990359, "grad_norm": 1.025137186050415, "learning_rate": 2.9553084080906824e-06, "loss": 0.0851, "step": 25920 }, { "epoch": 0.7668421363932099, "grad_norm": 0.9931901097297668, "learning_rate": 2.956448617361137e-06, "loss": 0.0929, "step": 25930 }, { "epoch": 0.7671378718873839, "grad_norm": 1.1295571327209473, "learning_rate": 2.9575888266315916e-06, "loss": 0.0806, "step": 25940 }, { "epoch": 0.767433607381558, "grad_norm": 1.9767591953277588, "learning_rate": 2.9587290359020457e-06, "loss": 0.077, "step": 25950 }, { "epoch": 0.7677293428757319, "grad_norm": 0.6752023100852966, "learning_rate": 2.9598692451725003e-06, "loss": 0.0973, "step": 25960 }, { "epoch": 0.768025078369906, "grad_norm": 0.8668859601020813, "learning_rate": 2.9610094544429545e-06, "loss": 0.1143, "step": 25970 }, { "epoch": 0.76832081386408, "grad_norm": 1.42691171169281, "learning_rate": 2.962149663713409e-06, "loss": 0.0855, "step": 25980 }, { "epoch": 0.768616549358254, "grad_norm": 0.7759552001953125, "learning_rate": 2.9632898729838637e-06, "loss": 0.0687, "step": 25990 }, { "epoch": 0.768912284852428, "grad_norm": 2.058809757232666, "learning_rate": 2.9644300822543174e-06, "loss": 0.1031, "step": 26000 }, { "epoch": 0.769208020346602, "grad_norm": 1.0842152833938599, "learning_rate": 2.965570291524772e-06, "loss": 0.1136, "step": 26010 }, { "epoch": 0.769503755840776, "grad_norm": 1.377092719078064, "learning_rate": 2.966710500795226e-06, "loss": 0.0999, "step": 26020 }, { "epoch": 0.7697994913349501, "grad_norm": 1.2713805437088013, "learning_rate": 2.9678507100656808e-06, "loss": 0.0953, "step": 26030 }, { "epoch": 0.770095226829124, "grad_norm": 1.3965559005737305, "learning_rate": 2.9689909193361353e-06, "loss": 0.1007, "step": 26040 }, { "epoch": 0.7703909623232981, "grad_norm": 1.0042457580566406, "learning_rate": 2.9701311286065895e-06, "loss": 0.0918, "step": 26050 }, { "epoch": 0.770686697817472, "grad_norm": 1.0876840353012085, "learning_rate": 2.971271337877044e-06, "loss": 0.1162, "step": 26060 }, { "epoch": 0.7709824333116461, "grad_norm": 0.9618350267410278, "learning_rate": 2.9724115471474987e-06, "loss": 0.0923, "step": 26070 }, { "epoch": 0.7712781688058201, "grad_norm": 1.0993800163269043, "learning_rate": 2.9735517564179524e-06, "loss": 0.0997, "step": 26080 }, { "epoch": 0.7715739042999941, "grad_norm": 1.3930983543395996, "learning_rate": 2.974691965688407e-06, "loss": 0.0816, "step": 26090 }, { "epoch": 0.7718696397941681, "grad_norm": 1.9329699277877808, "learning_rate": 2.975832174958861e-06, "loss": 0.0701, "step": 26100 }, { "epoch": 0.7721653752883421, "grad_norm": 0.8531755805015564, "learning_rate": 2.9769723842293158e-06, "loss": 0.0909, "step": 26110 }, { "epoch": 0.7724611107825161, "grad_norm": 0.5809744596481323, "learning_rate": 2.9781125934997704e-06, "loss": 0.0894, "step": 26120 }, { "epoch": 0.7727568462766902, "grad_norm": 1.2343487739562988, "learning_rate": 2.9792528027702245e-06, "loss": 0.1209, "step": 26130 }, { "epoch": 0.7730525817708641, "grad_norm": 0.9576672911643982, "learning_rate": 2.980393012040679e-06, "loss": 0.0896, "step": 26140 }, { "epoch": 0.7733483172650382, "grad_norm": 1.1084113121032715, "learning_rate": 2.9815332213111333e-06, "loss": 0.0767, "step": 26150 }, { "epoch": 0.7736440527592121, "grad_norm": 1.103248953819275, "learning_rate": 2.9826734305815874e-06, "loss": 0.1017, "step": 26160 }, { "epoch": 0.7739397882533862, "grad_norm": 1.0279302597045898, "learning_rate": 2.983813639852042e-06, "loss": 0.1032, "step": 26170 }, { "epoch": 0.7742355237475602, "grad_norm": 1.3653252124786377, "learning_rate": 2.984953849122496e-06, "loss": 0.0931, "step": 26180 }, { "epoch": 0.7745312592417342, "grad_norm": 0.6125912070274353, "learning_rate": 2.986094058392951e-06, "loss": 0.0775, "step": 26190 }, { "epoch": 0.7748269947359082, "grad_norm": 0.845986008644104, "learning_rate": 2.987234267663405e-06, "loss": 0.0615, "step": 26200 }, { "epoch": 0.7751227302300823, "grad_norm": 1.3930935859680176, "learning_rate": 2.9883744769338595e-06, "loss": 0.1002, "step": 26210 }, { "epoch": 0.7754184657242562, "grad_norm": 1.3095866441726685, "learning_rate": 2.989514686204314e-06, "loss": 0.0905, "step": 26220 }, { "epoch": 0.7757142012184303, "grad_norm": 1.5345045328140259, "learning_rate": 2.9906548954747683e-06, "loss": 0.0854, "step": 26230 }, { "epoch": 0.7760099367126042, "grad_norm": 2.026498317718506, "learning_rate": 2.9917951047452225e-06, "loss": 0.0876, "step": 26240 }, { "epoch": 0.7763056722067783, "grad_norm": 0.9924718737602234, "learning_rate": 2.9929353140156766e-06, "loss": 0.0679, "step": 26250 }, { "epoch": 0.7766014077009523, "grad_norm": 1.1879180669784546, "learning_rate": 2.9940755232861312e-06, "loss": 0.1171, "step": 26260 }, { "epoch": 0.7768971431951263, "grad_norm": 1.2750943899154663, "learning_rate": 2.995215732556586e-06, "loss": 0.0817, "step": 26270 }, { "epoch": 0.7771928786893003, "grad_norm": 1.256679892539978, "learning_rate": 2.99635594182704e-06, "loss": 0.0895, "step": 26280 }, { "epoch": 0.7774886141834743, "grad_norm": 1.1193381547927856, "learning_rate": 2.9974961510974946e-06, "loss": 0.0966, "step": 26290 }, { "epoch": 0.7777843496776483, "grad_norm": 0.8524984121322632, "learning_rate": 2.998636360367949e-06, "loss": 0.0765, "step": 26300 }, { "epoch": 0.7780800851718224, "grad_norm": 0.7631569504737854, "learning_rate": 2.9997765696384033e-06, "loss": 0.1039, "step": 26310 }, { "epoch": 0.7783758206659963, "grad_norm": 1.3132492303848267, "learning_rate": 3.0009167789088575e-06, "loss": 0.0908, "step": 26320 }, { "epoch": 0.7786715561601704, "grad_norm": 1.6579073667526245, "learning_rate": 3.0020569881793116e-06, "loss": 0.1112, "step": 26330 }, { "epoch": 0.7789672916543443, "grad_norm": 1.1106021404266357, "learning_rate": 3.0031971974497662e-06, "loss": 0.0843, "step": 26340 }, { "epoch": 0.7792630271485184, "grad_norm": 0.4721481502056122, "learning_rate": 3.004337406720221e-06, "loss": 0.0689, "step": 26350 }, { "epoch": 0.7795587626426924, "grad_norm": 1.0936826467514038, "learning_rate": 3.005477615990675e-06, "loss": 0.0973, "step": 26360 }, { "epoch": 0.7798544981368664, "grad_norm": 2.2117741107940674, "learning_rate": 3.0066178252611296e-06, "loss": 0.1139, "step": 26370 }, { "epoch": 0.7801502336310404, "grad_norm": 1.4539283514022827, "learning_rate": 3.0077580345315837e-06, "loss": 0.1028, "step": 26380 }, { "epoch": 0.7804459691252144, "grad_norm": 0.7475581169128418, "learning_rate": 3.0088982438020383e-06, "loss": 0.0889, "step": 26390 }, { "epoch": 0.7807417046193884, "grad_norm": 1.5412389039993286, "learning_rate": 3.0100384530724925e-06, "loss": 0.0976, "step": 26400 }, { "epoch": 0.7810374401135625, "grad_norm": 0.9744701981544495, "learning_rate": 3.0111786623429467e-06, "loss": 0.0931, "step": 26410 }, { "epoch": 0.7813331756077364, "grad_norm": 1.1424044370651245, "learning_rate": 3.0123188716134012e-06, "loss": 0.0959, "step": 26420 }, { "epoch": 0.7816289111019105, "grad_norm": 1.5846238136291504, "learning_rate": 3.0134590808838554e-06, "loss": 0.1061, "step": 26430 }, { "epoch": 0.7819246465960845, "grad_norm": 0.9753280282020569, "learning_rate": 3.01459929015431e-06, "loss": 0.0878, "step": 26440 }, { "epoch": 0.7822203820902585, "grad_norm": 1.672318696975708, "learning_rate": 3.0157394994247646e-06, "loss": 0.0726, "step": 26450 }, { "epoch": 0.7825161175844325, "grad_norm": 0.9389573335647583, "learning_rate": 3.0168797086952188e-06, "loss": 0.0898, "step": 26460 }, { "epoch": 0.7828118530786065, "grad_norm": 0.6392570734024048, "learning_rate": 3.0180199179656733e-06, "loss": 0.087, "step": 26470 }, { "epoch": 0.7831075885727805, "grad_norm": 1.1518560647964478, "learning_rate": 3.019160127236127e-06, "loss": 0.0848, "step": 26480 }, { "epoch": 0.7834033240669546, "grad_norm": 0.6930180191993713, "learning_rate": 3.0203003365065817e-06, "loss": 0.1006, "step": 26490 }, { "epoch": 0.7836990595611285, "grad_norm": 1.0782032012939453, "learning_rate": 3.0214405457770363e-06, "loss": 0.0748, "step": 26500 }, { "epoch": 0.7839947950553026, "grad_norm": 1.118138313293457, "learning_rate": 3.0225807550474904e-06, "loss": 0.0977, "step": 26510 }, { "epoch": 0.7842905305494765, "grad_norm": 1.0189045667648315, "learning_rate": 3.023720964317945e-06, "loss": 0.1022, "step": 26520 }, { "epoch": 0.7845862660436506, "grad_norm": 1.0973857641220093, "learning_rate": 3.0248611735883996e-06, "loss": 0.0933, "step": 26530 }, { "epoch": 0.7848820015378246, "grad_norm": 0.7634848952293396, "learning_rate": 3.0260013828588538e-06, "loss": 0.074, "step": 26540 }, { "epoch": 0.7851777370319986, "grad_norm": 1.0391982793807983, "learning_rate": 3.0271415921293084e-06, "loss": 0.0679, "step": 26550 }, { "epoch": 0.7854734725261726, "grad_norm": 1.3497505187988281, "learning_rate": 3.028281801399762e-06, "loss": 0.1144, "step": 26560 }, { "epoch": 0.7857692080203466, "grad_norm": 0.7928776144981384, "learning_rate": 3.0294220106702167e-06, "loss": 0.0833, "step": 26570 }, { "epoch": 0.7860649435145206, "grad_norm": 1.7309869527816772, "learning_rate": 3.0305622199406713e-06, "loss": 0.0955, "step": 26580 }, { "epoch": 0.7863606790086947, "grad_norm": 1.1361202001571655, "learning_rate": 3.0317024292111254e-06, "loss": 0.1001, "step": 26590 }, { "epoch": 0.7866564145028686, "grad_norm": 1.2581162452697754, "learning_rate": 3.03284263848158e-06, "loss": 0.0919, "step": 26600 }, { "epoch": 0.7869521499970427, "grad_norm": 1.735303521156311, "learning_rate": 3.033982847752034e-06, "loss": 0.1051, "step": 26610 }, { "epoch": 0.7872478854912166, "grad_norm": 1.2082571983337402, "learning_rate": 3.0351230570224888e-06, "loss": 0.0844, "step": 26620 }, { "epoch": 0.7875436209853907, "grad_norm": 0.9992790818214417, "learning_rate": 3.0362632662929434e-06, "loss": 0.0994, "step": 26630 }, { "epoch": 0.7878393564795647, "grad_norm": 1.3133931159973145, "learning_rate": 3.037403475563397e-06, "loss": 0.082, "step": 26640 }, { "epoch": 0.7881350919737387, "grad_norm": 0.8556798100471497, "learning_rate": 3.0385436848338517e-06, "loss": 0.0704, "step": 26650 }, { "epoch": 0.7884308274679127, "grad_norm": 1.3010005950927734, "learning_rate": 3.039683894104306e-06, "loss": 0.0853, "step": 26660 }, { "epoch": 0.7887265629620868, "grad_norm": 1.2789795398712158, "learning_rate": 3.0408241033747605e-06, "loss": 0.0991, "step": 26670 }, { "epoch": 0.7890222984562607, "grad_norm": 0.8017732501029968, "learning_rate": 3.041964312645215e-06, "loss": 0.0877, "step": 26680 }, { "epoch": 0.7893180339504348, "grad_norm": 1.1173981428146362, "learning_rate": 3.0431045219156692e-06, "loss": 0.0857, "step": 26690 }, { "epoch": 0.7896137694446087, "grad_norm": 0.9720520377159119, "learning_rate": 3.044244731186124e-06, "loss": 0.0682, "step": 26700 }, { "epoch": 0.7899095049387828, "grad_norm": 0.8814991116523743, "learning_rate": 3.045384940456578e-06, "loss": 0.0844, "step": 26710 }, { "epoch": 0.7902052404329568, "grad_norm": 1.5214166641235352, "learning_rate": 3.046525149727032e-06, "loss": 0.1041, "step": 26720 }, { "epoch": 0.7905009759271308, "grad_norm": 0.9744746685028076, "learning_rate": 3.0476653589974867e-06, "loss": 0.1086, "step": 26730 }, { "epoch": 0.7907967114213048, "grad_norm": 1.1568541526794434, "learning_rate": 3.048805568267941e-06, "loss": 0.1015, "step": 26740 }, { "epoch": 0.7910924469154788, "grad_norm": 1.240604281425476, "learning_rate": 3.0499457775383955e-06, "loss": 0.0932, "step": 26750 }, { "epoch": 0.7913881824096528, "grad_norm": 1.0214756727218628, "learning_rate": 3.05108598680885e-06, "loss": 0.0902, "step": 26760 }, { "epoch": 0.7916839179038269, "grad_norm": 1.2185449600219727, "learning_rate": 3.0522261960793042e-06, "loss": 0.1255, "step": 26770 }, { "epoch": 0.7919796533980008, "grad_norm": 1.0016642808914185, "learning_rate": 3.053366405349759e-06, "loss": 0.0982, "step": 26780 }, { "epoch": 0.7922753888921749, "grad_norm": 1.0517094135284424, "learning_rate": 3.054506614620213e-06, "loss": 0.0917, "step": 26790 }, { "epoch": 0.7925711243863488, "grad_norm": 0.8670816421508789, "learning_rate": 3.0556468238906676e-06, "loss": 0.0787, "step": 26800 }, { "epoch": 0.7928668598805229, "grad_norm": 1.2436059713363647, "learning_rate": 3.0567870331611217e-06, "loss": 0.0985, "step": 26810 }, { "epoch": 0.7931625953746969, "grad_norm": 1.592594861984253, "learning_rate": 3.057927242431576e-06, "loss": 0.0891, "step": 26820 }, { "epoch": 0.7934583308688709, "grad_norm": 1.3843811750411987, "learning_rate": 3.0590674517020305e-06, "loss": 0.0911, "step": 26830 }, { "epoch": 0.7937540663630449, "grad_norm": 1.0348005294799805, "learning_rate": 3.0602076609724847e-06, "loss": 0.0741, "step": 26840 }, { "epoch": 0.7940498018572189, "grad_norm": 1.1317980289459229, "learning_rate": 3.0613478702429392e-06, "loss": 0.0663, "step": 26850 }, { "epoch": 0.7943455373513929, "grad_norm": 1.7125859260559082, "learning_rate": 3.062488079513394e-06, "loss": 0.1227, "step": 26860 }, { "epoch": 0.794641272845567, "grad_norm": 1.5364466905593872, "learning_rate": 3.063628288783848e-06, "loss": 0.0942, "step": 26870 }, { "epoch": 0.7949370083397409, "grad_norm": 1.2008180618286133, "learning_rate": 3.0647684980543026e-06, "loss": 0.1123, "step": 26880 }, { "epoch": 0.795232743833915, "grad_norm": 1.1676839590072632, "learning_rate": 3.0659087073247563e-06, "loss": 0.0788, "step": 26890 }, { "epoch": 0.795528479328089, "grad_norm": 0.9239363074302673, "learning_rate": 3.067048916595211e-06, "loss": 0.0805, "step": 26900 }, { "epoch": 0.795824214822263, "grad_norm": 1.24601411819458, "learning_rate": 3.0681891258656655e-06, "loss": 0.1037, "step": 26910 }, { "epoch": 0.796119950316437, "grad_norm": 1.0401266813278198, "learning_rate": 3.0693293351361197e-06, "loss": 0.0818, "step": 26920 }, { "epoch": 0.796415685810611, "grad_norm": 1.1219438314437866, "learning_rate": 3.0704695444065743e-06, "loss": 0.0868, "step": 26930 }, { "epoch": 0.796711421304785, "grad_norm": 1.2447175979614258, "learning_rate": 3.0716097536770284e-06, "loss": 0.0843, "step": 26940 }, { "epoch": 0.7970071567989591, "grad_norm": 1.2862578630447388, "learning_rate": 3.072749962947483e-06, "loss": 0.0733, "step": 26950 }, { "epoch": 0.797302892293133, "grad_norm": 0.997438907623291, "learning_rate": 3.0738901722179376e-06, "loss": 0.1015, "step": 26960 }, { "epoch": 0.7975986277873071, "grad_norm": 1.3386904001235962, "learning_rate": 3.0750303814883913e-06, "loss": 0.0889, "step": 26970 }, { "epoch": 0.797894363281481, "grad_norm": 1.2538278102874756, "learning_rate": 3.076170590758846e-06, "loss": 0.0764, "step": 26980 }, { "epoch": 0.798190098775655, "grad_norm": 1.0181533098220825, "learning_rate": 3.0773108000293005e-06, "loss": 0.0854, "step": 26990 }, { "epoch": 0.7984858342698291, "grad_norm": 1.1998295783996582, "learning_rate": 3.0784510092997547e-06, "loss": 0.0777, "step": 27000 }, { "epoch": 0.798781569764003, "grad_norm": 1.252342939376831, "learning_rate": 3.0795912185702093e-06, "loss": 0.1041, "step": 27010 }, { "epoch": 0.7990773052581771, "grad_norm": 1.5163642168045044, "learning_rate": 3.0807314278406634e-06, "loss": 0.0967, "step": 27020 }, { "epoch": 0.799373040752351, "grad_norm": 0.6394703984260559, "learning_rate": 3.081871637111118e-06, "loss": 0.0986, "step": 27030 }, { "epoch": 0.7996687762465251, "grad_norm": 0.8558893203735352, "learning_rate": 3.0830118463815726e-06, "loss": 0.0734, "step": 27040 }, { "epoch": 0.7999645117406992, "grad_norm": 1.088513731956482, "learning_rate": 3.0841520556520264e-06, "loss": 0.0948, "step": 27050 }, { "epoch": 0.8002602472348731, "grad_norm": 1.439220905303955, "learning_rate": 3.085292264922481e-06, "loss": 0.1012, "step": 27060 }, { "epoch": 0.8005559827290472, "grad_norm": 1.7550907135009766, "learning_rate": 3.086432474192935e-06, "loss": 0.1156, "step": 27070 }, { "epoch": 0.8008517182232211, "grad_norm": 0.9077404737472534, "learning_rate": 3.0875726834633897e-06, "loss": 0.0833, "step": 27080 }, { "epoch": 0.8011474537173952, "grad_norm": 0.8419257998466492, "learning_rate": 3.0887128927338443e-06, "loss": 0.0718, "step": 27090 }, { "epoch": 0.8014431892115692, "grad_norm": 1.0166999101638794, "learning_rate": 3.0898531020042985e-06, "loss": 0.0759, "step": 27100 }, { "epoch": 0.8017389247057432, "grad_norm": 1.0851969718933105, "learning_rate": 3.090993311274753e-06, "loss": 0.1032, "step": 27110 }, { "epoch": 0.8020346601999172, "grad_norm": 1.7154134511947632, "learning_rate": 3.092133520545207e-06, "loss": 0.0866, "step": 27120 }, { "epoch": 0.8023303956940913, "grad_norm": 0.9731138944625854, "learning_rate": 3.0932737298156614e-06, "loss": 0.0835, "step": 27130 }, { "epoch": 0.8026261311882652, "grad_norm": 1.5311000347137451, "learning_rate": 3.094413939086116e-06, "loss": 0.0935, "step": 27140 }, { "epoch": 0.8029218666824393, "grad_norm": 0.8814651370048523, "learning_rate": 3.09555414835657e-06, "loss": 0.0685, "step": 27150 }, { "epoch": 0.8032176021766132, "grad_norm": 1.4605258703231812, "learning_rate": 3.0966943576270247e-06, "loss": 0.0919, "step": 27160 }, { "epoch": 0.8035133376707873, "grad_norm": 1.1058589220046997, "learning_rate": 3.097834566897479e-06, "loss": 0.1061, "step": 27170 }, { "epoch": 0.8038090731649613, "grad_norm": 1.0910767316818237, "learning_rate": 3.0989747761679335e-06, "loss": 0.0752, "step": 27180 }, { "epoch": 0.8041048086591353, "grad_norm": 1.095832347869873, "learning_rate": 3.100114985438388e-06, "loss": 0.0878, "step": 27190 }, { "epoch": 0.8044005441533093, "grad_norm": 2.9937143325805664, "learning_rate": 3.1012551947088422e-06, "loss": 0.1003, "step": 27200 }, { "epoch": 0.8046962796474832, "grad_norm": 0.9010720252990723, "learning_rate": 3.1023954039792964e-06, "loss": 0.1167, "step": 27210 }, { "epoch": 0.8049920151416573, "grad_norm": 1.1010576486587524, "learning_rate": 3.103535613249751e-06, "loss": 0.105, "step": 27220 }, { "epoch": 0.8052877506358314, "grad_norm": 1.4126096963882446, "learning_rate": 3.104675822520205e-06, "loss": 0.0976, "step": 27230 }, { "epoch": 0.8055834861300053, "grad_norm": 0.6461353898048401, "learning_rate": 3.1058160317906597e-06, "loss": 0.0851, "step": 27240 }, { "epoch": 0.8058792216241794, "grad_norm": 0.7065154910087585, "learning_rate": 3.106956241061114e-06, "loss": 0.0727, "step": 27250 }, { "epoch": 0.8061749571183533, "grad_norm": 1.5552020072937012, "learning_rate": 3.1080964503315685e-06, "loss": 0.1027, "step": 27260 }, { "epoch": 0.8064706926125274, "grad_norm": 1.4998282194137573, "learning_rate": 3.109236659602023e-06, "loss": 0.0914, "step": 27270 }, { "epoch": 0.8067664281067014, "grad_norm": 1.1103723049163818, "learning_rate": 3.1103768688724772e-06, "loss": 0.1033, "step": 27280 }, { "epoch": 0.8070621636008753, "grad_norm": 0.5843116044998169, "learning_rate": 3.1115170781429314e-06, "loss": 0.0955, "step": 27290 }, { "epoch": 0.8073578990950494, "grad_norm": 1.2121552228927612, "learning_rate": 3.1126572874133856e-06, "loss": 0.0755, "step": 27300 }, { "epoch": 0.8076536345892233, "grad_norm": 1.1822456121444702, "learning_rate": 3.11379749668384e-06, "loss": 0.095, "step": 27310 }, { "epoch": 0.8079493700833974, "grad_norm": 1.2385603189468384, "learning_rate": 3.1149377059542947e-06, "loss": 0.1005, "step": 27320 }, { "epoch": 0.8082451055775715, "grad_norm": 0.7508575320243835, "learning_rate": 3.116077915224749e-06, "loss": 0.0903, "step": 27330 }, { "epoch": 0.8085408410717454, "grad_norm": 1.1573160886764526, "learning_rate": 3.1172181244952035e-06, "loss": 0.0787, "step": 27340 }, { "epoch": 0.8088365765659195, "grad_norm": 2.0570037364959717, "learning_rate": 3.1183583337656577e-06, "loss": 0.0765, "step": 27350 }, { "epoch": 0.8091323120600935, "grad_norm": 1.2160669565200806, "learning_rate": 3.1194985430361123e-06, "loss": 0.098, "step": 27360 }, { "epoch": 0.8094280475542674, "grad_norm": 1.4756804704666138, "learning_rate": 3.1206387523065664e-06, "loss": 0.12, "step": 27370 }, { "epoch": 0.8097237830484415, "grad_norm": 0.9087268710136414, "learning_rate": 3.1217789615770206e-06, "loss": 0.09, "step": 27380 }, { "epoch": 0.8100195185426154, "grad_norm": 1.0188699960708618, "learning_rate": 3.122919170847475e-06, "loss": 0.0781, "step": 27390 }, { "epoch": 0.8103152540367895, "grad_norm": 1.2235482931137085, "learning_rate": 3.1240593801179293e-06, "loss": 0.0706, "step": 27400 }, { "epoch": 0.8106109895309636, "grad_norm": 1.0894547700881958, "learning_rate": 3.125199589388384e-06, "loss": 0.0974, "step": 27410 }, { "epoch": 0.8109067250251375, "grad_norm": 1.1083650588989258, "learning_rate": 3.1263397986588385e-06, "loss": 0.0838, "step": 27420 }, { "epoch": 0.8112024605193116, "grad_norm": 0.8260504603385925, "learning_rate": 3.1274800079292927e-06, "loss": 0.0814, "step": 27430 }, { "epoch": 0.8114981960134855, "grad_norm": 1.5097711086273193, "learning_rate": 3.1286202171997473e-06, "loss": 0.116, "step": 27440 }, { "epoch": 0.8117939315076595, "grad_norm": 1.5289875268936157, "learning_rate": 3.1297604264702014e-06, "loss": 0.0718, "step": 27450 }, { "epoch": 0.8120896670018336, "grad_norm": 1.0507410764694214, "learning_rate": 3.1309006357406556e-06, "loss": 0.1066, "step": 27460 }, { "epoch": 0.8123854024960075, "grad_norm": 0.8081768155097961, "learning_rate": 3.13204084501111e-06, "loss": 0.1018, "step": 27470 }, { "epoch": 0.8126811379901816, "grad_norm": 1.0094558000564575, "learning_rate": 3.1331810542815644e-06, "loss": 0.0778, "step": 27480 }, { "epoch": 0.8129768734843555, "grad_norm": 1.2755837440490723, "learning_rate": 3.134321263552019e-06, "loss": 0.0913, "step": 27490 }, { "epoch": 0.8132726089785296, "grad_norm": 1.2461471557617188, "learning_rate": 3.1354614728224735e-06, "loss": 0.0758, "step": 27500 }, { "epoch": 0.8135683444727037, "grad_norm": 1.2350823879241943, "learning_rate": 3.1366016820929277e-06, "loss": 0.0983, "step": 27510 }, { "epoch": 0.8138640799668776, "grad_norm": 1.0803009271621704, "learning_rate": 3.1377418913633823e-06, "loss": 0.0827, "step": 27520 }, { "epoch": 0.8141598154610517, "grad_norm": 1.6400419473648071, "learning_rate": 3.138882100633836e-06, "loss": 0.1071, "step": 27530 }, { "epoch": 0.8144555509552256, "grad_norm": 1.102813720703125, "learning_rate": 3.1400223099042906e-06, "loss": 0.1078, "step": 27540 }, { "epoch": 0.8147512864493996, "grad_norm": 0.9936804175376892, "learning_rate": 3.141162519174745e-06, "loss": 0.0675, "step": 27550 }, { "epoch": 0.8150470219435737, "grad_norm": 1.2740235328674316, "learning_rate": 3.1423027284451994e-06, "loss": 0.105, "step": 27560 }, { "epoch": 0.8153427574377476, "grad_norm": 1.0788085460662842, "learning_rate": 3.143442937715654e-06, "loss": 0.0843, "step": 27570 }, { "epoch": 0.8156384929319217, "grad_norm": 1.3624917268753052, "learning_rate": 3.144583146986108e-06, "loss": 0.096, "step": 27580 }, { "epoch": 0.8159342284260958, "grad_norm": 0.7671759128570557, "learning_rate": 3.1457233562565627e-06, "loss": 0.1002, "step": 27590 }, { "epoch": 0.8162299639202697, "grad_norm": 1.2467920780181885, "learning_rate": 3.1468635655270173e-06, "loss": 0.0905, "step": 27600 }, { "epoch": 0.8165256994144438, "grad_norm": 0.9395560026168823, "learning_rate": 3.148003774797471e-06, "loss": 0.0856, "step": 27610 }, { "epoch": 0.8168214349086177, "grad_norm": 0.6911748051643372, "learning_rate": 3.1491439840679256e-06, "loss": 0.11, "step": 27620 }, { "epoch": 0.8171171704027917, "grad_norm": 1.0984911918640137, "learning_rate": 3.15028419333838e-06, "loss": 0.0869, "step": 27630 }, { "epoch": 0.8174129058969658, "grad_norm": 0.5716385841369629, "learning_rate": 3.1514244026088344e-06, "loss": 0.0829, "step": 27640 }, { "epoch": 0.8177086413911397, "grad_norm": 0.9640820026397705, "learning_rate": 3.152564611879289e-06, "loss": 0.0768, "step": 27650 }, { "epoch": 0.8180043768853138, "grad_norm": 1.366157054901123, "learning_rate": 3.153704821149743e-06, "loss": 0.0911, "step": 27660 }, { "epoch": 0.8183001123794877, "grad_norm": 0.7356343269348145, "learning_rate": 3.1548450304201977e-06, "loss": 0.1012, "step": 27670 }, { "epoch": 0.8185958478736618, "grad_norm": 1.2059195041656494, "learning_rate": 3.1559852396906523e-06, "loss": 0.0862, "step": 27680 }, { "epoch": 0.8188915833678359, "grad_norm": 1.5791667699813843, "learning_rate": 3.157125448961106e-06, "loss": 0.0919, "step": 27690 }, { "epoch": 0.8191873188620098, "grad_norm": 1.1497443914413452, "learning_rate": 3.1582656582315606e-06, "loss": 0.0861, "step": 27700 }, { "epoch": 0.8194830543561838, "grad_norm": 1.277777075767517, "learning_rate": 3.159405867502015e-06, "loss": 0.0955, "step": 27710 }, { "epoch": 0.8197787898503578, "grad_norm": 1.051655888557434, "learning_rate": 3.1605460767724694e-06, "loss": 0.0838, "step": 27720 }, { "epoch": 0.8200745253445318, "grad_norm": 1.231967806816101, "learning_rate": 3.161686286042924e-06, "loss": 0.0831, "step": 27730 }, { "epoch": 0.8203702608387059, "grad_norm": 1.4628268480300903, "learning_rate": 3.162826495313378e-06, "loss": 0.1013, "step": 27740 }, { "epoch": 0.8206659963328798, "grad_norm": 1.265773057937622, "learning_rate": 3.1639667045838327e-06, "loss": 0.0807, "step": 27750 }, { "epoch": 0.8209617318270539, "grad_norm": 0.9494853615760803, "learning_rate": 3.165106913854287e-06, "loss": 0.1038, "step": 27760 }, { "epoch": 0.8212574673212278, "grad_norm": 1.0684150457382202, "learning_rate": 3.166247123124741e-06, "loss": 0.1077, "step": 27770 }, { "epoch": 0.8215532028154019, "grad_norm": 1.2490050792694092, "learning_rate": 3.1673873323951957e-06, "loss": 0.0822, "step": 27780 }, { "epoch": 0.821848938309576, "grad_norm": 0.8679643273353577, "learning_rate": 3.16852754166565e-06, "loss": 0.1071, "step": 27790 }, { "epoch": 0.8221446738037499, "grad_norm": 0.9423789381980896, "learning_rate": 3.1696677509361044e-06, "loss": 0.0861, "step": 27800 }, { "epoch": 0.822440409297924, "grad_norm": 0.8306081891059875, "learning_rate": 3.1708079602065586e-06, "loss": 0.1042, "step": 27810 }, { "epoch": 0.822736144792098, "grad_norm": 1.410510540008545, "learning_rate": 3.171948169477013e-06, "loss": 0.11, "step": 27820 }, { "epoch": 0.8230318802862719, "grad_norm": 1.7233806848526, "learning_rate": 3.1730883787474678e-06, "loss": 0.0782, "step": 27830 }, { "epoch": 0.823327615780446, "grad_norm": 1.051909327507019, "learning_rate": 3.174228588017922e-06, "loss": 0.0829, "step": 27840 }, { "epoch": 0.8236233512746199, "grad_norm": 1.1646008491516113, "learning_rate": 3.175368797288376e-06, "loss": 0.0933, "step": 27850 }, { "epoch": 0.823919086768794, "grad_norm": 0.8431004881858826, "learning_rate": 3.1765090065588303e-06, "loss": 0.112, "step": 27860 }, { "epoch": 0.824214822262968, "grad_norm": 1.087695837020874, "learning_rate": 3.177649215829285e-06, "loss": 0.0913, "step": 27870 }, { "epoch": 0.824510557757142, "grad_norm": 1.0602772235870361, "learning_rate": 3.1787894250997394e-06, "loss": 0.0887, "step": 27880 }, { "epoch": 0.824806293251316, "grad_norm": 0.3896876871585846, "learning_rate": 3.1799296343701936e-06, "loss": 0.0727, "step": 27890 }, { "epoch": 0.82510202874549, "grad_norm": 1.37213933467865, "learning_rate": 3.181069843640648e-06, "loss": 0.0759, "step": 27900 }, { "epoch": 0.825397764239664, "grad_norm": 1.0175292491912842, "learning_rate": 3.1822100529111028e-06, "loss": 0.1057, "step": 27910 }, { "epoch": 0.8256934997338381, "grad_norm": 1.0534526109695435, "learning_rate": 3.183350262181557e-06, "loss": 0.1098, "step": 27920 }, { "epoch": 0.825989235228012, "grad_norm": 1.3878103494644165, "learning_rate": 3.184490471452011e-06, "loss": 0.0847, "step": 27930 }, { "epoch": 0.8262849707221861, "grad_norm": 0.5339154601097107, "learning_rate": 3.1856306807224653e-06, "loss": 0.0994, "step": 27940 }, { "epoch": 0.82658070621636, "grad_norm": 1.327061414718628, "learning_rate": 3.18677088999292e-06, "loss": 0.0913, "step": 27950 }, { "epoch": 0.8268764417105341, "grad_norm": 1.2246589660644531, "learning_rate": 3.1879110992633744e-06, "loss": 0.1228, "step": 27960 }, { "epoch": 0.8271721772047081, "grad_norm": 0.8103845715522766, "learning_rate": 3.1890513085338286e-06, "loss": 0.089, "step": 27970 }, { "epoch": 0.8274679126988821, "grad_norm": 1.2111537456512451, "learning_rate": 3.190191517804283e-06, "loss": 0.0769, "step": 27980 }, { "epoch": 0.8277636481930561, "grad_norm": 0.803048312664032, "learning_rate": 3.1913317270747374e-06, "loss": 0.0888, "step": 27990 }, { "epoch": 0.8280593836872301, "grad_norm": 0.8248103857040405, "learning_rate": 3.192471936345192e-06, "loss": 0.0732, "step": 28000 }, { "epoch": 0.8283551191814041, "grad_norm": 0.9920276999473572, "learning_rate": 3.1936121456156465e-06, "loss": 0.108, "step": 28010 }, { "epoch": 0.8286508546755782, "grad_norm": 0.9868312478065491, "learning_rate": 3.1947523548861003e-06, "loss": 0.0992, "step": 28020 }, { "epoch": 0.8289465901697521, "grad_norm": 0.8880218863487244, "learning_rate": 3.195892564156555e-06, "loss": 0.084, "step": 28030 }, { "epoch": 0.8292423256639262, "grad_norm": 1.165266990661621, "learning_rate": 3.197032773427009e-06, "loss": 0.0797, "step": 28040 }, { "epoch": 0.8295380611581002, "grad_norm": 1.7976866960525513, "learning_rate": 3.1981729826974636e-06, "loss": 0.0871, "step": 28050 }, { "epoch": 0.8298337966522742, "grad_norm": 0.86534184217453, "learning_rate": 3.1993131919679182e-06, "loss": 0.1001, "step": 28060 }, { "epoch": 0.8301295321464482, "grad_norm": 1.2016409635543823, "learning_rate": 3.2004534012383724e-06, "loss": 0.097, "step": 28070 }, { "epoch": 0.8304252676406222, "grad_norm": 1.692826747894287, "learning_rate": 3.201593610508827e-06, "loss": 0.0872, "step": 28080 }, { "epoch": 0.8307210031347962, "grad_norm": 0.4558769166469574, "learning_rate": 3.202733819779281e-06, "loss": 0.0878, "step": 28090 }, { "epoch": 0.8310167386289703, "grad_norm": 1.392461895942688, "learning_rate": 3.2038740290497353e-06, "loss": 0.0744, "step": 28100 }, { "epoch": 0.8313124741231442, "grad_norm": 1.2923425436019897, "learning_rate": 3.20501423832019e-06, "loss": 0.0955, "step": 28110 }, { "epoch": 0.8316082096173183, "grad_norm": 1.2805508375167847, "learning_rate": 3.206154447590644e-06, "loss": 0.0899, "step": 28120 }, { "epoch": 0.8319039451114922, "grad_norm": 0.9680945873260498, "learning_rate": 3.2072946568610986e-06, "loss": 0.0888, "step": 28130 }, { "epoch": 0.8321996806056663, "grad_norm": 1.9401746988296509, "learning_rate": 3.2084348661315532e-06, "loss": 0.0808, "step": 28140 }, { "epoch": 0.8324954160998403, "grad_norm": 1.3154159784317017, "learning_rate": 3.2095750754020074e-06, "loss": 0.096, "step": 28150 }, { "epoch": 0.8327911515940143, "grad_norm": 1.4788496494293213, "learning_rate": 3.210715284672462e-06, "loss": 0.1031, "step": 28160 }, { "epoch": 0.8330868870881883, "grad_norm": 1.3265457153320312, "learning_rate": 3.211855493942916e-06, "loss": 0.0922, "step": 28170 }, { "epoch": 0.8333826225823623, "grad_norm": 1.3431086540222168, "learning_rate": 3.2129957032133703e-06, "loss": 0.0928, "step": 28180 }, { "epoch": 0.8336783580765363, "grad_norm": 2.820467472076416, "learning_rate": 3.214135912483825e-06, "loss": 0.0964, "step": 28190 }, { "epoch": 0.8339740935707104, "grad_norm": 1.2690584659576416, "learning_rate": 3.215276121754279e-06, "loss": 0.0698, "step": 28200 }, { "epoch": 0.8342698290648843, "grad_norm": 0.8106564879417419, "learning_rate": 3.2164163310247337e-06, "loss": 0.1186, "step": 28210 }, { "epoch": 0.8345655645590584, "grad_norm": 1.2628686428070068, "learning_rate": 3.217556540295188e-06, "loss": 0.0895, "step": 28220 }, { "epoch": 0.8348613000532323, "grad_norm": 1.0702745914459229, "learning_rate": 3.2186967495656424e-06, "loss": 0.0819, "step": 28230 }, { "epoch": 0.8351570355474064, "grad_norm": 0.8915325403213501, "learning_rate": 3.219836958836097e-06, "loss": 0.1039, "step": 28240 }, { "epoch": 0.8354527710415804, "grad_norm": 0.9672488570213318, "learning_rate": 3.220977168106551e-06, "loss": 0.0852, "step": 28250 }, { "epoch": 0.8357485065357544, "grad_norm": 1.3969873189926147, "learning_rate": 3.2221173773770053e-06, "loss": 0.0912, "step": 28260 }, { "epoch": 0.8360442420299284, "grad_norm": 1.2300626039505005, "learning_rate": 3.2232575866474595e-06, "loss": 0.0993, "step": 28270 }, { "epoch": 0.8363399775241025, "grad_norm": 0.9277346134185791, "learning_rate": 3.224397795917914e-06, "loss": 0.0844, "step": 28280 }, { "epoch": 0.8366357130182764, "grad_norm": 1.9070509672164917, "learning_rate": 3.2255380051883687e-06, "loss": 0.0838, "step": 28290 }, { "epoch": 0.8369314485124505, "grad_norm": 1.2978872060775757, "learning_rate": 3.226678214458823e-06, "loss": 0.075, "step": 28300 }, { "epoch": 0.8372271840066244, "grad_norm": 1.3177375793457031, "learning_rate": 3.2278184237292774e-06, "loss": 0.1088, "step": 28310 }, { "epoch": 0.8375229195007985, "grad_norm": 0.7090723514556885, "learning_rate": 3.2289586329997316e-06, "loss": 0.0905, "step": 28320 }, { "epoch": 0.8378186549949725, "grad_norm": 0.9477949738502502, "learning_rate": 3.230098842270186e-06, "loss": 0.0803, "step": 28330 }, { "epoch": 0.8381143904891465, "grad_norm": 1.3804527521133423, "learning_rate": 3.2312390515406403e-06, "loss": 0.0929, "step": 28340 }, { "epoch": 0.8384101259833205, "grad_norm": 1.1774003505706787, "learning_rate": 3.2323792608110945e-06, "loss": 0.0767, "step": 28350 }, { "epoch": 0.8387058614774945, "grad_norm": 1.0981653928756714, "learning_rate": 3.233519470081549e-06, "loss": 0.1013, "step": 28360 }, { "epoch": 0.8390015969716685, "grad_norm": 1.3748977184295654, "learning_rate": 3.2346596793520037e-06, "loss": 0.108, "step": 28370 }, { "epoch": 0.8392973324658426, "grad_norm": 0.55211341381073, "learning_rate": 3.235799888622458e-06, "loss": 0.087, "step": 28380 }, { "epoch": 0.8395930679600165, "grad_norm": 0.9571582674980164, "learning_rate": 3.2369400978929124e-06, "loss": 0.0855, "step": 28390 }, { "epoch": 0.8398888034541906, "grad_norm": 1.1231532096862793, "learning_rate": 3.2380803071633666e-06, "loss": 0.0803, "step": 28400 }, { "epoch": 0.8401845389483645, "grad_norm": 1.388920783996582, "learning_rate": 3.239220516433821e-06, "loss": 0.1082, "step": 28410 }, { "epoch": 0.8404802744425386, "grad_norm": 1.0118879079818726, "learning_rate": 3.2403607257042754e-06, "loss": 0.0997, "step": 28420 }, { "epoch": 0.8407760099367126, "grad_norm": 1.226649522781372, "learning_rate": 3.2415009349747295e-06, "loss": 0.096, "step": 28430 }, { "epoch": 0.8410717454308866, "grad_norm": 0.7041254639625549, "learning_rate": 3.242641144245184e-06, "loss": 0.0718, "step": 28440 }, { "epoch": 0.8413674809250606, "grad_norm": 0.8116863965988159, "learning_rate": 3.2437813535156383e-06, "loss": 0.0762, "step": 28450 }, { "epoch": 0.8416632164192346, "grad_norm": 1.0587278604507446, "learning_rate": 3.244921562786093e-06, "loss": 0.0991, "step": 28460 }, { "epoch": 0.8419589519134086, "grad_norm": 1.4857535362243652, "learning_rate": 3.2460617720565475e-06, "loss": 0.1035, "step": 28470 }, { "epoch": 0.8422546874075827, "grad_norm": 1.2369893789291382, "learning_rate": 3.2472019813270016e-06, "loss": 0.093, "step": 28480 }, { "epoch": 0.8425504229017566, "grad_norm": 1.2536169290542603, "learning_rate": 3.248342190597456e-06, "loss": 0.0878, "step": 28490 }, { "epoch": 0.8428461583959307, "grad_norm": 1.1100437641143799, "learning_rate": 3.24948239986791e-06, "loss": 0.0872, "step": 28500 }, { "epoch": 0.8431418938901047, "grad_norm": 1.2996366024017334, "learning_rate": 3.2506226091383645e-06, "loss": 0.111, "step": 28510 }, { "epoch": 0.8434376293842787, "grad_norm": 1.7414470911026, "learning_rate": 3.251762818408819e-06, "loss": 0.1015, "step": 28520 }, { "epoch": 0.8437333648784527, "grad_norm": 1.7915074825286865, "learning_rate": 3.2529030276792733e-06, "loss": 0.1096, "step": 28530 }, { "epoch": 0.8440291003726267, "grad_norm": 0.7542709708213806, "learning_rate": 3.254043236949728e-06, "loss": 0.098, "step": 28540 }, { "epoch": 0.8443248358668007, "grad_norm": 1.3751856088638306, "learning_rate": 3.255183446220182e-06, "loss": 0.0827, "step": 28550 }, { "epoch": 0.8446205713609748, "grad_norm": 1.160913348197937, "learning_rate": 3.2563236554906366e-06, "loss": 0.0914, "step": 28560 }, { "epoch": 0.8449163068551487, "grad_norm": 0.843291699886322, "learning_rate": 3.2574638647610912e-06, "loss": 0.1016, "step": 28570 }, { "epoch": 0.8452120423493228, "grad_norm": 0.8952293395996094, "learning_rate": 3.258604074031545e-06, "loss": 0.1058, "step": 28580 }, { "epoch": 0.8455077778434967, "grad_norm": 0.8651318550109863, "learning_rate": 3.2597442833019996e-06, "loss": 0.0969, "step": 28590 }, { "epoch": 0.8458035133376708, "grad_norm": 1.4144386053085327, "learning_rate": 3.260884492572454e-06, "loss": 0.0763, "step": 28600 }, { "epoch": 0.8460992488318448, "grad_norm": 0.9814550280570984, "learning_rate": 3.2620247018429083e-06, "loss": 0.1102, "step": 28610 }, { "epoch": 0.8463949843260188, "grad_norm": 1.4516725540161133, "learning_rate": 3.263164911113363e-06, "loss": 0.1114, "step": 28620 }, { "epoch": 0.8466907198201928, "grad_norm": 1.4053303003311157, "learning_rate": 3.264305120383817e-06, "loss": 0.1164, "step": 28630 }, { "epoch": 0.8469864553143668, "grad_norm": 0.9272307753562927, "learning_rate": 3.2654453296542717e-06, "loss": 0.0871, "step": 28640 }, { "epoch": 0.8472821908085408, "grad_norm": 1.3982492685317993, "learning_rate": 3.2665855389247262e-06, "loss": 0.069, "step": 28650 }, { "epoch": 0.8475779263027149, "grad_norm": 0.8519480228424072, "learning_rate": 3.26772574819518e-06, "loss": 0.0954, "step": 28660 }, { "epoch": 0.8478736617968888, "grad_norm": 0.8460931181907654, "learning_rate": 3.2688659574656346e-06, "loss": 0.0968, "step": 28670 }, { "epoch": 0.8481693972910629, "grad_norm": 1.0703235864639282, "learning_rate": 3.2700061667360887e-06, "loss": 0.0779, "step": 28680 }, { "epoch": 0.8484651327852368, "grad_norm": 0.984870970249176, "learning_rate": 3.2711463760065433e-06, "loss": 0.0766, "step": 28690 }, { "epoch": 0.8487608682794109, "grad_norm": 0.7174683809280396, "learning_rate": 3.272286585276998e-06, "loss": 0.0751, "step": 28700 }, { "epoch": 0.8490566037735849, "grad_norm": 1.3994215726852417, "learning_rate": 3.273426794547452e-06, "loss": 0.1064, "step": 28710 }, { "epoch": 0.8493523392677589, "grad_norm": 0.7743356823921204, "learning_rate": 3.2745670038179067e-06, "loss": 0.0988, "step": 28720 }, { "epoch": 0.8496480747619329, "grad_norm": 0.7573016881942749, "learning_rate": 3.275707213088361e-06, "loss": 0.1029, "step": 28730 }, { "epoch": 0.849943810256107, "grad_norm": 0.7141479253768921, "learning_rate": 3.276847422358815e-06, "loss": 0.0823, "step": 28740 }, { "epoch": 0.8502395457502809, "grad_norm": 1.0479474067687988, "learning_rate": 3.2779876316292696e-06, "loss": 0.0677, "step": 28750 }, { "epoch": 0.850535281244455, "grad_norm": 1.1138867139816284, "learning_rate": 3.2791278408997238e-06, "loss": 0.1037, "step": 28760 }, { "epoch": 0.8508310167386289, "grad_norm": 1.330079436302185, "learning_rate": 3.2802680501701783e-06, "loss": 0.1018, "step": 28770 }, { "epoch": 0.851126752232803, "grad_norm": 1.3112220764160156, "learning_rate": 3.2814082594406325e-06, "loss": 0.1028, "step": 28780 }, { "epoch": 0.851422487726977, "grad_norm": 0.7466031908988953, "learning_rate": 3.282548468711087e-06, "loss": 0.0754, "step": 28790 }, { "epoch": 0.851718223221151, "grad_norm": 2.187302350997925, "learning_rate": 3.2836886779815417e-06, "loss": 0.081, "step": 28800 }, { "epoch": 0.852013958715325, "grad_norm": 1.3209418058395386, "learning_rate": 3.284828887251996e-06, "loss": 0.1011, "step": 28810 }, { "epoch": 0.852309694209499, "grad_norm": 0.8761692047119141, "learning_rate": 3.28596909652245e-06, "loss": 0.0962, "step": 28820 }, { "epoch": 0.852605429703673, "grad_norm": 1.5458145141601562, "learning_rate": 3.2871093057929046e-06, "loss": 0.0936, "step": 28830 }, { "epoch": 0.8529011651978471, "grad_norm": 1.2453436851501465, "learning_rate": 3.2882495150633588e-06, "loss": 0.0924, "step": 28840 }, { "epoch": 0.853196900692021, "grad_norm": 1.617519736289978, "learning_rate": 3.2893897243338134e-06, "loss": 0.0853, "step": 28850 }, { "epoch": 0.8534926361861951, "grad_norm": 1.0697981119155884, "learning_rate": 3.2905299336042675e-06, "loss": 0.102, "step": 28860 }, { "epoch": 0.853788371680369, "grad_norm": 1.0883172750473022, "learning_rate": 3.291670142874722e-06, "loss": 0.1056, "step": 28870 }, { "epoch": 0.8540841071745431, "grad_norm": 1.2306088209152222, "learning_rate": 3.2928103521451767e-06, "loss": 0.0907, "step": 28880 }, { "epoch": 0.8543798426687171, "grad_norm": 1.239182472229004, "learning_rate": 3.293950561415631e-06, "loss": 0.0826, "step": 28890 }, { "epoch": 0.8546755781628911, "grad_norm": 1.108859658241272, "learning_rate": 3.295090770686085e-06, "loss": 0.0911, "step": 28900 }, { "epoch": 0.8549713136570651, "grad_norm": 1.0776790380477905, "learning_rate": 3.296230979956539e-06, "loss": 0.1046, "step": 28910 }, { "epoch": 0.8552670491512391, "grad_norm": 1.0029916763305664, "learning_rate": 3.2973711892269938e-06, "loss": 0.0959, "step": 28920 }, { "epoch": 0.8555627846454131, "grad_norm": 1.228153944015503, "learning_rate": 3.2985113984974484e-06, "loss": 0.0809, "step": 28930 }, { "epoch": 0.8558585201395872, "grad_norm": 0.9617640376091003, "learning_rate": 3.2996516077679025e-06, "loss": 0.0919, "step": 28940 }, { "epoch": 0.8561542556337611, "grad_norm": 1.9737190008163452, "learning_rate": 3.300791817038357e-06, "loss": 0.0861, "step": 28950 }, { "epoch": 0.8564499911279352, "grad_norm": 1.1042948961257935, "learning_rate": 3.3019320263088113e-06, "loss": 0.0994, "step": 28960 }, { "epoch": 0.8567457266221092, "grad_norm": 1.2620985507965088, "learning_rate": 3.303072235579266e-06, "loss": 0.0924, "step": 28970 }, { "epoch": 0.8570414621162832, "grad_norm": 1.3036551475524902, "learning_rate": 3.30421244484972e-06, "loss": 0.093, "step": 28980 }, { "epoch": 0.8573371976104572, "grad_norm": 0.7255498170852661, "learning_rate": 3.305352654120174e-06, "loss": 0.0987, "step": 28990 }, { "epoch": 0.8576329331046312, "grad_norm": 0.659764289855957, "learning_rate": 3.306492863390629e-06, "loss": 0.0719, "step": 29000 }, { "epoch": 0.8579286685988052, "grad_norm": 1.1180446147918701, "learning_rate": 3.307633072661083e-06, "loss": 0.0854, "step": 29010 }, { "epoch": 0.8582244040929793, "grad_norm": 1.1118595600128174, "learning_rate": 3.3087732819315376e-06, "loss": 0.0994, "step": 29020 }, { "epoch": 0.8585201395871532, "grad_norm": 1.2215276956558228, "learning_rate": 3.309913491201992e-06, "loss": 0.0997, "step": 29030 }, { "epoch": 0.8588158750813273, "grad_norm": 0.8488393425941467, "learning_rate": 3.3110537004724463e-06, "loss": 0.0874, "step": 29040 }, { "epoch": 0.8591116105755012, "grad_norm": 1.0373966693878174, "learning_rate": 3.312193909742901e-06, "loss": 0.0805, "step": 29050 }, { "epoch": 0.8594073460696753, "grad_norm": 2.3271644115448, "learning_rate": 3.313334119013355e-06, "loss": 0.0906, "step": 29060 }, { "epoch": 0.8597030815638493, "grad_norm": 0.8703319430351257, "learning_rate": 3.3144743282838092e-06, "loss": 0.0857, "step": 29070 }, { "epoch": 0.8599988170580233, "grad_norm": 1.1620659828186035, "learning_rate": 3.315614537554264e-06, "loss": 0.0926, "step": 29080 }, { "epoch": 0.8602945525521973, "grad_norm": 1.0096948146820068, "learning_rate": 3.316754746824718e-06, "loss": 0.0851, "step": 29090 }, { "epoch": 0.8605902880463713, "grad_norm": 1.0180327892303467, "learning_rate": 3.3178949560951726e-06, "loss": 0.0878, "step": 29100 }, { "epoch": 0.8608860235405453, "grad_norm": 1.1035321950912476, "learning_rate": 3.319035165365627e-06, "loss": 0.1051, "step": 29110 }, { "epoch": 0.8611817590347194, "grad_norm": 0.9940634369850159, "learning_rate": 3.3201753746360813e-06, "loss": 0.0821, "step": 29120 }, { "epoch": 0.8614774945288933, "grad_norm": 0.8576096296310425, "learning_rate": 3.321315583906536e-06, "loss": 0.0917, "step": 29130 }, { "epoch": 0.8617732300230674, "grad_norm": 1.0667065382003784, "learning_rate": 3.3224557931769897e-06, "loss": 0.0781, "step": 29140 }, { "epoch": 0.8620689655172413, "grad_norm": 1.683735728263855, "learning_rate": 3.3235960024474442e-06, "loss": 0.0829, "step": 29150 }, { "epoch": 0.8623647010114154, "grad_norm": 1.5486739873886108, "learning_rate": 3.324736211717899e-06, "loss": 0.1017, "step": 29160 }, { "epoch": 0.8626604365055894, "grad_norm": 1.4657695293426514, "learning_rate": 3.325876420988353e-06, "loss": 0.0992, "step": 29170 }, { "epoch": 0.8629561719997634, "grad_norm": 1.4484615325927734, "learning_rate": 3.3270166302588076e-06, "loss": 0.0961, "step": 29180 }, { "epoch": 0.8632519074939374, "grad_norm": 0.7114478349685669, "learning_rate": 3.3281568395292617e-06, "loss": 0.0692, "step": 29190 }, { "epoch": 0.8635476429881115, "grad_norm": 0.9307709336280823, "learning_rate": 3.3292970487997163e-06, "loss": 0.0706, "step": 29200 }, { "epoch": 0.8638433784822854, "grad_norm": 1.4209626913070679, "learning_rate": 3.330437258070171e-06, "loss": 0.102, "step": 29210 }, { "epoch": 0.8641391139764595, "grad_norm": 1.6996301412582397, "learning_rate": 3.3315774673406247e-06, "loss": 0.1086, "step": 29220 }, { "epoch": 0.8644348494706334, "grad_norm": 0.7824845910072327, "learning_rate": 3.3327176766110793e-06, "loss": 0.0896, "step": 29230 }, { "epoch": 0.8647305849648075, "grad_norm": 0.9984441995620728, "learning_rate": 3.333857885881534e-06, "loss": 0.0895, "step": 29240 }, { "epoch": 0.8650263204589815, "grad_norm": 1.3723121881484985, "learning_rate": 3.334998095151988e-06, "loss": 0.0801, "step": 29250 }, { "epoch": 0.8653220559531555, "grad_norm": 0.7642185091972351, "learning_rate": 3.3361383044224426e-06, "loss": 0.0999, "step": 29260 }, { "epoch": 0.8656177914473295, "grad_norm": 1.1960481405258179, "learning_rate": 3.3372785136928968e-06, "loss": 0.0741, "step": 29270 }, { "epoch": 0.8659135269415035, "grad_norm": 1.6160444021224976, "learning_rate": 3.3384187229633514e-06, "loss": 0.0862, "step": 29280 }, { "epoch": 0.8662092624356775, "grad_norm": 1.3822909593582153, "learning_rate": 3.339558932233806e-06, "loss": 0.0954, "step": 29290 }, { "epoch": 0.8665049979298516, "grad_norm": 1.03216552734375, "learning_rate": 3.3406991415042597e-06, "loss": 0.0704, "step": 29300 }, { "epoch": 0.8668007334240255, "grad_norm": 1.5951682329177856, "learning_rate": 3.3418393507747143e-06, "loss": 0.1056, "step": 29310 }, { "epoch": 0.8670964689181996, "grad_norm": 0.9102626442909241, "learning_rate": 3.3429795600451684e-06, "loss": 0.0849, "step": 29320 }, { "epoch": 0.8673922044123735, "grad_norm": 1.0506025552749634, "learning_rate": 3.344119769315623e-06, "loss": 0.0855, "step": 29330 }, { "epoch": 0.8676879399065476, "grad_norm": 0.949600100517273, "learning_rate": 3.3452599785860776e-06, "loss": 0.0924, "step": 29340 }, { "epoch": 0.8679836754007216, "grad_norm": 1.0730003118515015, "learning_rate": 3.3464001878565318e-06, "loss": 0.0869, "step": 29350 }, { "epoch": 0.8682794108948956, "grad_norm": 1.030375361442566, "learning_rate": 3.3475403971269864e-06, "loss": 0.1063, "step": 29360 }, { "epoch": 0.8685751463890696, "grad_norm": 1.1395375728607178, "learning_rate": 3.3486806063974405e-06, "loss": 0.0888, "step": 29370 }, { "epoch": 0.8688708818832436, "grad_norm": 0.868425726890564, "learning_rate": 3.349820815667895e-06, "loss": 0.0789, "step": 29380 }, { "epoch": 0.8691666173774176, "grad_norm": 0.9360100626945496, "learning_rate": 3.3509610249383493e-06, "loss": 0.0672, "step": 29390 }, { "epoch": 0.8694623528715917, "grad_norm": 1.7258167266845703, "learning_rate": 3.3521012342088035e-06, "loss": 0.063, "step": 29400 }, { "epoch": 0.8697580883657656, "grad_norm": 1.2933807373046875, "learning_rate": 3.353241443479258e-06, "loss": 0.1072, "step": 29410 }, { "epoch": 0.8700538238599397, "grad_norm": 1.7249287366867065, "learning_rate": 3.354381652749712e-06, "loss": 0.0884, "step": 29420 }, { "epoch": 0.8703495593541137, "grad_norm": 1.1437556743621826, "learning_rate": 3.355521862020167e-06, "loss": 0.0937, "step": 29430 }, { "epoch": 0.8706452948482877, "grad_norm": 0.9203524589538574, "learning_rate": 3.3566620712906214e-06, "loss": 0.0838, "step": 29440 }, { "epoch": 0.8709410303424617, "grad_norm": 0.8881894946098328, "learning_rate": 3.3578022805610755e-06, "loss": 0.0696, "step": 29450 }, { "epoch": 0.8712367658366357, "grad_norm": 0.9474285840988159, "learning_rate": 3.35894248983153e-06, "loss": 0.1107, "step": 29460 }, { "epoch": 0.8715325013308097, "grad_norm": 1.484100341796875, "learning_rate": 3.3600826991019843e-06, "loss": 0.1054, "step": 29470 }, { "epoch": 0.8718282368249838, "grad_norm": 0.6944204568862915, "learning_rate": 3.3612229083724385e-06, "loss": 0.0836, "step": 29480 }, { "epoch": 0.8721239723191577, "grad_norm": 1.6535741090774536, "learning_rate": 3.362363117642893e-06, "loss": 0.0858, "step": 29490 }, { "epoch": 0.8724197078133318, "grad_norm": 0.7995972633361816, "learning_rate": 3.3635033269133472e-06, "loss": 0.0789, "step": 29500 }, { "epoch": 0.8727154433075057, "grad_norm": 0.8471816182136536, "learning_rate": 3.364643536183802e-06, "loss": 0.1054, "step": 29510 }, { "epoch": 0.8730111788016798, "grad_norm": 1.3625630140304565, "learning_rate": 3.3657837454542564e-06, "loss": 0.0816, "step": 29520 }, { "epoch": 0.8733069142958538, "grad_norm": 1.0227299928665161, "learning_rate": 3.3669239547247106e-06, "loss": 0.1277, "step": 29530 }, { "epoch": 0.8736026497900278, "grad_norm": 1.6419345140457153, "learning_rate": 3.368064163995165e-06, "loss": 0.0873, "step": 29540 }, { "epoch": 0.8738983852842018, "grad_norm": 0.8992114663124084, "learning_rate": 3.369204373265619e-06, "loss": 0.0783, "step": 29550 }, { "epoch": 0.8741941207783758, "grad_norm": 0.6693108677864075, "learning_rate": 3.3703445825360735e-06, "loss": 0.0832, "step": 29560 }, { "epoch": 0.8744898562725498, "grad_norm": 1.156882882118225, "learning_rate": 3.371484791806528e-06, "loss": 0.1152, "step": 29570 }, { "epoch": 0.8747855917667239, "grad_norm": 1.354270577430725, "learning_rate": 3.3726250010769822e-06, "loss": 0.0997, "step": 29580 }, { "epoch": 0.8750813272608978, "grad_norm": 0.5624622702598572, "learning_rate": 3.373765210347437e-06, "loss": 0.0828, "step": 29590 }, { "epoch": 0.8753770627550719, "grad_norm": 0.6708222031593323, "learning_rate": 3.374905419617891e-06, "loss": 0.0935, "step": 29600 }, { "epoch": 0.8756727982492458, "grad_norm": 1.266472578048706, "learning_rate": 3.3760456288883456e-06, "loss": 0.1129, "step": 29610 }, { "epoch": 0.8759685337434199, "grad_norm": 0.8908541202545166, "learning_rate": 3.3771858381588e-06, "loss": 0.0944, "step": 29620 }, { "epoch": 0.8762642692375939, "grad_norm": 1.2451874017715454, "learning_rate": 3.378326047429254e-06, "loss": 0.0839, "step": 29630 }, { "epoch": 0.8765600047317679, "grad_norm": 0.44477444887161255, "learning_rate": 3.3794662566997085e-06, "loss": 0.0791, "step": 29640 }, { "epoch": 0.8768557402259419, "grad_norm": 1.0744723081588745, "learning_rate": 3.3806064659701627e-06, "loss": 0.0828, "step": 29650 }, { "epoch": 0.877151475720116, "grad_norm": 1.1795405149459839, "learning_rate": 3.3817466752406173e-06, "loss": 0.1088, "step": 29660 }, { "epoch": 0.8774472112142899, "grad_norm": 1.0990387201309204, "learning_rate": 3.382886884511072e-06, "loss": 0.098, "step": 29670 }, { "epoch": 0.877742946708464, "grad_norm": 1.325495719909668, "learning_rate": 3.384027093781526e-06, "loss": 0.084, "step": 29680 }, { "epoch": 0.8780386822026379, "grad_norm": 0.45661401748657227, "learning_rate": 3.3851673030519806e-06, "loss": 0.0742, "step": 29690 }, { "epoch": 0.878334417696812, "grad_norm": 1.0333610773086548, "learning_rate": 3.386307512322435e-06, "loss": 0.0869, "step": 29700 }, { "epoch": 0.878630153190986, "grad_norm": 1.0913972854614258, "learning_rate": 3.387447721592889e-06, "loss": 0.1039, "step": 29710 }, { "epoch": 0.87892588868516, "grad_norm": 0.8351997137069702, "learning_rate": 3.3885879308633435e-06, "loss": 0.0968, "step": 29720 }, { "epoch": 0.879221624179334, "grad_norm": 0.936056911945343, "learning_rate": 3.3897281401337977e-06, "loss": 0.0943, "step": 29730 }, { "epoch": 0.879517359673508, "grad_norm": 1.1260251998901367, "learning_rate": 3.3908683494042523e-06, "loss": 0.0946, "step": 29740 }, { "epoch": 0.879813095167682, "grad_norm": 1.3656177520751953, "learning_rate": 3.392008558674707e-06, "loss": 0.0798, "step": 29750 }, { "epoch": 0.8801088306618561, "grad_norm": 1.550781011581421, "learning_rate": 3.393148767945161e-06, "loss": 0.1324, "step": 29760 }, { "epoch": 0.88040456615603, "grad_norm": 1.218519687652588, "learning_rate": 3.3942889772156156e-06, "loss": 0.0969, "step": 29770 }, { "epoch": 0.8807003016502041, "grad_norm": 0.7364262342453003, "learning_rate": 3.3954291864860698e-06, "loss": 0.0847, "step": 29780 }, { "epoch": 0.880996037144378, "grad_norm": 0.7835240364074707, "learning_rate": 3.396569395756524e-06, "loss": 0.1185, "step": 29790 }, { "epoch": 0.8812917726385521, "grad_norm": 0.9302436113357544, "learning_rate": 3.3977096050269785e-06, "loss": 0.0859, "step": 29800 }, { "epoch": 0.8815875081327261, "grad_norm": 0.9917539358139038, "learning_rate": 3.3988498142974327e-06, "loss": 0.0889, "step": 29810 }, { "epoch": 0.8818832436269001, "grad_norm": 1.0995113849639893, "learning_rate": 3.3999900235678873e-06, "loss": 0.0892, "step": 29820 }, { "epoch": 0.8821789791210741, "grad_norm": 1.0425570011138916, "learning_rate": 3.4011302328383414e-06, "loss": 0.0998, "step": 29830 }, { "epoch": 0.8824747146152481, "grad_norm": 0.9307745695114136, "learning_rate": 3.402270442108796e-06, "loss": 0.0978, "step": 29840 }, { "epoch": 0.8827704501094221, "grad_norm": 1.1586347818374634, "learning_rate": 3.4034106513792506e-06, "loss": 0.0727, "step": 29850 }, { "epoch": 0.8830661856035962, "grad_norm": 1.087112307548523, "learning_rate": 3.404550860649705e-06, "loss": 0.105, "step": 29860 }, { "epoch": 0.8833619210977701, "grad_norm": 1.3886489868164062, "learning_rate": 3.405691069920159e-06, "loss": 0.1099, "step": 29870 }, { "epoch": 0.8836576565919442, "grad_norm": 0.9384096264839172, "learning_rate": 3.406831279190613e-06, "loss": 0.0982, "step": 29880 }, { "epoch": 0.8839533920861182, "grad_norm": 0.927354633808136, "learning_rate": 3.4079714884610677e-06, "loss": 0.0926, "step": 29890 }, { "epoch": 0.8842491275802922, "grad_norm": 1.5356146097183228, "learning_rate": 3.4091116977315223e-06, "loss": 0.0844, "step": 29900 }, { "epoch": 0.8845448630744662, "grad_norm": 1.3543791770935059, "learning_rate": 3.4102519070019765e-06, "loss": 0.0977, "step": 29910 }, { "epoch": 0.8848405985686402, "grad_norm": 1.151032567024231, "learning_rate": 3.411392116272431e-06, "loss": 0.0964, "step": 29920 }, { "epoch": 0.8851363340628142, "grad_norm": 0.7754549384117126, "learning_rate": 3.4125323255428856e-06, "loss": 0.0835, "step": 29930 }, { "epoch": 0.8854320695569883, "grad_norm": 0.6949936747550964, "learning_rate": 3.41367253481334e-06, "loss": 0.0916, "step": 29940 }, { "epoch": 0.8857278050511622, "grad_norm": 1.6818535327911377, "learning_rate": 3.414812744083794e-06, "loss": 0.0879, "step": 29950 }, { "epoch": 0.8860235405453363, "grad_norm": 1.6927471160888672, "learning_rate": 3.415952953354248e-06, "loss": 0.1124, "step": 29960 }, { "epoch": 0.8863192760395102, "grad_norm": 0.7242215871810913, "learning_rate": 3.4170931626247027e-06, "loss": 0.0914, "step": 29970 }, { "epoch": 0.8866150115336843, "grad_norm": 1.4373902082443237, "learning_rate": 3.4182333718951573e-06, "loss": 0.0859, "step": 29980 }, { "epoch": 0.8869107470278583, "grad_norm": 0.671095073223114, "learning_rate": 3.4193735811656115e-06, "loss": 0.0784, "step": 29990 }, { "epoch": 0.8872064825220323, "grad_norm": 1.1446481943130493, "learning_rate": 3.420513790436066e-06, "loss": 0.0744, "step": 30000 }, { "epoch": 0.8875022180162063, "grad_norm": 0.652993381023407, "learning_rate": 3.4216539997065202e-06, "loss": 0.0973, "step": 30010 }, { "epoch": 0.8877979535103803, "grad_norm": 1.1387070417404175, "learning_rate": 3.422794208976975e-06, "loss": 0.09, "step": 30020 }, { "epoch": 0.8880936890045543, "grad_norm": 0.6783024668693542, "learning_rate": 3.423934418247429e-06, "loss": 0.0684, "step": 30030 }, { "epoch": 0.8883894244987284, "grad_norm": 0.7737658619880676, "learning_rate": 3.425074627517883e-06, "loss": 0.0726, "step": 30040 }, { "epoch": 0.8886851599929023, "grad_norm": 1.099435806274414, "learning_rate": 3.4262148367883377e-06, "loss": 0.0569, "step": 30050 }, { "epoch": 0.8889808954870764, "grad_norm": 1.0283626317977905, "learning_rate": 3.427355046058792e-06, "loss": 0.0991, "step": 30060 }, { "epoch": 0.8892766309812503, "grad_norm": 1.189589023590088, "learning_rate": 3.4284952553292465e-06, "loss": 0.0962, "step": 30070 }, { "epoch": 0.8895723664754244, "grad_norm": 1.0636745691299438, "learning_rate": 3.429635464599701e-06, "loss": 0.0873, "step": 30080 }, { "epoch": 0.8898681019695984, "grad_norm": 1.0394093990325928, "learning_rate": 3.4307756738701552e-06, "loss": 0.0797, "step": 30090 }, { "epoch": 0.8901638374637724, "grad_norm": 0.7689923048019409, "learning_rate": 3.43191588314061e-06, "loss": 0.0802, "step": 30100 }, { "epoch": 0.8904595729579464, "grad_norm": 1.4097331762313843, "learning_rate": 3.4330560924110636e-06, "loss": 0.0981, "step": 30110 }, { "epoch": 0.8907553084521205, "grad_norm": 1.111255407333374, "learning_rate": 3.434196301681518e-06, "loss": 0.0964, "step": 30120 }, { "epoch": 0.8910510439462944, "grad_norm": 1.1474651098251343, "learning_rate": 3.4353365109519728e-06, "loss": 0.0882, "step": 30130 }, { "epoch": 0.8913467794404685, "grad_norm": 0.8379011750221252, "learning_rate": 3.436476720222427e-06, "loss": 0.0829, "step": 30140 }, { "epoch": 0.8916425149346424, "grad_norm": 0.8988406658172607, "learning_rate": 3.4376169294928815e-06, "loss": 0.0745, "step": 30150 }, { "epoch": 0.8919382504288165, "grad_norm": 0.9906156063079834, "learning_rate": 3.438757138763336e-06, "loss": 0.0823, "step": 30160 }, { "epoch": 0.8922339859229905, "grad_norm": 1.924432396888733, "learning_rate": 3.4398973480337903e-06, "loss": 0.0877, "step": 30170 }, { "epoch": 0.8925297214171645, "grad_norm": 1.0702497959136963, "learning_rate": 3.441037557304245e-06, "loss": 0.0955, "step": 30180 }, { "epoch": 0.8928254569113385, "grad_norm": 0.7415854930877686, "learning_rate": 3.4421777665746986e-06, "loss": 0.0953, "step": 30190 }, { "epoch": 0.8931211924055125, "grad_norm": 1.5459080934524536, "learning_rate": 3.443317975845153e-06, "loss": 0.0822, "step": 30200 }, { "epoch": 0.8934169278996865, "grad_norm": 1.0539313554763794, "learning_rate": 3.4444581851156078e-06, "loss": 0.122, "step": 30210 }, { "epoch": 0.8937126633938606, "grad_norm": 0.7066173553466797, "learning_rate": 3.445598394386062e-06, "loss": 0.088, "step": 30220 }, { "epoch": 0.8940083988880345, "grad_norm": 0.99092036485672, "learning_rate": 3.4467386036565165e-06, "loss": 0.0967, "step": 30230 }, { "epoch": 0.8943041343822086, "grad_norm": 0.47124937176704407, "learning_rate": 3.4478788129269707e-06, "loss": 0.0972, "step": 30240 }, { "epoch": 0.8945998698763825, "grad_norm": 0.8522318601608276, "learning_rate": 3.4490190221974253e-06, "loss": 0.0755, "step": 30250 }, { "epoch": 0.8948956053705566, "grad_norm": 0.9233049154281616, "learning_rate": 3.45015923146788e-06, "loss": 0.1218, "step": 30260 }, { "epoch": 0.8951913408647306, "grad_norm": 1.1561837196350098, "learning_rate": 3.4512994407383336e-06, "loss": 0.0839, "step": 30270 }, { "epoch": 0.8954870763589046, "grad_norm": 1.2718477249145508, "learning_rate": 3.452439650008788e-06, "loss": 0.0761, "step": 30280 }, { "epoch": 0.8957828118530786, "grad_norm": 1.182649850845337, "learning_rate": 3.4535798592792424e-06, "loss": 0.0666, "step": 30290 }, { "epoch": 0.8960785473472526, "grad_norm": 1.2100799083709717, "learning_rate": 3.454720068549697e-06, "loss": 0.0723, "step": 30300 }, { "epoch": 0.8963742828414266, "grad_norm": 1.2679096460342407, "learning_rate": 3.4558602778201515e-06, "loss": 0.097, "step": 30310 }, { "epoch": 0.8966700183356007, "grad_norm": 0.8342288136482239, "learning_rate": 3.4570004870906057e-06, "loss": 0.0983, "step": 30320 }, { "epoch": 0.8969657538297746, "grad_norm": 0.6947275400161743, "learning_rate": 3.4581406963610603e-06, "loss": 0.0826, "step": 30330 }, { "epoch": 0.8972614893239487, "grad_norm": 1.612107515335083, "learning_rate": 3.4592809056315145e-06, "loss": 0.0803, "step": 30340 }, { "epoch": 0.8975572248181227, "grad_norm": 1.3411152362823486, "learning_rate": 3.4604211149019686e-06, "loss": 0.0698, "step": 30350 }, { "epoch": 0.8978529603122967, "grad_norm": 1.2748661041259766, "learning_rate": 3.461561324172423e-06, "loss": 0.0999, "step": 30360 }, { "epoch": 0.8981486958064707, "grad_norm": 0.9359475374221802, "learning_rate": 3.4627015334428774e-06, "loss": 0.112, "step": 30370 }, { "epoch": 0.8984444313006447, "grad_norm": 1.3744275569915771, "learning_rate": 3.463841742713332e-06, "loss": 0.0873, "step": 30380 }, { "epoch": 0.8987401667948187, "grad_norm": 1.1642259359359741, "learning_rate": 3.4649819519837866e-06, "loss": 0.077, "step": 30390 }, { "epoch": 0.8990359022889928, "grad_norm": 0.9001168012619019, "learning_rate": 3.4661221612542407e-06, "loss": 0.0669, "step": 30400 }, { "epoch": 0.8993316377831667, "grad_norm": 1.0666053295135498, "learning_rate": 3.4672623705246953e-06, "loss": 0.1078, "step": 30410 }, { "epoch": 0.8996273732773408, "grad_norm": 1.6528147459030151, "learning_rate": 3.4684025797951495e-06, "loss": 0.086, "step": 30420 }, { "epoch": 0.8999231087715147, "grad_norm": 1.0313191413879395, "learning_rate": 3.4695427890656036e-06, "loss": 0.0821, "step": 30430 }, { "epoch": 0.9002188442656888, "grad_norm": 1.6272763013839722, "learning_rate": 3.4706829983360582e-06, "loss": 0.0985, "step": 30440 }, { "epoch": 0.9005145797598628, "grad_norm": 4.719452381134033, "learning_rate": 3.4718232076065124e-06, "loss": 0.0852, "step": 30450 }, { "epoch": 0.9008103152540368, "grad_norm": 1.1173129081726074, "learning_rate": 3.472963416876967e-06, "loss": 0.0939, "step": 30460 }, { "epoch": 0.9011060507482108, "grad_norm": 0.8135040998458862, "learning_rate": 3.474103626147421e-06, "loss": 0.0864, "step": 30470 }, { "epoch": 0.9014017862423848, "grad_norm": 1.1691380739212036, "learning_rate": 3.4752438354178757e-06, "loss": 0.0818, "step": 30480 }, { "epoch": 0.9016975217365588, "grad_norm": 0.9931204915046692, "learning_rate": 3.4763840446883303e-06, "loss": 0.1028, "step": 30490 }, { "epoch": 0.9019932572307329, "grad_norm": 0.669613778591156, "learning_rate": 3.4775242539587845e-06, "loss": 0.0657, "step": 30500 }, { "epoch": 0.9022889927249068, "grad_norm": 0.8636003136634827, "learning_rate": 3.4786644632292387e-06, "loss": 0.0996, "step": 30510 }, { "epoch": 0.9025847282190809, "grad_norm": 1.5094749927520752, "learning_rate": 3.479804672499693e-06, "loss": 0.109, "step": 30520 }, { "epoch": 0.9028804637132548, "grad_norm": 1.2261532545089722, "learning_rate": 3.4809448817701474e-06, "loss": 0.0923, "step": 30530 }, { "epoch": 0.9031761992074289, "grad_norm": 1.004303216934204, "learning_rate": 3.482085091040602e-06, "loss": 0.0761, "step": 30540 }, { "epoch": 0.9034719347016029, "grad_norm": 1.5719361305236816, "learning_rate": 3.483225300311056e-06, "loss": 0.066, "step": 30550 }, { "epoch": 0.9037676701957769, "grad_norm": 1.0542378425598145, "learning_rate": 3.4843655095815107e-06, "loss": 0.087, "step": 30560 }, { "epoch": 0.9040634056899509, "grad_norm": 1.3101489543914795, "learning_rate": 3.485505718851965e-06, "loss": 0.1018, "step": 30570 }, { "epoch": 0.904359141184125, "grad_norm": 0.8746352195739746, "learning_rate": 3.4866459281224195e-06, "loss": 0.0857, "step": 30580 }, { "epoch": 0.9046548766782989, "grad_norm": 1.2746466398239136, "learning_rate": 3.487786137392874e-06, "loss": 0.0923, "step": 30590 }, { "epoch": 0.904950612172473, "grad_norm": 1.295785903930664, "learning_rate": 3.488926346663328e-06, "loss": 0.0874, "step": 30600 }, { "epoch": 0.9052463476666469, "grad_norm": 0.9879241585731506, "learning_rate": 3.4900665559337824e-06, "loss": 0.0933, "step": 30610 }, { "epoch": 0.905542083160821, "grad_norm": 1.7495671510696411, "learning_rate": 3.491206765204237e-06, "loss": 0.0846, "step": 30620 }, { "epoch": 0.905837818654995, "grad_norm": 0.903096616268158, "learning_rate": 3.492346974474691e-06, "loss": 0.0842, "step": 30630 }, { "epoch": 0.906133554149169, "grad_norm": 0.7447725534439087, "learning_rate": 3.4934871837451458e-06, "loss": 0.0785, "step": 30640 }, { "epoch": 0.906429289643343, "grad_norm": 0.9791775941848755, "learning_rate": 3.4946273930156e-06, "loss": 0.0846, "step": 30650 }, { "epoch": 0.906725025137517, "grad_norm": 1.1806743144989014, "learning_rate": 3.4957676022860545e-06, "loss": 0.0932, "step": 30660 }, { "epoch": 0.907020760631691, "grad_norm": 1.4913177490234375, "learning_rate": 3.496907811556509e-06, "loss": 0.0916, "step": 30670 }, { "epoch": 0.9073164961258651, "grad_norm": 1.1509301662445068, "learning_rate": 3.498048020826963e-06, "loss": 0.0878, "step": 30680 }, { "epoch": 0.907612231620039, "grad_norm": 0.7850432991981506, "learning_rate": 3.4991882300974174e-06, "loss": 0.1013, "step": 30690 }, { "epoch": 0.9079079671142131, "grad_norm": 1.1212830543518066, "learning_rate": 3.5003284393678716e-06, "loss": 0.0583, "step": 30700 }, { "epoch": 0.908203702608387, "grad_norm": 1.498188853263855, "learning_rate": 3.501468648638326e-06, "loss": 0.1043, "step": 30710 }, { "epoch": 0.9084994381025611, "grad_norm": 0.8611702919006348, "learning_rate": 3.5026088579087808e-06, "loss": 0.0844, "step": 30720 }, { "epoch": 0.9087951735967351, "grad_norm": 1.3164523839950562, "learning_rate": 3.503749067179235e-06, "loss": 0.094, "step": 30730 }, { "epoch": 0.9090909090909091, "grad_norm": 1.0044909715652466, "learning_rate": 3.5048892764496895e-06, "loss": 0.0731, "step": 30740 }, { "epoch": 0.9093866445850831, "grad_norm": 1.4250227212905884, "learning_rate": 3.5060294857201437e-06, "loss": 0.0723, "step": 30750 }, { "epoch": 0.9096823800792571, "grad_norm": 1.0445934534072876, "learning_rate": 3.507169694990598e-06, "loss": 0.0957, "step": 30760 }, { "epoch": 0.9099781155734311, "grad_norm": 1.3673603534698486, "learning_rate": 3.5083099042610525e-06, "loss": 0.1025, "step": 30770 }, { "epoch": 0.9102738510676052, "grad_norm": 1.0062732696533203, "learning_rate": 3.5094501135315066e-06, "loss": 0.0904, "step": 30780 }, { "epoch": 0.9105695865617791, "grad_norm": 0.9213483929634094, "learning_rate": 3.510590322801961e-06, "loss": 0.0883, "step": 30790 }, { "epoch": 0.9108653220559532, "grad_norm": 1.1033052206039429, "learning_rate": 3.5117305320724154e-06, "loss": 0.0707, "step": 30800 }, { "epoch": 0.9111610575501272, "grad_norm": 0.8978191614151001, "learning_rate": 3.51287074134287e-06, "loss": 0.104, "step": 30810 }, { "epoch": 0.9114567930443012, "grad_norm": 0.9667742848396301, "learning_rate": 3.5140109506133245e-06, "loss": 0.0921, "step": 30820 }, { "epoch": 0.9117525285384752, "grad_norm": 0.5837445259094238, "learning_rate": 3.5151511598837787e-06, "loss": 0.0924, "step": 30830 }, { "epoch": 0.9120482640326492, "grad_norm": 0.7570321559906006, "learning_rate": 3.516291369154233e-06, "loss": 0.078, "step": 30840 }, { "epoch": 0.9123439995268232, "grad_norm": 0.6397556066513062, "learning_rate": 3.5174315784246875e-06, "loss": 0.0637, "step": 30850 }, { "epoch": 0.9126397350209973, "grad_norm": 1.2871471643447876, "learning_rate": 3.5185717876951416e-06, "loss": 0.1003, "step": 30860 }, { "epoch": 0.9129354705151712, "grad_norm": 1.025410532951355, "learning_rate": 3.5197119969655962e-06, "loss": 0.0885, "step": 30870 }, { "epoch": 0.9132312060093453, "grad_norm": 0.98319011926651, "learning_rate": 3.5208522062360504e-06, "loss": 0.0898, "step": 30880 }, { "epoch": 0.9135269415035192, "grad_norm": 0.9887683987617493, "learning_rate": 3.521992415506505e-06, "loss": 0.088, "step": 30890 }, { "epoch": 0.9138226769976933, "grad_norm": 0.7016123533248901, "learning_rate": 3.5231326247769596e-06, "loss": 0.0758, "step": 30900 }, { "epoch": 0.9141184124918673, "grad_norm": 1.2908310890197754, "learning_rate": 3.5242728340474137e-06, "loss": 0.1096, "step": 30910 }, { "epoch": 0.9144141479860413, "grad_norm": 1.5970335006713867, "learning_rate": 3.525413043317868e-06, "loss": 0.1041, "step": 30920 }, { "epoch": 0.9147098834802153, "grad_norm": 0.8159674406051636, "learning_rate": 3.526553252588322e-06, "loss": 0.0907, "step": 30930 }, { "epoch": 0.9150056189743893, "grad_norm": 0.9399685859680176, "learning_rate": 3.5276934618587766e-06, "loss": 0.0753, "step": 30940 }, { "epoch": 0.9153013544685633, "grad_norm": 1.24210786819458, "learning_rate": 3.5288336711292312e-06, "loss": 0.0736, "step": 30950 }, { "epoch": 0.9155970899627374, "grad_norm": 1.1102250814437866, "learning_rate": 3.5299738803996854e-06, "loss": 0.0961, "step": 30960 }, { "epoch": 0.9158928254569113, "grad_norm": 0.9266155958175659, "learning_rate": 3.53111408967014e-06, "loss": 0.0937, "step": 30970 }, { "epoch": 0.9161885609510854, "grad_norm": 1.0422606468200684, "learning_rate": 3.532254298940594e-06, "loss": 0.0833, "step": 30980 }, { "epoch": 0.9164842964452593, "grad_norm": 1.2842589616775513, "learning_rate": 3.5333945082110487e-06, "loss": 0.0861, "step": 30990 }, { "epoch": 0.9167800319394334, "grad_norm": 0.9967207312583923, "learning_rate": 3.534534717481503e-06, "loss": 0.0832, "step": 31000 }, { "epoch": 0.9170757674336074, "grad_norm": 1.1859325170516968, "learning_rate": 3.535674926751957e-06, "loss": 0.1106, "step": 31010 }, { "epoch": 0.9173715029277814, "grad_norm": 1.076247215270996, "learning_rate": 3.5368151360224117e-06, "loss": 0.1003, "step": 31020 }, { "epoch": 0.9176672384219554, "grad_norm": 1.221063494682312, "learning_rate": 3.537955345292866e-06, "loss": 0.0978, "step": 31030 }, { "epoch": 0.9179629739161295, "grad_norm": 1.2143890857696533, "learning_rate": 3.5390955545633204e-06, "loss": 0.0986, "step": 31040 }, { "epoch": 0.9182587094103034, "grad_norm": 0.6716635227203369, "learning_rate": 3.540235763833775e-06, "loss": 0.0804, "step": 31050 }, { "epoch": 0.9185544449044775, "grad_norm": 1.1457937955856323, "learning_rate": 3.541375973104229e-06, "loss": 0.0849, "step": 31060 }, { "epoch": 0.9188501803986514, "grad_norm": 1.3999371528625488, "learning_rate": 3.5425161823746838e-06, "loss": 0.1005, "step": 31070 }, { "epoch": 0.9191459158928255, "grad_norm": 1.4750678539276123, "learning_rate": 3.543656391645138e-06, "loss": 0.0908, "step": 31080 }, { "epoch": 0.9194416513869995, "grad_norm": 0.6286326050758362, "learning_rate": 3.544796600915592e-06, "loss": 0.097, "step": 31090 }, { "epoch": 0.9197373868811735, "grad_norm": 0.9858618378639221, "learning_rate": 3.5459368101860467e-06, "loss": 0.0656, "step": 31100 }, { "epoch": 0.9200331223753475, "grad_norm": 1.135019302368164, "learning_rate": 3.547077019456501e-06, "loss": 0.0921, "step": 31110 }, { "epoch": 0.9203288578695215, "grad_norm": 0.9433118104934692, "learning_rate": 3.5482172287269554e-06, "loss": 0.0904, "step": 31120 }, { "epoch": 0.9206245933636955, "grad_norm": 0.9827017188072205, "learning_rate": 3.54935743799741e-06, "loss": 0.0984, "step": 31130 }, { "epoch": 0.9209203288578696, "grad_norm": 1.3592634201049805, "learning_rate": 3.550497647267864e-06, "loss": 0.0859, "step": 31140 }, { "epoch": 0.9212160643520435, "grad_norm": 1.4529117345809937, "learning_rate": 3.5516378565383188e-06, "loss": 0.0802, "step": 31150 }, { "epoch": 0.9215117998462176, "grad_norm": 0.9888778924942017, "learning_rate": 3.5527780658087725e-06, "loss": 0.1015, "step": 31160 }, { "epoch": 0.9218075353403915, "grad_norm": 1.1395801305770874, "learning_rate": 3.553918275079227e-06, "loss": 0.0761, "step": 31170 }, { "epoch": 0.9221032708345656, "grad_norm": 1.6717593669891357, "learning_rate": 3.5550584843496817e-06, "loss": 0.0919, "step": 31180 }, { "epoch": 0.9223990063287396, "grad_norm": 0.8588816523551941, "learning_rate": 3.556198693620136e-06, "loss": 0.0873, "step": 31190 }, { "epoch": 0.9226947418229136, "grad_norm": 1.5387102365493774, "learning_rate": 3.5573389028905904e-06, "loss": 0.1012, "step": 31200 }, { "epoch": 0.9229904773170876, "grad_norm": 1.2361953258514404, "learning_rate": 3.5584791121610446e-06, "loss": 0.1443, "step": 31210 }, { "epoch": 0.9232862128112616, "grad_norm": 1.4518091678619385, "learning_rate": 3.559619321431499e-06, "loss": 0.1112, "step": 31220 }, { "epoch": 0.9235819483054356, "grad_norm": 1.0120344161987305, "learning_rate": 3.560759530701954e-06, "loss": 0.118, "step": 31230 }, { "epoch": 0.9238776837996097, "grad_norm": 0.8620127439498901, "learning_rate": 3.5618997399724075e-06, "loss": 0.0995, "step": 31240 }, { "epoch": 0.9241734192937836, "grad_norm": 0.5190497040748596, "learning_rate": 3.563039949242862e-06, "loss": 0.0724, "step": 31250 }, { "epoch": 0.9244691547879577, "grad_norm": 0.755092442035675, "learning_rate": 3.5641801585133163e-06, "loss": 0.1028, "step": 31260 }, { "epoch": 0.9247648902821317, "grad_norm": 0.9554533362388611, "learning_rate": 3.565320367783771e-06, "loss": 0.0883, "step": 31270 }, { "epoch": 0.9250606257763057, "grad_norm": 2.2155511379241943, "learning_rate": 3.5664605770542255e-06, "loss": 0.0864, "step": 31280 }, { "epoch": 0.9253563612704797, "grad_norm": 1.1192466020584106, "learning_rate": 3.5676007863246796e-06, "loss": 0.0811, "step": 31290 }, { "epoch": 0.9256520967646537, "grad_norm": 1.6851387023925781, "learning_rate": 3.5687409955951342e-06, "loss": 0.0904, "step": 31300 }, { "epoch": 0.9259478322588277, "grad_norm": 1.2397288084030151, "learning_rate": 3.569881204865589e-06, "loss": 0.1076, "step": 31310 }, { "epoch": 0.9262435677530018, "grad_norm": 1.1864025592803955, "learning_rate": 3.5710214141360425e-06, "loss": 0.0898, "step": 31320 }, { "epoch": 0.9265393032471757, "grad_norm": 0.9981522560119629, "learning_rate": 3.572161623406497e-06, "loss": 0.0883, "step": 31330 }, { "epoch": 0.9268350387413498, "grad_norm": 0.8990921974182129, "learning_rate": 3.5733018326769513e-06, "loss": 0.0858, "step": 31340 }, { "epoch": 0.9271307742355237, "grad_norm": 0.7237204313278198, "learning_rate": 3.574442041947406e-06, "loss": 0.0825, "step": 31350 }, { "epoch": 0.9274265097296978, "grad_norm": 1.0605852603912354, "learning_rate": 3.5755822512178605e-06, "loss": 0.0931, "step": 31360 }, { "epoch": 0.9277222452238718, "grad_norm": 0.7056562304496765, "learning_rate": 3.5767224604883146e-06, "loss": 0.1008, "step": 31370 }, { "epoch": 0.9280179807180458, "grad_norm": 0.9061305522918701, "learning_rate": 3.5778626697587692e-06, "loss": 0.0711, "step": 31380 }, { "epoch": 0.9283137162122198, "grad_norm": 1.1271854639053345, "learning_rate": 3.5790028790292234e-06, "loss": 0.0776, "step": 31390 }, { "epoch": 0.9286094517063938, "grad_norm": 1.1317311525344849, "learning_rate": 3.5801430882996776e-06, "loss": 0.0765, "step": 31400 }, { "epoch": 0.9289051872005678, "grad_norm": 0.927709698677063, "learning_rate": 3.581283297570132e-06, "loss": 0.083, "step": 31410 }, { "epoch": 0.9292009226947419, "grad_norm": 1.653266429901123, "learning_rate": 3.5824235068405863e-06, "loss": 0.1118, "step": 31420 }, { "epoch": 0.9294966581889158, "grad_norm": 1.357213020324707, "learning_rate": 3.583563716111041e-06, "loss": 0.1027, "step": 31430 }, { "epoch": 0.9297923936830899, "grad_norm": 0.6362748146057129, "learning_rate": 3.584703925381495e-06, "loss": 0.0796, "step": 31440 }, { "epoch": 0.9300881291772638, "grad_norm": 1.262570858001709, "learning_rate": 3.5858441346519497e-06, "loss": 0.0816, "step": 31450 }, { "epoch": 0.9303838646714379, "grad_norm": 0.8687348365783691, "learning_rate": 3.5869843439224042e-06, "loss": 0.0886, "step": 31460 }, { "epoch": 0.9306796001656119, "grad_norm": 1.0872610807418823, "learning_rate": 3.5881245531928584e-06, "loss": 0.098, "step": 31470 }, { "epoch": 0.9309753356597859, "grad_norm": 1.0649622678756714, "learning_rate": 3.5892647624633126e-06, "loss": 0.0894, "step": 31480 }, { "epoch": 0.9312710711539599, "grad_norm": 0.9163088202476501, "learning_rate": 3.5904049717337667e-06, "loss": 0.0911, "step": 31490 }, { "epoch": 0.931566806648134, "grad_norm": 0.7383618354797363, "learning_rate": 3.5915451810042213e-06, "loss": 0.0788, "step": 31500 }, { "epoch": 0.9318625421423079, "grad_norm": 1.083137035369873, "learning_rate": 3.592685390274676e-06, "loss": 0.1014, "step": 31510 }, { "epoch": 0.932158277636482, "grad_norm": 0.9665963649749756, "learning_rate": 3.59382559954513e-06, "loss": 0.0946, "step": 31520 }, { "epoch": 0.9324540131306559, "grad_norm": 1.0741479396820068, "learning_rate": 3.5949658088155847e-06, "loss": 0.1079, "step": 31530 }, { "epoch": 0.93274974862483, "grad_norm": 0.8945116996765137, "learning_rate": 3.5961060180860393e-06, "loss": 0.0864, "step": 31540 }, { "epoch": 0.933045484119004, "grad_norm": 0.7551920413970947, "learning_rate": 3.5972462273564934e-06, "loss": 0.0833, "step": 31550 }, { "epoch": 0.933341219613178, "grad_norm": 1.6400471925735474, "learning_rate": 3.5983864366269476e-06, "loss": 0.0798, "step": 31560 }, { "epoch": 0.933636955107352, "grad_norm": 0.9972163438796997, "learning_rate": 3.5995266458974018e-06, "loss": 0.0892, "step": 31570 }, { "epoch": 0.933932690601526, "grad_norm": 1.0230592489242554, "learning_rate": 3.6006668551678563e-06, "loss": 0.0816, "step": 31580 }, { "epoch": 0.9342284260957, "grad_norm": 0.8049185872077942, "learning_rate": 3.601807064438311e-06, "loss": 0.0872, "step": 31590 }, { "epoch": 0.9345241615898741, "grad_norm": 0.9392024874687195, "learning_rate": 3.602947273708765e-06, "loss": 0.0638, "step": 31600 }, { "epoch": 0.934819897084048, "grad_norm": 1.2008756399154663, "learning_rate": 3.6040874829792197e-06, "loss": 0.103, "step": 31610 }, { "epoch": 0.9351156325782221, "grad_norm": 1.2685706615447998, "learning_rate": 3.605227692249674e-06, "loss": 0.0979, "step": 31620 }, { "epoch": 0.935411368072396, "grad_norm": 0.9870694875717163, "learning_rate": 3.6063679015201284e-06, "loss": 0.095, "step": 31630 }, { "epoch": 0.93570710356657, "grad_norm": 1.1903905868530273, "learning_rate": 3.6075081107905826e-06, "loss": 0.091, "step": 31640 }, { "epoch": 0.9360028390607441, "grad_norm": 1.276780128479004, "learning_rate": 3.6086483200610368e-06, "loss": 0.0594, "step": 31650 }, { "epoch": 0.936298574554918, "grad_norm": 0.8430718183517456, "learning_rate": 3.6097885293314914e-06, "loss": 0.091, "step": 31660 }, { "epoch": 0.9365943100490921, "grad_norm": 1.7596632242202759, "learning_rate": 3.6109287386019455e-06, "loss": 0.1107, "step": 31670 }, { "epoch": 0.936890045543266, "grad_norm": 1.4037971496582031, "learning_rate": 3.6120689478724e-06, "loss": 0.0781, "step": 31680 }, { "epoch": 0.9371857810374401, "grad_norm": 1.2519956827163696, "learning_rate": 3.6132091571428547e-06, "loss": 0.0804, "step": 31690 }, { "epoch": 0.9374815165316142, "grad_norm": 1.093662977218628, "learning_rate": 3.614349366413309e-06, "loss": 0.0698, "step": 31700 }, { "epoch": 0.9377772520257881, "grad_norm": 0.8132985830307007, "learning_rate": 3.6154895756837635e-06, "loss": 0.105, "step": 31710 }, { "epoch": 0.9380729875199622, "grad_norm": 1.0408258438110352, "learning_rate": 3.616629784954217e-06, "loss": 0.0828, "step": 31720 }, { "epoch": 0.9383687230141362, "grad_norm": 1.0263038873672485, "learning_rate": 3.617769994224672e-06, "loss": 0.088, "step": 31730 }, { "epoch": 0.9386644585083102, "grad_norm": 0.7106947302818298, "learning_rate": 3.6189102034951264e-06, "loss": 0.0906, "step": 31740 }, { "epoch": 0.9389601940024842, "grad_norm": 0.8263773918151855, "learning_rate": 3.6200504127655805e-06, "loss": 0.0985, "step": 31750 }, { "epoch": 0.9392559294966581, "grad_norm": 1.0953577756881714, "learning_rate": 3.621190622036035e-06, "loss": 0.1045, "step": 31760 }, { "epoch": 0.9395516649908322, "grad_norm": 1.1056044101715088, "learning_rate": 3.6223308313064897e-06, "loss": 0.0972, "step": 31770 }, { "epoch": 0.9398474004850063, "grad_norm": 0.7742610573768616, "learning_rate": 3.623471040576944e-06, "loss": 0.0991, "step": 31780 }, { "epoch": 0.9401431359791802, "grad_norm": 1.018146276473999, "learning_rate": 3.6246112498473985e-06, "loss": 0.0741, "step": 31790 }, { "epoch": 0.9404388714733543, "grad_norm": 1.7716717720031738, "learning_rate": 3.6257514591178522e-06, "loss": 0.0799, "step": 31800 }, { "epoch": 0.9407346069675282, "grad_norm": 1.1313862800598145, "learning_rate": 3.626891668388307e-06, "loss": 0.114, "step": 31810 }, { "epoch": 0.9410303424617023, "grad_norm": 0.7634530663490295, "learning_rate": 3.6280318776587614e-06, "loss": 0.09, "step": 31820 }, { "epoch": 0.9413260779558763, "grad_norm": 1.0869756937026978, "learning_rate": 3.6291720869292156e-06, "loss": 0.0912, "step": 31830 }, { "epoch": 0.9416218134500502, "grad_norm": 0.8219099044799805, "learning_rate": 3.63031229619967e-06, "loss": 0.0844, "step": 31840 }, { "epoch": 0.9419175489442243, "grad_norm": 1.7817639112472534, "learning_rate": 3.6314525054701243e-06, "loss": 0.0853, "step": 31850 }, { "epoch": 0.9422132844383982, "grad_norm": 0.8801596760749817, "learning_rate": 3.632592714740579e-06, "loss": 0.1065, "step": 31860 }, { "epoch": 0.9425090199325723, "grad_norm": 0.9884043335914612, "learning_rate": 3.6337329240110335e-06, "loss": 0.0938, "step": 31870 }, { "epoch": 0.9428047554267464, "grad_norm": 1.0996230840682983, "learning_rate": 3.6348731332814872e-06, "loss": 0.0842, "step": 31880 }, { "epoch": 0.9431004909209203, "grad_norm": 0.7306059002876282, "learning_rate": 3.636013342551942e-06, "loss": 0.0775, "step": 31890 }, { "epoch": 0.9433962264150944, "grad_norm": 0.8661320209503174, "learning_rate": 3.637153551822396e-06, "loss": 0.0928, "step": 31900 }, { "epoch": 0.9436919619092683, "grad_norm": 0.9448305368423462, "learning_rate": 3.6382937610928506e-06, "loss": 0.0833, "step": 31910 }, { "epoch": 0.9439876974034423, "grad_norm": 1.3038578033447266, "learning_rate": 3.639433970363305e-06, "loss": 0.111, "step": 31920 }, { "epoch": 0.9442834328976164, "grad_norm": 0.8681148886680603, "learning_rate": 3.6405741796337593e-06, "loss": 0.0879, "step": 31930 }, { "epoch": 0.9445791683917903, "grad_norm": 1.513535737991333, "learning_rate": 3.641714388904214e-06, "loss": 0.0848, "step": 31940 }, { "epoch": 0.9448749038859644, "grad_norm": 1.230508804321289, "learning_rate": 3.642854598174668e-06, "loss": 0.0728, "step": 31950 }, { "epoch": 0.9451706393801385, "grad_norm": 1.0422313213348389, "learning_rate": 3.6439948074451227e-06, "loss": 0.0955, "step": 31960 }, { "epoch": 0.9454663748743124, "grad_norm": 1.3341681957244873, "learning_rate": 3.645135016715577e-06, "loss": 0.0814, "step": 31970 }, { "epoch": 0.9457621103684865, "grad_norm": 0.9917130470275879, "learning_rate": 3.646275225986031e-06, "loss": 0.0829, "step": 31980 }, { "epoch": 0.9460578458626604, "grad_norm": 1.1748452186584473, "learning_rate": 3.6474154352564856e-06, "loss": 0.0874, "step": 31990 }, { "epoch": 0.9463535813568345, "grad_norm": 1.0435839891433716, "learning_rate": 3.64855564452694e-06, "loss": 0.0737, "step": 32000 }, { "epoch": 0.9466493168510085, "grad_norm": 1.5472586154937744, "learning_rate": 3.6496958537973943e-06, "loss": 0.1053, "step": 32010 }, { "epoch": 0.9469450523451824, "grad_norm": 0.8392290472984314, "learning_rate": 3.650836063067849e-06, "loss": 0.0891, "step": 32020 }, { "epoch": 0.9472407878393565, "grad_norm": 0.48291635513305664, "learning_rate": 3.651976272338303e-06, "loss": 0.0895, "step": 32030 }, { "epoch": 0.9475365233335304, "grad_norm": 0.8281598687171936, "learning_rate": 3.6531164816087577e-06, "loss": 0.0712, "step": 32040 }, { "epoch": 0.9478322588277045, "grad_norm": 2.0901505947113037, "learning_rate": 3.654256690879212e-06, "loss": 0.0783, "step": 32050 }, { "epoch": 0.9481279943218786, "grad_norm": 1.3382147550582886, "learning_rate": 3.655396900149666e-06, "loss": 0.084, "step": 32060 }, { "epoch": 0.9484237298160525, "grad_norm": 1.4235990047454834, "learning_rate": 3.6565371094201206e-06, "loss": 0.0928, "step": 32070 }, { "epoch": 0.9487194653102266, "grad_norm": 1.051181435585022, "learning_rate": 3.6576773186905748e-06, "loss": 0.0809, "step": 32080 }, { "epoch": 0.9490152008044005, "grad_norm": 0.8957644701004028, "learning_rate": 3.6588175279610294e-06, "loss": 0.0852, "step": 32090 }, { "epoch": 0.9493109362985745, "grad_norm": 1.0393179655075073, "learning_rate": 3.659957737231484e-06, "loss": 0.0863, "step": 32100 }, { "epoch": 0.9496066717927486, "grad_norm": 0.8882966637611389, "learning_rate": 3.661097946501938e-06, "loss": 0.1004, "step": 32110 }, { "epoch": 0.9499024072869225, "grad_norm": 1.5053455829620361, "learning_rate": 3.6622381557723927e-06, "loss": 0.0794, "step": 32120 }, { "epoch": 0.9501981427810966, "grad_norm": 0.550484836101532, "learning_rate": 3.6633783650428464e-06, "loss": 0.0784, "step": 32130 }, { "epoch": 0.9504938782752705, "grad_norm": 0.7234815359115601, "learning_rate": 3.664518574313301e-06, "loss": 0.076, "step": 32140 }, { "epoch": 0.9507896137694446, "grad_norm": 1.0996745824813843, "learning_rate": 3.6656587835837556e-06, "loss": 0.0795, "step": 32150 }, { "epoch": 0.9510853492636187, "grad_norm": 0.9202247262001038, "learning_rate": 3.6667989928542098e-06, "loss": 0.0961, "step": 32160 }, { "epoch": 0.9513810847577926, "grad_norm": 0.681145966053009, "learning_rate": 3.6679392021246644e-06, "loss": 0.0717, "step": 32170 }, { "epoch": 0.9516768202519666, "grad_norm": 0.9700123071670532, "learning_rate": 3.6690794113951185e-06, "loss": 0.0836, "step": 32180 }, { "epoch": 0.9519725557461407, "grad_norm": 0.763964831829071, "learning_rate": 3.670219620665573e-06, "loss": 0.0918, "step": 32190 }, { "epoch": 0.9522682912403146, "grad_norm": 0.9986066818237305, "learning_rate": 3.6713598299360277e-06, "loss": 0.093, "step": 32200 }, { "epoch": 0.9525640267344887, "grad_norm": 0.7618060111999512, "learning_rate": 3.6725000392064815e-06, "loss": 0.0811, "step": 32210 }, { "epoch": 0.9528597622286626, "grad_norm": 0.9380999803543091, "learning_rate": 3.673640248476936e-06, "loss": 0.0876, "step": 32220 }, { "epoch": 0.9531554977228367, "grad_norm": 0.9140825271606445, "learning_rate": 3.6747804577473906e-06, "loss": 0.0837, "step": 32230 }, { "epoch": 0.9534512332170108, "grad_norm": 1.175866723060608, "learning_rate": 3.675920667017845e-06, "loss": 0.0734, "step": 32240 }, { "epoch": 0.9537469687111847, "grad_norm": 1.5516200065612793, "learning_rate": 3.6770608762882994e-06, "loss": 0.0798, "step": 32250 }, { "epoch": 0.9540427042053587, "grad_norm": 1.1102012395858765, "learning_rate": 3.6782010855587536e-06, "loss": 0.1087, "step": 32260 }, { "epoch": 0.9543384396995327, "grad_norm": 0.8658073544502258, "learning_rate": 3.679341294829208e-06, "loss": 0.0764, "step": 32270 }, { "epoch": 0.9546341751937067, "grad_norm": 1.1901572942733765, "learning_rate": 3.6804815040996627e-06, "loss": 0.0731, "step": 32280 }, { "epoch": 0.9549299106878808, "grad_norm": 0.6634753942489624, "learning_rate": 3.6816217133701165e-06, "loss": 0.092, "step": 32290 }, { "epoch": 0.9552256461820547, "grad_norm": 1.8084944486618042, "learning_rate": 3.682761922640571e-06, "loss": 0.0698, "step": 32300 }, { "epoch": 0.9555213816762288, "grad_norm": 1.0574095249176025, "learning_rate": 3.6839021319110252e-06, "loss": 0.1058, "step": 32310 }, { "epoch": 0.9558171171704027, "grad_norm": 0.6597110629081726, "learning_rate": 3.68504234118148e-06, "loss": 0.0998, "step": 32320 }, { "epoch": 0.9561128526645768, "grad_norm": 1.3246455192565918, "learning_rate": 3.6861825504519344e-06, "loss": 0.0868, "step": 32330 }, { "epoch": 0.9564085881587508, "grad_norm": 1.1100624799728394, "learning_rate": 3.6873227597223886e-06, "loss": 0.0938, "step": 32340 }, { "epoch": 0.9567043236529248, "grad_norm": 0.9776247143745422, "learning_rate": 3.688462968992843e-06, "loss": 0.0714, "step": 32350 }, { "epoch": 0.9570000591470988, "grad_norm": 1.4161086082458496, "learning_rate": 3.6896031782632973e-06, "loss": 0.1003, "step": 32360 }, { "epoch": 0.9572957946412728, "grad_norm": 0.7094051837921143, "learning_rate": 3.6907433875337515e-06, "loss": 0.0755, "step": 32370 }, { "epoch": 0.9575915301354468, "grad_norm": 1.066138505935669, "learning_rate": 3.691883596804206e-06, "loss": 0.1027, "step": 32380 }, { "epoch": 0.9578872656296209, "grad_norm": 1.2404260635375977, "learning_rate": 3.6930238060746602e-06, "loss": 0.0839, "step": 32390 }, { "epoch": 0.9581830011237948, "grad_norm": 0.8477832078933716, "learning_rate": 3.694164015345115e-06, "loss": 0.0893, "step": 32400 }, { "epoch": 0.9584787366179689, "grad_norm": 1.6316337585449219, "learning_rate": 3.695304224615569e-06, "loss": 0.0923, "step": 32410 }, { "epoch": 0.958774472112143, "grad_norm": 1.1409099102020264, "learning_rate": 3.6964444338860236e-06, "loss": 0.0939, "step": 32420 }, { "epoch": 0.9590702076063169, "grad_norm": 0.9340177178382874, "learning_rate": 3.697584643156478e-06, "loss": 0.0842, "step": 32430 }, { "epoch": 0.959365943100491, "grad_norm": 0.9561015963554382, "learning_rate": 3.6987248524269323e-06, "loss": 0.0906, "step": 32440 }, { "epoch": 0.9596616785946649, "grad_norm": 0.8668437600135803, "learning_rate": 3.6998650616973865e-06, "loss": 0.0679, "step": 32450 }, { "epoch": 0.959957414088839, "grad_norm": 0.7773417830467224, "learning_rate": 3.701005270967841e-06, "loss": 0.0981, "step": 32460 }, { "epoch": 0.960253149583013, "grad_norm": 1.1554774045944214, "learning_rate": 3.7021454802382953e-06, "loss": 0.0843, "step": 32470 }, { "epoch": 0.9605488850771869, "grad_norm": 0.8638333678245544, "learning_rate": 3.70328568950875e-06, "loss": 0.0957, "step": 32480 }, { "epoch": 0.960844620571361, "grad_norm": 1.1542717218399048, "learning_rate": 3.704425898779204e-06, "loss": 0.0765, "step": 32490 }, { "epoch": 0.9611403560655349, "grad_norm": 0.7606458067893982, "learning_rate": 3.7055661080496586e-06, "loss": 0.0696, "step": 32500 }, { "epoch": 0.961436091559709, "grad_norm": 1.2031925916671753, "learning_rate": 3.706706317320113e-06, "loss": 0.1065, "step": 32510 }, { "epoch": 0.961731827053883, "grad_norm": 0.9595174193382263, "learning_rate": 3.7078465265905674e-06, "loss": 0.0806, "step": 32520 }, { "epoch": 0.962027562548057, "grad_norm": 1.1035728454589844, "learning_rate": 3.7089867358610215e-06, "loss": 0.0967, "step": 32530 }, { "epoch": 0.962323298042231, "grad_norm": 0.786786675453186, "learning_rate": 3.7101269451314757e-06, "loss": 0.0721, "step": 32540 }, { "epoch": 0.962619033536405, "grad_norm": 0.9830008149147034, "learning_rate": 3.7112671544019303e-06, "loss": 0.0738, "step": 32550 }, { "epoch": 0.962914769030579, "grad_norm": 1.0732156038284302, "learning_rate": 3.712407363672385e-06, "loss": 0.0917, "step": 32560 }, { "epoch": 0.9632105045247531, "grad_norm": 0.723501980304718, "learning_rate": 3.713547572942839e-06, "loss": 0.0954, "step": 32570 }, { "epoch": 0.963506240018927, "grad_norm": 1.3515383005142212, "learning_rate": 3.7146877822132936e-06, "loss": 0.0948, "step": 32580 }, { "epoch": 0.9638019755131011, "grad_norm": 1.0565015077590942, "learning_rate": 3.7158279914837478e-06, "loss": 0.0826, "step": 32590 }, { "epoch": 0.964097711007275, "grad_norm": 1.395154595375061, "learning_rate": 3.7169682007542024e-06, "loss": 0.082, "step": 32600 }, { "epoch": 0.9643934465014491, "grad_norm": 1.179937720298767, "learning_rate": 3.7181084100246565e-06, "loss": 0.0942, "step": 32610 }, { "epoch": 0.9646891819956231, "grad_norm": 0.8489545583724976, "learning_rate": 3.7192486192951107e-06, "loss": 0.0881, "step": 32620 }, { "epoch": 0.9649849174897971, "grad_norm": 0.827660858631134, "learning_rate": 3.7203888285655653e-06, "loss": 0.1038, "step": 32630 }, { "epoch": 0.9652806529839711, "grad_norm": 0.7487171292304993, "learning_rate": 3.7215290378360195e-06, "loss": 0.0633, "step": 32640 }, { "epoch": 0.9655763884781452, "grad_norm": 0.7002878785133362, "learning_rate": 3.722669247106474e-06, "loss": 0.0666, "step": 32650 }, { "epoch": 0.9658721239723191, "grad_norm": 1.5014067888259888, "learning_rate": 3.7238094563769286e-06, "loss": 0.1005, "step": 32660 }, { "epoch": 0.9661678594664932, "grad_norm": 1.4769115447998047, "learning_rate": 3.724949665647383e-06, "loss": 0.0982, "step": 32670 }, { "epoch": 0.9664635949606671, "grad_norm": 0.9994655251502991, "learning_rate": 3.7260898749178374e-06, "loss": 0.0795, "step": 32680 }, { "epoch": 0.9667593304548412, "grad_norm": 0.8914806842803955, "learning_rate": 3.7272300841882916e-06, "loss": 0.0655, "step": 32690 }, { "epoch": 0.9670550659490152, "grad_norm": 1.0176348686218262, "learning_rate": 3.7283702934587457e-06, "loss": 0.066, "step": 32700 }, { "epoch": 0.9673508014431892, "grad_norm": 0.9311741590499878, "learning_rate": 3.7295105027292003e-06, "loss": 0.1088, "step": 32710 }, { "epoch": 0.9676465369373632, "grad_norm": 1.22019362449646, "learning_rate": 3.7306507119996545e-06, "loss": 0.1045, "step": 32720 }, { "epoch": 0.9679422724315372, "grad_norm": 0.46544909477233887, "learning_rate": 3.731790921270109e-06, "loss": 0.0841, "step": 32730 }, { "epoch": 0.9682380079257112, "grad_norm": 0.5333192944526672, "learning_rate": 3.7329311305405636e-06, "loss": 0.0828, "step": 32740 }, { "epoch": 0.9685337434198853, "grad_norm": 1.4887081384658813, "learning_rate": 3.734071339811018e-06, "loss": 0.1052, "step": 32750 }, { "epoch": 0.9688294789140592, "grad_norm": 1.3435593843460083, "learning_rate": 3.7352115490814724e-06, "loss": 0.1094, "step": 32760 }, { "epoch": 0.9691252144082333, "grad_norm": 1.0503370761871338, "learning_rate": 3.736351758351926e-06, "loss": 0.103, "step": 32770 }, { "epoch": 0.9694209499024072, "grad_norm": 0.7483175992965698, "learning_rate": 3.7374919676223807e-06, "loss": 0.0897, "step": 32780 }, { "epoch": 0.9697166853965813, "grad_norm": 1.1714608669281006, "learning_rate": 3.7386321768928353e-06, "loss": 0.08, "step": 32790 }, { "epoch": 0.9700124208907553, "grad_norm": 1.0818612575531006, "learning_rate": 3.7397723861632895e-06, "loss": 0.069, "step": 32800 }, { "epoch": 0.9703081563849293, "grad_norm": 1.3665759563446045, "learning_rate": 3.740912595433744e-06, "loss": 0.1073, "step": 32810 }, { "epoch": 0.9706038918791033, "grad_norm": 0.9680915474891663, "learning_rate": 3.7420528047041982e-06, "loss": 0.0899, "step": 32820 }, { "epoch": 0.9708996273732773, "grad_norm": 1.281281590461731, "learning_rate": 3.743193013974653e-06, "loss": 0.0819, "step": 32830 }, { "epoch": 0.9711953628674513, "grad_norm": 0.8805367946624756, "learning_rate": 3.7443332232451074e-06, "loss": 0.0859, "step": 32840 }, { "epoch": 0.9714910983616254, "grad_norm": 0.8315549492835999, "learning_rate": 3.745473432515561e-06, "loss": 0.0728, "step": 32850 }, { "epoch": 0.9717868338557993, "grad_norm": 0.8726893067359924, "learning_rate": 3.7466136417860157e-06, "loss": 0.1048, "step": 32860 }, { "epoch": 0.9720825693499734, "grad_norm": 1.12690007686615, "learning_rate": 3.74775385105647e-06, "loss": 0.0897, "step": 32870 }, { "epoch": 0.9723783048441474, "grad_norm": 1.1168620586395264, "learning_rate": 3.7488940603269245e-06, "loss": 0.0796, "step": 32880 }, { "epoch": 0.9726740403383214, "grad_norm": 1.322689414024353, "learning_rate": 3.750034269597379e-06, "loss": 0.0902, "step": 32890 }, { "epoch": 0.9729697758324954, "grad_norm": 1.428666114807129, "learning_rate": 3.7511744788678333e-06, "loss": 0.1007, "step": 32900 }, { "epoch": 0.9732655113266694, "grad_norm": 1.2587021589279175, "learning_rate": 3.752314688138288e-06, "loss": 0.1068, "step": 32910 }, { "epoch": 0.9735612468208434, "grad_norm": 1.2234090566635132, "learning_rate": 3.7534548974087424e-06, "loss": 0.119, "step": 32920 }, { "epoch": 0.9738569823150175, "grad_norm": 0.975399374961853, "learning_rate": 3.754595106679196e-06, "loss": 0.119, "step": 32930 }, { "epoch": 0.9741527178091914, "grad_norm": 1.0295897722244263, "learning_rate": 3.7557353159496508e-06, "loss": 0.0921, "step": 32940 }, { "epoch": 0.9744484533033655, "grad_norm": 0.6433205604553223, "learning_rate": 3.756875525220105e-06, "loss": 0.0678, "step": 32950 }, { "epoch": 0.9747441887975394, "grad_norm": 1.0362932682037354, "learning_rate": 3.7580157344905595e-06, "loss": 0.1142, "step": 32960 }, { "epoch": 0.9750399242917135, "grad_norm": 1.106435775756836, "learning_rate": 3.759155943761014e-06, "loss": 0.1098, "step": 32970 }, { "epoch": 0.9753356597858875, "grad_norm": 1.2385891675949097, "learning_rate": 3.7602961530314683e-06, "loss": 0.092, "step": 32980 }, { "epoch": 0.9756313952800615, "grad_norm": 0.7569208145141602, "learning_rate": 3.761436362301923e-06, "loss": 0.0851, "step": 32990 }, { "epoch": 0.9759271307742355, "grad_norm": 3.0516107082366943, "learning_rate": 3.762576571572377e-06, "loss": 0.0704, "step": 33000 }, { "epoch": 0.9762228662684095, "grad_norm": 1.0581679344177246, "learning_rate": 3.763716780842831e-06, "loss": 0.0905, "step": 33010 }, { "epoch": 0.9765186017625835, "grad_norm": 0.473730206489563, "learning_rate": 3.7648569901132858e-06, "loss": 0.0826, "step": 33020 }, { "epoch": 0.9768143372567576, "grad_norm": 0.7854183912277222, "learning_rate": 3.76599719938374e-06, "loss": 0.0938, "step": 33030 }, { "epoch": 0.9771100727509315, "grad_norm": 0.6759809255599976, "learning_rate": 3.7671374086541945e-06, "loss": 0.0841, "step": 33040 }, { "epoch": 0.9774058082451056, "grad_norm": 1.0982820987701416, "learning_rate": 3.7682776179246487e-06, "loss": 0.082, "step": 33050 }, { "epoch": 0.9777015437392796, "grad_norm": 1.0720288753509521, "learning_rate": 3.7694178271951033e-06, "loss": 0.1012, "step": 33060 }, { "epoch": 0.9779972792334536, "grad_norm": 1.423437476158142, "learning_rate": 3.770558036465558e-06, "loss": 0.1086, "step": 33070 }, { "epoch": 0.9782930147276276, "grad_norm": 0.7303283214569092, "learning_rate": 3.771698245736012e-06, "loss": 0.0947, "step": 33080 }, { "epoch": 0.9785887502218016, "grad_norm": 0.7175980806350708, "learning_rate": 3.772838455006466e-06, "loss": 0.082, "step": 33090 }, { "epoch": 0.9788844857159756, "grad_norm": 0.7723501920700073, "learning_rate": 3.7739786642769204e-06, "loss": 0.0943, "step": 33100 }, { "epoch": 0.9791802212101497, "grad_norm": 0.9974132776260376, "learning_rate": 3.775118873547375e-06, "loss": 0.0939, "step": 33110 }, { "epoch": 0.9794759567043236, "grad_norm": 0.9300626516342163, "learning_rate": 3.7762590828178295e-06, "loss": 0.0927, "step": 33120 }, { "epoch": 0.9797716921984977, "grad_norm": 0.7236129641532898, "learning_rate": 3.7773992920882837e-06, "loss": 0.1004, "step": 33130 }, { "epoch": 0.9800674276926716, "grad_norm": 0.7934963703155518, "learning_rate": 3.7785395013587383e-06, "loss": 0.0721, "step": 33140 }, { "epoch": 0.9803631631868457, "grad_norm": 1.1462963819503784, "learning_rate": 3.779679710629193e-06, "loss": 0.0857, "step": 33150 }, { "epoch": 0.9806588986810197, "grad_norm": 0.7209426164627075, "learning_rate": 3.780819919899647e-06, "loss": 0.0977, "step": 33160 }, { "epoch": 0.9809546341751937, "grad_norm": 0.6916504502296448, "learning_rate": 3.7819601291701016e-06, "loss": 0.0924, "step": 33170 }, { "epoch": 0.9812503696693677, "grad_norm": 1.5468419790267944, "learning_rate": 3.7831003384405554e-06, "loss": 0.0927, "step": 33180 }, { "epoch": 0.9815461051635417, "grad_norm": 1.2097798585891724, "learning_rate": 3.78424054771101e-06, "loss": 0.0942, "step": 33190 }, { "epoch": 0.9818418406577157, "grad_norm": 0.7726947069168091, "learning_rate": 3.7853807569814646e-06, "loss": 0.0708, "step": 33200 }, { "epoch": 0.9821375761518898, "grad_norm": 0.7765222191810608, "learning_rate": 3.7865209662519187e-06, "loss": 0.1175, "step": 33210 }, { "epoch": 0.9824333116460637, "grad_norm": 0.8516300320625305, "learning_rate": 3.7876611755223733e-06, "loss": 0.0997, "step": 33220 }, { "epoch": 0.9827290471402378, "grad_norm": 1.6580826044082642, "learning_rate": 3.7888013847928275e-06, "loss": 0.0845, "step": 33230 }, { "epoch": 0.9830247826344117, "grad_norm": 1.2461762428283691, "learning_rate": 3.789941594063282e-06, "loss": 0.0817, "step": 33240 }, { "epoch": 0.9833205181285858, "grad_norm": 0.7245975732803345, "learning_rate": 3.7910818033337367e-06, "loss": 0.077, "step": 33250 }, { "epoch": 0.9836162536227598, "grad_norm": 1.0919139385223389, "learning_rate": 3.7922220126041904e-06, "loss": 0.1086, "step": 33260 }, { "epoch": 0.9839119891169338, "grad_norm": 0.6814976334571838, "learning_rate": 3.793362221874645e-06, "loss": 0.1019, "step": 33270 }, { "epoch": 0.9842077246111078, "grad_norm": 0.9412352442741394, "learning_rate": 3.794502431145099e-06, "loss": 0.0702, "step": 33280 }, { "epoch": 0.9845034601052819, "grad_norm": 1.5306559801101685, "learning_rate": 3.7956426404155537e-06, "loss": 0.0801, "step": 33290 }, { "epoch": 0.9847991955994558, "grad_norm": 1.5933552980422974, "learning_rate": 3.7967828496860083e-06, "loss": 0.0934, "step": 33300 }, { "epoch": 0.9850949310936299, "grad_norm": 1.359939455986023, "learning_rate": 3.7979230589564625e-06, "loss": 0.0996, "step": 33310 }, { "epoch": 0.9853906665878038, "grad_norm": 1.447529911994934, "learning_rate": 3.799063268226917e-06, "loss": 0.0918, "step": 33320 }, { "epoch": 0.9856864020819779, "grad_norm": 0.8673820495605469, "learning_rate": 3.8002034774973713e-06, "loss": 0.1085, "step": 33330 }, { "epoch": 0.9859821375761519, "grad_norm": 0.9587679505348206, "learning_rate": 3.8013436867678254e-06, "loss": 0.0856, "step": 33340 }, { "epoch": 0.9862778730703259, "grad_norm": 1.175950527191162, "learning_rate": 3.80248389603828e-06, "loss": 0.0753, "step": 33350 }, { "epoch": 0.9865736085644999, "grad_norm": 1.3100634813308716, "learning_rate": 3.803624105308734e-06, "loss": 0.0944, "step": 33360 }, { "epoch": 0.9868693440586739, "grad_norm": 0.8913015127182007, "learning_rate": 3.8047643145791888e-06, "loss": 0.0967, "step": 33370 }, { "epoch": 0.9871650795528479, "grad_norm": 1.5668089389801025, "learning_rate": 3.8059045238496433e-06, "loss": 0.0935, "step": 33380 }, { "epoch": 0.987460815047022, "grad_norm": 0.8013717532157898, "learning_rate": 3.8070447331200975e-06, "loss": 0.0846, "step": 33390 }, { "epoch": 0.9877565505411959, "grad_norm": 0.8510339260101318, "learning_rate": 3.808184942390552e-06, "loss": 0.071, "step": 33400 }, { "epoch": 0.98805228603537, "grad_norm": 0.9691235423088074, "learning_rate": 3.8093251516610063e-06, "loss": 0.1014, "step": 33410 }, { "epoch": 0.9883480215295439, "grad_norm": 0.8099057078361511, "learning_rate": 3.8104653609314604e-06, "loss": 0.1053, "step": 33420 }, { "epoch": 0.988643757023718, "grad_norm": 0.7072492837905884, "learning_rate": 3.811605570201915e-06, "loss": 0.0931, "step": 33430 }, { "epoch": 0.988939492517892, "grad_norm": 0.6314131021499634, "learning_rate": 3.812745779472369e-06, "loss": 0.0866, "step": 33440 }, { "epoch": 0.989235228012066, "grad_norm": 1.0355429649353027, "learning_rate": 3.8138859887428238e-06, "loss": 0.0787, "step": 33450 }, { "epoch": 0.98953096350624, "grad_norm": 1.0318411588668823, "learning_rate": 3.8150261980132775e-06, "loss": 0.1054, "step": 33460 }, { "epoch": 0.989826699000414, "grad_norm": 0.9002761840820312, "learning_rate": 3.816166407283732e-06, "loss": 0.093, "step": 33470 }, { "epoch": 0.990122434494588, "grad_norm": 1.178514003753662, "learning_rate": 3.817306616554187e-06, "loss": 0.1058, "step": 33480 }, { "epoch": 0.9904181699887621, "grad_norm": 0.9441206455230713, "learning_rate": 3.818446825824641e-06, "loss": 0.0667, "step": 33490 }, { "epoch": 0.990713905482936, "grad_norm": 0.8795446753501892, "learning_rate": 3.819587035095096e-06, "loss": 0.0706, "step": 33500 }, { "epoch": 0.9910096409771101, "grad_norm": 0.6315853595733643, "learning_rate": 3.82072724436555e-06, "loss": 0.091, "step": 33510 }, { "epoch": 0.9913053764712841, "grad_norm": 0.6103978753089905, "learning_rate": 3.821867453636004e-06, "loss": 0.0942, "step": 33520 }, { "epoch": 0.9916011119654581, "grad_norm": 1.066377878189087, "learning_rate": 3.823007662906459e-06, "loss": 0.0996, "step": 33530 }, { "epoch": 0.9918968474596321, "grad_norm": 0.5955730080604553, "learning_rate": 3.8241478721769125e-06, "loss": 0.0921, "step": 33540 }, { "epoch": 0.9921925829538061, "grad_norm": 0.8444139361381531, "learning_rate": 3.825288081447367e-06, "loss": 0.0809, "step": 33550 }, { "epoch": 0.9924883184479801, "grad_norm": 1.3775310516357422, "learning_rate": 3.826428290717822e-06, "loss": 0.0931, "step": 33560 }, { "epoch": 0.9927840539421542, "grad_norm": 0.7312410473823547, "learning_rate": 3.827568499988276e-06, "loss": 0.0854, "step": 33570 }, { "epoch": 0.9930797894363281, "grad_norm": 1.013239860534668, "learning_rate": 3.828708709258731e-06, "loss": 0.0763, "step": 33580 }, { "epoch": 0.9933755249305022, "grad_norm": 0.9365570545196533, "learning_rate": 3.829848918529185e-06, "loss": 0.0904, "step": 33590 }, { "epoch": 0.9936712604246761, "grad_norm": 1.254468321800232, "learning_rate": 3.830989127799639e-06, "loss": 0.0773, "step": 33600 }, { "epoch": 0.9939669959188502, "grad_norm": 1.8399604558944702, "learning_rate": 3.832129337070094e-06, "loss": 0.1013, "step": 33610 }, { "epoch": 0.9942627314130242, "grad_norm": 0.7526140213012695, "learning_rate": 3.8332695463405475e-06, "loss": 0.0806, "step": 33620 }, { "epoch": 0.9945584669071982, "grad_norm": 0.8501171469688416, "learning_rate": 3.834409755611002e-06, "loss": 0.0792, "step": 33630 }, { "epoch": 0.9948542024013722, "grad_norm": 1.1423643827438354, "learning_rate": 3.835549964881457e-06, "loss": 0.0754, "step": 33640 }, { "epoch": 0.9951499378955462, "grad_norm": 1.142930507659912, "learning_rate": 3.836690174151911e-06, "loss": 0.0784, "step": 33650 }, { "epoch": 0.9954456733897202, "grad_norm": 0.6547523140907288, "learning_rate": 3.837830383422366e-06, "loss": 0.0854, "step": 33660 }, { "epoch": 0.9957414088838943, "grad_norm": 0.7608566284179688, "learning_rate": 3.83897059269282e-06, "loss": 0.0946, "step": 33670 }, { "epoch": 0.9960371443780682, "grad_norm": 1.051079511642456, "learning_rate": 3.840110801963274e-06, "loss": 0.0773, "step": 33680 }, { "epoch": 0.9963328798722423, "grad_norm": 0.8922697305679321, "learning_rate": 3.841251011233729e-06, "loss": 0.0855, "step": 33690 }, { "epoch": 0.9966286153664162, "grad_norm": 0.795051634311676, "learning_rate": 3.8423912205041826e-06, "loss": 0.0662, "step": 33700 }, { "epoch": 0.9969243508605903, "grad_norm": 1.5403941869735718, "learning_rate": 3.843531429774637e-06, "loss": 0.0807, "step": 33710 }, { "epoch": 0.9972200863547643, "grad_norm": 0.8529922962188721, "learning_rate": 3.844671639045092e-06, "loss": 0.0901, "step": 33720 }, { "epoch": 0.9975158218489383, "grad_norm": 1.1035817861557007, "learning_rate": 3.845811848315546e-06, "loss": 0.0861, "step": 33730 }, { "epoch": 0.9978115573431123, "grad_norm": 0.7278013229370117, "learning_rate": 3.846952057586e-06, "loss": 0.0912, "step": 33740 }, { "epoch": 0.9981072928372864, "grad_norm": 0.9235287308692932, "learning_rate": 3.848092266856455e-06, "loss": 0.0682, "step": 33750 }, { "epoch": 0.9984030283314603, "grad_norm": 0.6759059429168701, "learning_rate": 3.849232476126909e-06, "loss": 0.115, "step": 33760 }, { "epoch": 0.9986987638256344, "grad_norm": 0.9060132503509521, "learning_rate": 3.850372685397364e-06, "loss": 0.0939, "step": 33770 }, { "epoch": 0.9989944993198083, "grad_norm": 1.1028013229370117, "learning_rate": 3.8515128946678176e-06, "loss": 0.0817, "step": 33780 }, { "epoch": 0.9992902348139824, "grad_norm": 1.0346906185150146, "learning_rate": 3.852653103938272e-06, "loss": 0.0685, "step": 33790 }, { "epoch": 0.9995859703081564, "grad_norm": 1.064097285270691, "learning_rate": 3.853793313208727e-06, "loss": 0.0741, "step": 33800 }, { "epoch": 0.9998817058023304, "grad_norm": 0.9813267588615417, "learning_rate": 3.854933522479181e-06, "loss": 0.0981, "step": 33810 }, { "epoch": 1.0, "eval_accuracy": 0.6561865788335496, "eval_animal_abuse/accuracy": 0.9945769704228632, "eval_animal_abuse/f1": 0.7623906705539358, "eval_animal_abuse/fpr": 0.0027092518426278015, "eval_animal_abuse/precision": 0.7646198830409356, "eval_animal_abuse/recall": 0.7601744186046512, "eval_animal_abuse/threshold": 0.596433162689209, "eval_child_abuse/accuracy": 0.9963070166683302, "eval_child_abuse/f1": 0.6686567164179105, "eval_child_abuse/fpr": 0.001890232682624911, "eval_child_abuse/precision": 0.6646884272997032, "eval_child_abuse/recall": 0.6726726726726727, "eval_child_abuse/threshold": 0.5832033753395081, "eval_controversial_topics,politics/accuracy": 0.9659147619522906, "eval_controversial_topics,politics/f1": 0.49269621193364693, "eval_controversial_topics,politics/fpr": 0.020627402526084533, "eval_controversial_topics,politics/precision": 0.4528903049613109, "eval_controversial_topics,politics/recall": 0.5401737242128122, "eval_controversial_topics,politics/threshold": 0.3674972355365753, "eval_discrimination,stereotype,injustice/accuracy": 0.9528229696909206, "eval_discrimination,stereotype,injustice/f1": 0.7104940792160065, "eval_discrimination,stereotype,injustice/fpr": 0.027758701702389078, "eval_discrimination,stereotype,injustice/precision": 0.69377990430622, "eval_discrimination,stereotype,injustice/recall": 0.7280334728033473, "eval_discrimination,stereotype,injustice/threshold": 0.3674972355365753, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9717203979106365, "eval_drug_abuse,weapons,banned_substance/f1": 0.7604959143420682, "eval_drug_abuse,weapons,banned_substance/fpr": 0.017857142857142825, "eval_drug_abuse,weapons,banned_substance/precision": 0.7271012931034483, "eval_drug_abuse,weapons,banned_substance/recall": 0.7971057294743059, "eval_drug_abuse,weapons,banned_substance/threshold": 0.4806005656719208, "eval_financial_crime,property_crime,theft/accuracy": 0.9577469474664804, "eval_financial_crime,property_crime,theft/f1": 0.7923479398299542, "eval_financial_crime,property_crime,theft/fpr": 0.028288152147872347, "eval_financial_crime,property_crime,theft/precision": 0.7594420937157186, "eval_financial_crime,property_crime,theft/recall": 0.8282344898307982, "eval_financial_crime,property_crime,theft/threshold": 0.2727987766265869, "eval_flagged/accuracy": 0.8415177828791962, "eval_flagged/aucpr": 0.8940956296286793, "eval_flagged/f1": 0.8612741172187841, "eval_flagged/fpr": 0.21190413681881182, "eval_flagged/precision": 0.8395980013627072, "eval_flagged/recall": 0.88409913007085, "eval_hate_speech,offensive_language/accuracy": 0.9457530691685797, "eval_hate_speech,offensive_language/f1": 0.6875538947973555, "eval_hate_speech,offensive_language/fpr": 0.026767769048054033, "eval_hate_speech,offensive_language/precision": 0.7100732238274292, "eval_hate_speech,offensive_language/recall": 0.6664190193164933, "eval_hate_speech,offensive_language/threshold": 0.4248891770839691, "eval_loss": 0.0875847190618515, "eval_macro_f1": 0.6545260636671346, "eval_macro_precision": 0.6458477956495938, "eval_macro_recall": 0.677576691257457, "eval_micro_f1": 0.7394931555912748, "eval_micro_precision": 0.7268012912974792, "eval_micro_recall": 0.7526361648025321, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9786572179525568, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.1822817080943276, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.011703686240169726, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.17064439140811455, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.19562243502051985, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.07477004826068878, "eval_non_violent_unethical_behavior/accuracy": 0.8812256712246731, "eval_non_violent_unethical_behavior/f1": 0.6901041666666666, "eval_non_violent_unethical_behavior/fpr": 0.06527154957648218, "eval_non_violent_unethical_behavior/precision": 0.7166035694970254, "eval_non_violent_unethical_behavior/recall": 0.6654947262682069, "eval_non_violent_unethical_behavior/threshold": 0.48035678267478943, "eval_privacy_violation/accuracy": 0.980453804438234, "eval_privacy_violation/f1": 0.8064569263712733, "eval_privacy_violation/fpr": 0.011496465318121348, "eval_privacy_violation/precision": 0.7884057971014493, "eval_privacy_violation/recall": 0.825354012137559, "eval_privacy_violation/threshold": 0.46952024102211, "eval_runtime": 49.1809, "eval_samples_per_second": 1222.304, "eval_self_harm/accuracy": 0.9968726087101174, "eval_self_harm/f1": 0.7314285714285714, "eval_self_harm/fpr": 0.0005694760820045548, "eval_self_harm/precision": 0.8827586206896552, "eval_self_harm/recall": 0.624390243902439, "eval_self_harm/threshold": 0.8365545868873596, "eval_sexually_explicit,adult_content/accuracy": 0.9827328076654357, "eval_sexually_explicit,adult_content/f1": 0.6681585677749361, "eval_sexually_explicit,adult_content/fpr": 0.010840847495184668, "eval_sexually_explicit,adult_content/precision": 0.6216537775133849, "eval_sexually_explicit,adult_content/recall": 0.7221838286109191, "eval_sexually_explicit,adult_content/threshold": 0.34953272342681885, "eval_steps_per_second": 19.113, "eval_terrorism,organized_crime/accuracy": 0.983448115247696, "eval_terrorism,organized_crime/f1": 0.36502871729419273, "eval_terrorism,organized_crime/fpr": 0.013415390807103427, "eval_terrorism,organized_crime/precision": 0.26335174953959484, "eval_terrorism,organized_crime/recall": 0.5945945945945946, "eval_terrorism,organized_crime/threshold": 0.09670579433441162, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9156935156535915, "eval_violence,aiding_and_abetting,incitement/f1": 0.8452708066190389, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06615747246271686, "eval_violence,aiding_and_abetting,incitement/precision": 0.8258561030903233, "eval_violence,aiding_and_abetting,incitement/recall": 0.8656203101550776, "eval_violence,aiding_and_abetting,incitement/threshold": 0.4330222010612488, "step": 33814 }, { "epoch": 1.0001774412965043, "grad_norm": 0.6865261793136597, "learning_rate": 3.855440282154939e-06, "loss": 0.088, "step": 33820 }, { "epoch": 1.0004731767906785, "grad_norm": 0.8810931444168091, "learning_rate": 3.855313592235999e-06, "loss": 0.0898, "step": 33830 }, { "epoch": 1.0007689122848524, "grad_norm": 0.7123658061027527, "learning_rate": 3.85518690231706e-06, "loss": 0.0925, "step": 33840 }, { "epoch": 1.0010646477790264, "grad_norm": 0.7694294452667236, "learning_rate": 3.8550602123981206e-06, "loss": 0.0747, "step": 33850 }, { "epoch": 1.0013603832732005, "grad_norm": 1.0978682041168213, "learning_rate": 3.854933522479181e-06, "loss": 0.0693, "step": 33860 }, { "epoch": 1.0016561187673745, "grad_norm": 1.1350494623184204, "learning_rate": 3.854806832560241e-06, "loss": 0.0745, "step": 33870 }, { "epoch": 1.0019518542615484, "grad_norm": 1.1612763404846191, "learning_rate": 3.854680142641302e-06, "loss": 0.1078, "step": 33880 }, { "epoch": 1.0022475897557226, "grad_norm": 0.590511679649353, "learning_rate": 3.854553452722363e-06, "loss": 0.0913, "step": 33890 }, { "epoch": 1.0025433252498965, "grad_norm": 2.4732255935668945, "learning_rate": 3.854426762803424e-06, "loss": 0.0912, "step": 33900 }, { "epoch": 1.0028390607440705, "grad_norm": 0.7679509520530701, "learning_rate": 3.8543000728844844e-06, "loss": 0.0867, "step": 33910 }, { "epoch": 1.0031347962382444, "grad_norm": 1.0694262981414795, "learning_rate": 3.854173382965545e-06, "loss": 0.0729, "step": 33920 }, { "epoch": 1.0034305317324186, "grad_norm": 1.0919157266616821, "learning_rate": 3.854046693046605e-06, "loss": 0.0966, "step": 33930 }, { "epoch": 1.0037262672265925, "grad_norm": 1.154318928718567, "learning_rate": 3.853920003127666e-06, "loss": 0.0919, "step": 33940 }, { "epoch": 1.0040220027207665, "grad_norm": 1.2499479055404663, "learning_rate": 3.853793313208727e-06, "loss": 0.0804, "step": 33950 }, { "epoch": 1.0043177382149406, "grad_norm": 0.46404871344566345, "learning_rate": 3.8536666232897875e-06, "loss": 0.0852, "step": 33960 }, { "epoch": 1.0046134737091146, "grad_norm": 0.6741245985031128, "learning_rate": 3.8535399333708475e-06, "loss": 0.0858, "step": 33970 }, { "epoch": 1.0049092092032885, "grad_norm": 1.4234548807144165, "learning_rate": 3.853413243451909e-06, "loss": 0.1043, "step": 33980 }, { "epoch": 1.0052049446974627, "grad_norm": 0.9789338707923889, "learning_rate": 3.853286553532969e-06, "loss": 0.0956, "step": 33990 }, { "epoch": 1.0055006801916366, "grad_norm": 0.8644643425941467, "learning_rate": 3.85315986361403e-06, "loss": 0.0779, "step": 34000 }, { "epoch": 1.0057964156858106, "grad_norm": 2.069530725479126, "learning_rate": 3.853033173695091e-06, "loss": 0.0807, "step": 34010 }, { "epoch": 1.0060921511799845, "grad_norm": 1.0711493492126465, "learning_rate": 3.8529064837761514e-06, "loss": 0.1061, "step": 34020 }, { "epoch": 1.0063878866741587, "grad_norm": 1.3969388008117676, "learning_rate": 3.852779793857211e-06, "loss": 0.0975, "step": 34030 }, { "epoch": 1.0066836221683326, "grad_norm": 0.9167234897613525, "learning_rate": 3.852653103938272e-06, "loss": 0.0855, "step": 34040 }, { "epoch": 1.0069793576625066, "grad_norm": 0.6473798751831055, "learning_rate": 3.852526414019333e-06, "loss": 0.0818, "step": 34050 }, { "epoch": 1.0072750931566807, "grad_norm": 0.6864476203918457, "learning_rate": 3.852399724100394e-06, "loss": 0.0674, "step": 34060 }, { "epoch": 1.0075708286508547, "grad_norm": 1.3330097198486328, "learning_rate": 3.8522730341814545e-06, "loss": 0.0829, "step": 34070 }, { "epoch": 1.0078665641450286, "grad_norm": 0.7433825135231018, "learning_rate": 3.8521463442625145e-06, "loss": 0.0984, "step": 34080 }, { "epoch": 1.0081622996392028, "grad_norm": 1.3084609508514404, "learning_rate": 3.852019654343575e-06, "loss": 0.0896, "step": 34090 }, { "epoch": 1.0084580351333767, "grad_norm": 0.35285019874572754, "learning_rate": 3.851892964424636e-06, "loss": 0.0752, "step": 34100 }, { "epoch": 1.0087537706275507, "grad_norm": 1.2033179998397827, "learning_rate": 3.851766274505697e-06, "loss": 0.0951, "step": 34110 }, { "epoch": 1.0090495061217248, "grad_norm": 1.1263850927352905, "learning_rate": 3.851639584586757e-06, "loss": 0.088, "step": 34120 }, { "epoch": 1.0093452416158988, "grad_norm": 0.8800605535507202, "learning_rate": 3.8515128946678176e-06, "loss": 0.0919, "step": 34130 }, { "epoch": 1.0096409771100727, "grad_norm": 1.2113691568374634, "learning_rate": 3.851386204748878e-06, "loss": 0.0778, "step": 34140 }, { "epoch": 1.0099367126042467, "grad_norm": 1.1017005443572998, "learning_rate": 3.851259514829939e-06, "loss": 0.0729, "step": 34150 }, { "epoch": 1.0102324480984208, "grad_norm": 0.6125223636627197, "learning_rate": 3.851132824911e-06, "loss": 0.0829, "step": 34160 }, { "epoch": 1.0105281835925948, "grad_norm": 1.0960750579833984, "learning_rate": 3.851006134992061e-06, "loss": 0.0872, "step": 34170 }, { "epoch": 1.0108239190867687, "grad_norm": 1.0888925790786743, "learning_rate": 3.850879445073121e-06, "loss": 0.1, "step": 34180 }, { "epoch": 1.0111196545809429, "grad_norm": 0.6502181887626648, "learning_rate": 3.8507527551541815e-06, "loss": 0.0817, "step": 34190 }, { "epoch": 1.0114153900751168, "grad_norm": 1.167218565940857, "learning_rate": 3.850626065235242e-06, "loss": 0.0992, "step": 34200 }, { "epoch": 1.0117111255692908, "grad_norm": 1.2087547779083252, "learning_rate": 3.850499375316303e-06, "loss": 0.0869, "step": 34210 }, { "epoch": 1.012006861063465, "grad_norm": 1.6728969812393188, "learning_rate": 3.850372685397364e-06, "loss": 0.0851, "step": 34220 }, { "epoch": 1.0123025965576389, "grad_norm": 1.0290049314498901, "learning_rate": 3.850245995478425e-06, "loss": 0.0719, "step": 34230 }, { "epoch": 1.0125983320518128, "grad_norm": 0.7139500379562378, "learning_rate": 3.8501193055594846e-06, "loss": 0.0968, "step": 34240 }, { "epoch": 1.0128940675459868, "grad_norm": 1.1597886085510254, "learning_rate": 3.849992615640545e-06, "loss": 0.0926, "step": 34250 }, { "epoch": 1.013189803040161, "grad_norm": 1.2085050344467163, "learning_rate": 3.849865925721606e-06, "loss": 0.0688, "step": 34260 }, { "epoch": 1.0134855385343349, "grad_norm": 1.1228435039520264, "learning_rate": 3.849739235802667e-06, "loss": 0.0868, "step": 34270 }, { "epoch": 1.0137812740285088, "grad_norm": 0.8459746241569519, "learning_rate": 3.849612545883727e-06, "loss": 0.0888, "step": 34280 }, { "epoch": 1.014077009522683, "grad_norm": 1.0635128021240234, "learning_rate": 3.849485855964788e-06, "loss": 0.0976, "step": 34290 }, { "epoch": 1.014372745016857, "grad_norm": 0.8832670450210571, "learning_rate": 3.8493591660458485e-06, "loss": 0.0913, "step": 34300 }, { "epoch": 1.0146684805110309, "grad_norm": 1.156153678894043, "learning_rate": 3.849232476126909e-06, "loss": 0.0808, "step": 34310 }, { "epoch": 1.014964216005205, "grad_norm": 0.7888913750648499, "learning_rate": 3.84910578620797e-06, "loss": 0.0886, "step": 34320 }, { "epoch": 1.015259951499379, "grad_norm": 1.189525842666626, "learning_rate": 3.848979096289031e-06, "loss": 0.088, "step": 34330 }, { "epoch": 1.015555686993553, "grad_norm": 0.9886720776557922, "learning_rate": 3.848852406370091e-06, "loss": 0.0811, "step": 34340 }, { "epoch": 1.015851422487727, "grad_norm": 0.6344681978225708, "learning_rate": 3.8487257164511516e-06, "loss": 0.0726, "step": 34350 }, { "epoch": 1.016147157981901, "grad_norm": 0.7374041676521301, "learning_rate": 3.848599026532212e-06, "loss": 0.0859, "step": 34360 }, { "epoch": 1.016442893476075, "grad_norm": 1.6519577503204346, "learning_rate": 3.848472336613273e-06, "loss": 0.0943, "step": 34370 }, { "epoch": 1.016738628970249, "grad_norm": 1.0325747728347778, "learning_rate": 3.848345646694334e-06, "loss": 0.087, "step": 34380 }, { "epoch": 1.017034364464423, "grad_norm": 0.7601255178451538, "learning_rate": 3.848218956775395e-06, "loss": 0.0929, "step": 34390 }, { "epoch": 1.017330099958597, "grad_norm": 1.5867096185684204, "learning_rate": 3.848092266856455e-06, "loss": 0.0951, "step": 34400 }, { "epoch": 1.017625835452771, "grad_norm": 0.5232115387916565, "learning_rate": 3.8479655769375154e-06, "loss": 0.0739, "step": 34410 }, { "epoch": 1.0179215709469451, "grad_norm": 1.206896424293518, "learning_rate": 3.847838887018576e-06, "loss": 0.0881, "step": 34420 }, { "epoch": 1.018217306441119, "grad_norm": 0.8235935568809509, "learning_rate": 3.847712197099637e-06, "loss": 0.1157, "step": 34430 }, { "epoch": 1.018513041935293, "grad_norm": 0.9388240575790405, "learning_rate": 3.847585507180697e-06, "loss": 0.0855, "step": 34440 }, { "epoch": 1.0188087774294672, "grad_norm": 0.7327553033828735, "learning_rate": 3.847458817261758e-06, "loss": 0.0776, "step": 34450 }, { "epoch": 1.0191045129236411, "grad_norm": 0.7860578894615173, "learning_rate": 3.8473321273428185e-06, "loss": 0.068, "step": 34460 }, { "epoch": 1.019400248417815, "grad_norm": 1.1994324922561646, "learning_rate": 3.847205437423879e-06, "loss": 0.0843, "step": 34470 }, { "epoch": 1.019695983911989, "grad_norm": 1.0938100814819336, "learning_rate": 3.84707874750494e-06, "loss": 0.108, "step": 34480 }, { "epoch": 1.0199917194061632, "grad_norm": 0.8992987871170044, "learning_rate": 3.846952057586e-06, "loss": 0.0905, "step": 34490 }, { "epoch": 1.0202874549003371, "grad_norm": 1.1625173091888428, "learning_rate": 3.846825367667061e-06, "loss": 0.0902, "step": 34500 }, { "epoch": 1.020583190394511, "grad_norm": 0.44307905435562134, "learning_rate": 3.846698677748122e-06, "loss": 0.0792, "step": 34510 }, { "epoch": 1.0208789258886852, "grad_norm": 1.3922386169433594, "learning_rate": 3.8465719878291824e-06, "loss": 0.0982, "step": 34520 }, { "epoch": 1.0211746613828592, "grad_norm": 1.023139476776123, "learning_rate": 3.846445297910242e-06, "loss": 0.0756, "step": 34530 }, { "epoch": 1.0214703968770331, "grad_norm": 1.6774705648422241, "learning_rate": 3.846318607991304e-06, "loss": 0.0917, "step": 34540 }, { "epoch": 1.0217661323712073, "grad_norm": 0.8944912552833557, "learning_rate": 3.846191918072364e-06, "loss": 0.0728, "step": 34550 }, { "epoch": 1.0220618678653812, "grad_norm": 1.6265532970428467, "learning_rate": 3.846065228153425e-06, "loss": 0.0867, "step": 34560 }, { "epoch": 1.0223576033595552, "grad_norm": 0.7489534020423889, "learning_rate": 3.8459385382344855e-06, "loss": 0.0979, "step": 34570 }, { "epoch": 1.0226533388537293, "grad_norm": 0.6352784633636475, "learning_rate": 3.845811848315546e-06, "loss": 0.0959, "step": 34580 }, { "epoch": 1.0229490743479033, "grad_norm": 0.9762629866600037, "learning_rate": 3.845685158396606e-06, "loss": 0.0918, "step": 34590 }, { "epoch": 1.0232448098420772, "grad_norm": 1.006966233253479, "learning_rate": 3.845558468477667e-06, "loss": 0.0835, "step": 34600 }, { "epoch": 1.0235405453362512, "grad_norm": 0.7152177691459656, "learning_rate": 3.845431778558728e-06, "loss": 0.0816, "step": 34610 }, { "epoch": 1.0238362808304253, "grad_norm": 1.0174710750579834, "learning_rate": 3.845305088639789e-06, "loss": 0.0869, "step": 34620 }, { "epoch": 1.0241320163245993, "grad_norm": 0.660157322883606, "learning_rate": 3.8451783987208494e-06, "loss": 0.0789, "step": 34630 }, { "epoch": 1.0244277518187732, "grad_norm": 1.1628886461257935, "learning_rate": 3.84505170880191e-06, "loss": 0.077, "step": 34640 }, { "epoch": 1.0247234873129474, "grad_norm": 1.2095327377319336, "learning_rate": 3.84492501888297e-06, "loss": 0.0864, "step": 34650 }, { "epoch": 1.0250192228071213, "grad_norm": 0.9542286992073059, "learning_rate": 3.844798328964031e-06, "loss": 0.0756, "step": 34660 }, { "epoch": 1.0253149583012953, "grad_norm": 1.3775911331176758, "learning_rate": 3.844671639045092e-06, "loss": 0.0816, "step": 34670 }, { "epoch": 1.0256106937954694, "grad_norm": 0.9344788789749146, "learning_rate": 3.8445449491261525e-06, "loss": 0.0885, "step": 34680 }, { "epoch": 1.0259064292896434, "grad_norm": 0.9176598787307739, "learning_rate": 3.8444182592072125e-06, "loss": 0.0822, "step": 34690 }, { "epoch": 1.0262021647838173, "grad_norm": 1.2073885202407837, "learning_rate": 3.844291569288274e-06, "loss": 0.0776, "step": 34700 }, { "epoch": 1.0264979002779913, "grad_norm": 0.6571563482284546, "learning_rate": 3.844164879369334e-06, "loss": 0.0784, "step": 34710 }, { "epoch": 1.0267936357721654, "grad_norm": 0.9683822989463806, "learning_rate": 3.844038189450395e-06, "loss": 0.0837, "step": 34720 }, { "epoch": 1.0270893712663394, "grad_norm": 1.8916552066802979, "learning_rate": 3.843911499531456e-06, "loss": 0.0934, "step": 34730 }, { "epoch": 1.0273851067605133, "grad_norm": 1.1175308227539062, "learning_rate": 3.843784809612516e-06, "loss": 0.0757, "step": 34740 }, { "epoch": 1.0276808422546875, "grad_norm": 1.5521010160446167, "learning_rate": 3.843658119693576e-06, "loss": 0.1042, "step": 34750 }, { "epoch": 1.0279765777488614, "grad_norm": 1.0267465114593506, "learning_rate": 3.843531429774637e-06, "loss": 0.0783, "step": 34760 }, { "epoch": 1.0282723132430354, "grad_norm": 0.6773044466972351, "learning_rate": 3.843404739855698e-06, "loss": 0.0918, "step": 34770 }, { "epoch": 1.0285680487372095, "grad_norm": 1.413598656654358, "learning_rate": 3.843278049936759e-06, "loss": 0.081, "step": 34780 }, { "epoch": 1.0288637842313835, "grad_norm": 1.5880851745605469, "learning_rate": 3.8431513600178195e-06, "loss": 0.0917, "step": 34790 }, { "epoch": 1.0291595197255574, "grad_norm": 1.0850036144256592, "learning_rate": 3.84302467009888e-06, "loss": 0.0668, "step": 34800 }, { "epoch": 1.0294552552197316, "grad_norm": 0.8193054795265198, "learning_rate": 3.84289798017994e-06, "loss": 0.0763, "step": 34810 }, { "epoch": 1.0297509907139055, "grad_norm": 1.5198954343795776, "learning_rate": 3.842771290261001e-06, "loss": 0.0867, "step": 34820 }, { "epoch": 1.0300467262080795, "grad_norm": 1.321205735206604, "learning_rate": 3.842644600342062e-06, "loss": 0.0842, "step": 34830 }, { "epoch": 1.0303424617022534, "grad_norm": 0.9729416370391846, "learning_rate": 3.842517910423123e-06, "loss": 0.0996, "step": 34840 }, { "epoch": 1.0306381971964276, "grad_norm": 1.1214786767959595, "learning_rate": 3.8423912205041826e-06, "loss": 0.0959, "step": 34850 }, { "epoch": 1.0309339326906015, "grad_norm": 0.7692162990570068, "learning_rate": 3.842264530585243e-06, "loss": 0.0725, "step": 34860 }, { "epoch": 1.0312296681847755, "grad_norm": 0.7821021676063538, "learning_rate": 3.842137840666304e-06, "loss": 0.0881, "step": 34870 }, { "epoch": 1.0315254036789496, "grad_norm": 0.8939810991287231, "learning_rate": 3.842011150747365e-06, "loss": 0.096, "step": 34880 }, { "epoch": 1.0318211391731236, "grad_norm": 0.7198687791824341, "learning_rate": 3.841884460828426e-06, "loss": 0.0769, "step": 34890 }, { "epoch": 1.0321168746672975, "grad_norm": 0.7055640816688538, "learning_rate": 3.841757770909486e-06, "loss": 0.0781, "step": 34900 }, { "epoch": 1.0324126101614717, "grad_norm": 0.9119021892547607, "learning_rate": 3.8416310809905465e-06, "loss": 0.0858, "step": 34910 }, { "epoch": 1.0327083456556456, "grad_norm": 0.6951549649238586, "learning_rate": 3.841504391071607e-06, "loss": 0.0835, "step": 34920 }, { "epoch": 1.0330040811498196, "grad_norm": 0.9781546592712402, "learning_rate": 3.841377701152668e-06, "loss": 0.0719, "step": 34930 }, { "epoch": 1.0332998166439935, "grad_norm": 0.7483818531036377, "learning_rate": 3.841251011233729e-06, "loss": 0.0788, "step": 34940 }, { "epoch": 1.0335955521381677, "grad_norm": 0.7935370802879333, "learning_rate": 3.84112432131479e-06, "loss": 0.081, "step": 34950 }, { "epoch": 1.0338912876323416, "grad_norm": 0.6742188334465027, "learning_rate": 3.8409976313958496e-06, "loss": 0.0597, "step": 34960 }, { "epoch": 1.0341870231265156, "grad_norm": 0.6874536275863647, "learning_rate": 3.84087094147691e-06, "loss": 0.0817, "step": 34970 }, { "epoch": 1.0344827586206897, "grad_norm": 0.7310208678245544, "learning_rate": 3.840744251557971e-06, "loss": 0.0769, "step": 34980 }, { "epoch": 1.0347784941148637, "grad_norm": 1.2556488513946533, "learning_rate": 3.840617561639032e-06, "loss": 0.0948, "step": 34990 }, { "epoch": 1.0350742296090376, "grad_norm": 0.9851891398429871, "learning_rate": 3.840490871720092e-06, "loss": 0.0788, "step": 35000 }, { "epoch": 1.0353699651032118, "grad_norm": 0.6652372479438782, "learning_rate": 3.840364181801153e-06, "loss": 0.0825, "step": 35010 }, { "epoch": 1.0356657005973857, "grad_norm": 1.4675617218017578, "learning_rate": 3.8402374918822134e-06, "loss": 0.095, "step": 35020 }, { "epoch": 1.0359614360915597, "grad_norm": 0.979390025138855, "learning_rate": 3.840110801963274e-06, "loss": 0.0914, "step": 35030 }, { "epoch": 1.0362571715857338, "grad_norm": 0.690953254699707, "learning_rate": 3.839984112044335e-06, "loss": 0.1004, "step": 35040 }, { "epoch": 1.0365529070799078, "grad_norm": 1.7992548942565918, "learning_rate": 3.839857422125396e-06, "loss": 0.104, "step": 35050 }, { "epoch": 1.0368486425740817, "grad_norm": 0.8605745434761047, "learning_rate": 3.839730732206456e-06, "loss": 0.07, "step": 35060 }, { "epoch": 1.0371443780682557, "grad_norm": 0.6621278524398804, "learning_rate": 3.8396040422875165e-06, "loss": 0.0877, "step": 35070 }, { "epoch": 1.0374401135624298, "grad_norm": 0.9261907935142517, "learning_rate": 3.839477352368577e-06, "loss": 0.0788, "step": 35080 }, { "epoch": 1.0377358490566038, "grad_norm": 0.8204295039176941, "learning_rate": 3.839350662449638e-06, "loss": 0.0974, "step": 35090 }, { "epoch": 1.0380315845507777, "grad_norm": 0.7497175931930542, "learning_rate": 3.839223972530699e-06, "loss": 0.0753, "step": 35100 }, { "epoch": 1.0383273200449519, "grad_norm": 1.1245665550231934, "learning_rate": 3.83909728261176e-06, "loss": 0.0632, "step": 35110 }, { "epoch": 1.0386230555391258, "grad_norm": 1.002977728843689, "learning_rate": 3.83897059269282e-06, "loss": 0.0996, "step": 35120 }, { "epoch": 1.0389187910332998, "grad_norm": 1.0109999179840088, "learning_rate": 3.8388439027738804e-06, "loss": 0.0855, "step": 35130 }, { "epoch": 1.039214526527474, "grad_norm": 1.2837815284729004, "learning_rate": 3.838717212854941e-06, "loss": 0.0924, "step": 35140 }, { "epoch": 1.0395102620216479, "grad_norm": 0.8684301972389221, "learning_rate": 3.838590522936002e-06, "loss": 0.0707, "step": 35150 }, { "epoch": 1.0398059975158218, "grad_norm": 0.7015656232833862, "learning_rate": 3.838463833017062e-06, "loss": 0.0717, "step": 35160 }, { "epoch": 1.0401017330099958, "grad_norm": 0.8472921252250671, "learning_rate": 3.838337143098123e-06, "loss": 0.0988, "step": 35170 }, { "epoch": 1.04039746850417, "grad_norm": 0.838628351688385, "learning_rate": 3.8382104531791835e-06, "loss": 0.1008, "step": 35180 }, { "epoch": 1.0406932039983439, "grad_norm": 0.7423936128616333, "learning_rate": 3.838083763260244e-06, "loss": 0.0701, "step": 35190 }, { "epoch": 1.0409889394925178, "grad_norm": 0.712758481502533, "learning_rate": 3.837957073341305e-06, "loss": 0.0786, "step": 35200 }, { "epoch": 1.041284674986692, "grad_norm": 0.9849653244018555, "learning_rate": 3.837830383422366e-06, "loss": 0.0816, "step": 35210 }, { "epoch": 1.041580410480866, "grad_norm": 0.5531652569770813, "learning_rate": 3.837703693503426e-06, "loss": 0.0754, "step": 35220 }, { "epoch": 1.0418761459750399, "grad_norm": 0.8234468698501587, "learning_rate": 3.837577003584487e-06, "loss": 0.1085, "step": 35230 }, { "epoch": 1.042171881469214, "grad_norm": 0.874636173248291, "learning_rate": 3.837450313665547e-06, "loss": 0.0859, "step": 35240 }, { "epoch": 1.042467616963388, "grad_norm": 1.3739776611328125, "learning_rate": 3.837323623746608e-06, "loss": 0.0816, "step": 35250 }, { "epoch": 1.042763352457562, "grad_norm": 0.6370015144348145, "learning_rate": 3.837196933827669e-06, "loss": 0.0798, "step": 35260 }, { "epoch": 1.043059087951736, "grad_norm": 0.8959142565727234, "learning_rate": 3.837070243908729e-06, "loss": 0.0818, "step": 35270 }, { "epoch": 1.04335482344591, "grad_norm": 0.7989101409912109, "learning_rate": 3.83694355398979e-06, "loss": 0.1099, "step": 35280 }, { "epoch": 1.043650558940084, "grad_norm": 0.7304271459579468, "learning_rate": 3.8368168640708505e-06, "loss": 0.0961, "step": 35290 }, { "epoch": 1.043946294434258, "grad_norm": 0.7742159962654114, "learning_rate": 3.836690174151911e-06, "loss": 0.0739, "step": 35300 }, { "epoch": 1.044242029928432, "grad_norm": 1.0333093404769897, "learning_rate": 3.836563484232971e-06, "loss": 0.0817, "step": 35310 }, { "epoch": 1.044537765422606, "grad_norm": 1.223683476448059, "learning_rate": 3.836436794314032e-06, "loss": 0.0936, "step": 35320 }, { "epoch": 1.04483350091678, "grad_norm": 0.6023619771003723, "learning_rate": 3.836310104395093e-06, "loss": 0.0875, "step": 35330 }, { "epoch": 1.0451292364109541, "grad_norm": 0.6962149143218994, "learning_rate": 3.836183414476154e-06, "loss": 0.0855, "step": 35340 }, { "epoch": 1.045424971905128, "grad_norm": 0.9347482919692993, "learning_rate": 3.836056724557214e-06, "loss": 0.078, "step": 35350 }, { "epoch": 1.045720707399302, "grad_norm": 1.244333267211914, "learning_rate": 3.835930034638275e-06, "loss": 0.0709, "step": 35360 }, { "epoch": 1.0460164428934762, "grad_norm": 1.3592536449432373, "learning_rate": 3.835803344719335e-06, "loss": 0.101, "step": 35370 }, { "epoch": 1.0463121783876501, "grad_norm": 1.1203489303588867, "learning_rate": 3.835676654800396e-06, "loss": 0.0909, "step": 35380 }, { "epoch": 1.046607913881824, "grad_norm": 1.609015703201294, "learning_rate": 3.835549964881457e-06, "loss": 0.0834, "step": 35390 }, { "epoch": 1.046903649375998, "grad_norm": 1.279662847518921, "learning_rate": 3.8354232749625175e-06, "loss": 0.0945, "step": 35400 }, { "epoch": 1.0471993848701722, "grad_norm": 0.8417612314224243, "learning_rate": 3.8352965850435775e-06, "loss": 0.0876, "step": 35410 }, { "epoch": 1.047495120364346, "grad_norm": 0.7989944219589233, "learning_rate": 3.835169895124639e-06, "loss": 0.0778, "step": 35420 }, { "epoch": 1.04779085585852, "grad_norm": 0.7731327414512634, "learning_rate": 3.835043205205699e-06, "loss": 0.1012, "step": 35430 }, { "epoch": 1.0480865913526942, "grad_norm": 1.0529744625091553, "learning_rate": 3.83491651528676e-06, "loss": 0.0823, "step": 35440 }, { "epoch": 1.0483823268468682, "grad_norm": 1.0729302167892456, "learning_rate": 3.834789825367821e-06, "loss": 0.0871, "step": 35450 }, { "epoch": 1.048678062341042, "grad_norm": 1.036277174949646, "learning_rate": 3.834663135448881e-06, "loss": 0.08, "step": 35460 }, { "epoch": 1.0489737978352163, "grad_norm": 0.973781168460846, "learning_rate": 3.834536445529941e-06, "loss": 0.1022, "step": 35470 }, { "epoch": 1.0492695333293902, "grad_norm": 0.6183832287788391, "learning_rate": 3.834409755611002e-06, "loss": 0.0913, "step": 35480 }, { "epoch": 1.0495652688235642, "grad_norm": 1.4206476211547852, "learning_rate": 3.834283065692063e-06, "loss": 0.0825, "step": 35490 }, { "epoch": 1.0498610043177383, "grad_norm": 0.9855476021766663, "learning_rate": 3.834156375773124e-06, "loss": 0.0886, "step": 35500 }, { "epoch": 1.0501567398119123, "grad_norm": 0.9973443746566772, "learning_rate": 3.8340296858541845e-06, "loss": 0.0743, "step": 35510 }, { "epoch": 1.0504524753060862, "grad_norm": 1.0854175090789795, "learning_rate": 3.833902995935245e-06, "loss": 0.0899, "step": 35520 }, { "epoch": 1.0507482108002602, "grad_norm": 1.0195746421813965, "learning_rate": 3.833776306016305e-06, "loss": 0.1152, "step": 35530 }, { "epoch": 1.0510439462944343, "grad_norm": 0.9453768730163574, "learning_rate": 3.833649616097366e-06, "loss": 0.0897, "step": 35540 }, { "epoch": 1.0513396817886083, "grad_norm": 0.4970884323120117, "learning_rate": 3.833522926178427e-06, "loss": 0.073, "step": 35550 }, { "epoch": 1.0516354172827822, "grad_norm": 1.060679316520691, "learning_rate": 3.833396236259488e-06, "loss": 0.081, "step": 35560 }, { "epoch": 1.0519311527769564, "grad_norm": 0.9870697259902954, "learning_rate": 3.8332695463405475e-06, "loss": 0.0884, "step": 35570 }, { "epoch": 1.0522268882711303, "grad_norm": 1.0640254020690918, "learning_rate": 3.833142856421609e-06, "loss": 0.1017, "step": 35580 }, { "epoch": 1.0525226237653043, "grad_norm": 0.9142894148826599, "learning_rate": 3.833016166502669e-06, "loss": 0.1028, "step": 35590 }, { "epoch": 1.0528183592594784, "grad_norm": 1.282074213027954, "learning_rate": 3.83288947658373e-06, "loss": 0.0906, "step": 35600 }, { "epoch": 1.0531140947536524, "grad_norm": 1.3154852390289307, "learning_rate": 3.832762786664791e-06, "loss": 0.0683, "step": 35610 }, { "epoch": 1.0534098302478263, "grad_norm": 0.774249792098999, "learning_rate": 3.8326360967458515e-06, "loss": 0.0799, "step": 35620 }, { "epoch": 1.0537055657420002, "grad_norm": 0.865060567855835, "learning_rate": 3.8325094068269114e-06, "loss": 0.0785, "step": 35630 }, { "epoch": 1.0540013012361744, "grad_norm": 0.8333196043968201, "learning_rate": 3.832382716907972e-06, "loss": 0.096, "step": 35640 }, { "epoch": 1.0542970367303484, "grad_norm": 1.112975835800171, "learning_rate": 3.832256026989033e-06, "loss": 0.0727, "step": 35650 }, { "epoch": 1.0545927722245223, "grad_norm": 0.9193236231803894, "learning_rate": 3.832129337070094e-06, "loss": 0.087, "step": 35660 }, { "epoch": 1.0548885077186965, "grad_norm": 1.2548269033432007, "learning_rate": 3.832002647151155e-06, "loss": 0.0835, "step": 35670 }, { "epoch": 1.0551842432128704, "grad_norm": 0.9821863174438477, "learning_rate": 3.8318759572322145e-06, "loss": 0.0844, "step": 35680 }, { "epoch": 1.0554799787070444, "grad_norm": 0.6354431509971619, "learning_rate": 3.831749267313275e-06, "loss": 0.1022, "step": 35690 }, { "epoch": 1.0557757142012185, "grad_norm": 1.4519890546798706, "learning_rate": 3.831622577394336e-06, "loss": 0.0938, "step": 35700 }, { "epoch": 1.0560714496953925, "grad_norm": 0.5907469987869263, "learning_rate": 3.831495887475397e-06, "loss": 0.0736, "step": 35710 }, { "epoch": 1.0563671851895664, "grad_norm": 1.175990104675293, "learning_rate": 3.831369197556457e-06, "loss": 0.0839, "step": 35720 }, { "epoch": 1.0566629206837406, "grad_norm": 0.9219407439231873, "learning_rate": 3.831242507637518e-06, "loss": 0.1021, "step": 35730 }, { "epoch": 1.0569586561779145, "grad_norm": 1.1136298179626465, "learning_rate": 3.8311158177185784e-06, "loss": 0.0764, "step": 35740 }, { "epoch": 1.0572543916720885, "grad_norm": 0.8631220459938049, "learning_rate": 3.830989127799639e-06, "loss": 0.0852, "step": 35750 }, { "epoch": 1.0575501271662624, "grad_norm": 1.4190410375595093, "learning_rate": 3.8308624378807e-06, "loss": 0.079, "step": 35760 }, { "epoch": 1.0578458626604366, "grad_norm": 1.6076884269714355, "learning_rate": 3.830735747961761e-06, "loss": 0.0719, "step": 35770 }, { "epoch": 1.0581415981546105, "grad_norm": 1.0642321109771729, "learning_rate": 3.830609058042821e-06, "loss": 0.0874, "step": 35780 }, { "epoch": 1.0584373336487845, "grad_norm": 1.0491479635238647, "learning_rate": 3.8304823681238815e-06, "loss": 0.0768, "step": 35790 }, { "epoch": 1.0587330691429586, "grad_norm": 0.8400108814239502, "learning_rate": 3.830355678204942e-06, "loss": 0.0939, "step": 35800 }, { "epoch": 1.0590288046371326, "grad_norm": 0.7271426916122437, "learning_rate": 3.830228988286003e-06, "loss": 0.0812, "step": 35810 }, { "epoch": 1.0593245401313065, "grad_norm": 1.1015634536743164, "learning_rate": 3.830102298367064e-06, "loss": 0.0842, "step": 35820 }, { "epoch": 1.0596202756254807, "grad_norm": 0.8624109029769897, "learning_rate": 3.829975608448125e-06, "loss": 0.1045, "step": 35830 }, { "epoch": 1.0599160111196546, "grad_norm": 1.1599483489990234, "learning_rate": 3.829848918529185e-06, "loss": 0.0853, "step": 35840 }, { "epoch": 1.0602117466138286, "grad_norm": 0.8302025198936462, "learning_rate": 3.829722228610245e-06, "loss": 0.0992, "step": 35850 }, { "epoch": 1.0605074821080025, "grad_norm": 1.032537579536438, "learning_rate": 3.829595538691306e-06, "loss": 0.0846, "step": 35860 }, { "epoch": 1.0608032176021767, "grad_norm": 0.722919762134552, "learning_rate": 3.829468848772367e-06, "loss": 0.0994, "step": 35870 }, { "epoch": 1.0610989530963506, "grad_norm": 0.7767746448516846, "learning_rate": 3.829342158853427e-06, "loss": 0.101, "step": 35880 }, { "epoch": 1.0613946885905245, "grad_norm": 0.7806829214096069, "learning_rate": 3.829215468934488e-06, "loss": 0.099, "step": 35890 }, { "epoch": 1.0616904240846987, "grad_norm": 0.9636351466178894, "learning_rate": 3.8290887790155485e-06, "loss": 0.0895, "step": 35900 }, { "epoch": 1.0619861595788727, "grad_norm": 1.0615954399108887, "learning_rate": 3.828962089096609e-06, "loss": 0.0796, "step": 35910 }, { "epoch": 1.0622818950730466, "grad_norm": 1.3085525035858154, "learning_rate": 3.82883539917767e-06, "loss": 0.0911, "step": 35920 }, { "epoch": 1.0625776305672208, "grad_norm": 1.0034174919128418, "learning_rate": 3.828708709258731e-06, "loss": 0.096, "step": 35930 }, { "epoch": 1.0628733660613947, "grad_norm": 1.2137809991836548, "learning_rate": 3.828582019339791e-06, "loss": 0.0927, "step": 35940 }, { "epoch": 1.0631691015555687, "grad_norm": 0.9755887389183044, "learning_rate": 3.828455329420852e-06, "loss": 0.1027, "step": 35950 }, { "epoch": 1.0634648370497426, "grad_norm": 1.2939544916152954, "learning_rate": 3.828328639501912e-06, "loss": 0.0866, "step": 35960 }, { "epoch": 1.0637605725439168, "grad_norm": 1.2443761825561523, "learning_rate": 3.828201949582973e-06, "loss": 0.0927, "step": 35970 }, { "epoch": 1.0640563080380907, "grad_norm": 0.7870545983314514, "learning_rate": 3.828075259664034e-06, "loss": 0.0906, "step": 35980 }, { "epoch": 1.0643520435322646, "grad_norm": 1.2904235124588013, "learning_rate": 3.827948569745095e-06, "loss": 0.0934, "step": 35990 }, { "epoch": 1.0646477790264388, "grad_norm": 0.9035102725028992, "learning_rate": 3.827821879826155e-06, "loss": 0.0752, "step": 36000 }, { "epoch": 1.0649435145206128, "grad_norm": 0.6057193875312805, "learning_rate": 3.8276951899072155e-06, "loss": 0.075, "step": 36010 }, { "epoch": 1.0652392500147867, "grad_norm": 1.1751091480255127, "learning_rate": 3.827568499988276e-06, "loss": 0.0984, "step": 36020 }, { "epoch": 1.0655349855089609, "grad_norm": 0.5201590657234192, "learning_rate": 3.827441810069337e-06, "loss": 0.0999, "step": 36030 }, { "epoch": 1.0658307210031348, "grad_norm": 0.7471513748168945, "learning_rate": 3.827315120150397e-06, "loss": 0.0977, "step": 36040 }, { "epoch": 1.0661264564973087, "grad_norm": 0.4990270137786865, "learning_rate": 3.827188430231458e-06, "loss": 0.086, "step": 36050 }, { "epoch": 1.066422191991483, "grad_norm": 0.8029642701148987, "learning_rate": 3.827061740312519e-06, "loss": 0.0586, "step": 36060 }, { "epoch": 1.0667179274856569, "grad_norm": 0.6405538320541382, "learning_rate": 3.826935050393579e-06, "loss": 0.1109, "step": 36070 }, { "epoch": 1.0670136629798308, "grad_norm": 1.0676829814910889, "learning_rate": 3.82680836047464e-06, "loss": 0.0908, "step": 36080 }, { "epoch": 1.0673093984740047, "grad_norm": 1.4070055484771729, "learning_rate": 3.826681670555701e-06, "loss": 0.1031, "step": 36090 }, { "epoch": 1.067605133968179, "grad_norm": 0.8583338856697083, "learning_rate": 3.826554980636761e-06, "loss": 0.08, "step": 36100 }, { "epoch": 1.0679008694623529, "grad_norm": 1.133280634880066, "learning_rate": 3.826428290717822e-06, "loss": 0.0656, "step": 36110 }, { "epoch": 1.0681966049565268, "grad_norm": 0.8552712202072144, "learning_rate": 3.8263016007988825e-06, "loss": 0.0733, "step": 36120 }, { "epoch": 1.068492340450701, "grad_norm": 0.9760055541992188, "learning_rate": 3.8261749108799424e-06, "loss": 0.0863, "step": 36130 }, { "epoch": 1.068788075944875, "grad_norm": 0.8346191048622131, "learning_rate": 3.826048220961004e-06, "loss": 0.0841, "step": 36140 }, { "epoch": 1.0690838114390488, "grad_norm": 0.7326526045799255, "learning_rate": 3.825921531042064e-06, "loss": 0.0696, "step": 36150 }, { "epoch": 1.069379546933223, "grad_norm": 1.3619142770767212, "learning_rate": 3.825794841123125e-06, "loss": 0.0775, "step": 36160 }, { "epoch": 1.069675282427397, "grad_norm": 1.592583179473877, "learning_rate": 3.825668151204186e-06, "loss": 0.1008, "step": 36170 }, { "epoch": 1.069971017921571, "grad_norm": 0.8596464395523071, "learning_rate": 3.825541461285246e-06, "loss": 0.0994, "step": 36180 }, { "epoch": 1.070266753415745, "grad_norm": 0.8268159627914429, "learning_rate": 3.825414771366306e-06, "loss": 0.0956, "step": 36190 }, { "epoch": 1.070562488909919, "grad_norm": 0.9844247698783875, "learning_rate": 3.825288081447367e-06, "loss": 0.0714, "step": 36200 }, { "epoch": 1.070858224404093, "grad_norm": 0.540056586265564, "learning_rate": 3.825161391528428e-06, "loss": 0.0743, "step": 36210 }, { "epoch": 1.071153959898267, "grad_norm": 0.717085599899292, "learning_rate": 3.825034701609489e-06, "loss": 0.0782, "step": 36220 }, { "epoch": 1.071449695392441, "grad_norm": 0.9871371388435364, "learning_rate": 3.8249080116905495e-06, "loss": 0.105, "step": 36230 }, { "epoch": 1.071745430886615, "grad_norm": 1.5237213373184204, "learning_rate": 3.82478132177161e-06, "loss": 0.0912, "step": 36240 }, { "epoch": 1.072041166380789, "grad_norm": 0.9010123014450073, "learning_rate": 3.82465463185267e-06, "loss": 0.0835, "step": 36250 }, { "epoch": 1.072336901874963, "grad_norm": 0.7031849026679993, "learning_rate": 3.824527941933731e-06, "loss": 0.0764, "step": 36260 }, { "epoch": 1.072632637369137, "grad_norm": 1.8779549598693848, "learning_rate": 3.824401252014792e-06, "loss": 0.0871, "step": 36270 }, { "epoch": 1.072928372863311, "grad_norm": 1.309936761856079, "learning_rate": 3.824274562095853e-06, "loss": 0.1039, "step": 36280 }, { "epoch": 1.0732241083574852, "grad_norm": 0.9556559920310974, "learning_rate": 3.8241478721769125e-06, "loss": 0.087, "step": 36290 }, { "epoch": 1.073519843851659, "grad_norm": 0.968011200428009, "learning_rate": 3.824021182257974e-06, "loss": 0.0962, "step": 36300 }, { "epoch": 1.073815579345833, "grad_norm": 1.6156202554702759, "learning_rate": 3.823894492339034e-06, "loss": 0.0779, "step": 36310 }, { "epoch": 1.0741113148400072, "grad_norm": 0.7983059287071228, "learning_rate": 3.823767802420095e-06, "loss": 0.0841, "step": 36320 }, { "epoch": 1.0744070503341812, "grad_norm": 0.9924606084823608, "learning_rate": 3.823641112501156e-06, "loss": 0.0861, "step": 36330 }, { "epoch": 1.074702785828355, "grad_norm": 1.2400890588760376, "learning_rate": 3.8235144225822165e-06, "loss": 0.0816, "step": 36340 }, { "epoch": 1.074998521322529, "grad_norm": 0.9830821752548218, "learning_rate": 3.823387732663276e-06, "loss": 0.0938, "step": 36350 }, { "epoch": 1.0752942568167032, "grad_norm": 0.556152880191803, "learning_rate": 3.823261042744337e-06, "loss": 0.0682, "step": 36360 }, { "epoch": 1.0755899923108772, "grad_norm": 0.852250337600708, "learning_rate": 3.823134352825398e-06, "loss": 0.0804, "step": 36370 }, { "epoch": 1.075885727805051, "grad_norm": 1.1251208782196045, "learning_rate": 3.823007662906459e-06, "loss": 0.0766, "step": 36380 }, { "epoch": 1.0761814632992253, "grad_norm": 1.3533697128295898, "learning_rate": 3.82288097298752e-06, "loss": 0.0845, "step": 36390 }, { "epoch": 1.0764771987933992, "grad_norm": 0.8054975867271423, "learning_rate": 3.82275428306858e-06, "loss": 0.073, "step": 36400 }, { "epoch": 1.0767729342875731, "grad_norm": 1.2201193571090698, "learning_rate": 3.82262759314964e-06, "loss": 0.0728, "step": 36410 }, { "epoch": 1.077068669781747, "grad_norm": 0.8353965282440186, "learning_rate": 3.822500903230701e-06, "loss": 0.0837, "step": 36420 }, { "epoch": 1.0773644052759213, "grad_norm": 1.0421638488769531, "learning_rate": 3.822374213311762e-06, "loss": 0.0946, "step": 36430 }, { "epoch": 1.0776601407700952, "grad_norm": 0.893754243850708, "learning_rate": 3.822247523392823e-06, "loss": 0.0789, "step": 36440 }, { "epoch": 1.0779558762642691, "grad_norm": 0.9696335196495056, "learning_rate": 3.822120833473883e-06, "loss": 0.0811, "step": 36450 }, { "epoch": 1.0782516117584433, "grad_norm": 1.005748987197876, "learning_rate": 3.821994143554944e-06, "loss": 0.0926, "step": 36460 }, { "epoch": 1.0785473472526173, "grad_norm": 0.9209296703338623, "learning_rate": 3.821867453636004e-06, "loss": 0.0959, "step": 36470 }, { "epoch": 1.0788430827467912, "grad_norm": 0.8642947673797607, "learning_rate": 3.821740763717065e-06, "loss": 0.0881, "step": 36480 }, { "epoch": 1.0791388182409654, "grad_norm": 1.3148245811462402, "learning_rate": 3.821614073798126e-06, "loss": 0.0945, "step": 36490 }, { "epoch": 1.0794345537351393, "grad_norm": 0.7594534754753113, "learning_rate": 3.8214873838791866e-06, "loss": 0.0831, "step": 36500 }, { "epoch": 1.0797302892293132, "grad_norm": 0.9484379887580872, "learning_rate": 3.8213606939602465e-06, "loss": 0.0938, "step": 36510 }, { "epoch": 1.0800260247234874, "grad_norm": 0.9075420498847961, "learning_rate": 3.821234004041307e-06, "loss": 0.0859, "step": 36520 }, { "epoch": 1.0803217602176614, "grad_norm": 0.7219173908233643, "learning_rate": 3.821107314122368e-06, "loss": 0.0741, "step": 36530 }, { "epoch": 1.0806174957118353, "grad_norm": 0.8134803175926208, "learning_rate": 3.820980624203429e-06, "loss": 0.1033, "step": 36540 }, { "epoch": 1.0809132312060092, "grad_norm": 0.6012154221534729, "learning_rate": 3.82085393428449e-06, "loss": 0.0715, "step": 36550 }, { "epoch": 1.0812089667001834, "grad_norm": 0.9937418699264526, "learning_rate": 3.82072724436555e-06, "loss": 0.0735, "step": 36560 }, { "epoch": 1.0815047021943573, "grad_norm": 0.8233491778373718, "learning_rate": 3.82060055444661e-06, "loss": 0.0962, "step": 36570 }, { "epoch": 1.0818004376885313, "grad_norm": 0.9155701994895935, "learning_rate": 3.820473864527671e-06, "loss": 0.0864, "step": 36580 }, { "epoch": 1.0820961731827055, "grad_norm": 0.5687236189842224, "learning_rate": 3.820347174608732e-06, "loss": 0.1008, "step": 36590 }, { "epoch": 1.0823919086768794, "grad_norm": 0.873283326625824, "learning_rate": 3.820220484689792e-06, "loss": 0.0698, "step": 36600 }, { "epoch": 1.0826876441710533, "grad_norm": 0.813779354095459, "learning_rate": 3.820093794770853e-06, "loss": 0.1067, "step": 36610 }, { "epoch": 1.0829833796652275, "grad_norm": 0.8448309302330017, "learning_rate": 3.8199671048519135e-06, "loss": 0.073, "step": 36620 }, { "epoch": 1.0832791151594015, "grad_norm": 1.0405453443527222, "learning_rate": 3.819840414932974e-06, "loss": 0.0875, "step": 36630 }, { "epoch": 1.0835748506535754, "grad_norm": 0.5508952140808105, "learning_rate": 3.819713725014035e-06, "loss": 0.0857, "step": 36640 }, { "epoch": 1.0838705861477496, "grad_norm": 1.1195119619369507, "learning_rate": 3.819587035095096e-06, "loss": 0.0789, "step": 36650 }, { "epoch": 1.0841663216419235, "grad_norm": 1.3967773914337158, "learning_rate": 3.819460345176156e-06, "loss": 0.0816, "step": 36660 }, { "epoch": 1.0844620571360974, "grad_norm": 0.9729121923446655, "learning_rate": 3.819333655257217e-06, "loss": 0.091, "step": 36670 }, { "epoch": 1.0847577926302714, "grad_norm": 1.2352209091186523, "learning_rate": 3.819206965338277e-06, "loss": 0.1085, "step": 36680 }, { "epoch": 1.0850535281244456, "grad_norm": 0.8032063245773315, "learning_rate": 3.819080275419338e-06, "loss": 0.1034, "step": 36690 }, { "epoch": 1.0853492636186195, "grad_norm": 0.9455852508544922, "learning_rate": 3.818953585500399e-06, "loss": 0.1022, "step": 36700 }, { "epoch": 1.0856449991127934, "grad_norm": 0.48820415139198303, "learning_rate": 3.81882689558146e-06, "loss": 0.0576, "step": 36710 }, { "epoch": 1.0859407346069676, "grad_norm": 0.7664909362792969, "learning_rate": 3.81870020566252e-06, "loss": 0.1009, "step": 36720 }, { "epoch": 1.0862364701011415, "grad_norm": 0.7413383722305298, "learning_rate": 3.8185735157435805e-06, "loss": 0.0967, "step": 36730 }, { "epoch": 1.0865322055953155, "grad_norm": 0.8721510767936707, "learning_rate": 3.818446825824641e-06, "loss": 0.0888, "step": 36740 }, { "epoch": 1.0868279410894897, "grad_norm": 0.7136737108230591, "learning_rate": 3.818320135905702e-06, "loss": 0.0814, "step": 36750 }, { "epoch": 1.0871236765836636, "grad_norm": 0.847134530544281, "learning_rate": 3.818193445986762e-06, "loss": 0.0869, "step": 36760 }, { "epoch": 1.0874194120778375, "grad_norm": 0.8807571530342102, "learning_rate": 3.818066756067823e-06, "loss": 0.0815, "step": 36770 }, { "epoch": 1.0877151475720117, "grad_norm": 1.1065874099731445, "learning_rate": 3.817940066148884e-06, "loss": 0.0901, "step": 36780 }, { "epoch": 1.0880108830661857, "grad_norm": 1.7272443771362305, "learning_rate": 3.817813376229944e-06, "loss": 0.107, "step": 36790 }, { "epoch": 1.0883066185603596, "grad_norm": 0.4765593409538269, "learning_rate": 3.817686686311005e-06, "loss": 0.0735, "step": 36800 }, { "epoch": 1.0886023540545335, "grad_norm": 0.7406877279281616, "learning_rate": 3.817559996392066e-06, "loss": 0.0847, "step": 36810 }, { "epoch": 1.0888980895487077, "grad_norm": 1.1641147136688232, "learning_rate": 3.817433306473126e-06, "loss": 0.0824, "step": 36820 }, { "epoch": 1.0891938250428816, "grad_norm": 1.1552475690841675, "learning_rate": 3.817306616554187e-06, "loss": 0.1164, "step": 36830 }, { "epoch": 1.0894895605370556, "grad_norm": 0.8409350514411926, "learning_rate": 3.8171799266352475e-06, "loss": 0.0737, "step": 36840 }, { "epoch": 1.0897852960312298, "grad_norm": 0.8188825249671936, "learning_rate": 3.817053236716308e-06, "loss": 0.0827, "step": 36850 }, { "epoch": 1.0900810315254037, "grad_norm": 1.188090205192566, "learning_rate": 3.816926546797369e-06, "loss": 0.072, "step": 36860 }, { "epoch": 1.0903767670195776, "grad_norm": 1.001729965209961, "learning_rate": 3.81679985687843e-06, "loss": 0.0765, "step": 36870 }, { "epoch": 1.0906725025137516, "grad_norm": 0.7325876951217651, "learning_rate": 3.81667316695949e-06, "loss": 0.1108, "step": 36880 }, { "epoch": 1.0909682380079258, "grad_norm": 0.8413428664207458, "learning_rate": 3.816546477040551e-06, "loss": 0.0804, "step": 36890 }, { "epoch": 1.0912639735020997, "grad_norm": 0.633029043674469, "learning_rate": 3.816419787121611e-06, "loss": 0.0738, "step": 36900 }, { "epoch": 1.0915597089962736, "grad_norm": 0.9977152347564697, "learning_rate": 3.816293097202672e-06, "loss": 0.0596, "step": 36910 }, { "epoch": 1.0918554444904478, "grad_norm": 1.306154489517212, "learning_rate": 3.816166407283732e-06, "loss": 0.0896, "step": 36920 }, { "epoch": 1.0921511799846217, "grad_norm": 0.9846440553665161, "learning_rate": 3.816039717364793e-06, "loss": 0.0904, "step": 36930 }, { "epoch": 1.0924469154787957, "grad_norm": 0.6522229909896851, "learning_rate": 3.815913027445854e-06, "loss": 0.0885, "step": 36940 }, { "epoch": 1.0927426509729699, "grad_norm": 1.5554771423339844, "learning_rate": 3.8157863375269145e-06, "loss": 0.1005, "step": 36950 }, { "epoch": 1.0930383864671438, "grad_norm": 1.0552973747253418, "learning_rate": 3.815659647607975e-06, "loss": 0.0931, "step": 36960 }, { "epoch": 1.0933341219613177, "grad_norm": 0.7967544794082642, "learning_rate": 3.815532957689035e-06, "loss": 0.0971, "step": 36970 }, { "epoch": 1.093629857455492, "grad_norm": 0.9214008450508118, "learning_rate": 3.815406267770096e-06, "loss": 0.0992, "step": 36980 }, { "epoch": 1.0939255929496658, "grad_norm": 0.722139835357666, "learning_rate": 3.815279577851157e-06, "loss": 0.0844, "step": 36990 }, { "epoch": 1.0942213284438398, "grad_norm": 1.0590335130691528, "learning_rate": 3.8151528879322176e-06, "loss": 0.1018, "step": 37000 }, { "epoch": 1.0945170639380137, "grad_norm": 0.9251091480255127, "learning_rate": 3.8150261980132775e-06, "loss": 0.0762, "step": 37010 }, { "epoch": 1.094812799432188, "grad_norm": 1.1330221891403198, "learning_rate": 3.814899508094339e-06, "loss": 0.0852, "step": 37020 }, { "epoch": 1.0951085349263618, "grad_norm": 0.9732328653335571, "learning_rate": 3.814772818175399e-06, "loss": 0.0823, "step": 37030 }, { "epoch": 1.0954042704205358, "grad_norm": 0.9470537304878235, "learning_rate": 3.81464612825646e-06, "loss": 0.1147, "step": 37040 }, { "epoch": 1.09570000591471, "grad_norm": 0.9441283941268921, "learning_rate": 3.8145194383375202e-06, "loss": 0.0926, "step": 37050 }, { "epoch": 1.095995741408884, "grad_norm": 1.4091191291809082, "learning_rate": 3.8143927484185815e-06, "loss": 0.0734, "step": 37060 }, { "epoch": 1.0962914769030578, "grad_norm": 0.89626145362854, "learning_rate": 3.814266058499642e-06, "loss": 0.0899, "step": 37070 }, { "epoch": 1.096587212397232, "grad_norm": 1.2853517532348633, "learning_rate": 3.8141393685807026e-06, "loss": 0.1066, "step": 37080 }, { "epoch": 1.096882947891406, "grad_norm": 1.0685222148895264, "learning_rate": 3.814012678661763e-06, "loss": 0.0802, "step": 37090 }, { "epoch": 1.09717868338558, "grad_norm": 1.0094449520111084, "learning_rate": 3.8138859887428238e-06, "loss": 0.0787, "step": 37100 }, { "epoch": 1.097474418879754, "grad_norm": 0.7330787777900696, "learning_rate": 3.813759298823884e-06, "loss": 0.0745, "step": 37110 }, { "epoch": 1.097770154373928, "grad_norm": 0.6592264771461487, "learning_rate": 3.813632608904945e-06, "loss": 0.0889, "step": 37120 }, { "epoch": 1.098065889868102, "grad_norm": 0.7728214859962463, "learning_rate": 3.8135059189860053e-06, "loss": 0.1036, "step": 37130 }, { "epoch": 1.0983616253622759, "grad_norm": 1.0939857959747314, "learning_rate": 3.8133792290670665e-06, "loss": 0.1005, "step": 37140 }, { "epoch": 1.09865736085645, "grad_norm": 0.7490022778511047, "learning_rate": 3.813252539148127e-06, "loss": 0.0722, "step": 37150 }, { "epoch": 1.098953096350624, "grad_norm": 1.3105677366256714, "learning_rate": 3.8131258492291877e-06, "loss": 0.0802, "step": 37160 }, { "epoch": 1.099248831844798, "grad_norm": 1.5364491939544678, "learning_rate": 3.812999159310248e-06, "loss": 0.0738, "step": 37170 }, { "epoch": 1.099544567338972, "grad_norm": 0.7950206398963928, "learning_rate": 3.812872469391309e-06, "loss": 0.1069, "step": 37180 }, { "epoch": 1.099840302833146, "grad_norm": 1.2401049137115479, "learning_rate": 3.812745779472369e-06, "loss": 0.0843, "step": 37190 }, { "epoch": 1.10013603832732, "grad_norm": 0.7854188680648804, "learning_rate": 3.81261908955343e-06, "loss": 0.0954, "step": 37200 }, { "epoch": 1.1004317738214942, "grad_norm": 0.6864897012710571, "learning_rate": 3.8124923996344903e-06, "loss": 0.0633, "step": 37210 }, { "epoch": 1.100727509315668, "grad_norm": 1.3548831939697266, "learning_rate": 3.8123657097155516e-06, "loss": 0.0928, "step": 37220 }, { "epoch": 1.101023244809842, "grad_norm": 0.6799892783164978, "learning_rate": 3.812239019796612e-06, "loss": 0.0881, "step": 37230 }, { "epoch": 1.1013189803040162, "grad_norm": 1.3433235883712769, "learning_rate": 3.8121123298776727e-06, "loss": 0.0978, "step": 37240 }, { "epoch": 1.1016147157981901, "grad_norm": 1.222965955734253, "learning_rate": 3.811985639958733e-06, "loss": 0.0915, "step": 37250 }, { "epoch": 1.101910451292364, "grad_norm": 0.35209164023399353, "learning_rate": 3.811858950039794e-06, "loss": 0.0653, "step": 37260 }, { "epoch": 1.102206186786538, "grad_norm": 1.2242276668548584, "learning_rate": 3.8117322601208542e-06, "loss": 0.089, "step": 37270 }, { "epoch": 1.1025019222807122, "grad_norm": 0.8507435917854309, "learning_rate": 3.811605570201915e-06, "loss": 0.0918, "step": 37280 }, { "epoch": 1.1027976577748861, "grad_norm": 1.016122579574585, "learning_rate": 3.8114788802829754e-06, "loss": 0.0865, "step": 37290 }, { "epoch": 1.10309339326906, "grad_norm": 0.6451500654220581, "learning_rate": 3.8113521903640366e-06, "loss": 0.0775, "step": 37300 }, { "epoch": 1.1033891287632343, "grad_norm": 0.97666996717453, "learning_rate": 3.811225500445097e-06, "loss": 0.0828, "step": 37310 }, { "epoch": 1.1036848642574082, "grad_norm": 1.1365678310394287, "learning_rate": 3.8110988105261578e-06, "loss": 0.0802, "step": 37320 }, { "epoch": 1.1039805997515821, "grad_norm": 1.2243098020553589, "learning_rate": 3.810972120607218e-06, "loss": 0.0884, "step": 37330 }, { "epoch": 1.104276335245756, "grad_norm": 1.4042572975158691, "learning_rate": 3.8108454306882785e-06, "loss": 0.1043, "step": 37340 }, { "epoch": 1.1045720707399302, "grad_norm": 0.7616188526153564, "learning_rate": 3.8107187407693393e-06, "loss": 0.0874, "step": 37350 }, { "epoch": 1.1048678062341042, "grad_norm": 1.5549286603927612, "learning_rate": 3.8105920508503996e-06, "loss": 0.0838, "step": 37360 }, { "epoch": 1.1051635417282781, "grad_norm": 1.1842336654663086, "learning_rate": 3.8104653609314604e-06, "loss": 0.0877, "step": 37370 }, { "epoch": 1.1054592772224523, "grad_norm": 0.5631547570228577, "learning_rate": 3.8103386710125212e-06, "loss": 0.0681, "step": 37380 }, { "epoch": 1.1057550127166262, "grad_norm": 1.0746885538101196, "learning_rate": 3.810211981093582e-06, "loss": 0.1006, "step": 37390 }, { "epoch": 1.1060507482108002, "grad_norm": 0.8335316777229309, "learning_rate": 3.8100852911746424e-06, "loss": 0.0856, "step": 37400 }, { "epoch": 1.1063464837049743, "grad_norm": 1.271690011024475, "learning_rate": 3.809958601255703e-06, "loss": 0.0666, "step": 37410 }, { "epoch": 1.1066422191991483, "grad_norm": 1.5070453882217407, "learning_rate": 3.8098319113367635e-06, "loss": 0.0919, "step": 37420 }, { "epoch": 1.1069379546933222, "grad_norm": 0.5396719574928284, "learning_rate": 3.8097052214178243e-06, "loss": 0.075, "step": 37430 }, { "epoch": 1.1072336901874964, "grad_norm": 0.9930130243301392, "learning_rate": 3.8095785314988847e-06, "loss": 0.0887, "step": 37440 }, { "epoch": 1.1075294256816703, "grad_norm": 1.676056146621704, "learning_rate": 3.8094518415799455e-06, "loss": 0.0803, "step": 37450 }, { "epoch": 1.1078251611758443, "grad_norm": 0.745854377746582, "learning_rate": 3.8093251516610063e-06, "loss": 0.0848, "step": 37460 }, { "epoch": 1.1081208966700182, "grad_norm": 1.1969531774520874, "learning_rate": 3.809198461742067e-06, "loss": 0.0757, "step": 37470 }, { "epoch": 1.1084166321641924, "grad_norm": 0.8933366537094116, "learning_rate": 3.8090717718231274e-06, "loss": 0.0937, "step": 37480 }, { "epoch": 1.1087123676583663, "grad_norm": 0.7120084762573242, "learning_rate": 3.808945081904188e-06, "loss": 0.0944, "step": 37490 }, { "epoch": 1.1090081031525403, "grad_norm": 0.8842336535453796, "learning_rate": 3.8088183919852486e-06, "loss": 0.0915, "step": 37500 }, { "epoch": 1.1093038386467144, "grad_norm": 0.6358911395072937, "learning_rate": 3.8086917020663094e-06, "loss": 0.0566, "step": 37510 }, { "epoch": 1.1095995741408884, "grad_norm": 1.3384801149368286, "learning_rate": 3.8085650121473697e-06, "loss": 0.0922, "step": 37520 }, { "epoch": 1.1098953096350623, "grad_norm": 0.831244707107544, "learning_rate": 3.8084383222284305e-06, "loss": 0.1183, "step": 37530 }, { "epoch": 1.1101910451292365, "grad_norm": 0.7214848399162292, "learning_rate": 3.8083116323094913e-06, "loss": 0.0831, "step": 37540 }, { "epoch": 1.1104867806234104, "grad_norm": 1.1560875177383423, "learning_rate": 3.808184942390552e-06, "loss": 0.0873, "step": 37550 }, { "epoch": 1.1107825161175844, "grad_norm": 1.0057750940322876, "learning_rate": 3.8080582524716125e-06, "loss": 0.0741, "step": 37560 }, { "epoch": 1.1110782516117585, "grad_norm": 1.048683524131775, "learning_rate": 3.8079315625526733e-06, "loss": 0.0844, "step": 37570 }, { "epoch": 1.1113739871059325, "grad_norm": 0.7952044606208801, "learning_rate": 3.8078048726337336e-06, "loss": 0.0957, "step": 37580 }, { "epoch": 1.1116697226001064, "grad_norm": 0.7433246374130249, "learning_rate": 3.8076781827147944e-06, "loss": 0.0858, "step": 37590 }, { "epoch": 1.1119654580942804, "grad_norm": 0.8579266667366028, "learning_rate": 3.8075514927958548e-06, "loss": 0.0872, "step": 37600 }, { "epoch": 1.1122611935884545, "grad_norm": 0.9947556853294373, "learning_rate": 3.8074248028769156e-06, "loss": 0.0819, "step": 37610 }, { "epoch": 1.1125569290826285, "grad_norm": 1.4094003438949585, "learning_rate": 3.8072981129579764e-06, "loss": 0.0883, "step": 37620 }, { "epoch": 1.1128526645768024, "grad_norm": 0.6735395789146423, "learning_rate": 3.807171423039037e-06, "loss": 0.1075, "step": 37630 }, { "epoch": 1.1131484000709766, "grad_norm": 0.9604397416114807, "learning_rate": 3.8070447331200975e-06, "loss": 0.1012, "step": 37640 }, { "epoch": 1.1134441355651505, "grad_norm": 1.1062971353530884, "learning_rate": 3.8069180432011583e-06, "loss": 0.0748, "step": 37650 }, { "epoch": 1.1137398710593245, "grad_norm": 0.9630351066589355, "learning_rate": 3.8067913532822187e-06, "loss": 0.0792, "step": 37660 }, { "epoch": 1.1140356065534986, "grad_norm": 1.1424026489257812, "learning_rate": 3.8066646633632795e-06, "loss": 0.0853, "step": 37670 }, { "epoch": 1.1143313420476726, "grad_norm": 0.8380874991416931, "learning_rate": 3.80653797344434e-06, "loss": 0.1047, "step": 37680 }, { "epoch": 1.1146270775418465, "grad_norm": 1.2229132652282715, "learning_rate": 3.8064112835254006e-06, "loss": 0.0845, "step": 37690 }, { "epoch": 1.1149228130360207, "grad_norm": 0.9199686050415039, "learning_rate": 3.8062845936064614e-06, "loss": 0.0745, "step": 37700 }, { "epoch": 1.1152185485301946, "grad_norm": 0.6569526791572571, "learning_rate": 3.806157903687522e-06, "loss": 0.0756, "step": 37710 }, { "epoch": 1.1155142840243686, "grad_norm": 0.5222189426422119, "learning_rate": 3.8060312137685826e-06, "loss": 0.0945, "step": 37720 }, { "epoch": 1.1158100195185425, "grad_norm": 1.2903326749801636, "learning_rate": 3.8059045238496433e-06, "loss": 0.0885, "step": 37730 }, { "epoch": 1.1161057550127167, "grad_norm": 0.6736968159675598, "learning_rate": 3.8057778339307037e-06, "loss": 0.0756, "step": 37740 }, { "epoch": 1.1164014905068906, "grad_norm": 1.2775728702545166, "learning_rate": 3.805651144011764e-06, "loss": 0.0859, "step": 37750 }, { "epoch": 1.1166972260010646, "grad_norm": 1.2124030590057373, "learning_rate": 3.805524454092825e-06, "loss": 0.0674, "step": 37760 }, { "epoch": 1.1169929614952387, "grad_norm": 1.6355124711990356, "learning_rate": 3.8053977641738852e-06, "loss": 0.0837, "step": 37770 }, { "epoch": 1.1172886969894127, "grad_norm": 1.1145398616790771, "learning_rate": 3.8052710742549464e-06, "loss": 0.0998, "step": 37780 }, { "epoch": 1.1175844324835866, "grad_norm": 0.9235697388648987, "learning_rate": 3.805144384336007e-06, "loss": 0.0827, "step": 37790 }, { "epoch": 1.1178801679777606, "grad_norm": 1.1083340644836426, "learning_rate": 3.8050176944170676e-06, "loss": 0.0925, "step": 37800 }, { "epoch": 1.1181759034719347, "grad_norm": 0.5998646020889282, "learning_rate": 3.804891004498128e-06, "loss": 0.0735, "step": 37810 }, { "epoch": 1.1184716389661087, "grad_norm": 1.0531213283538818, "learning_rate": 3.8047643145791888e-06, "loss": 0.078, "step": 37820 }, { "epoch": 1.1187673744602826, "grad_norm": 1.1022192239761353, "learning_rate": 3.804637624660249e-06, "loss": 0.1, "step": 37830 }, { "epoch": 1.1190631099544568, "grad_norm": 1.1768758296966553, "learning_rate": 3.80451093474131e-06, "loss": 0.0884, "step": 37840 }, { "epoch": 1.1193588454486307, "grad_norm": 1.0876543521881104, "learning_rate": 3.8043842448223703e-06, "loss": 0.0859, "step": 37850 }, { "epoch": 1.1196545809428047, "grad_norm": 0.5666180849075317, "learning_rate": 3.8042575549034315e-06, "loss": 0.0919, "step": 37860 }, { "epoch": 1.1199503164369788, "grad_norm": 0.8166902661323547, "learning_rate": 3.804130864984492e-06, "loss": 0.0686, "step": 37870 }, { "epoch": 1.1202460519311528, "grad_norm": 0.9380121231079102, "learning_rate": 3.8040041750655526e-06, "loss": 0.0873, "step": 37880 }, { "epoch": 1.1205417874253267, "grad_norm": 0.9037344455718994, "learning_rate": 3.803877485146613e-06, "loss": 0.091, "step": 37890 }, { "epoch": 1.120837522919501, "grad_norm": 0.870993435382843, "learning_rate": 3.803750795227674e-06, "loss": 0.089, "step": 37900 }, { "epoch": 1.1211332584136748, "grad_norm": 1.026026964187622, "learning_rate": 3.803624105308734e-06, "loss": 0.0796, "step": 37910 }, { "epoch": 1.1214289939078488, "grad_norm": 1.0117535591125488, "learning_rate": 3.803497415389795e-06, "loss": 0.0986, "step": 37920 }, { "epoch": 1.1217247294020227, "grad_norm": 1.0999575853347778, "learning_rate": 3.8033707254708553e-06, "loss": 0.0737, "step": 37930 }, { "epoch": 1.122020464896197, "grad_norm": 0.8756876587867737, "learning_rate": 3.8032440355519165e-06, "loss": 0.0892, "step": 37940 }, { "epoch": 1.1223162003903708, "grad_norm": 0.6203020811080933, "learning_rate": 3.803117345632977e-06, "loss": 0.0857, "step": 37950 }, { "epoch": 1.1226119358845448, "grad_norm": 0.8514872789382935, "learning_rate": 3.8029906557140377e-06, "loss": 0.0794, "step": 37960 }, { "epoch": 1.122907671378719, "grad_norm": 1.1557047367095947, "learning_rate": 3.802863965795098e-06, "loss": 0.0984, "step": 37970 }, { "epoch": 1.1232034068728929, "grad_norm": 1.6117757558822632, "learning_rate": 3.802737275876159e-06, "loss": 0.0975, "step": 37980 }, { "epoch": 1.1234991423670668, "grad_norm": 0.8823937773704529, "learning_rate": 3.8026105859572192e-06, "loss": 0.0879, "step": 37990 }, { "epoch": 1.123794877861241, "grad_norm": 0.7551530599594116, "learning_rate": 3.80248389603828e-06, "loss": 0.0906, "step": 38000 }, { "epoch": 1.124090613355415, "grad_norm": 1.3097922801971436, "learning_rate": 3.8023572061193404e-06, "loss": 0.0669, "step": 38010 }, { "epoch": 1.1243863488495889, "grad_norm": 1.0349310636520386, "learning_rate": 3.8022305162004016e-06, "loss": 0.0951, "step": 38020 }, { "epoch": 1.124682084343763, "grad_norm": 1.3155637979507446, "learning_rate": 3.802103826281462e-06, "loss": 0.1079, "step": 38030 }, { "epoch": 1.124977819837937, "grad_norm": 1.232192039489746, "learning_rate": 3.8019771363625227e-06, "loss": 0.0748, "step": 38040 }, { "epoch": 1.125273555332111, "grad_norm": 0.7101234197616577, "learning_rate": 3.801850446443583e-06, "loss": 0.0919, "step": 38050 }, { "epoch": 1.1255692908262849, "grad_norm": 1.0355970859527588, "learning_rate": 3.801723756524644e-06, "loss": 0.0752, "step": 38060 }, { "epoch": 1.125865026320459, "grad_norm": 0.7525581121444702, "learning_rate": 3.8015970666057043e-06, "loss": 0.0663, "step": 38070 }, { "epoch": 1.126160761814633, "grad_norm": 1.160431981086731, "learning_rate": 3.801470376686765e-06, "loss": 0.1014, "step": 38080 }, { "epoch": 1.126456497308807, "grad_norm": 0.8095492124557495, "learning_rate": 3.8013436867678254e-06, "loss": 0.1013, "step": 38090 }, { "epoch": 1.126752232802981, "grad_norm": 1.3842906951904297, "learning_rate": 3.8012169968488866e-06, "loss": 0.0964, "step": 38100 }, { "epoch": 1.127047968297155, "grad_norm": 0.9958745241165161, "learning_rate": 3.801090306929947e-06, "loss": 0.0797, "step": 38110 }, { "epoch": 1.127343703791329, "grad_norm": 1.2823034524917603, "learning_rate": 3.8009636170110078e-06, "loss": 0.0945, "step": 38120 }, { "epoch": 1.1276394392855031, "grad_norm": 1.2981833219528198, "learning_rate": 3.800836927092068e-06, "loss": 0.0971, "step": 38130 }, { "epoch": 1.127935174779677, "grad_norm": 1.3184056282043457, "learning_rate": 3.800710237173129e-06, "loss": 0.0758, "step": 38140 }, { "epoch": 1.128230910273851, "grad_norm": 0.3351867198944092, "learning_rate": 3.8005835472541893e-06, "loss": 0.0782, "step": 38150 }, { "epoch": 1.1285266457680252, "grad_norm": 0.868144154548645, "learning_rate": 3.80045685733525e-06, "loss": 0.0753, "step": 38160 }, { "epoch": 1.1288223812621991, "grad_norm": 0.8805364370346069, "learning_rate": 3.8003301674163105e-06, "loss": 0.0784, "step": 38170 }, { "epoch": 1.129118116756373, "grad_norm": 0.9241329431533813, "learning_rate": 3.8002034774973713e-06, "loss": 0.0937, "step": 38180 }, { "epoch": 1.129413852250547, "grad_norm": 0.8883780837059021, "learning_rate": 3.800076787578432e-06, "loss": 0.0812, "step": 38190 }, { "epoch": 1.1297095877447212, "grad_norm": 0.7400480508804321, "learning_rate": 3.7999500976594924e-06, "loss": 0.0663, "step": 38200 }, { "epoch": 1.1300053232388951, "grad_norm": 0.965704619884491, "learning_rate": 3.799823407740553e-06, "loss": 0.0724, "step": 38210 }, { "epoch": 1.130301058733069, "grad_norm": 0.8163557052612305, "learning_rate": 3.7996967178216136e-06, "loss": 0.0789, "step": 38220 }, { "epoch": 1.1305967942272432, "grad_norm": 0.6334064602851868, "learning_rate": 3.7995700279026744e-06, "loss": 0.0903, "step": 38230 }, { "epoch": 1.1308925297214172, "grad_norm": 1.014549970626831, "learning_rate": 3.7994433379837347e-06, "loss": 0.0785, "step": 38240 }, { "epoch": 1.1311882652155911, "grad_norm": 1.4102089405059814, "learning_rate": 3.7993166480647955e-06, "loss": 0.0672, "step": 38250 }, { "epoch": 1.131484000709765, "grad_norm": 0.7206421494483948, "learning_rate": 3.7991899581458563e-06, "loss": 0.0812, "step": 38260 }, { "epoch": 1.1317797362039392, "grad_norm": 1.4387738704681396, "learning_rate": 3.799063268226917e-06, "loss": 0.0837, "step": 38270 }, { "epoch": 1.1320754716981132, "grad_norm": 0.905573308467865, "learning_rate": 3.7989365783079775e-06, "loss": 0.0938, "step": 38280 }, { "epoch": 1.1323712071922871, "grad_norm": 1.0952839851379395, "learning_rate": 3.7988098883890382e-06, "loss": 0.0929, "step": 38290 }, { "epoch": 1.1326669426864613, "grad_norm": 1.1314984560012817, "learning_rate": 3.7986831984700986e-06, "loss": 0.0735, "step": 38300 }, { "epoch": 1.1329626781806352, "grad_norm": 0.8126928210258484, "learning_rate": 3.7985565085511594e-06, "loss": 0.0863, "step": 38310 }, { "epoch": 1.1332584136748092, "grad_norm": 0.8814185857772827, "learning_rate": 3.7984298186322198e-06, "loss": 0.081, "step": 38320 }, { "epoch": 1.1335541491689833, "grad_norm": 0.8936780691146851, "learning_rate": 3.7983031287132806e-06, "loss": 0.0842, "step": 38330 }, { "epoch": 1.1338498846631573, "grad_norm": 0.7618563175201416, "learning_rate": 3.7981764387943413e-06, "loss": 0.0878, "step": 38340 }, { "epoch": 1.1341456201573312, "grad_norm": 0.50667804479599, "learning_rate": 3.798049748875402e-06, "loss": 0.0771, "step": 38350 }, { "epoch": 1.1344413556515054, "grad_norm": 0.6683253645896912, "learning_rate": 3.7979230589564625e-06, "loss": 0.0553, "step": 38360 }, { "epoch": 1.1347370911456793, "grad_norm": 0.960074782371521, "learning_rate": 3.7977963690375233e-06, "loss": 0.1028, "step": 38370 }, { "epoch": 1.1350328266398533, "grad_norm": 0.9135516881942749, "learning_rate": 3.7976696791185837e-06, "loss": 0.0977, "step": 38380 }, { "epoch": 1.1353285621340272, "grad_norm": 1.093906044960022, "learning_rate": 3.7975429891996444e-06, "loss": 0.0875, "step": 38390 }, { "epoch": 1.1356242976282014, "grad_norm": 0.836508572101593, "learning_rate": 3.797416299280705e-06, "loss": 0.0832, "step": 38400 }, { "epoch": 1.1359200331223753, "grad_norm": 1.024651288986206, "learning_rate": 3.7972896093617656e-06, "loss": 0.0736, "step": 38410 }, { "epoch": 1.1362157686165493, "grad_norm": 1.3333070278167725, "learning_rate": 3.7971629194428264e-06, "loss": 0.0809, "step": 38420 }, { "epoch": 1.1365115041107234, "grad_norm": 0.6557658314704895, "learning_rate": 3.797036229523887e-06, "loss": 0.0853, "step": 38430 }, { "epoch": 1.1368072396048974, "grad_norm": 1.1486973762512207, "learning_rate": 3.7969095396049475e-06, "loss": 0.0961, "step": 38440 }, { "epoch": 1.1371029750990713, "grad_norm": 0.7411247491836548, "learning_rate": 3.7967828496860083e-06, "loss": 0.0772, "step": 38450 }, { "epoch": 1.1373987105932455, "grad_norm": 1.7994754314422607, "learning_rate": 3.7966561597670687e-06, "loss": 0.088, "step": 38460 }, { "epoch": 1.1376944460874194, "grad_norm": 1.404000997543335, "learning_rate": 3.7965294698481295e-06, "loss": 0.101, "step": 38470 }, { "epoch": 1.1379901815815934, "grad_norm": 1.0706795454025269, "learning_rate": 3.79640277992919e-06, "loss": 0.0927, "step": 38480 }, { "epoch": 1.1382859170757675, "grad_norm": 1.009475588798523, "learning_rate": 3.7962760900102506e-06, "loss": 0.1014, "step": 38490 }, { "epoch": 1.1385816525699415, "grad_norm": 0.7621009349822998, "learning_rate": 3.7961494000913114e-06, "loss": 0.0894, "step": 38500 }, { "epoch": 1.1388773880641154, "grad_norm": 0.9532961845397949, "learning_rate": 3.7960227101723722e-06, "loss": 0.0679, "step": 38510 }, { "epoch": 1.1391731235582894, "grad_norm": 1.1521953344345093, "learning_rate": 3.7958960202534326e-06, "loss": 0.1101, "step": 38520 }, { "epoch": 1.1394688590524635, "grad_norm": 1.1345793008804321, "learning_rate": 3.7957693303344934e-06, "loss": 0.1132, "step": 38530 }, { "epoch": 1.1397645945466375, "grad_norm": 0.8771858811378479, "learning_rate": 3.7956426404155537e-06, "loss": 0.0968, "step": 38540 }, { "epoch": 1.1400603300408114, "grad_norm": 1.0207452774047852, "learning_rate": 3.7955159504966145e-06, "loss": 0.0836, "step": 38550 }, { "epoch": 1.1403560655349856, "grad_norm": 0.7596648931503296, "learning_rate": 3.795389260577675e-06, "loss": 0.0617, "step": 38560 }, { "epoch": 1.1406518010291595, "grad_norm": 1.1303573846817017, "learning_rate": 3.7952625706587357e-06, "loss": 0.0904, "step": 38570 }, { "epoch": 1.1409475365233335, "grad_norm": 1.0805177688598633, "learning_rate": 3.7951358807397965e-06, "loss": 0.0772, "step": 38580 }, { "epoch": 1.1412432720175076, "grad_norm": 0.8149809241294861, "learning_rate": 3.795009190820857e-06, "loss": 0.0895, "step": 38590 }, { "epoch": 1.1415390075116816, "grad_norm": 1.1619057655334473, "learning_rate": 3.7948825009019176e-06, "loss": 0.0887, "step": 38600 }, { "epoch": 1.1418347430058555, "grad_norm": 1.104313850402832, "learning_rate": 3.794755810982978e-06, "loss": 0.0847, "step": 38610 }, { "epoch": 1.1421304785000297, "grad_norm": 0.7030765414237976, "learning_rate": 3.7946291210640388e-06, "loss": 0.0791, "step": 38620 }, { "epoch": 1.1424262139942036, "grad_norm": 1.0153226852416992, "learning_rate": 3.794502431145099e-06, "loss": 0.1153, "step": 38630 }, { "epoch": 1.1427219494883776, "grad_norm": 0.7698953747749329, "learning_rate": 3.79437574122616e-06, "loss": 0.1015, "step": 38640 }, { "epoch": 1.1430176849825515, "grad_norm": 0.9995932579040527, "learning_rate": 3.7942490513072203e-06, "loss": 0.0653, "step": 38650 }, { "epoch": 1.1433134204767257, "grad_norm": 1.049709439277649, "learning_rate": 3.7941223613882815e-06, "loss": 0.076, "step": 38660 }, { "epoch": 1.1436091559708996, "grad_norm": 1.6006439924240112, "learning_rate": 3.793995671469342e-06, "loss": 0.1103, "step": 38670 }, { "epoch": 1.1439048914650736, "grad_norm": 1.0747376680374146, "learning_rate": 3.7938689815504027e-06, "loss": 0.0947, "step": 38680 }, { "epoch": 1.1442006269592477, "grad_norm": 1.1213854551315308, "learning_rate": 3.793742291631463e-06, "loss": 0.0906, "step": 38690 }, { "epoch": 1.1444963624534217, "grad_norm": 0.7378377318382263, "learning_rate": 3.793615601712524e-06, "loss": 0.0817, "step": 38700 }, { "epoch": 1.1447920979475956, "grad_norm": 0.5928450226783752, "learning_rate": 3.793488911793584e-06, "loss": 0.0654, "step": 38710 }, { "epoch": 1.1450878334417696, "grad_norm": 0.8763554096221924, "learning_rate": 3.793362221874645e-06, "loss": 0.0875, "step": 38720 }, { "epoch": 1.1453835689359437, "grad_norm": 1.0500304698944092, "learning_rate": 3.7932355319557054e-06, "loss": 0.0862, "step": 38730 }, { "epoch": 1.1456793044301177, "grad_norm": 1.0702987909317017, "learning_rate": 3.7931088420367666e-06, "loss": 0.0766, "step": 38740 }, { "epoch": 1.1459750399242918, "grad_norm": 0.6566046476364136, "learning_rate": 3.792982152117827e-06, "loss": 0.0739, "step": 38750 }, { "epoch": 1.1462707754184658, "grad_norm": 1.19586181640625, "learning_rate": 3.7928554621988877e-06, "loss": 0.0855, "step": 38760 }, { "epoch": 1.1465665109126397, "grad_norm": 0.6917278170585632, "learning_rate": 3.792728772279948e-06, "loss": 0.0939, "step": 38770 }, { "epoch": 1.1468622464068137, "grad_norm": 0.8787504434585571, "learning_rate": 3.792602082361009e-06, "loss": 0.0883, "step": 38780 }, { "epoch": 1.1471579819009878, "grad_norm": 1.0181633234024048, "learning_rate": 3.7924753924420692e-06, "loss": 0.0906, "step": 38790 }, { "epoch": 1.1474537173951618, "grad_norm": 1.135150671005249, "learning_rate": 3.79234870252313e-06, "loss": 0.0808, "step": 38800 }, { "epoch": 1.1477494528893357, "grad_norm": 1.4856150150299072, "learning_rate": 3.7922220126041904e-06, "loss": 0.0781, "step": 38810 }, { "epoch": 1.1480451883835099, "grad_norm": 1.0902622938156128, "learning_rate": 3.7920953226852516e-06, "loss": 0.1041, "step": 38820 }, { "epoch": 1.1483409238776838, "grad_norm": 1.0374878644943237, "learning_rate": 3.791968632766312e-06, "loss": 0.0936, "step": 38830 }, { "epoch": 1.1486366593718578, "grad_norm": 1.0660957098007202, "learning_rate": 3.7918419428473728e-06, "loss": 0.0999, "step": 38840 }, { "epoch": 1.1489323948660317, "grad_norm": 0.6759591698646545, "learning_rate": 3.791715252928433e-06, "loss": 0.0755, "step": 38850 }, { "epoch": 1.1492281303602059, "grad_norm": 1.1223680973052979, "learning_rate": 3.791588563009494e-06, "loss": 0.0669, "step": 38860 }, { "epoch": 1.1495238658543798, "grad_norm": 0.7099519371986389, "learning_rate": 3.7914618730905543e-06, "loss": 0.0759, "step": 38870 }, { "epoch": 1.1498196013485538, "grad_norm": 1.1419063806533813, "learning_rate": 3.791335183171615e-06, "loss": 0.0862, "step": 38880 }, { "epoch": 1.150115336842728, "grad_norm": 1.0938197374343872, "learning_rate": 3.7912084932526754e-06, "loss": 0.0884, "step": 38890 }, { "epoch": 1.1504110723369019, "grad_norm": 1.967569351196289, "learning_rate": 3.7910818033337367e-06, "loss": 0.0892, "step": 38900 }, { "epoch": 1.1507068078310758, "grad_norm": 1.0213228464126587, "learning_rate": 3.790955113414797e-06, "loss": 0.0737, "step": 38910 }, { "epoch": 1.15100254332525, "grad_norm": 1.5035964250564575, "learning_rate": 3.790828423495858e-06, "loss": 0.0927, "step": 38920 }, { "epoch": 1.151298278819424, "grad_norm": 0.7927942872047424, "learning_rate": 3.790701733576918e-06, "loss": 0.0785, "step": 38930 }, { "epoch": 1.1515940143135979, "grad_norm": 0.7750995755195618, "learning_rate": 3.790575043657979e-06, "loss": 0.0745, "step": 38940 }, { "epoch": 1.151889749807772, "grad_norm": 1.9631049633026123, "learning_rate": 3.7904483537390393e-06, "loss": 0.0798, "step": 38950 }, { "epoch": 1.152185485301946, "grad_norm": 0.6577039361000061, "learning_rate": 3.7903216638201e-06, "loss": 0.0721, "step": 38960 }, { "epoch": 1.15248122079612, "grad_norm": 0.9026480913162231, "learning_rate": 3.7901949739011605e-06, "loss": 0.099, "step": 38970 }, { "epoch": 1.1527769562902939, "grad_norm": 0.5955251455307007, "learning_rate": 3.7900682839822217e-06, "loss": 0.072, "step": 38980 }, { "epoch": 1.153072691784468, "grad_norm": 1.1974601745605469, "learning_rate": 3.789941594063282e-06, "loss": 0.0861, "step": 38990 }, { "epoch": 1.153368427278642, "grad_norm": 0.9481181502342224, "learning_rate": 3.7898149041443424e-06, "loss": 0.0895, "step": 39000 }, { "epoch": 1.153664162772816, "grad_norm": 0.6302220225334167, "learning_rate": 3.7896882142254032e-06, "loss": 0.0737, "step": 39010 }, { "epoch": 1.15395989826699, "grad_norm": 1.0428537130355835, "learning_rate": 3.7895615243064636e-06, "loss": 0.0739, "step": 39020 }, { "epoch": 1.154255633761164, "grad_norm": 0.6996426582336426, "learning_rate": 3.7894348343875244e-06, "loss": 0.0724, "step": 39030 }, { "epoch": 1.154551369255338, "grad_norm": 1.5070616006851196, "learning_rate": 3.7893081444685847e-06, "loss": 0.0928, "step": 39040 }, { "epoch": 1.1548471047495121, "grad_norm": 0.8842436671257019, "learning_rate": 3.7891814545496455e-06, "loss": 0.0839, "step": 39050 }, { "epoch": 1.155142840243686, "grad_norm": 0.8056800365447998, "learning_rate": 3.7890547646307063e-06, "loss": 0.0749, "step": 39060 }, { "epoch": 1.15543857573786, "grad_norm": 1.346543312072754, "learning_rate": 3.788928074711767e-06, "loss": 0.0806, "step": 39070 }, { "epoch": 1.1557343112320342, "grad_norm": 1.0352447032928467, "learning_rate": 3.7888013847928275e-06, "loss": 0.0901, "step": 39080 }, { "epoch": 1.1560300467262081, "grad_norm": 1.4027714729309082, "learning_rate": 3.7886746948738883e-06, "loss": 0.0875, "step": 39090 }, { "epoch": 1.156325782220382, "grad_norm": 1.2910804748535156, "learning_rate": 3.7885480049549486e-06, "loss": 0.0805, "step": 39100 }, { "epoch": 1.156621517714556, "grad_norm": 0.6325640678405762, "learning_rate": 3.7884213150360094e-06, "loss": 0.069, "step": 39110 }, { "epoch": 1.1569172532087302, "grad_norm": 1.0757672786712646, "learning_rate": 3.78829462511707e-06, "loss": 0.0771, "step": 39120 }, { "epoch": 1.1572129887029041, "grad_norm": 0.7347498536109924, "learning_rate": 3.7881679351981306e-06, "loss": 0.0804, "step": 39130 }, { "epoch": 1.157508724197078, "grad_norm": 0.8754751682281494, "learning_rate": 3.7880412452791914e-06, "loss": 0.0968, "step": 39140 }, { "epoch": 1.1578044596912522, "grad_norm": 0.886823296546936, "learning_rate": 3.787914555360252e-06, "loss": 0.0919, "step": 39150 }, { "epoch": 1.1581001951854262, "grad_norm": 1.04085373878479, "learning_rate": 3.7877878654413125e-06, "loss": 0.0765, "step": 39160 }, { "epoch": 1.1583959306796001, "grad_norm": 1.0158854722976685, "learning_rate": 3.7876611755223733e-06, "loss": 0.0899, "step": 39170 }, { "epoch": 1.158691666173774, "grad_norm": 0.8321571946144104, "learning_rate": 3.7875344856034337e-06, "loss": 0.091, "step": 39180 }, { "epoch": 1.1589874016679482, "grad_norm": 0.9404139518737793, "learning_rate": 3.7874077956844945e-06, "loss": 0.0849, "step": 39190 }, { "epoch": 1.1592831371621222, "grad_norm": 0.818886935710907, "learning_rate": 3.787281105765555e-06, "loss": 0.086, "step": 39200 }, { "epoch": 1.1595788726562963, "grad_norm": 1.1708393096923828, "learning_rate": 3.7871544158466156e-06, "loss": 0.0892, "step": 39210 }, { "epoch": 1.1598746081504703, "grad_norm": 1.0589077472686768, "learning_rate": 3.7870277259276764e-06, "loss": 0.078, "step": 39220 }, { "epoch": 1.1601703436446442, "grad_norm": 0.7618764638900757, "learning_rate": 3.786901036008737e-06, "loss": 0.0931, "step": 39230 }, { "epoch": 1.1604660791388182, "grad_norm": 0.7141079306602478, "learning_rate": 3.7867743460897976e-06, "loss": 0.0724, "step": 39240 }, { "epoch": 1.1607618146329923, "grad_norm": 1.0117381811141968, "learning_rate": 3.7866476561708584e-06, "loss": 0.0775, "step": 39250 }, { "epoch": 1.1610575501271663, "grad_norm": 0.5892488360404968, "learning_rate": 3.7865209662519187e-06, "loss": 0.0616, "step": 39260 }, { "epoch": 1.1613532856213402, "grad_norm": 1.1827785968780518, "learning_rate": 3.7863942763329795e-06, "loss": 0.1104, "step": 39270 }, { "epoch": 1.1616490211155144, "grad_norm": 1.0858584642410278, "learning_rate": 3.78626758641404e-06, "loss": 0.1061, "step": 39280 }, { "epoch": 1.1619447566096883, "grad_norm": 1.0356491804122925, "learning_rate": 3.7861408964951007e-06, "loss": 0.1033, "step": 39290 }, { "epoch": 1.1622404921038623, "grad_norm": 0.6841399073600769, "learning_rate": 3.7860142065761615e-06, "loss": 0.0688, "step": 39300 }, { "epoch": 1.1625362275980362, "grad_norm": 0.3205001652240753, "learning_rate": 3.7858875166572223e-06, "loss": 0.07, "step": 39310 }, { "epoch": 1.1628319630922104, "grad_norm": 0.9096465706825256, "learning_rate": 3.7857608267382826e-06, "loss": 0.0939, "step": 39320 }, { "epoch": 1.1631276985863843, "grad_norm": 0.8341665863990784, "learning_rate": 3.7856341368193434e-06, "loss": 0.0974, "step": 39330 }, { "epoch": 1.1634234340805583, "grad_norm": 0.7676963210105896, "learning_rate": 3.7855074469004038e-06, "loss": 0.0727, "step": 39340 }, { "epoch": 1.1637191695747324, "grad_norm": 1.123712420463562, "learning_rate": 3.7853807569814646e-06, "loss": 0.0927, "step": 39350 }, { "epoch": 1.1640149050689064, "grad_norm": 1.1364926099777222, "learning_rate": 3.785254067062525e-06, "loss": 0.0731, "step": 39360 }, { "epoch": 1.1643106405630803, "grad_norm": 0.8467269539833069, "learning_rate": 3.7851273771435857e-06, "loss": 0.0825, "step": 39370 }, { "epoch": 1.1646063760572545, "grad_norm": 1.139435052871704, "learning_rate": 3.7850006872246465e-06, "loss": 0.0925, "step": 39380 }, { "epoch": 1.1649021115514284, "grad_norm": 1.184839129447937, "learning_rate": 3.7848739973057073e-06, "loss": 0.0878, "step": 39390 }, { "epoch": 1.1651978470456024, "grad_norm": 0.913263738155365, "learning_rate": 3.7847473073867677e-06, "loss": 0.0952, "step": 39400 }, { "epoch": 1.1654935825397765, "grad_norm": 0.6902431845664978, "learning_rate": 3.784620617467828e-06, "loss": 0.0593, "step": 39410 }, { "epoch": 1.1657893180339505, "grad_norm": 0.8087202310562134, "learning_rate": 3.784493927548889e-06, "loss": 0.0972, "step": 39420 }, { "epoch": 1.1660850535281244, "grad_norm": 1.3927206993103027, "learning_rate": 3.784367237629949e-06, "loss": 0.0924, "step": 39430 }, { "epoch": 1.1663807890222984, "grad_norm": 1.1832672357559204, "learning_rate": 3.78424054771101e-06, "loss": 0.0811, "step": 39440 }, { "epoch": 1.1666765245164725, "grad_norm": 1.3776118755340576, "learning_rate": 3.7841138577920703e-06, "loss": 0.0967, "step": 39450 }, { "epoch": 1.1669722600106465, "grad_norm": 1.2383239269256592, "learning_rate": 3.7839871678731316e-06, "loss": 0.0673, "step": 39460 }, { "epoch": 1.1672679955048204, "grad_norm": 1.170654058456421, "learning_rate": 3.783860477954192e-06, "loss": 0.099, "step": 39470 }, { "epoch": 1.1675637309989946, "grad_norm": 0.9437136054039001, "learning_rate": 3.7837337880352527e-06, "loss": 0.073, "step": 39480 }, { "epoch": 1.1678594664931685, "grad_norm": 0.6015310287475586, "learning_rate": 3.783607098116313e-06, "loss": 0.0768, "step": 39490 }, { "epoch": 1.1681552019873425, "grad_norm": 1.254583477973938, "learning_rate": 3.783480408197374e-06, "loss": 0.107, "step": 39500 }, { "epoch": 1.1684509374815166, "grad_norm": 0.90576171875, "learning_rate": 3.7833537182784342e-06, "loss": 0.0768, "step": 39510 }, { "epoch": 1.1687466729756906, "grad_norm": 0.7933081984519958, "learning_rate": 3.783227028359495e-06, "loss": 0.0834, "step": 39520 }, { "epoch": 1.1690424084698645, "grad_norm": 1.1106425523757935, "learning_rate": 3.7831003384405554e-06, "loss": 0.0891, "step": 39530 }, { "epoch": 1.1693381439640387, "grad_norm": 0.95894455909729, "learning_rate": 3.7829736485216166e-06, "loss": 0.0939, "step": 39540 }, { "epoch": 1.1696338794582126, "grad_norm": 0.7064570188522339, "learning_rate": 3.782846958602677e-06, "loss": 0.0722, "step": 39550 }, { "epoch": 1.1699296149523866, "grad_norm": 2.737581253051758, "learning_rate": 3.7827202686837378e-06, "loss": 0.0686, "step": 39560 }, { "epoch": 1.1702253504465605, "grad_norm": 0.9777272939682007, "learning_rate": 3.782593578764798e-06, "loss": 0.0788, "step": 39570 }, { "epoch": 1.1705210859407347, "grad_norm": 0.6279338598251343, "learning_rate": 3.782466888845859e-06, "loss": 0.0763, "step": 39580 }, { "epoch": 1.1708168214349086, "grad_norm": 1.064673900604248, "learning_rate": 3.7823401989269193e-06, "loss": 0.0978, "step": 39590 }, { "epoch": 1.1711125569290826, "grad_norm": 1.3056360483169556, "learning_rate": 3.78221350900798e-06, "loss": 0.083, "step": 39600 }, { "epoch": 1.1714082924232567, "grad_norm": 0.7821276187896729, "learning_rate": 3.7820868190890404e-06, "loss": 0.0922, "step": 39610 }, { "epoch": 1.1717040279174307, "grad_norm": 1.376320481300354, "learning_rate": 3.7819601291701016e-06, "loss": 0.0889, "step": 39620 }, { "epoch": 1.1719997634116046, "grad_norm": 1.0968948602676392, "learning_rate": 3.781833439251162e-06, "loss": 0.0874, "step": 39630 }, { "epoch": 1.1722954989057786, "grad_norm": 0.8677997589111328, "learning_rate": 3.781706749332223e-06, "loss": 0.0862, "step": 39640 }, { "epoch": 1.1725912343999527, "grad_norm": 1.1660020351409912, "learning_rate": 3.781580059413283e-06, "loss": 0.0841, "step": 39650 }, { "epoch": 1.1728869698941267, "grad_norm": 0.7275251150131226, "learning_rate": 3.781453369494344e-06, "loss": 0.0834, "step": 39660 }, { "epoch": 1.1731827053883008, "grad_norm": 1.0485408306121826, "learning_rate": 3.7813266795754043e-06, "loss": 0.0754, "step": 39670 }, { "epoch": 1.1734784408824748, "grad_norm": 0.7811110615730286, "learning_rate": 3.781199989656465e-06, "loss": 0.0892, "step": 39680 }, { "epoch": 1.1737741763766487, "grad_norm": 0.6842013597488403, "learning_rate": 3.7810732997375255e-06, "loss": 0.0846, "step": 39690 }, { "epoch": 1.1740699118708227, "grad_norm": 1.2282050848007202, "learning_rate": 3.7809466098185867e-06, "loss": 0.0815, "step": 39700 }, { "epoch": 1.1743656473649968, "grad_norm": 0.8429193496704102, "learning_rate": 3.780819919899647e-06, "loss": 0.0909, "step": 39710 }, { "epoch": 1.1746613828591708, "grad_norm": 0.8636810779571533, "learning_rate": 3.780693229980708e-06, "loss": 0.0858, "step": 39720 }, { "epoch": 1.1749571183533447, "grad_norm": 1.4190882444381714, "learning_rate": 3.780566540061768e-06, "loss": 0.0957, "step": 39730 }, { "epoch": 1.1752528538475189, "grad_norm": 0.6449540853500366, "learning_rate": 3.780439850142829e-06, "loss": 0.0763, "step": 39740 }, { "epoch": 1.1755485893416928, "grad_norm": 0.7047298550605774, "learning_rate": 3.7803131602238894e-06, "loss": 0.0904, "step": 39750 }, { "epoch": 1.1758443248358668, "grad_norm": 1.3818975687026978, "learning_rate": 3.78018647030495e-06, "loss": 0.0824, "step": 39760 }, { "epoch": 1.1761400603300407, "grad_norm": 1.41317880153656, "learning_rate": 3.7800597803860105e-06, "loss": 0.0977, "step": 39770 }, { "epoch": 1.1764357958242149, "grad_norm": 0.756568193435669, "learning_rate": 3.7799330904670717e-06, "loss": 0.0885, "step": 39780 }, { "epoch": 1.1767315313183888, "grad_norm": 1.2198995351791382, "learning_rate": 3.779806400548132e-06, "loss": 0.1032, "step": 39790 }, { "epoch": 1.1770272668125628, "grad_norm": 0.8426986932754517, "learning_rate": 3.779679710629193e-06, "loss": 0.0798, "step": 39800 }, { "epoch": 1.177323002306737, "grad_norm": 0.7548944354057312, "learning_rate": 3.7795530207102533e-06, "loss": 0.0755, "step": 39810 }, { "epoch": 1.1776187378009109, "grad_norm": 0.8027089238166809, "learning_rate": 3.7794263307913136e-06, "loss": 0.0854, "step": 39820 }, { "epoch": 1.1779144732950848, "grad_norm": 0.9743969440460205, "learning_rate": 3.7792996408723744e-06, "loss": 0.0841, "step": 39830 }, { "epoch": 1.178210208789259, "grad_norm": 1.4553859233856201, "learning_rate": 3.7791729509534348e-06, "loss": 0.0878, "step": 39840 }, { "epoch": 1.178505944283433, "grad_norm": 1.7518278360366821, "learning_rate": 3.7790462610344956e-06, "loss": 0.0802, "step": 39850 }, { "epoch": 1.1788016797776069, "grad_norm": 0.7439333200454712, "learning_rate": 3.778919571115556e-06, "loss": 0.0641, "step": 39860 }, { "epoch": 1.179097415271781, "grad_norm": 2.1537232398986816, "learning_rate": 3.778792881196617e-06, "loss": 0.0851, "step": 39870 }, { "epoch": 1.179393150765955, "grad_norm": 0.8799219727516174, "learning_rate": 3.7786661912776775e-06, "loss": 0.117, "step": 39880 }, { "epoch": 1.179688886260129, "grad_norm": 0.6154837608337402, "learning_rate": 3.7785395013587383e-06, "loss": 0.0806, "step": 39890 }, { "epoch": 1.1799846217543029, "grad_norm": 1.1603034734725952, "learning_rate": 3.7784128114397987e-06, "loss": 0.0871, "step": 39900 }, { "epoch": 1.180280357248477, "grad_norm": 0.6240968108177185, "learning_rate": 3.7782861215208595e-06, "loss": 0.0828, "step": 39910 }, { "epoch": 1.180576092742651, "grad_norm": 1.2880254983901978, "learning_rate": 3.77815943160192e-06, "loss": 0.1128, "step": 39920 }, { "epoch": 1.180871828236825, "grad_norm": 1.1670176982879639, "learning_rate": 3.7780327416829806e-06, "loss": 0.1146, "step": 39930 }, { "epoch": 1.181167563730999, "grad_norm": 1.0088489055633545, "learning_rate": 3.777906051764041e-06, "loss": 0.0866, "step": 39940 }, { "epoch": 1.181463299225173, "grad_norm": 0.3928906321525574, "learning_rate": 3.777779361845102e-06, "loss": 0.0821, "step": 39950 }, { "epoch": 1.181759034719347, "grad_norm": 0.6837438344955444, "learning_rate": 3.7776526719261626e-06, "loss": 0.0726, "step": 39960 }, { "epoch": 1.1820547702135211, "grad_norm": 0.839739978313446, "learning_rate": 3.7775259820072233e-06, "loss": 0.0946, "step": 39970 }, { "epoch": 1.182350505707695, "grad_norm": 0.9356025457382202, "learning_rate": 3.7773992920882837e-06, "loss": 0.0882, "step": 39980 }, { "epoch": 1.182646241201869, "grad_norm": 0.7967051863670349, "learning_rate": 3.7772726021693445e-06, "loss": 0.0876, "step": 39990 }, { "epoch": 1.1829419766960432, "grad_norm": 1.1105146408081055, "learning_rate": 3.777145912250405e-06, "loss": 0.0817, "step": 40000 }, { "epoch": 1.1832377121902171, "grad_norm": 0.6344742178916931, "learning_rate": 3.7770192223314657e-06, "loss": 0.0641, "step": 40010 }, { "epoch": 1.183533447684391, "grad_norm": 0.653515636920929, "learning_rate": 3.776892532412526e-06, "loss": 0.0765, "step": 40020 }, { "epoch": 1.183829183178565, "grad_norm": 1.0250552892684937, "learning_rate": 3.7767658424935872e-06, "loss": 0.0758, "step": 40030 }, { "epoch": 1.1841249186727392, "grad_norm": 1.0698728561401367, "learning_rate": 3.7766391525746476e-06, "loss": 0.0876, "step": 40040 }, { "epoch": 1.1844206541669131, "grad_norm": 0.8373624682426453, "learning_rate": 3.7765124626557084e-06, "loss": 0.0771, "step": 40050 }, { "epoch": 1.184716389661087, "grad_norm": 1.26399564743042, "learning_rate": 3.7763857727367688e-06, "loss": 0.0754, "step": 40060 }, { "epoch": 1.1850121251552612, "grad_norm": 1.3409326076507568, "learning_rate": 3.7762590828178295e-06, "loss": 0.0919, "step": 40070 }, { "epoch": 1.1853078606494352, "grad_norm": 1.0852686166763306, "learning_rate": 3.77613239289889e-06, "loss": 0.0889, "step": 40080 }, { "epoch": 1.185603596143609, "grad_norm": 0.7273481488227844, "learning_rate": 3.7760057029799507e-06, "loss": 0.0721, "step": 40090 }, { "epoch": 1.185899331637783, "grad_norm": 0.4614730775356293, "learning_rate": 3.775879013061011e-06, "loss": 0.0642, "step": 40100 }, { "epoch": 1.1861950671319572, "grad_norm": 0.9689737558364868, "learning_rate": 3.7757523231420723e-06, "loss": 0.0677, "step": 40110 }, { "epoch": 1.1864908026261312, "grad_norm": 0.902093768119812, "learning_rate": 3.7756256332231326e-06, "loss": 0.0697, "step": 40120 }, { "epoch": 1.1867865381203053, "grad_norm": 1.2899978160858154, "learning_rate": 3.7754989433041934e-06, "loss": 0.0971, "step": 40130 }, { "epoch": 1.1870822736144793, "grad_norm": 0.8958795070648193, "learning_rate": 3.775372253385254e-06, "loss": 0.0985, "step": 40140 }, { "epoch": 1.1873780091086532, "grad_norm": 0.9493626356124878, "learning_rate": 3.7752455634663146e-06, "loss": 0.0848, "step": 40150 }, { "epoch": 1.1876737446028272, "grad_norm": 1.1650861501693726, "learning_rate": 3.775118873547375e-06, "loss": 0.0756, "step": 40160 }, { "epoch": 1.1879694800970013, "grad_norm": 0.816244900226593, "learning_rate": 3.7749921836284357e-06, "loss": 0.0768, "step": 40170 }, { "epoch": 1.1882652155911753, "grad_norm": 0.6563299894332886, "learning_rate": 3.774865493709496e-06, "loss": 0.0805, "step": 40180 }, { "epoch": 1.1885609510853492, "grad_norm": 1.3287044763565063, "learning_rate": 3.7747388037905573e-06, "loss": 0.0971, "step": 40190 }, { "epoch": 1.1888566865795234, "grad_norm": 1.0191688537597656, "learning_rate": 3.7746121138716177e-06, "loss": 0.0883, "step": 40200 }, { "epoch": 1.1891524220736973, "grad_norm": 0.9509804844856262, "learning_rate": 3.7744854239526785e-06, "loss": 0.0725, "step": 40210 }, { "epoch": 1.1894481575678713, "grad_norm": 0.8151803016662598, "learning_rate": 3.774358734033739e-06, "loss": 0.0912, "step": 40220 }, { "epoch": 1.1897438930620452, "grad_norm": 1.3537874221801758, "learning_rate": 3.7742320441147992e-06, "loss": 0.0842, "step": 40230 }, { "epoch": 1.1900396285562194, "grad_norm": 0.5184977054595947, "learning_rate": 3.77410535419586e-06, "loss": 0.0746, "step": 40240 }, { "epoch": 1.1903353640503933, "grad_norm": 0.8900899291038513, "learning_rate": 3.7739786642769204e-06, "loss": 0.0775, "step": 40250 }, { "epoch": 1.1906310995445673, "grad_norm": 1.7475857734680176, "learning_rate": 3.773851974357981e-06, "loss": 0.0805, "step": 40260 }, { "epoch": 1.1909268350387414, "grad_norm": 1.267576813697815, "learning_rate": 3.773725284439042e-06, "loss": 0.0883, "step": 40270 }, { "epoch": 1.1912225705329154, "grad_norm": 1.0180968046188354, "learning_rate": 3.7735985945201027e-06, "loss": 0.1118, "step": 40280 }, { "epoch": 1.1915183060270893, "grad_norm": 0.6780694723129272, "learning_rate": 3.773471904601163e-06, "loss": 0.0917, "step": 40290 }, { "epoch": 1.1918140415212635, "grad_norm": 0.8262017965316772, "learning_rate": 3.773345214682224e-06, "loss": 0.0841, "step": 40300 }, { "epoch": 1.1921097770154374, "grad_norm": 0.7843750715255737, "learning_rate": 3.7732185247632843e-06, "loss": 0.0658, "step": 40310 }, { "epoch": 1.1924055125096114, "grad_norm": 0.9116793274879456, "learning_rate": 3.773091834844345e-06, "loss": 0.061, "step": 40320 }, { "epoch": 1.1927012480037855, "grad_norm": 1.219513177871704, "learning_rate": 3.7729651449254054e-06, "loss": 0.0835, "step": 40330 }, { "epoch": 1.1929969834979595, "grad_norm": 0.6274988055229187, "learning_rate": 3.772838455006466e-06, "loss": 0.0888, "step": 40340 }, { "epoch": 1.1932927189921334, "grad_norm": 1.157253623008728, "learning_rate": 3.772711765087527e-06, "loss": 0.0885, "step": 40350 }, { "epoch": 1.1935884544863073, "grad_norm": 1.2879081964492798, "learning_rate": 3.7725850751685878e-06, "loss": 0.0818, "step": 40360 }, { "epoch": 1.1938841899804815, "grad_norm": 0.8989830613136292, "learning_rate": 3.772458385249648e-06, "loss": 0.0897, "step": 40370 }, { "epoch": 1.1941799254746555, "grad_norm": 1.5158629417419434, "learning_rate": 3.772331695330709e-06, "loss": 0.1013, "step": 40380 }, { "epoch": 1.1944756609688294, "grad_norm": 0.7128211259841919, "learning_rate": 3.7722050054117693e-06, "loss": 0.0862, "step": 40390 }, { "epoch": 1.1947713964630036, "grad_norm": 1.235787034034729, "learning_rate": 3.77207831549283e-06, "loss": 0.0755, "step": 40400 }, { "epoch": 1.1950671319571775, "grad_norm": 0.7987324595451355, "learning_rate": 3.7719516255738905e-06, "loss": 0.0717, "step": 40410 }, { "epoch": 1.1953628674513515, "grad_norm": 0.9950603246688843, "learning_rate": 3.7718249356549512e-06, "loss": 0.0972, "step": 40420 }, { "epoch": 1.1956586029455256, "grad_norm": 1.3883099555969238, "learning_rate": 3.771698245736012e-06, "loss": 0.1004, "step": 40430 }, { "epoch": 1.1959543384396996, "grad_norm": 0.8308966159820557, "learning_rate": 3.771571555817073e-06, "loss": 0.0918, "step": 40440 }, { "epoch": 1.1962500739338735, "grad_norm": 0.9428160190582275, "learning_rate": 3.771444865898133e-06, "loss": 0.0883, "step": 40450 }, { "epoch": 1.1965458094280477, "grad_norm": 0.8274093866348267, "learning_rate": 3.771318175979194e-06, "loss": 0.0761, "step": 40460 }, { "epoch": 1.1968415449222216, "grad_norm": 0.7492689490318298, "learning_rate": 3.7711914860602543e-06, "loss": 0.0748, "step": 40470 }, { "epoch": 1.1971372804163956, "grad_norm": 0.7420651912689209, "learning_rate": 3.771064796141315e-06, "loss": 0.0924, "step": 40480 }, { "epoch": 1.1974330159105695, "grad_norm": 1.0648876428604126, "learning_rate": 3.7709381062223755e-06, "loss": 0.1016, "step": 40490 }, { "epoch": 1.1977287514047437, "grad_norm": 0.972137987613678, "learning_rate": 3.7708114163034363e-06, "loss": 0.1017, "step": 40500 }, { "epoch": 1.1980244868989176, "grad_norm": 0.9056682586669922, "learning_rate": 3.770684726384497e-06, "loss": 0.0709, "step": 40510 }, { "epoch": 1.1983202223930915, "grad_norm": 0.7848405241966248, "learning_rate": 3.770558036465558e-06, "loss": 0.0879, "step": 40520 }, { "epoch": 1.1986159578872657, "grad_norm": 0.7214329242706299, "learning_rate": 3.7704313465466182e-06, "loss": 0.0888, "step": 40530 }, { "epoch": 1.1989116933814397, "grad_norm": 0.6220455169677734, "learning_rate": 3.770304656627679e-06, "loss": 0.0733, "step": 40540 }, { "epoch": 1.1992074288756136, "grad_norm": 1.0459771156311035, "learning_rate": 3.7701779667087394e-06, "loss": 0.081, "step": 40550 }, { "epoch": 1.1995031643697875, "grad_norm": 1.1241815090179443, "learning_rate": 3.7700512767898e-06, "loss": 0.0795, "step": 40560 }, { "epoch": 1.1997988998639617, "grad_norm": 0.6339595913887024, "learning_rate": 3.7699245868708606e-06, "loss": 0.0724, "step": 40570 }, { "epoch": 1.2000946353581357, "grad_norm": 1.0946979522705078, "learning_rate": 3.7697978969519213e-06, "loss": 0.0835, "step": 40580 }, { "epoch": 1.2003903708523098, "grad_norm": 1.5633153915405273, "learning_rate": 3.769671207032982e-06, "loss": 0.0806, "step": 40590 }, { "epoch": 1.2006861063464838, "grad_norm": 0.9200705289840698, "learning_rate": 3.769544517114043e-06, "loss": 0.0815, "step": 40600 }, { "epoch": 1.2009818418406577, "grad_norm": 0.6134412884712219, "learning_rate": 3.7694178271951033e-06, "loss": 0.0617, "step": 40610 }, { "epoch": 1.2012775773348316, "grad_norm": 0.9709145426750183, "learning_rate": 3.769291137276164e-06, "loss": 0.0709, "step": 40620 }, { "epoch": 1.2015733128290058, "grad_norm": 1.3042367696762085, "learning_rate": 3.7691644473572244e-06, "loss": 0.1058, "step": 40630 }, { "epoch": 1.2018690483231798, "grad_norm": 0.6163318157196045, "learning_rate": 3.7690377574382852e-06, "loss": 0.0761, "step": 40640 }, { "epoch": 1.2021647838173537, "grad_norm": 1.9867476224899292, "learning_rate": 3.7689110675193456e-06, "loss": 0.0867, "step": 40650 }, { "epoch": 1.2024605193115279, "grad_norm": 0.8365916609764099, "learning_rate": 3.768784377600406e-06, "loss": 0.0816, "step": 40660 }, { "epoch": 1.2027562548057018, "grad_norm": 0.7590726613998413, "learning_rate": 3.768657687681467e-06, "loss": 0.0893, "step": 40670 }, { "epoch": 1.2030519902998758, "grad_norm": 1.0104937553405762, "learning_rate": 3.7685309977625275e-06, "loss": 0.1024, "step": 40680 }, { "epoch": 1.2033477257940497, "grad_norm": 0.9872831106185913, "learning_rate": 3.7684043078435883e-06, "loss": 0.0902, "step": 40690 }, { "epoch": 1.2036434612882239, "grad_norm": 0.8967199921607971, "learning_rate": 3.7682776179246487e-06, "loss": 0.0651, "step": 40700 }, { "epoch": 1.2039391967823978, "grad_norm": 0.9154890775680542, "learning_rate": 3.7681509280057095e-06, "loss": 0.0743, "step": 40710 }, { "epoch": 1.2042349322765717, "grad_norm": 1.1332597732543945, "learning_rate": 3.76802423808677e-06, "loss": 0.0882, "step": 40720 }, { "epoch": 1.204530667770746, "grad_norm": 1.165120244026184, "learning_rate": 3.7678975481678306e-06, "loss": 0.0887, "step": 40730 }, { "epoch": 1.2048264032649199, "grad_norm": 0.9264705181121826, "learning_rate": 3.767770858248891e-06, "loss": 0.1049, "step": 40740 }, { "epoch": 1.2051221387590938, "grad_norm": 0.7999647259712219, "learning_rate": 3.7676441683299522e-06, "loss": 0.0773, "step": 40750 }, { "epoch": 1.205417874253268, "grad_norm": 0.7282774448394775, "learning_rate": 3.7675174784110126e-06, "loss": 0.0681, "step": 40760 }, { "epoch": 1.205713609747442, "grad_norm": 1.751054048538208, "learning_rate": 3.7673907884920734e-06, "loss": 0.0803, "step": 40770 }, { "epoch": 1.2060093452416158, "grad_norm": 1.1468011140823364, "learning_rate": 3.7672640985731337e-06, "loss": 0.0886, "step": 40780 }, { "epoch": 1.20630508073579, "grad_norm": 1.5382333993911743, "learning_rate": 3.7671374086541945e-06, "loss": 0.0885, "step": 40790 }, { "epoch": 1.206600816229964, "grad_norm": 0.9214845299720764, "learning_rate": 3.767010718735255e-06, "loss": 0.0898, "step": 40800 }, { "epoch": 1.206896551724138, "grad_norm": 0.5600555539131165, "learning_rate": 3.7668840288163157e-06, "loss": 0.0788, "step": 40810 }, { "epoch": 1.2071922872183118, "grad_norm": 0.7308516502380371, "learning_rate": 3.766757338897376e-06, "loss": 0.0894, "step": 40820 }, { "epoch": 1.207488022712486, "grad_norm": 0.9569129943847656, "learning_rate": 3.7666306489784373e-06, "loss": 0.086, "step": 40830 }, { "epoch": 1.20778375820666, "grad_norm": 1.1446174383163452, "learning_rate": 3.7665039590594976e-06, "loss": 0.0711, "step": 40840 }, { "epoch": 1.208079493700834, "grad_norm": 1.022242546081543, "learning_rate": 3.7663772691405584e-06, "loss": 0.0821, "step": 40850 }, { "epoch": 1.208375229195008, "grad_norm": 1.1347237825393677, "learning_rate": 3.7662505792216188e-06, "loss": 0.0813, "step": 40860 }, { "epoch": 1.208670964689182, "grad_norm": 0.8722379207611084, "learning_rate": 3.7661238893026796e-06, "loss": 0.0824, "step": 40870 }, { "epoch": 1.208966700183356, "grad_norm": 0.9834651947021484, "learning_rate": 3.76599719938374e-06, "loss": 0.0897, "step": 40880 }, { "epoch": 1.2092624356775301, "grad_norm": 1.546714425086975, "learning_rate": 3.7658705094648007e-06, "loss": 0.0787, "step": 40890 }, { "epoch": 1.209558171171704, "grad_norm": 0.8255575895309448, "learning_rate": 3.765743819545861e-06, "loss": 0.0735, "step": 40900 }, { "epoch": 1.209853906665878, "grad_norm": 0.8017672300338745, "learning_rate": 3.7656171296269223e-06, "loss": 0.0747, "step": 40910 }, { "epoch": 1.2101496421600522, "grad_norm": 0.756024956703186, "learning_rate": 3.7654904397079827e-06, "loss": 0.0735, "step": 40920 }, { "epoch": 1.210445377654226, "grad_norm": 1.0319349765777588, "learning_rate": 3.7653637497890435e-06, "loss": 0.0874, "step": 40930 }, { "epoch": 1.2107411131484, "grad_norm": 0.7978315353393555, "learning_rate": 3.765237059870104e-06, "loss": 0.0914, "step": 40940 }, { "epoch": 1.211036848642574, "grad_norm": 1.205059289932251, "learning_rate": 3.7651103699511646e-06, "loss": 0.069, "step": 40950 }, { "epoch": 1.2113325841367482, "grad_norm": 0.9209543466567993, "learning_rate": 3.764983680032225e-06, "loss": 0.0789, "step": 40960 }, { "epoch": 1.211628319630922, "grad_norm": 1.2056573629379272, "learning_rate": 3.7648569901132858e-06, "loss": 0.1071, "step": 40970 }, { "epoch": 1.211924055125096, "grad_norm": 0.8152947425842285, "learning_rate": 3.764730300194346e-06, "loss": 0.0845, "step": 40980 }, { "epoch": 1.2122197906192702, "grad_norm": 1.5797626972198486, "learning_rate": 3.7646036102754074e-06, "loss": 0.1006, "step": 40990 }, { "epoch": 1.2125155261134442, "grad_norm": 0.7471251487731934, "learning_rate": 3.7644769203564677e-06, "loss": 0.0715, "step": 41000 }, { "epoch": 1.212811261607618, "grad_norm": 1.013348937034607, "learning_rate": 3.7643502304375285e-06, "loss": 0.0706, "step": 41010 }, { "epoch": 1.213106997101792, "grad_norm": 0.9566812515258789, "learning_rate": 3.764223540518589e-06, "loss": 0.0801, "step": 41020 }, { "epoch": 1.2134027325959662, "grad_norm": 1.0359554290771484, "learning_rate": 3.7640968505996497e-06, "loss": 0.0919, "step": 41030 }, { "epoch": 1.2136984680901401, "grad_norm": 0.830436646938324, "learning_rate": 3.76397016068071e-06, "loss": 0.0869, "step": 41040 }, { "epoch": 1.2139942035843143, "grad_norm": 1.3774052858352661, "learning_rate": 3.763843470761771e-06, "loss": 0.0806, "step": 41050 }, { "epoch": 1.2142899390784883, "grad_norm": 0.7361682057380676, "learning_rate": 3.763716780842831e-06, "loss": 0.0727, "step": 41060 }, { "epoch": 1.2145856745726622, "grad_norm": 0.8545621037483215, "learning_rate": 3.763590090923892e-06, "loss": 0.083, "step": 41070 }, { "epoch": 1.2148814100668361, "grad_norm": 0.531455397605896, "learning_rate": 3.7634634010049528e-06, "loss": 0.0773, "step": 41080 }, { "epoch": 1.2151771455610103, "grad_norm": 1.3029738664627075, "learning_rate": 3.763336711086013e-06, "loss": 0.0864, "step": 41090 }, { "epoch": 1.2154728810551843, "grad_norm": 0.7051945328712463, "learning_rate": 3.763210021167074e-06, "loss": 0.0781, "step": 41100 }, { "epoch": 1.2157686165493582, "grad_norm": 0.6302741765975952, "learning_rate": 3.7630833312481343e-06, "loss": 0.0649, "step": 41110 }, { "epoch": 1.2160643520435324, "grad_norm": 1.2294822931289673, "learning_rate": 3.762956641329195e-06, "loss": 0.1135, "step": 41120 }, { "epoch": 1.2163600875377063, "grad_norm": 1.028856635093689, "learning_rate": 3.7628299514102554e-06, "loss": 0.1062, "step": 41130 }, { "epoch": 1.2166558230318802, "grad_norm": 0.6923469305038452, "learning_rate": 3.7627032614913162e-06, "loss": 0.0792, "step": 41140 }, { "epoch": 1.2169515585260542, "grad_norm": 0.8965608477592468, "learning_rate": 3.762576571572377e-06, "loss": 0.1006, "step": 41150 }, { "epoch": 1.2172472940202284, "grad_norm": 0.6174112558364868, "learning_rate": 3.762449881653438e-06, "loss": 0.0621, "step": 41160 }, { "epoch": 1.2175430295144023, "grad_norm": 0.8508679866790771, "learning_rate": 3.762323191734498e-06, "loss": 0.1047, "step": 41170 }, { "epoch": 1.2178387650085762, "grad_norm": 0.8480759859085083, "learning_rate": 3.762196501815559e-06, "loss": 0.0984, "step": 41180 }, { "epoch": 1.2181345005027504, "grad_norm": 0.797450065612793, "learning_rate": 3.7620698118966193e-06, "loss": 0.1006, "step": 41190 }, { "epoch": 1.2184302359969243, "grad_norm": 0.693214476108551, "learning_rate": 3.76194312197768e-06, "loss": 0.0786, "step": 41200 }, { "epoch": 1.2187259714910983, "grad_norm": 1.7502851486206055, "learning_rate": 3.7618164320587405e-06, "loss": 0.0914, "step": 41210 }, { "epoch": 1.2190217069852725, "grad_norm": 0.8219584822654724, "learning_rate": 3.7616897421398013e-06, "loss": 0.0799, "step": 41220 }, { "epoch": 1.2193174424794464, "grad_norm": 0.8830151557922363, "learning_rate": 3.761563052220862e-06, "loss": 0.1056, "step": 41230 }, { "epoch": 1.2196131779736203, "grad_norm": 0.8551913499832153, "learning_rate": 3.761436362301923e-06, "loss": 0.1002, "step": 41240 }, { "epoch": 1.2199089134677945, "grad_norm": 0.7374126315116882, "learning_rate": 3.7613096723829832e-06, "loss": 0.0925, "step": 41250 }, { "epoch": 1.2202046489619685, "grad_norm": 0.8665549755096436, "learning_rate": 3.761182982464044e-06, "loss": 0.0782, "step": 41260 }, { "epoch": 1.2205003844561424, "grad_norm": 1.3691871166229248, "learning_rate": 3.7610562925451044e-06, "loss": 0.0759, "step": 41270 }, { "epoch": 1.2207961199503163, "grad_norm": 0.9730311036109924, "learning_rate": 3.760929602626165e-06, "loss": 0.0992, "step": 41280 }, { "epoch": 1.2210918554444905, "grad_norm": 0.9451532959938049, "learning_rate": 3.7608029127072255e-06, "loss": 0.0878, "step": 41290 }, { "epoch": 1.2213875909386644, "grad_norm": 1.06460702419281, "learning_rate": 3.7606762227882863e-06, "loss": 0.0849, "step": 41300 }, { "epoch": 1.2216833264328384, "grad_norm": 0.5743469595909119, "learning_rate": 3.760549532869347e-06, "loss": 0.0838, "step": 41310 }, { "epoch": 1.2219790619270126, "grad_norm": 1.0686726570129395, "learning_rate": 3.760422842950408e-06, "loss": 0.0864, "step": 41320 }, { "epoch": 1.2222747974211865, "grad_norm": 0.7900591492652893, "learning_rate": 3.7602961530314683e-06, "loss": 0.0814, "step": 41330 }, { "epoch": 1.2225705329153604, "grad_norm": 0.8100199699401855, "learning_rate": 3.760169463112529e-06, "loss": 0.0948, "step": 41340 }, { "epoch": 1.2228662684095346, "grad_norm": 0.8411688208580017, "learning_rate": 3.7600427731935894e-06, "loss": 0.0719, "step": 41350 }, { "epoch": 1.2231620039037086, "grad_norm": 0.5850663185119629, "learning_rate": 3.7599160832746502e-06, "loss": 0.0769, "step": 41360 }, { "epoch": 1.2234577393978825, "grad_norm": 0.7900218367576599, "learning_rate": 3.7597893933557106e-06, "loss": 0.0982, "step": 41370 }, { "epoch": 1.2237534748920567, "grad_norm": 0.9115356206893921, "learning_rate": 3.7596627034367714e-06, "loss": 0.1022, "step": 41380 }, { "epoch": 1.2240492103862306, "grad_norm": 0.989962100982666, "learning_rate": 3.759536013517832e-06, "loss": 0.1007, "step": 41390 }, { "epoch": 1.2243449458804045, "grad_norm": 0.8437445163726807, "learning_rate": 3.759409323598893e-06, "loss": 0.0969, "step": 41400 }, { "epoch": 1.2246406813745785, "grad_norm": 1.0618177652359009, "learning_rate": 3.7592826336799533e-06, "loss": 0.07, "step": 41410 }, { "epoch": 1.2249364168687527, "grad_norm": 1.7112901210784912, "learning_rate": 3.759155943761014e-06, "loss": 0.0837, "step": 41420 }, { "epoch": 1.2252321523629266, "grad_norm": 1.1308397054672241, "learning_rate": 3.7590292538420745e-06, "loss": 0.0923, "step": 41430 }, { "epoch": 1.2255278878571005, "grad_norm": 0.8794617056846619, "learning_rate": 3.7589025639231353e-06, "loss": 0.0888, "step": 41440 }, { "epoch": 1.2258236233512747, "grad_norm": 0.6148607730865479, "learning_rate": 3.7587758740041956e-06, "loss": 0.0902, "step": 41450 }, { "epoch": 1.2261193588454486, "grad_norm": 0.9168487191200256, "learning_rate": 3.7586491840852564e-06, "loss": 0.0697, "step": 41460 }, { "epoch": 1.2264150943396226, "grad_norm": 1.0684726238250732, "learning_rate": 3.758522494166317e-06, "loss": 0.0894, "step": 41470 }, { "epoch": 1.2267108298337965, "grad_norm": 1.096335768699646, "learning_rate": 3.7583958042473776e-06, "loss": 0.086, "step": 41480 }, { "epoch": 1.2270065653279707, "grad_norm": 0.5562200546264648, "learning_rate": 3.7582691143284384e-06, "loss": 0.1036, "step": 41490 }, { "epoch": 1.2273023008221446, "grad_norm": 1.0047246217727661, "learning_rate": 3.7581424244094987e-06, "loss": 0.0769, "step": 41500 }, { "epoch": 1.2275980363163188, "grad_norm": 0.6041533350944519, "learning_rate": 3.7580157344905595e-06, "loss": 0.0641, "step": 41510 }, { "epoch": 1.2278937718104928, "grad_norm": 0.8419924378395081, "learning_rate": 3.75788904457162e-06, "loss": 0.0937, "step": 41520 }, { "epoch": 1.2281895073046667, "grad_norm": 1.0541123151779175, "learning_rate": 3.7577623546526807e-06, "loss": 0.0862, "step": 41530 }, { "epoch": 1.2284852427988406, "grad_norm": 0.9894426465034485, "learning_rate": 3.757635664733741e-06, "loss": 0.0922, "step": 41540 }, { "epoch": 1.2287809782930148, "grad_norm": 1.2076575756072998, "learning_rate": 3.7575089748148023e-06, "loss": 0.0867, "step": 41550 }, { "epoch": 1.2290767137871887, "grad_norm": 0.41088739037513733, "learning_rate": 3.7573822848958626e-06, "loss": 0.0667, "step": 41560 }, { "epoch": 1.2293724492813627, "grad_norm": 1.4113563299179077, "learning_rate": 3.7572555949769234e-06, "loss": 0.0948, "step": 41570 }, { "epoch": 1.2296681847755369, "grad_norm": 0.9761290550231934, "learning_rate": 3.7571289050579838e-06, "loss": 0.1062, "step": 41580 }, { "epoch": 1.2299639202697108, "grad_norm": 1.0491981506347656, "learning_rate": 3.7570022151390446e-06, "loss": 0.1002, "step": 41590 }, { "epoch": 1.2302596557638847, "grad_norm": 0.6568334102630615, "learning_rate": 3.756875525220105e-06, "loss": 0.0865, "step": 41600 }, { "epoch": 1.2305553912580587, "grad_norm": 1.0175927877426147, "learning_rate": 3.7567488353011657e-06, "loss": 0.0788, "step": 41610 }, { "epoch": 1.2308511267522328, "grad_norm": 0.9939190149307251, "learning_rate": 3.756622145382226e-06, "loss": 0.0881, "step": 41620 }, { "epoch": 1.2311468622464068, "grad_norm": 1.1712980270385742, "learning_rate": 3.7564954554632873e-06, "loss": 0.1009, "step": 41630 }, { "epoch": 1.2314425977405807, "grad_norm": 1.436538815498352, "learning_rate": 3.7563687655443477e-06, "loss": 0.0941, "step": 41640 }, { "epoch": 1.231738333234755, "grad_norm": 2.0207431316375732, "learning_rate": 3.7562420756254085e-06, "loss": 0.0941, "step": 41650 }, { "epoch": 1.2320340687289288, "grad_norm": 0.7616507411003113, "learning_rate": 3.756115385706469e-06, "loss": 0.0728, "step": 41660 }, { "epoch": 1.2323298042231028, "grad_norm": 1.0467075109481812, "learning_rate": 3.7559886957875296e-06, "loss": 0.0951, "step": 41670 }, { "epoch": 1.232625539717277, "grad_norm": 0.8936682939529419, "learning_rate": 3.75586200586859e-06, "loss": 0.0872, "step": 41680 }, { "epoch": 1.232921275211451, "grad_norm": 1.0715597867965698, "learning_rate": 3.7557353159496508e-06, "loss": 0.0843, "step": 41690 }, { "epoch": 1.2332170107056248, "grad_norm": 0.9322073459625244, "learning_rate": 3.755608626030711e-06, "loss": 0.0821, "step": 41700 }, { "epoch": 1.233512746199799, "grad_norm": 1.2817270755767822, "learning_rate": 3.7554819361117723e-06, "loss": 0.0698, "step": 41710 }, { "epoch": 1.233808481693973, "grad_norm": 0.7821609973907471, "learning_rate": 3.7553552461928327e-06, "loss": 0.0688, "step": 41720 }, { "epoch": 1.234104217188147, "grad_norm": 0.8847651481628418, "learning_rate": 3.7552285562738935e-06, "loss": 0.1004, "step": 41730 }, { "epoch": 1.2343999526823208, "grad_norm": 0.8507346510887146, "learning_rate": 3.755101866354954e-06, "loss": 0.0909, "step": 41740 }, { "epoch": 1.234695688176495, "grad_norm": 0.5085054636001587, "learning_rate": 3.7549751764360147e-06, "loss": 0.079, "step": 41750 }, { "epoch": 1.234991423670669, "grad_norm": 0.7457472681999207, "learning_rate": 3.754848486517075e-06, "loss": 0.0634, "step": 41760 }, { "epoch": 1.2352871591648429, "grad_norm": 0.8051024079322815, "learning_rate": 3.754721796598136e-06, "loss": 0.0838, "step": 41770 }, { "epoch": 1.235582894659017, "grad_norm": 0.7357924580574036, "learning_rate": 3.754595106679196e-06, "loss": 0.078, "step": 41780 }, { "epoch": 1.235878630153191, "grad_norm": 0.43302035331726074, "learning_rate": 3.7544684167602574e-06, "loss": 0.0804, "step": 41790 }, { "epoch": 1.236174365647365, "grad_norm": 0.989494800567627, "learning_rate": 3.7543417268413178e-06, "loss": 0.0844, "step": 41800 }, { "epoch": 1.236470101141539, "grad_norm": 0.7298458218574524, "learning_rate": 3.7542150369223785e-06, "loss": 0.0924, "step": 41810 }, { "epoch": 1.236765836635713, "grad_norm": 1.0972023010253906, "learning_rate": 3.754088347003439e-06, "loss": 0.0949, "step": 41820 }, { "epoch": 1.237061572129887, "grad_norm": 0.5792984366416931, "learning_rate": 3.7539616570844997e-06, "loss": 0.1018, "step": 41830 }, { "epoch": 1.2373573076240612, "grad_norm": 0.6531001329421997, "learning_rate": 3.75383496716556e-06, "loss": 0.0846, "step": 41840 }, { "epoch": 1.237653043118235, "grad_norm": 0.7152041792869568, "learning_rate": 3.753708277246621e-06, "loss": 0.0757, "step": 41850 }, { "epoch": 1.237948778612409, "grad_norm": 0.7132461071014404, "learning_rate": 3.7535815873276812e-06, "loss": 0.0781, "step": 41860 }, { "epoch": 1.238244514106583, "grad_norm": 1.0602270364761353, "learning_rate": 3.7534548974087424e-06, "loss": 0.0984, "step": 41870 }, { "epoch": 1.2385402496007571, "grad_norm": 0.6484856605529785, "learning_rate": 3.753328207489803e-06, "loss": 0.0874, "step": 41880 }, { "epoch": 1.238835985094931, "grad_norm": 0.7437695264816284, "learning_rate": 3.753201517570863e-06, "loss": 0.0819, "step": 41890 }, { "epoch": 1.239131720589105, "grad_norm": 0.8429372906684875, "learning_rate": 3.753074827651924e-06, "loss": 0.0848, "step": 41900 }, { "epoch": 1.2394274560832792, "grad_norm": 0.6087430119514465, "learning_rate": 3.7529481377329843e-06, "loss": 0.087, "step": 41910 }, { "epoch": 1.2397231915774531, "grad_norm": 1.3895028829574585, "learning_rate": 3.752821447814045e-06, "loss": 0.0978, "step": 41920 }, { "epoch": 1.240018927071627, "grad_norm": 0.9153549075126648, "learning_rate": 3.7526947578951055e-06, "loss": 0.0916, "step": 41930 }, { "epoch": 1.240314662565801, "grad_norm": 0.6225422620773315, "learning_rate": 3.7525680679761663e-06, "loss": 0.0864, "step": 41940 }, { "epoch": 1.2406103980599752, "grad_norm": 0.7995703816413879, "learning_rate": 3.752441378057227e-06, "loss": 0.0778, "step": 41950 }, { "epoch": 1.2409061335541491, "grad_norm": 1.8042985200881958, "learning_rate": 3.752314688138288e-06, "loss": 0.0866, "step": 41960 }, { "epoch": 1.2412018690483233, "grad_norm": 0.9711881279945374, "learning_rate": 3.752187998219348e-06, "loss": 0.0844, "step": 41970 }, { "epoch": 1.2414976045424972, "grad_norm": 0.5719006657600403, "learning_rate": 3.752061308300409e-06, "loss": 0.0684, "step": 41980 }, { "epoch": 1.2417933400366712, "grad_norm": 0.7609742879867554, "learning_rate": 3.7519346183814694e-06, "loss": 0.1016, "step": 41990 }, { "epoch": 1.2420890755308451, "grad_norm": 1.0162383317947388, "learning_rate": 3.75180792846253e-06, "loss": 0.1006, "step": 42000 }, { "epoch": 1.2423848110250193, "grad_norm": 1.74953293800354, "learning_rate": 3.7516812385435905e-06, "loss": 0.0917, "step": 42010 }, { "epoch": 1.2426805465191932, "grad_norm": 0.7528865337371826, "learning_rate": 3.7515545486246513e-06, "loss": 0.1109, "step": 42020 }, { "epoch": 1.2429762820133672, "grad_norm": 0.9903630614280701, "learning_rate": 3.751427858705712e-06, "loss": 0.0846, "step": 42030 }, { "epoch": 1.2432720175075413, "grad_norm": 0.8239556550979614, "learning_rate": 3.751301168786773e-06, "loss": 0.0986, "step": 42040 }, { "epoch": 1.2435677530017153, "grad_norm": 0.7768332958221436, "learning_rate": 3.7511744788678333e-06, "loss": 0.0945, "step": 42050 }, { "epoch": 1.2438634884958892, "grad_norm": 0.9697291851043701, "learning_rate": 3.751047788948894e-06, "loss": 0.0605, "step": 42060 }, { "epoch": 1.2441592239900632, "grad_norm": 0.5616796016693115, "learning_rate": 3.7509210990299544e-06, "loss": 0.0718, "step": 42070 }, { "epoch": 1.2444549594842373, "grad_norm": 0.8037534952163696, "learning_rate": 3.750794409111015e-06, "loss": 0.1161, "step": 42080 }, { "epoch": 1.2447506949784113, "grad_norm": 1.1613843441009521, "learning_rate": 3.7506677191920756e-06, "loss": 0.0947, "step": 42090 }, { "epoch": 1.2450464304725852, "grad_norm": 1.2851982116699219, "learning_rate": 3.7505410292731364e-06, "loss": 0.0874, "step": 42100 }, { "epoch": 1.2453421659667594, "grad_norm": 1.0499430894851685, "learning_rate": 3.750414339354197e-06, "loss": 0.0876, "step": 42110 }, { "epoch": 1.2456379014609333, "grad_norm": 0.8069732189178467, "learning_rate": 3.750287649435258e-06, "loss": 0.0985, "step": 42120 }, { "epoch": 1.2459336369551073, "grad_norm": 0.9920420050621033, "learning_rate": 3.7501609595163183e-06, "loss": 0.1011, "step": 42130 }, { "epoch": 1.2462293724492814, "grad_norm": 0.8953287601470947, "learning_rate": 3.750034269597379e-06, "loss": 0.0831, "step": 42140 }, { "epoch": 1.2465251079434554, "grad_norm": 1.4572566747665405, "learning_rate": 3.7499075796784395e-06, "loss": 0.0781, "step": 42150 }, { "epoch": 1.2468208434376293, "grad_norm": 0.7372402548789978, "learning_rate": 3.7497808897595002e-06, "loss": 0.0653, "step": 42160 }, { "epoch": 1.2471165789318035, "grad_norm": 0.9245830774307251, "learning_rate": 3.7496541998405606e-06, "loss": 0.0835, "step": 42170 }, { "epoch": 1.2474123144259774, "grad_norm": 0.909487247467041, "learning_rate": 3.7495275099216214e-06, "loss": 0.0884, "step": 42180 }, { "epoch": 1.2477080499201514, "grad_norm": 0.9915205240249634, "learning_rate": 3.749400820002682e-06, "loss": 0.098, "step": 42190 }, { "epoch": 1.2480037854143253, "grad_norm": 1.3773252964019775, "learning_rate": 3.749274130083743e-06, "loss": 0.1019, "step": 42200 }, { "epoch": 1.2482995209084995, "grad_norm": 0.6992997527122498, "learning_rate": 3.7491474401648033e-06, "loss": 0.075, "step": 42210 }, { "epoch": 1.2485952564026734, "grad_norm": 1.2376317977905273, "learning_rate": 3.749020750245864e-06, "loss": 0.0772, "step": 42220 }, { "epoch": 1.2488909918968474, "grad_norm": 1.2234818935394287, "learning_rate": 3.7488940603269245e-06, "loss": 0.0991, "step": 42230 }, { "epoch": 1.2491867273910215, "grad_norm": 0.8757628798484802, "learning_rate": 3.7487673704079853e-06, "loss": 0.0818, "step": 42240 }, { "epoch": 1.2494824628851955, "grad_norm": 0.8544530868530273, "learning_rate": 3.7486406804890457e-06, "loss": 0.073, "step": 42250 }, { "epoch": 1.2497781983793694, "grad_norm": 1.2629172801971436, "learning_rate": 3.7485139905701064e-06, "loss": 0.0937, "step": 42260 }, { "epoch": 1.2500739338735434, "grad_norm": 1.0065553188323975, "learning_rate": 3.7483873006511672e-06, "loss": 0.0906, "step": 42270 }, { "epoch": 1.2503696693677175, "grad_norm": 1.0634278059005737, "learning_rate": 3.748260610732228e-06, "loss": 0.1151, "step": 42280 }, { "epoch": 1.2506654048618915, "grad_norm": 0.7679752111434937, "learning_rate": 3.7481339208132884e-06, "loss": 0.078, "step": 42290 }, { "epoch": 1.2509611403560656, "grad_norm": 1.067168951034546, "learning_rate": 3.7480072308943488e-06, "loss": 0.0851, "step": 42300 }, { "epoch": 1.2512568758502396, "grad_norm": 0.7206249833106995, "learning_rate": 3.7478805409754095e-06, "loss": 0.0728, "step": 42310 }, { "epoch": 1.2515526113444135, "grad_norm": 0.9229725003242493, "learning_rate": 3.74775385105647e-06, "loss": 0.0799, "step": 42320 }, { "epoch": 1.2518483468385875, "grad_norm": 1.5970432758331299, "learning_rate": 3.7476271611375307e-06, "loss": 0.1153, "step": 42330 }, { "epoch": 1.2521440823327616, "grad_norm": 1.4175281524658203, "learning_rate": 3.747500471218591e-06, "loss": 0.0741, "step": 42340 }, { "epoch": 1.2524398178269356, "grad_norm": 0.9177912473678589, "learning_rate": 3.7473737812996523e-06, "loss": 0.0902, "step": 42350 }, { "epoch": 1.2527355533211095, "grad_norm": 0.7890149354934692, "learning_rate": 3.7472470913807126e-06, "loss": 0.0868, "step": 42360 }, { "epoch": 1.2530312888152837, "grad_norm": 1.162296175956726, "learning_rate": 3.7471204014617734e-06, "loss": 0.0835, "step": 42370 }, { "epoch": 1.2533270243094576, "grad_norm": 0.9866397976875305, "learning_rate": 3.746993711542834e-06, "loss": 0.0865, "step": 42380 }, { "epoch": 1.2536227598036316, "grad_norm": 0.7926512360572815, "learning_rate": 3.7468670216238946e-06, "loss": 0.0994, "step": 42390 }, { "epoch": 1.2539184952978055, "grad_norm": 0.9691844582557678, "learning_rate": 3.746740331704955e-06, "loss": 0.0851, "step": 42400 }, { "epoch": 1.2542142307919797, "grad_norm": 0.29222601652145386, "learning_rate": 3.7466136417860157e-06, "loss": 0.066, "step": 42410 }, { "epoch": 1.2545099662861536, "grad_norm": 0.9797205328941345, "learning_rate": 3.746486951867076e-06, "loss": 0.0781, "step": 42420 }, { "epoch": 1.2548057017803278, "grad_norm": 1.1672555208206177, "learning_rate": 3.7463602619481373e-06, "loss": 0.0959, "step": 42430 }, { "epoch": 1.2551014372745017, "grad_norm": 0.9763263463973999, "learning_rate": 3.7462335720291977e-06, "loss": 0.0825, "step": 42440 }, { "epoch": 1.2553971727686757, "grad_norm": 1.0554226636886597, "learning_rate": 3.7461068821102585e-06, "loss": 0.0933, "step": 42450 }, { "epoch": 1.2556929082628496, "grad_norm": 1.1301542520523071, "learning_rate": 3.745980192191319e-06, "loss": 0.0703, "step": 42460 }, { "epoch": 1.2559886437570238, "grad_norm": 1.2757847309112549, "learning_rate": 3.7458535022723796e-06, "loss": 0.0942, "step": 42470 }, { "epoch": 1.2562843792511977, "grad_norm": 0.7504573464393616, "learning_rate": 3.74572681235344e-06, "loss": 0.0868, "step": 42480 }, { "epoch": 1.2565801147453717, "grad_norm": 0.6021506190299988, "learning_rate": 3.745600122434501e-06, "loss": 0.0865, "step": 42490 }, { "epoch": 1.2568758502395458, "grad_norm": 0.6118834614753723, "learning_rate": 3.745473432515561e-06, "loss": 0.076, "step": 42500 }, { "epoch": 1.2571715857337198, "grad_norm": 0.5101650953292847, "learning_rate": 3.7453467425966224e-06, "loss": 0.0716, "step": 42510 }, { "epoch": 1.2574673212278937, "grad_norm": 0.663946807384491, "learning_rate": 3.7452200526776827e-06, "loss": 0.072, "step": 42520 }, { "epoch": 1.2577630567220677, "grad_norm": 0.7416878938674927, "learning_rate": 3.7450933627587435e-06, "loss": 0.0856, "step": 42530 }, { "epoch": 1.2580587922162418, "grad_norm": 0.8689663410186768, "learning_rate": 3.744966672839804e-06, "loss": 0.0896, "step": 42540 }, { "epoch": 1.2583545277104158, "grad_norm": 0.953608512878418, "learning_rate": 3.7448399829208647e-06, "loss": 0.0809, "step": 42550 }, { "epoch": 1.25865026320459, "grad_norm": 1.019813060760498, "learning_rate": 3.744713293001925e-06, "loss": 0.0701, "step": 42560 }, { "epoch": 1.258945998698764, "grad_norm": 0.9027200937271118, "learning_rate": 3.744586603082986e-06, "loss": 0.0917, "step": 42570 }, { "epoch": 1.2592417341929378, "grad_norm": 1.0809786319732666, "learning_rate": 3.744459913164046e-06, "loss": 0.0809, "step": 42580 }, { "epoch": 1.2595374696871118, "grad_norm": 0.9582976698875427, "learning_rate": 3.7443332232451074e-06, "loss": 0.0874, "step": 42590 }, { "epoch": 1.2598332051812857, "grad_norm": 1.597115159034729, "learning_rate": 3.7442065333261678e-06, "loss": 0.0807, "step": 42600 }, { "epoch": 1.2601289406754599, "grad_norm": 0.8743013143539429, "learning_rate": 3.7440798434072286e-06, "loss": 0.0764, "step": 42610 }, { "epoch": 1.2604246761696338, "grad_norm": 1.1334775686264038, "learning_rate": 3.743953153488289e-06, "loss": 0.0937, "step": 42620 }, { "epoch": 1.260720411663808, "grad_norm": 0.6754427552223206, "learning_rate": 3.7438264635693497e-06, "loss": 0.0813, "step": 42630 }, { "epoch": 1.261016147157982, "grad_norm": 1.3009501695632935, "learning_rate": 3.74369977365041e-06, "loss": 0.076, "step": 42640 }, { "epoch": 1.2613118826521559, "grad_norm": 0.7376998662948608, "learning_rate": 3.743573083731471e-06, "loss": 0.0723, "step": 42650 }, { "epoch": 1.2616076181463298, "grad_norm": 1.0029456615447998, "learning_rate": 3.7434463938125312e-06, "loss": 0.0933, "step": 42660 }, { "epoch": 1.261903353640504, "grad_norm": 0.5969700813293457, "learning_rate": 3.7433197038935925e-06, "loss": 0.0904, "step": 42670 }, { "epoch": 1.262199089134678, "grad_norm": 1.3979071378707886, "learning_rate": 3.743193013974653e-06, "loss": 0.0916, "step": 42680 }, { "epoch": 1.2624948246288519, "grad_norm": 0.8554077744483948, "learning_rate": 3.7430663240557136e-06, "loss": 0.0807, "step": 42690 }, { "epoch": 1.262790560123026, "grad_norm": 0.7644006609916687, "learning_rate": 3.742939634136774e-06, "loss": 0.0909, "step": 42700 }, { "epoch": 1.2630862956172, "grad_norm": 1.2914291620254517, "learning_rate": 3.7428129442178348e-06, "loss": 0.0661, "step": 42710 }, { "epoch": 1.263382031111374, "grad_norm": 1.3546793460845947, "learning_rate": 3.742686254298895e-06, "loss": 0.0946, "step": 42720 }, { "epoch": 1.2636777666055479, "grad_norm": 1.379703402519226, "learning_rate": 3.7425595643799555e-06, "loss": 0.0946, "step": 42730 }, { "epoch": 1.263973502099722, "grad_norm": 1.0099979639053345, "learning_rate": 3.7424328744610163e-06, "loss": 0.0839, "step": 42740 }, { "epoch": 1.264269237593896, "grad_norm": 0.353260338306427, "learning_rate": 3.742306184542077e-06, "loss": 0.0849, "step": 42750 }, { "epoch": 1.2645649730880701, "grad_norm": 1.0942612886428833, "learning_rate": 3.742179494623138e-06, "loss": 0.0717, "step": 42760 }, { "epoch": 1.264860708582244, "grad_norm": 0.8916851282119751, "learning_rate": 3.7420528047041982e-06, "loss": 0.0804, "step": 42770 }, { "epoch": 1.265156444076418, "grad_norm": 0.7828767895698547, "learning_rate": 3.741926114785259e-06, "loss": 0.0892, "step": 42780 }, { "epoch": 1.265452179570592, "grad_norm": 1.0356589555740356, "learning_rate": 3.7417994248663194e-06, "loss": 0.0889, "step": 42790 }, { "epoch": 1.2657479150647661, "grad_norm": 0.8304154276847839, "learning_rate": 3.74167273494738e-06, "loss": 0.0816, "step": 42800 }, { "epoch": 1.26604365055894, "grad_norm": 0.9028463959693909, "learning_rate": 3.7415460450284406e-06, "loss": 0.0663, "step": 42810 }, { "epoch": 1.266339386053114, "grad_norm": 0.7720123529434204, "learning_rate": 3.7414193551095013e-06, "loss": 0.0891, "step": 42820 }, { "epoch": 1.2666351215472882, "grad_norm": 0.6261732578277588, "learning_rate": 3.741292665190562e-06, "loss": 0.086, "step": 42830 }, { "epoch": 1.2669308570414621, "grad_norm": 1.5444706678390503, "learning_rate": 3.741165975271623e-06, "loss": 0.0827, "step": 42840 }, { "epoch": 1.267226592535636, "grad_norm": 1.2807399034500122, "learning_rate": 3.7410392853526833e-06, "loss": 0.094, "step": 42850 }, { "epoch": 1.26752232802981, "grad_norm": 0.7631714940071106, "learning_rate": 3.740912595433744e-06, "loss": 0.0789, "step": 42860 }, { "epoch": 1.2678180635239842, "grad_norm": 0.7240846753120422, "learning_rate": 3.7407859055148044e-06, "loss": 0.0921, "step": 42870 }, { "epoch": 1.2681137990181581, "grad_norm": 0.9625108242034912, "learning_rate": 3.7406592155958652e-06, "loss": 0.0859, "step": 42880 }, { "epoch": 1.2684095345123323, "grad_norm": 0.9420056939125061, "learning_rate": 3.7405325256769256e-06, "loss": 0.0869, "step": 42890 }, { "epoch": 1.2687052700065062, "grad_norm": 0.9079645872116089, "learning_rate": 3.7404058357579864e-06, "loss": 0.0777, "step": 42900 }, { "epoch": 1.2690010055006802, "grad_norm": 0.8741405606269836, "learning_rate": 3.740279145839047e-06, "loss": 0.065, "step": 42910 }, { "epoch": 1.2692967409948541, "grad_norm": 1.4878778457641602, "learning_rate": 3.740152455920108e-06, "loss": 0.0869, "step": 42920 }, { "epoch": 1.2695924764890283, "grad_norm": 0.6329077482223511, "learning_rate": 3.7400257660011683e-06, "loss": 0.0777, "step": 42930 }, { "epoch": 1.2698882119832022, "grad_norm": 0.6655717492103577, "learning_rate": 3.739899076082229e-06, "loss": 0.0785, "step": 42940 }, { "epoch": 1.2701839474773762, "grad_norm": 0.8067567944526672, "learning_rate": 3.7397723861632895e-06, "loss": 0.0892, "step": 42950 }, { "epoch": 1.2704796829715503, "grad_norm": 0.9570068120956421, "learning_rate": 3.7396456962443503e-06, "loss": 0.083, "step": 42960 }, { "epoch": 1.2707754184657243, "grad_norm": 1.4496431350708008, "learning_rate": 3.7395190063254106e-06, "loss": 0.0877, "step": 42970 }, { "epoch": 1.2710711539598982, "grad_norm": 0.9465388059616089, "learning_rate": 3.7393923164064714e-06, "loss": 0.0998, "step": 42980 }, { "epoch": 1.2713668894540722, "grad_norm": 0.765674889087677, "learning_rate": 3.7392656264875322e-06, "loss": 0.095, "step": 42990 }, { "epoch": 1.2716626249482463, "grad_norm": 0.762030839920044, "learning_rate": 3.739138936568593e-06, "loss": 0.0762, "step": 43000 }, { "epoch": 1.2719583604424203, "grad_norm": 0.9922345876693726, "learning_rate": 3.7390122466496534e-06, "loss": 0.0682, "step": 43010 }, { "epoch": 1.2722540959365944, "grad_norm": 0.7248967885971069, "learning_rate": 3.738885556730714e-06, "loss": 0.0765, "step": 43020 }, { "epoch": 1.2725498314307684, "grad_norm": 0.9730235934257507, "learning_rate": 3.7387588668117745e-06, "loss": 0.0912, "step": 43030 }, { "epoch": 1.2728455669249423, "grad_norm": 0.9175568222999573, "learning_rate": 3.7386321768928353e-06, "loss": 0.0859, "step": 43040 }, { "epoch": 1.2731413024191163, "grad_norm": 0.6883106827735901, "learning_rate": 3.7385054869738957e-06, "loss": 0.0833, "step": 43050 }, { "epoch": 1.2734370379132902, "grad_norm": 0.9661504626274109, "learning_rate": 3.7383787970549565e-06, "loss": 0.0754, "step": 43060 }, { "epoch": 1.2737327734074644, "grad_norm": 0.8478977680206299, "learning_rate": 3.7382521071360173e-06, "loss": 0.0825, "step": 43070 }, { "epoch": 1.2740285089016383, "grad_norm": 0.9651369452476501, "learning_rate": 3.738125417217078e-06, "loss": 0.102, "step": 43080 }, { "epoch": 1.2743242443958125, "grad_norm": 0.861335039138794, "learning_rate": 3.7379987272981384e-06, "loss": 0.0891, "step": 43090 }, { "epoch": 1.2746199798899864, "grad_norm": 0.597589910030365, "learning_rate": 3.737872037379199e-06, "loss": 0.0821, "step": 43100 }, { "epoch": 1.2749157153841604, "grad_norm": 0.8681191205978394, "learning_rate": 3.7377453474602596e-06, "loss": 0.0658, "step": 43110 }, { "epoch": 1.2752114508783343, "grad_norm": 1.2090071439743042, "learning_rate": 3.7376186575413204e-06, "loss": 0.0941, "step": 43120 }, { "epoch": 1.2755071863725085, "grad_norm": 0.7967703342437744, "learning_rate": 3.7374919676223807e-06, "loss": 0.0992, "step": 43130 }, { "epoch": 1.2758029218666824, "grad_norm": 0.510810375213623, "learning_rate": 3.737365277703441e-06, "loss": 0.0939, "step": 43140 }, { "epoch": 1.2760986573608564, "grad_norm": 0.6028605699539185, "learning_rate": 3.7372385877845023e-06, "loss": 0.1108, "step": 43150 }, { "epoch": 1.2763943928550305, "grad_norm": 0.9033665060997009, "learning_rate": 3.7371118978655627e-06, "loss": 0.0847, "step": 43160 }, { "epoch": 1.2766901283492045, "grad_norm": 0.9766151905059814, "learning_rate": 3.7369852079466235e-06, "loss": 0.1064, "step": 43170 }, { "epoch": 1.2769858638433784, "grad_norm": 0.6988223195075989, "learning_rate": 3.736858518027684e-06, "loss": 0.0934, "step": 43180 }, { "epoch": 1.2772815993375524, "grad_norm": 0.9185852408409119, "learning_rate": 3.7367318281087446e-06, "loss": 0.0853, "step": 43190 }, { "epoch": 1.2775773348317265, "grad_norm": 1.0890977382659912, "learning_rate": 3.736605138189805e-06, "loss": 0.0795, "step": 43200 }, { "epoch": 1.2778730703259005, "grad_norm": 0.628909170627594, "learning_rate": 3.7364784482708658e-06, "loss": 0.073, "step": 43210 }, { "epoch": 1.2781688058200746, "grad_norm": 0.9467898011207581, "learning_rate": 3.736351758351926e-06, "loss": 0.0857, "step": 43220 }, { "epoch": 1.2784645413142486, "grad_norm": 0.654207706451416, "learning_rate": 3.7362250684329874e-06, "loss": 0.0972, "step": 43230 }, { "epoch": 1.2787602768084225, "grad_norm": 0.7975670695304871, "learning_rate": 3.7360983785140477e-06, "loss": 0.0745, "step": 43240 }, { "epoch": 1.2790560123025965, "grad_norm": 1.2982107400894165, "learning_rate": 3.7359716885951085e-06, "loss": 0.0879, "step": 43250 }, { "epoch": 1.2793517477967706, "grad_norm": 0.9555748105049133, "learning_rate": 3.735844998676169e-06, "loss": 0.0749, "step": 43260 }, { "epoch": 1.2796474832909446, "grad_norm": 1.5487889051437378, "learning_rate": 3.7357183087572297e-06, "loss": 0.0904, "step": 43270 }, { "epoch": 1.2799432187851185, "grad_norm": 1.153072714805603, "learning_rate": 3.73559161883829e-06, "loss": 0.0758, "step": 43280 }, { "epoch": 1.2802389542792927, "grad_norm": 0.4754239022731781, "learning_rate": 3.735464928919351e-06, "loss": 0.0738, "step": 43290 }, { "epoch": 1.2805346897734666, "grad_norm": 0.9130874276161194, "learning_rate": 3.735338239000411e-06, "loss": 0.0603, "step": 43300 }, { "epoch": 1.2808304252676406, "grad_norm": 1.0567922592163086, "learning_rate": 3.7352115490814724e-06, "loss": 0.0751, "step": 43310 }, { "epoch": 1.2811261607618145, "grad_norm": 0.805241048336029, "learning_rate": 3.7350848591625328e-06, "loss": 0.0706, "step": 43320 }, { "epoch": 1.2814218962559887, "grad_norm": 0.8794615268707275, "learning_rate": 3.7349581692435936e-06, "loss": 0.084, "step": 43330 }, { "epoch": 1.2817176317501626, "grad_norm": 1.3311282396316528, "learning_rate": 3.734831479324654e-06, "loss": 0.0805, "step": 43340 }, { "epoch": 1.2820133672443368, "grad_norm": 1.3671907186508179, "learning_rate": 3.7347047894057147e-06, "loss": 0.0987, "step": 43350 }, { "epoch": 1.2823091027385107, "grad_norm": 0.6924756765365601, "learning_rate": 3.734578099486775e-06, "loss": 0.0778, "step": 43360 }, { "epoch": 1.2826048382326847, "grad_norm": 0.8708570599555969, "learning_rate": 3.734451409567836e-06, "loss": 0.1058, "step": 43370 }, { "epoch": 1.2829005737268586, "grad_norm": 1.7461968660354614, "learning_rate": 3.7343247196488962e-06, "loss": 0.0942, "step": 43380 }, { "epoch": 1.2831963092210328, "grad_norm": 0.9486379027366638, "learning_rate": 3.7341980297299574e-06, "loss": 0.0891, "step": 43390 }, { "epoch": 1.2834920447152067, "grad_norm": 0.44732820987701416, "learning_rate": 3.734071339811018e-06, "loss": 0.0793, "step": 43400 }, { "epoch": 1.2837877802093807, "grad_norm": 0.9009105563163757, "learning_rate": 3.7339446498920786e-06, "loss": 0.0672, "step": 43410 }, { "epoch": 1.2840835157035548, "grad_norm": 0.6032166481018066, "learning_rate": 3.733817959973139e-06, "loss": 0.0877, "step": 43420 }, { "epoch": 1.2843792511977288, "grad_norm": 0.7672204971313477, "learning_rate": 3.7336912700541998e-06, "loss": 0.115, "step": 43430 }, { "epoch": 1.2846749866919027, "grad_norm": 0.652728796005249, "learning_rate": 3.73356458013526e-06, "loss": 0.0822, "step": 43440 }, { "epoch": 1.2849707221860767, "grad_norm": 0.8733640909194946, "learning_rate": 3.733437890216321e-06, "loss": 0.0869, "step": 43450 }, { "epoch": 1.2852664576802508, "grad_norm": 0.9209519624710083, "learning_rate": 3.7333112002973813e-06, "loss": 0.0712, "step": 43460 }, { "epoch": 1.2855621931744248, "grad_norm": 0.8708978891372681, "learning_rate": 3.7331845103784425e-06, "loss": 0.0919, "step": 43470 }, { "epoch": 1.285857928668599, "grad_norm": 1.379393458366394, "learning_rate": 3.733057820459503e-06, "loss": 0.0925, "step": 43480 }, { "epoch": 1.2861536641627729, "grad_norm": 0.8813328146934509, "learning_rate": 3.7329311305405636e-06, "loss": 0.0701, "step": 43490 }, { "epoch": 1.2864493996569468, "grad_norm": 0.9663726687431335, "learning_rate": 3.732804440621624e-06, "loss": 0.0741, "step": 43500 }, { "epoch": 1.2867451351511208, "grad_norm": 0.7292461395263672, "learning_rate": 3.732677750702685e-06, "loss": 0.0654, "step": 43510 }, { "epoch": 1.2870408706452947, "grad_norm": 0.9173634648323059, "learning_rate": 3.732551060783745e-06, "loss": 0.0784, "step": 43520 }, { "epoch": 1.2873366061394689, "grad_norm": 0.6245774626731873, "learning_rate": 3.732424370864806e-06, "loss": 0.0763, "step": 43530 }, { "epoch": 1.2876323416336428, "grad_norm": 0.8887973427772522, "learning_rate": 3.7322976809458663e-06, "loss": 0.0858, "step": 43540 }, { "epoch": 1.287928077127817, "grad_norm": 1.2029929161071777, "learning_rate": 3.732170991026927e-06, "loss": 0.1062, "step": 43550 }, { "epoch": 1.288223812621991, "grad_norm": 0.631034255027771, "learning_rate": 3.732044301107988e-06, "loss": 0.0639, "step": 43560 }, { "epoch": 1.2885195481161649, "grad_norm": 0.4576331079006195, "learning_rate": 3.7319176111890483e-06, "loss": 0.0948, "step": 43570 }, { "epoch": 1.2888152836103388, "grad_norm": 1.5502439737319946, "learning_rate": 3.731790921270109e-06, "loss": 0.0925, "step": 43580 }, { "epoch": 1.289111019104513, "grad_norm": 0.9716516733169556, "learning_rate": 3.7316642313511694e-06, "loss": 0.0797, "step": 43590 }, { "epoch": 1.289406754598687, "grad_norm": 0.639911949634552, "learning_rate": 3.7315375414322302e-06, "loss": 0.0841, "step": 43600 }, { "epoch": 1.2897024900928609, "grad_norm": 1.148176670074463, "learning_rate": 3.7314108515132906e-06, "loss": 0.0875, "step": 43610 }, { "epoch": 1.289998225587035, "grad_norm": 1.2581112384796143, "learning_rate": 3.7312841615943514e-06, "loss": 0.0779, "step": 43620 }, { "epoch": 1.290293961081209, "grad_norm": 0.7804326415061951, "learning_rate": 3.731157471675412e-06, "loss": 0.0885, "step": 43630 }, { "epoch": 1.290589696575383, "grad_norm": 0.6628298759460449, "learning_rate": 3.731030781756473e-06, "loss": 0.0828, "step": 43640 }, { "epoch": 1.2908854320695569, "grad_norm": 0.6661248803138733, "learning_rate": 3.7309040918375333e-06, "loss": 0.0731, "step": 43650 }, { "epoch": 1.291181167563731, "grad_norm": 0.9031774997711182, "learning_rate": 3.730777401918594e-06, "loss": 0.0683, "step": 43660 }, { "epoch": 1.291476903057905, "grad_norm": 0.6693060994148254, "learning_rate": 3.7306507119996545e-06, "loss": 0.0923, "step": 43670 }, { "epoch": 1.2917726385520791, "grad_norm": 0.7992461919784546, "learning_rate": 3.7305240220807153e-06, "loss": 0.0761, "step": 43680 }, { "epoch": 1.292068374046253, "grad_norm": 0.996830403804779, "learning_rate": 3.7303973321617756e-06, "loss": 0.08, "step": 43690 }, { "epoch": 1.292364109540427, "grad_norm": 0.97559654712677, "learning_rate": 3.7302706422428364e-06, "loss": 0.0778, "step": 43700 }, { "epoch": 1.292659845034601, "grad_norm": 0.36818963289260864, "learning_rate": 3.730143952323897e-06, "loss": 0.0598, "step": 43710 }, { "epoch": 1.2929555805287751, "grad_norm": 1.140847086906433, "learning_rate": 3.730017262404958e-06, "loss": 0.0833, "step": 43720 }, { "epoch": 1.293251316022949, "grad_norm": 1.0795364379882812, "learning_rate": 3.7298905724860184e-06, "loss": 0.0926, "step": 43730 }, { "epoch": 1.293547051517123, "grad_norm": 1.0951108932495117, "learning_rate": 3.729763882567079e-06, "loss": 0.0938, "step": 43740 }, { "epoch": 1.2938427870112972, "grad_norm": 0.9429440498352051, "learning_rate": 3.7296371926481395e-06, "loss": 0.0696, "step": 43750 }, { "epoch": 1.2941385225054711, "grad_norm": 1.3761645555496216, "learning_rate": 3.7295105027292003e-06, "loss": 0.0879, "step": 43760 }, { "epoch": 1.294434257999645, "grad_norm": 0.7962223291397095, "learning_rate": 3.7293838128102607e-06, "loss": 0.0762, "step": 43770 }, { "epoch": 1.294729993493819, "grad_norm": 0.8210790753364563, "learning_rate": 3.7292571228913215e-06, "loss": 0.0832, "step": 43780 }, { "epoch": 1.2950257289879932, "grad_norm": 0.8661232590675354, "learning_rate": 3.7291304329723822e-06, "loss": 0.107, "step": 43790 }, { "epoch": 1.2953214644821671, "grad_norm": 0.8177246451377869, "learning_rate": 3.729003743053443e-06, "loss": 0.0754, "step": 43800 }, { "epoch": 1.2956171999763413, "grad_norm": 0.7047896385192871, "learning_rate": 3.7288770531345034e-06, "loss": 0.0783, "step": 43810 }, { "epoch": 1.2959129354705152, "grad_norm": 0.9570768475532532, "learning_rate": 3.728750363215564e-06, "loss": 0.0988, "step": 43820 }, { "epoch": 1.2962086709646892, "grad_norm": 0.8821380734443665, "learning_rate": 3.7286236732966246e-06, "loss": 0.0878, "step": 43830 }, { "epoch": 1.2965044064588631, "grad_norm": 0.7103534936904907, "learning_rate": 3.7284969833776854e-06, "loss": 0.0847, "step": 43840 }, { "epoch": 1.2968001419530373, "grad_norm": 0.9592472314834595, "learning_rate": 3.7283702934587457e-06, "loss": 0.1006, "step": 43850 }, { "epoch": 1.2970958774472112, "grad_norm": 0.8995451331138611, "learning_rate": 3.7282436035398065e-06, "loss": 0.0586, "step": 43860 }, { "epoch": 1.2973916129413852, "grad_norm": 0.719965934753418, "learning_rate": 3.7281169136208673e-06, "loss": 0.0866, "step": 43870 }, { "epoch": 1.2976873484355593, "grad_norm": 0.5722550749778748, "learning_rate": 3.727990223701928e-06, "loss": 0.084, "step": 43880 }, { "epoch": 1.2979830839297333, "grad_norm": 1.1210379600524902, "learning_rate": 3.7278635337829885e-06, "loss": 0.091, "step": 43890 }, { "epoch": 1.2982788194239072, "grad_norm": 1.0515819787979126, "learning_rate": 3.7277368438640492e-06, "loss": 0.0865, "step": 43900 }, { "epoch": 1.2985745549180812, "grad_norm": 0.9628286957740784, "learning_rate": 3.7276101539451096e-06, "loss": 0.0761, "step": 43910 }, { "epoch": 1.2988702904122553, "grad_norm": 1.038271188735962, "learning_rate": 3.7274834640261704e-06, "loss": 0.084, "step": 43920 }, { "epoch": 1.2991660259064293, "grad_norm": 0.5708283185958862, "learning_rate": 3.7273567741072308e-06, "loss": 0.0761, "step": 43930 }, { "epoch": 1.2994617614006034, "grad_norm": 0.9989637136459351, "learning_rate": 3.7272300841882916e-06, "loss": 0.0712, "step": 43940 }, { "epoch": 1.2997574968947774, "grad_norm": 0.9162495732307434, "learning_rate": 3.7271033942693523e-06, "loss": 0.075, "step": 43950 }, { "epoch": 1.3000532323889513, "grad_norm": 0.9074941277503967, "learning_rate": 3.7269767043504127e-06, "loss": 0.0611, "step": 43960 }, { "epoch": 1.3003489678831253, "grad_norm": 0.7521846890449524, "learning_rate": 3.7268500144314735e-06, "loss": 0.0792, "step": 43970 }, { "epoch": 1.3006447033772992, "grad_norm": 1.0527218580245972, "learning_rate": 3.726723324512534e-06, "loss": 0.0927, "step": 43980 }, { "epoch": 1.3009404388714734, "grad_norm": 0.7043496370315552, "learning_rate": 3.7265966345935947e-06, "loss": 0.0828, "step": 43990 }, { "epoch": 1.3012361743656473, "grad_norm": 0.3945295214653015, "learning_rate": 3.726469944674655e-06, "loss": 0.0822, "step": 44000 }, { "epoch": 1.3015319098598215, "grad_norm": 0.7499088644981384, "learning_rate": 3.726343254755716e-06, "loss": 0.0543, "step": 44010 }, { "epoch": 1.3018276453539954, "grad_norm": 1.2315269708633423, "learning_rate": 3.726216564836776e-06, "loss": 0.085, "step": 44020 }, { "epoch": 1.3021233808481694, "grad_norm": 1.4677001237869263, "learning_rate": 3.7260898749178374e-06, "loss": 0.0923, "step": 44030 }, { "epoch": 1.3024191163423433, "grad_norm": 1.2807034254074097, "learning_rate": 3.7259631849988978e-06, "loss": 0.104, "step": 44040 }, { "epoch": 1.3027148518365175, "grad_norm": 1.1317445039749146, "learning_rate": 3.7258364950799585e-06, "loss": 0.077, "step": 44050 }, { "epoch": 1.3030105873306914, "grad_norm": 0.7308432459831238, "learning_rate": 3.725709805161019e-06, "loss": 0.0748, "step": 44060 }, { "epoch": 1.3033063228248654, "grad_norm": 0.8053468465805054, "learning_rate": 3.7255831152420797e-06, "loss": 0.0927, "step": 44070 }, { "epoch": 1.3036020583190395, "grad_norm": 1.1577855348587036, "learning_rate": 3.72545642532314e-06, "loss": 0.1078, "step": 44080 }, { "epoch": 1.3038977938132135, "grad_norm": 1.081749439239502, "learning_rate": 3.725329735404201e-06, "loss": 0.0955, "step": 44090 }, { "epoch": 1.3041935293073874, "grad_norm": 1.0886342525482178, "learning_rate": 3.7252030454852612e-06, "loss": 0.0927, "step": 44100 }, { "epoch": 1.3044892648015614, "grad_norm": 1.4351131916046143, "learning_rate": 3.7250763555663224e-06, "loss": 0.0846, "step": 44110 }, { "epoch": 1.3047850002957355, "grad_norm": 0.6635094881057739, "learning_rate": 3.724949665647383e-06, "loss": 0.0661, "step": 44120 }, { "epoch": 1.3050807357899095, "grad_norm": 1.5276963710784912, "learning_rate": 3.7248229757284436e-06, "loss": 0.0901, "step": 44130 }, { "epoch": 1.3053764712840836, "grad_norm": 0.9673143625259399, "learning_rate": 3.724696285809504e-06, "loss": 0.1026, "step": 44140 }, { "epoch": 1.3056722067782576, "grad_norm": 0.8388931155204773, "learning_rate": 3.7245695958905647e-06, "loss": 0.0889, "step": 44150 }, { "epoch": 1.3059679422724315, "grad_norm": 1.0816324949264526, "learning_rate": 3.724442905971625e-06, "loss": 0.08, "step": 44160 }, { "epoch": 1.3062636777666055, "grad_norm": 1.1855289936065674, "learning_rate": 3.724316216052686e-06, "loss": 0.0944, "step": 44170 }, { "epoch": 1.3065594132607796, "grad_norm": 0.978905975818634, "learning_rate": 3.7241895261337463e-06, "loss": 0.09, "step": 44180 }, { "epoch": 1.3068551487549536, "grad_norm": 0.9102829098701477, "learning_rate": 3.7240628362148075e-06, "loss": 0.1158, "step": 44190 }, { "epoch": 1.3071508842491275, "grad_norm": 0.6333782076835632, "learning_rate": 3.723936146295868e-06, "loss": 0.0765, "step": 44200 }, { "epoch": 1.3074466197433017, "grad_norm": 0.9988126158714294, "learning_rate": 3.7238094563769286e-06, "loss": 0.0802, "step": 44210 }, { "epoch": 1.3077423552374756, "grad_norm": 0.554569661617279, "learning_rate": 3.723682766457989e-06, "loss": 0.0787, "step": 44220 }, { "epoch": 1.3080380907316496, "grad_norm": 0.8207646012306213, "learning_rate": 3.7235560765390498e-06, "loss": 0.1073, "step": 44230 }, { "epoch": 1.3083338262258235, "grad_norm": 1.211997628211975, "learning_rate": 3.72342938662011e-06, "loss": 0.1181, "step": 44240 }, { "epoch": 1.3086295617199977, "grad_norm": 0.9025633335113525, "learning_rate": 3.723302696701171e-06, "loss": 0.0712, "step": 44250 }, { "epoch": 1.3089252972141716, "grad_norm": 1.4353725910186768, "learning_rate": 3.7231760067822313e-06, "loss": 0.0758, "step": 44260 }, { "epoch": 1.3092210327083458, "grad_norm": 0.8583076000213623, "learning_rate": 3.7230493168632925e-06, "loss": 0.1002, "step": 44270 }, { "epoch": 1.3095167682025197, "grad_norm": 0.8788390159606934, "learning_rate": 3.722922626944353e-06, "loss": 0.0908, "step": 44280 }, { "epoch": 1.3098125036966937, "grad_norm": 0.6310043334960938, "learning_rate": 3.7227959370254137e-06, "loss": 0.0988, "step": 44290 }, { "epoch": 1.3101082391908676, "grad_norm": 0.5096102952957153, "learning_rate": 3.722669247106474e-06, "loss": 0.0848, "step": 44300 }, { "epoch": 1.3104039746850418, "grad_norm": 0.15466471016407013, "learning_rate": 3.722542557187535e-06, "loss": 0.0678, "step": 44310 }, { "epoch": 1.3106997101792157, "grad_norm": 0.9752365350723267, "learning_rate": 3.722415867268595e-06, "loss": 0.0713, "step": 44320 }, { "epoch": 1.3109954456733897, "grad_norm": 0.8418387174606323, "learning_rate": 3.722289177349656e-06, "loss": 0.0949, "step": 44330 }, { "epoch": 1.3112911811675638, "grad_norm": 0.8246874213218689, "learning_rate": 3.7221624874307164e-06, "loss": 0.0919, "step": 44340 }, { "epoch": 1.3115869166617378, "grad_norm": 1.137337327003479, "learning_rate": 3.7220357975117776e-06, "loss": 0.0732, "step": 44350 }, { "epoch": 1.3118826521559117, "grad_norm": 0.9427660703659058, "learning_rate": 3.721909107592838e-06, "loss": 0.0815, "step": 44360 }, { "epoch": 1.3121783876500857, "grad_norm": 0.9791033267974854, "learning_rate": 3.7217824176738983e-06, "loss": 0.0841, "step": 44370 }, { "epoch": 1.3124741231442598, "grad_norm": 0.815300703048706, "learning_rate": 3.721655727754959e-06, "loss": 0.0929, "step": 44380 }, { "epoch": 1.3127698586384338, "grad_norm": 0.6792600154876709, "learning_rate": 3.7215290378360195e-06, "loss": 0.106, "step": 44390 }, { "epoch": 1.313065594132608, "grad_norm": 0.6608729958534241, "learning_rate": 3.7214023479170802e-06, "loss": 0.0767, "step": 44400 }, { "epoch": 1.3133613296267819, "grad_norm": 1.0360156297683716, "learning_rate": 3.7212756579981406e-06, "loss": 0.0634, "step": 44410 }, { "epoch": 1.3136570651209558, "grad_norm": 1.205923080444336, "learning_rate": 3.7211489680792014e-06, "loss": 0.0764, "step": 44420 }, { "epoch": 1.3139528006151298, "grad_norm": 1.2890738248825073, "learning_rate": 3.721022278160262e-06, "loss": 0.0976, "step": 44430 }, { "epoch": 1.3142485361093037, "grad_norm": 0.8048992156982422, "learning_rate": 3.720895588241323e-06, "loss": 0.0812, "step": 44440 }, { "epoch": 1.3145442716034779, "grad_norm": 1.259719729423523, "learning_rate": 3.7207688983223833e-06, "loss": 0.0912, "step": 44450 }, { "epoch": 1.3148400070976518, "grad_norm": 1.1188220977783203, "learning_rate": 3.720642208403444e-06, "loss": 0.0893, "step": 44460 }, { "epoch": 1.315135742591826, "grad_norm": 0.668399453163147, "learning_rate": 3.7205155184845045e-06, "loss": 0.078, "step": 44470 }, { "epoch": 1.315431478086, "grad_norm": 0.917708158493042, "learning_rate": 3.7203888285655653e-06, "loss": 0.079, "step": 44480 }, { "epoch": 1.3157272135801739, "grad_norm": 0.9828507900238037, "learning_rate": 3.7202621386466257e-06, "loss": 0.1006, "step": 44490 }, { "epoch": 1.3160229490743478, "grad_norm": 1.1912513971328735, "learning_rate": 3.7201354487276864e-06, "loss": 0.096, "step": 44500 }, { "epoch": 1.316318684568522, "grad_norm": 0.9684063792228699, "learning_rate": 3.7200087588087472e-06, "loss": 0.0747, "step": 44510 }, { "epoch": 1.316614420062696, "grad_norm": 0.9167999625205994, "learning_rate": 3.719882068889808e-06, "loss": 0.0786, "step": 44520 }, { "epoch": 1.3169101555568699, "grad_norm": 1.2309929132461548, "learning_rate": 3.7197553789708684e-06, "loss": 0.1033, "step": 44530 }, { "epoch": 1.317205891051044, "grad_norm": 0.9658454656600952, "learning_rate": 3.719628689051929e-06, "loss": 0.075, "step": 44540 }, { "epoch": 1.317501626545218, "grad_norm": 0.8989617228507996, "learning_rate": 3.7195019991329895e-06, "loss": 0.0789, "step": 44550 }, { "epoch": 1.317797362039392, "grad_norm": 1.0133806467056274, "learning_rate": 3.7193753092140503e-06, "loss": 0.0816, "step": 44560 }, { "epoch": 1.3180930975335658, "grad_norm": 0.7610520720481873, "learning_rate": 3.7192486192951107e-06, "loss": 0.0741, "step": 44570 }, { "epoch": 1.31838883302774, "grad_norm": 1.1608539819717407, "learning_rate": 3.7191219293761715e-06, "loss": 0.0958, "step": 44580 }, { "epoch": 1.318684568521914, "grad_norm": 1.1951385736465454, "learning_rate": 3.7189952394572323e-06, "loss": 0.0902, "step": 44590 }, { "epoch": 1.3189803040160881, "grad_norm": 0.9675915241241455, "learning_rate": 3.718868549538293e-06, "loss": 0.0846, "step": 44600 }, { "epoch": 1.319276039510262, "grad_norm": 0.8621637225151062, "learning_rate": 3.7187418596193534e-06, "loss": 0.0688, "step": 44610 }, { "epoch": 1.319571775004436, "grad_norm": 1.120537519454956, "learning_rate": 3.7186151697004142e-06, "loss": 0.0941, "step": 44620 }, { "epoch": 1.31986751049861, "grad_norm": 0.6851926445960999, "learning_rate": 3.7184884797814746e-06, "loss": 0.0989, "step": 44630 }, { "epoch": 1.3201632459927841, "grad_norm": 0.6930907964706421, "learning_rate": 3.7183617898625354e-06, "loss": 0.0881, "step": 44640 }, { "epoch": 1.320458981486958, "grad_norm": 0.8704269528388977, "learning_rate": 3.7182350999435957e-06, "loss": 0.0884, "step": 44650 }, { "epoch": 1.320754716981132, "grad_norm": 0.6113861799240112, "learning_rate": 3.7181084100246565e-06, "loss": 0.0663, "step": 44660 }, { "epoch": 1.3210504524753062, "grad_norm": 1.240067720413208, "learning_rate": 3.7179817201057173e-06, "loss": 0.1004, "step": 44670 }, { "epoch": 1.3213461879694801, "grad_norm": 0.5304318070411682, "learning_rate": 3.717855030186778e-06, "loss": 0.0888, "step": 44680 }, { "epoch": 1.321641923463654, "grad_norm": 0.7441277503967285, "learning_rate": 3.7177283402678385e-06, "loss": 0.0891, "step": 44690 }, { "epoch": 1.321937658957828, "grad_norm": 0.8275614380836487, "learning_rate": 3.7176016503488993e-06, "loss": 0.1034, "step": 44700 }, { "epoch": 1.3222333944520022, "grad_norm": 0.7735366225242615, "learning_rate": 3.7174749604299596e-06, "loss": 0.0742, "step": 44710 }, { "epoch": 1.322529129946176, "grad_norm": 1.041877269744873, "learning_rate": 3.7173482705110204e-06, "loss": 0.0792, "step": 44720 }, { "epoch": 1.3228248654403503, "grad_norm": 0.7789503931999207, "learning_rate": 3.717221580592081e-06, "loss": 0.0917, "step": 44730 }, { "epoch": 1.3231206009345242, "grad_norm": 0.5354737639427185, "learning_rate": 3.7170948906731416e-06, "loss": 0.092, "step": 44740 }, { "epoch": 1.3234163364286982, "grad_norm": 1.0232728719711304, "learning_rate": 3.7169682007542024e-06, "loss": 0.0689, "step": 44750 }, { "epoch": 1.323712071922872, "grad_norm": 1.0596201419830322, "learning_rate": 3.716841510835263e-06, "loss": 0.0696, "step": 44760 }, { "epoch": 1.3240078074170463, "grad_norm": 1.171359658241272, "learning_rate": 3.7167148209163235e-06, "loss": 0.0913, "step": 44770 }, { "epoch": 1.3243035429112202, "grad_norm": 1.0877307653427124, "learning_rate": 3.7165881309973843e-06, "loss": 0.0878, "step": 44780 }, { "epoch": 1.3245992784053942, "grad_norm": 1.429160714149475, "learning_rate": 3.7164614410784447e-06, "loss": 0.0893, "step": 44790 }, { "epoch": 1.3248950138995683, "grad_norm": 0.8582305312156677, "learning_rate": 3.716334751159505e-06, "loss": 0.069, "step": 44800 }, { "epoch": 1.3251907493937423, "grad_norm": 0.7375635504722595, "learning_rate": 3.716208061240566e-06, "loss": 0.0637, "step": 44810 }, { "epoch": 1.3254864848879162, "grad_norm": 0.8554176092147827, "learning_rate": 3.716081371321626e-06, "loss": 0.0859, "step": 44820 }, { "epoch": 1.3257822203820901, "grad_norm": 1.4815024137496948, "learning_rate": 3.7159546814026874e-06, "loss": 0.1038, "step": 44830 }, { "epoch": 1.3260779558762643, "grad_norm": 1.240991473197937, "learning_rate": 3.7158279914837478e-06, "loss": 0.0746, "step": 44840 }, { "epoch": 1.3263736913704383, "grad_norm": 1.0871516466140747, "learning_rate": 3.7157013015648086e-06, "loss": 0.0836, "step": 44850 }, { "epoch": 1.3266694268646124, "grad_norm": 0.7904028296470642, "learning_rate": 3.715574611645869e-06, "loss": 0.0818, "step": 44860 }, { "epoch": 1.3269651623587864, "grad_norm": 1.0694999694824219, "learning_rate": 3.7154479217269297e-06, "loss": 0.0842, "step": 44870 }, { "epoch": 1.3272608978529603, "grad_norm": 0.8441120982170105, "learning_rate": 3.71532123180799e-06, "loss": 0.0953, "step": 44880 }, { "epoch": 1.3275566333471343, "grad_norm": 1.047867774963379, "learning_rate": 3.715194541889051e-06, "loss": 0.0907, "step": 44890 }, { "epoch": 1.3278523688413084, "grad_norm": 0.9388678669929504, "learning_rate": 3.7150678519701112e-06, "loss": 0.0889, "step": 44900 }, { "epoch": 1.3281481043354824, "grad_norm": 1.4458889961242676, "learning_rate": 3.7149411620511725e-06, "loss": 0.0682, "step": 44910 }, { "epoch": 1.3284438398296563, "grad_norm": 1.2549937963485718, "learning_rate": 3.714814472132233e-06, "loss": 0.0757, "step": 44920 }, { "epoch": 1.3287395753238305, "grad_norm": 1.1877310276031494, "learning_rate": 3.7146877822132936e-06, "loss": 0.0905, "step": 44930 }, { "epoch": 1.3290353108180044, "grad_norm": 0.9146619439125061, "learning_rate": 3.714561092294354e-06, "loss": 0.0988, "step": 44940 }, { "epoch": 1.3293310463121784, "grad_norm": 0.9223468899726868, "learning_rate": 3.7144344023754148e-06, "loss": 0.0884, "step": 44950 }, { "epoch": 1.3296267818063523, "grad_norm": 0.7380067110061646, "learning_rate": 3.714307712456475e-06, "loss": 0.066, "step": 44960 }, { "epoch": 1.3299225173005265, "grad_norm": 0.9913044571876526, "learning_rate": 3.714181022537536e-06, "loss": 0.0793, "step": 44970 }, { "epoch": 1.3302182527947004, "grad_norm": 1.0472137928009033, "learning_rate": 3.7140543326185963e-06, "loss": 0.1036, "step": 44980 }, { "epoch": 1.3305139882888743, "grad_norm": 0.6298155784606934, "learning_rate": 3.7139276426996575e-06, "loss": 0.0788, "step": 44990 }, { "epoch": 1.3308097237830485, "grad_norm": 0.8062241673469543, "learning_rate": 3.713800952780718e-06, "loss": 0.0879, "step": 45000 }, { "epoch": 1.3311054592772225, "grad_norm": 0.7146267294883728, "learning_rate": 3.7136742628617787e-06, "loss": 0.0766, "step": 45010 }, { "epoch": 1.3314011947713964, "grad_norm": 0.9303663372993469, "learning_rate": 3.713547572942839e-06, "loss": 0.0851, "step": 45020 }, { "epoch": 1.3316969302655703, "grad_norm": 0.7152849435806274, "learning_rate": 3.7134208830239e-06, "loss": 0.0842, "step": 45030 }, { "epoch": 1.3319926657597445, "grad_norm": 0.9818800687789917, "learning_rate": 3.71329419310496e-06, "loss": 0.0887, "step": 45040 }, { "epoch": 1.3322884012539185, "grad_norm": 0.9090149402618408, "learning_rate": 3.713167503186021e-06, "loss": 0.0906, "step": 45050 }, { "epoch": 1.3325841367480926, "grad_norm": 0.7504969239234924, "learning_rate": 3.7130408132670813e-06, "loss": 0.0885, "step": 45060 }, { "epoch": 1.3328798722422666, "grad_norm": 0.9359619617462158, "learning_rate": 3.7129141233481426e-06, "loss": 0.0715, "step": 45070 }, { "epoch": 1.3331756077364405, "grad_norm": 0.9427173733711243, "learning_rate": 3.712787433429203e-06, "loss": 0.0832, "step": 45080 }, { "epoch": 1.3334713432306144, "grad_norm": 1.070046305656433, "learning_rate": 3.7126607435102637e-06, "loss": 0.0923, "step": 45090 }, { "epoch": 1.3337670787247886, "grad_norm": 1.0158889293670654, "learning_rate": 3.712534053591324e-06, "loss": 0.1054, "step": 45100 }, { "epoch": 1.3340628142189626, "grad_norm": 1.0552258491516113, "learning_rate": 3.712407363672385e-06, "loss": 0.0823, "step": 45110 }, { "epoch": 1.3343585497131365, "grad_norm": 0.9986121654510498, "learning_rate": 3.7122806737534452e-06, "loss": 0.0769, "step": 45120 }, { "epoch": 1.3346542852073107, "grad_norm": 1.3027498722076416, "learning_rate": 3.712153983834506e-06, "loss": 0.1047, "step": 45130 }, { "epoch": 1.3349500207014846, "grad_norm": 1.0400662422180176, "learning_rate": 3.7120272939155664e-06, "loss": 0.077, "step": 45140 }, { "epoch": 1.3352457561956586, "grad_norm": 0.6446738243103027, "learning_rate": 3.7119006039966276e-06, "loss": 0.0909, "step": 45150 }, { "epoch": 1.3355414916898325, "grad_norm": 0.41337743401527405, "learning_rate": 3.711773914077688e-06, "loss": 0.0813, "step": 45160 }, { "epoch": 1.3358372271840067, "grad_norm": 0.8834086060523987, "learning_rate": 3.7116472241587488e-06, "loss": 0.0707, "step": 45170 }, { "epoch": 1.3361329626781806, "grad_norm": 1.155559778213501, "learning_rate": 3.711520534239809e-06, "loss": 0.1054, "step": 45180 }, { "epoch": 1.3364286981723548, "grad_norm": 0.7855841517448425, "learning_rate": 3.71139384432087e-06, "loss": 0.0949, "step": 45190 }, { "epoch": 1.3367244336665287, "grad_norm": 0.8778378367424011, "learning_rate": 3.7112671544019303e-06, "loss": 0.0643, "step": 45200 }, { "epoch": 1.3370201691607027, "grad_norm": 0.8851824402809143, "learning_rate": 3.7111404644829906e-06, "loss": 0.0783, "step": 45210 }, { "epoch": 1.3373159046548766, "grad_norm": 0.9146435260772705, "learning_rate": 3.7110137745640514e-06, "loss": 0.077, "step": 45220 }, { "epoch": 1.3376116401490508, "grad_norm": 1.493747591972351, "learning_rate": 3.7108870846451122e-06, "loss": 0.0922, "step": 45230 }, { "epoch": 1.3379073756432247, "grad_norm": 0.8962335586547852, "learning_rate": 3.710760394726173e-06, "loss": 0.0987, "step": 45240 }, { "epoch": 1.3382031111373986, "grad_norm": 0.7937518358230591, "learning_rate": 3.7106337048072334e-06, "loss": 0.0991, "step": 45250 }, { "epoch": 1.3384988466315728, "grad_norm": 0.8434497714042664, "learning_rate": 3.710507014888294e-06, "loss": 0.0735, "step": 45260 }, { "epoch": 1.3387945821257468, "grad_norm": 1.1099787950515747, "learning_rate": 3.7103803249693545e-06, "loss": 0.0998, "step": 45270 }, { "epoch": 1.3390903176199207, "grad_norm": 1.2126600742340088, "learning_rate": 3.7102536350504153e-06, "loss": 0.099, "step": 45280 }, { "epoch": 1.3393860531140946, "grad_norm": 0.8400696516036987, "learning_rate": 3.7101269451314757e-06, "loss": 0.0821, "step": 45290 }, { "epoch": 1.3396817886082688, "grad_norm": 0.5361392498016357, "learning_rate": 3.7100002552125365e-06, "loss": 0.0752, "step": 45300 }, { "epoch": 1.3399775241024428, "grad_norm": 1.1721726655960083, "learning_rate": 3.7098735652935973e-06, "loss": 0.0783, "step": 45310 }, { "epoch": 1.340273259596617, "grad_norm": 0.6577802300453186, "learning_rate": 3.709746875374658e-06, "loss": 0.0927, "step": 45320 }, { "epoch": 1.3405689950907909, "grad_norm": 1.5435271263122559, "learning_rate": 3.7096201854557184e-06, "loss": 0.0972, "step": 45330 }, { "epoch": 1.3408647305849648, "grad_norm": 1.0884120464324951, "learning_rate": 3.709493495536779e-06, "loss": 0.088, "step": 45340 }, { "epoch": 1.3411604660791387, "grad_norm": 0.7262629866600037, "learning_rate": 3.7093668056178396e-06, "loss": 0.0939, "step": 45350 }, { "epoch": 1.341456201573313, "grad_norm": 1.1696844100952148, "learning_rate": 3.7092401156989004e-06, "loss": 0.0715, "step": 45360 }, { "epoch": 1.3417519370674869, "grad_norm": 0.4932417571544647, "learning_rate": 3.7091134257799607e-06, "loss": 0.089, "step": 45370 }, { "epoch": 1.3420476725616608, "grad_norm": 1.2227684259414673, "learning_rate": 3.7089867358610215e-06, "loss": 0.0999, "step": 45380 }, { "epoch": 1.342343408055835, "grad_norm": 0.6464659571647644, "learning_rate": 3.7088600459420823e-06, "loss": 0.0824, "step": 45390 }, { "epoch": 1.342639143550009, "grad_norm": 1.3330715894699097, "learning_rate": 3.708733356023143e-06, "loss": 0.0703, "step": 45400 }, { "epoch": 1.3429348790441828, "grad_norm": 0.8373938798904419, "learning_rate": 3.7086066661042035e-06, "loss": 0.0879, "step": 45410 }, { "epoch": 1.3432306145383568, "grad_norm": 1.4582101106643677, "learning_rate": 3.7084799761852643e-06, "loss": 0.0935, "step": 45420 }, { "epoch": 1.343526350032531, "grad_norm": 1.57431960105896, "learning_rate": 3.7083532862663246e-06, "loss": 0.0932, "step": 45430 }, { "epoch": 1.343822085526705, "grad_norm": 1.2495242357254028, "learning_rate": 3.7082265963473854e-06, "loss": 0.0857, "step": 45440 }, { "epoch": 1.3441178210208788, "grad_norm": 0.5637765526771545, "learning_rate": 3.7080999064284458e-06, "loss": 0.0964, "step": 45450 }, { "epoch": 1.344413556515053, "grad_norm": 1.9259976148605347, "learning_rate": 3.7079732165095066e-06, "loss": 0.0946, "step": 45460 }, { "epoch": 1.344709292009227, "grad_norm": 0.9953540563583374, "learning_rate": 3.7078465265905674e-06, "loss": 0.0839, "step": 45470 }, { "epoch": 1.345005027503401, "grad_norm": 0.8435531854629517, "learning_rate": 3.707719836671628e-06, "loss": 0.1055, "step": 45480 }, { "epoch": 1.3453007629975748, "grad_norm": 0.7792770862579346, "learning_rate": 3.7075931467526885e-06, "loss": 0.0724, "step": 45490 }, { "epoch": 1.345596498491749, "grad_norm": 1.0361801385879517, "learning_rate": 3.7074664568337493e-06, "loss": 0.095, "step": 45500 }, { "epoch": 1.345892233985923, "grad_norm": 1.6425615549087524, "learning_rate": 3.7073397669148097e-06, "loss": 0.0679, "step": 45510 }, { "epoch": 1.3461879694800971, "grad_norm": 0.6475518345832825, "learning_rate": 3.7072130769958705e-06, "loss": 0.0837, "step": 45520 }, { "epoch": 1.346483704974271, "grad_norm": 0.796721875667572, "learning_rate": 3.707086387076931e-06, "loss": 0.0943, "step": 45530 }, { "epoch": 1.346779440468445, "grad_norm": 0.6890921592712402, "learning_rate": 3.7069596971579916e-06, "loss": 0.0838, "step": 45540 }, { "epoch": 1.347075175962619, "grad_norm": 1.0593359470367432, "learning_rate": 3.7068330072390524e-06, "loss": 0.0727, "step": 45550 }, { "epoch": 1.347370911456793, "grad_norm": 0.6995857357978821, "learning_rate": 3.706706317320113e-06, "loss": 0.0716, "step": 45560 }, { "epoch": 1.347666646950967, "grad_norm": 0.9979463219642639, "learning_rate": 3.7065796274011736e-06, "loss": 0.0895, "step": 45570 }, { "epoch": 1.347962382445141, "grad_norm": 0.8965262770652771, "learning_rate": 3.7064529374822343e-06, "loss": 0.0925, "step": 45580 }, { "epoch": 1.3482581179393152, "grad_norm": 0.8269041776657104, "learning_rate": 3.7063262475632947e-06, "loss": 0.1057, "step": 45590 }, { "epoch": 1.348553853433489, "grad_norm": 0.5519936680793762, "learning_rate": 3.7061995576443555e-06, "loss": 0.0736, "step": 45600 }, { "epoch": 1.348849588927663, "grad_norm": 0.6860911250114441, "learning_rate": 3.706072867725416e-06, "loss": 0.0525, "step": 45610 }, { "epoch": 1.349145324421837, "grad_norm": 0.9312728643417358, "learning_rate": 3.7059461778064762e-06, "loss": 0.079, "step": 45620 }, { "epoch": 1.3494410599160112, "grad_norm": 1.0182048082351685, "learning_rate": 3.7058194878875374e-06, "loss": 0.0707, "step": 45630 }, { "epoch": 1.349736795410185, "grad_norm": 1.8477526903152466, "learning_rate": 3.705692797968598e-06, "loss": 0.1003, "step": 45640 }, { "epoch": 1.3500325309043593, "grad_norm": 2.4278998374938965, "learning_rate": 3.7055661080496586e-06, "loss": 0.0882, "step": 45650 }, { "epoch": 1.3503282663985332, "grad_norm": 0.5277886390686035, "learning_rate": 3.705439418130719e-06, "loss": 0.0585, "step": 45660 }, { "epoch": 1.3506240018927071, "grad_norm": 1.4507395029067993, "learning_rate": 3.7053127282117798e-06, "loss": 0.0995, "step": 45670 }, { "epoch": 1.350919737386881, "grad_norm": 0.6486244201660156, "learning_rate": 3.70518603829284e-06, "loss": 0.0907, "step": 45680 }, { "epoch": 1.3512154728810553, "grad_norm": 0.8140150904655457, "learning_rate": 3.705059348373901e-06, "loss": 0.0855, "step": 45690 }, { "epoch": 1.3515112083752292, "grad_norm": 0.8804877996444702, "learning_rate": 3.7049326584549613e-06, "loss": 0.0897, "step": 45700 }, { "epoch": 1.3518069438694031, "grad_norm": 1.2690229415893555, "learning_rate": 3.7048059685360225e-06, "loss": 0.0544, "step": 45710 }, { "epoch": 1.3521026793635773, "grad_norm": 1.1479722261428833, "learning_rate": 3.704679278617083e-06, "loss": 0.0819, "step": 45720 }, { "epoch": 1.3523984148577513, "grad_norm": 0.9746156334877014, "learning_rate": 3.7045525886981436e-06, "loss": 0.0805, "step": 45730 }, { "epoch": 1.3526941503519252, "grad_norm": 0.8595405220985413, "learning_rate": 3.704425898779204e-06, "loss": 0.0714, "step": 45740 }, { "epoch": 1.3529898858460991, "grad_norm": 1.143624186515808, "learning_rate": 3.704299208860265e-06, "loss": 0.0943, "step": 45750 }, { "epoch": 1.3532856213402733, "grad_norm": 0.7318881154060364, "learning_rate": 3.704172518941325e-06, "loss": 0.0685, "step": 45760 }, { "epoch": 1.3535813568344472, "grad_norm": 0.7699687480926514, "learning_rate": 3.704045829022386e-06, "loss": 0.1087, "step": 45770 }, { "epoch": 1.3538770923286214, "grad_norm": 1.01951265335083, "learning_rate": 3.7039191391034463e-06, "loss": 0.0965, "step": 45780 }, { "epoch": 1.3541728278227954, "grad_norm": 0.934282124042511, "learning_rate": 3.7037924491845075e-06, "loss": 0.1152, "step": 45790 }, { "epoch": 1.3544685633169693, "grad_norm": 0.9208492636680603, "learning_rate": 3.703665759265568e-06, "loss": 0.0805, "step": 45800 }, { "epoch": 1.3547642988111432, "grad_norm": 1.0519943237304688, "learning_rate": 3.7035390693466287e-06, "loss": 0.0659, "step": 45810 }, { "epoch": 1.3550600343053174, "grad_norm": 1.119829535484314, "learning_rate": 3.703412379427689e-06, "loss": 0.086, "step": 45820 }, { "epoch": 1.3553557697994914, "grad_norm": 0.6164326667785645, "learning_rate": 3.70328568950875e-06, "loss": 0.0814, "step": 45830 }, { "epoch": 1.3556515052936653, "grad_norm": 0.7795289754867554, "learning_rate": 3.7031589995898102e-06, "loss": 0.0885, "step": 45840 }, { "epoch": 1.3559472407878395, "grad_norm": 1.366969108581543, "learning_rate": 3.703032309670871e-06, "loss": 0.0802, "step": 45850 }, { "epoch": 1.3562429762820134, "grad_norm": 0.8708949089050293, "learning_rate": 3.7029056197519314e-06, "loss": 0.0735, "step": 45860 }, { "epoch": 1.3565387117761873, "grad_norm": 0.8698769807815552, "learning_rate": 3.7027789298329926e-06, "loss": 0.0804, "step": 45870 }, { "epoch": 1.3568344472703613, "grad_norm": 0.9496622085571289, "learning_rate": 3.702652239914053e-06, "loss": 0.0908, "step": 45880 }, { "epoch": 1.3571301827645355, "grad_norm": 0.839159369468689, "learning_rate": 3.7025255499951137e-06, "loss": 0.0845, "step": 45890 }, { "epoch": 1.3574259182587094, "grad_norm": 0.7993208169937134, "learning_rate": 3.702398860076174e-06, "loss": 0.0867, "step": 45900 }, { "epoch": 1.3577216537528833, "grad_norm": 0.7507115602493286, "learning_rate": 3.702272170157235e-06, "loss": 0.0718, "step": 45910 }, { "epoch": 1.3580173892470575, "grad_norm": 1.4958667755126953, "learning_rate": 3.7021454802382953e-06, "loss": 0.1038, "step": 45920 }, { "epoch": 1.3583131247412314, "grad_norm": 0.7283258438110352, "learning_rate": 3.702018790319356e-06, "loss": 0.0852, "step": 45930 }, { "epoch": 1.3586088602354054, "grad_norm": 1.0813101530075073, "learning_rate": 3.7018921004004164e-06, "loss": 0.0999, "step": 45940 }, { "epoch": 1.3589045957295793, "grad_norm": 0.7209503054618835, "learning_rate": 3.7017654104814776e-06, "loss": 0.09, "step": 45950 }, { "epoch": 1.3592003312237535, "grad_norm": 0.7272310256958008, "learning_rate": 3.701638720562538e-06, "loss": 0.0848, "step": 45960 }, { "epoch": 1.3594960667179274, "grad_norm": 1.3411847352981567, "learning_rate": 3.7015120306435988e-06, "loss": 0.1094, "step": 45970 }, { "epoch": 1.3597918022121016, "grad_norm": 1.1284117698669434, "learning_rate": 3.701385340724659e-06, "loss": 0.0961, "step": 45980 }, { "epoch": 1.3600875377062756, "grad_norm": 0.9795366525650024, "learning_rate": 3.70125865080572e-06, "loss": 0.0961, "step": 45990 }, { "epoch": 1.3603832732004495, "grad_norm": 0.733097493648529, "learning_rate": 3.7011319608867803e-06, "loss": 0.0846, "step": 46000 }, { "epoch": 1.3606790086946234, "grad_norm": 0.6802700161933899, "learning_rate": 3.701005270967841e-06, "loss": 0.0784, "step": 46010 }, { "epoch": 1.3609747441887976, "grad_norm": 0.7337502837181091, "learning_rate": 3.7008785810489015e-06, "loss": 0.0854, "step": 46020 }, { "epoch": 1.3612704796829715, "grad_norm": 0.6689562797546387, "learning_rate": 3.7007518911299622e-06, "loss": 0.1109, "step": 46030 }, { "epoch": 1.3615662151771455, "grad_norm": 1.135125994682312, "learning_rate": 3.700625201211023e-06, "loss": 0.0848, "step": 46040 }, { "epoch": 1.3618619506713197, "grad_norm": 0.6026698350906372, "learning_rate": 3.7004985112920834e-06, "loss": 0.0764, "step": 46050 }, { "epoch": 1.3621576861654936, "grad_norm": 0.783457338809967, "learning_rate": 3.700371821373144e-06, "loss": 0.0652, "step": 46060 }, { "epoch": 1.3624534216596675, "grad_norm": 1.3375623226165771, "learning_rate": 3.7002451314542046e-06, "loss": 0.1066, "step": 46070 }, { "epoch": 1.3627491571538415, "grad_norm": 1.3483856916427612, "learning_rate": 3.7001184415352653e-06, "loss": 0.1006, "step": 46080 }, { "epoch": 1.3630448926480156, "grad_norm": 0.8923561573028564, "learning_rate": 3.6999917516163257e-06, "loss": 0.0756, "step": 46090 }, { "epoch": 1.3633406281421896, "grad_norm": 1.0176451206207275, "learning_rate": 3.6998650616973865e-06, "loss": 0.0712, "step": 46100 }, { "epoch": 1.3636363636363638, "grad_norm": 0.7469700574874878, "learning_rate": 3.6997383717784473e-06, "loss": 0.08, "step": 46110 }, { "epoch": 1.3639320991305377, "grad_norm": 1.1170676946640015, "learning_rate": 3.699611681859508e-06, "loss": 0.0899, "step": 46120 }, { "epoch": 1.3642278346247116, "grad_norm": 0.643509030342102, "learning_rate": 3.6994849919405684e-06, "loss": 0.0905, "step": 46130 }, { "epoch": 1.3645235701188856, "grad_norm": 0.7635863423347473, "learning_rate": 3.6993583020216292e-06, "loss": 0.0746, "step": 46140 }, { "epoch": 1.3648193056130598, "grad_norm": 0.8088040947914124, "learning_rate": 3.6992316121026896e-06, "loss": 0.0735, "step": 46150 }, { "epoch": 1.3651150411072337, "grad_norm": 0.8445755243301392, "learning_rate": 3.6991049221837504e-06, "loss": 0.0692, "step": 46160 }, { "epoch": 1.3654107766014076, "grad_norm": 0.9411683678627014, "learning_rate": 3.6989782322648108e-06, "loss": 0.082, "step": 46170 }, { "epoch": 1.3657065120955818, "grad_norm": 1.3881322145462036, "learning_rate": 3.6988515423458716e-06, "loss": 0.0912, "step": 46180 }, { "epoch": 1.3660022475897557, "grad_norm": 0.6053621172904968, "learning_rate": 3.6987248524269323e-06, "loss": 0.0904, "step": 46190 }, { "epoch": 1.3662979830839297, "grad_norm": 0.817387580871582, "learning_rate": 3.698598162507993e-06, "loss": 0.0993, "step": 46200 }, { "epoch": 1.3665937185781036, "grad_norm": 1.1311986446380615, "learning_rate": 3.6984714725890535e-06, "loss": 0.0768, "step": 46210 }, { "epoch": 1.3668894540722778, "grad_norm": 0.883080780506134, "learning_rate": 3.6983447826701143e-06, "loss": 0.081, "step": 46220 }, { "epoch": 1.3671851895664517, "grad_norm": 0.5871652364730835, "learning_rate": 3.6982180927511747e-06, "loss": 0.0996, "step": 46230 }, { "epoch": 1.367480925060626, "grad_norm": 0.9657794833183289, "learning_rate": 3.6980914028322354e-06, "loss": 0.0972, "step": 46240 }, { "epoch": 1.3677766605547999, "grad_norm": 0.7901495695114136, "learning_rate": 3.697964712913296e-06, "loss": 0.0958, "step": 46250 }, { "epoch": 1.3680723960489738, "grad_norm": 0.5019508004188538, "learning_rate": 3.6978380229943566e-06, "loss": 0.0767, "step": 46260 }, { "epoch": 1.3683681315431477, "grad_norm": 0.9628196358680725, "learning_rate": 3.6977113330754174e-06, "loss": 0.0844, "step": 46270 }, { "epoch": 1.368663867037322, "grad_norm": 0.9224172830581665, "learning_rate": 3.697584643156478e-06, "loss": 0.0999, "step": 46280 }, { "epoch": 1.3689596025314958, "grad_norm": 1.0999482870101929, "learning_rate": 3.6974579532375385e-06, "loss": 0.0941, "step": 46290 }, { "epoch": 1.3692553380256698, "grad_norm": 1.2156822681427002, "learning_rate": 3.6973312633185993e-06, "loss": 0.1074, "step": 46300 }, { "epoch": 1.369551073519844, "grad_norm": 0.4647972285747528, "learning_rate": 3.6972045733996597e-06, "loss": 0.0808, "step": 46310 }, { "epoch": 1.369846809014018, "grad_norm": 0.907709538936615, "learning_rate": 3.6970778834807205e-06, "loss": 0.0878, "step": 46320 }, { "epoch": 1.3701425445081918, "grad_norm": 0.8810322880744934, "learning_rate": 3.696951193561781e-06, "loss": 0.0953, "step": 46330 }, { "epoch": 1.3704382800023658, "grad_norm": 0.8477375507354736, "learning_rate": 3.6968245036428416e-06, "loss": 0.0984, "step": 46340 }, { "epoch": 1.37073401549654, "grad_norm": 0.9997457265853882, "learning_rate": 3.6966978137239024e-06, "loss": 0.0795, "step": 46350 }, { "epoch": 1.371029750990714, "grad_norm": 0.7188669443130493, "learning_rate": 3.6965711238049632e-06, "loss": 0.0648, "step": 46360 }, { "epoch": 1.3713254864848878, "grad_norm": 0.8876965641975403, "learning_rate": 3.6964444338860236e-06, "loss": 0.0857, "step": 46370 }, { "epoch": 1.371621221979062, "grad_norm": 1.434979796409607, "learning_rate": 3.6963177439670844e-06, "loss": 0.1138, "step": 46380 }, { "epoch": 1.371916957473236, "grad_norm": 0.7747137546539307, "learning_rate": 3.6961910540481447e-06, "loss": 0.081, "step": 46390 }, { "epoch": 1.3722126929674099, "grad_norm": 0.7781855463981628, "learning_rate": 3.6960643641292055e-06, "loss": 0.0702, "step": 46400 }, { "epoch": 1.3725084284615838, "grad_norm": 1.0412507057189941, "learning_rate": 3.695937674210266e-06, "loss": 0.0768, "step": 46410 }, { "epoch": 1.372804163955758, "grad_norm": 1.1196359395980835, "learning_rate": 3.6958109842913267e-06, "loss": 0.0944, "step": 46420 }, { "epoch": 1.373099899449932, "grad_norm": 1.605254888534546, "learning_rate": 3.6956842943723875e-06, "loss": 0.0879, "step": 46430 }, { "epoch": 1.373395634944106, "grad_norm": 0.432099312543869, "learning_rate": 3.695557604453448e-06, "loss": 0.0774, "step": 46440 }, { "epoch": 1.37369137043828, "grad_norm": 0.801089882850647, "learning_rate": 3.6954309145345086e-06, "loss": 0.0839, "step": 46450 }, { "epoch": 1.373987105932454, "grad_norm": 0.6533997058868408, "learning_rate": 3.695304224615569e-06, "loss": 0.089, "step": 46460 }, { "epoch": 1.374282841426628, "grad_norm": 0.8468542695045471, "learning_rate": 3.6951775346966298e-06, "loss": 0.0896, "step": 46470 }, { "epoch": 1.374578576920802, "grad_norm": 0.9660812616348267, "learning_rate": 3.69505084477769e-06, "loss": 0.0984, "step": 46480 }, { "epoch": 1.374874312414976, "grad_norm": 0.5811926126480103, "learning_rate": 3.694924154858751e-06, "loss": 0.0885, "step": 46490 }, { "epoch": 1.37517004790915, "grad_norm": 0.7651557922363281, "learning_rate": 3.6947974649398113e-06, "loss": 0.0675, "step": 46500 }, { "epoch": 1.3754657834033241, "grad_norm": 0.7896642088890076, "learning_rate": 3.6946707750208725e-06, "loss": 0.0584, "step": 46510 }, { "epoch": 1.375761518897498, "grad_norm": 0.5781728625297546, "learning_rate": 3.694544085101933e-06, "loss": 0.0887, "step": 46520 }, { "epoch": 1.376057254391672, "grad_norm": 1.1879384517669678, "learning_rate": 3.6944173951829937e-06, "loss": 0.0938, "step": 46530 }, { "epoch": 1.376352989885846, "grad_norm": 1.3427047729492188, "learning_rate": 3.694290705264054e-06, "loss": 0.0868, "step": 46540 }, { "epoch": 1.3766487253800201, "grad_norm": 1.0455330610275269, "learning_rate": 3.694164015345115e-06, "loss": 0.0796, "step": 46550 }, { "epoch": 1.376944460874194, "grad_norm": 0.9327178001403809, "learning_rate": 3.694037325426175e-06, "loss": 0.0934, "step": 46560 }, { "epoch": 1.3772401963683683, "grad_norm": 0.9931123852729797, "learning_rate": 3.693910635507236e-06, "loss": 0.0875, "step": 46570 }, { "epoch": 1.3775359318625422, "grad_norm": 0.835322916507721, "learning_rate": 3.6937839455882964e-06, "loss": 0.0811, "step": 46580 }, { "epoch": 1.3778316673567161, "grad_norm": 0.7967821955680847, "learning_rate": 3.6936572556693576e-06, "loss": 0.0856, "step": 46590 }, { "epoch": 1.37812740285089, "grad_norm": 0.7000260353088379, "learning_rate": 3.693530565750418e-06, "loss": 0.0813, "step": 46600 }, { "epoch": 1.3784231383450642, "grad_norm": 0.5965503454208374, "learning_rate": 3.6934038758314787e-06, "loss": 0.0885, "step": 46610 }, { "epoch": 1.3787188738392382, "grad_norm": 0.9475760459899902, "learning_rate": 3.693277185912539e-06, "loss": 0.0877, "step": 46620 }, { "epoch": 1.3790146093334121, "grad_norm": 0.7680121660232544, "learning_rate": 3.6931504959936e-06, "loss": 0.0844, "step": 46630 }, { "epoch": 1.3793103448275863, "grad_norm": 0.7193249464035034, "learning_rate": 3.6930238060746602e-06, "loss": 0.087, "step": 46640 }, { "epoch": 1.3796060803217602, "grad_norm": 0.7980409264564514, "learning_rate": 3.692897116155721e-06, "loss": 0.0834, "step": 46650 }, { "epoch": 1.3799018158159342, "grad_norm": 0.5255067348480225, "learning_rate": 3.6927704262367814e-06, "loss": 0.0885, "step": 46660 }, { "epoch": 1.3801975513101081, "grad_norm": 0.6002826690673828, "learning_rate": 3.6926437363178426e-06, "loss": 0.09, "step": 46670 }, { "epoch": 1.3804932868042823, "grad_norm": 0.7623518705368042, "learning_rate": 3.692517046398903e-06, "loss": 0.1224, "step": 46680 }, { "epoch": 1.3807890222984562, "grad_norm": 0.7259508371353149, "learning_rate": 3.6923903564799638e-06, "loss": 0.0979, "step": 46690 }, { "epoch": 1.3810847577926304, "grad_norm": 0.8145548701286316, "learning_rate": 3.692263666561024e-06, "loss": 0.0907, "step": 46700 }, { "epoch": 1.3813804932868043, "grad_norm": 0.9504180550575256, "learning_rate": 3.692136976642085e-06, "loss": 0.0941, "step": 46710 }, { "epoch": 1.3816762287809783, "grad_norm": 0.9132959842681885, "learning_rate": 3.6920102867231453e-06, "loss": 0.0959, "step": 46720 }, { "epoch": 1.3819719642751522, "grad_norm": 0.7524731755256653, "learning_rate": 3.691883596804206e-06, "loss": 0.1015, "step": 46730 }, { "epoch": 1.3822676997693264, "grad_norm": 0.6796769499778748, "learning_rate": 3.6917569068852664e-06, "loss": 0.0798, "step": 46740 }, { "epoch": 1.3825634352635003, "grad_norm": 0.7869263291358948, "learning_rate": 3.6916302169663277e-06, "loss": 0.0812, "step": 46750 }, { "epoch": 1.3828591707576743, "grad_norm": 0.7350571751594543, "learning_rate": 3.691503527047388e-06, "loss": 0.0727, "step": 46760 }, { "epoch": 1.3831549062518484, "grad_norm": 1.2204031944274902, "learning_rate": 3.691376837128449e-06, "loss": 0.1051, "step": 46770 }, { "epoch": 1.3834506417460224, "grad_norm": 0.8981227874755859, "learning_rate": 3.691250147209509e-06, "loss": 0.0882, "step": 46780 }, { "epoch": 1.3837463772401963, "grad_norm": 0.5384066700935364, "learning_rate": 3.69112345729057e-06, "loss": 0.0994, "step": 46790 }, { "epoch": 1.3840421127343703, "grad_norm": 0.6939336657524109, "learning_rate": 3.6909967673716303e-06, "loss": 0.1006, "step": 46800 }, { "epoch": 1.3843378482285444, "grad_norm": 0.6191978454589844, "learning_rate": 3.690870077452691e-06, "loss": 0.0666, "step": 46810 }, { "epoch": 1.3846335837227184, "grad_norm": 1.03658926486969, "learning_rate": 3.6907433875337515e-06, "loss": 0.0871, "step": 46820 }, { "epoch": 1.3849293192168923, "grad_norm": 0.8964992165565491, "learning_rate": 3.6906166976148127e-06, "loss": 0.0915, "step": 46830 }, { "epoch": 1.3852250547110665, "grad_norm": 1.3436825275421143, "learning_rate": 3.690490007695873e-06, "loss": 0.1047, "step": 46840 }, { "epoch": 1.3855207902052404, "grad_norm": 1.1668919324874878, "learning_rate": 3.690363317776934e-06, "loss": 0.0905, "step": 46850 }, { "epoch": 1.3858165256994144, "grad_norm": 0.6132275462150574, "learning_rate": 3.6902366278579942e-06, "loss": 0.0676, "step": 46860 }, { "epoch": 1.3861122611935883, "grad_norm": 0.5399114489555359, "learning_rate": 3.6901099379390546e-06, "loss": 0.0963, "step": 46870 }, { "epoch": 1.3864079966877625, "grad_norm": 1.21224045753479, "learning_rate": 3.6899832480201154e-06, "loss": 0.1076, "step": 46880 }, { "epoch": 1.3867037321819364, "grad_norm": 0.8013545274734497, "learning_rate": 3.6898565581011757e-06, "loss": 0.0866, "step": 46890 }, { "epoch": 1.3869994676761106, "grad_norm": 1.2656925916671753, "learning_rate": 3.6897298681822365e-06, "loss": 0.0744, "step": 46900 }, { "epoch": 1.3872952031702845, "grad_norm": 1.2063058614730835, "learning_rate": 3.6896031782632973e-06, "loss": 0.088, "step": 46910 }, { "epoch": 1.3875909386644585, "grad_norm": 1.1428937911987305, "learning_rate": 3.689476488344358e-06, "loss": 0.0873, "step": 46920 }, { "epoch": 1.3878866741586324, "grad_norm": 0.8906348347663879, "learning_rate": 3.6893497984254185e-06, "loss": 0.0972, "step": 46930 }, { "epoch": 1.3881824096528066, "grad_norm": 0.7274457812309265, "learning_rate": 3.6892231085064793e-06, "loss": 0.0722, "step": 46940 }, { "epoch": 1.3884781451469805, "grad_norm": 0.9596692323684692, "learning_rate": 3.6890964185875396e-06, "loss": 0.099, "step": 46950 }, { "epoch": 1.3887738806411545, "grad_norm": 0.535740852355957, "learning_rate": 3.6889697286686004e-06, "loss": 0.0622, "step": 46960 }, { "epoch": 1.3890696161353286, "grad_norm": 0.5741809010505676, "learning_rate": 3.688843038749661e-06, "loss": 0.0816, "step": 46970 }, { "epoch": 1.3893653516295026, "grad_norm": 0.5042871832847595, "learning_rate": 3.6887163488307216e-06, "loss": 0.0708, "step": 46980 }, { "epoch": 1.3896610871236765, "grad_norm": 0.69212806224823, "learning_rate": 3.6885896589117824e-06, "loss": 0.0785, "step": 46990 }, { "epoch": 1.3899568226178505, "grad_norm": 0.8116775155067444, "learning_rate": 3.688462968992843e-06, "loss": 0.077, "step": 47000 }, { "epoch": 1.3902525581120246, "grad_norm": 0.7716686725616455, "learning_rate": 3.6883362790739035e-06, "loss": 0.0571, "step": 47010 }, { "epoch": 1.3905482936061986, "grad_norm": 0.860797107219696, "learning_rate": 3.6882095891549643e-06, "loss": 0.0924, "step": 47020 }, { "epoch": 1.3908440291003727, "grad_norm": 0.874825656414032, "learning_rate": 3.6880828992360247e-06, "loss": 0.0952, "step": 47030 }, { "epoch": 1.3911397645945467, "grad_norm": 1.8604621887207031, "learning_rate": 3.6879562093170855e-06, "loss": 0.0975, "step": 47040 }, { "epoch": 1.3914355000887206, "grad_norm": 1.4625599384307861, "learning_rate": 3.687829519398146e-06, "loss": 0.077, "step": 47050 }, { "epoch": 1.3917312355828946, "grad_norm": 0.7105812430381775, "learning_rate": 3.6877028294792066e-06, "loss": 0.0703, "step": 47060 }, { "epoch": 1.3920269710770687, "grad_norm": 0.6804087162017822, "learning_rate": 3.6875761395602674e-06, "loss": 0.091, "step": 47070 }, { "epoch": 1.3923227065712427, "grad_norm": 0.9617413282394409, "learning_rate": 3.687449449641328e-06, "loss": 0.0858, "step": 47080 }, { "epoch": 1.3926184420654166, "grad_norm": 0.7339203357696533, "learning_rate": 3.6873227597223886e-06, "loss": 0.0874, "step": 47090 }, { "epoch": 1.3929141775595908, "grad_norm": 1.1341009140014648, "learning_rate": 3.6871960698034494e-06, "loss": 0.0832, "step": 47100 }, { "epoch": 1.3932099130537647, "grad_norm": 0.7836818695068359, "learning_rate": 3.6870693798845097e-06, "loss": 0.0698, "step": 47110 }, { "epoch": 1.3935056485479387, "grad_norm": 0.9172804355621338, "learning_rate": 3.6869426899655705e-06, "loss": 0.0905, "step": 47120 }, { "epoch": 1.3938013840421126, "grad_norm": 0.6192931532859802, "learning_rate": 3.686816000046631e-06, "loss": 0.0841, "step": 47130 }, { "epoch": 1.3940971195362868, "grad_norm": 0.9583274722099304, "learning_rate": 3.6866893101276917e-06, "loss": 0.1024, "step": 47140 }, { "epoch": 1.3943928550304607, "grad_norm": 0.7271084785461426, "learning_rate": 3.6865626202087525e-06, "loss": 0.0731, "step": 47150 }, { "epoch": 1.394688590524635, "grad_norm": 1.4034444093704224, "learning_rate": 3.6864359302898132e-06, "loss": 0.0682, "step": 47160 }, { "epoch": 1.3949843260188088, "grad_norm": 1.1526349782943726, "learning_rate": 3.6863092403708736e-06, "loss": 0.0823, "step": 47170 }, { "epoch": 1.3952800615129828, "grad_norm": 0.5589869022369385, "learning_rate": 3.6861825504519344e-06, "loss": 0.0936, "step": 47180 }, { "epoch": 1.3955757970071567, "grad_norm": 0.8710979223251343, "learning_rate": 3.6860558605329948e-06, "loss": 0.0848, "step": 47190 }, { "epoch": 1.395871532501331, "grad_norm": 0.8122715950012207, "learning_rate": 3.6859291706140556e-06, "loss": 0.0907, "step": 47200 }, { "epoch": 1.3961672679955048, "grad_norm": 0.5868943929672241, "learning_rate": 3.685802480695116e-06, "loss": 0.082, "step": 47210 }, { "epoch": 1.3964630034896788, "grad_norm": 1.259342074394226, "learning_rate": 3.6856757907761767e-06, "loss": 0.0868, "step": 47220 }, { "epoch": 1.396758738983853, "grad_norm": 0.6639541387557983, "learning_rate": 3.6855491008572375e-06, "loss": 0.0894, "step": 47230 }, { "epoch": 1.3970544744780269, "grad_norm": 1.027688980102539, "learning_rate": 3.6854224109382983e-06, "loss": 0.086, "step": 47240 }, { "epoch": 1.3973502099722008, "grad_norm": 0.8107123970985413, "learning_rate": 3.6852957210193587e-06, "loss": 0.1056, "step": 47250 }, { "epoch": 1.3976459454663748, "grad_norm": 0.8694022297859192, "learning_rate": 3.6851690311004195e-06, "loss": 0.0672, "step": 47260 }, { "epoch": 1.397941680960549, "grad_norm": 0.9895607233047485, "learning_rate": 3.68504234118148e-06, "loss": 0.0846, "step": 47270 }, { "epoch": 1.3982374164547229, "grad_norm": 1.040778636932373, "learning_rate": 3.68491565126254e-06, "loss": 0.0949, "step": 47280 }, { "epoch": 1.3985331519488968, "grad_norm": 0.8826725482940674, "learning_rate": 3.684788961343601e-06, "loss": 0.0876, "step": 47290 }, { "epoch": 1.398828887443071, "grad_norm": 0.8232002258300781, "learning_rate": 3.6846622714246613e-06, "loss": 0.0746, "step": 47300 }, { "epoch": 1.399124622937245, "grad_norm": 0.4703103303909302, "learning_rate": 3.6845355815057226e-06, "loss": 0.0579, "step": 47310 }, { "epoch": 1.3994203584314189, "grad_norm": 0.9542785286903381, "learning_rate": 3.684408891586783e-06, "loss": 0.0877, "step": 47320 }, { "epoch": 1.3997160939255928, "grad_norm": 0.6951085925102234, "learning_rate": 3.6842822016678437e-06, "loss": 0.0892, "step": 47330 }, { "epoch": 1.400011829419767, "grad_norm": 0.7526839971542358, "learning_rate": 3.684155511748904e-06, "loss": 0.0976, "step": 47340 }, { "epoch": 1.400307564913941, "grad_norm": 0.9516199231147766, "learning_rate": 3.684028821829965e-06, "loss": 0.078, "step": 47350 }, { "epoch": 1.400603300408115, "grad_norm": 0.7072054743766785, "learning_rate": 3.6839021319110252e-06, "loss": 0.0835, "step": 47360 }, { "epoch": 1.400899035902289, "grad_norm": 1.1706607341766357, "learning_rate": 3.683775441992086e-06, "loss": 0.0932, "step": 47370 }, { "epoch": 1.401194771396463, "grad_norm": 0.8364479541778564, "learning_rate": 3.6836487520731464e-06, "loss": 0.0845, "step": 47380 }, { "epoch": 1.401490506890637, "grad_norm": 0.6296427249908447, "learning_rate": 3.6835220621542076e-06, "loss": 0.0747, "step": 47390 }, { "epoch": 1.401786242384811, "grad_norm": 0.5891263484954834, "learning_rate": 3.683395372235268e-06, "loss": 0.0836, "step": 47400 }, { "epoch": 1.402081977878985, "grad_norm": 0.8927537798881531, "learning_rate": 3.6832686823163288e-06, "loss": 0.0826, "step": 47410 }, { "epoch": 1.402377713373159, "grad_norm": 0.9928381443023682, "learning_rate": 3.683141992397389e-06, "loss": 0.0971, "step": 47420 }, { "epoch": 1.4026734488673331, "grad_norm": 1.409127116203308, "learning_rate": 3.68301530247845e-06, "loss": 0.0857, "step": 47430 }, { "epoch": 1.402969184361507, "grad_norm": 0.8757569193840027, "learning_rate": 3.6828886125595103e-06, "loss": 0.0939, "step": 47440 }, { "epoch": 1.403264919855681, "grad_norm": 0.391147643327713, "learning_rate": 3.682761922640571e-06, "loss": 0.0622, "step": 47450 }, { "epoch": 1.403560655349855, "grad_norm": 0.7357069849967957, "learning_rate": 3.6826352327216314e-06, "loss": 0.0823, "step": 47460 }, { "epoch": 1.4038563908440291, "grad_norm": 1.0040255784988403, "learning_rate": 3.6825085428026926e-06, "loss": 0.0961, "step": 47470 }, { "epoch": 1.404152126338203, "grad_norm": 0.883209228515625, "learning_rate": 3.682381852883753e-06, "loss": 0.0961, "step": 47480 }, { "epoch": 1.4044478618323772, "grad_norm": 0.6384981274604797, "learning_rate": 3.682255162964814e-06, "loss": 0.0763, "step": 47490 }, { "epoch": 1.4047435973265512, "grad_norm": 0.9095750451087952, "learning_rate": 3.682128473045874e-06, "loss": 0.0826, "step": 47500 }, { "epoch": 1.4050393328207251, "grad_norm": 0.3850127160549164, "learning_rate": 3.682001783126935e-06, "loss": 0.0642, "step": 47510 }, { "epoch": 1.405335068314899, "grad_norm": 0.8583458065986633, "learning_rate": 3.6818750932079953e-06, "loss": 0.1036, "step": 47520 }, { "epoch": 1.4056308038090732, "grad_norm": 0.8494948744773865, "learning_rate": 3.681748403289056e-06, "loss": 0.11, "step": 47530 }, { "epoch": 1.4059265393032472, "grad_norm": 0.9223930835723877, "learning_rate": 3.6816217133701165e-06, "loss": 0.0857, "step": 47540 }, { "epoch": 1.4062222747974211, "grad_norm": 0.9255238175392151, "learning_rate": 3.6814950234511777e-06, "loss": 0.0973, "step": 47550 }, { "epoch": 1.4065180102915953, "grad_norm": 0.7268729209899902, "learning_rate": 3.681368333532238e-06, "loss": 0.0765, "step": 47560 }, { "epoch": 1.4068137457857692, "grad_norm": 1.1316074132919312, "learning_rate": 3.681241643613299e-06, "loss": 0.101, "step": 47570 }, { "epoch": 1.4071094812799432, "grad_norm": 0.966429591178894, "learning_rate": 3.681114953694359e-06, "loss": 0.0864, "step": 47580 }, { "epoch": 1.4074052167741171, "grad_norm": 1.1504572629928589, "learning_rate": 3.68098826377542e-06, "loss": 0.0905, "step": 47590 }, { "epoch": 1.4077009522682913, "grad_norm": 0.5457674264907837, "learning_rate": 3.6808615738564804e-06, "loss": 0.074, "step": 47600 }, { "epoch": 1.4079966877624652, "grad_norm": 0.9442660212516785, "learning_rate": 3.680734883937541e-06, "loss": 0.0779, "step": 47610 }, { "epoch": 1.4082924232566394, "grad_norm": 1.1313157081604004, "learning_rate": 3.6806081940186015e-06, "loss": 0.1039, "step": 47620 }, { "epoch": 1.4085881587508133, "grad_norm": 1.8606806993484497, "learning_rate": 3.6804815040996627e-06, "loss": 0.0849, "step": 47630 }, { "epoch": 1.4088838942449873, "grad_norm": 0.8061386346817017, "learning_rate": 3.680354814180723e-06, "loss": 0.0971, "step": 47640 }, { "epoch": 1.4091796297391612, "grad_norm": 0.9641615152359009, "learning_rate": 3.680228124261784e-06, "loss": 0.0902, "step": 47650 }, { "epoch": 1.4094753652333354, "grad_norm": 1.2707871198654175, "learning_rate": 3.6801014343428443e-06, "loss": 0.0844, "step": 47660 }, { "epoch": 1.4097711007275093, "grad_norm": 0.8973277807235718, "learning_rate": 3.679974744423905e-06, "loss": 0.0839, "step": 47670 }, { "epoch": 1.4100668362216833, "grad_norm": 1.171504259109497, "learning_rate": 3.6798480545049654e-06, "loss": 0.1052, "step": 47680 }, { "epoch": 1.4103625717158574, "grad_norm": 0.8200151324272156, "learning_rate": 3.6797213645860258e-06, "loss": 0.1038, "step": 47690 }, { "epoch": 1.4106583072100314, "grad_norm": 1.10170316696167, "learning_rate": 3.6795946746670866e-06, "loss": 0.0798, "step": 47700 }, { "epoch": 1.4109540427042053, "grad_norm": 0.32310962677001953, "learning_rate": 3.6794679847481474e-06, "loss": 0.0782, "step": 47710 }, { "epoch": 1.4112497781983793, "grad_norm": 1.0396300554275513, "learning_rate": 3.679341294829208e-06, "loss": 0.1111, "step": 47720 }, { "epoch": 1.4115455136925534, "grad_norm": 0.8301917314529419, "learning_rate": 3.6792146049102685e-06, "loss": 0.1213, "step": 47730 }, { "epoch": 1.4118412491867274, "grad_norm": 0.889616072177887, "learning_rate": 3.6790879149913293e-06, "loss": 0.0745, "step": 47740 }, { "epoch": 1.4121369846809013, "grad_norm": 0.5114882588386536, "learning_rate": 3.6789612250723897e-06, "loss": 0.0672, "step": 47750 }, { "epoch": 1.4124327201750755, "grad_norm": 0.5852167010307312, "learning_rate": 3.6788345351534505e-06, "loss": 0.0573, "step": 47760 }, { "epoch": 1.4127284556692494, "grad_norm": 1.429695963859558, "learning_rate": 3.678707845234511e-06, "loss": 0.1021, "step": 47770 }, { "epoch": 1.4130241911634234, "grad_norm": 1.0441025495529175, "learning_rate": 3.6785811553155716e-06, "loss": 0.107, "step": 47780 }, { "epoch": 1.4133199266575973, "grad_norm": 0.5806090235710144, "learning_rate": 3.6784544653966324e-06, "loss": 0.0866, "step": 47790 }, { "epoch": 1.4136156621517715, "grad_norm": 0.858780562877655, "learning_rate": 3.678327775477693e-06, "loss": 0.084, "step": 47800 }, { "epoch": 1.4139113976459454, "grad_norm": 0.9917737245559692, "learning_rate": 3.6782010855587536e-06, "loss": 0.0747, "step": 47810 }, { "epoch": 1.4142071331401196, "grad_norm": 0.7354257106781006, "learning_rate": 3.6780743956398143e-06, "loss": 0.0792, "step": 47820 }, { "epoch": 1.4145028686342935, "grad_norm": 1.2119427919387817, "learning_rate": 3.6779477057208747e-06, "loss": 0.0793, "step": 47830 }, { "epoch": 1.4147986041284675, "grad_norm": 0.8581165671348572, "learning_rate": 3.6778210158019355e-06, "loss": 0.0823, "step": 47840 }, { "epoch": 1.4150943396226414, "grad_norm": 0.8208515644073486, "learning_rate": 3.677694325882996e-06, "loss": 0.1209, "step": 47850 }, { "epoch": 1.4153900751168156, "grad_norm": 0.7330505847930908, "learning_rate": 3.6775676359640567e-06, "loss": 0.0652, "step": 47860 }, { "epoch": 1.4156858106109895, "grad_norm": 1.4970080852508545, "learning_rate": 3.6774409460451174e-06, "loss": 0.0862, "step": 47870 }, { "epoch": 1.4159815461051635, "grad_norm": 0.8171080946922302, "learning_rate": 3.6773142561261782e-06, "loss": 0.083, "step": 47880 }, { "epoch": 1.4162772815993376, "grad_norm": 1.1495420932769775, "learning_rate": 3.6771875662072386e-06, "loss": 0.0999, "step": 47890 }, { "epoch": 1.4165730170935116, "grad_norm": 1.2139970064163208, "learning_rate": 3.6770608762882994e-06, "loss": 0.1054, "step": 47900 }, { "epoch": 1.4168687525876855, "grad_norm": 0.5322687029838562, "learning_rate": 3.6769341863693598e-06, "loss": 0.0658, "step": 47910 }, { "epoch": 1.4171644880818595, "grad_norm": 1.0564031600952148, "learning_rate": 3.6768074964504205e-06, "loss": 0.0836, "step": 47920 }, { "epoch": 1.4174602235760336, "grad_norm": 0.849980354309082, "learning_rate": 3.676680806531481e-06, "loss": 0.0983, "step": 47930 }, { "epoch": 1.4177559590702076, "grad_norm": 1.1031062602996826, "learning_rate": 3.6765541166125417e-06, "loss": 0.0815, "step": 47940 }, { "epoch": 1.4180516945643817, "grad_norm": 0.9755809307098389, "learning_rate": 3.6764274266936025e-06, "loss": 0.0948, "step": 47950 }, { "epoch": 1.4183474300585557, "grad_norm": 0.7756405472755432, "learning_rate": 3.6763007367746633e-06, "loss": 0.0902, "step": 47960 }, { "epoch": 1.4186431655527296, "grad_norm": 0.8411691188812256, "learning_rate": 3.6761740468557236e-06, "loss": 0.091, "step": 47970 }, { "epoch": 1.4189389010469036, "grad_norm": 0.6997418403625488, "learning_rate": 3.6760473569367844e-06, "loss": 0.0907, "step": 47980 }, { "epoch": 1.4192346365410777, "grad_norm": 0.8726866841316223, "learning_rate": 3.675920667017845e-06, "loss": 0.0876, "step": 47990 }, { "epoch": 1.4195303720352517, "grad_norm": 0.6048980951309204, "learning_rate": 3.6757939770989056e-06, "loss": 0.0809, "step": 48000 }, { "epoch": 1.4198261075294256, "grad_norm": 0.6254958510398865, "learning_rate": 3.675667287179966e-06, "loss": 0.0775, "step": 48010 }, { "epoch": 1.4201218430235998, "grad_norm": 0.6775648593902588, "learning_rate": 3.6755405972610267e-06, "loss": 0.087, "step": 48020 }, { "epoch": 1.4204175785177737, "grad_norm": 1.3596501350402832, "learning_rate": 3.6754139073420875e-06, "loss": 0.1023, "step": 48030 }, { "epoch": 1.4207133140119477, "grad_norm": 0.6189193725585938, "learning_rate": 3.6752872174231483e-06, "loss": 0.0982, "step": 48040 }, { "epoch": 1.4210090495061216, "grad_norm": 0.6958450078964233, "learning_rate": 3.6751605275042087e-06, "loss": 0.0844, "step": 48050 }, { "epoch": 1.4213047850002958, "grad_norm": 0.9061412215232849, "learning_rate": 3.6750338375852695e-06, "loss": 0.0705, "step": 48060 }, { "epoch": 1.4216005204944697, "grad_norm": 0.8647528886795044, "learning_rate": 3.67490714766633e-06, "loss": 0.0662, "step": 48070 }, { "epoch": 1.4218962559886439, "grad_norm": 0.9055373072624207, "learning_rate": 3.6747804577473906e-06, "loss": 0.1096, "step": 48080 }, { "epoch": 1.4221919914828178, "grad_norm": 0.6069841384887695, "learning_rate": 3.674653767828451e-06, "loss": 0.076, "step": 48090 }, { "epoch": 1.4224877269769918, "grad_norm": 0.6591700315475464, "learning_rate": 3.6745270779095114e-06, "loss": 0.0908, "step": 48100 }, { "epoch": 1.4227834624711657, "grad_norm": 0.7319990396499634, "learning_rate": 3.6744003879905726e-06, "loss": 0.0781, "step": 48110 }, { "epoch": 1.4230791979653399, "grad_norm": 0.724498987197876, "learning_rate": 3.674273698071633e-06, "loss": 0.082, "step": 48120 }, { "epoch": 1.4233749334595138, "grad_norm": 1.2275325059890747, "learning_rate": 3.6741470081526937e-06, "loss": 0.1054, "step": 48130 }, { "epoch": 1.4236706689536878, "grad_norm": 0.6772985458374023, "learning_rate": 3.674020318233754e-06, "loss": 0.0904, "step": 48140 }, { "epoch": 1.423966404447862, "grad_norm": 0.677543044090271, "learning_rate": 3.673893628314815e-06, "loss": 0.0662, "step": 48150 }, { "epoch": 1.4242621399420359, "grad_norm": 0.6462500095367432, "learning_rate": 3.6737669383958753e-06, "loss": 0.0743, "step": 48160 }, { "epoch": 1.4245578754362098, "grad_norm": 0.6699581146240234, "learning_rate": 3.673640248476936e-06, "loss": 0.0674, "step": 48170 }, { "epoch": 1.4248536109303838, "grad_norm": 1.248785376548767, "learning_rate": 3.6735135585579964e-06, "loss": 0.0939, "step": 48180 }, { "epoch": 1.425149346424558, "grad_norm": 1.389331579208374, "learning_rate": 3.6733868686390576e-06, "loss": 0.0923, "step": 48190 }, { "epoch": 1.4254450819187319, "grad_norm": 0.7687870264053345, "learning_rate": 3.673260178720118e-06, "loss": 0.0744, "step": 48200 }, { "epoch": 1.4257408174129058, "grad_norm": 0.8687921166419983, "learning_rate": 3.6731334888011788e-06, "loss": 0.0825, "step": 48210 }, { "epoch": 1.42603655290708, "grad_norm": 0.6717288494110107, "learning_rate": 3.673006798882239e-06, "loss": 0.0718, "step": 48220 }, { "epoch": 1.426332288401254, "grad_norm": 0.7572656869888306, "learning_rate": 3.6728801089633e-06, "loss": 0.0934, "step": 48230 }, { "epoch": 1.4266280238954279, "grad_norm": 0.7030724883079529, "learning_rate": 3.6727534190443603e-06, "loss": 0.081, "step": 48240 }, { "epoch": 1.4269237593896018, "grad_norm": 0.9541671276092529, "learning_rate": 3.672626729125421e-06, "loss": 0.0911, "step": 48250 }, { "epoch": 1.427219494883776, "grad_norm": 0.5894238948822021, "learning_rate": 3.6725000392064815e-06, "loss": 0.0628, "step": 48260 }, { "epoch": 1.42751523037795, "grad_norm": 1.3906663656234741, "learning_rate": 3.6723733492875427e-06, "loss": 0.0944, "step": 48270 }, { "epoch": 1.427810965872124, "grad_norm": 0.7005363702774048, "learning_rate": 3.672246659368603e-06, "loss": 0.0841, "step": 48280 }, { "epoch": 1.428106701366298, "grad_norm": 0.8541318774223328, "learning_rate": 3.672119969449664e-06, "loss": 0.0834, "step": 48290 }, { "epoch": 1.428402436860472, "grad_norm": 0.8824127316474915, "learning_rate": 3.671993279530724e-06, "loss": 0.0927, "step": 48300 }, { "epoch": 1.428698172354646, "grad_norm": 0.6823508739471436, "learning_rate": 3.671866589611785e-06, "loss": 0.0749, "step": 48310 }, { "epoch": 1.42899390784882, "grad_norm": 0.7218245267868042, "learning_rate": 3.6717398996928453e-06, "loss": 0.0994, "step": 48320 }, { "epoch": 1.429289643342994, "grad_norm": 0.968693733215332, "learning_rate": 3.671613209773906e-06, "loss": 0.1089, "step": 48330 }, { "epoch": 1.429585378837168, "grad_norm": 1.0611660480499268, "learning_rate": 3.6714865198549665e-06, "loss": 0.0962, "step": 48340 }, { "epoch": 1.4298811143313421, "grad_norm": 0.8371890783309937, "learning_rate": 3.6713598299360277e-06, "loss": 0.082, "step": 48350 }, { "epoch": 1.430176849825516, "grad_norm": 0.5632766485214233, "learning_rate": 3.671233140017088e-06, "loss": 0.0747, "step": 48360 }, { "epoch": 1.43047258531969, "grad_norm": 0.7134272456169128, "learning_rate": 3.671106450098149e-06, "loss": 0.0682, "step": 48370 }, { "epoch": 1.430768320813864, "grad_norm": 0.6903653144836426, "learning_rate": 3.6709797601792092e-06, "loss": 0.0784, "step": 48380 }, { "epoch": 1.4310640563080381, "grad_norm": 0.9479851126670837, "learning_rate": 3.67085307026027e-06, "loss": 0.1057, "step": 48390 }, { "epoch": 1.431359791802212, "grad_norm": 0.5666230320930481, "learning_rate": 3.6707263803413304e-06, "loss": 0.0877, "step": 48400 }, { "epoch": 1.4316555272963862, "grad_norm": 1.0262175798416138, "learning_rate": 3.670599690422391e-06, "loss": 0.0803, "step": 48410 }, { "epoch": 1.4319512627905602, "grad_norm": 0.8582553267478943, "learning_rate": 3.6704730005034515e-06, "loss": 0.0731, "step": 48420 }, { "epoch": 1.4322469982847341, "grad_norm": 1.0251399278640747, "learning_rate": 3.6703463105845128e-06, "loss": 0.1108, "step": 48430 }, { "epoch": 1.432542733778908, "grad_norm": 0.984589695930481, "learning_rate": 3.670219620665573e-06, "loss": 0.0726, "step": 48440 }, { "epoch": 1.4328384692730822, "grad_norm": 1.1245003938674927, "learning_rate": 3.670092930746634e-06, "loss": 0.074, "step": 48450 }, { "epoch": 1.4331342047672562, "grad_norm": 1.111241102218628, "learning_rate": 3.6699662408276943e-06, "loss": 0.0962, "step": 48460 }, { "epoch": 1.4334299402614301, "grad_norm": 1.0079865455627441, "learning_rate": 3.669839550908755e-06, "loss": 0.0834, "step": 48470 }, { "epoch": 1.4337256757556043, "grad_norm": 0.6429092884063721, "learning_rate": 3.6697128609898154e-06, "loss": 0.0678, "step": 48480 }, { "epoch": 1.4340214112497782, "grad_norm": 0.6346753239631653, "learning_rate": 3.6695861710708762e-06, "loss": 0.081, "step": 48490 }, { "epoch": 1.4343171467439522, "grad_norm": 0.9704449772834778, "learning_rate": 3.6694594811519366e-06, "loss": 0.0712, "step": 48500 }, { "epoch": 1.434612882238126, "grad_norm": 0.8497135043144226, "learning_rate": 3.6693327912329974e-06, "loss": 0.0644, "step": 48510 }, { "epoch": 1.4349086177323003, "grad_norm": 0.8816795349121094, "learning_rate": 3.669206101314058e-06, "loss": 0.092, "step": 48520 }, { "epoch": 1.4352043532264742, "grad_norm": 0.9604073762893677, "learning_rate": 3.6690794113951185e-06, "loss": 0.0894, "step": 48530 }, { "epoch": 1.4355000887206484, "grad_norm": 0.853870153427124, "learning_rate": 3.6689527214761793e-06, "loss": 0.0841, "step": 48540 }, { "epoch": 1.4357958242148223, "grad_norm": 0.9127302169799805, "learning_rate": 3.6688260315572397e-06, "loss": 0.0969, "step": 48550 }, { "epoch": 1.4360915597089963, "grad_norm": 0.7748372554779053, "learning_rate": 3.6686993416383005e-06, "loss": 0.0746, "step": 48560 }, { "epoch": 1.4363872952031702, "grad_norm": 0.5672320127487183, "learning_rate": 3.668572651719361e-06, "loss": 0.105, "step": 48570 }, { "epoch": 1.4366830306973444, "grad_norm": 0.9687100648880005, "learning_rate": 3.6684459618004216e-06, "loss": 0.0922, "step": 48580 }, { "epoch": 1.4369787661915183, "grad_norm": 0.770635187625885, "learning_rate": 3.6683192718814824e-06, "loss": 0.0957, "step": 48590 }, { "epoch": 1.4372745016856923, "grad_norm": 0.8787503838539124, "learning_rate": 3.6681925819625432e-06, "loss": 0.0826, "step": 48600 }, { "epoch": 1.4375702371798664, "grad_norm": 0.6825858354568481, "learning_rate": 3.6680658920436036e-06, "loss": 0.066, "step": 48610 }, { "epoch": 1.4378659726740404, "grad_norm": 0.8051450848579407, "learning_rate": 3.6679392021246644e-06, "loss": 0.1002, "step": 48620 }, { "epoch": 1.4381617081682143, "grad_norm": 0.7324829697608948, "learning_rate": 3.6678125122057247e-06, "loss": 0.1003, "step": 48630 }, { "epoch": 1.4384574436623883, "grad_norm": 1.0249418020248413, "learning_rate": 3.6676858222867855e-06, "loss": 0.0886, "step": 48640 }, { "epoch": 1.4387531791565624, "grad_norm": 0.5584948062896729, "learning_rate": 3.667559132367846e-06, "loss": 0.0798, "step": 48650 }, { "epoch": 1.4390489146507364, "grad_norm": 1.2013647556304932, "learning_rate": 3.6674324424489067e-06, "loss": 0.0784, "step": 48660 }, { "epoch": 1.4393446501449103, "grad_norm": 0.6500933766365051, "learning_rate": 3.6673057525299675e-06, "loss": 0.0935, "step": 48670 }, { "epoch": 1.4396403856390845, "grad_norm": 0.8571261763572693, "learning_rate": 3.6671790626110283e-06, "loss": 0.085, "step": 48680 }, { "epoch": 1.4399361211332584, "grad_norm": 0.9751619100570679, "learning_rate": 3.6670523726920886e-06, "loss": 0.0951, "step": 48690 }, { "epoch": 1.4402318566274324, "grad_norm": 0.7638399004936218, "learning_rate": 3.6669256827731494e-06, "loss": 0.0791, "step": 48700 }, { "epoch": 1.4405275921216063, "grad_norm": 0.8626996874809265, "learning_rate": 3.6667989928542098e-06, "loss": 0.0829, "step": 48710 }, { "epoch": 1.4408233276157805, "grad_norm": 0.785253643989563, "learning_rate": 3.6666723029352706e-06, "loss": 0.0849, "step": 48720 }, { "epoch": 1.4411190631099544, "grad_norm": 0.659934937953949, "learning_rate": 3.666545613016331e-06, "loss": 0.0835, "step": 48730 }, { "epoch": 1.4414147986041286, "grad_norm": 1.2403051853179932, "learning_rate": 3.6664189230973917e-06, "loss": 0.0899, "step": 48740 }, { "epoch": 1.4417105340983025, "grad_norm": 0.5602385401725769, "learning_rate": 3.6662922331784525e-06, "loss": 0.0891, "step": 48750 }, { "epoch": 1.4420062695924765, "grad_norm": 0.6633550524711609, "learning_rate": 3.6661655432595133e-06, "loss": 0.0694, "step": 48760 }, { "epoch": 1.4423020050866504, "grad_norm": 0.7703213095664978, "learning_rate": 3.6660388533405737e-06, "loss": 0.087, "step": 48770 }, { "epoch": 1.4425977405808246, "grad_norm": 1.0207598209381104, "learning_rate": 3.6659121634216345e-06, "loss": 0.0907, "step": 48780 }, { "epoch": 1.4428934760749985, "grad_norm": 0.8795071840286255, "learning_rate": 3.665785473502695e-06, "loss": 0.0937, "step": 48790 }, { "epoch": 1.4431892115691725, "grad_norm": 0.8715656399726868, "learning_rate": 3.6656587835837556e-06, "loss": 0.0855, "step": 48800 }, { "epoch": 1.4434849470633466, "grad_norm": 0.5910595655441284, "learning_rate": 3.665532093664816e-06, "loss": 0.0798, "step": 48810 }, { "epoch": 1.4437806825575206, "grad_norm": 1.2376699447631836, "learning_rate": 3.6654054037458768e-06, "loss": 0.0882, "step": 48820 }, { "epoch": 1.4440764180516945, "grad_norm": 0.6067704558372498, "learning_rate": 3.6652787138269376e-06, "loss": 0.0955, "step": 48830 }, { "epoch": 1.4443721535458685, "grad_norm": 0.6804429292678833, "learning_rate": 3.6651520239079984e-06, "loss": 0.0876, "step": 48840 }, { "epoch": 1.4446678890400426, "grad_norm": 0.7751551270484924, "learning_rate": 3.6650253339890587e-06, "loss": 0.0853, "step": 48850 }, { "epoch": 1.4449636245342166, "grad_norm": 0.5968551635742188, "learning_rate": 3.6648986440701195e-06, "loss": 0.0747, "step": 48860 }, { "epoch": 1.4452593600283907, "grad_norm": 1.316393256187439, "learning_rate": 3.66477195415118e-06, "loss": 0.0959, "step": 48870 }, { "epoch": 1.4455550955225647, "grad_norm": 0.9312198162078857, "learning_rate": 3.6646452642322407e-06, "loss": 0.0845, "step": 48880 }, { "epoch": 1.4458508310167386, "grad_norm": 0.639537513256073, "learning_rate": 3.664518574313301e-06, "loss": 0.0681, "step": 48890 }, { "epoch": 1.4461465665109126, "grad_norm": 1.347847819328308, "learning_rate": 3.664391884394362e-06, "loss": 0.0908, "step": 48900 }, { "epoch": 1.4464423020050867, "grad_norm": 2.3272924423217773, "learning_rate": 3.6642651944754226e-06, "loss": 0.073, "step": 48910 }, { "epoch": 1.4467380374992607, "grad_norm": 1.0028581619262695, "learning_rate": 3.664138504556483e-06, "loss": 0.0831, "step": 48920 }, { "epoch": 1.4470337729934346, "grad_norm": 1.5805927515029907, "learning_rate": 3.6640118146375438e-06, "loss": 0.0889, "step": 48930 }, { "epoch": 1.4473295084876088, "grad_norm": 1.0012290477752686, "learning_rate": 3.663885124718604e-06, "loss": 0.0819, "step": 48940 }, { "epoch": 1.4476252439817827, "grad_norm": 0.8574801087379456, "learning_rate": 3.663758434799665e-06, "loss": 0.0902, "step": 48950 }, { "epoch": 1.4479209794759567, "grad_norm": 0.7123228311538696, "learning_rate": 3.6636317448807253e-06, "loss": 0.0642, "step": 48960 }, { "epoch": 1.4482167149701306, "grad_norm": 1.1972782611846924, "learning_rate": 3.663505054961786e-06, "loss": 0.0925, "step": 48970 }, { "epoch": 1.4485124504643048, "grad_norm": 0.664045512676239, "learning_rate": 3.6633783650428464e-06, "loss": 0.0946, "step": 48980 }, { "epoch": 1.4488081859584787, "grad_norm": 0.9386164546012878, "learning_rate": 3.6632516751239077e-06, "loss": 0.0745, "step": 48990 }, { "epoch": 1.4491039214526529, "grad_norm": 0.6997975707054138, "learning_rate": 3.663124985204968e-06, "loss": 0.0905, "step": 49000 }, { "epoch": 1.4493996569468268, "grad_norm": 0.46030184626579285, "learning_rate": 3.662998295286029e-06, "loss": 0.0758, "step": 49010 }, { "epoch": 1.4496953924410008, "grad_norm": 0.8167387247085571, "learning_rate": 3.662871605367089e-06, "loss": 0.0797, "step": 49020 }, { "epoch": 1.4499911279351747, "grad_norm": 0.7354376316070557, "learning_rate": 3.66274491544815e-06, "loss": 0.0885, "step": 49030 }, { "epoch": 1.4502868634293489, "grad_norm": 1.2387913465499878, "learning_rate": 3.6626182255292103e-06, "loss": 0.0905, "step": 49040 }, { "epoch": 1.4505825989235228, "grad_norm": 0.36444881558418274, "learning_rate": 3.662491535610271e-06, "loss": 0.0644, "step": 49050 }, { "epoch": 1.4508783344176968, "grad_norm": 0.9420848488807678, "learning_rate": 3.6623648456913315e-06, "loss": 0.0838, "step": 49060 }, { "epoch": 1.451174069911871, "grad_norm": 0.8060253858566284, "learning_rate": 3.6622381557723927e-06, "loss": 0.0866, "step": 49070 }, { "epoch": 1.4514698054060449, "grad_norm": 1.1598294973373413, "learning_rate": 3.662111465853453e-06, "loss": 0.0905, "step": 49080 }, { "epoch": 1.4517655409002188, "grad_norm": 0.7075713276863098, "learning_rate": 3.661984775934514e-06, "loss": 0.0948, "step": 49090 }, { "epoch": 1.4520612763943928, "grad_norm": 0.7225887775421143, "learning_rate": 3.6618580860155742e-06, "loss": 0.0739, "step": 49100 }, { "epoch": 1.452357011888567, "grad_norm": 1.1718952655792236, "learning_rate": 3.661731396096635e-06, "loss": 0.0724, "step": 49110 }, { "epoch": 1.4526527473827409, "grad_norm": 1.4508252143859863, "learning_rate": 3.6616047061776954e-06, "loss": 0.0895, "step": 49120 }, { "epoch": 1.4529484828769148, "grad_norm": 0.9553869366645813, "learning_rate": 3.661478016258756e-06, "loss": 0.0791, "step": 49130 }, { "epoch": 1.453244218371089, "grad_norm": 0.8923538327217102, "learning_rate": 3.6613513263398165e-06, "loss": 0.0731, "step": 49140 }, { "epoch": 1.453539953865263, "grad_norm": 0.9514589309692383, "learning_rate": 3.6612246364208777e-06, "loss": 0.0858, "step": 49150 }, { "epoch": 1.4538356893594369, "grad_norm": 0.7240941524505615, "learning_rate": 3.661097946501938e-06, "loss": 0.0824, "step": 49160 }, { "epoch": 1.4541314248536108, "grad_norm": 1.1986680030822754, "learning_rate": 3.660971256582999e-06, "loss": 0.0792, "step": 49170 }, { "epoch": 1.454427160347785, "grad_norm": 0.6760991215705872, "learning_rate": 3.6608445666640593e-06, "loss": 0.0791, "step": 49180 }, { "epoch": 1.454722895841959, "grad_norm": 1.0205925703048706, "learning_rate": 3.66071787674512e-06, "loss": 0.0927, "step": 49190 }, { "epoch": 1.455018631336133, "grad_norm": 0.6740887761116028, "learning_rate": 3.6605911868261804e-06, "loss": 0.0782, "step": 49200 }, { "epoch": 1.455314366830307, "grad_norm": 0.9797219634056091, "learning_rate": 3.6604644969072412e-06, "loss": 0.0686, "step": 49210 }, { "epoch": 1.455610102324481, "grad_norm": 1.2712386846542358, "learning_rate": 3.6603378069883016e-06, "loss": 0.0786, "step": 49220 }, { "epoch": 1.455905837818655, "grad_norm": 1.0108063220977783, "learning_rate": 3.660211117069363e-06, "loss": 0.074, "step": 49230 }, { "epoch": 1.456201573312829, "grad_norm": 0.849471390247345, "learning_rate": 3.660084427150423e-06, "loss": 0.102, "step": 49240 }, { "epoch": 1.456497308807003, "grad_norm": 0.6220207810401917, "learning_rate": 3.659957737231484e-06, "loss": 0.0701, "step": 49250 }, { "epoch": 1.456793044301177, "grad_norm": 0.9976096749305725, "learning_rate": 3.6598310473125443e-06, "loss": 0.0812, "step": 49260 }, { "epoch": 1.4570887797953511, "grad_norm": 0.8443806767463684, "learning_rate": 3.659704357393605e-06, "loss": 0.0668, "step": 49270 }, { "epoch": 1.457384515289525, "grad_norm": 1.2866837978363037, "learning_rate": 3.6595776674746655e-06, "loss": 0.0789, "step": 49280 }, { "epoch": 1.457680250783699, "grad_norm": 0.912317156791687, "learning_rate": 3.6594509775557263e-06, "loss": 0.0931, "step": 49290 }, { "epoch": 1.457975986277873, "grad_norm": 0.9013009071350098, "learning_rate": 3.6593242876367866e-06, "loss": 0.0989, "step": 49300 }, { "epoch": 1.4582717217720471, "grad_norm": 0.7112159729003906, "learning_rate": 3.659197597717848e-06, "loss": 0.0766, "step": 49310 }, { "epoch": 1.458567457266221, "grad_norm": 1.0319883823394775, "learning_rate": 3.659070907798908e-06, "loss": 0.0888, "step": 49320 }, { "epoch": 1.4588631927603952, "grad_norm": 0.6164060831069946, "learning_rate": 3.658944217879969e-06, "loss": 0.0956, "step": 49330 }, { "epoch": 1.4591589282545692, "grad_norm": 1.04552161693573, "learning_rate": 3.6588175279610294e-06, "loss": 0.0896, "step": 49340 }, { "epoch": 1.459454663748743, "grad_norm": 0.6639124155044556, "learning_rate": 3.6586908380420897e-06, "loss": 0.0845, "step": 49350 }, { "epoch": 1.459750399242917, "grad_norm": 1.547063946723938, "learning_rate": 3.6585641481231505e-06, "loss": 0.0809, "step": 49360 }, { "epoch": 1.4600461347370912, "grad_norm": 0.5141035914421082, "learning_rate": 3.658437458204211e-06, "loss": 0.0792, "step": 49370 }, { "epoch": 1.4603418702312652, "grad_norm": 0.7522023916244507, "learning_rate": 3.6583107682852717e-06, "loss": 0.0956, "step": 49380 }, { "epoch": 1.460637605725439, "grad_norm": 0.9883207082748413, "learning_rate": 3.6581840783663325e-06, "loss": 0.1006, "step": 49390 }, { "epoch": 1.4609333412196133, "grad_norm": 0.9885323643684387, "learning_rate": 3.6580573884473932e-06, "loss": 0.0886, "step": 49400 }, { "epoch": 1.4612290767137872, "grad_norm": 0.6827167272567749, "learning_rate": 3.6579306985284536e-06, "loss": 0.0834, "step": 49410 }, { "epoch": 1.4615248122079612, "grad_norm": 0.789344072341919, "learning_rate": 3.6578040086095144e-06, "loss": 0.0858, "step": 49420 }, { "epoch": 1.461820547702135, "grad_norm": 0.9158502817153931, "learning_rate": 3.6576773186905748e-06, "loss": 0.0942, "step": 49430 }, { "epoch": 1.4621162831963093, "grad_norm": 0.9321610331535339, "learning_rate": 3.6575506287716356e-06, "loss": 0.0698, "step": 49440 }, { "epoch": 1.4624120186904832, "grad_norm": 1.6133673191070557, "learning_rate": 3.657423938852696e-06, "loss": 0.1014, "step": 49450 }, { "epoch": 1.4627077541846574, "grad_norm": 0.6736146211624146, "learning_rate": 3.6572972489337567e-06, "loss": 0.0749, "step": 49460 }, { "epoch": 1.4630034896788313, "grad_norm": 0.9111492037773132, "learning_rate": 3.6571705590148175e-06, "loss": 0.0899, "step": 49470 }, { "epoch": 1.4632992251730053, "grad_norm": 0.7077203392982483, "learning_rate": 3.6570438690958783e-06, "loss": 0.0956, "step": 49480 }, { "epoch": 1.4635949606671792, "grad_norm": 0.5565915107727051, "learning_rate": 3.6569171791769387e-06, "loss": 0.0982, "step": 49490 }, { "epoch": 1.4638906961613534, "grad_norm": 0.76179438829422, "learning_rate": 3.6567904892579995e-06, "loss": 0.0882, "step": 49500 }, { "epoch": 1.4641864316555273, "grad_norm": 0.6647360324859619, "learning_rate": 3.65666379933906e-06, "loss": 0.0749, "step": 49510 }, { "epoch": 1.4644821671497013, "grad_norm": 1.2400904893875122, "learning_rate": 3.6565371094201206e-06, "loss": 0.1018, "step": 49520 }, { "epoch": 1.4647779026438754, "grad_norm": 1.445029377937317, "learning_rate": 3.656410419501181e-06, "loss": 0.0964, "step": 49530 }, { "epoch": 1.4650736381380494, "grad_norm": 0.7086266279220581, "learning_rate": 3.6562837295822418e-06, "loss": 0.0877, "step": 49540 }, { "epoch": 1.4653693736322233, "grad_norm": 0.8118572235107422, "learning_rate": 3.6561570396633026e-06, "loss": 0.1071, "step": 49550 }, { "epoch": 1.4656651091263972, "grad_norm": 0.910933792591095, "learning_rate": 3.6560303497443633e-06, "loss": 0.0664, "step": 49560 }, { "epoch": 1.4659608446205714, "grad_norm": 0.765660285949707, "learning_rate": 3.6559036598254237e-06, "loss": 0.0888, "step": 49570 }, { "epoch": 1.4662565801147454, "grad_norm": 3.0670530796051025, "learning_rate": 3.6557769699064845e-06, "loss": 0.0928, "step": 49580 }, { "epoch": 1.4665523156089195, "grad_norm": 0.8051163554191589, "learning_rate": 3.655650279987545e-06, "loss": 0.0845, "step": 49590 }, { "epoch": 1.4668480511030935, "grad_norm": 0.657451868057251, "learning_rate": 3.6555235900686057e-06, "loss": 0.0787, "step": 49600 }, { "epoch": 1.4671437865972674, "grad_norm": 0.5337772965431213, "learning_rate": 3.655396900149666e-06, "loss": 0.0921, "step": 49610 }, { "epoch": 1.4674395220914414, "grad_norm": 1.1245957612991333, "learning_rate": 3.655270210230727e-06, "loss": 0.0713, "step": 49620 }, { "epoch": 1.4677352575856153, "grad_norm": 0.9360198378562927, "learning_rate": 3.6551435203117876e-06, "loss": 0.0916, "step": 49630 }, { "epoch": 1.4680309930797895, "grad_norm": 1.100405216217041, "learning_rate": 3.6550168303928484e-06, "loss": 0.1044, "step": 49640 }, { "epoch": 1.4683267285739634, "grad_norm": 0.4993140697479248, "learning_rate": 3.6548901404739088e-06, "loss": 0.087, "step": 49650 }, { "epoch": 1.4686224640681376, "grad_norm": 0.8669780492782593, "learning_rate": 3.6547634505549695e-06, "loss": 0.093, "step": 49660 }, { "epoch": 1.4689181995623115, "grad_norm": 0.9165154099464417, "learning_rate": 3.65463676063603e-06, "loss": 0.0939, "step": 49670 }, { "epoch": 1.4692139350564855, "grad_norm": 0.8640576004981995, "learning_rate": 3.6545100707170907e-06, "loss": 0.0909, "step": 49680 }, { "epoch": 1.4695096705506594, "grad_norm": 1.0936503410339355, "learning_rate": 3.654383380798151e-06, "loss": 0.1024, "step": 49690 }, { "epoch": 1.4698054060448336, "grad_norm": 0.6615318655967712, "learning_rate": 3.654256690879212e-06, "loss": 0.0791, "step": 49700 }, { "epoch": 1.4701011415390075, "grad_norm": 0.6296750903129578, "learning_rate": 3.6541300009602726e-06, "loss": 0.064, "step": 49710 }, { "epoch": 1.4703968770331814, "grad_norm": 0.7955536246299744, "learning_rate": 3.6540033110413334e-06, "loss": 0.0788, "step": 49720 }, { "epoch": 1.4706926125273556, "grad_norm": 1.1573750972747803, "learning_rate": 3.653876621122394e-06, "loss": 0.0973, "step": 49730 }, { "epoch": 1.4709883480215296, "grad_norm": 1.333949327468872, "learning_rate": 3.6537499312034546e-06, "loss": 0.1102, "step": 49740 }, { "epoch": 1.4712840835157035, "grad_norm": 0.5268728137016296, "learning_rate": 3.653623241284515e-06, "loss": 0.0987, "step": 49750 }, { "epoch": 1.4715798190098774, "grad_norm": 1.1321375370025635, "learning_rate": 3.6534965513655753e-06, "loss": 0.0873, "step": 49760 }, { "epoch": 1.4718755545040516, "grad_norm": 0.6587902307510376, "learning_rate": 3.653369861446636e-06, "loss": 0.081, "step": 49770 }, { "epoch": 1.4721712899982256, "grad_norm": 0.6792289018630981, "learning_rate": 3.6532431715276965e-06, "loss": 0.0898, "step": 49780 }, { "epoch": 1.4724670254923997, "grad_norm": 0.7753515839576721, "learning_rate": 3.6531164816087577e-06, "loss": 0.0846, "step": 49790 }, { "epoch": 1.4727627609865737, "grad_norm": 0.8970879316329956, "learning_rate": 3.652989791689818e-06, "loss": 0.0942, "step": 49800 }, { "epoch": 1.4730584964807476, "grad_norm": 0.9257311820983887, "learning_rate": 3.652863101770879e-06, "loss": 0.0848, "step": 49810 }, { "epoch": 1.4733542319749215, "grad_norm": 1.2584140300750732, "learning_rate": 3.652736411851939e-06, "loss": 0.0974, "step": 49820 }, { "epoch": 1.4736499674690957, "grad_norm": 0.5548869967460632, "learning_rate": 3.652609721933e-06, "loss": 0.0902, "step": 49830 }, { "epoch": 1.4739457029632697, "grad_norm": 0.9142337441444397, "learning_rate": 3.6524830320140604e-06, "loss": 0.0836, "step": 49840 }, { "epoch": 1.4742414384574436, "grad_norm": 0.6749340295791626, "learning_rate": 3.652356342095121e-06, "loss": 0.0754, "step": 49850 }, { "epoch": 1.4745371739516178, "grad_norm": 1.416021466255188, "learning_rate": 3.6522296521761815e-06, "loss": 0.0765, "step": 49860 }, { "epoch": 1.4748329094457917, "grad_norm": 0.9754433035850525, "learning_rate": 3.6521029622572427e-06, "loss": 0.078, "step": 49870 }, { "epoch": 1.4751286449399656, "grad_norm": 0.6056538224220276, "learning_rate": 3.651976272338303e-06, "loss": 0.0803, "step": 49880 }, { "epoch": 1.4754243804341396, "grad_norm": 0.8724729418754578, "learning_rate": 3.651849582419364e-06, "loss": 0.0829, "step": 49890 }, { "epoch": 1.4757201159283138, "grad_norm": 0.6162087917327881, "learning_rate": 3.6517228925004243e-06, "loss": 0.0819, "step": 49900 }, { "epoch": 1.4760158514224877, "grad_norm": 1.0455708503723145, "learning_rate": 3.651596202581485e-06, "loss": 0.0899, "step": 49910 }, { "epoch": 1.4763115869166619, "grad_norm": 1.0044885873794556, "learning_rate": 3.6514695126625454e-06, "loss": 0.0718, "step": 49920 }, { "epoch": 1.4766073224108358, "grad_norm": 0.6853893399238586, "learning_rate": 3.651342822743606e-06, "loss": 0.074, "step": 49930 }, { "epoch": 1.4769030579050098, "grad_norm": 1.0711424350738525, "learning_rate": 3.6512161328246666e-06, "loss": 0.0851, "step": 49940 }, { "epoch": 1.4771987933991837, "grad_norm": 0.7637973427772522, "learning_rate": 3.6510894429057278e-06, "loss": 0.0653, "step": 49950 }, { "epoch": 1.4774945288933579, "grad_norm": 0.9542972445487976, "learning_rate": 3.650962752986788e-06, "loss": 0.072, "step": 49960 }, { "epoch": 1.4777902643875318, "grad_norm": 0.7651882767677307, "learning_rate": 3.650836063067849e-06, "loss": 0.081, "step": 49970 }, { "epoch": 1.4780859998817057, "grad_norm": 0.8406457901000977, "learning_rate": 3.6507093731489093e-06, "loss": 0.0928, "step": 49980 }, { "epoch": 1.47838173537588, "grad_norm": 0.9657913446426392, "learning_rate": 3.65058268322997e-06, "loss": 0.0948, "step": 49990 }, { "epoch": 1.4786774708700539, "grad_norm": 0.6862518787384033, "learning_rate": 3.6504559933110305e-06, "loss": 0.0613, "step": 50000 }, { "epoch": 1.4789732063642278, "grad_norm": 0.8766524791717529, "learning_rate": 3.6503293033920912e-06, "loss": 0.0671, "step": 50010 }, { "epoch": 1.4792689418584017, "grad_norm": 0.9230462908744812, "learning_rate": 3.6502026134731516e-06, "loss": 0.0896, "step": 50020 }, { "epoch": 1.479564677352576, "grad_norm": 1.488867163658142, "learning_rate": 3.650075923554213e-06, "loss": 0.1027, "step": 50030 }, { "epoch": 1.4798604128467499, "grad_norm": 0.6294771432876587, "learning_rate": 3.649949233635273e-06, "loss": 0.0958, "step": 50040 }, { "epoch": 1.480156148340924, "grad_norm": 0.5008896589279175, "learning_rate": 3.649822543716334e-06, "loss": 0.0635, "step": 50050 }, { "epoch": 1.480451883835098, "grad_norm": 0.9329009652137756, "learning_rate": 3.6496958537973943e-06, "loss": 0.0952, "step": 50060 }, { "epoch": 1.480747619329272, "grad_norm": 0.9224260449409485, "learning_rate": 3.649569163878455e-06, "loss": 0.0838, "step": 50070 }, { "epoch": 1.4810433548234458, "grad_norm": 0.841124415397644, "learning_rate": 3.6494424739595155e-06, "loss": 0.0836, "step": 50080 }, { "epoch": 1.4813390903176198, "grad_norm": 0.8175151944160461, "learning_rate": 3.6493157840405763e-06, "loss": 0.1031, "step": 50090 }, { "epoch": 1.481634825811794, "grad_norm": 0.5713269710540771, "learning_rate": 3.6491890941216367e-06, "loss": 0.0829, "step": 50100 }, { "epoch": 1.481930561305968, "grad_norm": 0.9474539160728455, "learning_rate": 3.649062404202698e-06, "loss": 0.0768, "step": 50110 }, { "epoch": 1.482226296800142, "grad_norm": 0.8223444819450378, "learning_rate": 3.6489357142837582e-06, "loss": 0.0848, "step": 50120 }, { "epoch": 1.482522032294316, "grad_norm": 0.6607178449630737, "learning_rate": 3.648809024364819e-06, "loss": 0.0954, "step": 50130 }, { "epoch": 1.48281776778849, "grad_norm": 0.6435355544090271, "learning_rate": 3.6486823344458794e-06, "loss": 0.0922, "step": 50140 }, { "epoch": 1.483113503282664, "grad_norm": 0.7470300197601318, "learning_rate": 3.64855564452694e-06, "loss": 0.0763, "step": 50150 }, { "epoch": 1.483409238776838, "grad_norm": 0.8450338840484619, "learning_rate": 3.6484289546080005e-06, "loss": 0.0748, "step": 50160 }, { "epoch": 1.483704974271012, "grad_norm": 1.0088130235671997, "learning_rate": 3.648302264689061e-06, "loss": 0.0758, "step": 50170 }, { "epoch": 1.484000709765186, "grad_norm": 1.0213909149169922, "learning_rate": 3.6481755747701217e-06, "loss": 0.1083, "step": 50180 }, { "epoch": 1.48429644525936, "grad_norm": 0.7041268348693848, "learning_rate": 3.6480488848511825e-06, "loss": 0.077, "step": 50190 }, { "epoch": 1.484592180753534, "grad_norm": 0.7291852235794067, "learning_rate": 3.6479221949322433e-06, "loss": 0.084, "step": 50200 }, { "epoch": 1.484887916247708, "grad_norm": 0.6639018058776855, "learning_rate": 3.6477955050133036e-06, "loss": 0.0711, "step": 50210 }, { "epoch": 1.485183651741882, "grad_norm": 0.7841416001319885, "learning_rate": 3.6476688150943644e-06, "loss": 0.0676, "step": 50220 }, { "epoch": 1.485479387236056, "grad_norm": 0.6060830354690552, "learning_rate": 3.647542125175425e-06, "loss": 0.0849, "step": 50230 }, { "epoch": 1.48577512273023, "grad_norm": 0.8326110243797302, "learning_rate": 3.6474154352564856e-06, "loss": 0.087, "step": 50240 }, { "epoch": 1.4860708582244042, "grad_norm": 0.5540775060653687, "learning_rate": 3.647288745337546e-06, "loss": 0.0837, "step": 50250 }, { "epoch": 1.4863665937185782, "grad_norm": 0.9786304831504822, "learning_rate": 3.6471620554186067e-06, "loss": 0.0696, "step": 50260 }, { "epoch": 1.486662329212752, "grad_norm": 0.8247507810592651, "learning_rate": 3.6470353654996675e-06, "loss": 0.082, "step": 50270 }, { "epoch": 1.486958064706926, "grad_norm": 0.8494155406951904, "learning_rate": 3.6469086755807283e-06, "loss": 0.0929, "step": 50280 }, { "epoch": 1.4872538002011002, "grad_norm": 0.6990033984184265, "learning_rate": 3.6467819856617887e-06, "loss": 0.0929, "step": 50290 }, { "epoch": 1.4875495356952741, "grad_norm": 0.7660380601882935, "learning_rate": 3.6466552957428495e-06, "loss": 0.0766, "step": 50300 }, { "epoch": 1.487845271189448, "grad_norm": 1.2551493644714355, "learning_rate": 3.64652860582391e-06, "loss": 0.0824, "step": 50310 }, { "epoch": 1.4881410066836223, "grad_norm": 0.7705190181732178, "learning_rate": 3.6464019159049706e-06, "loss": 0.0832, "step": 50320 }, { "epoch": 1.4884367421777962, "grad_norm": 0.6467694640159607, "learning_rate": 3.646275225986031e-06, "loss": 0.094, "step": 50330 }, { "epoch": 1.4887324776719701, "grad_norm": 0.6131467223167419, "learning_rate": 3.646148536067092e-06, "loss": 0.101, "step": 50340 }, { "epoch": 1.489028213166144, "grad_norm": 0.6662569046020508, "learning_rate": 3.6460218461481526e-06, "loss": 0.0757, "step": 50350 }, { "epoch": 1.4893239486603183, "grad_norm": 1.4225256443023682, "learning_rate": 3.6458951562292134e-06, "loss": 0.0938, "step": 50360 }, { "epoch": 1.4896196841544922, "grad_norm": 0.8124212622642517, "learning_rate": 3.6457684663102737e-06, "loss": 0.0871, "step": 50370 }, { "epoch": 1.4899154196486664, "grad_norm": 1.24815833568573, "learning_rate": 3.6456417763913345e-06, "loss": 0.0909, "step": 50380 }, { "epoch": 1.4902111551428403, "grad_norm": 0.5015251040458679, "learning_rate": 3.645515086472395e-06, "loss": 0.0758, "step": 50390 }, { "epoch": 1.4905068906370142, "grad_norm": 0.8426122665405273, "learning_rate": 3.6453883965534557e-06, "loss": 0.0881, "step": 50400 }, { "epoch": 1.4908026261311882, "grad_norm": 0.8457314372062683, "learning_rate": 3.645261706634516e-06, "loss": 0.0639, "step": 50410 }, { "epoch": 1.4910983616253624, "grad_norm": 0.8884547352790833, "learning_rate": 3.645135016715577e-06, "loss": 0.0749, "step": 50420 }, { "epoch": 1.4913940971195363, "grad_norm": 0.6067601442337036, "learning_rate": 3.6450083267966376e-06, "loss": 0.0915, "step": 50430 }, { "epoch": 1.4916898326137102, "grad_norm": 1.0995914936065674, "learning_rate": 3.6448816368776984e-06, "loss": 0.0836, "step": 50440 }, { "epoch": 1.4919855681078844, "grad_norm": 0.9249408841133118, "learning_rate": 3.6447549469587588e-06, "loss": 0.0819, "step": 50450 }, { "epoch": 1.4922813036020584, "grad_norm": 0.7168461680412292, "learning_rate": 3.6446282570398196e-06, "loss": 0.0693, "step": 50460 }, { "epoch": 1.4925770390962323, "grad_norm": 0.7849279642105103, "learning_rate": 3.64450156712088e-06, "loss": 0.0648, "step": 50470 }, { "epoch": 1.4928727745904062, "grad_norm": 0.907734215259552, "learning_rate": 3.6443748772019407e-06, "loss": 0.0981, "step": 50480 }, { "epoch": 1.4931685100845804, "grad_norm": 0.9922482967376709, "learning_rate": 3.644248187283001e-06, "loss": 0.0741, "step": 50490 }, { "epoch": 1.4934642455787543, "grad_norm": 1.1520943641662598, "learning_rate": 3.644121497364062e-06, "loss": 0.092, "step": 50500 }, { "epoch": 1.4937599810729285, "grad_norm": 0.6484019160270691, "learning_rate": 3.6439948074451227e-06, "loss": 0.0924, "step": 50510 }, { "epoch": 1.4940557165671025, "grad_norm": 0.7603106498718262, "learning_rate": 3.6438681175261835e-06, "loss": 0.0823, "step": 50520 }, { "epoch": 1.4943514520612764, "grad_norm": 0.9078448414802551, "learning_rate": 3.643741427607244e-06, "loss": 0.0978, "step": 50530 }, { "epoch": 1.4946471875554503, "grad_norm": 0.7870774269104004, "learning_rate": 3.6436147376883046e-06, "loss": 0.0707, "step": 50540 }, { "epoch": 1.4949429230496243, "grad_norm": 0.8207451701164246, "learning_rate": 3.643488047769365e-06, "loss": 0.0839, "step": 50550 }, { "epoch": 1.4952386585437984, "grad_norm": 0.7237091660499573, "learning_rate": 3.6433613578504258e-06, "loss": 0.0776, "step": 50560 }, { "epoch": 1.4955343940379724, "grad_norm": 0.6690853834152222, "learning_rate": 3.643234667931486e-06, "loss": 0.0813, "step": 50570 }, { "epoch": 1.4958301295321466, "grad_norm": 0.8856021165847778, "learning_rate": 3.6431079780125465e-06, "loss": 0.0757, "step": 50580 }, { "epoch": 1.4961258650263205, "grad_norm": 1.4034268856048584, "learning_rate": 3.6429812880936077e-06, "loss": 0.0952, "step": 50590 }, { "epoch": 1.4964216005204944, "grad_norm": 0.8555280566215515, "learning_rate": 3.642854598174668e-06, "loss": 0.0792, "step": 50600 }, { "epoch": 1.4967173360146684, "grad_norm": 0.8301329016685486, "learning_rate": 3.642727908255729e-06, "loss": 0.0834, "step": 50610 }, { "epoch": 1.4970130715088426, "grad_norm": 0.9453135132789612, "learning_rate": 3.6426012183367892e-06, "loss": 0.081, "step": 50620 }, { "epoch": 1.4973088070030165, "grad_norm": 0.835797905921936, "learning_rate": 3.64247452841785e-06, "loss": 0.0882, "step": 50630 }, { "epoch": 1.4976045424971904, "grad_norm": 0.9786956906318665, "learning_rate": 3.6423478384989104e-06, "loss": 0.083, "step": 50640 }, { "epoch": 1.4979002779913646, "grad_norm": 0.9009311199188232, "learning_rate": 3.642221148579971e-06, "loss": 0.0994, "step": 50650 }, { "epoch": 1.4981960134855385, "grad_norm": 0.4775956869125366, "learning_rate": 3.6420944586610315e-06, "loss": 0.0764, "step": 50660 }, { "epoch": 1.4984917489797125, "grad_norm": 1.315622091293335, "learning_rate": 3.6419677687420928e-06, "loss": 0.0825, "step": 50670 }, { "epoch": 1.4987874844738864, "grad_norm": 1.0803167819976807, "learning_rate": 3.641841078823153e-06, "loss": 0.0764, "step": 50680 }, { "epoch": 1.4990832199680606, "grad_norm": 1.0774608850479126, "learning_rate": 3.641714388904214e-06, "loss": 0.0917, "step": 50690 }, { "epoch": 1.4993789554622345, "grad_norm": 0.6659232378005981, "learning_rate": 3.6415876989852743e-06, "loss": 0.0838, "step": 50700 }, { "epoch": 1.4996746909564087, "grad_norm": 0.8758479356765747, "learning_rate": 3.641461009066335e-06, "loss": 0.068, "step": 50710 }, { "epoch": 1.4999704264505827, "grad_norm": 0.7350618839263916, "learning_rate": 3.6413343191473954e-06, "loss": 0.0923, "step": 50720 }, { "epoch": 1.5002661619447566, "grad_norm": 0.83323073387146, "learning_rate": 3.6412076292284562e-06, "loss": 0.0895, "step": 50730 }, { "epoch": 1.5005618974389305, "grad_norm": 0.7286800742149353, "learning_rate": 3.6410809393095166e-06, "loss": 0.0955, "step": 50740 }, { "epoch": 1.5008576329331045, "grad_norm": 0.4230135381221771, "learning_rate": 3.640954249390578e-06, "loss": 0.0867, "step": 50750 }, { "epoch": 1.5011533684272786, "grad_norm": 0.8334985971450806, "learning_rate": 3.640827559471638e-06, "loss": 0.063, "step": 50760 }, { "epoch": 1.5014491039214528, "grad_norm": 0.7980448603630066, "learning_rate": 3.640700869552699e-06, "loss": 0.0806, "step": 50770 }, { "epoch": 1.5017448394156268, "grad_norm": 1.009277105331421, "learning_rate": 3.6405741796337593e-06, "loss": 0.1002, "step": 50780 }, { "epoch": 1.5020405749098007, "grad_norm": 0.9168532490730286, "learning_rate": 3.64044748971482e-06, "loss": 0.1026, "step": 50790 }, { "epoch": 1.5023363104039746, "grad_norm": 0.8405824899673462, "learning_rate": 3.6403207997958805e-06, "loss": 0.0817, "step": 50800 }, { "epoch": 1.5026320458981486, "grad_norm": 0.6172899007797241, "learning_rate": 3.6401941098769413e-06, "loss": 0.0809, "step": 50810 }, { "epoch": 1.5029277813923227, "grad_norm": 1.0291409492492676, "learning_rate": 3.6400674199580016e-06, "loss": 0.0802, "step": 50820 }, { "epoch": 1.5032235168864967, "grad_norm": 0.8470194935798645, "learning_rate": 3.639940730039063e-06, "loss": 0.0939, "step": 50830 }, { "epoch": 1.5035192523806709, "grad_norm": 0.8907373547554016, "learning_rate": 3.6398140401201232e-06, "loss": 0.086, "step": 50840 }, { "epoch": 1.5038149878748448, "grad_norm": 0.8249471187591553, "learning_rate": 3.639687350201184e-06, "loss": 0.0826, "step": 50850 }, { "epoch": 1.5041107233690187, "grad_norm": 0.6850346326828003, "learning_rate": 3.6395606602822444e-06, "loss": 0.0789, "step": 50860 }, { "epoch": 1.5044064588631927, "grad_norm": 0.9925662279129028, "learning_rate": 3.639433970363305e-06, "loss": 0.0857, "step": 50870 }, { "epoch": 1.5047021943573666, "grad_norm": 0.8800414800643921, "learning_rate": 3.6393072804443655e-06, "loss": 0.075, "step": 50880 }, { "epoch": 1.5049979298515408, "grad_norm": 0.6740554571151733, "learning_rate": 3.6391805905254263e-06, "loss": 0.0855, "step": 50890 }, { "epoch": 1.5052936653457147, "grad_norm": 1.3110820055007935, "learning_rate": 3.6390539006064867e-06, "loss": 0.0806, "step": 50900 }, { "epoch": 1.505589400839889, "grad_norm": 0.5756946206092834, "learning_rate": 3.638927210687548e-06, "loss": 0.0598, "step": 50910 }, { "epoch": 1.5058851363340628, "grad_norm": 1.3237853050231934, "learning_rate": 3.6388005207686083e-06, "loss": 0.0958, "step": 50920 }, { "epoch": 1.5061808718282368, "grad_norm": 0.9362363219261169, "learning_rate": 3.638673830849669e-06, "loss": 0.088, "step": 50930 }, { "epoch": 1.5064766073224107, "grad_norm": 0.9076979756355286, "learning_rate": 3.6385471409307294e-06, "loss": 0.0731, "step": 50940 }, { "epoch": 1.506772342816585, "grad_norm": 0.9455458521842957, "learning_rate": 3.63842045101179e-06, "loss": 0.0915, "step": 50950 }, { "epoch": 1.5070680783107588, "grad_norm": 0.9733490943908691, "learning_rate": 3.6382937610928506e-06, "loss": 0.0778, "step": 50960 }, { "epoch": 1.507363813804933, "grad_norm": 0.9807643890380859, "learning_rate": 3.6381670711739114e-06, "loss": 0.0698, "step": 50970 }, { "epoch": 1.507659549299107, "grad_norm": 1.010738492012024, "learning_rate": 3.6380403812549717e-06, "loss": 0.0874, "step": 50980 }, { "epoch": 1.507955284793281, "grad_norm": 0.8439101576805115, "learning_rate": 3.637913691336032e-06, "loss": 0.084, "step": 50990 }, { "epoch": 1.5082510202874548, "grad_norm": 0.8226482272148132, "learning_rate": 3.6377870014170933e-06, "loss": 0.0826, "step": 51000 }, { "epoch": 1.5085467557816288, "grad_norm": 0.3751595616340637, "learning_rate": 3.6376603114981537e-06, "loss": 0.077, "step": 51010 }, { "epoch": 1.508842491275803, "grad_norm": 0.6825022101402283, "learning_rate": 3.6375336215792145e-06, "loss": 0.0831, "step": 51020 }, { "epoch": 1.5091382267699769, "grad_norm": 0.5319076180458069, "learning_rate": 3.637406931660275e-06, "loss": 0.0759, "step": 51030 }, { "epoch": 1.509433962264151, "grad_norm": 0.6519925594329834, "learning_rate": 3.6372802417413356e-06, "loss": 0.0775, "step": 51040 }, { "epoch": 1.509729697758325, "grad_norm": 1.193376064300537, "learning_rate": 3.637153551822396e-06, "loss": 0.0715, "step": 51050 }, { "epoch": 1.510025433252499, "grad_norm": 1.3280043601989746, "learning_rate": 3.6370268619034568e-06, "loss": 0.0873, "step": 51060 }, { "epoch": 1.5103211687466729, "grad_norm": 0.8449310064315796, "learning_rate": 3.636900171984517e-06, "loss": 0.0848, "step": 51070 }, { "epoch": 1.5106169042408468, "grad_norm": 0.8766087889671326, "learning_rate": 3.6367734820655784e-06, "loss": 0.0845, "step": 51080 }, { "epoch": 1.510912639735021, "grad_norm": 0.4530183970928192, "learning_rate": 3.6366467921466387e-06, "loss": 0.0744, "step": 51090 }, { "epoch": 1.5112083752291952, "grad_norm": 0.9764174818992615, "learning_rate": 3.6365201022276995e-06, "loss": 0.0886, "step": 51100 }, { "epoch": 1.511504110723369, "grad_norm": 0.6268443465232849, "learning_rate": 3.63639341230876e-06, "loss": 0.079, "step": 51110 }, { "epoch": 1.511799846217543, "grad_norm": 0.5819433927536011, "learning_rate": 3.6362667223898207e-06, "loss": 0.0945, "step": 51120 }, { "epoch": 1.512095581711717, "grad_norm": 0.7173457741737366, "learning_rate": 3.636140032470881e-06, "loss": 0.1056, "step": 51130 }, { "epoch": 1.512391317205891, "grad_norm": 0.6069852113723755, "learning_rate": 3.636013342551942e-06, "loss": 0.0862, "step": 51140 }, { "epoch": 1.512687052700065, "grad_norm": 0.953295886516571, "learning_rate": 3.635886652633002e-06, "loss": 0.0771, "step": 51150 }, { "epoch": 1.512982788194239, "grad_norm": 0.6799917817115784, "learning_rate": 3.6357599627140634e-06, "loss": 0.0806, "step": 51160 }, { "epoch": 1.5132785236884132, "grad_norm": 0.7157905697822571, "learning_rate": 3.6356332727951238e-06, "loss": 0.08, "step": 51170 }, { "epoch": 1.5135742591825871, "grad_norm": 0.8688067197799683, "learning_rate": 3.6355065828761846e-06, "loss": 0.0774, "step": 51180 }, { "epoch": 1.513869994676761, "grad_norm": 0.9166573286056519, "learning_rate": 3.635379892957245e-06, "loss": 0.0875, "step": 51190 }, { "epoch": 1.514165730170935, "grad_norm": 0.6618770360946655, "learning_rate": 3.6352532030383057e-06, "loss": 0.066, "step": 51200 }, { "epoch": 1.514461465665109, "grad_norm": 1.0192346572875977, "learning_rate": 3.635126513119366e-06, "loss": 0.0872, "step": 51210 }, { "epoch": 1.5147572011592831, "grad_norm": 0.919571042060852, "learning_rate": 3.634999823200427e-06, "loss": 0.098, "step": 51220 }, { "epoch": 1.5150529366534573, "grad_norm": 0.9666796922683716, "learning_rate": 3.6348731332814872e-06, "loss": 0.094, "step": 51230 }, { "epoch": 1.5153486721476312, "grad_norm": 0.5055071711540222, "learning_rate": 3.6347464433625484e-06, "loss": 0.0774, "step": 51240 }, { "epoch": 1.5156444076418052, "grad_norm": 0.9905960559844971, "learning_rate": 3.634619753443609e-06, "loss": 0.086, "step": 51250 }, { "epoch": 1.5159401431359791, "grad_norm": 0.30471181869506836, "learning_rate": 3.6344930635246696e-06, "loss": 0.0642, "step": 51260 }, { "epoch": 1.516235878630153, "grad_norm": 1.346763014793396, "learning_rate": 3.63436637360573e-06, "loss": 0.0937, "step": 51270 }, { "epoch": 1.5165316141243272, "grad_norm": 0.8242560625076294, "learning_rate": 3.6342396836867908e-06, "loss": 0.0909, "step": 51280 }, { "epoch": 1.5168273496185012, "grad_norm": 0.4806423783302307, "learning_rate": 3.634112993767851e-06, "loss": 0.0745, "step": 51290 }, { "epoch": 1.5171230851126754, "grad_norm": 1.068050503730774, "learning_rate": 3.633986303848912e-06, "loss": 0.0826, "step": 51300 }, { "epoch": 1.5174188206068493, "grad_norm": 0.7700453996658325, "learning_rate": 3.6338596139299723e-06, "loss": 0.0664, "step": 51310 }, { "epoch": 1.5177145561010232, "grad_norm": 1.1303073167800903, "learning_rate": 3.6337329240110335e-06, "loss": 0.091, "step": 51320 }, { "epoch": 1.5180102915951972, "grad_norm": 0.8590166568756104, "learning_rate": 3.633606234092094e-06, "loss": 0.0771, "step": 51330 }, { "epoch": 1.5183060270893711, "grad_norm": 1.2683428525924683, "learning_rate": 3.6334795441731546e-06, "loss": 0.0864, "step": 51340 }, { "epoch": 1.5186017625835453, "grad_norm": 1.4685771465301514, "learning_rate": 3.633352854254215e-06, "loss": 0.0992, "step": 51350 }, { "epoch": 1.5188974980777192, "grad_norm": 0.5518445372581482, "learning_rate": 3.633226164335276e-06, "loss": 0.0815, "step": 51360 }, { "epoch": 1.5191932335718934, "grad_norm": 0.7876520752906799, "learning_rate": 3.633099474416336e-06, "loss": 0.091, "step": 51370 }, { "epoch": 1.5194889690660673, "grad_norm": 0.4537588655948639, "learning_rate": 3.632972784497397e-06, "loss": 0.0916, "step": 51380 }, { "epoch": 1.5197847045602413, "grad_norm": 0.9511486887931824, "learning_rate": 3.6328460945784573e-06, "loss": 0.085, "step": 51390 }, { "epoch": 1.5200804400544152, "grad_norm": 0.6469376683235168, "learning_rate": 3.6327194046595185e-06, "loss": 0.0731, "step": 51400 }, { "epoch": 1.5203761755485894, "grad_norm": 0.6527405381202698, "learning_rate": 3.632592714740579e-06, "loss": 0.0906, "step": 51410 }, { "epoch": 1.5206719110427633, "grad_norm": 0.7816893458366394, "learning_rate": 3.6324660248216393e-06, "loss": 0.0893, "step": 51420 }, { "epoch": 1.5209676465369375, "grad_norm": 0.635955810546875, "learning_rate": 3.6323393349027e-06, "loss": 0.081, "step": 51430 }, { "epoch": 1.5212633820311114, "grad_norm": 0.8777666091918945, "learning_rate": 3.6322126449837604e-06, "loss": 0.0854, "step": 51440 }, { "epoch": 1.5215591175252854, "grad_norm": 1.2867337465286255, "learning_rate": 3.6320859550648212e-06, "loss": 0.0735, "step": 51450 }, { "epoch": 1.5218548530194593, "grad_norm": 0.9962736368179321, "learning_rate": 3.6319592651458816e-06, "loss": 0.0705, "step": 51460 }, { "epoch": 1.5221505885136333, "grad_norm": 0.7987475991249084, "learning_rate": 3.6318325752269424e-06, "loss": 0.0837, "step": 51470 }, { "epoch": 1.5224463240078074, "grad_norm": 0.8722137808799744, "learning_rate": 3.631705885308003e-06, "loss": 0.0894, "step": 51480 }, { "epoch": 1.5227420595019814, "grad_norm": 0.8075509071350098, "learning_rate": 3.631579195389064e-06, "loss": 0.079, "step": 51490 }, { "epoch": 1.5230377949961555, "grad_norm": 0.8840047121047974, "learning_rate": 3.6314525054701243e-06, "loss": 0.081, "step": 51500 }, { "epoch": 1.5233335304903295, "grad_norm": 0.7281421422958374, "learning_rate": 3.631325815551185e-06, "loss": 0.0596, "step": 51510 }, { "epoch": 1.5236292659845034, "grad_norm": 0.6975935697555542, "learning_rate": 3.6311991256322455e-06, "loss": 0.0865, "step": 51520 }, { "epoch": 1.5239250014786774, "grad_norm": 0.7579208016395569, "learning_rate": 3.6310724357133063e-06, "loss": 0.0895, "step": 51530 }, { "epoch": 1.5242207369728513, "grad_norm": 1.1258293390274048, "learning_rate": 3.6309457457943666e-06, "loss": 0.0928, "step": 51540 }, { "epoch": 1.5245164724670255, "grad_norm": 0.6222377419471741, "learning_rate": 3.6308190558754274e-06, "loss": 0.0729, "step": 51550 }, { "epoch": 1.5248122079611997, "grad_norm": 1.334004521369934, "learning_rate": 3.630692365956488e-06, "loss": 0.0695, "step": 51560 }, { "epoch": 1.5251079434553736, "grad_norm": 0.8168115615844727, "learning_rate": 3.630565676037549e-06, "loss": 0.0824, "step": 51570 }, { "epoch": 1.5254036789495475, "grad_norm": 1.0836267471313477, "learning_rate": 3.6304389861186094e-06, "loss": 0.1035, "step": 51580 }, { "epoch": 1.5256994144437215, "grad_norm": 0.5185701847076416, "learning_rate": 3.63031229619967e-06, "loss": 0.0808, "step": 51590 }, { "epoch": 1.5259951499378954, "grad_norm": 0.9219051003456116, "learning_rate": 3.6301856062807305e-06, "loss": 0.0736, "step": 51600 }, { "epoch": 1.5262908854320696, "grad_norm": 1.1901862621307373, "learning_rate": 3.6300589163617913e-06, "loss": 0.0846, "step": 51610 }, { "epoch": 1.5265866209262435, "grad_norm": 0.8014853596687317, "learning_rate": 3.6299322264428517e-06, "loss": 0.0839, "step": 51620 }, { "epoch": 1.5268823564204177, "grad_norm": 0.8124697208404541, "learning_rate": 3.6298055365239125e-06, "loss": 0.0883, "step": 51630 }, { "epoch": 1.5271780919145916, "grad_norm": 0.7837971448898315, "learning_rate": 3.6296788466049732e-06, "loss": 0.0882, "step": 51640 }, { "epoch": 1.5274738274087656, "grad_norm": 0.9462758302688599, "learning_rate": 3.629552156686034e-06, "loss": 0.0937, "step": 51650 }, { "epoch": 1.5277695629029395, "grad_norm": 0.5754828453063965, "learning_rate": 3.6294254667670944e-06, "loss": 0.0711, "step": 51660 }, { "epoch": 1.5280652983971135, "grad_norm": 2.147073268890381, "learning_rate": 3.629298776848155e-06, "loss": 0.1094, "step": 51670 }, { "epoch": 1.5283610338912876, "grad_norm": 0.794377863407135, "learning_rate": 3.6291720869292156e-06, "loss": 0.1039, "step": 51680 }, { "epoch": 1.5286567693854618, "grad_norm": 0.9541426301002502, "learning_rate": 3.6290453970102763e-06, "loss": 0.0801, "step": 51690 }, { "epoch": 1.5289525048796357, "grad_norm": 0.46159452199935913, "learning_rate": 3.6289187070913367e-06, "loss": 0.0797, "step": 51700 }, { "epoch": 1.5292482403738097, "grad_norm": 0.4762428104877472, "learning_rate": 3.6287920171723975e-06, "loss": 0.059, "step": 51710 }, { "epoch": 1.5295439758679836, "grad_norm": 1.0962475538253784, "learning_rate": 3.6286653272534583e-06, "loss": 0.0771, "step": 51720 }, { "epoch": 1.5298397113621576, "grad_norm": 1.1660113334655762, "learning_rate": 3.628538637334519e-06, "loss": 0.109, "step": 51730 }, { "epoch": 1.5301354468563317, "grad_norm": 0.8575220704078674, "learning_rate": 3.6284119474155794e-06, "loss": 0.0834, "step": 51740 }, { "epoch": 1.5304311823505057, "grad_norm": 0.9211084246635437, "learning_rate": 3.6282852574966402e-06, "loss": 0.0961, "step": 51750 }, { "epoch": 1.5307269178446798, "grad_norm": 0.8134245872497559, "learning_rate": 3.6281585675777006e-06, "loss": 0.0855, "step": 51760 }, { "epoch": 1.5310226533388538, "grad_norm": 0.8499832153320312, "learning_rate": 3.6280318776587614e-06, "loss": 0.098, "step": 51770 }, { "epoch": 1.5313183888330277, "grad_norm": 0.7184646725654602, "learning_rate": 3.6279051877398218e-06, "loss": 0.0947, "step": 51780 }, { "epoch": 1.5316141243272017, "grad_norm": 3.9044182300567627, "learning_rate": 3.6277784978208825e-06, "loss": 0.1028, "step": 51790 }, { "epoch": 1.5319098598213756, "grad_norm": 1.2749689817428589, "learning_rate": 3.6276518079019433e-06, "loss": 0.0782, "step": 51800 }, { "epoch": 1.5322055953155498, "grad_norm": 0.5845704674720764, "learning_rate": 3.627525117983004e-06, "loss": 0.0632, "step": 51810 }, { "epoch": 1.532501330809724, "grad_norm": 0.5150631070137024, "learning_rate": 3.6273984280640645e-06, "loss": 0.0826, "step": 51820 }, { "epoch": 1.532797066303898, "grad_norm": 0.8515899181365967, "learning_rate": 3.627271738145125e-06, "loss": 0.0815, "step": 51830 }, { "epoch": 1.5330928017980718, "grad_norm": 0.5474453568458557, "learning_rate": 3.6271450482261857e-06, "loss": 0.077, "step": 51840 }, { "epoch": 1.5333885372922458, "grad_norm": 1.0670030117034912, "learning_rate": 3.627018358307246e-06, "loss": 0.0885, "step": 51850 }, { "epoch": 1.5336842727864197, "grad_norm": 1.0375841856002808, "learning_rate": 3.626891668388307e-06, "loss": 0.0748, "step": 51860 }, { "epoch": 1.5339800082805939, "grad_norm": 1.0285141468048096, "learning_rate": 3.626764978469367e-06, "loss": 0.0838, "step": 51870 }, { "epoch": 1.5342757437747678, "grad_norm": 0.716825544834137, "learning_rate": 3.6266382885504284e-06, "loss": 0.0886, "step": 51880 }, { "epoch": 1.534571479268942, "grad_norm": 0.6341862678527832, "learning_rate": 3.6265115986314888e-06, "loss": 0.0829, "step": 51890 }, { "epoch": 1.534867214763116, "grad_norm": 0.7056092619895935, "learning_rate": 3.6263849087125495e-06, "loss": 0.0721, "step": 51900 }, { "epoch": 1.5351629502572899, "grad_norm": 0.5864280462265015, "learning_rate": 3.62625821879361e-06, "loss": 0.0566, "step": 51910 }, { "epoch": 1.5354586857514638, "grad_norm": 0.9861656427383423, "learning_rate": 3.6261315288746707e-06, "loss": 0.0908, "step": 51920 }, { "epoch": 1.5357544212456378, "grad_norm": 1.0241531133651733, "learning_rate": 3.626004838955731e-06, "loss": 0.0852, "step": 51930 }, { "epoch": 1.536050156739812, "grad_norm": 1.2270216941833496, "learning_rate": 3.625878149036792e-06, "loss": 0.1033, "step": 51940 }, { "epoch": 1.5363458922339859, "grad_norm": 1.2500498294830322, "learning_rate": 3.6257514591178522e-06, "loss": 0.0938, "step": 51950 }, { "epoch": 1.53664162772816, "grad_norm": 1.0997073650360107, "learning_rate": 3.6256247691989134e-06, "loss": 0.0691, "step": 51960 }, { "epoch": 1.536937363222334, "grad_norm": 1.7847079038619995, "learning_rate": 3.625498079279974e-06, "loss": 0.1004, "step": 51970 }, { "epoch": 1.537233098716508, "grad_norm": 0.7789561152458191, "learning_rate": 3.6253713893610346e-06, "loss": 0.0804, "step": 51980 }, { "epoch": 1.5375288342106819, "grad_norm": 0.7012112736701965, "learning_rate": 3.625244699442095e-06, "loss": 0.0817, "step": 51990 }, { "epoch": 1.5378245697048558, "grad_norm": 0.6878781914710999, "learning_rate": 3.6251180095231557e-06, "loss": 0.0655, "step": 52000 }, { "epoch": 1.53812030519903, "grad_norm": 0.44702425599098206, "learning_rate": 3.624991319604216e-06, "loss": 0.0731, "step": 52010 }, { "epoch": 1.5384160406932041, "grad_norm": 1.2077155113220215, "learning_rate": 3.624864629685277e-06, "loss": 0.0943, "step": 52020 }, { "epoch": 1.538711776187378, "grad_norm": 1.196099877357483, "learning_rate": 3.6247379397663373e-06, "loss": 0.0804, "step": 52030 }, { "epoch": 1.539007511681552, "grad_norm": 0.7679767608642578, "learning_rate": 3.6246112498473985e-06, "loss": 0.0844, "step": 52040 }, { "epoch": 1.539303247175726, "grad_norm": 0.5960180759429932, "learning_rate": 3.624484559928459e-06, "loss": 0.0883, "step": 52050 }, { "epoch": 1.5395989826699, "grad_norm": 0.6086120009422302, "learning_rate": 3.6243578700095196e-06, "loss": 0.0784, "step": 52060 }, { "epoch": 1.539894718164074, "grad_norm": 0.7873321771621704, "learning_rate": 3.62423118009058e-06, "loss": 0.085, "step": 52070 }, { "epoch": 1.540190453658248, "grad_norm": 0.8044849634170532, "learning_rate": 3.6241044901716408e-06, "loss": 0.0933, "step": 52080 }, { "epoch": 1.5404861891524222, "grad_norm": 0.7799475193023682, "learning_rate": 3.623977800252701e-06, "loss": 0.0758, "step": 52090 }, { "epoch": 1.5407819246465961, "grad_norm": 0.5358714461326599, "learning_rate": 3.623851110333762e-06, "loss": 0.0826, "step": 52100 }, { "epoch": 1.54107766014077, "grad_norm": 0.4780406653881073, "learning_rate": 3.6237244204148223e-06, "loss": 0.0579, "step": 52110 }, { "epoch": 1.541373395634944, "grad_norm": 0.6044159531593323, "learning_rate": 3.6235977304958835e-06, "loss": 0.0846, "step": 52120 }, { "epoch": 1.541669131129118, "grad_norm": 1.130300760269165, "learning_rate": 3.623471040576944e-06, "loss": 0.0888, "step": 52130 }, { "epoch": 1.5419648666232921, "grad_norm": 0.9681478142738342, "learning_rate": 3.6233443506580047e-06, "loss": 0.083, "step": 52140 }, { "epoch": 1.5422606021174663, "grad_norm": 0.8952561616897583, "learning_rate": 3.623217660739065e-06, "loss": 0.0887, "step": 52150 }, { "epoch": 1.5425563376116402, "grad_norm": 0.5303727984428406, "learning_rate": 3.623090970820126e-06, "loss": 0.0843, "step": 52160 }, { "epoch": 1.5428520731058142, "grad_norm": 0.9098249077796936, "learning_rate": 3.622964280901186e-06, "loss": 0.095, "step": 52170 }, { "epoch": 1.5431478085999881, "grad_norm": 0.5332533717155457, "learning_rate": 3.622837590982247e-06, "loss": 0.0848, "step": 52180 }, { "epoch": 1.543443544094162, "grad_norm": 0.9589129090309143, "learning_rate": 3.6227109010633074e-06, "loss": 0.0852, "step": 52190 }, { "epoch": 1.5437392795883362, "grad_norm": 0.7211261987686157, "learning_rate": 3.6225842111443686e-06, "loss": 0.0856, "step": 52200 }, { "epoch": 1.5440350150825102, "grad_norm": 1.1879363059997559, "learning_rate": 3.622457521225429e-06, "loss": 0.0846, "step": 52210 }, { "epoch": 1.5443307505766843, "grad_norm": 0.8616844415664673, "learning_rate": 3.6223308313064897e-06, "loss": 0.0695, "step": 52220 }, { "epoch": 1.5446264860708583, "grad_norm": 1.2145044803619385, "learning_rate": 3.62220414138755e-06, "loss": 0.0791, "step": 52230 }, { "epoch": 1.5449222215650322, "grad_norm": 1.1976085901260376, "learning_rate": 3.6220774514686105e-06, "loss": 0.0959, "step": 52240 }, { "epoch": 1.5452179570592062, "grad_norm": 1.069471001625061, "learning_rate": 3.6219507615496712e-06, "loss": 0.0846, "step": 52250 }, { "epoch": 1.5455136925533801, "grad_norm": 0.8563931584358215, "learning_rate": 3.6218240716307316e-06, "loss": 0.0705, "step": 52260 }, { "epoch": 1.5458094280475543, "grad_norm": 0.8011598587036133, "learning_rate": 3.6216973817117924e-06, "loss": 0.0808, "step": 52270 }, { "epoch": 1.5461051635417284, "grad_norm": 1.0842905044555664, "learning_rate": 3.621570691792853e-06, "loss": 0.0996, "step": 52280 }, { "epoch": 1.5464008990359024, "grad_norm": 0.8198730945587158, "learning_rate": 3.621444001873914e-06, "loss": 0.0637, "step": 52290 }, { "epoch": 1.5466966345300763, "grad_norm": 1.0912675857543945, "learning_rate": 3.6213173119549743e-06, "loss": 0.0851, "step": 52300 }, { "epoch": 1.5469923700242503, "grad_norm": 1.438689947128296, "learning_rate": 3.621190622036035e-06, "loss": 0.0853, "step": 52310 }, { "epoch": 1.5472881055184242, "grad_norm": 0.9999144673347473, "learning_rate": 3.6210639321170955e-06, "loss": 0.1142, "step": 52320 }, { "epoch": 1.5475838410125984, "grad_norm": 0.9395305514335632, "learning_rate": 3.6209372421981563e-06, "loss": 0.0833, "step": 52330 }, { "epoch": 1.5478795765067723, "grad_norm": 0.6252694725990295, "learning_rate": 3.6208105522792167e-06, "loss": 0.0927, "step": 52340 }, { "epoch": 1.5481753120009465, "grad_norm": 0.9662117958068848, "learning_rate": 3.6206838623602774e-06, "loss": 0.0775, "step": 52350 }, { "epoch": 1.5484710474951204, "grad_norm": 0.48595789074897766, "learning_rate": 3.6205571724413382e-06, "loss": 0.0572, "step": 52360 }, { "epoch": 1.5487667829892944, "grad_norm": 1.4308916330337524, "learning_rate": 3.620430482522399e-06, "loss": 0.0974, "step": 52370 }, { "epoch": 1.5490625184834683, "grad_norm": 0.5606576204299927, "learning_rate": 3.6203037926034594e-06, "loss": 0.1, "step": 52380 }, { "epoch": 1.5493582539776423, "grad_norm": 0.6820563673973083, "learning_rate": 3.62017710268452e-06, "loss": 0.0896, "step": 52390 }, { "epoch": 1.5496539894718164, "grad_norm": 0.9070613980293274, "learning_rate": 3.6200504127655805e-06, "loss": 0.0929, "step": 52400 }, { "epoch": 1.5499497249659904, "grad_norm": 0.7922313809394836, "learning_rate": 3.6199237228466413e-06, "loss": 0.0613, "step": 52410 }, { "epoch": 1.5502454604601645, "grad_norm": 2.8169796466827393, "learning_rate": 3.6197970329277017e-06, "loss": 0.0887, "step": 52420 }, { "epoch": 1.5505411959543385, "grad_norm": 0.981849730014801, "learning_rate": 3.6196703430087625e-06, "loss": 0.1035, "step": 52430 }, { "epoch": 1.5508369314485124, "grad_norm": 0.8191239833831787, "learning_rate": 3.6195436530898233e-06, "loss": 0.093, "step": 52440 }, { "epoch": 1.5511326669426864, "grad_norm": 1.3129085302352905, "learning_rate": 3.619416963170884e-06, "loss": 0.0911, "step": 52450 }, { "epoch": 1.5514284024368603, "grad_norm": 0.7957398295402527, "learning_rate": 3.6192902732519444e-06, "loss": 0.0579, "step": 52460 }, { "epoch": 1.5517241379310345, "grad_norm": 0.8509763479232788, "learning_rate": 3.6191635833330052e-06, "loss": 0.0832, "step": 52470 }, { "epoch": 1.5520198734252086, "grad_norm": 0.8830906748771667, "learning_rate": 3.6190368934140656e-06, "loss": 0.0757, "step": 52480 }, { "epoch": 1.5523156089193826, "grad_norm": 0.5484997034072876, "learning_rate": 3.6189102034951264e-06, "loss": 0.0658, "step": 52490 }, { "epoch": 1.5526113444135565, "grad_norm": 0.8412808179855347, "learning_rate": 3.6187835135761867e-06, "loss": 0.0721, "step": 52500 }, { "epoch": 1.5529070799077305, "grad_norm": 0.4792225956916809, "learning_rate": 3.6186568236572475e-06, "loss": 0.073, "step": 52510 }, { "epoch": 1.5532028154019044, "grad_norm": 0.8111521005630493, "learning_rate": 3.6185301337383083e-06, "loss": 0.0922, "step": 52520 }, { "epoch": 1.5534985508960786, "grad_norm": 0.9430521726608276, "learning_rate": 3.618403443819369e-06, "loss": 0.0957, "step": 52530 }, { "epoch": 1.5537942863902525, "grad_norm": 0.48865216970443726, "learning_rate": 3.6182767539004295e-06, "loss": 0.0702, "step": 52540 }, { "epoch": 1.5540900218844267, "grad_norm": 0.6832804083824158, "learning_rate": 3.6181500639814903e-06, "loss": 0.0842, "step": 52550 }, { "epoch": 1.5543857573786006, "grad_norm": 1.069827675819397, "learning_rate": 3.6180233740625506e-06, "loss": 0.0738, "step": 52560 }, { "epoch": 1.5546814928727746, "grad_norm": 0.652382493019104, "learning_rate": 3.6178966841436114e-06, "loss": 0.074, "step": 52570 }, { "epoch": 1.5549772283669485, "grad_norm": 1.349450945854187, "learning_rate": 3.617769994224672e-06, "loss": 0.0912, "step": 52580 }, { "epoch": 1.5552729638611225, "grad_norm": 0.6780678629875183, "learning_rate": 3.6176433043057326e-06, "loss": 0.0882, "step": 52590 }, { "epoch": 1.5555686993552966, "grad_norm": 0.7704794406890869, "learning_rate": 3.6175166143867934e-06, "loss": 0.0827, "step": 52600 }, { "epoch": 1.5558644348494708, "grad_norm": 1.5132882595062256, "learning_rate": 3.617389924467854e-06, "loss": 0.0695, "step": 52610 }, { "epoch": 1.5561601703436447, "grad_norm": 0.8011022806167603, "learning_rate": 3.6172632345489145e-06, "loss": 0.0712, "step": 52620 }, { "epoch": 1.5564559058378187, "grad_norm": 1.0618516206741333, "learning_rate": 3.6171365446299753e-06, "loss": 0.0872, "step": 52630 }, { "epoch": 1.5567516413319926, "grad_norm": 1.0816465616226196, "learning_rate": 3.6170098547110357e-06, "loss": 0.0873, "step": 52640 }, { "epoch": 1.5570473768261666, "grad_norm": 0.6756705045700073, "learning_rate": 3.616883164792096e-06, "loss": 0.0801, "step": 52650 }, { "epoch": 1.5573431123203407, "grad_norm": 0.6663536429405212, "learning_rate": 3.616756474873157e-06, "loss": 0.0802, "step": 52660 }, { "epoch": 1.5576388478145147, "grad_norm": 1.2020514011383057, "learning_rate": 3.616629784954217e-06, "loss": 0.0845, "step": 52670 }, { "epoch": 1.5579345833086888, "grad_norm": 0.7767703533172607, "learning_rate": 3.6165030950352784e-06, "loss": 0.0798, "step": 52680 }, { "epoch": 1.5582303188028628, "grad_norm": 0.4642431139945984, "learning_rate": 3.6163764051163388e-06, "loss": 0.0891, "step": 52690 }, { "epoch": 1.5585260542970367, "grad_norm": 0.8272549510002136, "learning_rate": 3.6162497151973996e-06, "loss": 0.1112, "step": 52700 }, { "epoch": 1.5588217897912107, "grad_norm": 0.7296743988990784, "learning_rate": 3.61612302527846e-06, "loss": 0.0803, "step": 52710 }, { "epoch": 1.5591175252853846, "grad_norm": 0.8272987008094788, "learning_rate": 3.6159963353595207e-06, "loss": 0.08, "step": 52720 }, { "epoch": 1.5594132607795588, "grad_norm": 1.1481326818466187, "learning_rate": 3.615869645440581e-06, "loss": 0.0888, "step": 52730 }, { "epoch": 1.559708996273733, "grad_norm": 1.0207306146621704, "learning_rate": 3.615742955521642e-06, "loss": 0.0834, "step": 52740 }, { "epoch": 1.5600047317679069, "grad_norm": 1.0152966976165771, "learning_rate": 3.6156162656027022e-06, "loss": 0.08, "step": 52750 }, { "epoch": 1.5603004672620808, "grad_norm": 0.9411894083023071, "learning_rate": 3.6154895756837635e-06, "loss": 0.0744, "step": 52760 }, { "epoch": 1.5605962027562548, "grad_norm": 0.8778804540634155, "learning_rate": 3.615362885764824e-06, "loss": 0.0779, "step": 52770 }, { "epoch": 1.5608919382504287, "grad_norm": 1.3346039056777954, "learning_rate": 3.6152361958458846e-06, "loss": 0.0877, "step": 52780 }, { "epoch": 1.5611876737446029, "grad_norm": 1.0823849439620972, "learning_rate": 3.615109505926945e-06, "loss": 0.0918, "step": 52790 }, { "epoch": 1.5614834092387768, "grad_norm": 0.8478049635887146, "learning_rate": 3.6149828160080058e-06, "loss": 0.0829, "step": 52800 }, { "epoch": 1.561779144732951, "grad_norm": 0.5518606901168823, "learning_rate": 3.614856126089066e-06, "loss": 0.065, "step": 52810 }, { "epoch": 1.562074880227125, "grad_norm": 0.551764190196991, "learning_rate": 3.614729436170127e-06, "loss": 0.1005, "step": 52820 }, { "epoch": 1.5623706157212989, "grad_norm": 0.8639764785766602, "learning_rate": 3.6146027462511873e-06, "loss": 0.1093, "step": 52830 }, { "epoch": 1.5626663512154728, "grad_norm": 1.2477372884750366, "learning_rate": 3.6144760563322485e-06, "loss": 0.0808, "step": 52840 }, { "epoch": 1.5629620867096468, "grad_norm": 0.79936683177948, "learning_rate": 3.614349366413309e-06, "loss": 0.0815, "step": 52850 }, { "epoch": 1.563257822203821, "grad_norm": 0.6754801869392395, "learning_rate": 3.6142226764943697e-06, "loss": 0.0564, "step": 52860 }, { "epoch": 1.5635535576979949, "grad_norm": 1.0226268768310547, "learning_rate": 3.61409598657543e-06, "loss": 0.097, "step": 52870 }, { "epoch": 1.563849293192169, "grad_norm": 1.1674222946166992, "learning_rate": 3.613969296656491e-06, "loss": 0.0826, "step": 52880 }, { "epoch": 1.564145028686343, "grad_norm": 0.43631160259246826, "learning_rate": 3.613842606737551e-06, "loss": 0.069, "step": 52890 }, { "epoch": 1.564440764180517, "grad_norm": 0.7842617630958557, "learning_rate": 3.613715916818612e-06, "loss": 0.0976, "step": 52900 }, { "epoch": 1.5647364996746909, "grad_norm": 1.0745209455490112, "learning_rate": 3.6135892268996723e-06, "loss": 0.072, "step": 52910 }, { "epoch": 1.5650322351688648, "grad_norm": 0.99623703956604, "learning_rate": 3.6134625369807336e-06, "loss": 0.0729, "step": 52920 }, { "epoch": 1.565327970663039, "grad_norm": 0.9477986693382263, "learning_rate": 3.613335847061794e-06, "loss": 0.0791, "step": 52930 }, { "epoch": 1.5656237061572131, "grad_norm": 0.956149697303772, "learning_rate": 3.6132091571428547e-06, "loss": 0.0959, "step": 52940 }, { "epoch": 1.565919441651387, "grad_norm": 1.0357986688613892, "learning_rate": 3.613082467223915e-06, "loss": 0.0782, "step": 52950 }, { "epoch": 1.566215177145561, "grad_norm": 0.671175479888916, "learning_rate": 3.612955777304976e-06, "loss": 0.0722, "step": 52960 }, { "epoch": 1.566510912639735, "grad_norm": 0.9767400622367859, "learning_rate": 3.6128290873860362e-06, "loss": 0.0805, "step": 52970 }, { "epoch": 1.566806648133909, "grad_norm": 1.170745849609375, "learning_rate": 3.612702397467097e-06, "loss": 0.0789, "step": 52980 }, { "epoch": 1.567102383628083, "grad_norm": 1.0455294847488403, "learning_rate": 3.6125757075481574e-06, "loss": 0.0922, "step": 52990 }, { "epoch": 1.567398119122257, "grad_norm": 0.8208442330360413, "learning_rate": 3.6124490176292186e-06, "loss": 0.097, "step": 53000 }, { "epoch": 1.5676938546164312, "grad_norm": 0.6412217020988464, "learning_rate": 3.612322327710279e-06, "loss": 0.0713, "step": 53010 }, { "epoch": 1.5679895901106051, "grad_norm": 1.1618798971176147, "learning_rate": 3.6121956377913398e-06, "loss": 0.0789, "step": 53020 }, { "epoch": 1.568285325604779, "grad_norm": 0.6355721950531006, "learning_rate": 3.6120689478724e-06, "loss": 0.0787, "step": 53030 }, { "epoch": 1.568581061098953, "grad_norm": 0.6308850049972534, "learning_rate": 3.611942257953461e-06, "loss": 0.0787, "step": 53040 }, { "epoch": 1.568876796593127, "grad_norm": 1.102344036102295, "learning_rate": 3.6118155680345213e-06, "loss": 0.0867, "step": 53050 }, { "epoch": 1.5691725320873011, "grad_norm": 0.529599666595459, "learning_rate": 3.6116888781155816e-06, "loss": 0.087, "step": 53060 }, { "epoch": 1.5694682675814753, "grad_norm": 0.8543092608451843, "learning_rate": 3.6115621881966424e-06, "loss": 0.0869, "step": 53070 }, { "epoch": 1.5697640030756492, "grad_norm": 0.3597826063632965, "learning_rate": 3.6114354982777032e-06, "loss": 0.0982, "step": 53080 }, { "epoch": 1.5700597385698232, "grad_norm": 1.0650194883346558, "learning_rate": 3.611308808358764e-06, "loss": 0.0927, "step": 53090 }, { "epoch": 1.5703554740639971, "grad_norm": 0.8046293258666992, "learning_rate": 3.6111821184398244e-06, "loss": 0.073, "step": 53100 }, { "epoch": 1.570651209558171, "grad_norm": 0.6270223259925842, "learning_rate": 3.611055428520885e-06, "loss": 0.0729, "step": 53110 }, { "epoch": 1.5709469450523452, "grad_norm": 0.6881964206695557, "learning_rate": 3.6109287386019455e-06, "loss": 0.1104, "step": 53120 }, { "epoch": 1.5712426805465192, "grad_norm": 0.6677550673484802, "learning_rate": 3.6108020486830063e-06, "loss": 0.0915, "step": 53130 }, { "epoch": 1.5715384160406933, "grad_norm": 0.6765766143798828, "learning_rate": 3.6106753587640667e-06, "loss": 0.0842, "step": 53140 }, { "epoch": 1.5718341515348673, "grad_norm": 0.5174915790557861, "learning_rate": 3.6105486688451275e-06, "loss": 0.089, "step": 53150 }, { "epoch": 1.5721298870290412, "grad_norm": 1.0636119842529297, "learning_rate": 3.6104219789261883e-06, "loss": 0.0726, "step": 53160 }, { "epoch": 1.5724256225232152, "grad_norm": 1.2245392799377441, "learning_rate": 3.610295289007249e-06, "loss": 0.0778, "step": 53170 }, { "epoch": 1.572721358017389, "grad_norm": 1.3196566104888916, "learning_rate": 3.6101685990883094e-06, "loss": 0.0987, "step": 53180 }, { "epoch": 1.5730170935115633, "grad_norm": 1.37611722946167, "learning_rate": 3.61004190916937e-06, "loss": 0.1021, "step": 53190 }, { "epoch": 1.5733128290057374, "grad_norm": 0.9688277244567871, "learning_rate": 3.6099152192504306e-06, "loss": 0.0727, "step": 53200 }, { "epoch": 1.5736085644999114, "grad_norm": 0.9761370420455933, "learning_rate": 3.6097885293314914e-06, "loss": 0.0798, "step": 53210 }, { "epoch": 1.5739042999940853, "grad_norm": 0.6742059588432312, "learning_rate": 3.6096618394125517e-06, "loss": 0.0812, "step": 53220 }, { "epoch": 1.5742000354882593, "grad_norm": 0.783903181552887, "learning_rate": 3.6095351494936125e-06, "loss": 0.0889, "step": 53230 }, { "epoch": 1.5744957709824332, "grad_norm": 1.0042872428894043, "learning_rate": 3.6094084595746733e-06, "loss": 0.0943, "step": 53240 }, { "epoch": 1.5747915064766074, "grad_norm": 0.7912810444831848, "learning_rate": 3.609281769655734e-06, "loss": 0.0888, "step": 53250 }, { "epoch": 1.5750872419707813, "grad_norm": 0.7305676937103271, "learning_rate": 3.6091550797367945e-06, "loss": 0.07, "step": 53260 }, { "epoch": 1.5753829774649555, "grad_norm": 0.6685335040092468, "learning_rate": 3.6090283898178553e-06, "loss": 0.0932, "step": 53270 }, { "epoch": 1.5756787129591294, "grad_norm": 0.8397771716117859, "learning_rate": 3.6089016998989156e-06, "loss": 0.0898, "step": 53280 }, { "epoch": 1.5759744484533034, "grad_norm": 1.0936119556427002, "learning_rate": 3.6087750099799764e-06, "loss": 0.0833, "step": 53290 }, { "epoch": 1.5762701839474773, "grad_norm": 0.7117913365364075, "learning_rate": 3.6086483200610368e-06, "loss": 0.0877, "step": 53300 }, { "epoch": 1.5765659194416513, "grad_norm": 0.5115485787391663, "learning_rate": 3.6085216301420976e-06, "loss": 0.0687, "step": 53310 }, { "epoch": 1.5768616549358254, "grad_norm": 1.0143649578094482, "learning_rate": 3.6083949402231584e-06, "loss": 0.0881, "step": 53320 }, { "epoch": 1.5771573904299994, "grad_norm": 0.9477642774581909, "learning_rate": 3.608268250304219e-06, "loss": 0.0933, "step": 53330 }, { "epoch": 1.5774531259241735, "grad_norm": 0.7979260683059692, "learning_rate": 3.6081415603852795e-06, "loss": 0.0806, "step": 53340 }, { "epoch": 1.5777488614183475, "grad_norm": 0.5979206562042236, "learning_rate": 3.6080148704663403e-06, "loss": 0.0803, "step": 53350 }, { "epoch": 1.5780445969125214, "grad_norm": 1.239837646484375, "learning_rate": 3.6078881805474007e-06, "loss": 0.0745, "step": 53360 }, { "epoch": 1.5783403324066954, "grad_norm": 0.657986581325531, "learning_rate": 3.6077614906284615e-06, "loss": 0.0819, "step": 53370 }, { "epoch": 1.5786360679008693, "grad_norm": 2.050182342529297, "learning_rate": 3.607634800709522e-06, "loss": 0.1005, "step": 53380 }, { "epoch": 1.5789318033950435, "grad_norm": 0.6481121182441711, "learning_rate": 3.6075081107905826e-06, "loss": 0.0815, "step": 53390 }, { "epoch": 1.5792275388892176, "grad_norm": 1.2836521863937378, "learning_rate": 3.6073814208716434e-06, "loss": 0.0736, "step": 53400 }, { "epoch": 1.5795232743833916, "grad_norm": 0.8223262429237366, "learning_rate": 3.607254730952704e-06, "loss": 0.0871, "step": 53410 }, { "epoch": 1.5798190098775655, "grad_norm": 0.6901447772979736, "learning_rate": 3.6071280410337646e-06, "loss": 0.0903, "step": 53420 }, { "epoch": 1.5801147453717395, "grad_norm": 1.013472318649292, "learning_rate": 3.6070013511148253e-06, "loss": 0.0939, "step": 53430 }, { "epoch": 1.5804104808659134, "grad_norm": 0.910433828830719, "learning_rate": 3.6068746611958857e-06, "loss": 0.0938, "step": 53440 }, { "epoch": 1.5807062163600876, "grad_norm": 0.6548424363136292, "learning_rate": 3.6067479712769465e-06, "loss": 0.0718, "step": 53450 }, { "epoch": 1.5810019518542615, "grad_norm": 0.6552696228027344, "learning_rate": 3.606621281358007e-06, "loss": 0.0672, "step": 53460 }, { "epoch": 1.5812976873484357, "grad_norm": 1.1227562427520752, "learning_rate": 3.6064945914390677e-06, "loss": 0.0764, "step": 53470 }, { "epoch": 1.5815934228426096, "grad_norm": 0.866042971611023, "learning_rate": 3.6063679015201284e-06, "loss": 0.0929, "step": 53480 }, { "epoch": 1.5818891583367836, "grad_norm": 1.431174635887146, "learning_rate": 3.606241211601189e-06, "loss": 0.0858, "step": 53490 }, { "epoch": 1.5821848938309575, "grad_norm": 0.8189655542373657, "learning_rate": 3.6061145216822496e-06, "loss": 0.0874, "step": 53500 }, { "epoch": 1.5824806293251314, "grad_norm": 0.5378872156143188, "learning_rate": 3.60598783176331e-06, "loss": 0.0654, "step": 53510 }, { "epoch": 1.5827763648193056, "grad_norm": 1.0818079710006714, "learning_rate": 3.6058611418443708e-06, "loss": 0.0779, "step": 53520 }, { "epoch": 1.5830721003134798, "grad_norm": 1.0240026712417603, "learning_rate": 3.605734451925431e-06, "loss": 0.1119, "step": 53530 }, { "epoch": 1.5833678358076537, "grad_norm": 0.8494312167167664, "learning_rate": 3.605607762006492e-06, "loss": 0.0845, "step": 53540 }, { "epoch": 1.5836635713018277, "grad_norm": 0.7961553335189819, "learning_rate": 3.6054810720875523e-06, "loss": 0.0756, "step": 53550 }, { "epoch": 1.5839593067960016, "grad_norm": 0.8880236744880676, "learning_rate": 3.6053543821686135e-06, "loss": 0.081, "step": 53560 }, { "epoch": 1.5842550422901756, "grad_norm": 1.0634267330169678, "learning_rate": 3.605227692249674e-06, "loss": 0.0884, "step": 53570 }, { "epoch": 1.5845507777843497, "grad_norm": 0.8347643613815308, "learning_rate": 3.6051010023307346e-06, "loss": 0.1034, "step": 53580 }, { "epoch": 1.5848465132785237, "grad_norm": 1.0335025787353516, "learning_rate": 3.604974312411795e-06, "loss": 0.089, "step": 53590 }, { "epoch": 1.5851422487726978, "grad_norm": 0.9329718351364136, "learning_rate": 3.604847622492856e-06, "loss": 0.0758, "step": 53600 }, { "epoch": 1.5854379842668718, "grad_norm": 0.9570953249931335, "learning_rate": 3.604720932573916e-06, "loss": 0.0907, "step": 53610 }, { "epoch": 1.5857337197610457, "grad_norm": 1.461281180381775, "learning_rate": 3.604594242654977e-06, "loss": 0.0928, "step": 53620 }, { "epoch": 1.5860294552552197, "grad_norm": 0.6958686709403992, "learning_rate": 3.6044675527360373e-06, "loss": 0.0842, "step": 53630 }, { "epoch": 1.5863251907493936, "grad_norm": 0.7955420613288879, "learning_rate": 3.6043408628170985e-06, "loss": 0.0859, "step": 53640 }, { "epoch": 1.5866209262435678, "grad_norm": 0.847488522529602, "learning_rate": 3.604214172898159e-06, "loss": 0.0856, "step": 53650 }, { "epoch": 1.586916661737742, "grad_norm": 0.8591592311859131, "learning_rate": 3.6040874829792197e-06, "loss": 0.0765, "step": 53660 }, { "epoch": 1.5872123972319159, "grad_norm": 0.7896518707275391, "learning_rate": 3.60396079306028e-06, "loss": 0.0845, "step": 53670 }, { "epoch": 1.5875081327260898, "grad_norm": 1.1381670236587524, "learning_rate": 3.603834103141341e-06, "loss": 0.0912, "step": 53680 }, { "epoch": 1.5878038682202638, "grad_norm": 0.8510891795158386, "learning_rate": 3.6037074132224012e-06, "loss": 0.0871, "step": 53690 }, { "epoch": 1.5880996037144377, "grad_norm": 1.1367863416671753, "learning_rate": 3.603580723303462e-06, "loss": 0.0789, "step": 53700 }, { "epoch": 1.5883953392086119, "grad_norm": 0.8284741044044495, "learning_rate": 3.6034540333845224e-06, "loss": 0.0853, "step": 53710 }, { "epoch": 1.5886910747027858, "grad_norm": 0.738053560256958, "learning_rate": 3.6033273434655836e-06, "loss": 0.0865, "step": 53720 }, { "epoch": 1.58898681019696, "grad_norm": 0.9987923502922058, "learning_rate": 3.603200653546644e-06, "loss": 0.0852, "step": 53730 }, { "epoch": 1.589282545691134, "grad_norm": 1.0753073692321777, "learning_rate": 3.6030739636277047e-06, "loss": 0.0919, "step": 53740 }, { "epoch": 1.5895782811853079, "grad_norm": 0.35810136795043945, "learning_rate": 3.602947273708765e-06, "loss": 0.0856, "step": 53750 }, { "epoch": 1.5898740166794818, "grad_norm": 0.4735258221626282, "learning_rate": 3.602820583789826e-06, "loss": 0.0626, "step": 53760 }, { "epoch": 1.5901697521736557, "grad_norm": 0.9962508678436279, "learning_rate": 3.6026938938708863e-06, "loss": 0.1032, "step": 53770 }, { "epoch": 1.59046548766783, "grad_norm": 0.8544794321060181, "learning_rate": 3.602567203951947e-06, "loss": 0.0795, "step": 53780 }, { "epoch": 1.5907612231620039, "grad_norm": 0.7215203642845154, "learning_rate": 3.6024405140330074e-06, "loss": 0.0848, "step": 53790 }, { "epoch": 1.591056958656178, "grad_norm": 0.8301446437835693, "learning_rate": 3.6023138241140686e-06, "loss": 0.0781, "step": 53800 }, { "epoch": 1.591352694150352, "grad_norm": 0.8343635201454163, "learning_rate": 3.602187134195129e-06, "loss": 0.0807, "step": 53810 }, { "epoch": 1.591648429644526, "grad_norm": 0.5986179113388062, "learning_rate": 3.6020604442761898e-06, "loss": 0.0781, "step": 53820 }, { "epoch": 1.5919441651386999, "grad_norm": 0.5762543678283691, "learning_rate": 3.60193375435725e-06, "loss": 0.0863, "step": 53830 }, { "epoch": 1.5922399006328738, "grad_norm": 0.9484983086585999, "learning_rate": 3.601807064438311e-06, "loss": 0.0862, "step": 53840 }, { "epoch": 1.592535636127048, "grad_norm": 0.7608144283294678, "learning_rate": 3.6016803745193713e-06, "loss": 0.0649, "step": 53850 }, { "epoch": 1.5928313716212221, "grad_norm": 0.7608165740966797, "learning_rate": 3.601553684600432e-06, "loss": 0.0688, "step": 53860 }, { "epoch": 1.593127107115396, "grad_norm": 0.6482632160186768, "learning_rate": 3.6014269946814925e-06, "loss": 0.0819, "step": 53870 }, { "epoch": 1.59342284260957, "grad_norm": 1.2830041646957397, "learning_rate": 3.6013003047625537e-06, "loss": 0.1017, "step": 53880 }, { "epoch": 1.593718578103744, "grad_norm": 0.8524541258811951, "learning_rate": 3.601173614843614e-06, "loss": 0.1136, "step": 53890 }, { "epoch": 1.594014313597918, "grad_norm": 0.642575740814209, "learning_rate": 3.6010469249246744e-06, "loss": 0.072, "step": 53900 }, { "epoch": 1.594310049092092, "grad_norm": 0.8954830169677734, "learning_rate": 3.600920235005735e-06, "loss": 0.0791, "step": 53910 }, { "epoch": 1.594605784586266, "grad_norm": 0.9560320377349854, "learning_rate": 3.6007935450867956e-06, "loss": 0.0868, "step": 53920 }, { "epoch": 1.5949015200804402, "grad_norm": 0.6868446469306946, "learning_rate": 3.6006668551678563e-06, "loss": 0.0901, "step": 53930 }, { "epoch": 1.5951972555746141, "grad_norm": 0.7004765272140503, "learning_rate": 3.6005401652489167e-06, "loss": 0.0819, "step": 53940 }, { "epoch": 1.595492991068788, "grad_norm": 0.8468960523605347, "learning_rate": 3.6004134753299775e-06, "loss": 0.0807, "step": 53950 }, { "epoch": 1.595788726562962, "grad_norm": 0.8134527206420898, "learning_rate": 3.6002867854110383e-06, "loss": 0.0758, "step": 53960 }, { "epoch": 1.596084462057136, "grad_norm": 0.6555392742156982, "learning_rate": 3.600160095492099e-06, "loss": 0.0744, "step": 53970 }, { "epoch": 1.59638019755131, "grad_norm": 0.75010085105896, "learning_rate": 3.6000334055731594e-06, "loss": 0.0754, "step": 53980 }, { "epoch": 1.5966759330454843, "grad_norm": 0.7838338613510132, "learning_rate": 3.5999067156542202e-06, "loss": 0.0963, "step": 53990 }, { "epoch": 1.5969716685396582, "grad_norm": 1.0245784521102905, "learning_rate": 3.5997800257352806e-06, "loss": 0.0708, "step": 54000 }, { "epoch": 1.5972674040338322, "grad_norm": 0.7390252947807312, "learning_rate": 3.5996533358163414e-06, "loss": 0.0766, "step": 54010 }, { "epoch": 1.597563139528006, "grad_norm": 0.9005239605903625, "learning_rate": 3.5995266458974018e-06, "loss": 0.0822, "step": 54020 }, { "epoch": 1.59785887502218, "grad_norm": 1.2908273935317993, "learning_rate": 3.5993999559784625e-06, "loss": 0.0945, "step": 54030 }, { "epoch": 1.5981546105163542, "grad_norm": 1.277320384979248, "learning_rate": 3.5992732660595233e-06, "loss": 0.0902, "step": 54040 }, { "epoch": 1.5984503460105282, "grad_norm": 1.0061403512954712, "learning_rate": 3.599146576140584e-06, "loss": 0.0781, "step": 54050 }, { "epoch": 1.5987460815047023, "grad_norm": 0.8471932411193848, "learning_rate": 3.5990198862216445e-06, "loss": 0.0722, "step": 54060 }, { "epoch": 1.5990418169988763, "grad_norm": 0.5180407762527466, "learning_rate": 3.5988931963027053e-06, "loss": 0.0857, "step": 54070 }, { "epoch": 1.5993375524930502, "grad_norm": 0.6482900381088257, "learning_rate": 3.5987665063837656e-06, "loss": 0.0785, "step": 54080 }, { "epoch": 1.5996332879872242, "grad_norm": 1.4678056240081787, "learning_rate": 3.5986398164648264e-06, "loss": 0.0963, "step": 54090 }, { "epoch": 1.599929023481398, "grad_norm": 0.9914617538452148, "learning_rate": 3.598513126545887e-06, "loss": 0.0774, "step": 54100 }, { "epoch": 1.6002247589755723, "grad_norm": 1.3018666505813599, "learning_rate": 3.5983864366269476e-06, "loss": 0.0765, "step": 54110 }, { "epoch": 1.6005204944697464, "grad_norm": 0.46533656120300293, "learning_rate": 3.5982597467080084e-06, "loss": 0.0673, "step": 54120 }, { "epoch": 1.6008162299639204, "grad_norm": 0.7980405688285828, "learning_rate": 3.598133056789069e-06, "loss": 0.0977, "step": 54130 }, { "epoch": 1.6011119654580943, "grad_norm": 1.3168206214904785, "learning_rate": 3.5980063668701295e-06, "loss": 0.0841, "step": 54140 }, { "epoch": 1.6014077009522683, "grad_norm": 1.0258976221084595, "learning_rate": 3.5978796769511903e-06, "loss": 0.0635, "step": 54150 }, { "epoch": 1.6017034364464422, "grad_norm": 0.8824389576911926, "learning_rate": 3.5977529870322507e-06, "loss": 0.0757, "step": 54160 }, { "epoch": 1.6019991719406164, "grad_norm": 0.991924524307251, "learning_rate": 3.5976262971133115e-06, "loss": 0.0867, "step": 54170 }, { "epoch": 1.6022949074347903, "grad_norm": 0.9022427201271057, "learning_rate": 3.597499607194372e-06, "loss": 0.0933, "step": 54180 }, { "epoch": 1.6025906429289645, "grad_norm": 0.41537782549858093, "learning_rate": 3.5973729172754326e-06, "loss": 0.0689, "step": 54190 }, { "epoch": 1.6028863784231384, "grad_norm": 1.1585445404052734, "learning_rate": 3.5972462273564934e-06, "loss": 0.0755, "step": 54200 }, { "epoch": 1.6031821139173124, "grad_norm": 0.6414781212806702, "learning_rate": 3.5971195374375542e-06, "loss": 0.0847, "step": 54210 }, { "epoch": 1.6034778494114863, "grad_norm": 0.9509695172309875, "learning_rate": 3.5969928475186146e-06, "loss": 0.0718, "step": 54220 }, { "epoch": 1.6037735849056602, "grad_norm": 0.8991751074790955, "learning_rate": 3.5968661575996754e-06, "loss": 0.0868, "step": 54230 }, { "epoch": 1.6040693203998344, "grad_norm": 8.936958312988281, "learning_rate": 3.5967394676807357e-06, "loss": 0.0827, "step": 54240 }, { "epoch": 1.6043650558940084, "grad_norm": 0.42448848485946655, "learning_rate": 3.5966127777617965e-06, "loss": 0.0776, "step": 54250 }, { "epoch": 1.6046607913881825, "grad_norm": 0.5822800397872925, "learning_rate": 3.596486087842857e-06, "loss": 0.0862, "step": 54260 }, { "epoch": 1.6049565268823565, "grad_norm": 0.9601818919181824, "learning_rate": 3.5963593979239177e-06, "loss": 0.0841, "step": 54270 }, { "epoch": 1.6052522623765304, "grad_norm": 1.2918118238449097, "learning_rate": 3.5962327080049785e-06, "loss": 0.0869, "step": 54280 }, { "epoch": 1.6055479978707043, "grad_norm": 1.0451935529708862, "learning_rate": 3.5961060180860393e-06, "loss": 0.103, "step": 54290 }, { "epoch": 1.6058437333648783, "grad_norm": 0.6749366521835327, "learning_rate": 3.5959793281670996e-06, "loss": 0.0873, "step": 54300 }, { "epoch": 1.6061394688590525, "grad_norm": 1.1372569799423218, "learning_rate": 3.59585263824816e-06, "loss": 0.0882, "step": 54310 }, { "epoch": 1.6064352043532266, "grad_norm": 1.1156998872756958, "learning_rate": 3.5957259483292208e-06, "loss": 0.0796, "step": 54320 }, { "epoch": 1.6067309398474006, "grad_norm": 1.0837429761886597, "learning_rate": 3.595599258410281e-06, "loss": 0.1015, "step": 54330 }, { "epoch": 1.6070266753415745, "grad_norm": 0.6729563474655151, "learning_rate": 3.595472568491342e-06, "loss": 0.0779, "step": 54340 }, { "epoch": 1.6073224108357484, "grad_norm": 0.6251175999641418, "learning_rate": 3.5953458785724023e-06, "loss": 0.0854, "step": 54350 }, { "epoch": 1.6076181463299224, "grad_norm": 0.7805784940719604, "learning_rate": 3.5952191886534635e-06, "loss": 0.0652, "step": 54360 }, { "epoch": 1.6079138818240966, "grad_norm": 0.9886298775672913, "learning_rate": 3.595092498734524e-06, "loss": 0.0841, "step": 54370 }, { "epoch": 1.6082096173182705, "grad_norm": 0.6681249141693115, "learning_rate": 3.5949658088155847e-06, "loss": 0.0875, "step": 54380 }, { "epoch": 1.6085053528124447, "grad_norm": 0.8992251753807068, "learning_rate": 3.594839118896645e-06, "loss": 0.0889, "step": 54390 }, { "epoch": 1.6088010883066186, "grad_norm": 1.3357722759246826, "learning_rate": 3.594712428977706e-06, "loss": 0.0688, "step": 54400 }, { "epoch": 1.6090968238007926, "grad_norm": 0.8752620220184326, "learning_rate": 3.594585739058766e-06, "loss": 0.0764, "step": 54410 }, { "epoch": 1.6093925592949665, "grad_norm": 1.0754098892211914, "learning_rate": 3.594459049139827e-06, "loss": 0.0908, "step": 54420 }, { "epoch": 1.6096882947891404, "grad_norm": 0.9571095705032349, "learning_rate": 3.5943323592208874e-06, "loss": 0.0865, "step": 54430 }, { "epoch": 1.6099840302833146, "grad_norm": 0.7330929636955261, "learning_rate": 3.5942056693019486e-06, "loss": 0.0766, "step": 54440 }, { "epoch": 1.6102797657774888, "grad_norm": 0.5760074257850647, "learning_rate": 3.594078979383009e-06, "loss": 0.0627, "step": 54450 }, { "epoch": 1.6105755012716627, "grad_norm": 0.7196188569068909, "learning_rate": 3.5939522894640697e-06, "loss": 0.0712, "step": 54460 }, { "epoch": 1.6108712367658367, "grad_norm": 0.8340210914611816, "learning_rate": 3.59382559954513e-06, "loss": 0.0814, "step": 54470 }, { "epoch": 1.6111669722600106, "grad_norm": 0.9910387396812439, "learning_rate": 3.593698909626191e-06, "loss": 0.1113, "step": 54480 }, { "epoch": 1.6114627077541845, "grad_norm": 0.5910753607749939, "learning_rate": 3.5935722197072512e-06, "loss": 0.0701, "step": 54490 }, { "epoch": 1.6117584432483587, "grad_norm": 0.46657365560531616, "learning_rate": 3.593445529788312e-06, "loss": 0.0827, "step": 54500 }, { "epoch": 1.6120541787425327, "grad_norm": 0.6613959074020386, "learning_rate": 3.5933188398693724e-06, "loss": 0.0801, "step": 54510 }, { "epoch": 1.6123499142367068, "grad_norm": 0.7613370418548584, "learning_rate": 3.5931921499504336e-06, "loss": 0.0813, "step": 54520 }, { "epoch": 1.6126456497308808, "grad_norm": 1.001387596130371, "learning_rate": 3.593065460031494e-06, "loss": 0.088, "step": 54530 }, { "epoch": 1.6129413852250547, "grad_norm": 0.6166892647743225, "learning_rate": 3.5929387701125548e-06, "loss": 0.0687, "step": 54540 }, { "epoch": 1.6132371207192286, "grad_norm": 1.03830885887146, "learning_rate": 3.592812080193615e-06, "loss": 0.0774, "step": 54550 }, { "epoch": 1.6135328562134026, "grad_norm": 0.49077001214027405, "learning_rate": 3.592685390274676e-06, "loss": 0.0771, "step": 54560 }, { "epoch": 1.6138285917075768, "grad_norm": 0.8957677483558655, "learning_rate": 3.5925587003557363e-06, "loss": 0.0891, "step": 54570 }, { "epoch": 1.614124327201751, "grad_norm": 2.4891984462738037, "learning_rate": 3.592432010436797e-06, "loss": 0.0837, "step": 54580 }, { "epoch": 1.6144200626959249, "grad_norm": 0.6648929715156555, "learning_rate": 3.5923053205178574e-06, "loss": 0.0831, "step": 54590 }, { "epoch": 1.6147157981900988, "grad_norm": 0.44929665327072144, "learning_rate": 3.5921786305989187e-06, "loss": 0.0778, "step": 54600 }, { "epoch": 1.6150115336842727, "grad_norm": 0.7674849629402161, "learning_rate": 3.592051940679979e-06, "loss": 0.1008, "step": 54610 }, { "epoch": 1.6153072691784467, "grad_norm": 0.5522481203079224, "learning_rate": 3.59192525076104e-06, "loss": 0.0864, "step": 54620 }, { "epoch": 1.6156030046726209, "grad_norm": 1.0684587955474854, "learning_rate": 3.5917985608421e-06, "loss": 0.0887, "step": 54630 }, { "epoch": 1.6158987401667948, "grad_norm": 2.4646384716033936, "learning_rate": 3.591671870923161e-06, "loss": 0.0899, "step": 54640 }, { "epoch": 1.616194475660969, "grad_norm": 0.9024750590324402, "learning_rate": 3.5915451810042213e-06, "loss": 0.0681, "step": 54650 }, { "epoch": 1.616490211155143, "grad_norm": 0.4479120373725891, "learning_rate": 3.591418491085282e-06, "loss": 0.0773, "step": 54660 }, { "epoch": 1.6167859466493169, "grad_norm": 0.8686327338218689, "learning_rate": 3.5912918011663425e-06, "loss": 0.0848, "step": 54670 }, { "epoch": 1.6170816821434908, "grad_norm": 1.0807162523269653, "learning_rate": 3.5911651112474037e-06, "loss": 0.0841, "step": 54680 }, { "epoch": 1.6173774176376647, "grad_norm": 1.0091745853424072, "learning_rate": 3.591038421328464e-06, "loss": 0.0908, "step": 54690 }, { "epoch": 1.617673153131839, "grad_norm": 0.9320539832115173, "learning_rate": 3.590911731409525e-06, "loss": 0.0886, "step": 54700 }, { "epoch": 1.6179688886260128, "grad_norm": 0.7172123193740845, "learning_rate": 3.5907850414905852e-06, "loss": 0.0646, "step": 54710 }, { "epoch": 1.618264624120187, "grad_norm": 0.6538234353065491, "learning_rate": 3.5906583515716456e-06, "loss": 0.0735, "step": 54720 }, { "epoch": 1.618560359614361, "grad_norm": 0.9141430258750916, "learning_rate": 3.5905316616527064e-06, "loss": 0.1029, "step": 54730 }, { "epoch": 1.618856095108535, "grad_norm": 0.8964720368385315, "learning_rate": 3.5904049717337667e-06, "loss": 0.1095, "step": 54740 }, { "epoch": 1.6191518306027088, "grad_norm": 0.9733577370643616, "learning_rate": 3.5902782818148275e-06, "loss": 0.0873, "step": 54750 }, { "epoch": 1.6194475660968828, "grad_norm": 1.047162413597107, "learning_rate": 3.5901515918958883e-06, "loss": 0.0826, "step": 54760 }, { "epoch": 1.619743301591057, "grad_norm": 0.7712172269821167, "learning_rate": 3.590024901976949e-06, "loss": 0.0758, "step": 54770 }, { "epoch": 1.6200390370852311, "grad_norm": 1.0680967569351196, "learning_rate": 3.5898982120580095e-06, "loss": 0.1, "step": 54780 }, { "epoch": 1.620334772579405, "grad_norm": 0.9676958918571472, "learning_rate": 3.5897715221390703e-06, "loss": 0.0883, "step": 54790 }, { "epoch": 1.620630508073579, "grad_norm": 0.6031246781349182, "learning_rate": 3.5896448322201306e-06, "loss": 0.076, "step": 54800 }, { "epoch": 1.620926243567753, "grad_norm": 0.9635839462280273, "learning_rate": 3.5895181423011914e-06, "loss": 0.0817, "step": 54810 }, { "epoch": 1.6212219790619269, "grad_norm": 0.920335590839386, "learning_rate": 3.589391452382252e-06, "loss": 0.0878, "step": 54820 }, { "epoch": 1.621517714556101, "grad_norm": 0.7852944731712341, "learning_rate": 3.5892647624633126e-06, "loss": 0.0897, "step": 54830 }, { "epoch": 1.621813450050275, "grad_norm": 0.8711849451065063, "learning_rate": 3.5891380725443734e-06, "loss": 0.0845, "step": 54840 }, { "epoch": 1.6221091855444492, "grad_norm": 0.8033129572868347, "learning_rate": 3.589011382625434e-06, "loss": 0.1069, "step": 54850 }, { "epoch": 1.622404921038623, "grad_norm": 1.0419268608093262, "learning_rate": 3.5888846927064945e-06, "loss": 0.0792, "step": 54860 }, { "epoch": 1.622700656532797, "grad_norm": 0.6925987005233765, "learning_rate": 3.5887580027875553e-06, "loss": 0.0616, "step": 54870 }, { "epoch": 1.622996392026971, "grad_norm": 0.9089452028274536, "learning_rate": 3.5886313128686157e-06, "loss": 0.091, "step": 54880 }, { "epoch": 1.623292127521145, "grad_norm": 0.5741907358169556, "learning_rate": 3.5885046229496765e-06, "loss": 0.0738, "step": 54890 }, { "epoch": 1.623587863015319, "grad_norm": 0.8599603772163391, "learning_rate": 3.588377933030737e-06, "loss": 0.0817, "step": 54900 }, { "epoch": 1.6238835985094933, "grad_norm": 1.0441899299621582, "learning_rate": 3.5882512431117976e-06, "loss": 0.0782, "step": 54910 }, { "epoch": 1.6241793340036672, "grad_norm": 1.006350040435791, "learning_rate": 3.5881245531928584e-06, "loss": 0.0922, "step": 54920 }, { "epoch": 1.6244750694978412, "grad_norm": 1.370358943939209, "learning_rate": 3.587997863273919e-06, "loss": 0.0846, "step": 54930 }, { "epoch": 1.624770804992015, "grad_norm": 0.678164541721344, "learning_rate": 3.5878711733549796e-06, "loss": 0.086, "step": 54940 }, { "epoch": 1.625066540486189, "grad_norm": 0.544219970703125, "learning_rate": 3.5877444834360404e-06, "loss": 0.078, "step": 54950 }, { "epoch": 1.6253622759803632, "grad_norm": 0.9476760029792786, "learning_rate": 3.5876177935171007e-06, "loss": 0.0775, "step": 54960 }, { "epoch": 1.6256580114745371, "grad_norm": 0.927568256855011, "learning_rate": 3.5874911035981615e-06, "loss": 0.0766, "step": 54970 }, { "epoch": 1.6259537469687113, "grad_norm": 0.8428471088409424, "learning_rate": 3.587364413679222e-06, "loss": 0.0792, "step": 54980 }, { "epoch": 1.6262494824628853, "grad_norm": 0.7911810278892517, "learning_rate": 3.5872377237602827e-06, "loss": 0.0954, "step": 54990 }, { "epoch": 1.6265452179570592, "grad_norm": 1.1282389163970947, "learning_rate": 3.5871110338413435e-06, "loss": 0.0793, "step": 55000 }, { "epoch": 1.6268409534512331, "grad_norm": 0.6986416578292847, "learning_rate": 3.5869843439224042e-06, "loss": 0.069, "step": 55010 }, { "epoch": 1.627136688945407, "grad_norm": 1.3301047086715698, "learning_rate": 3.5868576540034646e-06, "loss": 0.0827, "step": 55020 }, { "epoch": 1.6274324244395812, "grad_norm": 1.0084996223449707, "learning_rate": 3.5867309640845254e-06, "loss": 0.0967, "step": 55030 }, { "epoch": 1.6277281599337554, "grad_norm": 0.7091256976127625, "learning_rate": 3.5866042741655858e-06, "loss": 0.0905, "step": 55040 }, { "epoch": 1.6280238954279294, "grad_norm": 0.5934812426567078, "learning_rate": 3.5864775842466466e-06, "loss": 0.0763, "step": 55050 }, { "epoch": 1.6283196309221033, "grad_norm": 1.1741842031478882, "learning_rate": 3.586350894327707e-06, "loss": 0.0818, "step": 55060 }, { "epoch": 1.6286153664162772, "grad_norm": 0.997093141078949, "learning_rate": 3.5862242044087677e-06, "loss": 0.096, "step": 55070 }, { "epoch": 1.6289111019104512, "grad_norm": 1.4484224319458008, "learning_rate": 3.5860975144898285e-06, "loss": 0.0992, "step": 55080 }, { "epoch": 1.6292068374046254, "grad_norm": 1.0015642642974854, "learning_rate": 3.5859708245708893e-06, "loss": 0.0927, "step": 55090 }, { "epoch": 1.6295025728987993, "grad_norm": 0.8740527629852295, "learning_rate": 3.5858441346519497e-06, "loss": 0.0694, "step": 55100 }, { "epoch": 1.6297983083929735, "grad_norm": 0.6324475407600403, "learning_rate": 3.5857174447330104e-06, "loss": 0.0561, "step": 55110 }, { "epoch": 1.6300940438871474, "grad_norm": 0.6840358376502991, "learning_rate": 3.585590754814071e-06, "loss": 0.0713, "step": 55120 }, { "epoch": 1.6303897793813213, "grad_norm": 0.8914721012115479, "learning_rate": 3.585464064895131e-06, "loss": 0.1038, "step": 55130 }, { "epoch": 1.6306855148754953, "grad_norm": 93.47509765625, "learning_rate": 3.585337374976192e-06, "loss": 0.0897, "step": 55140 }, { "epoch": 1.6309812503696692, "grad_norm": 1.0113439559936523, "learning_rate": 3.5852106850572523e-06, "loss": 0.0825, "step": 55150 }, { "epoch": 1.6312769858638434, "grad_norm": 0.7415313124656677, "learning_rate": 3.5850839951383136e-06, "loss": 0.057, "step": 55160 }, { "epoch": 1.6315727213580173, "grad_norm": 0.7511247992515564, "learning_rate": 3.584957305219374e-06, "loss": 0.0894, "step": 55170 }, { "epoch": 1.6318684568521915, "grad_norm": 1.1339025497436523, "learning_rate": 3.5848306153004347e-06, "loss": 0.0809, "step": 55180 }, { "epoch": 1.6321641923463654, "grad_norm": 1.1615947484970093, "learning_rate": 3.584703925381495e-06, "loss": 0.1043, "step": 55190 }, { "epoch": 1.6324599278405394, "grad_norm": 0.26459887623786926, "learning_rate": 3.584577235462556e-06, "loss": 0.077, "step": 55200 }, { "epoch": 1.6327556633347133, "grad_norm": 0.35799071192741394, "learning_rate": 3.5844505455436162e-06, "loss": 0.0637, "step": 55210 }, { "epoch": 1.6330513988288873, "grad_norm": 0.7805510759353638, "learning_rate": 3.584323855624677e-06, "loss": 0.0839, "step": 55220 }, { "epoch": 1.6333471343230614, "grad_norm": 0.6014989018440247, "learning_rate": 3.5841971657057374e-06, "loss": 0.0923, "step": 55230 }, { "epoch": 1.6336428698172356, "grad_norm": 1.0649672746658325, "learning_rate": 3.5840704757867986e-06, "loss": 0.0882, "step": 55240 }, { "epoch": 1.6339386053114096, "grad_norm": 0.4877781569957733, "learning_rate": 3.583943785867859e-06, "loss": 0.1033, "step": 55250 }, { "epoch": 1.6342343408055835, "grad_norm": 0.9618253111839294, "learning_rate": 3.5838170959489198e-06, "loss": 0.0685, "step": 55260 }, { "epoch": 1.6345300762997574, "grad_norm": 0.9644327759742737, "learning_rate": 3.58369040602998e-06, "loss": 0.0908, "step": 55270 }, { "epoch": 1.6348258117939314, "grad_norm": 0.78748619556427, "learning_rate": 3.583563716111041e-06, "loss": 0.1054, "step": 55280 }, { "epoch": 1.6351215472881055, "grad_norm": 1.1171857118606567, "learning_rate": 3.5834370261921013e-06, "loss": 0.087, "step": 55290 }, { "epoch": 1.6354172827822795, "grad_norm": 0.809125542640686, "learning_rate": 3.583310336273162e-06, "loss": 0.084, "step": 55300 }, { "epoch": 1.6357130182764537, "grad_norm": 0.544400155544281, "learning_rate": 3.5831836463542224e-06, "loss": 0.0558, "step": 55310 }, { "epoch": 1.6360087537706276, "grad_norm": 0.9217625856399536, "learning_rate": 3.5830569564352836e-06, "loss": 0.0839, "step": 55320 }, { "epoch": 1.6363044892648015, "grad_norm": 0.5904917120933533, "learning_rate": 3.582930266516344e-06, "loss": 0.0897, "step": 55330 }, { "epoch": 1.6366002247589755, "grad_norm": 1.093903660774231, "learning_rate": 3.582803576597405e-06, "loss": 0.0995, "step": 55340 }, { "epoch": 1.6368959602531494, "grad_norm": 1.0181198120117188, "learning_rate": 3.582676886678465e-06, "loss": 0.09, "step": 55350 }, { "epoch": 1.6371916957473236, "grad_norm": 0.5775913000106812, "learning_rate": 3.582550196759526e-06, "loss": 0.0842, "step": 55360 }, { "epoch": 1.6374874312414978, "grad_norm": 0.8659677505493164, "learning_rate": 3.5824235068405863e-06, "loss": 0.0846, "step": 55370 }, { "epoch": 1.6377831667356717, "grad_norm": 0.7343344688415527, "learning_rate": 3.582296816921647e-06, "loss": 0.0839, "step": 55380 }, { "epoch": 1.6380789022298456, "grad_norm": 1.1043368577957153, "learning_rate": 3.5821701270027075e-06, "loss": 0.0897, "step": 55390 }, { "epoch": 1.6383746377240196, "grad_norm": 0.9045693874359131, "learning_rate": 3.5820434370837687e-06, "loss": 0.0939, "step": 55400 }, { "epoch": 1.6386703732181935, "grad_norm": 0.7765815258026123, "learning_rate": 3.581916747164829e-06, "loss": 0.0781, "step": 55410 }, { "epoch": 1.6389661087123677, "grad_norm": 1.2551710605621338, "learning_rate": 3.58179005724589e-06, "loss": 0.0846, "step": 55420 }, { "epoch": 1.6392618442065416, "grad_norm": 1.0989322662353516, "learning_rate": 3.58166336732695e-06, "loss": 0.0936, "step": 55430 }, { "epoch": 1.6395575797007158, "grad_norm": 1.108064889907837, "learning_rate": 3.581536677408011e-06, "loss": 0.0897, "step": 55440 }, { "epoch": 1.6398533151948897, "grad_norm": 0.8989487290382385, "learning_rate": 3.5814099874890714e-06, "loss": 0.0737, "step": 55450 }, { "epoch": 1.6401490506890637, "grad_norm": 1.047558069229126, "learning_rate": 3.581283297570132e-06, "loss": 0.0737, "step": 55460 }, { "epoch": 1.6404447861832376, "grad_norm": 1.7912989854812622, "learning_rate": 3.5811566076511925e-06, "loss": 0.0877, "step": 55470 }, { "epoch": 1.6407405216774116, "grad_norm": 0.9967662692070007, "learning_rate": 3.5810299177322537e-06, "loss": 0.0851, "step": 55480 }, { "epoch": 1.6410362571715857, "grad_norm": 0.729591965675354, "learning_rate": 3.580903227813314e-06, "loss": 0.1073, "step": 55490 }, { "epoch": 1.64133199266576, "grad_norm": 0.6027619242668152, "learning_rate": 3.580776537894375e-06, "loss": 0.0817, "step": 55500 }, { "epoch": 1.6416277281599339, "grad_norm": 0.4601939916610718, "learning_rate": 3.5806498479754353e-06, "loss": 0.0593, "step": 55510 }, { "epoch": 1.6419234636541078, "grad_norm": 1.0637794733047485, "learning_rate": 3.580523158056496e-06, "loss": 0.0929, "step": 55520 }, { "epoch": 1.6422191991482817, "grad_norm": 1.1433781385421753, "learning_rate": 3.5803964681375564e-06, "loss": 0.103, "step": 55530 }, { "epoch": 1.6425149346424557, "grad_norm": 0.6472547650337219, "learning_rate": 3.580269778218617e-06, "loss": 0.0812, "step": 55540 }, { "epoch": 1.6428106701366298, "grad_norm": 0.7766684889793396, "learning_rate": 3.5801430882996776e-06, "loss": 0.0832, "step": 55550 }, { "epoch": 1.6431064056308038, "grad_norm": 0.796036422252655, "learning_rate": 3.5800163983807384e-06, "loss": 0.081, "step": 55560 }, { "epoch": 1.643402141124978, "grad_norm": 1.3880993127822876, "learning_rate": 3.579889708461799e-06, "loss": 0.097, "step": 55570 }, { "epoch": 1.643697876619152, "grad_norm": 1.2681459188461304, "learning_rate": 3.5797630185428595e-06, "loss": 0.0933, "step": 55580 }, { "epoch": 1.6439936121133258, "grad_norm": 1.0274053812026978, "learning_rate": 3.5796363286239203e-06, "loss": 0.0861, "step": 55590 }, { "epoch": 1.6442893476074998, "grad_norm": 0.8606389760971069, "learning_rate": 3.5795096387049807e-06, "loss": 0.0919, "step": 55600 }, { "epoch": 1.6445850831016737, "grad_norm": 0.8651228547096252, "learning_rate": 3.5793829487860415e-06, "loss": 0.0706, "step": 55610 }, { "epoch": 1.644880818595848, "grad_norm": 0.7421112656593323, "learning_rate": 3.579256258867102e-06, "loss": 0.0913, "step": 55620 }, { "epoch": 1.6451765540900218, "grad_norm": 0.7627928853034973, "learning_rate": 3.5791295689481626e-06, "loss": 0.0867, "step": 55630 }, { "epoch": 1.645472289584196, "grad_norm": 0.7245749831199646, "learning_rate": 3.5790028790292234e-06, "loss": 0.0828, "step": 55640 }, { "epoch": 1.64576802507837, "grad_norm": 0.8754986524581909, "learning_rate": 3.578876189110284e-06, "loss": 0.1129, "step": 55650 }, { "epoch": 1.6460637605725439, "grad_norm": 0.5151461958885193, "learning_rate": 3.5787494991913446e-06, "loss": 0.0728, "step": 55660 }, { "epoch": 1.6463594960667178, "grad_norm": 0.8053398728370667, "learning_rate": 3.5786228092724053e-06, "loss": 0.0921, "step": 55670 }, { "epoch": 1.6466552315608918, "grad_norm": 0.9895126223564148, "learning_rate": 3.5784961193534657e-06, "loss": 0.0801, "step": 55680 }, { "epoch": 1.646950967055066, "grad_norm": 0.9220374822616577, "learning_rate": 3.5783694294345265e-06, "loss": 0.0858, "step": 55690 }, { "epoch": 1.64724670254924, "grad_norm": 0.7948667407035828, "learning_rate": 3.578242739515587e-06, "loss": 0.0748, "step": 55700 }, { "epoch": 1.647542438043414, "grad_norm": 0.5453572869300842, "learning_rate": 3.5781160495966477e-06, "loss": 0.0638, "step": 55710 }, { "epoch": 1.647838173537588, "grad_norm": 1.3384084701538086, "learning_rate": 3.5779893596777084e-06, "loss": 0.0776, "step": 55720 }, { "epoch": 1.648133909031762, "grad_norm": 0.9581775069236755, "learning_rate": 3.5778626697587692e-06, "loss": 0.0788, "step": 55730 }, { "epoch": 1.6484296445259359, "grad_norm": 0.6777416467666626, "learning_rate": 3.5777359798398296e-06, "loss": 0.0816, "step": 55740 }, { "epoch": 1.64872538002011, "grad_norm": 0.3778734505176544, "learning_rate": 3.5776092899208904e-06, "loss": 0.0788, "step": 55750 }, { "epoch": 1.649021115514284, "grad_norm": 1.2010031938552856, "learning_rate": 3.5774826000019508e-06, "loss": 0.0656, "step": 55760 }, { "epoch": 1.6493168510084582, "grad_norm": 1.5790702104568481, "learning_rate": 3.5773559100830115e-06, "loss": 0.0848, "step": 55770 }, { "epoch": 1.649612586502632, "grad_norm": 0.8304104208946228, "learning_rate": 3.577229220164072e-06, "loss": 0.0973, "step": 55780 }, { "epoch": 1.649908321996806, "grad_norm": 0.7983932495117188, "learning_rate": 3.5771025302451327e-06, "loss": 0.0881, "step": 55790 }, { "epoch": 1.65020405749098, "grad_norm": 0.964360237121582, "learning_rate": 3.5769758403261935e-06, "loss": 0.0736, "step": 55800 }, { "epoch": 1.650499792985154, "grad_norm": 1.2023019790649414, "learning_rate": 3.5768491504072543e-06, "loss": 0.0656, "step": 55810 }, { "epoch": 1.650795528479328, "grad_norm": 1.1230732202529907, "learning_rate": 3.5767224604883146e-06, "loss": 0.0951, "step": 55820 }, { "epoch": 1.6510912639735023, "grad_norm": 0.7720232009887695, "learning_rate": 3.5765957705693754e-06, "loss": 0.0835, "step": 55830 }, { "epoch": 1.6513869994676762, "grad_norm": 0.9978621602058411, "learning_rate": 3.576469080650436e-06, "loss": 0.091, "step": 55840 }, { "epoch": 1.6516827349618501, "grad_norm": 0.6952884793281555, "learning_rate": 3.5763423907314966e-06, "loss": 0.0825, "step": 55850 }, { "epoch": 1.651978470456024, "grad_norm": 0.8254978060722351, "learning_rate": 3.576215700812557e-06, "loss": 0.0694, "step": 55860 }, { "epoch": 1.652274205950198, "grad_norm": 0.8712684512138367, "learning_rate": 3.5760890108936177e-06, "loss": 0.0802, "step": 55870 }, { "epoch": 1.6525699414443722, "grad_norm": 0.6843929886817932, "learning_rate": 3.5759623209746785e-06, "loss": 0.0933, "step": 55880 }, { "epoch": 1.6528656769385461, "grad_norm": 1.1776478290557861, "learning_rate": 3.5758356310557393e-06, "loss": 0.0706, "step": 55890 }, { "epoch": 1.6531614124327203, "grad_norm": 0.8843658566474915, "learning_rate": 3.5757089411367997e-06, "loss": 0.0813, "step": 55900 }, { "epoch": 1.6534571479268942, "grad_norm": 0.7725419998168945, "learning_rate": 3.5755822512178605e-06, "loss": 0.0782, "step": 55910 }, { "epoch": 1.6537528834210682, "grad_norm": 0.8123763799667358, "learning_rate": 3.575455561298921e-06, "loss": 0.0878, "step": 55920 }, { "epoch": 1.6540486189152421, "grad_norm": 0.8085437417030334, "learning_rate": 3.5753288713799816e-06, "loss": 0.0772, "step": 55930 }, { "epoch": 1.654344354409416, "grad_norm": 1.0195868015289307, "learning_rate": 3.575202181461042e-06, "loss": 0.0958, "step": 55940 }, { "epoch": 1.6546400899035902, "grad_norm": 0.8630759716033936, "learning_rate": 3.575075491542103e-06, "loss": 0.0881, "step": 55950 }, { "epoch": 1.6549358253977644, "grad_norm": 0.6528432369232178, "learning_rate": 3.5749488016231636e-06, "loss": 0.0611, "step": 55960 }, { "epoch": 1.6552315608919383, "grad_norm": 0.5190956592559814, "learning_rate": 3.574822111704224e-06, "loss": 0.0705, "step": 55970 }, { "epoch": 1.6555272963861123, "grad_norm": 1.0854227542877197, "learning_rate": 3.5746954217852847e-06, "loss": 0.0914, "step": 55980 }, { "epoch": 1.6558230318802862, "grad_norm": 0.623669445514679, "learning_rate": 3.574568731866345e-06, "loss": 0.0882, "step": 55990 }, { "epoch": 1.6561187673744602, "grad_norm": 0.7010537981987, "learning_rate": 3.574442041947406e-06, "loss": 0.066, "step": 56000 }, { "epoch": 1.6564145028686343, "grad_norm": 0.8454858064651489, "learning_rate": 3.5743153520284663e-06, "loss": 0.0788, "step": 56010 }, { "epoch": 1.6567102383628083, "grad_norm": 1.0415445566177368, "learning_rate": 3.574188662109527e-06, "loss": 0.0749, "step": 56020 }, { "epoch": 1.6570059738569825, "grad_norm": 1.0938379764556885, "learning_rate": 3.5740619721905874e-06, "loss": 0.0977, "step": 56030 }, { "epoch": 1.6573017093511564, "grad_norm": 0.8326930403709412, "learning_rate": 3.5739352822716486e-06, "loss": 0.0787, "step": 56040 }, { "epoch": 1.6575974448453303, "grad_norm": 0.8786888718605042, "learning_rate": 3.573808592352709e-06, "loss": 0.0884, "step": 56050 }, { "epoch": 1.6578931803395043, "grad_norm": 0.4623979926109314, "learning_rate": 3.5736819024337698e-06, "loss": 0.092, "step": 56060 }, { "epoch": 1.6581889158336782, "grad_norm": 0.8403043150901794, "learning_rate": 3.57355521251483e-06, "loss": 0.0863, "step": 56070 }, { "epoch": 1.6584846513278524, "grad_norm": 0.6868780851364136, "learning_rate": 3.573428522595891e-06, "loss": 0.1039, "step": 56080 }, { "epoch": 1.6587803868220263, "grad_norm": 6.867360591888428, "learning_rate": 3.5733018326769513e-06, "loss": 0.1039, "step": 56090 }, { "epoch": 1.6590761223162005, "grad_norm": 0.7890139222145081, "learning_rate": 3.573175142758012e-06, "loss": 0.0791, "step": 56100 }, { "epoch": 1.6593718578103744, "grad_norm": 1.359218716621399, "learning_rate": 3.5730484528390725e-06, "loss": 0.0814, "step": 56110 }, { "epoch": 1.6596675933045484, "grad_norm": 1.2763853073120117, "learning_rate": 3.5729217629201337e-06, "loss": 0.0975, "step": 56120 }, { "epoch": 1.6599633287987223, "grad_norm": 0.7903873324394226, "learning_rate": 3.572795073001194e-06, "loss": 0.0714, "step": 56130 }, { "epoch": 1.6602590642928963, "grad_norm": 1.3189189434051514, "learning_rate": 3.572668383082255e-06, "loss": 0.0897, "step": 56140 }, { "epoch": 1.6605547997870704, "grad_norm": 0.8919309377670288, "learning_rate": 3.572541693163315e-06, "loss": 0.0762, "step": 56150 }, { "epoch": 1.6608505352812446, "grad_norm": 0.8802850246429443, "learning_rate": 3.572415003244376e-06, "loss": 0.0684, "step": 56160 }, { "epoch": 1.6611462707754185, "grad_norm": 0.9404922723770142, "learning_rate": 3.5722883133254363e-06, "loss": 0.0759, "step": 56170 }, { "epoch": 1.6614420062695925, "grad_norm": 0.7794104218482971, "learning_rate": 3.572161623406497e-06, "loss": 0.0882, "step": 56180 }, { "epoch": 1.6617377417637664, "grad_norm": 0.8115549683570862, "learning_rate": 3.5720349334875575e-06, "loss": 0.0951, "step": 56190 }, { "epoch": 1.6620334772579404, "grad_norm": 0.6620703935623169, "learning_rate": 3.5719082435686187e-06, "loss": 0.0788, "step": 56200 }, { "epoch": 1.6623292127521145, "grad_norm": 1.3623207807540894, "learning_rate": 3.571781553649679e-06, "loss": 0.0702, "step": 56210 }, { "epoch": 1.6626249482462885, "grad_norm": 0.684422492980957, "learning_rate": 3.57165486373074e-06, "loss": 0.0835, "step": 56220 }, { "epoch": 1.6629206837404626, "grad_norm": 0.7871317863464355, "learning_rate": 3.5715281738118002e-06, "loss": 0.0796, "step": 56230 }, { "epoch": 1.6632164192346366, "grad_norm": 1.1780221462249756, "learning_rate": 3.571401483892861e-06, "loss": 0.0905, "step": 56240 }, { "epoch": 1.6635121547288105, "grad_norm": 0.7868873476982117, "learning_rate": 3.5712747939739214e-06, "loss": 0.0696, "step": 56250 }, { "epoch": 1.6638078902229845, "grad_norm": 1.472321629524231, "learning_rate": 3.571148104054982e-06, "loss": 0.0612, "step": 56260 }, { "epoch": 1.6641036257171584, "grad_norm": 0.7190447449684143, "learning_rate": 3.5710214141360425e-06, "loss": 0.0924, "step": 56270 }, { "epoch": 1.6643993612113326, "grad_norm": 0.9539662599563599, "learning_rate": 3.5708947242171038e-06, "loss": 0.104, "step": 56280 }, { "epoch": 1.6646950967055067, "grad_norm": 0.7883808016777039, "learning_rate": 3.570768034298164e-06, "loss": 0.0927, "step": 56290 }, { "epoch": 1.6649908321996807, "grad_norm": 0.639891505241394, "learning_rate": 3.570641344379225e-06, "loss": 0.0785, "step": 56300 }, { "epoch": 1.6652865676938546, "grad_norm": 0.8181878328323364, "learning_rate": 3.5705146544602853e-06, "loss": 0.0703, "step": 56310 }, { "epoch": 1.6655823031880286, "grad_norm": 1.04245924949646, "learning_rate": 3.570387964541346e-06, "loss": 0.0916, "step": 56320 }, { "epoch": 1.6658780386822025, "grad_norm": 0.6897565722465515, "learning_rate": 3.5702612746224064e-06, "loss": 0.083, "step": 56330 }, { "epoch": 1.6661737741763767, "grad_norm": 0.6478614807128906, "learning_rate": 3.5701345847034672e-06, "loss": 0.0784, "step": 56340 }, { "epoch": 1.6664695096705506, "grad_norm": 0.9095539450645447, "learning_rate": 3.5700078947845276e-06, "loss": 0.0727, "step": 56350 }, { "epoch": 1.6667652451647248, "grad_norm": 0.7211617231369019, "learning_rate": 3.569881204865589e-06, "loss": 0.069, "step": 56360 }, { "epoch": 1.6670609806588987, "grad_norm": 0.88547682762146, "learning_rate": 3.569754514946649e-06, "loss": 0.0837, "step": 56370 }, { "epoch": 1.6673567161530727, "grad_norm": 1.2218350172042847, "learning_rate": 3.5696278250277095e-06, "loss": 0.0916, "step": 56380 }, { "epoch": 1.6676524516472466, "grad_norm": 0.8793776631355286, "learning_rate": 3.5695011351087703e-06, "loss": 0.0803, "step": 56390 }, { "epoch": 1.6679481871414206, "grad_norm": 0.6413806080818176, "learning_rate": 3.5693744451898307e-06, "loss": 0.0884, "step": 56400 }, { "epoch": 1.6682439226355947, "grad_norm": 0.4251892864704132, "learning_rate": 3.5692477552708915e-06, "loss": 0.0693, "step": 56410 }, { "epoch": 1.668539658129769, "grad_norm": 1.2114696502685547, "learning_rate": 3.569121065351952e-06, "loss": 0.0982, "step": 56420 }, { "epoch": 1.6688353936239428, "grad_norm": 0.8710294961929321, "learning_rate": 3.5689943754330126e-06, "loss": 0.0996, "step": 56430 }, { "epoch": 1.6691311291181168, "grad_norm": 1.1981595754623413, "learning_rate": 3.5688676855140734e-06, "loss": 0.0833, "step": 56440 }, { "epoch": 1.6694268646122907, "grad_norm": 1.0159497261047363, "learning_rate": 3.5687409955951342e-06, "loss": 0.067, "step": 56450 }, { "epoch": 1.6697226001064647, "grad_norm": 0.7407074570655823, "learning_rate": 3.5686143056761946e-06, "loss": 0.0555, "step": 56460 }, { "epoch": 1.6700183356006388, "grad_norm": 0.6197118163108826, "learning_rate": 3.5684876157572554e-06, "loss": 0.0849, "step": 56470 }, { "epoch": 1.6703140710948128, "grad_norm": 0.8487651348114014, "learning_rate": 3.5683609258383157e-06, "loss": 0.1, "step": 56480 }, { "epoch": 1.670609806588987, "grad_norm": 0.6898977756500244, "learning_rate": 3.5682342359193765e-06, "loss": 0.0834, "step": 56490 }, { "epoch": 1.6709055420831609, "grad_norm": 1.3626878261566162, "learning_rate": 3.568107546000437e-06, "loss": 0.1009, "step": 56500 }, { "epoch": 1.6712012775773348, "grad_norm": 0.9815592169761658, "learning_rate": 3.5679808560814977e-06, "loss": 0.0722, "step": 56510 }, { "epoch": 1.6714970130715088, "grad_norm": 1.2723236083984375, "learning_rate": 3.5678541661625585e-06, "loss": 0.0958, "step": 56520 }, { "epoch": 1.6717927485656827, "grad_norm": 1.1995775699615479, "learning_rate": 3.5677274762436193e-06, "loss": 0.0835, "step": 56530 }, { "epoch": 1.6720884840598569, "grad_norm": 0.8499554395675659, "learning_rate": 3.5676007863246796e-06, "loss": 0.0858, "step": 56540 }, { "epoch": 1.6723842195540308, "grad_norm": 0.7015314102172852, "learning_rate": 3.5674740964057404e-06, "loss": 0.0852, "step": 56550 }, { "epoch": 1.672679955048205, "grad_norm": 0.502355694770813, "learning_rate": 3.5673474064868008e-06, "loss": 0.0639, "step": 56560 }, { "epoch": 1.672975690542379, "grad_norm": 0.6691601872444153, "learning_rate": 3.5672207165678616e-06, "loss": 0.0788, "step": 56570 }, { "epoch": 1.6732714260365529, "grad_norm": 0.6881194710731506, "learning_rate": 3.567094026648922e-06, "loss": 0.0886, "step": 56580 }, { "epoch": 1.6735671615307268, "grad_norm": 5.939064025878906, "learning_rate": 3.5669673367299827e-06, "loss": 0.0787, "step": 56590 }, { "epoch": 1.6738628970249008, "grad_norm": 0.9515036940574646, "learning_rate": 3.5668406468110435e-06, "loss": 0.0801, "step": 56600 }, { "epoch": 1.674158632519075, "grad_norm": 0.39351579546928406, "learning_rate": 3.5667139568921043e-06, "loss": 0.0733, "step": 56610 }, { "epoch": 1.674454368013249, "grad_norm": 1.4368146657943726, "learning_rate": 3.5665872669731647e-06, "loss": 0.0775, "step": 56620 }, { "epoch": 1.674750103507423, "grad_norm": 1.3245052099227905, "learning_rate": 3.5664605770542255e-06, "loss": 0.0858, "step": 56630 }, { "epoch": 1.675045839001597, "grad_norm": 1.1468361616134644, "learning_rate": 3.566333887135286e-06, "loss": 0.0819, "step": 56640 }, { "epoch": 1.675341574495771, "grad_norm": 0.8511074781417847, "learning_rate": 3.5662071972163466e-06, "loss": 0.0948, "step": 56650 }, { "epoch": 1.6756373099899449, "grad_norm": 1.013951063156128, "learning_rate": 3.566080507297407e-06, "loss": 0.0767, "step": 56660 }, { "epoch": 1.675933045484119, "grad_norm": 0.8856264352798462, "learning_rate": 3.5659538173784678e-06, "loss": 0.0953, "step": 56670 }, { "epoch": 1.676228780978293, "grad_norm": 1.6635353565216064, "learning_rate": 3.5658271274595286e-06, "loss": 0.0887, "step": 56680 }, { "epoch": 1.6765245164724671, "grad_norm": 1.5583549737930298, "learning_rate": 3.5657004375405894e-06, "loss": 0.1017, "step": 56690 }, { "epoch": 1.676820251966641, "grad_norm": 0.7316295504570007, "learning_rate": 3.5655737476216497e-06, "loss": 0.077, "step": 56700 }, { "epoch": 1.677115987460815, "grad_norm": 1.2496378421783447, "learning_rate": 3.5654470577027105e-06, "loss": 0.076, "step": 56710 }, { "epoch": 1.677411722954989, "grad_norm": 1.144723653793335, "learning_rate": 3.565320367783771e-06, "loss": 0.0764, "step": 56720 }, { "epoch": 1.677707458449163, "grad_norm": 1.4285808801651, "learning_rate": 3.5651936778648317e-06, "loss": 0.0873, "step": 56730 }, { "epoch": 1.678003193943337, "grad_norm": 0.7283560633659363, "learning_rate": 3.565066987945892e-06, "loss": 0.0789, "step": 56740 }, { "epoch": 1.6782989294375112, "grad_norm": 0.8491941690444946, "learning_rate": 3.564940298026953e-06, "loss": 0.0882, "step": 56750 }, { "epoch": 1.6785946649316852, "grad_norm": 0.7982632517814636, "learning_rate": 3.5648136081080136e-06, "loss": 0.0796, "step": 56760 }, { "epoch": 1.6788904004258591, "grad_norm": 0.7306171655654907, "learning_rate": 3.5646869181890744e-06, "loss": 0.0842, "step": 56770 }, { "epoch": 1.679186135920033, "grad_norm": 0.6186901926994324, "learning_rate": 3.5645602282701348e-06, "loss": 0.1043, "step": 56780 }, { "epoch": 1.679481871414207, "grad_norm": 0.7139551639556885, "learning_rate": 3.564433538351195e-06, "loss": 0.0714, "step": 56790 }, { "epoch": 1.6797776069083812, "grad_norm": 0.9436407089233398, "learning_rate": 3.564306848432256e-06, "loss": 0.0725, "step": 56800 }, { "epoch": 1.6800733424025551, "grad_norm": 0.6446757316589355, "learning_rate": 3.5641801585133163e-06, "loss": 0.08, "step": 56810 }, { "epoch": 1.6803690778967293, "grad_norm": 0.8517657518386841, "learning_rate": 3.564053468594377e-06, "loss": 0.097, "step": 56820 }, { "epoch": 1.6806648133909032, "grad_norm": 0.8423631191253662, "learning_rate": 3.5639267786754374e-06, "loss": 0.0881, "step": 56830 }, { "epoch": 1.6809605488850772, "grad_norm": 0.8873953819274902, "learning_rate": 3.5638000887564987e-06, "loss": 0.0879, "step": 56840 }, { "epoch": 1.6812562843792511, "grad_norm": 0.7613838315010071, "learning_rate": 3.563673398837559e-06, "loss": 0.0877, "step": 56850 }, { "epoch": 1.681552019873425, "grad_norm": 3.051285743713379, "learning_rate": 3.56354670891862e-06, "loss": 0.0781, "step": 56860 }, { "epoch": 1.6818477553675992, "grad_norm": 0.9590145945549011, "learning_rate": 3.56342001899968e-06, "loss": 0.0915, "step": 56870 }, { "epoch": 1.6821434908617734, "grad_norm": 0.7922818660736084, "learning_rate": 3.563293329080741e-06, "loss": 0.1034, "step": 56880 }, { "epoch": 1.6824392263559473, "grad_norm": 0.9163803458213806, "learning_rate": 3.5631666391618013e-06, "loss": 0.0865, "step": 56890 }, { "epoch": 1.6827349618501213, "grad_norm": 0.6538141369819641, "learning_rate": 3.563039949242862e-06, "loss": 0.0854, "step": 56900 }, { "epoch": 1.6830306973442952, "grad_norm": 0.5766021013259888, "learning_rate": 3.5629132593239225e-06, "loss": 0.0567, "step": 56910 }, { "epoch": 1.6833264328384692, "grad_norm": 0.9336046576499939, "learning_rate": 3.5627865694049837e-06, "loss": 0.0759, "step": 56920 }, { "epoch": 1.6836221683326433, "grad_norm": 0.8719120025634766, "learning_rate": 3.562659879486044e-06, "loss": 0.0903, "step": 56930 }, { "epoch": 1.6839179038268173, "grad_norm": 0.592498779296875, "learning_rate": 3.562533189567105e-06, "loss": 0.0772, "step": 56940 }, { "epoch": 1.6842136393209914, "grad_norm": 1.0253825187683105, "learning_rate": 3.5624064996481652e-06, "loss": 0.083, "step": 56950 }, { "epoch": 1.6845093748151654, "grad_norm": 1.2793610095977783, "learning_rate": 3.562279809729226e-06, "loss": 0.069, "step": 56960 }, { "epoch": 1.6848051103093393, "grad_norm": 0.9788816571235657, "learning_rate": 3.5621531198102864e-06, "loss": 0.0931, "step": 56970 }, { "epoch": 1.6851008458035133, "grad_norm": 0.5295563340187073, "learning_rate": 3.562026429891347e-06, "loss": 0.085, "step": 56980 }, { "epoch": 1.6853965812976872, "grad_norm": 0.6905859112739563, "learning_rate": 3.5618997399724075e-06, "loss": 0.0889, "step": 56990 }, { "epoch": 1.6856923167918614, "grad_norm": 1.0310059785842896, "learning_rate": 3.5617730500534687e-06, "loss": 0.0875, "step": 57000 }, { "epoch": 1.6859880522860353, "grad_norm": 0.9467900395393372, "learning_rate": 3.561646360134529e-06, "loss": 0.0809, "step": 57010 }, { "epoch": 1.6862837877802095, "grad_norm": 0.8542195558547974, "learning_rate": 3.56151967021559e-06, "loss": 0.0844, "step": 57020 }, { "epoch": 1.6865795232743834, "grad_norm": 0.8006970882415771, "learning_rate": 3.5613929802966503e-06, "loss": 0.0761, "step": 57030 }, { "epoch": 1.6868752587685574, "grad_norm": 1.3008811473846436, "learning_rate": 3.561266290377711e-06, "loss": 0.0942, "step": 57040 }, { "epoch": 1.6871709942627313, "grad_norm": 0.8501468896865845, "learning_rate": 3.5611396004587714e-06, "loss": 0.0903, "step": 57050 }, { "epoch": 1.6874667297569053, "grad_norm": 1.1552948951721191, "learning_rate": 3.5610129105398322e-06, "loss": 0.0851, "step": 57060 }, { "epoch": 1.6877624652510794, "grad_norm": 0.6946707367897034, "learning_rate": 3.5608862206208926e-06, "loss": 0.0995, "step": 57070 }, { "epoch": 1.6880582007452536, "grad_norm": 1.1993489265441895, "learning_rate": 3.560759530701954e-06, "loss": 0.0921, "step": 57080 }, { "epoch": 1.6883539362394275, "grad_norm": 0.7138578295707703, "learning_rate": 3.560632840783014e-06, "loss": 0.0866, "step": 57090 }, { "epoch": 1.6886496717336015, "grad_norm": 0.9938045740127563, "learning_rate": 3.560506150864075e-06, "loss": 0.092, "step": 57100 }, { "epoch": 1.6889454072277754, "grad_norm": 1.7629824876785278, "learning_rate": 3.5603794609451353e-06, "loss": 0.0946, "step": 57110 }, { "epoch": 1.6892411427219494, "grad_norm": 0.7913803458213806, "learning_rate": 3.560252771026196e-06, "loss": 0.1104, "step": 57120 }, { "epoch": 1.6895368782161235, "grad_norm": 0.6277543902397156, "learning_rate": 3.5601260811072565e-06, "loss": 0.0826, "step": 57130 }, { "epoch": 1.6898326137102975, "grad_norm": 0.9572917222976685, "learning_rate": 3.5599993911883173e-06, "loss": 0.0976, "step": 57140 }, { "epoch": 1.6901283492044716, "grad_norm": 0.6457683444023132, "learning_rate": 3.5598727012693776e-06, "loss": 0.0712, "step": 57150 }, { "epoch": 1.6904240846986456, "grad_norm": 0.20858390629291534, "learning_rate": 3.559746011350439e-06, "loss": 0.0651, "step": 57160 }, { "epoch": 1.6907198201928195, "grad_norm": 1.1738625764846802, "learning_rate": 3.559619321431499e-06, "loss": 0.09, "step": 57170 }, { "epoch": 1.6910155556869935, "grad_norm": 0.8181134462356567, "learning_rate": 3.55949263151256e-06, "loss": 0.0788, "step": 57180 }, { "epoch": 1.6913112911811674, "grad_norm": 0.8127229809761047, "learning_rate": 3.5593659415936204e-06, "loss": 0.0975, "step": 57190 }, { "epoch": 1.6916070266753416, "grad_norm": 1.0125234127044678, "learning_rate": 3.5592392516746807e-06, "loss": 0.0852, "step": 57200 }, { "epoch": 1.6919027621695157, "grad_norm": 0.8029753565788269, "learning_rate": 3.5591125617557415e-06, "loss": 0.0689, "step": 57210 }, { "epoch": 1.6921984976636897, "grad_norm": 0.6501047015190125, "learning_rate": 3.558985871836802e-06, "loss": 0.0876, "step": 57220 }, { "epoch": 1.6924942331578636, "grad_norm": 0.718995213508606, "learning_rate": 3.5588591819178627e-06, "loss": 0.0874, "step": 57230 }, { "epoch": 1.6927899686520376, "grad_norm": 1.0256257057189941, "learning_rate": 3.5587324919989235e-06, "loss": 0.0938, "step": 57240 }, { "epoch": 1.6930857041462115, "grad_norm": 1.0988205671310425, "learning_rate": 3.5586058020799842e-06, "loss": 0.0864, "step": 57250 }, { "epoch": 1.6933814396403857, "grad_norm": 0.42915210127830505, "learning_rate": 3.5584791121610446e-06, "loss": 0.0677, "step": 57260 }, { "epoch": 1.6936771751345596, "grad_norm": 1.0207418203353882, "learning_rate": 3.5583524222421054e-06, "loss": 0.1089, "step": 57270 }, { "epoch": 1.6939729106287338, "grad_norm": 1.2094311714172363, "learning_rate": 3.5582257323231658e-06, "loss": 0.0841, "step": 57280 }, { "epoch": 1.6942686461229077, "grad_norm": 0.940296471118927, "learning_rate": 3.5580990424042266e-06, "loss": 0.0757, "step": 57290 }, { "epoch": 1.6945643816170817, "grad_norm": 0.8142160773277283, "learning_rate": 3.557972352485287e-06, "loss": 0.0961, "step": 57300 }, { "epoch": 1.6948601171112556, "grad_norm": 0.7131302952766418, "learning_rate": 3.5578456625663477e-06, "loss": 0.0705, "step": 57310 }, { "epoch": 1.6951558526054296, "grad_norm": 0.8396185040473938, "learning_rate": 3.5577189726474085e-06, "loss": 0.0795, "step": 57320 }, { "epoch": 1.6954515880996037, "grad_norm": 1.1084425449371338, "learning_rate": 3.5575922827284693e-06, "loss": 0.0959, "step": 57330 }, { "epoch": 1.6957473235937779, "grad_norm": 0.7992724180221558, "learning_rate": 3.5574655928095297e-06, "loss": 0.0853, "step": 57340 }, { "epoch": 1.6960430590879518, "grad_norm": 1.1595853567123413, "learning_rate": 3.5573389028905904e-06, "loss": 0.0839, "step": 57350 }, { "epoch": 1.6963387945821258, "grad_norm": 1.0713709592819214, "learning_rate": 3.557212212971651e-06, "loss": 0.0806, "step": 57360 }, { "epoch": 1.6966345300762997, "grad_norm": 0.8201619982719421, "learning_rate": 3.5570855230527116e-06, "loss": 0.0873, "step": 57370 }, { "epoch": 1.6969302655704737, "grad_norm": 0.8549246191978455, "learning_rate": 3.556958833133772e-06, "loss": 0.0773, "step": 57380 }, { "epoch": 1.6972260010646478, "grad_norm": 0.703526496887207, "learning_rate": 3.5568321432148328e-06, "loss": 0.0848, "step": 57390 }, { "epoch": 1.6975217365588218, "grad_norm": 0.5027283430099487, "learning_rate": 3.5567054532958935e-06, "loss": 0.0725, "step": 57400 }, { "epoch": 1.697817472052996, "grad_norm": 0.6857689619064331, "learning_rate": 3.5565787633769543e-06, "loss": 0.0846, "step": 57410 }, { "epoch": 1.6981132075471699, "grad_norm": 0.6404552459716797, "learning_rate": 3.5564520734580147e-06, "loss": 0.0934, "step": 57420 }, { "epoch": 1.6984089430413438, "grad_norm": 1.1216570138931274, "learning_rate": 3.5563253835390755e-06, "loss": 0.0841, "step": 57430 }, { "epoch": 1.6987046785355178, "grad_norm": 0.9062824845314026, "learning_rate": 3.556198693620136e-06, "loss": 0.0613, "step": 57440 }, { "epoch": 1.6990004140296917, "grad_norm": 0.9927075505256653, "learning_rate": 3.5560720037011966e-06, "loss": 0.0872, "step": 57450 }, { "epoch": 1.6992961495238659, "grad_norm": 1.228332281112671, "learning_rate": 3.555945313782257e-06, "loss": 0.069, "step": 57460 }, { "epoch": 1.6995918850180398, "grad_norm": 1.3537527322769165, "learning_rate": 3.555818623863318e-06, "loss": 0.0969, "step": 57470 }, { "epoch": 1.699887620512214, "grad_norm": 0.928043782711029, "learning_rate": 3.5556919339443786e-06, "loss": 0.0866, "step": 57480 }, { "epoch": 1.700183356006388, "grad_norm": 0.9967954158782959, "learning_rate": 3.5555652440254394e-06, "loss": 0.0933, "step": 57490 }, { "epoch": 1.7004790915005619, "grad_norm": 0.6295350790023804, "learning_rate": 3.5554385541064998e-06, "loss": 0.0727, "step": 57500 }, { "epoch": 1.7007748269947358, "grad_norm": 0.7062523365020752, "learning_rate": 3.5553118641875605e-06, "loss": 0.0744, "step": 57510 }, { "epoch": 1.7010705624889098, "grad_norm": 1.1776115894317627, "learning_rate": 3.555185174268621e-06, "loss": 0.1039, "step": 57520 }, { "epoch": 1.701366297983084, "grad_norm": 1.1366214752197266, "learning_rate": 3.5550584843496817e-06, "loss": 0.0965, "step": 57530 }, { "epoch": 1.701662033477258, "grad_norm": 0.8132131695747375, "learning_rate": 3.554931794430742e-06, "loss": 0.0924, "step": 57540 }, { "epoch": 1.701957768971432, "grad_norm": 1.1500548124313354, "learning_rate": 3.554805104511803e-06, "loss": 0.0873, "step": 57550 }, { "epoch": 1.702253504465606, "grad_norm": 0.8415878415107727, "learning_rate": 3.5546784145928636e-06, "loss": 0.0715, "step": 57560 }, { "epoch": 1.70254923995978, "grad_norm": 1.101374864578247, "learning_rate": 3.5545517246739244e-06, "loss": 0.0851, "step": 57570 }, { "epoch": 1.7028449754539539, "grad_norm": 0.8525958061218262, "learning_rate": 3.554425034754985e-06, "loss": 0.0996, "step": 57580 }, { "epoch": 1.703140710948128, "grad_norm": 0.606904923915863, "learning_rate": 3.5542983448360456e-06, "loss": 0.0811, "step": 57590 }, { "epoch": 1.703436446442302, "grad_norm": 0.7093179821968079, "learning_rate": 3.554171654917106e-06, "loss": 0.083, "step": 57600 }, { "epoch": 1.7037321819364761, "grad_norm": 0.6412045955657959, "learning_rate": 3.5540449649981663e-06, "loss": 0.0755, "step": 57610 }, { "epoch": 1.70402791743065, "grad_norm": 0.6475026607513428, "learning_rate": 3.553918275079227e-06, "loss": 0.0757, "step": 57620 }, { "epoch": 1.704323652924824, "grad_norm": 1.0802171230316162, "learning_rate": 3.5537915851602875e-06, "loss": 0.0906, "step": 57630 }, { "epoch": 1.704619388418998, "grad_norm": 0.4969923496246338, "learning_rate": 3.5536648952413487e-06, "loss": 0.0659, "step": 57640 }, { "epoch": 1.704915123913172, "grad_norm": 0.720872700214386, "learning_rate": 3.553538205322409e-06, "loss": 0.081, "step": 57650 }, { "epoch": 1.705210859407346, "grad_norm": 0.47883182764053345, "learning_rate": 3.55341151540347e-06, "loss": 0.0616, "step": 57660 }, { "epoch": 1.7055065949015202, "grad_norm": 0.7804462909698486, "learning_rate": 3.55328482548453e-06, "loss": 0.07, "step": 57670 }, { "epoch": 1.7058023303956942, "grad_norm": 1.7666484117507935, "learning_rate": 3.553158135565591e-06, "loss": 0.099, "step": 57680 }, { "epoch": 1.7060980658898681, "grad_norm": 0.783775269985199, "learning_rate": 3.5530314456466514e-06, "loss": 0.0728, "step": 57690 }, { "epoch": 1.706393801384042, "grad_norm": 0.7844205498695374, "learning_rate": 3.552904755727712e-06, "loss": 0.0707, "step": 57700 }, { "epoch": 1.706689536878216, "grad_norm": 0.9240407943725586, "learning_rate": 3.5527780658087725e-06, "loss": 0.0776, "step": 57710 }, { "epoch": 1.7069852723723902, "grad_norm": 1.1898066997528076, "learning_rate": 3.5526513758898337e-06, "loss": 0.0872, "step": 57720 }, { "epoch": 1.7072810078665641, "grad_norm": 0.49173346161842346, "learning_rate": 3.552524685970894e-06, "loss": 0.0905, "step": 57730 }, { "epoch": 1.7075767433607383, "grad_norm": 0.824028491973877, "learning_rate": 3.552397996051955e-06, "loss": 0.0867, "step": 57740 }, { "epoch": 1.7078724788549122, "grad_norm": 0.7977532148361206, "learning_rate": 3.5522713061330153e-06, "loss": 0.0863, "step": 57750 }, { "epoch": 1.7081682143490862, "grad_norm": 1.7764146327972412, "learning_rate": 3.552144616214076e-06, "loss": 0.0746, "step": 57760 }, { "epoch": 1.70846394984326, "grad_norm": 0.8749362826347351, "learning_rate": 3.5520179262951364e-06, "loss": 0.0839, "step": 57770 }, { "epoch": 1.708759685337434, "grad_norm": 0.9535995125770569, "learning_rate": 3.551891236376197e-06, "loss": 0.0887, "step": 57780 }, { "epoch": 1.7090554208316082, "grad_norm": 1.077526330947876, "learning_rate": 3.5517645464572576e-06, "loss": 0.0795, "step": 57790 }, { "epoch": 1.7093511563257824, "grad_norm": 0.8845226168632507, "learning_rate": 3.5516378565383188e-06, "loss": 0.0912, "step": 57800 }, { "epoch": 1.7096468918199563, "grad_norm": 0.5262885689735413, "learning_rate": 3.551511166619379e-06, "loss": 0.065, "step": 57810 }, { "epoch": 1.7099426273141303, "grad_norm": 0.7941120862960815, "learning_rate": 3.55138447670044e-06, "loss": 0.0779, "step": 57820 }, { "epoch": 1.7102383628083042, "grad_norm": 0.8634751439094543, "learning_rate": 3.5512577867815003e-06, "loss": 0.0957, "step": 57830 }, { "epoch": 1.7105340983024782, "grad_norm": 0.6680055856704712, "learning_rate": 3.551131096862561e-06, "loss": 0.0873, "step": 57840 }, { "epoch": 1.7108298337966523, "grad_norm": 0.7620048522949219, "learning_rate": 3.5510044069436215e-06, "loss": 0.0812, "step": 57850 }, { "epoch": 1.7111255692908263, "grad_norm": 0.9044471383094788, "learning_rate": 3.5508777170246822e-06, "loss": 0.0818, "step": 57860 }, { "epoch": 1.7114213047850004, "grad_norm": 0.5712133049964905, "learning_rate": 3.5507510271057426e-06, "loss": 0.063, "step": 57870 }, { "epoch": 1.7117170402791744, "grad_norm": 1.1428683996200562, "learning_rate": 3.550624337186804e-06, "loss": 0.0949, "step": 57880 }, { "epoch": 1.7120127757733483, "grad_norm": 0.44320064783096313, "learning_rate": 3.550497647267864e-06, "loss": 0.0835, "step": 57890 }, { "epoch": 1.7123085112675223, "grad_norm": 0.5881098508834839, "learning_rate": 3.550370957348925e-06, "loss": 0.0646, "step": 57900 }, { "epoch": 1.7126042467616962, "grad_norm": 0.7769665718078613, "learning_rate": 3.5502442674299853e-06, "loss": 0.0822, "step": 57910 }, { "epoch": 1.7128999822558704, "grad_norm": 0.6557688117027283, "learning_rate": 3.550117577511046e-06, "loss": 0.0925, "step": 57920 }, { "epoch": 1.7131957177500443, "grad_norm": 0.8685073256492615, "learning_rate": 3.5499908875921065e-06, "loss": 0.0846, "step": 57930 }, { "epoch": 1.7134914532442185, "grad_norm": 0.9181568622589111, "learning_rate": 3.5498641976731673e-06, "loss": 0.0896, "step": 57940 }, { "epoch": 1.7137871887383924, "grad_norm": 0.9169096946716309, "learning_rate": 3.5497375077542277e-06, "loss": 0.0744, "step": 57950 }, { "epoch": 1.7140829242325664, "grad_norm": 0.7293540239334106, "learning_rate": 3.549610817835289e-06, "loss": 0.0719, "step": 57960 }, { "epoch": 1.7143786597267403, "grad_norm": 0.8324088454246521, "learning_rate": 3.5494841279163492e-06, "loss": 0.0646, "step": 57970 }, { "epoch": 1.7146743952209142, "grad_norm": 0.5333375930786133, "learning_rate": 3.54935743799741e-06, "loss": 0.0708, "step": 57980 }, { "epoch": 1.7149701307150884, "grad_norm": 1.1601321697235107, "learning_rate": 3.5492307480784704e-06, "loss": 0.0867, "step": 57990 }, { "epoch": 1.7152658662092626, "grad_norm": 0.7838901281356812, "learning_rate": 3.549104058159531e-06, "loss": 0.0694, "step": 58000 }, { "epoch": 1.7155616017034365, "grad_norm": 0.3868686258792877, "learning_rate": 3.5489773682405915e-06, "loss": 0.0783, "step": 58010 }, { "epoch": 1.7158573371976105, "grad_norm": 0.5214998722076416, "learning_rate": 3.5488506783216523e-06, "loss": 0.0697, "step": 58020 }, { "epoch": 1.7161530726917844, "grad_norm": 0.9911348223686218, "learning_rate": 3.5487239884027127e-06, "loss": 0.0886, "step": 58030 }, { "epoch": 1.7164488081859584, "grad_norm": 1.0121747255325317, "learning_rate": 3.5485972984837735e-06, "loss": 0.1089, "step": 58040 }, { "epoch": 1.7167445436801325, "grad_norm": 1.041353702545166, "learning_rate": 3.5484706085648343e-06, "loss": 0.0892, "step": 58050 }, { "epoch": 1.7170402791743065, "grad_norm": 0.6091114282608032, "learning_rate": 3.5483439186458946e-06, "loss": 0.0743, "step": 58060 }, { "epoch": 1.7173360146684806, "grad_norm": 1.1012128591537476, "learning_rate": 3.5482172287269554e-06, "loss": 0.0925, "step": 58070 }, { "epoch": 1.7176317501626546, "grad_norm": 0.7720889449119568, "learning_rate": 3.548090538808016e-06, "loss": 0.0889, "step": 58080 }, { "epoch": 1.7179274856568285, "grad_norm": 1.0414626598358154, "learning_rate": 3.5479638488890766e-06, "loss": 0.0869, "step": 58090 }, { "epoch": 1.7182232211510025, "grad_norm": 0.6585509181022644, "learning_rate": 3.547837158970137e-06, "loss": 0.0736, "step": 58100 }, { "epoch": 1.7185189566451764, "grad_norm": 1.434385061264038, "learning_rate": 3.5477104690511977e-06, "loss": 0.0807, "step": 58110 }, { "epoch": 1.7188146921393506, "grad_norm": 0.8828415274620056, "learning_rate": 3.5475837791322585e-06, "loss": 0.0665, "step": 58120 }, { "epoch": 1.7191104276335247, "grad_norm": 0.7520617842674255, "learning_rate": 3.5474570892133193e-06, "loss": 0.0873, "step": 58130 }, { "epoch": 1.7194061631276987, "grad_norm": 1.2079060077667236, "learning_rate": 3.5473303992943797e-06, "loss": 0.098, "step": 58140 }, { "epoch": 1.7197018986218726, "grad_norm": 0.8193935751914978, "learning_rate": 3.5472037093754405e-06, "loss": 0.0779, "step": 58150 }, { "epoch": 1.7199976341160466, "grad_norm": 0.5356658101081848, "learning_rate": 3.547077019456501e-06, "loss": 0.0721, "step": 58160 }, { "epoch": 1.7202933696102205, "grad_norm": 0.6454612016677856, "learning_rate": 3.5469503295375616e-06, "loss": 0.0726, "step": 58170 }, { "epoch": 1.7205891051043947, "grad_norm": 0.6399930715560913, "learning_rate": 3.546823639618622e-06, "loss": 0.1032, "step": 58180 }, { "epoch": 1.7208848405985686, "grad_norm": 0.9330269694328308, "learning_rate": 3.546696949699683e-06, "loss": 0.0949, "step": 58190 }, { "epoch": 1.7211805760927428, "grad_norm": 0.5472079515457153, "learning_rate": 3.5465702597807436e-06, "loss": 0.0689, "step": 58200 }, { "epoch": 1.7214763115869167, "grad_norm": 0.5650675296783447, "learning_rate": 3.5464435698618044e-06, "loss": 0.0714, "step": 58210 }, { "epoch": 1.7217720470810907, "grad_norm": 0.7697626948356628, "learning_rate": 3.5463168799428647e-06, "loss": 0.1054, "step": 58220 }, { "epoch": 1.7220677825752646, "grad_norm": 0.7874590158462524, "learning_rate": 3.5461901900239255e-06, "loss": 0.094, "step": 58230 }, { "epoch": 1.7223635180694385, "grad_norm": 1.3492246866226196, "learning_rate": 3.546063500104986e-06, "loss": 0.0973, "step": 58240 }, { "epoch": 1.7226592535636127, "grad_norm": 0.7442793250083923, "learning_rate": 3.5459368101860467e-06, "loss": 0.0829, "step": 58250 }, { "epoch": 1.7229549890577869, "grad_norm": 0.5477451086044312, "learning_rate": 3.545810120267107e-06, "loss": 0.0651, "step": 58260 }, { "epoch": 1.7232507245519608, "grad_norm": 1.3318232297897339, "learning_rate": 3.545683430348168e-06, "loss": 0.1074, "step": 58270 }, { "epoch": 1.7235464600461348, "grad_norm": 0.8975790143013, "learning_rate": 3.5455567404292286e-06, "loss": 0.0848, "step": 58280 }, { "epoch": 1.7238421955403087, "grad_norm": 0.7174981236457825, "learning_rate": 3.5454300505102894e-06, "loss": 0.0876, "step": 58290 }, { "epoch": 1.7241379310344827, "grad_norm": 0.817421019077301, "learning_rate": 3.5453033605913498e-06, "loss": 0.0784, "step": 58300 }, { "epoch": 1.7244336665286568, "grad_norm": 0.8435320258140564, "learning_rate": 3.5451766706724106e-06, "loss": 0.0859, "step": 58310 }, { "epoch": 1.7247294020228308, "grad_norm": 0.6126177906990051, "learning_rate": 3.545049980753471e-06, "loss": 0.1049, "step": 58320 }, { "epoch": 1.725025137517005, "grad_norm": 0.7646175622940063, "learning_rate": 3.5449232908345317e-06, "loss": 0.0816, "step": 58330 }, { "epoch": 1.7253208730111789, "grad_norm": 1.0356355905532837, "learning_rate": 3.544796600915592e-06, "loss": 0.082, "step": 58340 }, { "epoch": 1.7256166085053528, "grad_norm": 0.27025315165519714, "learning_rate": 3.544669910996653e-06, "loss": 0.0688, "step": 58350 }, { "epoch": 1.7259123439995268, "grad_norm": 0.8491237759590149, "learning_rate": 3.5445432210777137e-06, "loss": 0.0743, "step": 58360 }, { "epoch": 1.7262080794937007, "grad_norm": 1.1219044923782349, "learning_rate": 3.5444165311587745e-06, "loss": 0.1021, "step": 58370 }, { "epoch": 1.7265038149878749, "grad_norm": 0.7165444493293762, "learning_rate": 3.544289841239835e-06, "loss": 0.0787, "step": 58380 }, { "epoch": 1.7267995504820488, "grad_norm": 0.529603898525238, "learning_rate": 3.5441631513208956e-06, "loss": 0.0963, "step": 58390 }, { "epoch": 1.727095285976223, "grad_norm": 0.7897764444351196, "learning_rate": 3.544036461401956e-06, "loss": 0.0832, "step": 58400 }, { "epoch": 1.727391021470397, "grad_norm": 0.4685709476470947, "learning_rate": 3.5439097714830168e-06, "loss": 0.0702, "step": 58410 }, { "epoch": 1.7276867569645709, "grad_norm": 0.928523600101471, "learning_rate": 3.543783081564077e-06, "loss": 0.0936, "step": 58420 }, { "epoch": 1.7279824924587448, "grad_norm": 0.6494885683059692, "learning_rate": 3.543656391645138e-06, "loss": 0.0796, "step": 58430 }, { "epoch": 1.7282782279529187, "grad_norm": 0.7596092820167542, "learning_rate": 3.5435297017261987e-06, "loss": 0.0815, "step": 58440 }, { "epoch": 1.728573963447093, "grad_norm": 0.7088961005210876, "learning_rate": 3.543403011807259e-06, "loss": 0.0903, "step": 58450 }, { "epoch": 1.728869698941267, "grad_norm": 0.7153450846672058, "learning_rate": 3.54327632188832e-06, "loss": 0.0664, "step": 58460 }, { "epoch": 1.729165434435441, "grad_norm": 1.3126622438430786, "learning_rate": 3.5431496319693802e-06, "loss": 0.082, "step": 58470 }, { "epoch": 1.729461169929615, "grad_norm": 0.7029448747634888, "learning_rate": 3.543022942050441e-06, "loss": 0.096, "step": 58480 }, { "epoch": 1.729756905423789, "grad_norm": 0.8772305846214294, "learning_rate": 3.5428962521315014e-06, "loss": 0.0737, "step": 58490 }, { "epoch": 1.7300526409179628, "grad_norm": 0.9166029691696167, "learning_rate": 3.542769562212562e-06, "loss": 0.0849, "step": 58500 }, { "epoch": 1.730348376412137, "grad_norm": 0.5925690531730652, "learning_rate": 3.5426428722936225e-06, "loss": 0.0742, "step": 58510 }, { "epoch": 1.730644111906311, "grad_norm": 0.6419707536697388, "learning_rate": 3.5425161823746838e-06, "loss": 0.0838, "step": 58520 }, { "epoch": 1.7309398474004851, "grad_norm": 0.9499370455741882, "learning_rate": 3.542389492455744e-06, "loss": 0.0888, "step": 58530 }, { "epoch": 1.731235582894659, "grad_norm": 1.2788583040237427, "learning_rate": 3.542262802536805e-06, "loss": 0.0955, "step": 58540 }, { "epoch": 1.731531318388833, "grad_norm": 0.7605754137039185, "learning_rate": 3.5421361126178653e-06, "loss": 0.0807, "step": 58550 }, { "epoch": 1.731827053883007, "grad_norm": 0.7595532536506653, "learning_rate": 3.542009422698926e-06, "loss": 0.0588, "step": 58560 }, { "epoch": 1.732122789377181, "grad_norm": 0.8054291605949402, "learning_rate": 3.5418827327799864e-06, "loss": 0.0816, "step": 58570 }, { "epoch": 1.732418524871355, "grad_norm": 0.7793862819671631, "learning_rate": 3.5417560428610472e-06, "loss": 0.097, "step": 58580 }, { "epoch": 1.7327142603655292, "grad_norm": 0.9164230823516846, "learning_rate": 3.5416293529421076e-06, "loss": 0.0849, "step": 58590 }, { "epoch": 1.7330099958597032, "grad_norm": 0.9330893754959106, "learning_rate": 3.541502663023169e-06, "loss": 0.0877, "step": 58600 }, { "epoch": 1.733305731353877, "grad_norm": 0.703241765499115, "learning_rate": 3.541375973104229e-06, "loss": 0.0643, "step": 58610 }, { "epoch": 1.733601466848051, "grad_norm": 0.7434529662132263, "learning_rate": 3.54124928318529e-06, "loss": 0.0879, "step": 58620 }, { "epoch": 1.733897202342225, "grad_norm": 0.754725456237793, "learning_rate": 3.5411225932663503e-06, "loss": 0.0844, "step": 58630 }, { "epoch": 1.7341929378363992, "grad_norm": 1.0814095735549927, "learning_rate": 3.540995903347411e-06, "loss": 0.1047, "step": 58640 }, { "epoch": 1.734488673330573, "grad_norm": 0.7522170543670654, "learning_rate": 3.5408692134284715e-06, "loss": 0.0763, "step": 58650 }, { "epoch": 1.7347844088247473, "grad_norm": 0.5740485787391663, "learning_rate": 3.5407425235095323e-06, "loss": 0.0748, "step": 58660 }, { "epoch": 1.7350801443189212, "grad_norm": 1.2806802988052368, "learning_rate": 3.5406158335905926e-06, "loss": 0.0969, "step": 58670 }, { "epoch": 1.7353758798130952, "grad_norm": 1.1594558954238892, "learning_rate": 3.540489143671654e-06, "loss": 0.0921, "step": 58680 }, { "epoch": 1.735671615307269, "grad_norm": 0.6631406545639038, "learning_rate": 3.5403624537527142e-06, "loss": 0.1102, "step": 58690 }, { "epoch": 1.735967350801443, "grad_norm": 0.8330338001251221, "learning_rate": 3.540235763833775e-06, "loss": 0.0968, "step": 58700 }, { "epoch": 1.7362630862956172, "grad_norm": 1.1339401006698608, "learning_rate": 3.5401090739148354e-06, "loss": 0.0815, "step": 58710 }, { "epoch": 1.7365588217897914, "grad_norm": 1.108109474182129, "learning_rate": 3.539982383995896e-06, "loss": 0.0918, "step": 58720 }, { "epoch": 1.7368545572839653, "grad_norm": 1.2129371166229248, "learning_rate": 3.5398556940769565e-06, "loss": 0.0881, "step": 58730 }, { "epoch": 1.7371502927781393, "grad_norm": 0.7964450716972351, "learning_rate": 3.5397290041580173e-06, "loss": 0.0972, "step": 58740 }, { "epoch": 1.7374460282723132, "grad_norm": 0.9123765826225281, "learning_rate": 3.5396023142390777e-06, "loss": 0.0899, "step": 58750 }, { "epoch": 1.7377417637664871, "grad_norm": 0.5033811926841736, "learning_rate": 3.539475624320139e-06, "loss": 0.0754, "step": 58760 }, { "epoch": 1.7380374992606613, "grad_norm": 0.7744402885437012, "learning_rate": 3.5393489344011993e-06, "loss": 0.0872, "step": 58770 }, { "epoch": 1.7383332347548353, "grad_norm": 0.7080647349357605, "learning_rate": 3.53922224448226e-06, "loss": 0.0897, "step": 58780 }, { "epoch": 1.7386289702490094, "grad_norm": 0.7686471939086914, "learning_rate": 3.5390955545633204e-06, "loss": 0.1012, "step": 58790 }, { "epoch": 1.7389247057431834, "grad_norm": 0.5798013210296631, "learning_rate": 3.538968864644381e-06, "loss": 0.0882, "step": 58800 }, { "epoch": 1.7392204412373573, "grad_norm": 0.7650876641273499, "learning_rate": 3.5388421747254416e-06, "loss": 0.0694, "step": 58810 }, { "epoch": 1.7395161767315312, "grad_norm": 0.6751505136489868, "learning_rate": 3.5387154848065024e-06, "loss": 0.082, "step": 58820 }, { "epoch": 1.7398119122257052, "grad_norm": 0.7427133917808533, "learning_rate": 3.5385887948875627e-06, "loss": 0.0806, "step": 58830 }, { "epoch": 1.7401076477198794, "grad_norm": 0.9709773063659668, "learning_rate": 3.538462104968624e-06, "loss": 0.0832, "step": 58840 }, { "epoch": 1.7404033832140533, "grad_norm": 0.9091829657554626, "learning_rate": 3.5383354150496843e-06, "loss": 0.0742, "step": 58850 }, { "epoch": 1.7406991187082275, "grad_norm": 0.5003809928894043, "learning_rate": 3.5382087251307447e-06, "loss": 0.0689, "step": 58860 }, { "epoch": 1.7409948542024014, "grad_norm": 0.9045262336730957, "learning_rate": 3.5380820352118055e-06, "loss": 0.0791, "step": 58870 }, { "epoch": 1.7412905896965754, "grad_norm": 1.284106969833374, "learning_rate": 3.537955345292866e-06, "loss": 0.0872, "step": 58880 }, { "epoch": 1.7415863251907493, "grad_norm": 0.747464656829834, "learning_rate": 3.5378286553739266e-06, "loss": 0.0864, "step": 58890 }, { "epoch": 1.7418820606849232, "grad_norm": 0.5639634728431702, "learning_rate": 3.537701965454987e-06, "loss": 0.0767, "step": 58900 }, { "epoch": 1.7421777961790974, "grad_norm": 0.67939692735672, "learning_rate": 3.5375752755360478e-06, "loss": 0.0747, "step": 58910 }, { "epoch": 1.7424735316732716, "grad_norm": 1.180346131324768, "learning_rate": 3.5374485856171086e-06, "loss": 0.0834, "step": 58920 }, { "epoch": 1.7427692671674455, "grad_norm": 0.6633026599884033, "learning_rate": 3.5373218956981694e-06, "loss": 0.0836, "step": 58930 }, { "epoch": 1.7430650026616195, "grad_norm": 0.9967055916786194, "learning_rate": 3.5371952057792297e-06, "loss": 0.0931, "step": 58940 }, { "epoch": 1.7433607381557934, "grad_norm": 0.6780157685279846, "learning_rate": 3.5370685158602905e-06, "loss": 0.0882, "step": 58950 }, { "epoch": 1.7436564736499673, "grad_norm": 0.5722262263298035, "learning_rate": 3.536941825941351e-06, "loss": 0.0645, "step": 58960 }, { "epoch": 1.7439522091441415, "grad_norm": 1.4102402925491333, "learning_rate": 3.5368151360224117e-06, "loss": 0.0724, "step": 58970 }, { "epoch": 1.7442479446383155, "grad_norm": 0.757470965385437, "learning_rate": 3.536688446103472e-06, "loss": 0.0762, "step": 58980 }, { "epoch": 1.7445436801324896, "grad_norm": 0.5737081170082092, "learning_rate": 3.536561756184533e-06, "loss": 0.0796, "step": 58990 }, { "epoch": 1.7448394156266636, "grad_norm": 0.8708536028862, "learning_rate": 3.5364350662655936e-06, "loss": 0.0886, "step": 59000 }, { "epoch": 1.7451351511208375, "grad_norm": 0.7912960052490234, "learning_rate": 3.5363083763466544e-06, "loss": 0.0692, "step": 59010 }, { "epoch": 1.7454308866150114, "grad_norm": 0.9599847197532654, "learning_rate": 3.5361816864277148e-06, "loss": 0.0828, "step": 59020 }, { "epoch": 1.7457266221091854, "grad_norm": 1.1095032691955566, "learning_rate": 3.5360549965087756e-06, "loss": 0.0896, "step": 59030 }, { "epoch": 1.7460223576033596, "grad_norm": 0.8497794270515442, "learning_rate": 3.535928306589836e-06, "loss": 0.0858, "step": 59040 }, { "epoch": 1.7463180930975337, "grad_norm": 1.1925060749053955, "learning_rate": 3.5358016166708967e-06, "loss": 0.0703, "step": 59050 }, { "epoch": 1.7466138285917077, "grad_norm": 1.0523065328598022, "learning_rate": 3.535674926751957e-06, "loss": 0.0711, "step": 59060 }, { "epoch": 1.7469095640858816, "grad_norm": 0.8290107846260071, "learning_rate": 3.535548236833018e-06, "loss": 0.0725, "step": 59070 }, { "epoch": 1.7472052995800555, "grad_norm": 0.9945465922355652, "learning_rate": 3.5354215469140787e-06, "loss": 0.0967, "step": 59080 }, { "epoch": 1.7475010350742295, "grad_norm": 1.3024959564208984, "learning_rate": 3.5352948569951394e-06, "loss": 0.0877, "step": 59090 }, { "epoch": 1.7477967705684037, "grad_norm": 0.966683566570282, "learning_rate": 3.5351681670762e-06, "loss": 0.0797, "step": 59100 }, { "epoch": 1.7480925060625776, "grad_norm": 0.9733840823173523, "learning_rate": 3.5350414771572606e-06, "loss": 0.0831, "step": 59110 }, { "epoch": 1.7483882415567518, "grad_norm": 1.011269211769104, "learning_rate": 3.534914787238321e-06, "loss": 0.0723, "step": 59120 }, { "epoch": 1.7486839770509257, "grad_norm": 0.588123083114624, "learning_rate": 3.5347880973193818e-06, "loss": 0.0748, "step": 59130 }, { "epoch": 1.7489797125450997, "grad_norm": 0.6563487648963928, "learning_rate": 3.534661407400442e-06, "loss": 0.0891, "step": 59140 }, { "epoch": 1.7492754480392736, "grad_norm": 0.7260230183601379, "learning_rate": 3.534534717481503e-06, "loss": 0.0854, "step": 59150 }, { "epoch": 1.7495711835334475, "grad_norm": 0.4485577344894409, "learning_rate": 3.5344080275625637e-06, "loss": 0.0664, "step": 59160 }, { "epoch": 1.7498669190276217, "grad_norm": 0.6755760908126831, "learning_rate": 3.5342813376436245e-06, "loss": 0.0902, "step": 59170 }, { "epoch": 1.7501626545217959, "grad_norm": 0.6383517384529114, "learning_rate": 3.534154647724685e-06, "loss": 0.0831, "step": 59180 }, { "epoch": 1.7504583900159698, "grad_norm": 0.9901023507118225, "learning_rate": 3.5340279578057456e-06, "loss": 0.0902, "step": 59190 }, { "epoch": 1.7507541255101438, "grad_norm": 1.0007959604263306, "learning_rate": 3.533901267886806e-06, "loss": 0.0748, "step": 59200 }, { "epoch": 1.7510498610043177, "grad_norm": 0.8205969333648682, "learning_rate": 3.533774577967867e-06, "loss": 0.0588, "step": 59210 }, { "epoch": 1.7513455964984916, "grad_norm": 0.9056835174560547, "learning_rate": 3.533647888048927e-06, "loss": 0.0714, "step": 59220 }, { "epoch": 1.7516413319926658, "grad_norm": 1.3754526376724243, "learning_rate": 3.533521198129988e-06, "loss": 0.0884, "step": 59230 }, { "epoch": 1.7519370674868397, "grad_norm": 0.4660642147064209, "learning_rate": 3.5333945082110487e-06, "loss": 0.0776, "step": 59240 }, { "epoch": 1.752232802981014, "grad_norm": 0.37170255184173584, "learning_rate": 3.5332678182921095e-06, "loss": 0.0823, "step": 59250 }, { "epoch": 1.7525285384751879, "grad_norm": 0.6079521179199219, "learning_rate": 3.53314112837317e-06, "loss": 0.0731, "step": 59260 }, { "epoch": 1.7528242739693618, "grad_norm": 1.3736660480499268, "learning_rate": 3.5330144384542303e-06, "loss": 0.0911, "step": 59270 }, { "epoch": 1.7531200094635357, "grad_norm": 0.9543487429618835, "learning_rate": 3.532887748535291e-06, "loss": 0.103, "step": 59280 }, { "epoch": 1.7534157449577097, "grad_norm": 30.498903274536133, "learning_rate": 3.5327610586163514e-06, "loss": 0.0842, "step": 59290 }, { "epoch": 1.7537114804518839, "grad_norm": 0.7821425199508667, "learning_rate": 3.5326343686974122e-06, "loss": 0.0829, "step": 59300 }, { "epoch": 1.7540072159460578, "grad_norm": 0.5545873641967773, "learning_rate": 3.5325076787784726e-06, "loss": 0.0766, "step": 59310 }, { "epoch": 1.754302951440232, "grad_norm": 0.8519172072410583, "learning_rate": 3.532380988859534e-06, "loss": 0.0769, "step": 59320 }, { "epoch": 1.754598686934406, "grad_norm": 0.5321108102798462, "learning_rate": 3.532254298940594e-06, "loss": 0.0765, "step": 59330 }, { "epoch": 1.7548944224285798, "grad_norm": 0.9406210780143738, "learning_rate": 3.532127609021655e-06, "loss": 0.0928, "step": 59340 }, { "epoch": 1.7551901579227538, "grad_norm": 0.8334487080574036, "learning_rate": 3.5320009191027153e-06, "loss": 0.0716, "step": 59350 }, { "epoch": 1.7554858934169277, "grad_norm": 0.42692768573760986, "learning_rate": 3.531874229183776e-06, "loss": 0.0609, "step": 59360 }, { "epoch": 1.755781628911102, "grad_norm": 0.8677074313163757, "learning_rate": 3.5317475392648365e-06, "loss": 0.0816, "step": 59370 }, { "epoch": 1.756077364405276, "grad_norm": 1.3637157678604126, "learning_rate": 3.5316208493458973e-06, "loss": 0.1032, "step": 59380 }, { "epoch": 1.75637309989945, "grad_norm": 0.676956832408905, "learning_rate": 3.5314941594269576e-06, "loss": 0.0832, "step": 59390 }, { "epoch": 1.756668835393624, "grad_norm": 0.6483030319213867, "learning_rate": 3.531367469508019e-06, "loss": 0.0684, "step": 59400 }, { "epoch": 1.756964570887798, "grad_norm": 0.9309768676757812, "learning_rate": 3.531240779589079e-06, "loss": 0.0716, "step": 59410 }, { "epoch": 1.7572603063819718, "grad_norm": 1.1318070888519287, "learning_rate": 3.53111408967014e-06, "loss": 0.1163, "step": 59420 }, { "epoch": 1.757556041876146, "grad_norm": 0.9514037370681763, "learning_rate": 3.5309873997512004e-06, "loss": 0.1042, "step": 59430 }, { "epoch": 1.75785177737032, "grad_norm": 0.6584245562553406, "learning_rate": 3.530860709832261e-06, "loss": 0.0832, "step": 59440 }, { "epoch": 1.758147512864494, "grad_norm": 0.7422671914100647, "learning_rate": 3.5307340199133215e-06, "loss": 0.0845, "step": 59450 }, { "epoch": 1.758443248358668, "grad_norm": 0.3899489641189575, "learning_rate": 3.5306073299943823e-06, "loss": 0.0554, "step": 59460 }, { "epoch": 1.758738983852842, "grad_norm": 0.8685901165008545, "learning_rate": 3.5304806400754427e-06, "loss": 0.0808, "step": 59470 }, { "epoch": 1.759034719347016, "grad_norm": 0.7599400877952576, "learning_rate": 3.530353950156504e-06, "loss": 0.0776, "step": 59480 }, { "epoch": 1.7593304548411899, "grad_norm": 0.5837475657463074, "learning_rate": 3.5302272602375642e-06, "loss": 0.0703, "step": 59490 }, { "epoch": 1.759626190335364, "grad_norm": 1.2770041227340698, "learning_rate": 3.530100570318625e-06, "loss": 0.096, "step": 59500 }, { "epoch": 1.7599219258295382, "grad_norm": 0.887287974357605, "learning_rate": 3.5299738803996854e-06, "loss": 0.0704, "step": 59510 }, { "epoch": 1.7602176613237122, "grad_norm": 0.9840902090072632, "learning_rate": 3.529847190480746e-06, "loss": 0.0888, "step": 59520 }, { "epoch": 1.760513396817886, "grad_norm": 1.022793173789978, "learning_rate": 3.5297205005618066e-06, "loss": 0.0994, "step": 59530 }, { "epoch": 1.76080913231206, "grad_norm": 1.4549006223678589, "learning_rate": 3.5295938106428673e-06, "loss": 0.0851, "step": 59540 }, { "epoch": 1.761104867806234, "grad_norm": 1.1222808361053467, "learning_rate": 3.5294671207239277e-06, "loss": 0.0837, "step": 59550 }, { "epoch": 1.7614006033004082, "grad_norm": 0.8377749919891357, "learning_rate": 3.529340430804989e-06, "loss": 0.0814, "step": 59560 }, { "epoch": 1.761696338794582, "grad_norm": 0.8293507099151611, "learning_rate": 3.5292137408860493e-06, "loss": 0.0935, "step": 59570 }, { "epoch": 1.7619920742887563, "grad_norm": 0.7991289496421814, "learning_rate": 3.52908705096711e-06, "loss": 0.0733, "step": 59580 }, { "epoch": 1.7622878097829302, "grad_norm": 0.46035730838775635, "learning_rate": 3.5289603610481704e-06, "loss": 0.0924, "step": 59590 }, { "epoch": 1.7625835452771041, "grad_norm": 0.7491909265518188, "learning_rate": 3.5288336711292312e-06, "loss": 0.0754, "step": 59600 }, { "epoch": 1.762879280771278, "grad_norm": 0.6287092566490173, "learning_rate": 3.5287069812102916e-06, "loss": 0.0807, "step": 59610 }, { "epoch": 1.763175016265452, "grad_norm": 0.7440016269683838, "learning_rate": 3.5285802912913524e-06, "loss": 0.0718, "step": 59620 }, { "epoch": 1.7634707517596262, "grad_norm": 0.5239824652671814, "learning_rate": 3.5284536013724128e-06, "loss": 0.0816, "step": 59630 }, { "epoch": 1.7637664872538004, "grad_norm": 0.9105445146560669, "learning_rate": 3.528326911453474e-06, "loss": 0.0831, "step": 59640 }, { "epoch": 1.7640622227479743, "grad_norm": 0.782317042350769, "learning_rate": 3.5282002215345343e-06, "loss": 0.0802, "step": 59650 }, { "epoch": 1.7643579582421482, "grad_norm": 0.7782353758811951, "learning_rate": 3.528073531615595e-06, "loss": 0.0659, "step": 59660 }, { "epoch": 1.7646536937363222, "grad_norm": 1.2220537662506104, "learning_rate": 3.5279468416966555e-06, "loss": 0.0798, "step": 59670 }, { "epoch": 1.7649494292304961, "grad_norm": 1.0446032285690308, "learning_rate": 3.527820151777716e-06, "loss": 0.083, "step": 59680 }, { "epoch": 1.7652451647246703, "grad_norm": 0.6212196946144104, "learning_rate": 3.5276934618587766e-06, "loss": 0.091, "step": 59690 }, { "epoch": 1.7655409002188442, "grad_norm": 0.8917070031166077, "learning_rate": 3.527566771939837e-06, "loss": 0.0832, "step": 59700 }, { "epoch": 1.7658366357130184, "grad_norm": 0.7713705897331238, "learning_rate": 3.527440082020898e-06, "loss": 0.0656, "step": 59710 }, { "epoch": 1.7661323712071924, "grad_norm": 0.8408812880516052, "learning_rate": 3.5273133921019586e-06, "loss": 0.082, "step": 59720 }, { "epoch": 1.7664281067013663, "grad_norm": 1.1497360467910767, "learning_rate": 3.5271867021830194e-06, "loss": 0.0987, "step": 59730 }, { "epoch": 1.7667238421955402, "grad_norm": 1.1072444915771484, "learning_rate": 3.5270600122640797e-06, "loss": 0.0843, "step": 59740 }, { "epoch": 1.7670195776897142, "grad_norm": 0.7049185633659363, "learning_rate": 3.5269333223451405e-06, "loss": 0.078, "step": 59750 }, { "epoch": 1.7673153131838883, "grad_norm": 0.34444430470466614, "learning_rate": 3.526806632426201e-06, "loss": 0.0645, "step": 59760 }, { "epoch": 1.7676110486780623, "grad_norm": 1.2714674472808838, "learning_rate": 3.5266799425072617e-06, "loss": 0.0851, "step": 59770 }, { "epoch": 1.7679067841722365, "grad_norm": 0.8275915384292603, "learning_rate": 3.526553252588322e-06, "loss": 0.0912, "step": 59780 }, { "epoch": 1.7682025196664104, "grad_norm": 1.0933812856674194, "learning_rate": 3.526426562669383e-06, "loss": 0.0848, "step": 59790 }, { "epoch": 1.7684982551605843, "grad_norm": 0.928281843662262, "learning_rate": 3.5262998727504436e-06, "loss": 0.0953, "step": 59800 }, { "epoch": 1.7687939906547583, "grad_norm": 0.44684186577796936, "learning_rate": 3.5261731828315044e-06, "loss": 0.0644, "step": 59810 }, { "epoch": 1.7690897261489322, "grad_norm": 0.8975716829299927, "learning_rate": 3.526046492912565e-06, "loss": 0.0852, "step": 59820 }, { "epoch": 1.7693854616431064, "grad_norm": 0.5089813470840454, "learning_rate": 3.5259198029936256e-06, "loss": 0.084, "step": 59830 }, { "epoch": 1.7696811971372806, "grad_norm": 0.9271133542060852, "learning_rate": 3.525793113074686e-06, "loss": 0.0809, "step": 59840 }, { "epoch": 1.7699769326314545, "grad_norm": 1.134665846824646, "learning_rate": 3.5256664231557467e-06, "loss": 0.0803, "step": 59850 }, { "epoch": 1.7702726681256284, "grad_norm": 0.6465706825256348, "learning_rate": 3.525539733236807e-06, "loss": 0.0788, "step": 59860 }, { "epoch": 1.7705684036198024, "grad_norm": 0.6241709589958191, "learning_rate": 3.525413043317868e-06, "loss": 0.0761, "step": 59870 }, { "epoch": 1.7708641391139763, "grad_norm": 0.6521829962730408, "learning_rate": 3.5252863533989287e-06, "loss": 0.0995, "step": 59880 }, { "epoch": 1.7711598746081505, "grad_norm": 0.7755852341651917, "learning_rate": 3.5251596634799895e-06, "loss": 0.0911, "step": 59890 }, { "epoch": 1.7714556101023244, "grad_norm": 1.114369511604309, "learning_rate": 3.52503297356105e-06, "loss": 0.0838, "step": 59900 }, { "epoch": 1.7717513455964986, "grad_norm": 0.6133168339729309, "learning_rate": 3.5249062836421106e-06, "loss": 0.0635, "step": 59910 }, { "epoch": 1.7720470810906725, "grad_norm": 0.9458000659942627, "learning_rate": 3.524779593723171e-06, "loss": 0.0723, "step": 59920 }, { "epoch": 1.7723428165848465, "grad_norm": 0.7322225570678711, "learning_rate": 3.5246529038042318e-06, "loss": 0.1098, "step": 59930 }, { "epoch": 1.7726385520790204, "grad_norm": 0.5360817313194275, "learning_rate": 3.524526213885292e-06, "loss": 0.0815, "step": 59940 }, { "epoch": 1.7729342875731944, "grad_norm": 0.5056047439575195, "learning_rate": 3.524399523966353e-06, "loss": 0.0695, "step": 59950 }, { "epoch": 1.7732300230673685, "grad_norm": 1.392610788345337, "learning_rate": 3.5242728340474137e-06, "loss": 0.0758, "step": 59960 }, { "epoch": 1.7735257585615427, "grad_norm": 0.8097618818283081, "learning_rate": 3.5241461441284745e-06, "loss": 0.075, "step": 59970 }, { "epoch": 1.7738214940557167, "grad_norm": 0.7372820973396301, "learning_rate": 3.524019454209535e-06, "loss": 0.0908, "step": 59980 }, { "epoch": 1.7741172295498906, "grad_norm": 0.860235333442688, "learning_rate": 3.5238927642905957e-06, "loss": 0.0955, "step": 59990 }, { "epoch": 1.7744129650440645, "grad_norm": 0.6866289973258972, "learning_rate": 3.523766074371656e-06, "loss": 0.072, "step": 60000 }, { "epoch": 1.7747087005382385, "grad_norm": 0.7329927086830139, "learning_rate": 3.523639384452717e-06, "loss": 0.0845, "step": 60010 }, { "epoch": 1.7750044360324126, "grad_norm": 0.6807065010070801, "learning_rate": 3.523512694533777e-06, "loss": 0.0875, "step": 60020 }, { "epoch": 1.7753001715265866, "grad_norm": 1.0049395561218262, "learning_rate": 3.523386004614838e-06, "loss": 0.0826, "step": 60030 }, { "epoch": 1.7755959070207608, "grad_norm": 1.1398162841796875, "learning_rate": 3.5232593146958988e-06, "loss": 0.0968, "step": 60040 }, { "epoch": 1.7758916425149347, "grad_norm": 0.7833079695701599, "learning_rate": 3.5231326247769596e-06, "loss": 0.0847, "step": 60050 }, { "epoch": 1.7761873780091086, "grad_norm": 0.8126233220100403, "learning_rate": 3.52300593485802e-06, "loss": 0.0847, "step": 60060 }, { "epoch": 1.7764831135032826, "grad_norm": 0.5952963829040527, "learning_rate": 3.5228792449390807e-06, "loss": 0.085, "step": 60070 }, { "epoch": 1.7767788489974565, "grad_norm": 0.8784690499305725, "learning_rate": 3.522752555020141e-06, "loss": 0.0892, "step": 60080 }, { "epoch": 1.7770745844916307, "grad_norm": 0.5641751289367676, "learning_rate": 3.522625865101202e-06, "loss": 0.0806, "step": 60090 }, { "epoch": 1.7773703199858049, "grad_norm": 0.9957653284072876, "learning_rate": 3.5224991751822622e-06, "loss": 0.0753, "step": 60100 }, { "epoch": 1.7776660554799788, "grad_norm": 0.8756179213523865, "learning_rate": 3.5223724852633226e-06, "loss": 0.0829, "step": 60110 }, { "epoch": 1.7779617909741527, "grad_norm": 0.9119640588760376, "learning_rate": 3.522245795344384e-06, "loss": 0.0933, "step": 60120 }, { "epoch": 1.7782575264683267, "grad_norm": 0.8627129793167114, "learning_rate": 3.522119105425444e-06, "loss": 0.0973, "step": 60130 }, { "epoch": 1.7785532619625006, "grad_norm": 0.7863043546676636, "learning_rate": 3.521992415506505e-06, "loss": 0.0929, "step": 60140 }, { "epoch": 1.7788489974566748, "grad_norm": 0.8463903665542603, "learning_rate": 3.5218657255875653e-06, "loss": 0.0961, "step": 60150 }, { "epoch": 1.7791447329508487, "grad_norm": 0.9161096215248108, "learning_rate": 3.521739035668626e-06, "loss": 0.0714, "step": 60160 }, { "epoch": 1.779440468445023, "grad_norm": 0.5395854711532593, "learning_rate": 3.5216123457496865e-06, "loss": 0.0851, "step": 60170 }, { "epoch": 1.7797362039391968, "grad_norm": 0.8848744630813599, "learning_rate": 3.5214856558307473e-06, "loss": 0.0867, "step": 60180 }, { "epoch": 1.7800319394333708, "grad_norm": 0.4166732132434845, "learning_rate": 3.5213589659118077e-06, "loss": 0.0956, "step": 60190 }, { "epoch": 1.7803276749275447, "grad_norm": 0.7756832838058472, "learning_rate": 3.521232275992869e-06, "loss": 0.0831, "step": 60200 }, { "epoch": 1.7806234104217187, "grad_norm": 0.6527923345565796, "learning_rate": 3.5211055860739292e-06, "loss": 0.0757, "step": 60210 }, { "epoch": 1.7809191459158928, "grad_norm": 0.8441252708435059, "learning_rate": 3.52097889615499e-06, "loss": 0.08, "step": 60220 }, { "epoch": 1.7812148814100668, "grad_norm": 0.9288942217826843, "learning_rate": 3.5208522062360504e-06, "loss": 0.0813, "step": 60230 }, { "epoch": 1.781510616904241, "grad_norm": 0.9634854793548584, "learning_rate": 3.520725516317111e-06, "loss": 0.0846, "step": 60240 }, { "epoch": 1.781806352398415, "grad_norm": 0.6276716589927673, "learning_rate": 3.5205988263981715e-06, "loss": 0.0731, "step": 60250 }, { "epoch": 1.7821020878925888, "grad_norm": 0.8632671236991882, "learning_rate": 3.5204721364792323e-06, "loss": 0.0662, "step": 60260 }, { "epoch": 1.7823978233867628, "grad_norm": 0.8134764432907104, "learning_rate": 3.5203454465602927e-06, "loss": 0.1014, "step": 60270 }, { "epoch": 1.7826935588809367, "grad_norm": 0.5411640405654907, "learning_rate": 3.520218756641354e-06, "loss": 0.0922, "step": 60280 }, { "epoch": 1.782989294375111, "grad_norm": 0.7658613920211792, "learning_rate": 3.5200920667224143e-06, "loss": 0.0979, "step": 60290 }, { "epoch": 1.783285029869285, "grad_norm": 0.7371016144752502, "learning_rate": 3.519965376803475e-06, "loss": 0.0862, "step": 60300 }, { "epoch": 1.783580765363459, "grad_norm": 0.7794959545135498, "learning_rate": 3.5198386868845354e-06, "loss": 0.0659, "step": 60310 }, { "epoch": 1.783876500857633, "grad_norm": 1.250040054321289, "learning_rate": 3.5197119969655962e-06, "loss": 0.0824, "step": 60320 }, { "epoch": 1.7841722363518069, "grad_norm": 1.0191692113876343, "learning_rate": 3.5195853070466566e-06, "loss": 0.0962, "step": 60330 }, { "epoch": 1.7844679718459808, "grad_norm": 1.2488068342208862, "learning_rate": 3.5194586171277174e-06, "loss": 0.0844, "step": 60340 }, { "epoch": 1.784763707340155, "grad_norm": 0.9890275001525879, "learning_rate": 3.5193319272087777e-06, "loss": 0.0918, "step": 60350 }, { "epoch": 1.785059442834329, "grad_norm": 0.5377335548400879, "learning_rate": 3.519205237289839e-06, "loss": 0.0724, "step": 60360 }, { "epoch": 1.785355178328503, "grad_norm": 0.522699773311615, "learning_rate": 3.5190785473708993e-06, "loss": 0.072, "step": 60370 }, { "epoch": 1.785650913822677, "grad_norm": 0.8055475354194641, "learning_rate": 3.51895185745196e-06, "loss": 0.0721, "step": 60380 }, { "epoch": 1.785946649316851, "grad_norm": 0.40311524271965027, "learning_rate": 3.5188251675330205e-06, "loss": 0.0833, "step": 60390 }, { "epoch": 1.786242384811025, "grad_norm": 1.7389507293701172, "learning_rate": 3.5186984776140813e-06, "loss": 0.0847, "step": 60400 }, { "epoch": 1.7865381203051989, "grad_norm": 1.1621544361114502, "learning_rate": 3.5185717876951416e-06, "loss": 0.0802, "step": 60410 }, { "epoch": 1.786833855799373, "grad_norm": 0.9590482711791992, "learning_rate": 3.5184450977762024e-06, "loss": 0.0894, "step": 60420 }, { "epoch": 1.7871295912935472, "grad_norm": 1.039827823638916, "learning_rate": 3.518318407857263e-06, "loss": 0.0861, "step": 60430 }, { "epoch": 1.7874253267877211, "grad_norm": 0.6580411195755005, "learning_rate": 3.518191717938324e-06, "loss": 0.0719, "step": 60440 }, { "epoch": 1.787721062281895, "grad_norm": 0.7035750150680542, "learning_rate": 3.5180650280193844e-06, "loss": 0.0893, "step": 60450 }, { "epoch": 1.788016797776069, "grad_norm": 0.5215247869491577, "learning_rate": 3.517938338100445e-06, "loss": 0.0737, "step": 60460 }, { "epoch": 1.788312533270243, "grad_norm": 0.8964138031005859, "learning_rate": 3.5178116481815055e-06, "loss": 0.099, "step": 60470 }, { "epoch": 1.7886082687644171, "grad_norm": 0.4402958154678345, "learning_rate": 3.5176849582625663e-06, "loss": 0.0763, "step": 60480 }, { "epoch": 1.788904004258591, "grad_norm": 0.6643008589744568, "learning_rate": 3.5175582683436267e-06, "loss": 0.0904, "step": 60490 }, { "epoch": 1.7891997397527653, "grad_norm": 0.8156493306159973, "learning_rate": 3.5174315784246875e-06, "loss": 0.0907, "step": 60500 }, { "epoch": 1.7894954752469392, "grad_norm": 0.9322139024734497, "learning_rate": 3.517304888505748e-06, "loss": 0.0733, "step": 60510 }, { "epoch": 1.7897912107411131, "grad_norm": 0.9768657088279724, "learning_rate": 3.5171781985868086e-06, "loss": 0.0927, "step": 60520 }, { "epoch": 1.790086946235287, "grad_norm": 0.8583595752716064, "learning_rate": 3.5170515086678694e-06, "loss": 0.1047, "step": 60530 }, { "epoch": 1.790382681729461, "grad_norm": 1.3968082666397095, "learning_rate": 3.5169248187489298e-06, "loss": 0.1171, "step": 60540 }, { "epoch": 1.7906784172236352, "grad_norm": 0.9896492958068848, "learning_rate": 3.5167981288299906e-06, "loss": 0.0875, "step": 60550 }, { "epoch": 1.7909741527178094, "grad_norm": 0.8333709239959717, "learning_rate": 3.516671438911051e-06, "loss": 0.0778, "step": 60560 }, { "epoch": 1.7912698882119833, "grad_norm": 0.991526186466217, "learning_rate": 3.5165447489921117e-06, "loss": 0.0926, "step": 60570 }, { "epoch": 1.7915656237061572, "grad_norm": 0.7607606649398804, "learning_rate": 3.516418059073172e-06, "loss": 0.1007, "step": 60580 }, { "epoch": 1.7918613592003312, "grad_norm": 0.6567667126655579, "learning_rate": 3.516291369154233e-06, "loss": 0.0897, "step": 60590 }, { "epoch": 1.7921570946945051, "grad_norm": 0.7253881692886353, "learning_rate": 3.5161646792352937e-06, "loss": 0.0762, "step": 60600 }, { "epoch": 1.7924528301886793, "grad_norm": 0.7709870934486389, "learning_rate": 3.5160379893163545e-06, "loss": 0.0576, "step": 60610 }, { "epoch": 1.7927485656828532, "grad_norm": 0.6092609167098999, "learning_rate": 3.515911299397415e-06, "loss": 0.0804, "step": 60620 }, { "epoch": 1.7930443011770274, "grad_norm": 0.5631664991378784, "learning_rate": 3.5157846094784756e-06, "loss": 0.0716, "step": 60630 }, { "epoch": 1.7933400366712013, "grad_norm": 0.7531901001930237, "learning_rate": 3.515657919559536e-06, "loss": 0.0902, "step": 60640 }, { "epoch": 1.7936357721653753, "grad_norm": 1.1370915174484253, "learning_rate": 3.5155312296405968e-06, "loss": 0.0702, "step": 60650 }, { "epoch": 1.7939315076595492, "grad_norm": 1.3724256753921509, "learning_rate": 3.515404539721657e-06, "loss": 0.0636, "step": 60660 }, { "epoch": 1.7942272431537232, "grad_norm": 0.80849689245224, "learning_rate": 3.515277849802718e-06, "loss": 0.0742, "step": 60670 }, { "epoch": 1.7945229786478973, "grad_norm": 0.9322243928909302, "learning_rate": 3.5151511598837787e-06, "loss": 0.0951, "step": 60680 }, { "epoch": 1.7948187141420713, "grad_norm": 0.8485756516456604, "learning_rate": 3.5150244699648395e-06, "loss": 0.1136, "step": 60690 }, { "epoch": 1.7951144496362454, "grad_norm": 0.6818145513534546, "learning_rate": 3.5148977800459e-06, "loss": 0.0766, "step": 60700 }, { "epoch": 1.7954101851304194, "grad_norm": 0.6325955986976624, "learning_rate": 3.5147710901269607e-06, "loss": 0.0813, "step": 60710 }, { "epoch": 1.7957059206245933, "grad_norm": 1.5708081722259521, "learning_rate": 3.514644400208021e-06, "loss": 0.0676, "step": 60720 }, { "epoch": 1.7960016561187673, "grad_norm": 0.6362389326095581, "learning_rate": 3.514517710289082e-06, "loss": 0.087, "step": 60730 }, { "epoch": 1.7962973916129412, "grad_norm": 0.8208821415901184, "learning_rate": 3.514391020370142e-06, "loss": 0.0897, "step": 60740 }, { "epoch": 1.7965931271071154, "grad_norm": 0.8598330616950989, "learning_rate": 3.514264330451203e-06, "loss": 0.0778, "step": 60750 }, { "epoch": 1.7968888626012895, "grad_norm": 0.871829628944397, "learning_rate": 3.5141376405322638e-06, "loss": 0.075, "step": 60760 }, { "epoch": 1.7971845980954635, "grad_norm": 0.7674286961555481, "learning_rate": 3.5140109506133245e-06, "loss": 0.0777, "step": 60770 }, { "epoch": 1.7974803335896374, "grad_norm": 0.5325760841369629, "learning_rate": 3.513884260694385e-06, "loss": 0.0741, "step": 60780 }, { "epoch": 1.7977760690838114, "grad_norm": 0.8396331071853638, "learning_rate": 3.5137575707754457e-06, "loss": 0.0966, "step": 60790 }, { "epoch": 1.7980718045779853, "grad_norm": 0.7181581258773804, "learning_rate": 3.513630880856506e-06, "loss": 0.089, "step": 60800 }, { "epoch": 1.7983675400721595, "grad_norm": 0.8425076603889465, "learning_rate": 3.513504190937567e-06, "loss": 0.0964, "step": 60810 }, { "epoch": 1.7986632755663334, "grad_norm": 1.237239122390747, "learning_rate": 3.5133775010186272e-06, "loss": 0.0955, "step": 60820 }, { "epoch": 1.7989590110605076, "grad_norm": 0.6744962334632874, "learning_rate": 3.513250811099688e-06, "loss": 0.0943, "step": 60830 }, { "epoch": 1.7992547465546815, "grad_norm": 0.809718668460846, "learning_rate": 3.513124121180749e-06, "loss": 0.0879, "step": 60840 }, { "epoch": 1.7995504820488555, "grad_norm": 0.8221434950828552, "learning_rate": 3.5129974312618096e-06, "loss": 0.0855, "step": 60850 }, { "epoch": 1.7998462175430294, "grad_norm": 0.9313421249389648, "learning_rate": 3.51287074134287e-06, "loss": 0.0673, "step": 60860 }, { "epoch": 1.8001419530372034, "grad_norm": 0.8222553133964539, "learning_rate": 3.5127440514239308e-06, "loss": 0.0835, "step": 60870 }, { "epoch": 1.8004376885313775, "grad_norm": 1.0243898630142212, "learning_rate": 3.512617361504991e-06, "loss": 0.0991, "step": 60880 }, { "epoch": 1.8007334240255517, "grad_norm": 0.9514586329460144, "learning_rate": 3.512490671586052e-06, "loss": 0.0723, "step": 60890 }, { "epoch": 1.8010291595197256, "grad_norm": 0.8583382964134216, "learning_rate": 3.5123639816671123e-06, "loss": 0.0864, "step": 60900 }, { "epoch": 1.8013248950138996, "grad_norm": 0.5502882599830627, "learning_rate": 3.512237291748173e-06, "loss": 0.0865, "step": 60910 }, { "epoch": 1.8016206305080735, "grad_norm": 0.6957730054855347, "learning_rate": 3.512110601829234e-06, "loss": 0.0791, "step": 60920 }, { "epoch": 1.8019163660022475, "grad_norm": 1.0788365602493286, "learning_rate": 3.5119839119102942e-06, "loss": 0.0985, "step": 60930 }, { "epoch": 1.8022121014964216, "grad_norm": 0.8943567872047424, "learning_rate": 3.511857221991355e-06, "loss": 0.0917, "step": 60940 }, { "epoch": 1.8025078369905956, "grad_norm": 0.7762530446052551, "learning_rate": 3.5117305320724154e-06, "loss": 0.0839, "step": 60950 }, { "epoch": 1.8028035724847697, "grad_norm": 0.7310557961463928, "learning_rate": 3.511603842153476e-06, "loss": 0.0759, "step": 60960 }, { "epoch": 1.8030993079789437, "grad_norm": 1.1155039072036743, "learning_rate": 3.5114771522345365e-06, "loss": 0.0687, "step": 60970 }, { "epoch": 1.8033950434731176, "grad_norm": 1.1308989524841309, "learning_rate": 3.5113504623155973e-06, "loss": 0.0911, "step": 60980 }, { "epoch": 1.8036907789672916, "grad_norm": 0.6455294489860535, "learning_rate": 3.5112237723966577e-06, "loss": 0.0781, "step": 60990 }, { "epoch": 1.8039865144614655, "grad_norm": 0.702374279499054, "learning_rate": 3.511097082477719e-06, "loss": 0.0946, "step": 61000 }, { "epoch": 1.8042822499556397, "grad_norm": 0.7324443459510803, "learning_rate": 3.5109703925587793e-06, "loss": 0.0623, "step": 61010 }, { "epoch": 1.8045779854498138, "grad_norm": 0.764782726764679, "learning_rate": 3.51084370263984e-06, "loss": 0.0701, "step": 61020 }, { "epoch": 1.8048737209439878, "grad_norm": 1.7939778566360474, "learning_rate": 3.5107170127209004e-06, "loss": 0.0926, "step": 61030 }, { "epoch": 1.8051694564381617, "grad_norm": 0.7029160857200623, "learning_rate": 3.510590322801961e-06, "loss": 0.086, "step": 61040 }, { "epoch": 1.8054651919323357, "grad_norm": 0.6811650395393372, "learning_rate": 3.5104636328830216e-06, "loss": 0.0766, "step": 61050 }, { "epoch": 1.8057609274265096, "grad_norm": 1.082541823387146, "learning_rate": 3.5103369429640824e-06, "loss": 0.0867, "step": 61060 }, { "epoch": 1.8060566629206838, "grad_norm": 0.7803827524185181, "learning_rate": 3.5102102530451427e-06, "loss": 0.0809, "step": 61070 }, { "epoch": 1.8063523984148577, "grad_norm": 3.488852024078369, "learning_rate": 3.510083563126204e-06, "loss": 0.0837, "step": 61080 }, { "epoch": 1.806648133909032, "grad_norm": 0.7558793425559998, "learning_rate": 3.5099568732072643e-06, "loss": 0.0852, "step": 61090 }, { "epoch": 1.8069438694032058, "grad_norm": 0.7074859142303467, "learning_rate": 3.509830183288325e-06, "loss": 0.0878, "step": 61100 }, { "epoch": 1.8072396048973798, "grad_norm": 0.5659870505332947, "learning_rate": 3.5097034933693855e-06, "loss": 0.0789, "step": 61110 }, { "epoch": 1.8075353403915537, "grad_norm": 0.5938900113105774, "learning_rate": 3.5095768034504463e-06, "loss": 0.0843, "step": 61120 }, { "epoch": 1.8078310758857277, "grad_norm": 1.288114309310913, "learning_rate": 3.5094501135315066e-06, "loss": 0.089, "step": 61130 }, { "epoch": 1.8081268113799018, "grad_norm": 0.7131286859512329, "learning_rate": 3.5093234236125674e-06, "loss": 0.0816, "step": 61140 }, { "epoch": 1.8084225468740758, "grad_norm": 0.799088180065155, "learning_rate": 3.5091967336936278e-06, "loss": 0.0737, "step": 61150 }, { "epoch": 1.80871828236825, "grad_norm": 0.7372720837593079, "learning_rate": 3.509070043774689e-06, "loss": 0.0768, "step": 61160 }, { "epoch": 1.8090140178624239, "grad_norm": 0.8845309615135193, "learning_rate": 3.5089433538557494e-06, "loss": 0.0758, "step": 61170 }, { "epoch": 1.8093097533565978, "grad_norm": 0.9463710188865662, "learning_rate": 3.50881666393681e-06, "loss": 0.0774, "step": 61180 }, { "epoch": 1.8096054888507718, "grad_norm": 0.45684564113616943, "learning_rate": 3.5086899740178705e-06, "loss": 0.0942, "step": 61190 }, { "epoch": 1.809901224344946, "grad_norm": 0.48576974868774414, "learning_rate": 3.5085632840989313e-06, "loss": 0.0611, "step": 61200 }, { "epoch": 1.8101969598391199, "grad_norm": 0.7046759128570557, "learning_rate": 3.5084365941799917e-06, "loss": 0.073, "step": 61210 }, { "epoch": 1.810492695333294, "grad_norm": 0.6376058459281921, "learning_rate": 3.5083099042610525e-06, "loss": 0.0802, "step": 61220 }, { "epoch": 1.810788430827468, "grad_norm": 0.6103218197822571, "learning_rate": 3.508183214342113e-06, "loss": 0.0845, "step": 61230 }, { "epoch": 1.811084166321642, "grad_norm": 0.7724084854125977, "learning_rate": 3.508056524423174e-06, "loss": 0.092, "step": 61240 }, { "epoch": 1.8113799018158159, "grad_norm": 0.7507956027984619, "learning_rate": 3.5079298345042344e-06, "loss": 0.0655, "step": 61250 }, { "epoch": 1.8116756373099898, "grad_norm": 0.709753692150116, "learning_rate": 3.507803144585295e-06, "loss": 0.0882, "step": 61260 }, { "epoch": 1.811971372804164, "grad_norm": 0.5255005955696106, "learning_rate": 3.5076764546663556e-06, "loss": 0.0741, "step": 61270 }, { "epoch": 1.812267108298338, "grad_norm": 0.6206113696098328, "learning_rate": 3.5075497647474163e-06, "loss": 0.0995, "step": 61280 }, { "epoch": 1.812562843792512, "grad_norm": 1.0413329601287842, "learning_rate": 3.5074230748284767e-06, "loss": 0.09, "step": 61290 }, { "epoch": 1.812858579286686, "grad_norm": 0.7644106149673462, "learning_rate": 3.5072963849095375e-06, "loss": 0.0895, "step": 61300 }, { "epoch": 1.81315431478086, "grad_norm": 0.6577955484390259, "learning_rate": 3.507169694990598e-06, "loss": 0.0771, "step": 61310 }, { "epoch": 1.813450050275034, "grad_norm": 0.9714413285255432, "learning_rate": 3.507043005071659e-06, "loss": 0.0862, "step": 61320 }, { "epoch": 1.8137457857692079, "grad_norm": 0.709561288356781, "learning_rate": 3.5069163151527194e-06, "loss": 0.0972, "step": 61330 }, { "epoch": 1.814041521263382, "grad_norm": 0.4181861877441406, "learning_rate": 3.50678962523378e-06, "loss": 0.0737, "step": 61340 }, { "epoch": 1.8143372567575562, "grad_norm": 0.6403738260269165, "learning_rate": 3.5066629353148406e-06, "loss": 0.078, "step": 61350 }, { "epoch": 1.8146329922517301, "grad_norm": 0.4677065908908844, "learning_rate": 3.506536245395901e-06, "loss": 0.0694, "step": 61360 }, { "epoch": 1.814928727745904, "grad_norm": 0.6119681596755981, "learning_rate": 3.5064095554769618e-06, "loss": 0.0766, "step": 61370 }, { "epoch": 1.815224463240078, "grad_norm": 1.0457961559295654, "learning_rate": 3.506282865558022e-06, "loss": 0.1, "step": 61380 }, { "epoch": 1.815520198734252, "grad_norm": 0.8856508135795593, "learning_rate": 3.506156175639083e-06, "loss": 0.0974, "step": 61390 }, { "epoch": 1.8158159342284261, "grad_norm": 0.7047658562660217, "learning_rate": 3.5060294857201437e-06, "loss": 0.0739, "step": 61400 }, { "epoch": 1.8161116697226, "grad_norm": 1.0759906768798828, "learning_rate": 3.5059027958012045e-06, "loss": 0.0785, "step": 61410 }, { "epoch": 1.8164074052167742, "grad_norm": 1.0022542476654053, "learning_rate": 3.505776105882265e-06, "loss": 0.0909, "step": 61420 }, { "epoch": 1.8167031407109482, "grad_norm": 0.925806999206543, "learning_rate": 3.5056494159633256e-06, "loss": 0.0906, "step": 61430 }, { "epoch": 1.8169988762051221, "grad_norm": 0.9776707291603088, "learning_rate": 3.505522726044386e-06, "loss": 0.085, "step": 61440 }, { "epoch": 1.817294611699296, "grad_norm": 1.085070252418518, "learning_rate": 3.505396036125447e-06, "loss": 0.0826, "step": 61450 }, { "epoch": 1.81759034719347, "grad_norm": 0.6975405812263489, "learning_rate": 3.505269346206507e-06, "loss": 0.0749, "step": 61460 }, { "epoch": 1.8178860826876442, "grad_norm": 2.058159828186035, "learning_rate": 3.505142656287568e-06, "loss": 0.0874, "step": 61470 }, { "epoch": 1.8181818181818183, "grad_norm": 0.5768546462059021, "learning_rate": 3.5050159663686287e-06, "loss": 0.0747, "step": 61480 }, { "epoch": 1.8184775536759923, "grad_norm": 0.6270526051521301, "learning_rate": 3.5048892764496895e-06, "loss": 0.0817, "step": 61490 }, { "epoch": 1.8187732891701662, "grad_norm": 0.9887555241584778, "learning_rate": 3.50476258653075e-06, "loss": 0.0724, "step": 61500 }, { "epoch": 1.8190690246643402, "grad_norm": 0.8998024463653564, "learning_rate": 3.5046358966118107e-06, "loss": 0.065, "step": 61510 }, { "epoch": 1.8193647601585141, "grad_norm": 0.9667860269546509, "learning_rate": 3.504509206692871e-06, "loss": 0.0964, "step": 61520 }, { "epoch": 1.8196604956526883, "grad_norm": 1.0976860523223877, "learning_rate": 3.504382516773932e-06, "loss": 0.0829, "step": 61530 }, { "epoch": 1.8199562311468622, "grad_norm": 0.7459164261817932, "learning_rate": 3.504255826854992e-06, "loss": 0.1054, "step": 61540 }, { "epoch": 1.8202519666410364, "grad_norm": 0.7425723075866699, "learning_rate": 3.504129136936053e-06, "loss": 0.0879, "step": 61550 }, { "epoch": 1.8205477021352103, "grad_norm": 0.9273602366447449, "learning_rate": 3.504002447017114e-06, "loss": 0.0817, "step": 61560 }, { "epoch": 1.8208434376293843, "grad_norm": 1.3830522298812866, "learning_rate": 3.5038757570981746e-06, "loss": 0.0925, "step": 61570 }, { "epoch": 1.8211391731235582, "grad_norm": 0.7630725502967834, "learning_rate": 3.503749067179235e-06, "loss": 0.0735, "step": 61580 }, { "epoch": 1.8214349086177322, "grad_norm": 0.48812878131866455, "learning_rate": 3.5036223772602957e-06, "loss": 0.0856, "step": 61590 }, { "epoch": 1.8217306441119063, "grad_norm": 0.8236172199249268, "learning_rate": 3.503495687341356e-06, "loss": 0.0914, "step": 61600 }, { "epoch": 1.8220263796060803, "grad_norm": 0.9889567494392395, "learning_rate": 3.503368997422417e-06, "loss": 0.0611, "step": 61610 }, { "epoch": 1.8223221151002544, "grad_norm": 1.0396931171417236, "learning_rate": 3.5032423075034773e-06, "loss": 0.0865, "step": 61620 }, { "epoch": 1.8226178505944284, "grad_norm": 1.1440787315368652, "learning_rate": 3.503115617584538e-06, "loss": 0.1084, "step": 61630 }, { "epoch": 1.8229135860886023, "grad_norm": 1.1827623844146729, "learning_rate": 3.502988927665599e-06, "loss": 0.0968, "step": 61640 }, { "epoch": 1.8232093215827763, "grad_norm": 0.8271826505661011, "learning_rate": 3.5028622377466596e-06, "loss": 0.0694, "step": 61650 }, { "epoch": 1.8235050570769504, "grad_norm": 0.5554361939430237, "learning_rate": 3.50273554782772e-06, "loss": 0.0872, "step": 61660 }, { "epoch": 1.8238007925711244, "grad_norm": 0.4413782060146332, "learning_rate": 3.5026088579087808e-06, "loss": 0.0981, "step": 61670 }, { "epoch": 1.8240965280652985, "grad_norm": 0.9153391122817993, "learning_rate": 3.502482167989841e-06, "loss": 0.0848, "step": 61680 }, { "epoch": 1.8243922635594725, "grad_norm": 0.9344654083251953, "learning_rate": 3.502355478070902e-06, "loss": 0.0636, "step": 61690 }, { "epoch": 1.8246879990536464, "grad_norm": 0.8361095190048218, "learning_rate": 3.5022287881519623e-06, "loss": 0.0537, "step": 61700 }, { "epoch": 1.8249837345478204, "grad_norm": 1.5467151403427124, "learning_rate": 3.502102098233023e-06, "loss": 0.0573, "step": 61710 }, { "epoch": 1.8252794700419943, "grad_norm": 0.6344877481460571, "learning_rate": 3.501975408314084e-06, "loss": 0.0799, "step": 61720 }, { "epoch": 1.8255752055361685, "grad_norm": 1.2598820924758911, "learning_rate": 3.5018487183951447e-06, "loss": 0.0871, "step": 61730 }, { "epoch": 1.8258709410303424, "grad_norm": 0.8114537596702576, "learning_rate": 3.501722028476205e-06, "loss": 0.0904, "step": 61740 }, { "epoch": 1.8261666765245166, "grad_norm": 1.0474133491516113, "learning_rate": 3.5015953385572654e-06, "loss": 0.0753, "step": 61750 }, { "epoch": 1.8264624120186905, "grad_norm": 0.9486001133918762, "learning_rate": 3.501468648638326e-06, "loss": 0.075, "step": 61760 }, { "epoch": 1.8267581475128645, "grad_norm": 1.2227998971939087, "learning_rate": 3.5013419587193866e-06, "loss": 0.0929, "step": 61770 }, { "epoch": 1.8270538830070384, "grad_norm": 0.6332228183746338, "learning_rate": 3.5012152688004473e-06, "loss": 0.0808, "step": 61780 }, { "epoch": 1.8273496185012124, "grad_norm": 0.6286694407463074, "learning_rate": 3.5010885788815077e-06, "loss": 0.0714, "step": 61790 }, { "epoch": 1.8276453539953865, "grad_norm": 0.8982498049736023, "learning_rate": 3.500961888962569e-06, "loss": 0.0937, "step": 61800 }, { "epoch": 1.8279410894895607, "grad_norm": 0.45739543437957764, "learning_rate": 3.5008351990436293e-06, "loss": 0.0682, "step": 61810 }, { "epoch": 1.8282368249837346, "grad_norm": 0.6057119369506836, "learning_rate": 3.50070850912469e-06, "loss": 0.0592, "step": 61820 }, { "epoch": 1.8285325604779086, "grad_norm": 0.7911377549171448, "learning_rate": 3.5005818192057504e-06, "loss": 0.0934, "step": 61830 }, { "epoch": 1.8288282959720825, "grad_norm": 0.6249753832817078, "learning_rate": 3.5004551292868112e-06, "loss": 0.0929, "step": 61840 }, { "epoch": 1.8291240314662565, "grad_norm": 0.8533900380134583, "learning_rate": 3.5003284393678716e-06, "loss": 0.0739, "step": 61850 }, { "epoch": 1.8294197669604306, "grad_norm": 0.7761657238006592, "learning_rate": 3.5002017494489324e-06, "loss": 0.0747, "step": 61860 }, { "epoch": 1.8297155024546046, "grad_norm": 0.7428116202354431, "learning_rate": 3.5000750595299928e-06, "loss": 0.0846, "step": 61870 }, { "epoch": 1.8300112379487787, "grad_norm": 0.6062932014465332, "learning_rate": 3.499948369611054e-06, "loss": 0.0855, "step": 61880 }, { "epoch": 1.8303069734429527, "grad_norm": 0.8534843921661377, "learning_rate": 3.4998216796921143e-06, "loss": 0.0717, "step": 61890 }, { "epoch": 1.8306027089371266, "grad_norm": 0.46709883213043213, "learning_rate": 3.499694989773175e-06, "loss": 0.0811, "step": 61900 }, { "epoch": 1.8308984444313006, "grad_norm": 0.6857055425643921, "learning_rate": 3.4995682998542355e-06, "loss": 0.0699, "step": 61910 }, { "epoch": 1.8311941799254745, "grad_norm": 0.6291048526763916, "learning_rate": 3.4994416099352963e-06, "loss": 0.086, "step": 61920 }, { "epoch": 1.8314899154196487, "grad_norm": 0.7583613395690918, "learning_rate": 3.4993149200163566e-06, "loss": 0.0814, "step": 61930 }, { "epoch": 1.8317856509138228, "grad_norm": 0.7395267486572266, "learning_rate": 3.4991882300974174e-06, "loss": 0.0776, "step": 61940 }, { "epoch": 1.8320813864079968, "grad_norm": 0.5660784244537354, "learning_rate": 3.499061540178478e-06, "loss": 0.0749, "step": 61950 }, { "epoch": 1.8323771219021707, "grad_norm": 0.8559134602546692, "learning_rate": 3.498934850259539e-06, "loss": 0.086, "step": 61960 }, { "epoch": 1.8326728573963447, "grad_norm": 0.7143874168395996, "learning_rate": 3.4988081603405994e-06, "loss": 0.1047, "step": 61970 }, { "epoch": 1.8329685928905186, "grad_norm": 0.7282611727714539, "learning_rate": 3.49868147042166e-06, "loss": 0.089, "step": 61980 }, { "epoch": 1.8332643283846928, "grad_norm": 0.8388187289237976, "learning_rate": 3.4985547805027205e-06, "loss": 0.0805, "step": 61990 }, { "epoch": 1.8335600638788667, "grad_norm": 0.9047855138778687, "learning_rate": 3.4984280905837813e-06, "loss": 0.0773, "step": 62000 }, { "epoch": 1.8338557993730409, "grad_norm": 0.7230564951896667, "learning_rate": 3.4983014006648417e-06, "loss": 0.0636, "step": 62010 }, { "epoch": 1.8341515348672148, "grad_norm": 1.351447343826294, "learning_rate": 3.4981747107459025e-06, "loss": 0.0655, "step": 62020 }, { "epoch": 1.8344472703613888, "grad_norm": 0.6293336153030396, "learning_rate": 3.498048020826963e-06, "loss": 0.0906, "step": 62030 }, { "epoch": 1.8347430058555627, "grad_norm": 0.6909865140914917, "learning_rate": 3.497921330908024e-06, "loss": 0.0981, "step": 62040 }, { "epoch": 1.8350387413497367, "grad_norm": 2.0665061473846436, "learning_rate": 3.4977946409890844e-06, "loss": 0.0901, "step": 62050 }, { "epoch": 1.8353344768439108, "grad_norm": 0.6738609671592712, "learning_rate": 3.4976679510701452e-06, "loss": 0.0614, "step": 62060 }, { "epoch": 1.8356302123380848, "grad_norm": 1.0992461442947388, "learning_rate": 3.4975412611512056e-06, "loss": 0.0887, "step": 62070 }, { "epoch": 1.835925947832259, "grad_norm": 0.8926888108253479, "learning_rate": 3.4974145712322664e-06, "loss": 0.0836, "step": 62080 }, { "epoch": 1.8362216833264329, "grad_norm": 0.7377490997314453, "learning_rate": 3.4972878813133267e-06, "loss": 0.08, "step": 62090 }, { "epoch": 1.8365174188206068, "grad_norm": 0.7658066749572754, "learning_rate": 3.4971611913943875e-06, "loss": 0.0882, "step": 62100 }, { "epoch": 1.8368131543147808, "grad_norm": 0.6810826063156128, "learning_rate": 3.497034501475448e-06, "loss": 0.0831, "step": 62110 }, { "epoch": 1.837108889808955, "grad_norm": 0.8250305652618408, "learning_rate": 3.496907811556509e-06, "loss": 0.082, "step": 62120 }, { "epoch": 1.8374046253031289, "grad_norm": 0.9313737750053406, "learning_rate": 3.4967811216375695e-06, "loss": 0.0845, "step": 62130 }, { "epoch": 1.837700360797303, "grad_norm": 0.7868231534957886, "learning_rate": 3.4966544317186303e-06, "loss": 0.0798, "step": 62140 }, { "epoch": 1.837996096291477, "grad_norm": 0.9171154499053955, "learning_rate": 3.4965277417996906e-06, "loss": 0.0916, "step": 62150 }, { "epoch": 1.838291831785651, "grad_norm": 0.9759225249290466, "learning_rate": 3.4964010518807514e-06, "loss": 0.0788, "step": 62160 }, { "epoch": 1.8385875672798249, "grad_norm": 1.1130375862121582, "learning_rate": 3.4962743619618118e-06, "loss": 0.0763, "step": 62170 }, { "epoch": 1.8388833027739988, "grad_norm": 0.6279237270355225, "learning_rate": 3.496147672042872e-06, "loss": 0.09, "step": 62180 }, { "epoch": 1.839179038268173, "grad_norm": 0.7577215433120728, "learning_rate": 3.496020982123933e-06, "loss": 0.0884, "step": 62190 }, { "epoch": 1.839474773762347, "grad_norm": 0.6220859885215759, "learning_rate": 3.4958942922049937e-06, "loss": 0.0899, "step": 62200 }, { "epoch": 1.839770509256521, "grad_norm": 0.6052147150039673, "learning_rate": 3.4957676022860545e-06, "loss": 0.0657, "step": 62210 }, { "epoch": 1.840066244750695, "grad_norm": 0.6528453826904297, "learning_rate": 3.495640912367115e-06, "loss": 0.1008, "step": 62220 }, { "epoch": 1.840361980244869, "grad_norm": 1.033387303352356, "learning_rate": 3.4955142224481757e-06, "loss": 0.0969, "step": 62230 }, { "epoch": 1.840657715739043, "grad_norm": 0.7421077489852905, "learning_rate": 3.495387532529236e-06, "loss": 0.0981, "step": 62240 }, { "epoch": 1.8409534512332169, "grad_norm": 0.8630538582801819, "learning_rate": 3.495260842610297e-06, "loss": 0.0885, "step": 62250 }, { "epoch": 1.841249186727391, "grad_norm": 0.8411090970039368, "learning_rate": 3.495134152691357e-06, "loss": 0.0576, "step": 62260 }, { "epoch": 1.8415449222215652, "grad_norm": 1.0760504007339478, "learning_rate": 3.495007462772418e-06, "loss": 0.1034, "step": 62270 }, { "epoch": 1.8418406577157391, "grad_norm": 0.6660443544387817, "learning_rate": 3.4948807728534788e-06, "loss": 0.1056, "step": 62280 }, { "epoch": 1.842136393209913, "grad_norm": 0.802586555480957, "learning_rate": 3.4947540829345396e-06, "loss": 0.1049, "step": 62290 }, { "epoch": 1.842432128704087, "grad_norm": 0.8754353523254395, "learning_rate": 3.4946273930156e-06, "loss": 0.075, "step": 62300 }, { "epoch": 1.842727864198261, "grad_norm": 0.733156681060791, "learning_rate": 3.4945007030966607e-06, "loss": 0.0745, "step": 62310 }, { "epoch": 1.8430235996924351, "grad_norm": 0.7739485502243042, "learning_rate": 3.494374013177721e-06, "loss": 0.0842, "step": 62320 }, { "epoch": 1.843319335186609, "grad_norm": 1.2512587308883667, "learning_rate": 3.494247323258782e-06, "loss": 0.0966, "step": 62330 }, { "epoch": 1.8436150706807832, "grad_norm": 0.753004789352417, "learning_rate": 3.4941206333398422e-06, "loss": 0.0885, "step": 62340 }, { "epoch": 1.8439108061749572, "grad_norm": 0.6127499938011169, "learning_rate": 3.493993943420903e-06, "loss": 0.0727, "step": 62350 }, { "epoch": 1.8442065416691311, "grad_norm": 0.8465591073036194, "learning_rate": 3.493867253501964e-06, "loss": 0.0751, "step": 62360 }, { "epoch": 1.844502277163305, "grad_norm": 1.126147747039795, "learning_rate": 3.4937405635830246e-06, "loss": 0.083, "step": 62370 }, { "epoch": 1.844798012657479, "grad_norm": 1.7051326036453247, "learning_rate": 3.493613873664085e-06, "loss": 0.081, "step": 62380 }, { "epoch": 1.8450937481516532, "grad_norm": 0.9465075135231018, "learning_rate": 3.4934871837451458e-06, "loss": 0.0931, "step": 62390 }, { "epoch": 1.8453894836458273, "grad_norm": 0.596249520778656, "learning_rate": 3.493360493826206e-06, "loss": 0.0917, "step": 62400 }, { "epoch": 1.8456852191400013, "grad_norm": 0.5887784361839294, "learning_rate": 3.493233803907267e-06, "loss": 0.0694, "step": 62410 }, { "epoch": 1.8459809546341752, "grad_norm": 0.7539801597595215, "learning_rate": 3.4931071139883273e-06, "loss": 0.0866, "step": 62420 }, { "epoch": 1.8462766901283492, "grad_norm": 1.0939639806747437, "learning_rate": 3.492980424069388e-06, "loss": 0.0756, "step": 62430 }, { "epoch": 1.846572425622523, "grad_norm": 0.7205720543861389, "learning_rate": 3.492853734150449e-06, "loss": 0.0677, "step": 62440 }, { "epoch": 1.8468681611166973, "grad_norm": 0.8154630064964294, "learning_rate": 3.4927270442315097e-06, "loss": 0.0705, "step": 62450 }, { "epoch": 1.8471638966108712, "grad_norm": 0.8691762089729309, "learning_rate": 3.49260035431257e-06, "loss": 0.0727, "step": 62460 }, { "epoch": 1.8474596321050454, "grad_norm": 1.0167492628097534, "learning_rate": 3.492473664393631e-06, "loss": 0.0853, "step": 62470 }, { "epoch": 1.8477553675992193, "grad_norm": 1.0289472341537476, "learning_rate": 3.492346974474691e-06, "loss": 0.0806, "step": 62480 }, { "epoch": 1.8480511030933933, "grad_norm": 1.0510730743408203, "learning_rate": 3.492220284555752e-06, "loss": 0.0855, "step": 62490 }, { "epoch": 1.8483468385875672, "grad_norm": 0.7141270637512207, "learning_rate": 3.4920935946368123e-06, "loss": 0.0694, "step": 62500 }, { "epoch": 1.8486425740817412, "grad_norm": 0.9894477725028992, "learning_rate": 3.491966904717873e-06, "loss": 0.0705, "step": 62510 }, { "epoch": 1.8489383095759153, "grad_norm": 0.6870266795158386, "learning_rate": 3.491840214798934e-06, "loss": 0.0803, "step": 62520 }, { "epoch": 1.8492340450700893, "grad_norm": 0.8597265481948853, "learning_rate": 3.4917135248799947e-06, "loss": 0.0919, "step": 62530 }, { "epoch": 1.8495297805642634, "grad_norm": 0.7257919311523438, "learning_rate": 3.491586834961055e-06, "loss": 0.0862, "step": 62540 }, { "epoch": 1.8498255160584374, "grad_norm": 1.0100117921829224, "learning_rate": 3.491460145042116e-06, "loss": 0.0805, "step": 62550 }, { "epoch": 1.8501212515526113, "grad_norm": 0.9511421322822571, "learning_rate": 3.4913334551231762e-06, "loss": 0.0741, "step": 62560 }, { "epoch": 1.8504169870467853, "grad_norm": 1.2378973960876465, "learning_rate": 3.491206765204237e-06, "loss": 0.0854, "step": 62570 }, { "epoch": 1.8507127225409594, "grad_norm": 0.7060052156448364, "learning_rate": 3.4910800752852974e-06, "loss": 0.096, "step": 62580 }, { "epoch": 1.8510084580351334, "grad_norm": 0.6123114824295044, "learning_rate": 3.4909533853663577e-06, "loss": 0.0972, "step": 62590 }, { "epoch": 1.8513041935293075, "grad_norm": 0.8740389943122864, "learning_rate": 3.490826695447419e-06, "loss": 0.0916, "step": 62600 }, { "epoch": 1.8515999290234815, "grad_norm": 1.115608811378479, "learning_rate": 3.4907000055284793e-06, "loss": 0.0884, "step": 62610 }, { "epoch": 1.8518956645176554, "grad_norm": 0.7736483216285706, "learning_rate": 3.49057331560954e-06, "loss": 0.0773, "step": 62620 }, { "epoch": 1.8521914000118294, "grad_norm": 0.8505054116249084, "learning_rate": 3.4904466256906005e-06, "loss": 0.0848, "step": 62630 }, { "epoch": 1.8524871355060033, "grad_norm": 0.7028942108154297, "learning_rate": 3.4903199357716613e-06, "loss": 0.0954, "step": 62640 }, { "epoch": 1.8527828710001775, "grad_norm": 1.224019169807434, "learning_rate": 3.4901932458527216e-06, "loss": 0.0909, "step": 62650 }, { "epoch": 1.8530786064943514, "grad_norm": 1.1274733543395996, "learning_rate": 3.4900665559337824e-06, "loss": 0.0777, "step": 62660 }, { "epoch": 1.8533743419885256, "grad_norm": 0.8948617577552795, "learning_rate": 3.489939866014843e-06, "loss": 0.0906, "step": 62670 }, { "epoch": 1.8536700774826995, "grad_norm": 1.1708126068115234, "learning_rate": 3.489813176095904e-06, "loss": 0.0787, "step": 62680 }, { "epoch": 1.8539658129768735, "grad_norm": 0.8144914507865906, "learning_rate": 3.4896864861769644e-06, "loss": 0.0946, "step": 62690 }, { "epoch": 1.8542615484710474, "grad_norm": 1.2202293872833252, "learning_rate": 3.489559796258025e-06, "loss": 0.0791, "step": 62700 }, { "epoch": 1.8545572839652213, "grad_norm": 0.2508613169193268, "learning_rate": 3.4894331063390855e-06, "loss": 0.0701, "step": 62710 }, { "epoch": 1.8548530194593955, "grad_norm": 0.795924961566925, "learning_rate": 3.4893064164201463e-06, "loss": 0.0802, "step": 62720 }, { "epoch": 1.8551487549535697, "grad_norm": 1.1258633136749268, "learning_rate": 3.4891797265012067e-06, "loss": 0.1067, "step": 62730 }, { "epoch": 1.8554444904477436, "grad_norm": 0.6093615293502808, "learning_rate": 3.4890530365822675e-06, "loss": 0.0857, "step": 62740 }, { "epoch": 1.8557402259419176, "grad_norm": 0.8517267107963562, "learning_rate": 3.488926346663328e-06, "loss": 0.0833, "step": 62750 }, { "epoch": 1.8560359614360915, "grad_norm": 0.8707848191261292, "learning_rate": 3.488799656744389e-06, "loss": 0.0715, "step": 62760 }, { "epoch": 1.8563316969302655, "grad_norm": 0.9660927653312683, "learning_rate": 3.4886729668254494e-06, "loss": 0.0747, "step": 62770 }, { "epoch": 1.8566274324244396, "grad_norm": 0.6321187615394592, "learning_rate": 3.48854627690651e-06, "loss": 0.0903, "step": 62780 }, { "epoch": 1.8569231679186136, "grad_norm": 0.8095279335975647, "learning_rate": 3.4884195869875706e-06, "loss": 0.0937, "step": 62790 }, { "epoch": 1.8572189034127877, "grad_norm": 1.0643985271453857, "learning_rate": 3.4882928970686314e-06, "loss": 0.0723, "step": 62800 }, { "epoch": 1.8575146389069617, "grad_norm": 0.7076213955879211, "learning_rate": 3.4881662071496917e-06, "loss": 0.0719, "step": 62810 }, { "epoch": 1.8578103744011356, "grad_norm": 1.9095250368118286, "learning_rate": 3.4880395172307525e-06, "loss": 0.0773, "step": 62820 }, { "epoch": 1.8581061098953096, "grad_norm": 0.733745813369751, "learning_rate": 3.487912827311813e-06, "loss": 0.0742, "step": 62830 }, { "epoch": 1.8584018453894835, "grad_norm": 2.6040115356445312, "learning_rate": 3.487786137392874e-06, "loss": 0.0784, "step": 62840 }, { "epoch": 1.8586975808836577, "grad_norm": 0.5225286483764648, "learning_rate": 3.4876594474739345e-06, "loss": 0.0718, "step": 62850 }, { "epoch": 1.8589933163778318, "grad_norm": 0.6221834421157837, "learning_rate": 3.4875327575549952e-06, "loss": 0.0638, "step": 62860 }, { "epoch": 1.8592890518720058, "grad_norm": 0.990483283996582, "learning_rate": 3.4874060676360556e-06, "loss": 0.0893, "step": 62870 }, { "epoch": 1.8595847873661797, "grad_norm": 1.2917087078094482, "learning_rate": 3.4872793777171164e-06, "loss": 0.0996, "step": 62880 }, { "epoch": 1.8598805228603537, "grad_norm": 0.907316267490387, "learning_rate": 3.4871526877981768e-06, "loss": 0.08, "step": 62890 }, { "epoch": 1.8601762583545276, "grad_norm": 0.563555121421814, "learning_rate": 3.4870259978792376e-06, "loss": 0.0756, "step": 62900 }, { "epoch": 1.8604719938487018, "grad_norm": 0.9471719861030579, "learning_rate": 3.486899307960298e-06, "loss": 0.0643, "step": 62910 }, { "epoch": 1.8607677293428757, "grad_norm": 1.2111995220184326, "learning_rate": 3.486772618041359e-06, "loss": 0.0993, "step": 62920 }, { "epoch": 1.8610634648370499, "grad_norm": 0.8302919864654541, "learning_rate": 3.4866459281224195e-06, "loss": 0.1089, "step": 62930 }, { "epoch": 1.8613592003312238, "grad_norm": 0.7558932304382324, "learning_rate": 3.4865192382034803e-06, "loss": 0.0856, "step": 62940 }, { "epoch": 1.8616549358253978, "grad_norm": 0.8889937400817871, "learning_rate": 3.4863925482845407e-06, "loss": 0.0891, "step": 62950 }, { "epoch": 1.8619506713195717, "grad_norm": 0.6464913487434387, "learning_rate": 3.4862658583656014e-06, "loss": 0.0655, "step": 62960 }, { "epoch": 1.8622464068137456, "grad_norm": 1.221399188041687, "learning_rate": 3.486139168446662e-06, "loss": 0.0778, "step": 62970 }, { "epoch": 1.8625421423079198, "grad_norm": 0.9098455905914307, "learning_rate": 3.4860124785277226e-06, "loss": 0.0949, "step": 62980 }, { "epoch": 1.8628378778020938, "grad_norm": 0.6430556178092957, "learning_rate": 3.485885788608783e-06, "loss": 0.0907, "step": 62990 }, { "epoch": 1.863133613296268, "grad_norm": 0.8700863122940063, "learning_rate": 3.4857590986898433e-06, "loss": 0.0856, "step": 63000 }, { "epoch": 1.8634293487904419, "grad_norm": 1.000393033027649, "learning_rate": 3.4856324087709045e-06, "loss": 0.0722, "step": 63010 }, { "epoch": 1.8637250842846158, "grad_norm": 0.5525712966918945, "learning_rate": 3.485505718851965e-06, "loss": 0.0776, "step": 63020 }, { "epoch": 1.8640208197787898, "grad_norm": 0.8325001001358032, "learning_rate": 3.4853790289330257e-06, "loss": 0.095, "step": 63030 }, { "epoch": 1.864316555272964, "grad_norm": 0.8282055258750916, "learning_rate": 3.485252339014086e-06, "loss": 0.0947, "step": 63040 }, { "epoch": 1.8646122907671379, "grad_norm": 1.2596880197525024, "learning_rate": 3.485125649095147e-06, "loss": 0.0919, "step": 63050 }, { "epoch": 1.864908026261312, "grad_norm": 0.3658826947212219, "learning_rate": 3.4849989591762072e-06, "loss": 0.0681, "step": 63060 }, { "epoch": 1.865203761755486, "grad_norm": 0.9150882363319397, "learning_rate": 3.484872269257268e-06, "loss": 0.0768, "step": 63070 }, { "epoch": 1.86549949724966, "grad_norm": 0.7904502153396606, "learning_rate": 3.4847455793383284e-06, "loss": 0.0808, "step": 63080 }, { "epoch": 1.8657952327438339, "grad_norm": 1.2144863605499268, "learning_rate": 3.4846188894193896e-06, "loss": 0.0859, "step": 63090 }, { "epoch": 1.8660909682380078, "grad_norm": 0.9159339666366577, "learning_rate": 3.48449219950045e-06, "loss": 0.0889, "step": 63100 }, { "epoch": 1.866386703732182, "grad_norm": 0.6196478605270386, "learning_rate": 3.4843655095815107e-06, "loss": 0.0823, "step": 63110 }, { "epoch": 1.866682439226356, "grad_norm": 0.9118410348892212, "learning_rate": 3.484238819662571e-06, "loss": 0.0799, "step": 63120 }, { "epoch": 1.86697817472053, "grad_norm": 0.7471369504928589, "learning_rate": 3.484112129743632e-06, "loss": 0.0911, "step": 63130 }, { "epoch": 1.867273910214704, "grad_norm": 0.7641991376876831, "learning_rate": 3.4839854398246923e-06, "loss": 0.0986, "step": 63140 }, { "epoch": 1.867569645708878, "grad_norm": 0.6159576177597046, "learning_rate": 3.483858749905753e-06, "loss": 0.0888, "step": 63150 }, { "epoch": 1.867865381203052, "grad_norm": 1.176276445388794, "learning_rate": 3.4837320599868134e-06, "loss": 0.0823, "step": 63160 }, { "epoch": 1.8681611166972258, "grad_norm": 0.5261609554290771, "learning_rate": 3.4836053700678746e-06, "loss": 0.0884, "step": 63170 }, { "epoch": 1.8684568521914, "grad_norm": 0.7613547444343567, "learning_rate": 3.483478680148935e-06, "loss": 0.0947, "step": 63180 }, { "epoch": 1.8687525876855742, "grad_norm": 0.7866893410682678, "learning_rate": 3.483351990229996e-06, "loss": 0.0909, "step": 63190 }, { "epoch": 1.8690483231797481, "grad_norm": 0.9153842329978943, "learning_rate": 3.483225300311056e-06, "loss": 0.073, "step": 63200 }, { "epoch": 1.869344058673922, "grad_norm": 0.7098385691642761, "learning_rate": 3.483098610392117e-06, "loss": 0.0846, "step": 63210 }, { "epoch": 1.869639794168096, "grad_norm": 2.4671452045440674, "learning_rate": 3.4829719204731773e-06, "loss": 0.0859, "step": 63220 }, { "epoch": 1.86993552966227, "grad_norm": 0.7693500518798828, "learning_rate": 3.482845230554238e-06, "loss": 0.0966, "step": 63230 }, { "epoch": 1.8702312651564441, "grad_norm": 0.8544062376022339, "learning_rate": 3.4827185406352985e-06, "loss": 0.0972, "step": 63240 }, { "epoch": 1.870527000650618, "grad_norm": 0.7601969242095947, "learning_rate": 3.4825918507163597e-06, "loss": 0.0773, "step": 63250 }, { "epoch": 1.8708227361447922, "grad_norm": 0.4803164303302765, "learning_rate": 3.48246516079742e-06, "loss": 0.0543, "step": 63260 }, { "epoch": 1.8711184716389662, "grad_norm": 2.783784866333008, "learning_rate": 3.482338470878481e-06, "loss": 0.0826, "step": 63270 }, { "epoch": 1.87141420713314, "grad_norm": 0.784274697303772, "learning_rate": 3.482211780959541e-06, "loss": 0.0886, "step": 63280 }, { "epoch": 1.871709942627314, "grad_norm": 0.4844251275062561, "learning_rate": 3.482085091040602e-06, "loss": 0.0932, "step": 63290 }, { "epoch": 1.872005678121488, "grad_norm": 0.9405816197395325, "learning_rate": 3.4819584011216624e-06, "loss": 0.0796, "step": 63300 }, { "epoch": 1.8723014136156622, "grad_norm": 0.6488059759140015, "learning_rate": 3.481831711202723e-06, "loss": 0.0685, "step": 63310 }, { "epoch": 1.8725971491098363, "grad_norm": 0.5021411180496216, "learning_rate": 3.4817050212837835e-06, "loss": 0.086, "step": 63320 }, { "epoch": 1.8728928846040103, "grad_norm": 0.9486604928970337, "learning_rate": 3.4815783313648447e-06, "loss": 0.0989, "step": 63330 }, { "epoch": 1.8731886200981842, "grad_norm": 1.078603982925415, "learning_rate": 3.481451641445905e-06, "loss": 0.0803, "step": 63340 }, { "epoch": 1.8734843555923582, "grad_norm": 0.8469807505607605, "learning_rate": 3.481324951526966e-06, "loss": 0.0592, "step": 63350 }, { "epoch": 1.873780091086532, "grad_norm": 0.5681924223899841, "learning_rate": 3.4811982616080263e-06, "loss": 0.0692, "step": 63360 }, { "epoch": 1.8740758265807063, "grad_norm": 1.3642908334732056, "learning_rate": 3.481071571689087e-06, "loss": 0.0808, "step": 63370 }, { "epoch": 1.8743715620748802, "grad_norm": 0.5781458616256714, "learning_rate": 3.4809448817701474e-06, "loss": 0.0723, "step": 63380 }, { "epoch": 1.8746672975690544, "grad_norm": 0.7135138511657715, "learning_rate": 3.480818191851208e-06, "loss": 0.082, "step": 63390 }, { "epoch": 1.8749630330632283, "grad_norm": 0.37802645564079285, "learning_rate": 3.4806915019322686e-06, "loss": 0.0786, "step": 63400 }, { "epoch": 1.8752587685574023, "grad_norm": 0.920241117477417, "learning_rate": 3.4805648120133294e-06, "loss": 0.0865, "step": 63410 }, { "epoch": 1.8755545040515762, "grad_norm": 0.9285162091255188, "learning_rate": 3.48043812209439e-06, "loss": 0.0745, "step": 63420 }, { "epoch": 1.8758502395457501, "grad_norm": 1.3065129518508911, "learning_rate": 3.4803114321754505e-06, "loss": 0.089, "step": 63430 }, { "epoch": 1.8761459750399243, "grad_norm": 1.0144522190093994, "learning_rate": 3.4801847422565113e-06, "loss": 0.0993, "step": 63440 }, { "epoch": 1.8764417105340983, "grad_norm": 1.2811731100082397, "learning_rate": 3.4800580523375717e-06, "loss": 0.0911, "step": 63450 }, { "epoch": 1.8767374460282724, "grad_norm": 1.22624671459198, "learning_rate": 3.4799313624186325e-06, "loss": 0.0724, "step": 63460 }, { "epoch": 1.8770331815224464, "grad_norm": 0.6126525402069092, "learning_rate": 3.479804672499693e-06, "loss": 0.067, "step": 63470 }, { "epoch": 1.8773289170166203, "grad_norm": 0.8277590870857239, "learning_rate": 3.4796779825807536e-06, "loss": 0.0829, "step": 63480 }, { "epoch": 1.8776246525107942, "grad_norm": 1.116026759147644, "learning_rate": 3.4795512926618144e-06, "loss": 0.1003, "step": 63490 }, { "epoch": 1.8779203880049684, "grad_norm": 0.7128158807754517, "learning_rate": 3.479424602742875e-06, "loss": 0.0743, "step": 63500 }, { "epoch": 1.8782161234991424, "grad_norm": 1.065117359161377, "learning_rate": 3.4792979128239356e-06, "loss": 0.0839, "step": 63510 }, { "epoch": 1.8785118589933165, "grad_norm": 0.7430490851402283, "learning_rate": 3.4791712229049963e-06, "loss": 0.0829, "step": 63520 }, { "epoch": 1.8788075944874905, "grad_norm": 1.4942706823349, "learning_rate": 3.4790445329860567e-06, "loss": 0.074, "step": 63530 }, { "epoch": 1.8791033299816644, "grad_norm": 0.8310760855674744, "learning_rate": 3.4789178430671175e-06, "loss": 0.0738, "step": 63540 }, { "epoch": 1.8793990654758383, "grad_norm": 0.6233971118927002, "learning_rate": 3.478791153148178e-06, "loss": 0.0683, "step": 63550 }, { "epoch": 1.8796948009700123, "grad_norm": 0.7429624199867249, "learning_rate": 3.4786644632292387e-06, "loss": 0.0742, "step": 63560 }, { "epoch": 1.8799905364641865, "grad_norm": 0.8852857351303101, "learning_rate": 3.4785377733102994e-06, "loss": 0.0865, "step": 63570 }, { "epoch": 1.8802862719583604, "grad_norm": 0.9196518063545227, "learning_rate": 3.4784110833913602e-06, "loss": 0.1047, "step": 63580 }, { "epoch": 1.8805820074525346, "grad_norm": 0.5864231586456299, "learning_rate": 3.4782843934724206e-06, "loss": 0.0788, "step": 63590 }, { "epoch": 1.8808777429467085, "grad_norm": 1.047413945198059, "learning_rate": 3.4781577035534814e-06, "loss": 0.0863, "step": 63600 }, { "epoch": 1.8811734784408825, "grad_norm": 1.1196659803390503, "learning_rate": 3.4780310136345418e-06, "loss": 0.079, "step": 63610 }, { "epoch": 1.8814692139350564, "grad_norm": 1.0799161195755005, "learning_rate": 3.4779043237156025e-06, "loss": 0.0928, "step": 63620 }, { "epoch": 1.8817649494292303, "grad_norm": 1.0492665767669678, "learning_rate": 3.477777633796663e-06, "loss": 0.1035, "step": 63630 }, { "epoch": 1.8820606849234045, "grad_norm": 1.3033348321914673, "learning_rate": 3.4776509438777237e-06, "loss": 0.0863, "step": 63640 }, { "epoch": 1.8823564204175787, "grad_norm": 0.6920613050460815, "learning_rate": 3.4775242539587845e-06, "loss": 0.083, "step": 63650 }, { "epoch": 1.8826521559117526, "grad_norm": 0.7031392455101013, "learning_rate": 3.4773975640398453e-06, "loss": 0.0693, "step": 63660 }, { "epoch": 1.8829478914059266, "grad_norm": 0.9933557510375977, "learning_rate": 3.4772708741209056e-06, "loss": 0.0948, "step": 63670 }, { "epoch": 1.8832436269001005, "grad_norm": 0.898125946521759, "learning_rate": 3.4771441842019664e-06, "loss": 0.1112, "step": 63680 }, { "epoch": 1.8835393623942744, "grad_norm": 0.9352166056632996, "learning_rate": 3.477017494283027e-06, "loss": 0.0898, "step": 63690 }, { "epoch": 1.8838350978884486, "grad_norm": 0.7080094218254089, "learning_rate": 3.4768908043640876e-06, "loss": 0.0738, "step": 63700 }, { "epoch": 1.8841308333826225, "grad_norm": 0.7767011523246765, "learning_rate": 3.476764114445148e-06, "loss": 0.0545, "step": 63710 }, { "epoch": 1.8844265688767967, "grad_norm": 0.5941631197929382, "learning_rate": 3.4766374245262087e-06, "loss": 0.0786, "step": 63720 }, { "epoch": 1.8847223043709707, "grad_norm": 0.9102201461791992, "learning_rate": 3.4765107346072695e-06, "loss": 0.0813, "step": 63730 }, { "epoch": 1.8850180398651446, "grad_norm": 0.8613954782485962, "learning_rate": 3.4763840446883303e-06, "loss": 0.0679, "step": 63740 }, { "epoch": 1.8853137753593185, "grad_norm": 0.6253947019577026, "learning_rate": 3.4762573547693907e-06, "loss": 0.0621, "step": 63750 }, { "epoch": 1.8856095108534925, "grad_norm": 1.1291390657424927, "learning_rate": 3.4761306648504515e-06, "loss": 0.0686, "step": 63760 }, { "epoch": 1.8859052463476667, "grad_norm": 0.7177234292030334, "learning_rate": 3.476003974931512e-06, "loss": 0.0824, "step": 63770 }, { "epoch": 1.8862009818418408, "grad_norm": 0.9341846704483032, "learning_rate": 3.4758772850125726e-06, "loss": 0.0746, "step": 63780 }, { "epoch": 1.8864967173360148, "grad_norm": 1.852984070777893, "learning_rate": 3.475750595093633e-06, "loss": 0.0872, "step": 63790 }, { "epoch": 1.8867924528301887, "grad_norm": 0.4814658761024475, "learning_rate": 3.475623905174694e-06, "loss": 0.0791, "step": 63800 }, { "epoch": 1.8870881883243626, "grad_norm": 0.4539101719856262, "learning_rate": 3.4754972152557546e-06, "loss": 0.0756, "step": 63810 }, { "epoch": 1.8873839238185366, "grad_norm": 1.090777039527893, "learning_rate": 3.475370525336815e-06, "loss": 0.0838, "step": 63820 }, { "epoch": 1.8876796593127108, "grad_norm": 1.1892732381820679, "learning_rate": 3.4752438354178757e-06, "loss": 0.0807, "step": 63830 }, { "epoch": 1.8879753948068847, "grad_norm": 0.795512318611145, "learning_rate": 3.475117145498936e-06, "loss": 0.0678, "step": 63840 }, { "epoch": 1.8882711303010589, "grad_norm": 0.39678987860679626, "learning_rate": 3.474990455579997e-06, "loss": 0.0851, "step": 63850 }, { "epoch": 1.8885668657952328, "grad_norm": 1.1362789869308472, "learning_rate": 3.4748637656610573e-06, "loss": 0.082, "step": 63860 }, { "epoch": 1.8888626012894068, "grad_norm": 0.769433319568634, "learning_rate": 3.474737075742118e-06, "loss": 0.0655, "step": 63870 }, { "epoch": 1.8891583367835807, "grad_norm": 0.5452122092247009, "learning_rate": 3.4746103858231784e-06, "loss": 0.077, "step": 63880 }, { "epoch": 1.8894540722777546, "grad_norm": 0.8191555142402649, "learning_rate": 3.4744836959042396e-06, "loss": 0.0956, "step": 63890 }, { "epoch": 1.8897498077719288, "grad_norm": 0.4125296175479889, "learning_rate": 3.4743570059853e-06, "loss": 0.0638, "step": 63900 }, { "epoch": 1.8900455432661027, "grad_norm": 0.6350603103637695, "learning_rate": 3.4742303160663608e-06, "loss": 0.0718, "step": 63910 }, { "epoch": 1.890341278760277, "grad_norm": 0.830072283744812, "learning_rate": 3.474103626147421e-06, "loss": 0.0834, "step": 63920 }, { "epoch": 1.8906370142544509, "grad_norm": 5.453653335571289, "learning_rate": 3.473976936228482e-06, "loss": 0.1043, "step": 63930 }, { "epoch": 1.8909327497486248, "grad_norm": 0.5593686699867249, "learning_rate": 3.4738502463095423e-06, "loss": 0.0857, "step": 63940 }, { "epoch": 1.8912284852427987, "grad_norm": 1.001253366470337, "learning_rate": 3.473723556390603e-06, "loss": 0.096, "step": 63950 }, { "epoch": 1.891524220736973, "grad_norm": 0.7987959384918213, "learning_rate": 3.4735968664716635e-06, "loss": 0.0659, "step": 63960 }, { "epoch": 1.8918199562311468, "grad_norm": 1.0653135776519775, "learning_rate": 3.4734701765527247e-06, "loss": 0.0774, "step": 63970 }, { "epoch": 1.892115691725321, "grad_norm": 0.35287269949913025, "learning_rate": 3.473343486633785e-06, "loss": 0.0713, "step": 63980 }, { "epoch": 1.892411427219495, "grad_norm": 1.089613437652588, "learning_rate": 3.473216796714846e-06, "loss": 0.086, "step": 63990 }, { "epoch": 1.892707162713669, "grad_norm": 1.248244285583496, "learning_rate": 3.473090106795906e-06, "loss": 0.073, "step": 64000 }, { "epoch": 1.8930028982078428, "grad_norm": 0.6787700653076172, "learning_rate": 3.472963416876967e-06, "loss": 0.0843, "step": 64010 }, { "epoch": 1.8932986337020168, "grad_norm": 0.8024337887763977, "learning_rate": 3.4728367269580273e-06, "loss": 0.084, "step": 64020 }, { "epoch": 1.893594369196191, "grad_norm": 0.8108032941818237, "learning_rate": 3.472710037039088e-06, "loss": 0.0831, "step": 64030 }, { "epoch": 1.893890104690365, "grad_norm": 0.6951099038124084, "learning_rate": 3.4725833471201485e-06, "loss": 0.085, "step": 64040 }, { "epoch": 1.894185840184539, "grad_norm": 0.6493582129478455, "learning_rate": 3.4724566572012097e-06, "loss": 0.0644, "step": 64050 }, { "epoch": 1.894481575678713, "grad_norm": 0.6732364892959595, "learning_rate": 3.47232996728227e-06, "loss": 0.062, "step": 64060 }, { "epoch": 1.894777311172887, "grad_norm": 0.8140398263931274, "learning_rate": 3.472203277363331e-06, "loss": 0.074, "step": 64070 }, { "epoch": 1.895073046667061, "grad_norm": 0.6346666216850281, "learning_rate": 3.4720765874443912e-06, "loss": 0.0933, "step": 64080 }, { "epoch": 1.8953687821612348, "grad_norm": 0.5451135039329529, "learning_rate": 3.471949897525452e-06, "loss": 0.0894, "step": 64090 }, { "epoch": 1.895664517655409, "grad_norm": 0.5363314747810364, "learning_rate": 3.4718232076065124e-06, "loss": 0.0776, "step": 64100 }, { "epoch": 1.8959602531495832, "grad_norm": 0.6442518830299377, "learning_rate": 3.471696517687573e-06, "loss": 0.0749, "step": 64110 }, { "epoch": 1.896255988643757, "grad_norm": 0.8621118068695068, "learning_rate": 3.4715698277686335e-06, "loss": 0.0894, "step": 64120 }, { "epoch": 1.896551724137931, "grad_norm": 0.9183483123779297, "learning_rate": 3.4714431378496948e-06, "loss": 0.0752, "step": 64130 }, { "epoch": 1.896847459632105, "grad_norm": 0.5535963773727417, "learning_rate": 3.471316447930755e-06, "loss": 0.0839, "step": 64140 }, { "epoch": 1.897143195126279, "grad_norm": 0.9291518330574036, "learning_rate": 3.471189758011816e-06, "loss": 0.0728, "step": 64150 }, { "epoch": 1.897438930620453, "grad_norm": 0.5688595771789551, "learning_rate": 3.4710630680928763e-06, "loss": 0.0793, "step": 64160 }, { "epoch": 1.897734666114627, "grad_norm": 1.140177845954895, "learning_rate": 3.470936378173937e-06, "loss": 0.07, "step": 64170 }, { "epoch": 1.8980304016088012, "grad_norm": 0.848141610622406, "learning_rate": 3.4708096882549974e-06, "loss": 0.0756, "step": 64180 }, { "epoch": 1.8983261371029752, "grad_norm": 0.576752781867981, "learning_rate": 3.4706829983360582e-06, "loss": 0.0844, "step": 64190 }, { "epoch": 1.898621872597149, "grad_norm": 1.1115370988845825, "learning_rate": 3.4705563084171186e-06, "loss": 0.0836, "step": 64200 }, { "epoch": 1.898917608091323, "grad_norm": 0.914603054523468, "learning_rate": 3.47042961849818e-06, "loss": 0.0923, "step": 64210 }, { "epoch": 1.899213343585497, "grad_norm": 1.3104814291000366, "learning_rate": 3.47030292857924e-06, "loss": 0.0833, "step": 64220 }, { "epoch": 1.8995090790796711, "grad_norm": 0.8454039096832275, "learning_rate": 3.470176238660301e-06, "loss": 0.1027, "step": 64230 }, { "epoch": 1.8998048145738453, "grad_norm": 1.0940355062484741, "learning_rate": 3.4700495487413613e-06, "loss": 0.0935, "step": 64240 }, { "epoch": 1.9001005500680193, "grad_norm": 0.5006803274154663, "learning_rate": 3.4699228588224217e-06, "loss": 0.0696, "step": 64250 }, { "epoch": 1.9003962855621932, "grad_norm": 0.6958169937133789, "learning_rate": 3.4697961689034825e-06, "loss": 0.0681, "step": 64260 }, { "epoch": 1.9006920210563671, "grad_norm": 0.751767098903656, "learning_rate": 3.469669478984543e-06, "loss": 0.0852, "step": 64270 }, { "epoch": 1.900987756550541, "grad_norm": 0.6479078531265259, "learning_rate": 3.4695427890656036e-06, "loss": 0.0898, "step": 64280 }, { "epoch": 1.9012834920447153, "grad_norm": 0.6922821998596191, "learning_rate": 3.4694160991466644e-06, "loss": 0.0892, "step": 64290 }, { "epoch": 1.9015792275388892, "grad_norm": 0.8399052023887634, "learning_rate": 3.4692894092277252e-06, "loss": 0.0958, "step": 64300 }, { "epoch": 1.9018749630330634, "grad_norm": 0.7693126201629639, "learning_rate": 3.4691627193087856e-06, "loss": 0.0799, "step": 64310 }, { "epoch": 1.9021706985272373, "grad_norm": 0.666706383228302, "learning_rate": 3.4690360293898464e-06, "loss": 0.0809, "step": 64320 }, { "epoch": 1.9024664340214112, "grad_norm": 0.8065574765205383, "learning_rate": 3.4689093394709067e-06, "loss": 0.0871, "step": 64330 }, { "epoch": 1.9027621695155852, "grad_norm": 0.88962721824646, "learning_rate": 3.4687826495519675e-06, "loss": 0.1021, "step": 64340 }, { "epoch": 1.9030579050097591, "grad_norm": 1.0227410793304443, "learning_rate": 3.468655959633028e-06, "loss": 0.0886, "step": 64350 }, { "epoch": 1.9033536405039333, "grad_norm": 0.6919842958450317, "learning_rate": 3.4685292697140887e-06, "loss": 0.0872, "step": 64360 }, { "epoch": 1.9036493759981072, "grad_norm": 0.8613666296005249, "learning_rate": 3.4684025797951495e-06, "loss": 0.0963, "step": 64370 }, { "epoch": 1.9039451114922814, "grad_norm": 0.7885882258415222, "learning_rate": 3.4682758898762103e-06, "loss": 0.0761, "step": 64380 }, { "epoch": 1.9042408469864553, "grad_norm": 1.093406081199646, "learning_rate": 3.4681491999572706e-06, "loss": 0.0808, "step": 64390 }, { "epoch": 1.9045365824806293, "grad_norm": 0.7942336797714233, "learning_rate": 3.4680225100383314e-06, "loss": 0.0788, "step": 64400 }, { "epoch": 1.9048323179748032, "grad_norm": 0.9470803141593933, "learning_rate": 3.4678958201193918e-06, "loss": 0.0753, "step": 64410 }, { "epoch": 1.9051280534689774, "grad_norm": 1.0250561237335205, "learning_rate": 3.4677691302004526e-06, "loss": 0.0954, "step": 64420 }, { "epoch": 1.9054237889631513, "grad_norm": 8.244359016418457, "learning_rate": 3.467642440281513e-06, "loss": 0.0906, "step": 64430 }, { "epoch": 1.9057195244573255, "grad_norm": 0.8879435658454895, "learning_rate": 3.4675157503625737e-06, "loss": 0.1083, "step": 64440 }, { "epoch": 1.9060152599514995, "grad_norm": 0.6273727416992188, "learning_rate": 3.4673890604436345e-06, "loss": 0.081, "step": 64450 }, { "epoch": 1.9063109954456734, "grad_norm": 0.767368733882904, "learning_rate": 3.4672623705246953e-06, "loss": 0.0853, "step": 64460 }, { "epoch": 1.9066067309398473, "grad_norm": 1.1050527095794678, "learning_rate": 3.4671356806057557e-06, "loss": 0.0973, "step": 64470 }, { "epoch": 1.9069024664340213, "grad_norm": 0.6004257798194885, "learning_rate": 3.4670089906868165e-06, "loss": 0.0864, "step": 64480 }, { "epoch": 1.9071982019281954, "grad_norm": 1.0948795080184937, "learning_rate": 3.466882300767877e-06, "loss": 0.1028, "step": 64490 }, { "epoch": 1.9074939374223694, "grad_norm": 0.604177713394165, "learning_rate": 3.4667556108489376e-06, "loss": 0.0759, "step": 64500 }, { "epoch": 1.9077896729165436, "grad_norm": 0.6515494585037231, "learning_rate": 3.466628920929998e-06, "loss": 0.0596, "step": 64510 }, { "epoch": 1.9080854084107175, "grad_norm": 1.0222463607788086, "learning_rate": 3.4665022310110588e-06, "loss": 0.0843, "step": 64520 }, { "epoch": 1.9083811439048914, "grad_norm": 0.758133590221405, "learning_rate": 3.4663755410921196e-06, "loss": 0.0918, "step": 64530 }, { "epoch": 1.9086768793990654, "grad_norm": 0.6915281414985657, "learning_rate": 3.4662488511731804e-06, "loss": 0.1013, "step": 64540 }, { "epoch": 1.9089726148932393, "grad_norm": 0.6512230038642883, "learning_rate": 3.4661221612542407e-06, "loss": 0.0739, "step": 64550 }, { "epoch": 1.9092683503874135, "grad_norm": 0.6437195539474487, "learning_rate": 3.4659954713353015e-06, "loss": 0.0679, "step": 64560 }, { "epoch": 1.9095640858815877, "grad_norm": 0.6763375401496887, "learning_rate": 3.465868781416362e-06, "loss": 0.0802, "step": 64570 }, { "epoch": 1.9098598213757616, "grad_norm": 0.9804810881614685, "learning_rate": 3.4657420914974227e-06, "loss": 0.0726, "step": 64580 }, { "epoch": 1.9101555568699355, "grad_norm": 1.1524938344955444, "learning_rate": 3.465615401578483e-06, "loss": 0.095, "step": 64590 }, { "epoch": 1.9104512923641095, "grad_norm": 0.6515979170799255, "learning_rate": 3.465488711659544e-06, "loss": 0.0667, "step": 64600 }, { "epoch": 1.9107470278582834, "grad_norm": 0.586739182472229, "learning_rate": 3.4653620217406046e-06, "loss": 0.0653, "step": 64610 }, { "epoch": 1.9110427633524576, "grad_norm": 0.7343229055404663, "learning_rate": 3.4652353318216654e-06, "loss": 0.0845, "step": 64620 }, { "epoch": 1.9113384988466315, "grad_norm": 0.8874489665031433, "learning_rate": 3.4651086419027258e-06, "loss": 0.0856, "step": 64630 }, { "epoch": 1.9116342343408057, "grad_norm": 0.9253434538841248, "learning_rate": 3.4649819519837866e-06, "loss": 0.0904, "step": 64640 }, { "epoch": 1.9119299698349796, "grad_norm": 1.122246265411377, "learning_rate": 3.464855262064847e-06, "loss": 0.0677, "step": 64650 }, { "epoch": 1.9122257053291536, "grad_norm": 0.6271930932998657, "learning_rate": 3.4647285721459073e-06, "loss": 0.0656, "step": 64660 }, { "epoch": 1.9125214408233275, "grad_norm": 0.7722951769828796, "learning_rate": 3.464601882226968e-06, "loss": 0.087, "step": 64670 }, { "epoch": 1.9128171763175015, "grad_norm": 0.6620450019836426, "learning_rate": 3.4644751923080284e-06, "loss": 0.0961, "step": 64680 }, { "epoch": 1.9131129118116756, "grad_norm": 0.7339807748794556, "learning_rate": 3.4643485023890897e-06, "loss": 0.0693, "step": 64690 }, { "epoch": 1.9134086473058498, "grad_norm": 0.7566631436347961, "learning_rate": 3.46422181247015e-06, "loss": 0.0702, "step": 64700 }, { "epoch": 1.9137043828000238, "grad_norm": 0.4259566068649292, "learning_rate": 3.464095122551211e-06, "loss": 0.0568, "step": 64710 }, { "epoch": 1.9140001182941977, "grad_norm": 1.4549800157546997, "learning_rate": 3.463968432632271e-06, "loss": 0.0775, "step": 64720 }, { "epoch": 1.9142958537883716, "grad_norm": 0.7734715342521667, "learning_rate": 3.463841742713332e-06, "loss": 0.0993, "step": 64730 }, { "epoch": 1.9145915892825456, "grad_norm": 0.7084649801254272, "learning_rate": 3.4637150527943923e-06, "loss": 0.1133, "step": 64740 }, { "epoch": 1.9148873247767197, "grad_norm": 0.6622551083564758, "learning_rate": 3.463588362875453e-06, "loss": 0.087, "step": 64750 }, { "epoch": 1.9151830602708937, "grad_norm": 0.8301424384117126, "learning_rate": 3.4634616729565135e-06, "loss": 0.0909, "step": 64760 }, { "epoch": 1.9154787957650679, "grad_norm": 0.8318000435829163, "learning_rate": 3.4633349830375747e-06, "loss": 0.0887, "step": 64770 }, { "epoch": 1.9157745312592418, "grad_norm": 0.586298942565918, "learning_rate": 3.463208293118635e-06, "loss": 0.0768, "step": 64780 }, { "epoch": 1.9160702667534157, "grad_norm": 0.4782581925392151, "learning_rate": 3.463081603199696e-06, "loss": 0.0773, "step": 64790 }, { "epoch": 1.9163660022475897, "grad_norm": 0.4873252213001251, "learning_rate": 3.4629549132807562e-06, "loss": 0.078, "step": 64800 }, { "epoch": 1.9166617377417636, "grad_norm": 0.7419688105583191, "learning_rate": 3.462828223361817e-06, "loss": 0.0648, "step": 64810 }, { "epoch": 1.9169574732359378, "grad_norm": 0.9613654017448425, "learning_rate": 3.4627015334428774e-06, "loss": 0.0777, "step": 64820 }, { "epoch": 1.9172532087301117, "grad_norm": 0.6561884880065918, "learning_rate": 3.462574843523938e-06, "loss": 0.0758, "step": 64830 }, { "epoch": 1.917548944224286, "grad_norm": 0.6496971249580383, "learning_rate": 3.4624481536049985e-06, "loss": 0.0809, "step": 64840 }, { "epoch": 1.9178446797184598, "grad_norm": 0.7110748887062073, "learning_rate": 3.4623214636860597e-06, "loss": 0.0772, "step": 64850 }, { "epoch": 1.9181404152126338, "grad_norm": 0.7965512871742249, "learning_rate": 3.46219477376712e-06, "loss": 0.0726, "step": 64860 }, { "epoch": 1.9184361507068077, "grad_norm": 0.971409797668457, "learning_rate": 3.462068083848181e-06, "loss": 0.0855, "step": 64870 }, { "epoch": 1.918731886200982, "grad_norm": 0.5638508200645447, "learning_rate": 3.4619413939292413e-06, "loss": 0.0806, "step": 64880 }, { "epoch": 1.9190276216951558, "grad_norm": 0.7942882776260376, "learning_rate": 3.461814704010302e-06, "loss": 0.0761, "step": 64890 }, { "epoch": 1.91932335718933, "grad_norm": 1.0429003238677979, "learning_rate": 3.4616880140913624e-06, "loss": 0.0892, "step": 64900 }, { "epoch": 1.919619092683504, "grad_norm": 0.7678113579750061, "learning_rate": 3.461561324172423e-06, "loss": 0.0478, "step": 64910 }, { "epoch": 1.919914828177678, "grad_norm": 0.8059314489364624, "learning_rate": 3.4614346342534836e-06, "loss": 0.0857, "step": 64920 }, { "epoch": 1.9202105636718518, "grad_norm": 2.359590530395508, "learning_rate": 3.461307944334545e-06, "loss": 0.0978, "step": 64930 }, { "epoch": 1.9205062991660258, "grad_norm": 0.8813620805740356, "learning_rate": 3.461181254415605e-06, "loss": 0.0897, "step": 64940 }, { "epoch": 1.9208020346602, "grad_norm": 0.4508672058582306, "learning_rate": 3.461054564496666e-06, "loss": 0.0798, "step": 64950 }, { "epoch": 1.9210977701543739, "grad_norm": 0.715994656085968, "learning_rate": 3.4609278745777263e-06, "loss": 0.0814, "step": 64960 }, { "epoch": 1.921393505648548, "grad_norm": 0.8338372707366943, "learning_rate": 3.460801184658787e-06, "loss": 0.0686, "step": 64970 }, { "epoch": 1.921689241142722, "grad_norm": 0.9337151050567627, "learning_rate": 3.4606744947398475e-06, "loss": 0.1089, "step": 64980 }, { "epoch": 1.921984976636896, "grad_norm": 1.4477139711380005, "learning_rate": 3.4605478048209083e-06, "loss": 0.1136, "step": 64990 }, { "epoch": 1.9222807121310699, "grad_norm": 0.5103251934051514, "learning_rate": 3.4604211149019686e-06, "loss": 0.0845, "step": 65000 }, { "epoch": 1.9225764476252438, "grad_norm": 0.4556509554386139, "learning_rate": 3.46029442498303e-06, "loss": 0.0617, "step": 65010 }, { "epoch": 1.922872183119418, "grad_norm": 0.8032433986663818, "learning_rate": 3.46016773506409e-06, "loss": 0.0796, "step": 65020 }, { "epoch": 1.9231679186135922, "grad_norm": 0.6497885584831238, "learning_rate": 3.460041045145151e-06, "loss": 0.0696, "step": 65030 }, { "epoch": 1.923463654107766, "grad_norm": 1.115221381187439, "learning_rate": 3.4599143552262114e-06, "loss": 0.0899, "step": 65040 }, { "epoch": 1.92375938960194, "grad_norm": 1.084109902381897, "learning_rate": 3.459787665307272e-06, "loss": 0.0725, "step": 65050 }, { "epoch": 1.924055125096114, "grad_norm": 0.8696497678756714, "learning_rate": 3.4596609753883325e-06, "loss": 0.0647, "step": 65060 }, { "epoch": 1.924350860590288, "grad_norm": 0.97188800573349, "learning_rate": 3.459534285469393e-06, "loss": 0.0682, "step": 65070 }, { "epoch": 1.924646596084462, "grad_norm": 0.7478742003440857, "learning_rate": 3.4594075955504537e-06, "loss": 0.0903, "step": 65080 }, { "epoch": 1.924942331578636, "grad_norm": 0.5028858184814453, "learning_rate": 3.4592809056315145e-06, "loss": 0.0597, "step": 65090 }, { "epoch": 1.9252380670728102, "grad_norm": 1.3519182205200195, "learning_rate": 3.4591542157125752e-06, "loss": 0.0904, "step": 65100 }, { "epoch": 1.9255338025669841, "grad_norm": 0.9822875261306763, "learning_rate": 3.4590275257936356e-06, "loss": 0.0907, "step": 65110 }, { "epoch": 1.925829538061158, "grad_norm": 0.8920740485191345, "learning_rate": 3.4589008358746964e-06, "loss": 0.0861, "step": 65120 }, { "epoch": 1.926125273555332, "grad_norm": 1.0514131784439087, "learning_rate": 3.4587741459557568e-06, "loss": 0.0913, "step": 65130 }, { "epoch": 1.926421009049506, "grad_norm": 0.6184898018836975, "learning_rate": 3.4586474560368176e-06, "loss": 0.0848, "step": 65140 }, { "epoch": 1.9267167445436801, "grad_norm": 0.8136964440345764, "learning_rate": 3.458520766117878e-06, "loss": 0.0767, "step": 65150 }, { "epoch": 1.9270124800378543, "grad_norm": 0.6836538910865784, "learning_rate": 3.4583940761989387e-06, "loss": 0.0647, "step": 65160 }, { "epoch": 1.9273082155320282, "grad_norm": 0.5491153597831726, "learning_rate": 3.4582673862799995e-06, "loss": 0.0994, "step": 65170 }, { "epoch": 1.9276039510262022, "grad_norm": 0.9597465395927429, "learning_rate": 3.4581406963610603e-06, "loss": 0.0803, "step": 65180 }, { "epoch": 1.9278996865203761, "grad_norm": 0.791652262210846, "learning_rate": 3.4580140064421207e-06, "loss": 0.0982, "step": 65190 }, { "epoch": 1.92819542201455, "grad_norm": 1.1068483591079712, "learning_rate": 3.4578873165231814e-06, "loss": 0.0962, "step": 65200 }, { "epoch": 1.9284911575087242, "grad_norm": 0.7346541881561279, "learning_rate": 3.457760626604242e-06, "loss": 0.0656, "step": 65210 }, { "epoch": 1.9287868930028982, "grad_norm": 0.89845210313797, "learning_rate": 3.4576339366853026e-06, "loss": 0.0649, "step": 65220 }, { "epoch": 1.9290826284970723, "grad_norm": 0.8533062934875488, "learning_rate": 3.457507246766363e-06, "loss": 0.1018, "step": 65230 }, { "epoch": 1.9293783639912463, "grad_norm": 0.7083910703659058, "learning_rate": 3.4573805568474238e-06, "loss": 0.0875, "step": 65240 }, { "epoch": 1.9296740994854202, "grad_norm": 0.6816828846931458, "learning_rate": 3.4572538669284845e-06, "loss": 0.0928, "step": 65250 }, { "epoch": 1.9299698349795942, "grad_norm": 0.6469454169273376, "learning_rate": 3.4571271770095453e-06, "loss": 0.0745, "step": 65260 }, { "epoch": 1.9302655704737681, "grad_norm": 0.7144581079483032, "learning_rate": 3.4570004870906057e-06, "loss": 0.0742, "step": 65270 }, { "epoch": 1.9305613059679423, "grad_norm": 0.5306157469749451, "learning_rate": 3.4568737971716665e-06, "loss": 0.086, "step": 65280 }, { "epoch": 1.9308570414621162, "grad_norm": 0.7746897339820862, "learning_rate": 3.456747107252727e-06, "loss": 0.1018, "step": 65290 }, { "epoch": 1.9311527769562904, "grad_norm": 0.8157539963722229, "learning_rate": 3.4566204173337876e-06, "loss": 0.068, "step": 65300 }, { "epoch": 1.9314485124504643, "grad_norm": 1.3660062551498413, "learning_rate": 3.456493727414848e-06, "loss": 0.0795, "step": 65310 }, { "epoch": 1.9317442479446383, "grad_norm": 0.5217109322547913, "learning_rate": 3.456367037495909e-06, "loss": 0.0902, "step": 65320 }, { "epoch": 1.9320399834388122, "grad_norm": 0.76219642162323, "learning_rate": 3.4562403475769696e-06, "loss": 0.0973, "step": 65330 }, { "epoch": 1.9323357189329864, "grad_norm": 0.7233889698982239, "learning_rate": 3.4561136576580304e-06, "loss": 0.0803, "step": 65340 }, { "epoch": 1.9326314544271603, "grad_norm": 0.8973203301429749, "learning_rate": 3.4559869677390907e-06, "loss": 0.0725, "step": 65350 }, { "epoch": 1.9329271899213345, "grad_norm": 1.001473069190979, "learning_rate": 3.4558602778201515e-06, "loss": 0.0788, "step": 65360 }, { "epoch": 1.9332229254155084, "grad_norm": 0.8055823445320129, "learning_rate": 3.455733587901212e-06, "loss": 0.0819, "step": 65370 }, { "epoch": 1.9335186609096824, "grad_norm": 0.5766475796699524, "learning_rate": 3.4556068979822727e-06, "loss": 0.0853, "step": 65380 }, { "epoch": 1.9338143964038563, "grad_norm": 0.5411571264266968, "learning_rate": 3.455480208063333e-06, "loss": 0.0639, "step": 65390 }, { "epoch": 1.9341101318980303, "grad_norm": 1.0804723501205444, "learning_rate": 3.455353518144394e-06, "loss": 0.0735, "step": 65400 }, { "epoch": 1.9344058673922044, "grad_norm": 0.9590131044387817, "learning_rate": 3.4552268282254546e-06, "loss": 0.0564, "step": 65410 }, { "epoch": 1.9347016028863784, "grad_norm": 0.9727126359939575, "learning_rate": 3.4551001383065154e-06, "loss": 0.0817, "step": 65420 }, { "epoch": 1.9349973383805525, "grad_norm": 0.8307595252990723, "learning_rate": 3.454973448387576e-06, "loss": 0.0965, "step": 65430 }, { "epoch": 1.9352930738747265, "grad_norm": 1.0750128030776978, "learning_rate": 3.4548467584686366e-06, "loss": 0.093, "step": 65440 }, { "epoch": 1.9355888093689004, "grad_norm": 0.7609298229217529, "learning_rate": 3.454720068549697e-06, "loss": 0.0724, "step": 65450 }, { "epoch": 1.9358845448630744, "grad_norm": 0.9880464673042297, "learning_rate": 3.4545933786307577e-06, "loss": 0.0717, "step": 65460 }, { "epoch": 1.9361802803572483, "grad_norm": 1.0276727676391602, "learning_rate": 3.454466688711818e-06, "loss": 0.0903, "step": 65470 }, { "epoch": 1.9364760158514225, "grad_norm": 0.6816169023513794, "learning_rate": 3.4543399987928785e-06, "loss": 0.0893, "step": 65480 }, { "epoch": 1.9367717513455966, "grad_norm": 0.5507534146308899, "learning_rate": 3.4542133088739397e-06, "loss": 0.0669, "step": 65490 }, { "epoch": 1.9370674868397706, "grad_norm": 0.8451064229011536, "learning_rate": 3.454086618955e-06, "loss": 0.078, "step": 65500 }, { "epoch": 1.9373632223339445, "grad_norm": 0.9510778784751892, "learning_rate": 3.453959929036061e-06, "loss": 0.088, "step": 65510 }, { "epoch": 1.9376589578281185, "grad_norm": 0.932068943977356, "learning_rate": 3.453833239117121e-06, "loss": 0.0907, "step": 65520 }, { "epoch": 1.9379546933222924, "grad_norm": 0.4910902678966522, "learning_rate": 3.453706549198182e-06, "loss": 0.0914, "step": 65530 }, { "epoch": 1.9382504288164666, "grad_norm": 0.7424570322036743, "learning_rate": 3.4535798592792424e-06, "loss": 0.0779, "step": 65540 }, { "epoch": 1.9385461643106405, "grad_norm": 0.4875687062740326, "learning_rate": 3.453453169360303e-06, "loss": 0.0818, "step": 65550 }, { "epoch": 1.9388418998048147, "grad_norm": 0.9140841960906982, "learning_rate": 3.4533264794413635e-06, "loss": 0.0639, "step": 65560 }, { "epoch": 1.9391376352989886, "grad_norm": 0.9333755373954773, "learning_rate": 3.4531997895224247e-06, "loss": 0.0974, "step": 65570 }, { "epoch": 1.9394333707931626, "grad_norm": 1.4338552951812744, "learning_rate": 3.453073099603485e-06, "loss": 0.1063, "step": 65580 }, { "epoch": 1.9397291062873365, "grad_norm": 0.9058924913406372, "learning_rate": 3.452946409684546e-06, "loss": 0.0918, "step": 65590 }, { "epoch": 1.9400248417815105, "grad_norm": 0.8628184199333191, "learning_rate": 3.4528197197656063e-06, "loss": 0.0678, "step": 65600 }, { "epoch": 1.9403205772756846, "grad_norm": 1.1507272720336914, "learning_rate": 3.452693029846667e-06, "loss": 0.081, "step": 65610 }, { "epoch": 1.9406163127698588, "grad_norm": 0.6617481112480164, "learning_rate": 3.4525663399277274e-06, "loss": 0.0988, "step": 65620 }, { "epoch": 1.9409120482640327, "grad_norm": 0.8193278908729553, "learning_rate": 3.452439650008788e-06, "loss": 0.0951, "step": 65630 }, { "epoch": 1.9412077837582067, "grad_norm": 0.632035493850708, "learning_rate": 3.4523129600898486e-06, "loss": 0.074, "step": 65640 }, { "epoch": 1.9415035192523806, "grad_norm": 0.46681028604507446, "learning_rate": 3.4521862701709098e-06, "loss": 0.0823, "step": 65650 }, { "epoch": 1.9417992547465546, "grad_norm": 1.028968334197998, "learning_rate": 3.45205958025197e-06, "loss": 0.0734, "step": 65660 }, { "epoch": 1.9420949902407287, "grad_norm": 0.7897313237190247, "learning_rate": 3.451932890333031e-06, "loss": 0.1042, "step": 65670 }, { "epoch": 1.9423907257349027, "grad_norm": 0.5986829400062561, "learning_rate": 3.4518062004140913e-06, "loss": 0.099, "step": 65680 }, { "epoch": 1.9426864612290768, "grad_norm": 0.6897351145744324, "learning_rate": 3.451679510495152e-06, "loss": 0.0844, "step": 65690 }, { "epoch": 1.9429821967232508, "grad_norm": 0.7699353098869324, "learning_rate": 3.4515528205762125e-06, "loss": 0.0812, "step": 65700 }, { "epoch": 1.9432779322174247, "grad_norm": 0.6972827315330505, "learning_rate": 3.4514261306572732e-06, "loss": 0.0813, "step": 65710 }, { "epoch": 1.9435736677115987, "grad_norm": 0.9789050817489624, "learning_rate": 3.4512994407383336e-06, "loss": 0.0935, "step": 65720 }, { "epoch": 1.9438694032057726, "grad_norm": 0.7842503190040588, "learning_rate": 3.451172750819395e-06, "loss": 0.0952, "step": 65730 }, { "epoch": 1.9441651386999468, "grad_norm": 0.6019386053085327, "learning_rate": 3.451046060900455e-06, "loss": 0.0988, "step": 65740 }, { "epoch": 1.9444608741941207, "grad_norm": 0.8611172437667847, "learning_rate": 3.450919370981516e-06, "loss": 0.0895, "step": 65750 }, { "epoch": 1.944756609688295, "grad_norm": 0.976894199848175, "learning_rate": 3.4507926810625763e-06, "loss": 0.0723, "step": 65760 }, { "epoch": 1.9450523451824688, "grad_norm": 0.957743763923645, "learning_rate": 3.450665991143637e-06, "loss": 0.0853, "step": 65770 }, { "epoch": 1.9453480806766428, "grad_norm": 0.9831870198249817, "learning_rate": 3.4505393012246975e-06, "loss": 0.091, "step": 65780 }, { "epoch": 1.9456438161708167, "grad_norm": 0.8816179037094116, "learning_rate": 3.4504126113057583e-06, "loss": 0.0862, "step": 65790 }, { "epoch": 1.9459395516649909, "grad_norm": 0.4802745580673218, "learning_rate": 3.4502859213868187e-06, "loss": 0.0675, "step": 65800 }, { "epoch": 1.9462352871591648, "grad_norm": 0.5130062699317932, "learning_rate": 3.45015923146788e-06, "loss": 0.0824, "step": 65810 }, { "epoch": 1.946531022653339, "grad_norm": 1.119385838508606, "learning_rate": 3.4500325415489402e-06, "loss": 0.0871, "step": 65820 }, { "epoch": 1.946826758147513, "grad_norm": 0.9065943956375122, "learning_rate": 3.449905851630001e-06, "loss": 0.0715, "step": 65830 }, { "epoch": 1.9471224936416869, "grad_norm": 0.7148520946502686, "learning_rate": 3.4497791617110614e-06, "loss": 0.0905, "step": 65840 }, { "epoch": 1.9474182291358608, "grad_norm": 1.2124972343444824, "learning_rate": 3.449652471792122e-06, "loss": 0.074, "step": 65850 }, { "epoch": 1.9477139646300348, "grad_norm": 0.5288719534873962, "learning_rate": 3.4495257818731825e-06, "loss": 0.0536, "step": 65860 }, { "epoch": 1.948009700124209, "grad_norm": 0.5152828097343445, "learning_rate": 3.4493990919542433e-06, "loss": 0.0765, "step": 65870 }, { "epoch": 1.9483054356183829, "grad_norm": 0.5578282475471497, "learning_rate": 3.4492724020353037e-06, "loss": 0.077, "step": 65880 }, { "epoch": 1.948601171112557, "grad_norm": 0.8946805000305176, "learning_rate": 3.4491457121163645e-06, "loss": 0.0662, "step": 65890 }, { "epoch": 1.948896906606731, "grad_norm": 1.2774136066436768, "learning_rate": 3.4490190221974253e-06, "loss": 0.1112, "step": 65900 }, { "epoch": 1.949192642100905, "grad_norm": 0.9498665928840637, "learning_rate": 3.4488923322784856e-06, "loss": 0.0605, "step": 65910 }, { "epoch": 1.9494883775950789, "grad_norm": 1.2779490947723389, "learning_rate": 3.4487656423595464e-06, "loss": 0.1008, "step": 65920 }, { "epoch": 1.9497841130892528, "grad_norm": 1.017303228378296, "learning_rate": 3.448638952440607e-06, "loss": 0.1016, "step": 65930 }, { "epoch": 1.950079848583427, "grad_norm": 0.6828583478927612, "learning_rate": 3.4485122625216676e-06, "loss": 0.0762, "step": 65940 }, { "epoch": 1.9503755840776011, "grad_norm": 0.4687505066394806, "learning_rate": 3.448385572602728e-06, "loss": 0.0659, "step": 65950 }, { "epoch": 1.950671319571775, "grad_norm": 0.7532684206962585, "learning_rate": 3.4482588826837887e-06, "loss": 0.0672, "step": 65960 }, { "epoch": 1.950967055065949, "grad_norm": 0.9120769500732422, "learning_rate": 3.4481321927648495e-06, "loss": 0.0878, "step": 65970 }, { "epoch": 1.951262790560123, "grad_norm": 0.9388498067855835, "learning_rate": 3.4480055028459103e-06, "loss": 0.0985, "step": 65980 }, { "epoch": 1.951558526054297, "grad_norm": 0.5916539430618286, "learning_rate": 3.4478788129269707e-06, "loss": 0.0995, "step": 65990 }, { "epoch": 1.951854261548471, "grad_norm": 0.9786238074302673, "learning_rate": 3.4477521230080315e-06, "loss": 0.0797, "step": 66000 }, { "epoch": 1.952149997042645, "grad_norm": 0.5963929295539856, "learning_rate": 3.447625433089092e-06, "loss": 0.0573, "step": 66010 }, { "epoch": 1.9524457325368192, "grad_norm": 1.0247297286987305, "learning_rate": 3.4474987431701526e-06, "loss": 0.0807, "step": 66020 }, { "epoch": 1.9527414680309931, "grad_norm": 0.735034704208374, "learning_rate": 3.447372053251213e-06, "loss": 0.0736, "step": 66030 }, { "epoch": 1.953037203525167, "grad_norm": 0.5062978267669678, "learning_rate": 3.447245363332274e-06, "loss": 0.0663, "step": 66040 }, { "epoch": 1.953332939019341, "grad_norm": 1.4604541063308716, "learning_rate": 3.4471186734133346e-06, "loss": 0.0733, "step": 66050 }, { "epoch": 1.953628674513515, "grad_norm": 0.8049811124801636, "learning_rate": 3.4469919834943954e-06, "loss": 0.0743, "step": 66060 }, { "epoch": 1.9539244100076891, "grad_norm": 0.6650086045265198, "learning_rate": 3.4468652935754557e-06, "loss": 0.0824, "step": 66070 }, { "epoch": 1.9542201455018633, "grad_norm": 0.9749355316162109, "learning_rate": 3.4467386036565165e-06, "loss": 0.0883, "step": 66080 }, { "epoch": 1.9545158809960372, "grad_norm": 1.1509891748428345, "learning_rate": 3.446611913737577e-06, "loss": 0.0932, "step": 66090 }, { "epoch": 1.9548116164902112, "grad_norm": 0.6043002009391785, "learning_rate": 3.4464852238186377e-06, "loss": 0.0941, "step": 66100 }, { "epoch": 1.9551073519843851, "grad_norm": 1.3845363855361938, "learning_rate": 3.446358533899698e-06, "loss": 0.0926, "step": 66110 }, { "epoch": 1.955403087478559, "grad_norm": 0.8080188632011414, "learning_rate": 3.446231843980759e-06, "loss": 0.0701, "step": 66120 }, { "epoch": 1.9556988229727332, "grad_norm": 1.1077200174331665, "learning_rate": 3.4461051540618196e-06, "loss": 0.104, "step": 66130 }, { "epoch": 1.9559945584669072, "grad_norm": 0.878397524356842, "learning_rate": 3.4459784641428804e-06, "loss": 0.0985, "step": 66140 }, { "epoch": 1.9562902939610813, "grad_norm": 0.9686726927757263, "learning_rate": 3.4458517742239408e-06, "loss": 0.0641, "step": 66150 }, { "epoch": 1.9565860294552553, "grad_norm": 0.9990512728691101, "learning_rate": 3.4457250843050016e-06, "loss": 0.0792, "step": 66160 }, { "epoch": 1.9568817649494292, "grad_norm": 0.8720077276229858, "learning_rate": 3.445598394386062e-06, "loss": 0.073, "step": 66170 }, { "epoch": 1.9571775004436032, "grad_norm": 0.5957579612731934, "learning_rate": 3.4454717044671227e-06, "loss": 0.078, "step": 66180 }, { "epoch": 1.9574732359377771, "grad_norm": 1.1057801246643066, "learning_rate": 3.445345014548183e-06, "loss": 0.0806, "step": 66190 }, { "epoch": 1.9577689714319513, "grad_norm": 0.9056764841079712, "learning_rate": 3.445218324629244e-06, "loss": 0.0707, "step": 66200 }, { "epoch": 1.9580647069261252, "grad_norm": 0.6326918005943298, "learning_rate": 3.4450916347103047e-06, "loss": 0.0952, "step": 66210 }, { "epoch": 1.9583604424202994, "grad_norm": 0.6261048913002014, "learning_rate": 3.4449649447913655e-06, "loss": 0.0844, "step": 66220 }, { "epoch": 1.9586561779144733, "grad_norm": 0.5105950832366943, "learning_rate": 3.444838254872426e-06, "loss": 0.1039, "step": 66230 }, { "epoch": 1.9589519134086473, "grad_norm": 0.8241481184959412, "learning_rate": 3.4447115649534866e-06, "loss": 0.0889, "step": 66240 }, { "epoch": 1.9592476489028212, "grad_norm": 0.5343165993690491, "learning_rate": 3.444584875034547e-06, "loss": 0.0805, "step": 66250 }, { "epoch": 1.9595433843969954, "grad_norm": 0.6324456930160522, "learning_rate": 3.4444581851156078e-06, "loss": 0.0678, "step": 66260 }, { "epoch": 1.9598391198911693, "grad_norm": 0.47869232296943665, "learning_rate": 3.444331495196668e-06, "loss": 0.0778, "step": 66270 }, { "epoch": 1.9601348553853435, "grad_norm": 1.0503355264663696, "learning_rate": 3.444204805277729e-06, "loss": 0.0965, "step": 66280 }, { "epoch": 1.9604305908795174, "grad_norm": 0.6300709247589111, "learning_rate": 3.4440781153587897e-06, "loss": 0.0858, "step": 66290 }, { "epoch": 1.9607263263736914, "grad_norm": 0.6391859650611877, "learning_rate": 3.4439514254398505e-06, "loss": 0.0878, "step": 66300 }, { "epoch": 1.9610220618678653, "grad_norm": 0.5388932824134827, "learning_rate": 3.443824735520911e-06, "loss": 0.0734, "step": 66310 }, { "epoch": 1.9613177973620393, "grad_norm": 0.7141878008842468, "learning_rate": 3.4436980456019712e-06, "loss": 0.0911, "step": 66320 }, { "epoch": 1.9616135328562134, "grad_norm": 0.6279175877571106, "learning_rate": 3.443571355683032e-06, "loss": 0.1046, "step": 66330 }, { "epoch": 1.9619092683503874, "grad_norm": 0.8066928386688232, "learning_rate": 3.4434446657640924e-06, "loss": 0.0837, "step": 66340 }, { "epoch": 1.9622050038445615, "grad_norm": 0.8729102611541748, "learning_rate": 3.443317975845153e-06, "loss": 0.0767, "step": 66350 }, { "epoch": 1.9625007393387355, "grad_norm": 1.0016884803771973, "learning_rate": 3.4431912859262135e-06, "loss": 0.0771, "step": 66360 }, { "epoch": 1.9627964748329094, "grad_norm": 1.1481311321258545, "learning_rate": 3.4430645960072748e-06, "loss": 0.0818, "step": 66370 }, { "epoch": 1.9630922103270834, "grad_norm": 0.9416411519050598, "learning_rate": 3.442937906088335e-06, "loss": 0.1005, "step": 66380 }, { "epoch": 1.9633879458212573, "grad_norm": 0.6780542731285095, "learning_rate": 3.442811216169396e-06, "loss": 0.0834, "step": 66390 }, { "epoch": 1.9636836813154315, "grad_norm": 0.6176682114601135, "learning_rate": 3.4426845262504563e-06, "loss": 0.0828, "step": 66400 }, { "epoch": 1.9639794168096056, "grad_norm": 0.5375052690505981, "learning_rate": 3.442557836331517e-06, "loss": 0.0853, "step": 66410 }, { "epoch": 1.9642751523037796, "grad_norm": 1.0362794399261475, "learning_rate": 3.4424311464125774e-06, "loss": 0.0848, "step": 66420 }, { "epoch": 1.9645708877979535, "grad_norm": 0.6776818037033081, "learning_rate": 3.4423044564936382e-06, "loss": 0.0795, "step": 66430 }, { "epoch": 1.9648666232921275, "grad_norm": 1.1074362993240356, "learning_rate": 3.4421777665746986e-06, "loss": 0.0786, "step": 66440 }, { "epoch": 1.9651623587863014, "grad_norm": 0.6890609264373779, "learning_rate": 3.44205107665576e-06, "loss": 0.087, "step": 66450 }, { "epoch": 1.9654580942804756, "grad_norm": 0.6163840293884277, "learning_rate": 3.44192438673682e-06, "loss": 0.0608, "step": 66460 }, { "epoch": 1.9657538297746495, "grad_norm": 0.6588914394378662, "learning_rate": 3.441797696817881e-06, "loss": 0.0894, "step": 66470 }, { "epoch": 1.9660495652688237, "grad_norm": 1.02361261844635, "learning_rate": 3.4416710068989413e-06, "loss": 0.0926, "step": 66480 }, { "epoch": 1.9663453007629976, "grad_norm": 0.7614907622337341, "learning_rate": 3.441544316980002e-06, "loss": 0.0681, "step": 66490 }, { "epoch": 1.9666410362571716, "grad_norm": 1.2049989700317383, "learning_rate": 3.4414176270610625e-06, "loss": 0.0829, "step": 66500 }, { "epoch": 1.9669367717513455, "grad_norm": 0.7813082933425903, "learning_rate": 3.4412909371421233e-06, "loss": 0.0841, "step": 66510 }, { "epoch": 1.9672325072455195, "grad_norm": 0.7727323174476624, "learning_rate": 3.4411642472231836e-06, "loss": 0.0926, "step": 66520 }, { "epoch": 1.9675282427396936, "grad_norm": 0.8180872797966003, "learning_rate": 3.441037557304245e-06, "loss": 0.0923, "step": 66530 }, { "epoch": 1.9678239782338678, "grad_norm": 0.6318673491477966, "learning_rate": 3.4409108673853052e-06, "loss": 0.0685, "step": 66540 }, { "epoch": 1.9681197137280417, "grad_norm": 1.5942810773849487, "learning_rate": 3.440784177466366e-06, "loss": 0.0907, "step": 66550 }, { "epoch": 1.9684154492222157, "grad_norm": 0.8028586506843567, "learning_rate": 3.4406574875474264e-06, "loss": 0.0736, "step": 66560 }, { "epoch": 1.9687111847163896, "grad_norm": 0.7157976031303406, "learning_rate": 3.440530797628487e-06, "loss": 0.0775, "step": 66570 }, { "epoch": 1.9690069202105636, "grad_norm": 0.8651654720306396, "learning_rate": 3.4404041077095475e-06, "loss": 0.0955, "step": 66580 }, { "epoch": 1.9693026557047377, "grad_norm": 0.3593846261501312, "learning_rate": 3.4402774177906083e-06, "loss": 0.0795, "step": 66590 }, { "epoch": 1.9695983911989117, "grad_norm": 0.8551504015922546, "learning_rate": 3.4401507278716687e-06, "loss": 0.0829, "step": 66600 }, { "epoch": 1.9698941266930858, "grad_norm": 0.7699337005615234, "learning_rate": 3.44002403795273e-06, "loss": 0.0709, "step": 66610 }, { "epoch": 1.9701898621872598, "grad_norm": 0.883095383644104, "learning_rate": 3.4398973480337903e-06, "loss": 0.0929, "step": 66620 }, { "epoch": 1.9704855976814337, "grad_norm": 1.141226053237915, "learning_rate": 3.439770658114851e-06, "loss": 0.0969, "step": 66630 }, { "epoch": 1.9707813331756077, "grad_norm": 1.1281719207763672, "learning_rate": 3.4396439681959114e-06, "loss": 0.0746, "step": 66640 }, { "epoch": 1.9710770686697816, "grad_norm": 1.1800683736801147, "learning_rate": 3.439517278276972e-06, "loss": 0.0919, "step": 66650 }, { "epoch": 1.9713728041639558, "grad_norm": 0.6409802436828613, "learning_rate": 3.4393905883580326e-06, "loss": 0.0733, "step": 66660 }, { "epoch": 1.9716685396581297, "grad_norm": 0.8016213774681091, "learning_rate": 3.4392638984390934e-06, "loss": 0.0797, "step": 66670 }, { "epoch": 1.9719642751523039, "grad_norm": 0.8033866882324219, "learning_rate": 3.4391372085201537e-06, "loss": 0.1033, "step": 66680 }, { "epoch": 1.9722600106464778, "grad_norm": 0.6909334659576416, "learning_rate": 3.439010518601215e-06, "loss": 0.0676, "step": 66690 }, { "epoch": 1.9725557461406518, "grad_norm": 1.1790060997009277, "learning_rate": 3.4388838286822753e-06, "loss": 0.0855, "step": 66700 }, { "epoch": 1.9728514816348257, "grad_norm": 0.5704641342163086, "learning_rate": 3.438757138763336e-06, "loss": 0.0794, "step": 66710 }, { "epoch": 1.9731472171289999, "grad_norm": 0.7563511729240417, "learning_rate": 3.4386304488443965e-06, "loss": 0.0916, "step": 66720 }, { "epoch": 1.9734429526231738, "grad_norm": 1.0589476823806763, "learning_rate": 3.438503758925457e-06, "loss": 0.1125, "step": 66730 }, { "epoch": 1.973738688117348, "grad_norm": 0.5982716679573059, "learning_rate": 3.4383770690065176e-06, "loss": 0.0737, "step": 66740 }, { "epoch": 1.974034423611522, "grad_norm": 0.9293766021728516, "learning_rate": 3.438250379087578e-06, "loss": 0.0892, "step": 66750 }, { "epoch": 1.9743301591056959, "grad_norm": 0.7290042042732239, "learning_rate": 3.4381236891686388e-06, "loss": 0.0724, "step": 66760 }, { "epoch": 1.9746258945998698, "grad_norm": 0.5015333890914917, "learning_rate": 3.4379969992496996e-06, "loss": 0.0702, "step": 66770 }, { "epoch": 1.9749216300940438, "grad_norm": 0.687345027923584, "learning_rate": 3.4378703093307604e-06, "loss": 0.0812, "step": 66780 }, { "epoch": 1.975217365588218, "grad_norm": 1.3897863626480103, "learning_rate": 3.4377436194118207e-06, "loss": 0.0941, "step": 66790 }, { "epoch": 1.9755131010823919, "grad_norm": 0.7039024829864502, "learning_rate": 3.4376169294928815e-06, "loss": 0.0836, "step": 66800 }, { "epoch": 1.975808836576566, "grad_norm": 0.6860908269882202, "learning_rate": 3.437490239573942e-06, "loss": 0.0559, "step": 66810 }, { "epoch": 1.97610457207074, "grad_norm": 1.1271281242370605, "learning_rate": 3.4373635496550027e-06, "loss": 0.095, "step": 66820 }, { "epoch": 1.976400307564914, "grad_norm": 0.7145306468009949, "learning_rate": 3.437236859736063e-06, "loss": 0.0754, "step": 66830 }, { "epoch": 1.9766960430590879, "grad_norm": 1.1149187088012695, "learning_rate": 3.437110169817124e-06, "loss": 0.0993, "step": 66840 }, { "epoch": 1.9769917785532618, "grad_norm": 1.2164947986602783, "learning_rate": 3.4369834798981846e-06, "loss": 0.084, "step": 66850 }, { "epoch": 1.977287514047436, "grad_norm": 0.929245114326477, "learning_rate": 3.4368567899792454e-06, "loss": 0.0652, "step": 66860 }, { "epoch": 1.9775832495416101, "grad_norm": 0.9172244071960449, "learning_rate": 3.4367301000603058e-06, "loss": 0.0913, "step": 66870 }, { "epoch": 1.977878985035784, "grad_norm": 1.410823941230774, "learning_rate": 3.4366034101413666e-06, "loss": 0.0861, "step": 66880 }, { "epoch": 1.978174720529958, "grad_norm": 0.6833034157752991, "learning_rate": 3.436476720222427e-06, "loss": 0.0831, "step": 66890 }, { "epoch": 1.978470456024132, "grad_norm": 0.46796154975891113, "learning_rate": 3.4363500303034877e-06, "loss": 0.0711, "step": 66900 }, { "epoch": 1.978766191518306, "grad_norm": 0.6488729119300842, "learning_rate": 3.436223340384548e-06, "loss": 0.0827, "step": 66910 }, { "epoch": 1.97906192701248, "grad_norm": 0.8110145330429077, "learning_rate": 3.436096650465609e-06, "loss": 0.085, "step": 66920 }, { "epoch": 1.979357662506654, "grad_norm": 1.0370192527770996, "learning_rate": 3.4359699605466697e-06, "loss": 0.1119, "step": 66930 }, { "epoch": 1.9796533980008282, "grad_norm": 0.7061485648155212, "learning_rate": 3.4358432706277304e-06, "loss": 0.0732, "step": 66940 }, { "epoch": 1.9799491334950021, "grad_norm": 0.7647896409034729, "learning_rate": 3.435716580708791e-06, "loss": 0.0792, "step": 66950 }, { "epoch": 1.980244868989176, "grad_norm": 1.103065848350525, "learning_rate": 3.4355898907898516e-06, "loss": 0.0854, "step": 66960 }, { "epoch": 1.98054060448335, "grad_norm": 0.7797220349311829, "learning_rate": 3.435463200870912e-06, "loss": 0.0749, "step": 66970 }, { "epoch": 1.980836339977524, "grad_norm": 0.9289553165435791, "learning_rate": 3.4353365109519728e-06, "loss": 0.0932, "step": 66980 }, { "epoch": 1.9811320754716981, "grad_norm": 0.7988272309303284, "learning_rate": 3.435209821033033e-06, "loss": 0.0733, "step": 66990 }, { "epoch": 1.9814278109658723, "grad_norm": 0.6459926962852478, "learning_rate": 3.435083131114094e-06, "loss": 0.0941, "step": 67000 }, { "epoch": 1.9817235464600462, "grad_norm": 0.7523521184921265, "learning_rate": 3.4349564411951547e-06, "loss": 0.0547, "step": 67010 }, { "epoch": 1.9820192819542202, "grad_norm": 1.3427258729934692, "learning_rate": 3.4348297512762155e-06, "loss": 0.0826, "step": 67020 }, { "epoch": 1.9823150174483941, "grad_norm": 0.6749891638755798, "learning_rate": 3.434703061357276e-06, "loss": 0.0882, "step": 67030 }, { "epoch": 1.982610752942568, "grad_norm": 0.986676037311554, "learning_rate": 3.4345763714383366e-06, "loss": 0.0871, "step": 67040 }, { "epoch": 1.9829064884367422, "grad_norm": 1.0527044534683228, "learning_rate": 3.434449681519397e-06, "loss": 0.0741, "step": 67050 }, { "epoch": 1.9832022239309162, "grad_norm": 0.613672137260437, "learning_rate": 3.434322991600458e-06, "loss": 0.0677, "step": 67060 }, { "epoch": 1.9834979594250903, "grad_norm": 0.857691764831543, "learning_rate": 3.434196301681518e-06, "loss": 0.104, "step": 67070 }, { "epoch": 1.9837936949192643, "grad_norm": 0.7638744711875916, "learning_rate": 3.434069611762579e-06, "loss": 0.0819, "step": 67080 }, { "epoch": 1.9840894304134382, "grad_norm": 1.181654453277588, "learning_rate": 3.4339429218436397e-06, "loss": 0.1067, "step": 67090 }, { "epoch": 1.9843851659076122, "grad_norm": 0.7893800735473633, "learning_rate": 3.4338162319247005e-06, "loss": 0.0742, "step": 67100 }, { "epoch": 1.984680901401786, "grad_norm": 0.9048041701316833, "learning_rate": 3.433689542005761e-06, "loss": 0.0632, "step": 67110 }, { "epoch": 1.9849766368959603, "grad_norm": 0.7443365454673767, "learning_rate": 3.4335628520868217e-06, "loss": 0.0841, "step": 67120 }, { "epoch": 1.9852723723901342, "grad_norm": 0.8418610692024231, "learning_rate": 3.433436162167882e-06, "loss": 0.103, "step": 67130 }, { "epoch": 1.9855681078843084, "grad_norm": 1.0036715269088745, "learning_rate": 3.4333094722489424e-06, "loss": 0.082, "step": 67140 }, { "epoch": 1.9858638433784823, "grad_norm": 0.7932870388031006, "learning_rate": 3.433182782330003e-06, "loss": 0.0958, "step": 67150 }, { "epoch": 1.9861595788726563, "grad_norm": 0.9143384099006653, "learning_rate": 3.4330560924110636e-06, "loss": 0.0822, "step": 67160 }, { "epoch": 1.9864553143668302, "grad_norm": 0.8155956268310547, "learning_rate": 3.432929402492125e-06, "loss": 0.0918, "step": 67170 }, { "epoch": 1.9867510498610044, "grad_norm": 0.8381942510604858, "learning_rate": 3.432802712573185e-06, "loss": 0.0964, "step": 67180 }, { "epoch": 1.9870467853551783, "grad_norm": 0.9421172142028809, "learning_rate": 3.432676022654246e-06, "loss": 0.0797, "step": 67190 }, { "epoch": 1.9873425208493525, "grad_norm": 1.0267900228500366, "learning_rate": 3.4325493327353063e-06, "loss": 0.0827, "step": 67200 }, { "epoch": 1.9876382563435264, "grad_norm": 0.646447479724884, "learning_rate": 3.432422642816367e-06, "loss": 0.0693, "step": 67210 }, { "epoch": 1.9879339918377004, "grad_norm": 0.6939583420753479, "learning_rate": 3.4322959528974275e-06, "loss": 0.0885, "step": 67220 }, { "epoch": 1.9882297273318743, "grad_norm": 0.7073160409927368, "learning_rate": 3.4321692629784883e-06, "loss": 0.1036, "step": 67230 }, { "epoch": 1.9885254628260483, "grad_norm": 0.6531716585159302, "learning_rate": 3.4320425730595486e-06, "loss": 0.0832, "step": 67240 }, { "epoch": 1.9888211983202224, "grad_norm": 0.43573740124702454, "learning_rate": 3.43191588314061e-06, "loss": 0.0627, "step": 67250 }, { "epoch": 1.9891169338143964, "grad_norm": 0.6062765121459961, "learning_rate": 3.43178919322167e-06, "loss": 0.0543, "step": 67260 }, { "epoch": 1.9894126693085705, "grad_norm": 0.8895412087440491, "learning_rate": 3.431662503302731e-06, "loss": 0.087, "step": 67270 }, { "epoch": 1.9897084048027445, "grad_norm": 0.8935640454292297, "learning_rate": 3.4315358133837914e-06, "loss": 0.083, "step": 67280 }, { "epoch": 1.9900041402969184, "grad_norm": 0.6330086588859558, "learning_rate": 3.431409123464852e-06, "loss": 0.0895, "step": 67290 }, { "epoch": 1.9902998757910924, "grad_norm": 0.3761831820011139, "learning_rate": 3.4312824335459125e-06, "loss": 0.0681, "step": 67300 }, { "epoch": 1.9905956112852663, "grad_norm": 0.9677596092224121, "learning_rate": 3.4311557436269733e-06, "loss": 0.0644, "step": 67310 }, { "epoch": 1.9908913467794405, "grad_norm": 1.6001338958740234, "learning_rate": 3.4310290537080337e-06, "loss": 0.0746, "step": 67320 }, { "epoch": 1.9911870822736146, "grad_norm": 0.9164373874664307, "learning_rate": 3.430902363789095e-06, "loss": 0.1025, "step": 67330 }, { "epoch": 1.9914828177677886, "grad_norm": 0.6085959076881409, "learning_rate": 3.4307756738701552e-06, "loss": 0.0989, "step": 67340 }, { "epoch": 1.9917785532619625, "grad_norm": 0.7158206105232239, "learning_rate": 3.430648983951216e-06, "loss": 0.0708, "step": 67350 }, { "epoch": 1.9920742887561365, "grad_norm": 2.1633031368255615, "learning_rate": 3.4305222940322764e-06, "loss": 0.0693, "step": 67360 }, { "epoch": 1.9923700242503104, "grad_norm": 0.6436998844146729, "learning_rate": 3.430395604113337e-06, "loss": 0.0892, "step": 67370 }, { "epoch": 1.9926657597444846, "grad_norm": 1.4863951206207275, "learning_rate": 3.4302689141943976e-06, "loss": 0.0811, "step": 67380 }, { "epoch": 1.9929614952386585, "grad_norm": 0.8805351257324219, "learning_rate": 3.4301422242754583e-06, "loss": 0.085, "step": 67390 }, { "epoch": 1.9932572307328327, "grad_norm": 0.8241816759109497, "learning_rate": 3.4300155343565187e-06, "loss": 0.0794, "step": 67400 }, { "epoch": 1.9935529662270066, "grad_norm": 0.7589578032493591, "learning_rate": 3.42988884443758e-06, "loss": 0.0728, "step": 67410 }, { "epoch": 1.9938487017211806, "grad_norm": 1.0369435548782349, "learning_rate": 3.4297621545186403e-06, "loss": 0.0781, "step": 67420 }, { "epoch": 1.9941444372153545, "grad_norm": 0.5665013790130615, "learning_rate": 3.429635464599701e-06, "loss": 0.0936, "step": 67430 }, { "epoch": 1.9944401727095284, "grad_norm": 0.8973540663719177, "learning_rate": 3.4295087746807614e-06, "loss": 0.0925, "step": 67440 }, { "epoch": 1.9947359082037026, "grad_norm": 0.7363468408584595, "learning_rate": 3.4293820847618222e-06, "loss": 0.0909, "step": 67450 }, { "epoch": 1.9950316436978768, "grad_norm": 0.9129327535629272, "learning_rate": 3.4292553948428826e-06, "loss": 0.0824, "step": 67460 }, { "epoch": 1.9953273791920507, "grad_norm": 1.245479941368103, "learning_rate": 3.4291287049239434e-06, "loss": 0.0959, "step": 67470 }, { "epoch": 1.9956231146862247, "grad_norm": 0.7660197019577026, "learning_rate": 3.4290020150050038e-06, "loss": 0.0828, "step": 67480 }, { "epoch": 1.9959188501803986, "grad_norm": 0.8456180691719055, "learning_rate": 3.428875325086065e-06, "loss": 0.0869, "step": 67490 }, { "epoch": 1.9962145856745726, "grad_norm": 0.8716756105422974, "learning_rate": 3.4287486351671253e-06, "loss": 0.0931, "step": 67500 }, { "epoch": 1.9965103211687467, "grad_norm": 0.6206812858581543, "learning_rate": 3.428621945248186e-06, "loss": 0.0787, "step": 67510 }, { "epoch": 1.9968060566629207, "grad_norm": 0.7428106665611267, "learning_rate": 3.4284952553292465e-06, "loss": 0.0788, "step": 67520 }, { "epoch": 1.9971017921570948, "grad_norm": 1.1989290714263916, "learning_rate": 3.4283685654103073e-06, "loss": 0.0894, "step": 67530 }, { "epoch": 1.9973975276512688, "grad_norm": 0.5709385275840759, "learning_rate": 3.4282418754913676e-06, "loss": 0.0783, "step": 67540 }, { "epoch": 1.9976932631454427, "grad_norm": 1.7587560415267944, "learning_rate": 3.428115185572428e-06, "loss": 0.0708, "step": 67550 }, { "epoch": 1.9979889986396167, "grad_norm": 0.784104585647583, "learning_rate": 3.427988495653489e-06, "loss": 0.0736, "step": 67560 }, { "epoch": 1.9982847341337906, "grad_norm": 0.6662929654121399, "learning_rate": 3.4278618057345496e-06, "loss": 0.0771, "step": 67570 }, { "epoch": 1.9985804696279648, "grad_norm": 0.8662662506103516, "learning_rate": 3.4277351158156104e-06, "loss": 0.0855, "step": 67580 }, { "epoch": 1.9988762051221387, "grad_norm": 0.4818097949028015, "learning_rate": 3.4276084258966707e-06, "loss": 0.0853, "step": 67590 }, { "epoch": 1.9991719406163129, "grad_norm": 1.0978080034255981, "learning_rate": 3.4274817359777315e-06, "loss": 0.09, "step": 67600 }, { "epoch": 1.9994676761104868, "grad_norm": 0.8891754150390625, "learning_rate": 3.427355046058792e-06, "loss": 0.0783, "step": 67610 }, { "epoch": 1.9997634116046608, "grad_norm": 0.7751420736312866, "learning_rate": 3.4272283561398527e-06, "loss": 0.0867, "step": 67620 }, { "epoch": 2.0, "eval_accuracy": 0.6724223974448548, "eval_animal_abuse/accuracy": 0.9948431313837043, "eval_animal_abuse/f1": 0.7683109118086696, "eval_animal_abuse/fpr": 0.002288560562716652, "eval_animal_abuse/precision": 0.7907692307692308, "eval_animal_abuse/recall": 0.747093023255814, "eval_animal_abuse/threshold": 0.42107561230659485, "eval_child_abuse/accuracy": 0.9963569218484879, "eval_child_abuse/f1": 0.669683257918552, "eval_child_abuse/fpr": 0.0018065940683494725, "eval_child_abuse/precision": 0.6727272727272727, "eval_child_abuse/recall": 0.6666666666666666, "eval_child_abuse/threshold": 0.270480215549469, "eval_controversial_topics,politics/accuracy": 0.9669128655554446, "eval_controversial_topics,politics/f1": 0.5164113785557987, "eval_controversial_topics,politics/fpr": 0.02074752883031298, "eval_controversial_topics,politics/precision": 0.4676354029062087, "eval_controversial_topics,politics/recall": 0.5765472312703583, "eval_controversial_topics,politics/threshold": 0.22953519225120544, "eval_discrimination,stereotype,injustice/accuracy": 0.9539874238946002, "eval_discrimination,stereotype,injustice/f1": 0.7215062424486508, "eval_discrimination,stereotype,injustice/fpr": 0.028355080059276342, "eval_discrimination,stereotype,injustice/precision": 0.6954580745341615, "eval_discrimination,stereotype,injustice/recall": 0.749581589958159, "eval_discrimination,stereotype,injustice/threshold": 0.30404168367385864, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.974348737398942, "eval_drug_abuse,weapons,banned_substance/f1": 0.7763921113689095, "eval_drug_abuse,weapons,banned_substance/fpr": 0.014684106614017743, "eval_drug_abuse,weapons,banned_substance/precision": 0.7626780626780627, "eval_drug_abuse,weapons,banned_substance/recall": 0.79060838747785, "eval_drug_abuse,weapons,banned_substance/threshold": 0.49551400542259216, "eval_financial_crime,property_crime,theft/accuracy": 0.9611404997172039, "eval_financial_crime,property_crime,theft/f1": 0.80649436713055, "eval_financial_crime,property_crime,theft/fpr": 0.024934117170079012, "eval_financial_crime,property_crime,theft/precision": 0.7825108503456036, "eval_financial_crime,property_crime,theft/recall": 0.8319945308494274, "eval_financial_crime,property_crime,theft/threshold": 0.37387582659721375, "eval_flagged/accuracy": 0.8552417074225638, "eval_flagged/aucpr": 0.9038576795950924, "eval_flagged/f1": 0.8720519908251485, "eval_flagged/fpr": 0.184000300041255, "eval_flagged/precision": 0.8580480888863169, "eval_flagged/recall": 0.8865205823443245, "eval_hate_speech,offensive_language/accuracy": 0.9454203679675284, "eval_hate_speech,offensive_language/f1": 0.700283182607107, "eval_hate_speech,offensive_language/fpr": 0.0316097204458249, "eval_hate_speech,offensive_language/precision": 0.6890167175984181, "eval_hate_speech,offensive_language/recall": 0.711924219910847, "eval_hate_speech,offensive_language/threshold": 0.272024542093277, "eval_loss": 0.08164986222982407, "eval_macro_f1": 0.6745665244148362, "eval_macro_precision": 0.668596081601965, "eval_macro_recall": 0.684870508652353, "eval_micro_f1": 0.7530226918413303, "eval_micro_precision": 0.7340391401989093, "eval_micro_recall": 0.7730142077280082, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9804870745583392, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.2318271119842829, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.010423858680093611, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.22236180904522612, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.2421340629274966, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.18126320838928223, "eval_non_violent_unethical_behavior/accuracy": 0.8779485643943175, "eval_non_violent_unethical_behavior/f1": 0.6988960479336808, "eval_non_violent_unethical_behavior/fpr": 0.08109118086696544, "eval_non_violent_unethical_behavior/precision": 0.6855325658159569, "eval_non_violent_unethical_behavior/recall": 0.7127908923489034, "eval_non_violent_unethical_behavior/threshold": 0.31658050417900085, "eval_privacy_violation/accuracy": 0.9808530458794956, "eval_privacy_violation/f1": 0.8076215945178005, "eval_privacy_violation/fpr": 0.010516553510184065, "eval_privacy_violation/precision": 0.8007954922108055, "eval_privacy_violation/recall": 0.8145650708024275, "eval_privacy_violation/threshold": 0.44794324040412903, "eval_runtime": 49.4099, "eval_samples_per_second": 1216.64, "eval_self_harm/accuracy": 0.9969890541304854, "eval_self_harm/f1": 0.7468531468531469, "eval_self_harm/fpr": 0.0006364732681227378, "eval_self_harm/precision": 0.8754098360655738, "eval_self_harm/recall": 0.651219512195122, "eval_self_harm/threshold": 0.4472188949584961, "eval_sexually_explicit,adult_content/accuracy": 0.9837974515087999, "eval_sexually_explicit,adult_content/f1": 0.6858064516129032, "eval_sexually_explicit,adult_content/fpr": 0.010056761041130432, "eval_sexually_explicit,adult_content/precision": 0.6430732002419842, "eval_sexually_explicit,adult_content/recall": 0.7346233586731168, "eval_sexually_explicit,adult_content/threshold": 0.3302551507949829, "eval_steps_per_second": 19.025, "eval_terrorism,organized_crime/accuracy": 0.9908839870911934, "eval_terrorism,organized_crime/f1": 0.45742574257425744, "eval_terrorism,organized_crime/fpr": 0.004997233075646026, "eval_terrorism,organized_crime/precision": 0.43667296786389415, "eval_terrorism,organized_crime/recall": 0.4802494802494803, "eval_terrorism,organized_crime/threshold": 0.2107662856578827, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.921665502212463, "eval_violence,aiding_and_abetting,incitement/f1": 0.8564197944933988, "eval_violence,aiding_and_abetting,incitement/fpr": 0.06257649245274452, "eval_violence,aiding_and_abetting,incitement/precision": 0.8357036596251116, "eval_violence,aiding_and_abetting,incitement/recall": 0.8781890945472737, "eval_violence,aiding_and_abetting,incitement/threshold": 0.4335017800331116, "step": 67628 }, { "epoch": 2.0000591470988347, "grad_norm": 0.7637369632720947, "learning_rate": 3.427101666220913e-06, "loss": 0.0758, "step": 67630 }, { "epoch": 2.0003548825930086, "grad_norm": 0.8119154572486877, "learning_rate": 3.426974976301974e-06, "loss": 0.0834, "step": 67640 }, { "epoch": 2.000650618087183, "grad_norm": 0.8045868277549744, "learning_rate": 3.4268482863830346e-06, "loss": 0.0942, "step": 67650 }, { "epoch": 2.000946353581357, "grad_norm": 0.5757815837860107, "learning_rate": 3.4267215964640954e-06, "loss": 0.0803, "step": 67660 }, { "epoch": 2.001242089075531, "grad_norm": 0.7473806738853455, "learning_rate": 3.426594906545156e-06, "loss": 0.0655, "step": 67670 }, { "epoch": 2.001537824569705, "grad_norm": 1.0128505229949951, "learning_rate": 3.4264682166262166e-06, "loss": 0.0797, "step": 67680 }, { "epoch": 2.001833560063879, "grad_norm": 1.1464364528656006, "learning_rate": 3.426341526707277e-06, "loss": 0.0811, "step": 67690 }, { "epoch": 2.0021292955580527, "grad_norm": 1.0778385400772095, "learning_rate": 3.4262148367883377e-06, "loss": 0.0758, "step": 67700 }, { "epoch": 2.0024250310522267, "grad_norm": 1.0377861261367798, "learning_rate": 3.426088146869398e-06, "loss": 0.0863, "step": 67710 }, { "epoch": 2.002720766546401, "grad_norm": 1.0712779760360718, "learning_rate": 3.425961456950459e-06, "loss": 0.0757, "step": 67720 }, { "epoch": 2.003016502040575, "grad_norm": 1.0179067850112915, "learning_rate": 3.4258347670315197e-06, "loss": 0.0737, "step": 67730 }, { "epoch": 2.003312237534749, "grad_norm": 1.1043190956115723, "learning_rate": 3.4257080771125805e-06, "loss": 0.0951, "step": 67740 }, { "epoch": 2.003607973028923, "grad_norm": 0.7690392732620239, "learning_rate": 3.425581387193641e-06, "loss": 0.077, "step": 67750 }, { "epoch": 2.003903708523097, "grad_norm": 0.6294684410095215, "learning_rate": 3.4254546972747016e-06, "loss": 0.0867, "step": 67760 }, { "epoch": 2.004199444017271, "grad_norm": 0.463556170463562, "learning_rate": 3.425328007355762e-06, "loss": 0.0776, "step": 67770 }, { "epoch": 2.004495179511445, "grad_norm": 0.7919016480445862, "learning_rate": 3.4252013174368228e-06, "loss": 0.0694, "step": 67780 }, { "epoch": 2.004790915005619, "grad_norm": 0.6615317463874817, "learning_rate": 3.425074627517883e-06, "loss": 0.0893, "step": 67790 }, { "epoch": 2.005086650499793, "grad_norm": 0.6664357781410217, "learning_rate": 3.424947937598944e-06, "loss": 0.1044, "step": 67800 }, { "epoch": 2.005382385993967, "grad_norm": 0.5499696731567383, "learning_rate": 3.4248212476800047e-06, "loss": 0.0685, "step": 67810 }, { "epoch": 2.005678121488141, "grad_norm": 0.5058571100234985, "learning_rate": 3.4246945577610655e-06, "loss": 0.0829, "step": 67820 }, { "epoch": 2.005973856982315, "grad_norm": 0.9116377234458923, "learning_rate": 3.424567867842126e-06, "loss": 0.0844, "step": 67830 }, { "epoch": 2.006269592476489, "grad_norm": 0.7393741011619568, "learning_rate": 3.4244411779231867e-06, "loss": 0.0961, "step": 67840 }, { "epoch": 2.0065653279706632, "grad_norm": 0.5567277073860168, "learning_rate": 3.424314488004247e-06, "loss": 0.092, "step": 67850 }, { "epoch": 2.006861063464837, "grad_norm": 0.9696742296218872, "learning_rate": 3.424187798085308e-06, "loss": 0.0816, "step": 67860 }, { "epoch": 2.007156798959011, "grad_norm": 0.5495527386665344, "learning_rate": 3.424061108166368e-06, "loss": 0.0857, "step": 67870 }, { "epoch": 2.007452534453185, "grad_norm": 0.836591362953186, "learning_rate": 3.423934418247429e-06, "loss": 0.0741, "step": 67880 }, { "epoch": 2.007748269947359, "grad_norm": 0.7003704309463501, "learning_rate": 3.4238077283284898e-06, "loss": 0.092, "step": 67890 }, { "epoch": 2.008044005441533, "grad_norm": 0.6345576047897339, "learning_rate": 3.4236810384095506e-06, "loss": 0.0779, "step": 67900 }, { "epoch": 2.008339740935707, "grad_norm": 1.9193663597106934, "learning_rate": 3.423554348490611e-06, "loss": 0.0748, "step": 67910 }, { "epoch": 2.0086354764298813, "grad_norm": 0.5904542207717896, "learning_rate": 3.4234276585716717e-06, "loss": 0.0675, "step": 67920 }, { "epoch": 2.008931211924055, "grad_norm": 0.8580060005187988, "learning_rate": 3.423300968652732e-06, "loss": 0.0755, "step": 67930 }, { "epoch": 2.009226947418229, "grad_norm": 0.7564568519592285, "learning_rate": 3.423174278733793e-06, "loss": 0.083, "step": 67940 }, { "epoch": 2.009522682912403, "grad_norm": 1.357109546661377, "learning_rate": 3.4230475888148532e-06, "loss": 0.0797, "step": 67950 }, { "epoch": 2.009818418406577, "grad_norm": 0.8915157318115234, "learning_rate": 3.4229208988959136e-06, "loss": 0.0956, "step": 67960 }, { "epoch": 2.010114153900751, "grad_norm": 0.990952730178833, "learning_rate": 3.422794208976975e-06, "loss": 0.0729, "step": 67970 }, { "epoch": 2.0104098893949254, "grad_norm": 0.7259432673454285, "learning_rate": 3.422667519058035e-06, "loss": 0.0707, "step": 67980 }, { "epoch": 2.0107056248890993, "grad_norm": 0.6545162200927734, "learning_rate": 3.422540829139096e-06, "loss": 0.0827, "step": 67990 }, { "epoch": 2.0110013603832733, "grad_norm": 0.8658719658851624, "learning_rate": 3.4224141392201563e-06, "loss": 0.0815, "step": 68000 }, { "epoch": 2.011297095877447, "grad_norm": 0.5700904726982117, "learning_rate": 3.422287449301217e-06, "loss": 0.0895, "step": 68010 }, { "epoch": 2.011592831371621, "grad_norm": 0.6014273166656494, "learning_rate": 3.4221607593822775e-06, "loss": 0.068, "step": 68020 }, { "epoch": 2.011888566865795, "grad_norm": 0.6630551218986511, "learning_rate": 3.4220340694633383e-06, "loss": 0.0677, "step": 68030 }, { "epoch": 2.012184302359969, "grad_norm": 0.912230908870697, "learning_rate": 3.4219073795443987e-06, "loss": 0.087, "step": 68040 }, { "epoch": 2.0124800378541434, "grad_norm": 0.9976372122764587, "learning_rate": 3.42178068962546e-06, "loss": 0.0739, "step": 68050 }, { "epoch": 2.0127757733483174, "grad_norm": 0.7489490509033203, "learning_rate": 3.4216539997065202e-06, "loss": 0.0917, "step": 68060 }, { "epoch": 2.0130715088424913, "grad_norm": 0.7024515867233276, "learning_rate": 3.421527309787581e-06, "loss": 0.0754, "step": 68070 }, { "epoch": 2.0133672443366653, "grad_norm": 0.9771106839179993, "learning_rate": 3.4214006198686414e-06, "loss": 0.0672, "step": 68080 }, { "epoch": 2.013662979830839, "grad_norm": 0.895363986492157, "learning_rate": 3.421273929949702e-06, "loss": 0.1047, "step": 68090 }, { "epoch": 2.013958715325013, "grad_norm": 1.1849867105484009, "learning_rate": 3.4211472400307625e-06, "loss": 0.0938, "step": 68100 }, { "epoch": 2.0142544508191875, "grad_norm": 1.0817118883132935, "learning_rate": 3.4210205501118233e-06, "loss": 0.0691, "step": 68110 }, { "epoch": 2.0145501863133615, "grad_norm": 0.7053824663162231, "learning_rate": 3.4208938601928837e-06, "loss": 0.0731, "step": 68120 }, { "epoch": 2.0148459218075354, "grad_norm": 0.6852504014968872, "learning_rate": 3.420767170273945e-06, "loss": 0.0656, "step": 68130 }, { "epoch": 2.0151416573017094, "grad_norm": 0.8152490854263306, "learning_rate": 3.4206404803550053e-06, "loss": 0.0865, "step": 68140 }, { "epoch": 2.0154373927958833, "grad_norm": 0.669758141040802, "learning_rate": 3.420513790436066e-06, "loss": 0.0775, "step": 68150 }, { "epoch": 2.0157331282900572, "grad_norm": 0.73555588722229, "learning_rate": 3.4203871005171264e-06, "loss": 0.0716, "step": 68160 }, { "epoch": 2.016028863784231, "grad_norm": 0.4496499300003052, "learning_rate": 3.4202604105981872e-06, "loss": 0.0715, "step": 68170 }, { "epoch": 2.0163245992784056, "grad_norm": 0.7760449051856995, "learning_rate": 3.4201337206792476e-06, "loss": 0.0647, "step": 68180 }, { "epoch": 2.0166203347725795, "grad_norm": 0.6817173361778259, "learning_rate": 3.4200070307603084e-06, "loss": 0.097, "step": 68190 }, { "epoch": 2.0169160702667535, "grad_norm": 1.1024482250213623, "learning_rate": 3.4198803408413687e-06, "loss": 0.0876, "step": 68200 }, { "epoch": 2.0172118057609274, "grad_norm": 0.7637853026390076, "learning_rate": 3.41975365092243e-06, "loss": 0.0814, "step": 68210 }, { "epoch": 2.0175075412551013, "grad_norm": 0.5886203646659851, "learning_rate": 3.4196269610034903e-06, "loss": 0.063, "step": 68220 }, { "epoch": 2.0178032767492753, "grad_norm": 0.9527585506439209, "learning_rate": 3.419500271084551e-06, "loss": 0.0718, "step": 68230 }, { "epoch": 2.0180990122434497, "grad_norm": 1.3168444633483887, "learning_rate": 3.4193735811656115e-06, "loss": 0.0865, "step": 68240 }, { "epoch": 2.0183947477376236, "grad_norm": 0.6838928461074829, "learning_rate": 3.4192468912466723e-06, "loss": 0.0756, "step": 68250 }, { "epoch": 2.0186904832317976, "grad_norm": 0.4766075611114502, "learning_rate": 3.4191202013277326e-06, "loss": 0.0759, "step": 68260 }, { "epoch": 2.0189862187259715, "grad_norm": 0.5867958068847656, "learning_rate": 3.4189935114087934e-06, "loss": 0.0641, "step": 68270 }, { "epoch": 2.0192819542201454, "grad_norm": 1.1451209783554077, "learning_rate": 3.4188668214898538e-06, "loss": 0.066, "step": 68280 }, { "epoch": 2.0195776897143194, "grad_norm": 1.6768838167190552, "learning_rate": 3.418740131570915e-06, "loss": 0.0765, "step": 68290 }, { "epoch": 2.0198734252084933, "grad_norm": 0.4537110924720764, "learning_rate": 3.4186134416519754e-06, "loss": 0.0754, "step": 68300 }, { "epoch": 2.0201691607026677, "grad_norm": 1.0906524658203125, "learning_rate": 3.418486751733036e-06, "loss": 0.0876, "step": 68310 }, { "epoch": 2.0204648961968417, "grad_norm": 0.509300708770752, "learning_rate": 3.4183600618140965e-06, "loss": 0.0611, "step": 68320 }, { "epoch": 2.0207606316910156, "grad_norm": 0.6364713311195374, "learning_rate": 3.4182333718951573e-06, "loss": 0.0459, "step": 68330 }, { "epoch": 2.0210563671851896, "grad_norm": 1.052157998085022, "learning_rate": 3.4181066819762177e-06, "loss": 0.0835, "step": 68340 }, { "epoch": 2.0213521026793635, "grad_norm": 1.0037213563919067, "learning_rate": 3.4179799920572785e-06, "loss": 0.0832, "step": 68350 }, { "epoch": 2.0216478381735374, "grad_norm": 0.8365936279296875, "learning_rate": 3.417853302138339e-06, "loss": 0.0694, "step": 68360 }, { "epoch": 2.0219435736677114, "grad_norm": 0.6717662811279297, "learning_rate": 3.4177266122193996e-06, "loss": 0.0772, "step": 68370 }, { "epoch": 2.0222393091618858, "grad_norm": 1.014197826385498, "learning_rate": 3.4175999223004604e-06, "loss": 0.0634, "step": 68380 }, { "epoch": 2.0225350446560597, "grad_norm": 1.093559741973877, "learning_rate": 3.4174732323815208e-06, "loss": 0.077, "step": 68390 }, { "epoch": 2.0228307801502337, "grad_norm": 1.217341423034668, "learning_rate": 3.4173465424625816e-06, "loss": 0.1043, "step": 68400 }, { "epoch": 2.0231265156444076, "grad_norm": 0.8354248404502869, "learning_rate": 3.417219852543642e-06, "loss": 0.072, "step": 68410 }, { "epoch": 2.0234222511385815, "grad_norm": 0.6587139964103699, "learning_rate": 3.4170931626247027e-06, "loss": 0.0665, "step": 68420 }, { "epoch": 2.0237179866327555, "grad_norm": 1.0081754922866821, "learning_rate": 3.416966472705763e-06, "loss": 0.0684, "step": 68430 }, { "epoch": 2.02401372212693, "grad_norm": 1.0155866146087646, "learning_rate": 3.416839782786824e-06, "loss": 0.0959, "step": 68440 }, { "epoch": 2.024309457621104, "grad_norm": 0.720564603805542, "learning_rate": 3.4167130928678847e-06, "loss": 0.0797, "step": 68450 }, { "epoch": 2.0246051931152778, "grad_norm": 1.160791039466858, "learning_rate": 3.4165864029489455e-06, "loss": 0.0887, "step": 68460 }, { "epoch": 2.0249009286094517, "grad_norm": 0.5949514508247375, "learning_rate": 3.416459713030006e-06, "loss": 0.0611, "step": 68470 }, { "epoch": 2.0251966641036256, "grad_norm": 0.8920325040817261, "learning_rate": 3.4163330231110666e-06, "loss": 0.0676, "step": 68480 }, { "epoch": 2.0254923995977996, "grad_norm": 1.0085415840148926, "learning_rate": 3.416206333192127e-06, "loss": 0.0709, "step": 68490 }, { "epoch": 2.0257881350919735, "grad_norm": 0.8999636769294739, "learning_rate": 3.4160796432731878e-06, "loss": 0.0908, "step": 68500 }, { "epoch": 2.026083870586148, "grad_norm": 0.7169336080551147, "learning_rate": 3.415952953354248e-06, "loss": 0.0866, "step": 68510 }, { "epoch": 2.026379606080322, "grad_norm": 0.2952367961406708, "learning_rate": 3.415826263435309e-06, "loss": 0.0599, "step": 68520 }, { "epoch": 2.026675341574496, "grad_norm": 0.6269505023956299, "learning_rate": 3.4156995735163697e-06, "loss": 0.0825, "step": 68530 }, { "epoch": 2.0269710770686697, "grad_norm": 1.0178953409194946, "learning_rate": 3.4155728835974305e-06, "loss": 0.0767, "step": 68540 }, { "epoch": 2.0272668125628437, "grad_norm": 0.7912213802337646, "learning_rate": 3.415446193678491e-06, "loss": 0.0733, "step": 68550 }, { "epoch": 2.0275625480570176, "grad_norm": 0.6711838245391846, "learning_rate": 3.4153195037595517e-06, "loss": 0.0877, "step": 68560 }, { "epoch": 2.027858283551192, "grad_norm": 0.47526317834854126, "learning_rate": 3.415192813840612e-06, "loss": 0.0718, "step": 68570 }, { "epoch": 2.028154019045366, "grad_norm": 0.7506629824638367, "learning_rate": 3.415066123921673e-06, "loss": 0.083, "step": 68580 }, { "epoch": 2.02844975453954, "grad_norm": 1.1398941278457642, "learning_rate": 3.414939434002733e-06, "loss": 0.0849, "step": 68590 }, { "epoch": 2.028745490033714, "grad_norm": 0.8221340775489807, "learning_rate": 3.414812744083794e-06, "loss": 0.082, "step": 68600 }, { "epoch": 2.029041225527888, "grad_norm": 0.9088895320892334, "learning_rate": 3.4146860541648548e-06, "loss": 0.0871, "step": 68610 }, { "epoch": 2.0293369610220617, "grad_norm": 0.7386672496795654, "learning_rate": 3.4145593642459155e-06, "loss": 0.0672, "step": 68620 }, { "epoch": 2.0296326965162357, "grad_norm": 0.9666950106620789, "learning_rate": 3.414432674326976e-06, "loss": 0.076, "step": 68630 }, { "epoch": 2.02992843201041, "grad_norm": 0.880012571811676, "learning_rate": 3.4143059844080367e-06, "loss": 0.0822, "step": 68640 }, { "epoch": 2.030224167504584, "grad_norm": 0.7924299836158752, "learning_rate": 3.414179294489097e-06, "loss": 0.0923, "step": 68650 }, { "epoch": 2.030519902998758, "grad_norm": 0.7087775468826294, "learning_rate": 3.414052604570158e-06, "loss": 0.0709, "step": 68660 }, { "epoch": 2.030815638492932, "grad_norm": 0.7745292782783508, "learning_rate": 3.4139259146512182e-06, "loss": 0.0636, "step": 68670 }, { "epoch": 2.031111373987106, "grad_norm": 0.745305061340332, "learning_rate": 3.413799224732279e-06, "loss": 0.066, "step": 68680 }, { "epoch": 2.03140710948128, "grad_norm": 0.5474061965942383, "learning_rate": 3.41367253481334e-06, "loss": 0.0698, "step": 68690 }, { "epoch": 2.031702844975454, "grad_norm": 0.678064227104187, "learning_rate": 3.4135458448944006e-06, "loss": 0.0699, "step": 68700 }, { "epoch": 2.031998580469628, "grad_norm": 0.5574472546577454, "learning_rate": 3.413419154975461e-06, "loss": 0.0744, "step": 68710 }, { "epoch": 2.032294315963802, "grad_norm": 0.7655063271522522, "learning_rate": 3.4132924650565217e-06, "loss": 0.0592, "step": 68720 }, { "epoch": 2.032590051457976, "grad_norm": 0.9991351962089539, "learning_rate": 3.413165775137582e-06, "loss": 0.0669, "step": 68730 }, { "epoch": 2.03288578695215, "grad_norm": 0.7455460429191589, "learning_rate": 3.413039085218643e-06, "loss": 0.0852, "step": 68740 }, { "epoch": 2.033181522446324, "grad_norm": 0.9885822534561157, "learning_rate": 3.4129123952997033e-06, "loss": 0.074, "step": 68750 }, { "epoch": 2.033477257940498, "grad_norm": 0.6148245334625244, "learning_rate": 3.412785705380764e-06, "loss": 0.0716, "step": 68760 }, { "epoch": 2.033772993434672, "grad_norm": 0.7294507622718811, "learning_rate": 3.412659015461825e-06, "loss": 0.0694, "step": 68770 }, { "epoch": 2.034068728928846, "grad_norm": 0.7685104608535767, "learning_rate": 3.4125323255428856e-06, "loss": 0.0789, "step": 68780 }, { "epoch": 2.03436446442302, "grad_norm": 0.6503279805183411, "learning_rate": 3.412405635623946e-06, "loss": 0.0793, "step": 68790 }, { "epoch": 2.034660199917194, "grad_norm": 0.8826162815093994, "learning_rate": 3.4122789457050064e-06, "loss": 0.0861, "step": 68800 }, { "epoch": 2.034955935411368, "grad_norm": 0.9120293855667114, "learning_rate": 3.412152255786067e-06, "loss": 0.0712, "step": 68810 }, { "epoch": 2.035251670905542, "grad_norm": 0.5268052816390991, "learning_rate": 3.4120255658671275e-06, "loss": 0.0604, "step": 68820 }, { "epoch": 2.035547406399716, "grad_norm": 0.9022212028503418, "learning_rate": 3.4118988759481883e-06, "loss": 0.0674, "step": 68830 }, { "epoch": 2.0358431418938903, "grad_norm": 0.9471805095672607, "learning_rate": 3.4117721860292487e-06, "loss": 0.1059, "step": 68840 }, { "epoch": 2.036138877388064, "grad_norm": 0.6062988042831421, "learning_rate": 3.41164549611031e-06, "loss": 0.0868, "step": 68850 }, { "epoch": 2.036434612882238, "grad_norm": 1.0128757953643799, "learning_rate": 3.4115188061913703e-06, "loss": 0.0764, "step": 68860 }, { "epoch": 2.036730348376412, "grad_norm": 0.7446255087852478, "learning_rate": 3.411392116272431e-06, "loss": 0.0786, "step": 68870 }, { "epoch": 2.037026083870586, "grad_norm": 1.0082452297210693, "learning_rate": 3.4112654263534914e-06, "loss": 0.0699, "step": 68880 }, { "epoch": 2.03732181936476, "grad_norm": 0.7863815426826477, "learning_rate": 3.411138736434552e-06, "loss": 0.1001, "step": 68890 }, { "epoch": 2.0376175548589344, "grad_norm": 0.575255274772644, "learning_rate": 3.4110120465156126e-06, "loss": 0.0771, "step": 68900 }, { "epoch": 2.0379132903531083, "grad_norm": 2.101402997970581, "learning_rate": 3.4108853565966734e-06, "loss": 0.0698, "step": 68910 }, { "epoch": 2.0382090258472823, "grad_norm": 1.5995118618011475, "learning_rate": 3.4107586666777337e-06, "loss": 0.0868, "step": 68920 }, { "epoch": 2.038504761341456, "grad_norm": 0.6584615111351013, "learning_rate": 3.410631976758795e-06, "loss": 0.0742, "step": 68930 }, { "epoch": 2.03880049683563, "grad_norm": 0.7026601433753967, "learning_rate": 3.4105052868398553e-06, "loss": 0.0973, "step": 68940 }, { "epoch": 2.039096232329804, "grad_norm": 0.7193909287452698, "learning_rate": 3.410378596920916e-06, "loss": 0.0875, "step": 68950 }, { "epoch": 2.039391967823978, "grad_norm": 1.3471206426620483, "learning_rate": 3.4102519070019765e-06, "loss": 0.0834, "step": 68960 }, { "epoch": 2.0396877033181524, "grad_norm": 0.41394567489624023, "learning_rate": 3.4101252170830373e-06, "loss": 0.0682, "step": 68970 }, { "epoch": 2.0399834388123264, "grad_norm": 0.7882623672485352, "learning_rate": 3.4099985271640976e-06, "loss": 0.0759, "step": 68980 }, { "epoch": 2.0402791743065003, "grad_norm": 0.9046436548233032, "learning_rate": 3.4098718372451584e-06, "loss": 0.0968, "step": 68990 }, { "epoch": 2.0405749098006742, "grad_norm": 0.7612646222114563, "learning_rate": 3.4097451473262188e-06, "loss": 0.078, "step": 69000 }, { "epoch": 2.040870645294848, "grad_norm": 0.805019736289978, "learning_rate": 3.40961845740728e-06, "loss": 0.0767, "step": 69010 }, { "epoch": 2.041166380789022, "grad_norm": 0.4913070499897003, "learning_rate": 3.4094917674883404e-06, "loss": 0.0647, "step": 69020 }, { "epoch": 2.0414621162831965, "grad_norm": 0.7108134627342224, "learning_rate": 3.409365077569401e-06, "loss": 0.0538, "step": 69030 }, { "epoch": 2.0417578517773705, "grad_norm": 0.6242770552635193, "learning_rate": 3.4092383876504615e-06, "loss": 0.0796, "step": 69040 }, { "epoch": 2.0420535872715444, "grad_norm": 0.9417688846588135, "learning_rate": 3.4091116977315223e-06, "loss": 0.0964, "step": 69050 }, { "epoch": 2.0423493227657183, "grad_norm": 0.8610933423042297, "learning_rate": 3.4089850078125827e-06, "loss": 0.0686, "step": 69060 }, { "epoch": 2.0426450582598923, "grad_norm": 0.8185065388679504, "learning_rate": 3.4088583178936435e-06, "loss": 0.066, "step": 69070 }, { "epoch": 2.0429407937540662, "grad_norm": 0.8640707731246948, "learning_rate": 3.408731627974704e-06, "loss": 0.073, "step": 69080 }, { "epoch": 2.04323652924824, "grad_norm": 0.9656480550765991, "learning_rate": 3.408604938055765e-06, "loss": 0.0782, "step": 69090 }, { "epoch": 2.0435322647424146, "grad_norm": 0.9812491536140442, "learning_rate": 3.4084782481368254e-06, "loss": 0.0934, "step": 69100 }, { "epoch": 2.0438280002365885, "grad_norm": 1.245199203491211, "learning_rate": 3.408351558217886e-06, "loss": 0.092, "step": 69110 }, { "epoch": 2.0441237357307624, "grad_norm": 0.8230035901069641, "learning_rate": 3.4082248682989466e-06, "loss": 0.062, "step": 69120 }, { "epoch": 2.0444194712249364, "grad_norm": 0.5979685187339783, "learning_rate": 3.4080981783800073e-06, "loss": 0.0704, "step": 69130 }, { "epoch": 2.0447152067191103, "grad_norm": 0.8337492942810059, "learning_rate": 3.4079714884610677e-06, "loss": 0.0865, "step": 69140 }, { "epoch": 2.0450109422132843, "grad_norm": 0.8750213384628296, "learning_rate": 3.4078447985421285e-06, "loss": 0.0783, "step": 69150 }, { "epoch": 2.0453066777074587, "grad_norm": 0.7476868033409119, "learning_rate": 3.407718108623189e-06, "loss": 0.0707, "step": 69160 }, { "epoch": 2.0456024132016326, "grad_norm": 0.9489940404891968, "learning_rate": 3.40759141870425e-06, "loss": 0.0718, "step": 69170 }, { "epoch": 2.0458981486958066, "grad_norm": 1.0703277587890625, "learning_rate": 3.4074647287853104e-06, "loss": 0.0853, "step": 69180 }, { "epoch": 2.0461938841899805, "grad_norm": 1.092885136604309, "learning_rate": 3.4073380388663712e-06, "loss": 0.0841, "step": 69190 }, { "epoch": 2.0464896196841544, "grad_norm": 0.6334011554718018, "learning_rate": 3.4072113489474316e-06, "loss": 0.0766, "step": 69200 }, { "epoch": 2.0467853551783284, "grad_norm": 0.8216009736061096, "learning_rate": 3.407084659028492e-06, "loss": 0.0749, "step": 69210 }, { "epoch": 2.0470810906725023, "grad_norm": 0.7554314732551575, "learning_rate": 3.4069579691095528e-06, "loss": 0.066, "step": 69220 }, { "epoch": 2.0473768261666767, "grad_norm": 1.2884000539779663, "learning_rate": 3.406831279190613e-06, "loss": 0.08, "step": 69230 }, { "epoch": 2.0476725616608507, "grad_norm": 0.7178453207015991, "learning_rate": 3.406704589271674e-06, "loss": 0.0884, "step": 69240 }, { "epoch": 2.0479682971550246, "grad_norm": 0.875327467918396, "learning_rate": 3.4065778993527347e-06, "loss": 0.0862, "step": 69250 }, { "epoch": 2.0482640326491985, "grad_norm": 0.5864644646644592, "learning_rate": 3.4064512094337955e-06, "loss": 0.06, "step": 69260 }, { "epoch": 2.0485597681433725, "grad_norm": 0.7747300267219543, "learning_rate": 3.406324519514856e-06, "loss": 0.0861, "step": 69270 }, { "epoch": 2.0488555036375464, "grad_norm": 0.9146130084991455, "learning_rate": 3.4061978295959166e-06, "loss": 0.0845, "step": 69280 }, { "epoch": 2.049151239131721, "grad_norm": 1.0396941900253296, "learning_rate": 3.406071139676977e-06, "loss": 0.0931, "step": 69290 }, { "epoch": 2.0494469746258948, "grad_norm": 1.0686591863632202, "learning_rate": 3.405944449758038e-06, "loss": 0.0967, "step": 69300 }, { "epoch": 2.0497427101200687, "grad_norm": 0.8131487965583801, "learning_rate": 3.405817759839098e-06, "loss": 0.0868, "step": 69310 }, { "epoch": 2.0500384456142426, "grad_norm": 1.125252604484558, "learning_rate": 3.405691069920159e-06, "loss": 0.0751, "step": 69320 }, { "epoch": 2.0503341811084166, "grad_norm": 0.7948513627052307, "learning_rate": 3.4055643800012197e-06, "loss": 0.0562, "step": 69330 }, { "epoch": 2.0506299166025905, "grad_norm": 1.035619854927063, "learning_rate": 3.4054376900822805e-06, "loss": 0.0746, "step": 69340 }, { "epoch": 2.0509256520967645, "grad_norm": 0.5786420106887817, "learning_rate": 3.405311000163341e-06, "loss": 0.0801, "step": 69350 }, { "epoch": 2.051221387590939, "grad_norm": 0.7285149693489075, "learning_rate": 3.4051843102444017e-06, "loss": 0.0644, "step": 69360 }, { "epoch": 2.051517123085113, "grad_norm": 0.8731911778450012, "learning_rate": 3.405057620325462e-06, "loss": 0.0602, "step": 69370 }, { "epoch": 2.0518128585792867, "grad_norm": 0.6909586191177368, "learning_rate": 3.404930930406523e-06, "loss": 0.0686, "step": 69380 }, { "epoch": 2.0521085940734607, "grad_norm": 1.5946784019470215, "learning_rate": 3.404804240487583e-06, "loss": 0.0941, "step": 69390 }, { "epoch": 2.0524043295676346, "grad_norm": 0.7857334017753601, "learning_rate": 3.404677550568644e-06, "loss": 0.0857, "step": 69400 }, { "epoch": 2.0527000650618086, "grad_norm": 0.5073606371879578, "learning_rate": 3.404550860649705e-06, "loss": 0.0765, "step": 69410 }, { "epoch": 2.0529958005559825, "grad_norm": 0.5585498213768005, "learning_rate": 3.4044241707307656e-06, "loss": 0.0874, "step": 69420 }, { "epoch": 2.053291536050157, "grad_norm": 0.7117495536804199, "learning_rate": 3.404297480811826e-06, "loss": 0.0693, "step": 69430 }, { "epoch": 2.053587271544331, "grad_norm": 0.9956152439117432, "learning_rate": 3.4041707908928867e-06, "loss": 0.102, "step": 69440 }, { "epoch": 2.053883007038505, "grad_norm": 0.6226698756217957, "learning_rate": 3.404044100973947e-06, "loss": 0.0783, "step": 69450 }, { "epoch": 2.0541787425326787, "grad_norm": 0.9252637624740601, "learning_rate": 3.403917411055008e-06, "loss": 0.0643, "step": 69460 }, { "epoch": 2.0544744780268527, "grad_norm": 0.5133211612701416, "learning_rate": 3.4037907211360683e-06, "loss": 0.071, "step": 69470 }, { "epoch": 2.0547702135210266, "grad_norm": 1.6769721508026123, "learning_rate": 3.403664031217129e-06, "loss": 0.0876, "step": 69480 }, { "epoch": 2.055065949015201, "grad_norm": 1.0238288640975952, "learning_rate": 3.40353734129819e-06, "loss": 0.0802, "step": 69490 }, { "epoch": 2.055361684509375, "grad_norm": 0.8303462862968445, "learning_rate": 3.4034106513792506e-06, "loss": 0.0824, "step": 69500 }, { "epoch": 2.055657420003549, "grad_norm": 0.6628228425979614, "learning_rate": 3.403283961460311e-06, "loss": 0.0891, "step": 69510 }, { "epoch": 2.055953155497723, "grad_norm": 0.5179913640022278, "learning_rate": 3.4031572715413718e-06, "loss": 0.0646, "step": 69520 }, { "epoch": 2.056248890991897, "grad_norm": 1.2726634740829468, "learning_rate": 3.403030581622432e-06, "loss": 0.0623, "step": 69530 }, { "epoch": 2.0565446264860707, "grad_norm": 0.7698820233345032, "learning_rate": 3.402903891703493e-06, "loss": 0.0798, "step": 69540 }, { "epoch": 2.0568403619802447, "grad_norm": 0.9229866862297058, "learning_rate": 3.4027772017845533e-06, "loss": 0.0969, "step": 69550 }, { "epoch": 2.057136097474419, "grad_norm": 0.713348388671875, "learning_rate": 3.402650511865614e-06, "loss": 0.0731, "step": 69560 }, { "epoch": 2.057431832968593, "grad_norm": 0.7415651082992554, "learning_rate": 3.402523821946675e-06, "loss": 0.0659, "step": 69570 }, { "epoch": 2.057727568462767, "grad_norm": 0.6965618133544922, "learning_rate": 3.4023971320277357e-06, "loss": 0.079, "step": 69580 }, { "epoch": 2.058023303956941, "grad_norm": 0.5533750653266907, "learning_rate": 3.402270442108796e-06, "loss": 0.0729, "step": 69590 }, { "epoch": 2.058319039451115, "grad_norm": 1.4121735095977783, "learning_rate": 3.402143752189857e-06, "loss": 0.0693, "step": 69600 }, { "epoch": 2.0586147749452888, "grad_norm": 0.8677055835723877, "learning_rate": 3.402017062270917e-06, "loss": 0.0867, "step": 69610 }, { "epoch": 2.058910510439463, "grad_norm": 0.44247955083847046, "learning_rate": 3.4018903723519776e-06, "loss": 0.0746, "step": 69620 }, { "epoch": 2.059206245933637, "grad_norm": 1.3670822381973267, "learning_rate": 3.4017636824330383e-06, "loss": 0.0675, "step": 69630 }, { "epoch": 2.059501981427811, "grad_norm": 1.181177020072937, "learning_rate": 3.4016369925140987e-06, "loss": 0.099, "step": 69640 }, { "epoch": 2.059797716921985, "grad_norm": 0.8855575323104858, "learning_rate": 3.40151030259516e-06, "loss": 0.0783, "step": 69650 }, { "epoch": 2.060093452416159, "grad_norm": 0.7122086882591248, "learning_rate": 3.4013836126762203e-06, "loss": 0.0642, "step": 69660 }, { "epoch": 2.060389187910333, "grad_norm": 1.0572419166564941, "learning_rate": 3.401256922757281e-06, "loss": 0.0873, "step": 69670 }, { "epoch": 2.060684923404507, "grad_norm": 1.067826509475708, "learning_rate": 3.4011302328383414e-06, "loss": 0.0794, "step": 69680 }, { "epoch": 2.060980658898681, "grad_norm": 0.5913927555084229, "learning_rate": 3.4010035429194022e-06, "loss": 0.112, "step": 69690 }, { "epoch": 2.061276394392855, "grad_norm": 0.8321560621261597, "learning_rate": 3.4008768530004626e-06, "loss": 0.0757, "step": 69700 }, { "epoch": 2.061572129887029, "grad_norm": 0.7274037599563599, "learning_rate": 3.4007501630815234e-06, "loss": 0.0711, "step": 69710 }, { "epoch": 2.061867865381203, "grad_norm": 0.714763343334198, "learning_rate": 3.4006234731625838e-06, "loss": 0.0783, "step": 69720 }, { "epoch": 2.062163600875377, "grad_norm": 0.8805219531059265, "learning_rate": 3.400496783243645e-06, "loss": 0.0744, "step": 69730 }, { "epoch": 2.062459336369551, "grad_norm": 0.539956271648407, "learning_rate": 3.4003700933247053e-06, "loss": 0.0786, "step": 69740 }, { "epoch": 2.062755071863725, "grad_norm": 0.5145081877708435, "learning_rate": 3.400243403405766e-06, "loss": 0.0752, "step": 69750 }, { "epoch": 2.0630508073578993, "grad_norm": 1.1679208278656006, "learning_rate": 3.4001167134868265e-06, "loss": 0.0845, "step": 69760 }, { "epoch": 2.063346542852073, "grad_norm": 0.572283148765564, "learning_rate": 3.3999900235678873e-06, "loss": 0.067, "step": 69770 }, { "epoch": 2.063642278346247, "grad_norm": 0.5427298545837402, "learning_rate": 3.3998633336489476e-06, "loss": 0.0694, "step": 69780 }, { "epoch": 2.063938013840421, "grad_norm": 0.5156599879264832, "learning_rate": 3.3997366437300084e-06, "loss": 0.0841, "step": 69790 }, { "epoch": 2.064233749334595, "grad_norm": 0.757167398929596, "learning_rate": 3.399609953811069e-06, "loss": 0.0756, "step": 69800 }, { "epoch": 2.064529484828769, "grad_norm": 0.5487842559814453, "learning_rate": 3.39948326389213e-06, "loss": 0.0848, "step": 69810 }, { "epoch": 2.0648252203229434, "grad_norm": 0.8029923439025879, "learning_rate": 3.3993565739731904e-06, "loss": 0.0731, "step": 69820 }, { "epoch": 2.0651209558171173, "grad_norm": 0.7733016014099121, "learning_rate": 3.399229884054251e-06, "loss": 0.0718, "step": 69830 }, { "epoch": 2.0654166913112912, "grad_norm": 0.9379956722259521, "learning_rate": 3.3991031941353115e-06, "loss": 0.0868, "step": 69840 }, { "epoch": 2.065712426805465, "grad_norm": 0.5784285664558411, "learning_rate": 3.3989765042163723e-06, "loss": 0.0771, "step": 69850 }, { "epoch": 2.066008162299639, "grad_norm": 0.5513434410095215, "learning_rate": 3.3988498142974327e-06, "loss": 0.0921, "step": 69860 }, { "epoch": 2.066303897793813, "grad_norm": 0.6276763081550598, "learning_rate": 3.3987231243784935e-06, "loss": 0.0719, "step": 69870 }, { "epoch": 2.066599633287987, "grad_norm": 1.067735195159912, "learning_rate": 3.398596434459554e-06, "loss": 0.07, "step": 69880 }, { "epoch": 2.0668953687821614, "grad_norm": 0.6402254104614258, "learning_rate": 3.398469744540615e-06, "loss": 0.0712, "step": 69890 }, { "epoch": 2.0671911042763353, "grad_norm": 0.6966906189918518, "learning_rate": 3.3983430546216754e-06, "loss": 0.0854, "step": 69900 }, { "epoch": 2.0674868397705093, "grad_norm": 0.7879645228385925, "learning_rate": 3.3982163647027362e-06, "loss": 0.0745, "step": 69910 }, { "epoch": 2.0677825752646832, "grad_norm": 0.7398843169212341, "learning_rate": 3.3980896747837966e-06, "loss": 0.0804, "step": 69920 }, { "epoch": 2.068078310758857, "grad_norm": 0.7717393040657043, "learning_rate": 3.3979629848648574e-06, "loss": 0.0773, "step": 69930 }, { "epoch": 2.068374046253031, "grad_norm": 0.7431049346923828, "learning_rate": 3.3978362949459177e-06, "loss": 0.0825, "step": 69940 }, { "epoch": 2.0686697817472055, "grad_norm": 0.6109124422073364, "learning_rate": 3.3977096050269785e-06, "loss": 0.0752, "step": 69950 }, { "epoch": 2.0689655172413794, "grad_norm": 0.9211262464523315, "learning_rate": 3.397582915108039e-06, "loss": 0.0731, "step": 69960 }, { "epoch": 2.0692612527355534, "grad_norm": 1.0075753927230835, "learning_rate": 3.3974562251891e-06, "loss": 0.0798, "step": 69970 }, { "epoch": 2.0695569882297273, "grad_norm": 0.9242764115333557, "learning_rate": 3.3973295352701605e-06, "loss": 0.0808, "step": 69980 }, { "epoch": 2.0698527237239013, "grad_norm": 0.8547878861427307, "learning_rate": 3.3972028453512213e-06, "loss": 0.0899, "step": 69990 }, { "epoch": 2.070148459218075, "grad_norm": 0.9286977648735046, "learning_rate": 3.3970761554322816e-06, "loss": 0.0721, "step": 70000 }, { "epoch": 2.070444194712249, "grad_norm": 0.7721636891365051, "learning_rate": 3.3969494655133424e-06, "loss": 0.1071, "step": 70010 }, { "epoch": 2.0707399302064236, "grad_norm": 0.6364922523498535, "learning_rate": 3.3968227755944028e-06, "loss": 0.0727, "step": 70020 }, { "epoch": 2.0710356657005975, "grad_norm": 1.1567354202270508, "learning_rate": 3.396696085675463e-06, "loss": 0.0756, "step": 70030 }, { "epoch": 2.0713314011947714, "grad_norm": 0.7516201138496399, "learning_rate": 3.396569395756524e-06, "loss": 0.0836, "step": 70040 }, { "epoch": 2.0716271366889454, "grad_norm": 0.5133593678474426, "learning_rate": 3.3964427058375847e-06, "loss": 0.0771, "step": 70050 }, { "epoch": 2.0719228721831193, "grad_norm": 0.642315685749054, "learning_rate": 3.3963160159186455e-06, "loss": 0.0738, "step": 70060 }, { "epoch": 2.0722186076772933, "grad_norm": 0.9572287797927856, "learning_rate": 3.396189325999706e-06, "loss": 0.078, "step": 70070 }, { "epoch": 2.0725143431714677, "grad_norm": 0.8053346872329712, "learning_rate": 3.3960626360807667e-06, "loss": 0.0606, "step": 70080 }, { "epoch": 2.0728100786656416, "grad_norm": 1.1282986402511597, "learning_rate": 3.395935946161827e-06, "loss": 0.0687, "step": 70090 }, { "epoch": 2.0731058141598155, "grad_norm": 1.2636722326278687, "learning_rate": 3.395809256242888e-06, "loss": 0.0837, "step": 70100 }, { "epoch": 2.0734015496539895, "grad_norm": 0.9784166216850281, "learning_rate": 3.395682566323948e-06, "loss": 0.0903, "step": 70110 }, { "epoch": 2.0736972851481634, "grad_norm": 0.614781379699707, "learning_rate": 3.395555876405009e-06, "loss": 0.0629, "step": 70120 }, { "epoch": 2.0739930206423374, "grad_norm": 0.8028191924095154, "learning_rate": 3.3954291864860698e-06, "loss": 0.0598, "step": 70130 }, { "epoch": 2.0742887561365113, "grad_norm": 0.5984654426574707, "learning_rate": 3.3953024965671306e-06, "loss": 0.0815, "step": 70140 }, { "epoch": 2.0745844916306857, "grad_norm": 0.5301780700683594, "learning_rate": 3.395175806648191e-06, "loss": 0.0612, "step": 70150 }, { "epoch": 2.0748802271248596, "grad_norm": 0.7071959972381592, "learning_rate": 3.3950491167292517e-06, "loss": 0.0805, "step": 70160 }, { "epoch": 2.0751759626190336, "grad_norm": 1.1744345426559448, "learning_rate": 3.394922426810312e-06, "loss": 0.0786, "step": 70170 }, { "epoch": 2.0754716981132075, "grad_norm": 1.1203982830047607, "learning_rate": 3.394795736891373e-06, "loss": 0.0664, "step": 70180 }, { "epoch": 2.0757674336073815, "grad_norm": 1.294910192489624, "learning_rate": 3.3946690469724332e-06, "loss": 0.0842, "step": 70190 }, { "epoch": 2.0760631691015554, "grad_norm": 1.6241123676300049, "learning_rate": 3.394542357053494e-06, "loss": 0.0838, "step": 70200 }, { "epoch": 2.07635890459573, "grad_norm": 0.6786664724349976, "learning_rate": 3.394415667134555e-06, "loss": 0.0751, "step": 70210 }, { "epoch": 2.0766546400899037, "grad_norm": 0.9170427322387695, "learning_rate": 3.3942889772156156e-06, "loss": 0.0595, "step": 70220 }, { "epoch": 2.0769503755840777, "grad_norm": 1.1024211645126343, "learning_rate": 3.394162287296676e-06, "loss": 0.0616, "step": 70230 }, { "epoch": 2.0772461110782516, "grad_norm": 0.8261524438858032, "learning_rate": 3.3940355973777368e-06, "loss": 0.0845, "step": 70240 }, { "epoch": 2.0775418465724256, "grad_norm": 1.0439629554748535, "learning_rate": 3.393908907458797e-06, "loss": 0.0827, "step": 70250 }, { "epoch": 2.0778375820665995, "grad_norm": 1.0728328227996826, "learning_rate": 3.393782217539858e-06, "loss": 0.072, "step": 70260 }, { "epoch": 2.0781333175607735, "grad_norm": 1.2937895059585571, "learning_rate": 3.3936555276209183e-06, "loss": 0.0553, "step": 70270 }, { "epoch": 2.078429053054948, "grad_norm": 0.9685639142990112, "learning_rate": 3.393528837701979e-06, "loss": 0.0727, "step": 70280 }, { "epoch": 2.078724788549122, "grad_norm": 1.0061064958572388, "learning_rate": 3.39340214778304e-06, "loss": 0.0834, "step": 70290 }, { "epoch": 2.0790205240432957, "grad_norm": 0.7545387744903564, "learning_rate": 3.3932754578641007e-06, "loss": 0.0881, "step": 70300 }, { "epoch": 2.0793162595374697, "grad_norm": 0.6984307765960693, "learning_rate": 3.393148767945161e-06, "loss": 0.0928, "step": 70310 }, { "epoch": 2.0796119950316436, "grad_norm": 0.9415621757507324, "learning_rate": 3.393022078026222e-06, "loss": 0.0648, "step": 70320 }, { "epoch": 2.0799077305258176, "grad_norm": 0.8191342353820801, "learning_rate": 3.392895388107282e-06, "loss": 0.07, "step": 70330 }, { "epoch": 2.0802034660199915, "grad_norm": 1.200334906578064, "learning_rate": 3.392768698188343e-06, "loss": 0.1004, "step": 70340 }, { "epoch": 2.080499201514166, "grad_norm": 0.9837552905082703, "learning_rate": 3.3926420082694033e-06, "loss": 0.0802, "step": 70350 }, { "epoch": 2.08079493700834, "grad_norm": 0.8159943222999573, "learning_rate": 3.392515318350464e-06, "loss": 0.0807, "step": 70360 }, { "epoch": 2.081090672502514, "grad_norm": 0.6290460824966431, "learning_rate": 3.392388628431525e-06, "loss": 0.0611, "step": 70370 }, { "epoch": 2.0813864079966877, "grad_norm": 0.7507573366165161, "learning_rate": 3.3922619385125857e-06, "loss": 0.0628, "step": 70380 }, { "epoch": 2.0816821434908617, "grad_norm": 1.4212898015975952, "learning_rate": 3.392135248593646e-06, "loss": 0.0781, "step": 70390 }, { "epoch": 2.0819778789850356, "grad_norm": 1.1918084621429443, "learning_rate": 3.392008558674707e-06, "loss": 0.0819, "step": 70400 }, { "epoch": 2.08227361447921, "grad_norm": 0.4580180048942566, "learning_rate": 3.3918818687557672e-06, "loss": 0.073, "step": 70410 }, { "epoch": 2.082569349973384, "grad_norm": 1.1283385753631592, "learning_rate": 3.391755178836828e-06, "loss": 0.0719, "step": 70420 }, { "epoch": 2.082865085467558, "grad_norm": 0.8653733730316162, "learning_rate": 3.3916284889178884e-06, "loss": 0.0812, "step": 70430 }, { "epoch": 2.083160820961732, "grad_norm": 0.5348125696182251, "learning_rate": 3.3915017989989487e-06, "loss": 0.0753, "step": 70440 }, { "epoch": 2.0834565564559058, "grad_norm": 0.714289128780365, "learning_rate": 3.39137510908001e-06, "loss": 0.0819, "step": 70450 }, { "epoch": 2.0837522919500797, "grad_norm": 0.626833438873291, "learning_rate": 3.3912484191610703e-06, "loss": 0.077, "step": 70460 }, { "epoch": 2.0840480274442537, "grad_norm": 0.6249631643295288, "learning_rate": 3.391121729242131e-06, "loss": 0.0856, "step": 70470 }, { "epoch": 2.084343762938428, "grad_norm": 0.8261796236038208, "learning_rate": 3.3909950393231915e-06, "loss": 0.0729, "step": 70480 }, { "epoch": 2.084639498432602, "grad_norm": 0.6282345056533813, "learning_rate": 3.3908683494042523e-06, "loss": 0.0916, "step": 70490 }, { "epoch": 2.084935233926776, "grad_norm": 0.41995522379875183, "learning_rate": 3.3907416594853126e-06, "loss": 0.081, "step": 70500 }, { "epoch": 2.08523096942095, "grad_norm": 0.7914180755615234, "learning_rate": 3.3906149695663734e-06, "loss": 0.0709, "step": 70510 }, { "epoch": 2.085526704915124, "grad_norm": 0.925678014755249, "learning_rate": 3.3904882796474338e-06, "loss": 0.0819, "step": 70520 }, { "epoch": 2.0858224404092978, "grad_norm": 0.9160482287406921, "learning_rate": 3.390361589728495e-06, "loss": 0.064, "step": 70530 }, { "epoch": 2.086118175903472, "grad_norm": 1.2042279243469238, "learning_rate": 3.3902348998095554e-06, "loss": 0.0939, "step": 70540 }, { "epoch": 2.086413911397646, "grad_norm": 0.7654713988304138, "learning_rate": 3.390108209890616e-06, "loss": 0.0982, "step": 70550 }, { "epoch": 2.08670964689182, "grad_norm": 0.9471474885940552, "learning_rate": 3.3899815199716765e-06, "loss": 0.0715, "step": 70560 }, { "epoch": 2.087005382385994, "grad_norm": 0.8760037422180176, "learning_rate": 3.3898548300527373e-06, "loss": 0.0735, "step": 70570 }, { "epoch": 2.087301117880168, "grad_norm": 1.2154738903045654, "learning_rate": 3.3897281401337977e-06, "loss": 0.0628, "step": 70580 }, { "epoch": 2.087596853374342, "grad_norm": 0.630707323551178, "learning_rate": 3.3896014502148585e-06, "loss": 0.0867, "step": 70590 }, { "epoch": 2.087892588868516, "grad_norm": 0.6187701225280762, "learning_rate": 3.389474760295919e-06, "loss": 0.0742, "step": 70600 }, { "epoch": 2.08818832436269, "grad_norm": 1.050886631011963, "learning_rate": 3.38934807037698e-06, "loss": 0.0729, "step": 70610 }, { "epoch": 2.088484059856864, "grad_norm": 0.5717872381210327, "learning_rate": 3.3892213804580404e-06, "loss": 0.0562, "step": 70620 }, { "epoch": 2.088779795351038, "grad_norm": 0.7942542433738708, "learning_rate": 3.389094690539101e-06, "loss": 0.0785, "step": 70630 }, { "epoch": 2.089075530845212, "grad_norm": 0.6948928236961365, "learning_rate": 3.3889680006201616e-06, "loss": 0.085, "step": 70640 }, { "epoch": 2.089371266339386, "grad_norm": 0.7814064621925354, "learning_rate": 3.3888413107012224e-06, "loss": 0.0749, "step": 70650 }, { "epoch": 2.08966700183356, "grad_norm": 0.9056185483932495, "learning_rate": 3.3887146207822827e-06, "loss": 0.0818, "step": 70660 }, { "epoch": 2.089962737327734, "grad_norm": 0.7491656541824341, "learning_rate": 3.3885879308633435e-06, "loss": 0.0738, "step": 70670 }, { "epoch": 2.0902584728219082, "grad_norm": 0.9151018261909485, "learning_rate": 3.388461240944404e-06, "loss": 0.0569, "step": 70680 }, { "epoch": 2.090554208316082, "grad_norm": 1.2723228931427002, "learning_rate": 3.388334551025465e-06, "loss": 0.0924, "step": 70690 }, { "epoch": 2.090849943810256, "grad_norm": 1.853967547416687, "learning_rate": 3.3882078611065255e-06, "loss": 0.0878, "step": 70700 }, { "epoch": 2.09114567930443, "grad_norm": 1.225191354751587, "learning_rate": 3.3880811711875862e-06, "loss": 0.0691, "step": 70710 }, { "epoch": 2.091441414798604, "grad_norm": 0.6407108902931213, "learning_rate": 3.3879544812686466e-06, "loss": 0.0761, "step": 70720 }, { "epoch": 2.091737150292778, "grad_norm": 0.6330147385597229, "learning_rate": 3.3878277913497074e-06, "loss": 0.0592, "step": 70730 }, { "epoch": 2.0920328857869523, "grad_norm": 1.2112571001052856, "learning_rate": 3.3877011014307678e-06, "loss": 0.0872, "step": 70740 }, { "epoch": 2.0923286212811263, "grad_norm": 0.8564323782920837, "learning_rate": 3.3875744115118286e-06, "loss": 0.0843, "step": 70750 }, { "epoch": 2.0926243567753002, "grad_norm": 0.7251211404800415, "learning_rate": 3.387447721592889e-06, "loss": 0.0867, "step": 70760 }, { "epoch": 2.092920092269474, "grad_norm": 0.6030671000480652, "learning_rate": 3.38732103167395e-06, "loss": 0.0609, "step": 70770 }, { "epoch": 2.093215827763648, "grad_norm": 1.2241568565368652, "learning_rate": 3.3871943417550105e-06, "loss": 0.0808, "step": 70780 }, { "epoch": 2.093511563257822, "grad_norm": 0.5877489447593689, "learning_rate": 3.3870676518360713e-06, "loss": 0.0894, "step": 70790 }, { "epoch": 2.093807298751996, "grad_norm": 0.7264202237129211, "learning_rate": 3.3869409619171317e-06, "loss": 0.1057, "step": 70800 }, { "epoch": 2.0941030342461704, "grad_norm": 0.7502118349075317, "learning_rate": 3.3868142719981924e-06, "loss": 0.0871, "step": 70810 }, { "epoch": 2.0943987697403443, "grad_norm": 0.5884445309638977, "learning_rate": 3.386687582079253e-06, "loss": 0.0638, "step": 70820 }, { "epoch": 2.0946945052345183, "grad_norm": 0.7748191952705383, "learning_rate": 3.3865608921603136e-06, "loss": 0.0696, "step": 70830 }, { "epoch": 2.094990240728692, "grad_norm": 0.8513668179512024, "learning_rate": 3.386434202241374e-06, "loss": 0.0896, "step": 70840 }, { "epoch": 2.095285976222866, "grad_norm": 0.8937861323356628, "learning_rate": 3.386307512322435e-06, "loss": 0.0663, "step": 70850 }, { "epoch": 2.09558171171704, "grad_norm": 1.0385067462921143, "learning_rate": 3.3861808224034955e-06, "loss": 0.0717, "step": 70860 }, { "epoch": 2.0958774472112145, "grad_norm": 0.5716288685798645, "learning_rate": 3.386054132484556e-06, "loss": 0.0622, "step": 70870 }, { "epoch": 2.0961731827053884, "grad_norm": 0.65436190366745, "learning_rate": 3.3859274425656167e-06, "loss": 0.0618, "step": 70880 }, { "epoch": 2.0964689181995624, "grad_norm": 1.0562762022018433, "learning_rate": 3.385800752646677e-06, "loss": 0.0894, "step": 70890 }, { "epoch": 2.0967646536937363, "grad_norm": 1.1500452756881714, "learning_rate": 3.385674062727738e-06, "loss": 0.097, "step": 70900 }, { "epoch": 2.0970603891879103, "grad_norm": 1.0210813283920288, "learning_rate": 3.3855473728087982e-06, "loss": 0.0904, "step": 70910 }, { "epoch": 2.097356124682084, "grad_norm": 0.542942225933075, "learning_rate": 3.385420682889859e-06, "loss": 0.0595, "step": 70920 }, { "epoch": 2.097651860176258, "grad_norm": 1.1229063272476196, "learning_rate": 3.38529399297092e-06, "loss": 0.0778, "step": 70930 }, { "epoch": 2.0979475956704325, "grad_norm": 1.4607293605804443, "learning_rate": 3.3851673030519806e-06, "loss": 0.0996, "step": 70940 }, { "epoch": 2.0982433311646065, "grad_norm": 1.3878865242004395, "learning_rate": 3.385040613133041e-06, "loss": 0.0908, "step": 70950 }, { "epoch": 2.0985390666587804, "grad_norm": 0.49958425760269165, "learning_rate": 3.3849139232141017e-06, "loss": 0.0795, "step": 70960 }, { "epoch": 2.0988348021529544, "grad_norm": 0.9252473711967468, "learning_rate": 3.384787233295162e-06, "loss": 0.0868, "step": 70970 }, { "epoch": 2.0991305376471283, "grad_norm": 0.8697139024734497, "learning_rate": 3.384660543376223e-06, "loss": 0.0693, "step": 70980 }, { "epoch": 2.0994262731413023, "grad_norm": 0.7702372670173645, "learning_rate": 3.3845338534572833e-06, "loss": 0.0781, "step": 70990 }, { "epoch": 2.0997220086354766, "grad_norm": 0.8460411429405212, "learning_rate": 3.384407163538344e-06, "loss": 0.0755, "step": 71000 }, { "epoch": 2.1000177441296506, "grad_norm": 1.07587730884552, "learning_rate": 3.384280473619405e-06, "loss": 0.0858, "step": 71010 }, { "epoch": 2.1003134796238245, "grad_norm": 0.7361949682235718, "learning_rate": 3.3841537837004656e-06, "loss": 0.0733, "step": 71020 }, { "epoch": 2.1006092151179985, "grad_norm": 1.092899203300476, "learning_rate": 3.384027093781526e-06, "loss": 0.0615, "step": 71030 }, { "epoch": 2.1009049506121724, "grad_norm": 0.9133099913597107, "learning_rate": 3.383900403862587e-06, "loss": 0.0972, "step": 71040 }, { "epoch": 2.1012006861063464, "grad_norm": 0.5798846483230591, "learning_rate": 3.383773713943647e-06, "loss": 0.0582, "step": 71050 }, { "epoch": 2.1014964216005203, "grad_norm": 0.7298443913459778, "learning_rate": 3.383647024024708e-06, "loss": 0.0693, "step": 71060 }, { "epoch": 2.1017921570946947, "grad_norm": 0.924481987953186, "learning_rate": 3.3835203341057683e-06, "loss": 0.0911, "step": 71070 }, { "epoch": 2.1020878925888686, "grad_norm": 0.7009854912757874, "learning_rate": 3.383393644186829e-06, "loss": 0.0745, "step": 71080 }, { "epoch": 2.1023836280830426, "grad_norm": 0.8389187455177307, "learning_rate": 3.38326695426789e-06, "loss": 0.0812, "step": 71090 }, { "epoch": 2.1026793635772165, "grad_norm": 0.6736899018287659, "learning_rate": 3.3831402643489507e-06, "loss": 0.0996, "step": 71100 }, { "epoch": 2.1029750990713905, "grad_norm": 0.617232620716095, "learning_rate": 3.383013574430011e-06, "loss": 0.0648, "step": 71110 }, { "epoch": 2.1032708345655644, "grad_norm": 0.7706455588340759, "learning_rate": 3.382886884511072e-06, "loss": 0.0765, "step": 71120 }, { "epoch": 2.103566570059739, "grad_norm": 0.8823813796043396, "learning_rate": 3.382760194592132e-06, "loss": 0.0532, "step": 71130 }, { "epoch": 2.1038623055539127, "grad_norm": 0.8789005279541016, "learning_rate": 3.382633504673193e-06, "loss": 0.0991, "step": 71140 }, { "epoch": 2.1041580410480867, "grad_norm": 1.5431381464004517, "learning_rate": 3.3825068147542534e-06, "loss": 0.098, "step": 71150 }, { "epoch": 2.1044537765422606, "grad_norm": 0.7045754194259644, "learning_rate": 3.382380124835314e-06, "loss": 0.0887, "step": 71160 }, { "epoch": 2.1047495120364346, "grad_norm": 0.6599418520927429, "learning_rate": 3.382253434916375e-06, "loss": 0.0591, "step": 71170 }, { "epoch": 2.1050452475306085, "grad_norm": 0.48004090785980225, "learning_rate": 3.3821267449974357e-06, "loss": 0.0693, "step": 71180 }, { "epoch": 2.1053409830247825, "grad_norm": 0.8795672059059143, "learning_rate": 3.382000055078496e-06, "loss": 0.0824, "step": 71190 }, { "epoch": 2.105636718518957, "grad_norm": 0.9494951367378235, "learning_rate": 3.381873365159557e-06, "loss": 0.0922, "step": 71200 }, { "epoch": 2.105932454013131, "grad_norm": 0.8255816698074341, "learning_rate": 3.3817466752406173e-06, "loss": 0.0778, "step": 71210 }, { "epoch": 2.1062281895073047, "grad_norm": 0.8777755498886108, "learning_rate": 3.381619985321678e-06, "loss": 0.0745, "step": 71220 }, { "epoch": 2.1065239250014787, "grad_norm": 1.4046539068222046, "learning_rate": 3.3814932954027384e-06, "loss": 0.0747, "step": 71230 }, { "epoch": 2.1068196604956526, "grad_norm": 0.9123421311378479, "learning_rate": 3.381366605483799e-06, "loss": 0.0898, "step": 71240 }, { "epoch": 2.1071153959898266, "grad_norm": 0.9820210337638855, "learning_rate": 3.38123991556486e-06, "loss": 0.0702, "step": 71250 }, { "epoch": 2.1074111314840005, "grad_norm": 0.6214127540588379, "learning_rate": 3.3811132256459208e-06, "loss": 0.0831, "step": 71260 }, { "epoch": 2.107706866978175, "grad_norm": 0.9555495977401733, "learning_rate": 3.380986535726981e-06, "loss": 0.0866, "step": 71270 }, { "epoch": 2.108002602472349, "grad_norm": 0.5685352087020874, "learning_rate": 3.3808598458080415e-06, "loss": 0.0569, "step": 71280 }, { "epoch": 2.1082983379665228, "grad_norm": 0.6647111773490906, "learning_rate": 3.3807331558891023e-06, "loss": 0.0904, "step": 71290 }, { "epoch": 2.1085940734606967, "grad_norm": 0.6422305107116699, "learning_rate": 3.3806064659701627e-06, "loss": 0.0875, "step": 71300 }, { "epoch": 2.1088898089548707, "grad_norm": 0.836764395236969, "learning_rate": 3.3804797760512235e-06, "loss": 0.1004, "step": 71310 }, { "epoch": 2.1091855444490446, "grad_norm": 0.6513237357139587, "learning_rate": 3.380353086132284e-06, "loss": 0.0695, "step": 71320 }, { "epoch": 2.109481279943219, "grad_norm": 0.6171590089797974, "learning_rate": 3.380226396213345e-06, "loss": 0.0667, "step": 71330 }, { "epoch": 2.109777015437393, "grad_norm": 1.057995319366455, "learning_rate": 3.3800997062944054e-06, "loss": 0.101, "step": 71340 }, { "epoch": 2.110072750931567, "grad_norm": 0.9150294065475464, "learning_rate": 3.379973016375466e-06, "loss": 0.0817, "step": 71350 }, { "epoch": 2.110368486425741, "grad_norm": 0.5817564129829407, "learning_rate": 3.3798463264565266e-06, "loss": 0.058, "step": 71360 }, { "epoch": 2.1106642219199148, "grad_norm": 0.726890504360199, "learning_rate": 3.3797196365375873e-06, "loss": 0.0817, "step": 71370 }, { "epoch": 2.1109599574140887, "grad_norm": 1.0515600442886353, "learning_rate": 3.3795929466186477e-06, "loss": 0.0821, "step": 71380 }, { "epoch": 2.1112556929082626, "grad_norm": 0.9644374251365662, "learning_rate": 3.3794662566997085e-06, "loss": 0.0817, "step": 71390 }, { "epoch": 2.111551428402437, "grad_norm": 0.7410555481910706, "learning_rate": 3.379339566780769e-06, "loss": 0.0682, "step": 71400 }, { "epoch": 2.111847163896611, "grad_norm": 0.5539645552635193, "learning_rate": 3.37921287686183e-06, "loss": 0.0639, "step": 71410 }, { "epoch": 2.112142899390785, "grad_norm": 0.5801887512207031, "learning_rate": 3.3790861869428904e-06, "loss": 0.0709, "step": 71420 }, { "epoch": 2.112438634884959, "grad_norm": 1.0495928525924683, "learning_rate": 3.3789594970239512e-06, "loss": 0.0668, "step": 71430 }, { "epoch": 2.112734370379133, "grad_norm": 0.9090667366981506, "learning_rate": 3.3788328071050116e-06, "loss": 0.092, "step": 71440 }, { "epoch": 2.1130301058733068, "grad_norm": 1.1155836582183838, "learning_rate": 3.3787061171860724e-06, "loss": 0.0867, "step": 71450 }, { "epoch": 2.113325841367481, "grad_norm": 0.9354200959205627, "learning_rate": 3.3785794272671328e-06, "loss": 0.0831, "step": 71460 }, { "epoch": 2.113621576861655, "grad_norm": 0.6464117765426636, "learning_rate": 3.3784527373481935e-06, "loss": 0.0652, "step": 71470 }, { "epoch": 2.113917312355829, "grad_norm": 0.7881245017051697, "learning_rate": 3.378326047429254e-06, "loss": 0.0581, "step": 71480 }, { "epoch": 2.114213047850003, "grad_norm": 0.7469843029975891, "learning_rate": 3.378199357510315e-06, "loss": 0.0932, "step": 71490 }, { "epoch": 2.114508783344177, "grad_norm": 1.0008350610733032, "learning_rate": 3.3780726675913755e-06, "loss": 0.0839, "step": 71500 }, { "epoch": 2.114804518838351, "grad_norm": 1.0706661939620972, "learning_rate": 3.3779459776724363e-06, "loss": 0.0756, "step": 71510 }, { "epoch": 2.115100254332525, "grad_norm": 0.8245667815208435, "learning_rate": 3.3778192877534966e-06, "loss": 0.0805, "step": 71520 }, { "epoch": 2.115395989826699, "grad_norm": 0.8078861832618713, "learning_rate": 3.3776925978345574e-06, "loss": 0.0692, "step": 71530 }, { "epoch": 2.115691725320873, "grad_norm": 1.1573851108551025, "learning_rate": 3.377565907915618e-06, "loss": 0.0984, "step": 71540 }, { "epoch": 2.115987460815047, "grad_norm": 0.7361376881599426, "learning_rate": 3.3774392179966786e-06, "loss": 0.0785, "step": 71550 }, { "epoch": 2.116283196309221, "grad_norm": 0.8755768537521362, "learning_rate": 3.377312528077739e-06, "loss": 0.0794, "step": 71560 }, { "epoch": 2.116578931803395, "grad_norm": 0.6499308943748474, "learning_rate": 3.3771858381588e-06, "loss": 0.061, "step": 71570 }, { "epoch": 2.116874667297569, "grad_norm": 0.714444100856781, "learning_rate": 3.3770591482398605e-06, "loss": 0.0628, "step": 71580 }, { "epoch": 2.117170402791743, "grad_norm": 0.9034260511398315, "learning_rate": 3.3769324583209213e-06, "loss": 0.086, "step": 71590 }, { "epoch": 2.1174661382859172, "grad_norm": 1.0146369934082031, "learning_rate": 3.3768057684019817e-06, "loss": 0.0895, "step": 71600 }, { "epoch": 2.117761873780091, "grad_norm": 1.2966623306274414, "learning_rate": 3.3766790784830425e-06, "loss": 0.0801, "step": 71610 }, { "epoch": 2.118057609274265, "grad_norm": 0.9051265716552734, "learning_rate": 3.376552388564103e-06, "loss": 0.0932, "step": 71620 }, { "epoch": 2.118353344768439, "grad_norm": 0.5572429895401001, "learning_rate": 3.3764256986451636e-06, "loss": 0.0623, "step": 71630 }, { "epoch": 2.118649080262613, "grad_norm": 0.888717532157898, "learning_rate": 3.376299008726224e-06, "loss": 0.0905, "step": 71640 }, { "epoch": 2.118944815756787, "grad_norm": 0.8590104579925537, "learning_rate": 3.3761723188072852e-06, "loss": 0.0898, "step": 71650 }, { "epoch": 2.1192405512509613, "grad_norm": 1.1768063306808472, "learning_rate": 3.3760456288883456e-06, "loss": 0.0868, "step": 71660 }, { "epoch": 2.1195362867451353, "grad_norm": 0.9926496744155884, "learning_rate": 3.3759189389694064e-06, "loss": 0.0682, "step": 71670 }, { "epoch": 2.119832022239309, "grad_norm": 0.5208438634872437, "learning_rate": 3.3757922490504667e-06, "loss": 0.0684, "step": 71680 }, { "epoch": 2.120127757733483, "grad_norm": 0.8133621215820312, "learning_rate": 3.375665559131527e-06, "loss": 0.0944, "step": 71690 }, { "epoch": 2.120423493227657, "grad_norm": 0.6907304525375366, "learning_rate": 3.375538869212588e-06, "loss": 0.073, "step": 71700 }, { "epoch": 2.120719228721831, "grad_norm": 0.7080528140068054, "learning_rate": 3.3754121792936483e-06, "loss": 0.0665, "step": 71710 }, { "epoch": 2.121014964216005, "grad_norm": 1.1201045513153076, "learning_rate": 3.375285489374709e-06, "loss": 0.0764, "step": 71720 }, { "epoch": 2.1213106997101794, "grad_norm": 1.1410964727401733, "learning_rate": 3.37515879945577e-06, "loss": 0.073, "step": 71730 }, { "epoch": 2.1216064352043533, "grad_norm": 1.0144377946853638, "learning_rate": 3.3750321095368306e-06, "loss": 0.077, "step": 71740 }, { "epoch": 2.1219021706985273, "grad_norm": 0.4977571666240692, "learning_rate": 3.374905419617891e-06, "loss": 0.0733, "step": 71750 }, { "epoch": 2.122197906192701, "grad_norm": 0.4415510594844818, "learning_rate": 3.3747787296989518e-06, "loss": 0.085, "step": 71760 }, { "epoch": 2.122493641686875, "grad_norm": 0.63071608543396, "learning_rate": 3.374652039780012e-06, "loss": 0.0663, "step": 71770 }, { "epoch": 2.122789377181049, "grad_norm": 0.8375443816184998, "learning_rate": 3.374525349861073e-06, "loss": 0.0762, "step": 71780 }, { "epoch": 2.1230851126752235, "grad_norm": 1.0892584323883057, "learning_rate": 3.3743986599421333e-06, "loss": 0.0928, "step": 71790 }, { "epoch": 2.1233808481693974, "grad_norm": 0.8169460892677307, "learning_rate": 3.374271970023194e-06, "loss": 0.0871, "step": 71800 }, { "epoch": 2.1236765836635714, "grad_norm": 0.5313543677330017, "learning_rate": 3.374145280104255e-06, "loss": 0.0653, "step": 71810 }, { "epoch": 2.1239723191577453, "grad_norm": 0.6493123769760132, "learning_rate": 3.3740185901853157e-06, "loss": 0.0813, "step": 71820 }, { "epoch": 2.1242680546519193, "grad_norm": 0.9316059350967407, "learning_rate": 3.373891900266376e-06, "loss": 0.0748, "step": 71830 }, { "epoch": 2.124563790146093, "grad_norm": 1.0593501329421997, "learning_rate": 3.373765210347437e-06, "loss": 0.0727, "step": 71840 }, { "epoch": 2.124859525640267, "grad_norm": 0.7353328466415405, "learning_rate": 3.373638520428497e-06, "loss": 0.0761, "step": 71850 }, { "epoch": 2.1251552611344415, "grad_norm": 0.9466953277587891, "learning_rate": 3.373511830509558e-06, "loss": 0.0619, "step": 71860 }, { "epoch": 2.1254509966286155, "grad_norm": 0.9135801196098328, "learning_rate": 3.3733851405906183e-06, "loss": 0.0658, "step": 71870 }, { "epoch": 2.1257467321227894, "grad_norm": 1.020332932472229, "learning_rate": 3.373258450671679e-06, "loss": 0.0789, "step": 71880 }, { "epoch": 2.1260424676169634, "grad_norm": 0.9853237271308899, "learning_rate": 3.37313176075274e-06, "loss": 0.0787, "step": 71890 }, { "epoch": 2.1263382031111373, "grad_norm": 0.8281403183937073, "learning_rate": 3.3730050708338007e-06, "loss": 0.0703, "step": 71900 }, { "epoch": 2.1266339386053112, "grad_norm": 1.061594843864441, "learning_rate": 3.372878380914861e-06, "loss": 0.0809, "step": 71910 }, { "epoch": 2.126929674099485, "grad_norm": 0.5886381268501282, "learning_rate": 3.372751690995922e-06, "loss": 0.0689, "step": 71920 }, { "epoch": 2.1272254095936596, "grad_norm": 0.9864619374275208, "learning_rate": 3.3726250010769822e-06, "loss": 0.0714, "step": 71930 }, { "epoch": 2.1275211450878335, "grad_norm": 0.7944070100784302, "learning_rate": 3.372498311158043e-06, "loss": 0.0881, "step": 71940 }, { "epoch": 2.1278168805820075, "grad_norm": 0.8693403601646423, "learning_rate": 3.3723716212391034e-06, "loss": 0.0738, "step": 71950 }, { "epoch": 2.1281126160761814, "grad_norm": 0.6808340549468994, "learning_rate": 3.372244931320164e-06, "loss": 0.0881, "step": 71960 }, { "epoch": 2.1284083515703554, "grad_norm": 0.7461633086204529, "learning_rate": 3.372118241401225e-06, "loss": 0.0643, "step": 71970 }, { "epoch": 2.1287040870645293, "grad_norm": 0.945275604724884, "learning_rate": 3.3719915514822858e-06, "loss": 0.0801, "step": 71980 }, { "epoch": 2.1289998225587037, "grad_norm": 0.9532104730606079, "learning_rate": 3.371864861563346e-06, "loss": 0.0881, "step": 71990 }, { "epoch": 2.1292955580528776, "grad_norm": 0.9978888630867004, "learning_rate": 3.371738171644407e-06, "loss": 0.0896, "step": 72000 }, { "epoch": 2.1295912935470516, "grad_norm": 1.0016459226608276, "learning_rate": 3.3716114817254673e-06, "loss": 0.0759, "step": 72010 }, { "epoch": 2.1298870290412255, "grad_norm": 0.9227241277694702, "learning_rate": 3.371484791806528e-06, "loss": 0.0623, "step": 72020 }, { "epoch": 2.1301827645353995, "grad_norm": 0.9557576775550842, "learning_rate": 3.3713581018875884e-06, "loss": 0.0755, "step": 72030 }, { "epoch": 2.1304785000295734, "grad_norm": 0.7556123733520508, "learning_rate": 3.3712314119686492e-06, "loss": 0.0767, "step": 72040 }, { "epoch": 2.130774235523748, "grad_norm": 0.9071747064590454, "learning_rate": 3.37110472204971e-06, "loss": 0.0886, "step": 72050 }, { "epoch": 2.1310699710179217, "grad_norm": 0.8271936178207397, "learning_rate": 3.370978032130771e-06, "loss": 0.08, "step": 72060 }, { "epoch": 2.1313657065120957, "grad_norm": 0.2991441488265991, "learning_rate": 3.370851342211831e-06, "loss": 0.0747, "step": 72070 }, { "epoch": 2.1316614420062696, "grad_norm": 0.6907131671905518, "learning_rate": 3.370724652292892e-06, "loss": 0.0742, "step": 72080 }, { "epoch": 2.1319571775004436, "grad_norm": 1.2374415397644043, "learning_rate": 3.3705979623739523e-06, "loss": 0.082, "step": 72090 }, { "epoch": 2.1322529129946175, "grad_norm": 0.9805840253829956, "learning_rate": 3.3704712724550127e-06, "loss": 0.0814, "step": 72100 }, { "epoch": 2.1325486484887914, "grad_norm": 1.122112512588501, "learning_rate": 3.3703445825360735e-06, "loss": 0.0746, "step": 72110 }, { "epoch": 2.132844383982966, "grad_norm": 0.7866384387016296, "learning_rate": 3.370217892617134e-06, "loss": 0.0672, "step": 72120 }, { "epoch": 2.1331401194771398, "grad_norm": 0.9703351855278015, "learning_rate": 3.370091202698195e-06, "loss": 0.0717, "step": 72130 }, { "epoch": 2.1334358549713137, "grad_norm": 1.144473910331726, "learning_rate": 3.3699645127792554e-06, "loss": 0.0804, "step": 72140 }, { "epoch": 2.1337315904654877, "grad_norm": 0.8810145854949951, "learning_rate": 3.3698378228603162e-06, "loss": 0.0934, "step": 72150 }, { "epoch": 2.1340273259596616, "grad_norm": 1.205357313156128, "learning_rate": 3.3697111329413766e-06, "loss": 0.0927, "step": 72160 }, { "epoch": 2.1343230614538355, "grad_norm": 0.6676097512245178, "learning_rate": 3.3695844430224374e-06, "loss": 0.0971, "step": 72170 }, { "epoch": 2.1346187969480095, "grad_norm": 0.6613426208496094, "learning_rate": 3.3694577531034977e-06, "loss": 0.0714, "step": 72180 }, { "epoch": 2.134914532442184, "grad_norm": 0.6255723834037781, "learning_rate": 3.3693310631845585e-06, "loss": 0.0706, "step": 72190 }, { "epoch": 2.135210267936358, "grad_norm": 1.4838175773620605, "learning_rate": 3.369204373265619e-06, "loss": 0.0879, "step": 72200 }, { "epoch": 2.1355060034305318, "grad_norm": 0.5221678018569946, "learning_rate": 3.36907768334668e-06, "loss": 0.0792, "step": 72210 }, { "epoch": 2.1358017389247057, "grad_norm": 0.5792906880378723, "learning_rate": 3.3689509934277405e-06, "loss": 0.0693, "step": 72220 }, { "epoch": 2.1360974744188796, "grad_norm": 0.6271437406539917, "learning_rate": 3.3688243035088013e-06, "loss": 0.0767, "step": 72230 }, { "epoch": 2.1363932099130536, "grad_norm": 0.6303712725639343, "learning_rate": 3.3686976135898616e-06, "loss": 0.1106, "step": 72240 }, { "epoch": 2.136688945407228, "grad_norm": 0.9910341501235962, "learning_rate": 3.3685709236709224e-06, "loss": 0.1002, "step": 72250 }, { "epoch": 2.136984680901402, "grad_norm": 1.215132236480713, "learning_rate": 3.3684442337519828e-06, "loss": 0.0924, "step": 72260 }, { "epoch": 2.137280416395576, "grad_norm": 0.5277800559997559, "learning_rate": 3.3683175438330436e-06, "loss": 0.0783, "step": 72270 }, { "epoch": 2.13757615188975, "grad_norm": 0.5456967949867249, "learning_rate": 3.368190853914104e-06, "loss": 0.0712, "step": 72280 }, { "epoch": 2.1378718873839238, "grad_norm": 0.8411778807640076, "learning_rate": 3.368064163995165e-06, "loss": 0.1023, "step": 72290 }, { "epoch": 2.1381676228780977, "grad_norm": 1.3232128620147705, "learning_rate": 3.3679374740762255e-06, "loss": 0.0773, "step": 72300 }, { "epoch": 2.1384633583722716, "grad_norm": 0.9383792281150818, "learning_rate": 3.3678107841572863e-06, "loss": 0.0891, "step": 72310 }, { "epoch": 2.138759093866446, "grad_norm": 0.8357301950454712, "learning_rate": 3.3676840942383467e-06, "loss": 0.0696, "step": 72320 }, { "epoch": 2.13905482936062, "grad_norm": 0.6535347104072571, "learning_rate": 3.3675574043194075e-06, "loss": 0.0622, "step": 72330 }, { "epoch": 2.139350564854794, "grad_norm": 0.6993080377578735, "learning_rate": 3.367430714400468e-06, "loss": 0.0874, "step": 72340 }, { "epoch": 2.139646300348968, "grad_norm": 1.099621057510376, "learning_rate": 3.3673040244815286e-06, "loss": 0.0808, "step": 72350 }, { "epoch": 2.139942035843142, "grad_norm": 1.096044659614563, "learning_rate": 3.367177334562589e-06, "loss": 0.0865, "step": 72360 }, { "epoch": 2.1402377713373157, "grad_norm": 0.6172500252723694, "learning_rate": 3.36705064464365e-06, "loss": 0.0761, "step": 72370 }, { "epoch": 2.14053350683149, "grad_norm": 0.7819969654083252, "learning_rate": 3.3669239547247106e-06, "loss": 0.0842, "step": 72380 }, { "epoch": 2.140829242325664, "grad_norm": 1.1089082956314087, "learning_rate": 3.3667972648057714e-06, "loss": 0.0916, "step": 72390 }, { "epoch": 2.141124977819838, "grad_norm": 0.6694575548171997, "learning_rate": 3.3666705748868317e-06, "loss": 0.0845, "step": 72400 }, { "epoch": 2.141420713314012, "grad_norm": 1.2758090496063232, "learning_rate": 3.3665438849678925e-06, "loss": 0.0821, "step": 72410 }, { "epoch": 2.141716448808186, "grad_norm": 1.2089922428131104, "learning_rate": 3.366417195048953e-06, "loss": 0.089, "step": 72420 }, { "epoch": 2.14201218430236, "grad_norm": 0.6975029110908508, "learning_rate": 3.3662905051300137e-06, "loss": 0.0644, "step": 72430 }, { "epoch": 2.142307919796534, "grad_norm": 0.8306214213371277, "learning_rate": 3.366163815211074e-06, "loss": 0.0993, "step": 72440 }, { "epoch": 2.142603655290708, "grad_norm": 0.501280665397644, "learning_rate": 3.3660371252921352e-06, "loss": 0.0805, "step": 72450 }, { "epoch": 2.142899390784882, "grad_norm": 0.640242338180542, "learning_rate": 3.3659104353731956e-06, "loss": 0.0688, "step": 72460 }, { "epoch": 2.143195126279056, "grad_norm": 0.6699175834655762, "learning_rate": 3.3657837454542564e-06, "loss": 0.0694, "step": 72470 }, { "epoch": 2.14349086177323, "grad_norm": 0.6672627925872803, "learning_rate": 3.3656570555353168e-06, "loss": 0.07, "step": 72480 }, { "epoch": 2.143786597267404, "grad_norm": 0.6686713099479675, "learning_rate": 3.3655303656163776e-06, "loss": 0.0933, "step": 72490 }, { "epoch": 2.144082332761578, "grad_norm": 2.496598482131958, "learning_rate": 3.365403675697438e-06, "loss": 0.0905, "step": 72500 }, { "epoch": 2.144378068255752, "grad_norm": 0.9148902893066406, "learning_rate": 3.3652769857784983e-06, "loss": 0.0899, "step": 72510 }, { "epoch": 2.144673803749926, "grad_norm": 0.8313035368919373, "learning_rate": 3.365150295859559e-06, "loss": 0.0729, "step": 72520 }, { "epoch": 2.1449695392441, "grad_norm": 0.410717248916626, "learning_rate": 3.36502360594062e-06, "loss": 0.0599, "step": 72530 }, { "epoch": 2.145265274738274, "grad_norm": 1.0549325942993164, "learning_rate": 3.3648969160216807e-06, "loss": 0.0704, "step": 72540 }, { "epoch": 2.145561010232448, "grad_norm": 0.9801075458526611, "learning_rate": 3.364770226102741e-06, "loss": 0.0917, "step": 72550 }, { "epoch": 2.145856745726622, "grad_norm": 0.5201889872550964, "learning_rate": 3.364643536183802e-06, "loss": 0.0719, "step": 72560 }, { "epoch": 2.146152481220796, "grad_norm": 0.9197100400924683, "learning_rate": 3.364516846264862e-06, "loss": 0.0674, "step": 72570 }, { "epoch": 2.1464482167149703, "grad_norm": 1.4231964349746704, "learning_rate": 3.364390156345923e-06, "loss": 0.0803, "step": 72580 }, { "epoch": 2.1467439522091443, "grad_norm": 0.6464078426361084, "learning_rate": 3.3642634664269833e-06, "loss": 0.0884, "step": 72590 }, { "epoch": 2.147039687703318, "grad_norm": 0.5978819727897644, "learning_rate": 3.364136776508044e-06, "loss": 0.0924, "step": 72600 }, { "epoch": 2.147335423197492, "grad_norm": 0.7166228890419006, "learning_rate": 3.364010086589105e-06, "loss": 0.0822, "step": 72610 }, { "epoch": 2.147631158691666, "grad_norm": 1.002882719039917, "learning_rate": 3.3638833966701657e-06, "loss": 0.0757, "step": 72620 }, { "epoch": 2.14792689418584, "grad_norm": 1.1833997964859009, "learning_rate": 3.363756706751226e-06, "loss": 0.0665, "step": 72630 }, { "epoch": 2.1482226296800144, "grad_norm": 0.7893409729003906, "learning_rate": 3.363630016832287e-06, "loss": 0.0865, "step": 72640 }, { "epoch": 2.1485183651741884, "grad_norm": 0.5937584042549133, "learning_rate": 3.3635033269133472e-06, "loss": 0.0718, "step": 72650 }, { "epoch": 2.1488141006683623, "grad_norm": 0.7122012376785278, "learning_rate": 3.363376636994408e-06, "loss": 0.0844, "step": 72660 }, { "epoch": 2.1491098361625363, "grad_norm": 0.7660729885101318, "learning_rate": 3.3632499470754684e-06, "loss": 0.0887, "step": 72670 }, { "epoch": 2.14940557165671, "grad_norm": 0.8311895728111267, "learning_rate": 3.363123257156529e-06, "loss": 0.0618, "step": 72680 }, { "epoch": 2.149701307150884, "grad_norm": 0.9545425176620483, "learning_rate": 3.36299656723759e-06, "loss": 0.0999, "step": 72690 }, { "epoch": 2.149997042645058, "grad_norm": 0.9209149479866028, "learning_rate": 3.3628698773186507e-06, "loss": 0.1012, "step": 72700 }, { "epoch": 2.1502927781392325, "grad_norm": 0.6426482200622559, "learning_rate": 3.362743187399711e-06, "loss": 0.0735, "step": 72710 }, { "epoch": 2.1505885136334064, "grad_norm": 0.825808584690094, "learning_rate": 3.362616497480772e-06, "loss": 0.068, "step": 72720 }, { "epoch": 2.1508842491275804, "grad_norm": 1.210310935974121, "learning_rate": 3.3624898075618323e-06, "loss": 0.0791, "step": 72730 }, { "epoch": 2.1511799846217543, "grad_norm": 1.2211003303527832, "learning_rate": 3.362363117642893e-06, "loss": 0.0862, "step": 72740 }, { "epoch": 2.1514757201159282, "grad_norm": 0.929787278175354, "learning_rate": 3.3622364277239534e-06, "loss": 0.0793, "step": 72750 }, { "epoch": 2.151771455610102, "grad_norm": 0.7779310345649719, "learning_rate": 3.362109737805014e-06, "loss": 0.0839, "step": 72760 }, { "epoch": 2.152067191104276, "grad_norm": 1.0202137231826782, "learning_rate": 3.361983047886075e-06, "loss": 0.0603, "step": 72770 }, { "epoch": 2.1523629265984505, "grad_norm": 0.9588810801506042, "learning_rate": 3.361856357967136e-06, "loss": 0.083, "step": 72780 }, { "epoch": 2.1526586620926245, "grad_norm": 0.9243130683898926, "learning_rate": 3.361729668048196e-06, "loss": 0.0928, "step": 72790 }, { "epoch": 2.1529543975867984, "grad_norm": 0.9050512313842773, "learning_rate": 3.361602978129257e-06, "loss": 0.0911, "step": 72800 }, { "epoch": 2.1532501330809724, "grad_norm": 0.4912916421890259, "learning_rate": 3.3614762882103173e-06, "loss": 0.0553, "step": 72810 }, { "epoch": 2.1535458685751463, "grad_norm": 0.6820771098136902, "learning_rate": 3.361349598291378e-06, "loss": 0.0746, "step": 72820 }, { "epoch": 2.1538416040693202, "grad_norm": 4.7628254890441895, "learning_rate": 3.3612229083724385e-06, "loss": 0.0818, "step": 72830 }, { "epoch": 2.154137339563494, "grad_norm": 0.9281952381134033, "learning_rate": 3.3610962184534993e-06, "loss": 0.0791, "step": 72840 }, { "epoch": 2.1544330750576686, "grad_norm": 0.680694580078125, "learning_rate": 3.36096952853456e-06, "loss": 0.0808, "step": 72850 }, { "epoch": 2.1547288105518425, "grad_norm": 0.5831058025360107, "learning_rate": 3.360842838615621e-06, "loss": 0.0742, "step": 72860 }, { "epoch": 2.1550245460460165, "grad_norm": 0.5172802209854126, "learning_rate": 3.360716148696681e-06, "loss": 0.0593, "step": 72870 }, { "epoch": 2.1553202815401904, "grad_norm": 0.3968361020088196, "learning_rate": 3.360589458777742e-06, "loss": 0.059, "step": 72880 }, { "epoch": 2.1556160170343643, "grad_norm": 0.7518443465232849, "learning_rate": 3.3604627688588024e-06, "loss": 0.0807, "step": 72890 }, { "epoch": 2.1559117525285383, "grad_norm": 0.7455534934997559, "learning_rate": 3.360336078939863e-06, "loss": 0.0799, "step": 72900 }, { "epoch": 2.1562074880227127, "grad_norm": 0.4939521551132202, "learning_rate": 3.3602093890209235e-06, "loss": 0.0649, "step": 72910 }, { "epoch": 2.1565032235168866, "grad_norm": 0.6228790283203125, "learning_rate": 3.3600826991019843e-06, "loss": 0.0849, "step": 72920 }, { "epoch": 2.1567989590110606, "grad_norm": 3.6913933753967285, "learning_rate": 3.359956009183045e-06, "loss": 0.0617, "step": 72930 }, { "epoch": 2.1570946945052345, "grad_norm": 0.595639705657959, "learning_rate": 3.3598293192641055e-06, "loss": 0.0851, "step": 72940 }, { "epoch": 2.1573904299994084, "grad_norm": 0.8829187154769897, "learning_rate": 3.3597026293451662e-06, "loss": 0.0898, "step": 72950 }, { "epoch": 2.1576861654935824, "grad_norm": 0.433737576007843, "learning_rate": 3.3595759394262266e-06, "loss": 0.0759, "step": 72960 }, { "epoch": 2.1579819009877568, "grad_norm": 1.2682193517684937, "learning_rate": 3.3594492495072874e-06, "loss": 0.0824, "step": 72970 }, { "epoch": 2.1582776364819307, "grad_norm": 0.918818473815918, "learning_rate": 3.3593225595883478e-06, "loss": 0.0761, "step": 72980 }, { "epoch": 2.1585733719761047, "grad_norm": 0.6545584797859192, "learning_rate": 3.3591958696694086e-06, "loss": 0.0831, "step": 72990 }, { "epoch": 2.1588691074702786, "grad_norm": 0.6950978636741638, "learning_rate": 3.359069179750469e-06, "loss": 0.0801, "step": 73000 }, { "epoch": 2.1591648429644525, "grad_norm": 1.0030272006988525, "learning_rate": 3.35894248983153e-06, "loss": 0.0788, "step": 73010 }, { "epoch": 2.1594605784586265, "grad_norm": 0.8080575466156006, "learning_rate": 3.3588157999125905e-06, "loss": 0.0768, "step": 73020 }, { "epoch": 2.1597563139528004, "grad_norm": 0.760869026184082, "learning_rate": 3.3586891099936513e-06, "loss": 0.052, "step": 73030 }, { "epoch": 2.160052049446975, "grad_norm": 0.5693359971046448, "learning_rate": 3.3585624200747117e-06, "loss": 0.0835, "step": 73040 }, { "epoch": 2.1603477849411488, "grad_norm": 0.8896132111549377, "learning_rate": 3.3584357301557724e-06, "loss": 0.0794, "step": 73050 }, { "epoch": 2.1606435204353227, "grad_norm": 0.9066525101661682, "learning_rate": 3.358309040236833e-06, "loss": 0.09, "step": 73060 }, { "epoch": 2.1609392559294966, "grad_norm": 1.1672613620758057, "learning_rate": 3.3581823503178936e-06, "loss": 0.0644, "step": 73070 }, { "epoch": 2.1612349914236706, "grad_norm": 0.8186910152435303, "learning_rate": 3.358055660398954e-06, "loss": 0.0716, "step": 73080 }, { "epoch": 2.1615307269178445, "grad_norm": 1.082969069480896, "learning_rate": 3.357928970480015e-06, "loss": 0.0814, "step": 73090 }, { "epoch": 2.1618264624120185, "grad_norm": 0.9288350343704224, "learning_rate": 3.3578022805610755e-06, "loss": 0.0695, "step": 73100 }, { "epoch": 2.162122197906193, "grad_norm": 1.8181895017623901, "learning_rate": 3.3576755906421363e-06, "loss": 0.0863, "step": 73110 }, { "epoch": 2.162417933400367, "grad_norm": 0.5201042890548706, "learning_rate": 3.3575489007231967e-06, "loss": 0.0721, "step": 73120 }, { "epoch": 2.1627136688945408, "grad_norm": 1.0151644945144653, "learning_rate": 3.3574222108042575e-06, "loss": 0.0887, "step": 73130 }, { "epoch": 2.1630094043887147, "grad_norm": 0.8088253140449524, "learning_rate": 3.357295520885318e-06, "loss": 0.0917, "step": 73140 }, { "epoch": 2.1633051398828886, "grad_norm": 0.9796615242958069, "learning_rate": 3.3571688309663786e-06, "loss": 0.0837, "step": 73150 }, { "epoch": 2.1636008753770626, "grad_norm": 0.7975320816040039, "learning_rate": 3.357042141047439e-06, "loss": 0.0707, "step": 73160 }, { "epoch": 2.163896610871237, "grad_norm": 0.43639376759529114, "learning_rate": 3.3569154511285002e-06, "loss": 0.0676, "step": 73170 }, { "epoch": 2.164192346365411, "grad_norm": 0.7152603268623352, "learning_rate": 3.3567887612095606e-06, "loss": 0.065, "step": 73180 }, { "epoch": 2.164488081859585, "grad_norm": 1.031943678855896, "learning_rate": 3.3566620712906214e-06, "loss": 0.0722, "step": 73190 }, { "epoch": 2.164783817353759, "grad_norm": 1.3828352689743042, "learning_rate": 3.3565353813716817e-06, "loss": 0.0712, "step": 73200 }, { "epoch": 2.1650795528479327, "grad_norm": 1.0651620626449585, "learning_rate": 3.3564086914527425e-06, "loss": 0.0799, "step": 73210 }, { "epoch": 2.1653752883421067, "grad_norm": 0.5645517110824585, "learning_rate": 3.356282001533803e-06, "loss": 0.0701, "step": 73220 }, { "epoch": 2.1656710238362806, "grad_norm": 1.2365738153457642, "learning_rate": 3.3561553116148637e-06, "loss": 0.0927, "step": 73230 }, { "epoch": 2.165966759330455, "grad_norm": 0.9745633006095886, "learning_rate": 3.356028621695924e-06, "loss": 0.0912, "step": 73240 }, { "epoch": 2.166262494824629, "grad_norm": 0.788375198841095, "learning_rate": 3.3559019317769853e-06, "loss": 0.0789, "step": 73250 }, { "epoch": 2.166558230318803, "grad_norm": 1.0907893180847168, "learning_rate": 3.3557752418580456e-06, "loss": 0.0816, "step": 73260 }, { "epoch": 2.166853965812977, "grad_norm": 1.3121803998947144, "learning_rate": 3.3556485519391064e-06, "loss": 0.0791, "step": 73270 }, { "epoch": 2.167149701307151, "grad_norm": 0.6964975595474243, "learning_rate": 3.355521862020167e-06, "loss": 0.0652, "step": 73280 }, { "epoch": 2.1674454368013247, "grad_norm": 0.7005093693733215, "learning_rate": 3.3553951721012276e-06, "loss": 0.0761, "step": 73290 }, { "epoch": 2.167741172295499, "grad_norm": 0.9078810811042786, "learning_rate": 3.355268482182288e-06, "loss": 0.0778, "step": 73300 }, { "epoch": 2.168036907789673, "grad_norm": 0.7252604961395264, "learning_rate": 3.3551417922633487e-06, "loss": 0.074, "step": 73310 }, { "epoch": 2.168332643283847, "grad_norm": 0.716832160949707, "learning_rate": 3.355015102344409e-06, "loss": 0.0667, "step": 73320 }, { "epoch": 2.168628378778021, "grad_norm": 1.0117731094360352, "learning_rate": 3.3548884124254703e-06, "loss": 0.0638, "step": 73330 }, { "epoch": 2.168924114272195, "grad_norm": 0.7459130883216858, "learning_rate": 3.3547617225065307e-06, "loss": 0.0864, "step": 73340 }, { "epoch": 2.169219849766369, "grad_norm": 0.7564637660980225, "learning_rate": 3.354635032587591e-06, "loss": 0.0776, "step": 73350 }, { "epoch": 2.169515585260543, "grad_norm": 0.76300048828125, "learning_rate": 3.354508342668652e-06, "loss": 0.071, "step": 73360 }, { "epoch": 2.169811320754717, "grad_norm": 0.8964502811431885, "learning_rate": 3.354381652749712e-06, "loss": 0.0693, "step": 73370 }, { "epoch": 2.170107056248891, "grad_norm": 1.4048172235488892, "learning_rate": 3.354254962830773e-06, "loss": 0.0623, "step": 73380 }, { "epoch": 2.170402791743065, "grad_norm": 0.728446900844574, "learning_rate": 3.3541282729118334e-06, "loss": 0.0881, "step": 73390 }, { "epoch": 2.170698527237239, "grad_norm": 1.0238797664642334, "learning_rate": 3.354001582992894e-06, "loss": 0.0914, "step": 73400 }, { "epoch": 2.170994262731413, "grad_norm": 0.9195840358734131, "learning_rate": 3.353874893073955e-06, "loss": 0.0728, "step": 73410 }, { "epoch": 2.171289998225587, "grad_norm": 0.8465241193771362, "learning_rate": 3.3537482031550157e-06, "loss": 0.0951, "step": 73420 }, { "epoch": 2.171585733719761, "grad_norm": 0.8140630125999451, "learning_rate": 3.353621513236076e-06, "loss": 0.0813, "step": 73430 }, { "epoch": 2.171881469213935, "grad_norm": 0.6786138415336609, "learning_rate": 3.353494823317137e-06, "loss": 0.0921, "step": 73440 }, { "epoch": 2.172177204708109, "grad_norm": 0.6085035800933838, "learning_rate": 3.3533681333981973e-06, "loss": 0.0836, "step": 73450 }, { "epoch": 2.172472940202283, "grad_norm": 3.8710148334503174, "learning_rate": 3.353241443479258e-06, "loss": 0.0851, "step": 73460 }, { "epoch": 2.172768675696457, "grad_norm": 0.6042609214782715, "learning_rate": 3.3531147535603184e-06, "loss": 0.0851, "step": 73470 }, { "epoch": 2.173064411190631, "grad_norm": 0.879004716873169, "learning_rate": 3.352988063641379e-06, "loss": 0.0732, "step": 73480 }, { "epoch": 2.173360146684805, "grad_norm": 0.7085775136947632, "learning_rate": 3.35286137372244e-06, "loss": 0.0853, "step": 73490 }, { "epoch": 2.1736558821789793, "grad_norm": 1.0245254039764404, "learning_rate": 3.3527346838035008e-06, "loss": 0.0804, "step": 73500 }, { "epoch": 2.1739516176731533, "grad_norm": 0.9015503525733948, "learning_rate": 3.352607993884561e-06, "loss": 0.062, "step": 73510 }, { "epoch": 2.174247353167327, "grad_norm": 0.772279679775238, "learning_rate": 3.352481303965622e-06, "loss": 0.0664, "step": 73520 }, { "epoch": 2.174543088661501, "grad_norm": 1.1398309469223022, "learning_rate": 3.3523546140466823e-06, "loss": 0.0783, "step": 73530 }, { "epoch": 2.174838824155675, "grad_norm": 1.0141316652297974, "learning_rate": 3.352227924127743e-06, "loss": 0.0829, "step": 73540 }, { "epoch": 2.175134559649849, "grad_norm": 0.9283319711685181, "learning_rate": 3.3521012342088035e-06, "loss": 0.0901, "step": 73550 }, { "epoch": 2.1754302951440234, "grad_norm": 0.779546320438385, "learning_rate": 3.3519745442898642e-06, "loss": 0.0805, "step": 73560 }, { "epoch": 2.1757260306381974, "grad_norm": 0.554101824760437, "learning_rate": 3.351847854370925e-06, "loss": 0.0726, "step": 73570 }, { "epoch": 2.1760217661323713, "grad_norm": 0.7924546599388123, "learning_rate": 3.351721164451986e-06, "loss": 0.0687, "step": 73580 }, { "epoch": 2.1763175016265452, "grad_norm": 0.6920680403709412, "learning_rate": 3.351594474533046e-06, "loss": 0.0838, "step": 73590 }, { "epoch": 2.176613237120719, "grad_norm": 0.8886864185333252, "learning_rate": 3.351467784614107e-06, "loss": 0.0827, "step": 73600 }, { "epoch": 2.176908972614893, "grad_norm": 0.7769003510475159, "learning_rate": 3.3513410946951673e-06, "loss": 0.0589, "step": 73610 }, { "epoch": 2.177204708109067, "grad_norm": 0.5166786313056946, "learning_rate": 3.351214404776228e-06, "loss": 0.0464, "step": 73620 }, { "epoch": 2.1775004436032415, "grad_norm": 1.1952446699142456, "learning_rate": 3.3510877148572885e-06, "loss": 0.0859, "step": 73630 }, { "epoch": 2.1777961790974154, "grad_norm": 1.039586067199707, "learning_rate": 3.3509610249383493e-06, "loss": 0.0966, "step": 73640 }, { "epoch": 2.1780919145915894, "grad_norm": 0.7748615145683289, "learning_rate": 3.35083433501941e-06, "loss": 0.0892, "step": 73650 }, { "epoch": 2.1783876500857633, "grad_norm": 1.0064131021499634, "learning_rate": 3.350707645100471e-06, "loss": 0.0715, "step": 73660 }, { "epoch": 2.1786833855799372, "grad_norm": 1.0238715410232544, "learning_rate": 3.3505809551815312e-06, "loss": 0.0835, "step": 73670 }, { "epoch": 2.178979121074111, "grad_norm": 0.6288301944732666, "learning_rate": 3.350454265262592e-06, "loss": 0.058, "step": 73680 }, { "epoch": 2.179274856568285, "grad_norm": 1.0286051034927368, "learning_rate": 3.3503275753436524e-06, "loss": 0.0909, "step": 73690 }, { "epoch": 2.1795705920624595, "grad_norm": 0.7317423820495605, "learning_rate": 3.350200885424713e-06, "loss": 0.0606, "step": 73700 }, { "epoch": 2.1798663275566335, "grad_norm": 0.8271226286888123, "learning_rate": 3.3500741955057735e-06, "loss": 0.0664, "step": 73710 }, { "epoch": 2.1801620630508074, "grad_norm": 0.5038201212882996, "learning_rate": 3.3499475055868343e-06, "loss": 0.0652, "step": 73720 }, { "epoch": 2.1804577985449813, "grad_norm": 0.9549365043640137, "learning_rate": 3.349820815667895e-06, "loss": 0.072, "step": 73730 }, { "epoch": 2.1807535340391553, "grad_norm": 0.8469972014427185, "learning_rate": 3.349694125748956e-06, "loss": 0.0782, "step": 73740 }, { "epoch": 2.1810492695333292, "grad_norm": 0.7455132603645325, "learning_rate": 3.3495674358300163e-06, "loss": 0.0748, "step": 73750 }, { "epoch": 2.181345005027503, "grad_norm": 0.4297701120376587, "learning_rate": 3.3494407459110766e-06, "loss": 0.0669, "step": 73760 }, { "epoch": 2.1816407405216776, "grad_norm": 1.4017081260681152, "learning_rate": 3.3493140559921374e-06, "loss": 0.1093, "step": 73770 }, { "epoch": 2.1819364760158515, "grad_norm": 0.5876032710075378, "learning_rate": 3.349187366073198e-06, "loss": 0.0596, "step": 73780 }, { "epoch": 2.1822322115100254, "grad_norm": 0.8044346570968628, "learning_rate": 3.3490606761542586e-06, "loss": 0.1099, "step": 73790 }, { "epoch": 2.1825279470041994, "grad_norm": 1.041501760482788, "learning_rate": 3.348933986235319e-06, "loss": 0.0975, "step": 73800 }, { "epoch": 2.1828236824983733, "grad_norm": 0.7176015377044678, "learning_rate": 3.34880729631638e-06, "loss": 0.082, "step": 73810 }, { "epoch": 2.1831194179925473, "grad_norm": 0.845093846321106, "learning_rate": 3.3486806063974405e-06, "loss": 0.0696, "step": 73820 }, { "epoch": 2.1834151534867217, "grad_norm": 1.0753891468048096, "learning_rate": 3.3485539164785013e-06, "loss": 0.071, "step": 73830 }, { "epoch": 2.1837108889808956, "grad_norm": 1.244207739830017, "learning_rate": 3.3484272265595617e-06, "loss": 0.0914, "step": 73840 }, { "epoch": 2.1840066244750695, "grad_norm": 0.6613330841064453, "learning_rate": 3.3483005366406225e-06, "loss": 0.0676, "step": 73850 }, { "epoch": 2.1843023599692435, "grad_norm": 0.6848141551017761, "learning_rate": 3.348173846721683e-06, "loss": 0.0773, "step": 73860 }, { "epoch": 2.1845980954634174, "grad_norm": 0.3995108902454376, "learning_rate": 3.3480471568027436e-06, "loss": 0.0633, "step": 73870 }, { "epoch": 2.1848938309575914, "grad_norm": 0.8397091031074524, "learning_rate": 3.347920466883804e-06, "loss": 0.0641, "step": 73880 }, { "epoch": 2.1851895664517658, "grad_norm": 0.49725309014320374, "learning_rate": 3.347793776964865e-06, "loss": 0.0729, "step": 73890 }, { "epoch": 2.1854853019459397, "grad_norm": 0.6019169092178345, "learning_rate": 3.3476670870459256e-06, "loss": 0.0669, "step": 73900 }, { "epoch": 2.1857810374401136, "grad_norm": 0.739306628704071, "learning_rate": 3.3475403971269864e-06, "loss": 0.0838, "step": 73910 }, { "epoch": 2.1860767729342876, "grad_norm": 0.8639389872550964, "learning_rate": 3.3474137072080467e-06, "loss": 0.0581, "step": 73920 }, { "epoch": 2.1863725084284615, "grad_norm": 0.46454524993896484, "learning_rate": 3.3472870172891075e-06, "loss": 0.0664, "step": 73930 }, { "epoch": 2.1866682439226355, "grad_norm": 0.4959631562232971, "learning_rate": 3.347160327370168e-06, "loss": 0.0809, "step": 73940 }, { "epoch": 2.1869639794168094, "grad_norm": 0.7466253638267517, "learning_rate": 3.3470336374512287e-06, "loss": 0.0815, "step": 73950 }, { "epoch": 2.187259714910984, "grad_norm": 1.9037797451019287, "learning_rate": 3.346906947532289e-06, "loss": 0.0824, "step": 73960 }, { "epoch": 2.1875554504051578, "grad_norm": 0.7165631055831909, "learning_rate": 3.3467802576133503e-06, "loss": 0.0725, "step": 73970 }, { "epoch": 2.1878511858993317, "grad_norm": 1.933905839920044, "learning_rate": 3.3466535676944106e-06, "loss": 0.0741, "step": 73980 }, { "epoch": 2.1881469213935056, "grad_norm": 0.8278191089630127, "learning_rate": 3.3465268777754714e-06, "loss": 0.088, "step": 73990 }, { "epoch": 2.1884426568876796, "grad_norm": 0.8089788556098938, "learning_rate": 3.3464001878565318e-06, "loss": 0.0762, "step": 74000 }, { "epoch": 2.1887383923818535, "grad_norm": 1.1863733530044556, "learning_rate": 3.3462734979375926e-06, "loss": 0.094, "step": 74010 }, { "epoch": 2.1890341278760275, "grad_norm": 0.6083149313926697, "learning_rate": 3.346146808018653e-06, "loss": 0.0732, "step": 74020 }, { "epoch": 2.189329863370202, "grad_norm": 0.802657425403595, "learning_rate": 3.3460201180997137e-06, "loss": 0.0675, "step": 74030 }, { "epoch": 2.189625598864376, "grad_norm": 1.2098945379257202, "learning_rate": 3.345893428180774e-06, "loss": 0.0903, "step": 74040 }, { "epoch": 2.1899213343585497, "grad_norm": 0.8089709877967834, "learning_rate": 3.3457667382618353e-06, "loss": 0.0757, "step": 74050 }, { "epoch": 2.1902170698527237, "grad_norm": 0.8121131658554077, "learning_rate": 3.3456400483428957e-06, "loss": 0.0746, "step": 74060 }, { "epoch": 2.1905128053468976, "grad_norm": 0.4620065987110138, "learning_rate": 3.3455133584239565e-06, "loss": 0.0556, "step": 74070 }, { "epoch": 2.1908085408410716, "grad_norm": 0.8773826360702515, "learning_rate": 3.345386668505017e-06, "loss": 0.0855, "step": 74080 }, { "epoch": 2.191104276335246, "grad_norm": 0.7047937512397766, "learning_rate": 3.3452599785860776e-06, "loss": 0.0795, "step": 74090 }, { "epoch": 2.19140001182942, "grad_norm": 0.754560112953186, "learning_rate": 3.345133288667138e-06, "loss": 0.0872, "step": 74100 }, { "epoch": 2.191695747323594, "grad_norm": 2.0087080001831055, "learning_rate": 3.3450065987481988e-06, "loss": 0.0737, "step": 74110 }, { "epoch": 2.191991482817768, "grad_norm": 1.3219341039657593, "learning_rate": 3.344879908829259e-06, "loss": 0.0677, "step": 74120 }, { "epoch": 2.1922872183119417, "grad_norm": 1.0224478244781494, "learning_rate": 3.3447532189103203e-06, "loss": 0.0678, "step": 74130 }, { "epoch": 2.1925829538061157, "grad_norm": 0.6800541877746582, "learning_rate": 3.3446265289913807e-06, "loss": 0.0844, "step": 74140 }, { "epoch": 2.1928786893002896, "grad_norm": 0.706408679485321, "learning_rate": 3.3444998390724415e-06, "loss": 0.0632, "step": 74150 }, { "epoch": 2.193174424794464, "grad_norm": 0.7885420918464661, "learning_rate": 3.344373149153502e-06, "loss": 0.0863, "step": 74160 }, { "epoch": 2.193470160288638, "grad_norm": 0.8366873264312744, "learning_rate": 3.3442464592345622e-06, "loss": 0.0696, "step": 74170 }, { "epoch": 2.193765895782812, "grad_norm": 0.5546212792396545, "learning_rate": 3.344119769315623e-06, "loss": 0.0566, "step": 74180 }, { "epoch": 2.194061631276986, "grad_norm": 0.9852002859115601, "learning_rate": 3.3439930793966834e-06, "loss": 0.0888, "step": 74190 }, { "epoch": 2.19435736677116, "grad_norm": 0.6347293853759766, "learning_rate": 3.343866389477744e-06, "loss": 0.0641, "step": 74200 }, { "epoch": 2.1946531022653337, "grad_norm": 0.9100726246833801, "learning_rate": 3.3437396995588045e-06, "loss": 0.0632, "step": 74210 }, { "epoch": 2.194948837759508, "grad_norm": 1.818372368812561, "learning_rate": 3.3436130096398658e-06, "loss": 0.0906, "step": 74220 }, { "epoch": 2.195244573253682, "grad_norm": 0.6285003423690796, "learning_rate": 3.343486319720926e-06, "loss": 0.0624, "step": 74230 }, { "epoch": 2.195540308747856, "grad_norm": 0.5421401262283325, "learning_rate": 3.343359629801987e-06, "loss": 0.0809, "step": 74240 }, { "epoch": 2.19583604424203, "grad_norm": 0.8286568522453308, "learning_rate": 3.3432329398830473e-06, "loss": 0.0962, "step": 74250 }, { "epoch": 2.196131779736204, "grad_norm": 0.6264868378639221, "learning_rate": 3.343106249964108e-06, "loss": 0.0803, "step": 74260 }, { "epoch": 2.196427515230378, "grad_norm": 0.7069072723388672, "learning_rate": 3.3429795600451684e-06, "loss": 0.0679, "step": 74270 }, { "epoch": 2.1967232507245518, "grad_norm": 0.8920868635177612, "learning_rate": 3.3428528701262292e-06, "loss": 0.0728, "step": 74280 }, { "epoch": 2.197018986218726, "grad_norm": 1.025835633277893, "learning_rate": 3.3427261802072896e-06, "loss": 0.0794, "step": 74290 }, { "epoch": 2.1973147217129, "grad_norm": 1.062601089477539, "learning_rate": 3.342599490288351e-06, "loss": 0.0933, "step": 74300 }, { "epoch": 2.197610457207074, "grad_norm": 0.6470765471458435, "learning_rate": 3.342472800369411e-06, "loss": 0.0881, "step": 74310 }, { "epoch": 2.197906192701248, "grad_norm": 0.5268717408180237, "learning_rate": 3.342346110450472e-06, "loss": 0.0791, "step": 74320 }, { "epoch": 2.198201928195422, "grad_norm": 0.9217900633811951, "learning_rate": 3.3422194205315323e-06, "loss": 0.0566, "step": 74330 }, { "epoch": 2.198497663689596, "grad_norm": 1.3477098941802979, "learning_rate": 3.342092730612593e-06, "loss": 0.0752, "step": 74340 }, { "epoch": 2.19879339918377, "grad_norm": 0.7997663617134094, "learning_rate": 3.3419660406936535e-06, "loss": 0.0773, "step": 74350 }, { "epoch": 2.199089134677944, "grad_norm": 0.47204291820526123, "learning_rate": 3.3418393507747143e-06, "loss": 0.0812, "step": 74360 }, { "epoch": 2.199384870172118, "grad_norm": 0.9809486865997314, "learning_rate": 3.3417126608557746e-06, "loss": 0.0673, "step": 74370 }, { "epoch": 2.199680605666292, "grad_norm": 0.6825376152992249, "learning_rate": 3.341585970936836e-06, "loss": 0.0632, "step": 74380 }, { "epoch": 2.199976341160466, "grad_norm": 0.4971790313720703, "learning_rate": 3.3414592810178962e-06, "loss": 0.0774, "step": 74390 }, { "epoch": 2.20027207665464, "grad_norm": 0.7109484672546387, "learning_rate": 3.341332591098957e-06, "loss": 0.0691, "step": 74400 }, { "epoch": 2.200567812148814, "grad_norm": 0.8409108519554138, "learning_rate": 3.3412059011800174e-06, "loss": 0.0745, "step": 74410 }, { "epoch": 2.2008635476429883, "grad_norm": 0.5485775470733643, "learning_rate": 3.341079211261078e-06, "loss": 0.0689, "step": 74420 }, { "epoch": 2.2011592831371622, "grad_norm": 1.2427780628204346, "learning_rate": 3.3409525213421385e-06, "loss": 0.0719, "step": 74430 }, { "epoch": 2.201455018631336, "grad_norm": 1.0429279804229736, "learning_rate": 3.3408258314231993e-06, "loss": 0.0822, "step": 74440 }, { "epoch": 2.20175075412551, "grad_norm": 1.0353132486343384, "learning_rate": 3.3406991415042597e-06, "loss": 0.0727, "step": 74450 }, { "epoch": 2.202046489619684, "grad_norm": 0.9012273550033569, "learning_rate": 3.340572451585321e-06, "loss": 0.0792, "step": 74460 }, { "epoch": 2.202342225113858, "grad_norm": 0.7180676460266113, "learning_rate": 3.3404457616663813e-06, "loss": 0.065, "step": 74470 }, { "epoch": 2.2026379606080324, "grad_norm": 1.2548893690109253, "learning_rate": 3.340319071747442e-06, "loss": 0.0736, "step": 74480 }, { "epoch": 2.2029336961022064, "grad_norm": 1.5448122024536133, "learning_rate": 3.3401923818285024e-06, "loss": 0.0988, "step": 74490 }, { "epoch": 2.2032294315963803, "grad_norm": 0.5341573357582092, "learning_rate": 3.340065691909563e-06, "loss": 0.0793, "step": 74500 }, { "epoch": 2.2035251670905542, "grad_norm": 1.316607117652893, "learning_rate": 3.3399390019906236e-06, "loss": 0.0668, "step": 74510 }, { "epoch": 2.203820902584728, "grad_norm": 1.049118161201477, "learning_rate": 3.3398123120716844e-06, "loss": 0.0661, "step": 74520 }, { "epoch": 2.204116638078902, "grad_norm": 1.1945933103561401, "learning_rate": 3.3396856221527447e-06, "loss": 0.0632, "step": 74530 }, { "epoch": 2.204412373573076, "grad_norm": 1.1968237161636353, "learning_rate": 3.339558932233806e-06, "loss": 0.0876, "step": 74540 }, { "epoch": 2.2047081090672505, "grad_norm": 1.2030863761901855, "learning_rate": 3.3394322423148663e-06, "loss": 0.0875, "step": 74550 }, { "epoch": 2.2050038445614244, "grad_norm": 0.8879080414772034, "learning_rate": 3.339305552395927e-06, "loss": 0.0895, "step": 74560 }, { "epoch": 2.2052995800555983, "grad_norm": 0.5115868449211121, "learning_rate": 3.3391788624769875e-06, "loss": 0.0702, "step": 74570 }, { "epoch": 2.2055953155497723, "grad_norm": 0.8462963700294495, "learning_rate": 3.339052172558048e-06, "loss": 0.0625, "step": 74580 }, { "epoch": 2.2058910510439462, "grad_norm": 0.6269963979721069, "learning_rate": 3.3389254826391086e-06, "loss": 0.0744, "step": 74590 }, { "epoch": 2.20618678653812, "grad_norm": 0.8145015835762024, "learning_rate": 3.338798792720169e-06, "loss": 0.0693, "step": 74600 }, { "epoch": 2.206482522032294, "grad_norm": 1.0258442163467407, "learning_rate": 3.3386721028012298e-06, "loss": 0.0777, "step": 74610 }, { "epoch": 2.2067782575264685, "grad_norm": 0.6787322759628296, "learning_rate": 3.3385454128822906e-06, "loss": 0.0636, "step": 74620 }, { "epoch": 2.2070739930206424, "grad_norm": 0.7943761348724365, "learning_rate": 3.3384187229633514e-06, "loss": 0.0828, "step": 74630 }, { "epoch": 2.2073697285148164, "grad_norm": 0.5401843786239624, "learning_rate": 3.3382920330444117e-06, "loss": 0.0829, "step": 74640 }, { "epoch": 2.2076654640089903, "grad_norm": 0.8536997437477112, "learning_rate": 3.3381653431254725e-06, "loss": 0.0889, "step": 74650 }, { "epoch": 2.2079611995031643, "grad_norm": 0.5315716862678528, "learning_rate": 3.338038653206533e-06, "loss": 0.066, "step": 74660 }, { "epoch": 2.208256934997338, "grad_norm": 0.7188588976860046, "learning_rate": 3.3379119632875937e-06, "loss": 0.0631, "step": 74670 }, { "epoch": 2.208552670491512, "grad_norm": 0.7861222624778748, "learning_rate": 3.337785273368654e-06, "loss": 0.0671, "step": 74680 }, { "epoch": 2.2088484059856865, "grad_norm": 0.8655344247817993, "learning_rate": 3.337658583449715e-06, "loss": 0.0773, "step": 74690 }, { "epoch": 2.2091441414798605, "grad_norm": 0.7978233695030212, "learning_rate": 3.3375318935307756e-06, "loss": 0.0757, "step": 74700 }, { "epoch": 2.2094398769740344, "grad_norm": 0.4799853265285492, "learning_rate": 3.3374052036118364e-06, "loss": 0.073, "step": 74710 }, { "epoch": 2.2097356124682084, "grad_norm": 0.49844154715538025, "learning_rate": 3.3372785136928968e-06, "loss": 0.0634, "step": 74720 }, { "epoch": 2.2100313479623823, "grad_norm": 0.4799097180366516, "learning_rate": 3.3371518237739576e-06, "loss": 0.0653, "step": 74730 }, { "epoch": 2.2103270834565563, "grad_norm": 0.7625836133956909, "learning_rate": 3.337025133855018e-06, "loss": 0.0806, "step": 74740 }, { "epoch": 2.2106228189507307, "grad_norm": 0.7432123422622681, "learning_rate": 3.3368984439360787e-06, "loss": 0.079, "step": 74750 }, { "epoch": 2.2109185544449046, "grad_norm": 0.7840718030929565, "learning_rate": 3.336771754017139e-06, "loss": 0.0892, "step": 74760 }, { "epoch": 2.2112142899390785, "grad_norm": 1.2033754587173462, "learning_rate": 3.3366450640982e-06, "loss": 0.0824, "step": 74770 }, { "epoch": 2.2115100254332525, "grad_norm": 1.128431797027588, "learning_rate": 3.3365183741792607e-06, "loss": 0.0789, "step": 74780 }, { "epoch": 2.2118057609274264, "grad_norm": 1.137970209121704, "learning_rate": 3.3363916842603214e-06, "loss": 0.0715, "step": 74790 }, { "epoch": 2.2121014964216004, "grad_norm": 0.5330039262771606, "learning_rate": 3.336264994341382e-06, "loss": 0.0742, "step": 74800 }, { "epoch": 2.2123972319157748, "grad_norm": 0.7085964679718018, "learning_rate": 3.3361383044224426e-06, "loss": 0.0786, "step": 74810 }, { "epoch": 2.2126929674099487, "grad_norm": 0.4692862927913666, "learning_rate": 3.336011614503503e-06, "loss": 0.0883, "step": 74820 }, { "epoch": 2.2129887029041226, "grad_norm": 0.9012659192085266, "learning_rate": 3.3358849245845638e-06, "loss": 0.0789, "step": 74830 }, { "epoch": 2.2132844383982966, "grad_norm": 0.7982600927352905, "learning_rate": 3.335758234665624e-06, "loss": 0.0851, "step": 74840 }, { "epoch": 2.2135801738924705, "grad_norm": 0.8775944113731384, "learning_rate": 3.335631544746685e-06, "loss": 0.0811, "step": 74850 }, { "epoch": 2.2138759093866445, "grad_norm": 0.6918126940727234, "learning_rate": 3.3355048548277457e-06, "loss": 0.0824, "step": 74860 }, { "epoch": 2.2141716448808184, "grad_norm": 0.8205420970916748, "learning_rate": 3.3353781649088065e-06, "loss": 0.0804, "step": 74870 }, { "epoch": 2.214467380374993, "grad_norm": 0.8317002058029175, "learning_rate": 3.335251474989867e-06, "loss": 0.0603, "step": 74880 }, { "epoch": 2.2147631158691667, "grad_norm": 0.821914553642273, "learning_rate": 3.3351247850709276e-06, "loss": 0.0832, "step": 74890 }, { "epoch": 2.2150588513633407, "grad_norm": 0.9884838461875916, "learning_rate": 3.334998095151988e-06, "loss": 0.0929, "step": 74900 }, { "epoch": 2.2153545868575146, "grad_norm": 0.7600764036178589, "learning_rate": 3.334871405233049e-06, "loss": 0.0839, "step": 74910 }, { "epoch": 2.2156503223516886, "grad_norm": 0.6807572841644287, "learning_rate": 3.334744715314109e-06, "loss": 0.07, "step": 74920 }, { "epoch": 2.2159460578458625, "grad_norm": 1.459364891052246, "learning_rate": 3.33461802539517e-06, "loss": 0.0725, "step": 74930 }, { "epoch": 2.2162417933400365, "grad_norm": 1.1229921579360962, "learning_rate": 3.3344913354762307e-06, "loss": 0.0913, "step": 74940 }, { "epoch": 2.216537528834211, "grad_norm": 0.839870810508728, "learning_rate": 3.3343646455572915e-06, "loss": 0.0893, "step": 74950 }, { "epoch": 2.216833264328385, "grad_norm": 0.5814005136489868, "learning_rate": 3.334237955638352e-06, "loss": 0.0749, "step": 74960 }, { "epoch": 2.2171289998225587, "grad_norm": 0.4985211491584778, "learning_rate": 3.3341112657194127e-06, "loss": 0.0659, "step": 74970 }, { "epoch": 2.2174247353167327, "grad_norm": 0.8578279614448547, "learning_rate": 3.333984575800473e-06, "loss": 0.0692, "step": 74980 }, { "epoch": 2.2177204708109066, "grad_norm": 1.106833577156067, "learning_rate": 3.333857885881534e-06, "loss": 0.0956, "step": 74990 }, { "epoch": 2.2180162063050806, "grad_norm": 0.46416404843330383, "learning_rate": 3.333731195962594e-06, "loss": 0.0665, "step": 75000 }, { "epoch": 2.218311941799255, "grad_norm": 1.1446912288665771, "learning_rate": 3.3336045060436546e-06, "loss": 0.0748, "step": 75010 }, { "epoch": 2.218607677293429, "grad_norm": 0.4427725672721863, "learning_rate": 3.333477816124716e-06, "loss": 0.0713, "step": 75020 }, { "epoch": 2.218903412787603, "grad_norm": 0.837424099445343, "learning_rate": 3.333351126205776e-06, "loss": 0.0768, "step": 75030 }, { "epoch": 2.219199148281777, "grad_norm": 1.0502935647964478, "learning_rate": 3.333224436286837e-06, "loss": 0.0795, "step": 75040 }, { "epoch": 2.2194948837759507, "grad_norm": 1.4601454734802246, "learning_rate": 3.3330977463678973e-06, "loss": 0.0849, "step": 75050 }, { "epoch": 2.2197906192701247, "grad_norm": 0.5031377077102661, "learning_rate": 3.332971056448958e-06, "loss": 0.0712, "step": 75060 }, { "epoch": 2.2200863547642986, "grad_norm": 1.1596033573150635, "learning_rate": 3.3328443665300185e-06, "loss": 0.0936, "step": 75070 }, { "epoch": 2.220382090258473, "grad_norm": 0.5677051544189453, "learning_rate": 3.3327176766110793e-06, "loss": 0.0595, "step": 75080 }, { "epoch": 2.220677825752647, "grad_norm": 0.8341840505599976, "learning_rate": 3.3325909866921396e-06, "loss": 0.0825, "step": 75090 }, { "epoch": 2.220973561246821, "grad_norm": 0.7154794335365295, "learning_rate": 3.332464296773201e-06, "loss": 0.0779, "step": 75100 }, { "epoch": 2.221269296740995, "grad_norm": 1.1127924919128418, "learning_rate": 3.332337606854261e-06, "loss": 0.0719, "step": 75110 }, { "epoch": 2.2215650322351688, "grad_norm": 1.058274745941162, "learning_rate": 3.332210916935322e-06, "loss": 0.0778, "step": 75120 }, { "epoch": 2.2218607677293427, "grad_norm": 0.5922613739967346, "learning_rate": 3.3320842270163824e-06, "loss": 0.0686, "step": 75130 }, { "epoch": 2.222156503223517, "grad_norm": 0.7979335784912109, "learning_rate": 3.331957537097443e-06, "loss": 0.099, "step": 75140 }, { "epoch": 2.222452238717691, "grad_norm": 1.139207124710083, "learning_rate": 3.3318308471785035e-06, "loss": 0.0783, "step": 75150 }, { "epoch": 2.222747974211865, "grad_norm": 1.8407127857208252, "learning_rate": 3.3317041572595643e-06, "loss": 0.0785, "step": 75160 }, { "epoch": 2.223043709706039, "grad_norm": 0.6616493463516235, "learning_rate": 3.3315774673406247e-06, "loss": 0.068, "step": 75170 }, { "epoch": 2.223339445200213, "grad_norm": 0.7686536312103271, "learning_rate": 3.331450777421686e-06, "loss": 0.0605, "step": 75180 }, { "epoch": 2.223635180694387, "grad_norm": 0.655412495136261, "learning_rate": 3.3313240875027462e-06, "loss": 0.0636, "step": 75190 }, { "epoch": 2.2239309161885608, "grad_norm": 0.5039514899253845, "learning_rate": 3.331197397583807e-06, "loss": 0.094, "step": 75200 }, { "epoch": 2.224226651682735, "grad_norm": 1.0898876190185547, "learning_rate": 3.3310707076648674e-06, "loss": 0.0735, "step": 75210 }, { "epoch": 2.224522387176909, "grad_norm": 0.5880303382873535, "learning_rate": 3.330944017745928e-06, "loss": 0.0609, "step": 75220 }, { "epoch": 2.224818122671083, "grad_norm": 1.170494556427002, "learning_rate": 3.3308173278269886e-06, "loss": 0.0843, "step": 75230 }, { "epoch": 2.225113858165257, "grad_norm": 0.6945592761039734, "learning_rate": 3.3306906379080493e-06, "loss": 0.0904, "step": 75240 }, { "epoch": 2.225409593659431, "grad_norm": 0.9737545847892761, "learning_rate": 3.3305639479891097e-06, "loss": 0.069, "step": 75250 }, { "epoch": 2.225705329153605, "grad_norm": 0.871185302734375, "learning_rate": 3.330437258070171e-06, "loss": 0.0952, "step": 75260 }, { "epoch": 2.226001064647779, "grad_norm": 0.5521631240844727, "learning_rate": 3.3303105681512313e-06, "loss": 0.0671, "step": 75270 }, { "epoch": 2.226296800141953, "grad_norm": 0.7027758955955505, "learning_rate": 3.330183878232292e-06, "loss": 0.08, "step": 75280 }, { "epoch": 2.226592535636127, "grad_norm": 0.8310226202011108, "learning_rate": 3.3300571883133524e-06, "loss": 0.098, "step": 75290 }, { "epoch": 2.226888271130301, "grad_norm": 0.4875766932964325, "learning_rate": 3.3299304983944132e-06, "loss": 0.0791, "step": 75300 }, { "epoch": 2.227184006624475, "grad_norm": 0.4809073805809021, "learning_rate": 3.3298038084754736e-06, "loss": 0.071, "step": 75310 }, { "epoch": 2.227479742118649, "grad_norm": 0.8221019506454468, "learning_rate": 3.3296771185565344e-06, "loss": 0.0758, "step": 75320 }, { "epoch": 2.227775477612823, "grad_norm": 0.9694706201553345, "learning_rate": 3.3295504286375948e-06, "loss": 0.086, "step": 75330 }, { "epoch": 2.2280712131069973, "grad_norm": 1.1273889541625977, "learning_rate": 3.329423738718656e-06, "loss": 0.1047, "step": 75340 }, { "epoch": 2.2283669486011712, "grad_norm": 0.8191384673118591, "learning_rate": 3.3292970487997163e-06, "loss": 0.0639, "step": 75350 }, { "epoch": 2.228662684095345, "grad_norm": 0.798602819442749, "learning_rate": 3.329170358880777e-06, "loss": 0.087, "step": 75360 }, { "epoch": 2.228958419589519, "grad_norm": 0.6175433397293091, "learning_rate": 3.3290436689618375e-06, "loss": 0.0635, "step": 75370 }, { "epoch": 2.229254155083693, "grad_norm": 1.2976438999176025, "learning_rate": 3.3289169790428983e-06, "loss": 0.0761, "step": 75380 }, { "epoch": 2.229549890577867, "grad_norm": 0.7642723917961121, "learning_rate": 3.3287902891239586e-06, "loss": 0.0855, "step": 75390 }, { "epoch": 2.2298456260720414, "grad_norm": 0.7783822417259216, "learning_rate": 3.3286635992050194e-06, "loss": 0.0872, "step": 75400 }, { "epoch": 2.2301413615662153, "grad_norm": 0.6356682777404785, "learning_rate": 3.32853690928608e-06, "loss": 0.0627, "step": 75410 }, { "epoch": 2.2304370970603893, "grad_norm": 0.572115957736969, "learning_rate": 3.3284102193671406e-06, "loss": 0.0744, "step": 75420 }, { "epoch": 2.2307328325545632, "grad_norm": 0.628948986530304, "learning_rate": 3.3282835294482014e-06, "loss": 0.0691, "step": 75430 }, { "epoch": 2.231028568048737, "grad_norm": 0.9056342244148254, "learning_rate": 3.3281568395292617e-06, "loss": 0.0747, "step": 75440 }, { "epoch": 2.231324303542911, "grad_norm": 0.6691081523895264, "learning_rate": 3.3280301496103225e-06, "loss": 0.0786, "step": 75450 }, { "epoch": 2.231620039037085, "grad_norm": 0.7168769240379333, "learning_rate": 3.327903459691383e-06, "loss": 0.0878, "step": 75460 }, { "epoch": 2.2319157745312594, "grad_norm": 0.719695508480072, "learning_rate": 3.3277767697724437e-06, "loss": 0.0755, "step": 75470 }, { "epoch": 2.2322115100254334, "grad_norm": 0.9676816463470459, "learning_rate": 3.327650079853504e-06, "loss": 0.0807, "step": 75480 }, { "epoch": 2.2325072455196073, "grad_norm": 0.8226722478866577, "learning_rate": 3.327523389934565e-06, "loss": 0.0876, "step": 75490 }, { "epoch": 2.2328029810137813, "grad_norm": 1.149207592010498, "learning_rate": 3.3273967000156256e-06, "loss": 0.0781, "step": 75500 }, { "epoch": 2.233098716507955, "grad_norm": 0.9487044215202332, "learning_rate": 3.3272700100966864e-06, "loss": 0.0861, "step": 75510 }, { "epoch": 2.233394452002129, "grad_norm": 0.8560929298400879, "learning_rate": 3.327143320177747e-06, "loss": 0.0761, "step": 75520 }, { "epoch": 2.233690187496303, "grad_norm": 0.6105039119720459, "learning_rate": 3.3270166302588076e-06, "loss": 0.0781, "step": 75530 }, { "epoch": 2.2339859229904775, "grad_norm": 0.7415565848350525, "learning_rate": 3.326889940339868e-06, "loss": 0.081, "step": 75540 }, { "epoch": 2.2342816584846514, "grad_norm": 0.7011340260505676, "learning_rate": 3.3267632504209287e-06, "loss": 0.1012, "step": 75550 }, { "epoch": 2.2345773939788254, "grad_norm": 0.7226959466934204, "learning_rate": 3.326636560501989e-06, "loss": 0.068, "step": 75560 }, { "epoch": 2.2348731294729993, "grad_norm": 0.8746832609176636, "learning_rate": 3.32650987058305e-06, "loss": 0.0689, "step": 75570 }, { "epoch": 2.2351688649671733, "grad_norm": 0.7643082141876221, "learning_rate": 3.3263831806641107e-06, "loss": 0.0678, "step": 75580 }, { "epoch": 2.235464600461347, "grad_norm": 0.7757977843284607, "learning_rate": 3.3262564907451715e-06, "loss": 0.0836, "step": 75590 }, { "epoch": 2.235760335955521, "grad_norm": 0.6961070895195007, "learning_rate": 3.326129800826232e-06, "loss": 0.0717, "step": 75600 }, { "epoch": 2.2360560714496955, "grad_norm": 1.0878361463546753, "learning_rate": 3.3260031109072926e-06, "loss": 0.0633, "step": 75610 }, { "epoch": 2.2363518069438695, "grad_norm": 0.4177478849887848, "learning_rate": 3.325876420988353e-06, "loss": 0.0709, "step": 75620 }, { "epoch": 2.2366475424380434, "grad_norm": 1.2687599658966064, "learning_rate": 3.3257497310694138e-06, "loss": 0.083, "step": 75630 }, { "epoch": 2.2369432779322174, "grad_norm": 1.1720870733261108, "learning_rate": 3.325623041150474e-06, "loss": 0.092, "step": 75640 }, { "epoch": 2.2372390134263913, "grad_norm": 1.0716359615325928, "learning_rate": 3.325496351231535e-06, "loss": 0.0793, "step": 75650 }, { "epoch": 2.2375347489205653, "grad_norm": 0.6268373131752014, "learning_rate": 3.3253696613125957e-06, "loss": 0.069, "step": 75660 }, { "epoch": 2.2378304844147396, "grad_norm": 0.5698441863059998, "learning_rate": 3.3252429713936565e-06, "loss": 0.0777, "step": 75670 }, { "epoch": 2.2381262199089136, "grad_norm": 0.569153368473053, "learning_rate": 3.325116281474717e-06, "loss": 0.0701, "step": 75680 }, { "epoch": 2.2384219554030875, "grad_norm": 0.8159182071685791, "learning_rate": 3.3249895915557777e-06, "loss": 0.0914, "step": 75690 }, { "epoch": 2.2387176908972615, "grad_norm": 1.0289641618728638, "learning_rate": 3.324862901636838e-06, "loss": 0.0814, "step": 75700 }, { "epoch": 2.2390134263914354, "grad_norm": 0.956110954284668, "learning_rate": 3.324736211717899e-06, "loss": 0.0794, "step": 75710 }, { "epoch": 2.2393091618856094, "grad_norm": 0.35488244891166687, "learning_rate": 3.324609521798959e-06, "loss": 0.0889, "step": 75720 }, { "epoch": 2.2396048973797837, "grad_norm": 0.839815080165863, "learning_rate": 3.32448283188002e-06, "loss": 0.0813, "step": 75730 }, { "epoch": 2.2399006328739577, "grad_norm": 0.9678882360458374, "learning_rate": 3.3243561419610808e-06, "loss": 0.0776, "step": 75740 }, { "epoch": 2.2401963683681316, "grad_norm": 1.0656319856643677, "learning_rate": 3.3242294520421416e-06, "loss": 0.0775, "step": 75750 }, { "epoch": 2.2404921038623056, "grad_norm": 0.4454902708530426, "learning_rate": 3.324102762123202e-06, "loss": 0.057, "step": 75760 }, { "epoch": 2.2407878393564795, "grad_norm": 0.6174764037132263, "learning_rate": 3.3239760722042627e-06, "loss": 0.0493, "step": 75770 }, { "epoch": 2.2410835748506535, "grad_norm": 0.7750703692436218, "learning_rate": 3.323849382285323e-06, "loss": 0.0582, "step": 75780 }, { "epoch": 2.2413793103448274, "grad_norm": 0.7379878163337708, "learning_rate": 3.323722692366384e-06, "loss": 0.0886, "step": 75790 }, { "epoch": 2.241675045839002, "grad_norm": 0.9346755146980286, "learning_rate": 3.3235960024474442e-06, "loss": 0.096, "step": 75800 }, { "epoch": 2.2419707813331757, "grad_norm": 0.8032132983207703, "learning_rate": 3.323469312528505e-06, "loss": 0.0638, "step": 75810 }, { "epoch": 2.2422665168273497, "grad_norm": 1.1307772397994995, "learning_rate": 3.323342622609566e-06, "loss": 0.0688, "step": 75820 }, { "epoch": 2.2425622523215236, "grad_norm": 0.8144747614860535, "learning_rate": 3.323215932690626e-06, "loss": 0.0738, "step": 75830 }, { "epoch": 2.2428579878156976, "grad_norm": 0.8380735516548157, "learning_rate": 3.323089242771687e-06, "loss": 0.0905, "step": 75840 }, { "epoch": 2.2431537233098715, "grad_norm": 1.035637617111206, "learning_rate": 3.3229625528527473e-06, "loss": 0.0962, "step": 75850 }, { "epoch": 2.2434494588040454, "grad_norm": 0.8751523494720459, "learning_rate": 3.322835862933808e-06, "loss": 0.0915, "step": 75860 }, { "epoch": 2.24374519429822, "grad_norm": 0.6452653408050537, "learning_rate": 3.3227091730148685e-06, "loss": 0.0683, "step": 75870 }, { "epoch": 2.244040929792394, "grad_norm": 0.9344536066055298, "learning_rate": 3.3225824830959293e-06, "loss": 0.0696, "step": 75880 }, { "epoch": 2.2443366652865677, "grad_norm": 0.9127082228660583, "learning_rate": 3.3224557931769897e-06, "loss": 0.0927, "step": 75890 }, { "epoch": 2.2446324007807417, "grad_norm": 0.482921838760376, "learning_rate": 3.322329103258051e-06, "loss": 0.0612, "step": 75900 }, { "epoch": 2.2449281362749156, "grad_norm": 0.7279992699623108, "learning_rate": 3.3222024133391112e-06, "loss": 0.093, "step": 75910 }, { "epoch": 2.2452238717690896, "grad_norm": 0.5620589256286621, "learning_rate": 3.322075723420172e-06, "loss": 0.0667, "step": 75920 }, { "epoch": 2.245519607263264, "grad_norm": 0.6971695423126221, "learning_rate": 3.3219490335012324e-06, "loss": 0.0754, "step": 75930 }, { "epoch": 2.245815342757438, "grad_norm": 1.2268964052200317, "learning_rate": 3.321822343582293e-06, "loss": 0.0909, "step": 75940 }, { "epoch": 2.246111078251612, "grad_norm": 1.0535645484924316, "learning_rate": 3.3216956536633535e-06, "loss": 0.0776, "step": 75950 }, { "epoch": 2.2464068137457858, "grad_norm": 0.7060681581497192, "learning_rate": 3.3215689637444143e-06, "loss": 0.08, "step": 75960 }, { "epoch": 2.2467025492399597, "grad_norm": 0.6857613921165466, "learning_rate": 3.3214422738254747e-06, "loss": 0.0662, "step": 75970 }, { "epoch": 2.2469982847341337, "grad_norm": 1.7355804443359375, "learning_rate": 3.321315583906536e-06, "loss": 0.0848, "step": 75980 }, { "epoch": 2.2472940202283076, "grad_norm": 0.7389030456542969, "learning_rate": 3.3211888939875963e-06, "loss": 0.0919, "step": 75990 }, { "epoch": 2.247589755722482, "grad_norm": 0.7006120681762695, "learning_rate": 3.321062204068657e-06, "loss": 0.0777, "step": 76000 }, { "epoch": 2.247885491216656, "grad_norm": 1.190442681312561, "learning_rate": 3.3209355141497174e-06, "loss": 0.0832, "step": 76010 }, { "epoch": 2.24818122671083, "grad_norm": 0.863633394241333, "learning_rate": 3.3208088242307782e-06, "loss": 0.0588, "step": 76020 }, { "epoch": 2.248476962205004, "grad_norm": 0.7404044270515442, "learning_rate": 3.3206821343118386e-06, "loss": 0.0831, "step": 76030 }, { "epoch": 2.2487726976991778, "grad_norm": 0.9898576140403748, "learning_rate": 3.3205554443928994e-06, "loss": 0.08, "step": 76040 }, { "epoch": 2.2490684331933517, "grad_norm": 0.8617891073226929, "learning_rate": 3.3204287544739597e-06, "loss": 0.0794, "step": 76050 }, { "epoch": 2.249364168687526, "grad_norm": 0.9308375716209412, "learning_rate": 3.320302064555021e-06, "loss": 0.0733, "step": 76060 }, { "epoch": 2.2496599041817, "grad_norm": 0.8531404137611389, "learning_rate": 3.3201753746360813e-06, "loss": 0.0951, "step": 76070 }, { "epoch": 2.249955639675874, "grad_norm": 0.9584431648254395, "learning_rate": 3.320048684717142e-06, "loss": 0.0567, "step": 76080 }, { "epoch": 2.250251375170048, "grad_norm": 0.9464530348777771, "learning_rate": 3.3199219947982025e-06, "loss": 0.0779, "step": 76090 }, { "epoch": 2.250547110664222, "grad_norm": 0.6546347141265869, "learning_rate": 3.3197953048792633e-06, "loss": 0.0817, "step": 76100 }, { "epoch": 2.250842846158396, "grad_norm": 0.7071324586868286, "learning_rate": 3.3196686149603236e-06, "loss": 0.0823, "step": 76110 }, { "epoch": 2.2511385816525697, "grad_norm": 0.6118316054344177, "learning_rate": 3.3195419250413844e-06, "loss": 0.0606, "step": 76120 }, { "epoch": 2.251434317146744, "grad_norm": 1.2119040489196777, "learning_rate": 3.3194152351224448e-06, "loss": 0.0886, "step": 76130 }, { "epoch": 2.251730052640918, "grad_norm": 1.2181364297866821, "learning_rate": 3.319288545203506e-06, "loss": 0.0979, "step": 76140 }, { "epoch": 2.252025788135092, "grad_norm": 0.5688532590866089, "learning_rate": 3.3191618552845664e-06, "loss": 0.059, "step": 76150 }, { "epoch": 2.252321523629266, "grad_norm": 0.8305404782295227, "learning_rate": 3.319035165365627e-06, "loss": 0.0727, "step": 76160 }, { "epoch": 2.25261725912344, "grad_norm": 0.6255071759223938, "learning_rate": 3.3189084754466875e-06, "loss": 0.0665, "step": 76170 }, { "epoch": 2.252912994617614, "grad_norm": 0.9947735071182251, "learning_rate": 3.3187817855277483e-06, "loss": 0.0666, "step": 76180 }, { "epoch": 2.253208730111788, "grad_norm": 0.34544551372528076, "learning_rate": 3.3186550956088087e-06, "loss": 0.0859, "step": 76190 }, { "epoch": 2.253504465605962, "grad_norm": 0.48517096042633057, "learning_rate": 3.3185284056898695e-06, "loss": 0.0774, "step": 76200 }, { "epoch": 2.253800201100136, "grad_norm": 0.924950897693634, "learning_rate": 3.31840171577093e-06, "loss": 0.0715, "step": 76210 }, { "epoch": 2.25409593659431, "grad_norm": 1.0290638208389282, "learning_rate": 3.318275025851991e-06, "loss": 0.0792, "step": 76220 }, { "epoch": 2.254391672088484, "grad_norm": 1.0789439678192139, "learning_rate": 3.3181483359330514e-06, "loss": 0.0728, "step": 76230 }, { "epoch": 2.254687407582658, "grad_norm": 0.7080819606781006, "learning_rate": 3.3180216460141118e-06, "loss": 0.083, "step": 76240 }, { "epoch": 2.254983143076832, "grad_norm": 0.6368477940559387, "learning_rate": 3.3178949560951726e-06, "loss": 0.0666, "step": 76250 }, { "epoch": 2.2552788785710063, "grad_norm": 0.5723812580108643, "learning_rate": 3.317768266176233e-06, "loss": 0.0836, "step": 76260 }, { "epoch": 2.2555746140651802, "grad_norm": 0.29122793674468994, "learning_rate": 3.3176415762572937e-06, "loss": 0.075, "step": 76270 }, { "epoch": 2.255870349559354, "grad_norm": 0.9486469030380249, "learning_rate": 3.317514886338354e-06, "loss": 0.0611, "step": 76280 }, { "epoch": 2.256166085053528, "grad_norm": 0.6877859234809875, "learning_rate": 3.317388196419415e-06, "loss": 0.096, "step": 76290 }, { "epoch": 2.256461820547702, "grad_norm": 0.8724910616874695, "learning_rate": 3.3172615065004757e-06, "loss": 0.0938, "step": 76300 }, { "epoch": 2.256757556041876, "grad_norm": 0.5018656849861145, "learning_rate": 3.3171348165815365e-06, "loss": 0.0806, "step": 76310 }, { "epoch": 2.2570532915360504, "grad_norm": 0.7390943765640259, "learning_rate": 3.317008126662597e-06, "loss": 0.05, "step": 76320 }, { "epoch": 2.2573490270302243, "grad_norm": 1.063808798789978, "learning_rate": 3.3168814367436576e-06, "loss": 0.0626, "step": 76330 }, { "epoch": 2.2576447625243983, "grad_norm": 0.6574842929840088, "learning_rate": 3.316754746824718e-06, "loss": 0.0881, "step": 76340 }, { "epoch": 2.257940498018572, "grad_norm": 0.8875854015350342, "learning_rate": 3.3166280569057788e-06, "loss": 0.0933, "step": 76350 }, { "epoch": 2.258236233512746, "grad_norm": 0.810682475566864, "learning_rate": 3.316501366986839e-06, "loss": 0.0874, "step": 76360 }, { "epoch": 2.25853196900692, "grad_norm": 0.7031384110450745, "learning_rate": 3.3163746770679e-06, "loss": 0.0689, "step": 76370 }, { "epoch": 2.258827704501094, "grad_norm": 0.8197012543678284, "learning_rate": 3.3162479871489607e-06, "loss": 0.0714, "step": 76380 }, { "epoch": 2.2591234399952684, "grad_norm": 0.8454140424728394, "learning_rate": 3.3161212972300215e-06, "loss": 0.0834, "step": 76390 }, { "epoch": 2.2594191754894424, "grad_norm": 0.6257666349411011, "learning_rate": 3.315994607311082e-06, "loss": 0.0846, "step": 76400 }, { "epoch": 2.2597149109836163, "grad_norm": 1.182787537574768, "learning_rate": 3.3158679173921427e-06, "loss": 0.0795, "step": 76410 }, { "epoch": 2.2600106464777903, "grad_norm": 0.833699107170105, "learning_rate": 3.315741227473203e-06, "loss": 0.083, "step": 76420 }, { "epoch": 2.260306381971964, "grad_norm": 0.9523379802703857, "learning_rate": 3.315614537554264e-06, "loss": 0.0811, "step": 76430 }, { "epoch": 2.260602117466138, "grad_norm": 0.8719787001609802, "learning_rate": 3.315487847635324e-06, "loss": 0.0933, "step": 76440 }, { "epoch": 2.260897852960312, "grad_norm": 1.1530367136001587, "learning_rate": 3.315361157716385e-06, "loss": 0.0689, "step": 76450 }, { "epoch": 2.2611935884544865, "grad_norm": 0.7131362557411194, "learning_rate": 3.3152344677974458e-06, "loss": 0.0696, "step": 76460 }, { "epoch": 2.2614893239486604, "grad_norm": 0.7669743299484253, "learning_rate": 3.3151077778785065e-06, "loss": 0.0783, "step": 76470 }, { "epoch": 2.2617850594428344, "grad_norm": 0.8628120422363281, "learning_rate": 3.314981087959567e-06, "loss": 0.0733, "step": 76480 }, { "epoch": 2.2620807949370083, "grad_norm": 0.7375703454017639, "learning_rate": 3.3148543980406277e-06, "loss": 0.0885, "step": 76490 }, { "epoch": 2.2623765304311823, "grad_norm": 0.620672345161438, "learning_rate": 3.314727708121688e-06, "loss": 0.06, "step": 76500 }, { "epoch": 2.262672265925356, "grad_norm": 0.8355657458305359, "learning_rate": 3.314601018202749e-06, "loss": 0.073, "step": 76510 }, { "epoch": 2.26296800141953, "grad_norm": 1.3483539819717407, "learning_rate": 3.3144743282838092e-06, "loss": 0.0726, "step": 76520 }, { "epoch": 2.2632637369137045, "grad_norm": 0.8029792904853821, "learning_rate": 3.31434763836487e-06, "loss": 0.0701, "step": 76530 }, { "epoch": 2.2635594724078785, "grad_norm": 1.0527030229568481, "learning_rate": 3.314220948445931e-06, "loss": 0.0771, "step": 76540 }, { "epoch": 2.2638552079020524, "grad_norm": 0.5888248085975647, "learning_rate": 3.3140942585269916e-06, "loss": 0.0799, "step": 76550 }, { "epoch": 2.2641509433962264, "grad_norm": 0.8845342397689819, "learning_rate": 3.313967568608052e-06, "loss": 0.0884, "step": 76560 }, { "epoch": 2.2644466788904003, "grad_norm": 1.0090551376342773, "learning_rate": 3.3138408786891127e-06, "loss": 0.0764, "step": 76570 }, { "epoch": 2.2647424143845742, "grad_norm": 0.8846502900123596, "learning_rate": 3.313714188770173e-06, "loss": 0.0748, "step": 76580 }, { "epoch": 2.2650381498787486, "grad_norm": 1.1025274991989136, "learning_rate": 3.313587498851234e-06, "loss": 0.0903, "step": 76590 }, { "epoch": 2.2653338853729226, "grad_norm": 1.9878649711608887, "learning_rate": 3.3134608089322943e-06, "loss": 0.0916, "step": 76600 }, { "epoch": 2.2656296208670965, "grad_norm": 0.8920660614967346, "learning_rate": 3.313334119013355e-06, "loss": 0.0912, "step": 76610 }, { "epoch": 2.2659253563612705, "grad_norm": 6.918940544128418, "learning_rate": 3.313207429094416e-06, "loss": 0.0667, "step": 76620 }, { "epoch": 2.2662210918554444, "grad_norm": 0.876299262046814, "learning_rate": 3.3130807391754766e-06, "loss": 0.0654, "step": 76630 }, { "epoch": 2.2665168273496183, "grad_norm": 0.43095311522483826, "learning_rate": 3.312954049256537e-06, "loss": 0.074, "step": 76640 }, { "epoch": 2.2668125628437927, "grad_norm": 0.6725254058837891, "learning_rate": 3.3128273593375974e-06, "loss": 0.1131, "step": 76650 }, { "epoch": 2.2671082983379667, "grad_norm": 1.050298810005188, "learning_rate": 3.312700669418658e-06, "loss": 0.0807, "step": 76660 }, { "epoch": 2.2674040338321406, "grad_norm": 1.213898777961731, "learning_rate": 3.3125739794997185e-06, "loss": 0.0718, "step": 76670 }, { "epoch": 2.2676997693263146, "grad_norm": 0.9208070635795593, "learning_rate": 3.3124472895807793e-06, "loss": 0.0743, "step": 76680 }, { "epoch": 2.2679955048204885, "grad_norm": 0.5822960734367371, "learning_rate": 3.3123205996618397e-06, "loss": 0.0869, "step": 76690 }, { "epoch": 2.2682912403146624, "grad_norm": 0.8990453481674194, "learning_rate": 3.312193909742901e-06, "loss": 0.081, "step": 76700 }, { "epoch": 2.2685869758088364, "grad_norm": 0.7759921550750732, "learning_rate": 3.3120672198239613e-06, "loss": 0.0789, "step": 76710 }, { "epoch": 2.268882711303011, "grad_norm": 0.5246426463127136, "learning_rate": 3.311940529905022e-06, "loss": 0.0661, "step": 76720 }, { "epoch": 2.2691784467971847, "grad_norm": 1.1002174615859985, "learning_rate": 3.3118138399860824e-06, "loss": 0.0699, "step": 76730 }, { "epoch": 2.2694741822913587, "grad_norm": 1.0291943550109863, "learning_rate": 3.311687150067143e-06, "loss": 0.0918, "step": 76740 }, { "epoch": 2.2697699177855326, "grad_norm": 0.6333842873573303, "learning_rate": 3.3115604601482036e-06, "loss": 0.0726, "step": 76750 }, { "epoch": 2.2700656532797066, "grad_norm": 0.5211836099624634, "learning_rate": 3.3114337702292644e-06, "loss": 0.0692, "step": 76760 }, { "epoch": 2.2703613887738805, "grad_norm": 1.1332135200500488, "learning_rate": 3.3113070803103247e-06, "loss": 0.0741, "step": 76770 }, { "epoch": 2.2706571242680544, "grad_norm": 0.7769654989242554, "learning_rate": 3.311180390391386e-06, "loss": 0.0774, "step": 76780 }, { "epoch": 2.270952859762229, "grad_norm": 0.7932919859886169, "learning_rate": 3.3110537004724463e-06, "loss": 0.0794, "step": 76790 }, { "epoch": 2.2712485952564028, "grad_norm": 0.7388771772384644, "learning_rate": 3.310927010553507e-06, "loss": 0.0941, "step": 76800 }, { "epoch": 2.2715443307505767, "grad_norm": 0.8287932276725769, "learning_rate": 3.3108003206345675e-06, "loss": 0.0754, "step": 76810 }, { "epoch": 2.2718400662447507, "grad_norm": 0.6009745001792908, "learning_rate": 3.3106736307156283e-06, "loss": 0.0625, "step": 76820 }, { "epoch": 2.2721358017389246, "grad_norm": 0.8752396702766418, "learning_rate": 3.3105469407966886e-06, "loss": 0.0661, "step": 76830 }, { "epoch": 2.2724315372330985, "grad_norm": 1.0968431234359741, "learning_rate": 3.3104202508777494e-06, "loss": 0.0874, "step": 76840 }, { "epoch": 2.2727272727272725, "grad_norm": 0.6681157350540161, "learning_rate": 3.3102935609588098e-06, "loss": 0.082, "step": 76850 }, { "epoch": 2.273023008221447, "grad_norm": 0.7188848853111267, "learning_rate": 3.310166871039871e-06, "loss": 0.0799, "step": 76860 }, { "epoch": 2.273318743715621, "grad_norm": 0.9463359117507935, "learning_rate": 3.3100401811209314e-06, "loss": 0.0662, "step": 76870 }, { "epoch": 2.2736144792097948, "grad_norm": 0.910366952419281, "learning_rate": 3.309913491201992e-06, "loss": 0.0705, "step": 76880 }, { "epoch": 2.2739102147039687, "grad_norm": 1.0080581903457642, "learning_rate": 3.3097868012830525e-06, "loss": 0.086, "step": 76890 }, { "epoch": 2.2742059501981426, "grad_norm": 1.1660152673721313, "learning_rate": 3.3096601113641133e-06, "loss": 0.0946, "step": 76900 }, { "epoch": 2.274501685692317, "grad_norm": 0.7218242287635803, "learning_rate": 3.3095334214451737e-06, "loss": 0.0873, "step": 76910 }, { "epoch": 2.274797421186491, "grad_norm": 0.886353075504303, "learning_rate": 3.3094067315262345e-06, "loss": 0.078, "step": 76920 }, { "epoch": 2.275093156680665, "grad_norm": 1.0129916667938232, "learning_rate": 3.309280041607295e-06, "loss": 0.0619, "step": 76930 }, { "epoch": 2.275388892174839, "grad_norm": 0.8255851864814758, "learning_rate": 3.309153351688356e-06, "loss": 0.094, "step": 76940 }, { "epoch": 2.275684627669013, "grad_norm": 1.1370972394943237, "learning_rate": 3.3090266617694164e-06, "loss": 0.0721, "step": 76950 }, { "epoch": 2.2759803631631867, "grad_norm": 0.737256646156311, "learning_rate": 3.308899971850477e-06, "loss": 0.0863, "step": 76960 }, { "epoch": 2.2762760986573607, "grad_norm": 0.7303403615951538, "learning_rate": 3.3087732819315376e-06, "loss": 0.0768, "step": 76970 }, { "epoch": 2.276571834151535, "grad_norm": 0.506740391254425, "learning_rate": 3.3086465920125983e-06, "loss": 0.0698, "step": 76980 }, { "epoch": 2.276867569645709, "grad_norm": 0.738679826259613, "learning_rate": 3.3085199020936587e-06, "loss": 0.077, "step": 76990 }, { "epoch": 2.277163305139883, "grad_norm": 0.7528980374336243, "learning_rate": 3.3083932121747195e-06, "loss": 0.0791, "step": 77000 }, { "epoch": 2.277459040634057, "grad_norm": 0.9847680330276489, "learning_rate": 3.30826652225578e-06, "loss": 0.0847, "step": 77010 }, { "epoch": 2.277754776128231, "grad_norm": 1.239048957824707, "learning_rate": 3.308139832336841e-06, "loss": 0.0724, "step": 77020 }, { "epoch": 2.278050511622405, "grad_norm": 0.975472629070282, "learning_rate": 3.3080131424179014e-06, "loss": 0.0801, "step": 77030 }, { "epoch": 2.2783462471165787, "grad_norm": 0.7796934247016907, "learning_rate": 3.3078864524989622e-06, "loss": 0.0904, "step": 77040 }, { "epoch": 2.278641982610753, "grad_norm": 0.747579038143158, "learning_rate": 3.3077597625800226e-06, "loss": 0.0886, "step": 77050 }, { "epoch": 2.278937718104927, "grad_norm": 0.7824572324752808, "learning_rate": 3.307633072661083e-06, "loss": 0.0848, "step": 77060 }, { "epoch": 2.279233453599101, "grad_norm": 1.3236944675445557, "learning_rate": 3.3075063827421438e-06, "loss": 0.0685, "step": 77070 }, { "epoch": 2.279529189093275, "grad_norm": 0.8240978717803955, "learning_rate": 3.307379692823204e-06, "loss": 0.0656, "step": 77080 }, { "epoch": 2.279824924587449, "grad_norm": 1.0923492908477783, "learning_rate": 3.307253002904265e-06, "loss": 0.1087, "step": 77090 }, { "epoch": 2.280120660081623, "grad_norm": 0.7874100804328918, "learning_rate": 3.3071263129853257e-06, "loss": 0.0861, "step": 77100 }, { "epoch": 2.280416395575797, "grad_norm": 1.2592251300811768, "learning_rate": 3.3069996230663865e-06, "loss": 0.0961, "step": 77110 }, { "epoch": 2.280712131069971, "grad_norm": 0.631833016872406, "learning_rate": 3.306872933147447e-06, "loss": 0.057, "step": 77120 }, { "epoch": 2.281007866564145, "grad_norm": 1.031619668006897, "learning_rate": 3.3067462432285076e-06, "loss": 0.0729, "step": 77130 }, { "epoch": 2.281303602058319, "grad_norm": 1.0887395143508911, "learning_rate": 3.306619553309568e-06, "loss": 0.0877, "step": 77140 }, { "epoch": 2.281599337552493, "grad_norm": 0.8603513240814209, "learning_rate": 3.306492863390629e-06, "loss": 0.0848, "step": 77150 }, { "epoch": 2.281895073046667, "grad_norm": 1.3321855068206787, "learning_rate": 3.306366173471689e-06, "loss": 0.0941, "step": 77160 }, { "epoch": 2.282190808540841, "grad_norm": 0.5870726108551025, "learning_rate": 3.30623948355275e-06, "loss": 0.0739, "step": 77170 }, { "epoch": 2.2824865440350153, "grad_norm": 0.8743919134140015, "learning_rate": 3.3061127936338107e-06, "loss": 0.0698, "step": 77180 }, { "epoch": 2.282782279529189, "grad_norm": 0.6504511833190918, "learning_rate": 3.3059861037148715e-06, "loss": 0.0981, "step": 77190 }, { "epoch": 2.283078015023363, "grad_norm": 0.8498626947402954, "learning_rate": 3.305859413795932e-06, "loss": 0.0836, "step": 77200 }, { "epoch": 2.283373750517537, "grad_norm": 0.8796895742416382, "learning_rate": 3.3057327238769927e-06, "loss": 0.0731, "step": 77210 }, { "epoch": 2.283669486011711, "grad_norm": 0.4570339620113373, "learning_rate": 3.305606033958053e-06, "loss": 0.0723, "step": 77220 }, { "epoch": 2.283965221505885, "grad_norm": 0.6360157132148743, "learning_rate": 3.305479344039114e-06, "loss": 0.0677, "step": 77230 }, { "epoch": 2.2842609570000594, "grad_norm": 1.0282108783721924, "learning_rate": 3.305352654120174e-06, "loss": 0.0913, "step": 77240 }, { "epoch": 2.2845566924942333, "grad_norm": 1.2849010229110718, "learning_rate": 3.305225964201235e-06, "loss": 0.0887, "step": 77250 }, { "epoch": 2.2848524279884073, "grad_norm": 0.6702215671539307, "learning_rate": 3.3050992742822958e-06, "loss": 0.0756, "step": 77260 }, { "epoch": 2.285148163482581, "grad_norm": 0.7884435057640076, "learning_rate": 3.3049725843633566e-06, "loss": 0.0587, "step": 77270 }, { "epoch": 2.285443898976755, "grad_norm": 0.7685215473175049, "learning_rate": 3.304845894444417e-06, "loss": 0.0797, "step": 77280 }, { "epoch": 2.285739634470929, "grad_norm": 0.8993931412696838, "learning_rate": 3.3047192045254777e-06, "loss": 0.0797, "step": 77290 }, { "epoch": 2.286035369965103, "grad_norm": 1.2919833660125732, "learning_rate": 3.304592514606538e-06, "loss": 0.0917, "step": 77300 }, { "epoch": 2.2863311054592774, "grad_norm": 0.7838002443313599, "learning_rate": 3.304465824687599e-06, "loss": 0.0881, "step": 77310 }, { "epoch": 2.2866268409534514, "grad_norm": 0.457888126373291, "learning_rate": 3.3043391347686593e-06, "loss": 0.0621, "step": 77320 }, { "epoch": 2.2869225764476253, "grad_norm": 0.7820075154304504, "learning_rate": 3.30421244484972e-06, "loss": 0.0748, "step": 77330 }, { "epoch": 2.2872183119417993, "grad_norm": 0.8508726954460144, "learning_rate": 3.304085754930781e-06, "loss": 0.0834, "step": 77340 }, { "epoch": 2.287514047435973, "grad_norm": 0.7014750838279724, "learning_rate": 3.3039590650118416e-06, "loss": 0.089, "step": 77350 }, { "epoch": 2.287809782930147, "grad_norm": 0.5265529751777649, "learning_rate": 3.303832375092902e-06, "loss": 0.0958, "step": 77360 }, { "epoch": 2.288105518424321, "grad_norm": 0.47605589032173157, "learning_rate": 3.3037056851739628e-06, "loss": 0.056, "step": 77370 }, { "epoch": 2.2884012539184955, "grad_norm": 0.8675696849822998, "learning_rate": 3.303578995255023e-06, "loss": 0.056, "step": 77380 }, { "epoch": 2.2886969894126694, "grad_norm": 0.8211027979850769, "learning_rate": 3.303452305336084e-06, "loss": 0.0782, "step": 77390 }, { "epoch": 2.2889927249068434, "grad_norm": 0.9208623766899109, "learning_rate": 3.3033256154171443e-06, "loss": 0.0765, "step": 77400 }, { "epoch": 2.2892884604010173, "grad_norm": 1.135227918624878, "learning_rate": 3.303198925498205e-06, "loss": 0.0864, "step": 77410 }, { "epoch": 2.2895841958951912, "grad_norm": 1.3152062892913818, "learning_rate": 3.303072235579266e-06, "loss": 0.0785, "step": 77420 }, { "epoch": 2.289879931389365, "grad_norm": 1.0998984575271606, "learning_rate": 3.3029455456603267e-06, "loss": 0.0771, "step": 77430 }, { "epoch": 2.290175666883539, "grad_norm": 0.7205987572669983, "learning_rate": 3.302818855741387e-06, "loss": 0.0874, "step": 77440 }, { "epoch": 2.2904714023777135, "grad_norm": 0.8975473642349243, "learning_rate": 3.302692165822448e-06, "loss": 0.0855, "step": 77450 }, { "epoch": 2.2907671378718875, "grad_norm": 0.5113054513931274, "learning_rate": 3.302565475903508e-06, "loss": 0.0781, "step": 77460 }, { "epoch": 2.2910628733660614, "grad_norm": 1.0420926809310913, "learning_rate": 3.302438785984569e-06, "loss": 0.0642, "step": 77470 }, { "epoch": 2.2913586088602353, "grad_norm": 0.7489457726478577, "learning_rate": 3.3023120960656293e-06, "loss": 0.0725, "step": 77480 }, { "epoch": 2.2916543443544093, "grad_norm": 1.2516106367111206, "learning_rate": 3.3021854061466897e-06, "loss": 0.0795, "step": 77490 }, { "epoch": 2.2919500798485837, "grad_norm": 0.648888111114502, "learning_rate": 3.302058716227751e-06, "loss": 0.0812, "step": 77500 }, { "epoch": 2.2922458153427576, "grad_norm": 1.019936203956604, "learning_rate": 3.3019320263088113e-06, "loss": 0.0687, "step": 77510 }, { "epoch": 2.2925415508369316, "grad_norm": 1.065752387046814, "learning_rate": 3.301805336389872e-06, "loss": 0.0563, "step": 77520 }, { "epoch": 2.2928372863311055, "grad_norm": 0.644649863243103, "learning_rate": 3.3016786464709324e-06, "loss": 0.0647, "step": 77530 }, { "epoch": 2.2931330218252794, "grad_norm": 0.6794047355651855, "learning_rate": 3.3015519565519932e-06, "loss": 0.0901, "step": 77540 }, { "epoch": 2.2934287573194534, "grad_norm": 0.659382164478302, "learning_rate": 3.3014252666330536e-06, "loss": 0.0835, "step": 77550 }, { "epoch": 2.2937244928136273, "grad_norm": 1.296399712562561, "learning_rate": 3.3012985767141144e-06, "loss": 0.0933, "step": 77560 }, { "epoch": 2.2940202283078017, "grad_norm": 0.5861605405807495, "learning_rate": 3.3011718867951748e-06, "loss": 0.0732, "step": 77570 }, { "epoch": 2.2943159638019757, "grad_norm": 1.5743789672851562, "learning_rate": 3.301045196876236e-06, "loss": 0.0769, "step": 77580 }, { "epoch": 2.2946116992961496, "grad_norm": 0.47286200523376465, "learning_rate": 3.3009185069572963e-06, "loss": 0.0859, "step": 77590 }, { "epoch": 2.2949074347903236, "grad_norm": 0.7635775804519653, "learning_rate": 3.300791817038357e-06, "loss": 0.0724, "step": 77600 }, { "epoch": 2.2952031702844975, "grad_norm": 0.8771281838417053, "learning_rate": 3.3006651271194175e-06, "loss": 0.077, "step": 77610 }, { "epoch": 2.2954989057786714, "grad_norm": 0.8240987062454224, "learning_rate": 3.3005384372004783e-06, "loss": 0.0542, "step": 77620 }, { "epoch": 2.2957946412728454, "grad_norm": 1.8145427703857422, "learning_rate": 3.3004117472815386e-06, "loss": 0.0813, "step": 77630 }, { "epoch": 2.2960903767670198, "grad_norm": 1.0162838697433472, "learning_rate": 3.3002850573625994e-06, "loss": 0.1077, "step": 77640 }, { "epoch": 2.2963861122611937, "grad_norm": 0.5144945979118347, "learning_rate": 3.30015836744366e-06, "loss": 0.0648, "step": 77650 }, { "epoch": 2.2966818477553677, "grad_norm": 0.7234771847724915, "learning_rate": 3.300031677524721e-06, "loss": 0.0777, "step": 77660 }, { "epoch": 2.2969775832495416, "grad_norm": 0.5664528012275696, "learning_rate": 3.2999049876057814e-06, "loss": 0.0674, "step": 77670 }, { "epoch": 2.2972733187437155, "grad_norm": 1.007765293121338, "learning_rate": 3.299778297686842e-06, "loss": 0.0707, "step": 77680 }, { "epoch": 2.2975690542378895, "grad_norm": 1.187781572341919, "learning_rate": 3.2996516077679025e-06, "loss": 0.0883, "step": 77690 }, { "epoch": 2.2978647897320634, "grad_norm": 0.7978734970092773, "learning_rate": 3.2995249178489633e-06, "loss": 0.0814, "step": 77700 }, { "epoch": 2.298160525226238, "grad_norm": 0.7941870093345642, "learning_rate": 3.2993982279300237e-06, "loss": 0.0844, "step": 77710 }, { "epoch": 2.2984562607204118, "grad_norm": 0.7552120089530945, "learning_rate": 3.2992715380110845e-06, "loss": 0.0714, "step": 77720 }, { "epoch": 2.2987519962145857, "grad_norm": 0.7483386993408203, "learning_rate": 3.299144848092145e-06, "loss": 0.069, "step": 77730 }, { "epoch": 2.2990477317087596, "grad_norm": 0.7798996567726135, "learning_rate": 3.299018158173206e-06, "loss": 0.0828, "step": 77740 }, { "epoch": 2.2993434672029336, "grad_norm": 0.6703688502311707, "learning_rate": 3.2988914682542664e-06, "loss": 0.0927, "step": 77750 }, { "epoch": 2.2996392026971075, "grad_norm": 0.4884907305240631, "learning_rate": 3.2987647783353272e-06, "loss": 0.0783, "step": 77760 }, { "epoch": 2.2999349381912815, "grad_norm": 0.4491198658943176, "learning_rate": 3.2986380884163876e-06, "loss": 0.0768, "step": 77770 }, { "epoch": 2.300230673685456, "grad_norm": 0.7880173921585083, "learning_rate": 3.2985113984974484e-06, "loss": 0.071, "step": 77780 }, { "epoch": 2.30052640917963, "grad_norm": 0.9461303949356079, "learning_rate": 3.2983847085785087e-06, "loss": 0.0814, "step": 77790 }, { "epoch": 2.3008221446738037, "grad_norm": 1.0394810438156128, "learning_rate": 3.2982580186595695e-06, "loss": 0.0951, "step": 77800 }, { "epoch": 2.3011178801679777, "grad_norm": 1.122170329093933, "learning_rate": 3.29813132874063e-06, "loss": 0.0707, "step": 77810 }, { "epoch": 2.3014136156621516, "grad_norm": 0.9657930135726929, "learning_rate": 3.298004638821691e-06, "loss": 0.0656, "step": 77820 }, { "epoch": 2.301709351156326, "grad_norm": 0.8716535568237305, "learning_rate": 3.2978779489027515e-06, "loss": 0.0726, "step": 77830 }, { "epoch": 2.3020050866505, "grad_norm": 0.7463181614875793, "learning_rate": 3.2977512589838123e-06, "loss": 0.0854, "step": 77840 }, { "epoch": 2.302300822144674, "grad_norm": 1.1501280069351196, "learning_rate": 3.2976245690648726e-06, "loss": 0.0978, "step": 77850 }, { "epoch": 2.302596557638848, "grad_norm": 0.6245042085647583, "learning_rate": 3.2974978791459334e-06, "loss": 0.09, "step": 77860 }, { "epoch": 2.302892293133022, "grad_norm": 0.9231840372085571, "learning_rate": 3.2973711892269938e-06, "loss": 0.0758, "step": 77870 }, { "epoch": 2.3031880286271957, "grad_norm": 0.8439611792564392, "learning_rate": 3.2972444993080546e-06, "loss": 0.0746, "step": 77880 }, { "epoch": 2.3034837641213697, "grad_norm": 0.8058249950408936, "learning_rate": 3.297117809389115e-06, "loss": 0.0802, "step": 77890 }, { "epoch": 2.303779499615544, "grad_norm": 1.2579994201660156, "learning_rate": 3.2969911194701757e-06, "loss": 0.0871, "step": 77900 }, { "epoch": 2.304075235109718, "grad_norm": 0.828278660774231, "learning_rate": 3.2968644295512365e-06, "loss": 0.0852, "step": 77910 }, { "epoch": 2.304370970603892, "grad_norm": 0.35295385122299194, "learning_rate": 3.296737739632297e-06, "loss": 0.0614, "step": 77920 }, { "epoch": 2.304666706098066, "grad_norm": 0.8912258744239807, "learning_rate": 3.2966110497133577e-06, "loss": 0.0804, "step": 77930 }, { "epoch": 2.30496244159224, "grad_norm": 0.9178513884544373, "learning_rate": 3.296484359794418e-06, "loss": 0.0835, "step": 77940 }, { "epoch": 2.305258177086414, "grad_norm": 0.627495527267456, "learning_rate": 3.296357669875479e-06, "loss": 0.0919, "step": 77950 }, { "epoch": 2.3055539125805877, "grad_norm": 0.6561066508293152, "learning_rate": 3.296230979956539e-06, "loss": 0.0734, "step": 77960 }, { "epoch": 2.305849648074762, "grad_norm": 0.8471730947494507, "learning_rate": 3.2961042900376e-06, "loss": 0.0691, "step": 77970 }, { "epoch": 2.306145383568936, "grad_norm": 0.8169417977333069, "learning_rate": 3.2959776001186608e-06, "loss": 0.0793, "step": 77980 }, { "epoch": 2.30644111906311, "grad_norm": 0.8030062317848206, "learning_rate": 3.2958509101997216e-06, "loss": 0.0939, "step": 77990 }, { "epoch": 2.306736854557284, "grad_norm": 0.831079363822937, "learning_rate": 3.295724220280782e-06, "loss": 0.0793, "step": 78000 }, { "epoch": 2.307032590051458, "grad_norm": 0.7673880457878113, "learning_rate": 3.2955975303618427e-06, "loss": 0.0945, "step": 78010 }, { "epoch": 2.307328325545632, "grad_norm": 0.5646135210990906, "learning_rate": 3.295470840442903e-06, "loss": 0.0882, "step": 78020 }, { "epoch": 2.3076240610398058, "grad_norm": 0.8092741966247559, "learning_rate": 3.295344150523964e-06, "loss": 0.0644, "step": 78030 }, { "epoch": 2.30791979653398, "grad_norm": 1.0326859951019287, "learning_rate": 3.2952174606050242e-06, "loss": 0.0863, "step": 78040 }, { "epoch": 2.308215532028154, "grad_norm": 0.6257308125495911, "learning_rate": 3.295090770686085e-06, "loss": 0.0888, "step": 78050 }, { "epoch": 2.308511267522328, "grad_norm": 0.6688035130500793, "learning_rate": 3.294964080767146e-06, "loss": 0.0873, "step": 78060 }, { "epoch": 2.308807003016502, "grad_norm": 0.47498321533203125, "learning_rate": 3.2948373908482066e-06, "loss": 0.069, "step": 78070 }, { "epoch": 2.309102738510676, "grad_norm": 0.5912192463874817, "learning_rate": 3.294710700929267e-06, "loss": 0.0533, "step": 78080 }, { "epoch": 2.30939847400485, "grad_norm": 0.9165993332862854, "learning_rate": 3.2945840110103278e-06, "loss": 0.0863, "step": 78090 }, { "epoch": 2.3096942094990243, "grad_norm": 0.5745760798454285, "learning_rate": 3.294457321091388e-06, "loss": 0.0757, "step": 78100 }, { "epoch": 2.309989944993198, "grad_norm": 0.5274747610092163, "learning_rate": 3.294330631172449e-06, "loss": 0.089, "step": 78110 }, { "epoch": 2.310285680487372, "grad_norm": 0.5426596999168396, "learning_rate": 3.2942039412535093e-06, "loss": 0.0723, "step": 78120 }, { "epoch": 2.310581415981546, "grad_norm": 0.7285879254341125, "learning_rate": 3.29407725133457e-06, "loss": 0.0653, "step": 78130 }, { "epoch": 2.31087715147572, "grad_norm": 0.6908748745918274, "learning_rate": 3.293950561415631e-06, "loss": 0.0958, "step": 78140 }, { "epoch": 2.311172886969894, "grad_norm": 0.7549419403076172, "learning_rate": 3.2938238714966917e-06, "loss": 0.0823, "step": 78150 }, { "epoch": 2.3114686224640684, "grad_norm": 0.72413569688797, "learning_rate": 3.293697181577752e-06, "loss": 0.0701, "step": 78160 }, { "epoch": 2.3117643579582423, "grad_norm": 1.06563401222229, "learning_rate": 3.293570491658813e-06, "loss": 0.0793, "step": 78170 }, { "epoch": 2.3120600934524163, "grad_norm": 0.5660839080810547, "learning_rate": 3.293443801739873e-06, "loss": 0.0579, "step": 78180 }, { "epoch": 2.31235582894659, "grad_norm": 0.4847647249698639, "learning_rate": 3.293317111820934e-06, "loss": 0.0854, "step": 78190 }, { "epoch": 2.312651564440764, "grad_norm": 0.9085446000099182, "learning_rate": 3.2931904219019943e-06, "loss": 0.0982, "step": 78200 }, { "epoch": 2.312947299934938, "grad_norm": 1.0324313640594482, "learning_rate": 3.293063731983055e-06, "loss": 0.09, "step": 78210 }, { "epoch": 2.313243035429112, "grad_norm": 0.497259259223938, "learning_rate": 3.292937042064116e-06, "loss": 0.0679, "step": 78220 }, { "epoch": 2.3135387709232864, "grad_norm": 0.7252687811851501, "learning_rate": 3.2928103521451767e-06, "loss": 0.0709, "step": 78230 }, { "epoch": 2.3138345064174604, "grad_norm": 0.8782147169113159, "learning_rate": 3.292683662226237e-06, "loss": 0.0894, "step": 78240 }, { "epoch": 2.3141302419116343, "grad_norm": 0.8124037384986877, "learning_rate": 3.292556972307298e-06, "loss": 0.0922, "step": 78250 }, { "epoch": 2.3144259774058082, "grad_norm": 0.7166008353233337, "learning_rate": 3.2924302823883582e-06, "loss": 0.0747, "step": 78260 }, { "epoch": 2.314721712899982, "grad_norm": 0.9728748202323914, "learning_rate": 3.292303592469419e-06, "loss": 0.0785, "step": 78270 }, { "epoch": 2.315017448394156, "grad_norm": 0.7615436911582947, "learning_rate": 3.2921769025504794e-06, "loss": 0.0757, "step": 78280 }, { "epoch": 2.31531318388833, "grad_norm": 0.9508724808692932, "learning_rate": 3.29205021263154e-06, "loss": 0.0742, "step": 78290 }, { "epoch": 2.3156089193825045, "grad_norm": 0.9125702977180481, "learning_rate": 3.291923522712601e-06, "loss": 0.0902, "step": 78300 }, { "epoch": 2.3159046548766784, "grad_norm": 0.4085257649421692, "learning_rate": 3.2917968327936613e-06, "loss": 0.0674, "step": 78310 }, { "epoch": 2.3162003903708523, "grad_norm": 0.49769070744514465, "learning_rate": 3.291670142874722e-06, "loss": 0.0668, "step": 78320 }, { "epoch": 2.3164961258650263, "grad_norm": 1.3107136487960815, "learning_rate": 3.2915434529557825e-06, "loss": 0.0858, "step": 78330 }, { "epoch": 2.3167918613592002, "grad_norm": 1.3894150257110596, "learning_rate": 3.2914167630368433e-06, "loss": 0.0897, "step": 78340 }, { "epoch": 2.317087596853374, "grad_norm": 1.2774983644485474, "learning_rate": 3.2912900731179036e-06, "loss": 0.0915, "step": 78350 }, { "epoch": 2.317383332347548, "grad_norm": 0.649002730846405, "learning_rate": 3.2911633831989644e-06, "loss": 0.058, "step": 78360 }, { "epoch": 2.3176790678417225, "grad_norm": 0.9831129908561707, "learning_rate": 3.2910366932800248e-06, "loss": 0.0711, "step": 78370 }, { "epoch": 2.3179748033358964, "grad_norm": 0.6121321320533752, "learning_rate": 3.290910003361086e-06, "loss": 0.055, "step": 78380 }, { "epoch": 2.3182705388300704, "grad_norm": 0.544263482093811, "learning_rate": 3.2907833134421464e-06, "loss": 0.0935, "step": 78390 }, { "epoch": 2.3185662743242443, "grad_norm": 0.8824122548103333, "learning_rate": 3.290656623523207e-06, "loss": 0.0853, "step": 78400 }, { "epoch": 2.3188620098184183, "grad_norm": 0.8276695013046265, "learning_rate": 3.2905299336042675e-06, "loss": 0.079, "step": 78410 }, { "epoch": 2.3191577453125927, "grad_norm": 0.8214608430862427, "learning_rate": 3.2904032436853283e-06, "loss": 0.0627, "step": 78420 }, { "epoch": 2.3194534808067666, "grad_norm": 0.9811152815818787, "learning_rate": 3.2902765537663887e-06, "loss": 0.0688, "step": 78430 }, { "epoch": 2.3197492163009406, "grad_norm": 0.9234744906425476, "learning_rate": 3.2901498638474495e-06, "loss": 0.0757, "step": 78440 }, { "epoch": 2.3200449517951145, "grad_norm": 0.681748628616333, "learning_rate": 3.29002317392851e-06, "loss": 0.0736, "step": 78450 }, { "epoch": 2.3203406872892884, "grad_norm": 0.6506637930870056, "learning_rate": 3.289896484009571e-06, "loss": 0.0924, "step": 78460 }, { "epoch": 2.3206364227834624, "grad_norm": 0.7827115058898926, "learning_rate": 3.2897697940906314e-06, "loss": 0.068, "step": 78470 }, { "epoch": 2.3209321582776363, "grad_norm": 0.7052584886550903, "learning_rate": 3.289643104171692e-06, "loss": 0.0742, "step": 78480 }, { "epoch": 2.3212278937718107, "grad_norm": 0.8691881895065308, "learning_rate": 3.2895164142527526e-06, "loss": 0.0989, "step": 78490 }, { "epoch": 2.3215236292659847, "grad_norm": 0.9570214152336121, "learning_rate": 3.2893897243338134e-06, "loss": 0.0709, "step": 78500 }, { "epoch": 2.3218193647601586, "grad_norm": 0.939658522605896, "learning_rate": 3.2892630344148737e-06, "loss": 0.0734, "step": 78510 }, { "epoch": 2.3221151002543325, "grad_norm": 0.66437166929245, "learning_rate": 3.2891363444959345e-06, "loss": 0.0744, "step": 78520 }, { "epoch": 2.3224108357485065, "grad_norm": 0.9164813160896301, "learning_rate": 3.289009654576995e-06, "loss": 0.0653, "step": 78530 }, { "epoch": 2.3227065712426804, "grad_norm": 0.6820273399353027, "learning_rate": 3.288882964658056e-06, "loss": 0.0962, "step": 78540 }, { "epoch": 2.3230023067368544, "grad_norm": 1.229796290397644, "learning_rate": 3.2887562747391165e-06, "loss": 0.074, "step": 78550 }, { "epoch": 2.3232980422310288, "grad_norm": 0.5210201144218445, "learning_rate": 3.2886295848201772e-06, "loss": 0.0755, "step": 78560 }, { "epoch": 2.3235937777252027, "grad_norm": 0.646457850933075, "learning_rate": 3.2885028949012376e-06, "loss": 0.0607, "step": 78570 }, { "epoch": 2.3238895132193766, "grad_norm": 1.019680380821228, "learning_rate": 3.2883762049822984e-06, "loss": 0.0716, "step": 78580 }, { "epoch": 2.3241852487135506, "grad_norm": 0.5932077169418335, "learning_rate": 3.2882495150633588e-06, "loss": 0.0799, "step": 78590 }, { "epoch": 2.3244809842077245, "grad_norm": 0.9739411473274231, "learning_rate": 3.2881228251444196e-06, "loss": 0.0726, "step": 78600 }, { "epoch": 2.3247767197018985, "grad_norm": 0.7825993895530701, "learning_rate": 3.28799613522548e-06, "loss": 0.0782, "step": 78610 }, { "epoch": 2.3250724551960724, "grad_norm": 0.34324541687965393, "learning_rate": 3.287869445306541e-06, "loss": 0.0717, "step": 78620 }, { "epoch": 2.325368190690247, "grad_norm": 1.0633642673492432, "learning_rate": 3.2877427553876015e-06, "loss": 0.0843, "step": 78630 }, { "epoch": 2.3256639261844207, "grad_norm": 1.4757401943206787, "learning_rate": 3.2876160654686623e-06, "loss": 0.099, "step": 78640 }, { "epoch": 2.3259596616785947, "grad_norm": 0.7801302075386047, "learning_rate": 3.2874893755497227e-06, "loss": 0.0853, "step": 78650 }, { "epoch": 2.3262553971727686, "grad_norm": 0.4177555739879608, "learning_rate": 3.2873626856307834e-06, "loss": 0.0652, "step": 78660 }, { "epoch": 2.3265511326669426, "grad_norm": 0.9465867280960083, "learning_rate": 3.287235995711844e-06, "loss": 0.06, "step": 78670 }, { "epoch": 2.3268468681611165, "grad_norm": 1.5616474151611328, "learning_rate": 3.2871093057929046e-06, "loss": 0.0762, "step": 78680 }, { "epoch": 2.3271426036552905, "grad_norm": 0.8271903991699219, "learning_rate": 3.286982615873965e-06, "loss": 0.0951, "step": 78690 }, { "epoch": 2.327438339149465, "grad_norm": 0.8871287703514099, "learning_rate": 3.286855925955026e-06, "loss": 0.0689, "step": 78700 }, { "epoch": 2.327734074643639, "grad_norm": 0.8101159930229187, "learning_rate": 3.2867292360360865e-06, "loss": 0.0665, "step": 78710 }, { "epoch": 2.3280298101378127, "grad_norm": 0.8036407232284546, "learning_rate": 3.286602546117147e-06, "loss": 0.0742, "step": 78720 }, { "epoch": 2.3283255456319867, "grad_norm": 0.6671242117881775, "learning_rate": 3.2864758561982077e-06, "loss": 0.0611, "step": 78730 }, { "epoch": 2.3286212811261606, "grad_norm": 0.5835124850273132, "learning_rate": 3.286349166279268e-06, "loss": 0.0892, "step": 78740 }, { "epoch": 2.328917016620335, "grad_norm": 1.013013243675232, "learning_rate": 3.286222476360329e-06, "loss": 0.0786, "step": 78750 }, { "epoch": 2.329212752114509, "grad_norm": 0.7073287963867188, "learning_rate": 3.2860957864413892e-06, "loss": 0.0726, "step": 78760 }, { "epoch": 2.329508487608683, "grad_norm": 0.6211012601852417, "learning_rate": 3.28596909652245e-06, "loss": 0.069, "step": 78770 }, { "epoch": 2.329804223102857, "grad_norm": 1.184808373451233, "learning_rate": 3.285842406603511e-06, "loss": 0.0709, "step": 78780 }, { "epoch": 2.330099958597031, "grad_norm": 0.5051703453063965, "learning_rate": 3.2857157166845716e-06, "loss": 0.0706, "step": 78790 }, { "epoch": 2.3303956940912047, "grad_norm": 1.1416516304016113, "learning_rate": 3.285589026765632e-06, "loss": 0.079, "step": 78800 }, { "epoch": 2.3306914295853787, "grad_norm": 0.6130198836326599, "learning_rate": 3.2854623368466927e-06, "loss": 0.0745, "step": 78810 }, { "epoch": 2.330987165079553, "grad_norm": 0.7379502654075623, "learning_rate": 3.285335646927753e-06, "loss": 0.0748, "step": 78820 }, { "epoch": 2.331282900573727, "grad_norm": 0.8325877785682678, "learning_rate": 3.285208957008814e-06, "loss": 0.0636, "step": 78830 }, { "epoch": 2.331578636067901, "grad_norm": 0.9677477478981018, "learning_rate": 3.2850822670898743e-06, "loss": 0.0911, "step": 78840 }, { "epoch": 2.331874371562075, "grad_norm": 1.0377259254455566, "learning_rate": 3.284955577170935e-06, "loss": 0.079, "step": 78850 }, { "epoch": 2.332170107056249, "grad_norm": 0.8067598938941956, "learning_rate": 3.284828887251996e-06, "loss": 0.0826, "step": 78860 }, { "epoch": 2.3324658425504228, "grad_norm": 0.43523842096328735, "learning_rate": 3.2847021973330566e-06, "loss": 0.0664, "step": 78870 }, { "epoch": 2.3327615780445967, "grad_norm": 1.4191994667053223, "learning_rate": 3.284575507414117e-06, "loss": 0.0666, "step": 78880 }, { "epoch": 2.333057313538771, "grad_norm": 0.6802717447280884, "learning_rate": 3.284448817495178e-06, "loss": 0.0786, "step": 78890 }, { "epoch": 2.333353049032945, "grad_norm": 1.0016976594924927, "learning_rate": 3.284322127576238e-06, "loss": 0.093, "step": 78900 }, { "epoch": 2.333648784527119, "grad_norm": 0.9734673500061035, "learning_rate": 3.284195437657299e-06, "loss": 0.07, "step": 78910 }, { "epoch": 2.333944520021293, "grad_norm": 0.6929900050163269, "learning_rate": 3.2840687477383593e-06, "loss": 0.0658, "step": 78920 }, { "epoch": 2.334240255515467, "grad_norm": 0.7952291965484619, "learning_rate": 3.28394205781942e-06, "loss": 0.0745, "step": 78930 }, { "epoch": 2.334535991009641, "grad_norm": 0.7857101559638977, "learning_rate": 3.283815367900481e-06, "loss": 0.0827, "step": 78940 }, { "epoch": 2.3348317265038148, "grad_norm": 1.0202152729034424, "learning_rate": 3.2836886779815417e-06, "loss": 0.0919, "step": 78950 }, { "epoch": 2.335127461997989, "grad_norm": 1.7196273803710938, "learning_rate": 3.283561988062602e-06, "loss": 0.0835, "step": 78960 }, { "epoch": 2.335423197492163, "grad_norm": 0.48662900924682617, "learning_rate": 3.283435298143663e-06, "loss": 0.0755, "step": 78970 }, { "epoch": 2.335718932986337, "grad_norm": 0.8930050730705261, "learning_rate": 3.283308608224723e-06, "loss": 0.0764, "step": 78980 }, { "epoch": 2.336014668480511, "grad_norm": 0.9269769787788391, "learning_rate": 3.283181918305784e-06, "loss": 0.0974, "step": 78990 }, { "epoch": 2.336310403974685, "grad_norm": 0.47298839688301086, "learning_rate": 3.2830552283868444e-06, "loss": 0.0834, "step": 79000 }, { "epoch": 2.336606139468859, "grad_norm": 0.5790307521820068, "learning_rate": 3.282928538467905e-06, "loss": 0.081, "step": 79010 }, { "epoch": 2.3369018749630333, "grad_norm": 0.7416173815727234, "learning_rate": 3.282801848548966e-06, "loss": 0.0699, "step": 79020 }, { "epoch": 2.337197610457207, "grad_norm": 0.7220419049263, "learning_rate": 3.2826751586300267e-06, "loss": 0.0681, "step": 79030 }, { "epoch": 2.337493345951381, "grad_norm": 1.2013468742370605, "learning_rate": 3.282548468711087e-06, "loss": 0.0811, "step": 79040 }, { "epoch": 2.337789081445555, "grad_norm": 0.6613538861274719, "learning_rate": 3.282421778792148e-06, "loss": 0.085, "step": 79050 }, { "epoch": 2.338084816939729, "grad_norm": 0.7325860261917114, "learning_rate": 3.2822950888732082e-06, "loss": 0.0611, "step": 79060 }, { "epoch": 2.338380552433903, "grad_norm": 0.6587759852409363, "learning_rate": 3.282168398954269e-06, "loss": 0.0683, "step": 79070 }, { "epoch": 2.3386762879280774, "grad_norm": 0.9603439569473267, "learning_rate": 3.2820417090353294e-06, "loss": 0.0597, "step": 79080 }, { "epoch": 2.3389720234222513, "grad_norm": 0.7457214593887329, "learning_rate": 3.28191501911639e-06, "loss": 0.0821, "step": 79090 }, { "epoch": 2.3392677589164252, "grad_norm": 0.8991216421127319, "learning_rate": 3.281788329197451e-06, "loss": 0.1025, "step": 79100 }, { "epoch": 2.339563494410599, "grad_norm": 1.0715956687927246, "learning_rate": 3.2816616392785118e-06, "loss": 0.0747, "step": 79110 }, { "epoch": 2.339859229904773, "grad_norm": 0.7938645482063293, "learning_rate": 3.281534949359572e-06, "loss": 0.0593, "step": 79120 }, { "epoch": 2.340154965398947, "grad_norm": 0.6069481372833252, "learning_rate": 3.2814082594406325e-06, "loss": 0.0742, "step": 79130 }, { "epoch": 2.340450700893121, "grad_norm": 1.5929038524627686, "learning_rate": 3.2812815695216933e-06, "loss": 0.1039, "step": 79140 }, { "epoch": 2.3407464363872954, "grad_norm": 0.8826651573181152, "learning_rate": 3.2811548796027537e-06, "loss": 0.0899, "step": 79150 }, { "epoch": 2.3410421718814693, "grad_norm": 0.556176483631134, "learning_rate": 3.2810281896838145e-06, "loss": 0.0701, "step": 79160 }, { "epoch": 2.3413379073756433, "grad_norm": 0.7193285226821899, "learning_rate": 3.280901499764875e-06, "loss": 0.069, "step": 79170 }, { "epoch": 2.3416336428698172, "grad_norm": 0.6775873899459839, "learning_rate": 3.280774809845936e-06, "loss": 0.061, "step": 79180 }, { "epoch": 2.341929378363991, "grad_norm": 0.7461833953857422, "learning_rate": 3.2806481199269964e-06, "loss": 0.0878, "step": 79190 }, { "epoch": 2.342225113858165, "grad_norm": 0.8385935425758362, "learning_rate": 3.280521430008057e-06, "loss": 0.088, "step": 79200 }, { "epoch": 2.342520849352339, "grad_norm": 0.649886965751648, "learning_rate": 3.2803947400891176e-06, "loss": 0.0732, "step": 79210 }, { "epoch": 2.3428165848465135, "grad_norm": 0.7321672439575195, "learning_rate": 3.2802680501701783e-06, "loss": 0.0836, "step": 79220 }, { "epoch": 2.3431123203406874, "grad_norm": 1.2029893398284912, "learning_rate": 3.2801413602512387e-06, "loss": 0.0543, "step": 79230 }, { "epoch": 2.3434080558348613, "grad_norm": 0.8535735011100769, "learning_rate": 3.2800146703322995e-06, "loss": 0.0772, "step": 79240 }, { "epoch": 2.3437037913290353, "grad_norm": 0.5353856682777405, "learning_rate": 3.27988798041336e-06, "loss": 0.0683, "step": 79250 }, { "epoch": 2.3439995268232092, "grad_norm": 0.8427966833114624, "learning_rate": 3.279761290494421e-06, "loss": 0.0862, "step": 79260 }, { "epoch": 2.344295262317383, "grad_norm": 0.7140282988548279, "learning_rate": 3.2796346005754814e-06, "loss": 0.0815, "step": 79270 }, { "epoch": 2.344590997811557, "grad_norm": 0.8992099761962891, "learning_rate": 3.2795079106565422e-06, "loss": 0.0859, "step": 79280 }, { "epoch": 2.3448867333057315, "grad_norm": 0.9509099721908569, "learning_rate": 3.2793812207376026e-06, "loss": 0.0929, "step": 79290 }, { "epoch": 2.3451824687999054, "grad_norm": 0.6199519634246826, "learning_rate": 3.2792545308186634e-06, "loss": 0.0804, "step": 79300 }, { "epoch": 2.3454782042940794, "grad_norm": 0.6139047741889954, "learning_rate": 3.2791278408997238e-06, "loss": 0.0689, "step": 79310 }, { "epoch": 2.3457739397882533, "grad_norm": 0.8705029487609863, "learning_rate": 3.2790011509807845e-06, "loss": 0.0675, "step": 79320 }, { "epoch": 2.3460696752824273, "grad_norm": 1.0827667713165283, "learning_rate": 3.278874461061845e-06, "loss": 0.0597, "step": 79330 }, { "epoch": 2.3463654107766017, "grad_norm": 0.8882189393043518, "learning_rate": 3.278747771142906e-06, "loss": 0.0933, "step": 79340 }, { "epoch": 2.3466611462707756, "grad_norm": 0.981380045413971, "learning_rate": 3.2786210812239665e-06, "loss": 0.0907, "step": 79350 }, { "epoch": 2.3469568817649495, "grad_norm": 0.9548531770706177, "learning_rate": 3.2784943913050273e-06, "loss": 0.0722, "step": 79360 }, { "epoch": 2.3472526172591235, "grad_norm": 0.928717315196991, "learning_rate": 3.2783677013860876e-06, "loss": 0.0705, "step": 79370 }, { "epoch": 2.3475483527532974, "grad_norm": 0.888157844543457, "learning_rate": 3.2782410114671484e-06, "loss": 0.0682, "step": 79380 }, { "epoch": 2.3478440882474714, "grad_norm": 0.8469278812408447, "learning_rate": 3.278114321548209e-06, "loss": 0.077, "step": 79390 }, { "epoch": 2.3481398237416453, "grad_norm": 0.9544640183448792, "learning_rate": 3.2779876316292696e-06, "loss": 0.0865, "step": 79400 }, { "epoch": 2.3484355592358197, "grad_norm": 0.7553333640098572, "learning_rate": 3.27786094171033e-06, "loss": 0.079, "step": 79410 }, { "epoch": 2.3487312947299936, "grad_norm": 0.9652346968650818, "learning_rate": 3.277734251791391e-06, "loss": 0.0699, "step": 79420 }, { "epoch": 2.3490270302241676, "grad_norm": 0.9291048645973206, "learning_rate": 3.2776075618724515e-06, "loss": 0.065, "step": 79430 }, { "epoch": 2.3493227657183415, "grad_norm": 0.9804145693778992, "learning_rate": 3.2774808719535123e-06, "loss": 0.0954, "step": 79440 }, { "epoch": 2.3496185012125155, "grad_norm": 0.8277712464332581, "learning_rate": 3.2773541820345727e-06, "loss": 0.0839, "step": 79450 }, { "epoch": 2.3499142367066894, "grad_norm": 0.8628600239753723, "learning_rate": 3.2772274921156335e-06, "loss": 0.0863, "step": 79460 }, { "epoch": 2.3502099722008634, "grad_norm": 0.6701633930206299, "learning_rate": 3.277100802196694e-06, "loss": 0.0568, "step": 79470 }, { "epoch": 2.3505057076950377, "grad_norm": 0.7009729743003845, "learning_rate": 3.2769741122777546e-06, "loss": 0.0768, "step": 79480 }, { "epoch": 2.3508014431892117, "grad_norm": 1.0582568645477295, "learning_rate": 3.276847422358815e-06, "loss": 0.0858, "step": 79490 }, { "epoch": 2.3510971786833856, "grad_norm": 0.7352052330970764, "learning_rate": 3.276720732439876e-06, "loss": 0.0763, "step": 79500 }, { "epoch": 2.3513929141775596, "grad_norm": 0.7646552324295044, "learning_rate": 3.2765940425209366e-06, "loss": 0.0812, "step": 79510 }, { "epoch": 2.3516886496717335, "grad_norm": 0.6711429953575134, "learning_rate": 3.2764673526019974e-06, "loss": 0.074, "step": 79520 }, { "epoch": 2.3519843851659075, "grad_norm": 0.7674005627632141, "learning_rate": 3.2763406626830577e-06, "loss": 0.0606, "step": 79530 }, { "epoch": 2.3522801206600814, "grad_norm": 0.9788093566894531, "learning_rate": 3.2762139727641185e-06, "loss": 0.0945, "step": 79540 }, { "epoch": 2.352575856154256, "grad_norm": 0.9708129167556763, "learning_rate": 3.276087282845179e-06, "loss": 0.0796, "step": 79550 }, { "epoch": 2.3528715916484297, "grad_norm": 1.0715553760528564, "learning_rate": 3.2759605929262393e-06, "loss": 0.0978, "step": 79560 }, { "epoch": 2.3531673271426037, "grad_norm": 1.6565608978271484, "learning_rate": 3.2758339030073e-06, "loss": 0.0682, "step": 79570 }, { "epoch": 2.3534630626367776, "grad_norm": 0.6972378492355347, "learning_rate": 3.275707213088361e-06, "loss": 0.0683, "step": 79580 }, { "epoch": 2.3537587981309516, "grad_norm": 0.7625640034675598, "learning_rate": 3.2755805231694216e-06, "loss": 0.1183, "step": 79590 }, { "epoch": 2.3540545336251255, "grad_norm": 1.700161099433899, "learning_rate": 3.275453833250482e-06, "loss": 0.0839, "step": 79600 }, { "epoch": 2.3543502691192995, "grad_norm": 0.8302429914474487, "learning_rate": 3.2753271433315428e-06, "loss": 0.0706, "step": 79610 }, { "epoch": 2.354646004613474, "grad_norm": 0.8765214085578918, "learning_rate": 3.275200453412603e-06, "loss": 0.0691, "step": 79620 }, { "epoch": 2.354941740107648, "grad_norm": 0.7563633322715759, "learning_rate": 3.275073763493664e-06, "loss": 0.0576, "step": 79630 }, { "epoch": 2.3552374756018217, "grad_norm": 0.6224713921546936, "learning_rate": 3.2749470735747243e-06, "loss": 0.0887, "step": 79640 }, { "epoch": 2.3555332110959957, "grad_norm": 0.6342236995697021, "learning_rate": 3.274820383655785e-06, "loss": 0.1111, "step": 79650 }, { "epoch": 2.3558289465901696, "grad_norm": 0.7410433888435364, "learning_rate": 3.274693693736846e-06, "loss": 0.0686, "step": 79660 }, { "epoch": 2.356124682084344, "grad_norm": 0.5112819075584412, "learning_rate": 3.2745670038179067e-06, "loss": 0.0706, "step": 79670 }, { "epoch": 2.356420417578518, "grad_norm": 0.8291999101638794, "learning_rate": 3.274440313898967e-06, "loss": 0.0691, "step": 79680 }, { "epoch": 2.356716153072692, "grad_norm": 0.712076723575592, "learning_rate": 3.274313623980028e-06, "loss": 0.0721, "step": 79690 }, { "epoch": 2.357011888566866, "grad_norm": 0.9653681516647339, "learning_rate": 3.274186934061088e-06, "loss": 0.085, "step": 79700 }, { "epoch": 2.3573076240610398, "grad_norm": 0.8926113247871399, "learning_rate": 3.274060244142149e-06, "loss": 0.0765, "step": 79710 }, { "epoch": 2.3576033595552137, "grad_norm": 0.5991691946983337, "learning_rate": 3.2739335542232093e-06, "loss": 0.0724, "step": 79720 }, { "epoch": 2.3578990950493877, "grad_norm": 1.0475443601608276, "learning_rate": 3.27380686430427e-06, "loss": 0.0627, "step": 79730 }, { "epoch": 2.358194830543562, "grad_norm": 0.8524516224861145, "learning_rate": 3.273680174385331e-06, "loss": 0.0664, "step": 79740 }, { "epoch": 2.358490566037736, "grad_norm": 1.1791239976882935, "learning_rate": 3.2735534844663917e-06, "loss": 0.0998, "step": 79750 }, { "epoch": 2.35878630153191, "grad_norm": 0.6104037761688232, "learning_rate": 3.273426794547452e-06, "loss": 0.0615, "step": 79760 }, { "epoch": 2.359082037026084, "grad_norm": 1.0250433683395386, "learning_rate": 3.273300104628513e-06, "loss": 0.075, "step": 79770 }, { "epoch": 2.359377772520258, "grad_norm": 1.017089605331421, "learning_rate": 3.2731734147095732e-06, "loss": 0.0729, "step": 79780 }, { "epoch": 2.3596735080144318, "grad_norm": 0.6631885766983032, "learning_rate": 3.273046724790634e-06, "loss": 0.0733, "step": 79790 }, { "epoch": 2.3599692435086057, "grad_norm": 0.9180448651313782, "learning_rate": 3.2729200348716944e-06, "loss": 0.0946, "step": 79800 }, { "epoch": 2.36026497900278, "grad_norm": 0.6947911977767944, "learning_rate": 3.272793344952755e-06, "loss": 0.0829, "step": 79810 }, { "epoch": 2.360560714496954, "grad_norm": 0.694859504699707, "learning_rate": 3.272666655033816e-06, "loss": 0.0654, "step": 79820 }, { "epoch": 2.360856449991128, "grad_norm": 1.2358285188674927, "learning_rate": 3.2725399651148768e-06, "loss": 0.0861, "step": 79830 }, { "epoch": 2.361152185485302, "grad_norm": 0.48873794078826904, "learning_rate": 3.272413275195937e-06, "loss": 0.0707, "step": 79840 }, { "epoch": 2.361447920979476, "grad_norm": 1.5572593212127686, "learning_rate": 3.272286585276998e-06, "loss": 0.0913, "step": 79850 }, { "epoch": 2.36174365647365, "grad_norm": 0.8130329847335815, "learning_rate": 3.2721598953580583e-06, "loss": 0.0834, "step": 79860 }, { "epoch": 2.3620393919678238, "grad_norm": 0.9168672561645508, "learning_rate": 3.272033205439119e-06, "loss": 0.0772, "step": 79870 }, { "epoch": 2.362335127461998, "grad_norm": 0.5261618494987488, "learning_rate": 3.2719065155201794e-06, "loss": 0.0659, "step": 79880 }, { "epoch": 2.362630862956172, "grad_norm": 0.6885831356048584, "learning_rate": 3.2717798256012402e-06, "loss": 0.1057, "step": 79890 }, { "epoch": 2.362926598450346, "grad_norm": 0.6693474054336548, "learning_rate": 3.271653135682301e-06, "loss": 0.0778, "step": 79900 }, { "epoch": 2.36322233394452, "grad_norm": 0.6993433237075806, "learning_rate": 3.271526445763362e-06, "loss": 0.0863, "step": 79910 }, { "epoch": 2.363518069438694, "grad_norm": 1.5564868450164795, "learning_rate": 3.271399755844422e-06, "loss": 0.064, "step": 79920 }, { "epoch": 2.363813804932868, "grad_norm": 0.6878409385681152, "learning_rate": 3.271273065925483e-06, "loss": 0.0677, "step": 79930 }, { "epoch": 2.3641095404270422, "grad_norm": 0.7321294546127319, "learning_rate": 3.2711463760065433e-06, "loss": 0.0948, "step": 79940 }, { "epoch": 2.364405275921216, "grad_norm": 0.8973638415336609, "learning_rate": 3.271019686087604e-06, "loss": 0.0793, "step": 79950 }, { "epoch": 2.36470101141539, "grad_norm": 0.5909472703933716, "learning_rate": 3.2708929961686645e-06, "loss": 0.0826, "step": 79960 }, { "epoch": 2.364996746909564, "grad_norm": 0.5769582390785217, "learning_rate": 3.270766306249725e-06, "loss": 0.0748, "step": 79970 }, { "epoch": 2.365292482403738, "grad_norm": 0.8072920441627502, "learning_rate": 3.270639616330786e-06, "loss": 0.0763, "step": 79980 }, { "epoch": 2.365588217897912, "grad_norm": 0.7760341167449951, "learning_rate": 3.2705129264118464e-06, "loss": 0.0862, "step": 79990 }, { "epoch": 2.3658839533920863, "grad_norm": 0.6394952535629272, "learning_rate": 3.2703862364929072e-06, "loss": 0.0852, "step": 80000 }, { "epoch": 2.3661796888862603, "grad_norm": 0.8971005082130432, "learning_rate": 3.2702595465739676e-06, "loss": 0.0812, "step": 80010 }, { "epoch": 2.3664754243804342, "grad_norm": 0.580150842666626, "learning_rate": 3.2701328566550284e-06, "loss": 0.0664, "step": 80020 }, { "epoch": 2.366771159874608, "grad_norm": 0.9677460193634033, "learning_rate": 3.2700061667360887e-06, "loss": 0.0726, "step": 80030 }, { "epoch": 2.367066895368782, "grad_norm": 0.9356650114059448, "learning_rate": 3.2698794768171495e-06, "loss": 0.0862, "step": 80040 }, { "epoch": 2.367362630862956, "grad_norm": 0.5466968417167664, "learning_rate": 3.26975278689821e-06, "loss": 0.0853, "step": 80050 }, { "epoch": 2.36765836635713, "grad_norm": 1.4409723281860352, "learning_rate": 3.269626096979271e-06, "loss": 0.081, "step": 80060 }, { "epoch": 2.3679541018513044, "grad_norm": 1.0640238523483276, "learning_rate": 3.2694994070603315e-06, "loss": 0.0685, "step": 80070 }, { "epoch": 2.3682498373454783, "grad_norm": 0.7049176692962646, "learning_rate": 3.2693727171413923e-06, "loss": 0.0759, "step": 80080 }, { "epoch": 2.3685455728396523, "grad_norm": 0.8369069695472717, "learning_rate": 3.2692460272224526e-06, "loss": 0.0969, "step": 80090 }, { "epoch": 2.3688413083338262, "grad_norm": 0.6086422801017761, "learning_rate": 3.2691193373035134e-06, "loss": 0.0726, "step": 80100 }, { "epoch": 2.369137043828, "grad_norm": 0.943330705165863, "learning_rate": 3.2689926473845738e-06, "loss": 0.0749, "step": 80110 }, { "epoch": 2.369432779322174, "grad_norm": 0.7916920185089111, "learning_rate": 3.2688659574656346e-06, "loss": 0.0683, "step": 80120 }, { "epoch": 2.369728514816348, "grad_norm": 0.73052579164505, "learning_rate": 3.268739267546695e-06, "loss": 0.0616, "step": 80130 }, { "epoch": 2.3700242503105224, "grad_norm": 0.8552947044372559, "learning_rate": 3.268612577627756e-06, "loss": 0.08, "step": 80140 }, { "epoch": 2.3703199858046964, "grad_norm": 1.266474962234497, "learning_rate": 3.2684858877088165e-06, "loss": 0.0787, "step": 80150 }, { "epoch": 2.3706157212988703, "grad_norm": 0.7781624794006348, "learning_rate": 3.2683591977898773e-06, "loss": 0.0595, "step": 80160 }, { "epoch": 2.3709114567930443, "grad_norm": 0.6909212470054626, "learning_rate": 3.2682325078709377e-06, "loss": 0.065, "step": 80170 }, { "epoch": 2.371207192287218, "grad_norm": 0.6822530627250671, "learning_rate": 3.2681058179519985e-06, "loss": 0.0592, "step": 80180 }, { "epoch": 2.371502927781392, "grad_norm": 1.5525703430175781, "learning_rate": 3.267979128033059e-06, "loss": 0.084, "step": 80190 }, { "epoch": 2.371798663275566, "grad_norm": 0.8941861391067505, "learning_rate": 3.2678524381141196e-06, "loss": 0.0909, "step": 80200 }, { "epoch": 2.3720943987697405, "grad_norm": 0.9585095047950745, "learning_rate": 3.26772574819518e-06, "loss": 0.0756, "step": 80210 }, { "epoch": 2.3723901342639144, "grad_norm": 0.4943820834159851, "learning_rate": 3.267599058276241e-06, "loss": 0.0937, "step": 80220 }, { "epoch": 2.3726858697580884, "grad_norm": 0.9825859069824219, "learning_rate": 3.2674723683573016e-06, "loss": 0.0625, "step": 80230 }, { "epoch": 2.3729816052522623, "grad_norm": 0.9321302771568298, "learning_rate": 3.2673456784383624e-06, "loss": 0.0764, "step": 80240 }, { "epoch": 2.3732773407464363, "grad_norm": 0.9741007685661316, "learning_rate": 3.2672189885194227e-06, "loss": 0.0889, "step": 80250 }, { "epoch": 2.3735730762406106, "grad_norm": 0.6677332520484924, "learning_rate": 3.2670922986004835e-06, "loss": 0.0855, "step": 80260 }, { "epoch": 2.3738688117347846, "grad_norm": 0.7217801809310913, "learning_rate": 3.266965608681544e-06, "loss": 0.0685, "step": 80270 }, { "epoch": 2.3741645472289585, "grad_norm": 0.9240152835845947, "learning_rate": 3.2668389187626047e-06, "loss": 0.0762, "step": 80280 }, { "epoch": 2.3744602827231325, "grad_norm": 0.859984815120697, "learning_rate": 3.266712228843665e-06, "loss": 0.0835, "step": 80290 }, { "epoch": 2.3747560182173064, "grad_norm": 0.7022321820259094, "learning_rate": 3.2665855389247262e-06, "loss": 0.0731, "step": 80300 }, { "epoch": 2.3750517537114804, "grad_norm": 0.7121326923370361, "learning_rate": 3.2664588490057866e-06, "loss": 0.0814, "step": 80310 }, { "epoch": 2.3753474892056543, "grad_norm": 0.6938979029655457, "learning_rate": 3.2663321590868474e-06, "loss": 0.0734, "step": 80320 }, { "epoch": 2.3756432246998287, "grad_norm": 0.6305587291717529, "learning_rate": 3.2662054691679078e-06, "loss": 0.0569, "step": 80330 }, { "epoch": 2.3759389601940026, "grad_norm": 1.8963252305984497, "learning_rate": 3.2660787792489686e-06, "loss": 0.0886, "step": 80340 }, { "epoch": 2.3762346956881766, "grad_norm": 0.8549871444702148, "learning_rate": 3.265952089330029e-06, "loss": 0.0993, "step": 80350 }, { "epoch": 2.3765304311823505, "grad_norm": 0.7959296107292175, "learning_rate": 3.2658253994110897e-06, "loss": 0.0818, "step": 80360 }, { "epoch": 2.3768261666765245, "grad_norm": 1.6762969493865967, "learning_rate": 3.26569870949215e-06, "loss": 0.0687, "step": 80370 }, { "epoch": 2.3771219021706984, "grad_norm": 1.0449702739715576, "learning_rate": 3.265572019573211e-06, "loss": 0.0783, "step": 80380 }, { "epoch": 2.3774176376648724, "grad_norm": 0.9123631715774536, "learning_rate": 3.2654453296542717e-06, "loss": 0.0901, "step": 80390 }, { "epoch": 2.3777133731590467, "grad_norm": 0.7366774082183838, "learning_rate": 3.265318639735332e-06, "loss": 0.0783, "step": 80400 }, { "epoch": 2.3780091086532207, "grad_norm": 0.5763874053955078, "learning_rate": 3.265191949816393e-06, "loss": 0.0735, "step": 80410 }, { "epoch": 2.3783048441473946, "grad_norm": 1.0213795900344849, "learning_rate": 3.265065259897453e-06, "loss": 0.0686, "step": 80420 }, { "epoch": 2.3786005796415686, "grad_norm": 0.9003778696060181, "learning_rate": 3.264938569978514e-06, "loss": 0.0685, "step": 80430 }, { "epoch": 2.3788963151357425, "grad_norm": 0.8283786177635193, "learning_rate": 3.2648118800595743e-06, "loss": 0.084, "step": 80440 }, { "epoch": 2.3791920506299165, "grad_norm": 0.8429985642433167, "learning_rate": 3.264685190140635e-06, "loss": 0.0832, "step": 80450 }, { "epoch": 2.3794877861240904, "grad_norm": 0.7387670874595642, "learning_rate": 3.264558500221696e-06, "loss": 0.0717, "step": 80460 }, { "epoch": 2.379783521618265, "grad_norm": 0.7979739904403687, "learning_rate": 3.2644318103027567e-06, "loss": 0.0726, "step": 80470 }, { "epoch": 2.3800792571124387, "grad_norm": 0.8443391919136047, "learning_rate": 3.264305120383817e-06, "loss": 0.0629, "step": 80480 }, { "epoch": 2.3803749926066127, "grad_norm": 0.5721583962440491, "learning_rate": 3.264178430464878e-06, "loss": 0.0874, "step": 80490 }, { "epoch": 2.3806707281007866, "grad_norm": 0.67957603931427, "learning_rate": 3.2640517405459382e-06, "loss": 0.0868, "step": 80500 }, { "epoch": 2.3809664635949606, "grad_norm": 1.1556916236877441, "learning_rate": 3.263925050626999e-06, "loss": 0.0724, "step": 80510 }, { "epoch": 2.3812621990891345, "grad_norm": 7.4414472579956055, "learning_rate": 3.2637983607080594e-06, "loss": 0.0683, "step": 80520 }, { "epoch": 2.3815579345833084, "grad_norm": 1.0386526584625244, "learning_rate": 3.26367167078912e-06, "loss": 0.0924, "step": 80530 }, { "epoch": 2.381853670077483, "grad_norm": 1.1672089099884033, "learning_rate": 3.263544980870181e-06, "loss": 0.0787, "step": 80540 }, { "epoch": 2.3821494055716568, "grad_norm": 0.6176676750183105, "learning_rate": 3.2634182909512417e-06, "loss": 0.0697, "step": 80550 }, { "epoch": 2.3824451410658307, "grad_norm": 0.6513113379478455, "learning_rate": 3.263291601032302e-06, "loss": 0.0805, "step": 80560 }, { "epoch": 2.3827408765600047, "grad_norm": 0.5797874927520752, "learning_rate": 3.263164911113363e-06, "loss": 0.0555, "step": 80570 }, { "epoch": 2.3830366120541786, "grad_norm": 0.6562490463256836, "learning_rate": 3.2630382211944233e-06, "loss": 0.0762, "step": 80580 }, { "epoch": 2.383332347548353, "grad_norm": 0.8069260716438293, "learning_rate": 3.262911531275484e-06, "loss": 0.0725, "step": 80590 }, { "epoch": 2.383628083042527, "grad_norm": 0.6602210402488708, "learning_rate": 3.2627848413565444e-06, "loss": 0.0756, "step": 80600 }, { "epoch": 2.383923818536701, "grad_norm": 0.5084828734397888, "learning_rate": 3.262658151437605e-06, "loss": 0.0832, "step": 80610 }, { "epoch": 2.384219554030875, "grad_norm": 0.6308945417404175, "learning_rate": 3.262531461518666e-06, "loss": 0.0837, "step": 80620 }, { "epoch": 2.3845152895250488, "grad_norm": 0.7455193400382996, "learning_rate": 3.262404771599727e-06, "loss": 0.0534, "step": 80630 }, { "epoch": 2.3848110250192227, "grad_norm": 0.40047672390937805, "learning_rate": 3.262278081680787e-06, "loss": 0.0803, "step": 80640 }, { "epoch": 2.3851067605133967, "grad_norm": 0.575968861579895, "learning_rate": 3.262151391761848e-06, "loss": 0.0925, "step": 80650 }, { "epoch": 2.385402496007571, "grad_norm": 0.7546083927154541, "learning_rate": 3.2620247018429083e-06, "loss": 0.0853, "step": 80660 }, { "epoch": 2.385698231501745, "grad_norm": 0.75453782081604, "learning_rate": 3.261898011923969e-06, "loss": 0.0495, "step": 80670 }, { "epoch": 2.385993966995919, "grad_norm": 1.010976791381836, "learning_rate": 3.2617713220050295e-06, "loss": 0.0631, "step": 80680 }, { "epoch": 2.386289702490093, "grad_norm": 0.7628724575042725, "learning_rate": 3.2616446320860903e-06, "loss": 0.0749, "step": 80690 }, { "epoch": 2.386585437984267, "grad_norm": 1.0667227506637573, "learning_rate": 3.261517942167151e-06, "loss": 0.0959, "step": 80700 }, { "epoch": 2.3868811734784408, "grad_norm": 0.6230002641677856, "learning_rate": 3.261391252248212e-06, "loss": 0.0669, "step": 80710 }, { "epoch": 2.3871769089726147, "grad_norm": 0.3746315538883209, "learning_rate": 3.261264562329272e-06, "loss": 0.0816, "step": 80720 }, { "epoch": 2.387472644466789, "grad_norm": 1.133162021636963, "learning_rate": 3.261137872410333e-06, "loss": 0.0709, "step": 80730 }, { "epoch": 2.387768379960963, "grad_norm": 0.6916379928588867, "learning_rate": 3.2610111824913934e-06, "loss": 0.0858, "step": 80740 }, { "epoch": 2.388064115455137, "grad_norm": 0.9031664729118347, "learning_rate": 3.260884492572454e-06, "loss": 0.0917, "step": 80750 }, { "epoch": 2.388359850949311, "grad_norm": 1.0892151594161987, "learning_rate": 3.2607578026535145e-06, "loss": 0.0869, "step": 80760 }, { "epoch": 2.388655586443485, "grad_norm": 1.2776027917861938, "learning_rate": 3.2606311127345753e-06, "loss": 0.0743, "step": 80770 }, { "epoch": 2.388951321937659, "grad_norm": 0.848793089389801, "learning_rate": 3.260504422815636e-06, "loss": 0.0678, "step": 80780 }, { "epoch": 2.3892470574318327, "grad_norm": 1.0696362257003784, "learning_rate": 3.2603777328966965e-06, "loss": 0.0925, "step": 80790 }, { "epoch": 2.389542792926007, "grad_norm": 0.6402137875556946, "learning_rate": 3.2602510429777572e-06, "loss": 0.0615, "step": 80800 }, { "epoch": 2.389838528420181, "grad_norm": 0.7465853095054626, "learning_rate": 3.2601243530588176e-06, "loss": 0.0839, "step": 80810 }, { "epoch": 2.390134263914355, "grad_norm": 0.5587236881256104, "learning_rate": 3.2599976631398784e-06, "loss": 0.084, "step": 80820 }, { "epoch": 2.390429999408529, "grad_norm": 0.7965142130851746, "learning_rate": 3.2598709732209388e-06, "loss": 0.0661, "step": 80830 }, { "epoch": 2.390725734902703, "grad_norm": 0.7259973883628845, "learning_rate": 3.2597442833019996e-06, "loss": 0.0836, "step": 80840 }, { "epoch": 2.391021470396877, "grad_norm": 0.7362650036811829, "learning_rate": 3.25961759338306e-06, "loss": 0.0817, "step": 80850 }, { "epoch": 2.3913172058910512, "grad_norm": 0.4857375919818878, "learning_rate": 3.259490903464121e-06, "loss": 0.0716, "step": 80860 }, { "epoch": 2.391612941385225, "grad_norm": 0.8417854905128479, "learning_rate": 3.2593642135451815e-06, "loss": 0.0784, "step": 80870 }, { "epoch": 2.391908676879399, "grad_norm": 0.6984581351280212, "learning_rate": 3.2592375236262423e-06, "loss": 0.0833, "step": 80880 }, { "epoch": 2.392204412373573, "grad_norm": 0.5980466604232788, "learning_rate": 3.2591108337073027e-06, "loss": 0.0733, "step": 80890 }, { "epoch": 2.392500147867747, "grad_norm": 0.9062715172767639, "learning_rate": 3.2589841437883634e-06, "loss": 0.083, "step": 80900 }, { "epoch": 2.392795883361921, "grad_norm": 1.6577868461608887, "learning_rate": 3.258857453869424e-06, "loss": 0.0625, "step": 80910 }, { "epoch": 2.3930916188560953, "grad_norm": 0.7831593155860901, "learning_rate": 3.2587307639504846e-06, "loss": 0.0667, "step": 80920 }, { "epoch": 2.3933873543502693, "grad_norm": 1.0808000564575195, "learning_rate": 3.258604074031545e-06, "loss": 0.0666, "step": 80930 }, { "epoch": 2.3936830898444432, "grad_norm": 1.8375775814056396, "learning_rate": 3.258477384112606e-06, "loss": 0.0977, "step": 80940 }, { "epoch": 2.393978825338617, "grad_norm": 0.6911044120788574, "learning_rate": 3.2583506941936665e-06, "loss": 0.0894, "step": 80950 }, { "epoch": 2.394274560832791, "grad_norm": 1.205919623374939, "learning_rate": 3.2582240042747273e-06, "loss": 0.0918, "step": 80960 }, { "epoch": 2.394570296326965, "grad_norm": 0.2935488820075989, "learning_rate": 3.2580973143557877e-06, "loss": 0.0707, "step": 80970 }, { "epoch": 2.394866031821139, "grad_norm": 0.8472706079483032, "learning_rate": 3.2579706244368485e-06, "loss": 0.0757, "step": 80980 }, { "epoch": 2.3951617673153134, "grad_norm": 1.0602784156799316, "learning_rate": 3.257843934517909e-06, "loss": 0.0877, "step": 80990 }, { "epoch": 2.3954575028094873, "grad_norm": 0.5235233306884766, "learning_rate": 3.2577172445989696e-06, "loss": 0.0795, "step": 81000 }, { "epoch": 2.3957532383036613, "grad_norm": 0.9078126549720764, "learning_rate": 3.25759055468003e-06, "loss": 0.0784, "step": 81010 }, { "epoch": 2.396048973797835, "grad_norm": 0.7109205722808838, "learning_rate": 3.2574638647610912e-06, "loss": 0.0667, "step": 81020 }, { "epoch": 2.396344709292009, "grad_norm": 1.274863839149475, "learning_rate": 3.2573371748421516e-06, "loss": 0.0827, "step": 81030 }, { "epoch": 2.396640444786183, "grad_norm": 1.0424939393997192, "learning_rate": 3.2572104849232124e-06, "loss": 0.0866, "step": 81040 }, { "epoch": 2.396936180280357, "grad_norm": 1.191998839378357, "learning_rate": 3.2570837950042727e-06, "loss": 0.0953, "step": 81050 }, { "epoch": 2.3972319157745314, "grad_norm": 1.012493371963501, "learning_rate": 3.2569571050853335e-06, "loss": 0.0726, "step": 81060 }, { "epoch": 2.3975276512687054, "grad_norm": 0.5464583039283752, "learning_rate": 3.256830415166394e-06, "loss": 0.0699, "step": 81070 }, { "epoch": 2.3978233867628793, "grad_norm": 0.8550463318824768, "learning_rate": 3.2567037252474547e-06, "loss": 0.0585, "step": 81080 }, { "epoch": 2.3981191222570533, "grad_norm": 0.8447556495666504, "learning_rate": 3.256577035328515e-06, "loss": 0.0771, "step": 81090 }, { "epoch": 2.398414857751227, "grad_norm": 1.034670114517212, "learning_rate": 3.2564503454095763e-06, "loss": 0.0661, "step": 81100 }, { "epoch": 2.398710593245401, "grad_norm": 0.7312059998512268, "learning_rate": 3.2563236554906366e-06, "loss": 0.0799, "step": 81110 }, { "epoch": 2.399006328739575, "grad_norm": 0.6104058623313904, "learning_rate": 3.2561969655716974e-06, "loss": 0.0639, "step": 81120 }, { "epoch": 2.3993020642337495, "grad_norm": 0.7048400640487671, "learning_rate": 3.256070275652758e-06, "loss": 0.0744, "step": 81130 }, { "epoch": 2.3995977997279234, "grad_norm": 0.8238644599914551, "learning_rate": 3.2559435857338186e-06, "loss": 0.0919, "step": 81140 }, { "epoch": 2.3998935352220974, "grad_norm": 0.5965853333473206, "learning_rate": 3.255816895814879e-06, "loss": 0.076, "step": 81150 }, { "epoch": 2.4001892707162713, "grad_norm": 0.6236978769302368, "learning_rate": 3.2556902058959397e-06, "loss": 0.0826, "step": 81160 }, { "epoch": 2.4004850062104452, "grad_norm": 0.8181807398796082, "learning_rate": 3.255563515977e-06, "loss": 0.0751, "step": 81170 }, { "epoch": 2.4007807417046196, "grad_norm": 0.7247817516326904, "learning_rate": 3.2554368260580613e-06, "loss": 0.0607, "step": 81180 }, { "epoch": 2.4010764771987936, "grad_norm": 0.9474620223045349, "learning_rate": 3.2553101361391217e-06, "loss": 0.0924, "step": 81190 }, { "epoch": 2.4013722126929675, "grad_norm": 0.7738854885101318, "learning_rate": 3.255183446220182e-06, "loss": 0.0918, "step": 81200 }, { "epoch": 2.4016679481871415, "grad_norm": 0.6520787477493286, "learning_rate": 3.255056756301243e-06, "loss": 0.0923, "step": 81210 }, { "epoch": 2.4019636836813154, "grad_norm": 0.7151676416397095, "learning_rate": 3.254930066382303e-06, "loss": 0.0924, "step": 81220 }, { "epoch": 2.4022594191754894, "grad_norm": 0.7818716168403625, "learning_rate": 3.254803376463364e-06, "loss": 0.0684, "step": 81230 }, { "epoch": 2.4025551546696633, "grad_norm": 0.5547944903373718, "learning_rate": 3.2546766865444244e-06, "loss": 0.0708, "step": 81240 }, { "epoch": 2.4028508901638377, "grad_norm": 0.43457794189453125, "learning_rate": 3.254549996625485e-06, "loss": 0.0744, "step": 81250 }, { "epoch": 2.4031466256580116, "grad_norm": 0.9078200459480286, "learning_rate": 3.254423306706546e-06, "loss": 0.0796, "step": 81260 }, { "epoch": 2.4034423611521856, "grad_norm": 1.454909324645996, "learning_rate": 3.2542966167876067e-06, "loss": 0.0814, "step": 81270 }, { "epoch": 2.4037380966463595, "grad_norm": 0.8200478553771973, "learning_rate": 3.254169926868667e-06, "loss": 0.0689, "step": 81280 }, { "epoch": 2.4040338321405335, "grad_norm": 0.7644107341766357, "learning_rate": 3.254043236949728e-06, "loss": 0.0943, "step": 81290 }, { "epoch": 2.4043295676347074, "grad_norm": 0.9255892634391785, "learning_rate": 3.2539165470307882e-06, "loss": 0.08, "step": 81300 }, { "epoch": 2.4046253031288813, "grad_norm": 0.4785113036632538, "learning_rate": 3.253789857111849e-06, "loss": 0.0706, "step": 81310 }, { "epoch": 2.4049210386230557, "grad_norm": 0.6717028617858887, "learning_rate": 3.2536631671929094e-06, "loss": 0.0787, "step": 81320 }, { "epoch": 2.4052167741172297, "grad_norm": 0.5834415555000305, "learning_rate": 3.25353647727397e-06, "loss": 0.0609, "step": 81330 }, { "epoch": 2.4055125096114036, "grad_norm": 0.7127591967582703, "learning_rate": 3.253409787355031e-06, "loss": 0.1, "step": 81340 }, { "epoch": 2.4058082451055776, "grad_norm": 0.546235978603363, "learning_rate": 3.2532830974360918e-06, "loss": 0.0949, "step": 81350 }, { "epoch": 2.4061039805997515, "grad_norm": 0.8558312058448792, "learning_rate": 3.253156407517152e-06, "loss": 0.0717, "step": 81360 }, { "epoch": 2.4063997160939254, "grad_norm": 1.0219593048095703, "learning_rate": 3.253029717598213e-06, "loss": 0.0708, "step": 81370 }, { "epoch": 2.4066954515880994, "grad_norm": 1.1894181966781616, "learning_rate": 3.2529030276792733e-06, "loss": 0.0923, "step": 81380 }, { "epoch": 2.4069911870822738, "grad_norm": 0.9852447509765625, "learning_rate": 3.252776337760334e-06, "loss": 0.086, "step": 81390 }, { "epoch": 2.4072869225764477, "grad_norm": 0.6047474145889282, "learning_rate": 3.2526496478413944e-06, "loss": 0.0824, "step": 81400 }, { "epoch": 2.4075826580706217, "grad_norm": 0.6415524482727051, "learning_rate": 3.2525229579224552e-06, "loss": 0.0874, "step": 81410 }, { "epoch": 2.4078783935647956, "grad_norm": 0.7105437517166138, "learning_rate": 3.252396268003516e-06, "loss": 0.0779, "step": 81420 }, { "epoch": 2.4081741290589695, "grad_norm": 0.6246111989021301, "learning_rate": 3.252269578084577e-06, "loss": 0.069, "step": 81430 }, { "epoch": 2.4084698645531435, "grad_norm": 0.8469570875167847, "learning_rate": 3.252142888165637e-06, "loss": 0.0853, "step": 81440 }, { "epoch": 2.4087656000473174, "grad_norm": 0.9348793625831604, "learning_rate": 3.252016198246698e-06, "loss": 0.084, "step": 81450 }, { "epoch": 2.409061335541492, "grad_norm": 0.8025521039962769, "learning_rate": 3.2518895083277583e-06, "loss": 0.0795, "step": 81460 }, { "epoch": 2.4093570710356658, "grad_norm": 0.6176202297210693, "learning_rate": 3.251762818408819e-06, "loss": 0.0729, "step": 81470 }, { "epoch": 2.4096528065298397, "grad_norm": 0.8567183613777161, "learning_rate": 3.2516361284898795e-06, "loss": 0.0731, "step": 81480 }, { "epoch": 2.4099485420240137, "grad_norm": 0.5355621576309204, "learning_rate": 3.2515094385709403e-06, "loss": 0.0953, "step": 81490 }, { "epoch": 2.4102442775181876, "grad_norm": 0.7831769585609436, "learning_rate": 3.251382748652001e-06, "loss": 0.0788, "step": 81500 }, { "epoch": 2.410540013012362, "grad_norm": 0.5206223130226135, "learning_rate": 3.251256058733062e-06, "loss": 0.0854, "step": 81510 }, { "epoch": 2.410835748506536, "grad_norm": 0.5886691212654114, "learning_rate": 3.2511293688141222e-06, "loss": 0.0564, "step": 81520 }, { "epoch": 2.41113148400071, "grad_norm": 0.7282886505126953, "learning_rate": 3.251002678895183e-06, "loss": 0.0681, "step": 81530 }, { "epoch": 2.411427219494884, "grad_norm": 1.344944715499878, "learning_rate": 3.2508759889762434e-06, "loss": 0.0756, "step": 81540 }, { "epoch": 2.4117229549890578, "grad_norm": 0.8184931874275208, "learning_rate": 3.250749299057304e-06, "loss": 0.0809, "step": 81550 }, { "epoch": 2.4120186904832317, "grad_norm": 0.667530357837677, "learning_rate": 3.2506226091383645e-06, "loss": 0.074, "step": 81560 }, { "epoch": 2.4123144259774056, "grad_norm": 0.49306008219718933, "learning_rate": 3.2504959192194253e-06, "loss": 0.0695, "step": 81570 }, { "epoch": 2.41261016147158, "grad_norm": 0.9842982292175293, "learning_rate": 3.250369229300486e-06, "loss": 0.0785, "step": 81580 }, { "epoch": 2.412905896965754, "grad_norm": 1.093150019645691, "learning_rate": 3.250242539381547e-06, "loss": 0.0884, "step": 81590 }, { "epoch": 2.413201632459928, "grad_norm": 0.8154045343399048, "learning_rate": 3.2501158494626073e-06, "loss": 0.083, "step": 81600 }, { "epoch": 2.413497367954102, "grad_norm": 0.6903152465820312, "learning_rate": 3.249989159543668e-06, "loss": 0.0858, "step": 81610 }, { "epoch": 2.413793103448276, "grad_norm": 0.8680471777915955, "learning_rate": 3.2498624696247284e-06, "loss": 0.0726, "step": 81620 }, { "epoch": 2.4140888389424497, "grad_norm": 0.6379680633544922, "learning_rate": 3.249735779705789e-06, "loss": 0.0665, "step": 81630 }, { "epoch": 2.4143845744366237, "grad_norm": 1.1190277338027954, "learning_rate": 3.2496090897868496e-06, "loss": 0.0851, "step": 81640 }, { "epoch": 2.414680309930798, "grad_norm": 1.5682944059371948, "learning_rate": 3.24948239986791e-06, "loss": 0.0963, "step": 81650 }, { "epoch": 2.414976045424972, "grad_norm": 0.6890641450881958, "learning_rate": 3.249355709948971e-06, "loss": 0.0738, "step": 81660 }, { "epoch": 2.415271780919146, "grad_norm": 0.5790094137191772, "learning_rate": 3.2492290200300315e-06, "loss": 0.082, "step": 81670 }, { "epoch": 2.41556751641332, "grad_norm": 0.47289490699768066, "learning_rate": 3.2491023301110923e-06, "loss": 0.0746, "step": 81680 }, { "epoch": 2.415863251907494, "grad_norm": 0.7276337742805481, "learning_rate": 3.2489756401921527e-06, "loss": 0.0698, "step": 81690 }, { "epoch": 2.416158987401668, "grad_norm": 1.0190708637237549, "learning_rate": 3.2488489502732135e-06, "loss": 0.096, "step": 81700 }, { "epoch": 2.4164547228958417, "grad_norm": 0.841299831867218, "learning_rate": 3.248722260354274e-06, "loss": 0.0805, "step": 81710 }, { "epoch": 2.416750458390016, "grad_norm": 0.5703656673431396, "learning_rate": 3.2485955704353346e-06, "loss": 0.056, "step": 81720 }, { "epoch": 2.41704619388419, "grad_norm": 0.5823450088500977, "learning_rate": 3.248468880516395e-06, "loss": 0.0661, "step": 81730 }, { "epoch": 2.417341929378364, "grad_norm": 0.6202654242515564, "learning_rate": 3.248342190597456e-06, "loss": 0.0812, "step": 81740 }, { "epoch": 2.417637664872538, "grad_norm": 0.924698531627655, "learning_rate": 3.2482155006785166e-06, "loss": 0.0774, "step": 81750 }, { "epoch": 2.417933400366712, "grad_norm": 0.9388338923454285, "learning_rate": 3.2480888107595774e-06, "loss": 0.0939, "step": 81760 }, { "epoch": 2.418229135860886, "grad_norm": 0.40989843010902405, "learning_rate": 3.2479621208406377e-06, "loss": 0.0652, "step": 81770 }, { "epoch": 2.4185248713550602, "grad_norm": 1.0936065912246704, "learning_rate": 3.2478354309216985e-06, "loss": 0.0733, "step": 81780 }, { "epoch": 2.418820606849234, "grad_norm": 0.5173248648643494, "learning_rate": 3.247708741002759e-06, "loss": 0.0898, "step": 81790 }, { "epoch": 2.419116342343408, "grad_norm": 0.830066978931427, "learning_rate": 3.2475820510838197e-06, "loss": 0.0834, "step": 81800 }, { "epoch": 2.419412077837582, "grad_norm": 0.7583540081977844, "learning_rate": 3.24745536116488e-06, "loss": 0.0761, "step": 81810 }, { "epoch": 2.419707813331756, "grad_norm": 0.7404763102531433, "learning_rate": 3.2473286712459413e-06, "loss": 0.0636, "step": 81820 }, { "epoch": 2.42000354882593, "grad_norm": 0.8153067231178284, "learning_rate": 3.2472019813270016e-06, "loss": 0.0745, "step": 81830 }, { "epoch": 2.4202992843201043, "grad_norm": 0.6891948580741882, "learning_rate": 3.2470752914080624e-06, "loss": 0.0878, "step": 81840 }, { "epoch": 2.4205950198142783, "grad_norm": 1.018846035003662, "learning_rate": 3.2469486014891228e-06, "loss": 0.0797, "step": 81850 }, { "epoch": 2.420890755308452, "grad_norm": 1.3486425876617432, "learning_rate": 3.2468219115701836e-06, "loss": 0.0701, "step": 81860 }, { "epoch": 2.421186490802626, "grad_norm": 0.8567845821380615, "learning_rate": 3.246695221651244e-06, "loss": 0.0902, "step": 81870 }, { "epoch": 2.4214822262968, "grad_norm": 1.0405781269073486, "learning_rate": 3.2465685317323047e-06, "loss": 0.075, "step": 81880 }, { "epoch": 2.421777961790974, "grad_norm": 0.88615483045578, "learning_rate": 3.246441841813365e-06, "loss": 0.0957, "step": 81890 }, { "epoch": 2.422073697285148, "grad_norm": 1.2067902088165283, "learning_rate": 3.2463151518944263e-06, "loss": 0.0724, "step": 81900 }, { "epoch": 2.4223694327793224, "grad_norm": 0.5078397989273071, "learning_rate": 3.2461884619754867e-06, "loss": 0.069, "step": 81910 }, { "epoch": 2.4226651682734963, "grad_norm": 0.7331904768943787, "learning_rate": 3.2460617720565475e-06, "loss": 0.0599, "step": 81920 }, { "epoch": 2.4229609037676703, "grad_norm": 0.728708803653717, "learning_rate": 3.245935082137608e-06, "loss": 0.0677, "step": 81930 }, { "epoch": 2.423256639261844, "grad_norm": 0.795242965221405, "learning_rate": 3.2458083922186686e-06, "loss": 0.0825, "step": 81940 }, { "epoch": 2.423552374756018, "grad_norm": 0.37892988324165344, "learning_rate": 3.245681702299729e-06, "loss": 0.0818, "step": 81950 }, { "epoch": 2.423848110250192, "grad_norm": 1.1717255115509033, "learning_rate": 3.2455550123807898e-06, "loss": 0.0805, "step": 81960 }, { "epoch": 2.424143845744366, "grad_norm": 0.8128724694252014, "learning_rate": 3.24542832246185e-06, "loss": 0.0598, "step": 81970 }, { "epoch": 2.4244395812385404, "grad_norm": 0.8530897498130798, "learning_rate": 3.2453016325429113e-06, "loss": 0.075, "step": 81980 }, { "epoch": 2.4247353167327144, "grad_norm": 1.0477895736694336, "learning_rate": 3.2451749426239717e-06, "loss": 0.0915, "step": 81990 }, { "epoch": 2.4250310522268883, "grad_norm": 0.9460133910179138, "learning_rate": 3.2450482527050325e-06, "loss": 0.071, "step": 82000 }, { "epoch": 2.4253267877210622, "grad_norm": 0.9895788431167603, "learning_rate": 3.244921562786093e-06, "loss": 0.0736, "step": 82010 }, { "epoch": 2.425622523215236, "grad_norm": 0.9364139437675476, "learning_rate": 3.2447948728671537e-06, "loss": 0.0814, "step": 82020 }, { "epoch": 2.42591825870941, "grad_norm": 1.0523723363876343, "learning_rate": 3.244668182948214e-06, "loss": 0.0799, "step": 82030 }, { "epoch": 2.426213994203584, "grad_norm": 1.376405954360962, "learning_rate": 3.2445414930292744e-06, "loss": 0.099, "step": 82040 }, { "epoch": 2.4265097296977585, "grad_norm": 1.0033704042434692, "learning_rate": 3.244414803110335e-06, "loss": 0.0954, "step": 82050 }, { "epoch": 2.4268054651919324, "grad_norm": 0.6987352967262268, "learning_rate": 3.244288113191396e-06, "loss": 0.0871, "step": 82060 }, { "epoch": 2.4271012006861064, "grad_norm": 0.8872929811477661, "learning_rate": 3.2441614232724568e-06, "loss": 0.0657, "step": 82070 }, { "epoch": 2.4273969361802803, "grad_norm": 0.6003784537315369, "learning_rate": 3.244034733353517e-06, "loss": 0.0651, "step": 82080 }, { "epoch": 2.4276926716744542, "grad_norm": 0.5294029712677002, "learning_rate": 3.243908043434578e-06, "loss": 0.0829, "step": 82090 }, { "epoch": 2.4279884071686286, "grad_norm": 0.8009626269340515, "learning_rate": 3.2437813535156383e-06, "loss": 0.071, "step": 82100 }, { "epoch": 2.4282841426628026, "grad_norm": 0.8870251178741455, "learning_rate": 3.243654663596699e-06, "loss": 0.08, "step": 82110 }, { "epoch": 2.4285798781569765, "grad_norm": 0.838693380355835, "learning_rate": 3.2435279736777594e-06, "loss": 0.0684, "step": 82120 }, { "epoch": 2.4288756136511505, "grad_norm": 0.7208455801010132, "learning_rate": 3.2434012837588202e-06, "loss": 0.0629, "step": 82130 }, { "epoch": 2.4291713491453244, "grad_norm": 0.7766832113265991, "learning_rate": 3.243274593839881e-06, "loss": 0.0783, "step": 82140 }, { "epoch": 2.4294670846394983, "grad_norm": 0.9610044956207275, "learning_rate": 3.243147903920942e-06, "loss": 0.0904, "step": 82150 }, { "epoch": 2.4297628201336723, "grad_norm": 0.8066942095756531, "learning_rate": 3.243021214002002e-06, "loss": 0.0746, "step": 82160 }, { "epoch": 2.4300585556278467, "grad_norm": 0.6599717140197754, "learning_rate": 3.242894524083063e-06, "loss": 0.0842, "step": 82170 }, { "epoch": 2.4303542911220206, "grad_norm": 0.9325987100601196, "learning_rate": 3.2427678341641233e-06, "loss": 0.0685, "step": 82180 }, { "epoch": 2.4306500266161946, "grad_norm": 0.8887273669242859, "learning_rate": 3.242641144245184e-06, "loss": 0.0891, "step": 82190 }, { "epoch": 2.4309457621103685, "grad_norm": 0.7305731177330017, "learning_rate": 3.2425144543262445e-06, "loss": 0.0689, "step": 82200 }, { "epoch": 2.4312414976045424, "grad_norm": 0.5392717123031616, "learning_rate": 3.2423877644073053e-06, "loss": 0.0784, "step": 82210 }, { "epoch": 2.4315372330987164, "grad_norm": 0.49200761318206787, "learning_rate": 3.242261074488366e-06, "loss": 0.0684, "step": 82220 }, { "epoch": 2.4318329685928903, "grad_norm": 0.5332679152488708, "learning_rate": 3.242134384569427e-06, "loss": 0.0646, "step": 82230 }, { "epoch": 2.4321287040870647, "grad_norm": 0.7700250148773193, "learning_rate": 3.2420076946504872e-06, "loss": 0.0817, "step": 82240 }, { "epoch": 2.4324244395812387, "grad_norm": 0.7859552502632141, "learning_rate": 3.241881004731548e-06, "loss": 0.0876, "step": 82250 }, { "epoch": 2.4327201750754126, "grad_norm": 0.8811107277870178, "learning_rate": 3.2417543148126084e-06, "loss": 0.0735, "step": 82260 }, { "epoch": 2.4330159105695865, "grad_norm": 1.1125644445419312, "learning_rate": 3.241627624893669e-06, "loss": 0.0762, "step": 82270 }, { "epoch": 2.4333116460637605, "grad_norm": 0.6372920870780945, "learning_rate": 3.2415009349747295e-06, "loss": 0.0701, "step": 82280 }, { "epoch": 2.4336073815579344, "grad_norm": 0.38306060433387756, "learning_rate": 3.2413742450557903e-06, "loss": 0.0997, "step": 82290 }, { "epoch": 2.4339031170521084, "grad_norm": 1.1070046424865723, "learning_rate": 3.241247555136851e-06, "loss": 0.0691, "step": 82300 }, { "epoch": 2.4341988525462828, "grad_norm": 0.846504807472229, "learning_rate": 3.241120865217912e-06, "loss": 0.0767, "step": 82310 }, { "epoch": 2.4344945880404567, "grad_norm": 0.7722322940826416, "learning_rate": 3.2409941752989723e-06, "loss": 0.0729, "step": 82320 }, { "epoch": 2.4347903235346307, "grad_norm": 0.4950074851512909, "learning_rate": 3.240867485380033e-06, "loss": 0.0637, "step": 82330 }, { "epoch": 2.4350860590288046, "grad_norm": 0.6363392472267151, "learning_rate": 3.2407407954610934e-06, "loss": 0.081, "step": 82340 }, { "epoch": 2.4353817945229785, "grad_norm": 1.0752543210983276, "learning_rate": 3.240614105542154e-06, "loss": 0.0785, "step": 82350 }, { "epoch": 2.4356775300171525, "grad_norm": 0.6717449426651001, "learning_rate": 3.2404874156232146e-06, "loss": 0.0686, "step": 82360 }, { "epoch": 2.4359732655113264, "grad_norm": 0.7531020641326904, "learning_rate": 3.2403607257042754e-06, "loss": 0.0813, "step": 82370 }, { "epoch": 2.436269001005501, "grad_norm": 1.4099539518356323, "learning_rate": 3.240234035785336e-06, "loss": 0.0626, "step": 82380 }, { "epoch": 2.4365647364996748, "grad_norm": 0.845159649848938, "learning_rate": 3.240107345866397e-06, "loss": 0.0831, "step": 82390 }, { "epoch": 2.4368604719938487, "grad_norm": 0.8069440722465515, "learning_rate": 3.2399806559474573e-06, "loss": 0.0832, "step": 82400 }, { "epoch": 2.4371562074880226, "grad_norm": 0.6799831986427307, "learning_rate": 3.239853966028518e-06, "loss": 0.0904, "step": 82410 }, { "epoch": 2.4374519429821966, "grad_norm": 0.809503436088562, "learning_rate": 3.2397272761095785e-06, "loss": 0.0793, "step": 82420 }, { "epoch": 2.437747678476371, "grad_norm": 0.9492999315261841, "learning_rate": 3.2396005861906392e-06, "loss": 0.0684, "step": 82430 }, { "epoch": 2.438043413970545, "grad_norm": 0.8152896761894226, "learning_rate": 3.2394738962716996e-06, "loss": 0.0831, "step": 82440 }, { "epoch": 2.438339149464719, "grad_norm": 0.5895603895187378, "learning_rate": 3.23934720635276e-06, "loss": 0.0846, "step": 82450 }, { "epoch": 2.438634884958893, "grad_norm": 0.6848604083061218, "learning_rate": 3.239220516433821e-06, "loss": 0.0775, "step": 82460 }, { "epoch": 2.4389306204530667, "grad_norm": 0.32390230894088745, "learning_rate": 3.2390938265148816e-06, "loss": 0.0635, "step": 82470 }, { "epoch": 2.4392263559472407, "grad_norm": 0.8956952095031738, "learning_rate": 3.2389671365959424e-06, "loss": 0.0604, "step": 82480 }, { "epoch": 2.4395220914414146, "grad_norm": 0.7736942768096924, "learning_rate": 3.2388404466770027e-06, "loss": 0.0931, "step": 82490 }, { "epoch": 2.439817826935589, "grad_norm": 0.4658032953739166, "learning_rate": 3.2387137567580635e-06, "loss": 0.0819, "step": 82500 }, { "epoch": 2.440113562429763, "grad_norm": 0.8041635155677795, "learning_rate": 3.238587066839124e-06, "loss": 0.0915, "step": 82510 }, { "epoch": 2.440409297923937, "grad_norm": 0.567293643951416, "learning_rate": 3.2384603769201847e-06, "loss": 0.0565, "step": 82520 }, { "epoch": 2.440705033418111, "grad_norm": 0.6225780248641968, "learning_rate": 3.238333687001245e-06, "loss": 0.0633, "step": 82530 }, { "epoch": 2.441000768912285, "grad_norm": 0.845816969871521, "learning_rate": 3.2382069970823062e-06, "loss": 0.0785, "step": 82540 }, { "epoch": 2.4412965044064587, "grad_norm": 0.32503849267959595, "learning_rate": 3.2380803071633666e-06, "loss": 0.0818, "step": 82550 }, { "epoch": 2.4415922399006327, "grad_norm": 0.44118601083755493, "learning_rate": 3.2379536172444274e-06, "loss": 0.0875, "step": 82560 }, { "epoch": 2.441887975394807, "grad_norm": 0.8089327812194824, "learning_rate": 3.2378269273254878e-06, "loss": 0.0799, "step": 82570 }, { "epoch": 2.442183710888981, "grad_norm": 0.9215083718299866, "learning_rate": 3.2377002374065486e-06, "loss": 0.0695, "step": 82580 }, { "epoch": 2.442479446383155, "grad_norm": 0.7277911901473999, "learning_rate": 3.237573547487609e-06, "loss": 0.0735, "step": 82590 }, { "epoch": 2.442775181877329, "grad_norm": 0.7815933227539062, "learning_rate": 3.2374468575686697e-06, "loss": 0.085, "step": 82600 }, { "epoch": 2.443070917371503, "grad_norm": 0.6456515789031982, "learning_rate": 3.23732016764973e-06, "loss": 0.0864, "step": 82610 }, { "epoch": 2.443366652865677, "grad_norm": 0.9642951488494873, "learning_rate": 3.2371934777307913e-06, "loss": 0.0723, "step": 82620 }, { "epoch": 2.4436623883598507, "grad_norm": 0.803098201751709, "learning_rate": 3.2370667878118517e-06, "loss": 0.0703, "step": 82630 }, { "epoch": 2.443958123854025, "grad_norm": 0.8341031670570374, "learning_rate": 3.2369400978929124e-06, "loss": 0.0861, "step": 82640 }, { "epoch": 2.444253859348199, "grad_norm": 0.8591834902763367, "learning_rate": 3.236813407973973e-06, "loss": 0.0843, "step": 82650 }, { "epoch": 2.444549594842373, "grad_norm": 0.8819200992584229, "learning_rate": 3.2366867180550336e-06, "loss": 0.0879, "step": 82660 }, { "epoch": 2.444845330336547, "grad_norm": 0.4508889317512512, "learning_rate": 3.236560028136094e-06, "loss": 0.0605, "step": 82670 }, { "epoch": 2.445141065830721, "grad_norm": 0.8549841642379761, "learning_rate": 3.2364333382171548e-06, "loss": 0.0611, "step": 82680 }, { "epoch": 2.445436801324895, "grad_norm": 0.91812664270401, "learning_rate": 3.236306648298215e-06, "loss": 0.0998, "step": 82690 }, { "epoch": 2.445732536819069, "grad_norm": 0.5358178615570068, "learning_rate": 3.2361799583792763e-06, "loss": 0.0764, "step": 82700 }, { "epoch": 2.446028272313243, "grad_norm": 0.5398651957511902, "learning_rate": 3.2360532684603367e-06, "loss": 0.0861, "step": 82710 }, { "epoch": 2.446324007807417, "grad_norm": 0.7576831579208374, "learning_rate": 3.2359265785413975e-06, "loss": 0.0874, "step": 82720 }, { "epoch": 2.446619743301591, "grad_norm": 0.8092566132545471, "learning_rate": 3.235799888622458e-06, "loss": 0.0679, "step": 82730 }, { "epoch": 2.446915478795765, "grad_norm": 0.702401876449585, "learning_rate": 3.2356731987035186e-06, "loss": 0.0785, "step": 82740 }, { "epoch": 2.447211214289939, "grad_norm": 1.1279839277267456, "learning_rate": 3.235546508784579e-06, "loss": 0.0795, "step": 82750 }, { "epoch": 2.4475069497841133, "grad_norm": 1.0973073244094849, "learning_rate": 3.23541981886564e-06, "loss": 0.0901, "step": 82760 }, { "epoch": 2.4478026852782873, "grad_norm": 0.4908320903778076, "learning_rate": 3.2352931289467e-06, "loss": 0.0798, "step": 82770 }, { "epoch": 2.448098420772461, "grad_norm": 0.67683345079422, "learning_rate": 3.2351664390277614e-06, "loss": 0.0652, "step": 82780 }, { "epoch": 2.448394156266635, "grad_norm": 0.7624040842056274, "learning_rate": 3.2350397491088217e-06, "loss": 0.0916, "step": 82790 }, { "epoch": 2.448689891760809, "grad_norm": 1.0104435682296753, "learning_rate": 3.2349130591898825e-06, "loss": 0.0838, "step": 82800 }, { "epoch": 2.448985627254983, "grad_norm": 0.524068295955658, "learning_rate": 3.234786369270943e-06, "loss": 0.0756, "step": 82810 }, { "epoch": 2.449281362749157, "grad_norm": 0.6258789300918579, "learning_rate": 3.2346596793520037e-06, "loss": 0.0772, "step": 82820 }, { "epoch": 2.4495770982433314, "grad_norm": 0.7293018102645874, "learning_rate": 3.234532989433064e-06, "loss": 0.0587, "step": 82830 }, { "epoch": 2.4498728337375053, "grad_norm": 1.0172829627990723, "learning_rate": 3.234406299514125e-06, "loss": 0.1027, "step": 82840 }, { "epoch": 2.4501685692316792, "grad_norm": 0.7679797410964966, "learning_rate": 3.234279609595185e-06, "loss": 0.0899, "step": 82850 }, { "epoch": 2.450464304725853, "grad_norm": 0.7541584968566895, "learning_rate": 3.234152919676246e-06, "loss": 0.0741, "step": 82860 }, { "epoch": 2.450760040220027, "grad_norm": 0.8935659527778625, "learning_rate": 3.2340262297573068e-06, "loss": 0.0721, "step": 82870 }, { "epoch": 2.451055775714201, "grad_norm": 0.33851754665374756, "learning_rate": 3.233899539838367e-06, "loss": 0.0807, "step": 82880 }, { "epoch": 2.451351511208375, "grad_norm": 1.037584900856018, "learning_rate": 3.233772849919428e-06, "loss": 0.0872, "step": 82890 }, { "epoch": 2.4516472467025494, "grad_norm": 0.9802864193916321, "learning_rate": 3.2336461600004883e-06, "loss": 0.0678, "step": 82900 }, { "epoch": 2.4519429821967234, "grad_norm": 0.9324734807014465, "learning_rate": 3.233519470081549e-06, "loss": 0.075, "step": 82910 }, { "epoch": 2.4522387176908973, "grad_norm": 0.6825172901153564, "learning_rate": 3.2333927801626095e-06, "loss": 0.0647, "step": 82920 }, { "epoch": 2.4525344531850712, "grad_norm": 0.7376744151115417, "learning_rate": 3.2332660902436703e-06, "loss": 0.0678, "step": 82930 }, { "epoch": 2.452830188679245, "grad_norm": 0.6312974095344543, "learning_rate": 3.233139400324731e-06, "loss": 0.081, "step": 82940 }, { "epoch": 2.453125924173419, "grad_norm": 0.7723373174667358, "learning_rate": 3.233012710405792e-06, "loss": 0.0902, "step": 82950 }, { "epoch": 2.453421659667593, "grad_norm": 0.7489868402481079, "learning_rate": 3.232886020486852e-06, "loss": 0.0746, "step": 82960 }, { "epoch": 2.4537173951617675, "grad_norm": 0.41698718070983887, "learning_rate": 3.232759330567913e-06, "loss": 0.0617, "step": 82970 }, { "epoch": 2.4540131306559414, "grad_norm": 0.879054844379425, "learning_rate": 3.2326326406489734e-06, "loss": 0.0824, "step": 82980 }, { "epoch": 2.4543088661501153, "grad_norm": 1.0290234088897705, "learning_rate": 3.232505950730034e-06, "loss": 0.0898, "step": 82990 }, { "epoch": 2.4546046016442893, "grad_norm": 0.7705320119857788, "learning_rate": 3.2323792608110945e-06, "loss": 0.0736, "step": 83000 }, { "epoch": 2.4549003371384632, "grad_norm": 0.4365677237510681, "learning_rate": 3.2322525708921553e-06, "loss": 0.0622, "step": 83010 }, { "epoch": 2.4551960726326376, "grad_norm": 0.6450752019882202, "learning_rate": 3.232125880973216e-06, "loss": 0.0881, "step": 83020 }, { "epoch": 2.4554918081268116, "grad_norm": 0.9612869024276733, "learning_rate": 3.231999191054277e-06, "loss": 0.0831, "step": 83030 }, { "epoch": 2.4557875436209855, "grad_norm": 1.3120474815368652, "learning_rate": 3.2318725011353372e-06, "loss": 0.0966, "step": 83040 }, { "epoch": 2.4560832791151594, "grad_norm": 0.7494111657142639, "learning_rate": 3.231745811216398e-06, "loss": 0.0907, "step": 83050 }, { "epoch": 2.4563790146093334, "grad_norm": 1.054007649421692, "learning_rate": 3.2316191212974584e-06, "loss": 0.0921, "step": 83060 }, { "epoch": 2.4566747501035073, "grad_norm": 1.118678331375122, "learning_rate": 3.231492431378519e-06, "loss": 0.0744, "step": 83070 }, { "epoch": 2.4569704855976813, "grad_norm": 0.8080284595489502, "learning_rate": 3.2313657414595796e-06, "loss": 0.0661, "step": 83080 }, { "epoch": 2.4572662210918557, "grad_norm": 1.2379230260849, "learning_rate": 3.2312390515406403e-06, "loss": 0.0768, "step": 83090 }, { "epoch": 2.4575619565860296, "grad_norm": 1.048813819885254, "learning_rate": 3.231112361621701e-06, "loss": 0.0958, "step": 83100 }, { "epoch": 2.4578576920802035, "grad_norm": 0.6650347113609314, "learning_rate": 3.230985671702762e-06, "loss": 0.0848, "step": 83110 }, { "epoch": 2.4581534275743775, "grad_norm": 0.6251088976860046, "learning_rate": 3.2308589817838223e-06, "loss": 0.0875, "step": 83120 }, { "epoch": 2.4584491630685514, "grad_norm": 0.64426189661026, "learning_rate": 3.230732291864883e-06, "loss": 0.064, "step": 83130 }, { "epoch": 2.4587448985627254, "grad_norm": 0.5648738145828247, "learning_rate": 3.2306056019459434e-06, "loss": 0.0824, "step": 83140 }, { "epoch": 2.4590406340568993, "grad_norm": 1.0132384300231934, "learning_rate": 3.2304789120270042e-06, "loss": 0.0902, "step": 83150 }, { "epoch": 2.4593363695510737, "grad_norm": 0.8611749410629272, "learning_rate": 3.2303522221080646e-06, "loss": 0.0816, "step": 83160 }, { "epoch": 2.4596321050452477, "grad_norm": 0.8124877214431763, "learning_rate": 3.2302255321891254e-06, "loss": 0.0698, "step": 83170 }, { "epoch": 2.4599278405394216, "grad_norm": 0.7284964323043823, "learning_rate": 3.230098842270186e-06, "loss": 0.0876, "step": 83180 }, { "epoch": 2.4602235760335955, "grad_norm": 0.6737721562385559, "learning_rate": 3.229972152351247e-06, "loss": 0.08, "step": 83190 }, { "epoch": 2.4605193115277695, "grad_norm": 0.826908528804779, "learning_rate": 3.2298454624323073e-06, "loss": 0.0799, "step": 83200 }, { "epoch": 2.4608150470219434, "grad_norm": 0.5846064686775208, "learning_rate": 3.229718772513368e-06, "loss": 0.0659, "step": 83210 }, { "epoch": 2.4611107825161174, "grad_norm": 0.9683825969696045, "learning_rate": 3.2295920825944285e-06, "loss": 0.0742, "step": 83220 }, { "epoch": 2.4614065180102918, "grad_norm": 1.2077336311340332, "learning_rate": 3.2294653926754893e-06, "loss": 0.0848, "step": 83230 }, { "epoch": 2.4617022535044657, "grad_norm": 0.5530649423599243, "learning_rate": 3.2293387027565496e-06, "loss": 0.0913, "step": 83240 }, { "epoch": 2.4619979889986396, "grad_norm": 0.9111826419830322, "learning_rate": 3.2292120128376104e-06, "loss": 0.0772, "step": 83250 }, { "epoch": 2.4622937244928136, "grad_norm": 0.7628955245018005, "learning_rate": 3.2290853229186712e-06, "loss": 0.0842, "step": 83260 }, { "epoch": 2.4625894599869875, "grad_norm": 0.628749668598175, "learning_rate": 3.2289586329997316e-06, "loss": 0.085, "step": 83270 }, { "epoch": 2.4628851954811615, "grad_norm": 1.1016560792922974, "learning_rate": 3.2288319430807924e-06, "loss": 0.0764, "step": 83280 }, { "epoch": 2.4631809309753354, "grad_norm": 0.7681671380996704, "learning_rate": 3.2287052531618527e-06, "loss": 0.0858, "step": 83290 }, { "epoch": 2.46347666646951, "grad_norm": 0.9216282367706299, "learning_rate": 3.2285785632429135e-06, "loss": 0.0787, "step": 83300 }, { "epoch": 2.4637724019636837, "grad_norm": 0.7411771416664124, "learning_rate": 3.228451873323974e-06, "loss": 0.0807, "step": 83310 }, { "epoch": 2.4640681374578577, "grad_norm": 0.6298080086708069, "learning_rate": 3.2283251834050347e-06, "loss": 0.0625, "step": 83320 }, { "epoch": 2.4643638729520316, "grad_norm": 0.8587992787361145, "learning_rate": 3.228198493486095e-06, "loss": 0.0652, "step": 83330 }, { "epoch": 2.4646596084462056, "grad_norm": 0.8591896295547485, "learning_rate": 3.2280718035671563e-06, "loss": 0.1079, "step": 83340 }, { "epoch": 2.46495534394038, "grad_norm": 1.1890459060668945, "learning_rate": 3.2279451136482166e-06, "loss": 0.0796, "step": 83350 }, { "epoch": 2.465251079434554, "grad_norm": 0.7207399010658264, "learning_rate": 3.2278184237292774e-06, "loss": 0.0827, "step": 83360 }, { "epoch": 2.465546814928728, "grad_norm": 0.5816358923912048, "learning_rate": 3.227691733810338e-06, "loss": 0.0619, "step": 83370 }, { "epoch": 2.465842550422902, "grad_norm": 0.665778636932373, "learning_rate": 3.2275650438913986e-06, "loss": 0.0666, "step": 83380 }, { "epoch": 2.4661382859170757, "grad_norm": 1.1238681077957153, "learning_rate": 3.227438353972459e-06, "loss": 0.0804, "step": 83390 }, { "epoch": 2.4664340214112497, "grad_norm": 0.5591815710067749, "learning_rate": 3.2273116640535197e-06, "loss": 0.0655, "step": 83400 }, { "epoch": 2.4667297569054236, "grad_norm": 0.5316219329833984, "learning_rate": 3.22718497413458e-06, "loss": 0.0837, "step": 83410 }, { "epoch": 2.467025492399598, "grad_norm": 0.8389546275138855, "learning_rate": 3.2270582842156413e-06, "loss": 0.0651, "step": 83420 }, { "epoch": 2.467321227893772, "grad_norm": 0.741244375705719, "learning_rate": 3.2269315942967017e-06, "loss": 0.0593, "step": 83430 }, { "epoch": 2.467616963387946, "grad_norm": 0.8338624835014343, "learning_rate": 3.2268049043777625e-06, "loss": 0.0981, "step": 83440 }, { "epoch": 2.46791269888212, "grad_norm": 0.6031546592712402, "learning_rate": 3.226678214458823e-06, "loss": 0.0822, "step": 83450 }, { "epoch": 2.468208434376294, "grad_norm": 0.8132814168930054, "learning_rate": 3.2265515245398836e-06, "loss": 0.0923, "step": 83460 }, { "epoch": 2.4685041698704677, "grad_norm": 0.32729992270469666, "learning_rate": 3.226424834620944e-06, "loss": 0.0679, "step": 83470 }, { "epoch": 2.4687999053646417, "grad_norm": 0.6074833273887634, "learning_rate": 3.2262981447020048e-06, "loss": 0.0695, "step": 83480 }, { "epoch": 2.469095640858816, "grad_norm": 0.6765990257263184, "learning_rate": 3.226171454783065e-06, "loss": 0.072, "step": 83490 }, { "epoch": 2.46939137635299, "grad_norm": 0.5733281373977661, "learning_rate": 3.2260447648641264e-06, "loss": 0.0812, "step": 83500 }, { "epoch": 2.469687111847164, "grad_norm": 0.4912799596786499, "learning_rate": 3.2259180749451867e-06, "loss": 0.0738, "step": 83510 }, { "epoch": 2.469982847341338, "grad_norm": 0.8641477227210999, "learning_rate": 3.2257913850262475e-06, "loss": 0.0803, "step": 83520 }, { "epoch": 2.470278582835512, "grad_norm": 0.9724145531654358, "learning_rate": 3.225664695107308e-06, "loss": 0.0771, "step": 83530 }, { "epoch": 2.4705743183296858, "grad_norm": 0.7006354331970215, "learning_rate": 3.2255380051883687e-06, "loss": 0.0647, "step": 83540 }, { "epoch": 2.4708700538238597, "grad_norm": 1.3236424922943115, "learning_rate": 3.225411315269429e-06, "loss": 0.0845, "step": 83550 }, { "epoch": 2.471165789318034, "grad_norm": 0.813883364200592, "learning_rate": 3.22528462535049e-06, "loss": 0.0945, "step": 83560 }, { "epoch": 2.471461524812208, "grad_norm": 0.7771551012992859, "learning_rate": 3.22515793543155e-06, "loss": 0.0896, "step": 83570 }, { "epoch": 2.471757260306382, "grad_norm": 0.6079134941101074, "learning_rate": 3.2250312455126114e-06, "loss": 0.0587, "step": 83580 }, { "epoch": 2.472052995800556, "grad_norm": 1.0796220302581787, "learning_rate": 3.2249045555936718e-06, "loss": 0.0915, "step": 83590 }, { "epoch": 2.47234873129473, "grad_norm": 0.8638812303543091, "learning_rate": 3.2247778656747326e-06, "loss": 0.0728, "step": 83600 }, { "epoch": 2.472644466788904, "grad_norm": 1.080203652381897, "learning_rate": 3.224651175755793e-06, "loss": 0.0763, "step": 83610 }, { "epoch": 2.472940202283078, "grad_norm": 0.5809884667396545, "learning_rate": 3.2245244858368537e-06, "loss": 0.0728, "step": 83620 }, { "epoch": 2.473235937777252, "grad_norm": 1.085080623626709, "learning_rate": 3.224397795917914e-06, "loss": 0.0747, "step": 83630 }, { "epoch": 2.473531673271426, "grad_norm": 0.667704164981842, "learning_rate": 3.224271105998975e-06, "loss": 0.0973, "step": 83640 }, { "epoch": 2.4738274087656, "grad_norm": 0.7144431471824646, "learning_rate": 3.2241444160800352e-06, "loss": 0.0816, "step": 83650 }, { "epoch": 2.474123144259774, "grad_norm": 0.6177486181259155, "learning_rate": 3.2240177261610965e-06, "loss": 0.061, "step": 83660 }, { "epoch": 2.474418879753948, "grad_norm": 0.774161696434021, "learning_rate": 3.223891036242157e-06, "loss": 0.0794, "step": 83670 }, { "epoch": 2.4747146152481223, "grad_norm": 1.0344305038452148, "learning_rate": 3.2237643463232176e-06, "loss": 0.0693, "step": 83680 }, { "epoch": 2.4750103507422963, "grad_norm": 0.7076760530471802, "learning_rate": 3.223637656404278e-06, "loss": 0.0965, "step": 83690 }, { "epoch": 2.47530608623647, "grad_norm": 1.0338932275772095, "learning_rate": 3.2235109664853383e-06, "loss": 0.0753, "step": 83700 }, { "epoch": 2.475601821730644, "grad_norm": 1.0085045099258423, "learning_rate": 3.223384276566399e-06, "loss": 0.0838, "step": 83710 }, { "epoch": 2.475897557224818, "grad_norm": 0.7827909588813782, "learning_rate": 3.2232575866474595e-06, "loss": 0.0782, "step": 83720 }, { "epoch": 2.476193292718992, "grad_norm": 1.035019874572754, "learning_rate": 3.2231308967285203e-06, "loss": 0.0807, "step": 83730 }, { "epoch": 2.476489028213166, "grad_norm": 0.655661404132843, "learning_rate": 3.223004206809581e-06, "loss": 0.0937, "step": 83740 }, { "epoch": 2.4767847637073404, "grad_norm": 1.502591848373413, "learning_rate": 3.222877516890642e-06, "loss": 0.0927, "step": 83750 }, { "epoch": 2.4770804992015143, "grad_norm": 0.7246002554893494, "learning_rate": 3.2227508269717022e-06, "loss": 0.0915, "step": 83760 }, { "epoch": 2.4773762346956882, "grad_norm": 0.5412653684616089, "learning_rate": 3.222624137052763e-06, "loss": 0.053, "step": 83770 }, { "epoch": 2.477671970189862, "grad_norm": 1.7128169536590576, "learning_rate": 3.2224974471338234e-06, "loss": 0.0627, "step": 83780 }, { "epoch": 2.477967705684036, "grad_norm": 0.851192831993103, "learning_rate": 3.222370757214884e-06, "loss": 0.0919, "step": 83790 }, { "epoch": 2.47826344117821, "grad_norm": 0.6039738059043884, "learning_rate": 3.2222440672959445e-06, "loss": 0.0731, "step": 83800 }, { "epoch": 2.478559176672384, "grad_norm": 0.444166898727417, "learning_rate": 3.2221173773770053e-06, "loss": 0.0666, "step": 83810 }, { "epoch": 2.4788549121665584, "grad_norm": 0.8505227565765381, "learning_rate": 3.221990687458066e-06, "loss": 0.0628, "step": 83820 }, { "epoch": 2.4791506476607323, "grad_norm": 0.9757848978042603, "learning_rate": 3.221863997539127e-06, "loss": 0.0684, "step": 83830 }, { "epoch": 2.4794463831549063, "grad_norm": 0.7604394555091858, "learning_rate": 3.2217373076201873e-06, "loss": 0.1021, "step": 83840 }, { "epoch": 2.4797421186490802, "grad_norm": 0.8908443450927734, "learning_rate": 3.221610617701248e-06, "loss": 0.0897, "step": 83850 }, { "epoch": 2.480037854143254, "grad_norm": 0.8514789938926697, "learning_rate": 3.2214839277823084e-06, "loss": 0.0737, "step": 83860 }, { "epoch": 2.480333589637428, "grad_norm": 1.5438544750213623, "learning_rate": 3.2213572378633692e-06, "loss": 0.0742, "step": 83870 }, { "epoch": 2.480629325131602, "grad_norm": 0.6348989009857178, "learning_rate": 3.2212305479444296e-06, "loss": 0.0697, "step": 83880 }, { "epoch": 2.4809250606257764, "grad_norm": 0.6425968408584595, "learning_rate": 3.2211038580254904e-06, "loss": 0.0795, "step": 83890 }, { "epoch": 2.4812207961199504, "grad_norm": 1.1201916933059692, "learning_rate": 3.220977168106551e-06, "loss": 0.0796, "step": 83900 }, { "epoch": 2.4815165316141243, "grad_norm": 0.6098557710647583, "learning_rate": 3.220850478187612e-06, "loss": 0.0786, "step": 83910 }, { "epoch": 2.4818122671082983, "grad_norm": 0.6942582726478577, "learning_rate": 3.2207237882686723e-06, "loss": 0.0687, "step": 83920 }, { "epoch": 2.482108002602472, "grad_norm": 0.8323404788970947, "learning_rate": 3.220597098349733e-06, "loss": 0.0845, "step": 83930 }, { "epoch": 2.4824037380966466, "grad_norm": 1.377880573272705, "learning_rate": 3.2204704084307935e-06, "loss": 0.095, "step": 83940 }, { "epoch": 2.4826994735908205, "grad_norm": 0.9624540209770203, "learning_rate": 3.2203437185118543e-06, "loss": 0.0823, "step": 83950 }, { "epoch": 2.4829952090849945, "grad_norm": 0.5359442234039307, "learning_rate": 3.2202170285929146e-06, "loss": 0.0633, "step": 83960 }, { "epoch": 2.4832909445791684, "grad_norm": 0.701255202293396, "learning_rate": 3.2200903386739754e-06, "loss": 0.0785, "step": 83970 }, { "epoch": 2.4835866800733424, "grad_norm": 0.699236273765564, "learning_rate": 3.219963648755036e-06, "loss": 0.068, "step": 83980 }, { "epoch": 2.4838824155675163, "grad_norm": 0.8721308708190918, "learning_rate": 3.219836958836097e-06, "loss": 0.0863, "step": 83990 }, { "epoch": 2.4841781510616903, "grad_norm": 0.4303683936595917, "learning_rate": 3.2197102689171574e-06, "loss": 0.0736, "step": 84000 }, { "epoch": 2.4844738865558647, "grad_norm": 0.9224768280982971, "learning_rate": 3.219583578998218e-06, "loss": 0.0742, "step": 84010 }, { "epoch": 2.4847696220500386, "grad_norm": 0.8345910906791687, "learning_rate": 3.2194568890792785e-06, "loss": 0.0806, "step": 84020 }, { "epoch": 2.4850653575442125, "grad_norm": 0.7609907388687134, "learning_rate": 3.2193301991603393e-06, "loss": 0.0802, "step": 84030 }, { "epoch": 2.4853610930383865, "grad_norm": 0.6194797158241272, "learning_rate": 3.2192035092413997e-06, "loss": 0.0901, "step": 84040 }, { "epoch": 2.4856568285325604, "grad_norm": 1.1709282398223877, "learning_rate": 3.2190768193224605e-06, "loss": 0.0768, "step": 84050 }, { "epoch": 2.4859525640267344, "grad_norm": 1.1877779960632324, "learning_rate": 3.2189501294035213e-06, "loss": 0.0812, "step": 84060 }, { "epoch": 2.4862482995209083, "grad_norm": 0.9776909947395325, "learning_rate": 3.218823439484582e-06, "loss": 0.062, "step": 84070 }, { "epoch": 2.4865440350150827, "grad_norm": 0.6183986663818359, "learning_rate": 3.2186967495656424e-06, "loss": 0.0617, "step": 84080 }, { "epoch": 2.4868397705092566, "grad_norm": 0.6185476779937744, "learning_rate": 3.218570059646703e-06, "loss": 0.0776, "step": 84090 }, { "epoch": 2.4871355060034306, "grad_norm": 0.5743611454963684, "learning_rate": 3.2184433697277636e-06, "loss": 0.0917, "step": 84100 }, { "epoch": 2.4874312414976045, "grad_norm": 0.6571003794670105, "learning_rate": 3.218316679808824e-06, "loss": 0.0786, "step": 84110 }, { "epoch": 2.4877269769917785, "grad_norm": 0.8711639046669006, "learning_rate": 3.2181899898898847e-06, "loss": 0.0868, "step": 84120 }, { "epoch": 2.4880227124859524, "grad_norm": 0.5053685903549194, "learning_rate": 3.218063299970945e-06, "loss": 0.0592, "step": 84130 }, { "epoch": 2.4883184479801264, "grad_norm": 0.8521684408187866, "learning_rate": 3.2179366100520063e-06, "loss": 0.0877, "step": 84140 }, { "epoch": 2.4886141834743007, "grad_norm": 0.9262905716896057, "learning_rate": 3.2178099201330667e-06, "loss": 0.0745, "step": 84150 }, { "epoch": 2.4889099189684747, "grad_norm": 0.759403645992279, "learning_rate": 3.2176832302141275e-06, "loss": 0.0763, "step": 84160 }, { "epoch": 2.4892056544626486, "grad_norm": 1.091853380203247, "learning_rate": 3.217556540295188e-06, "loss": 0.073, "step": 84170 }, { "epoch": 2.4895013899568226, "grad_norm": 0.8060718178749084, "learning_rate": 3.2174298503762486e-06, "loss": 0.0708, "step": 84180 }, { "epoch": 2.4897971254509965, "grad_norm": 0.6887248754501343, "learning_rate": 3.217303160457309e-06, "loss": 0.0796, "step": 84190 }, { "epoch": 2.4900928609451705, "grad_norm": 0.8835276365280151, "learning_rate": 3.2171764705383698e-06, "loss": 0.0852, "step": 84200 }, { "epoch": 2.4903885964393444, "grad_norm": 0.7686653733253479, "learning_rate": 3.21704978061943e-06, "loss": 0.0821, "step": 84210 }, { "epoch": 2.490684331933519, "grad_norm": 1.0123032331466675, "learning_rate": 3.2169230907004913e-06, "loss": 0.0774, "step": 84220 }, { "epoch": 2.4909800674276927, "grad_norm": 0.8623606562614441, "learning_rate": 3.2167964007815517e-06, "loss": 0.0564, "step": 84230 }, { "epoch": 2.4912758029218667, "grad_norm": 0.8008431792259216, "learning_rate": 3.2166697108626125e-06, "loss": 0.0845, "step": 84240 }, { "epoch": 2.4915715384160406, "grad_norm": 1.3033450841903687, "learning_rate": 3.216543020943673e-06, "loss": 0.0713, "step": 84250 }, { "epoch": 2.4918672739102146, "grad_norm": 0.3767617344856262, "learning_rate": 3.2164163310247337e-06, "loss": 0.0692, "step": 84260 }, { "epoch": 2.492163009404389, "grad_norm": 0.9401673674583435, "learning_rate": 3.216289641105794e-06, "loss": 0.0708, "step": 84270 }, { "epoch": 2.492458744898563, "grad_norm": 0.8844475746154785, "learning_rate": 3.216162951186855e-06, "loss": 0.0707, "step": 84280 }, { "epoch": 2.492754480392737, "grad_norm": 1.3920540809631348, "learning_rate": 3.216036261267915e-06, "loss": 0.0837, "step": 84290 }, { "epoch": 2.493050215886911, "grad_norm": 0.8164139986038208, "learning_rate": 3.2159095713489764e-06, "loss": 0.087, "step": 84300 }, { "epoch": 2.4933459513810847, "grad_norm": 1.121549367904663, "learning_rate": 3.2157828814300368e-06, "loss": 0.072, "step": 84310 }, { "epoch": 2.4936416868752587, "grad_norm": 0.9259976744651794, "learning_rate": 3.2156561915110975e-06, "loss": 0.0575, "step": 84320 }, { "epoch": 2.4939374223694326, "grad_norm": 1.0308042764663696, "learning_rate": 3.215529501592158e-06, "loss": 0.0788, "step": 84330 }, { "epoch": 2.494233157863607, "grad_norm": 0.9306319355964661, "learning_rate": 3.2154028116732187e-06, "loss": 0.1032, "step": 84340 }, { "epoch": 2.494528893357781, "grad_norm": 1.0952398777008057, "learning_rate": 3.215276121754279e-06, "loss": 0.0814, "step": 84350 }, { "epoch": 2.494824628851955, "grad_norm": 0.7773139476776123, "learning_rate": 3.21514943183534e-06, "loss": 0.096, "step": 84360 }, { "epoch": 2.495120364346129, "grad_norm": 0.6857804656028748, "learning_rate": 3.2150227419164002e-06, "loss": 0.0676, "step": 84370 }, { "epoch": 2.4954160998403028, "grad_norm": 1.1163322925567627, "learning_rate": 3.2148960519974614e-06, "loss": 0.0904, "step": 84380 }, { "epoch": 2.4957118353344767, "grad_norm": 0.9653829336166382, "learning_rate": 3.214769362078522e-06, "loss": 0.0787, "step": 84390 }, { "epoch": 2.4960075708286507, "grad_norm": 1.516319990158081, "learning_rate": 3.2146426721595826e-06, "loss": 0.0876, "step": 84400 }, { "epoch": 2.496303306322825, "grad_norm": 1.1647299528121948, "learning_rate": 3.214515982240643e-06, "loss": 0.0568, "step": 84410 }, { "epoch": 2.496599041816999, "grad_norm": 0.6666610836982727, "learning_rate": 3.2143892923217037e-06, "loss": 0.0838, "step": 84420 }, { "epoch": 2.496894777311173, "grad_norm": 0.7787046432495117, "learning_rate": 3.214262602402764e-06, "loss": 0.0941, "step": 84430 }, { "epoch": 2.497190512805347, "grad_norm": 0.8677696585655212, "learning_rate": 3.214135912483825e-06, "loss": 0.0841, "step": 84440 }, { "epoch": 2.497486248299521, "grad_norm": 0.7637228965759277, "learning_rate": 3.2140092225648853e-06, "loss": 0.0724, "step": 84450 }, { "epoch": 2.4977819837936948, "grad_norm": 0.717964231967926, "learning_rate": 3.2138825326459465e-06, "loss": 0.0786, "step": 84460 }, { "epoch": 2.4980777192878687, "grad_norm": 1.0373412370681763, "learning_rate": 3.213755842727007e-06, "loss": 0.0747, "step": 84470 }, { "epoch": 2.498373454782043, "grad_norm": 0.6356706619262695, "learning_rate": 3.2136291528080676e-06, "loss": 0.0601, "step": 84480 }, { "epoch": 2.498669190276217, "grad_norm": 0.7453232407569885, "learning_rate": 3.213502462889128e-06, "loss": 0.0871, "step": 84490 }, { "epoch": 2.498964925770391, "grad_norm": 0.47732555866241455, "learning_rate": 3.213375772970189e-06, "loss": 0.0834, "step": 84500 }, { "epoch": 2.499260661264565, "grad_norm": 0.7503336071968079, "learning_rate": 3.213249083051249e-06, "loss": 0.1155, "step": 84510 }, { "epoch": 2.499556396758739, "grad_norm": 0.5846684575080872, "learning_rate": 3.2131223931323095e-06, "loss": 0.0698, "step": 84520 }, { "epoch": 2.499852132252913, "grad_norm": 0.6212148666381836, "learning_rate": 3.2129957032133703e-06, "loss": 0.0577, "step": 84530 }, { "epoch": 2.5001478677470867, "grad_norm": 0.5380394458770752, "learning_rate": 3.212869013294431e-06, "loss": 0.0778, "step": 84540 }, { "epoch": 2.500443603241261, "grad_norm": 0.8989628553390503, "learning_rate": 3.212742323375492e-06, "loss": 0.0903, "step": 84550 }, { "epoch": 2.500739338735435, "grad_norm": 0.6042037606239319, "learning_rate": 3.2126156334565523e-06, "loss": 0.0963, "step": 84560 }, { "epoch": 2.501035074229609, "grad_norm": 0.8361038565635681, "learning_rate": 3.212488943537613e-06, "loss": 0.0756, "step": 84570 }, { "epoch": 2.501330809723783, "grad_norm": 0.7125259041786194, "learning_rate": 3.2123622536186734e-06, "loss": 0.081, "step": 84580 }, { "epoch": 2.501626545217957, "grad_norm": 0.8631741404533386, "learning_rate": 3.212235563699734e-06, "loss": 0.1067, "step": 84590 }, { "epoch": 2.5019222807121313, "grad_norm": 0.6932222247123718, "learning_rate": 3.2121088737807946e-06, "loss": 0.0881, "step": 84600 }, { "epoch": 2.5022180162063052, "grad_norm": 1.135852336883545, "learning_rate": 3.2119821838618554e-06, "loss": 0.0789, "step": 84610 }, { "epoch": 2.502513751700479, "grad_norm": 0.5234857201576233, "learning_rate": 3.211855493942916e-06, "loss": 0.0715, "step": 84620 }, { "epoch": 2.502809487194653, "grad_norm": 0.7982685565948486, "learning_rate": 3.211728804023977e-06, "loss": 0.0641, "step": 84630 }, { "epoch": 2.503105222688827, "grad_norm": 0.8194012641906738, "learning_rate": 3.2116021141050373e-06, "loss": 0.0958, "step": 84640 }, { "epoch": 2.503400958183001, "grad_norm": 0.6749634146690369, "learning_rate": 3.211475424186098e-06, "loss": 0.0844, "step": 84650 }, { "epoch": 2.503696693677175, "grad_norm": 0.5777822732925415, "learning_rate": 3.2113487342671585e-06, "loss": 0.0894, "step": 84660 }, { "epoch": 2.5039924291713493, "grad_norm": 0.882470965385437, "learning_rate": 3.2112220443482192e-06, "loss": 0.0763, "step": 84670 }, { "epoch": 2.5042881646655233, "grad_norm": 0.6006553173065186, "learning_rate": 3.2110953544292796e-06, "loss": 0.0636, "step": 84680 }, { "epoch": 2.5045839001596972, "grad_norm": 0.6827069520950317, "learning_rate": 3.2109686645103404e-06, "loss": 0.085, "step": 84690 }, { "epoch": 2.504879635653871, "grad_norm": 0.743998110294342, "learning_rate": 3.210841974591401e-06, "loss": 0.0784, "step": 84700 }, { "epoch": 2.505175371148045, "grad_norm": 0.9014074802398682, "learning_rate": 3.210715284672462e-06, "loss": 0.072, "step": 84710 }, { "epoch": 2.505471106642219, "grad_norm": 0.7459322214126587, "learning_rate": 3.2105885947535223e-06, "loss": 0.0561, "step": 84720 }, { "epoch": 2.505766842136393, "grad_norm": 0.7114390730857849, "learning_rate": 3.210461904834583e-06, "loss": 0.0589, "step": 84730 }, { "epoch": 2.5060625776305674, "grad_norm": 0.8287791609764099, "learning_rate": 3.2103352149156435e-06, "loss": 0.0933, "step": 84740 }, { "epoch": 2.5063583131247413, "grad_norm": 0.9255901575088501, "learning_rate": 3.2102085249967043e-06, "loss": 0.0974, "step": 84750 }, { "epoch": 2.5066540486189153, "grad_norm": 0.6617668867111206, "learning_rate": 3.2100818350777647e-06, "loss": 0.0794, "step": 84760 }, { "epoch": 2.506949784113089, "grad_norm": 0.5642827153205872, "learning_rate": 3.2099551451588255e-06, "loss": 0.0688, "step": 84770 }, { "epoch": 2.507245519607263, "grad_norm": 0.8169763088226318, "learning_rate": 3.2098284552398862e-06, "loss": 0.067, "step": 84780 }, { "epoch": 2.5075412551014375, "grad_norm": 0.8654688596725464, "learning_rate": 3.209701765320947e-06, "loss": 0.1134, "step": 84790 }, { "epoch": 2.507836990595611, "grad_norm": 0.42570140957832336, "learning_rate": 3.2095750754020074e-06, "loss": 0.0715, "step": 84800 }, { "epoch": 2.5081327260897854, "grad_norm": 0.58061283826828, "learning_rate": 3.209448385483068e-06, "loss": 0.0762, "step": 84810 }, { "epoch": 2.5084284615839594, "grad_norm": 0.5309928059577942, "learning_rate": 3.2093216955641286e-06, "loss": 0.0536, "step": 84820 }, { "epoch": 2.5087241970781333, "grad_norm": 0.6114233732223511, "learning_rate": 3.2091950056451893e-06, "loss": 0.0597, "step": 84830 }, { "epoch": 2.5090199325723073, "grad_norm": 1.2496262788772583, "learning_rate": 3.2090683157262497e-06, "loss": 0.0864, "step": 84840 }, { "epoch": 2.509315668066481, "grad_norm": 0.7244722843170166, "learning_rate": 3.2089416258073105e-06, "loss": 0.0875, "step": 84850 }, { "epoch": 2.5096114035606556, "grad_norm": 0.6119932532310486, "learning_rate": 3.2088149358883713e-06, "loss": 0.073, "step": 84860 }, { "epoch": 2.509907139054829, "grad_norm": 0.6507854461669922, "learning_rate": 3.208688245969432e-06, "loss": 0.0554, "step": 84870 }, { "epoch": 2.5102028745490035, "grad_norm": 1.3555188179016113, "learning_rate": 3.2085615560504924e-06, "loss": 0.0761, "step": 84880 }, { "epoch": 2.5104986100431774, "grad_norm": 0.6736340522766113, "learning_rate": 3.2084348661315532e-06, "loss": 0.0974, "step": 84890 }, { "epoch": 2.5107943455373514, "grad_norm": 0.9089305400848389, "learning_rate": 3.2083081762126136e-06, "loss": 0.0921, "step": 84900 }, { "epoch": 2.5110900810315253, "grad_norm": 0.6375586986541748, "learning_rate": 3.2081814862936744e-06, "loss": 0.0872, "step": 84910 }, { "epoch": 2.5113858165256993, "grad_norm": 0.8548231720924377, "learning_rate": 3.2080547963747348e-06, "loss": 0.0728, "step": 84920 }, { "epoch": 2.5116815520198736, "grad_norm": 0.9533647298812866, "learning_rate": 3.207928106455795e-06, "loss": 0.0769, "step": 84930 }, { "epoch": 2.5119772875140476, "grad_norm": 0.8030642867088318, "learning_rate": 3.2078014165368563e-06, "loss": 0.0909, "step": 84940 }, { "epoch": 2.5122730230082215, "grad_norm": 1.0546441078186035, "learning_rate": 3.2076747266179167e-06, "loss": 0.104, "step": 84950 }, { "epoch": 2.5125687585023955, "grad_norm": 0.5589042901992798, "learning_rate": 3.2075480366989775e-06, "loss": 0.0608, "step": 84960 }, { "epoch": 2.5128644939965694, "grad_norm": 0.9067904949188232, "learning_rate": 3.207421346780038e-06, "loss": 0.072, "step": 84970 }, { "epoch": 2.5131602294907434, "grad_norm": 1.0229613780975342, "learning_rate": 3.2072946568610986e-06, "loss": 0.0829, "step": 84980 }, { "epoch": 2.5134559649849173, "grad_norm": 0.8443976640701294, "learning_rate": 3.207167966942159e-06, "loss": 0.0825, "step": 84990 }, { "epoch": 2.5137517004790917, "grad_norm": 0.9045760631561279, "learning_rate": 3.20704127702322e-06, "loss": 0.0942, "step": 85000 }, { "epoch": 2.5140474359732656, "grad_norm": 0.42549023032188416, "learning_rate": 3.20691458710428e-06, "loss": 0.0671, "step": 85010 }, { "epoch": 2.5143431714674396, "grad_norm": 0.8506447076797485, "learning_rate": 3.2067878971853414e-06, "loss": 0.0756, "step": 85020 }, { "epoch": 2.5146389069616135, "grad_norm": 0.4759618043899536, "learning_rate": 3.2066612072664017e-06, "loss": 0.0673, "step": 85030 }, { "epoch": 2.5149346424557875, "grad_norm": 0.8303056955337524, "learning_rate": 3.2065345173474625e-06, "loss": 0.072, "step": 85040 }, { "epoch": 2.5152303779499614, "grad_norm": 0.5661473870277405, "learning_rate": 3.206407827428523e-06, "loss": 0.0816, "step": 85050 }, { "epoch": 2.5155261134441353, "grad_norm": 0.3485085964202881, "learning_rate": 3.2062811375095837e-06, "loss": 0.0672, "step": 85060 }, { "epoch": 2.5158218489383097, "grad_norm": 0.7324588298797607, "learning_rate": 3.206154447590644e-06, "loss": 0.0792, "step": 85070 }, { "epoch": 2.5161175844324837, "grad_norm": 1.0073310136795044, "learning_rate": 3.206027757671705e-06, "loss": 0.0657, "step": 85080 }, { "epoch": 2.5164133199266576, "grad_norm": 0.5612668395042419, "learning_rate": 3.205901067752765e-06, "loss": 0.0963, "step": 85090 }, { "epoch": 2.5167090554208316, "grad_norm": 0.669468104839325, "learning_rate": 3.2057743778338264e-06, "loss": 0.0787, "step": 85100 }, { "epoch": 2.5170047909150055, "grad_norm": 0.6034017205238342, "learning_rate": 3.2056476879148868e-06, "loss": 0.0721, "step": 85110 }, { "epoch": 2.51730052640918, "grad_norm": 0.8306761384010315, "learning_rate": 3.2055209979959476e-06, "loss": 0.0613, "step": 85120 }, { "epoch": 2.5175962619033534, "grad_norm": 0.8776229023933411, "learning_rate": 3.205394308077008e-06, "loss": 0.0754, "step": 85130 }, { "epoch": 2.517891997397528, "grad_norm": 0.5656031966209412, "learning_rate": 3.2052676181580687e-06, "loss": 0.0892, "step": 85140 }, { "epoch": 2.5181877328917017, "grad_norm": 0.8921806216239929, "learning_rate": 3.205140928239129e-06, "loss": 0.0687, "step": 85150 }, { "epoch": 2.5184834683858757, "grad_norm": 0.9842779040336609, "learning_rate": 3.20501423832019e-06, "loss": 0.0847, "step": 85160 }, { "epoch": 2.5187792038800496, "grad_norm": 0.7696945071220398, "learning_rate": 3.2048875484012503e-06, "loss": 0.0819, "step": 85170 }, { "epoch": 2.5190749393742236, "grad_norm": 0.8071917295455933, "learning_rate": 3.2047608584823115e-06, "loss": 0.0562, "step": 85180 }, { "epoch": 2.519370674868398, "grad_norm": 1.2390576601028442, "learning_rate": 3.204634168563372e-06, "loss": 0.0905, "step": 85190 }, { "epoch": 2.5196664103625714, "grad_norm": 0.6766554713249207, "learning_rate": 3.2045074786444326e-06, "loss": 0.0681, "step": 85200 }, { "epoch": 2.519962145856746, "grad_norm": 0.9423398375511169, "learning_rate": 3.204380788725493e-06, "loss": 0.0904, "step": 85210 }, { "epoch": 2.5202578813509198, "grad_norm": 0.5699289441108704, "learning_rate": 3.2042540988065538e-06, "loss": 0.0675, "step": 85220 }, { "epoch": 2.5205536168450937, "grad_norm": 1.035190463066101, "learning_rate": 3.204127408887614e-06, "loss": 0.0836, "step": 85230 }, { "epoch": 2.5208493523392677, "grad_norm": 0.645814836025238, "learning_rate": 3.204000718968675e-06, "loss": 0.0836, "step": 85240 }, { "epoch": 2.5211450878334416, "grad_norm": 0.657623827457428, "learning_rate": 3.2038740290497353e-06, "loss": 0.0933, "step": 85250 }, { "epoch": 2.521440823327616, "grad_norm": 1.0975641012191772, "learning_rate": 3.2037473391307965e-06, "loss": 0.0731, "step": 85260 }, { "epoch": 2.52173655882179, "grad_norm": 0.5698392987251282, "learning_rate": 3.203620649211857e-06, "loss": 0.0665, "step": 85270 }, { "epoch": 2.522032294315964, "grad_norm": 0.8973016142845154, "learning_rate": 3.2034939592929177e-06, "loss": 0.0725, "step": 85280 }, { "epoch": 2.522328029810138, "grad_norm": 0.5439878106117249, "learning_rate": 3.203367269373978e-06, "loss": 0.0743, "step": 85290 }, { "epoch": 2.5226237653043118, "grad_norm": 0.5780395865440369, "learning_rate": 3.203240579455039e-06, "loss": 0.08, "step": 85300 }, { "epoch": 2.5229195007984857, "grad_norm": 1.0993016958236694, "learning_rate": 3.203113889536099e-06, "loss": 0.0771, "step": 85310 }, { "epoch": 2.5232152362926596, "grad_norm": 0.7139085531234741, "learning_rate": 3.20298719961716e-06, "loss": 0.0656, "step": 85320 }, { "epoch": 2.523510971786834, "grad_norm": 1.0436798334121704, "learning_rate": 3.2028605096982203e-06, "loss": 0.0642, "step": 85330 }, { "epoch": 2.523806707281008, "grad_norm": 0.6618252992630005, "learning_rate": 3.202733819779281e-06, "loss": 0.0975, "step": 85340 }, { "epoch": 2.524102442775182, "grad_norm": 0.5558339953422546, "learning_rate": 3.202607129860342e-06, "loss": 0.0828, "step": 85350 }, { "epoch": 2.524398178269356, "grad_norm": 0.7389269471168518, "learning_rate": 3.2024804399414023e-06, "loss": 0.0834, "step": 85360 }, { "epoch": 2.52469391376353, "grad_norm": 0.7598859071731567, "learning_rate": 3.202353750022463e-06, "loss": 0.0771, "step": 85370 }, { "epoch": 2.5249896492577037, "grad_norm": 0.9532144665718079, "learning_rate": 3.2022270601035234e-06, "loss": 0.0644, "step": 85380 }, { "epoch": 2.5252853847518777, "grad_norm": 0.8168230056762695, "learning_rate": 3.2021003701845842e-06, "loss": 0.0883, "step": 85390 }, { "epoch": 2.525581120246052, "grad_norm": 0.7258157134056091, "learning_rate": 3.2019736802656446e-06, "loss": 0.0764, "step": 85400 }, { "epoch": 2.525876855740226, "grad_norm": 0.5910842418670654, "learning_rate": 3.2018469903467054e-06, "loss": 0.0807, "step": 85410 }, { "epoch": 2.5261725912344, "grad_norm": 0.6403117179870605, "learning_rate": 3.201720300427766e-06, "loss": 0.0633, "step": 85420 }, { "epoch": 2.526468326728574, "grad_norm": 0.7290320992469788, "learning_rate": 3.201593610508827e-06, "loss": 0.0593, "step": 85430 }, { "epoch": 2.526764062222748, "grad_norm": 0.9475073218345642, "learning_rate": 3.2014669205898873e-06, "loss": 0.0841, "step": 85440 }, { "epoch": 2.5270597977169222, "grad_norm": 0.4909643530845642, "learning_rate": 3.201340230670948e-06, "loss": 0.0742, "step": 85450 }, { "epoch": 2.5273555332110957, "grad_norm": 1.208093523979187, "learning_rate": 3.2012135407520085e-06, "loss": 0.0607, "step": 85460 }, { "epoch": 2.52765126870527, "grad_norm": 0.687344491481781, "learning_rate": 3.2010868508330693e-06, "loss": 0.0659, "step": 85470 }, { "epoch": 2.527947004199444, "grad_norm": 0.8538145422935486, "learning_rate": 3.2009601609141296e-06, "loss": 0.0724, "step": 85480 }, { "epoch": 2.528242739693618, "grad_norm": 0.7408214211463928, "learning_rate": 3.2008334709951904e-06, "loss": 0.0834, "step": 85490 }, { "epoch": 2.528538475187792, "grad_norm": 0.4190329313278198, "learning_rate": 3.2007067810762512e-06, "loss": 0.0852, "step": 85500 }, { "epoch": 2.528834210681966, "grad_norm": 0.6741976737976074, "learning_rate": 3.200580091157312e-06, "loss": 0.0723, "step": 85510 }, { "epoch": 2.5291299461761403, "grad_norm": 0.7975922226905823, "learning_rate": 3.2004534012383724e-06, "loss": 0.0602, "step": 85520 }, { "epoch": 2.5294256816703142, "grad_norm": 0.7085968852043152, "learning_rate": 3.200326711319433e-06, "loss": 0.0726, "step": 85530 }, { "epoch": 2.529721417164488, "grad_norm": 1.0131311416625977, "learning_rate": 3.2002000214004935e-06, "loss": 0.0866, "step": 85540 }, { "epoch": 2.530017152658662, "grad_norm": 1.5166233777999878, "learning_rate": 3.2000733314815543e-06, "loss": 0.093, "step": 85550 }, { "epoch": 2.530312888152836, "grad_norm": 0.6548185348510742, "learning_rate": 3.1999466415626147e-06, "loss": 0.0698, "step": 85560 }, { "epoch": 2.53060862364701, "grad_norm": 0.8342153429985046, "learning_rate": 3.1998199516436755e-06, "loss": 0.0586, "step": 85570 }, { "epoch": 2.530904359141184, "grad_norm": 1.1232445240020752, "learning_rate": 3.1996932617247363e-06, "loss": 0.0618, "step": 85580 }, { "epoch": 2.5312000946353583, "grad_norm": 1.4461339712142944, "learning_rate": 3.199566571805797e-06, "loss": 0.0799, "step": 85590 }, { "epoch": 2.5314958301295323, "grad_norm": 0.4506567716598511, "learning_rate": 3.1994398818868574e-06, "loss": 0.0669, "step": 85600 }, { "epoch": 2.531791565623706, "grad_norm": 1.4470373392105103, "learning_rate": 3.1993131919679182e-06, "loss": 0.1002, "step": 85610 }, { "epoch": 2.53208730111788, "grad_norm": 0.735950231552124, "learning_rate": 3.1991865020489786e-06, "loss": 0.0824, "step": 85620 }, { "epoch": 2.532383036612054, "grad_norm": 0.9170495271682739, "learning_rate": 3.1990598121300394e-06, "loss": 0.0695, "step": 85630 }, { "epoch": 2.532678772106228, "grad_norm": 0.9266405701637268, "learning_rate": 3.1989331222110997e-06, "loss": 0.0971, "step": 85640 }, { "epoch": 2.532974507600402, "grad_norm": 0.9072824716567993, "learning_rate": 3.1988064322921605e-06, "loss": 0.0964, "step": 85650 }, { "epoch": 2.5332702430945764, "grad_norm": 0.5875149369239807, "learning_rate": 3.1986797423732213e-06, "loss": 0.0756, "step": 85660 }, { "epoch": 2.5335659785887503, "grad_norm": 0.5967398881912231, "learning_rate": 3.198553052454282e-06, "loss": 0.0794, "step": 85670 }, { "epoch": 2.5338617140829243, "grad_norm": 1.055375099182129, "learning_rate": 3.1984263625353425e-06, "loss": 0.0853, "step": 85680 }, { "epoch": 2.534157449577098, "grad_norm": 0.846748411655426, "learning_rate": 3.1982996726164033e-06, "loss": 0.0795, "step": 85690 }, { "epoch": 2.534453185071272, "grad_norm": 0.594505250453949, "learning_rate": 3.1981729826974636e-06, "loss": 0.0817, "step": 85700 }, { "epoch": 2.5347489205654465, "grad_norm": 0.7378668785095215, "learning_rate": 3.1980462927785244e-06, "loss": 0.072, "step": 85710 }, { "epoch": 2.53504465605962, "grad_norm": 1.0282098054885864, "learning_rate": 3.1979196028595848e-06, "loss": 0.0566, "step": 85720 }, { "epoch": 2.5353403915537944, "grad_norm": 1.2463016510009766, "learning_rate": 3.1977929129406456e-06, "loss": 0.0668, "step": 85730 }, { "epoch": 2.5356361270479684, "grad_norm": 0.481587290763855, "learning_rate": 3.1976662230217064e-06, "loss": 0.0939, "step": 85740 }, { "epoch": 2.5359318625421423, "grad_norm": 0.6219737529754639, "learning_rate": 3.1975395331027667e-06, "loss": 0.0674, "step": 85750 }, { "epoch": 2.5362275980363163, "grad_norm": 1.0923645496368408, "learning_rate": 3.1974128431838275e-06, "loss": 0.0756, "step": 85760 }, { "epoch": 2.53652333353049, "grad_norm": 0.704860270023346, "learning_rate": 3.197286153264888e-06, "loss": 0.0866, "step": 85770 }, { "epoch": 2.5368190690246646, "grad_norm": 0.706356406211853, "learning_rate": 3.1971594633459487e-06, "loss": 0.0673, "step": 85780 }, { "epoch": 2.537114804518838, "grad_norm": 0.7896486520767212, "learning_rate": 3.197032773427009e-06, "loss": 0.0946, "step": 85790 }, { "epoch": 2.5374105400130125, "grad_norm": 0.6577031016349792, "learning_rate": 3.19690608350807e-06, "loss": 0.078, "step": 85800 }, { "epoch": 2.5377062755071864, "grad_norm": 0.615749180316925, "learning_rate": 3.19677939358913e-06, "loss": 0.0808, "step": 85810 }, { "epoch": 2.5380020110013604, "grad_norm": 0.5593977570533752, "learning_rate": 3.1966527036701914e-06, "loss": 0.0562, "step": 85820 }, { "epoch": 2.5382977464955343, "grad_norm": 0.8392620086669922, "learning_rate": 3.1965260137512518e-06, "loss": 0.0799, "step": 85830 }, { "epoch": 2.5385934819897082, "grad_norm": 1.2402151823043823, "learning_rate": 3.1963993238323126e-06, "loss": 0.0902, "step": 85840 }, { "epoch": 2.5388892174838826, "grad_norm": 1.1035881042480469, "learning_rate": 3.196272633913373e-06, "loss": 0.0713, "step": 85850 }, { "epoch": 2.5391849529780566, "grad_norm": 0.8834604024887085, "learning_rate": 3.1961459439944337e-06, "loss": 0.0939, "step": 85860 }, { "epoch": 2.5394806884722305, "grad_norm": 0.7342343330383301, "learning_rate": 3.196019254075494e-06, "loss": 0.0618, "step": 85870 }, { "epoch": 2.5397764239664045, "grad_norm": 0.6904715299606323, "learning_rate": 3.195892564156555e-06, "loss": 0.0774, "step": 85880 }, { "epoch": 2.5400721594605784, "grad_norm": 0.4799315333366394, "learning_rate": 3.1957658742376152e-06, "loss": 0.083, "step": 85890 }, { "epoch": 2.5403678949547523, "grad_norm": 3.1345107555389404, "learning_rate": 3.1956391843186765e-06, "loss": 0.0808, "step": 85900 }, { "epoch": 2.5406636304489263, "grad_norm": 0.6212658286094666, "learning_rate": 3.195512494399737e-06, "loss": 0.0785, "step": 85910 }, { "epoch": 2.5409593659431007, "grad_norm": 1.3571213483810425, "learning_rate": 3.1953858044807976e-06, "loss": 0.0765, "step": 85920 }, { "epoch": 2.5412551014372746, "grad_norm": 0.7340943813323975, "learning_rate": 3.195259114561858e-06, "loss": 0.0647, "step": 85930 }, { "epoch": 2.5415508369314486, "grad_norm": 0.6296449899673462, "learning_rate": 3.1951324246429188e-06, "loss": 0.0951, "step": 85940 }, { "epoch": 2.5418465724256225, "grad_norm": 0.9354947805404663, "learning_rate": 3.195005734723979e-06, "loss": 0.0636, "step": 85950 }, { "epoch": 2.5421423079197965, "grad_norm": 0.9068058133125305, "learning_rate": 3.19487904480504e-06, "loss": 0.0796, "step": 85960 }, { "epoch": 2.5424380434139704, "grad_norm": 0.7905629873275757, "learning_rate": 3.1947523548861003e-06, "loss": 0.0758, "step": 85970 }, { "epoch": 2.5427337789081443, "grad_norm": 1.0252063274383545, "learning_rate": 3.1946256649671615e-06, "loss": 0.0694, "step": 85980 }, { "epoch": 2.5430295144023187, "grad_norm": 0.46910178661346436, "learning_rate": 3.194498975048222e-06, "loss": 0.0936, "step": 85990 }, { "epoch": 2.5433252498964927, "grad_norm": 0.6703379154205322, "learning_rate": 3.1943722851292827e-06, "loss": 0.0836, "step": 86000 }, { "epoch": 2.5436209853906666, "grad_norm": 0.7474024891853333, "learning_rate": 3.194245595210343e-06, "loss": 0.0674, "step": 86010 }, { "epoch": 2.5439167208848406, "grad_norm": 1.0142982006072998, "learning_rate": 3.194118905291404e-06, "loss": 0.077, "step": 86020 }, { "epoch": 2.5442124563790145, "grad_norm": 0.716092586517334, "learning_rate": 3.193992215372464e-06, "loss": 0.0505, "step": 86030 }, { "epoch": 2.544508191873189, "grad_norm": 0.6817883253097534, "learning_rate": 3.193865525453525e-06, "loss": 0.0915, "step": 86040 }, { "epoch": 2.5448039273673624, "grad_norm": 0.6404019594192505, "learning_rate": 3.1937388355345853e-06, "loss": 0.077, "step": 86050 }, { "epoch": 2.5450996628615368, "grad_norm": 0.6970333456993103, "learning_rate": 3.1936121456156465e-06, "loss": 0.0893, "step": 86060 }, { "epoch": 2.5453953983557107, "grad_norm": 0.8006481528282166, "learning_rate": 3.193485455696707e-06, "loss": 0.0813, "step": 86070 }, { "epoch": 2.5456911338498847, "grad_norm": 0.8413152098655701, "learning_rate": 3.1933587657777677e-06, "loss": 0.0651, "step": 86080 }, { "epoch": 2.5459868693440586, "grad_norm": 0.9984099864959717, "learning_rate": 3.193232075858828e-06, "loss": 0.0942, "step": 86090 }, { "epoch": 2.5462826048382325, "grad_norm": 1.200512409210205, "learning_rate": 3.193105385939889e-06, "loss": 0.0765, "step": 86100 }, { "epoch": 2.546578340332407, "grad_norm": 0.9880414605140686, "learning_rate": 3.1929786960209492e-06, "loss": 0.1026, "step": 86110 }, { "epoch": 2.5468740758265804, "grad_norm": 0.9615892767906189, "learning_rate": 3.19285200610201e-06, "loss": 0.0676, "step": 86120 }, { "epoch": 2.547169811320755, "grad_norm": 0.6197458505630493, "learning_rate": 3.1927253161830704e-06, "loss": 0.066, "step": 86130 }, { "epoch": 2.5474655468149288, "grad_norm": 0.9501551389694214, "learning_rate": 3.1925986262641316e-06, "loss": 0.0788, "step": 86140 }, { "epoch": 2.5477612823091027, "grad_norm": 0.8615494966506958, "learning_rate": 3.192471936345192e-06, "loss": 0.0793, "step": 86150 }, { "epoch": 2.5480570178032766, "grad_norm": 0.6155278086662292, "learning_rate": 3.1923452464262527e-06, "loss": 0.0797, "step": 86160 }, { "epoch": 2.5483527532974506, "grad_norm": 0.9819760918617249, "learning_rate": 3.192218556507313e-06, "loss": 0.0706, "step": 86170 }, { "epoch": 2.548648488791625, "grad_norm": 0.9852137565612793, "learning_rate": 3.1920918665883735e-06, "loss": 0.0684, "step": 86180 }, { "epoch": 2.548944224285799, "grad_norm": 0.690470278263092, "learning_rate": 3.1919651766694343e-06, "loss": 0.0861, "step": 86190 }, { "epoch": 2.549239959779973, "grad_norm": 0.993460476398468, "learning_rate": 3.1918384867504946e-06, "loss": 0.0887, "step": 86200 }, { "epoch": 2.549535695274147, "grad_norm": 0.46886610984802246, "learning_rate": 3.1917117968315554e-06, "loss": 0.0735, "step": 86210 }, { "epoch": 2.5498314307683208, "grad_norm": 1.0240870714187622, "learning_rate": 3.1915851069126158e-06, "loss": 0.0561, "step": 86220 }, { "epoch": 2.5501271662624947, "grad_norm": 0.59844970703125, "learning_rate": 3.191458416993677e-06, "loss": 0.0659, "step": 86230 }, { "epoch": 2.5504229017566686, "grad_norm": 0.7727851271629333, "learning_rate": 3.1913317270747374e-06, "loss": 0.0869, "step": 86240 }, { "epoch": 2.550718637250843, "grad_norm": 0.7272516489028931, "learning_rate": 3.191205037155798e-06, "loss": 0.0935, "step": 86250 }, { "epoch": 2.551014372745017, "grad_norm": 0.6655511856079102, "learning_rate": 3.1910783472368585e-06, "loss": 0.07, "step": 86260 }, { "epoch": 2.551310108239191, "grad_norm": 1.1418424844741821, "learning_rate": 3.1909516573179193e-06, "loss": 0.0752, "step": 86270 }, { "epoch": 2.551605843733365, "grad_norm": 0.6002304553985596, "learning_rate": 3.1908249673989797e-06, "loss": 0.0683, "step": 86280 }, { "epoch": 2.551901579227539, "grad_norm": 0.8957858085632324, "learning_rate": 3.1906982774800405e-06, "loss": 0.0862, "step": 86290 }, { "epoch": 2.5521973147217127, "grad_norm": 1.0816173553466797, "learning_rate": 3.190571587561101e-06, "loss": 0.0706, "step": 86300 }, { "epoch": 2.5524930502158867, "grad_norm": 0.7352268695831299, "learning_rate": 3.190444897642162e-06, "loss": 0.0847, "step": 86310 }, { "epoch": 2.552788785710061, "grad_norm": 0.8848100304603577, "learning_rate": 3.1903182077232224e-06, "loss": 0.0735, "step": 86320 }, { "epoch": 2.553084521204235, "grad_norm": 0.6988300681114197, "learning_rate": 3.190191517804283e-06, "loss": 0.0908, "step": 86330 }, { "epoch": 2.553380256698409, "grad_norm": 0.6175190210342407, "learning_rate": 3.1900648278853436e-06, "loss": 0.0828, "step": 86340 }, { "epoch": 2.553675992192583, "grad_norm": 0.556782603263855, "learning_rate": 3.1899381379664044e-06, "loss": 0.0864, "step": 86350 }, { "epoch": 2.553971727686757, "grad_norm": 0.7067630290985107, "learning_rate": 3.1898114480474647e-06, "loss": 0.0783, "step": 86360 }, { "epoch": 2.5542674631809312, "grad_norm": 0.419508695602417, "learning_rate": 3.1896847581285255e-06, "loss": 0.0508, "step": 86370 }, { "epoch": 2.5545631986751047, "grad_norm": 0.7754731178283691, "learning_rate": 3.189558068209586e-06, "loss": 0.0729, "step": 86380 }, { "epoch": 2.554858934169279, "grad_norm": 0.5805140733718872, "learning_rate": 3.189431378290647e-06, "loss": 0.0821, "step": 86390 }, { "epoch": 2.555154669663453, "grad_norm": 1.0486005544662476, "learning_rate": 3.1893046883717075e-06, "loss": 0.082, "step": 86400 }, { "epoch": 2.555450405157627, "grad_norm": 1.1047381162643433, "learning_rate": 3.1891779984527682e-06, "loss": 0.0727, "step": 86410 }, { "epoch": 2.555746140651801, "grad_norm": 0.3903489112854004, "learning_rate": 3.1890513085338286e-06, "loss": 0.0939, "step": 86420 }, { "epoch": 2.556041876145975, "grad_norm": 1.1250720024108887, "learning_rate": 3.1889246186148894e-06, "loss": 0.0686, "step": 86430 }, { "epoch": 2.5563376116401493, "grad_norm": 0.6335436701774597, "learning_rate": 3.1887979286959498e-06, "loss": 0.079, "step": 86440 }, { "epoch": 2.556633347134323, "grad_norm": 1.027659296989441, "learning_rate": 3.1886712387770106e-06, "loss": 0.0855, "step": 86450 }, { "epoch": 2.556929082628497, "grad_norm": 0.9596951007843018, "learning_rate": 3.188544548858071e-06, "loss": 0.0723, "step": 86460 }, { "epoch": 2.557224818122671, "grad_norm": 0.639228105545044, "learning_rate": 3.188417858939132e-06, "loss": 0.0624, "step": 86470 }, { "epoch": 2.557520553616845, "grad_norm": 0.8096216917037964, "learning_rate": 3.1882911690201925e-06, "loss": 0.0699, "step": 86480 }, { "epoch": 2.557816289111019, "grad_norm": 0.787547767162323, "learning_rate": 3.1881644791012533e-06, "loss": 0.092, "step": 86490 }, { "epoch": 2.558112024605193, "grad_norm": 1.1065857410430908, "learning_rate": 3.1880377891823137e-06, "loss": 0.0886, "step": 86500 }, { "epoch": 2.5584077600993673, "grad_norm": 1.3997746706008911, "learning_rate": 3.1879110992633744e-06, "loss": 0.08, "step": 86510 }, { "epoch": 2.5587034955935413, "grad_norm": 0.8248578310012817, "learning_rate": 3.187784409344435e-06, "loss": 0.0712, "step": 86520 }, { "epoch": 2.558999231087715, "grad_norm": 0.7605929374694824, "learning_rate": 3.1876577194254956e-06, "loss": 0.0733, "step": 86530 }, { "epoch": 2.559294966581889, "grad_norm": 0.9582875967025757, "learning_rate": 3.187531029506556e-06, "loss": 0.0893, "step": 86540 }, { "epoch": 2.559590702076063, "grad_norm": 0.7527284622192383, "learning_rate": 3.187404339587617e-06, "loss": 0.0781, "step": 86550 }, { "epoch": 2.559886437570237, "grad_norm": 0.7279284596443176, "learning_rate": 3.1872776496686775e-06, "loss": 0.0745, "step": 86560 }, { "epoch": 2.560182173064411, "grad_norm": 0.6747389435768127, "learning_rate": 3.1871509597497383e-06, "loss": 0.0629, "step": 86570 }, { "epoch": 2.5604779085585854, "grad_norm": 0.8611587882041931, "learning_rate": 3.1870242698307987e-06, "loss": 0.0641, "step": 86580 }, { "epoch": 2.5607736440527593, "grad_norm": 0.7545676231384277, "learning_rate": 3.186897579911859e-06, "loss": 0.0989, "step": 86590 }, { "epoch": 2.5610693795469333, "grad_norm": 0.6736353039741516, "learning_rate": 3.18677088999292e-06, "loss": 0.0851, "step": 86600 }, { "epoch": 2.561365115041107, "grad_norm": 0.7081774473190308, "learning_rate": 3.1866442000739802e-06, "loss": 0.0787, "step": 86610 }, { "epoch": 2.561660850535281, "grad_norm": 0.806990385055542, "learning_rate": 3.186517510155041e-06, "loss": 0.0616, "step": 86620 }, { "epoch": 2.5619565860294555, "grad_norm": 0.4886287450790405, "learning_rate": 3.186390820236102e-06, "loss": 0.0667, "step": 86630 }, { "epoch": 2.562252321523629, "grad_norm": 0.5617750287055969, "learning_rate": 3.1862641303171626e-06, "loss": 0.0745, "step": 86640 }, { "epoch": 2.5625480570178034, "grad_norm": 0.6916900277137756, "learning_rate": 3.186137440398223e-06, "loss": 0.0795, "step": 86650 }, { "epoch": 2.5628437925119774, "grad_norm": 0.721485435962677, "learning_rate": 3.1860107504792837e-06, "loss": 0.0761, "step": 86660 }, { "epoch": 2.5631395280061513, "grad_norm": 0.7787211537361145, "learning_rate": 3.185884060560344e-06, "loss": 0.0704, "step": 86670 }, { "epoch": 2.5634352635003252, "grad_norm": 1.3378795385360718, "learning_rate": 3.185757370641405e-06, "loss": 0.0744, "step": 86680 }, { "epoch": 2.563730998994499, "grad_norm": 0.6875439286231995, "learning_rate": 3.1856306807224653e-06, "loss": 0.0751, "step": 86690 }, { "epoch": 2.5640267344886736, "grad_norm": 0.743206262588501, "learning_rate": 3.185503990803526e-06, "loss": 0.0833, "step": 86700 }, { "epoch": 2.564322469982847, "grad_norm": 0.8996177315711975, "learning_rate": 3.185377300884587e-06, "loss": 0.0856, "step": 86710 }, { "epoch": 2.5646182054770215, "grad_norm": 1.1851966381072998, "learning_rate": 3.1852506109656476e-06, "loss": 0.0882, "step": 86720 }, { "epoch": 2.5649139409711954, "grad_norm": 0.9543100595474243, "learning_rate": 3.185123921046708e-06, "loss": 0.0721, "step": 86730 }, { "epoch": 2.5652096764653693, "grad_norm": 0.8608494400978088, "learning_rate": 3.184997231127769e-06, "loss": 0.0767, "step": 86740 }, { "epoch": 2.5655054119595433, "grad_norm": 1.101306676864624, "learning_rate": 3.184870541208829e-06, "loss": 0.1, "step": 86750 }, { "epoch": 2.5658011474537172, "grad_norm": 0.7305518388748169, "learning_rate": 3.18474385128989e-06, "loss": 0.0672, "step": 86760 }, { "epoch": 2.5660968829478916, "grad_norm": 0.6008965373039246, "learning_rate": 3.1846171613709503e-06, "loss": 0.0671, "step": 86770 }, { "epoch": 2.5663926184420656, "grad_norm": 0.8073768615722656, "learning_rate": 3.184490471452011e-06, "loss": 0.0722, "step": 86780 }, { "epoch": 2.5666883539362395, "grad_norm": 1.065146565437317, "learning_rate": 3.184363781533072e-06, "loss": 0.0868, "step": 86790 }, { "epoch": 2.5669840894304135, "grad_norm": 1.0864022970199585, "learning_rate": 3.1842370916141327e-06, "loss": 0.0896, "step": 86800 }, { "epoch": 2.5672798249245874, "grad_norm": 0.7659648060798645, "learning_rate": 3.184110401695193e-06, "loss": 0.1046, "step": 86810 }, { "epoch": 2.5675755604187613, "grad_norm": 0.7654936909675598, "learning_rate": 3.183983711776254e-06, "loss": 0.0701, "step": 86820 }, { "epoch": 2.5678712959129353, "grad_norm": 0.7961021661758423, "learning_rate": 3.183857021857314e-06, "loss": 0.0753, "step": 86830 }, { "epoch": 2.5681670314071097, "grad_norm": 0.8361321091651917, "learning_rate": 3.183730331938375e-06, "loss": 0.0902, "step": 86840 }, { "epoch": 2.5684627669012836, "grad_norm": 0.9399027824401855, "learning_rate": 3.1836036420194354e-06, "loss": 0.0803, "step": 86850 }, { "epoch": 2.5687585023954576, "grad_norm": 0.44606882333755493, "learning_rate": 3.183476952100496e-06, "loss": 0.0754, "step": 86860 }, { "epoch": 2.5690542378896315, "grad_norm": 0.983278751373291, "learning_rate": 3.183350262181557e-06, "loss": 0.0763, "step": 86870 }, { "epoch": 2.5693499733838054, "grad_norm": 0.7712082266807556, "learning_rate": 3.1832235722626177e-06, "loss": 0.0811, "step": 86880 }, { "epoch": 2.5696457088779794, "grad_norm": 0.4898533225059509, "learning_rate": 3.183096882343678e-06, "loss": 0.0889, "step": 86890 }, { "epoch": 2.5699414443721533, "grad_norm": 0.8018094301223755, "learning_rate": 3.182970192424739e-06, "loss": 0.0761, "step": 86900 }, { "epoch": 2.5702371798663277, "grad_norm": 0.6702153086662292, "learning_rate": 3.1828435025057992e-06, "loss": 0.0663, "step": 86910 }, { "epoch": 2.5705329153605017, "grad_norm": 0.4684324264526367, "learning_rate": 3.18271681258686e-06, "loss": 0.0773, "step": 86920 }, { "epoch": 2.5708286508546756, "grad_norm": 0.9426437020301819, "learning_rate": 3.1825901226679204e-06, "loss": 0.0756, "step": 86930 }, { "epoch": 2.5711243863488495, "grad_norm": 0.7045570611953735, "learning_rate": 3.182463432748981e-06, "loss": 0.0713, "step": 86940 }, { "epoch": 2.5714201218430235, "grad_norm": 0.8874663710594177, "learning_rate": 3.182336742830042e-06, "loss": 0.0772, "step": 86950 }, { "epoch": 2.571715857337198, "grad_norm": 0.5105517506599426, "learning_rate": 3.1822100529111028e-06, "loss": 0.0825, "step": 86960 }, { "epoch": 2.5720115928313714, "grad_norm": 0.6010618209838867, "learning_rate": 3.182083362992163e-06, "loss": 0.0651, "step": 86970 }, { "epoch": 2.5723073283255458, "grad_norm": 1.3034018278121948, "learning_rate": 3.181956673073224e-06, "loss": 0.0733, "step": 86980 }, { "epoch": 2.5726030638197197, "grad_norm": 0.6671225428581238, "learning_rate": 3.1818299831542843e-06, "loss": 0.0932, "step": 86990 }, { "epoch": 2.5728987993138936, "grad_norm": 0.6547207236289978, "learning_rate": 3.1817032932353447e-06, "loss": 0.0842, "step": 87000 }, { "epoch": 2.5731945348080676, "grad_norm": 0.8442313075065613, "learning_rate": 3.1815766033164054e-06, "loss": 0.082, "step": 87010 }, { "epoch": 2.5734902703022415, "grad_norm": 0.30944743752479553, "learning_rate": 3.181449913397466e-06, "loss": 0.0836, "step": 87020 }, { "epoch": 2.573786005796416, "grad_norm": 1.3434184789657593, "learning_rate": 3.181323223478527e-06, "loss": 0.0907, "step": 87030 }, { "epoch": 2.5740817412905894, "grad_norm": 0.7786676287651062, "learning_rate": 3.1811965335595874e-06, "loss": 0.0781, "step": 87040 }, { "epoch": 2.574377476784764, "grad_norm": 0.8501812815666199, "learning_rate": 3.181069843640648e-06, "loss": 0.0723, "step": 87050 }, { "epoch": 2.5746732122789378, "grad_norm": 0.5532944202423096, "learning_rate": 3.1809431537217085e-06, "loss": 0.0736, "step": 87060 }, { "epoch": 2.5749689477731117, "grad_norm": 0.578026533126831, "learning_rate": 3.1808164638027693e-06, "loss": 0.0671, "step": 87070 }, { "epoch": 2.5752646832672856, "grad_norm": 0.822683572769165, "learning_rate": 3.1806897738838297e-06, "loss": 0.068, "step": 87080 }, { "epoch": 2.5755604187614596, "grad_norm": 0.9199075102806091, "learning_rate": 3.1805630839648905e-06, "loss": 0.0716, "step": 87090 }, { "epoch": 2.575856154255634, "grad_norm": 0.8641871809959412, "learning_rate": 3.180436394045951e-06, "loss": 0.0752, "step": 87100 }, { "epoch": 2.576151889749808, "grad_norm": 0.6644486784934998, "learning_rate": 3.180309704127012e-06, "loss": 0.0734, "step": 87110 }, { "epoch": 2.576447625243982, "grad_norm": 1.4191935062408447, "learning_rate": 3.1801830142080724e-06, "loss": 0.0742, "step": 87120 }, { "epoch": 2.576743360738156, "grad_norm": 0.8687930703163147, "learning_rate": 3.1800563242891332e-06, "loss": 0.0861, "step": 87130 }, { "epoch": 2.5770390962323297, "grad_norm": 0.6733434200286865, "learning_rate": 3.1799296343701936e-06, "loss": 0.0801, "step": 87140 }, { "epoch": 2.5773348317265037, "grad_norm": 0.5852848887443542, "learning_rate": 3.1798029444512544e-06, "loss": 0.0775, "step": 87150 }, { "epoch": 2.5776305672206776, "grad_norm": 1.1308715343475342, "learning_rate": 3.1796762545323148e-06, "loss": 0.0916, "step": 87160 }, { "epoch": 2.577926302714852, "grad_norm": 0.41807785630226135, "learning_rate": 3.1795495646133755e-06, "loss": 0.0697, "step": 87170 }, { "epoch": 2.578222038209026, "grad_norm": 0.6134514808654785, "learning_rate": 3.179422874694436e-06, "loss": 0.0668, "step": 87180 }, { "epoch": 2.5785177737032, "grad_norm": 0.890195369720459, "learning_rate": 3.179296184775497e-06, "loss": 0.101, "step": 87190 }, { "epoch": 2.578813509197374, "grad_norm": 0.6056222915649414, "learning_rate": 3.1791694948565575e-06, "loss": 0.0781, "step": 87200 }, { "epoch": 2.579109244691548, "grad_norm": 0.6098456978797913, "learning_rate": 3.1790428049376183e-06, "loss": 0.0666, "step": 87210 }, { "epoch": 2.5794049801857217, "grad_norm": 1.0450280904769897, "learning_rate": 3.1789161150186786e-06, "loss": 0.0843, "step": 87220 }, { "epoch": 2.5797007156798957, "grad_norm": 0.8317927718162537, "learning_rate": 3.1787894250997394e-06, "loss": 0.064, "step": 87230 }, { "epoch": 2.57999645117407, "grad_norm": 1.3108198642730713, "learning_rate": 3.1786627351808e-06, "loss": 0.0951, "step": 87240 }, { "epoch": 2.580292186668244, "grad_norm": 0.7969551682472229, "learning_rate": 3.1785360452618606e-06, "loss": 0.0885, "step": 87250 }, { "epoch": 2.580587922162418, "grad_norm": 1.0720363855361938, "learning_rate": 3.178409355342921e-06, "loss": 0.0765, "step": 87260 }, { "epoch": 2.580883657656592, "grad_norm": 1.0024758577346802, "learning_rate": 3.178282665423982e-06, "loss": 0.0653, "step": 87270 }, { "epoch": 2.581179393150766, "grad_norm": 0.8829008936882019, "learning_rate": 3.1781559755050425e-06, "loss": 0.0683, "step": 87280 }, { "epoch": 2.58147512864494, "grad_norm": 0.7163590788841248, "learning_rate": 3.1780292855861033e-06, "loss": 0.083, "step": 87290 }, { "epoch": 2.5817708641391137, "grad_norm": 0.5461522936820984, "learning_rate": 3.1779025956671637e-06, "loss": 0.0743, "step": 87300 }, { "epoch": 2.582066599633288, "grad_norm": 1.0016008615493774, "learning_rate": 3.1777759057482245e-06, "loss": 0.0786, "step": 87310 }, { "epoch": 2.582362335127462, "grad_norm": 0.3155145049095154, "learning_rate": 3.177649215829285e-06, "loss": 0.074, "step": 87320 }, { "epoch": 2.582658070621636, "grad_norm": 0.939954400062561, "learning_rate": 3.1775225259103456e-06, "loss": 0.0756, "step": 87330 }, { "epoch": 2.58295380611581, "grad_norm": 0.7585912942886353, "learning_rate": 3.177395835991406e-06, "loss": 0.0725, "step": 87340 }, { "epoch": 2.583249541609984, "grad_norm": 0.7841126918792725, "learning_rate": 3.177269146072467e-06, "loss": 0.0692, "step": 87350 }, { "epoch": 2.5835452771041583, "grad_norm": 1.1015381813049316, "learning_rate": 3.1771424561535276e-06, "loss": 0.0769, "step": 87360 }, { "epoch": 2.583841012598332, "grad_norm": 0.6945440769195557, "learning_rate": 3.1770157662345884e-06, "loss": 0.0641, "step": 87370 }, { "epoch": 2.584136748092506, "grad_norm": 0.9517022967338562, "learning_rate": 3.1768890763156487e-06, "loss": 0.07, "step": 87380 }, { "epoch": 2.58443248358668, "grad_norm": 0.840114414691925, "learning_rate": 3.1767623863967095e-06, "loss": 0.0971, "step": 87390 }, { "epoch": 2.584728219080854, "grad_norm": 0.9755634665489197, "learning_rate": 3.17663569647777e-06, "loss": 0.0949, "step": 87400 }, { "epoch": 2.585023954575028, "grad_norm": 0.6458279490470886, "learning_rate": 3.1765090065588303e-06, "loss": 0.085, "step": 87410 }, { "epoch": 2.585319690069202, "grad_norm": 0.47843971848487854, "learning_rate": 3.176382316639891e-06, "loss": 0.0641, "step": 87420 }, { "epoch": 2.5856154255633763, "grad_norm": 0.928843080997467, "learning_rate": 3.176255626720952e-06, "loss": 0.081, "step": 87430 }, { "epoch": 2.5859111610575503, "grad_norm": 0.7817610502243042, "learning_rate": 3.1761289368020126e-06, "loss": 0.0752, "step": 87440 }, { "epoch": 2.586206896551724, "grad_norm": 0.9196603298187256, "learning_rate": 3.176002246883073e-06, "loss": 0.0819, "step": 87450 }, { "epoch": 2.586502632045898, "grad_norm": 0.661163330078125, "learning_rate": 3.1758755569641338e-06, "loss": 0.0687, "step": 87460 }, { "epoch": 2.586798367540072, "grad_norm": 1.0569294691085815, "learning_rate": 3.175748867045194e-06, "loss": 0.0763, "step": 87470 }, { "epoch": 2.587094103034246, "grad_norm": 0.9715414047241211, "learning_rate": 3.175622177126255e-06, "loss": 0.0684, "step": 87480 }, { "epoch": 2.58738983852842, "grad_norm": 0.8895046710968018, "learning_rate": 3.1754954872073153e-06, "loss": 0.0918, "step": 87490 }, { "epoch": 2.5876855740225944, "grad_norm": 0.5890216827392578, "learning_rate": 3.175368797288376e-06, "loss": 0.0789, "step": 87500 }, { "epoch": 2.5879813095167683, "grad_norm": 0.6093421578407288, "learning_rate": 3.175242107369437e-06, "loss": 0.0856, "step": 87510 }, { "epoch": 2.5882770450109422, "grad_norm": 0.7116516828536987, "learning_rate": 3.1751154174504977e-06, "loss": 0.0644, "step": 87520 }, { "epoch": 2.588572780505116, "grad_norm": 0.8347620368003845, "learning_rate": 3.174988727531558e-06, "loss": 0.071, "step": 87530 }, { "epoch": 2.58886851599929, "grad_norm": 1.0452300310134888, "learning_rate": 3.174862037612619e-06, "loss": 0.0776, "step": 87540 }, { "epoch": 2.5891642514934645, "grad_norm": 1.2217762470245361, "learning_rate": 3.174735347693679e-06, "loss": 0.0795, "step": 87550 }, { "epoch": 2.589459986987638, "grad_norm": 1.0878651142120361, "learning_rate": 3.17460865777474e-06, "loss": 0.0875, "step": 87560 }, { "epoch": 2.5897557224818124, "grad_norm": 2.03538179397583, "learning_rate": 3.1744819678558003e-06, "loss": 0.0745, "step": 87570 }, { "epoch": 2.5900514579759863, "grad_norm": 0.8217225670814514, "learning_rate": 3.174355277936861e-06, "loss": 0.058, "step": 87580 }, { "epoch": 2.5903471934701603, "grad_norm": 0.798906147480011, "learning_rate": 3.174228588017922e-06, "loss": 0.0705, "step": 87590 }, { "epoch": 2.5906429289643342, "grad_norm": 0.8771393299102783, "learning_rate": 3.1741018980989827e-06, "loss": 0.0996, "step": 87600 }, { "epoch": 2.590938664458508, "grad_norm": 0.4843314290046692, "learning_rate": 3.173975208180043e-06, "loss": 0.0719, "step": 87610 }, { "epoch": 2.5912343999526826, "grad_norm": 0.8432711362838745, "learning_rate": 3.173848518261104e-06, "loss": 0.0819, "step": 87620 }, { "epoch": 2.591530135446856, "grad_norm": 0.6036401987075806, "learning_rate": 3.1737218283421642e-06, "loss": 0.0829, "step": 87630 }, { "epoch": 2.5918258709410305, "grad_norm": 0.601201593875885, "learning_rate": 3.173595138423225e-06, "loss": 0.082, "step": 87640 }, { "epoch": 2.5921216064352044, "grad_norm": 0.8102869391441345, "learning_rate": 3.1734684485042854e-06, "loss": 0.075, "step": 87650 }, { "epoch": 2.5924173419293783, "grad_norm": 0.7277684807777405, "learning_rate": 3.173341758585346e-06, "loss": 0.0836, "step": 87660 }, { "epoch": 2.5927130774235523, "grad_norm": 0.6343516111373901, "learning_rate": 3.173215068666407e-06, "loss": 0.0837, "step": 87670 }, { "epoch": 2.5930088129177262, "grad_norm": 1.106558918952942, "learning_rate": 3.1730883787474678e-06, "loss": 0.0721, "step": 87680 }, { "epoch": 2.5933045484119006, "grad_norm": 0.8897512555122375, "learning_rate": 3.172961688828528e-06, "loss": 0.1132, "step": 87690 }, { "epoch": 2.5936002839060746, "grad_norm": 0.6621208190917969, "learning_rate": 3.172834998909589e-06, "loss": 0.0645, "step": 87700 }, { "epoch": 2.5938960194002485, "grad_norm": 0.4843004047870636, "learning_rate": 3.1727083089906493e-06, "loss": 0.0801, "step": 87710 }, { "epoch": 2.5941917548944224, "grad_norm": 1.0273526906967163, "learning_rate": 3.17258161907171e-06, "loss": 0.0729, "step": 87720 }, { "epoch": 2.5944874903885964, "grad_norm": 0.6434133648872375, "learning_rate": 3.1724549291527704e-06, "loss": 0.0677, "step": 87730 }, { "epoch": 2.5947832258827703, "grad_norm": 0.8974341154098511, "learning_rate": 3.1723282392338312e-06, "loss": 0.079, "step": 87740 }, { "epoch": 2.5950789613769443, "grad_norm": 0.691431999206543, "learning_rate": 3.172201549314892e-06, "loss": 0.0796, "step": 87750 }, { "epoch": 2.5953746968711187, "grad_norm": 0.7463666200637817, "learning_rate": 3.172074859395953e-06, "loss": 0.0768, "step": 87760 }, { "epoch": 2.5956704323652926, "grad_norm": 0.5939770936965942, "learning_rate": 3.171948169477013e-06, "loss": 0.0669, "step": 87770 }, { "epoch": 2.5959661678594665, "grad_norm": 1.0253063440322876, "learning_rate": 3.171821479558074e-06, "loss": 0.0622, "step": 87780 }, { "epoch": 2.5962619033536405, "grad_norm": 0.7787125110626221, "learning_rate": 3.1716947896391343e-06, "loss": 0.0703, "step": 87790 }, { "epoch": 2.5965576388478144, "grad_norm": 0.6437535881996155, "learning_rate": 3.171568099720195e-06, "loss": 0.0718, "step": 87800 }, { "epoch": 2.5968533743419884, "grad_norm": 0.9391998648643494, "learning_rate": 3.1714414098012555e-06, "loss": 0.0802, "step": 87810 }, { "epoch": 2.5971491098361623, "grad_norm": 0.6430447697639465, "learning_rate": 3.171314719882316e-06, "loss": 0.08, "step": 87820 }, { "epoch": 2.5974448453303367, "grad_norm": 0.7994515299797058, "learning_rate": 3.171188029963377e-06, "loss": 0.0786, "step": 87830 }, { "epoch": 2.5977405808245106, "grad_norm": 0.9274745583534241, "learning_rate": 3.1710613400444374e-06, "loss": 0.0866, "step": 87840 }, { "epoch": 2.5980363163186846, "grad_norm": 1.4390608072280884, "learning_rate": 3.1709346501254982e-06, "loss": 0.087, "step": 87850 }, { "epoch": 2.5983320518128585, "grad_norm": 1.2086561918258667, "learning_rate": 3.1708079602065586e-06, "loss": 0.0794, "step": 87860 }, { "epoch": 2.5986277873070325, "grad_norm": 0.4670902192592621, "learning_rate": 3.1706812702876194e-06, "loss": 0.0851, "step": 87870 }, { "epoch": 2.598923522801207, "grad_norm": 0.7788154482841492, "learning_rate": 3.1705545803686797e-06, "loss": 0.067, "step": 87880 }, { "epoch": 2.5992192582953804, "grad_norm": 0.8221318125724792, "learning_rate": 3.1704278904497405e-06, "loss": 0.0783, "step": 87890 }, { "epoch": 2.5995149937895548, "grad_norm": 0.8962579965591431, "learning_rate": 3.170301200530801e-06, "loss": 0.0949, "step": 87900 }, { "epoch": 2.5998107292837287, "grad_norm": 0.776379406452179, "learning_rate": 3.170174510611862e-06, "loss": 0.0861, "step": 87910 }, { "epoch": 2.6001064647779026, "grad_norm": 0.5297428369522095, "learning_rate": 3.1700478206929225e-06, "loss": 0.0498, "step": 87920 }, { "epoch": 2.6004022002720766, "grad_norm": 1.2226762771606445, "learning_rate": 3.1699211307739833e-06, "loss": 0.0777, "step": 87930 }, { "epoch": 2.6006979357662505, "grad_norm": 0.9070461988449097, "learning_rate": 3.1697944408550436e-06, "loss": 0.0814, "step": 87940 }, { "epoch": 2.600993671260425, "grad_norm": 0.5750687718391418, "learning_rate": 3.1696677509361044e-06, "loss": 0.0835, "step": 87950 }, { "epoch": 2.6012894067545984, "grad_norm": 0.5845387578010559, "learning_rate": 3.1695410610171648e-06, "loss": 0.0749, "step": 87960 }, { "epoch": 2.601585142248773, "grad_norm": 0.5751424431800842, "learning_rate": 3.1694143710982256e-06, "loss": 0.0711, "step": 87970 }, { "epoch": 2.6018808777429467, "grad_norm": 0.7670214772224426, "learning_rate": 3.169287681179286e-06, "loss": 0.0776, "step": 87980 }, { "epoch": 2.6021766132371207, "grad_norm": 0.9985567927360535, "learning_rate": 3.169160991260347e-06, "loss": 0.1011, "step": 87990 }, { "epoch": 2.6024723487312946, "grad_norm": 0.7159750461578369, "learning_rate": 3.1690343013414075e-06, "loss": 0.0743, "step": 88000 }, { "epoch": 2.6027680842254686, "grad_norm": 0.8265098929405212, "learning_rate": 3.1689076114224683e-06, "loss": 0.0788, "step": 88010 }, { "epoch": 2.603063819719643, "grad_norm": 0.6191354990005493, "learning_rate": 3.1687809215035287e-06, "loss": 0.0758, "step": 88020 }, { "epoch": 2.603359555213817, "grad_norm": 0.9274851083755493, "learning_rate": 3.1686542315845895e-06, "loss": 0.0679, "step": 88030 }, { "epoch": 2.603655290707991, "grad_norm": 0.8410375714302063, "learning_rate": 3.16852754166565e-06, "loss": 0.0865, "step": 88040 }, { "epoch": 2.603951026202165, "grad_norm": 0.5806344747543335, "learning_rate": 3.1684008517467106e-06, "loss": 0.0704, "step": 88050 }, { "epoch": 2.6042467616963387, "grad_norm": 0.5918993949890137, "learning_rate": 3.168274161827771e-06, "loss": 0.0676, "step": 88060 }, { "epoch": 2.6045424971905127, "grad_norm": 0.5583851337432861, "learning_rate": 3.168147471908832e-06, "loss": 0.0582, "step": 88070 }, { "epoch": 2.6048382326846866, "grad_norm": 0.7625942826271057, "learning_rate": 3.1680207819898926e-06, "loss": 0.0851, "step": 88080 }, { "epoch": 2.605133968178861, "grad_norm": 0.8258311152458191, "learning_rate": 3.1678940920709533e-06, "loss": 0.0722, "step": 88090 }, { "epoch": 2.605429703673035, "grad_norm": 0.9480258822441101, "learning_rate": 3.1677674021520137e-06, "loss": 0.0883, "step": 88100 }, { "epoch": 2.605725439167209, "grad_norm": 0.8383276462554932, "learning_rate": 3.1676407122330745e-06, "loss": 0.0782, "step": 88110 }, { "epoch": 2.606021174661383, "grad_norm": 0.7651557922363281, "learning_rate": 3.167514022314135e-06, "loss": 0.0688, "step": 88120 }, { "epoch": 2.6063169101555568, "grad_norm": 0.8431714773178101, "learning_rate": 3.1673873323951957e-06, "loss": 0.0762, "step": 88130 }, { "epoch": 2.6066126456497307, "grad_norm": 0.6929445862770081, "learning_rate": 3.167260642476256e-06, "loss": 0.0937, "step": 88140 }, { "epoch": 2.6069083811439047, "grad_norm": 0.7431625723838806, "learning_rate": 3.1671339525573172e-06, "loss": 0.0779, "step": 88150 }, { "epoch": 2.607204116638079, "grad_norm": 1.0372992753982544, "learning_rate": 3.1670072626383776e-06, "loss": 0.0749, "step": 88160 }, { "epoch": 2.607499852132253, "grad_norm": 0.3172239661216736, "learning_rate": 3.1668805727194384e-06, "loss": 0.0562, "step": 88170 }, { "epoch": 2.607795587626427, "grad_norm": 0.9513006210327148, "learning_rate": 3.1667538828004988e-06, "loss": 0.0744, "step": 88180 }, { "epoch": 2.608091323120601, "grad_norm": 0.8780444264411926, "learning_rate": 3.1666271928815596e-06, "loss": 0.0906, "step": 88190 }, { "epoch": 2.608387058614775, "grad_norm": 0.7292040586471558, "learning_rate": 3.16650050296262e-06, "loss": 0.0813, "step": 88200 }, { "epoch": 2.608682794108949, "grad_norm": 0.7760095596313477, "learning_rate": 3.1663738130436807e-06, "loss": 0.0665, "step": 88210 }, { "epoch": 2.6089785296031227, "grad_norm": 0.7510316967964172, "learning_rate": 3.166247123124741e-06, "loss": 0.0615, "step": 88220 }, { "epoch": 2.609274265097297, "grad_norm": 0.8666266798973083, "learning_rate": 3.1661204332058023e-06, "loss": 0.0912, "step": 88230 }, { "epoch": 2.609570000591471, "grad_norm": 0.6876423358917236, "learning_rate": 3.1659937432868627e-06, "loss": 0.0783, "step": 88240 }, { "epoch": 2.609865736085645, "grad_norm": 1.0914924144744873, "learning_rate": 3.165867053367923e-06, "loss": 0.0849, "step": 88250 }, { "epoch": 2.610161471579819, "grad_norm": 0.7167068719863892, "learning_rate": 3.165740363448984e-06, "loss": 0.0811, "step": 88260 }, { "epoch": 2.610457207073993, "grad_norm": 0.4677436947822571, "learning_rate": 3.165613673530044e-06, "loss": 0.0805, "step": 88270 }, { "epoch": 2.6107529425681673, "grad_norm": 0.896693766117096, "learning_rate": 3.165486983611105e-06, "loss": 0.0725, "step": 88280 }, { "epoch": 2.611048678062341, "grad_norm": 0.8735182285308838, "learning_rate": 3.1653602936921653e-06, "loss": 0.098, "step": 88290 }, { "epoch": 2.611344413556515, "grad_norm": 0.5389071106910706, "learning_rate": 3.165233603773226e-06, "loss": 0.0657, "step": 88300 }, { "epoch": 2.611640149050689, "grad_norm": 0.6871364712715149, "learning_rate": 3.165106913854287e-06, "loss": 0.0763, "step": 88310 }, { "epoch": 2.611935884544863, "grad_norm": 0.8824886679649353, "learning_rate": 3.1649802239353477e-06, "loss": 0.0779, "step": 88320 }, { "epoch": 2.612231620039037, "grad_norm": 0.563447117805481, "learning_rate": 3.164853534016408e-06, "loss": 0.0776, "step": 88330 }, { "epoch": 2.612527355533211, "grad_norm": 0.839876115322113, "learning_rate": 3.164726844097469e-06, "loss": 0.1054, "step": 88340 }, { "epoch": 2.6128230910273853, "grad_norm": 0.6403786540031433, "learning_rate": 3.1646001541785292e-06, "loss": 0.0702, "step": 88350 }, { "epoch": 2.6131188265215592, "grad_norm": 0.8325467109680176, "learning_rate": 3.16447346425959e-06, "loss": 0.0793, "step": 88360 }, { "epoch": 2.613414562015733, "grad_norm": 0.9164292812347412, "learning_rate": 3.1643467743406504e-06, "loss": 0.0799, "step": 88370 }, { "epoch": 2.613710297509907, "grad_norm": 0.5821477770805359, "learning_rate": 3.164220084421711e-06, "loss": 0.0601, "step": 88380 }, { "epoch": 2.614006033004081, "grad_norm": 0.9991928339004517, "learning_rate": 3.164093394502772e-06, "loss": 0.0962, "step": 88390 }, { "epoch": 2.614301768498255, "grad_norm": 0.7564947605133057, "learning_rate": 3.1639667045838327e-06, "loss": 0.0831, "step": 88400 }, { "epoch": 2.614597503992429, "grad_norm": 0.8581715226173401, "learning_rate": 3.163840014664893e-06, "loss": 0.0764, "step": 88410 }, { "epoch": 2.6148932394866033, "grad_norm": 0.7435173988342285, "learning_rate": 3.163713324745954e-06, "loss": 0.0655, "step": 88420 }, { "epoch": 2.6151889749807773, "grad_norm": 1.2374519109725952, "learning_rate": 3.1635866348270143e-06, "loss": 0.0827, "step": 88430 }, { "epoch": 2.6154847104749512, "grad_norm": 1.355465054512024, "learning_rate": 3.163459944908075e-06, "loss": 0.0946, "step": 88440 }, { "epoch": 2.615780445969125, "grad_norm": 0.9872331619262695, "learning_rate": 3.1633332549891354e-06, "loss": 0.0827, "step": 88450 }, { "epoch": 2.616076181463299, "grad_norm": 0.4894111454486847, "learning_rate": 3.163206565070196e-06, "loss": 0.0823, "step": 88460 }, { "epoch": 2.6163719169574735, "grad_norm": 0.46528708934783936, "learning_rate": 3.163079875151257e-06, "loss": 0.0774, "step": 88470 }, { "epoch": 2.616667652451647, "grad_norm": 0.8485066294670105, "learning_rate": 3.1629531852323178e-06, "loss": 0.0813, "step": 88480 }, { "epoch": 2.6169633879458214, "grad_norm": 0.8685113787651062, "learning_rate": 3.162826495313378e-06, "loss": 0.0687, "step": 88490 }, { "epoch": 2.6172591234399953, "grad_norm": 1.0409884452819824, "learning_rate": 3.162699805394439e-06, "loss": 0.0741, "step": 88500 }, { "epoch": 2.6175548589341693, "grad_norm": 0.7512182593345642, "learning_rate": 3.1625731154754993e-06, "loss": 0.0728, "step": 88510 }, { "epoch": 2.6178505944283432, "grad_norm": 0.44266167283058167, "learning_rate": 3.16244642555656e-06, "loss": 0.0731, "step": 88520 }, { "epoch": 2.618146329922517, "grad_norm": 1.043540120124817, "learning_rate": 3.1623197356376205e-06, "loss": 0.0825, "step": 88530 }, { "epoch": 2.6184420654166916, "grad_norm": 0.8122621178627014, "learning_rate": 3.1621930457186813e-06, "loss": 0.1033, "step": 88540 }, { "epoch": 2.618737800910865, "grad_norm": 0.7292711138725281, "learning_rate": 3.162066355799742e-06, "loss": 0.0887, "step": 88550 }, { "epoch": 2.6190335364050394, "grad_norm": 0.5229969620704651, "learning_rate": 3.161939665880803e-06, "loss": 0.0734, "step": 88560 }, { "epoch": 2.6193292718992134, "grad_norm": 0.432420015335083, "learning_rate": 3.161812975961863e-06, "loss": 0.0732, "step": 88570 }, { "epoch": 2.6196250073933873, "grad_norm": 1.1289986371994019, "learning_rate": 3.161686286042924e-06, "loss": 0.074, "step": 88580 }, { "epoch": 2.6199207428875613, "grad_norm": 0.8161040544509888, "learning_rate": 3.1615595961239844e-06, "loss": 0.0867, "step": 88590 }, { "epoch": 2.620216478381735, "grad_norm": 0.8442760705947876, "learning_rate": 3.161432906205045e-06, "loss": 0.0752, "step": 88600 }, { "epoch": 2.6205122138759096, "grad_norm": 1.62376868724823, "learning_rate": 3.1613062162861055e-06, "loss": 0.0982, "step": 88610 }, { "epoch": 2.6208079493700835, "grad_norm": 1.1824744939804077, "learning_rate": 3.1611795263671663e-06, "loss": 0.0734, "step": 88620 }, { "epoch": 2.6211036848642575, "grad_norm": 0.7427413463592529, "learning_rate": 3.161052836448227e-06, "loss": 0.0585, "step": 88630 }, { "epoch": 2.6213994203584314, "grad_norm": 0.9180964231491089, "learning_rate": 3.160926146529288e-06, "loss": 0.0789, "step": 88640 }, { "epoch": 2.6216951558526054, "grad_norm": 0.9266498684883118, "learning_rate": 3.1607994566103482e-06, "loss": 0.0868, "step": 88650 }, { "epoch": 2.6219908913467793, "grad_norm": 0.9153845310211182, "learning_rate": 3.1606727666914086e-06, "loss": 0.0873, "step": 88660 }, { "epoch": 2.6222866268409533, "grad_norm": 0.8993943929672241, "learning_rate": 3.1605460767724694e-06, "loss": 0.0696, "step": 88670 }, { "epoch": 2.6225823623351276, "grad_norm": 0.7598811984062195, "learning_rate": 3.1604193868535298e-06, "loss": 0.0708, "step": 88680 }, { "epoch": 2.6228780978293016, "grad_norm": 0.5256925821304321, "learning_rate": 3.1602926969345906e-06, "loss": 0.0802, "step": 88690 }, { "epoch": 2.6231738333234755, "grad_norm": 1.0403512716293335, "learning_rate": 3.160166007015651e-06, "loss": 0.0889, "step": 88700 }, { "epoch": 2.6234695688176495, "grad_norm": 0.8357226252555847, "learning_rate": 3.160039317096712e-06, "loss": 0.0717, "step": 88710 }, { "epoch": 2.6237653043118234, "grad_norm": 0.47729915380477905, "learning_rate": 3.1599126271777725e-06, "loss": 0.0766, "step": 88720 }, { "epoch": 2.6240610398059974, "grad_norm": 0.6012353301048279, "learning_rate": 3.1597859372588333e-06, "loss": 0.0673, "step": 88730 }, { "epoch": 2.6243567753001713, "grad_norm": 0.7478475570678711, "learning_rate": 3.1596592473398937e-06, "loss": 0.0883, "step": 88740 }, { "epoch": 2.6246525107943457, "grad_norm": 0.6521570086479187, "learning_rate": 3.1595325574209544e-06, "loss": 0.0827, "step": 88750 }, { "epoch": 2.6249482462885196, "grad_norm": 1.1188510656356812, "learning_rate": 3.159405867502015e-06, "loss": 0.0849, "step": 88760 }, { "epoch": 2.6252439817826936, "grad_norm": 1.2684000730514526, "learning_rate": 3.1592791775830756e-06, "loss": 0.0721, "step": 88770 }, { "epoch": 2.6255397172768675, "grad_norm": 0.7109423875808716, "learning_rate": 3.159152487664136e-06, "loss": 0.0693, "step": 88780 }, { "epoch": 2.6258354527710415, "grad_norm": 0.6514816284179688, "learning_rate": 3.159025797745197e-06, "loss": 0.09, "step": 88790 }, { "epoch": 2.626131188265216, "grad_norm": 0.6193401217460632, "learning_rate": 3.1588991078262575e-06, "loss": 0.0847, "step": 88800 }, { "epoch": 2.6264269237593894, "grad_norm": 0.6410872936248779, "learning_rate": 3.1587724179073183e-06, "loss": 0.0674, "step": 88810 }, { "epoch": 2.6267226592535637, "grad_norm": 0.5557751059532166, "learning_rate": 3.1586457279883787e-06, "loss": 0.0618, "step": 88820 }, { "epoch": 2.6270183947477377, "grad_norm": 0.8186057806015015, "learning_rate": 3.1585190380694395e-06, "loss": 0.056, "step": 88830 }, { "epoch": 2.6273141302419116, "grad_norm": 1.1925318241119385, "learning_rate": 3.1583923481505e-06, "loss": 0.0835, "step": 88840 }, { "epoch": 2.6276098657360856, "grad_norm": 0.7129371762275696, "learning_rate": 3.1582656582315606e-06, "loss": 0.0775, "step": 88850 }, { "epoch": 2.6279056012302595, "grad_norm": 0.895487368106842, "learning_rate": 3.158138968312621e-06, "loss": 0.0703, "step": 88860 }, { "epoch": 2.628201336724434, "grad_norm": 1.3248448371887207, "learning_rate": 3.1580122783936822e-06, "loss": 0.0772, "step": 88870 }, { "epoch": 2.6284970722186074, "grad_norm": 1.0499104261398315, "learning_rate": 3.1578855884747426e-06, "loss": 0.069, "step": 88880 }, { "epoch": 2.628792807712782, "grad_norm": 0.9672770500183105, "learning_rate": 3.1577588985558034e-06, "loss": 0.0851, "step": 88890 }, { "epoch": 2.6290885432069557, "grad_norm": 0.8937497735023499, "learning_rate": 3.1576322086368637e-06, "loss": 0.0944, "step": 88900 }, { "epoch": 2.6293842787011297, "grad_norm": 0.7198933959007263, "learning_rate": 3.1575055187179245e-06, "loss": 0.0714, "step": 88910 }, { "epoch": 2.6296800141953036, "grad_norm": 0.9344416856765747, "learning_rate": 3.157378828798985e-06, "loss": 0.0718, "step": 88920 }, { "epoch": 2.6299757496894776, "grad_norm": 0.7433466911315918, "learning_rate": 3.1572521388800457e-06, "loss": 0.0708, "step": 88930 }, { "epoch": 2.630271485183652, "grad_norm": 1.2837400436401367, "learning_rate": 3.157125448961106e-06, "loss": 0.0862, "step": 88940 }, { "epoch": 2.630567220677826, "grad_norm": 0.8421487212181091, "learning_rate": 3.1569987590421673e-06, "loss": 0.0735, "step": 88950 }, { "epoch": 2.630862956172, "grad_norm": 1.1602082252502441, "learning_rate": 3.1568720691232276e-06, "loss": 0.0841, "step": 88960 }, { "epoch": 2.6311586916661738, "grad_norm": 0.9659678339958191, "learning_rate": 3.1567453792042884e-06, "loss": 0.0634, "step": 88970 }, { "epoch": 2.6314544271603477, "grad_norm": 1.056929349899292, "learning_rate": 3.156618689285349e-06, "loss": 0.0659, "step": 88980 }, { "epoch": 2.6317501626545217, "grad_norm": 1.0858274698257446, "learning_rate": 3.1564919993664096e-06, "loss": 0.0924, "step": 88990 }, { "epoch": 2.6320458981486956, "grad_norm": 0.9607486724853516, "learning_rate": 3.15636530944747e-06, "loss": 0.0834, "step": 89000 }, { "epoch": 2.63234163364287, "grad_norm": 1.0235044956207275, "learning_rate": 3.1562386195285307e-06, "loss": 0.0828, "step": 89010 }, { "epoch": 2.632637369137044, "grad_norm": 0.614233136177063, "learning_rate": 3.156111929609591e-06, "loss": 0.0621, "step": 89020 }, { "epoch": 2.632933104631218, "grad_norm": 0.9860360622406006, "learning_rate": 3.1559852396906523e-06, "loss": 0.0829, "step": 89030 }, { "epoch": 2.633228840125392, "grad_norm": 0.9815686345100403, "learning_rate": 3.1558585497717127e-06, "loss": 0.0779, "step": 89040 }, { "epoch": 2.6335245756195658, "grad_norm": 0.5647879242897034, "learning_rate": 3.1557318598527735e-06, "loss": 0.0851, "step": 89050 }, { "epoch": 2.6338203111137397, "grad_norm": 0.8569514155387878, "learning_rate": 3.155605169933834e-06, "loss": 0.0717, "step": 89060 }, { "epoch": 2.6341160466079137, "grad_norm": 1.1377373933792114, "learning_rate": 3.155478480014894e-06, "loss": 0.069, "step": 89070 }, { "epoch": 2.634411782102088, "grad_norm": 0.8862385153770447, "learning_rate": 3.155351790095955e-06, "loss": 0.0735, "step": 89080 }, { "epoch": 2.634707517596262, "grad_norm": 1.1540244817733765, "learning_rate": 3.1552251001770154e-06, "loss": 0.0834, "step": 89090 }, { "epoch": 2.635003253090436, "grad_norm": 0.7765922546386719, "learning_rate": 3.155098410258076e-06, "loss": 0.0764, "step": 89100 }, { "epoch": 2.63529898858461, "grad_norm": 0.7819129228591919, "learning_rate": 3.154971720339137e-06, "loss": 0.102, "step": 89110 }, { "epoch": 2.635594724078784, "grad_norm": 0.8428846597671509, "learning_rate": 3.1548450304201977e-06, "loss": 0.0837, "step": 89120 }, { "epoch": 2.635890459572958, "grad_norm": 0.8289098739624023, "learning_rate": 3.154718340501258e-06, "loss": 0.0753, "step": 89130 }, { "epoch": 2.6361861950671317, "grad_norm": 1.24419367313385, "learning_rate": 3.154591650582319e-06, "loss": 0.1076, "step": 89140 }, { "epoch": 2.636481930561306, "grad_norm": 0.7624487280845642, "learning_rate": 3.1544649606633792e-06, "loss": 0.0866, "step": 89150 }, { "epoch": 2.63677766605548, "grad_norm": 0.7740007042884827, "learning_rate": 3.15433827074444e-06, "loss": 0.0733, "step": 89160 }, { "epoch": 2.637073401549654, "grad_norm": 0.7314602136611938, "learning_rate": 3.1542115808255004e-06, "loss": 0.0694, "step": 89170 }, { "epoch": 2.637369137043828, "grad_norm": 0.703934371471405, "learning_rate": 3.154084890906561e-06, "loss": 0.0656, "step": 89180 }, { "epoch": 2.637664872538002, "grad_norm": 0.5620635151863098, "learning_rate": 3.153958200987622e-06, "loss": 0.0838, "step": 89190 }, { "epoch": 2.6379606080321762, "grad_norm": 0.7320178151130676, "learning_rate": 3.1538315110686828e-06, "loss": 0.0827, "step": 89200 }, { "epoch": 2.63825634352635, "grad_norm": 0.8038031458854675, "learning_rate": 3.153704821149743e-06, "loss": 0.076, "step": 89210 }, { "epoch": 2.638552079020524, "grad_norm": 0.6880769729614258, "learning_rate": 3.153578131230804e-06, "loss": 0.0705, "step": 89220 }, { "epoch": 2.638847814514698, "grad_norm": 0.6490364670753479, "learning_rate": 3.1534514413118643e-06, "loss": 0.0794, "step": 89230 }, { "epoch": 2.639143550008872, "grad_norm": 0.7843096256256104, "learning_rate": 3.153324751392925e-06, "loss": 0.0894, "step": 89240 }, { "epoch": 2.639439285503046, "grad_norm": 1.1643097400665283, "learning_rate": 3.1531980614739854e-06, "loss": 0.1, "step": 89250 }, { "epoch": 2.63973502099722, "grad_norm": 0.9591648578643799, "learning_rate": 3.1530713715550462e-06, "loss": 0.0739, "step": 89260 }, { "epoch": 2.6400307564913943, "grad_norm": 0.9268031120300293, "learning_rate": 3.152944681636107e-06, "loss": 0.0741, "step": 89270 }, { "epoch": 2.6403264919855682, "grad_norm": 0.9406489729881287, "learning_rate": 3.152817991717168e-06, "loss": 0.0715, "step": 89280 }, { "epoch": 2.640622227479742, "grad_norm": 1.2045329809188843, "learning_rate": 3.152691301798228e-06, "loss": 0.0863, "step": 89290 }, { "epoch": 2.640917962973916, "grad_norm": 0.9986044764518738, "learning_rate": 3.152564611879289e-06, "loss": 0.0871, "step": 89300 }, { "epoch": 2.64121369846809, "grad_norm": 0.5764761567115784, "learning_rate": 3.1524379219603493e-06, "loss": 0.064, "step": 89310 }, { "epoch": 2.641509433962264, "grad_norm": 0.47824451327323914, "learning_rate": 3.15231123204141e-06, "loss": 0.0711, "step": 89320 }, { "epoch": 2.641805169456438, "grad_norm": 0.9765607118606567, "learning_rate": 3.1521845421224705e-06, "loss": 0.0822, "step": 89330 }, { "epoch": 2.6421009049506123, "grad_norm": 0.9155518412590027, "learning_rate": 3.1520578522035313e-06, "loss": 0.081, "step": 89340 }, { "epoch": 2.6423966404447863, "grad_norm": 0.6298478841781616, "learning_rate": 3.151931162284592e-06, "loss": 0.07, "step": 89350 }, { "epoch": 2.6426923759389602, "grad_norm": 1.1595643758773804, "learning_rate": 3.151804472365653e-06, "loss": 0.0841, "step": 89360 }, { "epoch": 2.642988111433134, "grad_norm": 0.6937330961227417, "learning_rate": 3.1516777824467132e-06, "loss": 0.0641, "step": 89370 }, { "epoch": 2.643283846927308, "grad_norm": 0.6396675705909729, "learning_rate": 3.151551092527774e-06, "loss": 0.0595, "step": 89380 }, { "epoch": 2.6435795824214825, "grad_norm": 0.7814270853996277, "learning_rate": 3.1514244026088344e-06, "loss": 0.0765, "step": 89390 }, { "epoch": 2.643875317915656, "grad_norm": 0.728389322757721, "learning_rate": 3.151297712689895e-06, "loss": 0.0687, "step": 89400 }, { "epoch": 2.6441710534098304, "grad_norm": 0.9584036469459534, "learning_rate": 3.1511710227709555e-06, "loss": 0.0773, "step": 89410 }, { "epoch": 2.6444667889040043, "grad_norm": 0.9387927651405334, "learning_rate": 3.1510443328520163e-06, "loss": 0.0755, "step": 89420 }, { "epoch": 2.6447625243981783, "grad_norm": 0.6369950175285339, "learning_rate": 3.150917642933077e-06, "loss": 0.0542, "step": 89430 }, { "epoch": 2.645058259892352, "grad_norm": 0.6611319780349731, "learning_rate": 3.150790953014138e-06, "loss": 0.0851, "step": 89440 }, { "epoch": 2.645353995386526, "grad_norm": 0.9860441088676453, "learning_rate": 3.1506642630951983e-06, "loss": 0.0897, "step": 89450 }, { "epoch": 2.6456497308807005, "grad_norm": 0.7353153824806213, "learning_rate": 3.150537573176259e-06, "loss": 0.0699, "step": 89460 }, { "epoch": 2.645945466374874, "grad_norm": 0.5012874603271484, "learning_rate": 3.1504108832573194e-06, "loss": 0.0548, "step": 89470 }, { "epoch": 2.6462412018690484, "grad_norm": 1.0437726974487305, "learning_rate": 3.15028419333838e-06, "loss": 0.072, "step": 89480 }, { "epoch": 2.6465369373632224, "grad_norm": 0.8882045745849609, "learning_rate": 3.1501575034194406e-06, "loss": 0.0874, "step": 89490 }, { "epoch": 2.6468326728573963, "grad_norm": 0.8525453805923462, "learning_rate": 3.150030813500501e-06, "loss": 0.0953, "step": 89500 }, { "epoch": 2.6471284083515703, "grad_norm": 0.5097687840461731, "learning_rate": 3.149904123581562e-06, "loss": 0.0879, "step": 89510 }, { "epoch": 2.647424143845744, "grad_norm": 0.4167402386665344, "learning_rate": 3.1497774336626225e-06, "loss": 0.0738, "step": 89520 }, { "epoch": 2.6477198793399186, "grad_norm": 1.2416597604751587, "learning_rate": 3.1496507437436833e-06, "loss": 0.0606, "step": 89530 }, { "epoch": 2.6480156148340925, "grad_norm": 0.7477688789367676, "learning_rate": 3.1495240538247437e-06, "loss": 0.0757, "step": 89540 }, { "epoch": 2.6483113503282665, "grad_norm": 0.746648371219635, "learning_rate": 3.1493973639058045e-06, "loss": 0.094, "step": 89550 }, { "epoch": 2.6486070858224404, "grad_norm": 0.8263762593269348, "learning_rate": 3.149270673986865e-06, "loss": 0.0785, "step": 89560 }, { "epoch": 2.6489028213166144, "grad_norm": 0.5616634488105774, "learning_rate": 3.1491439840679256e-06, "loss": 0.0775, "step": 89570 }, { "epoch": 2.6491985568107883, "grad_norm": 0.8928478360176086, "learning_rate": 3.149017294148986e-06, "loss": 0.0808, "step": 89580 }, { "epoch": 2.6494942923049623, "grad_norm": 0.7294304370880127, "learning_rate": 3.148890604230047e-06, "loss": 0.0838, "step": 89590 }, { "epoch": 2.6497900277991366, "grad_norm": 0.6618821620941162, "learning_rate": 3.1487639143111076e-06, "loss": 0.0726, "step": 89600 }, { "epoch": 2.6500857632933106, "grad_norm": 0.6311138868331909, "learning_rate": 3.1486372243921684e-06, "loss": 0.0802, "step": 89610 }, { "epoch": 2.6503814987874845, "grad_norm": 0.5159848928451538, "learning_rate": 3.1485105344732287e-06, "loss": 0.0571, "step": 89620 }, { "epoch": 2.6506772342816585, "grad_norm": 0.7652291655540466, "learning_rate": 3.1483838445542895e-06, "loss": 0.0569, "step": 89630 }, { "epoch": 2.6509729697758324, "grad_norm": 0.7228724956512451, "learning_rate": 3.14825715463535e-06, "loss": 0.091, "step": 89640 }, { "epoch": 2.6512687052700064, "grad_norm": 1.048126220703125, "learning_rate": 3.1481304647164107e-06, "loss": 0.087, "step": 89650 }, { "epoch": 2.6515644407641803, "grad_norm": 0.5544221997261047, "learning_rate": 3.148003774797471e-06, "loss": 0.079, "step": 89660 }, { "epoch": 2.6518601762583547, "grad_norm": 1.4591819047927856, "learning_rate": 3.1478770848785323e-06, "loss": 0.0782, "step": 89670 }, { "epoch": 2.6521559117525286, "grad_norm": 1.069855809211731, "learning_rate": 3.1477503949595926e-06, "loss": 0.0724, "step": 89680 }, { "epoch": 2.6524516472467026, "grad_norm": 0.8287960290908813, "learning_rate": 3.1476237050406534e-06, "loss": 0.0802, "step": 89690 }, { "epoch": 2.6527473827408765, "grad_norm": 0.6469740271568298, "learning_rate": 3.1474970151217138e-06, "loss": 0.0754, "step": 89700 }, { "epoch": 2.6530431182350505, "grad_norm": 0.6399095058441162, "learning_rate": 3.1473703252027746e-06, "loss": 0.0932, "step": 89710 }, { "epoch": 2.653338853729225, "grad_norm": 0.9523780941963196, "learning_rate": 3.147243635283835e-06, "loss": 0.0658, "step": 89720 }, { "epoch": 2.6536345892233983, "grad_norm": 1.1332508325576782, "learning_rate": 3.1471169453648957e-06, "loss": 0.0776, "step": 89730 }, { "epoch": 2.6539303247175727, "grad_norm": 1.1916823387145996, "learning_rate": 3.146990255445956e-06, "loss": 0.1008, "step": 89740 }, { "epoch": 2.6542260602117467, "grad_norm": 0.4608691334724426, "learning_rate": 3.1468635655270173e-06, "loss": 0.0784, "step": 89750 }, { "epoch": 2.6545217957059206, "grad_norm": 0.8422609567642212, "learning_rate": 3.1467368756080777e-06, "loss": 0.09, "step": 89760 }, { "epoch": 2.6548175312000946, "grad_norm": 0.9220327138900757, "learning_rate": 3.1466101856891385e-06, "loss": 0.0653, "step": 89770 }, { "epoch": 2.6551132666942685, "grad_norm": 0.6105079054832458, "learning_rate": 3.146483495770199e-06, "loss": 0.0595, "step": 89780 }, { "epoch": 2.655409002188443, "grad_norm": 1.1117393970489502, "learning_rate": 3.1463568058512596e-06, "loss": 0.0952, "step": 89790 }, { "epoch": 2.655704737682617, "grad_norm": 0.6263181567192078, "learning_rate": 3.14623011593232e-06, "loss": 0.0802, "step": 89800 }, { "epoch": 2.6560004731767908, "grad_norm": 0.9191086292266846, "learning_rate": 3.1461034260133808e-06, "loss": 0.0767, "step": 89810 }, { "epoch": 2.6562962086709647, "grad_norm": 1.1743541955947876, "learning_rate": 3.145976736094441e-06, "loss": 0.0647, "step": 89820 }, { "epoch": 2.6565919441651387, "grad_norm": 1.4889732599258423, "learning_rate": 3.1458500461755023e-06, "loss": 0.0824, "step": 89830 }, { "epoch": 2.6568876796593126, "grad_norm": 1.005961537361145, "learning_rate": 3.1457233562565627e-06, "loss": 0.0804, "step": 89840 }, { "epoch": 2.6571834151534865, "grad_norm": 1.0108178853988647, "learning_rate": 3.1455966663376235e-06, "loss": 0.0773, "step": 89850 }, { "epoch": 2.657479150647661, "grad_norm": 0.7674174904823303, "learning_rate": 3.145469976418684e-06, "loss": 0.0784, "step": 89860 }, { "epoch": 2.657774886141835, "grad_norm": 0.8681128025054932, "learning_rate": 3.1453432864997447e-06, "loss": 0.0736, "step": 89870 }, { "epoch": 2.658070621636009, "grad_norm": 0.8133619427680969, "learning_rate": 3.145216596580805e-06, "loss": 0.0818, "step": 89880 }, { "epoch": 2.6583663571301828, "grad_norm": 0.8069908618927002, "learning_rate": 3.1450899066618654e-06, "loss": 0.0852, "step": 89890 }, { "epoch": 2.6586620926243567, "grad_norm": 1.264026165008545, "learning_rate": 3.144963216742926e-06, "loss": 0.1033, "step": 89900 }, { "epoch": 2.6589578281185307, "grad_norm": 0.9843267202377319, "learning_rate": 3.144836526823987e-06, "loss": 0.0871, "step": 89910 }, { "epoch": 2.6592535636127046, "grad_norm": 0.49015188217163086, "learning_rate": 3.1447098369050478e-06, "loss": 0.0614, "step": 89920 }, { "epoch": 2.659549299106879, "grad_norm": 1.3790258169174194, "learning_rate": 3.144583146986108e-06, "loss": 0.0709, "step": 89930 }, { "epoch": 2.659845034601053, "grad_norm": 0.8954811096191406, "learning_rate": 3.144456457067169e-06, "loss": 0.0914, "step": 89940 }, { "epoch": 2.660140770095227, "grad_norm": 0.8794224262237549, "learning_rate": 3.1443297671482293e-06, "loss": 0.0805, "step": 89950 }, { "epoch": 2.660436505589401, "grad_norm": 0.45795780420303345, "learning_rate": 3.14420307722929e-06, "loss": 0.0741, "step": 89960 }, { "epoch": 2.6607322410835748, "grad_norm": 0.9167183041572571, "learning_rate": 3.1440763873103504e-06, "loss": 0.0783, "step": 89970 }, { "epoch": 2.6610279765777487, "grad_norm": 0.6786463260650635, "learning_rate": 3.1439496973914112e-06, "loss": 0.0667, "step": 89980 }, { "epoch": 2.6613237120719226, "grad_norm": 0.6770030856132507, "learning_rate": 3.143823007472472e-06, "loss": 0.0812, "step": 89990 }, { "epoch": 2.661619447566097, "grad_norm": 0.6299745440483093, "learning_rate": 3.143696317553533e-06, "loss": 0.0796, "step": 90000 }, { "epoch": 2.661915183060271, "grad_norm": 0.7848609685897827, "learning_rate": 3.143569627634593e-06, "loss": 0.0967, "step": 90010 }, { "epoch": 2.662210918554445, "grad_norm": 1.0346477031707764, "learning_rate": 3.143442937715654e-06, "loss": 0.0716, "step": 90020 }, { "epoch": 2.662506654048619, "grad_norm": 0.7261191010475159, "learning_rate": 3.1433162477967143e-06, "loss": 0.0638, "step": 90030 }, { "epoch": 2.662802389542793, "grad_norm": 0.8876771330833435, "learning_rate": 3.143189557877775e-06, "loss": 0.0849, "step": 90040 }, { "epoch": 2.663098125036967, "grad_norm": 0.7408024072647095, "learning_rate": 3.1430628679588355e-06, "loss": 0.0806, "step": 90050 }, { "epoch": 2.6633938605311407, "grad_norm": 0.6284027695655823, "learning_rate": 3.1429361780398963e-06, "loss": 0.0798, "step": 90060 }, { "epoch": 2.663689596025315, "grad_norm": 0.4572412669658661, "learning_rate": 3.142809488120957e-06, "loss": 0.0777, "step": 90070 }, { "epoch": 2.663985331519489, "grad_norm": 1.154874563217163, "learning_rate": 3.142682798202018e-06, "loss": 0.0812, "step": 90080 }, { "epoch": 2.664281067013663, "grad_norm": 0.8237200379371643, "learning_rate": 3.1425561082830782e-06, "loss": 0.0689, "step": 90090 }, { "epoch": 2.664576802507837, "grad_norm": 0.9411603212356567, "learning_rate": 3.142429418364139e-06, "loss": 0.0954, "step": 90100 }, { "epoch": 2.664872538002011, "grad_norm": 0.6162247657775879, "learning_rate": 3.1423027284451994e-06, "loss": 0.0778, "step": 90110 }, { "epoch": 2.6651682734961852, "grad_norm": 0.6990569829940796, "learning_rate": 3.14217603852626e-06, "loss": 0.0744, "step": 90120 }, { "epoch": 2.665464008990359, "grad_norm": 0.7853195071220398, "learning_rate": 3.1420493486073205e-06, "loss": 0.0712, "step": 90130 }, { "epoch": 2.665759744484533, "grad_norm": 1.312002182006836, "learning_rate": 3.1419226586883813e-06, "loss": 0.0934, "step": 90140 }, { "epoch": 2.666055479978707, "grad_norm": 0.7333270907402039, "learning_rate": 3.141795968769442e-06, "loss": 0.0848, "step": 90150 }, { "epoch": 2.666351215472881, "grad_norm": 0.7579410672187805, "learning_rate": 3.141669278850503e-06, "loss": 0.0716, "step": 90160 }, { "epoch": 2.666646950967055, "grad_norm": 0.6583071947097778, "learning_rate": 3.1415425889315633e-06, "loss": 0.075, "step": 90170 }, { "epoch": 2.666942686461229, "grad_norm": 0.6992393136024475, "learning_rate": 3.141415899012624e-06, "loss": 0.0761, "step": 90180 }, { "epoch": 2.6672384219554033, "grad_norm": 0.7868935465812683, "learning_rate": 3.1412892090936844e-06, "loss": 0.0736, "step": 90190 }, { "epoch": 2.6675341574495772, "grad_norm": 0.7545393109321594, "learning_rate": 3.141162519174745e-06, "loss": 0.0881, "step": 90200 }, { "epoch": 2.667829892943751, "grad_norm": 0.581174373626709, "learning_rate": 3.1410358292558056e-06, "loss": 0.0798, "step": 90210 }, { "epoch": 2.668125628437925, "grad_norm": 0.6396305561065674, "learning_rate": 3.1409091393368664e-06, "loss": 0.0723, "step": 90220 }, { "epoch": 2.668421363932099, "grad_norm": 0.8580775856971741, "learning_rate": 3.140782449417927e-06, "loss": 0.0624, "step": 90230 }, { "epoch": 2.668717099426273, "grad_norm": 0.8378746509552002, "learning_rate": 3.140655759498988e-06, "loss": 0.0747, "step": 90240 }, { "epoch": 2.669012834920447, "grad_norm": 0.9693901538848877, "learning_rate": 3.1405290695800483e-06, "loss": 0.0789, "step": 90250 }, { "epoch": 2.6693085704146213, "grad_norm": 1.1857223510742188, "learning_rate": 3.140402379661109e-06, "loss": 0.066, "step": 90260 }, { "epoch": 2.6696043059087953, "grad_norm": 1.272519588470459, "learning_rate": 3.1402756897421695e-06, "loss": 0.0668, "step": 90270 }, { "epoch": 2.669900041402969, "grad_norm": 0.656567394733429, "learning_rate": 3.1401489998232302e-06, "loss": 0.0661, "step": 90280 }, { "epoch": 2.670195776897143, "grad_norm": 0.7219277024269104, "learning_rate": 3.1400223099042906e-06, "loss": 0.0947, "step": 90290 }, { "epoch": 2.670491512391317, "grad_norm": 0.8368471264839172, "learning_rate": 3.1398956199853514e-06, "loss": 0.0827, "step": 90300 }, { "epoch": 2.6707872478854915, "grad_norm": 0.6535025835037231, "learning_rate": 3.139768930066412e-06, "loss": 0.0853, "step": 90310 }, { "epoch": 2.671082983379665, "grad_norm": 0.6607027649879456, "learning_rate": 3.1396422401474726e-06, "loss": 0.0619, "step": 90320 }, { "epoch": 2.6713787188738394, "grad_norm": 0.7779410481452942, "learning_rate": 3.1395155502285333e-06, "loss": 0.0744, "step": 90330 }, { "epoch": 2.6716744543680133, "grad_norm": 0.6144349575042725, "learning_rate": 3.1393888603095937e-06, "loss": 0.0971, "step": 90340 }, { "epoch": 2.6719701898621873, "grad_norm": 1.155900478363037, "learning_rate": 3.1392621703906545e-06, "loss": 0.0874, "step": 90350 }, { "epoch": 2.672265925356361, "grad_norm": 0.7573524713516235, "learning_rate": 3.139135480471715e-06, "loss": 0.0823, "step": 90360 }, { "epoch": 2.672561660850535, "grad_norm": 0.5257236957550049, "learning_rate": 3.1390087905527757e-06, "loss": 0.0792, "step": 90370 }, { "epoch": 2.6728573963447095, "grad_norm": 0.6232516765594482, "learning_rate": 3.138882100633836e-06, "loss": 0.0715, "step": 90380 }, { "epoch": 2.673153131838883, "grad_norm": 0.6436643004417419, "learning_rate": 3.1387554107148972e-06, "loss": 0.1089, "step": 90390 }, { "epoch": 2.6734488673330574, "grad_norm": 1.0591530799865723, "learning_rate": 3.1386287207959576e-06, "loss": 0.0887, "step": 90400 }, { "epoch": 2.6737446028272314, "grad_norm": 0.5422905087471008, "learning_rate": 3.1385020308770184e-06, "loss": 0.0718, "step": 90410 }, { "epoch": 2.6740403383214053, "grad_norm": 0.6752262115478516, "learning_rate": 3.1383753409580788e-06, "loss": 0.0613, "step": 90420 }, { "epoch": 2.6743360738155793, "grad_norm": 0.7706072926521301, "learning_rate": 3.1382486510391396e-06, "loss": 0.076, "step": 90430 }, { "epoch": 2.674631809309753, "grad_norm": 0.5779707431793213, "learning_rate": 3.1381219611202e-06, "loss": 0.0728, "step": 90440 }, { "epoch": 2.6749275448039276, "grad_norm": 1.621048927307129, "learning_rate": 3.1379952712012607e-06, "loss": 0.0947, "step": 90450 }, { "epoch": 2.6752232802981015, "grad_norm": 0.45448482036590576, "learning_rate": 3.137868581282321e-06, "loss": 0.0772, "step": 90460 }, { "epoch": 2.6755190157922755, "grad_norm": 0.5233164429664612, "learning_rate": 3.1377418913633823e-06, "loss": 0.0755, "step": 90470 }, { "epoch": 2.6758147512864494, "grad_norm": 0.8226873874664307, "learning_rate": 3.1376152014444427e-06, "loss": 0.079, "step": 90480 }, { "epoch": 2.6761104867806234, "grad_norm": 0.659397304058075, "learning_rate": 3.1374885115255034e-06, "loss": 0.081, "step": 90490 }, { "epoch": 2.6764062222747973, "grad_norm": 0.6559782028198242, "learning_rate": 3.137361821606564e-06, "loss": 0.0847, "step": 90500 }, { "epoch": 2.6767019577689712, "grad_norm": 1.033419132232666, "learning_rate": 3.1372351316876246e-06, "loss": 0.0583, "step": 90510 }, { "epoch": 2.6769976932631456, "grad_norm": 0.5268020629882812, "learning_rate": 3.137108441768685e-06, "loss": 0.0802, "step": 90520 }, { "epoch": 2.6772934287573196, "grad_norm": 0.5518450736999512, "learning_rate": 3.1369817518497458e-06, "loss": 0.0747, "step": 90530 }, { "epoch": 2.6775891642514935, "grad_norm": 0.5821383595466614, "learning_rate": 3.136855061930806e-06, "loss": 0.0882, "step": 90540 }, { "epoch": 2.6778848997456675, "grad_norm": 0.6088299751281738, "learning_rate": 3.1367283720118673e-06, "loss": 0.0783, "step": 90550 }, { "epoch": 2.6781806352398414, "grad_norm": 0.877235472202301, "learning_rate": 3.1366016820929277e-06, "loss": 0.0717, "step": 90560 }, { "epoch": 2.6784763707340153, "grad_norm": 0.382167786359787, "learning_rate": 3.1364749921739885e-06, "loss": 0.0867, "step": 90570 }, { "epoch": 2.6787721062281893, "grad_norm": 0.8185224533081055, "learning_rate": 3.136348302255049e-06, "loss": 0.0924, "step": 90580 }, { "epoch": 2.6790678417223637, "grad_norm": 0.737246036529541, "learning_rate": 3.1362216123361096e-06, "loss": 0.0893, "step": 90590 }, { "epoch": 2.6793635772165376, "grad_norm": 0.9811809062957764, "learning_rate": 3.13609492241717e-06, "loss": 0.0875, "step": 90600 }, { "epoch": 2.6796593127107116, "grad_norm": 0.6623039245605469, "learning_rate": 3.135968232498231e-06, "loss": 0.0744, "step": 90610 }, { "epoch": 2.6799550482048855, "grad_norm": 0.6660003066062927, "learning_rate": 3.135841542579291e-06, "loss": 0.0674, "step": 90620 }, { "epoch": 2.6802507836990594, "grad_norm": 0.9790269136428833, "learning_rate": 3.1357148526603524e-06, "loss": 0.0685, "step": 90630 }, { "epoch": 2.680546519193234, "grad_norm": 1.2903319597244263, "learning_rate": 3.1355881627414127e-06, "loss": 0.1101, "step": 90640 }, { "epoch": 2.6808422546874073, "grad_norm": 0.6329438090324402, "learning_rate": 3.1354614728224735e-06, "loss": 0.078, "step": 90650 }, { "epoch": 2.6811379901815817, "grad_norm": 1.0601463317871094, "learning_rate": 3.135334782903534e-06, "loss": 0.0733, "step": 90660 }, { "epoch": 2.6814337256757557, "grad_norm": 0.47004395723342896, "learning_rate": 3.1352080929845947e-06, "loss": 0.0794, "step": 90670 }, { "epoch": 2.6817294611699296, "grad_norm": 0.9585199356079102, "learning_rate": 3.135081403065655e-06, "loss": 0.0625, "step": 90680 }, { "epoch": 2.6820251966641036, "grad_norm": 0.6378899812698364, "learning_rate": 3.134954713146716e-06, "loss": 0.0746, "step": 90690 }, { "epoch": 2.6823209321582775, "grad_norm": 0.5804288983345032, "learning_rate": 3.134828023227776e-06, "loss": 0.079, "step": 90700 }, { "epoch": 2.682616667652452, "grad_norm": 0.7720199227333069, "learning_rate": 3.1347013333088374e-06, "loss": 0.0504, "step": 90710 }, { "epoch": 2.682912403146626, "grad_norm": 0.8276724815368652, "learning_rate": 3.1345746433898978e-06, "loss": 0.0774, "step": 90720 }, { "epoch": 2.6832081386407998, "grad_norm": 1.0640239715576172, "learning_rate": 3.134447953470958e-06, "loss": 0.0689, "step": 90730 }, { "epoch": 2.6835038741349737, "grad_norm": 0.9275432229042053, "learning_rate": 3.134321263552019e-06, "loss": 0.0836, "step": 90740 }, { "epoch": 2.6837996096291477, "grad_norm": 0.762121856212616, "learning_rate": 3.1341945736330793e-06, "loss": 0.0938, "step": 90750 }, { "epoch": 2.6840953451233216, "grad_norm": 1.2392762899398804, "learning_rate": 3.13406788371414e-06, "loss": 0.0856, "step": 90760 }, { "epoch": 2.6843910806174955, "grad_norm": 0.9015629887580872, "learning_rate": 3.1339411937952005e-06, "loss": 0.0676, "step": 90770 }, { "epoch": 2.68468681611167, "grad_norm": 0.7756113409996033, "learning_rate": 3.1338145038762613e-06, "loss": 0.0649, "step": 90780 }, { "epoch": 2.684982551605844, "grad_norm": 1.0695077180862427, "learning_rate": 3.133687813957322e-06, "loss": 0.0944, "step": 90790 }, { "epoch": 2.685278287100018, "grad_norm": 0.9264869689941406, "learning_rate": 3.133561124038383e-06, "loss": 0.0929, "step": 90800 }, { "epoch": 2.6855740225941918, "grad_norm": 0.9577769041061401, "learning_rate": 3.133434434119443e-06, "loss": 0.0784, "step": 90810 }, { "epoch": 2.6858697580883657, "grad_norm": 0.41370174288749695, "learning_rate": 3.133307744200504e-06, "loss": 0.0613, "step": 90820 }, { "epoch": 2.6861654935825396, "grad_norm": 0.832124650478363, "learning_rate": 3.1331810542815644e-06, "loss": 0.0736, "step": 90830 }, { "epoch": 2.6864612290767136, "grad_norm": 0.7711387872695923, "learning_rate": 3.133054364362625e-06, "loss": 0.0838, "step": 90840 }, { "epoch": 2.686756964570888, "grad_norm": 1.0662717819213867, "learning_rate": 3.1329276744436855e-06, "loss": 0.087, "step": 90850 }, { "epoch": 2.687052700065062, "grad_norm": 1.0449663400650024, "learning_rate": 3.1328009845247463e-06, "loss": 0.0923, "step": 90860 }, { "epoch": 2.687348435559236, "grad_norm": 0.4953235387802124, "learning_rate": 3.132674294605807e-06, "loss": 0.0644, "step": 90870 }, { "epoch": 2.68764417105341, "grad_norm": 1.0670602321624756, "learning_rate": 3.132547604686868e-06, "loss": 0.073, "step": 90880 }, { "epoch": 2.6879399065475837, "grad_norm": 0.5340431928634644, "learning_rate": 3.1324209147679282e-06, "loss": 0.0814, "step": 90890 }, { "epoch": 2.6882356420417577, "grad_norm": 0.9946364760398865, "learning_rate": 3.132294224848989e-06, "loss": 0.0779, "step": 90900 }, { "epoch": 2.6885313775359316, "grad_norm": 0.4898955821990967, "learning_rate": 3.1321675349300494e-06, "loss": 0.0856, "step": 90910 }, { "epoch": 2.688827113030106, "grad_norm": 0.5643196702003479, "learning_rate": 3.13204084501111e-06, "loss": 0.0765, "step": 90920 }, { "epoch": 2.68912284852428, "grad_norm": 0.6884992718696594, "learning_rate": 3.1319141550921706e-06, "loss": 0.057, "step": 90930 }, { "epoch": 2.689418584018454, "grad_norm": 0.5996238589286804, "learning_rate": 3.1317874651732313e-06, "loss": 0.0792, "step": 90940 }, { "epoch": 2.689714319512628, "grad_norm": 0.781025230884552, "learning_rate": 3.131660775254292e-06, "loss": 0.0798, "step": 90950 }, { "epoch": 2.690010055006802, "grad_norm": 1.065535306930542, "learning_rate": 3.131534085335353e-06, "loss": 0.0836, "step": 90960 }, { "epoch": 2.690305790500976, "grad_norm": 0.6128410696983337, "learning_rate": 3.1314073954164133e-06, "loss": 0.0803, "step": 90970 }, { "epoch": 2.6906015259951497, "grad_norm": 0.7672094106674194, "learning_rate": 3.131280705497474e-06, "loss": 0.067, "step": 90980 }, { "epoch": 2.690897261489324, "grad_norm": 0.6350921988487244, "learning_rate": 3.1311540155785344e-06, "loss": 0.0786, "step": 90990 }, { "epoch": 2.691192996983498, "grad_norm": 0.9753614664077759, "learning_rate": 3.1310273256595952e-06, "loss": 0.0925, "step": 91000 }, { "epoch": 2.691488732477672, "grad_norm": 1.1595263481140137, "learning_rate": 3.1309006357406556e-06, "loss": 0.0816, "step": 91010 }, { "epoch": 2.691784467971846, "grad_norm": 0.6525536179542542, "learning_rate": 3.1307739458217164e-06, "loss": 0.0669, "step": 91020 }, { "epoch": 2.69208020346602, "grad_norm": 0.910573422908783, "learning_rate": 3.130647255902777e-06, "loss": 0.0764, "step": 91030 }, { "epoch": 2.6923759389601942, "grad_norm": 0.5318692326545715, "learning_rate": 3.130520565983838e-06, "loss": 0.079, "step": 91040 }, { "epoch": 2.692671674454368, "grad_norm": 0.7465441226959229, "learning_rate": 3.1303938760648983e-06, "loss": 0.0786, "step": 91050 }, { "epoch": 2.692967409948542, "grad_norm": 1.0148404836654663, "learning_rate": 3.130267186145959e-06, "loss": 0.0707, "step": 91060 }, { "epoch": 2.693263145442716, "grad_norm": 0.5695400238037109, "learning_rate": 3.1301404962270195e-06, "loss": 0.0721, "step": 91070 }, { "epoch": 2.69355888093689, "grad_norm": 0.7468443512916565, "learning_rate": 3.1300138063080803e-06, "loss": 0.0621, "step": 91080 }, { "epoch": 2.693854616431064, "grad_norm": 0.96971195936203, "learning_rate": 3.1298871163891406e-06, "loss": 0.0892, "step": 91090 }, { "epoch": 2.694150351925238, "grad_norm": 1.1284010410308838, "learning_rate": 3.1297604264702014e-06, "loss": 0.0915, "step": 91100 }, { "epoch": 2.6944460874194123, "grad_norm": 0.5944033861160278, "learning_rate": 3.1296337365512622e-06, "loss": 0.0902, "step": 91110 }, { "epoch": 2.694741822913586, "grad_norm": 0.9494492411613464, "learning_rate": 3.129507046632323e-06, "loss": 0.0573, "step": 91120 }, { "epoch": 2.69503755840776, "grad_norm": 0.9836544990539551, "learning_rate": 3.1293803567133834e-06, "loss": 0.0713, "step": 91130 }, { "epoch": 2.695333293901934, "grad_norm": 0.9101495146751404, "learning_rate": 3.1292536667944437e-06, "loss": 0.0886, "step": 91140 }, { "epoch": 2.695629029396108, "grad_norm": 0.6020776629447937, "learning_rate": 3.1291269768755045e-06, "loss": 0.0639, "step": 91150 }, { "epoch": 2.695924764890282, "grad_norm": 0.6638724207878113, "learning_rate": 3.129000286956565e-06, "loss": 0.0897, "step": 91160 }, { "epoch": 2.696220500384456, "grad_norm": 0.6056152582168579, "learning_rate": 3.1288735970376257e-06, "loss": 0.0739, "step": 91170 }, { "epoch": 2.6965162358786303, "grad_norm": 0.7096302509307861, "learning_rate": 3.128746907118686e-06, "loss": 0.0621, "step": 91180 }, { "epoch": 2.6968119713728043, "grad_norm": 1.0227320194244385, "learning_rate": 3.1286202171997473e-06, "loss": 0.0876, "step": 91190 }, { "epoch": 2.697107706866978, "grad_norm": 0.5666522979736328, "learning_rate": 3.1284935272808076e-06, "loss": 0.0831, "step": 91200 }, { "epoch": 2.697403442361152, "grad_norm": 0.6267540454864502, "learning_rate": 3.1283668373618684e-06, "loss": 0.0747, "step": 91210 }, { "epoch": 2.697699177855326, "grad_norm": 0.6857399344444275, "learning_rate": 3.128240147442929e-06, "loss": 0.0761, "step": 91220 }, { "epoch": 2.6979949133495005, "grad_norm": 0.8295357823371887, "learning_rate": 3.1281134575239896e-06, "loss": 0.0747, "step": 91230 }, { "epoch": 2.698290648843674, "grad_norm": 0.735268235206604, "learning_rate": 3.12798676760505e-06, "loss": 0.0904, "step": 91240 }, { "epoch": 2.6985863843378484, "grad_norm": 0.844810962677002, "learning_rate": 3.1278600776861107e-06, "loss": 0.0734, "step": 91250 }, { "epoch": 2.6988821198320223, "grad_norm": 1.2175278663635254, "learning_rate": 3.127733387767171e-06, "loss": 0.0785, "step": 91260 }, { "epoch": 2.6991778553261963, "grad_norm": 0.5750241279602051, "learning_rate": 3.1276066978482323e-06, "loss": 0.0898, "step": 91270 }, { "epoch": 2.69947359082037, "grad_norm": 0.72725909948349, "learning_rate": 3.1274800079292927e-06, "loss": 0.0728, "step": 91280 }, { "epoch": 2.699769326314544, "grad_norm": 0.9342910051345825, "learning_rate": 3.1273533180103535e-06, "loss": 0.0793, "step": 91290 }, { "epoch": 2.7000650618087185, "grad_norm": 0.9233747720718384, "learning_rate": 3.127226628091414e-06, "loss": 0.0778, "step": 91300 }, { "epoch": 2.700360797302892, "grad_norm": 0.8422476053237915, "learning_rate": 3.1270999381724746e-06, "loss": 0.0669, "step": 91310 }, { "epoch": 2.7006565327970664, "grad_norm": 0.8113466501235962, "learning_rate": 3.126973248253535e-06, "loss": 0.0653, "step": 91320 }, { "epoch": 2.7009522682912404, "grad_norm": 0.7357101440429688, "learning_rate": 3.1268465583345958e-06, "loss": 0.0699, "step": 91330 }, { "epoch": 2.7012480037854143, "grad_norm": 0.8789581060409546, "learning_rate": 3.126719868415656e-06, "loss": 0.0918, "step": 91340 }, { "epoch": 2.7015437392795882, "grad_norm": 0.6216180324554443, "learning_rate": 3.1265931784967174e-06, "loss": 0.0909, "step": 91350 }, { "epoch": 2.701839474773762, "grad_norm": 0.9548308253288269, "learning_rate": 3.1264664885777777e-06, "loss": 0.082, "step": 91360 }, { "epoch": 2.7021352102679366, "grad_norm": 0.9567369818687439, "learning_rate": 3.1263397986588385e-06, "loss": 0.0704, "step": 91370 }, { "epoch": 2.7024309457621105, "grad_norm": 0.6836733818054199, "learning_rate": 3.126213108739899e-06, "loss": 0.0682, "step": 91380 }, { "epoch": 2.7027266812562845, "grad_norm": 0.7270485758781433, "learning_rate": 3.1260864188209597e-06, "loss": 0.0724, "step": 91390 }, { "epoch": 2.7030224167504584, "grad_norm": 0.9753693342208862, "learning_rate": 3.12595972890202e-06, "loss": 0.0813, "step": 91400 }, { "epoch": 2.7033181522446323, "grad_norm": 0.5661029815673828, "learning_rate": 3.125833038983081e-06, "loss": 0.0767, "step": 91410 }, { "epoch": 2.7036138877388063, "grad_norm": 0.7317980527877808, "learning_rate": 3.125706349064141e-06, "loss": 0.0785, "step": 91420 }, { "epoch": 2.7039096232329802, "grad_norm": 0.8393252491950989, "learning_rate": 3.1255796591452024e-06, "loss": 0.071, "step": 91430 }, { "epoch": 2.7042053587271546, "grad_norm": 0.7095303535461426, "learning_rate": 3.1254529692262628e-06, "loss": 0.0893, "step": 91440 }, { "epoch": 2.7045010942213286, "grad_norm": 0.9665932059288025, "learning_rate": 3.1253262793073236e-06, "loss": 0.0743, "step": 91450 }, { "epoch": 2.7047968297155025, "grad_norm": 0.5349906086921692, "learning_rate": 3.125199589388384e-06, "loss": 0.0782, "step": 91460 }, { "epoch": 2.7050925652096764, "grad_norm": 1.0417506694793701, "learning_rate": 3.1250728994694447e-06, "loss": 0.0686, "step": 91470 }, { "epoch": 2.7053883007038504, "grad_norm": 0.8561083078384399, "learning_rate": 3.124946209550505e-06, "loss": 0.0701, "step": 91480 }, { "epoch": 2.7056840361980243, "grad_norm": 0.8833364844322205, "learning_rate": 3.124819519631566e-06, "loss": 0.0808, "step": 91490 }, { "epoch": 2.7059797716921983, "grad_norm": 0.6040852069854736, "learning_rate": 3.1246928297126262e-06, "loss": 0.085, "step": 91500 }, { "epoch": 2.7062755071863727, "grad_norm": 0.8121662735939026, "learning_rate": 3.1245661397936875e-06, "loss": 0.098, "step": 91510 }, { "epoch": 2.7065712426805466, "grad_norm": 0.7272875308990479, "learning_rate": 3.124439449874748e-06, "loss": 0.0751, "step": 91520 }, { "epoch": 2.7068669781747206, "grad_norm": 0.7249480485916138, "learning_rate": 3.1243127599558086e-06, "loss": 0.0459, "step": 91530 }, { "epoch": 2.7071627136688945, "grad_norm": 0.6206229329109192, "learning_rate": 3.124186070036869e-06, "loss": 0.0947, "step": 91540 }, { "epoch": 2.7074584491630684, "grad_norm": 1.167238473892212, "learning_rate": 3.1240593801179293e-06, "loss": 0.0881, "step": 91550 }, { "epoch": 2.707754184657243, "grad_norm": 0.37622278928756714, "learning_rate": 3.12393269019899e-06, "loss": 0.0759, "step": 91560 }, { "epoch": 2.7080499201514163, "grad_norm": 1.0339877605438232, "learning_rate": 3.1238060002800505e-06, "loss": 0.0576, "step": 91570 }, { "epoch": 2.7083456556455907, "grad_norm": 0.8846235871315002, "learning_rate": 3.1236793103611113e-06, "loss": 0.0728, "step": 91580 }, { "epoch": 2.7086413911397647, "grad_norm": 0.6534650325775146, "learning_rate": 3.123552620442172e-06, "loss": 0.0896, "step": 91590 }, { "epoch": 2.7089371266339386, "grad_norm": 0.7954158782958984, "learning_rate": 3.123425930523233e-06, "loss": 0.0903, "step": 91600 }, { "epoch": 2.7092328621281125, "grad_norm": 0.9001904726028442, "learning_rate": 3.1232992406042932e-06, "loss": 0.0926, "step": 91610 }, { "epoch": 2.7095285976222865, "grad_norm": 0.8158282041549683, "learning_rate": 3.123172550685354e-06, "loss": 0.0616, "step": 91620 }, { "epoch": 2.709824333116461, "grad_norm": 1.0610244274139404, "learning_rate": 3.1230458607664144e-06, "loss": 0.0773, "step": 91630 }, { "epoch": 2.710120068610635, "grad_norm": 0.8141051530838013, "learning_rate": 3.122919170847475e-06, "loss": 0.0797, "step": 91640 }, { "epoch": 2.7104158041048088, "grad_norm": 0.7069621086120605, "learning_rate": 3.1227924809285355e-06, "loss": 0.0945, "step": 91650 }, { "epoch": 2.7107115395989827, "grad_norm": 0.7156313061714172, "learning_rate": 3.1226657910095963e-06, "loss": 0.0762, "step": 91660 }, { "epoch": 2.7110072750931566, "grad_norm": 0.442388117313385, "learning_rate": 3.122539101090657e-06, "loss": 0.0562, "step": 91670 }, { "epoch": 2.7113030105873306, "grad_norm": 0.5957505106925964, "learning_rate": 3.122412411171718e-06, "loss": 0.0606, "step": 91680 }, { "epoch": 2.7115987460815045, "grad_norm": 0.49646109342575073, "learning_rate": 3.1222857212527783e-06, "loss": 0.0781, "step": 91690 }, { "epoch": 2.711894481575679, "grad_norm": 0.8345279693603516, "learning_rate": 3.122159031333839e-06, "loss": 0.0879, "step": 91700 }, { "epoch": 2.712190217069853, "grad_norm": 0.7474302649497986, "learning_rate": 3.1220323414148994e-06, "loss": 0.0952, "step": 91710 }, { "epoch": 2.712485952564027, "grad_norm": 1.008103847503662, "learning_rate": 3.1219056514959602e-06, "loss": 0.0826, "step": 91720 }, { "epoch": 2.7127816880582007, "grad_norm": 1.0750371217727661, "learning_rate": 3.1217789615770206e-06, "loss": 0.0855, "step": 91730 }, { "epoch": 2.7130774235523747, "grad_norm": 0.7498418092727661, "learning_rate": 3.1216522716580814e-06, "loss": 0.0978, "step": 91740 }, { "epoch": 2.7133731590465486, "grad_norm": 0.5064898133277893, "learning_rate": 3.121525581739142e-06, "loss": 0.0923, "step": 91750 }, { "epoch": 2.7136688945407226, "grad_norm": 0.6094392538070679, "learning_rate": 3.121398891820203e-06, "loss": 0.0808, "step": 91760 }, { "epoch": 2.713964630034897, "grad_norm": 0.6638646125793457, "learning_rate": 3.1212722019012633e-06, "loss": 0.0564, "step": 91770 }, { "epoch": 2.714260365529071, "grad_norm": 0.9718143343925476, "learning_rate": 3.121145511982324e-06, "loss": 0.0823, "step": 91780 }, { "epoch": 2.714556101023245, "grad_norm": 0.8536917567253113, "learning_rate": 3.1210188220633845e-06, "loss": 0.084, "step": 91790 }, { "epoch": 2.714851836517419, "grad_norm": 0.8697062730789185, "learning_rate": 3.1208921321444453e-06, "loss": 0.0879, "step": 91800 }, { "epoch": 2.7151475720115927, "grad_norm": 0.8480314016342163, "learning_rate": 3.1207654422255056e-06, "loss": 0.0738, "step": 91810 }, { "epoch": 2.7154433075057667, "grad_norm": 1.0337470769882202, "learning_rate": 3.1206387523065664e-06, "loss": 0.0669, "step": 91820 }, { "epoch": 2.7157390429999406, "grad_norm": 0.8268803954124451, "learning_rate": 3.120512062387627e-06, "loss": 0.0814, "step": 91830 }, { "epoch": 2.716034778494115, "grad_norm": 1.1850677728652954, "learning_rate": 3.120385372468688e-06, "loss": 0.0837, "step": 91840 }, { "epoch": 2.716330513988289, "grad_norm": 1.0710760354995728, "learning_rate": 3.1202586825497484e-06, "loss": 0.0745, "step": 91850 }, { "epoch": 2.716626249482463, "grad_norm": 0.6493810415267944, "learning_rate": 3.120131992630809e-06, "loss": 0.0854, "step": 91860 }, { "epoch": 2.716921984976637, "grad_norm": 0.7316933870315552, "learning_rate": 3.1200053027118695e-06, "loss": 0.072, "step": 91870 }, { "epoch": 2.717217720470811, "grad_norm": 0.8973621129989624, "learning_rate": 3.1198786127929303e-06, "loss": 0.0781, "step": 91880 }, { "epoch": 2.717513455964985, "grad_norm": 0.5687627196311951, "learning_rate": 3.1197519228739907e-06, "loss": 0.0825, "step": 91890 }, { "epoch": 2.7178091914591587, "grad_norm": 0.5793851017951965, "learning_rate": 3.1196252329550515e-06, "loss": 0.0665, "step": 91900 }, { "epoch": 2.718104926953333, "grad_norm": 0.8946505784988403, "learning_rate": 3.1194985430361123e-06, "loss": 0.0647, "step": 91910 }, { "epoch": 2.718400662447507, "grad_norm": 0.6753572225570679, "learning_rate": 3.119371853117173e-06, "loss": 0.0755, "step": 91920 }, { "epoch": 2.718696397941681, "grad_norm": 0.6194847822189331, "learning_rate": 3.1192451631982334e-06, "loss": 0.0591, "step": 91930 }, { "epoch": 2.718992133435855, "grad_norm": 0.7390021085739136, "learning_rate": 3.119118473279294e-06, "loss": 0.0905, "step": 91940 }, { "epoch": 2.719287868930029, "grad_norm": 0.7470808625221252, "learning_rate": 3.1189917833603546e-06, "loss": 0.0762, "step": 91950 }, { "epoch": 2.719583604424203, "grad_norm": 1.0905466079711914, "learning_rate": 3.118865093441415e-06, "loss": 0.0868, "step": 91960 }, { "epoch": 2.719879339918377, "grad_norm": 0.743969738483429, "learning_rate": 3.1187384035224757e-06, "loss": 0.0695, "step": 91970 }, { "epoch": 2.720175075412551, "grad_norm": 1.176019549369812, "learning_rate": 3.118611713603536e-06, "loss": 0.0779, "step": 91980 }, { "epoch": 2.720470810906725, "grad_norm": 0.5451492667198181, "learning_rate": 3.1184850236845973e-06, "loss": 0.085, "step": 91990 }, { "epoch": 2.720766546400899, "grad_norm": 0.5030705332756042, "learning_rate": 3.1183583337656577e-06, "loss": 0.0742, "step": 92000 }, { "epoch": 2.721062281895073, "grad_norm": 0.9842029809951782, "learning_rate": 3.1182316438467185e-06, "loss": 0.0774, "step": 92010 }, { "epoch": 2.721358017389247, "grad_norm": 0.6497399806976318, "learning_rate": 3.118104953927779e-06, "loss": 0.0592, "step": 92020 }, { "epoch": 2.7216537528834213, "grad_norm": 0.5834762454032898, "learning_rate": 3.1179782640088396e-06, "loss": 0.0706, "step": 92030 }, { "epoch": 2.721949488377595, "grad_norm": 0.8243691921234131, "learning_rate": 3.1178515740899e-06, "loss": 0.0857, "step": 92040 }, { "epoch": 2.722245223871769, "grad_norm": 1.0040189027786255, "learning_rate": 3.1177248841709608e-06, "loss": 0.1026, "step": 92050 }, { "epoch": 2.722540959365943, "grad_norm": 0.6492526531219482, "learning_rate": 3.117598194252021e-06, "loss": 0.0722, "step": 92060 }, { "epoch": 2.722836694860117, "grad_norm": 0.9607149958610535, "learning_rate": 3.1174715043330823e-06, "loss": 0.0871, "step": 92070 }, { "epoch": 2.723132430354291, "grad_norm": 0.9031463861465454, "learning_rate": 3.1173448144141427e-06, "loss": 0.0762, "step": 92080 }, { "epoch": 2.723428165848465, "grad_norm": 1.1149954795837402, "learning_rate": 3.1172181244952035e-06, "loss": 0.0809, "step": 92090 }, { "epoch": 2.7237239013426393, "grad_norm": 1.2285327911376953, "learning_rate": 3.117091434576264e-06, "loss": 0.0808, "step": 92100 }, { "epoch": 2.7240196368368133, "grad_norm": 0.5355067849159241, "learning_rate": 3.1169647446573247e-06, "loss": 0.0705, "step": 92110 }, { "epoch": 2.724315372330987, "grad_norm": 0.4711681604385376, "learning_rate": 3.116838054738385e-06, "loss": 0.0648, "step": 92120 }, { "epoch": 2.724611107825161, "grad_norm": 1.0204617977142334, "learning_rate": 3.116711364819446e-06, "loss": 0.0807, "step": 92130 }, { "epoch": 2.724906843319335, "grad_norm": 0.8919541835784912, "learning_rate": 3.116584674900506e-06, "loss": 0.0876, "step": 92140 }, { "epoch": 2.7252025788135095, "grad_norm": 0.5937910079956055, "learning_rate": 3.1164579849815674e-06, "loss": 0.0739, "step": 92150 }, { "epoch": 2.725498314307683, "grad_norm": 0.6423051953315735, "learning_rate": 3.1163312950626278e-06, "loss": 0.0747, "step": 92160 }, { "epoch": 2.7257940498018574, "grad_norm": 0.2792232930660248, "learning_rate": 3.1162046051436885e-06, "loss": 0.0838, "step": 92170 }, { "epoch": 2.7260897852960313, "grad_norm": 1.429392695426941, "learning_rate": 3.116077915224749e-06, "loss": 0.0638, "step": 92180 }, { "epoch": 2.7263855207902052, "grad_norm": 0.8390551805496216, "learning_rate": 3.1159512253058097e-06, "loss": 0.0858, "step": 92190 }, { "epoch": 2.726681256284379, "grad_norm": 0.4674932360649109, "learning_rate": 3.11582453538687e-06, "loss": 0.0766, "step": 92200 }, { "epoch": 2.726976991778553, "grad_norm": 1.2126548290252686, "learning_rate": 3.115697845467931e-06, "loss": 0.0842, "step": 92210 }, { "epoch": 2.7272727272727275, "grad_norm": 0.7617209553718567, "learning_rate": 3.1155711555489912e-06, "loss": 0.0666, "step": 92220 }, { "epoch": 2.727568462766901, "grad_norm": 1.412642240524292, "learning_rate": 3.1154444656300524e-06, "loss": 0.0795, "step": 92230 }, { "epoch": 2.7278641982610754, "grad_norm": 1.0986709594726562, "learning_rate": 3.115317775711113e-06, "loss": 0.0719, "step": 92240 }, { "epoch": 2.7281599337552493, "grad_norm": 0.7856982946395874, "learning_rate": 3.1151910857921736e-06, "loss": 0.0726, "step": 92250 }, { "epoch": 2.7284556692494233, "grad_norm": 0.6488982439041138, "learning_rate": 3.115064395873234e-06, "loss": 0.0744, "step": 92260 }, { "epoch": 2.7287514047435972, "grad_norm": 0.613550066947937, "learning_rate": 3.1149377059542947e-06, "loss": 0.0778, "step": 92270 }, { "epoch": 2.729047140237771, "grad_norm": 0.6899893879890442, "learning_rate": 3.114811016035355e-06, "loss": 0.0673, "step": 92280 }, { "epoch": 2.7293428757319456, "grad_norm": 0.6171810626983643, "learning_rate": 3.114684326116416e-06, "loss": 0.0829, "step": 92290 }, { "epoch": 2.7296386112261195, "grad_norm": 1.0897847414016724, "learning_rate": 3.1145576361974763e-06, "loss": 0.0932, "step": 92300 }, { "epoch": 2.7299343467202934, "grad_norm": 0.727360725402832, "learning_rate": 3.1144309462785375e-06, "loss": 0.0807, "step": 92310 }, { "epoch": 2.7302300822144674, "grad_norm": 0.42115554213523865, "learning_rate": 3.114304256359598e-06, "loss": 0.0598, "step": 92320 }, { "epoch": 2.7305258177086413, "grad_norm": 1.0268560647964478, "learning_rate": 3.1141775664406586e-06, "loss": 0.0706, "step": 92330 }, { "epoch": 2.7308215532028153, "grad_norm": 0.7476699948310852, "learning_rate": 3.114050876521719e-06, "loss": 0.1008, "step": 92340 }, { "epoch": 2.731117288696989, "grad_norm": 0.5552934408187866, "learning_rate": 3.11392418660278e-06, "loss": 0.0722, "step": 92350 }, { "epoch": 2.7314130241911636, "grad_norm": 0.7878734469413757, "learning_rate": 3.11379749668384e-06, "loss": 0.0814, "step": 92360 }, { "epoch": 2.7317087596853376, "grad_norm": 0.8851650953292847, "learning_rate": 3.113670806764901e-06, "loss": 0.0668, "step": 92370 }, { "epoch": 2.7320044951795115, "grad_norm": 0.7189567685127258, "learning_rate": 3.1135441168459613e-06, "loss": 0.0683, "step": 92380 }, { "epoch": 2.7323002306736854, "grad_norm": 0.7135301828384399, "learning_rate": 3.113417426927022e-06, "loss": 0.0938, "step": 92390 }, { "epoch": 2.7325959661678594, "grad_norm": 1.2716652154922485, "learning_rate": 3.113290737008083e-06, "loss": 0.0883, "step": 92400 }, { "epoch": 2.7328917016620333, "grad_norm": 0.9345539212226868, "learning_rate": 3.1131640470891433e-06, "loss": 0.0928, "step": 92410 }, { "epoch": 2.7331874371562073, "grad_norm": 0.8294223546981812, "learning_rate": 3.113037357170204e-06, "loss": 0.065, "step": 92420 }, { "epoch": 2.7334831726503817, "grad_norm": 0.5907031893730164, "learning_rate": 3.1129106672512644e-06, "loss": 0.0715, "step": 92430 }, { "epoch": 2.7337789081445556, "grad_norm": 0.8005092144012451, "learning_rate": 3.112783977332325e-06, "loss": 0.086, "step": 92440 }, { "epoch": 2.7340746436387295, "grad_norm": 0.3495950996875763, "learning_rate": 3.1126572874133856e-06, "loss": 0.0724, "step": 92450 }, { "epoch": 2.7343703791329035, "grad_norm": 0.938919723033905, "learning_rate": 3.1125305974944464e-06, "loss": 0.0688, "step": 92460 }, { "epoch": 2.7346661146270774, "grad_norm": 0.8359476327896118, "learning_rate": 3.112403907575507e-06, "loss": 0.0863, "step": 92470 }, { "epoch": 2.734961850121252, "grad_norm": 0.7626363039016724, "learning_rate": 3.112277217656568e-06, "loss": 0.0794, "step": 92480 }, { "epoch": 2.7352575856154253, "grad_norm": 1.3459359407424927, "learning_rate": 3.1121505277376283e-06, "loss": 0.0973, "step": 92490 }, { "epoch": 2.7355533211095997, "grad_norm": 0.636470377445221, "learning_rate": 3.112023837818689e-06, "loss": 0.0818, "step": 92500 }, { "epoch": 2.7358490566037736, "grad_norm": 0.6409415006637573, "learning_rate": 3.1118971478997495e-06, "loss": 0.069, "step": 92510 }, { "epoch": 2.7361447920979476, "grad_norm": 0.7072123289108276, "learning_rate": 3.1117704579808102e-06, "loss": 0.0699, "step": 92520 }, { "epoch": 2.7364405275921215, "grad_norm": 0.85946124792099, "learning_rate": 3.1116437680618706e-06, "loss": 0.0773, "step": 92530 }, { "epoch": 2.7367362630862955, "grad_norm": 0.947352409362793, "learning_rate": 3.1115170781429314e-06, "loss": 0.1122, "step": 92540 }, { "epoch": 2.73703199858047, "grad_norm": 1.0115368366241455, "learning_rate": 3.111390388223992e-06, "loss": 0.088, "step": 92550 }, { "epoch": 2.737327734074644, "grad_norm": 0.7275092005729675, "learning_rate": 3.111263698305053e-06, "loss": 0.0729, "step": 92560 }, { "epoch": 2.7376234695688177, "grad_norm": 1.3297559022903442, "learning_rate": 3.1111370083861133e-06, "loss": 0.0787, "step": 92570 }, { "epoch": 2.7379192050629917, "grad_norm": 0.8179389834403992, "learning_rate": 3.111010318467174e-06, "loss": 0.0866, "step": 92580 }, { "epoch": 2.7382149405571656, "grad_norm": 0.8599823713302612, "learning_rate": 3.1108836285482345e-06, "loss": 0.0924, "step": 92590 }, { "epoch": 2.7385106760513396, "grad_norm": 0.9649415016174316, "learning_rate": 3.1107569386292953e-06, "loss": 0.1025, "step": 92600 }, { "epoch": 2.7388064115455135, "grad_norm": 0.9753228425979614, "learning_rate": 3.1106302487103557e-06, "loss": 0.0706, "step": 92610 }, { "epoch": 2.739102147039688, "grad_norm": 0.8803365230560303, "learning_rate": 3.1105035587914164e-06, "loss": 0.0724, "step": 92620 }, { "epoch": 2.739397882533862, "grad_norm": 0.7657085061073303, "learning_rate": 3.1103768688724772e-06, "loss": 0.0623, "step": 92630 }, { "epoch": 2.739693618028036, "grad_norm": 0.7733266949653625, "learning_rate": 3.110250178953538e-06, "loss": 0.1145, "step": 92640 }, { "epoch": 2.7399893535222097, "grad_norm": 0.8703230023384094, "learning_rate": 3.1101234890345984e-06, "loss": 0.0871, "step": 92650 }, { "epoch": 2.7402850890163837, "grad_norm": 0.8188963532447815, "learning_rate": 3.109996799115659e-06, "loss": 0.078, "step": 92660 }, { "epoch": 2.7405808245105576, "grad_norm": 0.8444216847419739, "learning_rate": 3.1098701091967195e-06, "loss": 0.0676, "step": 92670 }, { "epoch": 2.7408765600047316, "grad_norm": 1.1119968891143799, "learning_rate": 3.1097434192777803e-06, "loss": 0.071, "step": 92680 }, { "epoch": 2.741172295498906, "grad_norm": 1.0446362495422363, "learning_rate": 3.1096167293588407e-06, "loss": 0.0888, "step": 92690 }, { "epoch": 2.74146803099308, "grad_norm": 0.7734158635139465, "learning_rate": 3.1094900394399015e-06, "loss": 0.0851, "step": 92700 }, { "epoch": 2.741763766487254, "grad_norm": 0.560750424861908, "learning_rate": 3.1093633495209623e-06, "loss": 0.0721, "step": 92710 }, { "epoch": 2.742059501981428, "grad_norm": 1.0099109411239624, "learning_rate": 3.109236659602023e-06, "loss": 0.0807, "step": 92720 }, { "epoch": 2.7423552374756017, "grad_norm": 1.243688702583313, "learning_rate": 3.1091099696830834e-06, "loss": 0.0799, "step": 92730 }, { "epoch": 2.7426509729697757, "grad_norm": 0.8674187660217285, "learning_rate": 3.1089832797641442e-06, "loss": 0.0892, "step": 92740 }, { "epoch": 2.7429467084639496, "grad_norm": 0.9412120580673218, "learning_rate": 3.1088565898452046e-06, "loss": 0.096, "step": 92750 }, { "epoch": 2.743242443958124, "grad_norm": 1.0238577127456665, "learning_rate": 3.1087298999262654e-06, "loss": 0.093, "step": 92760 }, { "epoch": 2.743538179452298, "grad_norm": 1.0711164474487305, "learning_rate": 3.1086032100073258e-06, "loss": 0.0724, "step": 92770 }, { "epoch": 2.743833914946472, "grad_norm": 0.6817315220832825, "learning_rate": 3.1084765200883865e-06, "loss": 0.0795, "step": 92780 }, { "epoch": 2.744129650440646, "grad_norm": 0.7714031934738159, "learning_rate": 3.1083498301694473e-06, "loss": 0.0962, "step": 92790 }, { "epoch": 2.7444253859348198, "grad_norm": 0.7007291316986084, "learning_rate": 3.1082231402505077e-06, "loss": 0.0824, "step": 92800 }, { "epoch": 2.744721121428994, "grad_norm": 0.5472589135169983, "learning_rate": 3.1080964503315685e-06, "loss": 0.0889, "step": 92810 }, { "epoch": 2.7450168569231677, "grad_norm": 0.6544780135154724, "learning_rate": 3.107969760412629e-06, "loss": 0.0774, "step": 92820 }, { "epoch": 2.745312592417342, "grad_norm": 0.8387486338615417, "learning_rate": 3.1078430704936896e-06, "loss": 0.0698, "step": 92830 }, { "epoch": 2.745608327911516, "grad_norm": 1.2828631401062012, "learning_rate": 3.10771638057475e-06, "loss": 0.0934, "step": 92840 }, { "epoch": 2.74590406340569, "grad_norm": 0.6109793782234192, "learning_rate": 3.107589690655811e-06, "loss": 0.075, "step": 92850 }, { "epoch": 2.746199798899864, "grad_norm": 1.2126744985580444, "learning_rate": 3.107463000736871e-06, "loss": 0.0796, "step": 92860 }, { "epoch": 2.746495534394038, "grad_norm": 0.9549990892410278, "learning_rate": 3.1073363108179324e-06, "loss": 0.081, "step": 92870 }, { "epoch": 2.746791269888212, "grad_norm": 0.8640692234039307, "learning_rate": 3.1072096208989927e-06, "loss": 0.0529, "step": 92880 }, { "epoch": 2.747087005382386, "grad_norm": 0.7291214466094971, "learning_rate": 3.1070829309800535e-06, "loss": 0.0835, "step": 92890 }, { "epoch": 2.74738274087656, "grad_norm": 1.155542254447937, "learning_rate": 3.106956241061114e-06, "loss": 0.0892, "step": 92900 }, { "epoch": 2.747678476370734, "grad_norm": 0.4068027138710022, "learning_rate": 3.1068295511421747e-06, "loss": 0.0779, "step": 92910 }, { "epoch": 2.747974211864908, "grad_norm": 1.50215744972229, "learning_rate": 3.106702861223235e-06, "loss": 0.077, "step": 92920 }, { "epoch": 2.748269947359082, "grad_norm": 1.158484935760498, "learning_rate": 3.106576171304296e-06, "loss": 0.0751, "step": 92930 }, { "epoch": 2.748565682853256, "grad_norm": 0.9586952924728394, "learning_rate": 3.106449481385356e-06, "loss": 0.0944, "step": 92940 }, { "epoch": 2.7488614183474303, "grad_norm": 0.7750362753868103, "learning_rate": 3.1063227914664174e-06, "loss": 0.0768, "step": 92950 }, { "epoch": 2.749157153841604, "grad_norm": 0.8834481835365295, "learning_rate": 3.1061961015474778e-06, "loss": 0.0794, "step": 92960 }, { "epoch": 2.749452889335778, "grad_norm": 0.7662191390991211, "learning_rate": 3.1060694116285386e-06, "loss": 0.0632, "step": 92970 }, { "epoch": 2.749748624829952, "grad_norm": 0.9342902302742004, "learning_rate": 3.105942721709599e-06, "loss": 0.0724, "step": 92980 }, { "epoch": 2.750044360324126, "grad_norm": 0.7013269066810608, "learning_rate": 3.1058160317906597e-06, "loss": 0.085, "step": 92990 }, { "epoch": 2.7503400958183, "grad_norm": 0.4856949746608734, "learning_rate": 3.10568934187172e-06, "loss": 0.0748, "step": 93000 }, { "epoch": 2.750635831312474, "grad_norm": 0.623192310333252, "learning_rate": 3.105562651952781e-06, "loss": 0.0858, "step": 93010 }, { "epoch": 2.7509315668066483, "grad_norm": 0.5661178231239319, "learning_rate": 3.1054359620338413e-06, "loss": 0.076, "step": 93020 }, { "epoch": 2.7512273023008222, "grad_norm": 0.7227466702461243, "learning_rate": 3.1053092721149025e-06, "loss": 0.0606, "step": 93030 }, { "epoch": 2.751523037794996, "grad_norm": 0.6657871007919312, "learning_rate": 3.105182582195963e-06, "loss": 0.0876, "step": 93040 }, { "epoch": 2.75181877328917, "grad_norm": 0.9235370755195618, "learning_rate": 3.1050558922770236e-06, "loss": 0.0824, "step": 93050 }, { "epoch": 2.752114508783344, "grad_norm": 0.6141487956047058, "learning_rate": 3.104929202358084e-06, "loss": 0.0749, "step": 93060 }, { "epoch": 2.7524102442775185, "grad_norm": 0.7987546920776367, "learning_rate": 3.1048025124391448e-06, "loss": 0.0488, "step": 93070 }, { "epoch": 2.752705979771692, "grad_norm": 0.5502804517745972, "learning_rate": 3.104675822520205e-06, "loss": 0.0703, "step": 93080 }, { "epoch": 2.7530017152658663, "grad_norm": 0.9917306900024414, "learning_rate": 3.104549132601266e-06, "loss": 0.0826, "step": 93090 }, { "epoch": 2.7532974507600403, "grad_norm": 0.706199586391449, "learning_rate": 3.1044224426823263e-06, "loss": 0.0644, "step": 93100 }, { "epoch": 2.7535931862542142, "grad_norm": 0.7873579263687134, "learning_rate": 3.1042957527633875e-06, "loss": 0.0727, "step": 93110 }, { "epoch": 2.753888921748388, "grad_norm": 0.7318394780158997, "learning_rate": 3.104169062844448e-06, "loss": 0.0665, "step": 93120 }, { "epoch": 2.754184657242562, "grad_norm": 0.8387457728385925, "learning_rate": 3.1040423729255087e-06, "loss": 0.0663, "step": 93130 }, { "epoch": 2.7544803927367365, "grad_norm": 0.8400466442108154, "learning_rate": 3.103915683006569e-06, "loss": 0.0731, "step": 93140 }, { "epoch": 2.75477612823091, "grad_norm": 0.8565192818641663, "learning_rate": 3.10378899308763e-06, "loss": 0.0902, "step": 93150 }, { "epoch": 2.7550718637250844, "grad_norm": 0.4048128128051758, "learning_rate": 3.10366230316869e-06, "loss": 0.0796, "step": 93160 }, { "epoch": 2.7553675992192583, "grad_norm": 0.8525227904319763, "learning_rate": 3.103535613249751e-06, "loss": 0.0705, "step": 93170 }, { "epoch": 2.7556633347134323, "grad_norm": 0.7223042845726013, "learning_rate": 3.1034089233308113e-06, "loss": 0.0575, "step": 93180 }, { "epoch": 2.755959070207606, "grad_norm": 1.2690236568450928, "learning_rate": 3.1032822334118726e-06, "loss": 0.0803, "step": 93190 }, { "epoch": 2.75625480570178, "grad_norm": 0.3996526002883911, "learning_rate": 3.103155543492933e-06, "loss": 0.0702, "step": 93200 }, { "epoch": 2.7565505411959546, "grad_norm": 0.8987526893615723, "learning_rate": 3.1030288535739933e-06, "loss": 0.0731, "step": 93210 }, { "epoch": 2.7568462766901285, "grad_norm": 0.5478434562683105, "learning_rate": 3.102902163655054e-06, "loss": 0.0948, "step": 93220 }, { "epoch": 2.7571420121843024, "grad_norm": 0.740707516670227, "learning_rate": 3.1027754737361144e-06, "loss": 0.0728, "step": 93230 }, { "epoch": 2.7574377476784764, "grad_norm": 1.0373632907867432, "learning_rate": 3.1026487838171752e-06, "loss": 0.0932, "step": 93240 }, { "epoch": 2.7577334831726503, "grad_norm": 0.9519327878952026, "learning_rate": 3.1025220938982356e-06, "loss": 0.0798, "step": 93250 }, { "epoch": 2.7580292186668243, "grad_norm": 0.5236476063728333, "learning_rate": 3.1023954039792964e-06, "loss": 0.0725, "step": 93260 }, { "epoch": 2.758324954160998, "grad_norm": 0.4848661422729492, "learning_rate": 3.102268714060357e-06, "loss": 0.0805, "step": 93270 }, { "epoch": 2.7586206896551726, "grad_norm": 0.6592941284179688, "learning_rate": 3.102142024141418e-06, "loss": 0.0761, "step": 93280 }, { "epoch": 2.7589164251493465, "grad_norm": 0.698131263256073, "learning_rate": 3.1020153342224783e-06, "loss": 0.0922, "step": 93290 }, { "epoch": 2.7592121606435205, "grad_norm": 0.8873666524887085, "learning_rate": 3.101888644303539e-06, "loss": 0.0864, "step": 93300 }, { "epoch": 2.7595078961376944, "grad_norm": 0.5011268258094788, "learning_rate": 3.1017619543845995e-06, "loss": 0.073, "step": 93310 }, { "epoch": 2.7598036316318684, "grad_norm": 0.6636426448822021, "learning_rate": 3.1016352644656603e-06, "loss": 0.0605, "step": 93320 }, { "epoch": 2.7600993671260423, "grad_norm": 1.0296497344970703, "learning_rate": 3.1015085745467206e-06, "loss": 0.0612, "step": 93330 }, { "epoch": 2.7603951026202163, "grad_norm": 0.6238236427307129, "learning_rate": 3.1013818846277814e-06, "loss": 0.0701, "step": 93340 }, { "epoch": 2.7606908381143906, "grad_norm": 0.9720893502235413, "learning_rate": 3.1012551947088422e-06, "loss": 0.0873, "step": 93350 }, { "epoch": 2.7609865736085646, "grad_norm": 0.7740465998649597, "learning_rate": 3.101128504789903e-06, "loss": 0.0796, "step": 93360 }, { "epoch": 2.7612823091027385, "grad_norm": 0.7824012041091919, "learning_rate": 3.1010018148709634e-06, "loss": 0.0544, "step": 93370 }, { "epoch": 2.7615780445969125, "grad_norm": 0.72990882396698, "learning_rate": 3.100875124952024e-06, "loss": 0.0613, "step": 93380 }, { "epoch": 2.7618737800910864, "grad_norm": 0.6053391695022583, "learning_rate": 3.1007484350330845e-06, "loss": 0.0869, "step": 93390 }, { "epoch": 2.762169515585261, "grad_norm": 1.00032377243042, "learning_rate": 3.1006217451141453e-06, "loss": 0.1018, "step": 93400 }, { "epoch": 2.7624652510794343, "grad_norm": 0.8653048276901245, "learning_rate": 3.1004950551952057e-06, "loss": 0.0885, "step": 93410 }, { "epoch": 2.7627609865736087, "grad_norm": 0.5019547939300537, "learning_rate": 3.1003683652762665e-06, "loss": 0.0551, "step": 93420 }, { "epoch": 2.7630567220677826, "grad_norm": 0.7322325706481934, "learning_rate": 3.1002416753573273e-06, "loss": 0.0787, "step": 93430 }, { "epoch": 2.7633524575619566, "grad_norm": 1.4351069927215576, "learning_rate": 3.100114985438388e-06, "loss": 0.0871, "step": 93440 }, { "epoch": 2.7636481930561305, "grad_norm": 0.7526983618736267, "learning_rate": 3.0999882955194484e-06, "loss": 0.0941, "step": 93450 }, { "epoch": 2.7639439285503045, "grad_norm": 0.9213915467262268, "learning_rate": 3.0998616056005092e-06, "loss": 0.0958, "step": 93460 }, { "epoch": 2.764239664044479, "grad_norm": 0.9067345857620239, "learning_rate": 3.0997349156815696e-06, "loss": 0.0674, "step": 93470 }, { "epoch": 2.764535399538653, "grad_norm": 0.8535899519920349, "learning_rate": 3.0996082257626304e-06, "loss": 0.0769, "step": 93480 }, { "epoch": 2.7648311350328267, "grad_norm": 0.680526077747345, "learning_rate": 3.0994815358436907e-06, "loss": 0.0769, "step": 93490 }, { "epoch": 2.7651268705270007, "grad_norm": 0.9264295101165771, "learning_rate": 3.0993548459247515e-06, "loss": 0.0961, "step": 93500 }, { "epoch": 2.7654226060211746, "grad_norm": 0.9714794754981995, "learning_rate": 3.0992281560058123e-06, "loss": 0.0862, "step": 93510 }, { "epoch": 2.7657183415153486, "grad_norm": 0.47940850257873535, "learning_rate": 3.099101466086873e-06, "loss": 0.0723, "step": 93520 }, { "epoch": 2.7660140770095225, "grad_norm": 1.069189429283142, "learning_rate": 3.0989747761679335e-06, "loss": 0.0676, "step": 93530 }, { "epoch": 2.766309812503697, "grad_norm": 0.8107236623764038, "learning_rate": 3.0988480862489943e-06, "loss": 0.0882, "step": 93540 }, { "epoch": 2.766605547997871, "grad_norm": 0.9836380481719971, "learning_rate": 3.0987213963300546e-06, "loss": 0.0752, "step": 93550 }, { "epoch": 2.766901283492045, "grad_norm": 0.5857119560241699, "learning_rate": 3.0985947064111154e-06, "loss": 0.058, "step": 93560 }, { "epoch": 2.7671970189862187, "grad_norm": 0.6815667152404785, "learning_rate": 3.0984680164921758e-06, "loss": 0.0702, "step": 93570 }, { "epoch": 2.7674927544803927, "grad_norm": 0.4538857042789459, "learning_rate": 3.0983413265732366e-06, "loss": 0.0685, "step": 93580 }, { "epoch": 2.7677884899745666, "grad_norm": 1.3463637828826904, "learning_rate": 3.0982146366542974e-06, "loss": 0.0951, "step": 93590 }, { "epoch": 2.7680842254687406, "grad_norm": 0.6823112368583679, "learning_rate": 3.098087946735358e-06, "loss": 0.0801, "step": 93600 }, { "epoch": 2.768379960962915, "grad_norm": 0.7786179780960083, "learning_rate": 3.0979612568164185e-06, "loss": 0.0877, "step": 93610 }, { "epoch": 2.768675696457089, "grad_norm": 0.4443485140800476, "learning_rate": 3.097834566897479e-06, "loss": 0.0639, "step": 93620 }, { "epoch": 2.768971431951263, "grad_norm": 0.7510002851486206, "learning_rate": 3.0977078769785397e-06, "loss": 0.0584, "step": 93630 }, { "epoch": 2.7692671674454368, "grad_norm": 0.768481433391571, "learning_rate": 3.0975811870596e-06, "loss": 0.0865, "step": 93640 }, { "epoch": 2.7695629029396107, "grad_norm": 0.920473575592041, "learning_rate": 3.097454497140661e-06, "loss": 0.08, "step": 93650 }, { "epoch": 2.7698586384337847, "grad_norm": 0.8597397208213806, "learning_rate": 3.097327807221721e-06, "loss": 0.0748, "step": 93660 }, { "epoch": 2.7701543739279586, "grad_norm": 0.6233574748039246, "learning_rate": 3.0972011173027824e-06, "loss": 0.0798, "step": 93670 }, { "epoch": 2.770450109422133, "grad_norm": 0.5233123302459717, "learning_rate": 3.0970744273838428e-06, "loss": 0.0758, "step": 93680 }, { "epoch": 2.770745844916307, "grad_norm": 0.8608322143554688, "learning_rate": 3.0969477374649036e-06, "loss": 0.1047, "step": 93690 }, { "epoch": 2.771041580410481, "grad_norm": 1.1446706056594849, "learning_rate": 3.096821047545964e-06, "loss": 0.0878, "step": 93700 }, { "epoch": 2.771337315904655, "grad_norm": 0.8897706866264343, "learning_rate": 3.0966943576270247e-06, "loss": 0.0755, "step": 93710 }, { "epoch": 2.7716330513988288, "grad_norm": 0.7039892077445984, "learning_rate": 3.096567667708085e-06, "loss": 0.0804, "step": 93720 }, { "epoch": 2.771928786893003, "grad_norm": 0.9907447695732117, "learning_rate": 3.096440977789146e-06, "loss": 0.0912, "step": 93730 }, { "epoch": 2.7722245223871766, "grad_norm": 0.8984050154685974, "learning_rate": 3.0963142878702062e-06, "loss": 0.0785, "step": 93740 }, { "epoch": 2.772520257881351, "grad_norm": 0.7973426580429077, "learning_rate": 3.0961875979512674e-06, "loss": 0.0787, "step": 93750 }, { "epoch": 2.772815993375525, "grad_norm": 0.6454118490219116, "learning_rate": 3.096060908032328e-06, "loss": 0.0846, "step": 93760 }, { "epoch": 2.773111728869699, "grad_norm": 0.8023062944412231, "learning_rate": 3.0959342181133886e-06, "loss": 0.0598, "step": 93770 }, { "epoch": 2.773407464363873, "grad_norm": 1.0216143131256104, "learning_rate": 3.095807528194449e-06, "loss": 0.0819, "step": 93780 }, { "epoch": 2.773703199858047, "grad_norm": 1.3285527229309082, "learning_rate": 3.0956808382755098e-06, "loss": 0.1022, "step": 93790 }, { "epoch": 2.773998935352221, "grad_norm": 0.659134566783905, "learning_rate": 3.09555414835657e-06, "loss": 0.0915, "step": 93800 }, { "epoch": 2.774294670846395, "grad_norm": 0.5224774479866028, "learning_rate": 3.095427458437631e-06, "loss": 0.0669, "step": 93810 }, { "epoch": 2.774590406340569, "grad_norm": 1.307578206062317, "learning_rate": 3.0953007685186913e-06, "loss": 0.0699, "step": 93820 }, { "epoch": 2.774886141834743, "grad_norm": 0.7190690636634827, "learning_rate": 3.0951740785997525e-06, "loss": 0.0539, "step": 93830 }, { "epoch": 2.775181877328917, "grad_norm": 0.7876976132392883, "learning_rate": 3.095047388680813e-06, "loss": 0.0943, "step": 93840 }, { "epoch": 2.775477612823091, "grad_norm": 0.6358250379562378, "learning_rate": 3.0949206987618737e-06, "loss": 0.0675, "step": 93850 }, { "epoch": 2.775773348317265, "grad_norm": 0.6966760158538818, "learning_rate": 3.094794008842934e-06, "loss": 0.0789, "step": 93860 }, { "epoch": 2.7760690838114392, "grad_norm": 0.6786233186721802, "learning_rate": 3.094667318923995e-06, "loss": 0.0707, "step": 93870 }, { "epoch": 2.776364819305613, "grad_norm": 0.44928789138793945, "learning_rate": 3.094540629005055e-06, "loss": 0.08, "step": 93880 }, { "epoch": 2.776660554799787, "grad_norm": 0.6708481907844543, "learning_rate": 3.094413939086116e-06, "loss": 0.09, "step": 93890 }, { "epoch": 2.776956290293961, "grad_norm": 0.6460803747177124, "learning_rate": 3.0942872491671763e-06, "loss": 0.0807, "step": 93900 }, { "epoch": 2.777252025788135, "grad_norm": 0.6447038650512695, "learning_rate": 3.0941605592482375e-06, "loss": 0.0692, "step": 93910 }, { "epoch": 2.777547761282309, "grad_norm": 0.43661534786224365, "learning_rate": 3.094033869329298e-06, "loss": 0.0708, "step": 93920 }, { "epoch": 2.777843496776483, "grad_norm": 1.0370395183563232, "learning_rate": 3.0939071794103587e-06, "loss": 0.0723, "step": 93930 }, { "epoch": 2.7781392322706573, "grad_norm": 0.8447607159614563, "learning_rate": 3.093780489491419e-06, "loss": 0.1071, "step": 93940 }, { "epoch": 2.7784349677648312, "grad_norm": 0.6358205080032349, "learning_rate": 3.09365379957248e-06, "loss": 0.0645, "step": 93950 }, { "epoch": 2.778730703259005, "grad_norm": 0.6088887453079224, "learning_rate": 3.0935271096535402e-06, "loss": 0.0712, "step": 93960 }, { "epoch": 2.779026438753179, "grad_norm": 0.7735272645950317, "learning_rate": 3.093400419734601e-06, "loss": 0.0717, "step": 93970 }, { "epoch": 2.779322174247353, "grad_norm": 1.0655173063278198, "learning_rate": 3.0932737298156614e-06, "loss": 0.0743, "step": 93980 }, { "epoch": 2.7796179097415274, "grad_norm": 1.5404136180877686, "learning_rate": 3.0931470398967226e-06, "loss": 0.0975, "step": 93990 }, { "epoch": 2.779913645235701, "grad_norm": 0.6002833247184753, "learning_rate": 3.093020349977783e-06, "loss": 0.0838, "step": 94000 }, { "epoch": 2.7802093807298753, "grad_norm": 0.4768494963645935, "learning_rate": 3.0928936600588437e-06, "loss": 0.0774, "step": 94010 }, { "epoch": 2.7805051162240493, "grad_norm": 0.8132797479629517, "learning_rate": 3.092766970139904e-06, "loss": 0.0661, "step": 94020 }, { "epoch": 2.780800851718223, "grad_norm": 0.7394833564758301, "learning_rate": 3.0926402802209645e-06, "loss": 0.0583, "step": 94030 }, { "epoch": 2.781096587212397, "grad_norm": 0.8255077004432678, "learning_rate": 3.0925135903020253e-06, "loss": 0.0781, "step": 94040 }, { "epoch": 2.781392322706571, "grad_norm": 0.7022741436958313, "learning_rate": 3.0923869003830856e-06, "loss": 0.0826, "step": 94050 }, { "epoch": 2.7816880582007455, "grad_norm": 0.4890395998954773, "learning_rate": 3.0922602104641464e-06, "loss": 0.0727, "step": 94060 }, { "epoch": 2.781983793694919, "grad_norm": 0.49793118238449097, "learning_rate": 3.092133520545207e-06, "loss": 0.0662, "step": 94070 }, { "epoch": 2.7822795291890934, "grad_norm": 0.9626700282096863, "learning_rate": 3.092006830626268e-06, "loss": 0.0674, "step": 94080 }, { "epoch": 2.7825752646832673, "grad_norm": 0.6951974630355835, "learning_rate": 3.0918801407073284e-06, "loss": 0.0843, "step": 94090 }, { "epoch": 2.7828710001774413, "grad_norm": 0.665037214756012, "learning_rate": 3.091753450788389e-06, "loss": 0.0747, "step": 94100 }, { "epoch": 2.783166735671615, "grad_norm": 0.8293807506561279, "learning_rate": 3.0916267608694495e-06, "loss": 0.0605, "step": 94110 }, { "epoch": 2.783462471165789, "grad_norm": 1.1109892129898071, "learning_rate": 3.0915000709505103e-06, "loss": 0.0572, "step": 94120 }, { "epoch": 2.7837582066599635, "grad_norm": 0.9793128967285156, "learning_rate": 3.0913733810315707e-06, "loss": 0.074, "step": 94130 }, { "epoch": 2.7840539421541375, "grad_norm": 0.9441953897476196, "learning_rate": 3.0912466911126315e-06, "loss": 0.0829, "step": 94140 }, { "epoch": 2.7843496776483114, "grad_norm": 0.4948652982711792, "learning_rate": 3.0911200011936923e-06, "loss": 0.0847, "step": 94150 }, { "epoch": 2.7846454131424854, "grad_norm": 0.9027261137962341, "learning_rate": 3.090993311274753e-06, "loss": 0.0874, "step": 94160 }, { "epoch": 2.7849411486366593, "grad_norm": 0.7131496667861938, "learning_rate": 3.0908666213558134e-06, "loss": 0.0708, "step": 94170 }, { "epoch": 2.7852368841308333, "grad_norm": 0.7033644318580627, "learning_rate": 3.090739931436874e-06, "loss": 0.078, "step": 94180 }, { "epoch": 2.785532619625007, "grad_norm": 0.5062427520751953, "learning_rate": 3.0906132415179346e-06, "loss": 0.0845, "step": 94190 }, { "epoch": 2.7858283551191816, "grad_norm": 1.1874170303344727, "learning_rate": 3.0904865515989954e-06, "loss": 0.0973, "step": 94200 }, { "epoch": 2.7861240906133555, "grad_norm": 0.5156505703926086, "learning_rate": 3.0903598616800557e-06, "loss": 0.0703, "step": 94210 }, { "epoch": 2.7864198261075295, "grad_norm": 0.6196566224098206, "learning_rate": 3.0902331717611165e-06, "loss": 0.1009, "step": 94220 }, { "epoch": 2.7867155616017034, "grad_norm": 0.7334961295127869, "learning_rate": 3.0901064818421773e-06, "loss": 0.0681, "step": 94230 }, { "epoch": 2.7870112970958774, "grad_norm": 0.9268574118614197, "learning_rate": 3.089979791923238e-06, "loss": 0.0782, "step": 94240 }, { "epoch": 2.7873070325900513, "grad_norm": 0.7912241816520691, "learning_rate": 3.0898531020042985e-06, "loss": 0.0887, "step": 94250 }, { "epoch": 2.7876027680842252, "grad_norm": 0.6362401843070984, "learning_rate": 3.0897264120853592e-06, "loss": 0.0759, "step": 94260 }, { "epoch": 2.7878985035783996, "grad_norm": 0.6275933384895325, "learning_rate": 3.0895997221664196e-06, "loss": 0.072, "step": 94270 }, { "epoch": 2.7881942390725736, "grad_norm": 0.6136696934700012, "learning_rate": 3.0894730322474804e-06, "loss": 0.0606, "step": 94280 }, { "epoch": 2.7884899745667475, "grad_norm": 0.7121675610542297, "learning_rate": 3.0893463423285408e-06, "loss": 0.0819, "step": 94290 }, { "epoch": 2.7887857100609215, "grad_norm": 0.7351255416870117, "learning_rate": 3.0892196524096016e-06, "loss": 0.0922, "step": 94300 }, { "epoch": 2.7890814455550954, "grad_norm": 0.5615973472595215, "learning_rate": 3.0890929624906623e-06, "loss": 0.0773, "step": 94310 }, { "epoch": 2.78937718104927, "grad_norm": 0.770401656627655, "learning_rate": 3.088966272571723e-06, "loss": 0.0842, "step": 94320 }, { "epoch": 2.7896729165434433, "grad_norm": 0.8777627944946289, "learning_rate": 3.0888395826527835e-06, "loss": 0.0736, "step": 94330 }, { "epoch": 2.7899686520376177, "grad_norm": 0.603685736656189, "learning_rate": 3.0887128927338443e-06, "loss": 0.0897, "step": 94340 }, { "epoch": 2.7902643875317916, "grad_norm": 0.6375893354415894, "learning_rate": 3.0885862028149047e-06, "loss": 0.075, "step": 94350 }, { "epoch": 2.7905601230259656, "grad_norm": 1.0469298362731934, "learning_rate": 3.0884595128959654e-06, "loss": 0.0804, "step": 94360 }, { "epoch": 2.7908558585201395, "grad_norm": 0.9992874264717102, "learning_rate": 3.088332822977026e-06, "loss": 0.0764, "step": 94370 }, { "epoch": 2.7911515940143135, "grad_norm": 0.5684293508529663, "learning_rate": 3.0882061330580866e-06, "loss": 0.0727, "step": 94380 }, { "epoch": 2.791447329508488, "grad_norm": 0.6644034385681152, "learning_rate": 3.0880794431391474e-06, "loss": 0.0952, "step": 94390 }, { "epoch": 2.791743065002662, "grad_norm": 0.8162825107574463, "learning_rate": 3.087952753220208e-06, "loss": 0.0778, "step": 94400 }, { "epoch": 2.7920388004968357, "grad_norm": 0.8347814083099365, "learning_rate": 3.0878260633012685e-06, "loss": 0.0748, "step": 94410 }, { "epoch": 2.7923345359910097, "grad_norm": 0.9126595258712769, "learning_rate": 3.0876993733823293e-06, "loss": 0.0823, "step": 94420 }, { "epoch": 2.7926302714851836, "grad_norm": 0.6643896102905273, "learning_rate": 3.0875726834633897e-06, "loss": 0.0808, "step": 94430 }, { "epoch": 2.7929260069793576, "grad_norm": 0.6582721471786499, "learning_rate": 3.0874459935444505e-06, "loss": 0.0765, "step": 94440 }, { "epoch": 2.7932217424735315, "grad_norm": 0.7057924866676331, "learning_rate": 3.087319303625511e-06, "loss": 0.0894, "step": 94450 }, { "epoch": 2.793517477967706, "grad_norm": 0.9296672344207764, "learning_rate": 3.0871926137065712e-06, "loss": 0.1018, "step": 94460 }, { "epoch": 2.79381321346188, "grad_norm": 0.8799965977668762, "learning_rate": 3.0870659237876324e-06, "loss": 0.0834, "step": 94470 }, { "epoch": 2.7941089489560538, "grad_norm": 0.7215861678123474, "learning_rate": 3.086939233868693e-06, "loss": 0.0733, "step": 94480 }, { "epoch": 2.7944046844502277, "grad_norm": 0.6307134032249451, "learning_rate": 3.0868125439497536e-06, "loss": 0.0682, "step": 94490 }, { "epoch": 2.7947004199444017, "grad_norm": 0.7484679222106934, "learning_rate": 3.086685854030814e-06, "loss": 0.081, "step": 94500 }, { "epoch": 2.7949961554385756, "grad_norm": 0.8561397790908813, "learning_rate": 3.0865591641118747e-06, "loss": 0.0667, "step": 94510 }, { "epoch": 2.7952918909327495, "grad_norm": 0.7482571005821228, "learning_rate": 3.086432474192935e-06, "loss": 0.0761, "step": 94520 }, { "epoch": 2.795587626426924, "grad_norm": 1.0251363515853882, "learning_rate": 3.086305784273996e-06, "loss": 0.0682, "step": 94530 }, { "epoch": 2.795883361921098, "grad_norm": 1.1407008171081543, "learning_rate": 3.0861790943550563e-06, "loss": 0.0976, "step": 94540 }, { "epoch": 2.796179097415272, "grad_norm": 0.6938512921333313, "learning_rate": 3.0860524044361175e-06, "loss": 0.0778, "step": 94550 }, { "epoch": 2.7964748329094458, "grad_norm": 0.7558192014694214, "learning_rate": 3.085925714517178e-06, "loss": 0.0832, "step": 94560 }, { "epoch": 2.7967705684036197, "grad_norm": 0.7875214219093323, "learning_rate": 3.0857990245982386e-06, "loss": 0.09, "step": 94570 }, { "epoch": 2.7970663038977936, "grad_norm": 1.4359948635101318, "learning_rate": 3.085672334679299e-06, "loss": 0.0833, "step": 94580 }, { "epoch": 2.7973620393919676, "grad_norm": 0.6260539889335632, "learning_rate": 3.08554564476036e-06, "loss": 0.094, "step": 94590 }, { "epoch": 2.797657774886142, "grad_norm": 1.1010979413986206, "learning_rate": 3.08541895484142e-06, "loss": 0.107, "step": 94600 }, { "epoch": 2.797953510380316, "grad_norm": 0.8819848895072937, "learning_rate": 3.085292264922481e-06, "loss": 0.0886, "step": 94610 }, { "epoch": 2.79824924587449, "grad_norm": 0.6383065581321716, "learning_rate": 3.0851655750035413e-06, "loss": 0.0734, "step": 94620 }, { "epoch": 2.798544981368664, "grad_norm": 1.110215425491333, "learning_rate": 3.0850388850846025e-06, "loss": 0.0622, "step": 94630 }, { "epoch": 2.7988407168628378, "grad_norm": 1.2453055381774902, "learning_rate": 3.084912195165663e-06, "loss": 0.1014, "step": 94640 }, { "epoch": 2.799136452357012, "grad_norm": 0.9171404242515564, "learning_rate": 3.0847855052467237e-06, "loss": 0.0749, "step": 94650 }, { "epoch": 2.7994321878511856, "grad_norm": 0.5900371074676514, "learning_rate": 3.084658815327784e-06, "loss": 0.0817, "step": 94660 }, { "epoch": 2.79972792334536, "grad_norm": 0.43957796692848206, "learning_rate": 3.084532125408845e-06, "loss": 0.0638, "step": 94670 }, { "epoch": 2.800023658839534, "grad_norm": 0.7946572303771973, "learning_rate": 3.084405435489905e-06, "loss": 0.0818, "step": 94680 }, { "epoch": 2.800319394333708, "grad_norm": 0.7597737908363342, "learning_rate": 3.084278745570966e-06, "loss": 0.0842, "step": 94690 }, { "epoch": 2.800615129827882, "grad_norm": 1.258109450340271, "learning_rate": 3.0841520556520264e-06, "loss": 0.0686, "step": 94700 }, { "epoch": 2.800910865322056, "grad_norm": 0.7816287279129028, "learning_rate": 3.0840253657330876e-06, "loss": 0.0844, "step": 94710 }, { "epoch": 2.80120660081623, "grad_norm": 0.3933291435241699, "learning_rate": 3.083898675814148e-06, "loss": 0.0607, "step": 94720 }, { "epoch": 2.801502336310404, "grad_norm": 1.195796012878418, "learning_rate": 3.0837719858952087e-06, "loss": 0.0776, "step": 94730 }, { "epoch": 2.801798071804578, "grad_norm": 0.8534906506538391, "learning_rate": 3.083645295976269e-06, "loss": 0.0858, "step": 94740 }, { "epoch": 2.802093807298752, "grad_norm": 0.6618479490280151, "learning_rate": 3.08351860605733e-06, "loss": 0.0865, "step": 94750 }, { "epoch": 2.802389542792926, "grad_norm": 1.0764667987823486, "learning_rate": 3.0833919161383902e-06, "loss": 0.0931, "step": 94760 }, { "epoch": 2.8026852782871, "grad_norm": 0.6428677439689636, "learning_rate": 3.083265226219451e-06, "loss": 0.0664, "step": 94770 }, { "epoch": 2.802981013781274, "grad_norm": 0.9671320915222168, "learning_rate": 3.0831385363005114e-06, "loss": 0.0844, "step": 94780 }, { "epoch": 2.8032767492754482, "grad_norm": 0.6288554072380066, "learning_rate": 3.0830118463815726e-06, "loss": 0.0869, "step": 94790 }, { "epoch": 2.803572484769622, "grad_norm": 0.7510187029838562, "learning_rate": 3.082885156462633e-06, "loss": 0.0919, "step": 94800 }, { "epoch": 2.803868220263796, "grad_norm": 0.6457772850990295, "learning_rate": 3.0827584665436938e-06, "loss": 0.0728, "step": 94810 }, { "epoch": 2.80416395575797, "grad_norm": 0.3847062587738037, "learning_rate": 3.082631776624754e-06, "loss": 0.0639, "step": 94820 }, { "epoch": 2.804459691252144, "grad_norm": 0.7081385850906372, "learning_rate": 3.082505086705815e-06, "loss": 0.0705, "step": 94830 }, { "epoch": 2.804755426746318, "grad_norm": 0.6978366374969482, "learning_rate": 3.0823783967868753e-06, "loss": 0.0853, "step": 94840 }, { "epoch": 2.805051162240492, "grad_norm": 0.6579407453536987, "learning_rate": 3.082251706867936e-06, "loss": 0.0687, "step": 94850 }, { "epoch": 2.8053468977346663, "grad_norm": 1.097796082496643, "learning_rate": 3.0821250169489964e-06, "loss": 0.0703, "step": 94860 }, { "epoch": 2.80564263322884, "grad_norm": 0.5818014740943909, "learning_rate": 3.0819983270300572e-06, "loss": 0.0646, "step": 94870 }, { "epoch": 2.805938368723014, "grad_norm": 1.0440011024475098, "learning_rate": 3.081871637111118e-06, "loss": 0.0747, "step": 94880 }, { "epoch": 2.806234104217188, "grad_norm": 1.174397587776184, "learning_rate": 3.0817449471921784e-06, "loss": 0.0977, "step": 94890 }, { "epoch": 2.806529839711362, "grad_norm": 0.7232552170753479, "learning_rate": 3.081618257273239e-06, "loss": 0.0761, "step": 94900 }, { "epoch": 2.8068255752055364, "grad_norm": 0.9761987924575806, "learning_rate": 3.0814915673542995e-06, "loss": 0.069, "step": 94910 }, { "epoch": 2.80712131069971, "grad_norm": 0.7037648558616638, "learning_rate": 3.0813648774353603e-06, "loss": 0.0777, "step": 94920 }, { "epoch": 2.8074170461938843, "grad_norm": 0.7762095928192139, "learning_rate": 3.0812381875164207e-06, "loss": 0.0782, "step": 94930 }, { "epoch": 2.8077127816880583, "grad_norm": 0.8164080381393433, "learning_rate": 3.0811114975974815e-06, "loss": 0.0722, "step": 94940 }, { "epoch": 2.808008517182232, "grad_norm": 0.6346108317375183, "learning_rate": 3.0809848076785423e-06, "loss": 0.0731, "step": 94950 }, { "epoch": 2.808304252676406, "grad_norm": 0.6977017521858215, "learning_rate": 3.080858117759603e-06, "loss": 0.073, "step": 94960 }, { "epoch": 2.80859998817058, "grad_norm": 0.5382416248321533, "learning_rate": 3.0807314278406634e-06, "loss": 0.0595, "step": 94970 }, { "epoch": 2.8088957236647545, "grad_norm": 1.0528075695037842, "learning_rate": 3.0806047379217242e-06, "loss": 0.0831, "step": 94980 }, { "epoch": 2.809191459158928, "grad_norm": 0.5970491766929626, "learning_rate": 3.0804780480027846e-06, "loss": 0.0792, "step": 94990 }, { "epoch": 2.8094871946531024, "grad_norm": 0.7681974172592163, "learning_rate": 3.0803513580838454e-06, "loss": 0.0781, "step": 95000 }, { "epoch": 2.8097829301472763, "grad_norm": 1.08797025680542, "learning_rate": 3.0802246681649057e-06, "loss": 0.0858, "step": 95010 }, { "epoch": 2.8100786656414503, "grad_norm": 0.9118304252624512, "learning_rate": 3.0800979782459665e-06, "loss": 0.0748, "step": 95020 }, { "epoch": 2.810374401135624, "grad_norm": 1.0090279579162598, "learning_rate": 3.0799712883270273e-06, "loss": 0.0694, "step": 95030 }, { "epoch": 2.810670136629798, "grad_norm": 0.6200495362281799, "learning_rate": 3.079844598408088e-06, "loss": 0.0811, "step": 95040 }, { "epoch": 2.8109658721239725, "grad_norm": 0.9654179215431213, "learning_rate": 3.0797179084891485e-06, "loss": 0.0699, "step": 95050 }, { "epoch": 2.8112616076181465, "grad_norm": 0.8256102800369263, "learning_rate": 3.0795912185702093e-06, "loss": 0.0814, "step": 95060 }, { "epoch": 2.8115573431123204, "grad_norm": 0.4485251009464264, "learning_rate": 3.0794645286512696e-06, "loss": 0.0709, "step": 95070 }, { "epoch": 2.8118530786064944, "grad_norm": 1.1544386148452759, "learning_rate": 3.0793378387323304e-06, "loss": 0.0562, "step": 95080 }, { "epoch": 2.8121488141006683, "grad_norm": 0.8043834567070007, "learning_rate": 3.079211148813391e-06, "loss": 0.0818, "step": 95090 }, { "epoch": 2.8124445495948422, "grad_norm": 0.6961836814880371, "learning_rate": 3.0790844588944516e-06, "loss": 0.0763, "step": 95100 }, { "epoch": 2.812740285089016, "grad_norm": 0.7264445424079895, "learning_rate": 3.0789577689755124e-06, "loss": 0.0944, "step": 95110 }, { "epoch": 2.8130360205831906, "grad_norm": 1.035927653312683, "learning_rate": 3.078831079056573e-06, "loss": 0.0702, "step": 95120 }, { "epoch": 2.8133317560773645, "grad_norm": 0.7601067423820496, "learning_rate": 3.0787043891376335e-06, "loss": 0.0907, "step": 95130 }, { "epoch": 2.8136274915715385, "grad_norm": 0.8545243144035339, "learning_rate": 3.0785776992186943e-06, "loss": 0.0957, "step": 95140 }, { "epoch": 2.8139232270657124, "grad_norm": 0.7772248983383179, "learning_rate": 3.0784510092997547e-06, "loss": 0.0953, "step": 95150 }, { "epoch": 2.8142189625598864, "grad_norm": 0.7457374930381775, "learning_rate": 3.0783243193808155e-06, "loss": 0.0784, "step": 95160 }, { "epoch": 2.8145146980540603, "grad_norm": 0.5957939028739929, "learning_rate": 3.078197629461876e-06, "loss": 0.0741, "step": 95170 }, { "epoch": 2.8148104335482342, "grad_norm": 0.7199602723121643, "learning_rate": 3.0780709395429366e-06, "loss": 0.073, "step": 95180 }, { "epoch": 2.8151061690424086, "grad_norm": 0.9733665585517883, "learning_rate": 3.0779442496239974e-06, "loss": 0.0808, "step": 95190 }, { "epoch": 2.8154019045365826, "grad_norm": 0.602641761302948, "learning_rate": 3.077817559705058e-06, "loss": 0.071, "step": 95200 }, { "epoch": 2.8156976400307565, "grad_norm": 0.6164265871047974, "learning_rate": 3.0776908697861186e-06, "loss": 0.0879, "step": 95210 }, { "epoch": 2.8159933755249305, "grad_norm": 0.8743146061897278, "learning_rate": 3.0775641798671794e-06, "loss": 0.076, "step": 95220 }, { "epoch": 2.8162891110191044, "grad_norm": 1.142789602279663, "learning_rate": 3.0774374899482397e-06, "loss": 0.082, "step": 95230 }, { "epoch": 2.816584846513279, "grad_norm": 0.8236047029495239, "learning_rate": 3.0773108000293005e-06, "loss": 0.0848, "step": 95240 }, { "epoch": 2.8168805820074523, "grad_norm": 1.047466516494751, "learning_rate": 3.077184110110361e-06, "loss": 0.089, "step": 95250 }, { "epoch": 2.8171763175016267, "grad_norm": 0.6574945449829102, "learning_rate": 3.0770574201914217e-06, "loss": 0.0829, "step": 95260 }, { "epoch": 2.8174720529958006, "grad_norm": 1.0601716041564941, "learning_rate": 3.0769307302724825e-06, "loss": 0.0714, "step": 95270 }, { "epoch": 2.8177677884899746, "grad_norm": 0.82817143201828, "learning_rate": 3.076804040353543e-06, "loss": 0.0748, "step": 95280 }, { "epoch": 2.8180635239841485, "grad_norm": 0.9200836420059204, "learning_rate": 3.0766773504346036e-06, "loss": 0.1076, "step": 95290 }, { "epoch": 2.8183592594783224, "grad_norm": 0.8378177285194397, "learning_rate": 3.076550660515664e-06, "loss": 0.0802, "step": 95300 }, { "epoch": 2.818654994972497, "grad_norm": 1.087156057357788, "learning_rate": 3.0764239705967248e-06, "loss": 0.0976, "step": 95310 }, { "epoch": 2.8189507304666708, "grad_norm": 0.4435255229473114, "learning_rate": 3.076297280677785e-06, "loss": 0.0814, "step": 95320 }, { "epoch": 2.8192464659608447, "grad_norm": 1.2096757888793945, "learning_rate": 3.076170590758846e-06, "loss": 0.072, "step": 95330 }, { "epoch": 2.8195422014550187, "grad_norm": 0.7653753161430359, "learning_rate": 3.0760439008399063e-06, "loss": 0.0752, "step": 95340 }, { "epoch": 2.8198379369491926, "grad_norm": 0.5995638966560364, "learning_rate": 3.0759172109209675e-06, "loss": 0.0859, "step": 95350 }, { "epoch": 2.8201336724433665, "grad_norm": 0.5811287760734558, "learning_rate": 3.075790521002028e-06, "loss": 0.0538, "step": 95360 }, { "epoch": 2.8204294079375405, "grad_norm": 0.5743506550788879, "learning_rate": 3.0756638310830887e-06, "loss": 0.0817, "step": 95370 }, { "epoch": 2.820725143431715, "grad_norm": 0.7328834533691406, "learning_rate": 3.075537141164149e-06, "loss": 0.0699, "step": 95380 }, { "epoch": 2.821020878925889, "grad_norm": 0.9299649596214294, "learning_rate": 3.07541045124521e-06, "loss": 0.0862, "step": 95390 }, { "epoch": 2.8213166144200628, "grad_norm": 0.746666431427002, "learning_rate": 3.07528376132627e-06, "loss": 0.0823, "step": 95400 }, { "epoch": 2.8216123499142367, "grad_norm": 0.6068497896194458, "learning_rate": 3.075157071407331e-06, "loss": 0.0993, "step": 95410 }, { "epoch": 2.8219080854084106, "grad_norm": 0.545737087726593, "learning_rate": 3.0750303814883913e-06, "loss": 0.0631, "step": 95420 }, { "epoch": 2.8222038209025846, "grad_norm": 0.6148062348365784, "learning_rate": 3.0749036915694526e-06, "loss": 0.0659, "step": 95430 }, { "epoch": 2.8224995563967585, "grad_norm": 0.5525818467140198, "learning_rate": 3.074777001650513e-06, "loss": 0.0881, "step": 95440 }, { "epoch": 2.822795291890933, "grad_norm": 0.9420620799064636, "learning_rate": 3.0746503117315737e-06, "loss": 0.0766, "step": 95450 }, { "epoch": 2.823091027385107, "grad_norm": 0.7742213010787964, "learning_rate": 3.074523621812634e-06, "loss": 0.0826, "step": 95460 }, { "epoch": 2.823386762879281, "grad_norm": 0.784844696521759, "learning_rate": 3.074396931893695e-06, "loss": 0.077, "step": 95470 }, { "epoch": 2.8236824983734548, "grad_norm": 0.5705137252807617, "learning_rate": 3.0742702419747552e-06, "loss": 0.0767, "step": 95480 }, { "epoch": 2.8239782338676287, "grad_norm": 1.0270695686340332, "learning_rate": 3.074143552055816e-06, "loss": 0.0876, "step": 95490 }, { "epoch": 2.8242739693618026, "grad_norm": 0.6417896747589111, "learning_rate": 3.0740168621368764e-06, "loss": 0.1025, "step": 95500 }, { "epoch": 2.8245697048559766, "grad_norm": 0.7737558484077454, "learning_rate": 3.0738901722179376e-06, "loss": 0.0839, "step": 95510 }, { "epoch": 2.824865440350151, "grad_norm": 0.4174153506755829, "learning_rate": 3.073763482298998e-06, "loss": 0.0628, "step": 95520 }, { "epoch": 2.825161175844325, "grad_norm": 0.6377220749855042, "learning_rate": 3.0736367923800588e-06, "loss": 0.0785, "step": 95530 }, { "epoch": 2.825456911338499, "grad_norm": 0.927513599395752, "learning_rate": 3.073510102461119e-06, "loss": 0.0769, "step": 95540 }, { "epoch": 2.825752646832673, "grad_norm": 0.5920975208282471, "learning_rate": 3.07338341254218e-06, "loss": 0.0842, "step": 95550 }, { "epoch": 2.8260483823268467, "grad_norm": 0.7921103835105896, "learning_rate": 3.0732567226232403e-06, "loss": 0.0748, "step": 95560 }, { "epoch": 2.826344117821021, "grad_norm": 0.40831458568573, "learning_rate": 3.073130032704301e-06, "loss": 0.0792, "step": 95570 }, { "epoch": 2.8266398533151946, "grad_norm": 0.9692396521568298, "learning_rate": 3.0730033427853614e-06, "loss": 0.0727, "step": 95580 }, { "epoch": 2.826935588809369, "grad_norm": 0.931900143623352, "learning_rate": 3.0728766528664226e-06, "loss": 0.0879, "step": 95590 }, { "epoch": 2.827231324303543, "grad_norm": 0.8251670598983765, "learning_rate": 3.072749962947483e-06, "loss": 0.0669, "step": 95600 }, { "epoch": 2.827527059797717, "grad_norm": 0.799164354801178, "learning_rate": 3.072623273028544e-06, "loss": 0.0875, "step": 95610 }, { "epoch": 2.827822795291891, "grad_norm": 0.5725483894348145, "learning_rate": 3.072496583109604e-06, "loss": 0.0591, "step": 95620 }, { "epoch": 2.828118530786065, "grad_norm": 0.9719407558441162, "learning_rate": 3.072369893190665e-06, "loss": 0.0772, "step": 95630 }, { "epoch": 2.828414266280239, "grad_norm": 0.8375073671340942, "learning_rate": 3.0722432032717253e-06, "loss": 0.0982, "step": 95640 }, { "epoch": 2.828710001774413, "grad_norm": 1.0156642198562622, "learning_rate": 3.072116513352786e-06, "loss": 0.0863, "step": 95650 }, { "epoch": 2.829005737268587, "grad_norm": 0.7666656970977783, "learning_rate": 3.0719898234338465e-06, "loss": 0.0853, "step": 95660 }, { "epoch": 2.829301472762761, "grad_norm": 0.4347330331802368, "learning_rate": 3.0718631335149077e-06, "loss": 0.0807, "step": 95670 }, { "epoch": 2.829597208256935, "grad_norm": 0.5683918595314026, "learning_rate": 3.071736443595968e-06, "loss": 0.0738, "step": 95680 }, { "epoch": 2.829892943751109, "grad_norm": 0.7442597150802612, "learning_rate": 3.0716097536770284e-06, "loss": 0.0868, "step": 95690 }, { "epoch": 2.830188679245283, "grad_norm": 0.6316391229629517, "learning_rate": 3.0714830637580892e-06, "loss": 0.0769, "step": 95700 }, { "epoch": 2.830484414739457, "grad_norm": 0.7534443140029907, "learning_rate": 3.0713563738391496e-06, "loss": 0.066, "step": 95710 }, { "epoch": 2.830780150233631, "grad_norm": 1.2523764371871948, "learning_rate": 3.0712296839202104e-06, "loss": 0.0691, "step": 95720 }, { "epoch": 2.831075885727805, "grad_norm": 0.9840617775917053, "learning_rate": 3.0711029940012707e-06, "loss": 0.0842, "step": 95730 }, { "epoch": 2.831371621221979, "grad_norm": 1.026854157447815, "learning_rate": 3.0709763040823315e-06, "loss": 0.0997, "step": 95740 }, { "epoch": 2.831667356716153, "grad_norm": 1.0292752981185913, "learning_rate": 3.0708496141633923e-06, "loss": 0.0714, "step": 95750 }, { "epoch": 2.831963092210327, "grad_norm": 0.9297065138816833, "learning_rate": 3.070722924244453e-06, "loss": 0.0726, "step": 95760 }, { "epoch": 2.832258827704501, "grad_norm": 1.140605092048645, "learning_rate": 3.0705962343255135e-06, "loss": 0.0976, "step": 95770 }, { "epoch": 2.8325545631986753, "grad_norm": 0.6231035590171814, "learning_rate": 3.0704695444065743e-06, "loss": 0.0634, "step": 95780 }, { "epoch": 2.832850298692849, "grad_norm": 1.1239101886749268, "learning_rate": 3.0703428544876346e-06, "loss": 0.0712, "step": 95790 }, { "epoch": 2.833146034187023, "grad_norm": 0.6917856931686401, "learning_rate": 3.0702161645686954e-06, "loss": 0.0747, "step": 95800 }, { "epoch": 2.833441769681197, "grad_norm": 0.7932007312774658, "learning_rate": 3.0700894746497558e-06, "loss": 0.0764, "step": 95810 }, { "epoch": 2.833737505175371, "grad_norm": 0.7015883326530457, "learning_rate": 3.0699627847308166e-06, "loss": 0.0638, "step": 95820 }, { "epoch": 2.8340332406695454, "grad_norm": 0.7655390501022339, "learning_rate": 3.0698360948118774e-06, "loss": 0.0618, "step": 95830 }, { "epoch": 2.834328976163719, "grad_norm": 0.7309301495552063, "learning_rate": 3.069709404892938e-06, "loss": 0.0916, "step": 95840 }, { "epoch": 2.8346247116578933, "grad_norm": 0.5671662092208862, "learning_rate": 3.0695827149739985e-06, "loss": 0.0898, "step": 95850 }, { "epoch": 2.8349204471520673, "grad_norm": 0.671058177947998, "learning_rate": 3.0694560250550593e-06, "loss": 0.0701, "step": 95860 }, { "epoch": 2.835216182646241, "grad_norm": 1.120354175567627, "learning_rate": 3.0693293351361197e-06, "loss": 0.0916, "step": 95870 }, { "epoch": 2.835511918140415, "grad_norm": 0.7994858622550964, "learning_rate": 3.0692026452171805e-06, "loss": 0.0775, "step": 95880 }, { "epoch": 2.835807653634589, "grad_norm": 0.6315785646438599, "learning_rate": 3.069075955298241e-06, "loss": 0.0857, "step": 95890 }, { "epoch": 2.8361033891287635, "grad_norm": 0.7953314185142517, "learning_rate": 3.0689492653793016e-06, "loss": 0.085, "step": 95900 }, { "epoch": 2.836399124622937, "grad_norm": 0.7385249733924866, "learning_rate": 3.0688225754603624e-06, "loss": 0.0788, "step": 95910 }, { "epoch": 2.8366948601171114, "grad_norm": 0.7704886794090271, "learning_rate": 3.068695885541423e-06, "loss": 0.0693, "step": 95920 }, { "epoch": 2.8369905956112853, "grad_norm": 0.6604409217834473, "learning_rate": 3.0685691956224836e-06, "loss": 0.0589, "step": 95930 }, { "epoch": 2.8372863311054592, "grad_norm": 0.8508373498916626, "learning_rate": 3.0684425057035443e-06, "loss": 0.114, "step": 95940 }, { "epoch": 2.837582066599633, "grad_norm": 0.7062802314758301, "learning_rate": 3.0683158157846047e-06, "loss": 0.0905, "step": 95950 }, { "epoch": 2.837877802093807, "grad_norm": 0.6864964365959167, "learning_rate": 3.0681891258656655e-06, "loss": 0.0806, "step": 95960 }, { "epoch": 2.8381735375879815, "grad_norm": 0.8081148862838745, "learning_rate": 3.068062435946726e-06, "loss": 0.0701, "step": 95970 }, { "epoch": 2.8384692730821555, "grad_norm": 0.9318079352378845, "learning_rate": 3.0679357460277867e-06, "loss": 0.0713, "step": 95980 }, { "epoch": 2.8387650085763294, "grad_norm": 0.7283614277839661, "learning_rate": 3.0678090561088474e-06, "loss": 0.0898, "step": 95990 }, { "epoch": 2.8390607440705034, "grad_norm": 0.5432910919189453, "learning_rate": 3.0676823661899082e-06, "loss": 0.0703, "step": 96000 }, { "epoch": 2.8393564795646773, "grad_norm": 0.7800233960151672, "learning_rate": 3.0675556762709686e-06, "loss": 0.0594, "step": 96010 }, { "epoch": 2.8396522150588512, "grad_norm": 0.7360520958900452, "learning_rate": 3.0674289863520294e-06, "loss": 0.0764, "step": 96020 }, { "epoch": 2.839947950553025, "grad_norm": 0.8838130831718445, "learning_rate": 3.0673022964330898e-06, "loss": 0.065, "step": 96030 }, { "epoch": 2.8402436860471996, "grad_norm": 0.3329341411590576, "learning_rate": 3.0671756065141505e-06, "loss": 0.078, "step": 96040 }, { "epoch": 2.8405394215413735, "grad_norm": 0.9723253846168518, "learning_rate": 3.067048916595211e-06, "loss": 0.09, "step": 96050 }, { "epoch": 2.8408351570355475, "grad_norm": 0.9076205492019653, "learning_rate": 3.0669222266762717e-06, "loss": 0.0763, "step": 96060 }, { "epoch": 2.8411308925297214, "grad_norm": 1.1409021615982056, "learning_rate": 3.0667955367573325e-06, "loss": 0.0779, "step": 96070 }, { "epoch": 2.8414266280238953, "grad_norm": 1.1579029560089111, "learning_rate": 3.0666688468383933e-06, "loss": 0.0732, "step": 96080 }, { "epoch": 2.8417223635180693, "grad_norm": 0.5851319432258606, "learning_rate": 3.0665421569194537e-06, "loss": 0.0875, "step": 96090 }, { "epoch": 2.8420180990122432, "grad_norm": 0.7427212595939636, "learning_rate": 3.066415467000514e-06, "loss": 0.0801, "step": 96100 }, { "epoch": 2.8423138345064176, "grad_norm": 0.7657721042633057, "learning_rate": 3.066288777081575e-06, "loss": 0.0808, "step": 96110 }, { "epoch": 2.8426095700005916, "grad_norm": 0.8938665390014648, "learning_rate": 3.066162087162635e-06, "loss": 0.0791, "step": 96120 }, { "epoch": 2.8429053054947655, "grad_norm": 0.7742533087730408, "learning_rate": 3.066035397243696e-06, "loss": 0.0772, "step": 96130 }, { "epoch": 2.8432010409889394, "grad_norm": 0.703113853931427, "learning_rate": 3.0659087073247563e-06, "loss": 0.0978, "step": 96140 }, { "epoch": 2.8434967764831134, "grad_norm": 0.8890879154205322, "learning_rate": 3.0657820174058175e-06, "loss": 0.0738, "step": 96150 }, { "epoch": 2.8437925119772878, "grad_norm": 0.8191998600959778, "learning_rate": 3.065655327486878e-06, "loss": 0.0798, "step": 96160 }, { "epoch": 2.8440882474714613, "grad_norm": 0.5532292127609253, "learning_rate": 3.0655286375679387e-06, "loss": 0.0737, "step": 96170 }, { "epoch": 2.8443839829656357, "grad_norm": 1.2901811599731445, "learning_rate": 3.065401947648999e-06, "loss": 0.0662, "step": 96180 }, { "epoch": 2.8446797184598096, "grad_norm": 1.214732050895691, "learning_rate": 3.06527525773006e-06, "loss": 0.0796, "step": 96190 }, { "epoch": 2.8449754539539835, "grad_norm": 0.662597119808197, "learning_rate": 3.0651485678111202e-06, "loss": 0.0816, "step": 96200 }, { "epoch": 2.8452711894481575, "grad_norm": 0.6253504753112793, "learning_rate": 3.065021877892181e-06, "loss": 0.068, "step": 96210 }, { "epoch": 2.8455669249423314, "grad_norm": 0.7394058108329773, "learning_rate": 3.0648951879732414e-06, "loss": 0.0608, "step": 96220 }, { "epoch": 2.845862660436506, "grad_norm": 1.034192681312561, "learning_rate": 3.0647684980543026e-06, "loss": 0.0791, "step": 96230 }, { "epoch": 2.8461583959306798, "grad_norm": 0.6421331167221069, "learning_rate": 3.064641808135363e-06, "loss": 0.0756, "step": 96240 }, { "epoch": 2.8464541314248537, "grad_norm": 0.8088926076889038, "learning_rate": 3.0645151182164237e-06, "loss": 0.0851, "step": 96250 }, { "epoch": 2.8467498669190276, "grad_norm": 0.901692807674408, "learning_rate": 3.064388428297484e-06, "loss": 0.0762, "step": 96260 }, { "epoch": 2.8470456024132016, "grad_norm": 0.572730541229248, "learning_rate": 3.064261738378545e-06, "loss": 0.0691, "step": 96270 }, { "epoch": 2.8473413379073755, "grad_norm": 0.4929046034812927, "learning_rate": 3.0641350484596053e-06, "loss": 0.0917, "step": 96280 }, { "epoch": 2.8476370734015495, "grad_norm": 1.0333070755004883, "learning_rate": 3.064008358540666e-06, "loss": 0.087, "step": 96290 }, { "epoch": 2.847932808895724, "grad_norm": 0.8678668141365051, "learning_rate": 3.0638816686217264e-06, "loss": 0.0874, "step": 96300 }, { "epoch": 2.848228544389898, "grad_norm": 1.1638911962509155, "learning_rate": 3.0637549787027876e-06, "loss": 0.0758, "step": 96310 }, { "epoch": 2.8485242798840718, "grad_norm": 0.9200000762939453, "learning_rate": 3.063628288783848e-06, "loss": 0.0817, "step": 96320 }, { "epoch": 2.8488200153782457, "grad_norm": 0.5784204006195068, "learning_rate": 3.0635015988649088e-06, "loss": 0.0783, "step": 96330 }, { "epoch": 2.8491157508724196, "grad_norm": 0.8438665270805359, "learning_rate": 3.063374908945969e-06, "loss": 0.0813, "step": 96340 }, { "epoch": 2.8494114863665936, "grad_norm": 0.6618569493293762, "learning_rate": 3.06324821902703e-06, "loss": 0.0902, "step": 96350 }, { "epoch": 2.8497072218607675, "grad_norm": 0.6925318241119385, "learning_rate": 3.0631215291080903e-06, "loss": 0.0685, "step": 96360 }, { "epoch": 2.850002957354942, "grad_norm": 0.538933515548706, "learning_rate": 3.062994839189151e-06, "loss": 0.07, "step": 96370 }, { "epoch": 2.850298692849116, "grad_norm": 0.7162027955055237, "learning_rate": 3.0628681492702115e-06, "loss": 0.0672, "step": 96380 }, { "epoch": 2.85059442834329, "grad_norm": 0.5749326944351196, "learning_rate": 3.0627414593512727e-06, "loss": 0.0834, "step": 96390 }, { "epoch": 2.8508901638374637, "grad_norm": 0.7693763971328735, "learning_rate": 3.062614769432333e-06, "loss": 0.0875, "step": 96400 }, { "epoch": 2.8511858993316377, "grad_norm": 0.6630040407180786, "learning_rate": 3.062488079513394e-06, "loss": 0.0704, "step": 96410 }, { "epoch": 2.8514816348258116, "grad_norm": 0.5662199854850769, "learning_rate": 3.062361389594454e-06, "loss": 0.0747, "step": 96420 }, { "epoch": 2.8517773703199856, "grad_norm": 0.8745583891868591, "learning_rate": 3.062234699675515e-06, "loss": 0.0579, "step": 96430 }, { "epoch": 2.85207310581416, "grad_norm": 0.8354657888412476, "learning_rate": 3.0621080097565754e-06, "loss": 0.0803, "step": 96440 }, { "epoch": 2.852368841308334, "grad_norm": 0.7311144471168518, "learning_rate": 3.061981319837636e-06, "loss": 0.0983, "step": 96450 }, { "epoch": 2.852664576802508, "grad_norm": 0.5199628472328186, "learning_rate": 3.0618546299186965e-06, "loss": 0.0748, "step": 96460 }, { "epoch": 2.852960312296682, "grad_norm": 0.8445791006088257, "learning_rate": 3.0617279399997577e-06, "loss": 0.0693, "step": 96470 }, { "epoch": 2.8532560477908557, "grad_norm": 0.8053773045539856, "learning_rate": 3.061601250080818e-06, "loss": 0.0718, "step": 96480 }, { "epoch": 2.85355178328503, "grad_norm": 0.9205760955810547, "learning_rate": 3.061474560161879e-06, "loss": 0.0776, "step": 96490 }, { "epoch": 2.8538475187792036, "grad_norm": 0.6243491768836975, "learning_rate": 3.0613478702429392e-06, "loss": 0.0947, "step": 96500 }, { "epoch": 2.854143254273378, "grad_norm": 0.6653279066085815, "learning_rate": 3.0612211803239996e-06, "loss": 0.0898, "step": 96510 }, { "epoch": 2.854438989767552, "grad_norm": 0.5661172866821289, "learning_rate": 3.0610944904050604e-06, "loss": 0.0818, "step": 96520 }, { "epoch": 2.854734725261726, "grad_norm": 1.3442411422729492, "learning_rate": 3.0609678004861208e-06, "loss": 0.0753, "step": 96530 }, { "epoch": 2.8550304607559, "grad_norm": 0.5779563188552856, "learning_rate": 3.0608411105671816e-06, "loss": 0.0928, "step": 96540 }, { "epoch": 2.855326196250074, "grad_norm": 0.5867509245872498, "learning_rate": 3.0607144206482423e-06, "loss": 0.0736, "step": 96550 }, { "epoch": 2.855621931744248, "grad_norm": 0.6126600503921509, "learning_rate": 3.060587730729303e-06, "loss": 0.0669, "step": 96560 }, { "epoch": 2.855917667238422, "grad_norm": 0.7354921698570251, "learning_rate": 3.0604610408103635e-06, "loss": 0.067, "step": 96570 }, { "epoch": 2.856213402732596, "grad_norm": 0.6730086803436279, "learning_rate": 3.0603343508914243e-06, "loss": 0.0824, "step": 96580 }, { "epoch": 2.85650913822677, "grad_norm": 1.057782530784607, "learning_rate": 3.0602076609724847e-06, "loss": 0.0911, "step": 96590 }, { "epoch": 2.856804873720944, "grad_norm": 0.8106725215911865, "learning_rate": 3.0600809710535454e-06, "loss": 0.0738, "step": 96600 }, { "epoch": 2.857100609215118, "grad_norm": 0.5261502265930176, "learning_rate": 3.059954281134606e-06, "loss": 0.0716, "step": 96610 }, { "epoch": 2.857396344709292, "grad_norm": 0.6067133545875549, "learning_rate": 3.0598275912156666e-06, "loss": 0.0689, "step": 96620 }, { "epoch": 2.857692080203466, "grad_norm": 0.8337298035621643, "learning_rate": 3.0597009012967274e-06, "loss": 0.0701, "step": 96630 }, { "epoch": 2.85798781569764, "grad_norm": 0.8642124533653259, "learning_rate": 3.059574211377788e-06, "loss": 0.0834, "step": 96640 }, { "epoch": 2.858283551191814, "grad_norm": 0.7585806846618652, "learning_rate": 3.0594475214588485e-06, "loss": 0.0805, "step": 96650 }, { "epoch": 2.858579286685988, "grad_norm": 0.5115651488304138, "learning_rate": 3.0593208315399093e-06, "loss": 0.0713, "step": 96660 }, { "epoch": 2.858875022180162, "grad_norm": 0.5046822428703308, "learning_rate": 3.0591941416209697e-06, "loss": 0.0605, "step": 96670 }, { "epoch": 2.859170757674336, "grad_norm": 1.0667575597763062, "learning_rate": 3.0590674517020305e-06, "loss": 0.0627, "step": 96680 }, { "epoch": 2.85946649316851, "grad_norm": 0.8498280048370361, "learning_rate": 3.058940761783091e-06, "loss": 0.077, "step": 96690 }, { "epoch": 2.8597622286626843, "grad_norm": 0.8511611223220825, "learning_rate": 3.0588140718641516e-06, "loss": 0.079, "step": 96700 }, { "epoch": 2.860057964156858, "grad_norm": 0.6089348196983337, "learning_rate": 3.0586873819452124e-06, "loss": 0.0818, "step": 96710 }, { "epoch": 2.860353699651032, "grad_norm": 1.121002197265625, "learning_rate": 3.0585606920262732e-06, "loss": 0.0837, "step": 96720 }, { "epoch": 2.860649435145206, "grad_norm": 0.9790201783180237, "learning_rate": 3.0584340021073336e-06, "loss": 0.0683, "step": 96730 }, { "epoch": 2.86094517063938, "grad_norm": 0.8206919431686401, "learning_rate": 3.0583073121883944e-06, "loss": 0.0772, "step": 96740 }, { "epoch": 2.8612409061335544, "grad_norm": 0.9177082180976868, "learning_rate": 3.0581806222694547e-06, "loss": 0.0879, "step": 96750 }, { "epoch": 2.861536641627728, "grad_norm": 0.9483222961425781, "learning_rate": 3.0580539323505155e-06, "loss": 0.0748, "step": 96760 }, { "epoch": 2.8618323771219023, "grad_norm": 1.0240710973739624, "learning_rate": 3.057927242431576e-06, "loss": 0.0783, "step": 96770 }, { "epoch": 2.8621281126160762, "grad_norm": 0.8136853575706482, "learning_rate": 3.0578005525126367e-06, "loss": 0.0811, "step": 96780 }, { "epoch": 2.86242384811025, "grad_norm": 0.6258880496025085, "learning_rate": 3.0576738625936975e-06, "loss": 0.0814, "step": 96790 }, { "epoch": 2.862719583604424, "grad_norm": 0.8654016852378845, "learning_rate": 3.0575471726747583e-06, "loss": 0.0966, "step": 96800 }, { "epoch": 2.863015319098598, "grad_norm": 0.5502139925956726, "learning_rate": 3.0574204827558186e-06, "loss": 0.0825, "step": 96810 }, { "epoch": 2.8633110545927725, "grad_norm": 0.5224237442016602, "learning_rate": 3.0572937928368794e-06, "loss": 0.0746, "step": 96820 }, { "epoch": 2.863606790086946, "grad_norm": 0.984969973564148, "learning_rate": 3.05716710291794e-06, "loss": 0.0674, "step": 96830 }, { "epoch": 2.8639025255811204, "grad_norm": 0.6386361718177795, "learning_rate": 3.0570404129990006e-06, "loss": 0.0829, "step": 96840 }, { "epoch": 2.8641982610752943, "grad_norm": 0.5915345549583435, "learning_rate": 3.056913723080061e-06, "loss": 0.0683, "step": 96850 }, { "epoch": 2.8644939965694682, "grad_norm": 0.60155189037323, "learning_rate": 3.0567870331611217e-06, "loss": 0.0751, "step": 96860 }, { "epoch": 2.864789732063642, "grad_norm": 0.7681757211685181, "learning_rate": 3.0566603432421825e-06, "loss": 0.0647, "step": 96870 }, { "epoch": 2.865085467557816, "grad_norm": 1.404589295387268, "learning_rate": 3.0565336533232433e-06, "loss": 0.0794, "step": 96880 }, { "epoch": 2.8653812030519905, "grad_norm": 0.7729214429855347, "learning_rate": 3.0564069634043037e-06, "loss": 0.079, "step": 96890 }, { "epoch": 2.8656769385461645, "grad_norm": 0.4369029700756073, "learning_rate": 3.0562802734853645e-06, "loss": 0.0991, "step": 96900 }, { "epoch": 2.8659726740403384, "grad_norm": 0.5924579501152039, "learning_rate": 3.056153583566425e-06, "loss": 0.0697, "step": 96910 }, { "epoch": 2.8662684095345123, "grad_norm": 0.6260709762573242, "learning_rate": 3.0560268936474856e-06, "loss": 0.0576, "step": 96920 }, { "epoch": 2.8665641450286863, "grad_norm": 0.623414933681488, "learning_rate": 3.055900203728546e-06, "loss": 0.0591, "step": 96930 }, { "epoch": 2.8668598805228602, "grad_norm": 0.5914831757545471, "learning_rate": 3.0557735138096064e-06, "loss": 0.0901, "step": 96940 }, { "epoch": 2.867155616017034, "grad_norm": 0.5181540250778198, "learning_rate": 3.0556468238906676e-06, "loss": 0.073, "step": 96950 }, { "epoch": 2.8674513515112086, "grad_norm": 0.9376007318496704, "learning_rate": 3.055520133971728e-06, "loss": 0.077, "step": 96960 }, { "epoch": 2.8677470870053825, "grad_norm": 0.6024037599563599, "learning_rate": 3.0553934440527887e-06, "loss": 0.0585, "step": 96970 }, { "epoch": 2.8680428224995564, "grad_norm": 0.8397935032844543, "learning_rate": 3.055266754133849e-06, "loss": 0.0688, "step": 96980 }, { "epoch": 2.8683385579937304, "grad_norm": 0.7927919030189514, "learning_rate": 3.05514006421491e-06, "loss": 0.089, "step": 96990 }, { "epoch": 2.8686342934879043, "grad_norm": 0.7194389700889587, "learning_rate": 3.0550133742959702e-06, "loss": 0.0664, "step": 97000 }, { "epoch": 2.8689300289820783, "grad_norm": 1.007534384727478, "learning_rate": 3.054886684377031e-06, "loss": 0.0907, "step": 97010 }, { "epoch": 2.869225764476252, "grad_norm": 0.5408751964569092, "learning_rate": 3.0547599944580914e-06, "loss": 0.0758, "step": 97020 }, { "epoch": 2.8695214999704266, "grad_norm": 0.7341688275337219, "learning_rate": 3.0546333045391526e-06, "loss": 0.0655, "step": 97030 }, { "epoch": 2.8698172354646005, "grad_norm": 0.7608625888824463, "learning_rate": 3.054506614620213e-06, "loss": 0.0887, "step": 97040 }, { "epoch": 2.8701129709587745, "grad_norm": 1.1696652173995972, "learning_rate": 3.0543799247012738e-06, "loss": 0.0802, "step": 97050 }, { "epoch": 2.8704087064529484, "grad_norm": 0.5688692927360535, "learning_rate": 3.054253234782334e-06, "loss": 0.0755, "step": 97060 }, { "epoch": 2.8707044419471224, "grad_norm": 0.3866788446903229, "learning_rate": 3.054126544863395e-06, "loss": 0.0704, "step": 97070 }, { "epoch": 2.8710001774412968, "grad_norm": 0.8647569417953491, "learning_rate": 3.0539998549444553e-06, "loss": 0.065, "step": 97080 }, { "epoch": 2.8712959129354703, "grad_norm": 1.031577467918396, "learning_rate": 3.053873165025516e-06, "loss": 0.0912, "step": 97090 }, { "epoch": 2.8715916484296446, "grad_norm": 0.4611479938030243, "learning_rate": 3.0537464751065764e-06, "loss": 0.0766, "step": 97100 }, { "epoch": 2.8718873839238186, "grad_norm": 0.6927909851074219, "learning_rate": 3.0536197851876377e-06, "loss": 0.0816, "step": 97110 }, { "epoch": 2.8721831194179925, "grad_norm": 0.5923721790313721, "learning_rate": 3.053493095268698e-06, "loss": 0.073, "step": 97120 }, { "epoch": 2.8724788549121665, "grad_norm": 1.0235674381256104, "learning_rate": 3.053366405349759e-06, "loss": 0.0543, "step": 97130 }, { "epoch": 2.8727745904063404, "grad_norm": 0.9172337651252747, "learning_rate": 3.053239715430819e-06, "loss": 0.0806, "step": 97140 }, { "epoch": 2.873070325900515, "grad_norm": 0.722306489944458, "learning_rate": 3.05311302551188e-06, "loss": 0.0721, "step": 97150 }, { "epoch": 2.8733660613946888, "grad_norm": 0.49613943696022034, "learning_rate": 3.0529863355929403e-06, "loss": 0.0712, "step": 97160 }, { "epoch": 2.8736617968888627, "grad_norm": 0.5498660206794739, "learning_rate": 3.052859645674001e-06, "loss": 0.0567, "step": 97170 }, { "epoch": 2.8739575323830366, "grad_norm": 0.9618596434593201, "learning_rate": 3.0527329557550615e-06, "loss": 0.0652, "step": 97180 }, { "epoch": 2.8742532678772106, "grad_norm": 0.6103780269622803, "learning_rate": 3.0526062658361227e-06, "loss": 0.0716, "step": 97190 }, { "epoch": 2.8745490033713845, "grad_norm": 0.8355284333229065, "learning_rate": 3.052479575917183e-06, "loss": 0.0891, "step": 97200 }, { "epoch": 2.8748447388655585, "grad_norm": 0.8418568968772888, "learning_rate": 3.052352885998244e-06, "loss": 0.0722, "step": 97210 }, { "epoch": 2.875140474359733, "grad_norm": 0.9824134707450867, "learning_rate": 3.0522261960793042e-06, "loss": 0.0774, "step": 97220 }, { "epoch": 2.875436209853907, "grad_norm": 0.9814562201499939, "learning_rate": 3.052099506160365e-06, "loss": 0.0572, "step": 97230 }, { "epoch": 2.8757319453480807, "grad_norm": 1.514174461364746, "learning_rate": 3.0519728162414254e-06, "loss": 0.0862, "step": 97240 }, { "epoch": 2.8760276808422547, "grad_norm": 1.0266807079315186, "learning_rate": 3.051846126322486e-06, "loss": 0.0979, "step": 97250 }, { "epoch": 2.8763234163364286, "grad_norm": 0.6898698210716248, "learning_rate": 3.0517194364035465e-06, "loss": 0.0665, "step": 97260 }, { "epoch": 2.8766191518306026, "grad_norm": 0.43362289667129517, "learning_rate": 3.0515927464846078e-06, "loss": 0.0619, "step": 97270 }, { "epoch": 2.8769148873247765, "grad_norm": 0.6550460457801819, "learning_rate": 3.051466056565668e-06, "loss": 0.0786, "step": 97280 }, { "epoch": 2.877210622818951, "grad_norm": 1.0292025804519653, "learning_rate": 3.051339366646729e-06, "loss": 0.0959, "step": 97290 }, { "epoch": 2.877506358313125, "grad_norm": 0.722726047039032, "learning_rate": 3.0512126767277893e-06, "loss": 0.0763, "step": 97300 }, { "epoch": 2.877802093807299, "grad_norm": 0.4766368269920349, "learning_rate": 3.05108598680885e-06, "loss": 0.0689, "step": 97310 }, { "epoch": 2.8780978293014727, "grad_norm": 0.848268985748291, "learning_rate": 3.0509592968899104e-06, "loss": 0.0749, "step": 97320 }, { "epoch": 2.8783935647956467, "grad_norm": 0.9142272472381592, "learning_rate": 3.0508326069709712e-06, "loss": 0.0554, "step": 97330 }, { "epoch": 2.8786893002898206, "grad_norm": 1.0762149095535278, "learning_rate": 3.0507059170520316e-06, "loss": 0.0742, "step": 97340 }, { "epoch": 2.8789850357839946, "grad_norm": 0.5434954166412354, "learning_rate": 3.050579227133092e-06, "loss": 0.0705, "step": 97350 }, { "epoch": 2.879280771278169, "grad_norm": 0.7428567409515381, "learning_rate": 3.050452537214153e-06, "loss": 0.0817, "step": 97360 }, { "epoch": 2.879576506772343, "grad_norm": 1.0524710416793823, "learning_rate": 3.0503258472952135e-06, "loss": 0.0906, "step": 97370 }, { "epoch": 2.879872242266517, "grad_norm": 0.6714532971382141, "learning_rate": 3.0501991573762743e-06, "loss": 0.0881, "step": 97380 }, { "epoch": 2.880167977760691, "grad_norm": 0.5457287430763245, "learning_rate": 3.0500724674573347e-06, "loss": 0.0883, "step": 97390 }, { "epoch": 2.8804637132548647, "grad_norm": 0.95691978931427, "learning_rate": 3.0499457775383955e-06, "loss": 0.0901, "step": 97400 }, { "epoch": 2.880759448749039, "grad_norm": 1.0476875305175781, "learning_rate": 3.049819087619456e-06, "loss": 0.0774, "step": 97410 }, { "epoch": 2.8810551842432126, "grad_norm": 0.7937028408050537, "learning_rate": 3.0496923977005166e-06, "loss": 0.0687, "step": 97420 }, { "epoch": 2.881350919737387, "grad_norm": 1.2004913091659546, "learning_rate": 3.049565707781577e-06, "loss": 0.0555, "step": 97430 }, { "epoch": 2.881646655231561, "grad_norm": 0.6301331520080566, "learning_rate": 3.049439017862638e-06, "loss": 0.0825, "step": 97440 }, { "epoch": 2.881942390725735, "grad_norm": 0.5450118780136108, "learning_rate": 3.0493123279436986e-06, "loss": 0.0804, "step": 97450 }, { "epoch": 2.882238126219909, "grad_norm": 0.8255667090415955, "learning_rate": 3.0491856380247594e-06, "loss": 0.0807, "step": 97460 }, { "epoch": 2.8825338617140828, "grad_norm": 0.583798348903656, "learning_rate": 3.0490589481058197e-06, "loss": 0.0624, "step": 97470 }, { "epoch": 2.882829597208257, "grad_norm": 0.7774698138237, "learning_rate": 3.0489322581868805e-06, "loss": 0.0504, "step": 97480 }, { "epoch": 2.883125332702431, "grad_norm": 0.7127782702445984, "learning_rate": 3.048805568267941e-06, "loss": 0.0793, "step": 97490 }, { "epoch": 2.883421068196605, "grad_norm": 0.8532427549362183, "learning_rate": 3.0486788783490017e-06, "loss": 0.0851, "step": 97500 }, { "epoch": 2.883716803690779, "grad_norm": 0.8533177375793457, "learning_rate": 3.048552188430062e-06, "loss": 0.0811, "step": 97510 }, { "epoch": 2.884012539184953, "grad_norm": 0.9805999994277954, "learning_rate": 3.0484254985111233e-06, "loss": 0.0538, "step": 97520 }, { "epoch": 2.884308274679127, "grad_norm": 1.1038967370986938, "learning_rate": 3.0482988085921836e-06, "loss": 0.0727, "step": 97530 }, { "epoch": 2.884604010173301, "grad_norm": 0.7868649959564209, "learning_rate": 3.0481721186732444e-06, "loss": 0.0794, "step": 97540 }, { "epoch": 2.884899745667475, "grad_norm": 0.6829120516777039, "learning_rate": 3.0480454287543048e-06, "loss": 0.0924, "step": 97550 }, { "epoch": 2.885195481161649, "grad_norm": 0.5810517072677612, "learning_rate": 3.0479187388353656e-06, "loss": 0.0909, "step": 97560 }, { "epoch": 2.885491216655823, "grad_norm": 0.6139417886734009, "learning_rate": 3.047792048916426e-06, "loss": 0.0986, "step": 97570 }, { "epoch": 2.885786952149997, "grad_norm": 0.7029429078102112, "learning_rate": 3.0476653589974867e-06, "loss": 0.0705, "step": 97580 }, { "epoch": 2.886082687644171, "grad_norm": 0.6172345876693726, "learning_rate": 3.047538669078547e-06, "loss": 0.0711, "step": 97590 }, { "epoch": 2.886378423138345, "grad_norm": 1.1998443603515625, "learning_rate": 3.0474119791596083e-06, "loss": 0.0986, "step": 97600 }, { "epoch": 2.886674158632519, "grad_norm": 0.5796893835067749, "learning_rate": 3.0472852892406687e-06, "loss": 0.0739, "step": 97610 }, { "epoch": 2.8869698941266932, "grad_norm": 0.473389208316803, "learning_rate": 3.0471585993217295e-06, "loss": 0.075, "step": 97620 }, { "epoch": 2.887265629620867, "grad_norm": 0.6886554956436157, "learning_rate": 3.04703190940279e-06, "loss": 0.0766, "step": 97630 }, { "epoch": 2.887561365115041, "grad_norm": 0.8177501559257507, "learning_rate": 3.0469052194838506e-06, "loss": 0.0846, "step": 97640 }, { "epoch": 2.887857100609215, "grad_norm": 0.9167170524597168, "learning_rate": 3.046778529564911e-06, "loss": 0.0817, "step": 97650 }, { "epoch": 2.888152836103389, "grad_norm": 0.7669001221656799, "learning_rate": 3.0466518396459718e-06, "loss": 0.0717, "step": 97660 }, { "epoch": 2.8884485715975634, "grad_norm": 1.0116755962371826, "learning_rate": 3.046525149727032e-06, "loss": 0.0528, "step": 97670 }, { "epoch": 2.888744307091737, "grad_norm": 1.1004834175109863, "learning_rate": 3.0463984598080933e-06, "loss": 0.0643, "step": 97680 }, { "epoch": 2.8890400425859113, "grad_norm": 0.7962668538093567, "learning_rate": 3.0462717698891537e-06, "loss": 0.0919, "step": 97690 }, { "epoch": 2.8893357780800852, "grad_norm": 0.7100151777267456, "learning_rate": 3.0461450799702145e-06, "loss": 0.0794, "step": 97700 }, { "epoch": 2.889631513574259, "grad_norm": 0.487413614988327, "learning_rate": 3.046018390051275e-06, "loss": 0.0602, "step": 97710 }, { "epoch": 2.889927249068433, "grad_norm": 0.5239264965057373, "learning_rate": 3.0458917001323357e-06, "loss": 0.0693, "step": 97720 }, { "epoch": 2.890222984562607, "grad_norm": 1.0405550003051758, "learning_rate": 3.045765010213396e-06, "loss": 0.0637, "step": 97730 }, { "epoch": 2.8905187200567815, "grad_norm": 0.787160336971283, "learning_rate": 3.045638320294457e-06, "loss": 0.0794, "step": 97740 }, { "epoch": 2.890814455550955, "grad_norm": 0.5927099585533142, "learning_rate": 3.045511630375517e-06, "loss": 0.0939, "step": 97750 }, { "epoch": 2.8911101910451293, "grad_norm": 0.8635073900222778, "learning_rate": 3.045384940456578e-06, "loss": 0.0809, "step": 97760 }, { "epoch": 2.8914059265393033, "grad_norm": 0.792415201663971, "learning_rate": 3.0452582505376388e-06, "loss": 0.0776, "step": 97770 }, { "epoch": 2.8917016620334772, "grad_norm": 1.0038906335830688, "learning_rate": 3.045131560618699e-06, "loss": 0.0695, "step": 97780 }, { "epoch": 2.891997397527651, "grad_norm": 0.9077304601669312, "learning_rate": 3.04500487069976e-06, "loss": 0.0747, "step": 97790 }, { "epoch": 2.892293133021825, "grad_norm": 1.0287927389144897, "learning_rate": 3.0448781807808203e-06, "loss": 0.0774, "step": 97800 }, { "epoch": 2.8925888685159995, "grad_norm": 0.9969298243522644, "learning_rate": 3.044751490861881e-06, "loss": 0.0725, "step": 97810 }, { "epoch": 2.8928846040101734, "grad_norm": 0.9566528797149658, "learning_rate": 3.0446248009429414e-06, "loss": 0.0656, "step": 97820 }, { "epoch": 2.8931803395043474, "grad_norm": 0.7702236175537109, "learning_rate": 3.0444981110240022e-06, "loss": 0.0798, "step": 97830 }, { "epoch": 2.8934760749985213, "grad_norm": 1.3253357410430908, "learning_rate": 3.044371421105063e-06, "loss": 0.0901, "step": 97840 }, { "epoch": 2.8937718104926953, "grad_norm": 0.81590336561203, "learning_rate": 3.044244731186124e-06, "loss": 0.0807, "step": 97850 }, { "epoch": 2.894067545986869, "grad_norm": 0.9911730885505676, "learning_rate": 3.044118041267184e-06, "loss": 0.0798, "step": 97860 }, { "epoch": 2.894363281481043, "grad_norm": 0.6784887313842773, "learning_rate": 3.043991351348245e-06, "loss": 0.0591, "step": 97870 }, { "epoch": 2.8946590169752175, "grad_norm": 1.2844457626342773, "learning_rate": 3.0438646614293053e-06, "loss": 0.0685, "step": 97880 }, { "epoch": 2.8949547524693915, "grad_norm": 0.8467706441879272, "learning_rate": 3.043737971510366e-06, "loss": 0.0819, "step": 97890 }, { "epoch": 2.8952504879635654, "grad_norm": 1.3386913537979126, "learning_rate": 3.0436112815914265e-06, "loss": 0.0962, "step": 97900 }, { "epoch": 2.8955462234577394, "grad_norm": 0.9986451864242554, "learning_rate": 3.0434845916724873e-06, "loss": 0.0787, "step": 97910 }, { "epoch": 2.8958419589519133, "grad_norm": 0.7054981589317322, "learning_rate": 3.043357901753548e-06, "loss": 0.0771, "step": 97920 }, { "epoch": 2.8961376944460873, "grad_norm": 0.5373132824897766, "learning_rate": 3.043231211834609e-06, "loss": 0.0749, "step": 97930 }, { "epoch": 2.896433429940261, "grad_norm": 0.6509149670600891, "learning_rate": 3.0431045219156692e-06, "loss": 0.0902, "step": 97940 }, { "epoch": 2.8967291654344356, "grad_norm": 0.5413529276847839, "learning_rate": 3.04297783199673e-06, "loss": 0.0766, "step": 97950 }, { "epoch": 2.8970249009286095, "grad_norm": 1.5584372282028198, "learning_rate": 3.0428511420777904e-06, "loss": 0.0736, "step": 97960 }, { "epoch": 2.8973206364227835, "grad_norm": 1.9900329113006592, "learning_rate": 3.042724452158851e-06, "loss": 0.0792, "step": 97970 }, { "epoch": 2.8976163719169574, "grad_norm": 0.5786042809486389, "learning_rate": 3.0425977622399115e-06, "loss": 0.055, "step": 97980 }, { "epoch": 2.8979121074111314, "grad_norm": 0.8684082627296448, "learning_rate": 3.0424710723209723e-06, "loss": 0.0896, "step": 97990 }, { "epoch": 2.8982078429053058, "grad_norm": 1.1229256391525269, "learning_rate": 3.042344382402033e-06, "loss": 0.0913, "step": 98000 }, { "epoch": 2.8985035783994793, "grad_norm": 1.0576518774032593, "learning_rate": 3.042217692483094e-06, "loss": 0.076, "step": 98010 }, { "epoch": 2.8987993138936536, "grad_norm": 0.6986780166625977, "learning_rate": 3.0420910025641543e-06, "loss": 0.081, "step": 98020 }, { "epoch": 2.8990950493878276, "grad_norm": 0.7080106139183044, "learning_rate": 3.041964312645215e-06, "loss": 0.0573, "step": 98030 }, { "epoch": 2.8993907848820015, "grad_norm": 0.7192469239234924, "learning_rate": 3.0418376227262754e-06, "loss": 0.0894, "step": 98040 }, { "epoch": 2.8996865203761755, "grad_norm": 1.1606918573379517, "learning_rate": 3.041710932807336e-06, "loss": 0.0919, "step": 98050 }, { "epoch": 2.8999822558703494, "grad_norm": 0.9065273404121399, "learning_rate": 3.0415842428883966e-06, "loss": 0.0855, "step": 98060 }, { "epoch": 2.900277991364524, "grad_norm": 0.46545201539993286, "learning_rate": 3.0414575529694578e-06, "loss": 0.0763, "step": 98070 }, { "epoch": 2.9005737268586977, "grad_norm": 1.5168368816375732, "learning_rate": 3.041330863050518e-06, "loss": 0.076, "step": 98080 }, { "epoch": 2.9008694623528717, "grad_norm": 0.5102985501289368, "learning_rate": 3.041204173131579e-06, "loss": 0.0865, "step": 98090 }, { "epoch": 2.9011651978470456, "grad_norm": 0.7247375845909119, "learning_rate": 3.0410774832126393e-06, "loss": 0.0831, "step": 98100 }, { "epoch": 2.9014609333412196, "grad_norm": 0.7180176973342896, "learning_rate": 3.0409507932937e-06, "loss": 0.0746, "step": 98110 }, { "epoch": 2.9017566688353935, "grad_norm": 1.3209444284439087, "learning_rate": 3.0408241033747605e-06, "loss": 0.0934, "step": 98120 }, { "epoch": 2.9020524043295675, "grad_norm": 0.7823948860168457, "learning_rate": 3.0406974134558212e-06, "loss": 0.0661, "step": 98130 }, { "epoch": 2.902348139823742, "grad_norm": 0.3850722014904022, "learning_rate": 3.0405707235368816e-06, "loss": 0.0651, "step": 98140 }, { "epoch": 2.902643875317916, "grad_norm": 0.7882539629936218, "learning_rate": 3.040444033617943e-06, "loss": 0.0803, "step": 98150 }, { "epoch": 2.9029396108120897, "grad_norm": 1.0245224237442017, "learning_rate": 3.040317343699003e-06, "loss": 0.0869, "step": 98160 }, { "epoch": 2.9032353463062637, "grad_norm": 0.5872160792350769, "learning_rate": 3.0401906537800636e-06, "loss": 0.0706, "step": 98170 }, { "epoch": 2.9035310818004376, "grad_norm": 0.8903976678848267, "learning_rate": 3.0400639638611243e-06, "loss": 0.0744, "step": 98180 }, { "epoch": 2.9038268172946116, "grad_norm": 1.0910645723342896, "learning_rate": 3.0399372739421847e-06, "loss": 0.0853, "step": 98190 }, { "epoch": 2.9041225527887855, "grad_norm": 0.9896368980407715, "learning_rate": 3.0398105840232455e-06, "loss": 0.0808, "step": 98200 }, { "epoch": 2.90441828828296, "grad_norm": 0.8472151160240173, "learning_rate": 3.039683894104306e-06, "loss": 0.0858, "step": 98210 }, { "epoch": 2.904714023777134, "grad_norm": 0.8524237275123596, "learning_rate": 3.0395572041853667e-06, "loss": 0.0721, "step": 98220 }, { "epoch": 2.905009759271308, "grad_norm": 0.7166328430175781, "learning_rate": 3.039430514266427e-06, "loss": 0.0673, "step": 98230 }, { "epoch": 2.9053054947654817, "grad_norm": 0.801827609539032, "learning_rate": 3.0393038243474882e-06, "loss": 0.0841, "step": 98240 }, { "epoch": 2.9056012302596557, "grad_norm": 0.7567864060401917, "learning_rate": 3.0391771344285486e-06, "loss": 0.0717, "step": 98250 }, { "epoch": 2.9058969657538296, "grad_norm": 0.7104691863059998, "learning_rate": 3.0390504445096094e-06, "loss": 0.0663, "step": 98260 }, { "epoch": 2.9061927012480036, "grad_norm": 0.801448404788971, "learning_rate": 3.0389237545906698e-06, "loss": 0.0829, "step": 98270 }, { "epoch": 2.906488436742178, "grad_norm": 0.7725269198417664, "learning_rate": 3.0387970646717305e-06, "loss": 0.064, "step": 98280 }, { "epoch": 2.906784172236352, "grad_norm": 1.4895399808883667, "learning_rate": 3.038670374752791e-06, "loss": 0.074, "step": 98290 }, { "epoch": 2.907079907730526, "grad_norm": 0.7770376801490784, "learning_rate": 3.0385436848338517e-06, "loss": 0.0788, "step": 98300 }, { "epoch": 2.9073756432246998, "grad_norm": 0.6934806108474731, "learning_rate": 3.038416994914912e-06, "loss": 0.0767, "step": 98310 }, { "epoch": 2.9076713787188737, "grad_norm": 0.7986049056053162, "learning_rate": 3.0382903049959733e-06, "loss": 0.0709, "step": 98320 }, { "epoch": 2.907967114213048, "grad_norm": 0.6061950325965881, "learning_rate": 3.0381636150770336e-06, "loss": 0.0607, "step": 98330 }, { "epoch": 2.9082628497072216, "grad_norm": 1.6033765077590942, "learning_rate": 3.0380369251580944e-06, "loss": 0.0861, "step": 98340 }, { "epoch": 2.908558585201396, "grad_norm": 0.6156417727470398, "learning_rate": 3.037910235239155e-06, "loss": 0.089, "step": 98350 }, { "epoch": 2.90885432069557, "grad_norm": 0.432161420583725, "learning_rate": 3.0377835453202156e-06, "loss": 0.0671, "step": 98360 }, { "epoch": 2.909150056189744, "grad_norm": 0.6568943858146667, "learning_rate": 3.037656855401276e-06, "loss": 0.0665, "step": 98370 }, { "epoch": 2.909445791683918, "grad_norm": 0.7254111170768738, "learning_rate": 3.0375301654823367e-06, "loss": 0.0809, "step": 98380 }, { "epoch": 2.9097415271780918, "grad_norm": 0.614208459854126, "learning_rate": 3.037403475563397e-06, "loss": 0.0764, "step": 98390 }, { "epoch": 2.910037262672266, "grad_norm": 0.8128306269645691, "learning_rate": 3.0372767856444583e-06, "loss": 0.0812, "step": 98400 }, { "epoch": 2.91033299816644, "grad_norm": 0.6121509075164795, "learning_rate": 3.0371500957255187e-06, "loss": 0.0689, "step": 98410 }, { "epoch": 2.910628733660614, "grad_norm": 0.7433437705039978, "learning_rate": 3.0370234058065795e-06, "loss": 0.0795, "step": 98420 }, { "epoch": 2.910924469154788, "grad_norm": 1.1078605651855469, "learning_rate": 3.03689671588764e-06, "loss": 0.0832, "step": 98430 }, { "epoch": 2.911220204648962, "grad_norm": 0.513097882270813, "learning_rate": 3.0367700259687006e-06, "loss": 0.0951, "step": 98440 }, { "epoch": 2.911515940143136, "grad_norm": 0.832256019115448, "learning_rate": 3.036643336049761e-06, "loss": 0.0859, "step": 98450 }, { "epoch": 2.91181167563731, "grad_norm": 0.9093921780586243, "learning_rate": 3.036516646130822e-06, "loss": 0.0788, "step": 98460 }, { "epoch": 2.912107411131484, "grad_norm": 0.7486997842788696, "learning_rate": 3.036389956211882e-06, "loss": 0.0749, "step": 98470 }, { "epoch": 2.912403146625658, "grad_norm": 1.0376991033554077, "learning_rate": 3.0362632662929434e-06, "loss": 0.0797, "step": 98480 }, { "epoch": 2.912698882119832, "grad_norm": 0.7414079904556274, "learning_rate": 3.0361365763740037e-06, "loss": 0.0774, "step": 98490 }, { "epoch": 2.912994617614006, "grad_norm": 0.6302059888839722, "learning_rate": 3.0360098864550645e-06, "loss": 0.08, "step": 98500 }, { "epoch": 2.91329035310818, "grad_norm": 0.9018816947937012, "learning_rate": 3.035883196536125e-06, "loss": 0.0876, "step": 98510 }, { "epoch": 2.913586088602354, "grad_norm": 1.1281630992889404, "learning_rate": 3.0357565066171857e-06, "loss": 0.0674, "step": 98520 }, { "epoch": 2.913881824096528, "grad_norm": 0.8572651147842407, "learning_rate": 3.035629816698246e-06, "loss": 0.0795, "step": 98530 }, { "epoch": 2.9141775595907022, "grad_norm": 0.5456727147102356, "learning_rate": 3.035503126779307e-06, "loss": 0.0644, "step": 98540 }, { "epoch": 2.914473295084876, "grad_norm": 1.1197842359542847, "learning_rate": 3.035376436860367e-06, "loss": 0.0943, "step": 98550 }, { "epoch": 2.91476903057905, "grad_norm": 0.6793978214263916, "learning_rate": 3.0352497469414284e-06, "loss": 0.0879, "step": 98560 }, { "epoch": 2.915064766073224, "grad_norm": 0.9006810784339905, "learning_rate": 3.0351230570224888e-06, "loss": 0.0762, "step": 98570 }, { "epoch": 2.915360501567398, "grad_norm": 0.8833417892456055, "learning_rate": 3.034996367103549e-06, "loss": 0.0686, "step": 98580 }, { "epoch": 2.9156562370615724, "grad_norm": 1.4493123292922974, "learning_rate": 3.03486967718461e-06, "loss": 0.0894, "step": 98590 }, { "epoch": 2.915951972555746, "grad_norm": 0.6470469832420349, "learning_rate": 3.0347429872656703e-06, "loss": 0.0863, "step": 98600 }, { "epoch": 2.9162477080499203, "grad_norm": 0.6918660998344421, "learning_rate": 3.034616297346731e-06, "loss": 0.0867, "step": 98610 }, { "epoch": 2.9165434435440942, "grad_norm": 1.1119533777236938, "learning_rate": 3.0344896074277915e-06, "loss": 0.068, "step": 98620 }, { "epoch": 2.916839179038268, "grad_norm": 0.6265364289283752, "learning_rate": 3.0343629175088523e-06, "loss": 0.0752, "step": 98630 }, { "epoch": 2.917134914532442, "grad_norm": 1.1357941627502441, "learning_rate": 3.034236227589913e-06, "loss": 0.0895, "step": 98640 }, { "epoch": 2.917430650026616, "grad_norm": 1.0941916704177856, "learning_rate": 3.034109537670974e-06, "loss": 0.0767, "step": 98650 }, { "epoch": 2.9177263855207904, "grad_norm": 0.37464985251426697, "learning_rate": 3.033982847752034e-06, "loss": 0.0822, "step": 98660 }, { "epoch": 2.918022121014964, "grad_norm": 0.46218305826187134, "learning_rate": 3.033856157833095e-06, "loss": 0.0719, "step": 98670 }, { "epoch": 2.9183178565091383, "grad_norm": 0.7853198051452637, "learning_rate": 3.0337294679141554e-06, "loss": 0.0804, "step": 98680 }, { "epoch": 2.9186135920033123, "grad_norm": 0.6196861863136292, "learning_rate": 3.033602777995216e-06, "loss": 0.0826, "step": 98690 }, { "epoch": 2.918909327497486, "grad_norm": 0.8262657523155212, "learning_rate": 3.0334760880762765e-06, "loss": 0.0853, "step": 98700 }, { "epoch": 2.91920506299166, "grad_norm": 0.8974018692970276, "learning_rate": 3.0333493981573373e-06, "loss": 0.0901, "step": 98710 }, { "epoch": 2.919500798485834, "grad_norm": 0.7626820206642151, "learning_rate": 3.033222708238398e-06, "loss": 0.0801, "step": 98720 }, { "epoch": 2.9197965339800085, "grad_norm": 0.9566575884819031, "learning_rate": 3.033096018319459e-06, "loss": 0.0709, "step": 98730 }, { "epoch": 2.9200922694741824, "grad_norm": 0.8310668468475342, "learning_rate": 3.0329693284005192e-06, "loss": 0.0774, "step": 98740 }, { "epoch": 2.9203880049683564, "grad_norm": 0.42295005917549133, "learning_rate": 3.03284263848158e-06, "loss": 0.0785, "step": 98750 }, { "epoch": 2.9206837404625303, "grad_norm": 0.4825819432735443, "learning_rate": 3.0327159485626404e-06, "loss": 0.0717, "step": 98760 }, { "epoch": 2.9209794759567043, "grad_norm": 0.6248596906661987, "learning_rate": 3.032589258643701e-06, "loss": 0.0842, "step": 98770 }, { "epoch": 2.921275211450878, "grad_norm": 0.883084774017334, "learning_rate": 3.0324625687247616e-06, "loss": 0.0624, "step": 98780 }, { "epoch": 2.921570946945052, "grad_norm": 0.4834229648113251, "learning_rate": 3.0323358788058223e-06, "loss": 0.083, "step": 98790 }, { "epoch": 2.9218666824392265, "grad_norm": 1.4242247343063354, "learning_rate": 3.032209188886883e-06, "loss": 0.0822, "step": 98800 }, { "epoch": 2.9221624179334005, "grad_norm": 0.8233676552772522, "learning_rate": 3.032082498967944e-06, "loss": 0.0818, "step": 98810 }, { "epoch": 2.9224581534275744, "grad_norm": 0.9141247272491455, "learning_rate": 3.0319558090490043e-06, "loss": 0.0669, "step": 98820 }, { "epoch": 2.9227538889217484, "grad_norm": 0.6945911645889282, "learning_rate": 3.031829119130065e-06, "loss": 0.0827, "step": 98830 }, { "epoch": 2.9230496244159223, "grad_norm": 0.7807687520980835, "learning_rate": 3.0317024292111254e-06, "loss": 0.0876, "step": 98840 }, { "epoch": 2.9233453599100963, "grad_norm": 0.7805315852165222, "learning_rate": 3.0315757392921862e-06, "loss": 0.0828, "step": 98850 }, { "epoch": 2.92364109540427, "grad_norm": 0.6335242390632629, "learning_rate": 3.0314490493732466e-06, "loss": 0.0795, "step": 98860 }, { "epoch": 2.9239368308984446, "grad_norm": 0.5757834315299988, "learning_rate": 3.0313223594543074e-06, "loss": 0.0605, "step": 98870 }, { "epoch": 2.9242325663926185, "grad_norm": 0.8558435440063477, "learning_rate": 3.031195669535368e-06, "loss": 0.0749, "step": 98880 }, { "epoch": 2.9245283018867925, "grad_norm": 0.6044763326644897, "learning_rate": 3.031068979616429e-06, "loss": 0.0711, "step": 98890 }, { "epoch": 2.9248240373809664, "grad_norm": 1.1516245603561401, "learning_rate": 3.0309422896974893e-06, "loss": 0.101, "step": 98900 }, { "epoch": 2.9251197728751404, "grad_norm": 1.2297276258468628, "learning_rate": 3.03081559977855e-06, "loss": 0.0711, "step": 98910 }, { "epoch": 2.9254155083693147, "grad_norm": 0.7247101068496704, "learning_rate": 3.0306889098596105e-06, "loss": 0.0768, "step": 98920 }, { "epoch": 2.9257112438634882, "grad_norm": 0.9813672304153442, "learning_rate": 3.0305622199406713e-06, "loss": 0.0731, "step": 98930 }, { "epoch": 2.9260069793576626, "grad_norm": 0.7622358202934265, "learning_rate": 3.0304355300217316e-06, "loss": 0.0815, "step": 98940 }, { "epoch": 2.9263027148518366, "grad_norm": 1.3989027738571167, "learning_rate": 3.0303088401027924e-06, "loss": 0.0758, "step": 98950 }, { "epoch": 2.9265984503460105, "grad_norm": 0.6837902069091797, "learning_rate": 3.0301821501838532e-06, "loss": 0.0816, "step": 98960 }, { "epoch": 2.9268941858401845, "grad_norm": 0.419241338968277, "learning_rate": 3.030055460264914e-06, "loss": 0.059, "step": 98970 }, { "epoch": 2.9271899213343584, "grad_norm": 1.2449005842208862, "learning_rate": 3.0299287703459744e-06, "loss": 0.0844, "step": 98980 }, { "epoch": 2.927485656828533, "grad_norm": 0.8670262098312378, "learning_rate": 3.029802080427035e-06, "loss": 0.081, "step": 98990 }, { "epoch": 2.9277813923227067, "grad_norm": 0.4407309591770172, "learning_rate": 3.0296753905080955e-06, "loss": 0.0915, "step": 99000 }, { "epoch": 2.9280771278168807, "grad_norm": 0.94057297706604, "learning_rate": 3.029548700589156e-06, "loss": 0.0915, "step": 99010 }, { "epoch": 2.9283728633110546, "grad_norm": 0.5989348292350769, "learning_rate": 3.0294220106702167e-06, "loss": 0.089, "step": 99020 }, { "epoch": 2.9286685988052286, "grad_norm": 0.6194133758544922, "learning_rate": 3.029295320751277e-06, "loss": 0.0568, "step": 99030 }, { "epoch": 2.9289643342994025, "grad_norm": 0.7872284650802612, "learning_rate": 3.0291686308323383e-06, "loss": 0.0869, "step": 99040 }, { "epoch": 2.9292600697935764, "grad_norm": 0.8304745554924011, "learning_rate": 3.0290419409133986e-06, "loss": 0.0717, "step": 99050 }, { "epoch": 2.929555805287751, "grad_norm": 0.7484614849090576, "learning_rate": 3.0289152509944594e-06, "loss": 0.0684, "step": 99060 }, { "epoch": 2.929851540781925, "grad_norm": 0.8209795951843262, "learning_rate": 3.02878856107552e-06, "loss": 0.0847, "step": 99070 }, { "epoch": 2.9301472762760987, "grad_norm": 0.9183018803596497, "learning_rate": 3.0286618711565806e-06, "loss": 0.0742, "step": 99080 }, { "epoch": 2.9304430117702727, "grad_norm": 0.8596780300140381, "learning_rate": 3.028535181237641e-06, "loss": 0.0796, "step": 99090 }, { "epoch": 2.9307387472644466, "grad_norm": 0.41414448618888855, "learning_rate": 3.0284084913187017e-06, "loss": 0.0679, "step": 99100 }, { "epoch": 2.9310344827586206, "grad_norm": 0.999441385269165, "learning_rate": 3.028281801399762e-06, "loss": 0.0839, "step": 99110 }, { "epoch": 2.9313302182527945, "grad_norm": 0.8630990386009216, "learning_rate": 3.0281551114808233e-06, "loss": 0.0701, "step": 99120 }, { "epoch": 2.931625953746969, "grad_norm": 0.7670745253562927, "learning_rate": 3.0280284215618837e-06, "loss": 0.0692, "step": 99130 }, { "epoch": 2.931921689241143, "grad_norm": 0.6686710715293884, "learning_rate": 3.0279017316429445e-06, "loss": 0.0779, "step": 99140 }, { "epoch": 2.9322174247353168, "grad_norm": 0.6684080958366394, "learning_rate": 3.027775041724005e-06, "loss": 0.0774, "step": 99150 }, { "epoch": 2.9325131602294907, "grad_norm": 0.9317070245742798, "learning_rate": 3.0276483518050656e-06, "loss": 0.072, "step": 99160 }, { "epoch": 2.9328088957236647, "grad_norm": 0.43624162673950195, "learning_rate": 3.027521661886126e-06, "loss": 0.0607, "step": 99170 }, { "epoch": 2.933104631217839, "grad_norm": 1.1290174722671509, "learning_rate": 3.0273949719671868e-06, "loss": 0.0702, "step": 99180 }, { "epoch": 2.9334003667120125, "grad_norm": 0.6326122283935547, "learning_rate": 3.027268282048247e-06, "loss": 0.0716, "step": 99190 }, { "epoch": 2.933696102206187, "grad_norm": 1.6666017770767212, "learning_rate": 3.0271415921293084e-06, "loss": 0.0944, "step": 99200 }, { "epoch": 2.933991837700361, "grad_norm": 0.7579112648963928, "learning_rate": 3.0270149022103687e-06, "loss": 0.0879, "step": 99210 }, { "epoch": 2.934287573194535, "grad_norm": 1.224694848060608, "learning_rate": 3.0268882122914295e-06, "loss": 0.0568, "step": 99220 }, { "epoch": 2.9345833086887088, "grad_norm": 0.8558365702629089, "learning_rate": 3.02676152237249e-06, "loss": 0.0811, "step": 99230 }, { "epoch": 2.9348790441828827, "grad_norm": 0.7472929358482361, "learning_rate": 3.0266348324535507e-06, "loss": 0.0776, "step": 99240 }, { "epoch": 2.935174779677057, "grad_norm": 0.7366487979888916, "learning_rate": 3.026508142534611e-06, "loss": 0.0801, "step": 99250 }, { "epoch": 2.9354705151712306, "grad_norm": 0.989136815071106, "learning_rate": 3.026381452615672e-06, "loss": 0.0798, "step": 99260 }, { "epoch": 2.935766250665405, "grad_norm": 0.8612618446350098, "learning_rate": 3.026254762696732e-06, "loss": 0.0899, "step": 99270 }, { "epoch": 2.936061986159579, "grad_norm": 0.5884112119674683, "learning_rate": 3.0261280727777934e-06, "loss": 0.0681, "step": 99280 }, { "epoch": 2.936357721653753, "grad_norm": 0.8837231397628784, "learning_rate": 3.0260013828588538e-06, "loss": 0.083, "step": 99290 }, { "epoch": 2.936653457147927, "grad_norm": 0.8424996733665466, "learning_rate": 3.0258746929399146e-06, "loss": 0.0766, "step": 99300 }, { "epoch": 2.9369491926421007, "grad_norm": 0.47853752970695496, "learning_rate": 3.025748003020975e-06, "loss": 0.0808, "step": 99310 }, { "epoch": 2.937244928136275, "grad_norm": 0.6637229323387146, "learning_rate": 3.0256213131020357e-06, "loss": 0.0699, "step": 99320 }, { "epoch": 2.937540663630449, "grad_norm": 0.9628177881240845, "learning_rate": 3.025494623183096e-06, "loss": 0.0687, "step": 99330 }, { "epoch": 2.937836399124623, "grad_norm": 0.6080150604248047, "learning_rate": 3.025367933264157e-06, "loss": 0.0821, "step": 99340 }, { "epoch": 2.938132134618797, "grad_norm": 0.9858716130256653, "learning_rate": 3.0252412433452172e-06, "loss": 0.0806, "step": 99350 }, { "epoch": 2.938427870112971, "grad_norm": 0.7408034801483154, "learning_rate": 3.0251145534262784e-06, "loss": 0.0871, "step": 99360 }, { "epoch": 2.938723605607145, "grad_norm": 0.7611651420593262, "learning_rate": 3.024987863507339e-06, "loss": 0.07, "step": 99370 }, { "epoch": 2.939019341101319, "grad_norm": 0.6911616921424866, "learning_rate": 3.0248611735883996e-06, "loss": 0.068, "step": 99380 }, { "epoch": 2.939315076595493, "grad_norm": 0.7533694505691528, "learning_rate": 3.02473448366946e-06, "loss": 0.0838, "step": 99390 }, { "epoch": 2.939610812089667, "grad_norm": 0.49425989389419556, "learning_rate": 3.0246077937505208e-06, "loss": 0.0651, "step": 99400 }, { "epoch": 2.939906547583841, "grad_norm": 0.9889987111091614, "learning_rate": 3.024481103831581e-06, "loss": 0.0738, "step": 99410 }, { "epoch": 2.940202283078015, "grad_norm": 0.8816784620285034, "learning_rate": 3.0243544139126415e-06, "loss": 0.0542, "step": 99420 }, { "epoch": 2.940498018572189, "grad_norm": 0.9420228004455566, "learning_rate": 3.0242277239937023e-06, "loss": 0.0618, "step": 99430 }, { "epoch": 2.940793754066363, "grad_norm": 1.0970683097839355, "learning_rate": 3.024101034074763e-06, "loss": 0.0872, "step": 99440 }, { "epoch": 2.941089489560537, "grad_norm": 1.2840650081634521, "learning_rate": 3.023974344155824e-06, "loss": 0.0947, "step": 99450 }, { "epoch": 2.9413852250547112, "grad_norm": 0.8529224991798401, "learning_rate": 3.0238476542368842e-06, "loss": 0.0877, "step": 99460 }, { "epoch": 2.941680960548885, "grad_norm": 0.37586963176727295, "learning_rate": 3.023720964317945e-06, "loss": 0.0567, "step": 99470 }, { "epoch": 2.941976696043059, "grad_norm": 0.9413831830024719, "learning_rate": 3.0235942743990054e-06, "loss": 0.0701, "step": 99480 }, { "epoch": 2.942272431537233, "grad_norm": 0.9819089770317078, "learning_rate": 3.023467584480066e-06, "loss": 0.0811, "step": 99490 }, { "epoch": 2.942568167031407, "grad_norm": 0.700911819934845, "learning_rate": 3.0233408945611265e-06, "loss": 0.0825, "step": 99500 }, { "epoch": 2.9428639025255814, "grad_norm": 0.5169742107391357, "learning_rate": 3.0232142046421873e-06, "loss": 0.076, "step": 99510 }, { "epoch": 2.943159638019755, "grad_norm": 0.752328097820282, "learning_rate": 3.023087514723248e-06, "loss": 0.073, "step": 99520 }, { "epoch": 2.9434553735139293, "grad_norm": 0.5521595478057861, "learning_rate": 3.022960824804309e-06, "loss": 0.0715, "step": 99530 }, { "epoch": 2.943751109008103, "grad_norm": 0.49513304233551025, "learning_rate": 3.0228341348853693e-06, "loss": 0.0765, "step": 99540 }, { "epoch": 2.944046844502277, "grad_norm": 0.4675343334674835, "learning_rate": 3.02270744496643e-06, "loss": 0.0551, "step": 99550 }, { "epoch": 2.944342579996451, "grad_norm": 0.9257264137268066, "learning_rate": 3.0225807550474904e-06, "loss": 0.0859, "step": 99560 }, { "epoch": 2.944638315490625, "grad_norm": 0.8350043296813965, "learning_rate": 3.0224540651285512e-06, "loss": 0.0751, "step": 99570 }, { "epoch": 2.9449340509847994, "grad_norm": 0.9646304249763489, "learning_rate": 3.0223273752096116e-06, "loss": 0.0667, "step": 99580 }, { "epoch": 2.945229786478973, "grad_norm": 0.7491241097450256, "learning_rate": 3.0222006852906724e-06, "loss": 0.0874, "step": 99590 }, { "epoch": 2.9455255219731473, "grad_norm": 0.7867695689201355, "learning_rate": 3.022073995371733e-06, "loss": 0.0669, "step": 99600 }, { "epoch": 2.9458212574673213, "grad_norm": 0.9953551888465881, "learning_rate": 3.021947305452794e-06, "loss": 0.0654, "step": 99610 }, { "epoch": 2.946116992961495, "grad_norm": 0.7109573483467102, "learning_rate": 3.0218206155338543e-06, "loss": 0.0745, "step": 99620 }, { "epoch": 2.946412728455669, "grad_norm": 0.6824116110801697, "learning_rate": 3.021693925614915e-06, "loss": 0.0822, "step": 99630 }, { "epoch": 2.946708463949843, "grad_norm": 0.7871861457824707, "learning_rate": 3.0215672356959755e-06, "loss": 0.0638, "step": 99640 }, { "epoch": 2.9470041994440175, "grad_norm": 1.0815528631210327, "learning_rate": 3.0214405457770363e-06, "loss": 0.0819, "step": 99650 }, { "epoch": 2.9472999349381914, "grad_norm": 0.8965665698051453, "learning_rate": 3.0213138558580966e-06, "loss": 0.0896, "step": 99660 }, { "epoch": 2.9475956704323654, "grad_norm": 1.0123158693313599, "learning_rate": 3.0211871659391574e-06, "loss": 0.0753, "step": 99670 }, { "epoch": 2.9478914059265393, "grad_norm": 0.9419722557067871, "learning_rate": 3.021060476020218e-06, "loss": 0.0744, "step": 99680 }, { "epoch": 2.9481871414207133, "grad_norm": 0.9956957697868347, "learning_rate": 3.020933786101279e-06, "loss": 0.0838, "step": 99690 }, { "epoch": 2.948482876914887, "grad_norm": 0.7246586084365845, "learning_rate": 3.0208070961823394e-06, "loss": 0.0661, "step": 99700 }, { "epoch": 2.948778612409061, "grad_norm": 0.6206046342849731, "learning_rate": 3.0206804062634e-06, "loss": 0.0753, "step": 99710 }, { "epoch": 2.9490743479032355, "grad_norm": 0.8078784942626953, "learning_rate": 3.0205537163444605e-06, "loss": 0.0758, "step": 99720 }, { "epoch": 2.9493700833974095, "grad_norm": 0.9447654485702515, "learning_rate": 3.0204270264255213e-06, "loss": 0.0696, "step": 99730 }, { "epoch": 2.9496658188915834, "grad_norm": 0.7044234275817871, "learning_rate": 3.0203003365065817e-06, "loss": 0.0833, "step": 99740 }, { "epoch": 2.9499615543857574, "grad_norm": 0.8554084897041321, "learning_rate": 3.0201736465876425e-06, "loss": 0.0653, "step": 99750 }, { "epoch": 2.9502572898799313, "grad_norm": 0.6641768217086792, "learning_rate": 3.0200469566687033e-06, "loss": 0.1018, "step": 99760 }, { "epoch": 2.9505530253741052, "grad_norm": 0.4807046055793762, "learning_rate": 3.019920266749764e-06, "loss": 0.055, "step": 99770 }, { "epoch": 2.950848760868279, "grad_norm": 1.0426064729690552, "learning_rate": 3.0197935768308244e-06, "loss": 0.0663, "step": 99780 }, { "epoch": 2.9511444963624536, "grad_norm": 0.8124799132347107, "learning_rate": 3.019666886911885e-06, "loss": 0.0717, "step": 99790 }, { "epoch": 2.9514402318566275, "grad_norm": 0.7062307000160217, "learning_rate": 3.0195401969929456e-06, "loss": 0.0687, "step": 99800 }, { "epoch": 2.9517359673508015, "grad_norm": 0.4423384368419647, "learning_rate": 3.0194135070740064e-06, "loss": 0.0479, "step": 99810 }, { "epoch": 2.9520317028449754, "grad_norm": 0.6903926134109497, "learning_rate": 3.0192868171550667e-06, "loss": 0.0656, "step": 99820 }, { "epoch": 2.9523274383391493, "grad_norm": 0.6472305059432983, "learning_rate": 3.019160127236127e-06, "loss": 0.0662, "step": 99830 }, { "epoch": 2.9526231738333237, "grad_norm": 0.6655529141426086, "learning_rate": 3.0190334373171883e-06, "loss": 0.0809, "step": 99840 }, { "epoch": 2.9529189093274972, "grad_norm": 1.1803691387176514, "learning_rate": 3.0189067473982487e-06, "loss": 0.0807, "step": 99850 }, { "epoch": 2.9532146448216716, "grad_norm": 1.4700570106506348, "learning_rate": 3.0187800574793095e-06, "loss": 0.1016, "step": 99860 }, { "epoch": 2.9535103803158456, "grad_norm": 0.9252539873123169, "learning_rate": 3.01865336756037e-06, "loss": 0.074, "step": 99870 }, { "epoch": 2.9538061158100195, "grad_norm": 1.150007724761963, "learning_rate": 3.0185266776414306e-06, "loss": 0.0826, "step": 99880 }, { "epoch": 2.9541018513041934, "grad_norm": 0.6916178464889526, "learning_rate": 3.018399987722491e-06, "loss": 0.077, "step": 99890 }, { "epoch": 2.9543975867983674, "grad_norm": 0.8434432148933411, "learning_rate": 3.0182732978035518e-06, "loss": 0.0633, "step": 99900 }, { "epoch": 2.954693322292542, "grad_norm": 0.5784620046615601, "learning_rate": 3.018146607884612e-06, "loss": 0.0873, "step": 99910 }, { "epoch": 2.9549890577867157, "grad_norm": 1.1187206506729126, "learning_rate": 3.0180199179656733e-06, "loss": 0.0684, "step": 99920 }, { "epoch": 2.9552847932808897, "grad_norm": 0.8103621602058411, "learning_rate": 3.0178932280467337e-06, "loss": 0.0681, "step": 99930 }, { "epoch": 2.9555805287750636, "grad_norm": 1.0601619482040405, "learning_rate": 3.0177665381277945e-06, "loss": 0.0976, "step": 99940 }, { "epoch": 2.9558762642692376, "grad_norm": 1.2077690362930298, "learning_rate": 3.017639848208855e-06, "loss": 0.0914, "step": 99950 }, { "epoch": 2.9561719997634115, "grad_norm": 0.47604990005493164, "learning_rate": 3.0175131582899157e-06, "loss": 0.066, "step": 99960 }, { "epoch": 2.9564677352575854, "grad_norm": 0.8062540292739868, "learning_rate": 3.017386468370976e-06, "loss": 0.0788, "step": 99970 }, { "epoch": 2.95676347075176, "grad_norm": 0.9975315928459167, "learning_rate": 3.017259778452037e-06, "loss": 0.0641, "step": 99980 }, { "epoch": 2.9570592062459338, "grad_norm": 0.7339315414428711, "learning_rate": 3.017133088533097e-06, "loss": 0.0841, "step": 99990 }, { "epoch": 2.9573549417401077, "grad_norm": 0.744038999080658, "learning_rate": 3.0170063986141584e-06, "loss": 0.0915, "step": 100000 }, { "epoch": 2.9576506772342817, "grad_norm": 0.9070940613746643, "learning_rate": 3.0168797086952188e-06, "loss": 0.0737, "step": 100010 }, { "epoch": 2.9579464127284556, "grad_norm": 0.8937065005302429, "learning_rate": 3.0167530187762795e-06, "loss": 0.0684, "step": 100020 }, { "epoch": 2.9582421482226295, "grad_norm": 0.6246180534362793, "learning_rate": 3.01662632885734e-06, "loss": 0.0631, "step": 100030 }, { "epoch": 2.9585378837168035, "grad_norm": 1.0150697231292725, "learning_rate": 3.0164996389384007e-06, "loss": 0.0783, "step": 100040 }, { "epoch": 2.958833619210978, "grad_norm": 0.7871407866477966, "learning_rate": 3.016372949019461e-06, "loss": 0.0823, "step": 100050 }, { "epoch": 2.959129354705152, "grad_norm": 0.6329100131988525, "learning_rate": 3.016246259100522e-06, "loss": 0.0805, "step": 100060 }, { "epoch": 2.9594250901993258, "grad_norm": 0.780755877494812, "learning_rate": 3.0161195691815822e-06, "loss": 0.0714, "step": 100070 }, { "epoch": 2.9597208256934997, "grad_norm": 0.6819784045219421, "learning_rate": 3.0159928792626434e-06, "loss": 0.0531, "step": 100080 }, { "epoch": 2.9600165611876736, "grad_norm": 0.6291922330856323, "learning_rate": 3.015866189343704e-06, "loss": 0.0828, "step": 100090 }, { "epoch": 2.960312296681848, "grad_norm": 1.495197057723999, "learning_rate": 3.0157394994247646e-06, "loss": 0.0853, "step": 100100 }, { "epoch": 2.9606080321760215, "grad_norm": 0.6628403067588806, "learning_rate": 3.015612809505825e-06, "loss": 0.0739, "step": 100110 }, { "epoch": 2.960903767670196, "grad_norm": 0.6458272337913513, "learning_rate": 3.0154861195868857e-06, "loss": 0.0594, "step": 100120 }, { "epoch": 2.96119950316437, "grad_norm": 0.7459774017333984, "learning_rate": 3.015359429667946e-06, "loss": 0.067, "step": 100130 }, { "epoch": 2.961495238658544, "grad_norm": 1.415237307548523, "learning_rate": 3.015232739749007e-06, "loss": 0.0959, "step": 100140 }, { "epoch": 2.9617909741527177, "grad_norm": 1.0803050994873047, "learning_rate": 3.0151060498300673e-06, "loss": 0.087, "step": 100150 }, { "epoch": 2.9620867096468917, "grad_norm": 0.6116913557052612, "learning_rate": 3.0149793599111285e-06, "loss": 0.073, "step": 100160 }, { "epoch": 2.962382445141066, "grad_norm": 0.5381507873535156, "learning_rate": 3.014852669992189e-06, "loss": 0.0725, "step": 100170 }, { "epoch": 2.9626781806352396, "grad_norm": 1.0252240896224976, "learning_rate": 3.0147259800732496e-06, "loss": 0.0813, "step": 100180 }, { "epoch": 2.962973916129414, "grad_norm": 0.5095444917678833, "learning_rate": 3.01459929015431e-06, "loss": 0.0759, "step": 100190 }, { "epoch": 2.963269651623588, "grad_norm": 1.1735997200012207, "learning_rate": 3.014472600235371e-06, "loss": 0.0956, "step": 100200 }, { "epoch": 2.963565387117762, "grad_norm": 0.55202317237854, "learning_rate": 3.014345910316431e-06, "loss": 0.0654, "step": 100210 }, { "epoch": 2.963861122611936, "grad_norm": 1.3215395212173462, "learning_rate": 3.014219220397492e-06, "loss": 0.0589, "step": 100220 }, { "epoch": 2.9641568581061097, "grad_norm": 1.1075109243392944, "learning_rate": 3.0140925304785523e-06, "loss": 0.0753, "step": 100230 }, { "epoch": 2.964452593600284, "grad_norm": 0.8836490511894226, "learning_rate": 3.013965840559613e-06, "loss": 0.0835, "step": 100240 }, { "epoch": 2.964748329094458, "grad_norm": 1.341802716255188, "learning_rate": 3.013839150640674e-06, "loss": 0.1153, "step": 100250 }, { "epoch": 2.965044064588632, "grad_norm": 0.7313161492347717, "learning_rate": 3.0137124607217343e-06, "loss": 0.0753, "step": 100260 }, { "epoch": 2.965339800082806, "grad_norm": 0.5343813896179199, "learning_rate": 3.013585770802795e-06, "loss": 0.0654, "step": 100270 }, { "epoch": 2.96563553557698, "grad_norm": 0.6416770815849304, "learning_rate": 3.0134590808838554e-06, "loss": 0.0637, "step": 100280 }, { "epoch": 2.965931271071154, "grad_norm": 0.9828344583511353, "learning_rate": 3.013332390964916e-06, "loss": 0.0828, "step": 100290 }, { "epoch": 2.966227006565328, "grad_norm": 0.6737270355224609, "learning_rate": 3.0132057010459766e-06, "loss": 0.0827, "step": 100300 }, { "epoch": 2.966522742059502, "grad_norm": 0.7686550617218018, "learning_rate": 3.0130790111270374e-06, "loss": 0.0899, "step": 100310 }, { "epoch": 2.966818477553676, "grad_norm": 0.6063393354415894, "learning_rate": 3.012952321208098e-06, "loss": 0.0578, "step": 100320 }, { "epoch": 2.96711421304785, "grad_norm": 0.5301980972290039, "learning_rate": 3.012825631289159e-06, "loss": 0.0697, "step": 100330 }, { "epoch": 2.967409948542024, "grad_norm": 0.7901214957237244, "learning_rate": 3.0126989413702193e-06, "loss": 0.0854, "step": 100340 }, { "epoch": 2.967705684036198, "grad_norm": 0.6219304800033569, "learning_rate": 3.01257225145128e-06, "loss": 0.087, "step": 100350 }, { "epoch": 2.968001419530372, "grad_norm": 0.9512356519699097, "learning_rate": 3.0124455615323405e-06, "loss": 0.0718, "step": 100360 }, { "epoch": 2.968297155024546, "grad_norm": 0.37762120366096497, "learning_rate": 3.0123188716134012e-06, "loss": 0.0704, "step": 100370 }, { "epoch": 2.96859289051872, "grad_norm": 0.8626106977462769, "learning_rate": 3.0121921816944616e-06, "loss": 0.0601, "step": 100380 }, { "epoch": 2.968888626012894, "grad_norm": 0.8269791603088379, "learning_rate": 3.0120654917755224e-06, "loss": 0.0933, "step": 100390 }, { "epoch": 2.969184361507068, "grad_norm": 0.8458095788955688, "learning_rate": 3.011938801856583e-06, "loss": 0.0854, "step": 100400 }, { "epoch": 2.969480097001242, "grad_norm": 1.2535043954849243, "learning_rate": 3.011812111937644e-06, "loss": 0.0966, "step": 100410 }, { "epoch": 2.969775832495416, "grad_norm": 0.8792778849601746, "learning_rate": 3.0116854220187043e-06, "loss": 0.0791, "step": 100420 }, { "epoch": 2.9700715679895904, "grad_norm": 0.7582218050956726, "learning_rate": 3.011558732099765e-06, "loss": 0.0596, "step": 100430 }, { "epoch": 2.970367303483764, "grad_norm": 0.7788811326026917, "learning_rate": 3.0114320421808255e-06, "loss": 0.0767, "step": 100440 }, { "epoch": 2.9706630389779383, "grad_norm": 1.1634325981140137, "learning_rate": 3.0113053522618863e-06, "loss": 0.0793, "step": 100450 }, { "epoch": 2.970958774472112, "grad_norm": 1.1336803436279297, "learning_rate": 3.0111786623429467e-06, "loss": 0.072, "step": 100460 }, { "epoch": 2.971254509966286, "grad_norm": 0.6575928330421448, "learning_rate": 3.0110519724240074e-06, "loss": 0.073, "step": 100470 }, { "epoch": 2.97155024546046, "grad_norm": 0.9364156723022461, "learning_rate": 3.0109252825050682e-06, "loss": 0.0649, "step": 100480 }, { "epoch": 2.971845980954634, "grad_norm": 0.9817273616790771, "learning_rate": 3.010798592586129e-06, "loss": 0.082, "step": 100490 }, { "epoch": 2.9721417164488084, "grad_norm": 0.5435211062431335, "learning_rate": 3.0106719026671894e-06, "loss": 0.077, "step": 100500 }, { "epoch": 2.972437451942982, "grad_norm": 0.7740097045898438, "learning_rate": 3.01054521274825e-06, "loss": 0.0874, "step": 100510 }, { "epoch": 2.9727331874371563, "grad_norm": 0.9270645976066589, "learning_rate": 3.0104185228293105e-06, "loss": 0.0627, "step": 100520 }, { "epoch": 2.9730289229313303, "grad_norm": 0.7048524618148804, "learning_rate": 3.0102918329103713e-06, "loss": 0.0754, "step": 100530 }, { "epoch": 2.973324658425504, "grad_norm": 0.9581345915794373, "learning_rate": 3.0101651429914317e-06, "loss": 0.104, "step": 100540 }, { "epoch": 2.973620393919678, "grad_norm": 0.6145033836364746, "learning_rate": 3.0100384530724925e-06, "loss": 0.0824, "step": 100550 }, { "epoch": 2.973916129413852, "grad_norm": 0.867401123046875, "learning_rate": 3.0099117631535533e-06, "loss": 0.0881, "step": 100560 }, { "epoch": 2.9742118649080265, "grad_norm": 0.35220250487327576, "learning_rate": 3.009785073234614e-06, "loss": 0.0691, "step": 100570 }, { "epoch": 2.9745076004022004, "grad_norm": 0.6987287402153015, "learning_rate": 3.0096583833156744e-06, "loss": 0.065, "step": 100580 }, { "epoch": 2.9748033358963744, "grad_norm": 0.7356123328208923, "learning_rate": 3.0095316933967352e-06, "loss": 0.0734, "step": 100590 }, { "epoch": 2.9750990713905483, "grad_norm": 0.8300260901451111, "learning_rate": 3.0094050034777956e-06, "loss": 0.0722, "step": 100600 }, { "epoch": 2.9753948068847222, "grad_norm": 0.7949405312538147, "learning_rate": 3.0092783135588564e-06, "loss": 0.0769, "step": 100610 }, { "epoch": 2.975690542378896, "grad_norm": 0.7487817406654358, "learning_rate": 3.0091516236399167e-06, "loss": 0.0623, "step": 100620 }, { "epoch": 2.97598627787307, "grad_norm": 0.6582891941070557, "learning_rate": 3.0090249337209775e-06, "loss": 0.0671, "step": 100630 }, { "epoch": 2.9762820133672445, "grad_norm": 0.8533763885498047, "learning_rate": 3.0088982438020383e-06, "loss": 0.0812, "step": 100640 }, { "epoch": 2.9765777488614185, "grad_norm": 0.6793770790100098, "learning_rate": 3.0087715538830987e-06, "loss": 0.0708, "step": 100650 }, { "epoch": 2.9768734843555924, "grad_norm": 0.5272619128227234, "learning_rate": 3.0086448639641595e-06, "loss": 0.0582, "step": 100660 }, { "epoch": 2.9771692198497663, "grad_norm": 0.6730108857154846, "learning_rate": 3.00851817404522e-06, "loss": 0.0763, "step": 100670 }, { "epoch": 2.9774649553439403, "grad_norm": 0.6594484448432922, "learning_rate": 3.0083914841262806e-06, "loss": 0.0647, "step": 100680 }, { "epoch": 2.9777606908381142, "grad_norm": 0.7336535453796387, "learning_rate": 3.008264794207341e-06, "loss": 0.0791, "step": 100690 }, { "epoch": 2.978056426332288, "grad_norm": 0.5972954630851746, "learning_rate": 3.008138104288402e-06, "loss": 0.0903, "step": 100700 }, { "epoch": 2.9783521618264626, "grad_norm": 0.8497734069824219, "learning_rate": 3.008011414369462e-06, "loss": 0.081, "step": 100710 }, { "epoch": 2.9786478973206365, "grad_norm": 1.206071376800537, "learning_rate": 3.0078847244505234e-06, "loss": 0.0825, "step": 100720 }, { "epoch": 2.9789436328148104, "grad_norm": 1.0375467538833618, "learning_rate": 3.0077580345315837e-06, "loss": 0.0649, "step": 100730 }, { "epoch": 2.9792393683089844, "grad_norm": 0.7188552021980286, "learning_rate": 3.0076313446126445e-06, "loss": 0.1091, "step": 100740 }, { "epoch": 2.9795351038031583, "grad_norm": 0.6581712961196899, "learning_rate": 3.007504654693705e-06, "loss": 0.0877, "step": 100750 }, { "epoch": 2.9798308392973327, "grad_norm": 0.5967571139335632, "learning_rate": 3.0073779647747657e-06, "loss": 0.0646, "step": 100760 }, { "epoch": 2.9801265747915062, "grad_norm": 0.5308001637458801, "learning_rate": 3.007251274855826e-06, "loss": 0.0692, "step": 100770 }, { "epoch": 2.9804223102856806, "grad_norm": 0.5660647749900818, "learning_rate": 3.007124584936887e-06, "loss": 0.0741, "step": 100780 }, { "epoch": 2.9807180457798546, "grad_norm": 0.47981536388397217, "learning_rate": 3.006997895017947e-06, "loss": 0.0849, "step": 100790 }, { "epoch": 2.9810137812740285, "grad_norm": 0.5425413250923157, "learning_rate": 3.0068712050990084e-06, "loss": 0.0775, "step": 100800 }, { "epoch": 2.9813095167682024, "grad_norm": 0.9643464684486389, "learning_rate": 3.0067445151800688e-06, "loss": 0.0848, "step": 100810 }, { "epoch": 2.9816052522623764, "grad_norm": 0.7568941116333008, "learning_rate": 3.0066178252611296e-06, "loss": 0.0797, "step": 100820 }, { "epoch": 2.9819009877565508, "grad_norm": 0.8282167911529541, "learning_rate": 3.00649113534219e-06, "loss": 0.069, "step": 100830 }, { "epoch": 2.9821967232507247, "grad_norm": 0.8010543584823608, "learning_rate": 3.0063644454232507e-06, "loss": 0.074, "step": 100840 }, { "epoch": 2.9824924587448987, "grad_norm": 1.5998331308364868, "learning_rate": 3.006237755504311e-06, "loss": 0.0769, "step": 100850 }, { "epoch": 2.9827881942390726, "grad_norm": 0.6074503064155579, "learning_rate": 3.006111065585372e-06, "loss": 0.0965, "step": 100860 }, { "epoch": 2.9830839297332465, "grad_norm": 0.48585882782936096, "learning_rate": 3.0059843756664323e-06, "loss": 0.0587, "step": 100870 }, { "epoch": 2.9833796652274205, "grad_norm": 0.9796658158302307, "learning_rate": 3.0058576857474935e-06, "loss": 0.0703, "step": 100880 }, { "epoch": 2.9836754007215944, "grad_norm": 0.6321825981140137, "learning_rate": 3.005730995828554e-06, "loss": 0.0907, "step": 100890 }, { "epoch": 2.983971136215769, "grad_norm": 0.883492648601532, "learning_rate": 3.0056043059096146e-06, "loss": 0.0903, "step": 100900 }, { "epoch": 2.9842668717099428, "grad_norm": 0.5448857545852661, "learning_rate": 3.005477615990675e-06, "loss": 0.0714, "step": 100910 }, { "epoch": 2.9845626072041167, "grad_norm": 0.39929458498954773, "learning_rate": 3.0053509260717358e-06, "loss": 0.0849, "step": 100920 }, { "epoch": 2.9848583426982906, "grad_norm": 0.6760478019714355, "learning_rate": 3.005224236152796e-06, "loss": 0.0727, "step": 100930 }, { "epoch": 2.9851540781924646, "grad_norm": 0.7971497774124146, "learning_rate": 3.005097546233857e-06, "loss": 0.075, "step": 100940 }, { "epoch": 2.9854498136866385, "grad_norm": 1.1002264022827148, "learning_rate": 3.0049708563149173e-06, "loss": 0.094, "step": 100950 }, { "epoch": 2.9857455491808125, "grad_norm": 1.449022889137268, "learning_rate": 3.0048441663959785e-06, "loss": 0.0827, "step": 100960 }, { "epoch": 2.986041284674987, "grad_norm": 0.6758941411972046, "learning_rate": 3.004717476477039e-06, "loss": 0.0728, "step": 100970 }, { "epoch": 2.986337020169161, "grad_norm": 0.6857956051826477, "learning_rate": 3.0045907865580997e-06, "loss": 0.0739, "step": 100980 }, { "epoch": 2.9866327556633347, "grad_norm": 1.081375241279602, "learning_rate": 3.00446409663916e-06, "loss": 0.0846, "step": 100990 }, { "epoch": 2.9869284911575087, "grad_norm": 0.6341068744659424, "learning_rate": 3.004337406720221e-06, "loss": 0.0766, "step": 101000 }, { "epoch": 2.9872242266516826, "grad_norm": 0.5774085521697998, "learning_rate": 3.004210716801281e-06, "loss": 0.0845, "step": 101010 }, { "epoch": 2.987519962145857, "grad_norm": 0.9845628142356873, "learning_rate": 3.004084026882342e-06, "loss": 0.087, "step": 101020 }, { "epoch": 2.9878156976400305, "grad_norm": 1.0598336458206177, "learning_rate": 3.0039573369634023e-06, "loss": 0.0884, "step": 101030 }, { "epoch": 2.988111433134205, "grad_norm": 0.8428500890731812, "learning_rate": 3.0038306470444636e-06, "loss": 0.0924, "step": 101040 }, { "epoch": 2.988407168628379, "grad_norm": 0.9112616181373596, "learning_rate": 3.003703957125524e-06, "loss": 0.0799, "step": 101050 }, { "epoch": 2.988702904122553, "grad_norm": 0.5447333455085754, "learning_rate": 3.0035772672065847e-06, "loss": 0.0677, "step": 101060 }, { "epoch": 2.9889986396167267, "grad_norm": 0.6210402846336365, "learning_rate": 3.003450577287645e-06, "loss": 0.0752, "step": 101070 }, { "epoch": 2.9892943751109007, "grad_norm": 0.7111345529556274, "learning_rate": 3.0033238873687054e-06, "loss": 0.0574, "step": 101080 }, { "epoch": 2.989590110605075, "grad_norm": 0.9267817735671997, "learning_rate": 3.0031971974497662e-06, "loss": 0.0886, "step": 101090 }, { "epoch": 2.9898858460992486, "grad_norm": 0.6724984049797058, "learning_rate": 3.0030705075308266e-06, "loss": 0.0794, "step": 101100 }, { "epoch": 2.990181581593423, "grad_norm": 0.9841680526733398, "learning_rate": 3.0029438176118874e-06, "loss": 0.0885, "step": 101110 }, { "epoch": 2.990477317087597, "grad_norm": 1.089798092842102, "learning_rate": 3.002817127692948e-06, "loss": 0.0705, "step": 101120 }, { "epoch": 2.990773052581771, "grad_norm": 1.0747672319412231, "learning_rate": 3.002690437774009e-06, "loss": 0.0737, "step": 101130 }, { "epoch": 2.991068788075945, "grad_norm": 0.8595035672187805, "learning_rate": 3.0025637478550693e-06, "loss": 0.0791, "step": 101140 }, { "epoch": 2.9913645235701187, "grad_norm": 0.6571161150932312, "learning_rate": 3.00243705793613e-06, "loss": 0.0685, "step": 101150 }, { "epoch": 2.991660259064293, "grad_norm": 0.7020344138145447, "learning_rate": 3.0023103680171905e-06, "loss": 0.0795, "step": 101160 }, { "epoch": 2.991955994558467, "grad_norm": 0.7482272386550903, "learning_rate": 3.0021836780982513e-06, "loss": 0.0688, "step": 101170 }, { "epoch": 2.992251730052641, "grad_norm": 0.9704676866531372, "learning_rate": 3.0020569881793116e-06, "loss": 0.0657, "step": 101180 }, { "epoch": 2.992547465546815, "grad_norm": 0.9299921989440918, "learning_rate": 3.0019302982603724e-06, "loss": 0.0866, "step": 101190 }, { "epoch": 2.992843201040989, "grad_norm": 1.0049519538879395, "learning_rate": 3.0018036083414332e-06, "loss": 0.0869, "step": 101200 }, { "epoch": 2.993138936535163, "grad_norm": 1.094904899597168, "learning_rate": 3.001676918422494e-06, "loss": 0.0849, "step": 101210 }, { "epoch": 2.9934346720293368, "grad_norm": 1.2169413566589355, "learning_rate": 3.0015502285035544e-06, "loss": 0.0757, "step": 101220 }, { "epoch": 2.993730407523511, "grad_norm": 0.6511644721031189, "learning_rate": 3.001423538584615e-06, "loss": 0.0673, "step": 101230 }, { "epoch": 2.994026143017685, "grad_norm": 0.7664244771003723, "learning_rate": 3.0012968486656755e-06, "loss": 0.0772, "step": 101240 }, { "epoch": 2.994321878511859, "grad_norm": 0.6436039805412292, "learning_rate": 3.0011701587467363e-06, "loss": 0.0703, "step": 101250 }, { "epoch": 2.994617614006033, "grad_norm": 1.3379088640213013, "learning_rate": 3.0010434688277967e-06, "loss": 0.0777, "step": 101260 }, { "epoch": 2.994913349500207, "grad_norm": 0.6159680485725403, "learning_rate": 3.0009167789088575e-06, "loss": 0.0739, "step": 101270 }, { "epoch": 2.995209084994381, "grad_norm": 0.3537009060382843, "learning_rate": 3.0007900889899183e-06, "loss": 0.0718, "step": 101280 }, { "epoch": 2.995504820488555, "grad_norm": 0.942847728729248, "learning_rate": 3.000663399070979e-06, "loss": 0.0843, "step": 101290 }, { "epoch": 2.995800555982729, "grad_norm": 0.6726488471031189, "learning_rate": 3.0005367091520394e-06, "loss": 0.0817, "step": 101300 }, { "epoch": 2.996096291476903, "grad_norm": 1.273807406425476, "learning_rate": 3.0004100192331002e-06, "loss": 0.0833, "step": 101310 }, { "epoch": 2.996392026971077, "grad_norm": 0.729710042476654, "learning_rate": 3.0002833293141606e-06, "loss": 0.0673, "step": 101320 }, { "epoch": 2.996687762465251, "grad_norm": 0.9196587800979614, "learning_rate": 3.0001566393952214e-06, "loss": 0.0728, "step": 101330 }, { "epoch": 2.996983497959425, "grad_norm": 0.5990824699401855, "learning_rate": 3.0000299494762817e-06, "loss": 0.1039, "step": 101340 }, { "epoch": 2.9972792334535994, "grad_norm": 0.760002613067627, "learning_rate": 2.9999032595573425e-06, "loss": 0.0824, "step": 101350 }, { "epoch": 2.997574968947773, "grad_norm": 0.6951223611831665, "learning_rate": 2.9997765696384033e-06, "loss": 0.0735, "step": 101360 }, { "epoch": 2.9978707044419473, "grad_norm": 1.3990528583526611, "learning_rate": 2.999649879719464e-06, "loss": 0.079, "step": 101370 }, { "epoch": 2.998166439936121, "grad_norm": 1.0250097513198853, "learning_rate": 2.9995231898005245e-06, "loss": 0.0748, "step": 101380 }, { "epoch": 2.998462175430295, "grad_norm": 0.7656370401382446, "learning_rate": 2.9993964998815853e-06, "loss": 0.0844, "step": 101390 }, { "epoch": 2.998757910924469, "grad_norm": 0.7063989043235779, "learning_rate": 2.9992698099626456e-06, "loss": 0.0895, "step": 101400 }, { "epoch": 2.999053646418643, "grad_norm": 0.9515435695648193, "learning_rate": 2.9991431200437064e-06, "loss": 0.0777, "step": 101410 }, { "epoch": 2.9993493819128174, "grad_norm": 1.0954900979995728, "learning_rate": 2.9990164301247668e-06, "loss": 0.0721, "step": 101420 }, { "epoch": 2.999645117406991, "grad_norm": 0.600613534450531, "learning_rate": 2.9988897402058276e-06, "loss": 0.0625, "step": 101430 }, { "epoch": 2.9999408529011653, "grad_norm": 0.6623870730400085, "learning_rate": 2.9987630502868884e-06, "loss": 0.056, "step": 101440 }, { "epoch": 3.0, "eval_accuracy": 0.6751505472934757, "eval_animal_abuse/accuracy": 0.994776591143494, "eval_animal_abuse/f1": 0.7687776141384389, "eval_animal_abuse/fpr": 0.002490492377074004, "eval_animal_abuse/precision": 0.7791044776119403, "eval_animal_abuse/recall": 0.7587209302325582, "eval_animal_abuse/threshold": 0.5114725828170776, "eval_child_abuse/accuracy": 0.9965399075090661, "eval_child_abuse/f1": 0.6876876876876877, "eval_child_abuse/fpr": 0.0017396831769291216, "eval_child_abuse/precision": 0.6876876876876877, "eval_child_abuse/recall": 0.6876876876876877, "eval_child_abuse/threshold": 0.4428774416446686, "eval_controversial_topics,politics/accuracy": 0.9660977476128689, "eval_controversial_topics,politics/f1": 0.5200188412623645, "eval_controversial_topics,politics/fpr": 0.022309170785282773, "eval_controversial_topics,politics/precision": 0.45923460898502494, "eval_controversial_topics,politics/recall": 0.5993485342019544, "eval_controversial_topics,politics/threshold": 0.24798741936683655, "eval_discrimination,stereotype,injustice/accuracy": 0.9541038693149683, "eval_discrimination,stereotype,injustice/f1": 0.72190303396835, "eval_discrimination,stereotype,injustice/fpr": 0.028192431416488906, "eval_discrimination,stereotype,injustice/precision": 0.6965570900602995, "eval_discrimination,stereotype,injustice/recall": 0.749163179916318, "eval_discrimination,stereotype,injustice/threshold": 0.3674972355365753, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9738330505373124, "eval_drug_abuse,weapons,banned_substance/f1": 0.7760216431724334, "eval_drug_abuse,weapons,banned_substance/fpr": 0.016076716965167083, "eval_drug_abuse,weapons,banned_substance/precision": 0.7492438823205939, "eval_drug_abuse,weapons,banned_substance/recall": 0.8047844063792085, "eval_drug_abuse,weapons,banned_substance/threshold": 0.4573790431022644, "eval_financial_crime,property_crime,theft/accuracy": 0.9607911634561, "eval_financial_crime,property_crime,theft/f1": 0.804609135372627, "eval_financial_crime,property_crime,theft/fpr": 0.025044689751764507, "eval_financial_crime,property_crime,theft/precision": 0.7812298776561494, "eval_financial_crime,property_crime,theft/recall": 0.8294308665185438, "eval_financial_crime,property_crime,theft/threshold": 0.507415235042572, "eval_flagged/accuracy": 0.8543933193598829, "eval_flagged/aucpr": 0.9033587178990591, "eval_flagged/f1": 0.8712623729611272, "eval_flagged/fpr": 0.18456287739564123, "eval_flagged/precision": 0.857527504342791, "eval_flagged/recall": 0.8854443813338914, "eval_hate_speech,offensive_language/accuracy": 0.9492464317796188, "eval_hate_speech,offensive_language/f1": 0.7024285574953673, "eval_hate_speech,offensive_language/fpr": 0.023168280650465882, "eval_hate_speech,offensive_language/precision": 0.7395769151776546, "eval_hate_speech,offensive_language/recall": 0.6688335809806835, "eval_hate_speech,offensive_language/threshold": 0.45447221398353577, "eval_loss": 0.080803781747818, "eval_macro_f1": 0.6790032184713165, "eval_macro_precision": 0.6705162164664411, "eval_macro_recall": 0.6956175841118866, "eval_micro_f1": 0.7534948048423946, "eval_micro_precision": 0.7420177918764224, "eval_micro_recall": 0.7653324323843733, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9774095884486143, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.25792349726775954, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.014532778741390608, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.21474067333939945, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.3228454172366621, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.08269734680652618, "eval_non_violent_unethical_behavior/accuracy": 0.885184815517184, "eval_non_violent_unethical_behavior/f1": 0.7023203657379453, "eval_non_violent_unethical_behavior/fpr": 0.0643165587111774, "eval_non_violent_unethical_behavior/precision": 0.7243772241992883, "eval_non_violent_unethical_behavior/recall": 0.6815670517327976, "eval_non_violent_unethical_behavior/threshold": 0.3993430435657501, "eval_privacy_violation/accuracy": 0.9810193964800213, "eval_privacy_violation/f1": 0.8114984305303156, "eval_privacy_violation/fpr": 0.01104150626443618, "eval_privacy_violation/precision": 0.7955944282474895, "eval_privacy_violation/recall": 0.8280512474713418, "eval_privacy_violation/threshold": 0.42823341488838196, "eval_runtime": 49.4947, "eval_samples_per_second": 1214.553, "eval_self_harm/accuracy": 0.9969890541304854, "eval_self_harm/f1": 0.7475592747559274, "eval_self_harm/fpr": 0.0006532225646522834, "eval_self_harm/precision": 0.8729641693811075, "eval_self_harm/recall": 0.6536585365853659, "eval_self_harm/threshold": 0.7592254281044006, "eval_sexually_explicit,adult_content/accuracy": 0.9836976411484846, "eval_sexually_explicit,adult_content/f1": 0.678477690288714, "eval_sexually_explicit,adult_content/fpr": 0.009664717814103312, "eval_sexually_explicit,adult_content/precision": 0.6458463460337289, "eval_sexually_explicit,adult_content/recall": 0.7145818935729095, "eval_sexually_explicit,adult_content/threshold": 0.4172714352607727, "eval_steps_per_second": 18.992, "eval_terrorism,organized_crime/accuracy": 0.9898359783078817, "eval_terrorism,organized_crime/f1": 0.47007805724197743, "eval_terrorism,organized_crime/fpr": 0.0067244646420605925, "eval_terrorism,organized_crime/precision": 0.40327380952380953, "eval_terrorism,organized_crime/recall": 0.5634095634095634, "eval_terrorism,organized_crime/threshold": 0.37570643424987793, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9220148384735669, "eval_violence,aiding_and_abetting,incitement/f1": 0.8567412296785234, "eval_violence,aiding_and_abetting,incitement/fpr": 0.061511264221929964, "eval_violence,aiding_and_abetting,incitement/precision": 0.8377958403060005, "eval_violence,aiding_and_abetting,incitement/recall": 0.8765632816408204, "eval_violence,aiding_and_abetting,incitement/threshold": 0.5049131512641907, "step": 101442 }, { "epoch": 3.0002365883953392, "grad_norm": 0.9589141607284546, "learning_rate": 2.998636360367949e-06, "loss": 0.0885, "step": 101450 }, { "epoch": 3.000532323889513, "grad_norm": 0.8400146961212158, "learning_rate": 2.9985096704490095e-06, "loss": 0.0792, "step": 101460 }, { "epoch": 3.000828059383687, "grad_norm": 0.7299308776855469, "learning_rate": 2.9983829805300703e-06, "loss": 0.0634, "step": 101470 }, { "epoch": 3.001123794877861, "grad_norm": 0.6617385149002075, "learning_rate": 2.9982562906111307e-06, "loss": 0.0555, "step": 101480 }, { "epoch": 3.0014195303720355, "grad_norm": 0.8356468677520752, "learning_rate": 2.998129600692191e-06, "loss": 0.0736, "step": 101490 }, { "epoch": 3.0017152658662094, "grad_norm": 0.95450758934021, "learning_rate": 2.998002910773252e-06, "loss": 0.083, "step": 101500 }, { "epoch": 3.0020110013603833, "grad_norm": 1.0911900997161865, "learning_rate": 2.997876220854312e-06, "loss": 0.0674, "step": 101510 }, { "epoch": 3.0023067368545573, "grad_norm": 0.8657729029655457, "learning_rate": 2.9977495309353734e-06, "loss": 0.0697, "step": 101520 }, { "epoch": 3.0026024723487312, "grad_norm": 0.8506889939308167, "learning_rate": 2.9976228410164338e-06, "loss": 0.0795, "step": 101530 }, { "epoch": 3.002898207842905, "grad_norm": 0.8700278401374817, "learning_rate": 2.9974961510974946e-06, "loss": 0.0619, "step": 101540 }, { "epoch": 3.003193943337079, "grad_norm": 0.8902119398117065, "learning_rate": 2.997369461178555e-06, "loss": 0.0729, "step": 101550 }, { "epoch": 3.0034896788312535, "grad_norm": 0.8369147181510925, "learning_rate": 2.9972427712596157e-06, "loss": 0.0796, "step": 101560 }, { "epoch": 3.0037854143254274, "grad_norm": 0.7911316752433777, "learning_rate": 2.997116081340676e-06, "loss": 0.0857, "step": 101570 }, { "epoch": 3.0040811498196014, "grad_norm": 0.8088401556015015, "learning_rate": 2.996989391421737e-06, "loss": 0.071, "step": 101580 }, { "epoch": 3.0043768853137753, "grad_norm": 0.3780277967453003, "learning_rate": 2.9968627015027972e-06, "loss": 0.0621, "step": 101590 }, { "epoch": 3.0046726208079493, "grad_norm": 0.9393802285194397, "learning_rate": 2.9967360115838584e-06, "loss": 0.0781, "step": 101600 }, { "epoch": 3.0049683563021232, "grad_norm": 0.8402996063232422, "learning_rate": 2.996609321664919e-06, "loss": 0.0934, "step": 101610 }, { "epoch": 3.0052640917962976, "grad_norm": 0.6149714589118958, "learning_rate": 2.9964826317459796e-06, "loss": 0.0629, "step": 101620 }, { "epoch": 3.0055598272904716, "grad_norm": 0.6523768901824951, "learning_rate": 2.99635594182704e-06, "loss": 0.0645, "step": 101630 }, { "epoch": 3.0058555627846455, "grad_norm": 0.6318656802177429, "learning_rate": 2.9962292519081008e-06, "loss": 0.0517, "step": 101640 }, { "epoch": 3.0061512982788194, "grad_norm": 0.9412868618965149, "learning_rate": 2.996102561989161e-06, "loss": 0.0846, "step": 101650 }, { "epoch": 3.0064470337729934, "grad_norm": 1.032657265663147, "learning_rate": 2.995975872070222e-06, "loss": 0.069, "step": 101660 }, { "epoch": 3.0067427692671673, "grad_norm": 0.6426252126693726, "learning_rate": 2.9958491821512823e-06, "loss": 0.0615, "step": 101670 }, { "epoch": 3.0070385047613413, "grad_norm": 1.296656847000122, "learning_rate": 2.9957224922323435e-06, "loss": 0.057, "step": 101680 }, { "epoch": 3.0073342402555157, "grad_norm": 0.5464335680007935, "learning_rate": 2.995595802313404e-06, "loss": 0.0605, "step": 101690 }, { "epoch": 3.0076299757496896, "grad_norm": 0.8717021346092224, "learning_rate": 2.9954691123944646e-06, "loss": 0.0794, "step": 101700 }, { "epoch": 3.0079257112438635, "grad_norm": 0.8738321661949158, "learning_rate": 2.995342422475525e-06, "loss": 0.0847, "step": 101710 }, { "epoch": 3.0082214467380375, "grad_norm": 0.5183880925178528, "learning_rate": 2.995215732556586e-06, "loss": 0.0644, "step": 101720 }, { "epoch": 3.0085171822322114, "grad_norm": 0.8130860328674316, "learning_rate": 2.995089042637646e-06, "loss": 0.079, "step": 101730 }, { "epoch": 3.0088129177263854, "grad_norm": 0.7018712162971497, "learning_rate": 2.994962352718707e-06, "loss": 0.0548, "step": 101740 }, { "epoch": 3.0091086532205593, "grad_norm": 1.0595753192901611, "learning_rate": 2.9948356627997673e-06, "loss": 0.076, "step": 101750 }, { "epoch": 3.0094043887147337, "grad_norm": 0.48738205432891846, "learning_rate": 2.9947089728808285e-06, "loss": 0.0812, "step": 101760 }, { "epoch": 3.0097001242089076, "grad_norm": 0.820197343826294, "learning_rate": 2.994582282961889e-06, "loss": 0.0766, "step": 101770 }, { "epoch": 3.0099958597030816, "grad_norm": 0.7983548641204834, "learning_rate": 2.9944555930429497e-06, "loss": 0.0672, "step": 101780 }, { "epoch": 3.0102915951972555, "grad_norm": 1.271565556526184, "learning_rate": 2.99432890312401e-06, "loss": 0.0636, "step": 101790 }, { "epoch": 3.0105873306914295, "grad_norm": 0.4139474034309387, "learning_rate": 2.994202213205071e-06, "loss": 0.0784, "step": 101800 }, { "epoch": 3.0108830661856034, "grad_norm": 0.5834382772445679, "learning_rate": 2.9940755232861312e-06, "loss": 0.0739, "step": 101810 }, { "epoch": 3.011178801679778, "grad_norm": 0.6044744849205017, "learning_rate": 2.993948833367192e-06, "loss": 0.0743, "step": 101820 }, { "epoch": 3.0114745371739517, "grad_norm": 0.7226553559303284, "learning_rate": 2.9938221434482524e-06, "loss": 0.0726, "step": 101830 }, { "epoch": 3.0117702726681257, "grad_norm": 0.6019631028175354, "learning_rate": 2.9936954535293136e-06, "loss": 0.0526, "step": 101840 }, { "epoch": 3.0120660081622996, "grad_norm": 0.7752636075019836, "learning_rate": 2.993568763610374e-06, "loss": 0.0749, "step": 101850 }, { "epoch": 3.0123617436564736, "grad_norm": 0.4532296061515808, "learning_rate": 2.9934420736914347e-06, "loss": 0.0778, "step": 101860 }, { "epoch": 3.0126574791506475, "grad_norm": 0.6233338713645935, "learning_rate": 2.993315383772495e-06, "loss": 0.0758, "step": 101870 }, { "epoch": 3.0129532146448215, "grad_norm": 0.7866412997245789, "learning_rate": 2.993188693853556e-06, "loss": 0.0669, "step": 101880 }, { "epoch": 3.013248950138996, "grad_norm": 0.8490892648696899, "learning_rate": 2.9930620039346163e-06, "loss": 0.0554, "step": 101890 }, { "epoch": 3.01354468563317, "grad_norm": 0.7962298393249512, "learning_rate": 2.9929353140156766e-06, "loss": 0.0685, "step": 101900 }, { "epoch": 3.0138404211273437, "grad_norm": 0.938938558101654, "learning_rate": 2.9928086240967374e-06, "loss": 0.0792, "step": 101910 }, { "epoch": 3.0141361566215177, "grad_norm": 0.714079737663269, "learning_rate": 2.992681934177798e-06, "loss": 0.0778, "step": 101920 }, { "epoch": 3.0144318921156916, "grad_norm": 0.6465883255004883, "learning_rate": 2.992555244258859e-06, "loss": 0.0714, "step": 101930 }, { "epoch": 3.0147276276098656, "grad_norm": 0.5489195585250854, "learning_rate": 2.9924285543399194e-06, "loss": 0.0624, "step": 101940 }, { "epoch": 3.01502336310404, "grad_norm": 0.8522346019744873, "learning_rate": 2.99230186442098e-06, "loss": 0.0959, "step": 101950 }, { "epoch": 3.015319098598214, "grad_norm": 0.7189643979072571, "learning_rate": 2.9921751745020405e-06, "loss": 0.0704, "step": 101960 }, { "epoch": 3.015614834092388, "grad_norm": 0.6328274607658386, "learning_rate": 2.9920484845831013e-06, "loss": 0.074, "step": 101970 }, { "epoch": 3.015910569586562, "grad_norm": 0.5494099259376526, "learning_rate": 2.9919217946641617e-06, "loss": 0.0741, "step": 101980 }, { "epoch": 3.0162063050807357, "grad_norm": 0.5544125437736511, "learning_rate": 2.9917951047452225e-06, "loss": 0.06, "step": 101990 }, { "epoch": 3.0165020405749097, "grad_norm": 0.8263944983482361, "learning_rate": 2.9916684148262833e-06, "loss": 0.087, "step": 102000 }, { "epoch": 3.0167977760690836, "grad_norm": 1.1393098831176758, "learning_rate": 2.991541724907344e-06, "loss": 0.0869, "step": 102010 }, { "epoch": 3.017093511563258, "grad_norm": 0.7191615104675293, "learning_rate": 2.9914150349884044e-06, "loss": 0.0647, "step": 102020 }, { "epoch": 3.017389247057432, "grad_norm": 0.7299771308898926, "learning_rate": 2.991288345069465e-06, "loss": 0.0762, "step": 102030 }, { "epoch": 3.017684982551606, "grad_norm": 0.601435124874115, "learning_rate": 2.9911616551505256e-06, "loss": 0.0579, "step": 102040 }, { "epoch": 3.01798071804578, "grad_norm": 0.7098234295845032, "learning_rate": 2.9910349652315864e-06, "loss": 0.0757, "step": 102050 }, { "epoch": 3.0182764535399538, "grad_norm": 1.3258229494094849, "learning_rate": 2.9909082753126467e-06, "loss": 0.0801, "step": 102060 }, { "epoch": 3.0185721890341277, "grad_norm": 0.7004856467247009, "learning_rate": 2.9907815853937075e-06, "loss": 0.0625, "step": 102070 }, { "epoch": 3.018867924528302, "grad_norm": 0.8789544105529785, "learning_rate": 2.9906548954747683e-06, "loss": 0.0657, "step": 102080 }, { "epoch": 3.019163660022476, "grad_norm": 0.9561577439308167, "learning_rate": 2.990528205555829e-06, "loss": 0.0562, "step": 102090 }, { "epoch": 3.01945939551665, "grad_norm": 0.9655889868736267, "learning_rate": 2.9904015156368895e-06, "loss": 0.0844, "step": 102100 }, { "epoch": 3.019755131010824, "grad_norm": 0.8051304221153259, "learning_rate": 2.9902748257179502e-06, "loss": 0.0971, "step": 102110 }, { "epoch": 3.020050866504998, "grad_norm": 0.5606247782707214, "learning_rate": 2.9901481357990106e-06, "loss": 0.0772, "step": 102120 }, { "epoch": 3.020346601999172, "grad_norm": 0.7624753713607788, "learning_rate": 2.9900214458800714e-06, "loss": 0.0703, "step": 102130 }, { "epoch": 3.0206423374933458, "grad_norm": 0.6419124603271484, "learning_rate": 2.9898947559611318e-06, "loss": 0.0522, "step": 102140 }, { "epoch": 3.02093807298752, "grad_norm": 0.7173855900764465, "learning_rate": 2.9897680660421926e-06, "loss": 0.0683, "step": 102150 }, { "epoch": 3.021233808481694, "grad_norm": 0.9659842848777771, "learning_rate": 2.9896413761232533e-06, "loss": 0.0788, "step": 102160 }, { "epoch": 3.021529543975868, "grad_norm": 1.0792688131332397, "learning_rate": 2.989514686204314e-06, "loss": 0.0728, "step": 102170 }, { "epoch": 3.021825279470042, "grad_norm": 0.9317246675491333, "learning_rate": 2.9893879962853745e-06, "loss": 0.0771, "step": 102180 }, { "epoch": 3.022121014964216, "grad_norm": 0.5038214921951294, "learning_rate": 2.9892613063664353e-06, "loss": 0.0691, "step": 102190 }, { "epoch": 3.02241675045839, "grad_norm": 0.8804773688316345, "learning_rate": 2.9891346164474957e-06, "loss": 0.0723, "step": 102200 }, { "epoch": 3.022712485952564, "grad_norm": 0.7650586366653442, "learning_rate": 2.9890079265285564e-06, "loss": 0.0778, "step": 102210 }, { "epoch": 3.023008221446738, "grad_norm": 0.4267091155052185, "learning_rate": 2.988881236609617e-06, "loss": 0.0622, "step": 102220 }, { "epoch": 3.023303956940912, "grad_norm": 0.7290276885032654, "learning_rate": 2.9887545466906776e-06, "loss": 0.066, "step": 102230 }, { "epoch": 3.023599692435086, "grad_norm": 1.1212610006332397, "learning_rate": 2.9886278567717384e-06, "loss": 0.0678, "step": 102240 }, { "epoch": 3.02389542792926, "grad_norm": 0.9929481744766235, "learning_rate": 2.988501166852799e-06, "loss": 0.0859, "step": 102250 }, { "epoch": 3.024191163423434, "grad_norm": 0.8576931953430176, "learning_rate": 2.9883744769338595e-06, "loss": 0.0826, "step": 102260 }, { "epoch": 3.024486898917608, "grad_norm": 0.7616984248161316, "learning_rate": 2.9882477870149203e-06, "loss": 0.0797, "step": 102270 }, { "epoch": 3.0247826344117823, "grad_norm": 0.49117887020111084, "learning_rate": 2.9881210970959807e-06, "loss": 0.0561, "step": 102280 }, { "epoch": 3.0250783699059562, "grad_norm": 0.7121790647506714, "learning_rate": 2.9879944071770415e-06, "loss": 0.0781, "step": 102290 }, { "epoch": 3.02537410540013, "grad_norm": 1.263270616531372, "learning_rate": 2.987867717258102e-06, "loss": 0.0658, "step": 102300 }, { "epoch": 3.025669840894304, "grad_norm": 0.703768253326416, "learning_rate": 2.9877410273391622e-06, "loss": 0.0826, "step": 102310 }, { "epoch": 3.025965576388478, "grad_norm": 1.0406144857406616, "learning_rate": 2.9876143374202234e-06, "loss": 0.0808, "step": 102320 }, { "epoch": 3.026261311882652, "grad_norm": 0.6786102056503296, "learning_rate": 2.987487647501284e-06, "loss": 0.07, "step": 102330 }, { "epoch": 3.026557047376826, "grad_norm": 0.5025174021720886, "learning_rate": 2.9873609575823446e-06, "loss": 0.0495, "step": 102340 }, { "epoch": 3.0268527828710003, "grad_norm": 1.0294251441955566, "learning_rate": 2.987234267663405e-06, "loss": 0.0801, "step": 102350 }, { "epoch": 3.0271485183651743, "grad_norm": 0.954036295413971, "learning_rate": 2.9871075777444657e-06, "loss": 0.082, "step": 102360 }, { "epoch": 3.0274442538593482, "grad_norm": 0.861696720123291, "learning_rate": 2.986980887825526e-06, "loss": 0.084, "step": 102370 }, { "epoch": 3.027739989353522, "grad_norm": 0.44871601462364197, "learning_rate": 2.986854197906587e-06, "loss": 0.069, "step": 102380 }, { "epoch": 3.028035724847696, "grad_norm": 0.5700882077217102, "learning_rate": 2.9867275079876473e-06, "loss": 0.0509, "step": 102390 }, { "epoch": 3.02833146034187, "grad_norm": 0.9796914458274841, "learning_rate": 2.9866008180687085e-06, "loss": 0.0681, "step": 102400 }, { "epoch": 3.0286271958360445, "grad_norm": 0.9851585626602173, "learning_rate": 2.986474128149769e-06, "loss": 0.0836, "step": 102410 }, { "epoch": 3.0289229313302184, "grad_norm": 0.7237773537635803, "learning_rate": 2.9863474382308296e-06, "loss": 0.0712, "step": 102420 }, { "epoch": 3.0292186668243923, "grad_norm": 0.6282045245170593, "learning_rate": 2.98622074831189e-06, "loss": 0.0712, "step": 102430 }, { "epoch": 3.0295144023185663, "grad_norm": 0.4790777862071991, "learning_rate": 2.986094058392951e-06, "loss": 0.0521, "step": 102440 }, { "epoch": 3.0298101378127402, "grad_norm": 0.8918161988258362, "learning_rate": 2.985967368474011e-06, "loss": 0.0905, "step": 102450 }, { "epoch": 3.030105873306914, "grad_norm": 0.7245776057243347, "learning_rate": 2.985840678555072e-06, "loss": 0.0793, "step": 102460 }, { "epoch": 3.030401608801088, "grad_norm": 0.7671898603439331, "learning_rate": 2.9857139886361323e-06, "loss": 0.0688, "step": 102470 }, { "epoch": 3.0306973442952625, "grad_norm": 0.7285618782043457, "learning_rate": 2.9855872987171935e-06, "loss": 0.0737, "step": 102480 }, { "epoch": 3.0309930797894364, "grad_norm": 0.9190587997436523, "learning_rate": 2.985460608798254e-06, "loss": 0.0718, "step": 102490 }, { "epoch": 3.0312888152836104, "grad_norm": 0.7922295928001404, "learning_rate": 2.9853339188793147e-06, "loss": 0.0928, "step": 102500 }, { "epoch": 3.0315845507777843, "grad_norm": 1.1343716382980347, "learning_rate": 2.985207228960375e-06, "loss": 0.0837, "step": 102510 }, { "epoch": 3.0318802862719583, "grad_norm": 0.5356486439704895, "learning_rate": 2.985080539041436e-06, "loss": 0.0703, "step": 102520 }, { "epoch": 3.032176021766132, "grad_norm": 0.5236185193061829, "learning_rate": 2.984953849122496e-06, "loss": 0.0646, "step": 102530 }, { "epoch": 3.0324717572603066, "grad_norm": 1.05244779586792, "learning_rate": 2.984827159203557e-06, "loss": 0.0834, "step": 102540 }, { "epoch": 3.0327674927544805, "grad_norm": 1.2762959003448486, "learning_rate": 2.9847004692846174e-06, "loss": 0.095, "step": 102550 }, { "epoch": 3.0330632282486545, "grad_norm": 1.168958067893982, "learning_rate": 2.9845737793656786e-06, "loss": 0.0822, "step": 102560 }, { "epoch": 3.0333589637428284, "grad_norm": 0.8181138038635254, "learning_rate": 2.984447089446739e-06, "loss": 0.0617, "step": 102570 }, { "epoch": 3.0336546992370024, "grad_norm": 0.46289539337158203, "learning_rate": 2.9843203995277997e-06, "loss": 0.0544, "step": 102580 }, { "epoch": 3.0339504347311763, "grad_norm": 1.2217084169387817, "learning_rate": 2.98419370960886e-06, "loss": 0.0667, "step": 102590 }, { "epoch": 3.0342461702253503, "grad_norm": 0.8539205193519592, "learning_rate": 2.984067019689921e-06, "loss": 0.0866, "step": 102600 }, { "epoch": 3.0345419057195246, "grad_norm": 0.7761197686195374, "learning_rate": 2.9839403297709812e-06, "loss": 0.0779, "step": 102610 }, { "epoch": 3.0348376412136986, "grad_norm": 1.9071050882339478, "learning_rate": 2.983813639852042e-06, "loss": 0.077, "step": 102620 }, { "epoch": 3.0351333767078725, "grad_norm": 0.8887467980384827, "learning_rate": 2.9836869499331024e-06, "loss": 0.0808, "step": 102630 }, { "epoch": 3.0354291122020465, "grad_norm": 0.42987990379333496, "learning_rate": 2.9835602600141636e-06, "loss": 0.0582, "step": 102640 }, { "epoch": 3.0357248476962204, "grad_norm": 1.0493295192718506, "learning_rate": 2.983433570095224e-06, "loss": 0.0803, "step": 102650 }, { "epoch": 3.0360205831903944, "grad_norm": 0.6632997989654541, "learning_rate": 2.9833068801762848e-06, "loss": 0.0714, "step": 102660 }, { "epoch": 3.0363163186845683, "grad_norm": 0.6988099813461304, "learning_rate": 2.983180190257345e-06, "loss": 0.0642, "step": 102670 }, { "epoch": 3.0366120541787427, "grad_norm": 0.6890956163406372, "learning_rate": 2.983053500338406e-06, "loss": 0.0587, "step": 102680 }, { "epoch": 3.0369077896729166, "grad_norm": 0.9549149870872498, "learning_rate": 2.9829268104194663e-06, "loss": 0.0731, "step": 102690 }, { "epoch": 3.0372035251670906, "grad_norm": 0.8197628259658813, "learning_rate": 2.982800120500527e-06, "loss": 0.0729, "step": 102700 }, { "epoch": 3.0374992606612645, "grad_norm": 1.156227469444275, "learning_rate": 2.9826734305815874e-06, "loss": 0.0681, "step": 102710 }, { "epoch": 3.0377949961554385, "grad_norm": 1.5398941040039062, "learning_rate": 2.9825467406626482e-06, "loss": 0.0892, "step": 102720 }, { "epoch": 3.0380907316496124, "grad_norm": 0.6571569442749023, "learning_rate": 2.982420050743709e-06, "loss": 0.0781, "step": 102730 }, { "epoch": 3.038386467143787, "grad_norm": 0.6747809052467346, "learning_rate": 2.9822933608247694e-06, "loss": 0.0614, "step": 102740 }, { "epoch": 3.0386822026379607, "grad_norm": 0.8941850662231445, "learning_rate": 2.98216667090583e-06, "loss": 0.0574, "step": 102750 }, { "epoch": 3.0389779381321347, "grad_norm": 1.2416943311691284, "learning_rate": 2.9820399809868905e-06, "loss": 0.0865, "step": 102760 }, { "epoch": 3.0392736736263086, "grad_norm": 0.5283985137939453, "learning_rate": 2.9819132910679513e-06, "loss": 0.071, "step": 102770 }, { "epoch": 3.0395694091204826, "grad_norm": 1.2468938827514648, "learning_rate": 2.9817866011490117e-06, "loss": 0.0772, "step": 102780 }, { "epoch": 3.0398651446146565, "grad_norm": 0.8477365970611572, "learning_rate": 2.9816599112300725e-06, "loss": 0.0751, "step": 102790 }, { "epoch": 3.0401608801088305, "grad_norm": 0.8702495098114014, "learning_rate": 2.9815332213111333e-06, "loss": 0.0808, "step": 102800 }, { "epoch": 3.040456615603005, "grad_norm": 0.8918529748916626, "learning_rate": 2.981406531392194e-06, "loss": 0.0761, "step": 102810 }, { "epoch": 3.040752351097179, "grad_norm": 0.6163540482521057, "learning_rate": 2.9812798414732544e-06, "loss": 0.073, "step": 102820 }, { "epoch": 3.0410480865913527, "grad_norm": 0.9464666247367859, "learning_rate": 2.9811531515543152e-06, "loss": 0.0576, "step": 102830 }, { "epoch": 3.0413438220855267, "grad_norm": 0.982174277305603, "learning_rate": 2.9810264616353756e-06, "loss": 0.0657, "step": 102840 }, { "epoch": 3.0416395575797006, "grad_norm": 1.1218441724777222, "learning_rate": 2.9808997717164364e-06, "loss": 0.0909, "step": 102850 }, { "epoch": 3.0419352930738746, "grad_norm": 1.0157389640808105, "learning_rate": 2.9807730817974967e-06, "loss": 0.0712, "step": 102860 }, { "epoch": 3.042231028568049, "grad_norm": 0.8169978857040405, "learning_rate": 2.9806463918785575e-06, "loss": 0.087, "step": 102870 }, { "epoch": 3.042526764062223, "grad_norm": 0.8713991641998291, "learning_rate": 2.9805197019596183e-06, "loss": 0.0734, "step": 102880 }, { "epoch": 3.042822499556397, "grad_norm": 0.49228861927986145, "learning_rate": 2.980393012040679e-06, "loss": 0.0715, "step": 102890 }, { "epoch": 3.0431182350505708, "grad_norm": 0.8094732761383057, "learning_rate": 2.9802663221217395e-06, "loss": 0.0657, "step": 102900 }, { "epoch": 3.0434139705447447, "grad_norm": 0.9205501079559326, "learning_rate": 2.9801396322028003e-06, "loss": 0.0908, "step": 102910 }, { "epoch": 3.0437097060389187, "grad_norm": 0.4353807866573334, "learning_rate": 2.9800129422838606e-06, "loss": 0.068, "step": 102920 }, { "epoch": 3.0440054415330926, "grad_norm": 0.6687650084495544, "learning_rate": 2.9798862523649214e-06, "loss": 0.0636, "step": 102930 }, { "epoch": 3.044301177027267, "grad_norm": 0.6888607740402222, "learning_rate": 2.979759562445982e-06, "loss": 0.0753, "step": 102940 }, { "epoch": 3.044596912521441, "grad_norm": 0.6542914509773254, "learning_rate": 2.9796328725270426e-06, "loss": 0.0721, "step": 102950 }, { "epoch": 3.044892648015615, "grad_norm": 0.43096837401390076, "learning_rate": 2.9795061826081034e-06, "loss": 0.0733, "step": 102960 }, { "epoch": 3.045188383509789, "grad_norm": 0.6035621762275696, "learning_rate": 2.979379492689164e-06, "loss": 0.0638, "step": 102970 }, { "epoch": 3.0454841190039628, "grad_norm": 0.8537949323654175, "learning_rate": 2.9792528027702245e-06, "loss": 0.0702, "step": 102980 }, { "epoch": 3.0457798544981367, "grad_norm": 0.45655497908592224, "learning_rate": 2.9791261128512853e-06, "loss": 0.0594, "step": 102990 }, { "epoch": 3.046075589992311, "grad_norm": 0.7227585911750793, "learning_rate": 2.9789994229323457e-06, "loss": 0.0793, "step": 103000 }, { "epoch": 3.046371325486485, "grad_norm": 1.1898808479309082, "learning_rate": 2.9788727330134065e-06, "loss": 0.0813, "step": 103010 }, { "epoch": 3.046667060980659, "grad_norm": 0.7662531733512878, "learning_rate": 2.978746043094467e-06, "loss": 0.0821, "step": 103020 }, { "epoch": 3.046962796474833, "grad_norm": 0.45715323090553284, "learning_rate": 2.9786193531755276e-06, "loss": 0.0638, "step": 103030 }, { "epoch": 3.047258531969007, "grad_norm": 0.5587903261184692, "learning_rate": 2.9784926632565884e-06, "loss": 0.0533, "step": 103040 }, { "epoch": 3.047554267463181, "grad_norm": 0.8942430019378662, "learning_rate": 2.978365973337649e-06, "loss": 0.0667, "step": 103050 }, { "epoch": 3.0478500029573548, "grad_norm": 0.871060311794281, "learning_rate": 2.9782392834187096e-06, "loss": 0.0746, "step": 103060 }, { "epoch": 3.048145738451529, "grad_norm": 0.8555482625961304, "learning_rate": 2.9781125934997704e-06, "loss": 0.0797, "step": 103070 }, { "epoch": 3.048441473945703, "grad_norm": 0.5529300570487976, "learning_rate": 2.9779859035808307e-06, "loss": 0.0596, "step": 103080 }, { "epoch": 3.048737209439877, "grad_norm": 1.1306184530258179, "learning_rate": 2.9778592136618915e-06, "loss": 0.0524, "step": 103090 }, { "epoch": 3.049032944934051, "grad_norm": 0.7876675128936768, "learning_rate": 2.977732523742952e-06, "loss": 0.0686, "step": 103100 }, { "epoch": 3.049328680428225, "grad_norm": 1.0147649049758911, "learning_rate": 2.9776058338240127e-06, "loss": 0.0837, "step": 103110 }, { "epoch": 3.049624415922399, "grad_norm": 1.1302001476287842, "learning_rate": 2.9774791439050735e-06, "loss": 0.0712, "step": 103120 }, { "epoch": 3.049920151416573, "grad_norm": 0.4093194603919983, "learning_rate": 2.9773524539861343e-06, "loss": 0.0663, "step": 103130 }, { "epoch": 3.050215886910747, "grad_norm": 0.7567825317382812, "learning_rate": 2.9772257640671946e-06, "loss": 0.0747, "step": 103140 }, { "epoch": 3.050511622404921, "grad_norm": 0.736553430557251, "learning_rate": 2.977099074148255e-06, "loss": 0.0708, "step": 103150 }, { "epoch": 3.050807357899095, "grad_norm": 0.8778336644172668, "learning_rate": 2.9769723842293158e-06, "loss": 0.0702, "step": 103160 }, { "epoch": 3.051103093393269, "grad_norm": 0.7240003943443298, "learning_rate": 2.976845694310376e-06, "loss": 0.0678, "step": 103170 }, { "epoch": 3.051398828887443, "grad_norm": 0.5581708550453186, "learning_rate": 2.976719004391437e-06, "loss": 0.0709, "step": 103180 }, { "epoch": 3.051694564381617, "grad_norm": 0.4731532633304596, "learning_rate": 2.9765923144724973e-06, "loss": 0.0527, "step": 103190 }, { "epoch": 3.0519902998757913, "grad_norm": 1.0742850303649902, "learning_rate": 2.9764656245535585e-06, "loss": 0.0762, "step": 103200 }, { "epoch": 3.0522860353699652, "grad_norm": 0.7896698117256165, "learning_rate": 2.976338934634619e-06, "loss": 0.0796, "step": 103210 }, { "epoch": 3.052581770864139, "grad_norm": 0.900566577911377, "learning_rate": 2.9762122447156797e-06, "loss": 0.0624, "step": 103220 }, { "epoch": 3.052877506358313, "grad_norm": 0.5346869230270386, "learning_rate": 2.97608555479674e-06, "loss": 0.0654, "step": 103230 }, { "epoch": 3.053173241852487, "grad_norm": 0.8315101861953735, "learning_rate": 2.975958864877801e-06, "loss": 0.044, "step": 103240 }, { "epoch": 3.053468977346661, "grad_norm": 1.0792384147644043, "learning_rate": 2.975832174958861e-06, "loss": 0.0848, "step": 103250 }, { "epoch": 3.053764712840835, "grad_norm": 1.3072459697723389, "learning_rate": 2.975705485039922e-06, "loss": 0.087, "step": 103260 }, { "epoch": 3.0540604483350093, "grad_norm": 0.5088756084442139, "learning_rate": 2.9755787951209823e-06, "loss": 0.0851, "step": 103270 }, { "epoch": 3.0543561838291833, "grad_norm": 1.0449025630950928, "learning_rate": 2.9754521052020436e-06, "loss": 0.0626, "step": 103280 }, { "epoch": 3.0546519193233572, "grad_norm": 0.33175069093704224, "learning_rate": 2.975325415283104e-06, "loss": 0.0637, "step": 103290 }, { "epoch": 3.054947654817531, "grad_norm": 0.6985043883323669, "learning_rate": 2.9751987253641647e-06, "loss": 0.0652, "step": 103300 }, { "epoch": 3.055243390311705, "grad_norm": 1.2028687000274658, "learning_rate": 2.975072035445225e-06, "loss": 0.0765, "step": 103310 }, { "epoch": 3.055539125805879, "grad_norm": 1.0209113359451294, "learning_rate": 2.974945345526286e-06, "loss": 0.0631, "step": 103320 }, { "epoch": 3.0558348613000534, "grad_norm": 0.8092290163040161, "learning_rate": 2.9748186556073462e-06, "loss": 0.0495, "step": 103330 }, { "epoch": 3.0561305967942274, "grad_norm": 0.6516426801681519, "learning_rate": 2.974691965688407e-06, "loss": 0.0577, "step": 103340 }, { "epoch": 3.0564263322884013, "grad_norm": 1.0639545917510986, "learning_rate": 2.9745652757694674e-06, "loss": 0.075, "step": 103350 }, { "epoch": 3.0567220677825753, "grad_norm": 0.7297106981277466, "learning_rate": 2.9744385858505286e-06, "loss": 0.0758, "step": 103360 }, { "epoch": 3.057017803276749, "grad_norm": 0.5555514097213745, "learning_rate": 2.974311895931589e-06, "loss": 0.0572, "step": 103370 }, { "epoch": 3.057313538770923, "grad_norm": 0.7027503848075867, "learning_rate": 2.9741852060126498e-06, "loss": 0.0828, "step": 103380 }, { "epoch": 3.057609274265097, "grad_norm": 0.9798038005828857, "learning_rate": 2.97405851609371e-06, "loss": 0.0531, "step": 103390 }, { "epoch": 3.0579050097592715, "grad_norm": 0.9580746293067932, "learning_rate": 2.973931826174771e-06, "loss": 0.0723, "step": 103400 }, { "epoch": 3.0582007452534454, "grad_norm": 0.8969283103942871, "learning_rate": 2.9738051362558313e-06, "loss": 0.0831, "step": 103410 }, { "epoch": 3.0584964807476194, "grad_norm": 0.5677013397216797, "learning_rate": 2.973678446336892e-06, "loss": 0.0775, "step": 103420 }, { "epoch": 3.0587922162417933, "grad_norm": 0.5828153491020203, "learning_rate": 2.9735517564179524e-06, "loss": 0.0664, "step": 103430 }, { "epoch": 3.0590879517359673, "grad_norm": 0.5893745422363281, "learning_rate": 2.9734250664990136e-06, "loss": 0.0673, "step": 103440 }, { "epoch": 3.059383687230141, "grad_norm": 0.8007262945175171, "learning_rate": 2.973298376580074e-06, "loss": 0.0965, "step": 103450 }, { "epoch": 3.0596794227243156, "grad_norm": 0.5908390283584595, "learning_rate": 2.973171686661135e-06, "loss": 0.0636, "step": 103460 }, { "epoch": 3.0599751582184895, "grad_norm": 0.7178112864494324, "learning_rate": 2.973044996742195e-06, "loss": 0.0719, "step": 103470 }, { "epoch": 3.0602708937126635, "grad_norm": 0.92298424243927, "learning_rate": 2.972918306823256e-06, "loss": 0.0804, "step": 103480 }, { "epoch": 3.0605666292068374, "grad_norm": 0.767876148223877, "learning_rate": 2.9727916169043163e-06, "loss": 0.0596, "step": 103490 }, { "epoch": 3.0608623647010114, "grad_norm": 0.685091495513916, "learning_rate": 2.972664926985377e-06, "loss": 0.0626, "step": 103500 }, { "epoch": 3.0611581001951853, "grad_norm": 0.8540890216827393, "learning_rate": 2.9725382370664375e-06, "loss": 0.0888, "step": 103510 }, { "epoch": 3.0614538356893592, "grad_norm": 0.5944088101387024, "learning_rate": 2.9724115471474987e-06, "loss": 0.064, "step": 103520 }, { "epoch": 3.0617495711835336, "grad_norm": 0.6038162112236023, "learning_rate": 2.972284857228559e-06, "loss": 0.0694, "step": 103530 }, { "epoch": 3.0620453066777076, "grad_norm": 0.5735265016555786, "learning_rate": 2.97215816730962e-06, "loss": 0.0648, "step": 103540 }, { "epoch": 3.0623410421718815, "grad_norm": 1.0376625061035156, "learning_rate": 2.97203147739068e-06, "loss": 0.0782, "step": 103550 }, { "epoch": 3.0626367776660555, "grad_norm": 0.7979482412338257, "learning_rate": 2.9719047874717406e-06, "loss": 0.0798, "step": 103560 }, { "epoch": 3.0629325131602294, "grad_norm": 0.9642032980918884, "learning_rate": 2.9717780975528014e-06, "loss": 0.0736, "step": 103570 }, { "epoch": 3.0632282486544034, "grad_norm": 0.8345025777816772, "learning_rate": 2.9716514076338617e-06, "loss": 0.0745, "step": 103580 }, { "epoch": 3.0635239841485777, "grad_norm": 0.715209424495697, "learning_rate": 2.9715247177149225e-06, "loss": 0.0617, "step": 103590 }, { "epoch": 3.0638197196427517, "grad_norm": 1.0978399515151978, "learning_rate": 2.9713980277959833e-06, "loss": 0.0759, "step": 103600 }, { "epoch": 3.0641154551369256, "grad_norm": 0.7888630628585815, "learning_rate": 2.971271337877044e-06, "loss": 0.0685, "step": 103610 }, { "epoch": 3.0644111906310996, "grad_norm": 1.2334951162338257, "learning_rate": 2.9711446479581045e-06, "loss": 0.07, "step": 103620 }, { "epoch": 3.0647069261252735, "grad_norm": 0.6873016357421875, "learning_rate": 2.9710179580391653e-06, "loss": 0.0692, "step": 103630 }, { "epoch": 3.0650026616194475, "grad_norm": 0.9516882300376892, "learning_rate": 2.9708912681202256e-06, "loss": 0.0661, "step": 103640 }, { "epoch": 3.0652983971136214, "grad_norm": 0.9646915793418884, "learning_rate": 2.9707645782012864e-06, "loss": 0.0689, "step": 103650 }, { "epoch": 3.065594132607796, "grad_norm": 0.8558480739593506, "learning_rate": 2.9706378882823468e-06, "loss": 0.0878, "step": 103660 }, { "epoch": 3.0658898681019697, "grad_norm": 1.2519581317901611, "learning_rate": 2.9705111983634076e-06, "loss": 0.0689, "step": 103670 }, { "epoch": 3.0661856035961437, "grad_norm": 0.7525831460952759, "learning_rate": 2.9703845084444684e-06, "loss": 0.0659, "step": 103680 }, { "epoch": 3.0664813390903176, "grad_norm": 0.8163991570472717, "learning_rate": 2.970257818525529e-06, "loss": 0.0657, "step": 103690 }, { "epoch": 3.0667770745844916, "grad_norm": 0.9930779933929443, "learning_rate": 2.9701311286065895e-06, "loss": 0.0627, "step": 103700 }, { "epoch": 3.0670728100786655, "grad_norm": 0.604485034942627, "learning_rate": 2.9700044386876503e-06, "loss": 0.0785, "step": 103710 }, { "epoch": 3.0673685455728394, "grad_norm": 1.2348427772521973, "learning_rate": 2.9698777487687107e-06, "loss": 0.0776, "step": 103720 }, { "epoch": 3.067664281067014, "grad_norm": 0.7851232886314392, "learning_rate": 2.9697510588497715e-06, "loss": 0.0685, "step": 103730 }, { "epoch": 3.0679600165611878, "grad_norm": 0.8853986263275146, "learning_rate": 2.969624368930832e-06, "loss": 0.0711, "step": 103740 }, { "epoch": 3.0682557520553617, "grad_norm": 0.8941249251365662, "learning_rate": 2.9694976790118926e-06, "loss": 0.0831, "step": 103750 }, { "epoch": 3.0685514875495357, "grad_norm": 0.812934398651123, "learning_rate": 2.9693709890929534e-06, "loss": 0.0753, "step": 103760 }, { "epoch": 3.0688472230437096, "grad_norm": 0.8623657822608948, "learning_rate": 2.969244299174014e-06, "loss": 0.0726, "step": 103770 }, { "epoch": 3.0691429585378835, "grad_norm": 1.1887311935424805, "learning_rate": 2.9691176092550746e-06, "loss": 0.075, "step": 103780 }, { "epoch": 3.069438694032058, "grad_norm": 0.6080965995788574, "learning_rate": 2.9689909193361353e-06, "loss": 0.0593, "step": 103790 }, { "epoch": 3.069734429526232, "grad_norm": 0.8268933892250061, "learning_rate": 2.9688642294171957e-06, "loss": 0.0774, "step": 103800 }, { "epoch": 3.070030165020406, "grad_norm": 1.1486924886703491, "learning_rate": 2.9687375394982565e-06, "loss": 0.0723, "step": 103810 }, { "epoch": 3.0703259005145798, "grad_norm": 1.0298796892166138, "learning_rate": 2.968610849579317e-06, "loss": 0.0743, "step": 103820 }, { "epoch": 3.0706216360087537, "grad_norm": 0.6278163194656372, "learning_rate": 2.9684841596603777e-06, "loss": 0.058, "step": 103830 }, { "epoch": 3.0709173715029277, "grad_norm": 1.2606855630874634, "learning_rate": 2.9683574697414384e-06, "loss": 0.0562, "step": 103840 }, { "epoch": 3.0712131069971016, "grad_norm": 0.6077288389205933, "learning_rate": 2.9682307798224992e-06, "loss": 0.0706, "step": 103850 }, { "epoch": 3.071508842491276, "grad_norm": 1.2282726764678955, "learning_rate": 2.9681040899035596e-06, "loss": 0.0858, "step": 103860 }, { "epoch": 3.07180457798545, "grad_norm": 0.7139294147491455, "learning_rate": 2.9679773999846204e-06, "loss": 0.0712, "step": 103870 }, { "epoch": 3.072100313479624, "grad_norm": 0.6713416576385498, "learning_rate": 2.9678507100656808e-06, "loss": 0.0744, "step": 103880 }, { "epoch": 3.072396048973798, "grad_norm": 0.589712917804718, "learning_rate": 2.9677240201467415e-06, "loss": 0.0406, "step": 103890 }, { "epoch": 3.0726917844679718, "grad_norm": 1.0702128410339355, "learning_rate": 2.967597330227802e-06, "loss": 0.066, "step": 103900 }, { "epoch": 3.0729875199621457, "grad_norm": 0.6996033191680908, "learning_rate": 2.9674706403088627e-06, "loss": 0.0807, "step": 103910 }, { "epoch": 3.07328325545632, "grad_norm": 0.6461215019226074, "learning_rate": 2.9673439503899235e-06, "loss": 0.0782, "step": 103920 }, { "epoch": 3.073578990950494, "grad_norm": 0.7642673254013062, "learning_rate": 2.9672172604709843e-06, "loss": 0.0611, "step": 103930 }, { "epoch": 3.073874726444668, "grad_norm": 0.6360310316085815, "learning_rate": 2.9670905705520446e-06, "loss": 0.0652, "step": 103940 }, { "epoch": 3.074170461938842, "grad_norm": 0.9143223166465759, "learning_rate": 2.9669638806331054e-06, "loss": 0.0823, "step": 103950 }, { "epoch": 3.074466197433016, "grad_norm": 0.757463276386261, "learning_rate": 2.966837190714166e-06, "loss": 0.0738, "step": 103960 }, { "epoch": 3.07476193292719, "grad_norm": 1.0777482986450195, "learning_rate": 2.966710500795226e-06, "loss": 0.0775, "step": 103970 }, { "epoch": 3.0750576684213637, "grad_norm": 1.3843215703964233, "learning_rate": 2.966583810876287e-06, "loss": 0.095, "step": 103980 }, { "epoch": 3.075353403915538, "grad_norm": 0.5748558640480042, "learning_rate": 2.9664571209573473e-06, "loss": 0.0485, "step": 103990 }, { "epoch": 3.075649139409712, "grad_norm": 1.0066338777542114, "learning_rate": 2.9663304310384085e-06, "loss": 0.0622, "step": 104000 }, { "epoch": 3.075944874903886, "grad_norm": 0.8139887452125549, "learning_rate": 2.966203741119469e-06, "loss": 0.0797, "step": 104010 }, { "epoch": 3.07624061039806, "grad_norm": 1.1161056756973267, "learning_rate": 2.9660770512005297e-06, "loss": 0.0688, "step": 104020 }, { "epoch": 3.076536345892234, "grad_norm": 0.702788233757019, "learning_rate": 2.96595036128159e-06, "loss": 0.0683, "step": 104030 }, { "epoch": 3.076832081386408, "grad_norm": 2.0137555599212646, "learning_rate": 2.965823671362651e-06, "loss": 0.0692, "step": 104040 }, { "epoch": 3.077127816880582, "grad_norm": 0.7432659864425659, "learning_rate": 2.9656969814437112e-06, "loss": 0.079, "step": 104050 }, { "epoch": 3.077423552374756, "grad_norm": 0.8158800005912781, "learning_rate": 2.965570291524772e-06, "loss": 0.0805, "step": 104060 }, { "epoch": 3.07771928786893, "grad_norm": 0.7210305333137512, "learning_rate": 2.9654436016058324e-06, "loss": 0.0572, "step": 104070 }, { "epoch": 3.078015023363104, "grad_norm": 0.38253965973854065, "learning_rate": 2.9653169116868936e-06, "loss": 0.0839, "step": 104080 }, { "epoch": 3.078310758857278, "grad_norm": 0.4802958369255066, "learning_rate": 2.965190221767954e-06, "loss": 0.0617, "step": 104090 }, { "epoch": 3.078606494351452, "grad_norm": 1.3246958255767822, "learning_rate": 2.9650635318490147e-06, "loss": 0.0841, "step": 104100 }, { "epoch": 3.078902229845626, "grad_norm": 0.9720621705055237, "learning_rate": 2.964936841930075e-06, "loss": 0.0829, "step": 104110 }, { "epoch": 3.0791979653398003, "grad_norm": 0.6908105611801147, "learning_rate": 2.964810152011136e-06, "loss": 0.0705, "step": 104120 }, { "epoch": 3.0794937008339742, "grad_norm": 0.5286064743995667, "learning_rate": 2.9646834620921963e-06, "loss": 0.0619, "step": 104130 }, { "epoch": 3.079789436328148, "grad_norm": 0.7334216237068176, "learning_rate": 2.964556772173257e-06, "loss": 0.0588, "step": 104140 }, { "epoch": 3.080085171822322, "grad_norm": 1.0821548700332642, "learning_rate": 2.9644300822543174e-06, "loss": 0.0731, "step": 104150 }, { "epoch": 3.080380907316496, "grad_norm": 0.4594743549823761, "learning_rate": 2.9643033923353786e-06, "loss": 0.0728, "step": 104160 }, { "epoch": 3.08067664281067, "grad_norm": 0.6995557546615601, "learning_rate": 2.964176702416439e-06, "loss": 0.0766, "step": 104170 }, { "epoch": 3.080972378304844, "grad_norm": 0.6973878145217896, "learning_rate": 2.9640500124974998e-06, "loss": 0.0917, "step": 104180 }, { "epoch": 3.0812681137990183, "grad_norm": 0.9476771354675293, "learning_rate": 2.96392332257856e-06, "loss": 0.0622, "step": 104190 }, { "epoch": 3.0815638492931923, "grad_norm": 0.660946249961853, "learning_rate": 2.963796632659621e-06, "loss": 0.0811, "step": 104200 }, { "epoch": 3.081859584787366, "grad_norm": 0.9187449812889099, "learning_rate": 2.9636699427406813e-06, "loss": 0.0793, "step": 104210 }, { "epoch": 3.08215532028154, "grad_norm": 0.8002455830574036, "learning_rate": 2.963543252821742e-06, "loss": 0.0799, "step": 104220 }, { "epoch": 3.082451055775714, "grad_norm": 1.4707599878311157, "learning_rate": 2.9634165629028025e-06, "loss": 0.0744, "step": 104230 }, { "epoch": 3.082746791269888, "grad_norm": 0.789251983165741, "learning_rate": 2.9632898729838637e-06, "loss": 0.0532, "step": 104240 }, { "epoch": 3.0830425267640624, "grad_norm": 1.1291552782058716, "learning_rate": 2.963163183064924e-06, "loss": 0.0815, "step": 104250 }, { "epoch": 3.0833382622582364, "grad_norm": 1.0067386627197266, "learning_rate": 2.963036493145985e-06, "loss": 0.0916, "step": 104260 }, { "epoch": 3.0836339977524103, "grad_norm": 0.6406216621398926, "learning_rate": 2.962909803227045e-06, "loss": 0.0685, "step": 104270 }, { "epoch": 3.0839297332465843, "grad_norm": 0.8347079753875732, "learning_rate": 2.962783113308106e-06, "loss": 0.0671, "step": 104280 }, { "epoch": 3.084225468740758, "grad_norm": 0.679097592830658, "learning_rate": 2.9626564233891664e-06, "loss": 0.0511, "step": 104290 }, { "epoch": 3.084521204234932, "grad_norm": 0.4402569532394409, "learning_rate": 2.962529733470227e-06, "loss": 0.0791, "step": 104300 }, { "epoch": 3.084816939729106, "grad_norm": 0.6457432508468628, "learning_rate": 2.9624030435512875e-06, "loss": 0.0761, "step": 104310 }, { "epoch": 3.0851126752232805, "grad_norm": 1.6280461549758911, "learning_rate": 2.9622763536323487e-06, "loss": 0.0748, "step": 104320 }, { "epoch": 3.0854084107174544, "grad_norm": 1.094374179840088, "learning_rate": 2.962149663713409e-06, "loss": 0.0577, "step": 104330 }, { "epoch": 3.0857041462116284, "grad_norm": 0.664723813533783, "learning_rate": 2.96202297379447e-06, "loss": 0.0649, "step": 104340 }, { "epoch": 3.0859998817058023, "grad_norm": 0.8020917177200317, "learning_rate": 2.9618962838755302e-06, "loss": 0.0825, "step": 104350 }, { "epoch": 3.0862956171999762, "grad_norm": 0.6406033635139465, "learning_rate": 2.961769593956591e-06, "loss": 0.0844, "step": 104360 }, { "epoch": 3.08659135269415, "grad_norm": 0.8206819891929626, "learning_rate": 2.9616429040376514e-06, "loss": 0.0774, "step": 104370 }, { "epoch": 3.086887088188324, "grad_norm": 0.6900759339332581, "learning_rate": 2.9615162141187118e-06, "loss": 0.0574, "step": 104380 }, { "epoch": 3.0871828236824985, "grad_norm": 1.420474886894226, "learning_rate": 2.9613895241997726e-06, "loss": 0.0615, "step": 104390 }, { "epoch": 3.0874785591766725, "grad_norm": 1.0959786176681519, "learning_rate": 2.9612628342808333e-06, "loss": 0.0844, "step": 104400 }, { "epoch": 3.0877742946708464, "grad_norm": 0.7714305520057678, "learning_rate": 2.961136144361894e-06, "loss": 0.0771, "step": 104410 }, { "epoch": 3.0880700301650204, "grad_norm": 0.5979045033454895, "learning_rate": 2.9610094544429545e-06, "loss": 0.0646, "step": 104420 }, { "epoch": 3.0883657656591943, "grad_norm": 0.44127869606018066, "learning_rate": 2.9608827645240153e-06, "loss": 0.0746, "step": 104430 }, { "epoch": 3.0886615011533682, "grad_norm": 0.47882023453712463, "learning_rate": 2.9607560746050757e-06, "loss": 0.0621, "step": 104440 }, { "epoch": 3.0889572366475426, "grad_norm": 0.9356638193130493, "learning_rate": 2.9606293846861364e-06, "loss": 0.0848, "step": 104450 }, { "epoch": 3.0892529721417166, "grad_norm": 1.0066893100738525, "learning_rate": 2.960502694767197e-06, "loss": 0.0787, "step": 104460 }, { "epoch": 3.0895487076358905, "grad_norm": 0.7372993230819702, "learning_rate": 2.9603760048482576e-06, "loss": 0.075, "step": 104470 }, { "epoch": 3.0898444431300645, "grad_norm": 1.2786149978637695, "learning_rate": 2.9602493149293184e-06, "loss": 0.0575, "step": 104480 }, { "epoch": 3.0901401786242384, "grad_norm": 1.2733497619628906, "learning_rate": 2.960122625010379e-06, "loss": 0.0634, "step": 104490 }, { "epoch": 3.0904359141184123, "grad_norm": 0.9941621422767639, "learning_rate": 2.9599959350914395e-06, "loss": 0.0767, "step": 104500 }, { "epoch": 3.0907316496125867, "grad_norm": 0.8570445775985718, "learning_rate": 2.9598692451725003e-06, "loss": 0.0725, "step": 104510 }, { "epoch": 3.0910273851067607, "grad_norm": 0.9467464685440063, "learning_rate": 2.9597425552535607e-06, "loss": 0.0871, "step": 104520 }, { "epoch": 3.0913231206009346, "grad_norm": 1.209098219871521, "learning_rate": 2.9596158653346215e-06, "loss": 0.0622, "step": 104530 }, { "epoch": 3.0916188560951086, "grad_norm": 0.8694918751716614, "learning_rate": 2.959489175415682e-06, "loss": 0.0663, "step": 104540 }, { "epoch": 3.0919145915892825, "grad_norm": 0.6118839979171753, "learning_rate": 2.9593624854967426e-06, "loss": 0.0705, "step": 104550 }, { "epoch": 3.0922103270834564, "grad_norm": 1.169083595275879, "learning_rate": 2.9592357955778034e-06, "loss": 0.0669, "step": 104560 }, { "epoch": 3.0925060625776304, "grad_norm": 0.9235050678253174, "learning_rate": 2.9591091056588642e-06, "loss": 0.0849, "step": 104570 }, { "epoch": 3.0928017980718048, "grad_norm": 0.702445387840271, "learning_rate": 2.9589824157399246e-06, "loss": 0.0792, "step": 104580 }, { "epoch": 3.0930975335659787, "grad_norm": 0.7719864845275879, "learning_rate": 2.9588557258209854e-06, "loss": 0.068, "step": 104590 }, { "epoch": 3.0933932690601527, "grad_norm": 0.6250966787338257, "learning_rate": 2.9587290359020457e-06, "loss": 0.0659, "step": 104600 }, { "epoch": 3.0936890045543266, "grad_norm": 0.769582986831665, "learning_rate": 2.9586023459831065e-06, "loss": 0.0825, "step": 104610 }, { "epoch": 3.0939847400485005, "grad_norm": 0.648935854434967, "learning_rate": 2.958475656064167e-06, "loss": 0.0677, "step": 104620 }, { "epoch": 3.0942804755426745, "grad_norm": 0.6443228125572205, "learning_rate": 2.9583489661452277e-06, "loss": 0.0672, "step": 104630 }, { "epoch": 3.0945762110368484, "grad_norm": 1.0519551038742065, "learning_rate": 2.9582222762262885e-06, "loss": 0.0605, "step": 104640 }, { "epoch": 3.094871946531023, "grad_norm": 0.7875810861587524, "learning_rate": 2.9580955863073493e-06, "loss": 0.0672, "step": 104650 }, { "epoch": 3.0951676820251968, "grad_norm": 0.6610272526741028, "learning_rate": 2.9579688963884096e-06, "loss": 0.0769, "step": 104660 }, { "epoch": 3.0954634175193707, "grad_norm": 1.1651394367218018, "learning_rate": 2.9578422064694704e-06, "loss": 0.0896, "step": 104670 }, { "epoch": 3.0957591530135447, "grad_norm": 1.001030683517456, "learning_rate": 2.957715516550531e-06, "loss": 0.0698, "step": 104680 }, { "epoch": 3.0960548885077186, "grad_norm": 1.1497983932495117, "learning_rate": 2.9575888266315916e-06, "loss": 0.0662, "step": 104690 }, { "epoch": 3.0963506240018925, "grad_norm": 0.551279604434967, "learning_rate": 2.957462136712652e-06, "loss": 0.0811, "step": 104700 }, { "epoch": 3.096646359496067, "grad_norm": 0.5499516725540161, "learning_rate": 2.9573354467937127e-06, "loss": 0.0718, "step": 104710 }, { "epoch": 3.096942094990241, "grad_norm": 0.8571828603744507, "learning_rate": 2.9572087568747735e-06, "loss": 0.0679, "step": 104720 }, { "epoch": 3.097237830484415, "grad_norm": 1.2058624029159546, "learning_rate": 2.9570820669558343e-06, "loss": 0.0835, "step": 104730 }, { "epoch": 3.0975335659785888, "grad_norm": 0.8383855223655701, "learning_rate": 2.9569553770368947e-06, "loss": 0.061, "step": 104740 }, { "epoch": 3.0978293014727627, "grad_norm": 1.5390886068344116, "learning_rate": 2.9568286871179555e-06, "loss": 0.0996, "step": 104750 }, { "epoch": 3.0981250369669366, "grad_norm": 0.623414158821106, "learning_rate": 2.956701997199016e-06, "loss": 0.0857, "step": 104760 }, { "epoch": 3.0984207724611106, "grad_norm": 0.6091523170471191, "learning_rate": 2.9565753072800766e-06, "loss": 0.0755, "step": 104770 }, { "epoch": 3.098716507955285, "grad_norm": 0.7435265779495239, "learning_rate": 2.956448617361137e-06, "loss": 0.0614, "step": 104780 }, { "epoch": 3.099012243449459, "grad_norm": 0.749805212020874, "learning_rate": 2.9563219274421974e-06, "loss": 0.0567, "step": 104790 }, { "epoch": 3.099307978943633, "grad_norm": 1.1071542501449585, "learning_rate": 2.9561952375232586e-06, "loss": 0.0759, "step": 104800 }, { "epoch": 3.099603714437807, "grad_norm": 0.7063218355178833, "learning_rate": 2.956068547604319e-06, "loss": 0.0725, "step": 104810 }, { "epoch": 3.0998994499319807, "grad_norm": 0.5067731142044067, "learning_rate": 2.9559418576853797e-06, "loss": 0.0708, "step": 104820 }, { "epoch": 3.1001951854261547, "grad_norm": 0.8774795532226562, "learning_rate": 2.95581516776644e-06, "loss": 0.0844, "step": 104830 }, { "epoch": 3.100490920920329, "grad_norm": 1.1536113023757935, "learning_rate": 2.955688477847501e-06, "loss": 0.058, "step": 104840 }, { "epoch": 3.100786656414503, "grad_norm": 0.8703799843788147, "learning_rate": 2.9555617879285612e-06, "loss": 0.1058, "step": 104850 }, { "epoch": 3.101082391908677, "grad_norm": 1.2397853136062622, "learning_rate": 2.955435098009622e-06, "loss": 0.0768, "step": 104860 }, { "epoch": 3.101378127402851, "grad_norm": 0.7666987776756287, "learning_rate": 2.9553084080906824e-06, "loss": 0.0821, "step": 104870 }, { "epoch": 3.101673862897025, "grad_norm": 0.6431084871292114, "learning_rate": 2.9551817181717436e-06, "loss": 0.069, "step": 104880 }, { "epoch": 3.101969598391199, "grad_norm": 0.6799401640892029, "learning_rate": 2.955055028252804e-06, "loss": 0.0539, "step": 104890 }, { "epoch": 3.1022653338853727, "grad_norm": 0.9750879406929016, "learning_rate": 2.9549283383338648e-06, "loss": 0.0861, "step": 104900 }, { "epoch": 3.102561069379547, "grad_norm": 1.2686330080032349, "learning_rate": 2.954801648414925e-06, "loss": 0.086, "step": 104910 }, { "epoch": 3.102856804873721, "grad_norm": 0.8629791736602783, "learning_rate": 2.954674958495986e-06, "loss": 0.0664, "step": 104920 }, { "epoch": 3.103152540367895, "grad_norm": 0.7360258102416992, "learning_rate": 2.9545482685770463e-06, "loss": 0.0624, "step": 104930 }, { "epoch": 3.103448275862069, "grad_norm": 0.7873677015304565, "learning_rate": 2.954421578658107e-06, "loss": 0.0594, "step": 104940 }, { "epoch": 3.103744011356243, "grad_norm": 0.6710798740386963, "learning_rate": 2.9542948887391674e-06, "loss": 0.0906, "step": 104950 }, { "epoch": 3.104039746850417, "grad_norm": 0.7786498665809631, "learning_rate": 2.9541681988202287e-06, "loss": 0.081, "step": 104960 }, { "epoch": 3.104335482344591, "grad_norm": 1.1560455560684204, "learning_rate": 2.954041508901289e-06, "loss": 0.082, "step": 104970 }, { "epoch": 3.104631217838765, "grad_norm": 0.7550597190856934, "learning_rate": 2.95391481898235e-06, "loss": 0.0625, "step": 104980 }, { "epoch": 3.104926953332939, "grad_norm": 0.7099475264549255, "learning_rate": 2.95378812906341e-06, "loss": 0.0789, "step": 104990 }, { "epoch": 3.105222688827113, "grad_norm": 1.0595500469207764, "learning_rate": 2.953661439144471e-06, "loss": 0.0681, "step": 105000 }, { "epoch": 3.105518424321287, "grad_norm": 0.5759577751159668, "learning_rate": 2.9535347492255313e-06, "loss": 0.0758, "step": 105010 }, { "epoch": 3.105814159815461, "grad_norm": 0.6051292419433594, "learning_rate": 2.953408059306592e-06, "loss": 0.0784, "step": 105020 }, { "epoch": 3.106109895309635, "grad_norm": 0.6344627737998962, "learning_rate": 2.9532813693876525e-06, "loss": 0.0727, "step": 105030 }, { "epoch": 3.1064056308038093, "grad_norm": 0.5833083987236023, "learning_rate": 2.9531546794687137e-06, "loss": 0.0715, "step": 105040 }, { "epoch": 3.106701366297983, "grad_norm": 0.5641628503799438, "learning_rate": 2.953027989549774e-06, "loss": 0.0708, "step": 105050 }, { "epoch": 3.106997101792157, "grad_norm": 0.8276727199554443, "learning_rate": 2.952901299630835e-06, "loss": 0.0723, "step": 105060 }, { "epoch": 3.107292837286331, "grad_norm": 0.6150544881820679, "learning_rate": 2.9527746097118952e-06, "loss": 0.0655, "step": 105070 }, { "epoch": 3.107588572780505, "grad_norm": 1.049294352531433, "learning_rate": 2.952647919792956e-06, "loss": 0.0594, "step": 105080 }, { "epoch": 3.107884308274679, "grad_norm": 1.769305944442749, "learning_rate": 2.9525212298740164e-06, "loss": 0.0465, "step": 105090 }, { "epoch": 3.108180043768853, "grad_norm": 0.8636937737464905, "learning_rate": 2.952394539955077e-06, "loss": 0.0744, "step": 105100 }, { "epoch": 3.1084757792630273, "grad_norm": 0.9480960965156555, "learning_rate": 2.9522678500361375e-06, "loss": 0.0859, "step": 105110 }, { "epoch": 3.1087715147572013, "grad_norm": 0.7871571779251099, "learning_rate": 2.9521411601171988e-06, "loss": 0.0717, "step": 105120 }, { "epoch": 3.109067250251375, "grad_norm": 0.8275344967842102, "learning_rate": 2.952014470198259e-06, "loss": 0.0568, "step": 105130 }, { "epoch": 3.109362985745549, "grad_norm": 0.6708593964576721, "learning_rate": 2.95188778027932e-06, "loss": 0.078, "step": 105140 }, { "epoch": 3.109658721239723, "grad_norm": 0.7409178018569946, "learning_rate": 2.9517610903603803e-06, "loss": 0.0756, "step": 105150 }, { "epoch": 3.109954456733897, "grad_norm": 0.6807176470756531, "learning_rate": 2.951634400441441e-06, "loss": 0.085, "step": 105160 }, { "epoch": 3.1102501922280714, "grad_norm": 0.969863772392273, "learning_rate": 2.9515077105225014e-06, "loss": 0.0957, "step": 105170 }, { "epoch": 3.1105459277222454, "grad_norm": 0.7370405197143555, "learning_rate": 2.9513810206035622e-06, "loss": 0.0663, "step": 105180 }, { "epoch": 3.1108416632164193, "grad_norm": 0.4591527581214905, "learning_rate": 2.9512543306846226e-06, "loss": 0.0555, "step": 105190 }, { "epoch": 3.1111373987105932, "grad_norm": 0.6513895988464355, "learning_rate": 2.9511276407656834e-06, "loss": 0.0873, "step": 105200 }, { "epoch": 3.111433134204767, "grad_norm": 0.7610551118850708, "learning_rate": 2.951000950846744e-06, "loss": 0.0786, "step": 105210 }, { "epoch": 3.111728869698941, "grad_norm": 0.9391015768051147, "learning_rate": 2.9508742609278045e-06, "loss": 0.0701, "step": 105220 }, { "epoch": 3.112024605193115, "grad_norm": 0.6529654264450073, "learning_rate": 2.9507475710088653e-06, "loss": 0.0735, "step": 105230 }, { "epoch": 3.1123203406872895, "grad_norm": 0.6814257502555847, "learning_rate": 2.9506208810899257e-06, "loss": 0.058, "step": 105240 }, { "epoch": 3.1126160761814634, "grad_norm": 0.6319327354431152, "learning_rate": 2.9504941911709865e-06, "loss": 0.0797, "step": 105250 }, { "epoch": 3.1129118116756374, "grad_norm": 0.8056283593177795, "learning_rate": 2.950367501252047e-06, "loss": 0.0785, "step": 105260 }, { "epoch": 3.1132075471698113, "grad_norm": 1.2374229431152344, "learning_rate": 2.9502408113331076e-06, "loss": 0.0738, "step": 105270 }, { "epoch": 3.1135032826639852, "grad_norm": 0.654532790184021, "learning_rate": 2.9501141214141684e-06, "loss": 0.0706, "step": 105280 }, { "epoch": 3.113799018158159, "grad_norm": 0.8295325040817261, "learning_rate": 2.949987431495229e-06, "loss": 0.0616, "step": 105290 }, { "epoch": 3.114094753652333, "grad_norm": 0.9802553057670593, "learning_rate": 2.9498607415762896e-06, "loss": 0.0853, "step": 105300 }, { "epoch": 3.1143904891465075, "grad_norm": 0.6730180978775024, "learning_rate": 2.9497340516573504e-06, "loss": 0.0793, "step": 105310 }, { "epoch": 3.1146862246406815, "grad_norm": 0.9657940864562988, "learning_rate": 2.9496073617384107e-06, "loss": 0.0833, "step": 105320 }, { "epoch": 3.1149819601348554, "grad_norm": 0.6293461322784424, "learning_rate": 2.9494806718194715e-06, "loss": 0.0579, "step": 105330 }, { "epoch": 3.1152776956290293, "grad_norm": 0.5874853730201721, "learning_rate": 2.949353981900532e-06, "loss": 0.0629, "step": 105340 }, { "epoch": 3.1155734311232033, "grad_norm": 1.3790919780731201, "learning_rate": 2.9492272919815927e-06, "loss": 0.1025, "step": 105350 }, { "epoch": 3.1158691666173772, "grad_norm": 0.9608975052833557, "learning_rate": 2.9491006020626535e-06, "loss": 0.0725, "step": 105360 }, { "epoch": 3.1161649021115516, "grad_norm": 1.2022472620010376, "learning_rate": 2.9489739121437143e-06, "loss": 0.0771, "step": 105370 }, { "epoch": 3.1164606376057256, "grad_norm": 1.650850534439087, "learning_rate": 2.9488472222247746e-06, "loss": 0.0883, "step": 105380 }, { "epoch": 3.1167563730998995, "grad_norm": 0.6717312932014465, "learning_rate": 2.9487205323058354e-06, "loss": 0.0659, "step": 105390 }, { "epoch": 3.1170521085940734, "grad_norm": 0.7582560181617737, "learning_rate": 2.9485938423868958e-06, "loss": 0.0807, "step": 105400 }, { "epoch": 3.1173478440882474, "grad_norm": 0.9486151337623596, "learning_rate": 2.9484671524679566e-06, "loss": 0.0766, "step": 105410 }, { "epoch": 3.1176435795824213, "grad_norm": 0.9609311819076538, "learning_rate": 2.948340462549017e-06, "loss": 0.0674, "step": 105420 }, { "epoch": 3.1179393150765957, "grad_norm": 0.9352126717567444, "learning_rate": 2.9482137726300777e-06, "loss": 0.0563, "step": 105430 }, { "epoch": 3.1182350505707697, "grad_norm": 0.41412779688835144, "learning_rate": 2.9480870827111385e-06, "loss": 0.087, "step": 105440 }, { "epoch": 3.1185307860649436, "grad_norm": 0.9853270649909973, "learning_rate": 2.9479603927921993e-06, "loss": 0.0851, "step": 105450 }, { "epoch": 3.1188265215591175, "grad_norm": 0.8329216837882996, "learning_rate": 2.9478337028732597e-06, "loss": 0.085, "step": 105460 }, { "epoch": 3.1191222570532915, "grad_norm": 0.7762851715087891, "learning_rate": 2.9477070129543205e-06, "loss": 0.0692, "step": 105470 }, { "epoch": 3.1194179925474654, "grad_norm": 0.9229347705841064, "learning_rate": 2.947580323035381e-06, "loss": 0.0859, "step": 105480 }, { "epoch": 3.1197137280416394, "grad_norm": 0.7726295590400696, "learning_rate": 2.9474536331164416e-06, "loss": 0.0657, "step": 105490 }, { "epoch": 3.1200094635358138, "grad_norm": 1.1192305088043213, "learning_rate": 2.947326943197502e-06, "loss": 0.0607, "step": 105500 }, { "epoch": 3.1203051990299877, "grad_norm": 1.0509570837020874, "learning_rate": 2.9472002532785628e-06, "loss": 0.0761, "step": 105510 }, { "epoch": 3.1206009345241617, "grad_norm": 0.772146999835968, "learning_rate": 2.9470735633596236e-06, "loss": 0.0617, "step": 105520 }, { "epoch": 3.1208966700183356, "grad_norm": 0.5001708269119263, "learning_rate": 2.9469468734406843e-06, "loss": 0.0518, "step": 105530 }, { "epoch": 3.1211924055125095, "grad_norm": 0.5310167074203491, "learning_rate": 2.9468201835217447e-06, "loss": 0.0456, "step": 105540 }, { "epoch": 3.1214881410066835, "grad_norm": 0.9087434411048889, "learning_rate": 2.9466934936028055e-06, "loss": 0.082, "step": 105550 }, { "epoch": 3.1217838765008574, "grad_norm": 0.856026291847229, "learning_rate": 2.946566803683866e-06, "loss": 0.0897, "step": 105560 }, { "epoch": 3.122079611995032, "grad_norm": 1.2535258531570435, "learning_rate": 2.9464401137649267e-06, "loss": 0.0756, "step": 105570 }, { "epoch": 3.1223753474892058, "grad_norm": 1.0517585277557373, "learning_rate": 2.946313423845987e-06, "loss": 0.0653, "step": 105580 }, { "epoch": 3.1226710829833797, "grad_norm": 0.4812416434288025, "learning_rate": 2.946186733927048e-06, "loss": 0.0812, "step": 105590 }, { "epoch": 3.1229668184775536, "grad_norm": 1.4139400720596313, "learning_rate": 2.9460600440081086e-06, "loss": 0.0719, "step": 105600 }, { "epoch": 3.1232625539717276, "grad_norm": 0.38776710629463196, "learning_rate": 2.9459333540891694e-06, "loss": 0.073, "step": 105610 }, { "epoch": 3.1235582894659015, "grad_norm": 0.5268620848655701, "learning_rate": 2.9458066641702298e-06, "loss": 0.0727, "step": 105620 }, { "epoch": 3.123854024960076, "grad_norm": 0.36058488488197327, "learning_rate": 2.94567997425129e-06, "loss": 0.0709, "step": 105630 }, { "epoch": 3.12414976045425, "grad_norm": 0.7540135383605957, "learning_rate": 2.945553284332351e-06, "loss": 0.0756, "step": 105640 }, { "epoch": 3.124445495948424, "grad_norm": 0.8398376107215881, "learning_rate": 2.9454265944134113e-06, "loss": 0.0802, "step": 105650 }, { "epoch": 3.1247412314425977, "grad_norm": 0.9880920052528381, "learning_rate": 2.945299904494472e-06, "loss": 0.0801, "step": 105660 }, { "epoch": 3.1250369669367717, "grad_norm": 0.7451409697532654, "learning_rate": 2.9451732145755324e-06, "loss": 0.0657, "step": 105670 }, { "epoch": 3.1253327024309456, "grad_norm": 0.6932271122932434, "learning_rate": 2.9450465246565936e-06, "loss": 0.0571, "step": 105680 }, { "epoch": 3.1256284379251196, "grad_norm": 0.6826294660568237, "learning_rate": 2.944919834737654e-06, "loss": 0.0567, "step": 105690 }, { "epoch": 3.125924173419294, "grad_norm": 1.9741852283477783, "learning_rate": 2.944793144818715e-06, "loss": 0.076, "step": 105700 }, { "epoch": 3.126219908913468, "grad_norm": 1.0327036380767822, "learning_rate": 2.944666454899775e-06, "loss": 0.0829, "step": 105710 }, { "epoch": 3.126515644407642, "grad_norm": 0.6685781478881836, "learning_rate": 2.944539764980836e-06, "loss": 0.0858, "step": 105720 }, { "epoch": 3.126811379901816, "grad_norm": 1.0608606338500977, "learning_rate": 2.9444130750618963e-06, "loss": 0.0701, "step": 105730 }, { "epoch": 3.1271071153959897, "grad_norm": 0.8991503119468689, "learning_rate": 2.944286385142957e-06, "loss": 0.0635, "step": 105740 }, { "epoch": 3.1274028508901637, "grad_norm": 0.9013274312019348, "learning_rate": 2.9441596952240175e-06, "loss": 0.0638, "step": 105750 }, { "epoch": 3.127698586384338, "grad_norm": 1.1070365905761719, "learning_rate": 2.9440330053050787e-06, "loss": 0.0786, "step": 105760 }, { "epoch": 3.127994321878512, "grad_norm": 0.9435988664627075, "learning_rate": 2.943906315386139e-06, "loss": 0.073, "step": 105770 }, { "epoch": 3.128290057372686, "grad_norm": 1.1424237489700317, "learning_rate": 2.9437796254672e-06, "loss": 0.0817, "step": 105780 }, { "epoch": 3.12858579286686, "grad_norm": 0.4784614145755768, "learning_rate": 2.94365293554826e-06, "loss": 0.056, "step": 105790 }, { "epoch": 3.128881528361034, "grad_norm": 0.9827001094818115, "learning_rate": 2.943526245629321e-06, "loss": 0.0654, "step": 105800 }, { "epoch": 3.129177263855208, "grad_norm": 0.9691890478134155, "learning_rate": 2.9433995557103814e-06, "loss": 0.0745, "step": 105810 }, { "epoch": 3.1294729993493817, "grad_norm": 0.6939381957054138, "learning_rate": 2.943272865791442e-06, "loss": 0.0661, "step": 105820 }, { "epoch": 3.129768734843556, "grad_norm": 0.5429309606552124, "learning_rate": 2.9431461758725025e-06, "loss": 0.0663, "step": 105830 }, { "epoch": 3.13006447033773, "grad_norm": 1.2081243991851807, "learning_rate": 2.9430194859535637e-06, "loss": 0.0616, "step": 105840 }, { "epoch": 3.130360205831904, "grad_norm": 0.9588484764099121, "learning_rate": 2.942892796034624e-06, "loss": 0.0935, "step": 105850 }, { "epoch": 3.130655941326078, "grad_norm": 0.7543112635612488, "learning_rate": 2.942766106115685e-06, "loss": 0.0797, "step": 105860 }, { "epoch": 3.130951676820252, "grad_norm": 0.979972243309021, "learning_rate": 2.9426394161967453e-06, "loss": 0.0786, "step": 105870 }, { "epoch": 3.131247412314426, "grad_norm": 0.9199116826057434, "learning_rate": 2.942512726277806e-06, "loss": 0.0719, "step": 105880 }, { "epoch": 3.1315431478085998, "grad_norm": 1.1204750537872314, "learning_rate": 2.9423860363588664e-06, "loss": 0.0663, "step": 105890 }, { "epoch": 3.131838883302774, "grad_norm": 0.739112913608551, "learning_rate": 2.942259346439927e-06, "loss": 0.0856, "step": 105900 }, { "epoch": 3.132134618796948, "grad_norm": 1.0220316648483276, "learning_rate": 2.9421326565209876e-06, "loss": 0.0834, "step": 105910 }, { "epoch": 3.132430354291122, "grad_norm": 0.8410536050796509, "learning_rate": 2.9420059666020488e-06, "loss": 0.0776, "step": 105920 }, { "epoch": 3.132726089785296, "grad_norm": 0.45726045966148376, "learning_rate": 2.941879276683109e-06, "loss": 0.0704, "step": 105930 }, { "epoch": 3.13302182527947, "grad_norm": 0.6684725880622864, "learning_rate": 2.94175258676417e-06, "loss": 0.0604, "step": 105940 }, { "epoch": 3.133317560773644, "grad_norm": 0.6894519329071045, "learning_rate": 2.9416258968452303e-06, "loss": 0.0818, "step": 105950 }, { "epoch": 3.1336132962678183, "grad_norm": 0.8499628901481628, "learning_rate": 2.941499206926291e-06, "loss": 0.0886, "step": 105960 }, { "epoch": 3.133909031761992, "grad_norm": 0.83088219165802, "learning_rate": 2.9413725170073515e-06, "loss": 0.0712, "step": 105970 }, { "epoch": 3.134204767256166, "grad_norm": 1.3064906597137451, "learning_rate": 2.9412458270884122e-06, "loss": 0.0738, "step": 105980 }, { "epoch": 3.13450050275034, "grad_norm": 0.6842607259750366, "learning_rate": 2.9411191371694726e-06, "loss": 0.0652, "step": 105990 }, { "epoch": 3.134796238244514, "grad_norm": 0.6740965843200684, "learning_rate": 2.940992447250534e-06, "loss": 0.084, "step": 106000 }, { "epoch": 3.135091973738688, "grad_norm": 0.8219391703605652, "learning_rate": 2.940865757331594e-06, "loss": 0.0739, "step": 106010 }, { "epoch": 3.1353877092328624, "grad_norm": 1.632866621017456, "learning_rate": 2.940739067412655e-06, "loss": 0.0945, "step": 106020 }, { "epoch": 3.1356834447270363, "grad_norm": 0.6399466395378113, "learning_rate": 2.9406123774937153e-06, "loss": 0.0795, "step": 106030 }, { "epoch": 3.1359791802212102, "grad_norm": 0.9120396971702576, "learning_rate": 2.9404856875747757e-06, "loss": 0.06, "step": 106040 }, { "epoch": 3.136274915715384, "grad_norm": 1.2324601411819458, "learning_rate": 2.9403589976558365e-06, "loss": 0.0814, "step": 106050 }, { "epoch": 3.136570651209558, "grad_norm": 0.741280198097229, "learning_rate": 2.940232307736897e-06, "loss": 0.0788, "step": 106060 }, { "epoch": 3.136866386703732, "grad_norm": 0.7208606004714966, "learning_rate": 2.9401056178179577e-06, "loss": 0.0706, "step": 106070 }, { "epoch": 3.137162122197906, "grad_norm": 0.9446208477020264, "learning_rate": 2.9399789278990184e-06, "loss": 0.0652, "step": 106080 }, { "epoch": 3.1374578576920804, "grad_norm": 0.7617871165275574, "learning_rate": 2.9398522379800792e-06, "loss": 0.0549, "step": 106090 }, { "epoch": 3.1377535931862544, "grad_norm": 1.264953374862671, "learning_rate": 2.9397255480611396e-06, "loss": 0.0766, "step": 106100 }, { "epoch": 3.1380493286804283, "grad_norm": 0.49895188212394714, "learning_rate": 2.9395988581422004e-06, "loss": 0.0671, "step": 106110 }, { "epoch": 3.1383450641746022, "grad_norm": 0.6506744027137756, "learning_rate": 2.9394721682232608e-06, "loss": 0.0774, "step": 106120 }, { "epoch": 3.138640799668776, "grad_norm": 0.7113181352615356, "learning_rate": 2.9393454783043215e-06, "loss": 0.0766, "step": 106130 }, { "epoch": 3.13893653516295, "grad_norm": 0.8625595569610596, "learning_rate": 2.939218788385382e-06, "loss": 0.0643, "step": 106140 }, { "epoch": 3.139232270657124, "grad_norm": 1.0627208948135376, "learning_rate": 2.9390920984664427e-06, "loss": 0.0887, "step": 106150 }, { "epoch": 3.1395280061512985, "grad_norm": 0.6274286508560181, "learning_rate": 2.9389654085475035e-06, "loss": 0.0691, "step": 106160 }, { "epoch": 3.1398237416454724, "grad_norm": 0.9250359535217285, "learning_rate": 2.9388387186285643e-06, "loss": 0.0759, "step": 106170 }, { "epoch": 3.1401194771396463, "grad_norm": 0.8408792018890381, "learning_rate": 2.9387120287096246e-06, "loss": 0.0759, "step": 106180 }, { "epoch": 3.1404152126338203, "grad_norm": 0.4347660541534424, "learning_rate": 2.9385853387906854e-06, "loss": 0.0636, "step": 106190 }, { "epoch": 3.1407109481279942, "grad_norm": 1.1702771186828613, "learning_rate": 2.938458648871746e-06, "loss": 0.0752, "step": 106200 }, { "epoch": 3.141006683622168, "grad_norm": 0.8043779134750366, "learning_rate": 2.9383319589528066e-06, "loss": 0.0846, "step": 106210 }, { "epoch": 3.141302419116342, "grad_norm": 0.7694118618965149, "learning_rate": 2.938205269033867e-06, "loss": 0.0686, "step": 106220 }, { "epoch": 3.1415981546105165, "grad_norm": 0.7297210693359375, "learning_rate": 2.9380785791149277e-06, "loss": 0.0636, "step": 106230 }, { "epoch": 3.1418938901046904, "grad_norm": 1.3559014797210693, "learning_rate": 2.9379518891959885e-06, "loss": 0.0724, "step": 106240 }, { "epoch": 3.1421896255988644, "grad_norm": 0.7629320025444031, "learning_rate": 2.9378251992770493e-06, "loss": 0.0717, "step": 106250 }, { "epoch": 3.1424853610930383, "grad_norm": 1.1398779153823853, "learning_rate": 2.9376985093581097e-06, "loss": 0.102, "step": 106260 }, { "epoch": 3.1427810965872123, "grad_norm": 1.0640838146209717, "learning_rate": 2.9375718194391705e-06, "loss": 0.0718, "step": 106270 }, { "epoch": 3.143076832081386, "grad_norm": 0.955826461315155, "learning_rate": 2.937445129520231e-06, "loss": 0.0595, "step": 106280 }, { "epoch": 3.1433725675755606, "grad_norm": 0.4813036620616913, "learning_rate": 2.9373184396012916e-06, "loss": 0.0609, "step": 106290 }, { "epoch": 3.1436683030697345, "grad_norm": 0.5987585186958313, "learning_rate": 2.937191749682352e-06, "loss": 0.081, "step": 106300 }, { "epoch": 3.1439640385639085, "grad_norm": 0.6695368885993958, "learning_rate": 2.937065059763413e-06, "loss": 0.0601, "step": 106310 }, { "epoch": 3.1442597740580824, "grad_norm": 1.133241891860962, "learning_rate": 2.9369383698444736e-06, "loss": 0.0738, "step": 106320 }, { "epoch": 3.1445555095522564, "grad_norm": 0.9619620442390442, "learning_rate": 2.9368116799255344e-06, "loss": 0.067, "step": 106330 }, { "epoch": 3.1448512450464303, "grad_norm": 1.3745684623718262, "learning_rate": 2.9366849900065947e-06, "loss": 0.0756, "step": 106340 }, { "epoch": 3.1451469805406047, "grad_norm": 1.0201841592788696, "learning_rate": 2.9365583000876555e-06, "loss": 0.0705, "step": 106350 }, { "epoch": 3.1454427160347787, "grad_norm": 0.941548764705658, "learning_rate": 2.936431610168716e-06, "loss": 0.0815, "step": 106360 }, { "epoch": 3.1457384515289526, "grad_norm": 0.536261260509491, "learning_rate": 2.9363049202497767e-06, "loss": 0.0754, "step": 106370 }, { "epoch": 3.1460341870231265, "grad_norm": 0.5953213572502136, "learning_rate": 2.936178230330837e-06, "loss": 0.0654, "step": 106380 }, { "epoch": 3.1463299225173005, "grad_norm": 0.5797236561775208, "learning_rate": 2.936051540411898e-06, "loss": 0.0582, "step": 106390 }, { "epoch": 3.1466256580114744, "grad_norm": 0.9268299341201782, "learning_rate": 2.9359248504929586e-06, "loss": 0.0812, "step": 106400 }, { "epoch": 3.1469213935056484, "grad_norm": 0.9995453357696533, "learning_rate": 2.9357981605740194e-06, "loss": 0.0935, "step": 106410 }, { "epoch": 3.1472171289998228, "grad_norm": 0.7427138090133667, "learning_rate": 2.9356714706550798e-06, "loss": 0.0587, "step": 106420 }, { "epoch": 3.1475128644939967, "grad_norm": 0.8195388913154602, "learning_rate": 2.9355447807361406e-06, "loss": 0.0624, "step": 106430 }, { "epoch": 3.1478085999881706, "grad_norm": 0.5238476395606995, "learning_rate": 2.935418090817201e-06, "loss": 0.0609, "step": 106440 }, { "epoch": 3.1481043354823446, "grad_norm": 0.9465104341506958, "learning_rate": 2.9352914008982613e-06, "loss": 0.0781, "step": 106450 }, { "epoch": 3.1484000709765185, "grad_norm": 0.6554776430130005, "learning_rate": 2.935164710979322e-06, "loss": 0.066, "step": 106460 }, { "epoch": 3.1486958064706925, "grad_norm": 1.0020934343338013, "learning_rate": 2.9350380210603825e-06, "loss": 0.0685, "step": 106470 }, { "epoch": 3.1489915419648664, "grad_norm": 0.672258198261261, "learning_rate": 2.9349113311414437e-06, "loss": 0.0765, "step": 106480 }, { "epoch": 3.149287277459041, "grad_norm": 0.7912158966064453, "learning_rate": 2.934784641222504e-06, "loss": 0.055, "step": 106490 }, { "epoch": 3.1495830129532147, "grad_norm": 1.3829485177993774, "learning_rate": 2.934657951303565e-06, "loss": 0.0828, "step": 106500 }, { "epoch": 3.1498787484473887, "grad_norm": 0.9612916111946106, "learning_rate": 2.934531261384625e-06, "loss": 0.0986, "step": 106510 }, { "epoch": 3.1501744839415626, "grad_norm": 0.6805010437965393, "learning_rate": 2.934404571465686e-06, "loss": 0.065, "step": 106520 }, { "epoch": 3.1504702194357366, "grad_norm": 0.8770395517349243, "learning_rate": 2.9342778815467464e-06, "loss": 0.0736, "step": 106530 }, { "epoch": 3.1507659549299105, "grad_norm": 0.5811104774475098, "learning_rate": 2.934151191627807e-06, "loss": 0.0761, "step": 106540 }, { "epoch": 3.151061690424085, "grad_norm": 0.8118131160736084, "learning_rate": 2.9340245017088675e-06, "loss": 0.0785, "step": 106550 }, { "epoch": 3.151357425918259, "grad_norm": 1.58399498462677, "learning_rate": 2.9338978117899287e-06, "loss": 0.1008, "step": 106560 }, { "epoch": 3.151653161412433, "grad_norm": 0.7445306181907654, "learning_rate": 2.933771121870989e-06, "loss": 0.0695, "step": 106570 }, { "epoch": 3.1519488969066067, "grad_norm": 0.5296454429626465, "learning_rate": 2.93364443195205e-06, "loss": 0.0746, "step": 106580 }, { "epoch": 3.1522446324007807, "grad_norm": 1.18075692653656, "learning_rate": 2.9335177420331102e-06, "loss": 0.0738, "step": 106590 }, { "epoch": 3.1525403678949546, "grad_norm": 1.7820699214935303, "learning_rate": 2.933391052114171e-06, "loss": 0.0834, "step": 106600 }, { "epoch": 3.1528361033891286, "grad_norm": 0.6750451922416687, "learning_rate": 2.9332643621952314e-06, "loss": 0.0715, "step": 106610 }, { "epoch": 3.153131838883303, "grad_norm": 1.1048409938812256, "learning_rate": 2.933137672276292e-06, "loss": 0.0742, "step": 106620 }, { "epoch": 3.153427574377477, "grad_norm": 1.0051103830337524, "learning_rate": 2.9330109823573526e-06, "loss": 0.0639, "step": 106630 }, { "epoch": 3.153723309871651, "grad_norm": 0.74034184217453, "learning_rate": 2.9328842924384138e-06, "loss": 0.062, "step": 106640 }, { "epoch": 3.154019045365825, "grad_norm": 1.020392656326294, "learning_rate": 2.932757602519474e-06, "loss": 0.0964, "step": 106650 }, { "epoch": 3.1543147808599987, "grad_norm": 0.6535744071006775, "learning_rate": 2.932630912600535e-06, "loss": 0.0753, "step": 106660 }, { "epoch": 3.1546105163541727, "grad_norm": 0.9323148131370544, "learning_rate": 2.9325042226815953e-06, "loss": 0.0756, "step": 106670 }, { "epoch": 3.154906251848347, "grad_norm": 0.7890008687973022, "learning_rate": 2.932377532762656e-06, "loss": 0.0759, "step": 106680 }, { "epoch": 3.155201987342521, "grad_norm": 0.8182563185691833, "learning_rate": 2.9322508428437164e-06, "loss": 0.0496, "step": 106690 }, { "epoch": 3.155497722836695, "grad_norm": 0.9342987537384033, "learning_rate": 2.9321241529247772e-06, "loss": 0.078, "step": 106700 }, { "epoch": 3.155793458330869, "grad_norm": 1.0601637363433838, "learning_rate": 2.9319974630058376e-06, "loss": 0.0632, "step": 106710 }, { "epoch": 3.156089193825043, "grad_norm": 1.172561526298523, "learning_rate": 2.931870773086899e-06, "loss": 0.0867, "step": 106720 }, { "epoch": 3.1563849293192168, "grad_norm": 0.5157206654548645, "learning_rate": 2.931744083167959e-06, "loss": 0.0588, "step": 106730 }, { "epoch": 3.1566806648133907, "grad_norm": 0.3890129327774048, "learning_rate": 2.93161739324902e-06, "loss": 0.0498, "step": 106740 }, { "epoch": 3.156976400307565, "grad_norm": 0.8947989344596863, "learning_rate": 2.9314907033300803e-06, "loss": 0.095, "step": 106750 }, { "epoch": 3.157272135801739, "grad_norm": 0.7412502765655518, "learning_rate": 2.931364013411141e-06, "loss": 0.072, "step": 106760 }, { "epoch": 3.157567871295913, "grad_norm": 1.5353721380233765, "learning_rate": 2.9312373234922015e-06, "loss": 0.0862, "step": 106770 }, { "epoch": 3.157863606790087, "grad_norm": 0.6434110403060913, "learning_rate": 2.9311106335732623e-06, "loss": 0.0751, "step": 106780 }, { "epoch": 3.158159342284261, "grad_norm": 1.1499511003494263, "learning_rate": 2.9309839436543226e-06, "loss": 0.0668, "step": 106790 }, { "epoch": 3.158455077778435, "grad_norm": 1.1203641891479492, "learning_rate": 2.930857253735384e-06, "loss": 0.074, "step": 106800 }, { "epoch": 3.1587508132726088, "grad_norm": 0.554902970790863, "learning_rate": 2.9307305638164442e-06, "loss": 0.0798, "step": 106810 }, { "epoch": 3.159046548766783, "grad_norm": 0.9289032220840454, "learning_rate": 2.930603873897505e-06, "loss": 0.0741, "step": 106820 }, { "epoch": 3.159342284260957, "grad_norm": 0.91108238697052, "learning_rate": 2.9304771839785654e-06, "loss": 0.0683, "step": 106830 }, { "epoch": 3.159638019755131, "grad_norm": 0.5191221833229065, "learning_rate": 2.930350494059626e-06, "loss": 0.0485, "step": 106840 }, { "epoch": 3.159933755249305, "grad_norm": 0.8483527898788452, "learning_rate": 2.9302238041406865e-06, "loss": 0.0709, "step": 106850 }, { "epoch": 3.160229490743479, "grad_norm": 1.0674084424972534, "learning_rate": 2.930097114221747e-06, "loss": 0.0898, "step": 106860 }, { "epoch": 3.160525226237653, "grad_norm": 0.9802752137184143, "learning_rate": 2.9299704243028077e-06, "loss": 0.071, "step": 106870 }, { "epoch": 3.1608209617318272, "grad_norm": 0.5545872449874878, "learning_rate": 2.9298437343838685e-06, "loss": 0.0711, "step": 106880 }, { "epoch": 3.161116697226001, "grad_norm": 0.7097567319869995, "learning_rate": 2.9297170444649293e-06, "loss": 0.0587, "step": 106890 }, { "epoch": 3.161412432720175, "grad_norm": 0.8665409684181213, "learning_rate": 2.9295903545459896e-06, "loss": 0.0832, "step": 106900 }, { "epoch": 3.161708168214349, "grad_norm": 0.9016961455345154, "learning_rate": 2.9294636646270504e-06, "loss": 0.0693, "step": 106910 }, { "epoch": 3.162003903708523, "grad_norm": 1.5324277877807617, "learning_rate": 2.929336974708111e-06, "loss": 0.0765, "step": 106920 }, { "epoch": 3.162299639202697, "grad_norm": 0.7340595126152039, "learning_rate": 2.9292102847891716e-06, "loss": 0.0795, "step": 106930 }, { "epoch": 3.1625953746968714, "grad_norm": 0.5510828495025635, "learning_rate": 2.929083594870232e-06, "loss": 0.0716, "step": 106940 }, { "epoch": 3.1628911101910453, "grad_norm": 1.037197232246399, "learning_rate": 2.9289569049512927e-06, "loss": 0.0919, "step": 106950 }, { "epoch": 3.1631868456852192, "grad_norm": 1.158898115158081, "learning_rate": 2.9288302150323535e-06, "loss": 0.0775, "step": 106960 }, { "epoch": 3.163482581179393, "grad_norm": 1.0522902011871338, "learning_rate": 2.9287035251134143e-06, "loss": 0.0915, "step": 106970 }, { "epoch": 3.163778316673567, "grad_norm": 1.701661229133606, "learning_rate": 2.9285768351944747e-06, "loss": 0.0722, "step": 106980 }, { "epoch": 3.164074052167741, "grad_norm": 0.42409124970436096, "learning_rate": 2.9284501452755355e-06, "loss": 0.061, "step": 106990 }, { "epoch": 3.164369787661915, "grad_norm": 1.6747095584869385, "learning_rate": 2.928323455356596e-06, "loss": 0.0836, "step": 107000 }, { "epoch": 3.1646655231560894, "grad_norm": 0.48455381393432617, "learning_rate": 2.9281967654376566e-06, "loss": 0.0495, "step": 107010 }, { "epoch": 3.1649612586502633, "grad_norm": 0.6793933510780334, "learning_rate": 2.928070075518717e-06, "loss": 0.0816, "step": 107020 }, { "epoch": 3.1652569941444373, "grad_norm": 0.8979274034500122, "learning_rate": 2.9279433855997778e-06, "loss": 0.0579, "step": 107030 }, { "epoch": 3.1655527296386112, "grad_norm": 1.011975884437561, "learning_rate": 2.9278166956808386e-06, "loss": 0.0602, "step": 107040 }, { "epoch": 3.165848465132785, "grad_norm": 0.7306911945343018, "learning_rate": 2.9276900057618994e-06, "loss": 0.0606, "step": 107050 }, { "epoch": 3.166144200626959, "grad_norm": 0.7829665541648865, "learning_rate": 2.9275633158429597e-06, "loss": 0.0814, "step": 107060 }, { "epoch": 3.166439936121133, "grad_norm": 0.8206177353858948, "learning_rate": 2.9274366259240205e-06, "loss": 0.0707, "step": 107070 }, { "epoch": 3.1667356716153074, "grad_norm": 1.622517704963684, "learning_rate": 2.927309936005081e-06, "loss": 0.0636, "step": 107080 }, { "epoch": 3.1670314071094814, "grad_norm": 0.8497385382652283, "learning_rate": 2.9271832460861417e-06, "loss": 0.0512, "step": 107090 }, { "epoch": 3.1673271426036553, "grad_norm": 0.9561231136322021, "learning_rate": 2.927056556167202e-06, "loss": 0.0845, "step": 107100 }, { "epoch": 3.1676228780978293, "grad_norm": 0.8153383731842041, "learning_rate": 2.926929866248263e-06, "loss": 0.0697, "step": 107110 }, { "epoch": 3.167918613592003, "grad_norm": 1.818590760231018, "learning_rate": 2.9268031763293236e-06, "loss": 0.0772, "step": 107120 }, { "epoch": 3.168214349086177, "grad_norm": 1.0576156377792358, "learning_rate": 2.9266764864103844e-06, "loss": 0.082, "step": 107130 }, { "epoch": 3.168510084580351, "grad_norm": 0.5350685119628906, "learning_rate": 2.9265497964914448e-06, "loss": 0.0681, "step": 107140 }, { "epoch": 3.1688058200745255, "grad_norm": 1.024312973022461, "learning_rate": 2.9264231065725056e-06, "loss": 0.0942, "step": 107150 }, { "epoch": 3.1691015555686994, "grad_norm": 1.0481537580490112, "learning_rate": 2.926296416653566e-06, "loss": 0.0771, "step": 107160 }, { "epoch": 3.1693972910628734, "grad_norm": 1.0520265102386475, "learning_rate": 2.9261697267346267e-06, "loss": 0.0863, "step": 107170 }, { "epoch": 3.1696930265570473, "grad_norm": 0.9518738985061646, "learning_rate": 2.926043036815687e-06, "loss": 0.0748, "step": 107180 }, { "epoch": 3.1699887620512213, "grad_norm": 0.7285835146903992, "learning_rate": 2.925916346896748e-06, "loss": 0.0554, "step": 107190 }, { "epoch": 3.170284497545395, "grad_norm": 0.5448667407035828, "learning_rate": 2.9257896569778087e-06, "loss": 0.081, "step": 107200 }, { "epoch": 3.1705802330395696, "grad_norm": 0.8672394752502441, "learning_rate": 2.9256629670588694e-06, "loss": 0.0718, "step": 107210 }, { "epoch": 3.1708759685337435, "grad_norm": 0.8719441890716553, "learning_rate": 2.92553627713993e-06, "loss": 0.081, "step": 107220 }, { "epoch": 3.1711717040279175, "grad_norm": 0.928848922252655, "learning_rate": 2.9254095872209906e-06, "loss": 0.0708, "step": 107230 }, { "epoch": 3.1714674395220914, "grad_norm": 0.8705161809921265, "learning_rate": 2.925282897302051e-06, "loss": 0.0643, "step": 107240 }, { "epoch": 3.1717631750162654, "grad_norm": 3.2913336753845215, "learning_rate": 2.9251562073831118e-06, "loss": 0.0711, "step": 107250 }, { "epoch": 3.1720589105104393, "grad_norm": 0.7190690040588379, "learning_rate": 2.925029517464172e-06, "loss": 0.0935, "step": 107260 }, { "epoch": 3.1723546460046137, "grad_norm": 0.7036359310150146, "learning_rate": 2.9249028275452325e-06, "loss": 0.0828, "step": 107270 }, { "epoch": 3.1726503814987876, "grad_norm": 0.47528666257858276, "learning_rate": 2.9247761376262937e-06, "loss": 0.0838, "step": 107280 }, { "epoch": 3.1729461169929616, "grad_norm": 1.0222597122192383, "learning_rate": 2.924649447707354e-06, "loss": 0.0658, "step": 107290 }, { "epoch": 3.1732418524871355, "grad_norm": 0.716697633266449, "learning_rate": 2.924522757788415e-06, "loss": 0.0875, "step": 107300 }, { "epoch": 3.1735375879813095, "grad_norm": 0.6796208620071411, "learning_rate": 2.9243960678694752e-06, "loss": 0.074, "step": 107310 }, { "epoch": 3.1738333234754834, "grad_norm": 1.1907894611358643, "learning_rate": 2.924269377950536e-06, "loss": 0.0832, "step": 107320 }, { "epoch": 3.1741290589696574, "grad_norm": 0.9143301248550415, "learning_rate": 2.9241426880315964e-06, "loss": 0.0766, "step": 107330 }, { "epoch": 3.1744247944638317, "grad_norm": 0.574205756187439, "learning_rate": 2.924015998112657e-06, "loss": 0.0623, "step": 107340 }, { "epoch": 3.1747205299580057, "grad_norm": 0.5723435282707214, "learning_rate": 2.9238893081937175e-06, "loss": 0.0905, "step": 107350 }, { "epoch": 3.1750162654521796, "grad_norm": 1.0546560287475586, "learning_rate": 2.9237626182747787e-06, "loss": 0.0853, "step": 107360 }, { "epoch": 3.1753120009463536, "grad_norm": 0.8734220862388611, "learning_rate": 2.923635928355839e-06, "loss": 0.0781, "step": 107370 }, { "epoch": 3.1756077364405275, "grad_norm": 0.6216651797294617, "learning_rate": 2.9235092384369e-06, "loss": 0.0682, "step": 107380 }, { "epoch": 3.1759034719347015, "grad_norm": 0.712223470211029, "learning_rate": 2.9233825485179603e-06, "loss": 0.0731, "step": 107390 }, { "epoch": 3.1761992074288754, "grad_norm": 0.6831107139587402, "learning_rate": 2.923255858599021e-06, "loss": 0.0648, "step": 107400 }, { "epoch": 3.17649494292305, "grad_norm": 1.0779613256454468, "learning_rate": 2.9231291686800814e-06, "loss": 0.0807, "step": 107410 }, { "epoch": 3.1767906784172237, "grad_norm": 0.7608692646026611, "learning_rate": 2.9230024787611422e-06, "loss": 0.0741, "step": 107420 }, { "epoch": 3.1770864139113977, "grad_norm": 0.8909640908241272, "learning_rate": 2.9228757888422026e-06, "loss": 0.0774, "step": 107430 }, { "epoch": 3.1773821494055716, "grad_norm": 0.4904158413410187, "learning_rate": 2.922749098923264e-06, "loss": 0.0591, "step": 107440 }, { "epoch": 3.1776778848997456, "grad_norm": 0.9070190191268921, "learning_rate": 2.922622409004324e-06, "loss": 0.0827, "step": 107450 }, { "epoch": 3.1779736203939195, "grad_norm": 0.8473617434501648, "learning_rate": 2.922495719085385e-06, "loss": 0.083, "step": 107460 }, { "epoch": 3.178269355888094, "grad_norm": 0.6815613508224487, "learning_rate": 2.9223690291664453e-06, "loss": 0.0907, "step": 107470 }, { "epoch": 3.178565091382268, "grad_norm": 0.6253186464309692, "learning_rate": 2.922242339247506e-06, "loss": 0.0778, "step": 107480 }, { "epoch": 3.178860826876442, "grad_norm": 1.1971888542175293, "learning_rate": 2.9221156493285665e-06, "loss": 0.0622, "step": 107490 }, { "epoch": 3.1791565623706157, "grad_norm": 1.0363925695419312, "learning_rate": 2.9219889594096273e-06, "loss": 0.0812, "step": 107500 }, { "epoch": 3.1794522978647897, "grad_norm": 0.756106972694397, "learning_rate": 2.9218622694906876e-06, "loss": 0.0779, "step": 107510 }, { "epoch": 3.1797480333589636, "grad_norm": 0.47278228402137756, "learning_rate": 2.921735579571749e-06, "loss": 0.0558, "step": 107520 }, { "epoch": 3.1800437688531376, "grad_norm": 0.8071403503417969, "learning_rate": 2.921608889652809e-06, "loss": 0.0639, "step": 107530 }, { "epoch": 3.180339504347312, "grad_norm": 0.6352477073669434, "learning_rate": 2.92148219973387e-06, "loss": 0.0633, "step": 107540 }, { "epoch": 3.180635239841486, "grad_norm": 0.8284496068954468, "learning_rate": 2.9213555098149304e-06, "loss": 0.078, "step": 107550 }, { "epoch": 3.18093097533566, "grad_norm": 0.8481472134590149, "learning_rate": 2.921228819895991e-06, "loss": 0.1, "step": 107560 }, { "epoch": 3.1812267108298338, "grad_norm": 1.4089956283569336, "learning_rate": 2.9211021299770515e-06, "loss": 0.0657, "step": 107570 }, { "epoch": 3.1815224463240077, "grad_norm": 0.80655837059021, "learning_rate": 2.9209754400581123e-06, "loss": 0.067, "step": 107580 }, { "epoch": 3.1818181818181817, "grad_norm": 0.6169736981391907, "learning_rate": 2.9208487501391727e-06, "loss": 0.0591, "step": 107590 }, { "epoch": 3.182113917312356, "grad_norm": 1.2780660390853882, "learning_rate": 2.920722060220234e-06, "loss": 0.0934, "step": 107600 }, { "epoch": 3.18240965280653, "grad_norm": 0.7116259336471558, "learning_rate": 2.9205953703012943e-06, "loss": 0.0798, "step": 107610 }, { "epoch": 3.182705388300704, "grad_norm": 0.8218358159065247, "learning_rate": 2.920468680382355e-06, "loss": 0.0695, "step": 107620 }, { "epoch": 3.183001123794878, "grad_norm": 0.6175109148025513, "learning_rate": 2.9203419904634154e-06, "loss": 0.065, "step": 107630 }, { "epoch": 3.183296859289052, "grad_norm": 1.2117992639541626, "learning_rate": 2.920215300544476e-06, "loss": 0.069, "step": 107640 }, { "epoch": 3.1835925947832258, "grad_norm": 0.8298571705818176, "learning_rate": 2.9200886106255366e-06, "loss": 0.082, "step": 107650 }, { "epoch": 3.1838883302773997, "grad_norm": 0.7851061820983887, "learning_rate": 2.9199619207065974e-06, "loss": 0.0766, "step": 107660 }, { "epoch": 3.184184065771574, "grad_norm": 1.1745635271072388, "learning_rate": 2.9198352307876577e-06, "loss": 0.0867, "step": 107670 }, { "epoch": 3.184479801265748, "grad_norm": 1.0996428728103638, "learning_rate": 2.919708540868719e-06, "loss": 0.0813, "step": 107680 }, { "epoch": 3.184775536759922, "grad_norm": 0.6985654830932617, "learning_rate": 2.9195818509497793e-06, "loss": 0.0567, "step": 107690 }, { "epoch": 3.185071272254096, "grad_norm": 0.9029271006584167, "learning_rate": 2.9194551610308397e-06, "loss": 0.0747, "step": 107700 }, { "epoch": 3.18536700774827, "grad_norm": 0.4788949191570282, "learning_rate": 2.9193284711119005e-06, "loss": 0.0819, "step": 107710 }, { "epoch": 3.185662743242444, "grad_norm": 0.7172756195068359, "learning_rate": 2.919201781192961e-06, "loss": 0.0934, "step": 107720 }, { "epoch": 3.1859584787366177, "grad_norm": 1.0150728225708008, "learning_rate": 2.9190750912740216e-06, "loss": 0.0638, "step": 107730 }, { "epoch": 3.186254214230792, "grad_norm": 0.704098105430603, "learning_rate": 2.918948401355082e-06, "loss": 0.0745, "step": 107740 }, { "epoch": 3.186549949724966, "grad_norm": 0.6491268277168274, "learning_rate": 2.9188217114361428e-06, "loss": 0.0706, "step": 107750 }, { "epoch": 3.18684568521914, "grad_norm": 0.6809276938438416, "learning_rate": 2.9186950215172036e-06, "loss": 0.0868, "step": 107760 }, { "epoch": 3.187141420713314, "grad_norm": 0.9673191905021667, "learning_rate": 2.9185683315982643e-06, "loss": 0.0732, "step": 107770 }, { "epoch": 3.187437156207488, "grad_norm": 0.9577310681343079, "learning_rate": 2.9184416416793247e-06, "loss": 0.0706, "step": 107780 }, { "epoch": 3.187732891701662, "grad_norm": 0.8927040100097656, "learning_rate": 2.9183149517603855e-06, "loss": 0.0509, "step": 107790 }, { "epoch": 3.1880286271958362, "grad_norm": 0.9327614903450012, "learning_rate": 2.918188261841446e-06, "loss": 0.0843, "step": 107800 }, { "epoch": 3.18832436269001, "grad_norm": 0.8014667630195618, "learning_rate": 2.9180615719225067e-06, "loss": 0.0772, "step": 107810 }, { "epoch": 3.188620098184184, "grad_norm": 0.9678772687911987, "learning_rate": 2.917934882003567e-06, "loss": 0.0764, "step": 107820 }, { "epoch": 3.188915833678358, "grad_norm": 1.2390871047973633, "learning_rate": 2.917808192084628e-06, "loss": 0.0784, "step": 107830 }, { "epoch": 3.189211569172532, "grad_norm": 0.6582068800926208, "learning_rate": 2.9176815021656886e-06, "loss": 0.0595, "step": 107840 }, { "epoch": 3.189507304666706, "grad_norm": 0.7063310146331787, "learning_rate": 2.9175548122467494e-06, "loss": 0.0671, "step": 107850 }, { "epoch": 3.1898030401608803, "grad_norm": 0.9258794188499451, "learning_rate": 2.9174281223278098e-06, "loss": 0.0874, "step": 107860 }, { "epoch": 3.1900987756550543, "grad_norm": 0.6852195262908936, "learning_rate": 2.9173014324088705e-06, "loss": 0.0714, "step": 107870 }, { "epoch": 3.1903945111492282, "grad_norm": 1.0395331382751465, "learning_rate": 2.917174742489931e-06, "loss": 0.0622, "step": 107880 }, { "epoch": 3.190690246643402, "grad_norm": 1.3916391134262085, "learning_rate": 2.9170480525709917e-06, "loss": 0.0518, "step": 107890 }, { "epoch": 3.190985982137576, "grad_norm": 0.8444160223007202, "learning_rate": 2.916921362652052e-06, "loss": 0.0844, "step": 107900 }, { "epoch": 3.19128171763175, "grad_norm": 1.1750503778457642, "learning_rate": 2.916794672733113e-06, "loss": 0.0704, "step": 107910 }, { "epoch": 3.191577453125924, "grad_norm": 0.6238377094268799, "learning_rate": 2.9166679828141736e-06, "loss": 0.0793, "step": 107920 }, { "epoch": 3.1918731886200984, "grad_norm": 0.40892675518989563, "learning_rate": 2.9165412928952344e-06, "loss": 0.0604, "step": 107930 }, { "epoch": 3.1921689241142723, "grad_norm": 1.8994762897491455, "learning_rate": 2.916414602976295e-06, "loss": 0.0676, "step": 107940 }, { "epoch": 3.1924646596084463, "grad_norm": 0.9945460557937622, "learning_rate": 2.9162879130573556e-06, "loss": 0.0797, "step": 107950 }, { "epoch": 3.19276039510262, "grad_norm": 0.9823383688926697, "learning_rate": 2.916161223138416e-06, "loss": 0.0798, "step": 107960 }, { "epoch": 3.193056130596794, "grad_norm": 1.1542670726776123, "learning_rate": 2.9160345332194767e-06, "loss": 0.0873, "step": 107970 }, { "epoch": 3.193351866090968, "grad_norm": 0.5808501243591309, "learning_rate": 2.915907843300537e-06, "loss": 0.0608, "step": 107980 }, { "epoch": 3.193647601585142, "grad_norm": 0.5650895237922668, "learning_rate": 2.915781153381598e-06, "loss": 0.0635, "step": 107990 }, { "epoch": 3.1939433370793164, "grad_norm": 0.6027506589889526, "learning_rate": 2.9156544634626587e-06, "loss": 0.085, "step": 108000 }, { "epoch": 3.1942390725734904, "grad_norm": 0.7455739974975586, "learning_rate": 2.9155277735437195e-06, "loss": 0.0637, "step": 108010 }, { "epoch": 3.1945348080676643, "grad_norm": 0.6908165216445923, "learning_rate": 2.91540108362478e-06, "loss": 0.063, "step": 108020 }, { "epoch": 3.1948305435618383, "grad_norm": 0.7409669160842896, "learning_rate": 2.9152743937058406e-06, "loss": 0.0676, "step": 108030 }, { "epoch": 3.195126279056012, "grad_norm": 0.6681167483329773, "learning_rate": 2.915147703786901e-06, "loss": 0.0646, "step": 108040 }, { "epoch": 3.195422014550186, "grad_norm": 0.8907991647720337, "learning_rate": 2.915021013867962e-06, "loss": 0.0781, "step": 108050 }, { "epoch": 3.19571775004436, "grad_norm": 0.5971043109893799, "learning_rate": 2.914894323949022e-06, "loss": 0.0656, "step": 108060 }, { "epoch": 3.1960134855385345, "grad_norm": 0.7613540291786194, "learning_rate": 2.914767634030083e-06, "loss": 0.0796, "step": 108070 }, { "epoch": 3.1963092210327084, "grad_norm": 0.8931037783622742, "learning_rate": 2.9146409441111437e-06, "loss": 0.0693, "step": 108080 }, { "epoch": 3.1966049565268824, "grad_norm": 0.6956355571746826, "learning_rate": 2.9145142541922045e-06, "loss": 0.0521, "step": 108090 }, { "epoch": 3.1969006920210563, "grad_norm": 0.9842389225959778, "learning_rate": 2.914387564273265e-06, "loss": 0.0905, "step": 108100 }, { "epoch": 3.1971964275152303, "grad_norm": 0.5377153158187866, "learning_rate": 2.9142608743543253e-06, "loss": 0.0785, "step": 108110 }, { "epoch": 3.197492163009404, "grad_norm": 0.6861253976821899, "learning_rate": 2.914134184435386e-06, "loss": 0.0738, "step": 108120 }, { "epoch": 3.1977878985035786, "grad_norm": 0.8005958199501038, "learning_rate": 2.9140074945164464e-06, "loss": 0.0663, "step": 108130 }, { "epoch": 3.1980836339977525, "grad_norm": 0.8304660320281982, "learning_rate": 2.913880804597507e-06, "loss": 0.0725, "step": 108140 }, { "epoch": 3.1983793694919265, "grad_norm": 1.4194644689559937, "learning_rate": 2.9137541146785676e-06, "loss": 0.0733, "step": 108150 }, { "epoch": 3.1986751049861004, "grad_norm": 0.5494266152381897, "learning_rate": 2.9136274247596288e-06, "loss": 0.0642, "step": 108160 }, { "epoch": 3.1989708404802744, "grad_norm": 0.8799721002578735, "learning_rate": 2.913500734840689e-06, "loss": 0.0748, "step": 108170 }, { "epoch": 3.1992665759744483, "grad_norm": 0.554595947265625, "learning_rate": 2.91337404492175e-06, "loss": 0.0625, "step": 108180 }, { "epoch": 3.1995623114686227, "grad_norm": 0.597353994846344, "learning_rate": 2.9132473550028103e-06, "loss": 0.055, "step": 108190 }, { "epoch": 3.1998580469627966, "grad_norm": 0.8241556286811829, "learning_rate": 2.913120665083871e-06, "loss": 0.0755, "step": 108200 }, { "epoch": 3.2001537824569706, "grad_norm": 1.0135984420776367, "learning_rate": 2.9129939751649315e-06, "loss": 0.0722, "step": 108210 }, { "epoch": 3.2004495179511445, "grad_norm": 1.2220053672790527, "learning_rate": 2.9128672852459922e-06, "loss": 0.0802, "step": 108220 }, { "epoch": 3.2007452534453185, "grad_norm": 1.0850530862808228, "learning_rate": 2.9127405953270526e-06, "loss": 0.0724, "step": 108230 }, { "epoch": 3.2010409889394924, "grad_norm": 0.6298733949661255, "learning_rate": 2.912613905408114e-06, "loss": 0.0615, "step": 108240 }, { "epoch": 3.2013367244336663, "grad_norm": 0.7003649473190308, "learning_rate": 2.912487215489174e-06, "loss": 0.0837, "step": 108250 }, { "epoch": 3.2016324599278407, "grad_norm": 0.8062260150909424, "learning_rate": 2.912360525570235e-06, "loss": 0.0825, "step": 108260 }, { "epoch": 3.2019281954220147, "grad_norm": 0.6572579741477966, "learning_rate": 2.9122338356512953e-06, "loss": 0.0786, "step": 108270 }, { "epoch": 3.2022239309161886, "grad_norm": 0.7216363549232483, "learning_rate": 2.912107145732356e-06, "loss": 0.0649, "step": 108280 }, { "epoch": 3.2025196664103626, "grad_norm": 0.625828206539154, "learning_rate": 2.9119804558134165e-06, "loss": 0.0658, "step": 108290 }, { "epoch": 3.2028154019045365, "grad_norm": 0.6960595846176147, "learning_rate": 2.9118537658944773e-06, "loss": 0.074, "step": 108300 }, { "epoch": 3.2031111373987105, "grad_norm": 1.4493414163589478, "learning_rate": 2.9117270759755377e-06, "loss": 0.0832, "step": 108310 }, { "epoch": 3.2034068728928844, "grad_norm": 0.7179741859436035, "learning_rate": 2.911600386056599e-06, "loss": 0.0698, "step": 108320 }, { "epoch": 3.203702608387059, "grad_norm": 0.6576471328735352, "learning_rate": 2.9114736961376592e-06, "loss": 0.0774, "step": 108330 }, { "epoch": 3.2039983438812327, "grad_norm": 0.41325658559799194, "learning_rate": 2.91134700621872e-06, "loss": 0.0716, "step": 108340 }, { "epoch": 3.2042940793754067, "grad_norm": 1.1156100034713745, "learning_rate": 2.9112203162997804e-06, "loss": 0.0897, "step": 108350 }, { "epoch": 3.2045898148695806, "grad_norm": 0.49001505970954895, "learning_rate": 2.911093626380841e-06, "loss": 0.0713, "step": 108360 }, { "epoch": 3.2048855503637546, "grad_norm": 0.45465949177742004, "learning_rate": 2.9109669364619015e-06, "loss": 0.0864, "step": 108370 }, { "epoch": 3.2051812858579285, "grad_norm": 0.7695748805999756, "learning_rate": 2.9108402465429623e-06, "loss": 0.0743, "step": 108380 }, { "epoch": 3.205477021352103, "grad_norm": 0.863196611404419, "learning_rate": 2.9107135566240227e-06, "loss": 0.0637, "step": 108390 }, { "epoch": 3.205772756846277, "grad_norm": 0.7496755719184875, "learning_rate": 2.910586866705084e-06, "loss": 0.0737, "step": 108400 }, { "epoch": 3.2060684923404508, "grad_norm": 0.784119188785553, "learning_rate": 2.9104601767861443e-06, "loss": 0.0719, "step": 108410 }, { "epoch": 3.2063642278346247, "grad_norm": 1.0718945264816284, "learning_rate": 2.910333486867205e-06, "loss": 0.0648, "step": 108420 }, { "epoch": 3.2066599633287987, "grad_norm": 0.6634100079536438, "learning_rate": 2.9102067969482654e-06, "loss": 0.0762, "step": 108430 }, { "epoch": 3.2069556988229726, "grad_norm": 0.7735612392425537, "learning_rate": 2.9100801070293262e-06, "loss": 0.0551, "step": 108440 }, { "epoch": 3.2072514343171465, "grad_norm": 0.8411725759506226, "learning_rate": 2.9099534171103866e-06, "loss": 0.0769, "step": 108450 }, { "epoch": 3.207547169811321, "grad_norm": 0.771778404712677, "learning_rate": 2.9098267271914474e-06, "loss": 0.0764, "step": 108460 }, { "epoch": 3.207842905305495, "grad_norm": 0.43113279342651367, "learning_rate": 2.9097000372725077e-06, "loss": 0.0652, "step": 108470 }, { "epoch": 3.208138640799669, "grad_norm": 0.8209302425384521, "learning_rate": 2.909573347353569e-06, "loss": 0.0768, "step": 108480 }, { "epoch": 3.2084343762938428, "grad_norm": 1.0546596050262451, "learning_rate": 2.9094466574346293e-06, "loss": 0.0555, "step": 108490 }, { "epoch": 3.2087301117880167, "grad_norm": 1.7510340213775635, "learning_rate": 2.90931996751569e-06, "loss": 0.0805, "step": 108500 }, { "epoch": 3.2090258472821906, "grad_norm": 0.898333728313446, "learning_rate": 2.9091932775967505e-06, "loss": 0.0867, "step": 108510 }, { "epoch": 3.209321582776365, "grad_norm": 1.1405616998672485, "learning_rate": 2.909066587677811e-06, "loss": 0.0782, "step": 108520 }, { "epoch": 3.209617318270539, "grad_norm": 0.7224470376968384, "learning_rate": 2.9089398977588716e-06, "loss": 0.0656, "step": 108530 }, { "epoch": 3.209913053764713, "grad_norm": 0.7250182032585144, "learning_rate": 2.908813207839932e-06, "loss": 0.0572, "step": 108540 }, { "epoch": 3.210208789258887, "grad_norm": 0.46294480562210083, "learning_rate": 2.908686517920993e-06, "loss": 0.0678, "step": 108550 }, { "epoch": 3.210504524753061, "grad_norm": 1.1824194192886353, "learning_rate": 2.9085598280020536e-06, "loss": 0.0932, "step": 108560 }, { "epoch": 3.2108002602472347, "grad_norm": 1.1408988237380981, "learning_rate": 2.9084331380831144e-06, "loss": 0.0941, "step": 108570 }, { "epoch": 3.2110959957414087, "grad_norm": 0.8734371066093445, "learning_rate": 2.9083064481641747e-06, "loss": 0.088, "step": 108580 }, { "epoch": 3.211391731235583, "grad_norm": 0.7105814218521118, "learning_rate": 2.9081797582452355e-06, "loss": 0.0697, "step": 108590 }, { "epoch": 3.211687466729757, "grad_norm": 0.8658667206764221, "learning_rate": 2.908053068326296e-06, "loss": 0.0919, "step": 108600 }, { "epoch": 3.211983202223931, "grad_norm": 1.2038992643356323, "learning_rate": 2.9079263784073567e-06, "loss": 0.0942, "step": 108610 }, { "epoch": 3.212278937718105, "grad_norm": 0.9137391448020935, "learning_rate": 2.907799688488417e-06, "loss": 0.095, "step": 108620 }, { "epoch": 3.212574673212279, "grad_norm": 0.7913630604743958, "learning_rate": 2.907672998569478e-06, "loss": 0.0782, "step": 108630 }, { "epoch": 3.212870408706453, "grad_norm": 1.349617838859558, "learning_rate": 2.9075463086505386e-06, "loss": 0.0674, "step": 108640 }, { "epoch": 3.2131661442006267, "grad_norm": 0.8682730793952942, "learning_rate": 2.9074196187315994e-06, "loss": 0.0777, "step": 108650 }, { "epoch": 3.213461879694801, "grad_norm": 0.827229917049408, "learning_rate": 2.9072929288126598e-06, "loss": 0.0799, "step": 108660 }, { "epoch": 3.213757615188975, "grad_norm": 0.9921612739562988, "learning_rate": 2.9071662388937206e-06, "loss": 0.081, "step": 108670 }, { "epoch": 3.214053350683149, "grad_norm": 0.6623623967170715, "learning_rate": 2.907039548974781e-06, "loss": 0.0696, "step": 108680 }, { "epoch": 3.214349086177323, "grad_norm": 0.33628368377685547, "learning_rate": 2.9069128590558417e-06, "loss": 0.0714, "step": 108690 }, { "epoch": 3.214644821671497, "grad_norm": 0.6276277303695679, "learning_rate": 2.906786169136902e-06, "loss": 0.0642, "step": 108700 }, { "epoch": 3.214940557165671, "grad_norm": 0.7178930640220642, "learning_rate": 2.906659479217963e-06, "loss": 0.0755, "step": 108710 }, { "epoch": 3.2152362926598452, "grad_norm": 1.264050006866455, "learning_rate": 2.9065327892990237e-06, "loss": 0.0727, "step": 108720 }, { "epoch": 3.215532028154019, "grad_norm": 0.42434895038604736, "learning_rate": 2.9064060993800845e-06, "loss": 0.0608, "step": 108730 }, { "epoch": 3.215827763648193, "grad_norm": 0.9571108222007751, "learning_rate": 2.906279409461145e-06, "loss": 0.0621, "step": 108740 }, { "epoch": 3.216123499142367, "grad_norm": 0.9861356616020203, "learning_rate": 2.9061527195422056e-06, "loss": 0.0719, "step": 108750 }, { "epoch": 3.216419234636541, "grad_norm": 0.7077438235282898, "learning_rate": 2.906026029623266e-06, "loss": 0.0661, "step": 108760 }, { "epoch": 3.216714970130715, "grad_norm": 0.8796486258506775, "learning_rate": 2.9058993397043268e-06, "loss": 0.0778, "step": 108770 }, { "epoch": 3.2170107056248893, "grad_norm": 0.9526026844978333, "learning_rate": 2.905772649785387e-06, "loss": 0.0726, "step": 108780 }, { "epoch": 3.2173064411190633, "grad_norm": 0.8407782912254333, "learning_rate": 2.905645959866448e-06, "loss": 0.058, "step": 108790 }, { "epoch": 3.217602176613237, "grad_norm": 0.7025159597396851, "learning_rate": 2.9055192699475087e-06, "loss": 0.0719, "step": 108800 }, { "epoch": 3.217897912107411, "grad_norm": 0.6092053651809692, "learning_rate": 2.9053925800285695e-06, "loss": 0.0745, "step": 108810 }, { "epoch": 3.218193647601585, "grad_norm": 1.4255125522613525, "learning_rate": 2.90526589010963e-06, "loss": 0.0747, "step": 108820 }, { "epoch": 3.218489383095759, "grad_norm": 0.9126417636871338, "learning_rate": 2.9051392001906907e-06, "loss": 0.0553, "step": 108830 }, { "epoch": 3.218785118589933, "grad_norm": 0.9400326609611511, "learning_rate": 2.905012510271751e-06, "loss": 0.0617, "step": 108840 }, { "epoch": 3.2190808540841074, "grad_norm": 0.752928614616394, "learning_rate": 2.904885820352812e-06, "loss": 0.0747, "step": 108850 }, { "epoch": 3.2193765895782813, "grad_norm": 0.8749231696128845, "learning_rate": 2.904759130433872e-06, "loss": 0.0803, "step": 108860 }, { "epoch": 3.2196723250724553, "grad_norm": 0.6527756452560425, "learning_rate": 2.904632440514933e-06, "loss": 0.0755, "step": 108870 }, { "epoch": 3.219968060566629, "grad_norm": 0.6084804534912109, "learning_rate": 2.9045057505959938e-06, "loss": 0.0703, "step": 108880 }, { "epoch": 3.220263796060803, "grad_norm": 0.8993920087814331, "learning_rate": 2.9043790606770546e-06, "loss": 0.0579, "step": 108890 }, { "epoch": 3.220559531554977, "grad_norm": 0.990679144859314, "learning_rate": 2.904252370758115e-06, "loss": 0.0841, "step": 108900 }, { "epoch": 3.220855267049151, "grad_norm": 0.912682831287384, "learning_rate": 2.9041256808391757e-06, "loss": 0.0883, "step": 108910 }, { "epoch": 3.2211510025433254, "grad_norm": 1.033489465713501, "learning_rate": 2.903998990920236e-06, "loss": 0.0854, "step": 108920 }, { "epoch": 3.2214467380374994, "grad_norm": 0.8081075549125671, "learning_rate": 2.9038723010012964e-06, "loss": 0.077, "step": 108930 }, { "epoch": 3.2217424735316733, "grad_norm": 0.8282284140586853, "learning_rate": 2.9037456110823572e-06, "loss": 0.0593, "step": 108940 }, { "epoch": 3.2220382090258473, "grad_norm": 0.8752256631851196, "learning_rate": 2.9036189211634176e-06, "loss": 0.0684, "step": 108950 }, { "epoch": 3.222333944520021, "grad_norm": 0.7125632762908936, "learning_rate": 2.903492231244479e-06, "loss": 0.067, "step": 108960 }, { "epoch": 3.222629680014195, "grad_norm": 0.677827000617981, "learning_rate": 2.903365541325539e-06, "loss": 0.0678, "step": 108970 }, { "epoch": 3.222925415508369, "grad_norm": 1.5201009511947632, "learning_rate": 2.9032388514066e-06, "loss": 0.0761, "step": 108980 }, { "epoch": 3.2232211510025435, "grad_norm": 0.49098682403564453, "learning_rate": 2.9031121614876603e-06, "loss": 0.0713, "step": 108990 }, { "epoch": 3.2235168864967174, "grad_norm": 1.0028001070022583, "learning_rate": 2.902985471568721e-06, "loss": 0.081, "step": 109000 }, { "epoch": 3.2238126219908914, "grad_norm": 0.694001317024231, "learning_rate": 2.9028587816497815e-06, "loss": 0.0657, "step": 109010 }, { "epoch": 3.2241083574850653, "grad_norm": 0.7828106880187988, "learning_rate": 2.9027320917308423e-06, "loss": 0.0672, "step": 109020 }, { "epoch": 3.2244040929792392, "grad_norm": 0.9974297285079956, "learning_rate": 2.9026054018119026e-06, "loss": 0.0947, "step": 109030 }, { "epoch": 3.224699828473413, "grad_norm": 0.4232538342475891, "learning_rate": 2.902478711892964e-06, "loss": 0.0684, "step": 109040 }, { "epoch": 3.2249955639675876, "grad_norm": 1.0378262996673584, "learning_rate": 2.9023520219740242e-06, "loss": 0.0686, "step": 109050 }, { "epoch": 3.2252912994617615, "grad_norm": 0.7801640629768372, "learning_rate": 2.902225332055085e-06, "loss": 0.0791, "step": 109060 }, { "epoch": 3.2255870349559355, "grad_norm": 0.8858595490455627, "learning_rate": 2.9020986421361454e-06, "loss": 0.0665, "step": 109070 }, { "epoch": 3.2258827704501094, "grad_norm": 1.0145131349563599, "learning_rate": 2.901971952217206e-06, "loss": 0.0767, "step": 109080 }, { "epoch": 3.2261785059442833, "grad_norm": 0.2811455726623535, "learning_rate": 2.9018452622982665e-06, "loss": 0.0397, "step": 109090 }, { "epoch": 3.2264742414384573, "grad_norm": 0.7471125721931458, "learning_rate": 2.9017185723793273e-06, "loss": 0.0772, "step": 109100 }, { "epoch": 3.2267699769326317, "grad_norm": 0.9342729449272156, "learning_rate": 2.9015918824603877e-06, "loss": 0.0771, "step": 109110 }, { "epoch": 3.2270657124268056, "grad_norm": 0.78813236951828, "learning_rate": 2.901465192541449e-06, "loss": 0.0824, "step": 109120 }, { "epoch": 3.2273614479209796, "grad_norm": 1.3206850290298462, "learning_rate": 2.9013385026225093e-06, "loss": 0.0989, "step": 109130 }, { "epoch": 3.2276571834151535, "grad_norm": 1.0016039609909058, "learning_rate": 2.90121181270357e-06, "loss": 0.0703, "step": 109140 }, { "epoch": 3.2279529189093275, "grad_norm": 0.5337756276130676, "learning_rate": 2.9010851227846304e-06, "loss": 0.0784, "step": 109150 }, { "epoch": 3.2282486544035014, "grad_norm": 1.067179799079895, "learning_rate": 2.900958432865691e-06, "loss": 0.0732, "step": 109160 }, { "epoch": 3.2285443898976753, "grad_norm": 0.8468594551086426, "learning_rate": 2.9008317429467516e-06, "loss": 0.0813, "step": 109170 }, { "epoch": 3.2288401253918497, "grad_norm": 1.0695337057113647, "learning_rate": 2.9007050530278124e-06, "loss": 0.0722, "step": 109180 }, { "epoch": 3.2291358608860237, "grad_norm": 1.2465462684631348, "learning_rate": 2.9005783631088727e-06, "loss": 0.0651, "step": 109190 }, { "epoch": 3.2294315963801976, "grad_norm": 0.5870857834815979, "learning_rate": 2.900451673189934e-06, "loss": 0.0905, "step": 109200 }, { "epoch": 3.2297273318743716, "grad_norm": 0.7249016165733337, "learning_rate": 2.9003249832709943e-06, "loss": 0.0764, "step": 109210 }, { "epoch": 3.2300230673685455, "grad_norm": 0.3058723509311676, "learning_rate": 2.900198293352055e-06, "loss": 0.0565, "step": 109220 }, { "epoch": 3.2303188028627194, "grad_norm": 0.7604644894599915, "learning_rate": 2.9000716034331155e-06, "loss": 0.077, "step": 109230 }, { "epoch": 3.2306145383568934, "grad_norm": 0.4400869309902191, "learning_rate": 2.8999449135141763e-06, "loss": 0.0532, "step": 109240 }, { "epoch": 3.2309102738510678, "grad_norm": 0.7011749148368835, "learning_rate": 2.8998182235952366e-06, "loss": 0.073, "step": 109250 }, { "epoch": 3.2312060093452417, "grad_norm": 0.7566825747489929, "learning_rate": 2.8996915336762974e-06, "loss": 0.0788, "step": 109260 }, { "epoch": 3.2315017448394157, "grad_norm": 1.2130374908447266, "learning_rate": 2.8995648437573578e-06, "loss": 0.0901, "step": 109270 }, { "epoch": 3.2317974803335896, "grad_norm": 1.1343616247177124, "learning_rate": 2.899438153838419e-06, "loss": 0.0653, "step": 109280 }, { "epoch": 3.2320932158277635, "grad_norm": 0.4238317906856537, "learning_rate": 2.8993114639194794e-06, "loss": 0.0599, "step": 109290 }, { "epoch": 3.2323889513219375, "grad_norm": 0.6754606366157532, "learning_rate": 2.89918477400054e-06, "loss": 0.0702, "step": 109300 }, { "epoch": 3.232684686816112, "grad_norm": 0.8432955145835876, "learning_rate": 2.8990580840816005e-06, "loss": 0.0821, "step": 109310 }, { "epoch": 3.232980422310286, "grad_norm": 0.6908577084541321, "learning_rate": 2.8989313941626613e-06, "loss": 0.0801, "step": 109320 }, { "epoch": 3.2332761578044598, "grad_norm": 0.670974850654602, "learning_rate": 2.8988047042437217e-06, "loss": 0.0602, "step": 109330 }, { "epoch": 3.2335718932986337, "grad_norm": 1.2088459730148315, "learning_rate": 2.898678014324782e-06, "loss": 0.0646, "step": 109340 }, { "epoch": 3.2338676287928076, "grad_norm": 0.8135057091712952, "learning_rate": 2.898551324405843e-06, "loss": 0.091, "step": 109350 }, { "epoch": 3.2341633642869816, "grad_norm": 0.9857777953147888, "learning_rate": 2.898424634486903e-06, "loss": 0.0845, "step": 109360 }, { "epoch": 3.2344590997811555, "grad_norm": 1.235856533050537, "learning_rate": 2.8982979445679644e-06, "loss": 0.0656, "step": 109370 }, { "epoch": 3.23475483527533, "grad_norm": 0.8290674090385437, "learning_rate": 2.8981712546490248e-06, "loss": 0.0795, "step": 109380 }, { "epoch": 3.235050570769504, "grad_norm": 1.1825027465820312, "learning_rate": 2.8980445647300856e-06, "loss": 0.0641, "step": 109390 }, { "epoch": 3.235346306263678, "grad_norm": 1.3901640176773071, "learning_rate": 2.897917874811146e-06, "loss": 0.0927, "step": 109400 }, { "epoch": 3.2356420417578518, "grad_norm": 0.6192564964294434, "learning_rate": 2.8977911848922067e-06, "loss": 0.0812, "step": 109410 }, { "epoch": 3.2359377772520257, "grad_norm": 1.4360026121139526, "learning_rate": 2.897664494973267e-06, "loss": 0.0744, "step": 109420 }, { "epoch": 3.2362335127461996, "grad_norm": 1.4812008142471313, "learning_rate": 2.897537805054328e-06, "loss": 0.0744, "step": 109430 }, { "epoch": 3.236529248240374, "grad_norm": 1.0545626878738403, "learning_rate": 2.8974111151353882e-06, "loss": 0.079, "step": 109440 }, { "epoch": 3.236824983734548, "grad_norm": 0.8401098251342773, "learning_rate": 2.8972844252164494e-06, "loss": 0.0876, "step": 109450 }, { "epoch": 3.237120719228722, "grad_norm": 0.727469801902771, "learning_rate": 2.89715773529751e-06, "loss": 0.0781, "step": 109460 }, { "epoch": 3.237416454722896, "grad_norm": 0.5379912257194519, "learning_rate": 2.8970310453785706e-06, "loss": 0.0844, "step": 109470 }, { "epoch": 3.23771219021707, "grad_norm": 1.004408597946167, "learning_rate": 2.896904355459631e-06, "loss": 0.0679, "step": 109480 }, { "epoch": 3.2380079257112437, "grad_norm": 0.8481292724609375, "learning_rate": 2.8967776655406918e-06, "loss": 0.0754, "step": 109490 }, { "epoch": 3.2383036612054177, "grad_norm": 0.8317965269088745, "learning_rate": 2.896650975621752e-06, "loss": 0.0846, "step": 109500 }, { "epoch": 3.238599396699592, "grad_norm": 0.5796132683753967, "learning_rate": 2.896524285702813e-06, "loss": 0.074, "step": 109510 }, { "epoch": 3.238895132193766, "grad_norm": 0.5150145888328552, "learning_rate": 2.8963975957838733e-06, "loss": 0.0803, "step": 109520 }, { "epoch": 3.23919086768794, "grad_norm": 0.8590226769447327, "learning_rate": 2.8962709058649345e-06, "loss": 0.07, "step": 109530 }, { "epoch": 3.239486603182114, "grad_norm": 0.465420126914978, "learning_rate": 2.896144215945995e-06, "loss": 0.0676, "step": 109540 }, { "epoch": 3.239782338676288, "grad_norm": 1.0186264514923096, "learning_rate": 2.8960175260270556e-06, "loss": 0.0955, "step": 109550 }, { "epoch": 3.240078074170462, "grad_norm": 0.8117030262947083, "learning_rate": 2.895890836108116e-06, "loss": 0.0851, "step": 109560 }, { "epoch": 3.2403738096646357, "grad_norm": 0.9758462309837341, "learning_rate": 2.895764146189177e-06, "loss": 0.0747, "step": 109570 }, { "epoch": 3.24066954515881, "grad_norm": 0.7121732831001282, "learning_rate": 2.895637456270237e-06, "loss": 0.0497, "step": 109580 }, { "epoch": 3.240965280652984, "grad_norm": 0.6787912249565125, "learning_rate": 2.895510766351298e-06, "loss": 0.0603, "step": 109590 }, { "epoch": 3.241261016147158, "grad_norm": 1.4531409740447998, "learning_rate": 2.8953840764323583e-06, "loss": 0.0735, "step": 109600 }, { "epoch": 3.241556751641332, "grad_norm": 0.6849517822265625, "learning_rate": 2.8952573865134195e-06, "loss": 0.0798, "step": 109610 }, { "epoch": 3.241852487135506, "grad_norm": 0.9160655736923218, "learning_rate": 2.89513069659448e-06, "loss": 0.0674, "step": 109620 }, { "epoch": 3.24214822262968, "grad_norm": 0.6374645233154297, "learning_rate": 2.8950040066755407e-06, "loss": 0.0563, "step": 109630 }, { "epoch": 3.242443958123854, "grad_norm": 1.3310773372650146, "learning_rate": 2.894877316756601e-06, "loss": 0.0688, "step": 109640 }, { "epoch": 3.242739693618028, "grad_norm": 0.9990587830543518, "learning_rate": 2.894750626837662e-06, "loss": 0.0818, "step": 109650 }, { "epoch": 3.243035429112202, "grad_norm": 0.65895676612854, "learning_rate": 2.8946239369187222e-06, "loss": 0.0803, "step": 109660 }, { "epoch": 3.243331164606376, "grad_norm": 0.6083670258522034, "learning_rate": 2.894497246999783e-06, "loss": 0.0752, "step": 109670 }, { "epoch": 3.24362690010055, "grad_norm": 0.5510844588279724, "learning_rate": 2.8943705570808434e-06, "loss": 0.0783, "step": 109680 }, { "epoch": 3.243922635594724, "grad_norm": 0.9143244624137878, "learning_rate": 2.8942438671619046e-06, "loss": 0.0638, "step": 109690 }, { "epoch": 3.2442183710888983, "grad_norm": 1.0049259662628174, "learning_rate": 2.894117177242965e-06, "loss": 0.0654, "step": 109700 }, { "epoch": 3.2445141065830723, "grad_norm": 0.8417786359786987, "learning_rate": 2.8939904873240257e-06, "loss": 0.0706, "step": 109710 }, { "epoch": 3.244809842077246, "grad_norm": 1.0771291255950928, "learning_rate": 2.893863797405086e-06, "loss": 0.0674, "step": 109720 }, { "epoch": 3.24510557757142, "grad_norm": 0.674187183380127, "learning_rate": 2.893737107486147e-06, "loss": 0.0767, "step": 109730 }, { "epoch": 3.245401313065594, "grad_norm": 0.5887393951416016, "learning_rate": 2.8936104175672073e-06, "loss": 0.0551, "step": 109740 }, { "epoch": 3.245697048559768, "grad_norm": 1.0220328569412231, "learning_rate": 2.893483727648268e-06, "loss": 0.072, "step": 109750 }, { "epoch": 3.245992784053942, "grad_norm": 0.6435105204582214, "learning_rate": 2.8933570377293284e-06, "loss": 0.0739, "step": 109760 }, { "epoch": 3.2462885195481164, "grad_norm": 0.8971590995788574, "learning_rate": 2.893230347810389e-06, "loss": 0.0859, "step": 109770 }, { "epoch": 3.2465842550422903, "grad_norm": 0.7633353471755981, "learning_rate": 2.89310365789145e-06, "loss": 0.0616, "step": 109780 }, { "epoch": 3.2468799905364643, "grad_norm": 1.1057113409042358, "learning_rate": 2.8929769679725104e-06, "loss": 0.0536, "step": 109790 }, { "epoch": 3.247175726030638, "grad_norm": 0.8921508193016052, "learning_rate": 2.892850278053571e-06, "loss": 0.077, "step": 109800 }, { "epoch": 3.247471461524812, "grad_norm": 0.8769564628601074, "learning_rate": 2.8927235881346315e-06, "loss": 0.0662, "step": 109810 }, { "epoch": 3.247767197018986, "grad_norm": 0.5941265225410461, "learning_rate": 2.8925968982156923e-06, "loss": 0.0818, "step": 109820 }, { "epoch": 3.24806293251316, "grad_norm": 0.9117015600204468, "learning_rate": 2.8924702082967527e-06, "loss": 0.0718, "step": 109830 }, { "epoch": 3.2483586680073344, "grad_norm": 0.8622479438781738, "learning_rate": 2.8923435183778135e-06, "loss": 0.0653, "step": 109840 }, { "epoch": 3.2486544035015084, "grad_norm": 1.0327658653259277, "learning_rate": 2.8922168284588743e-06, "loss": 0.0811, "step": 109850 }, { "epoch": 3.2489501389956823, "grad_norm": 1.0165541172027588, "learning_rate": 2.892090138539935e-06, "loss": 0.0924, "step": 109860 }, { "epoch": 3.2492458744898562, "grad_norm": 0.8875517845153809, "learning_rate": 2.8919634486209954e-06, "loss": 0.0759, "step": 109870 }, { "epoch": 3.24954160998403, "grad_norm": 0.8570793271064758, "learning_rate": 2.891836758702056e-06, "loss": 0.0763, "step": 109880 }, { "epoch": 3.249837345478204, "grad_norm": 0.8583486080169678, "learning_rate": 2.8917100687831166e-06, "loss": 0.064, "step": 109890 }, { "epoch": 3.250133080972378, "grad_norm": 1.0080242156982422, "learning_rate": 2.8915833788641774e-06, "loss": 0.0769, "step": 109900 }, { "epoch": 3.2504288164665525, "grad_norm": 0.6364027261734009, "learning_rate": 2.8914566889452377e-06, "loss": 0.0662, "step": 109910 }, { "epoch": 3.2507245519607264, "grad_norm": 0.6322718262672424, "learning_rate": 2.8913299990262985e-06, "loss": 0.0742, "step": 109920 }, { "epoch": 3.2510202874549003, "grad_norm": 0.7715861797332764, "learning_rate": 2.8912033091073593e-06, "loss": 0.0607, "step": 109930 }, { "epoch": 3.2513160229490743, "grad_norm": 0.682917058467865, "learning_rate": 2.89107661918842e-06, "loss": 0.0733, "step": 109940 }, { "epoch": 3.2516117584432482, "grad_norm": 0.9012771248817444, "learning_rate": 2.8909499292694805e-06, "loss": 0.0858, "step": 109950 }, { "epoch": 3.251907493937422, "grad_norm": 0.953538179397583, "learning_rate": 2.8908232393505412e-06, "loss": 0.0695, "step": 109960 }, { "epoch": 3.2522032294315966, "grad_norm": 1.505913257598877, "learning_rate": 2.8906965494316016e-06, "loss": 0.0834, "step": 109970 }, { "epoch": 3.2524989649257705, "grad_norm": 0.938776433467865, "learning_rate": 2.8905698595126624e-06, "loss": 0.0705, "step": 109980 }, { "epoch": 3.2527947004199445, "grad_norm": 0.3550853729248047, "learning_rate": 2.8904431695937228e-06, "loss": 0.0714, "step": 109990 }, { "epoch": 3.2530904359141184, "grad_norm": 0.8113697171211243, "learning_rate": 2.8903164796747836e-06, "loss": 0.0899, "step": 110000 }, { "epoch": 3.2533861714082923, "grad_norm": 0.7281051278114319, "learning_rate": 2.8901897897558443e-06, "loss": 0.08, "step": 110010 }, { "epoch": 3.2536819069024663, "grad_norm": 1.1495182514190674, "learning_rate": 2.890063099836905e-06, "loss": 0.0698, "step": 110020 }, { "epoch": 3.2539776423966407, "grad_norm": 0.6396492123603821, "learning_rate": 2.8899364099179655e-06, "loss": 0.0705, "step": 110030 }, { "epoch": 3.2542733778908146, "grad_norm": 0.6598595976829529, "learning_rate": 2.8898097199990263e-06, "loss": 0.0636, "step": 110040 }, { "epoch": 3.2545691133849886, "grad_norm": 0.5906664133071899, "learning_rate": 2.8896830300800867e-06, "loss": 0.0788, "step": 110050 }, { "epoch": 3.2548648488791625, "grad_norm": 1.2412099838256836, "learning_rate": 2.8895563401611474e-06, "loss": 0.1079, "step": 110060 }, { "epoch": 3.2551605843733364, "grad_norm": 0.611992597579956, "learning_rate": 2.889429650242208e-06, "loss": 0.0699, "step": 110070 }, { "epoch": 3.2554563198675104, "grad_norm": 0.6124412417411804, "learning_rate": 2.8893029603232686e-06, "loss": 0.0615, "step": 110080 }, { "epoch": 3.2557520553616843, "grad_norm": 0.900146484375, "learning_rate": 2.8891762704043294e-06, "loss": 0.0614, "step": 110090 }, { "epoch": 3.2560477908558587, "grad_norm": 0.7717861533164978, "learning_rate": 2.88904958048539e-06, "loss": 0.0818, "step": 110100 }, { "epoch": 3.2563435263500327, "grad_norm": 0.8932154774665833, "learning_rate": 2.8889228905664505e-06, "loss": 0.0803, "step": 110110 }, { "epoch": 3.2566392618442066, "grad_norm": 0.7286979556083679, "learning_rate": 2.8887962006475113e-06, "loss": 0.0721, "step": 110120 }, { "epoch": 3.2569349973383805, "grad_norm": 0.5607559084892273, "learning_rate": 2.8886695107285717e-06, "loss": 0.0561, "step": 110130 }, { "epoch": 3.2572307328325545, "grad_norm": 0.8184845447540283, "learning_rate": 2.8885428208096325e-06, "loss": 0.063, "step": 110140 }, { "epoch": 3.2575264683267284, "grad_norm": 1.128214716911316, "learning_rate": 2.888416130890693e-06, "loss": 0.0902, "step": 110150 }, { "epoch": 3.2578222038209024, "grad_norm": 0.9548044800758362, "learning_rate": 2.8882894409717536e-06, "loss": 0.0916, "step": 110160 }, { "epoch": 3.2581179393150768, "grad_norm": 1.1447193622589111, "learning_rate": 2.8881627510528144e-06, "loss": 0.0785, "step": 110170 }, { "epoch": 3.2584136748092507, "grad_norm": 0.896154522895813, "learning_rate": 2.888036061133875e-06, "loss": 0.07, "step": 110180 }, { "epoch": 3.2587094103034246, "grad_norm": 0.8330102562904358, "learning_rate": 2.8879093712149356e-06, "loss": 0.0643, "step": 110190 }, { "epoch": 3.2590051457975986, "grad_norm": 0.6715846657752991, "learning_rate": 2.887782681295996e-06, "loss": 0.0914, "step": 110200 }, { "epoch": 3.2593008812917725, "grad_norm": 0.8646750450134277, "learning_rate": 2.8876559913770567e-06, "loss": 0.0703, "step": 110210 }, { "epoch": 3.2595966167859465, "grad_norm": 0.915187656879425, "learning_rate": 2.887529301458117e-06, "loss": 0.0836, "step": 110220 }, { "epoch": 3.2598923522801204, "grad_norm": 1.019882321357727, "learning_rate": 2.887402611539178e-06, "loss": 0.0884, "step": 110230 }, { "epoch": 3.260188087774295, "grad_norm": 0.9719979763031006, "learning_rate": 2.8872759216202383e-06, "loss": 0.0783, "step": 110240 }, { "epoch": 3.2604838232684688, "grad_norm": 1.0917072296142578, "learning_rate": 2.8871492317012995e-06, "loss": 0.0724, "step": 110250 }, { "epoch": 3.2607795587626427, "grad_norm": 0.7258602380752563, "learning_rate": 2.88702254178236e-06, "loss": 0.0876, "step": 110260 }, { "epoch": 3.2610752942568166, "grad_norm": 0.8878863453865051, "learning_rate": 2.8868958518634206e-06, "loss": 0.0776, "step": 110270 }, { "epoch": 3.2613710297509906, "grad_norm": 0.8541404008865356, "learning_rate": 2.886769161944481e-06, "loss": 0.0728, "step": 110280 }, { "epoch": 3.261666765245165, "grad_norm": 0.7287957668304443, "learning_rate": 2.886642472025542e-06, "loss": 0.0601, "step": 110290 }, { "epoch": 3.261962500739339, "grad_norm": 0.832943856716156, "learning_rate": 2.886515782106602e-06, "loss": 0.0772, "step": 110300 }, { "epoch": 3.262258236233513, "grad_norm": 0.3971465528011322, "learning_rate": 2.886389092187663e-06, "loss": 0.0866, "step": 110310 }, { "epoch": 3.262553971727687, "grad_norm": 1.0295575857162476, "learning_rate": 2.8862624022687233e-06, "loss": 0.0856, "step": 110320 }, { "epoch": 3.2628497072218607, "grad_norm": 0.7938178181648254, "learning_rate": 2.8861357123497845e-06, "loss": 0.0723, "step": 110330 }, { "epoch": 3.2631454427160347, "grad_norm": 0.7213545441627502, "learning_rate": 2.886009022430845e-06, "loss": 0.0624, "step": 110340 }, { "epoch": 3.2634411782102086, "grad_norm": 1.1578730344772339, "learning_rate": 2.8858823325119057e-06, "loss": 0.0714, "step": 110350 }, { "epoch": 3.263736913704383, "grad_norm": 1.1360208988189697, "learning_rate": 2.885755642592966e-06, "loss": 0.0837, "step": 110360 }, { "epoch": 3.264032649198557, "grad_norm": 0.36992359161376953, "learning_rate": 2.885628952674027e-06, "loss": 0.0686, "step": 110370 }, { "epoch": 3.264328384692731, "grad_norm": 0.6658340096473694, "learning_rate": 2.885502262755087e-06, "loss": 0.0583, "step": 110380 }, { "epoch": 3.264624120186905, "grad_norm": 0.7891387939453125, "learning_rate": 2.885375572836148e-06, "loss": 0.0678, "step": 110390 }, { "epoch": 3.264919855681079, "grad_norm": 0.9004544019699097, "learning_rate": 2.8852488829172084e-06, "loss": 0.0939, "step": 110400 }, { "epoch": 3.2652155911752527, "grad_norm": 0.9290643334388733, "learning_rate": 2.8851221929982696e-06, "loss": 0.0654, "step": 110410 }, { "epoch": 3.2655113266694267, "grad_norm": 0.6310049295425415, "learning_rate": 2.88499550307933e-06, "loss": 0.0701, "step": 110420 }, { "epoch": 3.265807062163601, "grad_norm": 0.9535017013549805, "learning_rate": 2.8848688131603907e-06, "loss": 0.0774, "step": 110430 }, { "epoch": 3.266102797657775, "grad_norm": 0.817764163017273, "learning_rate": 2.884742123241451e-06, "loss": 0.0668, "step": 110440 }, { "epoch": 3.266398533151949, "grad_norm": 1.194424033164978, "learning_rate": 2.884615433322512e-06, "loss": 0.076, "step": 110450 }, { "epoch": 3.266694268646123, "grad_norm": 0.9397309422492981, "learning_rate": 2.8844887434035722e-06, "loss": 0.0733, "step": 110460 }, { "epoch": 3.266990004140297, "grad_norm": 0.7484422326087952, "learning_rate": 2.884362053484633e-06, "loss": 0.0826, "step": 110470 }, { "epoch": 3.2672857396344708, "grad_norm": 0.7893766164779663, "learning_rate": 2.8842353635656934e-06, "loss": 0.0696, "step": 110480 }, { "epoch": 3.2675814751286447, "grad_norm": 0.6857865452766418, "learning_rate": 2.8841086736467546e-06, "loss": 0.0476, "step": 110490 }, { "epoch": 3.267877210622819, "grad_norm": 1.1324878931045532, "learning_rate": 2.883981983727815e-06, "loss": 0.0954, "step": 110500 }, { "epoch": 3.268172946116993, "grad_norm": 0.6047303676605225, "learning_rate": 2.8838552938088758e-06, "loss": 0.0749, "step": 110510 }, { "epoch": 3.268468681611167, "grad_norm": 0.7365212440490723, "learning_rate": 2.883728603889936e-06, "loss": 0.0735, "step": 110520 }, { "epoch": 3.268764417105341, "grad_norm": 0.709122359752655, "learning_rate": 2.883601913970997e-06, "loss": 0.0563, "step": 110530 }, { "epoch": 3.269060152599515, "grad_norm": 1.1156178712844849, "learning_rate": 2.8834752240520573e-06, "loss": 0.0566, "step": 110540 }, { "epoch": 3.269355888093689, "grad_norm": 0.9373070001602173, "learning_rate": 2.883348534133118e-06, "loss": 0.0808, "step": 110550 }, { "epoch": 3.269651623587863, "grad_norm": 0.5611772537231445, "learning_rate": 2.8832218442141784e-06, "loss": 0.0742, "step": 110560 }, { "epoch": 3.269947359082037, "grad_norm": 1.3483632802963257, "learning_rate": 2.8830951542952397e-06, "loss": 0.0834, "step": 110570 }, { "epoch": 3.270243094576211, "grad_norm": 0.9392113089561462, "learning_rate": 2.8829684643763e-06, "loss": 0.0675, "step": 110580 }, { "epoch": 3.270538830070385, "grad_norm": 0.4903155565261841, "learning_rate": 2.8828417744573604e-06, "loss": 0.072, "step": 110590 }, { "epoch": 3.270834565564559, "grad_norm": 0.645616888999939, "learning_rate": 2.882715084538421e-06, "loss": 0.07, "step": 110600 }, { "epoch": 3.271130301058733, "grad_norm": 0.9087990522384644, "learning_rate": 2.8825883946194815e-06, "loss": 0.0826, "step": 110610 }, { "epoch": 3.2714260365529073, "grad_norm": 0.9957188963890076, "learning_rate": 2.8824617047005423e-06, "loss": 0.0839, "step": 110620 }, { "epoch": 3.2717217720470813, "grad_norm": 0.723739743232727, "learning_rate": 2.8823350147816027e-06, "loss": 0.0654, "step": 110630 }, { "epoch": 3.272017507541255, "grad_norm": 0.5958638191223145, "learning_rate": 2.8822083248626635e-06, "loss": 0.0511, "step": 110640 }, { "epoch": 3.272313243035429, "grad_norm": 1.2444629669189453, "learning_rate": 2.8820816349437243e-06, "loss": 0.0899, "step": 110650 }, { "epoch": 3.272608978529603, "grad_norm": 0.8356924057006836, "learning_rate": 2.881954945024785e-06, "loss": 0.0739, "step": 110660 }, { "epoch": 3.272904714023777, "grad_norm": 0.6299473643302917, "learning_rate": 2.8818282551058454e-06, "loss": 0.0726, "step": 110670 }, { "epoch": 3.273200449517951, "grad_norm": 0.9244523048400879, "learning_rate": 2.8817015651869062e-06, "loss": 0.0674, "step": 110680 }, { "epoch": 3.2734961850121254, "grad_norm": 0.7948064804077148, "learning_rate": 2.8815748752679666e-06, "loss": 0.0673, "step": 110690 }, { "epoch": 3.2737919205062993, "grad_norm": 0.9067016243934631, "learning_rate": 2.8814481853490274e-06, "loss": 0.0781, "step": 110700 }, { "epoch": 3.2740876560004732, "grad_norm": 1.0932400226593018, "learning_rate": 2.8813214954300877e-06, "loss": 0.065, "step": 110710 }, { "epoch": 3.274383391494647, "grad_norm": 0.5778804421424866, "learning_rate": 2.8811948055111485e-06, "loss": 0.0737, "step": 110720 }, { "epoch": 3.274679126988821, "grad_norm": 0.5767612457275391, "learning_rate": 2.8810681155922093e-06, "loss": 0.0627, "step": 110730 }, { "epoch": 3.274974862482995, "grad_norm": 1.0298250913619995, "learning_rate": 2.88094142567327e-06, "loss": 0.0649, "step": 110740 }, { "epoch": 3.275270597977169, "grad_norm": 0.9510816335678101, "learning_rate": 2.8808147357543305e-06, "loss": 0.069, "step": 110750 }, { "epoch": 3.2755663334713434, "grad_norm": 0.4743049740791321, "learning_rate": 2.8806880458353913e-06, "loss": 0.0726, "step": 110760 }, { "epoch": 3.2758620689655173, "grad_norm": 0.9997352361679077, "learning_rate": 2.8805613559164516e-06, "loss": 0.0705, "step": 110770 }, { "epoch": 3.2761578044596913, "grad_norm": 0.7973633408546448, "learning_rate": 2.8804346659975124e-06, "loss": 0.0725, "step": 110780 }, { "epoch": 3.2764535399538652, "grad_norm": 0.9224846363067627, "learning_rate": 2.880307976078573e-06, "loss": 0.0683, "step": 110790 }, { "epoch": 3.276749275448039, "grad_norm": 1.0608975887298584, "learning_rate": 2.8801812861596336e-06, "loss": 0.099, "step": 110800 }, { "epoch": 3.277045010942213, "grad_norm": 0.9709392189979553, "learning_rate": 2.8800545962406944e-06, "loss": 0.0721, "step": 110810 }, { "epoch": 3.277340746436387, "grad_norm": 1.5612690448760986, "learning_rate": 2.879927906321755e-06, "loss": 0.0752, "step": 110820 }, { "epoch": 3.2776364819305615, "grad_norm": 0.42586374282836914, "learning_rate": 2.8798012164028155e-06, "loss": 0.0663, "step": 110830 }, { "epoch": 3.2779322174247354, "grad_norm": 0.9934108257293701, "learning_rate": 2.8796745264838763e-06, "loss": 0.0666, "step": 110840 }, { "epoch": 3.2782279529189093, "grad_norm": 1.2468284368515015, "learning_rate": 2.8795478365649367e-06, "loss": 0.0788, "step": 110850 }, { "epoch": 3.2785236884130833, "grad_norm": 1.314916729927063, "learning_rate": 2.8794211466459975e-06, "loss": 0.0797, "step": 110860 }, { "epoch": 3.2788194239072572, "grad_norm": 0.5069453120231628, "learning_rate": 2.879294456727058e-06, "loss": 0.0802, "step": 110870 }, { "epoch": 3.279115159401431, "grad_norm": 0.6566920876502991, "learning_rate": 2.8791677668081186e-06, "loss": 0.0867, "step": 110880 }, { "epoch": 3.2794108948956056, "grad_norm": 1.5076038837432861, "learning_rate": 2.8790410768891794e-06, "loss": 0.0673, "step": 110890 }, { "epoch": 3.2797066303897795, "grad_norm": 0.8882617950439453, "learning_rate": 2.87891438697024e-06, "loss": 0.0827, "step": 110900 }, { "epoch": 3.2800023658839534, "grad_norm": 0.7686514854431152, "learning_rate": 2.8787876970513006e-06, "loss": 0.0691, "step": 110910 }, { "epoch": 3.2802981013781274, "grad_norm": 0.8482399582862854, "learning_rate": 2.8786610071323614e-06, "loss": 0.081, "step": 110920 }, { "epoch": 3.2805938368723013, "grad_norm": 1.4016854763031006, "learning_rate": 2.8785343172134217e-06, "loss": 0.096, "step": 110930 }, { "epoch": 3.2808895723664753, "grad_norm": 1.0749890804290771, "learning_rate": 2.8784076272944825e-06, "loss": 0.0583, "step": 110940 }, { "epoch": 3.2811853078606497, "grad_norm": 0.5900947451591492, "learning_rate": 2.878280937375543e-06, "loss": 0.068, "step": 110950 }, { "epoch": 3.2814810433548236, "grad_norm": 0.6795114278793335, "learning_rate": 2.8781542474566037e-06, "loss": 0.0724, "step": 110960 }, { "epoch": 3.2817767788489975, "grad_norm": 0.5924368500709534, "learning_rate": 2.8780275575376645e-06, "loss": 0.0647, "step": 110970 }, { "epoch": 3.2820725143431715, "grad_norm": 1.44317626953125, "learning_rate": 2.8779008676187253e-06, "loss": 0.0872, "step": 110980 }, { "epoch": 3.2823682498373454, "grad_norm": 0.8359047174453735, "learning_rate": 2.8777741776997856e-06, "loss": 0.0712, "step": 110990 }, { "epoch": 3.2826639853315194, "grad_norm": 0.909626305103302, "learning_rate": 2.877647487780846e-06, "loss": 0.0783, "step": 111000 }, { "epoch": 3.2829597208256933, "grad_norm": 0.9044972062110901, "learning_rate": 2.8775207978619068e-06, "loss": 0.093, "step": 111010 }, { "epoch": 3.2832554563198677, "grad_norm": 0.7380398511886597, "learning_rate": 2.877394107942967e-06, "loss": 0.0606, "step": 111020 }, { "epoch": 3.2835511918140416, "grad_norm": 0.5481858253479004, "learning_rate": 2.877267418024028e-06, "loss": 0.0632, "step": 111030 }, { "epoch": 3.2838469273082156, "grad_norm": 0.6060653328895569, "learning_rate": 2.8771407281050883e-06, "loss": 0.0492, "step": 111040 }, { "epoch": 3.2841426628023895, "grad_norm": 0.900181770324707, "learning_rate": 2.8770140381861495e-06, "loss": 0.062, "step": 111050 }, { "epoch": 3.2844383982965635, "grad_norm": 0.9047858119010925, "learning_rate": 2.87688734826721e-06, "loss": 0.0871, "step": 111060 }, { "epoch": 3.2847341337907374, "grad_norm": 0.8998751640319824, "learning_rate": 2.8767606583482707e-06, "loss": 0.075, "step": 111070 }, { "epoch": 3.2850298692849114, "grad_norm": 0.3974063992500305, "learning_rate": 2.876633968429331e-06, "loss": 0.057, "step": 111080 }, { "epoch": 3.2853256047790858, "grad_norm": 0.6061705946922302, "learning_rate": 2.876507278510392e-06, "loss": 0.0675, "step": 111090 }, { "epoch": 3.2856213402732597, "grad_norm": 1.3060890436172485, "learning_rate": 2.876380588591452e-06, "loss": 0.0836, "step": 111100 }, { "epoch": 3.2859170757674336, "grad_norm": 1.5030401945114136, "learning_rate": 2.876253898672513e-06, "loss": 0.0847, "step": 111110 }, { "epoch": 3.2862128112616076, "grad_norm": 0.9190801978111267, "learning_rate": 2.8761272087535733e-06, "loss": 0.084, "step": 111120 }, { "epoch": 3.2865085467557815, "grad_norm": 0.9259178638458252, "learning_rate": 2.8760005188346346e-06, "loss": 0.0659, "step": 111130 }, { "epoch": 3.2868042822499555, "grad_norm": 0.711601972579956, "learning_rate": 2.875873828915695e-06, "loss": 0.0647, "step": 111140 }, { "epoch": 3.2871000177441294, "grad_norm": 0.6869173049926758, "learning_rate": 2.8757471389967557e-06, "loss": 0.0772, "step": 111150 }, { "epoch": 3.287395753238304, "grad_norm": 0.8988888263702393, "learning_rate": 2.875620449077816e-06, "loss": 0.0799, "step": 111160 }, { "epoch": 3.2876914887324777, "grad_norm": 0.686767041683197, "learning_rate": 2.875493759158877e-06, "loss": 0.0701, "step": 111170 }, { "epoch": 3.2879872242266517, "grad_norm": 0.8959707021713257, "learning_rate": 2.8753670692399372e-06, "loss": 0.075, "step": 111180 }, { "epoch": 3.2882829597208256, "grad_norm": 0.7214027047157288, "learning_rate": 2.875240379320998e-06, "loss": 0.0627, "step": 111190 }, { "epoch": 3.2885786952149996, "grad_norm": 0.9002577662467957, "learning_rate": 2.8751136894020584e-06, "loss": 0.0766, "step": 111200 }, { "epoch": 3.288874430709174, "grad_norm": 0.5515501499176025, "learning_rate": 2.8749869994831196e-06, "loss": 0.0746, "step": 111210 }, { "epoch": 3.289170166203348, "grad_norm": 0.9744277596473694, "learning_rate": 2.87486030956418e-06, "loss": 0.0718, "step": 111220 }, { "epoch": 3.289465901697522, "grad_norm": 0.5825044512748718, "learning_rate": 2.8747336196452408e-06, "loss": 0.0776, "step": 111230 }, { "epoch": 3.289761637191696, "grad_norm": 0.43399032950401306, "learning_rate": 2.874606929726301e-06, "loss": 0.0681, "step": 111240 }, { "epoch": 3.2900573726858697, "grad_norm": 0.7092791199684143, "learning_rate": 2.874480239807362e-06, "loss": 0.0784, "step": 111250 }, { "epoch": 3.2903531081800437, "grad_norm": 0.6671923995018005, "learning_rate": 2.8743535498884223e-06, "loss": 0.0632, "step": 111260 }, { "epoch": 3.2906488436742176, "grad_norm": 0.9196158051490784, "learning_rate": 2.874226859969483e-06, "loss": 0.0761, "step": 111270 }, { "epoch": 3.290944579168392, "grad_norm": 1.123202919960022, "learning_rate": 2.8741001700505434e-06, "loss": 0.0709, "step": 111280 }, { "epoch": 3.291240314662566, "grad_norm": 0.7034774422645569, "learning_rate": 2.8739734801316046e-06, "loss": 0.0531, "step": 111290 }, { "epoch": 3.29153605015674, "grad_norm": 1.1217089891433716, "learning_rate": 2.873846790212665e-06, "loss": 0.0829, "step": 111300 }, { "epoch": 3.291831785650914, "grad_norm": 0.7316517233848572, "learning_rate": 2.873720100293726e-06, "loss": 0.0554, "step": 111310 }, { "epoch": 3.2921275211450878, "grad_norm": 0.8725861310958862, "learning_rate": 2.873593410374786e-06, "loss": 0.0851, "step": 111320 }, { "epoch": 3.2924232566392617, "grad_norm": 0.8718324303627014, "learning_rate": 2.873466720455847e-06, "loss": 0.0602, "step": 111330 }, { "epoch": 3.2927189921334357, "grad_norm": 1.6877344846725464, "learning_rate": 2.8733400305369073e-06, "loss": 0.0654, "step": 111340 }, { "epoch": 3.29301472762761, "grad_norm": 0.7356613278388977, "learning_rate": 2.873213340617968e-06, "loss": 0.0816, "step": 111350 }, { "epoch": 3.293310463121784, "grad_norm": 0.991564154624939, "learning_rate": 2.8730866506990285e-06, "loss": 0.0732, "step": 111360 }, { "epoch": 3.293606198615958, "grad_norm": 0.5545520782470703, "learning_rate": 2.8729599607800897e-06, "loss": 0.0812, "step": 111370 }, { "epoch": 3.293901934110132, "grad_norm": 1.0088621377944946, "learning_rate": 2.87283327086115e-06, "loss": 0.0664, "step": 111380 }, { "epoch": 3.294197669604306, "grad_norm": 1.0745410919189453, "learning_rate": 2.872706580942211e-06, "loss": 0.0741, "step": 111390 }, { "epoch": 3.2944934050984798, "grad_norm": 0.8625231981277466, "learning_rate": 2.872579891023271e-06, "loss": 0.089, "step": 111400 }, { "epoch": 3.2947891405926537, "grad_norm": 0.5824520587921143, "learning_rate": 2.8724532011043316e-06, "loss": 0.0852, "step": 111410 }, { "epoch": 3.295084876086828, "grad_norm": 0.9220231771469116, "learning_rate": 2.8723265111853924e-06, "loss": 0.0727, "step": 111420 }, { "epoch": 3.295380611581002, "grad_norm": 0.8076673150062561, "learning_rate": 2.8721998212664527e-06, "loss": 0.0634, "step": 111430 }, { "epoch": 3.295676347075176, "grad_norm": 1.136353611946106, "learning_rate": 2.8720731313475135e-06, "loss": 0.0528, "step": 111440 }, { "epoch": 3.29597208256935, "grad_norm": 0.9578499794006348, "learning_rate": 2.8719464414285743e-06, "loss": 0.0923, "step": 111450 }, { "epoch": 3.296267818063524, "grad_norm": 0.6089584231376648, "learning_rate": 2.871819751509635e-06, "loss": 0.0753, "step": 111460 }, { "epoch": 3.296563553557698, "grad_norm": 0.9201444983482361, "learning_rate": 2.8716930615906955e-06, "loss": 0.0692, "step": 111470 }, { "epoch": 3.296859289051872, "grad_norm": 0.7373906373977661, "learning_rate": 2.8715663716717563e-06, "loss": 0.0781, "step": 111480 }, { "epoch": 3.297155024546046, "grad_norm": 1.3862546682357788, "learning_rate": 2.8714396817528166e-06, "loss": 0.065, "step": 111490 }, { "epoch": 3.29745076004022, "grad_norm": 0.6681663990020752, "learning_rate": 2.8713129918338774e-06, "loss": 0.0772, "step": 111500 }, { "epoch": 3.297746495534394, "grad_norm": 0.9110973477363586, "learning_rate": 2.8711863019149378e-06, "loss": 0.0885, "step": 111510 }, { "epoch": 3.298042231028568, "grad_norm": 0.9237430095672607, "learning_rate": 2.8710596119959986e-06, "loss": 0.0818, "step": 111520 }, { "epoch": 3.298337966522742, "grad_norm": 0.7682095766067505, "learning_rate": 2.8709329220770594e-06, "loss": 0.0798, "step": 111530 }, { "epoch": 3.2986337020169163, "grad_norm": 0.6930173635482788, "learning_rate": 2.87080623215812e-06, "loss": 0.0651, "step": 111540 }, { "epoch": 3.2989294375110902, "grad_norm": 0.7706225514411926, "learning_rate": 2.8706795422391805e-06, "loss": 0.0711, "step": 111550 }, { "epoch": 3.299225173005264, "grad_norm": 1.217210292816162, "learning_rate": 2.8705528523202413e-06, "loss": 0.0653, "step": 111560 }, { "epoch": 3.299520908499438, "grad_norm": 0.8524772524833679, "learning_rate": 2.8704261624013017e-06, "loss": 0.0724, "step": 111570 }, { "epoch": 3.299816643993612, "grad_norm": 0.6526967287063599, "learning_rate": 2.8702994724823625e-06, "loss": 0.0682, "step": 111580 }, { "epoch": 3.300112379487786, "grad_norm": 0.6621847152709961, "learning_rate": 2.870172782563423e-06, "loss": 0.0586, "step": 111590 }, { "epoch": 3.30040811498196, "grad_norm": 1.123266339302063, "learning_rate": 2.8700460926444836e-06, "loss": 0.0724, "step": 111600 }, { "epoch": 3.3007038504761343, "grad_norm": 1.0071699619293213, "learning_rate": 2.8699194027255444e-06, "loss": 0.0849, "step": 111610 }, { "epoch": 3.3009995859703083, "grad_norm": 0.609212338924408, "learning_rate": 2.869792712806605e-06, "loss": 0.0616, "step": 111620 }, { "epoch": 3.3012953214644822, "grad_norm": 0.5157697796821594, "learning_rate": 2.8696660228876656e-06, "loss": 0.0722, "step": 111630 }, { "epoch": 3.301591056958656, "grad_norm": 0.9517328143119812, "learning_rate": 2.8695393329687263e-06, "loss": 0.0646, "step": 111640 }, { "epoch": 3.30188679245283, "grad_norm": 1.149428129196167, "learning_rate": 2.8694126430497867e-06, "loss": 0.0779, "step": 111650 }, { "epoch": 3.302182527947004, "grad_norm": 0.6626692414283752, "learning_rate": 2.8692859531308475e-06, "loss": 0.064, "step": 111660 }, { "epoch": 3.302478263441178, "grad_norm": 0.945239782333374, "learning_rate": 2.869159263211908e-06, "loss": 0.0693, "step": 111670 }, { "epoch": 3.3027739989353524, "grad_norm": 0.42469123005867004, "learning_rate": 2.8690325732929687e-06, "loss": 0.0633, "step": 111680 }, { "epoch": 3.3030697344295263, "grad_norm": 0.8588343858718872, "learning_rate": 2.8689058833740294e-06, "loss": 0.0424, "step": 111690 }, { "epoch": 3.3033654699237003, "grad_norm": 1.0990040302276611, "learning_rate": 2.8687791934550902e-06, "loss": 0.0953, "step": 111700 }, { "epoch": 3.3036612054178742, "grad_norm": 0.726279079914093, "learning_rate": 2.8686525035361506e-06, "loss": 0.087, "step": 111710 }, { "epoch": 3.303956940912048, "grad_norm": 0.9502460360527039, "learning_rate": 2.8685258136172114e-06, "loss": 0.0698, "step": 111720 }, { "epoch": 3.304252676406222, "grad_norm": 0.8319804668426514, "learning_rate": 2.8683991236982718e-06, "loss": 0.0764, "step": 111730 }, { "epoch": 3.304548411900396, "grad_norm": 0.8453660011291504, "learning_rate": 2.8682724337793325e-06, "loss": 0.0526, "step": 111740 }, { "epoch": 3.3048441473945704, "grad_norm": 0.9236912131309509, "learning_rate": 2.868145743860393e-06, "loss": 0.0713, "step": 111750 }, { "epoch": 3.3051398828887444, "grad_norm": 0.8899155259132385, "learning_rate": 2.8680190539414537e-06, "loss": 0.0854, "step": 111760 }, { "epoch": 3.3054356183829183, "grad_norm": 0.9758942127227783, "learning_rate": 2.8678923640225145e-06, "loss": 0.0764, "step": 111770 }, { "epoch": 3.3057313538770923, "grad_norm": 0.7734717130661011, "learning_rate": 2.8677656741035753e-06, "loss": 0.0783, "step": 111780 }, { "epoch": 3.306027089371266, "grad_norm": 1.1280767917633057, "learning_rate": 2.8676389841846356e-06, "loss": 0.0624, "step": 111790 }, { "epoch": 3.30632282486544, "grad_norm": 1.1984020471572876, "learning_rate": 2.8675122942656964e-06, "loss": 0.0898, "step": 111800 }, { "epoch": 3.3066185603596145, "grad_norm": 0.9830158352851868, "learning_rate": 2.867385604346757e-06, "loss": 0.0869, "step": 111810 }, { "epoch": 3.3069142958537885, "grad_norm": 0.7615934610366821, "learning_rate": 2.8672589144278176e-06, "loss": 0.0734, "step": 111820 }, { "epoch": 3.3072100313479624, "grad_norm": 0.8732248544692993, "learning_rate": 2.867132224508878e-06, "loss": 0.0661, "step": 111830 }, { "epoch": 3.3075057668421364, "grad_norm": 0.7566546201705933, "learning_rate": 2.8670055345899383e-06, "loss": 0.0647, "step": 111840 }, { "epoch": 3.3078015023363103, "grad_norm": 1.280597448348999, "learning_rate": 2.8668788446709995e-06, "loss": 0.0771, "step": 111850 }, { "epoch": 3.3080972378304843, "grad_norm": 0.7080345153808594, "learning_rate": 2.86675215475206e-06, "loss": 0.087, "step": 111860 }, { "epoch": 3.3083929733246586, "grad_norm": 0.538306474685669, "learning_rate": 2.8666254648331207e-06, "loss": 0.0886, "step": 111870 }, { "epoch": 3.3086887088188326, "grad_norm": 0.8272925615310669, "learning_rate": 2.866498774914181e-06, "loss": 0.0672, "step": 111880 }, { "epoch": 3.3089844443130065, "grad_norm": 0.6711820363998413, "learning_rate": 2.866372084995242e-06, "loss": 0.0659, "step": 111890 }, { "epoch": 3.3092801798071805, "grad_norm": 0.967848539352417, "learning_rate": 2.8662453950763022e-06, "loss": 0.0919, "step": 111900 }, { "epoch": 3.3095759153013544, "grad_norm": 1.030308723449707, "learning_rate": 2.866118705157363e-06, "loss": 0.0812, "step": 111910 }, { "epoch": 3.3098716507955284, "grad_norm": 0.6648669242858887, "learning_rate": 2.8659920152384234e-06, "loss": 0.078, "step": 111920 }, { "epoch": 3.3101673862897023, "grad_norm": 0.5663114786148071, "learning_rate": 2.8658653253194846e-06, "loss": 0.0527, "step": 111930 }, { "epoch": 3.3104631217838767, "grad_norm": 0.7756742238998413, "learning_rate": 2.865738635400545e-06, "loss": 0.0572, "step": 111940 }, { "epoch": 3.3107588572780506, "grad_norm": 0.8649997115135193, "learning_rate": 2.8656119454816057e-06, "loss": 0.0664, "step": 111950 }, { "epoch": 3.3110545927722246, "grad_norm": 0.4058213531970978, "learning_rate": 2.865485255562666e-06, "loss": 0.079, "step": 111960 }, { "epoch": 3.3113503282663985, "grad_norm": 0.6756601333618164, "learning_rate": 2.865358565643727e-06, "loss": 0.0739, "step": 111970 }, { "epoch": 3.3116460637605725, "grad_norm": 0.41692450642585754, "learning_rate": 2.8652318757247873e-06, "loss": 0.0815, "step": 111980 }, { "epoch": 3.3119417992547464, "grad_norm": 0.5879070162773132, "learning_rate": 2.865105185805848e-06, "loss": 0.0454, "step": 111990 }, { "epoch": 3.3122375347489204, "grad_norm": 0.7036771774291992, "learning_rate": 2.8649784958869084e-06, "loss": 0.0784, "step": 112000 }, { "epoch": 3.3125332702430947, "grad_norm": 0.9323778748512268, "learning_rate": 2.8648518059679696e-06, "loss": 0.0683, "step": 112010 }, { "epoch": 3.3128290057372687, "grad_norm": 2.2895255088806152, "learning_rate": 2.86472511604903e-06, "loss": 0.0774, "step": 112020 }, { "epoch": 3.3131247412314426, "grad_norm": 0.6663378477096558, "learning_rate": 2.8645984261300908e-06, "loss": 0.0744, "step": 112030 }, { "epoch": 3.3134204767256166, "grad_norm": 0.39794227480888367, "learning_rate": 2.864471736211151e-06, "loss": 0.0573, "step": 112040 }, { "epoch": 3.3137162122197905, "grad_norm": 0.6815615892410278, "learning_rate": 2.864345046292212e-06, "loss": 0.0729, "step": 112050 }, { "epoch": 3.3140119477139645, "grad_norm": 1.368243932723999, "learning_rate": 2.8642183563732723e-06, "loss": 0.0769, "step": 112060 }, { "epoch": 3.3143076832081384, "grad_norm": 0.823775053024292, "learning_rate": 2.864091666454333e-06, "loss": 0.0766, "step": 112070 }, { "epoch": 3.314603418702313, "grad_norm": 0.7898060083389282, "learning_rate": 2.8639649765353935e-06, "loss": 0.0642, "step": 112080 }, { "epoch": 3.3148991541964867, "grad_norm": 0.46853503584861755, "learning_rate": 2.8638382866164547e-06, "loss": 0.06, "step": 112090 }, { "epoch": 3.3151948896906607, "grad_norm": 1.094759225845337, "learning_rate": 2.863711596697515e-06, "loss": 0.0843, "step": 112100 }, { "epoch": 3.3154906251848346, "grad_norm": 1.0289394855499268, "learning_rate": 2.863584906778576e-06, "loss": 0.0718, "step": 112110 }, { "epoch": 3.3157863606790086, "grad_norm": 0.4750557541847229, "learning_rate": 2.863458216859636e-06, "loss": 0.0609, "step": 112120 }, { "epoch": 3.316082096173183, "grad_norm": 0.841724693775177, "learning_rate": 2.863331526940697e-06, "loss": 0.0713, "step": 112130 }, { "epoch": 3.316377831667357, "grad_norm": 1.0984586477279663, "learning_rate": 2.8632048370217574e-06, "loss": 0.077, "step": 112140 }, { "epoch": 3.316673567161531, "grad_norm": 0.6265063285827637, "learning_rate": 2.863078147102818e-06, "loss": 0.0825, "step": 112150 }, { "epoch": 3.3169693026557048, "grad_norm": 0.8134124279022217, "learning_rate": 2.8629514571838785e-06, "loss": 0.0974, "step": 112160 }, { "epoch": 3.3172650381498787, "grad_norm": 0.5752631425857544, "learning_rate": 2.8628247672649397e-06, "loss": 0.0667, "step": 112170 }, { "epoch": 3.3175607736440527, "grad_norm": 0.7591695785522461, "learning_rate": 2.862698077346e-06, "loss": 0.0785, "step": 112180 }, { "epoch": 3.3178565091382266, "grad_norm": 0.32180988788604736, "learning_rate": 2.862571387427061e-06, "loss": 0.0607, "step": 112190 }, { "epoch": 3.318152244632401, "grad_norm": 1.3350350856781006, "learning_rate": 2.8624446975081212e-06, "loss": 0.0968, "step": 112200 }, { "epoch": 3.318447980126575, "grad_norm": 0.9603362679481506, "learning_rate": 2.862318007589182e-06, "loss": 0.0795, "step": 112210 }, { "epoch": 3.318743715620749, "grad_norm": 0.7594372034072876, "learning_rate": 2.8621913176702424e-06, "loss": 0.0856, "step": 112220 }, { "epoch": 3.319039451114923, "grad_norm": 0.5142862796783447, "learning_rate": 2.862064627751303e-06, "loss": 0.0757, "step": 112230 }, { "epoch": 3.3193351866090968, "grad_norm": 0.759043276309967, "learning_rate": 2.8619379378323636e-06, "loss": 0.0545, "step": 112240 }, { "epoch": 3.3196309221032707, "grad_norm": 1.0095725059509277, "learning_rate": 2.8618112479134243e-06, "loss": 0.0871, "step": 112250 }, { "epoch": 3.3199266575974447, "grad_norm": 0.5479997396469116, "learning_rate": 2.861684557994485e-06, "loss": 0.0766, "step": 112260 }, { "epoch": 3.320222393091619, "grad_norm": 0.8619263172149658, "learning_rate": 2.8615578680755455e-06, "loss": 0.0765, "step": 112270 }, { "epoch": 3.320518128585793, "grad_norm": 0.9653680920600891, "learning_rate": 2.8614311781566063e-06, "loss": 0.0673, "step": 112280 }, { "epoch": 3.320813864079967, "grad_norm": 0.8030759692192078, "learning_rate": 2.8613044882376667e-06, "loss": 0.0741, "step": 112290 }, { "epoch": 3.321109599574141, "grad_norm": 1.1351234912872314, "learning_rate": 2.8611777983187274e-06, "loss": 0.0804, "step": 112300 }, { "epoch": 3.321405335068315, "grad_norm": 0.8472748398780823, "learning_rate": 2.861051108399788e-06, "loss": 0.0869, "step": 112310 }, { "epoch": 3.3217010705624888, "grad_norm": 1.140328049659729, "learning_rate": 2.8609244184808486e-06, "loss": 0.0789, "step": 112320 }, { "epoch": 3.3219968060566627, "grad_norm": 0.6543183922767639, "learning_rate": 2.8607977285619094e-06, "loss": 0.0736, "step": 112330 }, { "epoch": 3.322292541550837, "grad_norm": 0.57591313123703, "learning_rate": 2.86067103864297e-06, "loss": 0.0693, "step": 112340 }, { "epoch": 3.322588277045011, "grad_norm": 1.1251599788665771, "learning_rate": 2.8605443487240305e-06, "loss": 0.0779, "step": 112350 }, { "epoch": 3.322884012539185, "grad_norm": 1.1860202550888062, "learning_rate": 2.8604176588050913e-06, "loss": 0.0724, "step": 112360 }, { "epoch": 3.323179748033359, "grad_norm": 0.830241858959198, "learning_rate": 2.8602909688861517e-06, "loss": 0.0902, "step": 112370 }, { "epoch": 3.323475483527533, "grad_norm": 0.6268186569213867, "learning_rate": 2.8601642789672125e-06, "loss": 0.0741, "step": 112380 }, { "epoch": 3.323771219021707, "grad_norm": 0.9485234618186951, "learning_rate": 2.860037589048273e-06, "loss": 0.0629, "step": 112390 }, { "epoch": 3.324066954515881, "grad_norm": 1.0560359954833984, "learning_rate": 2.8599108991293336e-06, "loss": 0.0895, "step": 112400 }, { "epoch": 3.324362690010055, "grad_norm": 0.9277238845825195, "learning_rate": 2.8597842092103944e-06, "loss": 0.0825, "step": 112410 }, { "epoch": 3.324658425504229, "grad_norm": 0.7355927228927612, "learning_rate": 2.8596575192914552e-06, "loss": 0.0815, "step": 112420 }, { "epoch": 3.324954160998403, "grad_norm": 0.8192245364189148, "learning_rate": 2.8595308293725156e-06, "loss": 0.0808, "step": 112430 }, { "epoch": 3.325249896492577, "grad_norm": 0.6140127778053284, "learning_rate": 2.8594041394535764e-06, "loss": 0.039, "step": 112440 }, { "epoch": 3.325545631986751, "grad_norm": 1.0651819705963135, "learning_rate": 2.8592774495346367e-06, "loss": 0.0816, "step": 112450 }, { "epoch": 3.3258413674809253, "grad_norm": 0.6713547706604004, "learning_rate": 2.8591507596156975e-06, "loss": 0.079, "step": 112460 }, { "epoch": 3.3261371029750992, "grad_norm": 0.4723522365093231, "learning_rate": 2.859024069696758e-06, "loss": 0.0631, "step": 112470 }, { "epoch": 3.326432838469273, "grad_norm": 0.6622140407562256, "learning_rate": 2.8588973797778187e-06, "loss": 0.0552, "step": 112480 }, { "epoch": 3.326728573963447, "grad_norm": 0.9668481945991516, "learning_rate": 2.8587706898588795e-06, "loss": 0.0714, "step": 112490 }, { "epoch": 3.327024309457621, "grad_norm": 0.7507405877113342, "learning_rate": 2.8586439999399403e-06, "loss": 0.0771, "step": 112500 }, { "epoch": 3.327320044951795, "grad_norm": 0.651707112789154, "learning_rate": 2.8585173100210006e-06, "loss": 0.0701, "step": 112510 }, { "epoch": 3.327615780445969, "grad_norm": 0.9974100589752197, "learning_rate": 2.8583906201020614e-06, "loss": 0.0852, "step": 112520 }, { "epoch": 3.3279115159401433, "grad_norm": 0.5824806094169617, "learning_rate": 2.8582639301831218e-06, "loss": 0.0737, "step": 112530 }, { "epoch": 3.3282072514343173, "grad_norm": 1.06435227394104, "learning_rate": 2.8581372402641826e-06, "loss": 0.0698, "step": 112540 }, { "epoch": 3.3285029869284912, "grad_norm": 1.047838807106018, "learning_rate": 2.858010550345243e-06, "loss": 0.0716, "step": 112550 }, { "epoch": 3.328798722422665, "grad_norm": 0.7194925546646118, "learning_rate": 2.8578838604263037e-06, "loss": 0.091, "step": 112560 }, { "epoch": 3.329094457916839, "grad_norm": 0.5262104272842407, "learning_rate": 2.8577571705073645e-06, "loss": 0.0799, "step": 112570 }, { "epoch": 3.329390193411013, "grad_norm": 1.2826800346374512, "learning_rate": 2.8576304805884253e-06, "loss": 0.0706, "step": 112580 }, { "epoch": 3.329685928905187, "grad_norm": 0.821140706539154, "learning_rate": 2.8575037906694857e-06, "loss": 0.0631, "step": 112590 }, { "epoch": 3.3299816643993614, "grad_norm": 0.8613811731338501, "learning_rate": 2.8573771007505465e-06, "loss": 0.0936, "step": 112600 }, { "epoch": 3.3302773998935353, "grad_norm": 0.49996644258499146, "learning_rate": 2.857250410831607e-06, "loss": 0.0643, "step": 112610 }, { "epoch": 3.3305731353877093, "grad_norm": 0.9874022006988525, "learning_rate": 2.8571237209126676e-06, "loss": 0.0657, "step": 112620 }, { "epoch": 3.330868870881883, "grad_norm": 0.7569842338562012, "learning_rate": 2.856997030993728e-06, "loss": 0.0709, "step": 112630 }, { "epoch": 3.331164606376057, "grad_norm": 0.4284569323062897, "learning_rate": 2.8568703410747888e-06, "loss": 0.055, "step": 112640 }, { "epoch": 3.331460341870231, "grad_norm": 0.8574154376983643, "learning_rate": 2.8567436511558496e-06, "loss": 0.0812, "step": 112650 }, { "epoch": 3.331756077364405, "grad_norm": 0.7896620631217957, "learning_rate": 2.85661696123691e-06, "loss": 0.0832, "step": 112660 }, { "epoch": 3.3320518128585794, "grad_norm": 0.9124220013618469, "learning_rate": 2.8564902713179707e-06, "loss": 0.0766, "step": 112670 }, { "epoch": 3.3323475483527534, "grad_norm": 0.5582168102264404, "learning_rate": 2.856363581399031e-06, "loss": 0.072, "step": 112680 }, { "epoch": 3.3326432838469273, "grad_norm": 0.8072779774665833, "learning_rate": 2.856236891480092e-06, "loss": 0.0613, "step": 112690 }, { "epoch": 3.3329390193411013, "grad_norm": 0.8259541988372803, "learning_rate": 2.8561102015611522e-06, "loss": 0.0724, "step": 112700 }, { "epoch": 3.333234754835275, "grad_norm": 0.8328658938407898, "learning_rate": 2.855983511642213e-06, "loss": 0.0683, "step": 112710 }, { "epoch": 3.333530490329449, "grad_norm": 0.2991750240325928, "learning_rate": 2.8558568217232734e-06, "loss": 0.0615, "step": 112720 }, { "epoch": 3.3338262258236235, "grad_norm": 0.8799479603767395, "learning_rate": 2.8557301318043346e-06, "loss": 0.0683, "step": 112730 }, { "epoch": 3.3341219613177975, "grad_norm": 0.7961862087249756, "learning_rate": 2.855603441885395e-06, "loss": 0.066, "step": 112740 }, { "epoch": 3.3344176968119714, "grad_norm": 0.8079529404640198, "learning_rate": 2.8554767519664558e-06, "loss": 0.0843, "step": 112750 }, { "epoch": 3.3347134323061454, "grad_norm": 0.8959870934486389, "learning_rate": 2.855350062047516e-06, "loss": 0.0774, "step": 112760 }, { "epoch": 3.3350091678003193, "grad_norm": 0.785778284072876, "learning_rate": 2.855223372128577e-06, "loss": 0.0742, "step": 112770 }, { "epoch": 3.3353049032944933, "grad_norm": 0.6447533965110779, "learning_rate": 2.8550966822096373e-06, "loss": 0.0628, "step": 112780 }, { "epoch": 3.3356006387886676, "grad_norm": 0.6774718165397644, "learning_rate": 2.854969992290698e-06, "loss": 0.0645, "step": 112790 }, { "epoch": 3.3358963742828416, "grad_norm": 1.4532240629196167, "learning_rate": 2.8548433023717584e-06, "loss": 0.0891, "step": 112800 }, { "epoch": 3.3361921097770155, "grad_norm": 0.7650600671768188, "learning_rate": 2.8547166124528197e-06, "loss": 0.0739, "step": 112810 }, { "epoch": 3.3364878452711895, "grad_norm": 0.9948883652687073, "learning_rate": 2.85458992253388e-06, "loss": 0.0675, "step": 112820 }, { "epoch": 3.3367835807653634, "grad_norm": 0.8520765900611877, "learning_rate": 2.854463232614941e-06, "loss": 0.0619, "step": 112830 }, { "epoch": 3.3370793162595374, "grad_norm": 0.7249321937561035, "learning_rate": 2.854336542696001e-06, "loss": 0.0727, "step": 112840 }, { "epoch": 3.3373750517537113, "grad_norm": 1.556581974029541, "learning_rate": 2.854209852777062e-06, "loss": 0.0772, "step": 112850 }, { "epoch": 3.3376707872478857, "grad_norm": 0.737586498260498, "learning_rate": 2.8540831628581223e-06, "loss": 0.0664, "step": 112860 }, { "epoch": 3.3379665227420596, "grad_norm": 0.851285994052887, "learning_rate": 2.853956472939183e-06, "loss": 0.0706, "step": 112870 }, { "epoch": 3.3382622582362336, "grad_norm": 0.7218759655952454, "learning_rate": 2.8538297830202435e-06, "loss": 0.0691, "step": 112880 }, { "epoch": 3.3385579937304075, "grad_norm": 0.881453812122345, "learning_rate": 2.8537030931013047e-06, "loss": 0.0654, "step": 112890 }, { "epoch": 3.3388537292245815, "grad_norm": 0.88846755027771, "learning_rate": 2.853576403182365e-06, "loss": 0.0969, "step": 112900 }, { "epoch": 3.3391494647187554, "grad_norm": 0.7566891312599182, "learning_rate": 2.853449713263426e-06, "loss": 0.0872, "step": 112910 }, { "epoch": 3.3394452002129293, "grad_norm": 1.4576318264007568, "learning_rate": 2.8533230233444862e-06, "loss": 0.0632, "step": 112920 }, { "epoch": 3.3397409357071037, "grad_norm": 0.6235204339027405, "learning_rate": 2.853196333425547e-06, "loss": 0.0597, "step": 112930 }, { "epoch": 3.3400366712012777, "grad_norm": 0.8539131879806519, "learning_rate": 2.8530696435066074e-06, "loss": 0.056, "step": 112940 }, { "epoch": 3.3403324066954516, "grad_norm": 0.9370696544647217, "learning_rate": 2.852942953587668e-06, "loss": 0.0839, "step": 112950 }, { "epoch": 3.3406281421896256, "grad_norm": 0.6335523724555969, "learning_rate": 2.8528162636687285e-06, "loss": 0.0813, "step": 112960 }, { "epoch": 3.3409238776837995, "grad_norm": 0.9523911476135254, "learning_rate": 2.8526895737497897e-06, "loss": 0.0807, "step": 112970 }, { "epoch": 3.3412196131779734, "grad_norm": 0.8001177310943604, "learning_rate": 2.85256288383085e-06, "loss": 0.0681, "step": 112980 }, { "epoch": 3.3415153486721474, "grad_norm": 0.8757475018501282, "learning_rate": 2.852436193911911e-06, "loss": 0.0493, "step": 112990 }, { "epoch": 3.3418110841663218, "grad_norm": 1.3984544277191162, "learning_rate": 2.8523095039929713e-06, "loss": 0.083, "step": 113000 }, { "epoch": 3.3421068196604957, "grad_norm": 1.0861924886703491, "learning_rate": 2.852182814074032e-06, "loss": 0.0784, "step": 113010 }, { "epoch": 3.3424025551546697, "grad_norm": 0.6427766680717468, "learning_rate": 2.8520561241550924e-06, "loss": 0.0821, "step": 113020 }, { "epoch": 3.3426982906488436, "grad_norm": 1.0880404710769653, "learning_rate": 2.8519294342361532e-06, "loss": 0.071, "step": 113030 }, { "epoch": 3.3429940261430175, "grad_norm": 0.26680564880371094, "learning_rate": 2.8518027443172136e-06, "loss": 0.061, "step": 113040 }, { "epoch": 3.343289761637192, "grad_norm": 0.9590889811515808, "learning_rate": 2.851676054398275e-06, "loss": 0.0705, "step": 113050 }, { "epoch": 3.343585497131366, "grad_norm": 0.6644076704978943, "learning_rate": 2.851549364479335e-06, "loss": 0.0696, "step": 113060 }, { "epoch": 3.34388123262554, "grad_norm": 0.8156271576881409, "learning_rate": 2.8514226745603955e-06, "loss": 0.0735, "step": 113070 }, { "epoch": 3.3441769681197138, "grad_norm": 0.9654210209846497, "learning_rate": 2.8512959846414563e-06, "loss": 0.0623, "step": 113080 }, { "epoch": 3.3444727036138877, "grad_norm": 0.42969462275505066, "learning_rate": 2.8511692947225167e-06, "loss": 0.0567, "step": 113090 }, { "epoch": 3.3447684391080617, "grad_norm": 0.775180995464325, "learning_rate": 2.8510426048035775e-06, "loss": 0.0746, "step": 113100 }, { "epoch": 3.3450641746022356, "grad_norm": 0.7069252133369446, "learning_rate": 2.850915914884638e-06, "loss": 0.0913, "step": 113110 }, { "epoch": 3.34535991009641, "grad_norm": 0.5812058448791504, "learning_rate": 2.8507892249656986e-06, "loss": 0.0658, "step": 113120 }, { "epoch": 3.345655645590584, "grad_norm": 0.33834367990493774, "learning_rate": 2.8506625350467594e-06, "loss": 0.0956, "step": 113130 }, { "epoch": 3.345951381084758, "grad_norm": 0.7211728692054749, "learning_rate": 2.85053584512782e-06, "loss": 0.065, "step": 113140 }, { "epoch": 3.346247116578932, "grad_norm": 0.8376433849334717, "learning_rate": 2.8504091552088806e-06, "loss": 0.0818, "step": 113150 }, { "epoch": 3.3465428520731058, "grad_norm": 0.7935110926628113, "learning_rate": 2.8502824652899414e-06, "loss": 0.0807, "step": 113160 }, { "epoch": 3.3468385875672797, "grad_norm": 1.3772716522216797, "learning_rate": 2.8501557753710017e-06, "loss": 0.0739, "step": 113170 }, { "epoch": 3.3471343230614536, "grad_norm": 1.1115189790725708, "learning_rate": 2.8500290854520625e-06, "loss": 0.0688, "step": 113180 }, { "epoch": 3.347430058555628, "grad_norm": 1.0050243139266968, "learning_rate": 2.849902395533123e-06, "loss": 0.0607, "step": 113190 }, { "epoch": 3.347725794049802, "grad_norm": 0.8648233413696289, "learning_rate": 2.8497757056141837e-06, "loss": 0.0795, "step": 113200 }, { "epoch": 3.348021529543976, "grad_norm": 0.8354882001876831, "learning_rate": 2.8496490156952445e-06, "loss": 0.0717, "step": 113210 }, { "epoch": 3.34831726503815, "grad_norm": 0.6977883577346802, "learning_rate": 2.8495223257763053e-06, "loss": 0.0692, "step": 113220 }, { "epoch": 3.348613000532324, "grad_norm": 0.8057272434234619, "learning_rate": 2.8493956358573656e-06, "loss": 0.0716, "step": 113230 }, { "epoch": 3.3489087360264977, "grad_norm": 0.9932725429534912, "learning_rate": 2.8492689459384264e-06, "loss": 0.0746, "step": 113240 }, { "epoch": 3.3492044715206717, "grad_norm": 0.8483207821846008, "learning_rate": 2.8491422560194868e-06, "loss": 0.0733, "step": 113250 }, { "epoch": 3.349500207014846, "grad_norm": 0.7798606157302856, "learning_rate": 2.8490155661005476e-06, "loss": 0.0897, "step": 113260 }, { "epoch": 3.34979594250902, "grad_norm": 0.8229846954345703, "learning_rate": 2.848888876181608e-06, "loss": 0.0848, "step": 113270 }, { "epoch": 3.350091678003194, "grad_norm": 0.7600171566009521, "learning_rate": 2.8487621862626687e-06, "loss": 0.0674, "step": 113280 }, { "epoch": 3.350387413497368, "grad_norm": 1.2074902057647705, "learning_rate": 2.8486354963437295e-06, "loss": 0.059, "step": 113290 }, { "epoch": 3.350683148991542, "grad_norm": 0.8313608765602112, "learning_rate": 2.8485088064247903e-06, "loss": 0.0696, "step": 113300 }, { "epoch": 3.350978884485716, "grad_norm": 0.5563545227050781, "learning_rate": 2.8483821165058507e-06, "loss": 0.0626, "step": 113310 }, { "epoch": 3.35127461997989, "grad_norm": 0.9012792110443115, "learning_rate": 2.8482554265869115e-06, "loss": 0.0843, "step": 113320 }, { "epoch": 3.351570355474064, "grad_norm": 1.5567692518234253, "learning_rate": 2.848128736667972e-06, "loss": 0.074, "step": 113330 }, { "epoch": 3.351866090968238, "grad_norm": 0.9001246094703674, "learning_rate": 2.8480020467490326e-06, "loss": 0.0549, "step": 113340 }, { "epoch": 3.352161826462412, "grad_norm": 0.9513344168663025, "learning_rate": 2.847875356830093e-06, "loss": 0.0699, "step": 113350 }, { "epoch": 3.352457561956586, "grad_norm": 0.549860417842865, "learning_rate": 2.8477486669111538e-06, "loss": 0.073, "step": 113360 }, { "epoch": 3.35275329745076, "grad_norm": 1.1676280498504639, "learning_rate": 2.8476219769922146e-06, "loss": 0.0642, "step": 113370 }, { "epoch": 3.3530490329449343, "grad_norm": 0.47464704513549805, "learning_rate": 2.8474952870732753e-06, "loss": 0.0632, "step": 113380 }, { "epoch": 3.3533447684391082, "grad_norm": 1.2205333709716797, "learning_rate": 2.8473685971543357e-06, "loss": 0.0585, "step": 113390 }, { "epoch": 3.353640503933282, "grad_norm": 0.7829319834709167, "learning_rate": 2.8472419072353965e-06, "loss": 0.0791, "step": 113400 }, { "epoch": 3.353936239427456, "grad_norm": 0.9741780161857605, "learning_rate": 2.847115217316457e-06, "loss": 0.0791, "step": 113410 }, { "epoch": 3.35423197492163, "grad_norm": 0.8055132627487183, "learning_rate": 2.8469885273975177e-06, "loss": 0.0729, "step": 113420 }, { "epoch": 3.354527710415804, "grad_norm": 0.9905652403831482, "learning_rate": 2.846861837478578e-06, "loss": 0.0633, "step": 113430 }, { "epoch": 3.354823445909978, "grad_norm": 0.45896491408348083, "learning_rate": 2.846735147559639e-06, "loss": 0.0634, "step": 113440 }, { "epoch": 3.3551191814041523, "grad_norm": 0.7402764558792114, "learning_rate": 2.8466084576406996e-06, "loss": 0.0724, "step": 113450 }, { "epoch": 3.3554149168983263, "grad_norm": 0.715620219707489, "learning_rate": 2.8464817677217604e-06, "loss": 0.081, "step": 113460 }, { "epoch": 3.3557106523925, "grad_norm": 1.015337347984314, "learning_rate": 2.8463550778028208e-06, "loss": 0.068, "step": 113470 }, { "epoch": 3.356006387886674, "grad_norm": 0.8963671922683716, "learning_rate": 2.846228387883881e-06, "loss": 0.0765, "step": 113480 }, { "epoch": 3.356302123380848, "grad_norm": 0.6960486769676208, "learning_rate": 2.846101697964942e-06, "loss": 0.0503, "step": 113490 }, { "epoch": 3.356597858875022, "grad_norm": 0.9708020091056824, "learning_rate": 2.8459750080460023e-06, "loss": 0.0807, "step": 113500 }, { "epoch": 3.356893594369196, "grad_norm": 0.7773751020431519, "learning_rate": 2.845848318127063e-06, "loss": 0.0981, "step": 113510 }, { "epoch": 3.3571893298633704, "grad_norm": 0.6410707235336304, "learning_rate": 2.8457216282081234e-06, "loss": 0.0806, "step": 113520 }, { "epoch": 3.3574850653575443, "grad_norm": 0.339664101600647, "learning_rate": 2.8455949382891846e-06, "loss": 0.0696, "step": 113530 }, { "epoch": 3.3577808008517183, "grad_norm": 0.4382316768169403, "learning_rate": 2.845468248370245e-06, "loss": 0.0446, "step": 113540 }, { "epoch": 3.358076536345892, "grad_norm": 0.906502366065979, "learning_rate": 2.845341558451306e-06, "loss": 0.0823, "step": 113550 }, { "epoch": 3.358372271840066, "grad_norm": 0.9963238835334778, "learning_rate": 2.845214868532366e-06, "loss": 0.0853, "step": 113560 }, { "epoch": 3.35866800733424, "grad_norm": 0.766024112701416, "learning_rate": 2.845088178613427e-06, "loss": 0.0761, "step": 113570 }, { "epoch": 3.358963742828414, "grad_norm": 0.9122951626777649, "learning_rate": 2.8449614886944873e-06, "loss": 0.0645, "step": 113580 }, { "epoch": 3.3592594783225884, "grad_norm": 0.3326343595981598, "learning_rate": 2.844834798775548e-06, "loss": 0.0504, "step": 113590 }, { "epoch": 3.3595552138167624, "grad_norm": 0.706560492515564, "learning_rate": 2.8447081088566085e-06, "loss": 0.072, "step": 113600 }, { "epoch": 3.3598509493109363, "grad_norm": 1.0124361515045166, "learning_rate": 2.8445814189376697e-06, "loss": 0.084, "step": 113610 }, { "epoch": 3.3601466848051103, "grad_norm": 1.0929936170578003, "learning_rate": 2.84445472901873e-06, "loss": 0.0925, "step": 113620 }, { "epoch": 3.360442420299284, "grad_norm": 0.6441714763641357, "learning_rate": 2.844328039099791e-06, "loss": 0.0691, "step": 113630 }, { "epoch": 3.360738155793458, "grad_norm": 0.6427709460258484, "learning_rate": 2.844201349180851e-06, "loss": 0.0478, "step": 113640 }, { "epoch": 3.3610338912876325, "grad_norm": 1.0719956159591675, "learning_rate": 2.844074659261912e-06, "loss": 0.0766, "step": 113650 }, { "epoch": 3.3613296267818065, "grad_norm": 1.4039452075958252, "learning_rate": 2.8439479693429724e-06, "loss": 0.0935, "step": 113660 }, { "epoch": 3.3616253622759804, "grad_norm": 0.7296482920646667, "learning_rate": 2.843821279424033e-06, "loss": 0.0833, "step": 113670 }, { "epoch": 3.3619210977701544, "grad_norm": 0.4518038332462311, "learning_rate": 2.8436945895050935e-06, "loss": 0.0622, "step": 113680 }, { "epoch": 3.3622168332643283, "grad_norm": 1.313206672668457, "learning_rate": 2.8435678995861547e-06, "loss": 0.0669, "step": 113690 }, { "epoch": 3.3625125687585022, "grad_norm": 0.9428349137306213, "learning_rate": 2.843441209667215e-06, "loss": 0.086, "step": 113700 }, { "epoch": 3.3628083042526766, "grad_norm": 1.0710526704788208, "learning_rate": 2.843314519748276e-06, "loss": 0.0814, "step": 113710 }, { "epoch": 3.3631040397468506, "grad_norm": 0.6613501906394958, "learning_rate": 2.8431878298293363e-06, "loss": 0.0749, "step": 113720 }, { "epoch": 3.3633997752410245, "grad_norm": 0.7912555932998657, "learning_rate": 2.843061139910397e-06, "loss": 0.0632, "step": 113730 }, { "epoch": 3.3636955107351985, "grad_norm": 1.109794020652771, "learning_rate": 2.8429344499914574e-06, "loss": 0.0546, "step": 113740 }, { "epoch": 3.3639912462293724, "grad_norm": 1.1647131443023682, "learning_rate": 2.842807760072518e-06, "loss": 0.0841, "step": 113750 }, { "epoch": 3.3642869817235463, "grad_norm": 0.6670531630516052, "learning_rate": 2.8426810701535786e-06, "loss": 0.0791, "step": 113760 }, { "epoch": 3.3645827172177203, "grad_norm": 0.6467811465263367, "learning_rate": 2.8425543802346398e-06, "loss": 0.0856, "step": 113770 }, { "epoch": 3.3648784527118947, "grad_norm": 1.110552430152893, "learning_rate": 2.8424276903157e-06, "loss": 0.069, "step": 113780 }, { "epoch": 3.3651741882060686, "grad_norm": 0.3025036156177521, "learning_rate": 2.842301000396761e-06, "loss": 0.0504, "step": 113790 }, { "epoch": 3.3654699237002426, "grad_norm": 0.7169115543365479, "learning_rate": 2.8421743104778213e-06, "loss": 0.0797, "step": 113800 }, { "epoch": 3.3657656591944165, "grad_norm": 0.7903358340263367, "learning_rate": 2.842047620558882e-06, "loss": 0.072, "step": 113810 }, { "epoch": 3.3660613946885904, "grad_norm": 1.1267573833465576, "learning_rate": 2.8419209306399425e-06, "loss": 0.076, "step": 113820 }, { "epoch": 3.3663571301827644, "grad_norm": 0.9955781102180481, "learning_rate": 2.8417942407210032e-06, "loss": 0.06, "step": 113830 }, { "epoch": 3.3666528656769383, "grad_norm": 0.7910758852958679, "learning_rate": 2.8416675508020636e-06, "loss": 0.0546, "step": 113840 }, { "epoch": 3.3669486011711127, "grad_norm": 0.8135099411010742, "learning_rate": 2.841540860883125e-06, "loss": 0.0703, "step": 113850 }, { "epoch": 3.3672443366652867, "grad_norm": 0.8675488233566284, "learning_rate": 2.841414170964185e-06, "loss": 0.0822, "step": 113860 }, { "epoch": 3.3675400721594606, "grad_norm": 1.213439702987671, "learning_rate": 2.841287481045246e-06, "loss": 0.0695, "step": 113870 }, { "epoch": 3.3678358076536346, "grad_norm": 0.8376495838165283, "learning_rate": 2.8411607911263063e-06, "loss": 0.0691, "step": 113880 }, { "epoch": 3.3681315431478085, "grad_norm": 0.9994907379150391, "learning_rate": 2.8410341012073667e-06, "loss": 0.0561, "step": 113890 }, { "epoch": 3.3684272786419824, "grad_norm": 0.6957517266273499, "learning_rate": 2.8409074112884275e-06, "loss": 0.0603, "step": 113900 }, { "epoch": 3.3687230141361564, "grad_norm": 1.07551109790802, "learning_rate": 2.840780721369488e-06, "loss": 0.0812, "step": 113910 }, { "epoch": 3.3690187496303308, "grad_norm": 1.2259387969970703, "learning_rate": 2.8406540314505487e-06, "loss": 0.0705, "step": 113920 }, { "epoch": 3.3693144851245047, "grad_norm": 1.269856333732605, "learning_rate": 2.8405273415316094e-06, "loss": 0.0771, "step": 113930 }, { "epoch": 3.3696102206186787, "grad_norm": 1.950585126876831, "learning_rate": 2.8404006516126702e-06, "loss": 0.0652, "step": 113940 }, { "epoch": 3.3699059561128526, "grad_norm": 1.30229651927948, "learning_rate": 2.8402739616937306e-06, "loss": 0.0812, "step": 113950 }, { "epoch": 3.3702016916070265, "grad_norm": 0.6229177117347717, "learning_rate": 2.8401472717747914e-06, "loss": 0.084, "step": 113960 }, { "epoch": 3.370497427101201, "grad_norm": 0.4783272445201874, "learning_rate": 2.8400205818558518e-06, "loss": 0.0794, "step": 113970 }, { "epoch": 3.370793162595375, "grad_norm": 1.3055154085159302, "learning_rate": 2.8398938919369125e-06, "loss": 0.0718, "step": 113980 }, { "epoch": 3.371088898089549, "grad_norm": 0.7192959785461426, "learning_rate": 2.839767202017973e-06, "loss": 0.05, "step": 113990 }, { "epoch": 3.3713846335837228, "grad_norm": 1.056693196296692, "learning_rate": 2.8396405120990337e-06, "loss": 0.0763, "step": 114000 }, { "epoch": 3.3716803690778967, "grad_norm": 0.9389842748641968, "learning_rate": 2.8395138221800945e-06, "loss": 0.0678, "step": 114010 }, { "epoch": 3.3719761045720706, "grad_norm": 1.039807677268982, "learning_rate": 2.8393871322611553e-06, "loss": 0.0715, "step": 114020 }, { "epoch": 3.3722718400662446, "grad_norm": 1.3475372791290283, "learning_rate": 2.8392604423422156e-06, "loss": 0.0717, "step": 114030 }, { "epoch": 3.372567575560419, "grad_norm": 0.9812145233154297, "learning_rate": 2.8391337524232764e-06, "loss": 0.0546, "step": 114040 }, { "epoch": 3.372863311054593, "grad_norm": 0.9609482288360596, "learning_rate": 2.839007062504337e-06, "loss": 0.0785, "step": 114050 }, { "epoch": 3.373159046548767, "grad_norm": 1.2586616277694702, "learning_rate": 2.8388803725853976e-06, "loss": 0.0686, "step": 114060 }, { "epoch": 3.373454782042941, "grad_norm": 1.1377160549163818, "learning_rate": 2.838753682666458e-06, "loss": 0.0707, "step": 114070 }, { "epoch": 3.3737505175371147, "grad_norm": 0.8038536310195923, "learning_rate": 2.8386269927475187e-06, "loss": 0.0673, "step": 114080 }, { "epoch": 3.3740462530312887, "grad_norm": 1.4954650402069092, "learning_rate": 2.8385003028285795e-06, "loss": 0.0543, "step": 114090 }, { "epoch": 3.3743419885254626, "grad_norm": 0.9027893543243408, "learning_rate": 2.8383736129096403e-06, "loss": 0.0848, "step": 114100 }, { "epoch": 3.374637724019637, "grad_norm": 1.6061145067214966, "learning_rate": 2.8382469229907007e-06, "loss": 0.0844, "step": 114110 }, { "epoch": 3.374933459513811, "grad_norm": 1.0329797267913818, "learning_rate": 2.8381202330717615e-06, "loss": 0.0637, "step": 114120 }, { "epoch": 3.375229195007985, "grad_norm": 0.793546736240387, "learning_rate": 2.837993543152822e-06, "loss": 0.0757, "step": 114130 }, { "epoch": 3.375524930502159, "grad_norm": 0.5191544890403748, "learning_rate": 2.8378668532338826e-06, "loss": 0.0503, "step": 114140 }, { "epoch": 3.375820665996333, "grad_norm": 0.9891175627708435, "learning_rate": 2.837740163314943e-06, "loss": 0.0995, "step": 114150 }, { "epoch": 3.3761164014905067, "grad_norm": 1.6167254447937012, "learning_rate": 2.837613473396004e-06, "loss": 0.0777, "step": 114160 }, { "epoch": 3.3764121369846807, "grad_norm": 1.504328966140747, "learning_rate": 2.8374867834770646e-06, "loss": 0.0785, "step": 114170 }, { "epoch": 3.376707872478855, "grad_norm": 0.7331992387771606, "learning_rate": 2.8373600935581254e-06, "loss": 0.0642, "step": 114180 }, { "epoch": 3.377003607973029, "grad_norm": 0.8616593480110168, "learning_rate": 2.8372334036391857e-06, "loss": 0.0664, "step": 114190 }, { "epoch": 3.377299343467203, "grad_norm": 0.6751562356948853, "learning_rate": 2.8371067137202465e-06, "loss": 0.0844, "step": 114200 }, { "epoch": 3.377595078961377, "grad_norm": 0.6263115406036377, "learning_rate": 2.836980023801307e-06, "loss": 0.0733, "step": 114210 }, { "epoch": 3.377890814455551, "grad_norm": 0.566411554813385, "learning_rate": 2.8368533338823677e-06, "loss": 0.0658, "step": 114220 }, { "epoch": 3.378186549949725, "grad_norm": 0.4550206959247589, "learning_rate": 2.836726643963428e-06, "loss": 0.0615, "step": 114230 }, { "epoch": 3.378482285443899, "grad_norm": 1.380302906036377, "learning_rate": 2.836599954044489e-06, "loss": 0.0641, "step": 114240 }, { "epoch": 3.378778020938073, "grad_norm": 0.7844297289848328, "learning_rate": 2.8364732641255496e-06, "loss": 0.0794, "step": 114250 }, { "epoch": 3.379073756432247, "grad_norm": 0.7117806077003479, "learning_rate": 2.8363465742066104e-06, "loss": 0.076, "step": 114260 }, { "epoch": 3.379369491926421, "grad_norm": 0.931568443775177, "learning_rate": 2.8362198842876708e-06, "loss": 0.0827, "step": 114270 }, { "epoch": 3.379665227420595, "grad_norm": 0.8677920699119568, "learning_rate": 2.8360931943687316e-06, "loss": 0.0786, "step": 114280 }, { "epoch": 3.379960962914769, "grad_norm": 0.8996595740318298, "learning_rate": 2.835966504449792e-06, "loss": 0.0687, "step": 114290 }, { "epoch": 3.3802566984089433, "grad_norm": 0.9157449007034302, "learning_rate": 2.8358398145308527e-06, "loss": 0.0876, "step": 114300 }, { "epoch": 3.380552433903117, "grad_norm": 1.0071743726730347, "learning_rate": 2.835713124611913e-06, "loss": 0.0797, "step": 114310 }, { "epoch": 3.380848169397291, "grad_norm": 1.3309181928634644, "learning_rate": 2.8355864346929735e-06, "loss": 0.0943, "step": 114320 }, { "epoch": 3.381143904891465, "grad_norm": 0.9488871693611145, "learning_rate": 2.8354597447740347e-06, "loss": 0.0686, "step": 114330 }, { "epoch": 3.381439640385639, "grad_norm": 0.7987213730812073, "learning_rate": 2.835333054855095e-06, "loss": 0.0651, "step": 114340 }, { "epoch": 3.381735375879813, "grad_norm": 0.8174057006835938, "learning_rate": 2.835206364936156e-06, "loss": 0.084, "step": 114350 }, { "epoch": 3.382031111373987, "grad_norm": 1.0014649629592896, "learning_rate": 2.835079675017216e-06, "loss": 0.0874, "step": 114360 }, { "epoch": 3.3823268468681613, "grad_norm": 1.329642415046692, "learning_rate": 2.834952985098277e-06, "loss": 0.0759, "step": 114370 }, { "epoch": 3.3826225823623353, "grad_norm": 1.0274391174316406, "learning_rate": 2.8348262951793374e-06, "loss": 0.0878, "step": 114380 }, { "epoch": 3.382918317856509, "grad_norm": 0.6989240050315857, "learning_rate": 2.834699605260398e-06, "loss": 0.0664, "step": 114390 }, { "epoch": 3.383214053350683, "grad_norm": 0.8868293166160583, "learning_rate": 2.8345729153414585e-06, "loss": 0.0904, "step": 114400 }, { "epoch": 3.383509788844857, "grad_norm": 0.7319665551185608, "learning_rate": 2.8344462254225197e-06, "loss": 0.0815, "step": 114410 }, { "epoch": 3.383805524339031, "grad_norm": 0.6504685878753662, "learning_rate": 2.83431953550358e-06, "loss": 0.0861, "step": 114420 }, { "epoch": 3.384101259833205, "grad_norm": 1.3885167837142944, "learning_rate": 2.834192845584641e-06, "loss": 0.0774, "step": 114430 }, { "epoch": 3.3843969953273794, "grad_norm": 0.5794827938079834, "learning_rate": 2.8340661556657012e-06, "loss": 0.061, "step": 114440 }, { "epoch": 3.3846927308215533, "grad_norm": 1.311834692955017, "learning_rate": 2.833939465746762e-06, "loss": 0.1016, "step": 114450 }, { "epoch": 3.3849884663157273, "grad_norm": 0.692421019077301, "learning_rate": 2.8338127758278224e-06, "loss": 0.0863, "step": 114460 }, { "epoch": 3.385284201809901, "grad_norm": 0.6766344308853149, "learning_rate": 2.833686085908883e-06, "loss": 0.067, "step": 114470 }, { "epoch": 3.385579937304075, "grad_norm": 0.6395924091339111, "learning_rate": 2.8335593959899436e-06, "loss": 0.0747, "step": 114480 }, { "epoch": 3.385875672798249, "grad_norm": 0.8590078353881836, "learning_rate": 2.8334327060710048e-06, "loss": 0.0616, "step": 114490 }, { "epoch": 3.386171408292423, "grad_norm": 1.363812804222107, "learning_rate": 2.833306016152065e-06, "loss": 0.0855, "step": 114500 }, { "epoch": 3.3864671437865974, "grad_norm": 0.7472514510154724, "learning_rate": 2.833179326233126e-06, "loss": 0.0704, "step": 114510 }, { "epoch": 3.3867628792807714, "grad_norm": 0.6937998533248901, "learning_rate": 2.8330526363141863e-06, "loss": 0.0842, "step": 114520 }, { "epoch": 3.3870586147749453, "grad_norm": 0.6384459733963013, "learning_rate": 2.832925946395247e-06, "loss": 0.0679, "step": 114530 }, { "epoch": 3.3873543502691192, "grad_norm": 0.9090566039085388, "learning_rate": 2.8327992564763074e-06, "loss": 0.0633, "step": 114540 }, { "epoch": 3.387650085763293, "grad_norm": 0.6252172589302063, "learning_rate": 2.8326725665573682e-06, "loss": 0.0667, "step": 114550 }, { "epoch": 3.387945821257467, "grad_norm": 1.054140329360962, "learning_rate": 2.8325458766384286e-06, "loss": 0.0698, "step": 114560 }, { "epoch": 3.3882415567516415, "grad_norm": 0.7991840243339539, "learning_rate": 2.83241918671949e-06, "loss": 0.081, "step": 114570 }, { "epoch": 3.3885372922458155, "grad_norm": 1.1778779029846191, "learning_rate": 2.83229249680055e-06, "loss": 0.0834, "step": 114580 }, { "epoch": 3.3888330277399894, "grad_norm": 0.627958357334137, "learning_rate": 2.832165806881611e-06, "loss": 0.0643, "step": 114590 }, { "epoch": 3.3891287632341633, "grad_norm": 1.0399280786514282, "learning_rate": 2.8320391169626713e-06, "loss": 0.0807, "step": 114600 }, { "epoch": 3.3894244987283373, "grad_norm": 0.6595660448074341, "learning_rate": 2.831912427043732e-06, "loss": 0.0792, "step": 114610 }, { "epoch": 3.3897202342225112, "grad_norm": 1.1000462770462036, "learning_rate": 2.8317857371247925e-06, "loss": 0.0775, "step": 114620 }, { "epoch": 3.3900159697166856, "grad_norm": 0.718212902545929, "learning_rate": 2.8316590472058533e-06, "loss": 0.0866, "step": 114630 }, { "epoch": 3.3903117052108596, "grad_norm": 0.9605766534805298, "learning_rate": 2.8315323572869136e-06, "loss": 0.0646, "step": 114640 }, { "epoch": 3.3906074407050335, "grad_norm": 1.382887363433838, "learning_rate": 2.831405667367975e-06, "loss": 0.0878, "step": 114650 }, { "epoch": 3.3909031761992074, "grad_norm": 0.9195048213005066, "learning_rate": 2.8312789774490352e-06, "loss": 0.0778, "step": 114660 }, { "epoch": 3.3911989116933814, "grad_norm": 0.678032398223877, "learning_rate": 2.831152287530096e-06, "loss": 0.086, "step": 114670 }, { "epoch": 3.3914946471875553, "grad_norm": 0.7362022995948792, "learning_rate": 2.8310255976111564e-06, "loss": 0.0758, "step": 114680 }, { "epoch": 3.3917903826817293, "grad_norm": 0.877322793006897, "learning_rate": 2.830898907692217e-06, "loss": 0.0621, "step": 114690 }, { "epoch": 3.3920861181759037, "grad_norm": 0.7982485890388489, "learning_rate": 2.8307722177732775e-06, "loss": 0.0805, "step": 114700 }, { "epoch": 3.3923818536700776, "grad_norm": 0.8812232613563538, "learning_rate": 2.8306455278543383e-06, "loss": 0.0807, "step": 114710 }, { "epoch": 3.3926775891642516, "grad_norm": 0.7662246823310852, "learning_rate": 2.8305188379353987e-06, "loss": 0.0764, "step": 114720 }, { "epoch": 3.3929733246584255, "grad_norm": 1.4603028297424316, "learning_rate": 2.8303921480164595e-06, "loss": 0.0731, "step": 114730 }, { "epoch": 3.3932690601525994, "grad_norm": 0.603938639163971, "learning_rate": 2.8302654580975203e-06, "loss": 0.0498, "step": 114740 }, { "epoch": 3.3935647956467734, "grad_norm": 0.8612066507339478, "learning_rate": 2.8301387681785806e-06, "loss": 0.0824, "step": 114750 }, { "epoch": 3.3938605311409473, "grad_norm": 0.9488875269889832, "learning_rate": 2.8300120782596414e-06, "loss": 0.0743, "step": 114760 }, { "epoch": 3.3941562666351217, "grad_norm": 1.2046055793762207, "learning_rate": 2.8298853883407018e-06, "loss": 0.0715, "step": 114770 }, { "epoch": 3.3944520021292957, "grad_norm": 1.2260398864746094, "learning_rate": 2.8297586984217626e-06, "loss": 0.0756, "step": 114780 }, { "epoch": 3.3947477376234696, "grad_norm": 0.6446289420127869, "learning_rate": 2.829632008502823e-06, "loss": 0.068, "step": 114790 }, { "epoch": 3.3950434731176435, "grad_norm": 1.0341675281524658, "learning_rate": 2.8295053185838837e-06, "loss": 0.0768, "step": 114800 }, { "epoch": 3.3953392086118175, "grad_norm": 0.9388046860694885, "learning_rate": 2.8293786286649445e-06, "loss": 0.069, "step": 114810 }, { "epoch": 3.3956349441059914, "grad_norm": 0.8337457180023193, "learning_rate": 2.8292519387460053e-06, "loss": 0.0664, "step": 114820 }, { "epoch": 3.3959306796001654, "grad_norm": 0.6578294634819031, "learning_rate": 2.8291252488270657e-06, "loss": 0.0873, "step": 114830 }, { "epoch": 3.3962264150943398, "grad_norm": 0.960564374923706, "learning_rate": 2.8289985589081265e-06, "loss": 0.0658, "step": 114840 }, { "epoch": 3.3965221505885137, "grad_norm": 1.0494801998138428, "learning_rate": 2.828871868989187e-06, "loss": 0.0801, "step": 114850 }, { "epoch": 3.3968178860826876, "grad_norm": 1.2268908023834229, "learning_rate": 2.8287451790702476e-06, "loss": 0.0823, "step": 114860 }, { "epoch": 3.3971136215768616, "grad_norm": 0.7782576084136963, "learning_rate": 2.828618489151308e-06, "loss": 0.0678, "step": 114870 }, { "epoch": 3.3974093570710355, "grad_norm": 0.8642292022705078, "learning_rate": 2.8284917992323688e-06, "loss": 0.066, "step": 114880 }, { "epoch": 3.39770509256521, "grad_norm": 0.5924301743507385, "learning_rate": 2.8283651093134296e-06, "loss": 0.0502, "step": 114890 }, { "epoch": 3.398000828059384, "grad_norm": 1.1700735092163086, "learning_rate": 2.8282384193944904e-06, "loss": 0.0803, "step": 114900 }, { "epoch": 3.398296563553558, "grad_norm": 1.100197672843933, "learning_rate": 2.8281117294755507e-06, "loss": 0.0972, "step": 114910 }, { "epoch": 3.3985922990477317, "grad_norm": 0.7118486166000366, "learning_rate": 2.8279850395566115e-06, "loss": 0.0747, "step": 114920 }, { "epoch": 3.3988880345419057, "grad_norm": 1.0465524196624756, "learning_rate": 2.827858349637672e-06, "loss": 0.0651, "step": 114930 }, { "epoch": 3.3991837700360796, "grad_norm": 1.0079714059829712, "learning_rate": 2.8277316597187327e-06, "loss": 0.0603, "step": 114940 }, { "epoch": 3.3994795055302536, "grad_norm": 1.3127186298370361, "learning_rate": 2.827604969799793e-06, "loss": 0.0757, "step": 114950 }, { "epoch": 3.399775241024428, "grad_norm": 0.9120844602584839, "learning_rate": 2.827478279880854e-06, "loss": 0.0705, "step": 114960 }, { "epoch": 3.400070976518602, "grad_norm": 1.013059139251709, "learning_rate": 2.8273515899619146e-06, "loss": 0.0769, "step": 114970 }, { "epoch": 3.400366712012776, "grad_norm": 0.5065194368362427, "learning_rate": 2.8272249000429754e-06, "loss": 0.0761, "step": 114980 }, { "epoch": 3.40066244750695, "grad_norm": 0.8966113924980164, "learning_rate": 2.8270982101240358e-06, "loss": 0.0648, "step": 114990 }, { "epoch": 3.4009581830011237, "grad_norm": 0.6904852390289307, "learning_rate": 2.8269715202050966e-06, "loss": 0.0751, "step": 115000 }, { "epoch": 3.4012539184952977, "grad_norm": 1.599657416343689, "learning_rate": 2.826844830286157e-06, "loss": 0.072, "step": 115010 }, { "epoch": 3.4015496539894716, "grad_norm": 0.7439097762107849, "learning_rate": 2.8267181403672177e-06, "loss": 0.0826, "step": 115020 }, { "epoch": 3.401845389483646, "grad_norm": 2.2272307872772217, "learning_rate": 2.826591450448278e-06, "loss": 0.0628, "step": 115030 }, { "epoch": 3.40214112497782, "grad_norm": 1.088533639907837, "learning_rate": 2.826464760529339e-06, "loss": 0.0793, "step": 115040 }, { "epoch": 3.402436860471994, "grad_norm": 1.1599639654159546, "learning_rate": 2.8263380706103997e-06, "loss": 0.0935, "step": 115050 }, { "epoch": 3.402732595966168, "grad_norm": 0.72422856092453, "learning_rate": 2.8262113806914604e-06, "loss": 0.0811, "step": 115060 }, { "epoch": 3.403028331460342, "grad_norm": 0.7548467516899109, "learning_rate": 2.826084690772521e-06, "loss": 0.0718, "step": 115070 }, { "epoch": 3.4033240669545157, "grad_norm": 0.7374622821807861, "learning_rate": 2.8259580008535816e-06, "loss": 0.0668, "step": 115080 }, { "epoch": 3.4036198024486897, "grad_norm": 0.7507949471473694, "learning_rate": 2.825831310934642e-06, "loss": 0.0585, "step": 115090 }, { "epoch": 3.403915537942864, "grad_norm": 1.2165024280548096, "learning_rate": 2.8257046210157028e-06, "loss": 0.0793, "step": 115100 }, { "epoch": 3.404211273437038, "grad_norm": 0.921934962272644, "learning_rate": 2.825577931096763e-06, "loss": 0.0773, "step": 115110 }, { "epoch": 3.404507008931212, "grad_norm": 0.7769036889076233, "learning_rate": 2.825451241177824e-06, "loss": 0.0681, "step": 115120 }, { "epoch": 3.404802744425386, "grad_norm": 0.8578250408172607, "learning_rate": 2.8253245512588847e-06, "loss": 0.0787, "step": 115130 }, { "epoch": 3.40509847991956, "grad_norm": 1.581722617149353, "learning_rate": 2.825197861339945e-06, "loss": 0.0574, "step": 115140 }, { "epoch": 3.4053942154137338, "grad_norm": 1.2275915145874023, "learning_rate": 2.825071171421006e-06, "loss": 0.0987, "step": 115150 }, { "epoch": 3.405689950907908, "grad_norm": 0.7363007068634033, "learning_rate": 2.8249444815020662e-06, "loss": 0.0759, "step": 115160 }, { "epoch": 3.405985686402082, "grad_norm": 0.761384129524231, "learning_rate": 2.824817791583127e-06, "loss": 0.0782, "step": 115170 }, { "epoch": 3.406281421896256, "grad_norm": 0.9653704762458801, "learning_rate": 2.8246911016641874e-06, "loss": 0.0698, "step": 115180 }, { "epoch": 3.40657715739043, "grad_norm": 1.0092625617980957, "learning_rate": 2.824564411745248e-06, "loss": 0.0631, "step": 115190 }, { "epoch": 3.406872892884604, "grad_norm": 0.6917619109153748, "learning_rate": 2.8244377218263085e-06, "loss": 0.0621, "step": 115200 }, { "epoch": 3.407168628378778, "grad_norm": 0.47426721453666687, "learning_rate": 2.8243110319073697e-06, "loss": 0.084, "step": 115210 }, { "epoch": 3.4074643638729523, "grad_norm": 0.650936484336853, "learning_rate": 2.82418434198843e-06, "loss": 0.0809, "step": 115220 }, { "epoch": 3.407760099367126, "grad_norm": 0.9529038667678833, "learning_rate": 2.824057652069491e-06, "loss": 0.0714, "step": 115230 }, { "epoch": 3.4080558348613, "grad_norm": 0.5533929467201233, "learning_rate": 2.8239309621505513e-06, "loss": 0.0597, "step": 115240 }, { "epoch": 3.408351570355474, "grad_norm": 0.8571462631225586, "learning_rate": 2.823804272231612e-06, "loss": 0.075, "step": 115250 }, { "epoch": 3.408647305849648, "grad_norm": 0.9683941602706909, "learning_rate": 2.8236775823126724e-06, "loss": 0.0765, "step": 115260 }, { "epoch": 3.408943041343822, "grad_norm": 0.7865235209465027, "learning_rate": 2.8235508923937332e-06, "loss": 0.0774, "step": 115270 }, { "epoch": 3.409238776837996, "grad_norm": 0.7304435968399048, "learning_rate": 2.8234242024747936e-06, "loss": 0.0824, "step": 115280 }, { "epoch": 3.4095345123321703, "grad_norm": 0.6572493314743042, "learning_rate": 2.823297512555855e-06, "loss": 0.0587, "step": 115290 }, { "epoch": 3.4098302478263443, "grad_norm": 0.8736838698387146, "learning_rate": 2.823170822636915e-06, "loss": 0.0813, "step": 115300 }, { "epoch": 3.410125983320518, "grad_norm": 1.246937870979309, "learning_rate": 2.823044132717976e-06, "loss": 0.0803, "step": 115310 }, { "epoch": 3.410421718814692, "grad_norm": 0.9235628247261047, "learning_rate": 2.8229174427990363e-06, "loss": 0.0902, "step": 115320 }, { "epoch": 3.410717454308866, "grad_norm": 0.8769066333770752, "learning_rate": 2.822790752880097e-06, "loss": 0.0719, "step": 115330 }, { "epoch": 3.41101318980304, "grad_norm": 0.8815338015556335, "learning_rate": 2.8226640629611575e-06, "loss": 0.0647, "step": 115340 }, { "epoch": 3.411308925297214, "grad_norm": 1.001670479774475, "learning_rate": 2.8225373730422183e-06, "loss": 0.0736, "step": 115350 }, { "epoch": 3.4116046607913884, "grad_norm": 0.8224853277206421, "learning_rate": 2.8224106831232786e-06, "loss": 0.0916, "step": 115360 }, { "epoch": 3.4119003962855623, "grad_norm": 1.1957453489303589, "learning_rate": 2.82228399320434e-06, "loss": 0.0748, "step": 115370 }, { "epoch": 3.4121961317797362, "grad_norm": 0.4332294762134552, "learning_rate": 2.8221573032854e-06, "loss": 0.0838, "step": 115380 }, { "epoch": 3.41249186727391, "grad_norm": 0.8730527758598328, "learning_rate": 2.822030613366461e-06, "loss": 0.0453, "step": 115390 }, { "epoch": 3.412787602768084, "grad_norm": 0.8928958773612976, "learning_rate": 2.8219039234475214e-06, "loss": 0.0782, "step": 115400 }, { "epoch": 3.413083338262258, "grad_norm": 0.6159773468971252, "learning_rate": 2.821777233528582e-06, "loss": 0.0736, "step": 115410 }, { "epoch": 3.413379073756432, "grad_norm": 0.6554266214370728, "learning_rate": 2.8216505436096425e-06, "loss": 0.0805, "step": 115420 }, { "epoch": 3.4136748092506064, "grad_norm": 0.696037232875824, "learning_rate": 2.8215238536907033e-06, "loss": 0.0571, "step": 115430 }, { "epoch": 3.4139705447447803, "grad_norm": 0.6609219312667847, "learning_rate": 2.8213971637717637e-06, "loss": 0.0678, "step": 115440 }, { "epoch": 3.4142662802389543, "grad_norm": 0.7504254579544067, "learning_rate": 2.821270473852825e-06, "loss": 0.0753, "step": 115450 }, { "epoch": 3.4145620157331282, "grad_norm": 1.0846467018127441, "learning_rate": 2.8211437839338853e-06, "loss": 0.0684, "step": 115460 }, { "epoch": 3.414857751227302, "grad_norm": 0.9726859927177429, "learning_rate": 2.821017094014946e-06, "loss": 0.0674, "step": 115470 }, { "epoch": 3.4151534867214766, "grad_norm": 0.6626895070075989, "learning_rate": 2.8208904040960064e-06, "loss": 0.0689, "step": 115480 }, { "epoch": 3.4154492222156505, "grad_norm": 0.6074608564376831, "learning_rate": 2.820763714177067e-06, "loss": 0.0533, "step": 115490 }, { "epoch": 3.4157449577098244, "grad_norm": 0.44626688957214355, "learning_rate": 2.8206370242581276e-06, "loss": 0.0767, "step": 115500 }, { "epoch": 3.4160406932039984, "grad_norm": 0.7348412871360779, "learning_rate": 2.8205103343391884e-06, "loss": 0.074, "step": 115510 }, { "epoch": 3.4163364286981723, "grad_norm": 0.8822003602981567, "learning_rate": 2.8203836444202487e-06, "loss": 0.0807, "step": 115520 }, { "epoch": 3.4166321641923463, "grad_norm": 1.0234413146972656, "learning_rate": 2.82025695450131e-06, "loss": 0.0833, "step": 115530 }, { "epoch": 3.41692789968652, "grad_norm": 0.5501644611358643, "learning_rate": 2.8201302645823703e-06, "loss": 0.0492, "step": 115540 }, { "epoch": 3.4172236351806946, "grad_norm": 0.9363442063331604, "learning_rate": 2.8200035746634307e-06, "loss": 0.0892, "step": 115550 }, { "epoch": 3.4175193706748686, "grad_norm": 0.9776995182037354, "learning_rate": 2.8198768847444915e-06, "loss": 0.0766, "step": 115560 }, { "epoch": 3.4178151061690425, "grad_norm": 2.2163867950439453, "learning_rate": 2.819750194825552e-06, "loss": 0.0755, "step": 115570 }, { "epoch": 3.4181108416632164, "grad_norm": 1.1256287097930908, "learning_rate": 2.8196235049066126e-06, "loss": 0.0701, "step": 115580 }, { "epoch": 3.4184065771573904, "grad_norm": 0.5164584517478943, "learning_rate": 2.819496814987673e-06, "loss": 0.058, "step": 115590 }, { "epoch": 3.4187023126515643, "grad_norm": 1.1421760320663452, "learning_rate": 2.8193701250687338e-06, "loss": 0.078, "step": 115600 }, { "epoch": 3.4189980481457383, "grad_norm": 0.9433148503303528, "learning_rate": 2.8192434351497946e-06, "loss": 0.0657, "step": 115610 }, { "epoch": 3.4192937836399127, "grad_norm": 1.283746600151062, "learning_rate": 2.8191167452308553e-06, "loss": 0.0639, "step": 115620 }, { "epoch": 3.4195895191340866, "grad_norm": 0.9526348114013672, "learning_rate": 2.8189900553119157e-06, "loss": 0.0522, "step": 115630 }, { "epoch": 3.4198852546282605, "grad_norm": 0.5045960545539856, "learning_rate": 2.8188633653929765e-06, "loss": 0.0644, "step": 115640 }, { "epoch": 3.4201809901224345, "grad_norm": 1.0463610887527466, "learning_rate": 2.818736675474037e-06, "loss": 0.0743, "step": 115650 }, { "epoch": 3.4204767256166084, "grad_norm": 1.2866230010986328, "learning_rate": 2.8186099855550977e-06, "loss": 0.0876, "step": 115660 }, { "epoch": 3.4207724611107824, "grad_norm": 0.7477794885635376, "learning_rate": 2.818483295636158e-06, "loss": 0.0835, "step": 115670 }, { "epoch": 3.4210681966049563, "grad_norm": 1.1291600465774536, "learning_rate": 2.818356605717219e-06, "loss": 0.081, "step": 115680 }, { "epoch": 3.4213639320991307, "grad_norm": 0.8491576313972473, "learning_rate": 2.8182299157982796e-06, "loss": 0.0564, "step": 115690 }, { "epoch": 3.4216596675933046, "grad_norm": 0.8052017688751221, "learning_rate": 2.8181032258793404e-06, "loss": 0.0768, "step": 115700 }, { "epoch": 3.4219554030874786, "grad_norm": 0.9602989554405212, "learning_rate": 2.8179765359604008e-06, "loss": 0.093, "step": 115710 }, { "epoch": 3.4222511385816525, "grad_norm": 0.974260151386261, "learning_rate": 2.8178498460414615e-06, "loss": 0.0802, "step": 115720 }, { "epoch": 3.4225468740758265, "grad_norm": 0.9543970227241516, "learning_rate": 2.817723156122522e-06, "loss": 0.0654, "step": 115730 }, { "epoch": 3.4228426095700004, "grad_norm": 0.9163342118263245, "learning_rate": 2.8175964662035827e-06, "loss": 0.0612, "step": 115740 }, { "epoch": 3.4231383450641744, "grad_norm": 0.5920558571815491, "learning_rate": 2.817469776284643e-06, "loss": 0.083, "step": 115750 }, { "epoch": 3.4234340805583487, "grad_norm": 0.6403548717498779, "learning_rate": 2.817343086365704e-06, "loss": 0.0664, "step": 115760 }, { "epoch": 3.4237298160525227, "grad_norm": 0.41936296224594116, "learning_rate": 2.8172163964467646e-06, "loss": 0.0805, "step": 115770 }, { "epoch": 3.4240255515466966, "grad_norm": 0.8198935985565186, "learning_rate": 2.8170897065278254e-06, "loss": 0.086, "step": 115780 }, { "epoch": 3.4243212870408706, "grad_norm": 1.2200204133987427, "learning_rate": 2.816963016608886e-06, "loss": 0.0716, "step": 115790 }, { "epoch": 3.4246170225350445, "grad_norm": 1.1441859006881714, "learning_rate": 2.8168363266899466e-06, "loss": 0.0742, "step": 115800 }, { "epoch": 3.424912758029219, "grad_norm": 0.9380640387535095, "learning_rate": 2.816709636771007e-06, "loss": 0.0931, "step": 115810 }, { "epoch": 3.425208493523393, "grad_norm": 0.5764615535736084, "learning_rate": 2.8165829468520677e-06, "loss": 0.0726, "step": 115820 }, { "epoch": 3.425504229017567, "grad_norm": 0.6489649415016174, "learning_rate": 2.816456256933128e-06, "loss": 0.0621, "step": 115830 }, { "epoch": 3.4257999645117407, "grad_norm": 0.7060625553131104, "learning_rate": 2.816329567014189e-06, "loss": 0.0605, "step": 115840 }, { "epoch": 3.4260957000059147, "grad_norm": 0.5945353507995605, "learning_rate": 2.8162028770952497e-06, "loss": 0.0876, "step": 115850 }, { "epoch": 3.4263914355000886, "grad_norm": 1.0360368490219116, "learning_rate": 2.8160761871763105e-06, "loss": 0.0791, "step": 115860 }, { "epoch": 3.4266871709942626, "grad_norm": 1.7848154306411743, "learning_rate": 2.815949497257371e-06, "loss": 0.0777, "step": 115870 }, { "epoch": 3.426982906488437, "grad_norm": 1.094780683517456, "learning_rate": 2.8158228073384316e-06, "loss": 0.0666, "step": 115880 }, { "epoch": 3.427278641982611, "grad_norm": 0.6790952086448669, "learning_rate": 2.815696117419492e-06, "loss": 0.0479, "step": 115890 }, { "epoch": 3.427574377476785, "grad_norm": 0.7347491383552551, "learning_rate": 2.8155694275005528e-06, "loss": 0.0713, "step": 115900 }, { "epoch": 3.427870112970959, "grad_norm": 1.0821001529693604, "learning_rate": 2.815442737581613e-06, "loss": 0.0817, "step": 115910 }, { "epoch": 3.4281658484651327, "grad_norm": 0.6028574705123901, "learning_rate": 2.815316047662674e-06, "loss": 0.0631, "step": 115920 }, { "epoch": 3.4284615839593067, "grad_norm": 0.7989180088043213, "learning_rate": 2.8151893577437347e-06, "loss": 0.0737, "step": 115930 }, { "epoch": 3.4287573194534806, "grad_norm": 0.8206923604011536, "learning_rate": 2.8150626678247955e-06, "loss": 0.0832, "step": 115940 }, { "epoch": 3.429053054947655, "grad_norm": 1.0217336416244507, "learning_rate": 2.814935977905856e-06, "loss": 0.0787, "step": 115950 }, { "epoch": 3.429348790441829, "grad_norm": 0.9397019147872925, "learning_rate": 2.8148092879869163e-06, "loss": 0.0699, "step": 115960 }, { "epoch": 3.429644525936003, "grad_norm": 1.2057642936706543, "learning_rate": 2.814682598067977e-06, "loss": 0.0829, "step": 115970 }, { "epoch": 3.429940261430177, "grad_norm": 0.8604112863540649, "learning_rate": 2.8145559081490374e-06, "loss": 0.0628, "step": 115980 }, { "epoch": 3.4302359969243508, "grad_norm": 1.2210257053375244, "learning_rate": 2.814429218230098e-06, "loss": 0.06, "step": 115990 }, { "epoch": 3.4305317324185247, "grad_norm": 0.8247233033180237, "learning_rate": 2.8143025283111586e-06, "loss": 0.068, "step": 116000 }, { "epoch": 3.4308274679126987, "grad_norm": 1.5097613334655762, "learning_rate": 2.8141758383922198e-06, "loss": 0.0756, "step": 116010 }, { "epoch": 3.431123203406873, "grad_norm": 0.8326315879821777, "learning_rate": 2.81404914847328e-06, "loss": 0.0729, "step": 116020 }, { "epoch": 3.431418938901047, "grad_norm": 1.2538079023361206, "learning_rate": 2.813922458554341e-06, "loss": 0.0741, "step": 116030 }, { "epoch": 3.431714674395221, "grad_norm": 0.5412291288375854, "learning_rate": 2.8137957686354013e-06, "loss": 0.058, "step": 116040 }, { "epoch": 3.432010409889395, "grad_norm": 0.7162148356437683, "learning_rate": 2.813669078716462e-06, "loss": 0.0612, "step": 116050 }, { "epoch": 3.432306145383569, "grad_norm": 0.5442380309104919, "learning_rate": 2.8135423887975225e-06, "loss": 0.0769, "step": 116060 }, { "epoch": 3.4326018808777428, "grad_norm": 0.9112063646316528, "learning_rate": 2.8134156988785832e-06, "loss": 0.0808, "step": 116070 }, { "epoch": 3.432897616371917, "grad_norm": 1.2723504304885864, "learning_rate": 2.8132890089596436e-06, "loss": 0.0702, "step": 116080 }, { "epoch": 3.433193351866091, "grad_norm": 0.7329260110855103, "learning_rate": 2.813162319040705e-06, "loss": 0.0579, "step": 116090 }, { "epoch": 3.433489087360265, "grad_norm": 1.7739989757537842, "learning_rate": 2.813035629121765e-06, "loss": 0.0718, "step": 116100 }, { "epoch": 3.433784822854439, "grad_norm": 1.4823873043060303, "learning_rate": 2.812908939202826e-06, "loss": 0.0908, "step": 116110 }, { "epoch": 3.434080558348613, "grad_norm": 1.2295284271240234, "learning_rate": 2.8127822492838863e-06, "loss": 0.0734, "step": 116120 }, { "epoch": 3.434376293842787, "grad_norm": 0.8660095930099487, "learning_rate": 2.812655559364947e-06, "loss": 0.0814, "step": 116130 }, { "epoch": 3.4346720293369613, "grad_norm": 0.7727881669998169, "learning_rate": 2.8125288694460075e-06, "loss": 0.0625, "step": 116140 }, { "epoch": 3.434967764831135, "grad_norm": 0.4312504529953003, "learning_rate": 2.8124021795270683e-06, "loss": 0.0776, "step": 116150 }, { "epoch": 3.435263500325309, "grad_norm": 0.9899801015853882, "learning_rate": 2.8122754896081287e-06, "loss": 0.0735, "step": 116160 }, { "epoch": 3.435559235819483, "grad_norm": 0.6269161701202393, "learning_rate": 2.81214879968919e-06, "loss": 0.0593, "step": 116170 }, { "epoch": 3.435854971313657, "grad_norm": 0.8139151334762573, "learning_rate": 2.8120221097702502e-06, "loss": 0.0768, "step": 116180 }, { "epoch": 3.436150706807831, "grad_norm": 1.2068068981170654, "learning_rate": 2.811895419851311e-06, "loss": 0.0675, "step": 116190 }, { "epoch": 3.436446442302005, "grad_norm": 0.9863395690917969, "learning_rate": 2.8117687299323714e-06, "loss": 0.0747, "step": 116200 }, { "epoch": 3.4367421777961793, "grad_norm": 1.2181764841079712, "learning_rate": 2.811642040013432e-06, "loss": 0.0883, "step": 116210 }, { "epoch": 3.4370379132903532, "grad_norm": 0.6458531022071838, "learning_rate": 2.8115153500944925e-06, "loss": 0.0896, "step": 116220 }, { "epoch": 3.437333648784527, "grad_norm": 0.7579948306083679, "learning_rate": 2.8113886601755533e-06, "loss": 0.0947, "step": 116230 }, { "epoch": 3.437629384278701, "grad_norm": 0.7180668711662292, "learning_rate": 2.8112619702566137e-06, "loss": 0.0533, "step": 116240 }, { "epoch": 3.437925119772875, "grad_norm": 1.2086398601531982, "learning_rate": 2.811135280337675e-06, "loss": 0.0953, "step": 116250 }, { "epoch": 3.438220855267049, "grad_norm": 0.5196147561073303, "learning_rate": 2.8110085904187353e-06, "loss": 0.0931, "step": 116260 }, { "epoch": 3.438516590761223, "grad_norm": 0.9358680248260498, "learning_rate": 2.810881900499796e-06, "loss": 0.0853, "step": 116270 }, { "epoch": 3.4388123262553973, "grad_norm": 0.6087987422943115, "learning_rate": 2.8107552105808564e-06, "loss": 0.0483, "step": 116280 }, { "epoch": 3.4391080617495713, "grad_norm": 0.5575027465820312, "learning_rate": 2.8106285206619172e-06, "loss": 0.063, "step": 116290 }, { "epoch": 3.4394037972437452, "grad_norm": 0.8909807801246643, "learning_rate": 2.8105018307429776e-06, "loss": 0.0771, "step": 116300 }, { "epoch": 3.439699532737919, "grad_norm": 1.1180495023727417, "learning_rate": 2.8103751408240384e-06, "loss": 0.0901, "step": 116310 }, { "epoch": 3.439995268232093, "grad_norm": 0.7179195284843445, "learning_rate": 2.8102484509050987e-06, "loss": 0.0737, "step": 116320 }, { "epoch": 3.440291003726267, "grad_norm": 0.3030759394168854, "learning_rate": 2.81012176098616e-06, "loss": 0.0596, "step": 116330 }, { "epoch": 3.440586739220441, "grad_norm": 0.5532143115997314, "learning_rate": 2.8099950710672203e-06, "loss": 0.0518, "step": 116340 }, { "epoch": 3.4408824747146154, "grad_norm": 0.6595944166183472, "learning_rate": 2.809868381148281e-06, "loss": 0.0843, "step": 116350 }, { "epoch": 3.4411782102087893, "grad_norm": 0.6549411416053772, "learning_rate": 2.8097416912293415e-06, "loss": 0.0791, "step": 116360 }, { "epoch": 3.4414739457029633, "grad_norm": 0.6155311465263367, "learning_rate": 2.8096150013104023e-06, "loss": 0.0711, "step": 116370 }, { "epoch": 3.441769681197137, "grad_norm": 0.5532020926475525, "learning_rate": 2.8094883113914626e-06, "loss": 0.0649, "step": 116380 }, { "epoch": 3.442065416691311, "grad_norm": 0.9136495590209961, "learning_rate": 2.809361621472523e-06, "loss": 0.0709, "step": 116390 }, { "epoch": 3.4423611521854856, "grad_norm": 0.7943657040596008, "learning_rate": 2.809234931553584e-06, "loss": 0.0801, "step": 116400 }, { "epoch": 3.4426568876796595, "grad_norm": 1.1739388704299927, "learning_rate": 2.8091082416346446e-06, "loss": 0.0808, "step": 116410 }, { "epoch": 3.4429526231738334, "grad_norm": 1.2505420446395874, "learning_rate": 2.8089815517157054e-06, "loss": 0.0785, "step": 116420 }, { "epoch": 3.4432483586680074, "grad_norm": 1.2678077220916748, "learning_rate": 2.8088548617967657e-06, "loss": 0.0815, "step": 116430 }, { "epoch": 3.4435440941621813, "grad_norm": 0.728194534778595, "learning_rate": 2.8087281718778265e-06, "loss": 0.0612, "step": 116440 }, { "epoch": 3.4438398296563553, "grad_norm": 0.6259037852287292, "learning_rate": 2.808601481958887e-06, "loss": 0.0879, "step": 116450 }, { "epoch": 3.444135565150529, "grad_norm": 0.8744532465934753, "learning_rate": 2.8084747920399477e-06, "loss": 0.0823, "step": 116460 }, { "epoch": 3.4444313006447036, "grad_norm": 0.7425752282142639, "learning_rate": 2.808348102121008e-06, "loss": 0.0712, "step": 116470 }, { "epoch": 3.4447270361388775, "grad_norm": 0.7250081300735474, "learning_rate": 2.808221412202069e-06, "loss": 0.0747, "step": 116480 }, { "epoch": 3.4450227716330515, "grad_norm": 0.5589936971664429, "learning_rate": 2.8080947222831296e-06, "loss": 0.057, "step": 116490 }, { "epoch": 3.4453185071272254, "grad_norm": 1.340754747390747, "learning_rate": 2.8079680323641904e-06, "loss": 0.0846, "step": 116500 }, { "epoch": 3.4456142426213994, "grad_norm": 0.7977778315544128, "learning_rate": 2.8078413424452508e-06, "loss": 0.0717, "step": 116510 }, { "epoch": 3.4459099781155733, "grad_norm": 0.7209972739219666, "learning_rate": 2.8077146525263116e-06, "loss": 0.0713, "step": 116520 }, { "epoch": 3.4462057136097473, "grad_norm": 0.7306705713272095, "learning_rate": 2.807587962607372e-06, "loss": 0.0659, "step": 116530 }, { "epoch": 3.4465014491039216, "grad_norm": 0.6023370027542114, "learning_rate": 2.8074612726884327e-06, "loss": 0.0596, "step": 116540 }, { "epoch": 3.4467971845980956, "grad_norm": 1.0180078744888306, "learning_rate": 2.807334582769493e-06, "loss": 0.083, "step": 116550 }, { "epoch": 3.4470929200922695, "grad_norm": 0.9246268272399902, "learning_rate": 2.807207892850554e-06, "loss": 0.0804, "step": 116560 }, { "epoch": 3.4473886555864435, "grad_norm": 0.690730094909668, "learning_rate": 2.8070812029316147e-06, "loss": 0.0674, "step": 116570 }, { "epoch": 3.4476843910806174, "grad_norm": 0.7564706206321716, "learning_rate": 2.8069545130126755e-06, "loss": 0.0697, "step": 116580 }, { "epoch": 3.4479801265747914, "grad_norm": 1.1181361675262451, "learning_rate": 2.806827823093736e-06, "loss": 0.0696, "step": 116590 }, { "epoch": 3.4482758620689653, "grad_norm": 0.8419915437698364, "learning_rate": 2.8067011331747966e-06, "loss": 0.0774, "step": 116600 }, { "epoch": 3.4485715975631397, "grad_norm": 0.7834492921829224, "learning_rate": 2.806574443255857e-06, "loss": 0.0799, "step": 116610 }, { "epoch": 3.4488673330573136, "grad_norm": 0.7566487789154053, "learning_rate": 2.8064477533369178e-06, "loss": 0.0766, "step": 116620 }, { "epoch": 3.4491630685514876, "grad_norm": 0.40591177344322205, "learning_rate": 2.806321063417978e-06, "loss": 0.057, "step": 116630 }, { "epoch": 3.4494588040456615, "grad_norm": 1.022110104560852, "learning_rate": 2.806194373499039e-06, "loss": 0.078, "step": 116640 }, { "epoch": 3.4497545395398355, "grad_norm": 2.845637083053589, "learning_rate": 2.8060676835800997e-06, "loss": 0.0877, "step": 116650 }, { "epoch": 3.4500502750340094, "grad_norm": 0.9772235155105591, "learning_rate": 2.8059409936611605e-06, "loss": 0.086, "step": 116660 }, { "epoch": 3.4503460105281833, "grad_norm": 0.766424834728241, "learning_rate": 2.805814303742221e-06, "loss": 0.0776, "step": 116670 }, { "epoch": 3.4506417460223577, "grad_norm": 0.7173107266426086, "learning_rate": 2.8056876138232817e-06, "loss": 0.0815, "step": 116680 }, { "epoch": 3.4509374815165317, "grad_norm": 0.5103711485862732, "learning_rate": 2.805560923904342e-06, "loss": 0.0712, "step": 116690 }, { "epoch": 3.4512332170107056, "grad_norm": 0.9409945011138916, "learning_rate": 2.805434233985403e-06, "loss": 0.0794, "step": 116700 }, { "epoch": 3.4515289525048796, "grad_norm": 1.4522937536239624, "learning_rate": 2.805307544066463e-06, "loss": 0.0851, "step": 116710 }, { "epoch": 3.4518246879990535, "grad_norm": 0.5324541926383972, "learning_rate": 2.805180854147524e-06, "loss": 0.0701, "step": 116720 }, { "epoch": 3.452120423493228, "grad_norm": 0.9941402077674866, "learning_rate": 2.8050541642285848e-06, "loss": 0.0655, "step": 116730 }, { "epoch": 3.452416158987402, "grad_norm": 0.8515713810920715, "learning_rate": 2.8049274743096456e-06, "loss": 0.066, "step": 116740 }, { "epoch": 3.452711894481576, "grad_norm": 1.2218782901763916, "learning_rate": 2.804800784390706e-06, "loss": 0.0962, "step": 116750 }, { "epoch": 3.4530076299757497, "grad_norm": 1.1845206022262573, "learning_rate": 2.8046740944717667e-06, "loss": 0.0769, "step": 116760 }, { "epoch": 3.4533033654699237, "grad_norm": 1.1882370710372925, "learning_rate": 2.804547404552827e-06, "loss": 0.073, "step": 116770 }, { "epoch": 3.4535991009640976, "grad_norm": 0.823075532913208, "learning_rate": 2.804420714633888e-06, "loss": 0.0682, "step": 116780 }, { "epoch": 3.4538948364582716, "grad_norm": 0.7030251026153564, "learning_rate": 2.8042940247149482e-06, "loss": 0.0783, "step": 116790 }, { "epoch": 3.454190571952446, "grad_norm": 0.7249537706375122, "learning_rate": 2.8041673347960086e-06, "loss": 0.0707, "step": 116800 }, { "epoch": 3.45448630744662, "grad_norm": 0.8106158971786499, "learning_rate": 2.80404064487707e-06, "loss": 0.0742, "step": 116810 }, { "epoch": 3.454782042940794, "grad_norm": 1.3225313425064087, "learning_rate": 2.80391395495813e-06, "loss": 0.0737, "step": 116820 }, { "epoch": 3.4550777784349678, "grad_norm": 1.230364441871643, "learning_rate": 2.803787265039191e-06, "loss": 0.0711, "step": 116830 }, { "epoch": 3.4553735139291417, "grad_norm": 1.020353078842163, "learning_rate": 2.8036605751202513e-06, "loss": 0.0713, "step": 116840 }, { "epoch": 3.4556692494233157, "grad_norm": 0.8517923951148987, "learning_rate": 2.803533885201312e-06, "loss": 0.0871, "step": 116850 }, { "epoch": 3.4559649849174896, "grad_norm": 1.74233078956604, "learning_rate": 2.8034071952823725e-06, "loss": 0.0862, "step": 116860 }, { "epoch": 3.456260720411664, "grad_norm": 0.6334033608436584, "learning_rate": 2.8032805053634333e-06, "loss": 0.0732, "step": 116870 }, { "epoch": 3.456556455905838, "grad_norm": 0.8307142853736877, "learning_rate": 2.8031538154444936e-06, "loss": 0.0788, "step": 116880 }, { "epoch": 3.456852191400012, "grad_norm": 0.5507476329803467, "learning_rate": 2.803027125525555e-06, "loss": 0.0626, "step": 116890 }, { "epoch": 3.457147926894186, "grad_norm": 1.0007580518722534, "learning_rate": 2.8029004356066152e-06, "loss": 0.0921, "step": 116900 }, { "epoch": 3.4574436623883598, "grad_norm": 0.8367026448249817, "learning_rate": 2.802773745687676e-06, "loss": 0.084, "step": 116910 }, { "epoch": 3.4577393978825337, "grad_norm": 0.916502058506012, "learning_rate": 2.8026470557687364e-06, "loss": 0.0632, "step": 116920 }, { "epoch": 3.4580351333767076, "grad_norm": 0.9854723215103149, "learning_rate": 2.802520365849797e-06, "loss": 0.08, "step": 116930 }, { "epoch": 3.458330868870882, "grad_norm": 0.8461350202560425, "learning_rate": 2.8023936759308575e-06, "loss": 0.0693, "step": 116940 }, { "epoch": 3.458626604365056, "grad_norm": 1.0365263223648071, "learning_rate": 2.8022669860119183e-06, "loss": 0.0871, "step": 116950 }, { "epoch": 3.45892233985923, "grad_norm": 0.7679287791252136, "learning_rate": 2.8021402960929787e-06, "loss": 0.0751, "step": 116960 }, { "epoch": 3.459218075353404, "grad_norm": 0.9370001554489136, "learning_rate": 2.80201360617404e-06, "loss": 0.0686, "step": 116970 }, { "epoch": 3.459513810847578, "grad_norm": 0.8482109904289246, "learning_rate": 2.8018869162551003e-06, "loss": 0.0608, "step": 116980 }, { "epoch": 3.4598095463417518, "grad_norm": 1.1438686847686768, "learning_rate": 2.801760226336161e-06, "loss": 0.0646, "step": 116990 }, { "epoch": 3.460105281835926, "grad_norm": 0.6175031661987305, "learning_rate": 2.8016335364172214e-06, "loss": 0.0767, "step": 117000 }, { "epoch": 3.4604010173301, "grad_norm": 0.9593144059181213, "learning_rate": 2.801506846498282e-06, "loss": 0.0762, "step": 117010 }, { "epoch": 3.460696752824274, "grad_norm": 1.0053924322128296, "learning_rate": 2.8013801565793426e-06, "loss": 0.073, "step": 117020 }, { "epoch": 3.460992488318448, "grad_norm": 0.9222036004066467, "learning_rate": 2.8012534666604034e-06, "loss": 0.077, "step": 117030 }, { "epoch": 3.461288223812622, "grad_norm": 0.2840908169746399, "learning_rate": 2.8011267767414637e-06, "loss": 0.0607, "step": 117040 }, { "epoch": 3.461583959306796, "grad_norm": 1.0038015842437744, "learning_rate": 2.801000086822525e-06, "loss": 0.0934, "step": 117050 }, { "epoch": 3.4618796948009702, "grad_norm": 0.8568241596221924, "learning_rate": 2.8008733969035853e-06, "loss": 0.0686, "step": 117060 }, { "epoch": 3.462175430295144, "grad_norm": 1.2614295482635498, "learning_rate": 2.800746706984646e-06, "loss": 0.0739, "step": 117070 }, { "epoch": 3.462471165789318, "grad_norm": 1.1136553287506104, "learning_rate": 2.8006200170657065e-06, "loss": 0.0651, "step": 117080 }, { "epoch": 3.462766901283492, "grad_norm": 0.578201174736023, "learning_rate": 2.8004933271467673e-06, "loss": 0.0648, "step": 117090 }, { "epoch": 3.463062636777666, "grad_norm": 0.7026573419570923, "learning_rate": 2.8003666372278276e-06, "loss": 0.0762, "step": 117100 }, { "epoch": 3.46335837227184, "grad_norm": 0.4491939842700958, "learning_rate": 2.8002399473088884e-06, "loss": 0.0642, "step": 117110 }, { "epoch": 3.463654107766014, "grad_norm": 0.9676555395126343, "learning_rate": 2.8001132573899488e-06, "loss": 0.0824, "step": 117120 }, { "epoch": 3.4639498432601883, "grad_norm": 0.8287506103515625, "learning_rate": 2.79998656747101e-06, "loss": 0.0637, "step": 117130 }, { "epoch": 3.4642455787543622, "grad_norm": 0.875841498374939, "learning_rate": 2.7998598775520704e-06, "loss": 0.0573, "step": 117140 }, { "epoch": 3.464541314248536, "grad_norm": 0.7637282013893127, "learning_rate": 2.799733187633131e-06, "loss": 0.0715, "step": 117150 }, { "epoch": 3.46483704974271, "grad_norm": 1.1499744653701782, "learning_rate": 2.7996064977141915e-06, "loss": 0.0785, "step": 117160 }, { "epoch": 3.465132785236884, "grad_norm": 1.1065412759780884, "learning_rate": 2.7994798077952523e-06, "loss": 0.0722, "step": 117170 }, { "epoch": 3.465428520731058, "grad_norm": 0.4588501453399658, "learning_rate": 2.7993531178763127e-06, "loss": 0.0806, "step": 117180 }, { "epoch": 3.465724256225232, "grad_norm": 0.8798460960388184, "learning_rate": 2.7992264279573735e-06, "loss": 0.0702, "step": 117190 }, { "epoch": 3.4660199917194063, "grad_norm": 1.2610927820205688, "learning_rate": 2.799099738038434e-06, "loss": 0.0877, "step": 117200 }, { "epoch": 3.4663157272135803, "grad_norm": 0.5851929783821106, "learning_rate": 2.7989730481194946e-06, "loss": 0.082, "step": 117210 }, { "epoch": 3.466611462707754, "grad_norm": 0.8101959228515625, "learning_rate": 2.7988463582005554e-06, "loss": 0.0809, "step": 117220 }, { "epoch": 3.466907198201928, "grad_norm": 0.7263274192810059, "learning_rate": 2.7987196682816158e-06, "loss": 0.0745, "step": 117230 }, { "epoch": 3.467202933696102, "grad_norm": 0.6480680704116821, "learning_rate": 2.7985929783626766e-06, "loss": 0.0734, "step": 117240 }, { "epoch": 3.467498669190276, "grad_norm": 0.8090904355049133, "learning_rate": 2.798466288443737e-06, "loss": 0.0746, "step": 117250 }, { "epoch": 3.46779440468445, "grad_norm": 1.098535180091858, "learning_rate": 2.7983395985247977e-06, "loss": 0.0699, "step": 117260 }, { "epoch": 3.4680901401786244, "grad_norm": 0.8378864526748657, "learning_rate": 2.798212908605858e-06, "loss": 0.0856, "step": 117270 }, { "epoch": 3.4683858756727983, "grad_norm": 0.5009201169013977, "learning_rate": 2.798086218686919e-06, "loss": 0.087, "step": 117280 }, { "epoch": 3.4686816111669723, "grad_norm": 0.8723235130310059, "learning_rate": 2.7979595287679797e-06, "loss": 0.0774, "step": 117290 }, { "epoch": 3.468977346661146, "grad_norm": 0.7174007892608643, "learning_rate": 2.7978328388490404e-06, "loss": 0.081, "step": 117300 }, { "epoch": 3.46927308215532, "grad_norm": 1.0524595975875854, "learning_rate": 2.797706148930101e-06, "loss": 0.0856, "step": 117310 }, { "epoch": 3.4695688176494945, "grad_norm": 0.5020195841789246, "learning_rate": 2.7975794590111616e-06, "loss": 0.0661, "step": 117320 }, { "epoch": 3.4698645531436685, "grad_norm": 0.8464064598083496, "learning_rate": 2.797452769092222e-06, "loss": 0.0779, "step": 117330 }, { "epoch": 3.4701602886378424, "grad_norm": 1.4914478063583374, "learning_rate": 2.7973260791732828e-06, "loss": 0.0674, "step": 117340 }, { "epoch": 3.4704560241320164, "grad_norm": 0.4834405183792114, "learning_rate": 2.797199389254343e-06, "loss": 0.0831, "step": 117350 }, { "epoch": 3.4707517596261903, "grad_norm": 0.7852376103401184, "learning_rate": 2.797072699335404e-06, "loss": 0.0713, "step": 117360 }, { "epoch": 3.4710474951203643, "grad_norm": 1.3422353267669678, "learning_rate": 2.7969460094164647e-06, "loss": 0.0876, "step": 117370 }, { "epoch": 3.471343230614538, "grad_norm": 0.6019725799560547, "learning_rate": 2.7968193194975255e-06, "loss": 0.0599, "step": 117380 }, { "epoch": 3.4716389661087126, "grad_norm": 1.8301033973693848, "learning_rate": 2.796692629578586e-06, "loss": 0.0546, "step": 117390 }, { "epoch": 3.4719347016028865, "grad_norm": 1.7557222843170166, "learning_rate": 2.7965659396596466e-06, "loss": 0.0827, "step": 117400 }, { "epoch": 3.4722304370970605, "grad_norm": 0.8967693448066711, "learning_rate": 2.796439249740707e-06, "loss": 0.0757, "step": 117410 }, { "epoch": 3.4725261725912344, "grad_norm": 0.634926438331604, "learning_rate": 2.796312559821768e-06, "loss": 0.0818, "step": 117420 }, { "epoch": 3.4728219080854084, "grad_norm": 0.6228482127189636, "learning_rate": 2.796185869902828e-06, "loss": 0.0652, "step": 117430 }, { "epoch": 3.4731176435795823, "grad_norm": 0.8372787237167358, "learning_rate": 2.796059179983889e-06, "loss": 0.062, "step": 117440 }, { "epoch": 3.4734133790737562, "grad_norm": 1.1383497714996338, "learning_rate": 2.7959324900649497e-06, "loss": 0.07, "step": 117450 }, { "epoch": 3.4737091145679306, "grad_norm": 0.6633824706077576, "learning_rate": 2.7958058001460105e-06, "loss": 0.0598, "step": 117460 }, { "epoch": 3.4740048500621046, "grad_norm": 0.7590635418891907, "learning_rate": 2.795679110227071e-06, "loss": 0.0773, "step": 117470 }, { "epoch": 3.4743005855562785, "grad_norm": 0.5732219815254211, "learning_rate": 2.7955524203081317e-06, "loss": 0.0599, "step": 117480 }, { "epoch": 3.4745963210504525, "grad_norm": 0.6633855104446411, "learning_rate": 2.795425730389192e-06, "loss": 0.0583, "step": 117490 }, { "epoch": 3.4748920565446264, "grad_norm": 1.1314520835876465, "learning_rate": 2.795299040470253e-06, "loss": 0.0803, "step": 117500 }, { "epoch": 3.4751877920388003, "grad_norm": 1.455629587173462, "learning_rate": 2.7951723505513132e-06, "loss": 0.0911, "step": 117510 }, { "epoch": 3.4754835275329743, "grad_norm": 0.9105058908462524, "learning_rate": 2.795045660632374e-06, "loss": 0.0854, "step": 117520 }, { "epoch": 3.4757792630271487, "grad_norm": 0.5951915383338928, "learning_rate": 2.794918970713435e-06, "loss": 0.0588, "step": 117530 }, { "epoch": 3.4760749985213226, "grad_norm": 0.9889602065086365, "learning_rate": 2.7947922807944956e-06, "loss": 0.0647, "step": 117540 }, { "epoch": 3.4763707340154966, "grad_norm": 0.7024766206741333, "learning_rate": 2.794665590875556e-06, "loss": 0.0773, "step": 117550 }, { "epoch": 3.4766664695096705, "grad_norm": 0.8708027601242065, "learning_rate": 2.7945389009566167e-06, "loss": 0.0775, "step": 117560 }, { "epoch": 3.4769622050038445, "grad_norm": 1.596186876296997, "learning_rate": 2.794412211037677e-06, "loss": 0.0741, "step": 117570 }, { "epoch": 3.4772579404980184, "grad_norm": 0.8056138157844543, "learning_rate": 2.794285521118738e-06, "loss": 0.0619, "step": 117580 }, { "epoch": 3.4775536759921923, "grad_norm": 0.5067310929298401, "learning_rate": 2.7941588311997983e-06, "loss": 0.0566, "step": 117590 }, { "epoch": 3.4778494114863667, "grad_norm": 0.9529201984405518, "learning_rate": 2.794032141280859e-06, "loss": 0.083, "step": 117600 }, { "epoch": 3.4781451469805407, "grad_norm": 1.1135700941085815, "learning_rate": 2.79390545136192e-06, "loss": 0.0699, "step": 117610 }, { "epoch": 3.4784408824747146, "grad_norm": 0.7400045990943909, "learning_rate": 2.79377876144298e-06, "loss": 0.0817, "step": 117620 }, { "epoch": 3.4787366179688886, "grad_norm": 0.7804133892059326, "learning_rate": 2.793652071524041e-06, "loss": 0.0844, "step": 117630 }, { "epoch": 3.4790323534630625, "grad_norm": 0.7897351384162903, "learning_rate": 2.7935253816051014e-06, "loss": 0.0597, "step": 117640 }, { "epoch": 3.479328088957237, "grad_norm": 0.9386324882507324, "learning_rate": 2.793398691686162e-06, "loss": 0.0607, "step": 117650 }, { "epoch": 3.479623824451411, "grad_norm": 0.9536910057067871, "learning_rate": 2.7932720017672225e-06, "loss": 0.0732, "step": 117660 }, { "epoch": 3.4799195599455848, "grad_norm": 1.2031941413879395, "learning_rate": 2.7931453118482833e-06, "loss": 0.0892, "step": 117670 }, { "epoch": 3.4802152954397587, "grad_norm": 0.8271722793579102, "learning_rate": 2.7930186219293437e-06, "loss": 0.078, "step": 117680 }, { "epoch": 3.4805110309339327, "grad_norm": 0.6512380838394165, "learning_rate": 2.792891932010405e-06, "loss": 0.0551, "step": 117690 }, { "epoch": 3.4808067664281066, "grad_norm": 0.8927130699157715, "learning_rate": 2.7927652420914652e-06, "loss": 0.0867, "step": 117700 }, { "epoch": 3.4811025019222805, "grad_norm": 0.726786732673645, "learning_rate": 2.792638552172526e-06, "loss": 0.0739, "step": 117710 }, { "epoch": 3.481398237416455, "grad_norm": 0.647885262966156, "learning_rate": 2.7925118622535864e-06, "loss": 0.0939, "step": 117720 }, { "epoch": 3.481693972910629, "grad_norm": 0.5028814077377319, "learning_rate": 2.792385172334647e-06, "loss": 0.0809, "step": 117730 }, { "epoch": 3.481989708404803, "grad_norm": 0.4594452381134033, "learning_rate": 2.7922584824157076e-06, "loss": 0.0593, "step": 117740 }, { "epoch": 3.4822854438989768, "grad_norm": 0.5487266778945923, "learning_rate": 2.7921317924967684e-06, "loss": 0.0647, "step": 117750 }, { "epoch": 3.4825811793931507, "grad_norm": 1.0153584480285645, "learning_rate": 2.7920051025778287e-06, "loss": 0.0646, "step": 117760 }, { "epoch": 3.4828769148873246, "grad_norm": 1.5657782554626465, "learning_rate": 2.79187841265889e-06, "loss": 0.0894, "step": 117770 }, { "epoch": 3.4831726503814986, "grad_norm": 0.8996114730834961, "learning_rate": 2.7917517227399503e-06, "loss": 0.0687, "step": 117780 }, { "epoch": 3.483468385875673, "grad_norm": 0.6080062985420227, "learning_rate": 2.791625032821011e-06, "loss": 0.0498, "step": 117790 }, { "epoch": 3.483764121369847, "grad_norm": 0.7953366637229919, "learning_rate": 2.7914983429020715e-06, "loss": 0.0762, "step": 117800 }, { "epoch": 3.484059856864021, "grad_norm": 0.8600997924804688, "learning_rate": 2.7913716529831322e-06, "loss": 0.0676, "step": 117810 }, { "epoch": 3.484355592358195, "grad_norm": 1.0761853456497192, "learning_rate": 2.7912449630641926e-06, "loss": 0.0769, "step": 117820 }, { "epoch": 3.4846513278523688, "grad_norm": 0.8130003809928894, "learning_rate": 2.7911182731452534e-06, "loss": 0.0571, "step": 117830 }, { "epoch": 3.4849470633465427, "grad_norm": 0.7689175605773926, "learning_rate": 2.7909915832263138e-06, "loss": 0.0979, "step": 117840 }, { "epoch": 3.4852427988407166, "grad_norm": 0.575563907623291, "learning_rate": 2.790864893307375e-06, "loss": 0.0682, "step": 117850 }, { "epoch": 3.485538534334891, "grad_norm": 0.8693364858627319, "learning_rate": 2.7907382033884353e-06, "loss": 0.076, "step": 117860 }, { "epoch": 3.485834269829065, "grad_norm": 0.8269609212875366, "learning_rate": 2.790611513469496e-06, "loss": 0.0746, "step": 117870 }, { "epoch": 3.486130005323239, "grad_norm": 1.05520498752594, "learning_rate": 2.7904848235505565e-06, "loss": 0.0762, "step": 117880 }, { "epoch": 3.486425740817413, "grad_norm": 0.7647047638893127, "learning_rate": 2.7903581336316173e-06, "loss": 0.0689, "step": 117890 }, { "epoch": 3.486721476311587, "grad_norm": 0.7819309234619141, "learning_rate": 2.7902314437126777e-06, "loss": 0.0667, "step": 117900 }, { "epoch": 3.4870172118057607, "grad_norm": 0.5606473684310913, "learning_rate": 2.7901047537937384e-06, "loss": 0.0654, "step": 117910 }, { "epoch": 3.487312947299935, "grad_norm": 0.7498955726623535, "learning_rate": 2.789978063874799e-06, "loss": 0.0784, "step": 117920 }, { "epoch": 3.487608682794109, "grad_norm": 0.43627268075942993, "learning_rate": 2.78985137395586e-06, "loss": 0.0648, "step": 117930 }, { "epoch": 3.487904418288283, "grad_norm": 0.8248856067657471, "learning_rate": 2.7897246840369204e-06, "loss": 0.0735, "step": 117940 }, { "epoch": 3.488200153782457, "grad_norm": 1.1193493604660034, "learning_rate": 2.789597994117981e-06, "loss": 0.0813, "step": 117950 }, { "epoch": 3.488495889276631, "grad_norm": 0.6677243113517761, "learning_rate": 2.7894713041990415e-06, "loss": 0.0725, "step": 117960 }, { "epoch": 3.488791624770805, "grad_norm": 0.9106341600418091, "learning_rate": 2.7893446142801023e-06, "loss": 0.0861, "step": 117970 }, { "epoch": 3.4890873602649792, "grad_norm": 0.7733978629112244, "learning_rate": 2.7892179243611627e-06, "loss": 0.0759, "step": 117980 }, { "epoch": 3.489383095759153, "grad_norm": 0.789608895778656, "learning_rate": 2.7890912344422235e-06, "loss": 0.0587, "step": 117990 }, { "epoch": 3.489678831253327, "grad_norm": 0.5679166913032532, "learning_rate": 2.788964544523284e-06, "loss": 0.0767, "step": 118000 }, { "epoch": 3.489974566747501, "grad_norm": 0.782579243183136, "learning_rate": 2.788837854604345e-06, "loss": 0.0866, "step": 118010 }, { "epoch": 3.490270302241675, "grad_norm": 1.3353900909423828, "learning_rate": 2.7887111646854054e-06, "loss": 0.0864, "step": 118020 }, { "epoch": 3.490566037735849, "grad_norm": 0.8184924721717834, "learning_rate": 2.788584474766466e-06, "loss": 0.0576, "step": 118030 }, { "epoch": 3.490861773230023, "grad_norm": 0.5992584228515625, "learning_rate": 2.7884577848475266e-06, "loss": 0.0597, "step": 118040 }, { "epoch": 3.4911575087241973, "grad_norm": 0.7552610039710999, "learning_rate": 2.788331094928587e-06, "loss": 0.0677, "step": 118050 }, { "epoch": 3.491453244218371, "grad_norm": 1.392155647277832, "learning_rate": 2.7882044050096477e-06, "loss": 0.093, "step": 118060 }, { "epoch": 3.491748979712545, "grad_norm": 0.6463810801506042, "learning_rate": 2.788077715090708e-06, "loss": 0.0687, "step": 118070 }, { "epoch": 3.492044715206719, "grad_norm": 0.6754687428474426, "learning_rate": 2.787951025171769e-06, "loss": 0.0739, "step": 118080 }, { "epoch": 3.492340450700893, "grad_norm": 1.3195730447769165, "learning_rate": 2.7878243352528297e-06, "loss": 0.0684, "step": 118090 }, { "epoch": 3.492636186195067, "grad_norm": 0.7772991061210632, "learning_rate": 2.7876976453338905e-06, "loss": 0.0813, "step": 118100 }, { "epoch": 3.492931921689241, "grad_norm": 0.8706021308898926, "learning_rate": 2.787570955414951e-06, "loss": 0.0928, "step": 118110 }, { "epoch": 3.4932276571834153, "grad_norm": 0.8464189767837524, "learning_rate": 2.7874442654960116e-06, "loss": 0.0783, "step": 118120 }, { "epoch": 3.4935233926775893, "grad_norm": 0.7120341062545776, "learning_rate": 2.787317575577072e-06, "loss": 0.0625, "step": 118130 }, { "epoch": 3.493819128171763, "grad_norm": 0.5401418209075928, "learning_rate": 2.7871908856581328e-06, "loss": 0.0576, "step": 118140 }, { "epoch": 3.494114863665937, "grad_norm": 0.7046948671340942, "learning_rate": 2.787064195739193e-06, "loss": 0.0646, "step": 118150 }, { "epoch": 3.494410599160111, "grad_norm": 0.8819357752799988, "learning_rate": 2.786937505820254e-06, "loss": 0.0692, "step": 118160 }, { "epoch": 3.494706334654285, "grad_norm": 0.9255150556564331, "learning_rate": 2.7868108159013147e-06, "loss": 0.0662, "step": 118170 }, { "epoch": 3.495002070148459, "grad_norm": 1.1911500692367554, "learning_rate": 2.7866841259823755e-06, "loss": 0.0744, "step": 118180 }, { "epoch": 3.4952978056426334, "grad_norm": 0.6230136156082153, "learning_rate": 2.786557436063436e-06, "loss": 0.0624, "step": 118190 }, { "epoch": 3.4955935411368073, "grad_norm": 1.1847760677337646, "learning_rate": 2.7864307461444967e-06, "loss": 0.095, "step": 118200 }, { "epoch": 3.4958892766309813, "grad_norm": 0.7401443719863892, "learning_rate": 2.786304056225557e-06, "loss": 0.0796, "step": 118210 }, { "epoch": 3.496185012125155, "grad_norm": 1.0889215469360352, "learning_rate": 2.786177366306618e-06, "loss": 0.0814, "step": 118220 }, { "epoch": 3.496480747619329, "grad_norm": 0.8983848094940186, "learning_rate": 2.786050676387678e-06, "loss": 0.0561, "step": 118230 }, { "epoch": 3.4967764831135035, "grad_norm": 0.8368040919303894, "learning_rate": 2.785923986468739e-06, "loss": 0.0558, "step": 118240 }, { "epoch": 3.4970722186076775, "grad_norm": 0.7563791275024414, "learning_rate": 2.7857972965497998e-06, "loss": 0.0792, "step": 118250 }, { "epoch": 3.4973679541018514, "grad_norm": 1.1571698188781738, "learning_rate": 2.7856706066308606e-06, "loss": 0.0901, "step": 118260 }, { "epoch": 3.4976636895960254, "grad_norm": 0.7164832353591919, "learning_rate": 2.785543916711921e-06, "loss": 0.0689, "step": 118270 }, { "epoch": 3.4979594250901993, "grad_norm": 0.9755460023880005, "learning_rate": 2.7854172267929817e-06, "loss": 0.0688, "step": 118280 }, { "epoch": 3.4982551605843732, "grad_norm": 0.7097353339195251, "learning_rate": 2.785290536874042e-06, "loss": 0.0609, "step": 118290 }, { "epoch": 3.498550896078547, "grad_norm": 1.1632115840911865, "learning_rate": 2.785163846955103e-06, "loss": 0.0625, "step": 118300 }, { "epoch": 3.4988466315727216, "grad_norm": 0.8963889479637146, "learning_rate": 2.7850371570361632e-06, "loss": 0.0999, "step": 118310 }, { "epoch": 3.4991423670668955, "grad_norm": 0.6118723750114441, "learning_rate": 2.784910467117224e-06, "loss": 0.0695, "step": 118320 }, { "epoch": 3.4994381025610695, "grad_norm": 0.5860889554023743, "learning_rate": 2.784783777198285e-06, "loss": 0.0642, "step": 118330 }, { "epoch": 3.4997338380552434, "grad_norm": 0.531943678855896, "learning_rate": 2.7846570872793456e-06, "loss": 0.0657, "step": 118340 }, { "epoch": 3.5000295735494173, "grad_norm": 0.8542025089263916, "learning_rate": 2.784530397360406e-06, "loss": 0.082, "step": 118350 }, { "epoch": 3.5003253090435913, "grad_norm": 0.9838287830352783, "learning_rate": 2.7844037074414668e-06, "loss": 0.0802, "step": 118360 }, { "epoch": 3.5006210445377652, "grad_norm": 1.0540921688079834, "learning_rate": 2.784277017522527e-06, "loss": 0.0742, "step": 118370 }, { "epoch": 3.5009167800319396, "grad_norm": 0.7682093381881714, "learning_rate": 2.784150327603588e-06, "loss": 0.0795, "step": 118380 }, { "epoch": 3.5012125155261136, "grad_norm": 1.0573668479919434, "learning_rate": 2.7840236376846483e-06, "loss": 0.065, "step": 118390 }, { "epoch": 3.5015082510202875, "grad_norm": 0.7187412977218628, "learning_rate": 2.783896947765709e-06, "loss": 0.0891, "step": 118400 }, { "epoch": 3.5018039865144615, "grad_norm": 1.2516889572143555, "learning_rate": 2.78377025784677e-06, "loss": 0.0871, "step": 118410 }, { "epoch": 3.5020997220086354, "grad_norm": 0.5933654308319092, "learning_rate": 2.7836435679278307e-06, "loss": 0.0725, "step": 118420 }, { "epoch": 3.5023954575028093, "grad_norm": 0.5073681473731995, "learning_rate": 2.783516878008891e-06, "loss": 0.0756, "step": 118430 }, { "epoch": 3.5026911929969833, "grad_norm": 0.2136562019586563, "learning_rate": 2.783390188089952e-06, "loss": 0.0451, "step": 118440 }, { "epoch": 3.5029869284911577, "grad_norm": 0.7675934433937073, "learning_rate": 2.783263498171012e-06, "loss": 0.096, "step": 118450 }, { "epoch": 3.5032826639853316, "grad_norm": 0.4945664703845978, "learning_rate": 2.7831368082520725e-06, "loss": 0.0706, "step": 118460 }, { "epoch": 3.5035783994795056, "grad_norm": 1.0164451599121094, "learning_rate": 2.7830101183331333e-06, "loss": 0.0678, "step": 118470 }, { "epoch": 3.5038741349736795, "grad_norm": 1.1097067594528198, "learning_rate": 2.7828834284141937e-06, "loss": 0.0883, "step": 118480 }, { "epoch": 3.5041698704678534, "grad_norm": 0.6438956260681152, "learning_rate": 2.782756738495255e-06, "loss": 0.0536, "step": 118490 }, { "epoch": 3.504465605962028, "grad_norm": 1.3647671937942505, "learning_rate": 2.7826300485763153e-06, "loss": 0.0802, "step": 118500 }, { "epoch": 3.5047613414562013, "grad_norm": 0.7862593531608582, "learning_rate": 2.782503358657376e-06, "loss": 0.0772, "step": 118510 }, { "epoch": 3.5050570769503757, "grad_norm": 0.9037920236587524, "learning_rate": 2.7823766687384364e-06, "loss": 0.0838, "step": 118520 }, { "epoch": 3.5053528124445497, "grad_norm": 0.943585216999054, "learning_rate": 2.7822499788194972e-06, "loss": 0.0552, "step": 118530 }, { "epoch": 3.5056485479387236, "grad_norm": 0.6180572509765625, "learning_rate": 2.7821232889005576e-06, "loss": 0.054, "step": 118540 }, { "epoch": 3.5059442834328975, "grad_norm": 0.7078492641448975, "learning_rate": 2.7819965989816184e-06, "loss": 0.087, "step": 118550 }, { "epoch": 3.5062400189270715, "grad_norm": 0.9324924349784851, "learning_rate": 2.7818699090626787e-06, "loss": 0.079, "step": 118560 }, { "epoch": 3.506535754421246, "grad_norm": 0.3555964529514313, "learning_rate": 2.78174321914374e-06, "loss": 0.0744, "step": 118570 }, { "epoch": 3.5068314899154194, "grad_norm": 0.43417438864707947, "learning_rate": 2.7816165292248003e-06, "loss": 0.0719, "step": 118580 }, { "epoch": 3.5071272254095938, "grad_norm": 0.4677533805370331, "learning_rate": 2.781489839305861e-06, "loss": 0.0566, "step": 118590 }, { "epoch": 3.5074229609037677, "grad_norm": 1.6945652961730957, "learning_rate": 2.7813631493869215e-06, "loss": 0.0879, "step": 118600 }, { "epoch": 3.5077186963979416, "grad_norm": 0.9172545671463013, "learning_rate": 2.7812364594679823e-06, "loss": 0.0911, "step": 118610 }, { "epoch": 3.5080144318921156, "grad_norm": 1.2938786745071411, "learning_rate": 2.7811097695490426e-06, "loss": 0.0658, "step": 118620 }, { "epoch": 3.5083101673862895, "grad_norm": 0.5279759168624878, "learning_rate": 2.7809830796301034e-06, "loss": 0.0609, "step": 118630 }, { "epoch": 3.508605902880464, "grad_norm": 0.815268337726593, "learning_rate": 2.780856389711164e-06, "loss": 0.0616, "step": 118640 }, { "epoch": 3.508901638374638, "grad_norm": 0.7830461263656616, "learning_rate": 2.780729699792225e-06, "loss": 0.0803, "step": 118650 }, { "epoch": 3.509197373868812, "grad_norm": 0.7190282344818115, "learning_rate": 2.7806030098732854e-06, "loss": 0.0693, "step": 118660 }, { "epoch": 3.5094931093629858, "grad_norm": 1.0703036785125732, "learning_rate": 2.780476319954346e-06, "loss": 0.0731, "step": 118670 }, { "epoch": 3.5097888448571597, "grad_norm": 0.519733726978302, "learning_rate": 2.7803496300354065e-06, "loss": 0.0719, "step": 118680 }, { "epoch": 3.5100845803513336, "grad_norm": 0.6164687871932983, "learning_rate": 2.7802229401164673e-06, "loss": 0.0635, "step": 118690 }, { "epoch": 3.5103803158455076, "grad_norm": 0.8263679146766663, "learning_rate": 2.7800962501975277e-06, "loss": 0.0948, "step": 118700 }, { "epoch": 3.510676051339682, "grad_norm": 0.895246684551239, "learning_rate": 2.7799695602785885e-06, "loss": 0.0832, "step": 118710 }, { "epoch": 3.510971786833856, "grad_norm": 0.9687962532043457, "learning_rate": 2.779842870359649e-06, "loss": 0.0763, "step": 118720 }, { "epoch": 3.51126752232803, "grad_norm": 0.828402578830719, "learning_rate": 2.77971618044071e-06, "loss": 0.0792, "step": 118730 }, { "epoch": 3.511563257822204, "grad_norm": 1.278451681137085, "learning_rate": 2.7795894905217704e-06, "loss": 0.0683, "step": 118740 }, { "epoch": 3.5118589933163777, "grad_norm": 0.7038522362709045, "learning_rate": 2.779462800602831e-06, "loss": 0.082, "step": 118750 }, { "epoch": 3.5121547288105517, "grad_norm": 0.8996055126190186, "learning_rate": 2.7793361106838916e-06, "loss": 0.0889, "step": 118760 }, { "epoch": 3.5124504643047256, "grad_norm": 0.921384334564209, "learning_rate": 2.7792094207649524e-06, "loss": 0.0646, "step": 118770 }, { "epoch": 3.5127461997989, "grad_norm": 1.345577359199524, "learning_rate": 2.7790827308460127e-06, "loss": 0.0799, "step": 118780 }, { "epoch": 3.513041935293074, "grad_norm": 0.6006431579589844, "learning_rate": 2.7789560409270735e-06, "loss": 0.0642, "step": 118790 }, { "epoch": 3.513337670787248, "grad_norm": 0.5045706033706665, "learning_rate": 2.778829351008134e-06, "loss": 0.0773, "step": 118800 }, { "epoch": 3.513633406281422, "grad_norm": 0.566904604434967, "learning_rate": 2.778702661089195e-06, "loss": 0.0845, "step": 118810 }, { "epoch": 3.513929141775596, "grad_norm": 1.2881059646606445, "learning_rate": 2.7785759711702555e-06, "loss": 0.0868, "step": 118820 }, { "epoch": 3.51422487726977, "grad_norm": 0.7538445591926575, "learning_rate": 2.7784492812513163e-06, "loss": 0.06, "step": 118830 }, { "epoch": 3.5145206127639437, "grad_norm": 0.7866622805595398, "learning_rate": 2.7783225913323766e-06, "loss": 0.0614, "step": 118840 }, { "epoch": 3.514816348258118, "grad_norm": 1.0317069292068481, "learning_rate": 2.7781959014134374e-06, "loss": 0.0702, "step": 118850 }, { "epoch": 3.515112083752292, "grad_norm": 0.8527427911758423, "learning_rate": 2.7780692114944978e-06, "loss": 0.0811, "step": 118860 }, { "epoch": 3.515407819246466, "grad_norm": 0.6182346343994141, "learning_rate": 2.777942521575558e-06, "loss": 0.0621, "step": 118870 }, { "epoch": 3.51570355474064, "grad_norm": 0.7459995746612549, "learning_rate": 2.777815831656619e-06, "loss": 0.0764, "step": 118880 }, { "epoch": 3.515999290234814, "grad_norm": 0.43477317690849304, "learning_rate": 2.7776891417376797e-06, "loss": 0.0782, "step": 118890 }, { "epoch": 3.516295025728988, "grad_norm": 0.7596708536148071, "learning_rate": 2.7775624518187405e-06, "loss": 0.0787, "step": 118900 }, { "epoch": 3.516590761223162, "grad_norm": 0.6859630942344666, "learning_rate": 2.777435761899801e-06, "loss": 0.064, "step": 118910 }, { "epoch": 3.516886496717336, "grad_norm": 1.086134672164917, "learning_rate": 2.7773090719808617e-06, "loss": 0.0774, "step": 118920 }, { "epoch": 3.51718223221151, "grad_norm": 1.3320305347442627, "learning_rate": 2.777182382061922e-06, "loss": 0.0839, "step": 118930 }, { "epoch": 3.517477967705684, "grad_norm": 0.5226428508758545, "learning_rate": 2.777055692142983e-06, "loss": 0.0538, "step": 118940 }, { "epoch": 3.517773703199858, "grad_norm": 0.8874167203903198, "learning_rate": 2.776929002224043e-06, "loss": 0.0853, "step": 118950 }, { "epoch": 3.518069438694032, "grad_norm": 0.6168454885482788, "learning_rate": 2.776802312305104e-06, "loss": 0.0622, "step": 118960 }, { "epoch": 3.5183651741882063, "grad_norm": 0.6342722773551941, "learning_rate": 2.7766756223861648e-06, "loss": 0.0744, "step": 118970 }, { "epoch": 3.51866090968238, "grad_norm": 0.6854098439216614, "learning_rate": 2.7765489324672256e-06, "loss": 0.0677, "step": 118980 }, { "epoch": 3.518956645176554, "grad_norm": 1.1286027431488037, "learning_rate": 2.776422242548286e-06, "loss": 0.0715, "step": 118990 }, { "epoch": 3.519252380670728, "grad_norm": 0.4929102957248688, "learning_rate": 2.7762955526293467e-06, "loss": 0.0754, "step": 119000 }, { "epoch": 3.519548116164902, "grad_norm": 1.023687481880188, "learning_rate": 2.776168862710407e-06, "loss": 0.0725, "step": 119010 }, { "epoch": 3.519843851659076, "grad_norm": 0.855453372001648, "learning_rate": 2.776042172791468e-06, "loss": 0.0838, "step": 119020 }, { "epoch": 3.52013958715325, "grad_norm": 1.2867926359176636, "learning_rate": 2.7759154828725282e-06, "loss": 0.0661, "step": 119030 }, { "epoch": 3.5204353226474243, "grad_norm": 0.6776743531227112, "learning_rate": 2.775788792953589e-06, "loss": 0.0654, "step": 119040 }, { "epoch": 3.5207310581415983, "grad_norm": 0.6759560704231262, "learning_rate": 2.77566210303465e-06, "loss": 0.0859, "step": 119050 }, { "epoch": 3.521026793635772, "grad_norm": 1.174564242362976, "learning_rate": 2.7755354131157106e-06, "loss": 0.0877, "step": 119060 }, { "epoch": 3.521322529129946, "grad_norm": 0.5927112102508545, "learning_rate": 2.775408723196771e-06, "loss": 0.0778, "step": 119070 }, { "epoch": 3.52161826462412, "grad_norm": 0.5264405012130737, "learning_rate": 2.7752820332778318e-06, "loss": 0.0538, "step": 119080 }, { "epoch": 3.5219140001182945, "grad_norm": 1.1233738660812378, "learning_rate": 2.775155343358892e-06, "loss": 0.0559, "step": 119090 }, { "epoch": 3.522209735612468, "grad_norm": 0.8342267274856567, "learning_rate": 2.775028653439953e-06, "loss": 0.0856, "step": 119100 }, { "epoch": 3.5225054711066424, "grad_norm": 1.1158819198608398, "learning_rate": 2.7749019635210133e-06, "loss": 0.0816, "step": 119110 }, { "epoch": 3.5228012066008163, "grad_norm": 0.9386625289916992, "learning_rate": 2.774775273602074e-06, "loss": 0.0878, "step": 119120 }, { "epoch": 3.5230969420949902, "grad_norm": 1.129836082458496, "learning_rate": 2.774648583683135e-06, "loss": 0.0737, "step": 119130 }, { "epoch": 3.523392677589164, "grad_norm": 0.6164896488189697, "learning_rate": 2.7745218937641956e-06, "loss": 0.0689, "step": 119140 }, { "epoch": 3.523688413083338, "grad_norm": 0.8830827474594116, "learning_rate": 2.774395203845256e-06, "loss": 0.093, "step": 119150 }, { "epoch": 3.5239841485775125, "grad_norm": 0.7145805358886719, "learning_rate": 2.774268513926317e-06, "loss": 0.0805, "step": 119160 }, { "epoch": 3.524279884071686, "grad_norm": 0.6344000697135925, "learning_rate": 2.774141824007377e-06, "loss": 0.0821, "step": 119170 }, { "epoch": 3.5245756195658604, "grad_norm": 0.6863317489624023, "learning_rate": 2.774015134088438e-06, "loss": 0.0716, "step": 119180 }, { "epoch": 3.5248713550600344, "grad_norm": 0.6622938513755798, "learning_rate": 2.7738884441694983e-06, "loss": 0.0493, "step": 119190 }, { "epoch": 3.5251670905542083, "grad_norm": 0.8789580464363098, "learning_rate": 2.773761754250559e-06, "loss": 0.0859, "step": 119200 }, { "epoch": 3.5254628260483822, "grad_norm": 0.8580170273780823, "learning_rate": 2.77363506433162e-06, "loss": 0.0818, "step": 119210 }, { "epoch": 3.525758561542556, "grad_norm": 0.9379702210426331, "learning_rate": 2.7735083744126807e-06, "loss": 0.0717, "step": 119220 }, { "epoch": 3.5260542970367306, "grad_norm": 0.762567400932312, "learning_rate": 2.773381684493741e-06, "loss": 0.069, "step": 119230 }, { "epoch": 3.5263500325309045, "grad_norm": 1.0041298866271973, "learning_rate": 2.773254994574802e-06, "loss": 0.0519, "step": 119240 }, { "epoch": 3.5266457680250785, "grad_norm": 0.8573933243751526, "learning_rate": 2.773128304655862e-06, "loss": 0.0582, "step": 119250 }, { "epoch": 3.5269415035192524, "grad_norm": 0.8094114661216736, "learning_rate": 2.773001614736923e-06, "loss": 0.0699, "step": 119260 }, { "epoch": 3.5272372390134263, "grad_norm": 0.9310706257820129, "learning_rate": 2.7728749248179834e-06, "loss": 0.0676, "step": 119270 }, { "epoch": 3.5275329745076003, "grad_norm": 0.8374733924865723, "learning_rate": 2.7727482348990437e-06, "loss": 0.0543, "step": 119280 }, { "epoch": 3.5278287100017742, "grad_norm": 0.5123453140258789, "learning_rate": 2.772621544980105e-06, "loss": 0.0742, "step": 119290 }, { "epoch": 3.5281244454959486, "grad_norm": 1.2110508680343628, "learning_rate": 2.7724948550611653e-06, "loss": 0.082, "step": 119300 }, { "epoch": 3.5284201809901226, "grad_norm": 1.1393479108810425, "learning_rate": 2.772368165142226e-06, "loss": 0.0809, "step": 119310 }, { "epoch": 3.5287159164842965, "grad_norm": 0.634921133518219, "learning_rate": 2.7722414752232865e-06, "loss": 0.074, "step": 119320 }, { "epoch": 3.5290116519784704, "grad_norm": 0.9906076192855835, "learning_rate": 2.7721147853043473e-06, "loss": 0.0888, "step": 119330 }, { "epoch": 3.5293073874726444, "grad_norm": 0.7638213038444519, "learning_rate": 2.7719880953854076e-06, "loss": 0.0552, "step": 119340 }, { "epoch": 3.5296031229668183, "grad_norm": 1.1696890592575073, "learning_rate": 2.7718614054664684e-06, "loss": 0.0836, "step": 119350 }, { "epoch": 3.5298988584609923, "grad_norm": 0.8467298150062561, "learning_rate": 2.7717347155475288e-06, "loss": 0.0709, "step": 119360 }, { "epoch": 3.5301945939551667, "grad_norm": 0.6634470224380493, "learning_rate": 2.77160802562859e-06, "loss": 0.0632, "step": 119370 }, { "epoch": 3.5304903294493406, "grad_norm": 0.9824844002723694, "learning_rate": 2.7714813357096504e-06, "loss": 0.0628, "step": 119380 }, { "epoch": 3.5307860649435145, "grad_norm": 0.6429280042648315, "learning_rate": 2.771354645790711e-06, "loss": 0.0763, "step": 119390 }, { "epoch": 3.5310818004376885, "grad_norm": 1.4210902452468872, "learning_rate": 2.7712279558717715e-06, "loss": 0.0917, "step": 119400 }, { "epoch": 3.5313775359318624, "grad_norm": 0.8837377429008484, "learning_rate": 2.7711012659528323e-06, "loss": 0.0766, "step": 119410 }, { "epoch": 3.531673271426037, "grad_norm": 0.9663992524147034, "learning_rate": 2.7709745760338927e-06, "loss": 0.0742, "step": 119420 }, { "epoch": 3.5319690069202103, "grad_norm": 0.8028247952461243, "learning_rate": 2.7708478861149535e-06, "loss": 0.0721, "step": 119430 }, { "epoch": 3.5322647424143847, "grad_norm": 0.46572813391685486, "learning_rate": 2.770721196196014e-06, "loss": 0.069, "step": 119440 }, { "epoch": 3.5325604779085586, "grad_norm": 1.7181212902069092, "learning_rate": 2.770594506277075e-06, "loss": 0.0867, "step": 119450 }, { "epoch": 3.5328562134027326, "grad_norm": 0.9497933983802795, "learning_rate": 2.7704678163581354e-06, "loss": 0.0902, "step": 119460 }, { "epoch": 3.5331519488969065, "grad_norm": 1.0009684562683105, "learning_rate": 2.770341126439196e-06, "loss": 0.0685, "step": 119470 }, { "epoch": 3.5334476843910805, "grad_norm": 0.7176281213760376, "learning_rate": 2.7702144365202566e-06, "loss": 0.058, "step": 119480 }, { "epoch": 3.533743419885255, "grad_norm": 0.9060711860656738, "learning_rate": 2.7700877466013173e-06, "loss": 0.0771, "step": 119490 }, { "epoch": 3.5340391553794284, "grad_norm": 0.7280449271202087, "learning_rate": 2.7699610566823777e-06, "loss": 0.0715, "step": 119500 }, { "epoch": 3.5343348908736028, "grad_norm": 0.8392075896263123, "learning_rate": 2.7698343667634385e-06, "loss": 0.0704, "step": 119510 }, { "epoch": 3.5346306263677767, "grad_norm": 0.7723734378814697, "learning_rate": 2.769707676844499e-06, "loss": 0.0766, "step": 119520 }, { "epoch": 3.5349263618619506, "grad_norm": 0.6057173609733582, "learning_rate": 2.76958098692556e-06, "loss": 0.0789, "step": 119530 }, { "epoch": 3.5352220973561246, "grad_norm": 0.9902932643890381, "learning_rate": 2.7694542970066204e-06, "loss": 0.073, "step": 119540 }, { "epoch": 3.5355178328502985, "grad_norm": 1.2324000597000122, "learning_rate": 2.7693276070876812e-06, "loss": 0.0829, "step": 119550 }, { "epoch": 3.535813568344473, "grad_norm": 0.4277709424495697, "learning_rate": 2.7692009171687416e-06, "loss": 0.0729, "step": 119560 }, { "epoch": 3.536109303838647, "grad_norm": 0.7340251207351685, "learning_rate": 2.7690742272498024e-06, "loss": 0.0745, "step": 119570 }, { "epoch": 3.536405039332821, "grad_norm": 0.8255231976509094, "learning_rate": 2.7689475373308628e-06, "loss": 0.0763, "step": 119580 }, { "epoch": 3.5367007748269947, "grad_norm": 1.144399881362915, "learning_rate": 2.7688208474119235e-06, "loss": 0.0629, "step": 119590 }, { "epoch": 3.5369965103211687, "grad_norm": 0.6440675258636475, "learning_rate": 2.768694157492984e-06, "loss": 0.0808, "step": 119600 }, { "epoch": 3.5372922458153426, "grad_norm": 0.8977541923522949, "learning_rate": 2.768567467574045e-06, "loss": 0.0843, "step": 119610 }, { "epoch": 3.5375879813095166, "grad_norm": 0.9945605397224426, "learning_rate": 2.7684407776551055e-06, "loss": 0.0843, "step": 119620 }, { "epoch": 3.537883716803691, "grad_norm": 0.5527136921882629, "learning_rate": 2.7683140877361663e-06, "loss": 0.0734, "step": 119630 }, { "epoch": 3.538179452297865, "grad_norm": 1.9097243547439575, "learning_rate": 2.7681873978172266e-06, "loss": 0.0694, "step": 119640 }, { "epoch": 3.538475187792039, "grad_norm": 0.7215574979782104, "learning_rate": 2.7680607078982874e-06, "loss": 0.0693, "step": 119650 }, { "epoch": 3.538770923286213, "grad_norm": 0.7053928971290588, "learning_rate": 2.767934017979348e-06, "loss": 0.0715, "step": 119660 }, { "epoch": 3.5390666587803867, "grad_norm": 0.8366401195526123, "learning_rate": 2.7678073280604086e-06, "loss": 0.0741, "step": 119670 }, { "epoch": 3.5393623942745607, "grad_norm": 0.48834937810897827, "learning_rate": 2.767680638141469e-06, "loss": 0.065, "step": 119680 }, { "epoch": 3.5396581297687346, "grad_norm": 0.9667680859565735, "learning_rate": 2.7675539482225297e-06, "loss": 0.0757, "step": 119690 }, { "epoch": 3.539953865262909, "grad_norm": 0.9462109804153442, "learning_rate": 2.7674272583035905e-06, "loss": 0.0934, "step": 119700 }, { "epoch": 3.540249600757083, "grad_norm": 1.1440937519073486, "learning_rate": 2.767300568384651e-06, "loss": 0.0762, "step": 119710 }, { "epoch": 3.540545336251257, "grad_norm": 1.2442766427993774, "learning_rate": 2.7671738784657117e-06, "loss": 0.0794, "step": 119720 }, { "epoch": 3.540841071745431, "grad_norm": 0.8440248370170593, "learning_rate": 2.767047188546772e-06, "loss": 0.0876, "step": 119730 }, { "epoch": 3.5411368072396048, "grad_norm": 0.6312885880470276, "learning_rate": 2.766920498627833e-06, "loss": 0.0677, "step": 119740 }, { "epoch": 3.541432542733779, "grad_norm": 1.313240885734558, "learning_rate": 2.7667938087088932e-06, "loss": 0.085, "step": 119750 }, { "epoch": 3.5417282782279527, "grad_norm": 0.6798446178436279, "learning_rate": 2.766667118789954e-06, "loss": 0.0718, "step": 119760 }, { "epoch": 3.542024013722127, "grad_norm": 0.9060837030410767, "learning_rate": 2.766540428871015e-06, "loss": 0.0723, "step": 119770 }, { "epoch": 3.542319749216301, "grad_norm": 0.8722915053367615, "learning_rate": 2.7664137389520756e-06, "loss": 0.0561, "step": 119780 }, { "epoch": 3.542615484710475, "grad_norm": 0.9599272608757019, "learning_rate": 2.766287049033136e-06, "loss": 0.0702, "step": 119790 }, { "epoch": 3.542911220204649, "grad_norm": 1.120491623878479, "learning_rate": 2.7661603591141967e-06, "loss": 0.0746, "step": 119800 }, { "epoch": 3.543206955698823, "grad_norm": 0.7835818529129028, "learning_rate": 2.766033669195257e-06, "loss": 0.0656, "step": 119810 }, { "epoch": 3.543502691192997, "grad_norm": 1.0334633588790894, "learning_rate": 2.765906979276318e-06, "loss": 0.0799, "step": 119820 }, { "epoch": 3.543798426687171, "grad_norm": 0.7906219959259033, "learning_rate": 2.7657802893573783e-06, "loss": 0.0722, "step": 119830 }, { "epoch": 3.544094162181345, "grad_norm": 0.6960518956184387, "learning_rate": 2.765653599438439e-06, "loss": 0.0498, "step": 119840 }, { "epoch": 3.544389897675519, "grad_norm": 0.6946427226066589, "learning_rate": 2.7655269095195e-06, "loss": 0.0726, "step": 119850 }, { "epoch": 3.544685633169693, "grad_norm": 0.9881042242050171, "learning_rate": 2.7654002196005606e-06, "loss": 0.0666, "step": 119860 }, { "epoch": 3.544981368663867, "grad_norm": 0.69691002368927, "learning_rate": 2.765273529681621e-06, "loss": 0.0772, "step": 119870 }, { "epoch": 3.545277104158041, "grad_norm": 0.747911274433136, "learning_rate": 2.7651468397626818e-06, "loss": 0.0673, "step": 119880 }, { "epoch": 3.5455728396522153, "grad_norm": 0.9263376593589783, "learning_rate": 2.765020149843742e-06, "loss": 0.0665, "step": 119890 }, { "epoch": 3.545868575146389, "grad_norm": 0.7456077337265015, "learning_rate": 2.764893459924803e-06, "loss": 0.0717, "step": 119900 }, { "epoch": 3.546164310640563, "grad_norm": 0.7638139128684998, "learning_rate": 2.7647667700058633e-06, "loss": 0.0921, "step": 119910 }, { "epoch": 3.546460046134737, "grad_norm": 0.6226413249969482, "learning_rate": 2.764640080086924e-06, "loss": 0.0654, "step": 119920 }, { "epoch": 3.546755781628911, "grad_norm": 0.7292193174362183, "learning_rate": 2.764513390167985e-06, "loss": 0.0613, "step": 119930 }, { "epoch": 3.547051517123085, "grad_norm": 1.052778720855713, "learning_rate": 2.7643867002490457e-06, "loss": 0.0678, "step": 119940 }, { "epoch": 3.547347252617259, "grad_norm": 0.7128891348838806, "learning_rate": 2.764260010330106e-06, "loss": 0.0859, "step": 119950 }, { "epoch": 3.5476429881114333, "grad_norm": 0.9841415882110596, "learning_rate": 2.764133320411167e-06, "loss": 0.0769, "step": 119960 }, { "epoch": 3.5479387236056072, "grad_norm": 0.6531355977058411, "learning_rate": 2.764006630492227e-06, "loss": 0.0782, "step": 119970 }, { "epoch": 3.548234459099781, "grad_norm": 0.7496287822723389, "learning_rate": 2.763879940573288e-06, "loss": 0.0688, "step": 119980 }, { "epoch": 3.548530194593955, "grad_norm": 1.1776453256607056, "learning_rate": 2.7637532506543483e-06, "loss": 0.0681, "step": 119990 }, { "epoch": 3.548825930088129, "grad_norm": 0.6365205645561218, "learning_rate": 2.763626560735409e-06, "loss": 0.0717, "step": 120000 }, { "epoch": 3.5491216655823035, "grad_norm": 0.8513774275779724, "learning_rate": 2.76349987081647e-06, "loss": 0.0782, "step": 120010 }, { "epoch": 3.549417401076477, "grad_norm": 1.6526811122894287, "learning_rate": 2.7633731808975307e-06, "loss": 0.1052, "step": 120020 }, { "epoch": 3.5497131365706514, "grad_norm": 0.7921391725540161, "learning_rate": 2.763246490978591e-06, "loss": 0.0776, "step": 120030 }, { "epoch": 3.5500088720648253, "grad_norm": 0.45681527256965637, "learning_rate": 2.763119801059652e-06, "loss": 0.0589, "step": 120040 }, { "epoch": 3.5503046075589992, "grad_norm": 1.3055024147033691, "learning_rate": 2.7629931111407122e-06, "loss": 0.093, "step": 120050 }, { "epoch": 3.550600343053173, "grad_norm": 0.9277663826942444, "learning_rate": 2.762866421221773e-06, "loss": 0.0975, "step": 120060 }, { "epoch": 3.550896078547347, "grad_norm": 1.0217666625976562, "learning_rate": 2.7627397313028334e-06, "loss": 0.0816, "step": 120070 }, { "epoch": 3.5511918140415215, "grad_norm": 1.0193274021148682, "learning_rate": 2.762613041383894e-06, "loss": 0.0672, "step": 120080 }, { "epoch": 3.551487549535695, "grad_norm": 0.595129668712616, "learning_rate": 2.762486351464955e-06, "loss": 0.0632, "step": 120090 }, { "epoch": 3.5517832850298694, "grad_norm": 0.766872763633728, "learning_rate": 2.7623596615460153e-06, "loss": 0.0837, "step": 120100 }, { "epoch": 3.5520790205240433, "grad_norm": 0.6497275829315186, "learning_rate": 2.762232971627076e-06, "loss": 0.0702, "step": 120110 }, { "epoch": 3.5523747560182173, "grad_norm": 1.2633787393569946, "learning_rate": 2.7621062817081365e-06, "loss": 0.083, "step": 120120 }, { "epoch": 3.5526704915123912, "grad_norm": 0.5597922205924988, "learning_rate": 2.7619795917891973e-06, "loss": 0.0723, "step": 120130 }, { "epoch": 3.552966227006565, "grad_norm": 0.7356634736061096, "learning_rate": 2.7618529018702577e-06, "loss": 0.0604, "step": 120140 }, { "epoch": 3.5532619625007396, "grad_norm": 0.8567421436309814, "learning_rate": 2.7617262119513184e-06, "loss": 0.081, "step": 120150 }, { "epoch": 3.5535576979949135, "grad_norm": 1.1485676765441895, "learning_rate": 2.761599522032379e-06, "loss": 0.0907, "step": 120160 }, { "epoch": 3.5538534334890874, "grad_norm": 0.8807674050331116, "learning_rate": 2.76147283211344e-06, "loss": 0.0618, "step": 120170 }, { "epoch": 3.5541491689832614, "grad_norm": 0.6431307196617126, "learning_rate": 2.7613461421945004e-06, "loss": 0.0634, "step": 120180 }, { "epoch": 3.5544449044774353, "grad_norm": 0.9405707120895386, "learning_rate": 2.761219452275561e-06, "loss": 0.064, "step": 120190 }, { "epoch": 3.5547406399716093, "grad_norm": 0.5589476227760315, "learning_rate": 2.7610927623566215e-06, "loss": 0.07, "step": 120200 }, { "epoch": 3.555036375465783, "grad_norm": 1.1051671504974365, "learning_rate": 2.7609660724376823e-06, "loss": 0.083, "step": 120210 }, { "epoch": 3.5553321109599576, "grad_norm": 0.8775043487548828, "learning_rate": 2.7608393825187427e-06, "loss": 0.0728, "step": 120220 }, { "epoch": 3.5556278464541315, "grad_norm": 1.2954407930374146, "learning_rate": 2.7607126925998035e-06, "loss": 0.0672, "step": 120230 }, { "epoch": 3.5559235819483055, "grad_norm": 1.4934229850769043, "learning_rate": 2.760586002680864e-06, "loss": 0.0549, "step": 120240 }, { "epoch": 3.5562193174424794, "grad_norm": 1.1533210277557373, "learning_rate": 2.760459312761925e-06, "loss": 0.0902, "step": 120250 }, { "epoch": 3.5565150529366534, "grad_norm": 0.795094907283783, "learning_rate": 2.7603326228429854e-06, "loss": 0.0781, "step": 120260 }, { "epoch": 3.5568107884308273, "grad_norm": 0.7679897546768188, "learning_rate": 2.7602059329240462e-06, "loss": 0.0659, "step": 120270 }, { "epoch": 3.5571065239250013, "grad_norm": 0.8434847593307495, "learning_rate": 2.7600792430051066e-06, "loss": 0.0542, "step": 120280 }, { "epoch": 3.5574022594191756, "grad_norm": 1.3780008554458618, "learning_rate": 2.7599525530861674e-06, "loss": 0.0595, "step": 120290 }, { "epoch": 3.5576979949133496, "grad_norm": 0.973210334777832, "learning_rate": 2.7598258631672277e-06, "loss": 0.071, "step": 120300 }, { "epoch": 3.5579937304075235, "grad_norm": 0.7299523949623108, "learning_rate": 2.7596991732482885e-06, "loss": 0.0698, "step": 120310 }, { "epoch": 3.5582894659016975, "grad_norm": 1.1308344602584839, "learning_rate": 2.759572483329349e-06, "loss": 0.0834, "step": 120320 }, { "epoch": 3.5585852013958714, "grad_norm": 1.056617021560669, "learning_rate": 2.75944579341041e-06, "loss": 0.084, "step": 120330 }, { "epoch": 3.558880936890046, "grad_norm": 0.6819891333580017, "learning_rate": 2.7593191034914705e-06, "loss": 0.0663, "step": 120340 }, { "epoch": 3.5591766723842193, "grad_norm": 0.8313033580780029, "learning_rate": 2.7591924135725313e-06, "loss": 0.0713, "step": 120350 }, { "epoch": 3.5594724078783937, "grad_norm": 0.8365635871887207, "learning_rate": 2.7590657236535916e-06, "loss": 0.0824, "step": 120360 }, { "epoch": 3.5597681433725676, "grad_norm": 1.1612005233764648, "learning_rate": 2.7589390337346524e-06, "loss": 0.069, "step": 120370 }, { "epoch": 3.5600638788667416, "grad_norm": 0.6423743963241577, "learning_rate": 2.7588123438157128e-06, "loss": 0.065, "step": 120380 }, { "epoch": 3.5603596143609155, "grad_norm": 0.7575399279594421, "learning_rate": 2.7586856538967736e-06, "loss": 0.0681, "step": 120390 }, { "epoch": 3.5606553498550895, "grad_norm": 1.5491576194763184, "learning_rate": 2.758558963977834e-06, "loss": 0.0775, "step": 120400 }, { "epoch": 3.560951085349264, "grad_norm": 0.8660595417022705, "learning_rate": 2.758432274058895e-06, "loss": 0.0778, "step": 120410 }, { "epoch": 3.5612468208434374, "grad_norm": 0.6699705123901367, "learning_rate": 2.7583055841399555e-06, "loss": 0.0793, "step": 120420 }, { "epoch": 3.5615425563376117, "grad_norm": 1.4711065292358398, "learning_rate": 2.7581788942210163e-06, "loss": 0.06, "step": 120430 }, { "epoch": 3.5618382918317857, "grad_norm": 0.6074960827827454, "learning_rate": 2.7580522043020767e-06, "loss": 0.0605, "step": 120440 }, { "epoch": 3.5621340273259596, "grad_norm": 1.7543226480484009, "learning_rate": 2.7579255143831375e-06, "loss": 0.0835, "step": 120450 }, { "epoch": 3.5624297628201336, "grad_norm": 0.8289636373519897, "learning_rate": 2.757798824464198e-06, "loss": 0.0661, "step": 120460 }, { "epoch": 3.5627254983143075, "grad_norm": 0.7355225086212158, "learning_rate": 2.7576721345452586e-06, "loss": 0.0734, "step": 120470 }, { "epoch": 3.563021233808482, "grad_norm": 0.7608654499053955, "learning_rate": 2.757545444626319e-06, "loss": 0.0729, "step": 120480 }, { "epoch": 3.563316969302656, "grad_norm": 0.8784144520759583, "learning_rate": 2.75741875470738e-06, "loss": 0.0714, "step": 120490 }, { "epoch": 3.56361270479683, "grad_norm": 0.8707671165466309, "learning_rate": 2.7572920647884406e-06, "loss": 0.0799, "step": 120500 }, { "epoch": 3.5639084402910037, "grad_norm": 0.9809159636497498, "learning_rate": 2.7571653748695014e-06, "loss": 0.0763, "step": 120510 }, { "epoch": 3.5642041757851777, "grad_norm": 1.0117651224136353, "learning_rate": 2.7570386849505617e-06, "loss": 0.0824, "step": 120520 }, { "epoch": 3.5644999112793516, "grad_norm": 1.1814169883728027, "learning_rate": 2.756911995031622e-06, "loss": 0.0769, "step": 120530 }, { "epoch": 3.5647956467735256, "grad_norm": 1.0300554037094116, "learning_rate": 2.756785305112683e-06, "loss": 0.0629, "step": 120540 }, { "epoch": 3.5650913822677, "grad_norm": 0.7416080236434937, "learning_rate": 2.7566586151937432e-06, "loss": 0.0883, "step": 120550 }, { "epoch": 3.565387117761874, "grad_norm": 0.8931044936180115, "learning_rate": 2.756531925274804e-06, "loss": 0.0975, "step": 120560 }, { "epoch": 3.565682853256048, "grad_norm": 0.5609613656997681, "learning_rate": 2.7564052353558644e-06, "loss": 0.0741, "step": 120570 }, { "epoch": 3.565978588750222, "grad_norm": 1.2285079956054688, "learning_rate": 2.7562785454369256e-06, "loss": 0.0821, "step": 120580 }, { "epoch": 3.5662743242443957, "grad_norm": 0.769442081451416, "learning_rate": 2.756151855517986e-06, "loss": 0.0467, "step": 120590 }, { "epoch": 3.5665700597385697, "grad_norm": 0.5890923738479614, "learning_rate": 2.7560251655990468e-06, "loss": 0.0623, "step": 120600 }, { "epoch": 3.5668657952327436, "grad_norm": 1.0147823095321655, "learning_rate": 2.755898475680107e-06, "loss": 0.0926, "step": 120610 }, { "epoch": 3.567161530726918, "grad_norm": 0.7146095633506775, "learning_rate": 2.755771785761168e-06, "loss": 0.0841, "step": 120620 }, { "epoch": 3.567457266221092, "grad_norm": 0.8845331072807312, "learning_rate": 2.7556450958422283e-06, "loss": 0.0818, "step": 120630 }, { "epoch": 3.567753001715266, "grad_norm": 0.9073642492294312, "learning_rate": 2.755518405923289e-06, "loss": 0.0766, "step": 120640 }, { "epoch": 3.56804873720944, "grad_norm": 0.7362906336784363, "learning_rate": 2.7553917160043494e-06, "loss": 0.0831, "step": 120650 }, { "epoch": 3.5683444727036138, "grad_norm": 0.9091393947601318, "learning_rate": 2.7552650260854107e-06, "loss": 0.0788, "step": 120660 }, { "epoch": 3.568640208197788, "grad_norm": 0.6927855014801025, "learning_rate": 2.755138336166471e-06, "loss": 0.078, "step": 120670 }, { "epoch": 3.5689359436919617, "grad_norm": 0.7928038239479065, "learning_rate": 2.755011646247532e-06, "loss": 0.0785, "step": 120680 }, { "epoch": 3.569231679186136, "grad_norm": 0.6334394812583923, "learning_rate": 2.754884956328592e-06, "loss": 0.0404, "step": 120690 }, { "epoch": 3.56952741468031, "grad_norm": 1.6797012090682983, "learning_rate": 2.754758266409653e-06, "loss": 0.1023, "step": 120700 }, { "epoch": 3.569823150174484, "grad_norm": 0.7373780608177185, "learning_rate": 2.7546315764907133e-06, "loss": 0.0752, "step": 120710 }, { "epoch": 3.570118885668658, "grad_norm": 0.6848888397216797, "learning_rate": 2.754504886571774e-06, "loss": 0.0689, "step": 120720 }, { "epoch": 3.570414621162832, "grad_norm": 0.6002721786499023, "learning_rate": 2.7543781966528345e-06, "loss": 0.0755, "step": 120730 }, { "epoch": 3.570710356657006, "grad_norm": 1.2106525897979736, "learning_rate": 2.7542515067338957e-06, "loss": 0.0599, "step": 120740 }, { "epoch": 3.57100609215118, "grad_norm": 0.8266615271568298, "learning_rate": 2.754124816814956e-06, "loss": 0.0837, "step": 120750 }, { "epoch": 3.571301827645354, "grad_norm": 0.9539084434509277, "learning_rate": 2.753998126896017e-06, "loss": 0.0864, "step": 120760 }, { "epoch": 3.571597563139528, "grad_norm": 1.0298529863357544, "learning_rate": 2.7538714369770772e-06, "loss": 0.0908, "step": 120770 }, { "epoch": 3.571893298633702, "grad_norm": 0.7638154625892639, "learning_rate": 2.753744747058138e-06, "loss": 0.0662, "step": 120780 }, { "epoch": 3.572189034127876, "grad_norm": 0.5703847408294678, "learning_rate": 2.7536180571391984e-06, "loss": 0.0608, "step": 120790 }, { "epoch": 3.57248476962205, "grad_norm": 1.037103533744812, "learning_rate": 2.753491367220259e-06, "loss": 0.0811, "step": 120800 }, { "epoch": 3.5727805051162242, "grad_norm": 0.8683149814605713, "learning_rate": 2.7533646773013195e-06, "loss": 0.0737, "step": 120810 }, { "epoch": 3.573076240610398, "grad_norm": 1.1287360191345215, "learning_rate": 2.7532379873823807e-06, "loss": 0.0658, "step": 120820 }, { "epoch": 3.573371976104572, "grad_norm": 1.0263001918792725, "learning_rate": 2.753111297463441e-06, "loss": 0.0747, "step": 120830 }, { "epoch": 3.573667711598746, "grad_norm": 0.5788147449493408, "learning_rate": 2.752984607544502e-06, "loss": 0.0678, "step": 120840 }, { "epoch": 3.57396344709292, "grad_norm": 1.014410376548767, "learning_rate": 2.7528579176255623e-06, "loss": 0.0949, "step": 120850 }, { "epoch": 3.574259182587094, "grad_norm": 0.591620922088623, "learning_rate": 2.752731227706623e-06, "loss": 0.0655, "step": 120860 }, { "epoch": 3.574554918081268, "grad_norm": 0.596100926399231, "learning_rate": 2.7526045377876834e-06, "loss": 0.0641, "step": 120870 }, { "epoch": 3.5748506535754423, "grad_norm": 1.9191734790802002, "learning_rate": 2.7524778478687442e-06, "loss": 0.0703, "step": 120880 }, { "epoch": 3.5751463890696162, "grad_norm": 0.6050050258636475, "learning_rate": 2.7523511579498046e-06, "loss": 0.0565, "step": 120890 }, { "epoch": 3.57544212456379, "grad_norm": 0.6725713610649109, "learning_rate": 2.752224468030866e-06, "loss": 0.07, "step": 120900 }, { "epoch": 3.575737860057964, "grad_norm": 1.0157943964004517, "learning_rate": 2.752097778111926e-06, "loss": 0.1007, "step": 120910 }, { "epoch": 3.576033595552138, "grad_norm": 0.7849177122116089, "learning_rate": 2.751971088192987e-06, "loss": 0.073, "step": 120920 }, { "epoch": 3.5763293310463125, "grad_norm": 0.778873860836029, "learning_rate": 2.7518443982740473e-06, "loss": 0.0714, "step": 120930 }, { "epoch": 3.576625066540486, "grad_norm": 1.2076587677001953, "learning_rate": 2.7517177083551077e-06, "loss": 0.053, "step": 120940 }, { "epoch": 3.5769208020346603, "grad_norm": 0.8477238416671753, "learning_rate": 2.7515910184361685e-06, "loss": 0.0687, "step": 120950 }, { "epoch": 3.5772165375288343, "grad_norm": 0.9781898856163025, "learning_rate": 2.751464328517229e-06, "loss": 0.0733, "step": 120960 }, { "epoch": 3.5775122730230082, "grad_norm": 0.9925937056541443, "learning_rate": 2.75133763859829e-06, "loss": 0.0818, "step": 120970 }, { "epoch": 3.577808008517182, "grad_norm": 0.8941686153411865, "learning_rate": 2.7512109486793504e-06, "loss": 0.0614, "step": 120980 }, { "epoch": 3.578103744011356, "grad_norm": 1.1384819746017456, "learning_rate": 2.751084258760411e-06, "loss": 0.0624, "step": 120990 }, { "epoch": 3.5783994795055305, "grad_norm": 0.5495200157165527, "learning_rate": 2.7509575688414716e-06, "loss": 0.0694, "step": 121000 }, { "epoch": 3.578695214999704, "grad_norm": 1.01521897315979, "learning_rate": 2.7508308789225324e-06, "loss": 0.0917, "step": 121010 }, { "epoch": 3.5789909504938784, "grad_norm": 1.0980061292648315, "learning_rate": 2.7507041890035927e-06, "loss": 0.0984, "step": 121020 }, { "epoch": 3.5792866859880523, "grad_norm": 0.722634494304657, "learning_rate": 2.7505774990846535e-06, "loss": 0.0626, "step": 121030 }, { "epoch": 3.5795824214822263, "grad_norm": 0.6039723753929138, "learning_rate": 2.750450809165714e-06, "loss": 0.0538, "step": 121040 }, { "epoch": 3.5798781569764, "grad_norm": 0.8303734064102173, "learning_rate": 2.750324119246775e-06, "loss": 0.0786, "step": 121050 }, { "epoch": 3.580173892470574, "grad_norm": 0.6898054480552673, "learning_rate": 2.7501974293278355e-06, "loss": 0.081, "step": 121060 }, { "epoch": 3.5804696279647485, "grad_norm": 1.1088604927062988, "learning_rate": 2.7500707394088963e-06, "loss": 0.0637, "step": 121070 }, { "epoch": 3.5807653634589225, "grad_norm": 0.6982985734939575, "learning_rate": 2.7499440494899566e-06, "loss": 0.0618, "step": 121080 }, { "epoch": 3.5810610989530964, "grad_norm": 0.93592768907547, "learning_rate": 2.7498173595710174e-06, "loss": 0.0745, "step": 121090 }, { "epoch": 3.5813568344472704, "grad_norm": 0.5280436873435974, "learning_rate": 2.7496906696520778e-06, "loss": 0.0759, "step": 121100 }, { "epoch": 3.5816525699414443, "grad_norm": 0.698973536491394, "learning_rate": 2.7495639797331386e-06, "loss": 0.0734, "step": 121110 }, { "epoch": 3.5819483054356183, "grad_norm": 1.875399112701416, "learning_rate": 2.749437289814199e-06, "loss": 0.1059, "step": 121120 }, { "epoch": 3.582244040929792, "grad_norm": 0.9635666608810425, "learning_rate": 2.74931059989526e-06, "loss": 0.0677, "step": 121130 }, { "epoch": 3.5825397764239666, "grad_norm": 0.6888383030891418, "learning_rate": 2.7491839099763205e-06, "loss": 0.0676, "step": 121140 }, { "epoch": 3.5828355119181405, "grad_norm": 1.392723560333252, "learning_rate": 2.7490572200573813e-06, "loss": 0.0624, "step": 121150 }, { "epoch": 3.5831312474123145, "grad_norm": 0.7754938006401062, "learning_rate": 2.7489305301384417e-06, "loss": 0.082, "step": 121160 }, { "epoch": 3.5834269829064884, "grad_norm": 0.7271311283111572, "learning_rate": 2.7488038402195025e-06, "loss": 0.0729, "step": 121170 }, { "epoch": 3.5837227184006624, "grad_norm": 1.2897588014602661, "learning_rate": 2.748677150300563e-06, "loss": 0.0699, "step": 121180 }, { "epoch": 3.5840184538948363, "grad_norm": 0.7147805690765381, "learning_rate": 2.7485504603816236e-06, "loss": 0.0562, "step": 121190 }, { "epoch": 3.5843141893890103, "grad_norm": 0.7727016806602478, "learning_rate": 2.748423770462684e-06, "loss": 0.071, "step": 121200 }, { "epoch": 3.5846099248831846, "grad_norm": 0.791012167930603, "learning_rate": 2.748297080543745e-06, "loss": 0.0823, "step": 121210 }, { "epoch": 3.5849056603773586, "grad_norm": 0.9157824516296387, "learning_rate": 2.7481703906248056e-06, "loss": 0.0699, "step": 121220 }, { "epoch": 3.5852013958715325, "grad_norm": 1.0929741859436035, "learning_rate": 2.7480437007058663e-06, "loss": 0.0593, "step": 121230 }, { "epoch": 3.5854971313657065, "grad_norm": 0.9075626730918884, "learning_rate": 2.7479170107869267e-06, "loss": 0.0686, "step": 121240 }, { "epoch": 3.5857928668598804, "grad_norm": 0.944745659828186, "learning_rate": 2.7477903208679875e-06, "loss": 0.0751, "step": 121250 }, { "epoch": 3.586088602354055, "grad_norm": 0.9922284483909607, "learning_rate": 2.747663630949048e-06, "loss": 0.088, "step": 121260 }, { "epoch": 3.5863843378482283, "grad_norm": 1.231977105140686, "learning_rate": 2.7475369410301087e-06, "loss": 0.0807, "step": 121270 }, { "epoch": 3.5866800733424027, "grad_norm": 1.5957791805267334, "learning_rate": 2.747410251111169e-06, "loss": 0.077, "step": 121280 }, { "epoch": 3.5869758088365766, "grad_norm": 0.5462226271629333, "learning_rate": 2.7472835611922302e-06, "loss": 0.0614, "step": 121290 }, { "epoch": 3.5872715443307506, "grad_norm": 0.8458849191665649, "learning_rate": 2.7471568712732906e-06, "loss": 0.0844, "step": 121300 }, { "epoch": 3.5875672798249245, "grad_norm": 0.51321941614151, "learning_rate": 2.7470301813543514e-06, "loss": 0.0776, "step": 121310 }, { "epoch": 3.5878630153190985, "grad_norm": 1.0429450273513794, "learning_rate": 2.7469034914354118e-06, "loss": 0.0872, "step": 121320 }, { "epoch": 3.588158750813273, "grad_norm": 0.5335555672645569, "learning_rate": 2.7467768015164725e-06, "loss": 0.0733, "step": 121330 }, { "epoch": 3.5884544863074463, "grad_norm": 1.1822123527526855, "learning_rate": 2.746650111597533e-06, "loss": 0.0707, "step": 121340 }, { "epoch": 3.5887502218016207, "grad_norm": 0.7478208541870117, "learning_rate": 2.7465234216785933e-06, "loss": 0.0712, "step": 121350 }, { "epoch": 3.5890459572957947, "grad_norm": 0.4467645585536957, "learning_rate": 2.746396731759654e-06, "loss": 0.078, "step": 121360 }, { "epoch": 3.5893416927899686, "grad_norm": 0.9508342146873474, "learning_rate": 2.7462700418407144e-06, "loss": 0.0736, "step": 121370 }, { "epoch": 3.5896374282841426, "grad_norm": 0.773746907711029, "learning_rate": 2.7461433519217756e-06, "loss": 0.0752, "step": 121380 }, { "epoch": 3.5899331637783165, "grad_norm": 0.6103328466415405, "learning_rate": 2.746016662002836e-06, "loss": 0.0712, "step": 121390 }, { "epoch": 3.590228899272491, "grad_norm": 0.7454030513763428, "learning_rate": 2.745889972083897e-06, "loss": 0.0801, "step": 121400 }, { "epoch": 3.590524634766665, "grad_norm": 0.767937421798706, "learning_rate": 2.745763282164957e-06, "loss": 0.0749, "step": 121410 }, { "epoch": 3.590820370260839, "grad_norm": 0.6092470288276672, "learning_rate": 2.745636592246018e-06, "loss": 0.0647, "step": 121420 }, { "epoch": 3.5911161057550127, "grad_norm": 0.7441362142562866, "learning_rate": 2.7455099023270783e-06, "loss": 0.0764, "step": 121430 }, { "epoch": 3.5914118412491867, "grad_norm": 0.9743009805679321, "learning_rate": 2.745383212408139e-06, "loss": 0.0699, "step": 121440 }, { "epoch": 3.5917075767433606, "grad_norm": 0.6367278099060059, "learning_rate": 2.7452565224891995e-06, "loss": 0.1001, "step": 121450 }, { "epoch": 3.5920033122375346, "grad_norm": 0.7743680477142334, "learning_rate": 2.7451298325702607e-06, "loss": 0.0838, "step": 121460 }, { "epoch": 3.592299047731709, "grad_norm": 0.49503278732299805, "learning_rate": 2.745003142651321e-06, "loss": 0.0564, "step": 121470 }, { "epoch": 3.592594783225883, "grad_norm": 0.7572937607765198, "learning_rate": 2.744876452732382e-06, "loss": 0.0719, "step": 121480 }, { "epoch": 3.592890518720057, "grad_norm": 0.4907464385032654, "learning_rate": 2.744749762813442e-06, "loss": 0.0725, "step": 121490 }, { "epoch": 3.5931862542142308, "grad_norm": 0.8028450608253479, "learning_rate": 2.744623072894503e-06, "loss": 0.0645, "step": 121500 }, { "epoch": 3.5934819897084047, "grad_norm": 0.7862111330032349, "learning_rate": 2.7444963829755634e-06, "loss": 0.0821, "step": 121510 }, { "epoch": 3.5937777252025787, "grad_norm": 1.1236406564712524, "learning_rate": 2.744369693056624e-06, "loss": 0.0747, "step": 121520 }, { "epoch": 3.5940734606967526, "grad_norm": 0.4714842140674591, "learning_rate": 2.7442430031376845e-06, "loss": 0.0674, "step": 121530 }, { "epoch": 3.594369196190927, "grad_norm": 0.643326997756958, "learning_rate": 2.7441163132187457e-06, "loss": 0.05, "step": 121540 }, { "epoch": 3.594664931685101, "grad_norm": 0.9115114212036133, "learning_rate": 2.743989623299806e-06, "loss": 0.0852, "step": 121550 }, { "epoch": 3.594960667179275, "grad_norm": 0.9572880268096924, "learning_rate": 2.743862933380867e-06, "loss": 0.0902, "step": 121560 }, { "epoch": 3.595256402673449, "grad_norm": 0.6137418746948242, "learning_rate": 2.7437362434619273e-06, "loss": 0.0884, "step": 121570 }, { "epoch": 3.5955521381676228, "grad_norm": 1.298611044883728, "learning_rate": 2.743609553542988e-06, "loss": 0.0821, "step": 121580 }, { "epoch": 3.595847873661797, "grad_norm": 1.1184457540512085, "learning_rate": 2.7434828636240484e-06, "loss": 0.0579, "step": 121590 }, { "epoch": 3.5961436091559706, "grad_norm": 1.0523881912231445, "learning_rate": 2.743356173705109e-06, "loss": 0.0752, "step": 121600 }, { "epoch": 3.596439344650145, "grad_norm": 0.8403446674346924, "learning_rate": 2.7432294837861696e-06, "loss": 0.0861, "step": 121610 }, { "epoch": 3.596735080144319, "grad_norm": 0.8035722374916077, "learning_rate": 2.7431027938672308e-06, "loss": 0.0723, "step": 121620 }, { "epoch": 3.597030815638493, "grad_norm": 0.683934211730957, "learning_rate": 2.742976103948291e-06, "loss": 0.0597, "step": 121630 }, { "epoch": 3.597326551132667, "grad_norm": 0.8010859489440918, "learning_rate": 2.742849414029352e-06, "loss": 0.0588, "step": 121640 }, { "epoch": 3.597622286626841, "grad_norm": 1.0689083337783813, "learning_rate": 2.7427227241104123e-06, "loss": 0.093, "step": 121650 }, { "epoch": 3.597918022121015, "grad_norm": 1.0961993932724, "learning_rate": 2.742596034191473e-06, "loss": 0.0758, "step": 121660 }, { "epoch": 3.598213757615189, "grad_norm": 1.245792269706726, "learning_rate": 2.7424693442725335e-06, "loss": 0.0731, "step": 121670 }, { "epoch": 3.598509493109363, "grad_norm": 0.6676614284515381, "learning_rate": 2.7423426543535942e-06, "loss": 0.0806, "step": 121680 }, { "epoch": 3.598805228603537, "grad_norm": 0.7846065163612366, "learning_rate": 2.7422159644346546e-06, "loss": 0.0611, "step": 121690 }, { "epoch": 3.599100964097711, "grad_norm": 1.03898286819458, "learning_rate": 2.742089274515716e-06, "loss": 0.0727, "step": 121700 }, { "epoch": 3.599396699591885, "grad_norm": 0.8816746473312378, "learning_rate": 2.741962584596776e-06, "loss": 0.0853, "step": 121710 }, { "epoch": 3.599692435086059, "grad_norm": 1.157240629196167, "learning_rate": 2.741835894677837e-06, "loss": 0.0718, "step": 121720 }, { "epoch": 3.5999881705802332, "grad_norm": 0.7721248865127563, "learning_rate": 2.7417092047588973e-06, "loss": 0.0729, "step": 121730 }, { "epoch": 3.600283906074407, "grad_norm": 0.8354260325431824, "learning_rate": 2.741582514839958e-06, "loss": 0.0543, "step": 121740 }, { "epoch": 3.600579641568581, "grad_norm": 0.7228872179985046, "learning_rate": 2.7414558249210185e-06, "loss": 0.0708, "step": 121750 }, { "epoch": 3.600875377062755, "grad_norm": 1.5618302822113037, "learning_rate": 2.741329135002079e-06, "loss": 0.0711, "step": 121760 }, { "epoch": 3.601171112556929, "grad_norm": 0.610614538192749, "learning_rate": 2.7412024450831397e-06, "loss": 0.0647, "step": 121770 }, { "epoch": 3.601466848051103, "grad_norm": 0.7211953997612, "learning_rate": 2.7410757551642004e-06, "loss": 0.0859, "step": 121780 }, { "epoch": 3.601762583545277, "grad_norm": 1.1025785207748413, "learning_rate": 2.7409490652452612e-06, "loss": 0.0717, "step": 121790 }, { "epoch": 3.6020583190394513, "grad_norm": 1.1648659706115723, "learning_rate": 2.7408223753263216e-06, "loss": 0.0801, "step": 121800 }, { "epoch": 3.6023540545336252, "grad_norm": 0.7641915678977966, "learning_rate": 2.7406956854073824e-06, "loss": 0.0841, "step": 121810 }, { "epoch": 3.602649790027799, "grad_norm": 0.7264963984489441, "learning_rate": 2.7405689954884428e-06, "loss": 0.0748, "step": 121820 }, { "epoch": 3.602945525521973, "grad_norm": 0.6403154730796814, "learning_rate": 2.7404423055695035e-06, "loss": 0.0675, "step": 121830 }, { "epoch": 3.603241261016147, "grad_norm": 0.8103233575820923, "learning_rate": 2.740315615650564e-06, "loss": 0.0663, "step": 121840 }, { "epoch": 3.6035369965103214, "grad_norm": 1.1468501091003418, "learning_rate": 2.7401889257316247e-06, "loss": 0.0734, "step": 121850 }, { "epoch": 3.603832732004495, "grad_norm": 0.606040358543396, "learning_rate": 2.7400622358126855e-06, "loss": 0.0743, "step": 121860 }, { "epoch": 3.6041284674986693, "grad_norm": 0.9456705451011658, "learning_rate": 2.7399355458937463e-06, "loss": 0.0805, "step": 121870 }, { "epoch": 3.6044242029928433, "grad_norm": 0.7265223264694214, "learning_rate": 2.7398088559748066e-06, "loss": 0.0645, "step": 121880 }, { "epoch": 3.604719938487017, "grad_norm": 0.7268472909927368, "learning_rate": 2.7396821660558674e-06, "loss": 0.0717, "step": 121890 }, { "epoch": 3.605015673981191, "grad_norm": 0.9082805514335632, "learning_rate": 2.739555476136928e-06, "loss": 0.0721, "step": 121900 }, { "epoch": 3.605311409475365, "grad_norm": 1.0812997817993164, "learning_rate": 2.7394287862179886e-06, "loss": 0.1017, "step": 121910 }, { "epoch": 3.6056071449695395, "grad_norm": 0.7310215830802917, "learning_rate": 2.739302096299049e-06, "loss": 0.0858, "step": 121920 }, { "epoch": 3.605902880463713, "grad_norm": 0.6928257346153259, "learning_rate": 2.7391754063801097e-06, "loss": 0.0581, "step": 121930 }, { "epoch": 3.6061986159578874, "grad_norm": 0.9885081648826599, "learning_rate": 2.7390487164611705e-06, "loss": 0.0749, "step": 121940 }, { "epoch": 3.6064943514520613, "grad_norm": 0.9462706446647644, "learning_rate": 2.7389220265422313e-06, "loss": 0.0779, "step": 121950 }, { "epoch": 3.6067900869462353, "grad_norm": 0.6330946683883667, "learning_rate": 2.7387953366232917e-06, "loss": 0.0772, "step": 121960 }, { "epoch": 3.607085822440409, "grad_norm": 0.6111565828323364, "learning_rate": 2.7386686467043525e-06, "loss": 0.0686, "step": 121970 }, { "epoch": 3.607381557934583, "grad_norm": 0.7992645502090454, "learning_rate": 2.738541956785413e-06, "loss": 0.0881, "step": 121980 }, { "epoch": 3.6076772934287575, "grad_norm": 0.3590121269226074, "learning_rate": 2.7384152668664736e-06, "loss": 0.0451, "step": 121990 }, { "epoch": 3.6079730289229315, "grad_norm": 0.9465211629867554, "learning_rate": 2.738288576947534e-06, "loss": 0.0859, "step": 122000 }, { "epoch": 3.6082687644171054, "grad_norm": 0.6116524934768677, "learning_rate": 2.738161887028595e-06, "loss": 0.078, "step": 122010 }, { "epoch": 3.6085644999112794, "grad_norm": 1.4153212308883667, "learning_rate": 2.7380351971096556e-06, "loss": 0.0712, "step": 122020 }, { "epoch": 3.6088602354054533, "grad_norm": 0.6655593514442444, "learning_rate": 2.7379085071907164e-06, "loss": 0.0691, "step": 122030 }, { "epoch": 3.6091559708996273, "grad_norm": 0.5765891671180725, "learning_rate": 2.7377818172717767e-06, "loss": 0.0658, "step": 122040 }, { "epoch": 3.609451706393801, "grad_norm": 0.7340549230575562, "learning_rate": 2.7376551273528375e-06, "loss": 0.0752, "step": 122050 }, { "epoch": 3.6097474418879756, "grad_norm": 0.9049891829490662, "learning_rate": 2.737528437433898e-06, "loss": 0.0721, "step": 122060 }, { "epoch": 3.6100431773821495, "grad_norm": 0.8068904280662537, "learning_rate": 2.7374017475149587e-06, "loss": 0.0807, "step": 122070 }, { "epoch": 3.6103389128763235, "grad_norm": 0.48510006070137024, "learning_rate": 2.737275057596019e-06, "loss": 0.0766, "step": 122080 }, { "epoch": 3.6106346483704974, "grad_norm": 0.6369756460189819, "learning_rate": 2.73714836767708e-06, "loss": 0.0609, "step": 122090 }, { "epoch": 3.6109303838646714, "grad_norm": 0.43614283204078674, "learning_rate": 2.7370216777581406e-06, "loss": 0.0858, "step": 122100 }, { "epoch": 3.6112261193588453, "grad_norm": 0.8355537056922913, "learning_rate": 2.7368949878392014e-06, "loss": 0.0818, "step": 122110 }, { "epoch": 3.6115218548530192, "grad_norm": 0.7518907189369202, "learning_rate": 2.7367682979202618e-06, "loss": 0.0646, "step": 122120 }, { "epoch": 3.6118175903471936, "grad_norm": 0.7588083744049072, "learning_rate": 2.7366416080013226e-06, "loss": 0.0641, "step": 122130 }, { "epoch": 3.6121133258413676, "grad_norm": 0.4607129395008087, "learning_rate": 2.736514918082383e-06, "loss": 0.0593, "step": 122140 }, { "epoch": 3.6124090613355415, "grad_norm": 0.5053060054779053, "learning_rate": 2.7363882281634437e-06, "loss": 0.0695, "step": 122150 }, { "epoch": 3.6127047968297155, "grad_norm": 1.2050111293792725, "learning_rate": 2.736261538244504e-06, "loss": 0.0906, "step": 122160 }, { "epoch": 3.6130005323238894, "grad_norm": 0.6975489854812622, "learning_rate": 2.7361348483255645e-06, "loss": 0.0676, "step": 122170 }, { "epoch": 3.613296267818064, "grad_norm": 0.7651994228363037, "learning_rate": 2.7360081584066257e-06, "loss": 0.0758, "step": 122180 }, { "epoch": 3.6135920033122373, "grad_norm": 0.7714555859565735, "learning_rate": 2.735881468487686e-06, "loss": 0.0634, "step": 122190 }, { "epoch": 3.6138877388064117, "grad_norm": 0.9658549427986145, "learning_rate": 2.735754778568747e-06, "loss": 0.0793, "step": 122200 }, { "epoch": 3.6141834743005856, "grad_norm": 0.831648051738739, "learning_rate": 2.735628088649807e-06, "loss": 0.082, "step": 122210 }, { "epoch": 3.6144792097947596, "grad_norm": 0.6843954920768738, "learning_rate": 2.735501398730868e-06, "loss": 0.0851, "step": 122220 }, { "epoch": 3.6147749452889335, "grad_norm": 0.6655383706092834, "learning_rate": 2.7353747088119283e-06, "loss": 0.0655, "step": 122230 }, { "epoch": 3.6150706807831074, "grad_norm": 5.245326042175293, "learning_rate": 2.735248018892989e-06, "loss": 0.0626, "step": 122240 }, { "epoch": 3.615366416277282, "grad_norm": 0.7869680523872375, "learning_rate": 2.7351213289740495e-06, "loss": 0.0772, "step": 122250 }, { "epoch": 3.6156621517714553, "grad_norm": 0.7055557370185852, "learning_rate": 2.7349946390551107e-06, "loss": 0.0782, "step": 122260 }, { "epoch": 3.6159578872656297, "grad_norm": 0.8367007970809937, "learning_rate": 2.734867949136171e-06, "loss": 0.0619, "step": 122270 }, { "epoch": 3.6162536227598037, "grad_norm": 0.6839040517807007, "learning_rate": 2.734741259217232e-06, "loss": 0.0754, "step": 122280 }, { "epoch": 3.6165493582539776, "grad_norm": 0.9080787897109985, "learning_rate": 2.7346145692982922e-06, "loss": 0.0576, "step": 122290 }, { "epoch": 3.6168450937481516, "grad_norm": 1.0860389471054077, "learning_rate": 2.734487879379353e-06, "loss": 0.1019, "step": 122300 }, { "epoch": 3.6171408292423255, "grad_norm": 0.927104651927948, "learning_rate": 2.7343611894604134e-06, "loss": 0.0641, "step": 122310 }, { "epoch": 3.6174365647365, "grad_norm": 0.8670552968978882, "learning_rate": 2.734234499541474e-06, "loss": 0.0591, "step": 122320 }, { "epoch": 3.617732300230674, "grad_norm": 0.5732275247573853, "learning_rate": 2.7341078096225345e-06, "loss": 0.062, "step": 122330 }, { "epoch": 3.6180280357248478, "grad_norm": 0.4072914719581604, "learning_rate": 2.7339811197035958e-06, "loss": 0.0523, "step": 122340 }, { "epoch": 3.6183237712190217, "grad_norm": 0.9350129961967468, "learning_rate": 2.733854429784656e-06, "loss": 0.0879, "step": 122350 }, { "epoch": 3.6186195067131957, "grad_norm": 0.6489276885986328, "learning_rate": 2.733727739865717e-06, "loss": 0.0727, "step": 122360 }, { "epoch": 3.6189152422073696, "grad_norm": 0.6771432161331177, "learning_rate": 2.7336010499467773e-06, "loss": 0.0683, "step": 122370 }, { "epoch": 3.6192109777015435, "grad_norm": 1.3049366474151611, "learning_rate": 2.733474360027838e-06, "loss": 0.0626, "step": 122380 }, { "epoch": 3.619506713195718, "grad_norm": 0.5973877906799316, "learning_rate": 2.7333476701088984e-06, "loss": 0.0691, "step": 122390 }, { "epoch": 3.619802448689892, "grad_norm": 0.5283424258232117, "learning_rate": 2.7332209801899592e-06, "loss": 0.0743, "step": 122400 }, { "epoch": 3.620098184184066, "grad_norm": 0.5440409183502197, "learning_rate": 2.7330942902710196e-06, "loss": 0.0635, "step": 122410 }, { "epoch": 3.6203939196782398, "grad_norm": 0.4241040050983429, "learning_rate": 2.732967600352081e-06, "loss": 0.0644, "step": 122420 }, { "epoch": 3.6206896551724137, "grad_norm": 0.3259527087211609, "learning_rate": 2.732840910433141e-06, "loss": 0.061, "step": 122430 }, { "epoch": 3.6209853906665876, "grad_norm": 0.502924382686615, "learning_rate": 2.732714220514202e-06, "loss": 0.0693, "step": 122440 }, { "epoch": 3.6212811261607616, "grad_norm": 0.7722507119178772, "learning_rate": 2.7325875305952623e-06, "loss": 0.0616, "step": 122450 }, { "epoch": 3.621576861654936, "grad_norm": 0.6737288236618042, "learning_rate": 2.732460840676323e-06, "loss": 0.0668, "step": 122460 }, { "epoch": 3.62187259714911, "grad_norm": 0.6511598229408264, "learning_rate": 2.7323341507573835e-06, "loss": 0.0855, "step": 122470 }, { "epoch": 3.622168332643284, "grad_norm": 0.9045276045799255, "learning_rate": 2.7322074608384443e-06, "loss": 0.0612, "step": 122480 }, { "epoch": 3.622464068137458, "grad_norm": 0.8186381459236145, "learning_rate": 2.7320807709195046e-06, "loss": 0.0495, "step": 122490 }, { "epoch": 3.6227598036316317, "grad_norm": 0.651861846446991, "learning_rate": 2.731954081000566e-06, "loss": 0.0751, "step": 122500 }, { "epoch": 3.623055539125806, "grad_norm": 0.886603832244873, "learning_rate": 2.7318273910816262e-06, "loss": 0.0973, "step": 122510 }, { "epoch": 3.6233512746199796, "grad_norm": 1.1036081314086914, "learning_rate": 2.731700701162687e-06, "loss": 0.0832, "step": 122520 }, { "epoch": 3.623647010114154, "grad_norm": 0.7287691235542297, "learning_rate": 2.7315740112437474e-06, "loss": 0.0642, "step": 122530 }, { "epoch": 3.623942745608328, "grad_norm": 0.7127400040626526, "learning_rate": 2.731447321324808e-06, "loss": 0.0587, "step": 122540 }, { "epoch": 3.624238481102502, "grad_norm": 0.5186428427696228, "learning_rate": 2.7313206314058685e-06, "loss": 0.0673, "step": 122550 }, { "epoch": 3.624534216596676, "grad_norm": 4.195217132568359, "learning_rate": 2.7311939414869293e-06, "loss": 0.0774, "step": 122560 }, { "epoch": 3.62482995209085, "grad_norm": 0.5179218649864197, "learning_rate": 2.7310672515679897e-06, "loss": 0.0679, "step": 122570 }, { "epoch": 3.625125687585024, "grad_norm": 1.5288254022598267, "learning_rate": 2.730940561649051e-06, "loss": 0.0858, "step": 122580 }, { "epoch": 3.625421423079198, "grad_norm": 0.7818942070007324, "learning_rate": 2.7308138717301113e-06, "loss": 0.0807, "step": 122590 }, { "epoch": 3.625717158573372, "grad_norm": 0.8309574723243713, "learning_rate": 2.7306871818111716e-06, "loss": 0.0725, "step": 122600 }, { "epoch": 3.626012894067546, "grad_norm": 1.1060024499893188, "learning_rate": 2.7305604918922324e-06, "loss": 0.0757, "step": 122610 }, { "epoch": 3.62630862956172, "grad_norm": 0.9962722659111023, "learning_rate": 2.7304338019732928e-06, "loss": 0.0635, "step": 122620 }, { "epoch": 3.626604365055894, "grad_norm": 0.5874831676483154, "learning_rate": 2.7303071120543536e-06, "loss": 0.0834, "step": 122630 }, { "epoch": 3.626900100550068, "grad_norm": 0.7137001156806946, "learning_rate": 2.730180422135414e-06, "loss": 0.0484, "step": 122640 }, { "epoch": 3.6271958360442422, "grad_norm": 0.7694450616836548, "learning_rate": 2.7300537322164747e-06, "loss": 0.0713, "step": 122650 }, { "epoch": 3.627491571538416, "grad_norm": 1.1239638328552246, "learning_rate": 2.7299270422975355e-06, "loss": 0.08, "step": 122660 }, { "epoch": 3.62778730703259, "grad_norm": 1.1668075323104858, "learning_rate": 2.7298003523785963e-06, "loss": 0.0815, "step": 122670 }, { "epoch": 3.628083042526764, "grad_norm": 0.7353588342666626, "learning_rate": 2.7296736624596567e-06, "loss": 0.0715, "step": 122680 }, { "epoch": 3.628378778020938, "grad_norm": 0.6452282667160034, "learning_rate": 2.7295469725407175e-06, "loss": 0.0522, "step": 122690 }, { "epoch": 3.628674513515112, "grad_norm": 0.5021892786026001, "learning_rate": 2.729420282621778e-06, "loss": 0.0646, "step": 122700 }, { "epoch": 3.628970249009286, "grad_norm": 0.9285064339637756, "learning_rate": 2.7292935927028386e-06, "loss": 0.087, "step": 122710 }, { "epoch": 3.6292659845034603, "grad_norm": 0.48826247453689575, "learning_rate": 2.729166902783899e-06, "loss": 0.0789, "step": 122720 }, { "epoch": 3.629561719997634, "grad_norm": 0.7809814810752869, "learning_rate": 2.7290402128649598e-06, "loss": 0.0679, "step": 122730 }, { "epoch": 3.629857455491808, "grad_norm": 1.1317428350448608, "learning_rate": 2.7289135229460206e-06, "loss": 0.06, "step": 122740 }, { "epoch": 3.630153190985982, "grad_norm": 1.1145051717758179, "learning_rate": 2.7287868330270814e-06, "loss": 0.0719, "step": 122750 }, { "epoch": 3.630448926480156, "grad_norm": 0.8954639434814453, "learning_rate": 2.7286601431081417e-06, "loss": 0.0828, "step": 122760 }, { "epoch": 3.6307446619743304, "grad_norm": 0.835605263710022, "learning_rate": 2.7285334531892025e-06, "loss": 0.073, "step": 122770 }, { "epoch": 3.631040397468504, "grad_norm": 1.3816883563995361, "learning_rate": 2.728406763270263e-06, "loss": 0.0669, "step": 122780 }, { "epoch": 3.6313361329626783, "grad_norm": 0.49437329173088074, "learning_rate": 2.7282800733513237e-06, "loss": 0.057, "step": 122790 }, { "epoch": 3.6316318684568523, "grad_norm": 1.0360989570617676, "learning_rate": 2.728153383432384e-06, "loss": 0.0886, "step": 122800 }, { "epoch": 3.631927603951026, "grad_norm": 0.9760445356369019, "learning_rate": 2.728026693513445e-06, "loss": 0.0789, "step": 122810 }, { "epoch": 3.6322233394452, "grad_norm": 1.1179248094558716, "learning_rate": 2.7279000035945056e-06, "loss": 0.0878, "step": 122820 }, { "epoch": 3.632519074939374, "grad_norm": 0.8212294578552246, "learning_rate": 2.7277733136755664e-06, "loss": 0.0687, "step": 122830 }, { "epoch": 3.6328148104335485, "grad_norm": 0.6691701412200928, "learning_rate": 2.7276466237566268e-06, "loss": 0.0676, "step": 122840 }, { "epoch": 3.633110545927722, "grad_norm": 0.43221572041511536, "learning_rate": 2.7275199338376876e-06, "loss": 0.0731, "step": 122850 }, { "epoch": 3.6334062814218964, "grad_norm": 0.7256189584732056, "learning_rate": 2.727393243918748e-06, "loss": 0.0795, "step": 122860 }, { "epoch": 3.6337020169160703, "grad_norm": 0.7007655501365662, "learning_rate": 2.7272665539998087e-06, "loss": 0.0931, "step": 122870 }, { "epoch": 3.6339977524102443, "grad_norm": 0.6845206022262573, "learning_rate": 2.727139864080869e-06, "loss": 0.0662, "step": 122880 }, { "epoch": 3.634293487904418, "grad_norm": 0.5649965405464172, "learning_rate": 2.72701317416193e-06, "loss": 0.0547, "step": 122890 }, { "epoch": 3.634589223398592, "grad_norm": 0.9038673639297485, "learning_rate": 2.7268864842429907e-06, "loss": 0.0672, "step": 122900 }, { "epoch": 3.6348849588927665, "grad_norm": 1.305923581123352, "learning_rate": 2.7267597943240514e-06, "loss": 0.0766, "step": 122910 }, { "epoch": 3.6351806943869405, "grad_norm": 0.776154100894928, "learning_rate": 2.726633104405112e-06, "loss": 0.0711, "step": 122920 }, { "epoch": 3.6354764298811144, "grad_norm": 1.1633784770965576, "learning_rate": 2.7265064144861726e-06, "loss": 0.0711, "step": 122930 }, { "epoch": 3.6357721653752884, "grad_norm": 0.6055285930633545, "learning_rate": 2.726379724567233e-06, "loss": 0.0708, "step": 122940 }, { "epoch": 3.6360679008694623, "grad_norm": 0.5633294582366943, "learning_rate": 2.7262530346482938e-06, "loss": 0.101, "step": 122950 }, { "epoch": 3.6363636363636362, "grad_norm": 0.8302326202392578, "learning_rate": 2.726126344729354e-06, "loss": 0.08, "step": 122960 }, { "epoch": 3.63665937185781, "grad_norm": 0.5426850914955139, "learning_rate": 2.725999654810415e-06, "loss": 0.0527, "step": 122970 }, { "epoch": 3.6369551073519846, "grad_norm": 0.762305736541748, "learning_rate": 2.7258729648914757e-06, "loss": 0.0697, "step": 122980 }, { "epoch": 3.6372508428461585, "grad_norm": 0.6515005230903625, "learning_rate": 2.7257462749725365e-06, "loss": 0.054, "step": 122990 }, { "epoch": 3.6375465783403325, "grad_norm": 1.1465603113174438, "learning_rate": 2.725619585053597e-06, "loss": 0.0712, "step": 123000 }, { "epoch": 3.6378423138345064, "grad_norm": 0.6379371285438538, "learning_rate": 2.7254928951346572e-06, "loss": 0.0755, "step": 123010 }, { "epoch": 3.6381380493286803, "grad_norm": 0.7665948867797852, "learning_rate": 2.725366205215718e-06, "loss": 0.0685, "step": 123020 }, { "epoch": 3.6384337848228543, "grad_norm": 0.9074424505233765, "learning_rate": 2.7252395152967784e-06, "loss": 0.0729, "step": 123030 }, { "epoch": 3.6387295203170282, "grad_norm": 0.5265476703643799, "learning_rate": 2.725112825377839e-06, "loss": 0.0659, "step": 123040 }, { "epoch": 3.6390252558112026, "grad_norm": 0.7203742265701294, "learning_rate": 2.7249861354588995e-06, "loss": 0.0906, "step": 123050 }, { "epoch": 3.6393209913053766, "grad_norm": 0.9049658179283142, "learning_rate": 2.7248594455399607e-06, "loss": 0.1026, "step": 123060 }, { "epoch": 3.6396167267995505, "grad_norm": 1.1953132152557373, "learning_rate": 2.724732755621021e-06, "loss": 0.0794, "step": 123070 }, { "epoch": 3.6399124622937244, "grad_norm": 0.8338658213615417, "learning_rate": 2.724606065702082e-06, "loss": 0.0636, "step": 123080 }, { "epoch": 3.6402081977878984, "grad_norm": 1.135559320449829, "learning_rate": 2.7244793757831423e-06, "loss": 0.0751, "step": 123090 }, { "epoch": 3.640503933282073, "grad_norm": 0.6048365831375122, "learning_rate": 2.724352685864203e-06, "loss": 0.0859, "step": 123100 }, { "epoch": 3.6407996687762463, "grad_norm": 0.8187577724456787, "learning_rate": 2.7242259959452634e-06, "loss": 0.0701, "step": 123110 }, { "epoch": 3.6410954042704207, "grad_norm": 0.9610045552253723, "learning_rate": 2.7240993060263242e-06, "loss": 0.0798, "step": 123120 }, { "epoch": 3.6413911397645946, "grad_norm": 1.7426012754440308, "learning_rate": 2.7239726161073846e-06, "loss": 0.068, "step": 123130 }, { "epoch": 3.6416868752587686, "grad_norm": 1.0144829750061035, "learning_rate": 2.723845926188446e-06, "loss": 0.0816, "step": 123140 }, { "epoch": 3.6419826107529425, "grad_norm": 0.8057563900947571, "learning_rate": 2.723719236269506e-06, "loss": 0.0682, "step": 123150 }, { "epoch": 3.6422783462471164, "grad_norm": 0.6978287696838379, "learning_rate": 2.723592546350567e-06, "loss": 0.069, "step": 123160 }, { "epoch": 3.642574081741291, "grad_norm": 0.5072581171989441, "learning_rate": 2.7234658564316273e-06, "loss": 0.0721, "step": 123170 }, { "epoch": 3.6428698172354643, "grad_norm": 0.33811020851135254, "learning_rate": 2.723339166512688e-06, "loss": 0.0673, "step": 123180 }, { "epoch": 3.6431655527296387, "grad_norm": 1.0952115058898926, "learning_rate": 2.7232124765937485e-06, "loss": 0.0598, "step": 123190 }, { "epoch": 3.6434612882238127, "grad_norm": 1.0469579696655273, "learning_rate": 2.7230857866748093e-06, "loss": 0.0745, "step": 123200 }, { "epoch": 3.6437570237179866, "grad_norm": 0.9509063363075256, "learning_rate": 2.7229590967558696e-06, "loss": 0.0894, "step": 123210 }, { "epoch": 3.6440527592121605, "grad_norm": 1.1245149374008179, "learning_rate": 2.722832406836931e-06, "loss": 0.0692, "step": 123220 }, { "epoch": 3.6443484947063345, "grad_norm": 0.7526495456695557, "learning_rate": 2.722705716917991e-06, "loss": 0.0777, "step": 123230 }, { "epoch": 3.644644230200509, "grad_norm": 0.6435370445251465, "learning_rate": 2.722579026999052e-06, "loss": 0.0592, "step": 123240 }, { "epoch": 3.644939965694683, "grad_norm": 0.5394509434700012, "learning_rate": 2.7224523370801124e-06, "loss": 0.0708, "step": 123250 }, { "epoch": 3.6452357011888568, "grad_norm": 1.4917235374450684, "learning_rate": 2.722325647161173e-06, "loss": 0.0724, "step": 123260 }, { "epoch": 3.6455314366830307, "grad_norm": 1.049895167350769, "learning_rate": 2.7221989572422335e-06, "loss": 0.0817, "step": 123270 }, { "epoch": 3.6458271721772046, "grad_norm": 0.921928882598877, "learning_rate": 2.7220722673232943e-06, "loss": 0.0786, "step": 123280 }, { "epoch": 3.6461229076713786, "grad_norm": 0.7328655123710632, "learning_rate": 2.7219455774043547e-06, "loss": 0.043, "step": 123290 }, { "epoch": 3.6464186431655525, "grad_norm": 0.927306592464447, "learning_rate": 2.721818887485416e-06, "loss": 0.067, "step": 123300 }, { "epoch": 3.646714378659727, "grad_norm": 0.992717444896698, "learning_rate": 2.7216921975664762e-06, "loss": 0.0915, "step": 123310 }, { "epoch": 3.647010114153901, "grad_norm": 1.1446797847747803, "learning_rate": 2.721565507647537e-06, "loss": 0.0895, "step": 123320 }, { "epoch": 3.647305849648075, "grad_norm": 1.0615923404693604, "learning_rate": 2.7214388177285974e-06, "loss": 0.0601, "step": 123330 }, { "epoch": 3.6476015851422487, "grad_norm": 0.3916066884994507, "learning_rate": 2.721312127809658e-06, "loss": 0.0737, "step": 123340 }, { "epoch": 3.6478973206364227, "grad_norm": 0.9598180055618286, "learning_rate": 2.7211854378907186e-06, "loss": 0.0875, "step": 123350 }, { "epoch": 3.6481930561305966, "grad_norm": 0.5345758199691772, "learning_rate": 2.7210587479717793e-06, "loss": 0.0749, "step": 123360 }, { "epoch": 3.6484887916247706, "grad_norm": 0.47683265805244446, "learning_rate": 2.7209320580528397e-06, "loss": 0.0694, "step": 123370 }, { "epoch": 3.648784527118945, "grad_norm": 1.2348878383636475, "learning_rate": 2.720805368133901e-06, "loss": 0.0709, "step": 123380 }, { "epoch": 3.649080262613119, "grad_norm": 0.5996575355529785, "learning_rate": 2.7206786782149613e-06, "loss": 0.0586, "step": 123390 }, { "epoch": 3.649375998107293, "grad_norm": 0.5602794885635376, "learning_rate": 2.720551988296022e-06, "loss": 0.0752, "step": 123400 }, { "epoch": 3.649671733601467, "grad_norm": 0.7354291081428528, "learning_rate": 2.7204252983770825e-06, "loss": 0.074, "step": 123410 }, { "epoch": 3.6499674690956407, "grad_norm": 0.618096113204956, "learning_rate": 2.720298608458143e-06, "loss": 0.0766, "step": 123420 }, { "epoch": 3.650263204589815, "grad_norm": 0.6224899291992188, "learning_rate": 2.7201719185392036e-06, "loss": 0.0707, "step": 123430 }, { "epoch": 3.6505589400839886, "grad_norm": 0.9986904263496399, "learning_rate": 2.720045228620264e-06, "loss": 0.0825, "step": 123440 }, { "epoch": 3.650854675578163, "grad_norm": 1.2650786638259888, "learning_rate": 2.7199185387013248e-06, "loss": 0.0797, "step": 123450 }, { "epoch": 3.651150411072337, "grad_norm": 0.4436782896518707, "learning_rate": 2.7197918487823856e-06, "loss": 0.0639, "step": 123460 }, { "epoch": 3.651446146566511, "grad_norm": 0.44913581013679504, "learning_rate": 2.7196651588634463e-06, "loss": 0.0664, "step": 123470 }, { "epoch": 3.651741882060685, "grad_norm": 0.9261976480484009, "learning_rate": 2.7195384689445067e-06, "loss": 0.0515, "step": 123480 }, { "epoch": 3.652037617554859, "grad_norm": 0.31308436393737793, "learning_rate": 2.7194117790255675e-06, "loss": 0.0572, "step": 123490 }, { "epoch": 3.652333353049033, "grad_norm": 0.957436740398407, "learning_rate": 2.719285089106628e-06, "loss": 0.0737, "step": 123500 }, { "epoch": 3.652629088543207, "grad_norm": 1.646499752998352, "learning_rate": 2.7191583991876887e-06, "loss": 0.0839, "step": 123510 }, { "epoch": 3.652924824037381, "grad_norm": 0.53648442029953, "learning_rate": 2.719031709268749e-06, "loss": 0.0802, "step": 123520 }, { "epoch": 3.653220559531555, "grad_norm": 0.6636659502983093, "learning_rate": 2.71890501934981e-06, "loss": 0.0713, "step": 123530 }, { "epoch": 3.653516295025729, "grad_norm": 0.8029887080192566, "learning_rate": 2.7187783294308706e-06, "loss": 0.0615, "step": 123540 }, { "epoch": 3.653812030519903, "grad_norm": 1.2071884870529175, "learning_rate": 2.7186516395119314e-06, "loss": 0.0728, "step": 123550 }, { "epoch": 3.654107766014077, "grad_norm": 1.1750484704971313, "learning_rate": 2.7185249495929918e-06, "loss": 0.0831, "step": 123560 }, { "epoch": 3.654403501508251, "grad_norm": 0.6687819957733154, "learning_rate": 2.7183982596740525e-06, "loss": 0.0929, "step": 123570 }, { "epoch": 3.654699237002425, "grad_norm": 0.9591203331947327, "learning_rate": 2.718271569755113e-06, "loss": 0.0707, "step": 123580 }, { "epoch": 3.654994972496599, "grad_norm": 0.9675905704498291, "learning_rate": 2.7181448798361737e-06, "loss": 0.0727, "step": 123590 }, { "epoch": 3.655290707990773, "grad_norm": 0.9512107968330383, "learning_rate": 2.718018189917234e-06, "loss": 0.0918, "step": 123600 }, { "epoch": 3.655586443484947, "grad_norm": 0.8233715891838074, "learning_rate": 2.717891499998295e-06, "loss": 0.0737, "step": 123610 }, { "epoch": 3.655882178979121, "grad_norm": 0.930493950843811, "learning_rate": 2.7177648100793556e-06, "loss": 0.0815, "step": 123620 }, { "epoch": 3.656177914473295, "grad_norm": 0.5979627966880798, "learning_rate": 2.7176381201604164e-06, "loss": 0.0784, "step": 123630 }, { "epoch": 3.6564736499674693, "grad_norm": 1.1569228172302246, "learning_rate": 2.717511430241477e-06, "loss": 0.056, "step": 123640 }, { "epoch": 3.656769385461643, "grad_norm": 1.1578993797302246, "learning_rate": 2.7173847403225376e-06, "loss": 0.0997, "step": 123650 }, { "epoch": 3.657065120955817, "grad_norm": 0.654266893863678, "learning_rate": 2.717258050403598e-06, "loss": 0.0772, "step": 123660 }, { "epoch": 3.657360856449991, "grad_norm": 0.5646277070045471, "learning_rate": 2.7171313604846587e-06, "loss": 0.062, "step": 123670 }, { "epoch": 3.657656591944165, "grad_norm": 1.1246949434280396, "learning_rate": 2.717004670565719e-06, "loss": 0.091, "step": 123680 }, { "epoch": 3.6579523274383394, "grad_norm": 0.5162971615791321, "learning_rate": 2.71687798064678e-06, "loss": 0.0583, "step": 123690 }, { "epoch": 3.658248062932513, "grad_norm": 0.8875399827957153, "learning_rate": 2.7167512907278407e-06, "loss": 0.091, "step": 123700 }, { "epoch": 3.6585437984266873, "grad_norm": 0.7451173663139343, "learning_rate": 2.7166246008089015e-06, "loss": 0.0627, "step": 123710 }, { "epoch": 3.6588395339208613, "grad_norm": 0.8466628789901733, "learning_rate": 2.716497910889962e-06, "loss": 0.0801, "step": 123720 }, { "epoch": 3.659135269415035, "grad_norm": 0.7939454913139343, "learning_rate": 2.7163712209710226e-06, "loss": 0.073, "step": 123730 }, { "epoch": 3.659431004909209, "grad_norm": 0.5488919019699097, "learning_rate": 2.716244531052083e-06, "loss": 0.0698, "step": 123740 }, { "epoch": 3.659726740403383, "grad_norm": 0.7663641571998596, "learning_rate": 2.7161178411331438e-06, "loss": 0.0728, "step": 123750 }, { "epoch": 3.6600224758975575, "grad_norm": 0.7181276082992554, "learning_rate": 2.715991151214204e-06, "loss": 0.0849, "step": 123760 }, { "epoch": 3.660318211391731, "grad_norm": 0.8398851156234741, "learning_rate": 2.715864461295265e-06, "loss": 0.0728, "step": 123770 }, { "epoch": 3.6606139468859054, "grad_norm": 0.660823404788971, "learning_rate": 2.7157377713763257e-06, "loss": 0.0622, "step": 123780 }, { "epoch": 3.6609096823800793, "grad_norm": 0.9567461013793945, "learning_rate": 2.7156110814573865e-06, "loss": 0.0643, "step": 123790 }, { "epoch": 3.6612054178742532, "grad_norm": 1.0364322662353516, "learning_rate": 2.715484391538447e-06, "loss": 0.077, "step": 123800 }, { "epoch": 3.661501153368427, "grad_norm": 0.7516508102416992, "learning_rate": 2.7153577016195077e-06, "loss": 0.0638, "step": 123810 }, { "epoch": 3.661796888862601, "grad_norm": 0.4016253352165222, "learning_rate": 2.715231011700568e-06, "loss": 0.0745, "step": 123820 }, { "epoch": 3.6620926243567755, "grad_norm": 0.7550269961357117, "learning_rate": 2.7151043217816284e-06, "loss": 0.0579, "step": 123830 }, { "epoch": 3.6623883598509495, "grad_norm": 0.7442088723182678, "learning_rate": 2.714977631862689e-06, "loss": 0.0639, "step": 123840 }, { "epoch": 3.6626840953451234, "grad_norm": 0.8462662696838379, "learning_rate": 2.7148509419437496e-06, "loss": 0.0861, "step": 123850 }, { "epoch": 3.6629798308392973, "grad_norm": 0.44744908809661865, "learning_rate": 2.7147242520248108e-06, "loss": 0.0779, "step": 123860 }, { "epoch": 3.6632755663334713, "grad_norm": 1.002404808998108, "learning_rate": 2.714597562105871e-06, "loss": 0.0922, "step": 123870 }, { "epoch": 3.6635713018276452, "grad_norm": 0.5870286822319031, "learning_rate": 2.714470872186932e-06, "loss": 0.0728, "step": 123880 }, { "epoch": 3.663867037321819, "grad_norm": 0.7416065335273743, "learning_rate": 2.7143441822679923e-06, "loss": 0.0694, "step": 123890 }, { "epoch": 3.6641627728159936, "grad_norm": 1.344058632850647, "learning_rate": 2.714217492349053e-06, "loss": 0.0755, "step": 123900 }, { "epoch": 3.6644585083101675, "grad_norm": 0.9792688488960266, "learning_rate": 2.7140908024301135e-06, "loss": 0.0727, "step": 123910 }, { "epoch": 3.6647542438043414, "grad_norm": 0.6820172667503357, "learning_rate": 2.7139641125111742e-06, "loss": 0.0753, "step": 123920 }, { "epoch": 3.6650499792985154, "grad_norm": 0.6967806220054626, "learning_rate": 2.7138374225922346e-06, "loss": 0.0673, "step": 123930 }, { "epoch": 3.6653457147926893, "grad_norm": 0.9199786186218262, "learning_rate": 2.713710732673296e-06, "loss": 0.0796, "step": 123940 }, { "epoch": 3.6656414502868633, "grad_norm": 0.9322657585144043, "learning_rate": 2.713584042754356e-06, "loss": 0.0679, "step": 123950 }, { "epoch": 3.665937185781037, "grad_norm": 0.6376836895942688, "learning_rate": 2.713457352835417e-06, "loss": 0.0696, "step": 123960 }, { "epoch": 3.6662329212752116, "grad_norm": 0.6963257789611816, "learning_rate": 2.7133306629164773e-06, "loss": 0.0841, "step": 123970 }, { "epoch": 3.6665286567693856, "grad_norm": 0.8712180852890015, "learning_rate": 2.713203972997538e-06, "loss": 0.0667, "step": 123980 }, { "epoch": 3.6668243922635595, "grad_norm": 1.3755933046340942, "learning_rate": 2.7130772830785985e-06, "loss": 0.0643, "step": 123990 }, { "epoch": 3.6671201277577334, "grad_norm": 0.6650381684303284, "learning_rate": 2.7129505931596593e-06, "loss": 0.0893, "step": 124000 }, { "epoch": 3.6674158632519074, "grad_norm": 0.9549694061279297, "learning_rate": 2.7128239032407197e-06, "loss": 0.0899, "step": 124010 }, { "epoch": 3.6677115987460818, "grad_norm": 0.5973078608512878, "learning_rate": 2.712697213321781e-06, "loss": 0.0661, "step": 124020 }, { "epoch": 3.6680073342402553, "grad_norm": 1.4520009756088257, "learning_rate": 2.7125705234028412e-06, "loss": 0.0662, "step": 124030 }, { "epoch": 3.6683030697344297, "grad_norm": 0.8906968832015991, "learning_rate": 2.712443833483902e-06, "loss": 0.0461, "step": 124040 }, { "epoch": 3.6685988052286036, "grad_norm": 0.8159891366958618, "learning_rate": 2.7123171435649624e-06, "loss": 0.0852, "step": 124050 }, { "epoch": 3.6688945407227775, "grad_norm": 0.8640322685241699, "learning_rate": 2.712190453646023e-06, "loss": 0.0851, "step": 124060 }, { "epoch": 3.6691902762169515, "grad_norm": 1.518742561340332, "learning_rate": 2.7120637637270835e-06, "loss": 0.0733, "step": 124070 }, { "epoch": 3.6694860117111254, "grad_norm": 0.44325047731399536, "learning_rate": 2.7119370738081443e-06, "loss": 0.0934, "step": 124080 }, { "epoch": 3.6697817472053, "grad_norm": 0.9008054137229919, "learning_rate": 2.7118103838892047e-06, "loss": 0.0676, "step": 124090 }, { "epoch": 3.6700774826994733, "grad_norm": 1.2651900053024292, "learning_rate": 2.711683693970266e-06, "loss": 0.0776, "step": 124100 }, { "epoch": 3.6703732181936477, "grad_norm": 2.2905948162078857, "learning_rate": 2.7115570040513263e-06, "loss": 0.0885, "step": 124110 }, { "epoch": 3.6706689536878216, "grad_norm": 0.5173512101173401, "learning_rate": 2.711430314132387e-06, "loss": 0.0711, "step": 124120 }, { "epoch": 3.6709646891819956, "grad_norm": 0.6900131702423096, "learning_rate": 2.7113036242134474e-06, "loss": 0.0739, "step": 124130 }, { "epoch": 3.6712604246761695, "grad_norm": 0.7047144174575806, "learning_rate": 2.7111769342945082e-06, "loss": 0.0659, "step": 124140 }, { "epoch": 3.6715561601703435, "grad_norm": 0.6932010054588318, "learning_rate": 2.7110502443755686e-06, "loss": 0.072, "step": 124150 }, { "epoch": 3.671851895664518, "grad_norm": 1.8683339357376099, "learning_rate": 2.7109235544566294e-06, "loss": 0.085, "step": 124160 }, { "epoch": 3.672147631158692, "grad_norm": 0.7951931953430176, "learning_rate": 2.7107968645376897e-06, "loss": 0.0669, "step": 124170 }, { "epoch": 3.6724433666528657, "grad_norm": 0.4420320391654968, "learning_rate": 2.710670174618751e-06, "loss": 0.0606, "step": 124180 }, { "epoch": 3.6727391021470397, "grad_norm": 0.5948648452758789, "learning_rate": 2.7105434846998113e-06, "loss": 0.0703, "step": 124190 }, { "epoch": 3.6730348376412136, "grad_norm": 1.1124988794326782, "learning_rate": 2.710416794780872e-06, "loss": 0.0806, "step": 124200 }, { "epoch": 3.6733305731353876, "grad_norm": 1.4785782098770142, "learning_rate": 2.7102901048619325e-06, "loss": 0.0762, "step": 124210 }, { "epoch": 3.6736263086295615, "grad_norm": 0.9394643306732178, "learning_rate": 2.7101634149429933e-06, "loss": 0.087, "step": 124220 }, { "epoch": 3.673922044123736, "grad_norm": 0.5649760961532593, "learning_rate": 2.7100367250240536e-06, "loss": 0.0679, "step": 124230 }, { "epoch": 3.67421777961791, "grad_norm": 0.62230384349823, "learning_rate": 2.709910035105114e-06, "loss": 0.068, "step": 124240 }, { "epoch": 3.674513515112084, "grad_norm": 0.9462393522262573, "learning_rate": 2.709783345186175e-06, "loss": 0.0917, "step": 124250 }, { "epoch": 3.6748092506062577, "grad_norm": 1.0273621082305908, "learning_rate": 2.7096566552672356e-06, "loss": 0.0828, "step": 124260 }, { "epoch": 3.6751049861004317, "grad_norm": 0.5116599202156067, "learning_rate": 2.7095299653482964e-06, "loss": 0.0696, "step": 124270 }, { "epoch": 3.6754007215946056, "grad_norm": 0.6002257466316223, "learning_rate": 2.7094032754293567e-06, "loss": 0.0815, "step": 124280 }, { "epoch": 3.6756964570887796, "grad_norm": 0.9316986203193665, "learning_rate": 2.7092765855104175e-06, "loss": 0.0439, "step": 124290 }, { "epoch": 3.675992192582954, "grad_norm": 1.0894914865493774, "learning_rate": 2.709149895591478e-06, "loss": 0.0902, "step": 124300 }, { "epoch": 3.676287928077128, "grad_norm": 1.2556148767471313, "learning_rate": 2.7090232056725387e-06, "loss": 0.0824, "step": 124310 }, { "epoch": 3.676583663571302, "grad_norm": 0.6871079802513123, "learning_rate": 2.708896515753599e-06, "loss": 0.0748, "step": 124320 }, { "epoch": 3.676879399065476, "grad_norm": 1.109277367591858, "learning_rate": 2.70876982583466e-06, "loss": 0.0797, "step": 124330 }, { "epoch": 3.6771751345596497, "grad_norm": 1.0527888536453247, "learning_rate": 2.7086431359157206e-06, "loss": 0.0661, "step": 124340 }, { "epoch": 3.677470870053824, "grad_norm": 0.49458739161491394, "learning_rate": 2.7085164459967814e-06, "loss": 0.0723, "step": 124350 }, { "epoch": 3.6777666055479976, "grad_norm": 0.8129891157150269, "learning_rate": 2.7083897560778418e-06, "loss": 0.0794, "step": 124360 }, { "epoch": 3.678062341042172, "grad_norm": 0.8303651809692383, "learning_rate": 2.7082630661589026e-06, "loss": 0.0642, "step": 124370 }, { "epoch": 3.678358076536346, "grad_norm": 1.1419615745544434, "learning_rate": 2.708136376239963e-06, "loss": 0.0728, "step": 124380 }, { "epoch": 3.67865381203052, "grad_norm": 0.9174612164497375, "learning_rate": 2.7080096863210237e-06, "loss": 0.0623, "step": 124390 }, { "epoch": 3.678949547524694, "grad_norm": 1.1463029384613037, "learning_rate": 2.707882996402084e-06, "loss": 0.0821, "step": 124400 }, { "epoch": 3.6792452830188678, "grad_norm": 0.5037805438041687, "learning_rate": 2.707756306483145e-06, "loss": 0.0702, "step": 124410 }, { "epoch": 3.679541018513042, "grad_norm": 1.0004247426986694, "learning_rate": 2.7076296165642057e-06, "loss": 0.0813, "step": 124420 }, { "epoch": 3.679836754007216, "grad_norm": 0.6823955774307251, "learning_rate": 2.7075029266452665e-06, "loss": 0.0676, "step": 124430 }, { "epoch": 3.68013248950139, "grad_norm": 0.651509165763855, "learning_rate": 2.707376236726327e-06, "loss": 0.0705, "step": 124440 }, { "epoch": 3.680428224995564, "grad_norm": 1.0793958902359009, "learning_rate": 2.7072495468073876e-06, "loss": 0.089, "step": 124450 }, { "epoch": 3.680723960489738, "grad_norm": 0.7629652619361877, "learning_rate": 2.707122856888448e-06, "loss": 0.0795, "step": 124460 }, { "epoch": 3.681019695983912, "grad_norm": 0.597590446472168, "learning_rate": 2.7069961669695088e-06, "loss": 0.064, "step": 124470 }, { "epoch": 3.681315431478086, "grad_norm": 0.6455647945404053, "learning_rate": 2.706869477050569e-06, "loss": 0.0686, "step": 124480 }, { "epoch": 3.68161116697226, "grad_norm": 0.6403614282608032, "learning_rate": 2.70674278713163e-06, "loss": 0.0603, "step": 124490 }, { "epoch": 3.681906902466434, "grad_norm": 0.7816056609153748, "learning_rate": 2.7066160972126907e-06, "loss": 0.0721, "step": 124500 }, { "epoch": 3.682202637960608, "grad_norm": 1.1544841527938843, "learning_rate": 2.7064894072937515e-06, "loss": 0.0811, "step": 124510 }, { "epoch": 3.682498373454782, "grad_norm": 0.6363405585289001, "learning_rate": 2.706362717374812e-06, "loss": 0.0545, "step": 124520 }, { "epoch": 3.682794108948956, "grad_norm": 0.41348522901535034, "learning_rate": 2.7062360274558727e-06, "loss": 0.0656, "step": 124530 }, { "epoch": 3.68308984444313, "grad_norm": 1.3788336515426636, "learning_rate": 2.706109337536933e-06, "loss": 0.0679, "step": 124540 }, { "epoch": 3.683385579937304, "grad_norm": 0.9347731471061707, "learning_rate": 2.705982647617994e-06, "loss": 0.0859, "step": 124550 }, { "epoch": 3.6836813154314783, "grad_norm": 0.7231065034866333, "learning_rate": 2.705855957699054e-06, "loss": 0.0662, "step": 124560 }, { "epoch": 3.683977050925652, "grad_norm": 0.9863783121109009, "learning_rate": 2.705729267780115e-06, "loss": 0.0699, "step": 124570 }, { "epoch": 3.684272786419826, "grad_norm": 0.5080764293670654, "learning_rate": 2.7056025778611758e-06, "loss": 0.0588, "step": 124580 }, { "epoch": 3.684568521914, "grad_norm": 1.0097321271896362, "learning_rate": 2.7054758879422366e-06, "loss": 0.0664, "step": 124590 }, { "epoch": 3.684864257408174, "grad_norm": 0.7540591359138489, "learning_rate": 2.705349198023297e-06, "loss": 0.0671, "step": 124600 }, { "epoch": 3.6851599929023484, "grad_norm": 0.627375066280365, "learning_rate": 2.7052225081043577e-06, "loss": 0.0811, "step": 124610 }, { "epoch": 3.685455728396522, "grad_norm": 0.5641058087348938, "learning_rate": 2.705095818185418e-06, "loss": 0.0706, "step": 124620 }, { "epoch": 3.6857514638906963, "grad_norm": 1.4723403453826904, "learning_rate": 2.704969128266479e-06, "loss": 0.0874, "step": 124630 }, { "epoch": 3.6860471993848702, "grad_norm": 0.9202284216880798, "learning_rate": 2.7048424383475392e-06, "loss": 0.0615, "step": 124640 }, { "epoch": 3.686342934879044, "grad_norm": 1.1403326988220215, "learning_rate": 2.7047157484285996e-06, "loss": 0.0819, "step": 124650 }, { "epoch": 3.686638670373218, "grad_norm": 0.7645073533058167, "learning_rate": 2.704589058509661e-06, "loss": 0.0863, "step": 124660 }, { "epoch": 3.686934405867392, "grad_norm": 0.8736027479171753, "learning_rate": 2.704462368590721e-06, "loss": 0.0683, "step": 124670 }, { "epoch": 3.6872301413615665, "grad_norm": 0.5477562546730042, "learning_rate": 2.704335678671782e-06, "loss": 0.0819, "step": 124680 }, { "epoch": 3.68752587685574, "grad_norm": 0.49660441279411316, "learning_rate": 2.7042089887528423e-06, "loss": 0.059, "step": 124690 }, { "epoch": 3.6878216123499143, "grad_norm": 0.8227769136428833, "learning_rate": 2.704082298833903e-06, "loss": 0.077, "step": 124700 }, { "epoch": 3.6881173478440883, "grad_norm": 0.6134648323059082, "learning_rate": 2.7039556089149635e-06, "loss": 0.0713, "step": 124710 }, { "epoch": 3.6884130833382622, "grad_norm": 0.5976935029029846, "learning_rate": 2.7038289189960243e-06, "loss": 0.0656, "step": 124720 }, { "epoch": 3.688708818832436, "grad_norm": 0.6503282189369202, "learning_rate": 2.7037022290770846e-06, "loss": 0.0591, "step": 124730 }, { "epoch": 3.68900455432661, "grad_norm": 0.8888300657272339, "learning_rate": 2.703575539158146e-06, "loss": 0.049, "step": 124740 }, { "epoch": 3.6893002898207845, "grad_norm": 0.8033417463302612, "learning_rate": 2.7034488492392062e-06, "loss": 0.085, "step": 124750 }, { "epoch": 3.6895960253149584, "grad_norm": 0.6552505493164062, "learning_rate": 2.703322159320267e-06, "loss": 0.0912, "step": 124760 }, { "epoch": 3.6898917608091324, "grad_norm": 0.7286739349365234, "learning_rate": 2.7031954694013274e-06, "loss": 0.0525, "step": 124770 }, { "epoch": 3.6901874963033063, "grad_norm": 0.9006055593490601, "learning_rate": 2.703068779482388e-06, "loss": 0.074, "step": 124780 }, { "epoch": 3.6904832317974803, "grad_norm": 0.7378872632980347, "learning_rate": 2.7029420895634485e-06, "loss": 0.057, "step": 124790 }, { "epoch": 3.6907789672916542, "grad_norm": 1.1513314247131348, "learning_rate": 2.7028153996445093e-06, "loss": 0.084, "step": 124800 }, { "epoch": 3.691074702785828, "grad_norm": 1.2086411714553833, "learning_rate": 2.7026887097255697e-06, "loss": 0.0802, "step": 124810 }, { "epoch": 3.6913704382800026, "grad_norm": 2.3293046951293945, "learning_rate": 2.702562019806631e-06, "loss": 0.0799, "step": 124820 }, { "epoch": 3.6916661737741765, "grad_norm": 1.1810097694396973, "learning_rate": 2.7024353298876913e-06, "loss": 0.0787, "step": 124830 }, { "epoch": 3.6919619092683504, "grad_norm": 1.3841124773025513, "learning_rate": 2.702308639968752e-06, "loss": 0.0704, "step": 124840 }, { "epoch": 3.6922576447625244, "grad_norm": 1.1708643436431885, "learning_rate": 2.7021819500498124e-06, "loss": 0.0644, "step": 124850 }, { "epoch": 3.6925533802566983, "grad_norm": 0.8766102194786072, "learning_rate": 2.702055260130873e-06, "loss": 0.0758, "step": 124860 }, { "epoch": 3.6928491157508723, "grad_norm": 1.1917048692703247, "learning_rate": 2.7019285702119336e-06, "loss": 0.0766, "step": 124870 }, { "epoch": 3.693144851245046, "grad_norm": 0.6494134068489075, "learning_rate": 2.7018018802929944e-06, "loss": 0.062, "step": 124880 }, { "epoch": 3.6934405867392206, "grad_norm": 0.8191201686859131, "learning_rate": 2.7016751903740547e-06, "loss": 0.0774, "step": 124890 }, { "epoch": 3.6937363222333945, "grad_norm": 0.6317261457443237, "learning_rate": 2.701548500455116e-06, "loss": 0.0753, "step": 124900 }, { "epoch": 3.6940320577275685, "grad_norm": 0.9129835963249207, "learning_rate": 2.7014218105361763e-06, "loss": 0.0724, "step": 124910 }, { "epoch": 3.6943277932217424, "grad_norm": 0.6804128289222717, "learning_rate": 2.701295120617237e-06, "loss": 0.073, "step": 124920 }, { "epoch": 3.6946235287159164, "grad_norm": 0.44677332043647766, "learning_rate": 2.7011684306982975e-06, "loss": 0.0726, "step": 124930 }, { "epoch": 3.6949192642100908, "grad_norm": 0.45442071557044983, "learning_rate": 2.7010417407793583e-06, "loss": 0.0719, "step": 124940 }, { "epoch": 3.6952149997042643, "grad_norm": 0.7203884720802307, "learning_rate": 2.7009150508604186e-06, "loss": 0.0868, "step": 124950 }, { "epoch": 3.6955107351984386, "grad_norm": 0.8690165877342224, "learning_rate": 2.7007883609414794e-06, "loss": 0.0712, "step": 124960 }, { "epoch": 3.6958064706926126, "grad_norm": 0.6884123682975769, "learning_rate": 2.7006616710225398e-06, "loss": 0.0824, "step": 124970 }, { "epoch": 3.6961022061867865, "grad_norm": 0.6828315854072571, "learning_rate": 2.700534981103601e-06, "loss": 0.0591, "step": 124980 }, { "epoch": 3.6963979416809605, "grad_norm": 1.0219111442565918, "learning_rate": 2.7004082911846614e-06, "loss": 0.0644, "step": 124990 }, { "epoch": 3.6966936771751344, "grad_norm": 0.6540035009384155, "learning_rate": 2.700281601265722e-06, "loss": 0.0668, "step": 125000 }, { "epoch": 3.696989412669309, "grad_norm": 1.5264743566513062, "learning_rate": 2.7001549113467825e-06, "loss": 0.0717, "step": 125010 }, { "epoch": 3.6972851481634823, "grad_norm": 0.7403894662857056, "learning_rate": 2.7000282214278433e-06, "loss": 0.0819, "step": 125020 }, { "epoch": 3.6975808836576567, "grad_norm": 1.0640859603881836, "learning_rate": 2.6999015315089037e-06, "loss": 0.0701, "step": 125030 }, { "epoch": 3.6978766191518306, "grad_norm": 0.8128832578659058, "learning_rate": 2.6997748415899645e-06, "loss": 0.0605, "step": 125040 }, { "epoch": 3.6981723546460046, "grad_norm": 0.806376576423645, "learning_rate": 2.699648151671025e-06, "loss": 0.0735, "step": 125050 }, { "epoch": 3.6984680901401785, "grad_norm": 1.143537163734436, "learning_rate": 2.699521461752086e-06, "loss": 0.0953, "step": 125060 }, { "epoch": 3.6987638256343525, "grad_norm": 0.9116944670677185, "learning_rate": 2.6993947718331464e-06, "loss": 0.0734, "step": 125070 }, { "epoch": 3.699059561128527, "grad_norm": 1.12692391872406, "learning_rate": 2.6992680819142068e-06, "loss": 0.0805, "step": 125080 }, { "epoch": 3.699355296622701, "grad_norm": 0.9225859642028809, "learning_rate": 2.6991413919952676e-06, "loss": 0.0679, "step": 125090 }, { "epoch": 3.6996510321168747, "grad_norm": 0.7937801480293274, "learning_rate": 2.699014702076328e-06, "loss": 0.072, "step": 125100 }, { "epoch": 3.6999467676110487, "grad_norm": 0.5101287961006165, "learning_rate": 2.6988880121573887e-06, "loss": 0.0662, "step": 125110 }, { "epoch": 3.7002425031052226, "grad_norm": 0.5348852276802063, "learning_rate": 2.698761322238449e-06, "loss": 0.0703, "step": 125120 }, { "epoch": 3.7005382385993966, "grad_norm": 0.6517502069473267, "learning_rate": 2.69863463231951e-06, "loss": 0.0652, "step": 125130 }, { "epoch": 3.7008339740935705, "grad_norm": 0.6842478513717651, "learning_rate": 2.6985079424005707e-06, "loss": 0.0562, "step": 125140 }, { "epoch": 3.701129709587745, "grad_norm": 1.1760600805282593, "learning_rate": 2.6983812524816314e-06, "loss": 0.0818, "step": 125150 }, { "epoch": 3.701425445081919, "grad_norm": 1.3676247596740723, "learning_rate": 2.698254562562692e-06, "loss": 0.0854, "step": 125160 }, { "epoch": 3.701721180576093, "grad_norm": 0.7629773616790771, "learning_rate": 2.6981278726437526e-06, "loss": 0.0717, "step": 125170 }, { "epoch": 3.7020169160702667, "grad_norm": 0.36263519525527954, "learning_rate": 2.698001182724813e-06, "loss": 0.0576, "step": 125180 }, { "epoch": 3.7023126515644407, "grad_norm": 0.7946465015411377, "learning_rate": 2.6978744928058738e-06, "loss": 0.0567, "step": 125190 }, { "epoch": 3.7026083870586146, "grad_norm": 0.5672316551208496, "learning_rate": 2.697747802886934e-06, "loss": 0.0784, "step": 125200 }, { "epoch": 3.7029041225527886, "grad_norm": 1.0084275007247925, "learning_rate": 2.697621112967995e-06, "loss": 0.0905, "step": 125210 }, { "epoch": 3.703199858046963, "grad_norm": 0.7228044271469116, "learning_rate": 2.6974944230490557e-06, "loss": 0.0765, "step": 125220 }, { "epoch": 3.703495593541137, "grad_norm": 0.5506707429885864, "learning_rate": 2.6973677331301165e-06, "loss": 0.0801, "step": 125230 }, { "epoch": 3.703791329035311, "grad_norm": 0.704496443271637, "learning_rate": 2.697241043211177e-06, "loss": 0.0603, "step": 125240 }, { "epoch": 3.7040870645294848, "grad_norm": 0.9659068584442139, "learning_rate": 2.6971143532922376e-06, "loss": 0.066, "step": 125250 }, { "epoch": 3.7043828000236587, "grad_norm": 0.39688706398010254, "learning_rate": 2.696987663373298e-06, "loss": 0.0765, "step": 125260 }, { "epoch": 3.704678535517833, "grad_norm": 1.1970216035842896, "learning_rate": 2.696860973454359e-06, "loss": 0.092, "step": 125270 }, { "epoch": 3.7049742710120066, "grad_norm": 0.6742543578147888, "learning_rate": 2.696734283535419e-06, "loss": 0.0773, "step": 125280 }, { "epoch": 3.705270006506181, "grad_norm": 0.9702335596084595, "learning_rate": 2.69660759361648e-06, "loss": 0.05, "step": 125290 }, { "epoch": 3.705565742000355, "grad_norm": 0.7493091821670532, "learning_rate": 2.6964809036975407e-06, "loss": 0.086, "step": 125300 }, { "epoch": 3.705861477494529, "grad_norm": 0.661446213722229, "learning_rate": 2.6963542137786015e-06, "loss": 0.062, "step": 125310 }, { "epoch": 3.706157212988703, "grad_norm": 0.8077936768531799, "learning_rate": 2.696227523859662e-06, "loss": 0.0792, "step": 125320 }, { "epoch": 3.7064529484828768, "grad_norm": 0.7037233114242554, "learning_rate": 2.6961008339407227e-06, "loss": 0.0816, "step": 125330 }, { "epoch": 3.706748683977051, "grad_norm": 0.7181568145751953, "learning_rate": 2.695974144021783e-06, "loss": 0.0677, "step": 125340 }, { "epoch": 3.707044419471225, "grad_norm": 0.7157099843025208, "learning_rate": 2.695847454102844e-06, "loss": 0.0803, "step": 125350 }, { "epoch": 3.707340154965399, "grad_norm": 0.9677992463111877, "learning_rate": 2.6957207641839042e-06, "loss": 0.0761, "step": 125360 }, { "epoch": 3.707635890459573, "grad_norm": 1.01077139377594, "learning_rate": 2.695594074264965e-06, "loss": 0.0684, "step": 125370 }, { "epoch": 3.707931625953747, "grad_norm": 0.7551068663597107, "learning_rate": 2.695467384346026e-06, "loss": 0.0725, "step": 125380 }, { "epoch": 3.708227361447921, "grad_norm": 1.4138392210006714, "learning_rate": 2.6953406944270866e-06, "loss": 0.0696, "step": 125390 }, { "epoch": 3.708523096942095, "grad_norm": 0.677684485912323, "learning_rate": 2.695214004508147e-06, "loss": 0.0824, "step": 125400 }, { "epoch": 3.708818832436269, "grad_norm": 0.5462371110916138, "learning_rate": 2.6950873145892077e-06, "loss": 0.0805, "step": 125410 }, { "epoch": 3.709114567930443, "grad_norm": 0.6717944145202637, "learning_rate": 2.694960624670268e-06, "loss": 0.065, "step": 125420 }, { "epoch": 3.709410303424617, "grad_norm": 0.8542241454124451, "learning_rate": 2.694833934751329e-06, "loss": 0.0525, "step": 125430 }, { "epoch": 3.709706038918791, "grad_norm": 0.9486143589019775, "learning_rate": 2.6947072448323893e-06, "loss": 0.0578, "step": 125440 }, { "epoch": 3.710001774412965, "grad_norm": 0.681997537612915, "learning_rate": 2.69458055491345e-06, "loss": 0.0792, "step": 125450 }, { "epoch": 3.710297509907139, "grad_norm": 1.5123885869979858, "learning_rate": 2.694453864994511e-06, "loss": 0.0969, "step": 125460 }, { "epoch": 3.710593245401313, "grad_norm": 0.8842847347259521, "learning_rate": 2.6943271750755716e-06, "loss": 0.0814, "step": 125470 }, { "epoch": 3.7108889808954872, "grad_norm": 0.62811279296875, "learning_rate": 2.694200485156632e-06, "loss": 0.0819, "step": 125480 }, { "epoch": 3.711184716389661, "grad_norm": 0.5251947641372681, "learning_rate": 2.6940737952376924e-06, "loss": 0.0693, "step": 125490 }, { "epoch": 3.711480451883835, "grad_norm": 0.7075746059417725, "learning_rate": 2.693947105318753e-06, "loss": 0.0767, "step": 125500 }, { "epoch": 3.711776187378009, "grad_norm": 0.6194908022880554, "learning_rate": 2.6938204153998135e-06, "loss": 0.0691, "step": 125510 }, { "epoch": 3.712071922872183, "grad_norm": 0.8084251284599304, "learning_rate": 2.6936937254808743e-06, "loss": 0.0716, "step": 125520 }, { "epoch": 3.7123676583663574, "grad_norm": 1.1953431367874146, "learning_rate": 2.6935670355619347e-06, "loss": 0.0703, "step": 125530 }, { "epoch": 3.712663393860531, "grad_norm": 0.7030850052833557, "learning_rate": 2.693440345642996e-06, "loss": 0.0612, "step": 125540 }, { "epoch": 3.7129591293547053, "grad_norm": 0.7727872729301453, "learning_rate": 2.6933136557240562e-06, "loss": 0.0711, "step": 125550 }, { "epoch": 3.7132548648488792, "grad_norm": 0.8251408338546753, "learning_rate": 2.693186965805117e-06, "loss": 0.0651, "step": 125560 }, { "epoch": 3.713550600343053, "grad_norm": 0.9688242077827454, "learning_rate": 2.6930602758861774e-06, "loss": 0.0929, "step": 125570 }, { "epoch": 3.713846335837227, "grad_norm": 0.43497395515441895, "learning_rate": 2.692933585967238e-06, "loss": 0.0869, "step": 125580 }, { "epoch": 3.714142071331401, "grad_norm": 0.7765936851501465, "learning_rate": 2.6928068960482986e-06, "loss": 0.0528, "step": 125590 }, { "epoch": 3.7144378068255754, "grad_norm": 0.7144209742546082, "learning_rate": 2.6926802061293593e-06, "loss": 0.0832, "step": 125600 }, { "epoch": 3.714733542319749, "grad_norm": 1.1869386434555054, "learning_rate": 2.6925535162104197e-06, "loss": 0.0675, "step": 125610 }, { "epoch": 3.7150292778139233, "grad_norm": 0.8720482587814331, "learning_rate": 2.692426826291481e-06, "loss": 0.0717, "step": 125620 }, { "epoch": 3.7153250133080973, "grad_norm": 0.5670264363288879, "learning_rate": 2.6923001363725413e-06, "loss": 0.067, "step": 125630 }, { "epoch": 3.7156207488022712, "grad_norm": 0.9236192107200623, "learning_rate": 2.692173446453602e-06, "loss": 0.0524, "step": 125640 }, { "epoch": 3.715916484296445, "grad_norm": 1.2951884269714355, "learning_rate": 2.6920467565346624e-06, "loss": 0.0868, "step": 125650 }, { "epoch": 3.716212219790619, "grad_norm": 0.6081483364105225, "learning_rate": 2.6919200666157232e-06, "loss": 0.0664, "step": 125660 }, { "epoch": 3.7165079552847935, "grad_norm": 0.8733974695205688, "learning_rate": 2.6917933766967836e-06, "loss": 0.0671, "step": 125670 }, { "epoch": 3.7168036907789674, "grad_norm": 0.4935421943664551, "learning_rate": 2.6916666867778444e-06, "loss": 0.0812, "step": 125680 }, { "epoch": 3.7170994262731414, "grad_norm": 1.076550841331482, "learning_rate": 2.6915399968589048e-06, "loss": 0.0771, "step": 125690 }, { "epoch": 3.7173951617673153, "grad_norm": 0.9141011238098145, "learning_rate": 2.691413306939966e-06, "loss": 0.0742, "step": 125700 }, { "epoch": 3.7176908972614893, "grad_norm": 1.041040062904358, "learning_rate": 2.6912866170210263e-06, "loss": 0.0784, "step": 125710 }, { "epoch": 3.717986632755663, "grad_norm": 0.7191495895385742, "learning_rate": 2.691159927102087e-06, "loss": 0.0753, "step": 125720 }, { "epoch": 3.718282368249837, "grad_norm": 1.3797171115875244, "learning_rate": 2.6910332371831475e-06, "loss": 0.086, "step": 125730 }, { "epoch": 3.7185781037440115, "grad_norm": 0.6888248920440674, "learning_rate": 2.6909065472642083e-06, "loss": 0.0593, "step": 125740 }, { "epoch": 3.7188738392381855, "grad_norm": 0.9342405796051025, "learning_rate": 2.6907798573452687e-06, "loss": 0.0752, "step": 125750 }, { "epoch": 3.7191695747323594, "grad_norm": 0.9278279542922974, "learning_rate": 2.6906531674263294e-06, "loss": 0.1002, "step": 125760 }, { "epoch": 3.7194653102265334, "grad_norm": 0.8404292464256287, "learning_rate": 2.69052647750739e-06, "loss": 0.0816, "step": 125770 }, { "epoch": 3.7197610457207073, "grad_norm": 1.7161647081375122, "learning_rate": 2.690399787588451e-06, "loss": 0.0799, "step": 125780 }, { "epoch": 3.7200567812148813, "grad_norm": 0.7814091444015503, "learning_rate": 2.6902730976695114e-06, "loss": 0.057, "step": 125790 }, { "epoch": 3.720352516709055, "grad_norm": 2.0858235359191895, "learning_rate": 2.690146407750572e-06, "loss": 0.0959, "step": 125800 }, { "epoch": 3.7206482522032296, "grad_norm": 0.6382283568382263, "learning_rate": 2.6900197178316325e-06, "loss": 0.0678, "step": 125810 }, { "epoch": 3.7209439876974035, "grad_norm": 1.0707485675811768, "learning_rate": 2.6898930279126933e-06, "loss": 0.0774, "step": 125820 }, { "epoch": 3.7212397231915775, "grad_norm": 0.7961865067481995, "learning_rate": 2.6897663379937537e-06, "loss": 0.0592, "step": 125830 }, { "epoch": 3.7215354586857514, "grad_norm": 0.861501932144165, "learning_rate": 2.6896396480748145e-06, "loss": 0.0492, "step": 125840 }, { "epoch": 3.7218311941799254, "grad_norm": 0.7950779795646667, "learning_rate": 2.689512958155875e-06, "loss": 0.0798, "step": 125850 }, { "epoch": 3.7221269296740997, "grad_norm": 0.7241518497467041, "learning_rate": 2.689386268236936e-06, "loss": 0.0775, "step": 125860 }, { "epoch": 3.7224226651682732, "grad_norm": 0.8121349811553955, "learning_rate": 2.6892595783179964e-06, "loss": 0.0709, "step": 125870 }, { "epoch": 3.7227184006624476, "grad_norm": 1.053486704826355, "learning_rate": 2.6891328883990572e-06, "loss": 0.0662, "step": 125880 }, { "epoch": 3.7230141361566216, "grad_norm": 0.7641308903694153, "learning_rate": 2.6890061984801176e-06, "loss": 0.0607, "step": 125890 }, { "epoch": 3.7233098716507955, "grad_norm": 1.2555558681488037, "learning_rate": 2.688879508561178e-06, "loss": 0.0815, "step": 125900 }, { "epoch": 3.7236056071449695, "grad_norm": 1.0381598472595215, "learning_rate": 2.6887528186422387e-06, "loss": 0.0917, "step": 125910 }, { "epoch": 3.7239013426391434, "grad_norm": 0.944800078868866, "learning_rate": 2.688626128723299e-06, "loss": 0.0748, "step": 125920 }, { "epoch": 3.724197078133318, "grad_norm": 0.7730180621147156, "learning_rate": 2.68849943880436e-06, "loss": 0.0732, "step": 125930 }, { "epoch": 3.7244928136274913, "grad_norm": 0.48246705532073975, "learning_rate": 2.6883727488854207e-06, "loss": 0.0656, "step": 125940 }, { "epoch": 3.7247885491216657, "grad_norm": 0.8910912275314331, "learning_rate": 2.6882460589664815e-06, "loss": 0.0833, "step": 125950 }, { "epoch": 3.7250842846158396, "grad_norm": 0.7781717777252197, "learning_rate": 2.688119369047542e-06, "loss": 0.0831, "step": 125960 }, { "epoch": 3.7253800201100136, "grad_norm": 0.5992653965950012, "learning_rate": 2.6879926791286026e-06, "loss": 0.0731, "step": 125970 }, { "epoch": 3.7256757556041875, "grad_norm": 1.0308938026428223, "learning_rate": 2.687865989209663e-06, "loss": 0.0851, "step": 125980 }, { "epoch": 3.7259714910983615, "grad_norm": 1.093065857887268, "learning_rate": 2.6877392992907238e-06, "loss": 0.0695, "step": 125990 }, { "epoch": 3.726267226592536, "grad_norm": 1.4615055322647095, "learning_rate": 2.687612609371784e-06, "loss": 0.0821, "step": 126000 }, { "epoch": 3.72656296208671, "grad_norm": 1.0596710443496704, "learning_rate": 2.687485919452845e-06, "loss": 0.0798, "step": 126010 }, { "epoch": 3.7268586975808837, "grad_norm": 0.6640788316726685, "learning_rate": 2.6873592295339057e-06, "loss": 0.058, "step": 126020 }, { "epoch": 3.7271544330750577, "grad_norm": 1.4186196327209473, "learning_rate": 2.6872325396149665e-06, "loss": 0.074, "step": 126030 }, { "epoch": 3.7274501685692316, "grad_norm": 0.7805818915367126, "learning_rate": 2.687105849696027e-06, "loss": 0.0609, "step": 126040 }, { "epoch": 3.7277459040634056, "grad_norm": 1.2704516649246216, "learning_rate": 2.6869791597770877e-06, "loss": 0.0889, "step": 126050 }, { "epoch": 3.7280416395575795, "grad_norm": 0.6489726901054382, "learning_rate": 2.686852469858148e-06, "loss": 0.0867, "step": 126060 }, { "epoch": 3.728337375051754, "grad_norm": 1.137353539466858, "learning_rate": 2.686725779939209e-06, "loss": 0.0843, "step": 126070 }, { "epoch": 3.728633110545928, "grad_norm": 0.7585985660552979, "learning_rate": 2.686599090020269e-06, "loss": 0.0746, "step": 126080 }, { "epoch": 3.7289288460401018, "grad_norm": 0.8131903409957886, "learning_rate": 2.68647240010133e-06, "loss": 0.0639, "step": 126090 }, { "epoch": 3.7292245815342757, "grad_norm": 0.5854064226150513, "learning_rate": 2.6863457101823908e-06, "loss": 0.0795, "step": 126100 }, { "epoch": 3.7295203170284497, "grad_norm": 0.7195985317230225, "learning_rate": 2.6862190202634516e-06, "loss": 0.0835, "step": 126110 }, { "epoch": 3.7298160525226236, "grad_norm": 0.9156650304794312, "learning_rate": 2.686092330344512e-06, "loss": 0.0797, "step": 126120 }, { "epoch": 3.7301117880167975, "grad_norm": 0.869482159614563, "learning_rate": 2.6859656404255727e-06, "loss": 0.0637, "step": 126130 }, { "epoch": 3.730407523510972, "grad_norm": 0.6853296756744385, "learning_rate": 2.685838950506633e-06, "loss": 0.0651, "step": 126140 }, { "epoch": 3.730703259005146, "grad_norm": 0.7375344038009644, "learning_rate": 2.685712260587694e-06, "loss": 0.0735, "step": 126150 }, { "epoch": 3.73099899449932, "grad_norm": 1.2840839624404907, "learning_rate": 2.6855855706687542e-06, "loss": 0.0809, "step": 126160 }, { "epoch": 3.7312947299934938, "grad_norm": 0.9835697412490845, "learning_rate": 2.685458880749815e-06, "loss": 0.0682, "step": 126170 }, { "epoch": 3.7315904654876677, "grad_norm": 0.9783716201782227, "learning_rate": 2.685332190830876e-06, "loss": 0.0671, "step": 126180 }, { "epoch": 3.731886200981842, "grad_norm": 0.7171112895011902, "learning_rate": 2.6852055009119366e-06, "loss": 0.0723, "step": 126190 }, { "epoch": 3.7321819364760156, "grad_norm": 0.4040079712867737, "learning_rate": 2.685078810992997e-06, "loss": 0.0645, "step": 126200 }, { "epoch": 3.73247767197019, "grad_norm": 0.8412355184555054, "learning_rate": 2.6849521210740578e-06, "loss": 0.0798, "step": 126210 }, { "epoch": 3.732773407464364, "grad_norm": 0.6197213530540466, "learning_rate": 2.684825431155118e-06, "loss": 0.0754, "step": 126220 }, { "epoch": 3.733069142958538, "grad_norm": 0.44517895579338074, "learning_rate": 2.684698741236179e-06, "loss": 0.0697, "step": 126230 }, { "epoch": 3.733364878452712, "grad_norm": 0.7296125888824463, "learning_rate": 2.6845720513172393e-06, "loss": 0.0634, "step": 126240 }, { "epoch": 3.7336606139468858, "grad_norm": 0.5325252413749695, "learning_rate": 2.6844453613983e-06, "loss": 0.0869, "step": 126250 }, { "epoch": 3.73395634944106, "grad_norm": 0.7805300951004028, "learning_rate": 2.684318671479361e-06, "loss": 0.0652, "step": 126260 }, { "epoch": 3.734252084935234, "grad_norm": 0.7244940996170044, "learning_rate": 2.6841919815604217e-06, "loss": 0.0681, "step": 126270 }, { "epoch": 3.734547820429408, "grad_norm": 0.5090890526771545, "learning_rate": 2.684065291641482e-06, "loss": 0.0734, "step": 126280 }, { "epoch": 3.734843555923582, "grad_norm": 0.8033849596977234, "learning_rate": 2.683938601722543e-06, "loss": 0.0748, "step": 126290 }, { "epoch": 3.735139291417756, "grad_norm": 0.8705846071243286, "learning_rate": 2.683811911803603e-06, "loss": 0.089, "step": 126300 }, { "epoch": 3.73543502691193, "grad_norm": 1.087262749671936, "learning_rate": 2.6836852218846635e-06, "loss": 0.0629, "step": 126310 }, { "epoch": 3.735730762406104, "grad_norm": 0.9528273344039917, "learning_rate": 2.6835585319657243e-06, "loss": 0.08, "step": 126320 }, { "epoch": 3.736026497900278, "grad_norm": 1.1330643892288208, "learning_rate": 2.6834318420467847e-06, "loss": 0.0738, "step": 126330 }, { "epoch": 3.736322233394452, "grad_norm": 0.7481351494789124, "learning_rate": 2.683305152127846e-06, "loss": 0.0789, "step": 126340 }, { "epoch": 3.736617968888626, "grad_norm": 1.0386565923690796, "learning_rate": 2.6831784622089063e-06, "loss": 0.0838, "step": 126350 }, { "epoch": 3.7369137043828, "grad_norm": 0.6251006722450256, "learning_rate": 2.683051772289967e-06, "loss": 0.0783, "step": 126360 }, { "epoch": 3.737209439876974, "grad_norm": 1.0145487785339355, "learning_rate": 2.6829250823710274e-06, "loss": 0.0901, "step": 126370 }, { "epoch": 3.737505175371148, "grad_norm": 1.040461540222168, "learning_rate": 2.6827983924520882e-06, "loss": 0.075, "step": 126380 }, { "epoch": 3.737800910865322, "grad_norm": 1.0403852462768555, "learning_rate": 2.6826717025331486e-06, "loss": 0.0662, "step": 126390 }, { "epoch": 3.7380966463594962, "grad_norm": 1.0131069421768188, "learning_rate": 2.6825450126142094e-06, "loss": 0.0807, "step": 126400 }, { "epoch": 3.73839238185367, "grad_norm": 0.9877771139144897, "learning_rate": 2.6824183226952697e-06, "loss": 0.0879, "step": 126410 }, { "epoch": 3.738688117347844, "grad_norm": 0.6165667176246643, "learning_rate": 2.682291632776331e-06, "loss": 0.0731, "step": 126420 }, { "epoch": 3.738983852842018, "grad_norm": 0.597254753112793, "learning_rate": 2.6821649428573913e-06, "loss": 0.0654, "step": 126430 }, { "epoch": 3.739279588336192, "grad_norm": 0.9927289485931396, "learning_rate": 2.682038252938452e-06, "loss": 0.0662, "step": 126440 }, { "epoch": 3.7395753238303664, "grad_norm": 0.33063602447509766, "learning_rate": 2.6819115630195125e-06, "loss": 0.0688, "step": 126450 }, { "epoch": 3.73987105932454, "grad_norm": 0.7755464315414429, "learning_rate": 2.6817848731005733e-06, "loss": 0.0797, "step": 126460 }, { "epoch": 3.7401667948187143, "grad_norm": 0.7060325145721436, "learning_rate": 2.6816581831816336e-06, "loss": 0.0699, "step": 126470 }, { "epoch": 3.7404625303128882, "grad_norm": 1.1921789646148682, "learning_rate": 2.6815314932626944e-06, "loss": 0.0699, "step": 126480 }, { "epoch": 3.740758265807062, "grad_norm": 0.4993670582771301, "learning_rate": 2.681404803343755e-06, "loss": 0.0541, "step": 126490 }, { "epoch": 3.741054001301236, "grad_norm": 1.2391855716705322, "learning_rate": 2.681278113424816e-06, "loss": 0.0955, "step": 126500 }, { "epoch": 3.74134973679541, "grad_norm": 0.7420902252197266, "learning_rate": 2.6811514235058764e-06, "loss": 0.0874, "step": 126510 }, { "epoch": 3.7416454722895844, "grad_norm": 0.7648038864135742, "learning_rate": 2.681024733586937e-06, "loss": 0.0686, "step": 126520 }, { "epoch": 3.741941207783758, "grad_norm": 0.3775193393230438, "learning_rate": 2.6808980436679975e-06, "loss": 0.0834, "step": 126530 }, { "epoch": 3.7422369432779323, "grad_norm": 0.6796805262565613, "learning_rate": 2.6807713537490583e-06, "loss": 0.0683, "step": 126540 }, { "epoch": 3.7425326787721063, "grad_norm": 0.9809973835945129, "learning_rate": 2.6806446638301187e-06, "loss": 0.0795, "step": 126550 }, { "epoch": 3.74282841426628, "grad_norm": 0.8312225937843323, "learning_rate": 2.6805179739111795e-06, "loss": 0.0644, "step": 126560 }, { "epoch": 3.743124149760454, "grad_norm": 1.1278965473175049, "learning_rate": 2.68039128399224e-06, "loss": 0.0929, "step": 126570 }, { "epoch": 3.743419885254628, "grad_norm": 0.6597571969032288, "learning_rate": 2.680264594073301e-06, "loss": 0.0767, "step": 126580 }, { "epoch": 3.7437156207488025, "grad_norm": 1.2588436603546143, "learning_rate": 2.6801379041543614e-06, "loss": 0.073, "step": 126590 }, { "epoch": 3.7440113562429764, "grad_norm": 0.6509237885475159, "learning_rate": 2.680011214235422e-06, "loss": 0.0799, "step": 126600 }, { "epoch": 3.7443070917371504, "grad_norm": 0.6254974007606506, "learning_rate": 2.6798845243164826e-06, "loss": 0.0859, "step": 126610 }, { "epoch": 3.7446028272313243, "grad_norm": 0.8439838886260986, "learning_rate": 2.6797578343975434e-06, "loss": 0.0699, "step": 126620 }, { "epoch": 3.7448985627254983, "grad_norm": 1.7083710432052612, "learning_rate": 2.6796311444786037e-06, "loss": 0.0728, "step": 126630 }, { "epoch": 3.745194298219672, "grad_norm": 0.7626800537109375, "learning_rate": 2.6795044545596645e-06, "loss": 0.072, "step": 126640 }, { "epoch": 3.745490033713846, "grad_norm": 0.8548101186752319, "learning_rate": 2.679377764640725e-06, "loss": 0.0996, "step": 126650 }, { "epoch": 3.7457857692080205, "grad_norm": 0.5754145383834839, "learning_rate": 2.679251074721786e-06, "loss": 0.0799, "step": 126660 }, { "epoch": 3.7460815047021945, "grad_norm": 0.9988211989402771, "learning_rate": 2.6791243848028465e-06, "loss": 0.083, "step": 126670 }, { "epoch": 3.7463772401963684, "grad_norm": 0.8288564682006836, "learning_rate": 2.6789976948839072e-06, "loss": 0.079, "step": 126680 }, { "epoch": 3.7466729756905424, "grad_norm": 0.7969912886619568, "learning_rate": 2.6788710049649676e-06, "loss": 0.0677, "step": 126690 }, { "epoch": 3.7469687111847163, "grad_norm": 0.7605188488960266, "learning_rate": 2.6787443150460284e-06, "loss": 0.0753, "step": 126700 }, { "epoch": 3.7472644466788902, "grad_norm": 0.6338459253311157, "learning_rate": 2.6786176251270888e-06, "loss": 0.0783, "step": 126710 }, { "epoch": 3.747560182173064, "grad_norm": 0.8032935261726379, "learning_rate": 2.678490935208149e-06, "loss": 0.074, "step": 126720 }, { "epoch": 3.7478559176672386, "grad_norm": 0.9843236804008484, "learning_rate": 2.67836424528921e-06, "loss": 0.0688, "step": 126730 }, { "epoch": 3.7481516531614125, "grad_norm": 0.31582146883010864, "learning_rate": 2.6782375553702707e-06, "loss": 0.0576, "step": 126740 }, { "epoch": 3.7484473886555865, "grad_norm": 1.425136685371399, "learning_rate": 2.6781108654513315e-06, "loss": 0.0724, "step": 126750 }, { "epoch": 3.7487431241497604, "grad_norm": 1.414321780204773, "learning_rate": 2.677984175532392e-06, "loss": 0.101, "step": 126760 }, { "epoch": 3.7490388596439344, "grad_norm": 0.8027673363685608, "learning_rate": 2.6778574856134527e-06, "loss": 0.0897, "step": 126770 }, { "epoch": 3.7493345951381087, "grad_norm": 0.9055583477020264, "learning_rate": 2.677730795694513e-06, "loss": 0.0539, "step": 126780 }, { "epoch": 3.7496303306322822, "grad_norm": 1.3900176286697388, "learning_rate": 2.677604105775574e-06, "loss": 0.0635, "step": 126790 }, { "epoch": 3.7499260661264566, "grad_norm": 0.5871680974960327, "learning_rate": 2.677477415856634e-06, "loss": 0.0729, "step": 126800 }, { "epoch": 3.7502218016206306, "grad_norm": 1.1920702457427979, "learning_rate": 2.677350725937695e-06, "loss": 0.0827, "step": 126810 }, { "epoch": 3.7505175371148045, "grad_norm": 0.8510885834693909, "learning_rate": 2.6772240360187558e-06, "loss": 0.0631, "step": 126820 }, { "epoch": 3.7508132726089785, "grad_norm": 0.6498968601226807, "learning_rate": 2.6770973460998166e-06, "loss": 0.0712, "step": 126830 }, { "epoch": 3.7511090081031524, "grad_norm": 0.9168384075164795, "learning_rate": 2.676970656180877e-06, "loss": 0.0695, "step": 126840 }, { "epoch": 3.751404743597327, "grad_norm": 0.8285203576087952, "learning_rate": 2.6768439662619377e-06, "loss": 0.0864, "step": 126850 }, { "epoch": 3.7517004790915007, "grad_norm": 0.7266643643379211, "learning_rate": 2.676717276342998e-06, "loss": 0.0795, "step": 126860 }, { "epoch": 3.7519962145856747, "grad_norm": 1.1968011856079102, "learning_rate": 2.676590586424059e-06, "loss": 0.0744, "step": 126870 }, { "epoch": 3.7522919500798486, "grad_norm": 0.8882124423980713, "learning_rate": 2.6764638965051192e-06, "loss": 0.0787, "step": 126880 }, { "epoch": 3.7525876855740226, "grad_norm": 1.2612650394439697, "learning_rate": 2.67633720658618e-06, "loss": 0.0707, "step": 126890 }, { "epoch": 3.7528834210681965, "grad_norm": 0.5822870135307312, "learning_rate": 2.676210516667241e-06, "loss": 0.0935, "step": 126900 }, { "epoch": 3.7531791565623704, "grad_norm": 0.7760770320892334, "learning_rate": 2.6760838267483016e-06, "loss": 0.0755, "step": 126910 }, { "epoch": 3.753474892056545, "grad_norm": 0.6027697920799255, "learning_rate": 2.675957136829362e-06, "loss": 0.0846, "step": 126920 }, { "epoch": 3.7537706275507188, "grad_norm": 0.7147489190101624, "learning_rate": 2.6758304469104228e-06, "loss": 0.0805, "step": 126930 }, { "epoch": 3.7540663630448927, "grad_norm": 0.6737122535705566, "learning_rate": 2.675703756991483e-06, "loss": 0.0752, "step": 126940 }, { "epoch": 3.7543620985390667, "grad_norm": 0.8028890490531921, "learning_rate": 2.675577067072544e-06, "loss": 0.0659, "step": 126950 }, { "epoch": 3.7546578340332406, "grad_norm": 1.2242745161056519, "learning_rate": 2.6754503771536043e-06, "loss": 0.0894, "step": 126960 }, { "epoch": 3.7549535695274145, "grad_norm": 0.7858642339706421, "learning_rate": 2.675323687234665e-06, "loss": 0.0916, "step": 126970 }, { "epoch": 3.7552493050215885, "grad_norm": 0.5786662101745605, "learning_rate": 2.675196997315726e-06, "loss": 0.0709, "step": 126980 }, { "epoch": 3.755545040515763, "grad_norm": 0.9134238958358765, "learning_rate": 2.6750703073967866e-06, "loss": 0.07, "step": 126990 }, { "epoch": 3.755840776009937, "grad_norm": 1.0525567531585693, "learning_rate": 2.674943617477847e-06, "loss": 0.0681, "step": 127000 }, { "epoch": 3.7561365115041108, "grad_norm": 1.2217507362365723, "learning_rate": 2.674816927558908e-06, "loss": 0.0823, "step": 127010 }, { "epoch": 3.7564322469982847, "grad_norm": 1.03415048122406, "learning_rate": 2.674690237639968e-06, "loss": 0.0808, "step": 127020 }, { "epoch": 3.7567279824924587, "grad_norm": 0.8240117430686951, "learning_rate": 2.674563547721029e-06, "loss": 0.0662, "step": 127030 }, { "epoch": 3.7570237179866326, "grad_norm": 0.6295844316482544, "learning_rate": 2.6744368578020893e-06, "loss": 0.0522, "step": 127040 }, { "epoch": 3.7573194534808065, "grad_norm": 0.8834755420684814, "learning_rate": 2.67431016788315e-06, "loss": 0.0755, "step": 127050 }, { "epoch": 3.757615188974981, "grad_norm": 0.7110648155212402, "learning_rate": 2.674183477964211e-06, "loss": 0.0748, "step": 127060 }, { "epoch": 3.757910924469155, "grad_norm": 0.564887285232544, "learning_rate": 2.6740567880452717e-06, "loss": 0.0729, "step": 127070 }, { "epoch": 3.758206659963329, "grad_norm": 0.8841985464096069, "learning_rate": 2.673930098126332e-06, "loss": 0.0677, "step": 127080 }, { "epoch": 3.7585023954575028, "grad_norm": 0.4806327223777771, "learning_rate": 2.673803408207393e-06, "loss": 0.0555, "step": 127090 }, { "epoch": 3.7587981309516767, "grad_norm": 0.7626027464866638, "learning_rate": 2.673676718288453e-06, "loss": 0.0942, "step": 127100 }, { "epoch": 3.759093866445851, "grad_norm": 1.315279245376587, "learning_rate": 2.673550028369514e-06, "loss": 0.0834, "step": 127110 }, { "epoch": 3.7593896019400246, "grad_norm": 1.63715398311615, "learning_rate": 2.6734233384505744e-06, "loss": 0.072, "step": 127120 }, { "epoch": 3.759685337434199, "grad_norm": 0.8308658003807068, "learning_rate": 2.673296648531635e-06, "loss": 0.0801, "step": 127130 }, { "epoch": 3.759981072928373, "grad_norm": 0.5297095775604248, "learning_rate": 2.673169958612696e-06, "loss": 0.0697, "step": 127140 }, { "epoch": 3.760276808422547, "grad_norm": 0.7538512945175171, "learning_rate": 2.6730432686937563e-06, "loss": 0.0686, "step": 127150 }, { "epoch": 3.760572543916721, "grad_norm": 0.5221938490867615, "learning_rate": 2.672916578774817e-06, "loss": 0.0832, "step": 127160 }, { "epoch": 3.7608682794108947, "grad_norm": 0.943730354309082, "learning_rate": 2.6727898888558775e-06, "loss": 0.0663, "step": 127170 }, { "epoch": 3.761164014905069, "grad_norm": 0.4315440058708191, "learning_rate": 2.6726631989369383e-06, "loss": 0.0631, "step": 127180 }, { "epoch": 3.761459750399243, "grad_norm": 0.6449989080429077, "learning_rate": 2.6725365090179986e-06, "loss": 0.0591, "step": 127190 }, { "epoch": 3.761755485893417, "grad_norm": 1.5013514757156372, "learning_rate": 2.6724098190990594e-06, "loss": 0.0823, "step": 127200 }, { "epoch": 3.762051221387591, "grad_norm": 1.1505613327026367, "learning_rate": 2.6722831291801198e-06, "loss": 0.0941, "step": 127210 }, { "epoch": 3.762346956881765, "grad_norm": 0.6007300615310669, "learning_rate": 2.672156439261181e-06, "loss": 0.0686, "step": 127220 }, { "epoch": 3.762642692375939, "grad_norm": 1.043441891670227, "learning_rate": 2.6720297493422414e-06, "loss": 0.0564, "step": 127230 }, { "epoch": 3.762938427870113, "grad_norm": 1.330002784729004, "learning_rate": 2.671903059423302e-06, "loss": 0.0749, "step": 127240 }, { "epoch": 3.763234163364287, "grad_norm": 0.9551045298576355, "learning_rate": 2.6717763695043625e-06, "loss": 0.0861, "step": 127250 }, { "epoch": 3.763529898858461, "grad_norm": 1.1426392793655396, "learning_rate": 2.6716496795854233e-06, "loss": 0.0858, "step": 127260 }, { "epoch": 3.763825634352635, "grad_norm": 0.5278288125991821, "learning_rate": 2.6715229896664837e-06, "loss": 0.0817, "step": 127270 }, { "epoch": 3.764121369846809, "grad_norm": 1.1546584367752075, "learning_rate": 2.6713962997475445e-06, "loss": 0.0784, "step": 127280 }, { "epoch": 3.764417105340983, "grad_norm": 1.1353991031646729, "learning_rate": 2.671269609828605e-06, "loss": 0.0704, "step": 127290 }, { "epoch": 3.764712840835157, "grad_norm": 0.6577978134155273, "learning_rate": 2.671142919909666e-06, "loss": 0.0805, "step": 127300 }, { "epoch": 3.765008576329331, "grad_norm": 0.7651881575584412, "learning_rate": 2.6710162299907264e-06, "loss": 0.091, "step": 127310 }, { "epoch": 3.7653043118235052, "grad_norm": 0.8382494449615479, "learning_rate": 2.670889540071787e-06, "loss": 0.0885, "step": 127320 }, { "epoch": 3.765600047317679, "grad_norm": 1.156571388244629, "learning_rate": 2.6707628501528476e-06, "loss": 0.0829, "step": 127330 }, { "epoch": 3.765895782811853, "grad_norm": 1.0009788274765015, "learning_rate": 2.6706361602339083e-06, "loss": 0.0606, "step": 127340 }, { "epoch": 3.766191518306027, "grad_norm": 0.6813039183616638, "learning_rate": 2.6705094703149687e-06, "loss": 0.0706, "step": 127350 }, { "epoch": 3.766487253800201, "grad_norm": 0.9660332798957825, "learning_rate": 2.6703827803960295e-06, "loss": 0.0803, "step": 127360 }, { "epoch": 3.7667829892943754, "grad_norm": 0.5850870013237, "learning_rate": 2.67025609047709e-06, "loss": 0.071, "step": 127370 }, { "epoch": 3.767078724788549, "grad_norm": 1.0732630491256714, "learning_rate": 2.670129400558151e-06, "loss": 0.0727, "step": 127380 }, { "epoch": 3.7673744602827233, "grad_norm": 1.1922627687454224, "learning_rate": 2.6700027106392114e-06, "loss": 0.0799, "step": 127390 }, { "epoch": 3.767670195776897, "grad_norm": 1.0936263799667358, "learning_rate": 2.6698760207202722e-06, "loss": 0.082, "step": 127400 }, { "epoch": 3.767965931271071, "grad_norm": 1.002500057220459, "learning_rate": 2.6697493308013326e-06, "loss": 0.0819, "step": 127410 }, { "epoch": 3.768261666765245, "grad_norm": 0.6302407383918762, "learning_rate": 2.6696226408823934e-06, "loss": 0.0698, "step": 127420 }, { "epoch": 3.768557402259419, "grad_norm": 1.1347079277038574, "learning_rate": 2.6694959509634538e-06, "loss": 0.076, "step": 127430 }, { "epoch": 3.7688531377535934, "grad_norm": 0.803037703037262, "learning_rate": 2.6693692610445145e-06, "loss": 0.0603, "step": 127440 }, { "epoch": 3.769148873247767, "grad_norm": 0.7356593012809753, "learning_rate": 2.669242571125575e-06, "loss": 0.0618, "step": 127450 }, { "epoch": 3.7694446087419413, "grad_norm": 0.7866820096969604, "learning_rate": 2.669115881206636e-06, "loss": 0.0757, "step": 127460 }, { "epoch": 3.7697403442361153, "grad_norm": 0.6051468253135681, "learning_rate": 2.6689891912876965e-06, "loss": 0.0888, "step": 127470 }, { "epoch": 3.770036079730289, "grad_norm": 0.8608277440071106, "learning_rate": 2.6688625013687573e-06, "loss": 0.0764, "step": 127480 }, { "epoch": 3.770331815224463, "grad_norm": 0.7077103853225708, "learning_rate": 2.6687358114498176e-06, "loss": 0.065, "step": 127490 }, { "epoch": 3.770627550718637, "grad_norm": 0.7482923269271851, "learning_rate": 2.6686091215308784e-06, "loss": 0.0775, "step": 127500 }, { "epoch": 3.7709232862128115, "grad_norm": 1.074756145477295, "learning_rate": 2.668482431611939e-06, "loss": 0.0741, "step": 127510 }, { "epoch": 3.7712190217069854, "grad_norm": 1.3314441442489624, "learning_rate": 2.6683557416929996e-06, "loss": 0.0739, "step": 127520 }, { "epoch": 3.7715147572011594, "grad_norm": 0.9889296293258667, "learning_rate": 2.66822905177406e-06, "loss": 0.0733, "step": 127530 }, { "epoch": 3.7718104926953333, "grad_norm": 0.7317382097244263, "learning_rate": 2.668102361855121e-06, "loss": 0.0653, "step": 127540 }, { "epoch": 3.7721062281895072, "grad_norm": 0.7665560245513916, "learning_rate": 2.6679756719361815e-06, "loss": 0.072, "step": 127550 }, { "epoch": 3.772401963683681, "grad_norm": 1.0319424867630005, "learning_rate": 2.667848982017242e-06, "loss": 0.0798, "step": 127560 }, { "epoch": 3.772697699177855, "grad_norm": 0.6168349385261536, "learning_rate": 2.6677222920983027e-06, "loss": 0.0802, "step": 127570 }, { "epoch": 3.7729934346720295, "grad_norm": 0.8816723227500916, "learning_rate": 2.667595602179363e-06, "loss": 0.059, "step": 127580 }, { "epoch": 3.7732891701662035, "grad_norm": 0.4634103775024414, "learning_rate": 2.667468912260424e-06, "loss": 0.0571, "step": 127590 }, { "epoch": 3.7735849056603774, "grad_norm": 0.4763525724411011, "learning_rate": 2.6673422223414842e-06, "loss": 0.0726, "step": 127600 }, { "epoch": 3.7738806411545514, "grad_norm": 0.6775370240211487, "learning_rate": 2.667215532422545e-06, "loss": 0.0802, "step": 127610 }, { "epoch": 3.7741763766487253, "grad_norm": 0.5989536046981812, "learning_rate": 2.667088842503606e-06, "loss": 0.077, "step": 127620 }, { "epoch": 3.7744721121428992, "grad_norm": 0.5602604746818542, "learning_rate": 2.6669621525846666e-06, "loss": 0.0682, "step": 127630 }, { "epoch": 3.774767847637073, "grad_norm": 1.2658072710037231, "learning_rate": 2.666835462665727e-06, "loss": 0.0682, "step": 127640 }, { "epoch": 3.7750635831312476, "grad_norm": 1.3715168237686157, "learning_rate": 2.6667087727467877e-06, "loss": 0.0691, "step": 127650 }, { "epoch": 3.7753593186254215, "grad_norm": 1.21599543094635, "learning_rate": 2.666582082827848e-06, "loss": 0.0716, "step": 127660 }, { "epoch": 3.7756550541195955, "grad_norm": 0.7672065496444702, "learning_rate": 2.666455392908909e-06, "loss": 0.0731, "step": 127670 }, { "epoch": 3.7759507896137694, "grad_norm": 0.5595796704292297, "learning_rate": 2.6663287029899693e-06, "loss": 0.0777, "step": 127680 }, { "epoch": 3.7762465251079433, "grad_norm": 0.81758052110672, "learning_rate": 2.66620201307103e-06, "loss": 0.0573, "step": 127690 }, { "epoch": 3.7765422606021177, "grad_norm": 1.142038106918335, "learning_rate": 2.666075323152091e-06, "loss": 0.0888, "step": 127700 }, { "epoch": 3.7768379960962912, "grad_norm": 1.0697208642959595, "learning_rate": 2.6659486332331516e-06, "loss": 0.0771, "step": 127710 }, { "epoch": 3.7771337315904656, "grad_norm": 1.2805614471435547, "learning_rate": 2.665821943314212e-06, "loss": 0.0879, "step": 127720 }, { "epoch": 3.7774294670846396, "grad_norm": 0.7874007821083069, "learning_rate": 2.6656952533952728e-06, "loss": 0.0557, "step": 127730 }, { "epoch": 3.7777252025788135, "grad_norm": 0.652519702911377, "learning_rate": 2.665568563476333e-06, "loss": 0.0521, "step": 127740 }, { "epoch": 3.7780209380729874, "grad_norm": 0.8923032879829407, "learning_rate": 2.665441873557394e-06, "loss": 0.0891, "step": 127750 }, { "epoch": 3.7783166735671614, "grad_norm": 0.8395608067512512, "learning_rate": 2.6653151836384543e-06, "loss": 0.0762, "step": 127760 }, { "epoch": 3.7786124090613358, "grad_norm": 0.7654407620429993, "learning_rate": 2.665188493719515e-06, "loss": 0.0645, "step": 127770 }, { "epoch": 3.7789081445555097, "grad_norm": 1.0595718622207642, "learning_rate": 2.665061803800576e-06, "loss": 0.0739, "step": 127780 }, { "epoch": 3.7792038800496837, "grad_norm": 0.6079632639884949, "learning_rate": 2.6649351138816367e-06, "loss": 0.0611, "step": 127790 }, { "epoch": 3.7794996155438576, "grad_norm": 1.0220218896865845, "learning_rate": 2.664808423962697e-06, "loss": 0.0955, "step": 127800 }, { "epoch": 3.7797953510380315, "grad_norm": 1.060996651649475, "learning_rate": 2.664681734043758e-06, "loss": 0.0782, "step": 127810 }, { "epoch": 3.7800910865322055, "grad_norm": 0.9561481475830078, "learning_rate": 2.664555044124818e-06, "loss": 0.0803, "step": 127820 }, { "epoch": 3.7803868220263794, "grad_norm": 0.7371719479560852, "learning_rate": 2.664428354205879e-06, "loss": 0.0704, "step": 127830 }, { "epoch": 3.780682557520554, "grad_norm": 1.0963119268417358, "learning_rate": 2.6643016642869393e-06, "loss": 0.0648, "step": 127840 }, { "epoch": 3.7809782930147278, "grad_norm": 0.8276650905609131, "learning_rate": 2.664174974368e-06, "loss": 0.0837, "step": 127850 }, { "epoch": 3.7812740285089017, "grad_norm": 0.9471026659011841, "learning_rate": 2.664048284449061e-06, "loss": 0.0701, "step": 127860 }, { "epoch": 3.7815697640030757, "grad_norm": 0.6930776238441467, "learning_rate": 2.6639215945301217e-06, "loss": 0.0722, "step": 127870 }, { "epoch": 3.7818654994972496, "grad_norm": 1.363451361656189, "learning_rate": 2.663794904611182e-06, "loss": 0.0723, "step": 127880 }, { "epoch": 3.7821612349914235, "grad_norm": 1.2315022945404053, "learning_rate": 2.663668214692243e-06, "loss": 0.0695, "step": 127890 }, { "epoch": 3.7824569704855975, "grad_norm": 1.3041812181472778, "learning_rate": 2.6635415247733032e-06, "loss": 0.075, "step": 127900 }, { "epoch": 3.782752705979772, "grad_norm": 0.6857536435127258, "learning_rate": 2.663414834854364e-06, "loss": 0.0842, "step": 127910 }, { "epoch": 3.783048441473946, "grad_norm": 0.5864793658256531, "learning_rate": 2.6632881449354244e-06, "loss": 0.0581, "step": 127920 }, { "epoch": 3.7833441769681198, "grad_norm": 0.7360021471977234, "learning_rate": 2.663161455016485e-06, "loss": 0.0698, "step": 127930 }, { "epoch": 3.7836399124622937, "grad_norm": 1.141667366027832, "learning_rate": 2.663034765097546e-06, "loss": 0.0565, "step": 127940 }, { "epoch": 3.7839356479564676, "grad_norm": 0.8785054087638855, "learning_rate": 2.6629080751786068e-06, "loss": 0.0855, "step": 127950 }, { "epoch": 3.7842313834506416, "grad_norm": 0.5501366853713989, "learning_rate": 2.662781385259667e-06, "loss": 0.0727, "step": 127960 }, { "epoch": 3.7845271189448155, "grad_norm": 0.7097815871238708, "learning_rate": 2.6626546953407275e-06, "loss": 0.068, "step": 127970 }, { "epoch": 3.78482285443899, "grad_norm": 0.7130709886550903, "learning_rate": 2.6625280054217883e-06, "loss": 0.0584, "step": 127980 }, { "epoch": 3.785118589933164, "grad_norm": 0.5882846117019653, "learning_rate": 2.6624013155028486e-06, "loss": 0.0691, "step": 127990 }, { "epoch": 3.785414325427338, "grad_norm": 0.9864741563796997, "learning_rate": 2.6622746255839094e-06, "loss": 0.0723, "step": 128000 }, { "epoch": 3.7857100609215117, "grad_norm": 0.8572033643722534, "learning_rate": 2.66214793566497e-06, "loss": 0.0743, "step": 128010 }, { "epoch": 3.7860057964156857, "grad_norm": 0.8516945242881775, "learning_rate": 2.662021245746031e-06, "loss": 0.0817, "step": 128020 }, { "epoch": 3.78630153190986, "grad_norm": 0.49943673610687256, "learning_rate": 2.6618945558270914e-06, "loss": 0.0644, "step": 128030 }, { "epoch": 3.7865972674040336, "grad_norm": 0.8436969518661499, "learning_rate": 2.661767865908152e-06, "loss": 0.0735, "step": 128040 }, { "epoch": 3.786893002898208, "grad_norm": 0.4952230155467987, "learning_rate": 2.6616411759892125e-06, "loss": 0.0806, "step": 128050 }, { "epoch": 3.787188738392382, "grad_norm": 1.128209114074707, "learning_rate": 2.6615144860702733e-06, "loss": 0.0733, "step": 128060 }, { "epoch": 3.787484473886556, "grad_norm": 0.8566361665725708, "learning_rate": 2.6613877961513337e-06, "loss": 0.0633, "step": 128070 }, { "epoch": 3.78778020938073, "grad_norm": 1.012367606163025, "learning_rate": 2.6612611062323945e-06, "loss": 0.0658, "step": 128080 }, { "epoch": 3.7880759448749037, "grad_norm": 1.4895774126052856, "learning_rate": 2.661134416313455e-06, "loss": 0.0688, "step": 128090 }, { "epoch": 3.788371680369078, "grad_norm": 0.45326051115989685, "learning_rate": 2.661007726394516e-06, "loss": 0.076, "step": 128100 }, { "epoch": 3.788667415863252, "grad_norm": 2.245497226715088, "learning_rate": 2.6608810364755764e-06, "loss": 0.0941, "step": 128110 }, { "epoch": 3.788963151357426, "grad_norm": 1.004273772239685, "learning_rate": 2.6607543465566372e-06, "loss": 0.0795, "step": 128120 }, { "epoch": 3.7892588868516, "grad_norm": 0.6463133692741394, "learning_rate": 2.6606276566376976e-06, "loss": 0.0632, "step": 128130 }, { "epoch": 3.789554622345774, "grad_norm": 0.7677014470100403, "learning_rate": 2.6605009667187584e-06, "loss": 0.055, "step": 128140 }, { "epoch": 3.789850357839948, "grad_norm": 1.306758999824524, "learning_rate": 2.6603742767998187e-06, "loss": 0.0977, "step": 128150 }, { "epoch": 3.790146093334122, "grad_norm": 0.5950707197189331, "learning_rate": 2.6602475868808795e-06, "loss": 0.0872, "step": 128160 }, { "epoch": 3.790441828828296, "grad_norm": 1.077085018157959, "learning_rate": 2.66012089696194e-06, "loss": 0.0888, "step": 128170 }, { "epoch": 3.79073756432247, "grad_norm": 0.6487597227096558, "learning_rate": 2.659994207043001e-06, "loss": 0.0886, "step": 128180 }, { "epoch": 3.791033299816644, "grad_norm": 0.7128481268882751, "learning_rate": 2.6598675171240615e-06, "loss": 0.0707, "step": 128190 }, { "epoch": 3.791329035310818, "grad_norm": 0.7258906960487366, "learning_rate": 2.6597408272051223e-06, "loss": 0.0791, "step": 128200 }, { "epoch": 3.791624770804992, "grad_norm": 0.5805373787879944, "learning_rate": 2.6596141372861826e-06, "loss": 0.066, "step": 128210 }, { "epoch": 3.791920506299166, "grad_norm": 0.6193425059318542, "learning_rate": 2.6594874473672434e-06, "loss": 0.0876, "step": 128220 }, { "epoch": 3.79221624179334, "grad_norm": 0.8246262073516846, "learning_rate": 2.6593607574483038e-06, "loss": 0.074, "step": 128230 }, { "epoch": 3.792511977287514, "grad_norm": 0.7666680216789246, "learning_rate": 2.6592340675293646e-06, "loss": 0.0557, "step": 128240 }, { "epoch": 3.792807712781688, "grad_norm": 1.0278490781784058, "learning_rate": 2.659107377610425e-06, "loss": 0.1029, "step": 128250 }, { "epoch": 3.793103448275862, "grad_norm": 0.9896033406257629, "learning_rate": 2.658980687691486e-06, "loss": 0.08, "step": 128260 }, { "epoch": 3.793399183770036, "grad_norm": 0.7325714230537415, "learning_rate": 2.6588539977725465e-06, "loss": 0.069, "step": 128270 }, { "epoch": 3.79369491926421, "grad_norm": 0.7597594261169434, "learning_rate": 2.6587273078536073e-06, "loss": 0.0719, "step": 128280 }, { "epoch": 3.7939906547583844, "grad_norm": 0.8703689575195312, "learning_rate": 2.6586006179346677e-06, "loss": 0.0653, "step": 128290 }, { "epoch": 3.794286390252558, "grad_norm": 0.9765953421592712, "learning_rate": 2.6584739280157285e-06, "loss": 0.0794, "step": 128300 }, { "epoch": 3.7945821257467323, "grad_norm": 0.8463911414146423, "learning_rate": 2.658347238096789e-06, "loss": 0.0774, "step": 128310 }, { "epoch": 3.794877861240906, "grad_norm": 0.6121952533721924, "learning_rate": 2.6582205481778496e-06, "loss": 0.0761, "step": 128320 }, { "epoch": 3.79517359673508, "grad_norm": 1.0547555685043335, "learning_rate": 2.65809385825891e-06, "loss": 0.0863, "step": 128330 }, { "epoch": 3.795469332229254, "grad_norm": 0.7563843131065369, "learning_rate": 2.657967168339971e-06, "loss": 0.069, "step": 128340 }, { "epoch": 3.795765067723428, "grad_norm": 0.7848578691482544, "learning_rate": 2.6578404784210316e-06, "loss": 0.0787, "step": 128350 }, { "epoch": 3.7960608032176024, "grad_norm": 0.8611167669296265, "learning_rate": 2.6577137885020924e-06, "loss": 0.076, "step": 128360 }, { "epoch": 3.796356538711776, "grad_norm": 0.7275288701057434, "learning_rate": 2.6575870985831527e-06, "loss": 0.0714, "step": 128370 }, { "epoch": 3.7966522742059503, "grad_norm": 0.5818515419960022, "learning_rate": 2.657460408664213e-06, "loss": 0.0682, "step": 128380 }, { "epoch": 3.7969480097001242, "grad_norm": 0.5167914032936096, "learning_rate": 2.657333718745274e-06, "loss": 0.0481, "step": 128390 }, { "epoch": 3.797243745194298, "grad_norm": 0.7287124395370483, "learning_rate": 2.6572070288263342e-06, "loss": 0.0866, "step": 128400 }, { "epoch": 3.797539480688472, "grad_norm": 0.7653347849845886, "learning_rate": 2.657080338907395e-06, "loss": 0.0781, "step": 128410 }, { "epoch": 3.797835216182646, "grad_norm": 1.1462515592575073, "learning_rate": 2.656953648988456e-06, "loss": 0.0698, "step": 128420 }, { "epoch": 3.7981309516768205, "grad_norm": 0.2959475815296173, "learning_rate": 2.6568269590695166e-06, "loss": 0.0678, "step": 128430 }, { "epoch": 3.7984266871709944, "grad_norm": 0.9983434081077576, "learning_rate": 2.656700269150577e-06, "loss": 0.0714, "step": 128440 }, { "epoch": 3.7987224226651684, "grad_norm": 0.810765266418457, "learning_rate": 2.6565735792316378e-06, "loss": 0.0787, "step": 128450 }, { "epoch": 3.7990181581593423, "grad_norm": 0.9051943421363831, "learning_rate": 2.656446889312698e-06, "loss": 0.0812, "step": 128460 }, { "epoch": 3.7993138936535162, "grad_norm": 0.6607500910758972, "learning_rate": 2.656320199393759e-06, "loss": 0.0806, "step": 128470 }, { "epoch": 3.79960962914769, "grad_norm": 1.2994002103805542, "learning_rate": 2.6561935094748193e-06, "loss": 0.0651, "step": 128480 }, { "epoch": 3.799905364641864, "grad_norm": 1.0860395431518555, "learning_rate": 2.65606681955588e-06, "loss": 0.0646, "step": 128490 }, { "epoch": 3.8002011001360385, "grad_norm": 0.8921497464179993, "learning_rate": 2.655940129636941e-06, "loss": 0.0709, "step": 128500 }, { "epoch": 3.8004968356302125, "grad_norm": 0.6951353549957275, "learning_rate": 2.6558134397180017e-06, "loss": 0.0758, "step": 128510 }, { "epoch": 3.8007925711243864, "grad_norm": 0.930419385433197, "learning_rate": 2.655686749799062e-06, "loss": 0.0761, "step": 128520 }, { "epoch": 3.8010883066185603, "grad_norm": 0.5268458724021912, "learning_rate": 2.655560059880123e-06, "loss": 0.0663, "step": 128530 }, { "epoch": 3.8013840421127343, "grad_norm": 0.4902164041996002, "learning_rate": 2.655433369961183e-06, "loss": 0.049, "step": 128540 }, { "epoch": 3.8016797776069082, "grad_norm": 0.468753844499588, "learning_rate": 2.655306680042244e-06, "loss": 0.0667, "step": 128550 }, { "epoch": 3.801975513101082, "grad_norm": 0.6412757635116577, "learning_rate": 2.6551799901233043e-06, "loss": 0.0806, "step": 128560 }, { "epoch": 3.8022712485952566, "grad_norm": 1.0592820644378662, "learning_rate": 2.655053300204365e-06, "loss": 0.0738, "step": 128570 }, { "epoch": 3.8025669840894305, "grad_norm": 1.0081887245178223, "learning_rate": 2.654926610285426e-06, "loss": 0.0823, "step": 128580 }, { "epoch": 3.8028627195836044, "grad_norm": 0.887351393699646, "learning_rate": 2.6547999203664867e-06, "loss": 0.0514, "step": 128590 }, { "epoch": 3.8031584550777784, "grad_norm": 0.42963674664497375, "learning_rate": 2.654673230447547e-06, "loss": 0.0648, "step": 128600 }, { "epoch": 3.8034541905719523, "grad_norm": 1.168729305267334, "learning_rate": 2.654546540528608e-06, "loss": 0.0922, "step": 128610 }, { "epoch": 3.8037499260661267, "grad_norm": 0.8546903133392334, "learning_rate": 2.6544198506096682e-06, "loss": 0.0711, "step": 128620 }, { "epoch": 3.8040456615603, "grad_norm": 0.9428926110267639, "learning_rate": 2.654293160690729e-06, "loss": 0.0764, "step": 128630 }, { "epoch": 3.8043413970544746, "grad_norm": 0.5631933212280273, "learning_rate": 2.6541664707717894e-06, "loss": 0.0522, "step": 128640 }, { "epoch": 3.8046371325486485, "grad_norm": 0.44968730211257935, "learning_rate": 2.65403978085285e-06, "loss": 0.0613, "step": 128650 }, { "epoch": 3.8049328680428225, "grad_norm": 1.4029968976974487, "learning_rate": 2.653913090933911e-06, "loss": 0.0874, "step": 128660 }, { "epoch": 3.8052286035369964, "grad_norm": 0.8746681809425354, "learning_rate": 2.6537864010149717e-06, "loss": 0.0677, "step": 128670 }, { "epoch": 3.8055243390311704, "grad_norm": 0.9459499716758728, "learning_rate": 2.653659711096032e-06, "loss": 0.0751, "step": 128680 }, { "epoch": 3.8058200745253448, "grad_norm": 0.9125748872756958, "learning_rate": 2.653533021177093e-06, "loss": 0.0669, "step": 128690 }, { "epoch": 3.8061158100195187, "grad_norm": 0.7731539011001587, "learning_rate": 2.6534063312581533e-06, "loss": 0.0689, "step": 128700 }, { "epoch": 3.8064115455136927, "grad_norm": 0.6219837069511414, "learning_rate": 2.653279641339214e-06, "loss": 0.081, "step": 128710 }, { "epoch": 3.8067072810078666, "grad_norm": 0.6828524470329285, "learning_rate": 2.6531529514202744e-06, "loss": 0.073, "step": 128720 }, { "epoch": 3.8070030165020405, "grad_norm": 0.6627485156059265, "learning_rate": 2.6530262615013352e-06, "loss": 0.0811, "step": 128730 }, { "epoch": 3.8072987519962145, "grad_norm": 1.2238831520080566, "learning_rate": 2.652899571582396e-06, "loss": 0.0613, "step": 128740 }, { "epoch": 3.8075944874903884, "grad_norm": 0.5653150081634521, "learning_rate": 2.652772881663457e-06, "loss": 0.0768, "step": 128750 }, { "epoch": 3.807890222984563, "grad_norm": 0.5270010232925415, "learning_rate": 2.652646191744517e-06, "loss": 0.0689, "step": 128760 }, { "epoch": 3.8081859584787368, "grad_norm": 0.5565055012702942, "learning_rate": 2.652519501825578e-06, "loss": 0.0715, "step": 128770 }, { "epoch": 3.8084816939729107, "grad_norm": 0.45103466510772705, "learning_rate": 2.6523928119066383e-06, "loss": 0.0648, "step": 128780 }, { "epoch": 3.8087774294670846, "grad_norm": 0.837881863117218, "learning_rate": 2.6522661219876987e-06, "loss": 0.0551, "step": 128790 }, { "epoch": 3.8090731649612586, "grad_norm": 0.7784356474876404, "learning_rate": 2.6521394320687595e-06, "loss": 0.0724, "step": 128800 }, { "epoch": 3.8093689004554325, "grad_norm": 1.1777278184890747, "learning_rate": 2.65201274214982e-06, "loss": 0.0739, "step": 128810 }, { "epoch": 3.8096646359496065, "grad_norm": 0.5077579021453857, "learning_rate": 2.651886052230881e-06, "loss": 0.0762, "step": 128820 }, { "epoch": 3.809960371443781, "grad_norm": 0.7560803890228271, "learning_rate": 2.6517593623119414e-06, "loss": 0.0531, "step": 128830 }, { "epoch": 3.810256106937955, "grad_norm": 0.5052004456520081, "learning_rate": 2.651632672393002e-06, "loss": 0.0635, "step": 128840 }, { "epoch": 3.8105518424321287, "grad_norm": 0.7442567348480225, "learning_rate": 2.6515059824740626e-06, "loss": 0.0818, "step": 128850 }, { "epoch": 3.8108475779263027, "grad_norm": 1.0439378023147583, "learning_rate": 2.6513792925551234e-06, "loss": 0.0919, "step": 128860 }, { "epoch": 3.8111433134204766, "grad_norm": 1.265464425086975, "learning_rate": 2.6512526026361837e-06, "loss": 0.0798, "step": 128870 }, { "epoch": 3.8114390489146506, "grad_norm": 0.6711717844009399, "learning_rate": 2.6511259127172445e-06, "loss": 0.0714, "step": 128880 }, { "epoch": 3.8117347844088245, "grad_norm": 1.4075886011123657, "learning_rate": 2.650999222798305e-06, "loss": 0.0625, "step": 128890 }, { "epoch": 3.812030519902999, "grad_norm": 0.8660832643508911, "learning_rate": 2.650872532879366e-06, "loss": 0.0669, "step": 128900 }, { "epoch": 3.812326255397173, "grad_norm": 0.6192325353622437, "learning_rate": 2.6507458429604265e-06, "loss": 0.0799, "step": 128910 }, { "epoch": 3.812621990891347, "grad_norm": 0.9020535349845886, "learning_rate": 2.6506191530414872e-06, "loss": 0.0763, "step": 128920 }, { "epoch": 3.8129177263855207, "grad_norm": 1.003928780555725, "learning_rate": 2.6504924631225476e-06, "loss": 0.0616, "step": 128930 }, { "epoch": 3.8132134618796947, "grad_norm": 1.0079243183135986, "learning_rate": 2.6503657732036084e-06, "loss": 0.0764, "step": 128940 }, { "epoch": 3.813509197373869, "grad_norm": 0.8734971880912781, "learning_rate": 2.6502390832846688e-06, "loss": 0.0741, "step": 128950 }, { "epoch": 3.8138049328680426, "grad_norm": 0.42111337184906006, "learning_rate": 2.6501123933657296e-06, "loss": 0.0778, "step": 128960 }, { "epoch": 3.814100668362217, "grad_norm": 0.914164125919342, "learning_rate": 2.64998570344679e-06, "loss": 0.0628, "step": 128970 }, { "epoch": 3.814396403856391, "grad_norm": 0.7678545713424683, "learning_rate": 2.649859013527851e-06, "loss": 0.0767, "step": 128980 }, { "epoch": 3.814692139350565, "grad_norm": 0.7059463858604431, "learning_rate": 2.6497323236089115e-06, "loss": 0.0774, "step": 128990 }, { "epoch": 3.814987874844739, "grad_norm": 0.7349643111228943, "learning_rate": 2.6496056336899723e-06, "loss": 0.0668, "step": 129000 }, { "epoch": 3.8152836103389127, "grad_norm": 0.8781865835189819, "learning_rate": 2.6494789437710327e-06, "loss": 0.0753, "step": 129010 }, { "epoch": 3.815579345833087, "grad_norm": 1.1487054824829102, "learning_rate": 2.6493522538520934e-06, "loss": 0.0813, "step": 129020 }, { "epoch": 3.815875081327261, "grad_norm": 0.5027156472206116, "learning_rate": 2.649225563933154e-06, "loss": 0.07, "step": 129030 }, { "epoch": 3.816170816821435, "grad_norm": 0.8225622773170471, "learning_rate": 2.6490988740142146e-06, "loss": 0.065, "step": 129040 }, { "epoch": 3.816466552315609, "grad_norm": 1.0244160890579224, "learning_rate": 2.648972184095275e-06, "loss": 0.0644, "step": 129050 }, { "epoch": 3.816762287809783, "grad_norm": 0.63130784034729, "learning_rate": 2.648845494176336e-06, "loss": 0.0658, "step": 129060 }, { "epoch": 3.817058023303957, "grad_norm": 0.6503874063491821, "learning_rate": 2.6487188042573966e-06, "loss": 0.0675, "step": 129070 }, { "epoch": 3.8173537587981308, "grad_norm": 0.7893216609954834, "learning_rate": 2.6485921143384573e-06, "loss": 0.067, "step": 129080 }, { "epoch": 3.817649494292305, "grad_norm": 0.6936733722686768, "learning_rate": 2.6484654244195177e-06, "loss": 0.051, "step": 129090 }, { "epoch": 3.817945229786479, "grad_norm": 0.8636159896850586, "learning_rate": 2.6483387345005785e-06, "loss": 0.0741, "step": 129100 }, { "epoch": 3.818240965280653, "grad_norm": 0.7680098414421082, "learning_rate": 2.648212044581639e-06, "loss": 0.0863, "step": 129110 }, { "epoch": 3.818536700774827, "grad_norm": 0.9693307876586914, "learning_rate": 2.6480853546626997e-06, "loss": 0.0588, "step": 129120 }, { "epoch": 3.818832436269001, "grad_norm": 0.6579539775848389, "learning_rate": 2.64795866474376e-06, "loss": 0.0911, "step": 129130 }, { "epoch": 3.819128171763175, "grad_norm": 0.7738545536994934, "learning_rate": 2.6478319748248212e-06, "loss": 0.0525, "step": 129140 }, { "epoch": 3.819423907257349, "grad_norm": 0.9339158535003662, "learning_rate": 2.6477052849058816e-06, "loss": 0.0824, "step": 129150 }, { "epoch": 3.819719642751523, "grad_norm": 0.8419684171676636, "learning_rate": 2.6475785949869424e-06, "loss": 0.0678, "step": 129160 }, { "epoch": 3.820015378245697, "grad_norm": 1.3874894380569458, "learning_rate": 2.6474519050680028e-06, "loss": 0.0848, "step": 129170 }, { "epoch": 3.820311113739871, "grad_norm": 0.5843542814254761, "learning_rate": 2.6473252151490635e-06, "loss": 0.0534, "step": 129180 }, { "epoch": 3.820606849234045, "grad_norm": 0.6635259985923767, "learning_rate": 2.647198525230124e-06, "loss": 0.0528, "step": 129190 }, { "epoch": 3.820902584728219, "grad_norm": 0.6078441143035889, "learning_rate": 2.6470718353111847e-06, "loss": 0.0725, "step": 129200 }, { "epoch": 3.8211983202223934, "grad_norm": 0.5421531796455383, "learning_rate": 2.646945145392245e-06, "loss": 0.0659, "step": 129210 }, { "epoch": 3.821494055716567, "grad_norm": 0.49061867594718933, "learning_rate": 2.646818455473306e-06, "loss": 0.077, "step": 129220 }, { "epoch": 3.8217897912107412, "grad_norm": 0.8582271337509155, "learning_rate": 2.6466917655543666e-06, "loss": 0.0722, "step": 129230 }, { "epoch": 3.822085526704915, "grad_norm": 0.4894953668117523, "learning_rate": 2.646565075635427e-06, "loss": 0.0561, "step": 129240 }, { "epoch": 3.822381262199089, "grad_norm": 1.1467344760894775, "learning_rate": 2.646438385716488e-06, "loss": 0.0845, "step": 129250 }, { "epoch": 3.822676997693263, "grad_norm": 1.7608710527420044, "learning_rate": 2.646311695797548e-06, "loss": 0.087, "step": 129260 }, { "epoch": 3.822972733187437, "grad_norm": 0.5565053224563599, "learning_rate": 2.646185005878609e-06, "loss": 0.0651, "step": 129270 }, { "epoch": 3.8232684686816114, "grad_norm": 0.8653407096862793, "learning_rate": 2.6460583159596693e-06, "loss": 0.0744, "step": 129280 }, { "epoch": 3.823564204175785, "grad_norm": 0.6305488348007202, "learning_rate": 2.64593162604073e-06, "loss": 0.062, "step": 129290 }, { "epoch": 3.8238599396699593, "grad_norm": 0.7523651719093323, "learning_rate": 2.645804936121791e-06, "loss": 0.069, "step": 129300 }, { "epoch": 3.8241556751641332, "grad_norm": 0.8754561543464661, "learning_rate": 2.6456782462028517e-06, "loss": 0.0714, "step": 129310 }, { "epoch": 3.824451410658307, "grad_norm": 0.6125627160072327, "learning_rate": 2.645551556283912e-06, "loss": 0.0697, "step": 129320 }, { "epoch": 3.824747146152481, "grad_norm": 1.1618880033493042, "learning_rate": 2.645424866364973e-06, "loss": 0.0839, "step": 129330 }, { "epoch": 3.825042881646655, "grad_norm": 0.7362832427024841, "learning_rate": 2.645298176446033e-06, "loss": 0.0757, "step": 129340 }, { "epoch": 3.8253386171408295, "grad_norm": 0.665908932685852, "learning_rate": 2.645171486527094e-06, "loss": 0.0832, "step": 129350 }, { "epoch": 3.8256343526350034, "grad_norm": 0.6520746350288391, "learning_rate": 2.6450447966081544e-06, "loss": 0.0826, "step": 129360 }, { "epoch": 3.8259300881291773, "grad_norm": 1.2803641557693481, "learning_rate": 2.644918106689215e-06, "loss": 0.082, "step": 129370 }, { "epoch": 3.8262258236233513, "grad_norm": 0.5707926154136658, "learning_rate": 2.644791416770276e-06, "loss": 0.0929, "step": 129380 }, { "epoch": 3.8265215591175252, "grad_norm": 1.2082446813583374, "learning_rate": 2.6446647268513367e-06, "loss": 0.0643, "step": 129390 }, { "epoch": 3.826817294611699, "grad_norm": 0.9007300138473511, "learning_rate": 2.644538036932397e-06, "loss": 0.0944, "step": 129400 }, { "epoch": 3.827113030105873, "grad_norm": 0.7726150751113892, "learning_rate": 2.644411347013458e-06, "loss": 0.0849, "step": 129410 }, { "epoch": 3.8274087656000475, "grad_norm": 1.0747798681259155, "learning_rate": 2.6442846570945183e-06, "loss": 0.0836, "step": 129420 }, { "epoch": 3.8277045010942214, "grad_norm": 0.9199180006980896, "learning_rate": 2.644157967175579e-06, "loss": 0.0683, "step": 129430 }, { "epoch": 3.8280002365883954, "grad_norm": 0.9812209010124207, "learning_rate": 2.6440312772566394e-06, "loss": 0.0593, "step": 129440 }, { "epoch": 3.8282959720825693, "grad_norm": 0.800288200378418, "learning_rate": 2.6439045873377e-06, "loss": 0.083, "step": 129450 }, { "epoch": 3.8285917075767433, "grad_norm": 1.5709021091461182, "learning_rate": 2.643777897418761e-06, "loss": 0.0866, "step": 129460 }, { "epoch": 3.828887443070917, "grad_norm": 0.7931678295135498, "learning_rate": 2.6436512074998218e-06, "loss": 0.0871, "step": 129470 }, { "epoch": 3.829183178565091, "grad_norm": 0.8547167778015137, "learning_rate": 2.643524517580882e-06, "loss": 0.0781, "step": 129480 }, { "epoch": 3.8294789140592655, "grad_norm": 0.7268905639648438, "learning_rate": 2.643397827661943e-06, "loss": 0.0605, "step": 129490 }, { "epoch": 3.8297746495534395, "grad_norm": 1.147843837738037, "learning_rate": 2.6432711377430033e-06, "loss": 0.0882, "step": 129500 }, { "epoch": 3.8300703850476134, "grad_norm": 1.0815094709396362, "learning_rate": 2.643144447824064e-06, "loss": 0.0788, "step": 129510 }, { "epoch": 3.8303661205417874, "grad_norm": 0.8584821820259094, "learning_rate": 2.6430177579051245e-06, "loss": 0.0746, "step": 129520 }, { "epoch": 3.8306618560359613, "grad_norm": 1.9343749284744263, "learning_rate": 2.6428910679861852e-06, "loss": 0.0791, "step": 129530 }, { "epoch": 3.8309575915301357, "grad_norm": 0.9403412342071533, "learning_rate": 2.642764378067246e-06, "loss": 0.0516, "step": 129540 }, { "epoch": 3.831253327024309, "grad_norm": 0.5299974679946899, "learning_rate": 2.642637688148307e-06, "loss": 0.0845, "step": 129550 }, { "epoch": 3.8315490625184836, "grad_norm": 0.8064263463020325, "learning_rate": 2.642510998229367e-06, "loss": 0.0816, "step": 129560 }, { "epoch": 3.8318447980126575, "grad_norm": 0.7842507362365723, "learning_rate": 2.642384308310428e-06, "loss": 0.0694, "step": 129570 }, { "epoch": 3.8321405335068315, "grad_norm": 0.6880940198898315, "learning_rate": 2.6422576183914883e-06, "loss": 0.0761, "step": 129580 }, { "epoch": 3.8324362690010054, "grad_norm": 0.7653167247772217, "learning_rate": 2.642130928472549e-06, "loss": 0.0617, "step": 129590 }, { "epoch": 3.8327320044951794, "grad_norm": 0.7224128842353821, "learning_rate": 2.6420042385536095e-06, "loss": 0.069, "step": 129600 }, { "epoch": 3.8330277399893538, "grad_norm": 1.451820731163025, "learning_rate": 2.6418775486346703e-06, "loss": 0.064, "step": 129610 }, { "epoch": 3.8333234754835277, "grad_norm": 1.2573912143707275, "learning_rate": 2.641750858715731e-06, "loss": 0.0925, "step": 129620 }, { "epoch": 3.8336192109777016, "grad_norm": 1.0139133930206299, "learning_rate": 2.6416241687967914e-06, "loss": 0.085, "step": 129630 }, { "epoch": 3.8339149464718756, "grad_norm": 1.1045112609863281, "learning_rate": 2.6414974788778522e-06, "loss": 0.0591, "step": 129640 }, { "epoch": 3.8342106819660495, "grad_norm": 0.68144690990448, "learning_rate": 2.6413707889589126e-06, "loss": 0.0692, "step": 129650 }, { "epoch": 3.8345064174602235, "grad_norm": 0.5754064321517944, "learning_rate": 2.6412440990399734e-06, "loss": 0.0685, "step": 129660 }, { "epoch": 3.8348021529543974, "grad_norm": 0.6596006751060486, "learning_rate": 2.6411174091210338e-06, "loss": 0.0734, "step": 129670 }, { "epoch": 3.835097888448572, "grad_norm": 1.0356169939041138, "learning_rate": 2.6409907192020945e-06, "loss": 0.0571, "step": 129680 }, { "epoch": 3.8353936239427457, "grad_norm": 0.6202601194381714, "learning_rate": 2.640864029283155e-06, "loss": 0.0575, "step": 129690 }, { "epoch": 3.8356893594369197, "grad_norm": 1.1284606456756592, "learning_rate": 2.640737339364216e-06, "loss": 0.0837, "step": 129700 }, { "epoch": 3.8359850949310936, "grad_norm": 0.7374747395515442, "learning_rate": 2.6406106494452765e-06, "loss": 0.0781, "step": 129710 }, { "epoch": 3.8362808304252676, "grad_norm": 0.7498453259468079, "learning_rate": 2.6404839595263373e-06, "loss": 0.0784, "step": 129720 }, { "epoch": 3.8365765659194415, "grad_norm": 0.5356760621070862, "learning_rate": 2.6403572696073976e-06, "loss": 0.0755, "step": 129730 }, { "epoch": 3.8368723014136155, "grad_norm": 1.1411904096603394, "learning_rate": 2.6402305796884584e-06, "loss": 0.0608, "step": 129740 }, { "epoch": 3.83716803690779, "grad_norm": 0.6651732921600342, "learning_rate": 2.640103889769519e-06, "loss": 0.0904, "step": 129750 }, { "epoch": 3.837463772401964, "grad_norm": 0.7507479190826416, "learning_rate": 2.6399771998505796e-06, "loss": 0.0969, "step": 129760 }, { "epoch": 3.8377595078961377, "grad_norm": 0.4974394738674164, "learning_rate": 2.63985050993164e-06, "loss": 0.0649, "step": 129770 }, { "epoch": 3.8380552433903117, "grad_norm": 0.6894080638885498, "learning_rate": 2.639723820012701e-06, "loss": 0.0684, "step": 129780 }, { "epoch": 3.8383509788844856, "grad_norm": 1.1255630254745483, "learning_rate": 2.6395971300937615e-06, "loss": 0.0663, "step": 129790 }, { "epoch": 3.8386467143786596, "grad_norm": 0.8628680109977722, "learning_rate": 2.6394704401748223e-06, "loss": 0.0735, "step": 129800 }, { "epoch": 3.8389424498728335, "grad_norm": 0.8195766806602478, "learning_rate": 2.6393437502558827e-06, "loss": 0.0886, "step": 129810 }, { "epoch": 3.839238185367008, "grad_norm": 0.857933759689331, "learning_rate": 2.6392170603369435e-06, "loss": 0.0837, "step": 129820 }, { "epoch": 3.839533920861182, "grad_norm": 0.5614764094352722, "learning_rate": 2.639090370418004e-06, "loss": 0.0483, "step": 129830 }, { "epoch": 3.839829656355356, "grad_norm": 0.6515809893608093, "learning_rate": 2.6389636804990646e-06, "loss": 0.0522, "step": 129840 }, { "epoch": 3.8401253918495297, "grad_norm": 0.868532657623291, "learning_rate": 2.638836990580125e-06, "loss": 0.088, "step": 129850 }, { "epoch": 3.8404211273437037, "grad_norm": 1.3672877550125122, "learning_rate": 2.6387103006611862e-06, "loss": 0.0914, "step": 129860 }, { "epoch": 3.840716862837878, "grad_norm": 0.6611142754554749, "learning_rate": 2.6385836107422466e-06, "loss": 0.0679, "step": 129870 }, { "epoch": 3.8410125983320516, "grad_norm": 0.5122966766357422, "learning_rate": 2.6384569208233074e-06, "loss": 0.0729, "step": 129880 }, { "epoch": 3.841308333826226, "grad_norm": 0.36321544647216797, "learning_rate": 2.6383302309043677e-06, "loss": 0.0588, "step": 129890 }, { "epoch": 3.8416040693204, "grad_norm": 0.6974862813949585, "learning_rate": 2.6382035409854285e-06, "loss": 0.0811, "step": 129900 }, { "epoch": 3.841899804814574, "grad_norm": 1.2078903913497925, "learning_rate": 2.638076851066489e-06, "loss": 0.0717, "step": 129910 }, { "epoch": 3.8421955403087478, "grad_norm": 1.1945726871490479, "learning_rate": 2.6379501611475497e-06, "loss": 0.0812, "step": 129920 }, { "epoch": 3.8424912758029217, "grad_norm": 0.8238325119018555, "learning_rate": 2.63782347122861e-06, "loss": 0.0648, "step": 129930 }, { "epoch": 3.842787011297096, "grad_norm": 0.2822715938091278, "learning_rate": 2.6376967813096713e-06, "loss": 0.0569, "step": 129940 }, { "epoch": 3.84308274679127, "grad_norm": 0.9562029242515564, "learning_rate": 2.6375700913907316e-06, "loss": 0.0928, "step": 129950 }, { "epoch": 3.843378482285444, "grad_norm": 0.7423855066299438, "learning_rate": 2.6374434014717924e-06, "loss": 0.0653, "step": 129960 }, { "epoch": 3.843674217779618, "grad_norm": 0.6635525226593018, "learning_rate": 2.6373167115528528e-06, "loss": 0.0727, "step": 129970 }, { "epoch": 3.843969953273792, "grad_norm": 0.8016699552536011, "learning_rate": 2.6371900216339136e-06, "loss": 0.0763, "step": 129980 }, { "epoch": 3.844265688767966, "grad_norm": 0.9396792650222778, "learning_rate": 2.637063331714974e-06, "loss": 0.0625, "step": 129990 }, { "epoch": 3.8445614242621398, "grad_norm": 0.5742197632789612, "learning_rate": 2.6369366417960347e-06, "loss": 0.0728, "step": 130000 }, { "epoch": 3.844857159756314, "grad_norm": 1.067255973815918, "learning_rate": 2.636809951877095e-06, "loss": 0.0829, "step": 130010 }, { "epoch": 3.845152895250488, "grad_norm": 0.6526411771774292, "learning_rate": 2.6366832619581563e-06, "loss": 0.061, "step": 130020 }, { "epoch": 3.845448630744662, "grad_norm": 0.9912139177322388, "learning_rate": 2.6365565720392167e-06, "loss": 0.076, "step": 130030 }, { "epoch": 3.845744366238836, "grad_norm": 1.308800220489502, "learning_rate": 2.636429882120277e-06, "loss": 0.0705, "step": 130040 }, { "epoch": 3.84604010173301, "grad_norm": 0.40949735045433044, "learning_rate": 2.636303192201338e-06, "loss": 0.0634, "step": 130050 }, { "epoch": 3.846335837227184, "grad_norm": 0.9575076699256897, "learning_rate": 2.636176502282398e-06, "loss": 0.0774, "step": 130060 }, { "epoch": 3.846631572721358, "grad_norm": 0.5101721286773682, "learning_rate": 2.636049812363459e-06, "loss": 0.0703, "step": 130070 }, { "epoch": 3.846927308215532, "grad_norm": 0.3908825218677521, "learning_rate": 2.6359231224445193e-06, "loss": 0.0709, "step": 130080 }, { "epoch": 3.847223043709706, "grad_norm": 0.6745844483375549, "learning_rate": 2.63579643252558e-06, "loss": 0.062, "step": 130090 }, { "epoch": 3.84751877920388, "grad_norm": 1.2797335386276245, "learning_rate": 2.635669742606641e-06, "loss": 0.0913, "step": 130100 }, { "epoch": 3.847814514698054, "grad_norm": 0.5989786982536316, "learning_rate": 2.6355430526877017e-06, "loss": 0.0829, "step": 130110 }, { "epoch": 3.848110250192228, "grad_norm": 0.9659011363983154, "learning_rate": 2.635416362768762e-06, "loss": 0.0786, "step": 130120 }, { "epoch": 3.8484059856864024, "grad_norm": 0.7834530472755432, "learning_rate": 2.635289672849823e-06, "loss": 0.0656, "step": 130130 }, { "epoch": 3.848701721180576, "grad_norm": 0.5582263469696045, "learning_rate": 2.6351629829308832e-06, "loss": 0.0596, "step": 130140 }, { "epoch": 3.8489974566747502, "grad_norm": 1.389520287513733, "learning_rate": 2.635036293011944e-06, "loss": 0.0709, "step": 130150 }, { "epoch": 3.849293192168924, "grad_norm": 0.8598795533180237, "learning_rate": 2.6349096030930044e-06, "loss": 0.0779, "step": 130160 }, { "epoch": 3.849588927663098, "grad_norm": 1.1021769046783447, "learning_rate": 2.634782913174065e-06, "loss": 0.0737, "step": 130170 }, { "epoch": 3.849884663157272, "grad_norm": 1.2956979274749756, "learning_rate": 2.634656223255126e-06, "loss": 0.0735, "step": 130180 }, { "epoch": 3.850180398651446, "grad_norm": 0.5561503171920776, "learning_rate": 2.6345295333361868e-06, "loss": 0.0543, "step": 130190 }, { "epoch": 3.8504761341456204, "grad_norm": 0.7214230895042419, "learning_rate": 2.634402843417247e-06, "loss": 0.076, "step": 130200 }, { "epoch": 3.850771869639794, "grad_norm": 0.9996178150177002, "learning_rate": 2.634276153498308e-06, "loss": 0.0904, "step": 130210 }, { "epoch": 3.8510676051339683, "grad_norm": 0.9182019233703613, "learning_rate": 2.6341494635793683e-06, "loss": 0.0692, "step": 130220 }, { "epoch": 3.8513633406281422, "grad_norm": 0.5189116597175598, "learning_rate": 2.634022773660429e-06, "loss": 0.0617, "step": 130230 }, { "epoch": 3.851659076122316, "grad_norm": 0.5504944324493408, "learning_rate": 2.6338960837414894e-06, "loss": 0.0589, "step": 130240 }, { "epoch": 3.85195481161649, "grad_norm": 0.9019342660903931, "learning_rate": 2.6337693938225502e-06, "loss": 0.0621, "step": 130250 }, { "epoch": 3.852250547110664, "grad_norm": 1.1240671873092651, "learning_rate": 2.633642703903611e-06, "loss": 0.0746, "step": 130260 }, { "epoch": 3.8525462826048384, "grad_norm": 1.4819605350494385, "learning_rate": 2.633516013984672e-06, "loss": 0.0907, "step": 130270 }, { "epoch": 3.8528420180990124, "grad_norm": 0.8380573391914368, "learning_rate": 2.633389324065732e-06, "loss": 0.0711, "step": 130280 }, { "epoch": 3.8531377535931863, "grad_norm": 0.7734581232070923, "learning_rate": 2.633262634146793e-06, "loss": 0.066, "step": 130290 }, { "epoch": 3.8534334890873603, "grad_norm": 0.6357418894767761, "learning_rate": 2.6331359442278533e-06, "loss": 0.0665, "step": 130300 }, { "epoch": 3.853729224581534, "grad_norm": 0.7428213953971863, "learning_rate": 2.633009254308914e-06, "loss": 0.0743, "step": 130310 }, { "epoch": 3.854024960075708, "grad_norm": 0.7073659896850586, "learning_rate": 2.6328825643899745e-06, "loss": 0.0542, "step": 130320 }, { "epoch": 3.854320695569882, "grad_norm": 1.0660275220870972, "learning_rate": 2.6327558744710353e-06, "loss": 0.0716, "step": 130330 }, { "epoch": 3.8546164310640565, "grad_norm": 0.9381737112998962, "learning_rate": 2.632629184552096e-06, "loss": 0.0668, "step": 130340 }, { "epoch": 3.8549121665582304, "grad_norm": 1.0167566537857056, "learning_rate": 2.632502494633157e-06, "loss": 0.0801, "step": 130350 }, { "epoch": 3.8552079020524044, "grad_norm": 1.0839264392852783, "learning_rate": 2.6323758047142172e-06, "loss": 0.0767, "step": 130360 }, { "epoch": 3.8555036375465783, "grad_norm": 0.7873496413230896, "learning_rate": 2.632249114795278e-06, "loss": 0.0729, "step": 130370 }, { "epoch": 3.8557993730407523, "grad_norm": 1.1999093294143677, "learning_rate": 2.6321224248763384e-06, "loss": 0.0665, "step": 130380 }, { "epoch": 3.856095108534926, "grad_norm": 0.5356882214546204, "learning_rate": 2.631995734957399e-06, "loss": 0.0642, "step": 130390 }, { "epoch": 3.8563908440291, "grad_norm": 0.9913585782051086, "learning_rate": 2.6318690450384595e-06, "loss": 0.0815, "step": 130400 }, { "epoch": 3.8566865795232745, "grad_norm": 1.0596749782562256, "learning_rate": 2.6317423551195203e-06, "loss": 0.0777, "step": 130410 }, { "epoch": 3.8569823150174485, "grad_norm": 0.7515676021575928, "learning_rate": 2.631615665200581e-06, "loss": 0.0776, "step": 130420 }, { "epoch": 3.8572780505116224, "grad_norm": 0.7252320647239685, "learning_rate": 2.631488975281642e-06, "loss": 0.0735, "step": 130430 }, { "epoch": 3.8575737860057964, "grad_norm": 0.651904284954071, "learning_rate": 2.6313622853627023e-06, "loss": 0.0548, "step": 130440 }, { "epoch": 3.8578695214999703, "grad_norm": 0.8843218088150024, "learning_rate": 2.6312355954437626e-06, "loss": 0.0848, "step": 130450 }, { "epoch": 3.8581652569941447, "grad_norm": 0.6243205666542053, "learning_rate": 2.6311089055248234e-06, "loss": 0.0802, "step": 130460 }, { "epoch": 3.858460992488318, "grad_norm": 1.1898131370544434, "learning_rate": 2.6309822156058838e-06, "loss": 0.0716, "step": 130470 }, { "epoch": 3.8587567279824926, "grad_norm": 1.2863863706588745, "learning_rate": 2.6308555256869446e-06, "loss": 0.0783, "step": 130480 }, { "epoch": 3.8590524634766665, "grad_norm": 0.9079420566558838, "learning_rate": 2.630728835768005e-06, "loss": 0.0705, "step": 130490 }, { "epoch": 3.8593481989708405, "grad_norm": 0.7321657538414001, "learning_rate": 2.630602145849066e-06, "loss": 0.0716, "step": 130500 }, { "epoch": 3.8596439344650144, "grad_norm": 0.9023249745368958, "learning_rate": 2.6304754559301265e-06, "loss": 0.0791, "step": 130510 }, { "epoch": 3.8599396699591884, "grad_norm": 1.196268916130066, "learning_rate": 2.6303487660111873e-06, "loss": 0.0734, "step": 130520 }, { "epoch": 3.8602354054533627, "grad_norm": 0.6767809391021729, "learning_rate": 2.6302220760922477e-06, "loss": 0.0621, "step": 130530 }, { "epoch": 3.8605311409475367, "grad_norm": 0.5007191300392151, "learning_rate": 2.6300953861733085e-06, "loss": 0.0641, "step": 130540 }, { "epoch": 3.8608268764417106, "grad_norm": 0.8881307244300842, "learning_rate": 2.629968696254369e-06, "loss": 0.0932, "step": 130550 }, { "epoch": 3.8611226119358846, "grad_norm": 0.7050532102584839, "learning_rate": 2.6298420063354296e-06, "loss": 0.069, "step": 130560 }, { "epoch": 3.8614183474300585, "grad_norm": 0.658721923828125, "learning_rate": 2.62971531641649e-06, "loss": 0.0763, "step": 130570 }, { "epoch": 3.8617140829242325, "grad_norm": 0.65420001745224, "learning_rate": 2.629588626497551e-06, "loss": 0.0785, "step": 130580 }, { "epoch": 3.8620098184184064, "grad_norm": 1.5111554861068726, "learning_rate": 2.6294619365786116e-06, "loss": 0.0745, "step": 130590 }, { "epoch": 3.862305553912581, "grad_norm": 0.6338340640068054, "learning_rate": 2.6293352466596724e-06, "loss": 0.0798, "step": 130600 }, { "epoch": 3.8626012894067547, "grad_norm": 0.8082738518714905, "learning_rate": 2.6292085567407327e-06, "loss": 0.0835, "step": 130610 }, { "epoch": 3.8628970249009287, "grad_norm": 0.5699964165687561, "learning_rate": 2.6290818668217935e-06, "loss": 0.0776, "step": 130620 }, { "epoch": 3.8631927603951026, "grad_norm": 1.3335092067718506, "learning_rate": 2.628955176902854e-06, "loss": 0.0692, "step": 130630 }, { "epoch": 3.8634884958892766, "grad_norm": 1.2783485651016235, "learning_rate": 2.6288284869839147e-06, "loss": 0.082, "step": 130640 }, { "epoch": 3.8637842313834505, "grad_norm": 0.6144974231719971, "learning_rate": 2.628701797064975e-06, "loss": 0.0868, "step": 130650 }, { "epoch": 3.8640799668776245, "grad_norm": 1.055112600326538, "learning_rate": 2.6285751071460362e-06, "loss": 0.0901, "step": 130660 }, { "epoch": 3.864375702371799, "grad_norm": 0.5910940170288086, "learning_rate": 2.6284484172270966e-06, "loss": 0.0739, "step": 130670 }, { "epoch": 3.864671437865973, "grad_norm": 0.7015979886054993, "learning_rate": 2.6283217273081574e-06, "loss": 0.0746, "step": 130680 }, { "epoch": 3.8649671733601467, "grad_norm": 0.7228469848632812, "learning_rate": 2.6281950373892178e-06, "loss": 0.0705, "step": 130690 }, { "epoch": 3.8652629088543207, "grad_norm": 0.7183685302734375, "learning_rate": 2.6280683474702786e-06, "loss": 0.1024, "step": 130700 }, { "epoch": 3.8655586443484946, "grad_norm": 1.1081113815307617, "learning_rate": 2.627941657551339e-06, "loss": 0.0942, "step": 130710 }, { "epoch": 3.8658543798426686, "grad_norm": 0.62119460105896, "learning_rate": 2.6278149676323997e-06, "loss": 0.0715, "step": 130720 }, { "epoch": 3.8661501153368425, "grad_norm": 0.7327693104743958, "learning_rate": 2.62768827771346e-06, "loss": 0.064, "step": 130730 }, { "epoch": 3.866445850831017, "grad_norm": 0.949122965335846, "learning_rate": 2.6275615877945213e-06, "loss": 0.0633, "step": 130740 }, { "epoch": 3.866741586325191, "grad_norm": 0.5055491924285889, "learning_rate": 2.6274348978755817e-06, "loss": 0.0692, "step": 130750 }, { "epoch": 3.8670373218193648, "grad_norm": 0.9013646841049194, "learning_rate": 2.6273082079566424e-06, "loss": 0.0731, "step": 130760 }, { "epoch": 3.8673330573135387, "grad_norm": 0.4561381936073303, "learning_rate": 2.627181518037703e-06, "loss": 0.0794, "step": 130770 }, { "epoch": 3.8676287928077127, "grad_norm": 0.6828781962394714, "learning_rate": 2.6270548281187636e-06, "loss": 0.0588, "step": 130780 }, { "epoch": 3.867924528301887, "grad_norm": 0.4082765281200409, "learning_rate": 2.626928138199824e-06, "loss": 0.0556, "step": 130790 }, { "epoch": 3.8682202637960605, "grad_norm": 1.0007884502410889, "learning_rate": 2.6268014482808848e-06, "loss": 0.0838, "step": 130800 }, { "epoch": 3.868515999290235, "grad_norm": 0.6720011830329895, "learning_rate": 2.626674758361945e-06, "loss": 0.0613, "step": 130810 }, { "epoch": 3.868811734784409, "grad_norm": 0.9978810548782349, "learning_rate": 2.6265480684430063e-06, "loss": 0.0729, "step": 130820 }, { "epoch": 3.869107470278583, "grad_norm": 1.1042616367340088, "learning_rate": 2.6264213785240667e-06, "loss": 0.0715, "step": 130830 }, { "epoch": 3.8694032057727568, "grad_norm": 1.105804681777954, "learning_rate": 2.6262946886051275e-06, "loss": 0.0724, "step": 130840 }, { "epoch": 3.8696989412669307, "grad_norm": 1.0722930431365967, "learning_rate": 2.626167998686188e-06, "loss": 0.0886, "step": 130850 }, { "epoch": 3.869994676761105, "grad_norm": 0.6891552805900574, "learning_rate": 2.6260413087672482e-06, "loss": 0.0933, "step": 130860 }, { "epoch": 3.870290412255279, "grad_norm": 0.5771830677986145, "learning_rate": 2.625914618848309e-06, "loss": 0.0827, "step": 130870 }, { "epoch": 3.870586147749453, "grad_norm": 0.7103292346000671, "learning_rate": 2.6257879289293694e-06, "loss": 0.0673, "step": 130880 }, { "epoch": 3.870881883243627, "grad_norm": 1.6724241971969604, "learning_rate": 2.62566123901043e-06, "loss": 0.0642, "step": 130890 }, { "epoch": 3.871177618737801, "grad_norm": 0.8574778437614441, "learning_rate": 2.625534549091491e-06, "loss": 0.0702, "step": 130900 }, { "epoch": 3.871473354231975, "grad_norm": 0.881538450717926, "learning_rate": 2.6254078591725517e-06, "loss": 0.0933, "step": 130910 }, { "epoch": 3.8717690897261487, "grad_norm": 0.9119794368743896, "learning_rate": 2.625281169253612e-06, "loss": 0.0743, "step": 130920 }, { "epoch": 3.872064825220323, "grad_norm": 0.3225070536136627, "learning_rate": 2.625154479334673e-06, "loss": 0.0642, "step": 130930 }, { "epoch": 3.872360560714497, "grad_norm": 0.6768196821212769, "learning_rate": 2.6250277894157333e-06, "loss": 0.0705, "step": 130940 }, { "epoch": 3.872656296208671, "grad_norm": 0.948972761631012, "learning_rate": 2.624901099496794e-06, "loss": 0.0705, "step": 130950 }, { "epoch": 3.872952031702845, "grad_norm": 1.8851195573806763, "learning_rate": 2.6247744095778544e-06, "loss": 0.0826, "step": 130960 }, { "epoch": 3.873247767197019, "grad_norm": 0.895852267742157, "learning_rate": 2.6246477196589152e-06, "loss": 0.089, "step": 130970 }, { "epoch": 3.873543502691193, "grad_norm": 0.34721988439559937, "learning_rate": 2.624521029739976e-06, "loss": 0.0622, "step": 130980 }, { "epoch": 3.873839238185367, "grad_norm": 0.6486652493476868, "learning_rate": 2.624394339821037e-06, "loss": 0.0651, "step": 130990 }, { "epoch": 3.874134973679541, "grad_norm": 0.8000043630599976, "learning_rate": 2.624267649902097e-06, "loss": 0.0699, "step": 131000 }, { "epoch": 3.874430709173715, "grad_norm": 0.3580518066883087, "learning_rate": 2.624140959983158e-06, "loss": 0.0712, "step": 131010 }, { "epoch": 3.874726444667889, "grad_norm": 1.143929362297058, "learning_rate": 2.6240142700642183e-06, "loss": 0.0661, "step": 131020 }, { "epoch": 3.875022180162063, "grad_norm": 0.6738064289093018, "learning_rate": 2.623887580145279e-06, "loss": 0.0723, "step": 131030 }, { "epoch": 3.875317915656237, "grad_norm": 0.7101559042930603, "learning_rate": 2.6237608902263395e-06, "loss": 0.0523, "step": 131040 }, { "epoch": 3.8756136511504113, "grad_norm": 0.9418679475784302, "learning_rate": 2.6236342003074003e-06, "loss": 0.084, "step": 131050 }, { "epoch": 3.875909386644585, "grad_norm": 0.9124466776847839, "learning_rate": 2.623507510388461e-06, "loss": 0.0856, "step": 131060 }, { "epoch": 3.8762051221387592, "grad_norm": 1.1452391147613525, "learning_rate": 2.623380820469522e-06, "loss": 0.0756, "step": 131070 }, { "epoch": 3.876500857632933, "grad_norm": 0.6988884806632996, "learning_rate": 2.623254130550582e-06, "loss": 0.0738, "step": 131080 }, { "epoch": 3.876796593127107, "grad_norm": 0.6349477171897888, "learning_rate": 2.623127440631643e-06, "loss": 0.0473, "step": 131090 }, { "epoch": 3.877092328621281, "grad_norm": 1.1029092073440552, "learning_rate": 2.6230007507127034e-06, "loss": 0.0858, "step": 131100 }, { "epoch": 3.877388064115455, "grad_norm": 0.688187301158905, "learning_rate": 2.622874060793764e-06, "loss": 0.0683, "step": 131110 }, { "epoch": 3.8776837996096294, "grad_norm": 0.9567621350288391, "learning_rate": 2.6227473708748245e-06, "loss": 0.072, "step": 131120 }, { "epoch": 3.877979535103803, "grad_norm": 0.8328858613967896, "learning_rate": 2.6226206809558853e-06, "loss": 0.0589, "step": 131130 }, { "epoch": 3.8782752705979773, "grad_norm": 1.0988212823867798, "learning_rate": 2.622493991036946e-06, "loss": 0.0599, "step": 131140 }, { "epoch": 3.878571006092151, "grad_norm": 0.9830378890037537, "learning_rate": 2.622367301118007e-06, "loss": 0.0981, "step": 131150 }, { "epoch": 3.878866741586325, "grad_norm": 0.7259227633476257, "learning_rate": 2.6222406111990672e-06, "loss": 0.0767, "step": 131160 }, { "epoch": 3.879162477080499, "grad_norm": 0.7557498216629028, "learning_rate": 2.622113921280128e-06, "loss": 0.0837, "step": 131170 }, { "epoch": 3.879458212574673, "grad_norm": 0.8230693340301514, "learning_rate": 2.6219872313611884e-06, "loss": 0.0754, "step": 131180 }, { "epoch": 3.8797539480688474, "grad_norm": 0.5946642756462097, "learning_rate": 2.621860541442249e-06, "loss": 0.0509, "step": 131190 }, { "epoch": 3.8800496835630214, "grad_norm": 0.9442870020866394, "learning_rate": 2.6217338515233096e-06, "loss": 0.0737, "step": 131200 }, { "epoch": 3.8803454190571953, "grad_norm": 0.37601786851882935, "learning_rate": 2.6216071616043703e-06, "loss": 0.0682, "step": 131210 }, { "epoch": 3.8806411545513693, "grad_norm": 0.6440948843955994, "learning_rate": 2.621480471685431e-06, "loss": 0.0659, "step": 131220 }, { "epoch": 3.880936890045543, "grad_norm": 0.7196648120880127, "learning_rate": 2.621353781766492e-06, "loss": 0.0811, "step": 131230 }, { "epoch": 3.881232625539717, "grad_norm": 1.38206148147583, "learning_rate": 2.6212270918475523e-06, "loss": 0.0657, "step": 131240 }, { "epoch": 3.881528361033891, "grad_norm": 0.7904144525527954, "learning_rate": 2.621100401928613e-06, "loss": 0.0558, "step": 131250 }, { "epoch": 3.8818240965280655, "grad_norm": 0.782376229763031, "learning_rate": 2.6209737120096734e-06, "loss": 0.0673, "step": 131260 }, { "epoch": 3.8821198320222394, "grad_norm": 0.75171959400177, "learning_rate": 2.6208470220907342e-06, "loss": 0.0773, "step": 131270 }, { "epoch": 3.8824155675164134, "grad_norm": 1.085294485092163, "learning_rate": 2.6207203321717946e-06, "loss": 0.0827, "step": 131280 }, { "epoch": 3.8827113030105873, "grad_norm": 1.1430655717849731, "learning_rate": 2.620593642252855e-06, "loss": 0.0555, "step": 131290 }, { "epoch": 3.8830070385047613, "grad_norm": 1.06013023853302, "learning_rate": 2.620466952333916e-06, "loss": 0.0741, "step": 131300 }, { "epoch": 3.883302773998935, "grad_norm": 1.0143526792526245, "learning_rate": 2.6203402624149765e-06, "loss": 0.0888, "step": 131310 }, { "epoch": 3.883598509493109, "grad_norm": 1.2209789752960205, "learning_rate": 2.6202135724960373e-06, "loss": 0.0823, "step": 131320 }, { "epoch": 3.8838942449872835, "grad_norm": 0.6513954997062683, "learning_rate": 2.6200868825770977e-06, "loss": 0.0646, "step": 131330 }, { "epoch": 3.8841899804814575, "grad_norm": 0.5386901497840881, "learning_rate": 2.6199601926581585e-06, "loss": 0.0691, "step": 131340 }, { "epoch": 3.8844857159756314, "grad_norm": 0.9926748871803284, "learning_rate": 2.619833502739219e-06, "loss": 0.0743, "step": 131350 }, { "epoch": 3.8847814514698054, "grad_norm": 0.9891919493675232, "learning_rate": 2.6197068128202796e-06, "loss": 0.0733, "step": 131360 }, { "epoch": 3.8850771869639793, "grad_norm": 0.9979923367500305, "learning_rate": 2.61958012290134e-06, "loss": 0.081, "step": 131370 }, { "epoch": 3.8853729224581537, "grad_norm": 0.736832320690155, "learning_rate": 2.6194534329824012e-06, "loss": 0.0733, "step": 131380 }, { "epoch": 3.885668657952327, "grad_norm": 0.779321551322937, "learning_rate": 2.6193267430634616e-06, "loss": 0.0776, "step": 131390 }, { "epoch": 3.8859643934465016, "grad_norm": 1.119928240776062, "learning_rate": 2.6192000531445224e-06, "loss": 0.0888, "step": 131400 }, { "epoch": 3.8862601289406755, "grad_norm": 0.730198323726654, "learning_rate": 2.6190733632255828e-06, "loss": 0.0841, "step": 131410 }, { "epoch": 3.8865558644348495, "grad_norm": 0.9316238760948181, "learning_rate": 2.6189466733066435e-06, "loss": 0.0883, "step": 131420 }, { "epoch": 3.8868515999290234, "grad_norm": 0.6335093379020691, "learning_rate": 2.618819983387704e-06, "loss": 0.067, "step": 131430 }, { "epoch": 3.8871473354231973, "grad_norm": 0.9557408094406128, "learning_rate": 2.6186932934687647e-06, "loss": 0.0625, "step": 131440 }, { "epoch": 3.8874430709173717, "grad_norm": 0.9091939926147461, "learning_rate": 2.618566603549825e-06, "loss": 0.083, "step": 131450 }, { "epoch": 3.8877388064115457, "grad_norm": 0.8427996039390564, "learning_rate": 2.6184399136308863e-06, "loss": 0.0737, "step": 131460 }, { "epoch": 3.8880345419057196, "grad_norm": 0.6461329460144043, "learning_rate": 2.6183132237119466e-06, "loss": 0.0713, "step": 131470 }, { "epoch": 3.8883302773998936, "grad_norm": 0.6628183722496033, "learning_rate": 2.6181865337930074e-06, "loss": 0.0588, "step": 131480 }, { "epoch": 3.8886260128940675, "grad_norm": 1.0361957550048828, "learning_rate": 2.618059843874068e-06, "loss": 0.0609, "step": 131490 }, { "epoch": 3.8889217483882415, "grad_norm": 0.7969825267791748, "learning_rate": 2.6179331539551286e-06, "loss": 0.076, "step": 131500 }, { "epoch": 3.8892174838824154, "grad_norm": 1.6760034561157227, "learning_rate": 2.617806464036189e-06, "loss": 0.0736, "step": 131510 }, { "epoch": 3.88951321937659, "grad_norm": 0.7798526883125305, "learning_rate": 2.6176797741172497e-06, "loss": 0.0697, "step": 131520 }, { "epoch": 3.8898089548707637, "grad_norm": 0.7058160305023193, "learning_rate": 2.61755308419831e-06, "loss": 0.0816, "step": 131530 }, { "epoch": 3.8901046903649377, "grad_norm": 1.098135232925415, "learning_rate": 2.6174263942793713e-06, "loss": 0.0684, "step": 131540 }, { "epoch": 3.8904004258591116, "grad_norm": 1.1884719133377075, "learning_rate": 2.6172997043604317e-06, "loss": 0.0809, "step": 131550 }, { "epoch": 3.8906961613532856, "grad_norm": 1.148712158203125, "learning_rate": 2.6171730144414925e-06, "loss": 0.0868, "step": 131560 }, { "epoch": 3.8909918968474595, "grad_norm": 0.7794604301452637, "learning_rate": 2.617046324522553e-06, "loss": 0.0854, "step": 131570 }, { "epoch": 3.8912876323416334, "grad_norm": 1.3217886686325073, "learning_rate": 2.6169196346036136e-06, "loss": 0.0728, "step": 131580 }, { "epoch": 3.891583367835808, "grad_norm": 1.302949070930481, "learning_rate": 2.616792944684674e-06, "loss": 0.0767, "step": 131590 }, { "epoch": 3.8918791033299818, "grad_norm": 0.9980552196502686, "learning_rate": 2.6166662547657348e-06, "loss": 0.0715, "step": 131600 }, { "epoch": 3.8921748388241557, "grad_norm": 0.8697496056556702, "learning_rate": 2.616539564846795e-06, "loss": 0.0873, "step": 131610 }, { "epoch": 3.8924705743183297, "grad_norm": 1.1834062337875366, "learning_rate": 2.6164128749278564e-06, "loss": 0.0844, "step": 131620 }, { "epoch": 3.8927663098125036, "grad_norm": 0.6713430285453796, "learning_rate": 2.6162861850089167e-06, "loss": 0.0766, "step": 131630 }, { "epoch": 3.8930620453066775, "grad_norm": 0.7575658559799194, "learning_rate": 2.6161594950899775e-06, "loss": 0.0667, "step": 131640 }, { "epoch": 3.8933577808008515, "grad_norm": 0.895426869392395, "learning_rate": 2.616032805171038e-06, "loss": 0.0846, "step": 131650 }, { "epoch": 3.893653516295026, "grad_norm": 0.7404404282569885, "learning_rate": 2.6159061152520987e-06, "loss": 0.0816, "step": 131660 }, { "epoch": 3.8939492517892, "grad_norm": 1.3305636644363403, "learning_rate": 2.615779425333159e-06, "loss": 0.0885, "step": 131670 }, { "epoch": 3.8942449872833738, "grad_norm": 0.44454070925712585, "learning_rate": 2.61565273541422e-06, "loss": 0.0736, "step": 131680 }, { "epoch": 3.8945407227775477, "grad_norm": 0.8345876336097717, "learning_rate": 2.61552604549528e-06, "loss": 0.0636, "step": 131690 }, { "epoch": 3.8948364582717216, "grad_norm": 0.7123910784721375, "learning_rate": 2.615399355576341e-06, "loss": 0.073, "step": 131700 }, { "epoch": 3.895132193765896, "grad_norm": 0.7724895477294922, "learning_rate": 2.6152726656574018e-06, "loss": 0.0824, "step": 131710 }, { "epoch": 3.8954279292600695, "grad_norm": 0.8052807450294495, "learning_rate": 2.615145975738462e-06, "loss": 0.081, "step": 131720 }, { "epoch": 3.895723664754244, "grad_norm": 0.8759538531303406, "learning_rate": 2.615019285819523e-06, "loss": 0.0769, "step": 131730 }, { "epoch": 3.896019400248418, "grad_norm": 0.5271346569061279, "learning_rate": 2.6148925959005833e-06, "loss": 0.0529, "step": 131740 }, { "epoch": 3.896315135742592, "grad_norm": 1.136751651763916, "learning_rate": 2.614765905981644e-06, "loss": 0.0702, "step": 131750 }, { "epoch": 3.8966108712367657, "grad_norm": 0.7237411737442017, "learning_rate": 2.6146392160627045e-06, "loss": 0.085, "step": 131760 }, { "epoch": 3.8969066067309397, "grad_norm": 0.833898663520813, "learning_rate": 2.6145125261437652e-06, "loss": 0.0655, "step": 131770 }, { "epoch": 3.897202342225114, "grad_norm": 1.2016067504882812, "learning_rate": 2.614385836224826e-06, "loss": 0.0802, "step": 131780 }, { "epoch": 3.897498077719288, "grad_norm": 0.9821657538414001, "learning_rate": 2.614259146305887e-06, "loss": 0.0551, "step": 131790 }, { "epoch": 3.897793813213462, "grad_norm": 0.6367598176002502, "learning_rate": 2.614132456386947e-06, "loss": 0.0707, "step": 131800 }, { "epoch": 3.898089548707636, "grad_norm": 0.8651772737503052, "learning_rate": 2.614005766468008e-06, "loss": 0.0669, "step": 131810 }, { "epoch": 3.89838528420181, "grad_norm": 0.7449782490730286, "learning_rate": 2.6138790765490683e-06, "loss": 0.0734, "step": 131820 }, { "epoch": 3.898681019695984, "grad_norm": 1.0734527111053467, "learning_rate": 2.613752386630129e-06, "loss": 0.0633, "step": 131830 }, { "epoch": 3.8989767551901577, "grad_norm": 0.6701083183288574, "learning_rate": 2.6136256967111895e-06, "loss": 0.0663, "step": 131840 }, { "epoch": 3.899272490684332, "grad_norm": 0.6542691588401794, "learning_rate": 2.6134990067922503e-06, "loss": 0.0628, "step": 131850 }, { "epoch": 3.899568226178506, "grad_norm": 0.6564834713935852, "learning_rate": 2.613372316873311e-06, "loss": 0.0831, "step": 131860 }, { "epoch": 3.89986396167268, "grad_norm": 0.7615820169448853, "learning_rate": 2.613245626954372e-06, "loss": 0.0812, "step": 131870 }, { "epoch": 3.900159697166854, "grad_norm": 2.07539439201355, "learning_rate": 2.6131189370354322e-06, "loss": 0.0717, "step": 131880 }, { "epoch": 3.900455432661028, "grad_norm": 0.92399662733078, "learning_rate": 2.612992247116493e-06, "loss": 0.0597, "step": 131890 }, { "epoch": 3.900751168155202, "grad_norm": 0.7898693680763245, "learning_rate": 2.6128655571975534e-06, "loss": 0.0773, "step": 131900 }, { "epoch": 3.901046903649376, "grad_norm": 0.8102545738220215, "learning_rate": 2.612738867278614e-06, "loss": 0.0715, "step": 131910 }, { "epoch": 3.90134263914355, "grad_norm": 0.7610223889350891, "learning_rate": 2.6126121773596745e-06, "loss": 0.0745, "step": 131920 }, { "epoch": 3.901638374637724, "grad_norm": 0.5712294578552246, "learning_rate": 2.6124854874407353e-06, "loss": 0.073, "step": 131930 }, { "epoch": 3.901934110131898, "grad_norm": 0.9684894680976868, "learning_rate": 2.612358797521796e-06, "loss": 0.078, "step": 131940 }, { "epoch": 3.902229845626072, "grad_norm": 0.8808975219726562, "learning_rate": 2.612232107602857e-06, "loss": 0.0808, "step": 131950 }, { "epoch": 3.902525581120246, "grad_norm": 0.5316197276115417, "learning_rate": 2.6121054176839173e-06, "loss": 0.0851, "step": 131960 }, { "epoch": 3.9028213166144203, "grad_norm": 0.5461671948432922, "learning_rate": 2.611978727764978e-06, "loss": 0.0872, "step": 131970 }, { "epoch": 3.903117052108594, "grad_norm": 1.1091686487197876, "learning_rate": 2.6118520378460384e-06, "loss": 0.0758, "step": 131980 }, { "epoch": 3.903412787602768, "grad_norm": 1.100664496421814, "learning_rate": 2.6117253479270992e-06, "loss": 0.0452, "step": 131990 }, { "epoch": 3.903708523096942, "grad_norm": 0.8273984789848328, "learning_rate": 2.6115986580081596e-06, "loss": 0.0768, "step": 132000 }, { "epoch": 3.904004258591116, "grad_norm": 1.593147873878479, "learning_rate": 2.6114719680892204e-06, "loss": 0.0825, "step": 132010 }, { "epoch": 3.90429999408529, "grad_norm": 0.701409637928009, "learning_rate": 2.611345278170281e-06, "loss": 0.0745, "step": 132020 }, { "epoch": 3.904595729579464, "grad_norm": 0.7959363460540771, "learning_rate": 2.611218588251342e-06, "loss": 0.0624, "step": 132030 }, { "epoch": 3.9048914650736384, "grad_norm": 1.094549536705017, "learning_rate": 2.6110918983324023e-06, "loss": 0.062, "step": 132040 }, { "epoch": 3.905187200567812, "grad_norm": 0.7746700644493103, "learning_rate": 2.610965208413463e-06, "loss": 0.0739, "step": 132050 }, { "epoch": 3.9054829360619863, "grad_norm": 0.6927776336669922, "learning_rate": 2.6108385184945235e-06, "loss": 0.0738, "step": 132060 }, { "epoch": 3.90577867155616, "grad_norm": 0.9031542539596558, "learning_rate": 2.6107118285755843e-06, "loss": 0.0751, "step": 132070 }, { "epoch": 3.906074407050334, "grad_norm": 0.6882684230804443, "learning_rate": 2.6105851386566446e-06, "loss": 0.0674, "step": 132080 }, { "epoch": 3.906370142544508, "grad_norm": 0.5233885645866394, "learning_rate": 2.6104584487377054e-06, "loss": 0.0658, "step": 132090 }, { "epoch": 3.906665878038682, "grad_norm": 0.9587896466255188, "learning_rate": 2.6103317588187662e-06, "loss": 0.0637, "step": 132100 }, { "epoch": 3.9069616135328564, "grad_norm": 1.2979522943496704, "learning_rate": 2.6102050688998266e-06, "loss": 0.0781, "step": 132110 }, { "epoch": 3.9072573490270304, "grad_norm": 0.6830717325210571, "learning_rate": 2.6100783789808874e-06, "loss": 0.0773, "step": 132120 }, { "epoch": 3.9075530845212043, "grad_norm": 0.8227566480636597, "learning_rate": 2.6099516890619477e-06, "loss": 0.0659, "step": 132130 }, { "epoch": 3.9078488200153783, "grad_norm": 0.8711541891098022, "learning_rate": 2.6098249991430085e-06, "loss": 0.0499, "step": 132140 }, { "epoch": 3.908144555509552, "grad_norm": 0.7009445428848267, "learning_rate": 2.609698309224069e-06, "loss": 0.0863, "step": 132150 }, { "epoch": 3.908440291003726, "grad_norm": 0.7443072199821472, "learning_rate": 2.6095716193051297e-06, "loss": 0.0685, "step": 132160 }, { "epoch": 3.9087360264979, "grad_norm": 1.2882323265075684, "learning_rate": 2.60944492938619e-06, "loss": 0.0881, "step": 132170 }, { "epoch": 3.9090317619920745, "grad_norm": 0.888003945350647, "learning_rate": 2.6093182394672513e-06, "loss": 0.0781, "step": 132180 }, { "epoch": 3.9093274974862484, "grad_norm": 0.4583713710308075, "learning_rate": 2.6091915495483116e-06, "loss": 0.0583, "step": 132190 }, { "epoch": 3.9096232329804224, "grad_norm": 0.8472242951393127, "learning_rate": 2.6090648596293724e-06, "loss": 0.0686, "step": 132200 }, { "epoch": 3.9099189684745963, "grad_norm": 1.0269241333007812, "learning_rate": 2.6089381697104328e-06, "loss": 0.0677, "step": 132210 }, { "epoch": 3.9102147039687702, "grad_norm": 0.8544309139251709, "learning_rate": 2.6088114797914936e-06, "loss": 0.0903, "step": 132220 }, { "epoch": 3.910510439462944, "grad_norm": 1.599861979484558, "learning_rate": 2.608684789872554e-06, "loss": 0.0796, "step": 132230 }, { "epoch": 3.910806174957118, "grad_norm": 1.0076731443405151, "learning_rate": 2.6085580999536147e-06, "loss": 0.0603, "step": 132240 }, { "epoch": 3.9111019104512925, "grad_norm": 1.0727698802947998, "learning_rate": 2.608431410034675e-06, "loss": 0.0826, "step": 132250 }, { "epoch": 3.9113976459454665, "grad_norm": 0.6383532285690308, "learning_rate": 2.6083047201157363e-06, "loss": 0.0888, "step": 132260 }, { "epoch": 3.9116933814396404, "grad_norm": 0.6387092471122742, "learning_rate": 2.6081780301967967e-06, "loss": 0.0627, "step": 132270 }, { "epoch": 3.9119891169338143, "grad_norm": 0.8760960698127747, "learning_rate": 2.6080513402778575e-06, "loss": 0.0776, "step": 132280 }, { "epoch": 3.9122848524279883, "grad_norm": 0.5398558974266052, "learning_rate": 2.607924650358918e-06, "loss": 0.0596, "step": 132290 }, { "epoch": 3.9125805879221627, "grad_norm": 0.7129467129707336, "learning_rate": 2.6077979604399786e-06, "loss": 0.0766, "step": 132300 }, { "epoch": 3.912876323416336, "grad_norm": 0.6833489537239075, "learning_rate": 2.607671270521039e-06, "loss": 0.0817, "step": 132310 }, { "epoch": 3.9131720589105106, "grad_norm": 0.8878832459449768, "learning_rate": 2.6075445806020998e-06, "loss": 0.0746, "step": 132320 }, { "epoch": 3.9134677944046845, "grad_norm": 0.6777436137199402, "learning_rate": 2.60741789068316e-06, "loss": 0.0726, "step": 132330 }, { "epoch": 3.9137635298988585, "grad_norm": 0.6066758632659912, "learning_rate": 2.6072912007642213e-06, "loss": 0.0551, "step": 132340 }, { "epoch": 3.9140592653930324, "grad_norm": 0.6310498714447021, "learning_rate": 2.6071645108452817e-06, "loss": 0.0914, "step": 132350 }, { "epoch": 3.9143550008872063, "grad_norm": 0.6015142202377319, "learning_rate": 2.6070378209263425e-06, "loss": 0.0914, "step": 132360 }, { "epoch": 3.9146507363813807, "grad_norm": 1.1101174354553223, "learning_rate": 2.606911131007403e-06, "loss": 0.0791, "step": 132370 }, { "epoch": 3.9149464718755547, "grad_norm": 0.5512440800666809, "learning_rate": 2.6067844410884637e-06, "loss": 0.0649, "step": 132380 }, { "epoch": 3.9152422073697286, "grad_norm": 0.5882386565208435, "learning_rate": 2.606657751169524e-06, "loss": 0.0461, "step": 132390 }, { "epoch": 3.9155379428639026, "grad_norm": 0.9383631348609924, "learning_rate": 2.606531061250585e-06, "loss": 0.0733, "step": 132400 }, { "epoch": 3.9158336783580765, "grad_norm": 0.7680928707122803, "learning_rate": 2.606404371331645e-06, "loss": 0.0795, "step": 132410 }, { "epoch": 3.9161294138522504, "grad_norm": 0.9795317649841309, "learning_rate": 2.6062776814127064e-06, "loss": 0.068, "step": 132420 }, { "epoch": 3.9164251493464244, "grad_norm": 1.4688829183578491, "learning_rate": 2.6061509914937668e-06, "loss": 0.0977, "step": 132430 }, { "epoch": 3.9167208848405988, "grad_norm": 0.8275347948074341, "learning_rate": 2.6060243015748276e-06, "loss": 0.0646, "step": 132440 }, { "epoch": 3.9170166203347727, "grad_norm": 0.7139353156089783, "learning_rate": 2.605897611655888e-06, "loss": 0.0847, "step": 132450 }, { "epoch": 3.9173123558289467, "grad_norm": 0.8296581506729126, "learning_rate": 2.6057709217369487e-06, "loss": 0.0803, "step": 132460 }, { "epoch": 3.9176080913231206, "grad_norm": 0.8305462002754211, "learning_rate": 2.605644231818009e-06, "loss": 0.0751, "step": 132470 }, { "epoch": 3.9179038268172945, "grad_norm": 0.724696159362793, "learning_rate": 2.60551754189907e-06, "loss": 0.0567, "step": 132480 }, { "epoch": 3.9181995623114685, "grad_norm": 0.49605387449264526, "learning_rate": 2.6053908519801302e-06, "loss": 0.0524, "step": 132490 }, { "epoch": 3.9184952978056424, "grad_norm": 0.6217430830001831, "learning_rate": 2.6052641620611914e-06, "loss": 0.0733, "step": 132500 }, { "epoch": 3.918791033299817, "grad_norm": 1.189399242401123, "learning_rate": 2.605137472142252e-06, "loss": 0.0789, "step": 132510 }, { "epoch": 3.9190867687939908, "grad_norm": 1.049269199371338, "learning_rate": 2.605010782223312e-06, "loss": 0.0632, "step": 132520 }, { "epoch": 3.9193825042881647, "grad_norm": 0.8334183096885681, "learning_rate": 2.604884092304373e-06, "loss": 0.0669, "step": 132530 }, { "epoch": 3.9196782397823386, "grad_norm": 0.5188648104667664, "learning_rate": 2.6047574023854333e-06, "loss": 0.0581, "step": 132540 }, { "epoch": 3.9199739752765126, "grad_norm": 0.8661729693412781, "learning_rate": 2.604630712466494e-06, "loss": 0.0635, "step": 132550 }, { "epoch": 3.9202697107706865, "grad_norm": 1.2460126876831055, "learning_rate": 2.6045040225475545e-06, "loss": 0.0788, "step": 132560 }, { "epoch": 3.9205654462648605, "grad_norm": 1.0390501022338867, "learning_rate": 2.6043773326286153e-06, "loss": 0.0914, "step": 132570 }, { "epoch": 3.920861181759035, "grad_norm": 0.5488178133964539, "learning_rate": 2.6042506427096756e-06, "loss": 0.0716, "step": 132580 }, { "epoch": 3.921156917253209, "grad_norm": 0.766494870185852, "learning_rate": 2.604123952790737e-06, "loss": 0.0621, "step": 132590 }, { "epoch": 3.9214526527473827, "grad_norm": 0.8645080327987671, "learning_rate": 2.6039972628717972e-06, "loss": 0.0642, "step": 132600 }, { "epoch": 3.9217483882415567, "grad_norm": 1.1370580196380615, "learning_rate": 2.603870572952858e-06, "loss": 0.0786, "step": 132610 }, { "epoch": 3.9220441237357306, "grad_norm": 0.612232506275177, "learning_rate": 2.6037438830339184e-06, "loss": 0.0652, "step": 132620 }, { "epoch": 3.922339859229905, "grad_norm": 0.4867614209651947, "learning_rate": 2.603617193114979e-06, "loss": 0.0563, "step": 132630 }, { "epoch": 3.9226355947240785, "grad_norm": 1.1578572988510132, "learning_rate": 2.6034905031960395e-06, "loss": 0.0697, "step": 132640 }, { "epoch": 3.922931330218253, "grad_norm": 0.7070804834365845, "learning_rate": 2.6033638132771003e-06, "loss": 0.0714, "step": 132650 }, { "epoch": 3.923227065712427, "grad_norm": 1.0393807888031006, "learning_rate": 2.6032371233581607e-06, "loss": 0.0932, "step": 132660 }, { "epoch": 3.923522801206601, "grad_norm": 1.2433167695999146, "learning_rate": 2.603110433439222e-06, "loss": 0.0744, "step": 132670 }, { "epoch": 3.9238185367007747, "grad_norm": 1.119584560394287, "learning_rate": 2.6029837435202823e-06, "loss": 0.086, "step": 132680 }, { "epoch": 3.9241142721949487, "grad_norm": 0.9032192230224609, "learning_rate": 2.602857053601343e-06, "loss": 0.0527, "step": 132690 }, { "epoch": 3.924410007689123, "grad_norm": 1.500532865524292, "learning_rate": 2.6027303636824034e-06, "loss": 0.075, "step": 132700 }, { "epoch": 3.924705743183297, "grad_norm": 0.7310333847999573, "learning_rate": 2.602603673763464e-06, "loss": 0.0687, "step": 132710 }, { "epoch": 3.925001478677471, "grad_norm": 0.8937367796897888, "learning_rate": 2.6024769838445246e-06, "loss": 0.0797, "step": 132720 }, { "epoch": 3.925297214171645, "grad_norm": 1.203389286994934, "learning_rate": 2.6023502939255854e-06, "loss": 0.0675, "step": 132730 }, { "epoch": 3.925592949665819, "grad_norm": 1.011550784111023, "learning_rate": 2.6022236040066457e-06, "loss": 0.0577, "step": 132740 }, { "epoch": 3.925888685159993, "grad_norm": 1.100743293762207, "learning_rate": 2.602096914087707e-06, "loss": 0.0904, "step": 132750 }, { "epoch": 3.9261844206541667, "grad_norm": 0.8509632349014282, "learning_rate": 2.6019702241687673e-06, "loss": 0.076, "step": 132760 }, { "epoch": 3.926480156148341, "grad_norm": 0.988498330116272, "learning_rate": 2.601843534249828e-06, "loss": 0.0764, "step": 132770 }, { "epoch": 3.926775891642515, "grad_norm": 0.6787058711051941, "learning_rate": 2.6017168443308885e-06, "loss": 0.0839, "step": 132780 }, { "epoch": 3.927071627136689, "grad_norm": 0.9557105302810669, "learning_rate": 2.6015901544119493e-06, "loss": 0.0616, "step": 132790 }, { "epoch": 3.927367362630863, "grad_norm": 1.1642060279846191, "learning_rate": 2.6014634644930096e-06, "loss": 0.0703, "step": 132800 }, { "epoch": 3.927663098125037, "grad_norm": 0.9639930725097656, "learning_rate": 2.6013367745740704e-06, "loss": 0.0708, "step": 132810 }, { "epoch": 3.927958833619211, "grad_norm": 0.9604145288467407, "learning_rate": 2.6012100846551308e-06, "loss": 0.0637, "step": 132820 }, { "epoch": 3.9282545691133848, "grad_norm": 0.6541303396224976, "learning_rate": 2.601083394736192e-06, "loss": 0.0777, "step": 132830 }, { "epoch": 3.928550304607559, "grad_norm": 0.7080393433570862, "learning_rate": 2.6009567048172524e-06, "loss": 0.0751, "step": 132840 }, { "epoch": 3.928846040101733, "grad_norm": 0.6995594501495361, "learning_rate": 2.600830014898313e-06, "loss": 0.0785, "step": 132850 }, { "epoch": 3.929141775595907, "grad_norm": 0.7371845245361328, "learning_rate": 2.6007033249793735e-06, "loss": 0.0759, "step": 132860 }, { "epoch": 3.929437511090081, "grad_norm": 0.962963342666626, "learning_rate": 2.6005766350604343e-06, "loss": 0.0829, "step": 132870 }, { "epoch": 3.929733246584255, "grad_norm": 0.7318558096885681, "learning_rate": 2.6004499451414947e-06, "loss": 0.0684, "step": 132880 }, { "epoch": 3.9300289820784293, "grad_norm": 1.4293255805969238, "learning_rate": 2.6003232552225555e-06, "loss": 0.0747, "step": 132890 }, { "epoch": 3.930324717572603, "grad_norm": 0.7215109467506409, "learning_rate": 2.600196565303616e-06, "loss": 0.0704, "step": 132900 }, { "epoch": 3.930620453066777, "grad_norm": 0.6230242848396301, "learning_rate": 2.600069875384677e-06, "loss": 0.0779, "step": 132910 }, { "epoch": 3.930916188560951, "grad_norm": 0.6694878935813904, "learning_rate": 2.5999431854657374e-06, "loss": 0.0916, "step": 132920 }, { "epoch": 3.931211924055125, "grad_norm": 1.0302432775497437, "learning_rate": 2.5998164955467978e-06, "loss": 0.0782, "step": 132930 }, { "epoch": 3.931507659549299, "grad_norm": 0.7399921417236328, "learning_rate": 2.5996898056278586e-06, "loss": 0.0765, "step": 132940 }, { "epoch": 3.931803395043473, "grad_norm": 0.9456447958946228, "learning_rate": 2.599563115708919e-06, "loss": 0.0732, "step": 132950 }, { "epoch": 3.9320991305376474, "grad_norm": 0.916341245174408, "learning_rate": 2.5994364257899797e-06, "loss": 0.0685, "step": 132960 }, { "epoch": 3.932394866031821, "grad_norm": 0.7429360747337341, "learning_rate": 2.59930973587104e-06, "loss": 0.0897, "step": 132970 }, { "epoch": 3.9326906015259953, "grad_norm": 0.615571916103363, "learning_rate": 2.599183045952101e-06, "loss": 0.0634, "step": 132980 }, { "epoch": 3.932986337020169, "grad_norm": 0.6464516520500183, "learning_rate": 2.5990563560331617e-06, "loss": 0.0539, "step": 132990 }, { "epoch": 3.933282072514343, "grad_norm": 0.7854744791984558, "learning_rate": 2.5989296661142224e-06, "loss": 0.0681, "step": 133000 }, { "epoch": 3.933577808008517, "grad_norm": 1.152249813079834, "learning_rate": 2.598802976195283e-06, "loss": 0.0938, "step": 133010 }, { "epoch": 3.933873543502691, "grad_norm": 0.9009928703308105, "learning_rate": 2.5986762862763436e-06, "loss": 0.0774, "step": 133020 }, { "epoch": 3.9341692789968654, "grad_norm": 0.6060752272605896, "learning_rate": 2.598549596357404e-06, "loss": 0.0674, "step": 133030 }, { "epoch": 3.9344650144910394, "grad_norm": 0.9364457726478577, "learning_rate": 2.5984229064384648e-06, "loss": 0.0527, "step": 133040 }, { "epoch": 3.9347607499852133, "grad_norm": 0.7367448806762695, "learning_rate": 2.598296216519525e-06, "loss": 0.0856, "step": 133050 }, { "epoch": 3.9350564854793872, "grad_norm": 0.8607929944992065, "learning_rate": 2.598169526600586e-06, "loss": 0.0918, "step": 133060 }, { "epoch": 3.935352220973561, "grad_norm": 1.0320171117782593, "learning_rate": 2.5980428366816467e-06, "loss": 0.0714, "step": 133070 }, { "epoch": 3.935647956467735, "grad_norm": 0.7547299861907959, "learning_rate": 2.5979161467627075e-06, "loss": 0.069, "step": 133080 }, { "epoch": 3.935943691961909, "grad_norm": 0.9769920706748962, "learning_rate": 2.597789456843768e-06, "loss": 0.0579, "step": 133090 }, { "epoch": 3.9362394274560835, "grad_norm": 0.5733233094215393, "learning_rate": 2.5976627669248286e-06, "loss": 0.0643, "step": 133100 }, { "epoch": 3.9365351629502574, "grad_norm": 0.8579316735267639, "learning_rate": 2.597536077005889e-06, "loss": 0.0865, "step": 133110 }, { "epoch": 3.9368308984444313, "grad_norm": 0.6537173986434937, "learning_rate": 2.59740938708695e-06, "loss": 0.06, "step": 133120 }, { "epoch": 3.9371266339386053, "grad_norm": 0.979524552822113, "learning_rate": 2.59728269716801e-06, "loss": 0.0663, "step": 133130 }, { "epoch": 3.9374223694327792, "grad_norm": 0.8158250451087952, "learning_rate": 2.597156007249071e-06, "loss": 0.07, "step": 133140 }, { "epoch": 3.937718104926953, "grad_norm": 1.3849033117294312, "learning_rate": 2.5970293173301317e-06, "loss": 0.0679, "step": 133150 }, { "epoch": 3.938013840421127, "grad_norm": 0.5816667675971985, "learning_rate": 2.5969026274111925e-06, "loss": 0.071, "step": 133160 }, { "epoch": 3.9383095759153015, "grad_norm": 0.8177536129951477, "learning_rate": 2.596775937492253e-06, "loss": 0.0927, "step": 133170 }, { "epoch": 3.9386053114094755, "grad_norm": 0.8171014785766602, "learning_rate": 2.5966492475733137e-06, "loss": 0.0652, "step": 133180 }, { "epoch": 3.9389010469036494, "grad_norm": 0.655654788017273, "learning_rate": 2.596522557654374e-06, "loss": 0.0559, "step": 133190 }, { "epoch": 3.9391967823978233, "grad_norm": 0.6543400883674622, "learning_rate": 2.596395867735435e-06, "loss": 0.0669, "step": 133200 }, { "epoch": 3.9394925178919973, "grad_norm": 0.5758596658706665, "learning_rate": 2.5962691778164952e-06, "loss": 0.0738, "step": 133210 }, { "epoch": 3.9397882533861717, "grad_norm": 0.5473132729530334, "learning_rate": 2.596142487897556e-06, "loss": 0.0702, "step": 133220 }, { "epoch": 3.940083988880345, "grad_norm": 0.8245540261268616, "learning_rate": 2.596015797978617e-06, "loss": 0.0617, "step": 133230 }, { "epoch": 3.9403797243745196, "grad_norm": 0.3327164649963379, "learning_rate": 2.5958891080596776e-06, "loss": 0.0525, "step": 133240 }, { "epoch": 3.9406754598686935, "grad_norm": 1.0542140007019043, "learning_rate": 2.595762418140738e-06, "loss": 0.0718, "step": 133250 }, { "epoch": 3.9409711953628674, "grad_norm": 1.0506383180618286, "learning_rate": 2.5956357282217987e-06, "loss": 0.0895, "step": 133260 }, { "epoch": 3.9412669308570414, "grad_norm": 0.9430152773857117, "learning_rate": 2.595509038302859e-06, "loss": 0.0826, "step": 133270 }, { "epoch": 3.9415626663512153, "grad_norm": 2.046398639678955, "learning_rate": 2.59538234838392e-06, "loss": 0.0686, "step": 133280 }, { "epoch": 3.9418584018453897, "grad_norm": 1.03725004196167, "learning_rate": 2.5952556584649803e-06, "loss": 0.0628, "step": 133290 }, { "epoch": 3.9421541373395637, "grad_norm": 0.8170303106307983, "learning_rate": 2.595128968546041e-06, "loss": 0.0749, "step": 133300 }, { "epoch": 3.9424498728337376, "grad_norm": 0.9027332663536072, "learning_rate": 2.595002278627102e-06, "loss": 0.0849, "step": 133310 }, { "epoch": 3.9427456083279115, "grad_norm": 0.7037277221679688, "learning_rate": 2.5948755887081626e-06, "loss": 0.0775, "step": 133320 }, { "epoch": 3.9430413438220855, "grad_norm": 0.8358100056648254, "learning_rate": 2.594748898789223e-06, "loss": 0.0703, "step": 133330 }, { "epoch": 3.9433370793162594, "grad_norm": 0.8020159006118774, "learning_rate": 2.5946222088702834e-06, "loss": 0.0684, "step": 133340 }, { "epoch": 3.9436328148104334, "grad_norm": 0.9951508641242981, "learning_rate": 2.594495518951344e-06, "loss": 0.086, "step": 133350 }, { "epoch": 3.9439285503046078, "grad_norm": 0.8699804544448853, "learning_rate": 2.5943688290324045e-06, "loss": 0.0931, "step": 133360 }, { "epoch": 3.9442242857987817, "grad_norm": 0.33833175897598267, "learning_rate": 2.5942421391134653e-06, "loss": 0.0622, "step": 133370 }, { "epoch": 3.9445200212929556, "grad_norm": 0.7236493825912476, "learning_rate": 2.5941154491945257e-06, "loss": 0.0806, "step": 133380 }, { "epoch": 3.9448157567871296, "grad_norm": 0.6163172721862793, "learning_rate": 2.593988759275587e-06, "loss": 0.0575, "step": 133390 }, { "epoch": 3.9451114922813035, "grad_norm": 1.9528447389602661, "learning_rate": 2.5938620693566472e-06, "loss": 0.083, "step": 133400 }, { "epoch": 3.9454072277754775, "grad_norm": 1.1030983924865723, "learning_rate": 2.593735379437708e-06, "loss": 0.0888, "step": 133410 }, { "epoch": 3.9457029632696514, "grad_norm": 0.7913077473640442, "learning_rate": 2.5936086895187684e-06, "loss": 0.0827, "step": 133420 }, { "epoch": 3.945998698763826, "grad_norm": 0.6254116296768188, "learning_rate": 2.593481999599829e-06, "loss": 0.074, "step": 133430 }, { "epoch": 3.9462944342579998, "grad_norm": 0.5756328105926514, "learning_rate": 2.5933553096808896e-06, "loss": 0.0654, "step": 133440 }, { "epoch": 3.9465901697521737, "grad_norm": 0.9429375529289246, "learning_rate": 2.5932286197619503e-06, "loss": 0.0734, "step": 133450 }, { "epoch": 3.9468859052463476, "grad_norm": 0.9320926070213318, "learning_rate": 2.5931019298430107e-06, "loss": 0.0872, "step": 133460 }, { "epoch": 3.9471816407405216, "grad_norm": 0.837350606918335, "learning_rate": 2.592975239924072e-06, "loss": 0.0727, "step": 133470 }, { "epoch": 3.9474773762346955, "grad_norm": 0.5246627926826477, "learning_rate": 2.5928485500051323e-06, "loss": 0.07, "step": 133480 }, { "epoch": 3.9477731117288695, "grad_norm": 0.8037709593772888, "learning_rate": 2.592721860086193e-06, "loss": 0.0668, "step": 133490 }, { "epoch": 3.948068847223044, "grad_norm": 1.8931611776351929, "learning_rate": 2.5925951701672534e-06, "loss": 0.0786, "step": 133500 }, { "epoch": 3.948364582717218, "grad_norm": 0.7817291617393494, "learning_rate": 2.5924684802483142e-06, "loss": 0.0669, "step": 133510 }, { "epoch": 3.9486603182113917, "grad_norm": 0.8574672937393188, "learning_rate": 2.5923417903293746e-06, "loss": 0.0694, "step": 133520 }, { "epoch": 3.9489560537055657, "grad_norm": 0.813433825969696, "learning_rate": 2.5922151004104354e-06, "loss": 0.0703, "step": 133530 }, { "epoch": 3.9492517891997396, "grad_norm": 1.4484118223190308, "learning_rate": 2.5920884104914958e-06, "loss": 0.0893, "step": 133540 }, { "epoch": 3.949547524693914, "grad_norm": 0.5290111899375916, "learning_rate": 2.591961720572557e-06, "loss": 0.082, "step": 133550 }, { "epoch": 3.9498432601880875, "grad_norm": 0.7970077395439148, "learning_rate": 2.5918350306536173e-06, "loss": 0.0776, "step": 133560 }, { "epoch": 3.950138995682262, "grad_norm": 0.39972740411758423, "learning_rate": 2.591708340734678e-06, "loss": 0.0556, "step": 133570 }, { "epoch": 3.950434731176436, "grad_norm": 0.6422905325889587, "learning_rate": 2.5915816508157385e-06, "loss": 0.059, "step": 133580 }, { "epoch": 3.95073046667061, "grad_norm": 0.8092330694198608, "learning_rate": 2.5914549608967993e-06, "loss": 0.0557, "step": 133590 }, { "epoch": 3.9510262021647837, "grad_norm": 0.5825043320655823, "learning_rate": 2.5913282709778596e-06, "loss": 0.0796, "step": 133600 }, { "epoch": 3.9513219376589577, "grad_norm": 0.958892285823822, "learning_rate": 2.5912015810589204e-06, "loss": 0.0868, "step": 133610 }, { "epoch": 3.951617673153132, "grad_norm": 0.4804195463657379, "learning_rate": 2.591074891139981e-06, "loss": 0.0789, "step": 133620 }, { "epoch": 3.951913408647306, "grad_norm": 0.711854100227356, "learning_rate": 2.590948201221042e-06, "loss": 0.0757, "step": 133630 }, { "epoch": 3.95220914414148, "grad_norm": 0.9282992482185364, "learning_rate": 2.5908215113021024e-06, "loss": 0.0638, "step": 133640 }, { "epoch": 3.952504879635654, "grad_norm": 0.6887775659561157, "learning_rate": 2.590694821383163e-06, "loss": 0.081, "step": 133650 }, { "epoch": 3.952800615129828, "grad_norm": 1.4015274047851562, "learning_rate": 2.5905681314642235e-06, "loss": 0.0829, "step": 133660 }, { "epoch": 3.9530963506240018, "grad_norm": 1.0796444416046143, "learning_rate": 2.5904414415452843e-06, "loss": 0.074, "step": 133670 }, { "epoch": 3.9533920861181757, "grad_norm": 0.8810772895812988, "learning_rate": 2.5903147516263447e-06, "loss": 0.0685, "step": 133680 }, { "epoch": 3.95368782161235, "grad_norm": 0.7133532762527466, "learning_rate": 2.5901880617074055e-06, "loss": 0.0555, "step": 133690 }, { "epoch": 3.953983557106524, "grad_norm": 0.6662120223045349, "learning_rate": 2.590061371788466e-06, "loss": 0.0829, "step": 133700 }, { "epoch": 3.954279292600698, "grad_norm": 1.0581762790679932, "learning_rate": 2.589934681869527e-06, "loss": 0.0818, "step": 133710 }, { "epoch": 3.954575028094872, "grad_norm": 0.7456709146499634, "learning_rate": 2.5898079919505874e-06, "loss": 0.0822, "step": 133720 }, { "epoch": 3.954870763589046, "grad_norm": 0.9587528109550476, "learning_rate": 2.5896813020316482e-06, "loss": 0.071, "step": 133730 }, { "epoch": 3.95516649908322, "grad_norm": 1.1717866659164429, "learning_rate": 2.5895546121127086e-06, "loss": 0.0742, "step": 133740 }, { "epoch": 3.9554622345773938, "grad_norm": 0.9009163975715637, "learning_rate": 2.5894279221937694e-06, "loss": 0.0926, "step": 133750 }, { "epoch": 3.955757970071568, "grad_norm": 0.7896714210510254, "learning_rate": 2.5893012322748297e-06, "loss": 0.0925, "step": 133760 }, { "epoch": 3.956053705565742, "grad_norm": 0.7777814269065857, "learning_rate": 2.58917454235589e-06, "loss": 0.0884, "step": 133770 }, { "epoch": 3.956349441059916, "grad_norm": 0.6683864593505859, "learning_rate": 2.589047852436951e-06, "loss": 0.0671, "step": 133780 }, { "epoch": 3.95664517655409, "grad_norm": 0.5672176480293274, "learning_rate": 2.5889211625180117e-06, "loss": 0.0745, "step": 133790 }, { "epoch": 3.956940912048264, "grad_norm": 0.9110903143882751, "learning_rate": 2.5887944725990725e-06, "loss": 0.0824, "step": 133800 }, { "epoch": 3.9572366475424383, "grad_norm": 0.7773540616035461, "learning_rate": 2.588667782680133e-06, "loss": 0.0827, "step": 133810 }, { "epoch": 3.957532383036612, "grad_norm": 0.5104024410247803, "learning_rate": 2.5885410927611936e-06, "loss": 0.0711, "step": 133820 }, { "epoch": 3.957828118530786, "grad_norm": 0.7799907326698303, "learning_rate": 2.588414402842254e-06, "loss": 0.0791, "step": 133830 }, { "epoch": 3.95812385402496, "grad_norm": 0.9589950442314148, "learning_rate": 2.5882877129233148e-06, "loss": 0.0611, "step": 133840 }, { "epoch": 3.958419589519134, "grad_norm": 0.6975918412208557, "learning_rate": 2.588161023004375e-06, "loss": 0.0622, "step": 133850 }, { "epoch": 3.958715325013308, "grad_norm": 0.7520077228546143, "learning_rate": 2.588034333085436e-06, "loss": 0.0955, "step": 133860 }, { "epoch": 3.959011060507482, "grad_norm": 0.6287814974784851, "learning_rate": 2.5879076431664967e-06, "loss": 0.078, "step": 133870 }, { "epoch": 3.9593067960016564, "grad_norm": 0.6771199703216553, "learning_rate": 2.5877809532475575e-06, "loss": 0.0537, "step": 133880 }, { "epoch": 3.95960253149583, "grad_norm": 0.7280851602554321, "learning_rate": 2.587654263328618e-06, "loss": 0.0553, "step": 133890 }, { "epoch": 3.9598982669900042, "grad_norm": 0.9906440377235413, "learning_rate": 2.5875275734096787e-06, "loss": 0.0803, "step": 133900 }, { "epoch": 3.960194002484178, "grad_norm": 0.654592752456665, "learning_rate": 2.587400883490739e-06, "loss": 0.0955, "step": 133910 }, { "epoch": 3.960489737978352, "grad_norm": 0.6132461428642273, "learning_rate": 2.5872741935718e-06, "loss": 0.0826, "step": 133920 }, { "epoch": 3.960785473472526, "grad_norm": 0.6731410622596741, "learning_rate": 2.58714750365286e-06, "loss": 0.0652, "step": 133930 }, { "epoch": 3.9610812089667, "grad_norm": 0.7546716332435608, "learning_rate": 2.587020813733921e-06, "loss": 0.0503, "step": 133940 }, { "epoch": 3.9613769444608744, "grad_norm": 0.7899438738822937, "learning_rate": 2.5868941238149818e-06, "loss": 0.0712, "step": 133950 }, { "epoch": 3.9616726799550483, "grad_norm": 1.4641447067260742, "learning_rate": 2.5867674338960426e-06, "loss": 0.089, "step": 133960 }, { "epoch": 3.9619684154492223, "grad_norm": 0.8758440017700195, "learning_rate": 2.586640743977103e-06, "loss": 0.0789, "step": 133970 }, { "epoch": 3.9622641509433962, "grad_norm": 0.6534911394119263, "learning_rate": 2.5865140540581637e-06, "loss": 0.064, "step": 133980 }, { "epoch": 3.96255988643757, "grad_norm": 0.8129060864448547, "learning_rate": 2.586387364139224e-06, "loss": 0.0649, "step": 133990 }, { "epoch": 3.962855621931744, "grad_norm": 0.9574471712112427, "learning_rate": 2.586260674220285e-06, "loss": 0.0791, "step": 134000 }, { "epoch": 3.963151357425918, "grad_norm": 1.1441144943237305, "learning_rate": 2.5861339843013452e-06, "loss": 0.0818, "step": 134010 }, { "epoch": 3.9634470929200925, "grad_norm": 1.0616353750228882, "learning_rate": 2.586007294382406e-06, "loss": 0.0763, "step": 134020 }, { "epoch": 3.9637428284142664, "grad_norm": 1.0457072257995605, "learning_rate": 2.585880604463467e-06, "loss": 0.0786, "step": 134030 }, { "epoch": 3.9640385639084403, "grad_norm": 0.7167569398880005, "learning_rate": 2.5857539145445276e-06, "loss": 0.0695, "step": 134040 }, { "epoch": 3.9643342994026143, "grad_norm": 0.5781705975532532, "learning_rate": 2.585627224625588e-06, "loss": 0.0622, "step": 134050 }, { "epoch": 3.9646300348967882, "grad_norm": 0.7800662517547607, "learning_rate": 2.5855005347066488e-06, "loss": 0.0853, "step": 134060 }, { "epoch": 3.964925770390962, "grad_norm": 0.8843331933021545, "learning_rate": 2.585373844787709e-06, "loss": 0.0793, "step": 134070 }, { "epoch": 3.965221505885136, "grad_norm": 0.49221107363700867, "learning_rate": 2.58524715486877e-06, "loss": 0.0762, "step": 134080 }, { "epoch": 3.9655172413793105, "grad_norm": 1.085510492324829, "learning_rate": 2.5851204649498303e-06, "loss": 0.0724, "step": 134090 }, { "epoch": 3.9658129768734844, "grad_norm": 0.7467467188835144, "learning_rate": 2.584993775030891e-06, "loss": 0.0794, "step": 134100 }, { "epoch": 3.9661087123676584, "grad_norm": 0.775316596031189, "learning_rate": 2.584867085111952e-06, "loss": 0.0747, "step": 134110 }, { "epoch": 3.9664044478618323, "grad_norm": 0.5566672682762146, "learning_rate": 2.5847403951930127e-06, "loss": 0.0713, "step": 134120 }, { "epoch": 3.9667001833560063, "grad_norm": 0.8126400113105774, "learning_rate": 2.584613705274073e-06, "loss": 0.0704, "step": 134130 }, { "epoch": 3.9669959188501807, "grad_norm": 0.9449554681777954, "learning_rate": 2.584487015355134e-06, "loss": 0.0791, "step": 134140 }, { "epoch": 3.967291654344354, "grad_norm": 0.8756777048110962, "learning_rate": 2.584360325436194e-06, "loss": 0.0889, "step": 134150 }, { "epoch": 3.9675873898385285, "grad_norm": 1.0740559101104736, "learning_rate": 2.584233635517255e-06, "loss": 0.0748, "step": 134160 }, { "epoch": 3.9678831253327025, "grad_norm": 0.6786192655563354, "learning_rate": 2.5841069455983153e-06, "loss": 0.074, "step": 134170 }, { "epoch": 3.9681788608268764, "grad_norm": 0.7856521010398865, "learning_rate": 2.5839802556793757e-06, "loss": 0.0774, "step": 134180 }, { "epoch": 3.9684745963210504, "grad_norm": 0.6571362614631653, "learning_rate": 2.583853565760437e-06, "loss": 0.0545, "step": 134190 }, { "epoch": 3.9687703318152243, "grad_norm": 1.144679307937622, "learning_rate": 2.5837268758414973e-06, "loss": 0.0794, "step": 134200 }, { "epoch": 3.9690660673093987, "grad_norm": 0.4095466136932373, "learning_rate": 2.583600185922558e-06, "loss": 0.0679, "step": 134210 }, { "epoch": 3.9693618028035726, "grad_norm": 0.971310555934906, "learning_rate": 2.5834734960036184e-06, "loss": 0.0739, "step": 134220 }, { "epoch": 3.9696575382977466, "grad_norm": 0.8179274797439575, "learning_rate": 2.5833468060846792e-06, "loss": 0.0665, "step": 134230 }, { "epoch": 3.9699532737919205, "grad_norm": 0.4272295832633972, "learning_rate": 2.5832201161657396e-06, "loss": 0.0683, "step": 134240 }, { "epoch": 3.9702490092860945, "grad_norm": 0.670849621295929, "learning_rate": 2.5830934262468004e-06, "loss": 0.0719, "step": 134250 }, { "epoch": 3.9705447447802684, "grad_norm": 0.8585383296012878, "learning_rate": 2.5829667363278607e-06, "loss": 0.0704, "step": 134260 }, { "epoch": 3.9708404802744424, "grad_norm": 1.0730245113372803, "learning_rate": 2.582840046408922e-06, "loss": 0.0777, "step": 134270 }, { "epoch": 3.9711362157686168, "grad_norm": 0.9225028157234192, "learning_rate": 2.5827133564899823e-06, "loss": 0.0667, "step": 134280 }, { "epoch": 3.9714319512627907, "grad_norm": 0.8209409713745117, "learning_rate": 2.582586666571043e-06, "loss": 0.0556, "step": 134290 }, { "epoch": 3.9717276867569646, "grad_norm": 0.7762119174003601, "learning_rate": 2.5824599766521035e-06, "loss": 0.0772, "step": 134300 }, { "epoch": 3.9720234222511386, "grad_norm": 1.645534634590149, "learning_rate": 2.5823332867331643e-06, "loss": 0.0859, "step": 134310 }, { "epoch": 3.9723191577453125, "grad_norm": 1.016348958015442, "learning_rate": 2.5822065968142246e-06, "loss": 0.0734, "step": 134320 }, { "epoch": 3.9726148932394865, "grad_norm": 1.1926301717758179, "learning_rate": 2.5820799068952854e-06, "loss": 0.0764, "step": 134330 }, { "epoch": 3.9729106287336604, "grad_norm": 1.1610710620880127, "learning_rate": 2.581953216976346e-06, "loss": 0.0575, "step": 134340 }, { "epoch": 3.973206364227835, "grad_norm": 0.5208753943443298, "learning_rate": 2.581826527057407e-06, "loss": 0.0725, "step": 134350 }, { "epoch": 3.9735020997220087, "grad_norm": 0.857586681842804, "learning_rate": 2.5816998371384674e-06, "loss": 0.0824, "step": 134360 }, { "epoch": 3.9737978352161827, "grad_norm": 0.7357731461524963, "learning_rate": 2.581573147219528e-06, "loss": 0.0743, "step": 134370 }, { "epoch": 3.9740935707103566, "grad_norm": 1.2443859577178955, "learning_rate": 2.5814464573005885e-06, "loss": 0.0642, "step": 134380 }, { "epoch": 3.9743893062045306, "grad_norm": 0.8089929819107056, "learning_rate": 2.5813197673816493e-06, "loss": 0.0589, "step": 134390 }, { "epoch": 3.9746850416987045, "grad_norm": 0.6468443870544434, "learning_rate": 2.5811930774627097e-06, "loss": 0.0722, "step": 134400 }, { "epoch": 3.9749807771928785, "grad_norm": 0.7998695373535156, "learning_rate": 2.5810663875437705e-06, "loss": 0.0851, "step": 134410 }, { "epoch": 3.975276512687053, "grad_norm": 0.8507596850395203, "learning_rate": 2.580939697624831e-06, "loss": 0.0896, "step": 134420 }, { "epoch": 3.975572248181227, "grad_norm": 1.0275416374206543, "learning_rate": 2.580813007705892e-06, "loss": 0.0626, "step": 134430 }, { "epoch": 3.9758679836754007, "grad_norm": 0.8898620009422302, "learning_rate": 2.5806863177869524e-06, "loss": 0.0663, "step": 134440 }, { "epoch": 3.9761637191695747, "grad_norm": 0.6818543672561646, "learning_rate": 2.580559627868013e-06, "loss": 0.0775, "step": 134450 }, { "epoch": 3.9764594546637486, "grad_norm": 1.234215497970581, "learning_rate": 2.5804329379490736e-06, "loss": 0.0797, "step": 134460 }, { "epoch": 3.976755190157923, "grad_norm": 0.6664389371871948, "learning_rate": 2.5803062480301344e-06, "loss": 0.0872, "step": 134470 }, { "epoch": 3.9770509256520965, "grad_norm": 1.2076143026351929, "learning_rate": 2.5801795581111947e-06, "loss": 0.0656, "step": 134480 }, { "epoch": 3.977346661146271, "grad_norm": 0.681431770324707, "learning_rate": 2.5800528681922555e-06, "loss": 0.0795, "step": 134490 }, { "epoch": 3.977642396640445, "grad_norm": 0.8017081618309021, "learning_rate": 2.579926178273316e-06, "loss": 0.0731, "step": 134500 }, { "epoch": 3.9779381321346188, "grad_norm": 0.889283299446106, "learning_rate": 2.579799488354377e-06, "loss": 0.0771, "step": 134510 }, { "epoch": 3.9782338676287927, "grad_norm": 0.8758597373962402, "learning_rate": 2.5796727984354375e-06, "loss": 0.084, "step": 134520 }, { "epoch": 3.9785296031229667, "grad_norm": 0.4914364218711853, "learning_rate": 2.5795461085164982e-06, "loss": 0.0642, "step": 134530 }, { "epoch": 3.978825338617141, "grad_norm": 0.6584014296531677, "learning_rate": 2.5794194185975586e-06, "loss": 0.0692, "step": 134540 }, { "epoch": 3.979121074111315, "grad_norm": 0.9231376051902771, "learning_rate": 2.5792927286786194e-06, "loss": 0.0866, "step": 134550 }, { "epoch": 3.979416809605489, "grad_norm": 0.6725465655326843, "learning_rate": 2.5791660387596798e-06, "loss": 0.0893, "step": 134560 }, { "epoch": 3.979712545099663, "grad_norm": 0.8621688485145569, "learning_rate": 2.5790393488407406e-06, "loss": 0.0798, "step": 134570 }, { "epoch": 3.980008280593837, "grad_norm": 1.086169958114624, "learning_rate": 2.578912658921801e-06, "loss": 0.0702, "step": 134580 }, { "epoch": 3.9803040160880108, "grad_norm": 0.7640660405158997, "learning_rate": 2.5787859690028617e-06, "loss": 0.0619, "step": 134590 }, { "epoch": 3.9805997515821847, "grad_norm": 1.6410713195800781, "learning_rate": 2.5786592790839225e-06, "loss": 0.0811, "step": 134600 }, { "epoch": 3.980895487076359, "grad_norm": 1.056258201599121, "learning_rate": 2.578532589164983e-06, "loss": 0.072, "step": 134610 }, { "epoch": 3.981191222570533, "grad_norm": 0.45885246992111206, "learning_rate": 2.5784058992460437e-06, "loss": 0.0797, "step": 134620 }, { "epoch": 3.981486958064707, "grad_norm": 0.7409994006156921, "learning_rate": 2.578279209327104e-06, "loss": 0.0716, "step": 134630 }, { "epoch": 3.981782693558881, "grad_norm": 1.2190189361572266, "learning_rate": 2.578152519408165e-06, "loss": 0.0786, "step": 134640 }, { "epoch": 3.982078429053055, "grad_norm": 1.286515474319458, "learning_rate": 2.578025829489225e-06, "loss": 0.0838, "step": 134650 }, { "epoch": 3.982374164547229, "grad_norm": 0.9166869521141052, "learning_rate": 2.577899139570286e-06, "loss": 0.074, "step": 134660 }, { "epoch": 3.9826699000414028, "grad_norm": 1.0450551509857178, "learning_rate": 2.5777724496513468e-06, "loss": 0.0797, "step": 134670 }, { "epoch": 3.982965635535577, "grad_norm": 0.6803193092346191, "learning_rate": 2.5776457597324075e-06, "loss": 0.0679, "step": 134680 }, { "epoch": 3.983261371029751, "grad_norm": 0.452557235956192, "learning_rate": 2.577519069813468e-06, "loss": 0.0499, "step": 134690 }, { "epoch": 3.983557106523925, "grad_norm": 0.6892573237419128, "learning_rate": 2.5773923798945287e-06, "loss": 0.0837, "step": 134700 }, { "epoch": 3.983852842018099, "grad_norm": 0.8708665370941162, "learning_rate": 2.577265689975589e-06, "loss": 0.0792, "step": 134710 }, { "epoch": 3.984148577512273, "grad_norm": 0.557212769985199, "learning_rate": 2.57713900005665e-06, "loss": 0.0791, "step": 134720 }, { "epoch": 3.9844443130064473, "grad_norm": 0.46511244773864746, "learning_rate": 2.5770123101377102e-06, "loss": 0.066, "step": 134730 }, { "epoch": 3.984740048500621, "grad_norm": 0.977618932723999, "learning_rate": 2.576885620218771e-06, "loss": 0.0772, "step": 134740 }, { "epoch": 3.985035783994795, "grad_norm": 1.0540200471878052, "learning_rate": 2.576758930299832e-06, "loss": 0.0847, "step": 134750 }, { "epoch": 3.985331519488969, "grad_norm": 1.0073007345199585, "learning_rate": 2.5766322403808926e-06, "loss": 0.0799, "step": 134760 }, { "epoch": 3.985627254983143, "grad_norm": 0.6612672209739685, "learning_rate": 2.576505550461953e-06, "loss": 0.0838, "step": 134770 }, { "epoch": 3.985922990477317, "grad_norm": 1.1804803609848022, "learning_rate": 2.5763788605430138e-06, "loss": 0.0847, "step": 134780 }, { "epoch": 3.986218725971491, "grad_norm": 1.4265871047973633, "learning_rate": 2.576252170624074e-06, "loss": 0.0628, "step": 134790 }, { "epoch": 3.9865144614656653, "grad_norm": 0.7901949286460876, "learning_rate": 2.576125480705135e-06, "loss": 0.0776, "step": 134800 }, { "epoch": 3.986810196959839, "grad_norm": 0.4166664183139801, "learning_rate": 2.5759987907861953e-06, "loss": 0.0752, "step": 134810 }, { "epoch": 3.9871059324540132, "grad_norm": 1.275209665298462, "learning_rate": 2.575872100867256e-06, "loss": 0.0769, "step": 134820 }, { "epoch": 3.987401667948187, "grad_norm": 0.7801334857940674, "learning_rate": 2.575745410948317e-06, "loss": 0.0823, "step": 134830 }, { "epoch": 3.987697403442361, "grad_norm": 1.1055742502212524, "learning_rate": 2.5756187210293776e-06, "loss": 0.0612, "step": 134840 }, { "epoch": 3.987993138936535, "grad_norm": 0.8300718665122986, "learning_rate": 2.575492031110438e-06, "loss": 0.0799, "step": 134850 }, { "epoch": 3.988288874430709, "grad_norm": 0.6001459956169128, "learning_rate": 2.575365341191499e-06, "loss": 0.0705, "step": 134860 }, { "epoch": 3.9885846099248834, "grad_norm": 0.5163858532905579, "learning_rate": 2.575238651272559e-06, "loss": 0.0686, "step": 134870 }, { "epoch": 3.9888803454190573, "grad_norm": 1.0844076871871948, "learning_rate": 2.57511196135362e-06, "loss": 0.0751, "step": 134880 }, { "epoch": 3.9891760809132313, "grad_norm": 0.733363687992096, "learning_rate": 2.5749852714346803e-06, "loss": 0.068, "step": 134890 }, { "epoch": 3.9894718164074052, "grad_norm": 0.883638858795166, "learning_rate": 2.574858581515741e-06, "loss": 0.0838, "step": 134900 }, { "epoch": 3.989767551901579, "grad_norm": 0.7737745642662048, "learning_rate": 2.574731891596802e-06, "loss": 0.0865, "step": 134910 }, { "epoch": 3.990063287395753, "grad_norm": 0.8050497174263, "learning_rate": 2.5746052016778627e-06, "loss": 0.0939, "step": 134920 }, { "epoch": 3.990359022889927, "grad_norm": 0.645318865776062, "learning_rate": 2.574478511758923e-06, "loss": 0.0636, "step": 134930 }, { "epoch": 3.9906547583841014, "grad_norm": 0.38158440589904785, "learning_rate": 2.574351821839984e-06, "loss": 0.0642, "step": 134940 }, { "epoch": 3.9909504938782754, "grad_norm": 0.7081239223480225, "learning_rate": 2.574225131921044e-06, "loss": 0.0684, "step": 134950 }, { "epoch": 3.9912462293724493, "grad_norm": 0.8313815593719482, "learning_rate": 2.574098442002105e-06, "loss": 0.0776, "step": 134960 }, { "epoch": 3.9915419648666233, "grad_norm": 0.736700713634491, "learning_rate": 2.5739717520831654e-06, "loss": 0.0764, "step": 134970 }, { "epoch": 3.991837700360797, "grad_norm": 0.7230051159858704, "learning_rate": 2.573845062164226e-06, "loss": 0.064, "step": 134980 }, { "epoch": 3.992133435854971, "grad_norm": 1.109710693359375, "learning_rate": 2.573718372245287e-06, "loss": 0.0631, "step": 134990 }, { "epoch": 3.992429171349145, "grad_norm": 1.011497974395752, "learning_rate": 2.5735916823263473e-06, "loss": 0.0916, "step": 135000 }, { "epoch": 3.9927249068433195, "grad_norm": 0.9660828113555908, "learning_rate": 2.573464992407408e-06, "loss": 0.0907, "step": 135010 }, { "epoch": 3.9930206423374934, "grad_norm": 0.8977073431015015, "learning_rate": 2.5733383024884685e-06, "loss": 0.088, "step": 135020 }, { "epoch": 3.9933163778316674, "grad_norm": 0.6976026296615601, "learning_rate": 2.5732116125695293e-06, "loss": 0.0638, "step": 135030 }, { "epoch": 3.9936121133258413, "grad_norm": 0.5893504619598389, "learning_rate": 2.5730849226505896e-06, "loss": 0.0527, "step": 135040 }, { "epoch": 3.9939078488200153, "grad_norm": 1.268271565437317, "learning_rate": 2.5729582327316504e-06, "loss": 0.0668, "step": 135050 }, { "epoch": 3.9942035843141896, "grad_norm": 0.7922341823577881, "learning_rate": 2.5728315428127108e-06, "loss": 0.0729, "step": 135060 }, { "epoch": 3.994499319808363, "grad_norm": 0.6641149520874023, "learning_rate": 2.572704852893772e-06, "loss": 0.0791, "step": 135070 }, { "epoch": 3.9947950553025375, "grad_norm": 0.5817373394966125, "learning_rate": 2.5725781629748324e-06, "loss": 0.0904, "step": 135080 }, { "epoch": 3.9950907907967115, "grad_norm": 1.3046327829360962, "learning_rate": 2.572451473055893e-06, "loss": 0.0724, "step": 135090 }, { "epoch": 3.9953865262908854, "grad_norm": 0.6186131238937378, "learning_rate": 2.5723247831369535e-06, "loss": 0.0615, "step": 135100 }, { "epoch": 3.9956822617850594, "grad_norm": 0.7024182081222534, "learning_rate": 2.5721980932180143e-06, "loss": 0.0803, "step": 135110 }, { "epoch": 3.9959779972792333, "grad_norm": 1.3785263299942017, "learning_rate": 2.5720714032990747e-06, "loss": 0.0689, "step": 135120 }, { "epoch": 3.9962737327734077, "grad_norm": 0.8559420704841614, "learning_rate": 2.5719447133801355e-06, "loss": 0.0658, "step": 135130 }, { "epoch": 3.9965694682675816, "grad_norm": 1.0717322826385498, "learning_rate": 2.571818023461196e-06, "loss": 0.0748, "step": 135140 }, { "epoch": 3.9968652037617556, "grad_norm": 0.8844244480133057, "learning_rate": 2.571691333542257e-06, "loss": 0.0717, "step": 135150 }, { "epoch": 3.9971609392559295, "grad_norm": 1.0841666460037231, "learning_rate": 2.5715646436233174e-06, "loss": 0.0807, "step": 135160 }, { "epoch": 3.9974566747501035, "grad_norm": 0.5332518815994263, "learning_rate": 2.571437953704378e-06, "loss": 0.0854, "step": 135170 }, { "epoch": 3.9977524102442774, "grad_norm": 0.7728108167648315, "learning_rate": 2.5713112637854386e-06, "loss": 0.0622, "step": 135180 }, { "epoch": 3.9980481457384514, "grad_norm": 0.8597897887229919, "learning_rate": 2.5711845738664993e-06, "loss": 0.0524, "step": 135190 }, { "epoch": 3.9983438812326257, "grad_norm": 0.7868993878364563, "learning_rate": 2.5710578839475597e-06, "loss": 0.0849, "step": 135200 }, { "epoch": 3.9986396167267997, "grad_norm": 1.2443236112594604, "learning_rate": 2.5709311940286205e-06, "loss": 0.0743, "step": 135210 }, { "epoch": 3.9989353522209736, "grad_norm": 0.9757182002067566, "learning_rate": 2.570804504109681e-06, "loss": 0.078, "step": 135220 }, { "epoch": 3.9992310877151476, "grad_norm": 0.9133742451667786, "learning_rate": 2.570677814190742e-06, "loss": 0.0668, "step": 135230 }, { "epoch": 3.9995268232093215, "grad_norm": 0.895409345626831, "learning_rate": 2.5705511242718024e-06, "loss": 0.0703, "step": 135240 }, { "epoch": 3.9998225587034955, "grad_norm": 1.0248671770095825, "learning_rate": 2.5704244343528632e-06, "loss": 0.0891, "step": 135250 }, { "epoch": 4.0, "eval_accuracy": 0.6722893169644343, "eval_animal_abuse/accuracy": 0.9952091027048607, "eval_animal_abuse/f1": 0.77947932618683, "eval_animal_abuse/fpr": 0.0018342139804126108, "eval_animal_abuse/precision": 0.8236245954692557, "eval_animal_abuse/recall": 0.7398255813953488, "eval_animal_abuse/threshold": 0.5404388308525085, "eval_child_abuse/accuracy": 0.9965399075090661, "eval_child_abuse/f1": 0.6904761904761905, "eval_child_abuse/fpr": 0.0017898663454943848, "eval_child_abuse/precision": 0.6843657817109144, "eval_child_abuse/recall": 0.6966966966966966, "eval_child_abuse/threshold": 0.42727720737457275, "eval_controversial_topics,politics/accuracy": 0.9671956615763383, "eval_controversial_topics,politics/f1": 0.5171400587659157, "eval_controversial_topics,politics/fpr": 0.020352828116419513, "eval_controversial_topics,politics/precision": 0.47100802854594115, "eval_controversial_topics,politics/recall": 0.5732899022801303, "eval_controversial_topics,politics/threshold": 0.3250918686389923, "eval_discrimination,stereotype,injustice/accuracy": 0.9536214525734438, "eval_discrimination,stereotype,injustice/f1": 0.7204732303990375, "eval_discrimination,stereotype,injustice/fpr": 0.02893338634474278, "eval_discrimination,stereotype,injustice/precision": 0.6917597227570274, "eval_discrimination,stereotype,injustice/recall": 0.751673640167364, "eval_discrimination,stereotype,injustice/threshold": 0.4163219630718231, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9725188807931596, "eval_drug_abuse,weapons,banned_substance/f1": 0.7690156599552572, "eval_drug_abuse,weapons,banned_substance/fpr": 0.017910026794528244, "eval_drug_abuse,weapons,banned_substance/precision": 0.7302177376526819, "eval_drug_abuse,weapons,banned_substance/recall": 0.8121677495569994, "eval_drug_abuse,weapons,banned_substance/threshold": 0.5250034928321838, "eval_financial_crime,property_crime,theft/accuracy": 0.9606248128555744, "eval_financial_crime,property_crime,theft/f1": 0.8050728814955118, "eval_financial_crime,property_crime,theft/fpr": 0.025873984114405717, "eval_financial_crime,property_crime,theft/precision": 0.7768595041322314, "eval_financial_crime,property_crime,theft/recall": 0.8354127499572722, "eval_financial_crime,property_crime,theft/threshold": 0.4248891770839691, "eval_flagged/accuracy": 0.8571214692085039, "eval_flagged/aucpr": 0.9058677934084914, "eval_flagged/f1": 0.8729306289112778, "eval_flagged/fpr": 0.17402392829013924, "eval_flagged/precision": 0.864097006619413, "eval_flagged/recall": 0.8819467280499835, "eval_hate_speech,offensive_language/accuracy": 0.950543966463719, "eval_hate_speech,offensive_language/f1": 0.7034413965087282, "eval_hate_speech,offensive_language/fpr": 0.020372738900054775, "eval_hate_speech,offensive_language/precision": 0.7597500538677009, "eval_hate_speech,offensive_language/recall": 0.6549034175334324, "eval_hate_speech,offensive_language/threshold": 0.5034179091453552, "eval_loss": 0.08106982707977295, "eval_macro_f1": 0.6780517022777939, "eval_macro_precision": 0.6663561727830009, "eval_macro_recall": 0.7004210576160055, "eval_micro_f1": 0.751947984032632, "eval_micro_precision": 0.7374224319195857, "eval_micro_recall": 0.7670572754592172, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9804371693781815, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.2631578947368421, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.011030092787498086, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.24277456647398843, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.2872777017783858, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.13477592170238495, "eval_non_violent_unethical_behavior/accuracy": 0.8789133978773663, "eval_non_violent_unethical_behavior/f1": 0.7003416903379852, "eval_non_violent_unethical_behavior/fpr": 0.07970021591097808, "eval_non_violent_unethical_behavior/precision": 0.6890238963142973, "eval_non_violent_unethical_behavior/recall": 0.7120375020927507, "eval_non_violent_unethical_behavior/threshold": 0.3937346339225769, "eval_privacy_violation/accuracy": 0.9813687327411252, "eval_privacy_violation/f1": 0.8113843044796227, "eval_privacy_violation/fpr": 0.00985161335479805, "eval_privacy_violation/precision": 0.8105652759084792, "eval_privacy_violation/recall": 0.8122049898853675, "eval_privacy_violation/threshold": 0.5931380391120911, "eval_runtime": 49.5456, "eval_samples_per_second": 1213.307, "eval_self_harm/accuracy": 0.9969058788302225, "eval_self_harm/f1": 0.752, "eval_self_harm/fpr": 0.0009714591987136523, "eval_self_harm/precision": 0.8294117647058824, "eval_self_harm/recall": 0.6878048780487804, "eval_self_harm/threshold": 0.5784481763839722, "eval_sexually_explicit,adult_content/accuracy": 0.9833982100675384, "eval_sexually_explicit,adult_content/f1": 0.6819630337794774, "eval_sexually_explicit,adult_content/fpr": 0.010585167129732198, "eval_sexually_explicit,adult_content/precision": 0.6327616794795978, "eval_sexually_explicit,adult_content/recall": 0.7394609536973048, "eval_sexually_explicit,adult_content/threshold": 0.31153160333633423, "eval_steps_per_second": 18.972, "eval_terrorism,organized_crime/accuracy": 0.9871577336394184, "eval_terrorism,organized_crime/f1": 0.4438040345821326, "eval_terrorism,organized_crime/fpr": 0.010044773866818691, "eval_terrorism,organized_crime/precision": 0.3395810363836825, "eval_terrorism,organized_crime/recall": 0.6403326403326404, "eval_terrorism,organized_crime/threshold": 0.16885694861412048, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9221312838939348, "eval_violence,aiding_and_abetting,incitement/f1": 0.8549741301855811, "eval_violence,aiding_and_abetting,incitement/fpr": 0.05636643851140008, "eval_violence,aiding_and_abetting,incitement/precision": 0.8472827755603316, "eval_violence,aiding_and_abetting,incitement/recall": 0.8628064032016008, "eval_violence,aiding_and_abetting,incitement/threshold": 0.45132648944854736, "step": 135256 }, { "epoch": 4.000118294197669, "grad_norm": 0.7817575931549072, "learning_rate": 2.5702977444339236e-06, "loss": 0.056, "step": 135260 }, { "epoch": 4.000414029691844, "grad_norm": 0.8507325053215027, "learning_rate": 2.5701710545149844e-06, "loss": 0.0816, "step": 135270 }, { "epoch": 4.000709765186017, "grad_norm": 0.7268235683441162, "learning_rate": 2.5700443645960448e-06, "loss": 0.0719, "step": 135280 }, { "epoch": 4.001005500680192, "grad_norm": 0.6310796737670898, "learning_rate": 2.5699176746771055e-06, "loss": 0.0611, "step": 135290 }, { "epoch": 4.001301236174366, "grad_norm": 0.8858194351196289, "learning_rate": 2.569790984758166e-06, "loss": 0.0696, "step": 135300 }, { "epoch": 4.00159697166854, "grad_norm": 1.114098072052002, "learning_rate": 2.569664294839227e-06, "loss": 0.0634, "step": 135310 }, { "epoch": 4.001892707162714, "grad_norm": 0.7424736618995667, "learning_rate": 2.5695376049202875e-06, "loss": 0.0731, "step": 135320 }, { "epoch": 4.0021884426568874, "grad_norm": 0.990957498550415, "learning_rate": 2.5694109150013483e-06, "loss": 0.0766, "step": 135330 }, { "epoch": 4.002484178151062, "grad_norm": 1.0015745162963867, "learning_rate": 2.5692842250824086e-06, "loss": 0.073, "step": 135340 }, { "epoch": 4.002779913645235, "grad_norm": 0.564423143863678, "learning_rate": 2.5691575351634694e-06, "loss": 0.0778, "step": 135350 }, { "epoch": 4.00307564913941, "grad_norm": 0.5722174048423767, "learning_rate": 2.56903084524453e-06, "loss": 0.0624, "step": 135360 }, { "epoch": 4.003371384633584, "grad_norm": 0.7512434124946594, "learning_rate": 2.5689041553255906e-06, "loss": 0.0588, "step": 135370 }, { "epoch": 4.003667120127758, "grad_norm": 1.6192570924758911, "learning_rate": 2.568777465406651e-06, "loss": 0.0862, "step": 135380 }, { "epoch": 4.003962855621932, "grad_norm": 0.6017919778823853, "learning_rate": 2.568650775487712e-06, "loss": 0.0676, "step": 135390 }, { "epoch": 4.0042585911161055, "grad_norm": 0.6208542585372925, "learning_rate": 2.5685240855687725e-06, "loss": 0.0538, "step": 135400 }, { "epoch": 4.00455432661028, "grad_norm": 0.6195865869522095, "learning_rate": 2.568397395649833e-06, "loss": 0.0757, "step": 135410 }, { "epoch": 4.004850062104453, "grad_norm": 1.0000282526016235, "learning_rate": 2.5682707057308937e-06, "loss": 0.0958, "step": 135420 }, { "epoch": 4.005145797598628, "grad_norm": 0.6736742258071899, "learning_rate": 2.568144015811954e-06, "loss": 0.062, "step": 135430 }, { "epoch": 4.005441533092802, "grad_norm": 1.0310838222503662, "learning_rate": 2.568017325893015e-06, "loss": 0.0705, "step": 135440 }, { "epoch": 4.005737268586976, "grad_norm": 0.5420587062835693, "learning_rate": 2.567890635974075e-06, "loss": 0.056, "step": 135450 }, { "epoch": 4.00603300408115, "grad_norm": 0.7132548093795776, "learning_rate": 2.567763946055136e-06, "loss": 0.0641, "step": 135460 }, { "epoch": 4.0063287395753235, "grad_norm": 0.8634176850318909, "learning_rate": 2.567637256136197e-06, "loss": 0.0793, "step": 135470 }, { "epoch": 4.006624475069498, "grad_norm": 0.7353851795196533, "learning_rate": 2.5675105662172576e-06, "loss": 0.0725, "step": 135480 }, { "epoch": 4.006920210563671, "grad_norm": 1.1859521865844727, "learning_rate": 2.567383876298318e-06, "loss": 0.0636, "step": 135490 }, { "epoch": 4.007215946057846, "grad_norm": 0.5539875030517578, "learning_rate": 2.5672571863793787e-06, "loss": 0.066, "step": 135500 }, { "epoch": 4.00751168155202, "grad_norm": 1.190369725227356, "learning_rate": 2.567130496460439e-06, "loss": 0.0628, "step": 135510 }, { "epoch": 4.007807417046194, "grad_norm": 0.7691286206245422, "learning_rate": 2.5670038065415e-06, "loss": 0.069, "step": 135520 }, { "epoch": 4.008103152540368, "grad_norm": 1.1008251905441284, "learning_rate": 2.5668771166225603e-06, "loss": 0.0689, "step": 135530 }, { "epoch": 4.008398888034542, "grad_norm": 1.145686149597168, "learning_rate": 2.566750426703621e-06, "loss": 0.0798, "step": 135540 }, { "epoch": 4.008694623528716, "grad_norm": 0.7599855661392212, "learning_rate": 2.566623736784682e-06, "loss": 0.0609, "step": 135550 }, { "epoch": 4.00899035902289, "grad_norm": 1.0136057138442993, "learning_rate": 2.5664970468657426e-06, "loss": 0.0578, "step": 135560 }, { "epoch": 4.009286094517064, "grad_norm": 2.3075437545776367, "learning_rate": 2.566370356946803e-06, "loss": 0.0675, "step": 135570 }, { "epoch": 4.009581830011238, "grad_norm": 0.8317354321479797, "learning_rate": 2.5662436670278638e-06, "loss": 0.0779, "step": 135580 }, { "epoch": 4.009877565505412, "grad_norm": 0.511548638343811, "learning_rate": 2.566116977108924e-06, "loss": 0.069, "step": 135590 }, { "epoch": 4.010173300999586, "grad_norm": 0.9261022806167603, "learning_rate": 2.565990287189985e-06, "loss": 0.0564, "step": 135600 }, { "epoch": 4.01046903649376, "grad_norm": 1.027998685836792, "learning_rate": 2.5658635972710453e-06, "loss": 0.0751, "step": 135610 }, { "epoch": 4.010764771987934, "grad_norm": 0.6480430364608765, "learning_rate": 2.565736907352106e-06, "loss": 0.059, "step": 135620 }, { "epoch": 4.011060507482108, "grad_norm": 0.8363066911697388, "learning_rate": 2.565610217433167e-06, "loss": 0.0794, "step": 135630 }, { "epoch": 4.011356242976282, "grad_norm": 0.9970353245735168, "learning_rate": 2.5654835275142277e-06, "loss": 0.0812, "step": 135640 }, { "epoch": 4.011651978470456, "grad_norm": 0.9584670662879944, "learning_rate": 2.565356837595288e-06, "loss": 0.0549, "step": 135650 }, { "epoch": 4.01194771396463, "grad_norm": 0.8551009297370911, "learning_rate": 2.565230147676349e-06, "loss": 0.0713, "step": 135660 }, { "epoch": 4.012243449458804, "grad_norm": 0.618326723575592, "learning_rate": 2.565103457757409e-06, "loss": 0.0627, "step": 135670 }, { "epoch": 4.012539184952978, "grad_norm": 1.078111171722412, "learning_rate": 2.56497676783847e-06, "loss": 0.0719, "step": 135680 }, { "epoch": 4.012834920447152, "grad_norm": 0.6708885431289673, "learning_rate": 2.5648500779195303e-06, "loss": 0.0737, "step": 135690 }, { "epoch": 4.0131306559413265, "grad_norm": 1.3366132974624634, "learning_rate": 2.564723388000591e-06, "loss": 0.068, "step": 135700 }, { "epoch": 4.0134263914355, "grad_norm": 1.022576928138733, "learning_rate": 2.564596698081652e-06, "loss": 0.0604, "step": 135710 }, { "epoch": 4.013722126929674, "grad_norm": 0.6793902516365051, "learning_rate": 2.5644700081627127e-06, "loss": 0.0718, "step": 135720 }, { "epoch": 4.014017862423848, "grad_norm": 0.8274125456809998, "learning_rate": 2.564343318243773e-06, "loss": 0.0811, "step": 135730 }, { "epoch": 4.014313597918022, "grad_norm": 0.6428841948509216, "learning_rate": 2.564216628324834e-06, "loss": 0.0663, "step": 135740 }, { "epoch": 4.014609333412196, "grad_norm": 0.7112045288085938, "learning_rate": 2.5640899384058942e-06, "loss": 0.0488, "step": 135750 }, { "epoch": 4.01490506890637, "grad_norm": 0.7693468332290649, "learning_rate": 2.563963248486955e-06, "loss": 0.0637, "step": 135760 }, { "epoch": 4.0152008044005445, "grad_norm": 1.4423880577087402, "learning_rate": 2.5638365585680154e-06, "loss": 0.0898, "step": 135770 }, { "epoch": 4.015496539894718, "grad_norm": 0.9031810760498047, "learning_rate": 2.563709868649076e-06, "loss": 0.0758, "step": 135780 }, { "epoch": 4.015792275388892, "grad_norm": 0.6918407678604126, "learning_rate": 2.563583178730137e-06, "loss": 0.0611, "step": 135790 }, { "epoch": 4.016088010883066, "grad_norm": 0.7004978060722351, "learning_rate": 2.5634564888111978e-06, "loss": 0.0674, "step": 135800 }, { "epoch": 4.01638374637724, "grad_norm": 0.7065534591674805, "learning_rate": 2.563329798892258e-06, "loss": 0.0509, "step": 135810 }, { "epoch": 4.016679481871414, "grad_norm": 1.1468793153762817, "learning_rate": 2.563203108973319e-06, "loss": 0.0723, "step": 135820 }, { "epoch": 4.016975217365588, "grad_norm": 0.7447846531867981, "learning_rate": 2.5630764190543793e-06, "loss": 0.0821, "step": 135830 }, { "epoch": 4.0172709528597625, "grad_norm": 0.8522635698318481, "learning_rate": 2.5629497291354396e-06, "loss": 0.0725, "step": 135840 }, { "epoch": 4.017566688353936, "grad_norm": 0.5940942764282227, "learning_rate": 2.5628230392165004e-06, "loss": 0.062, "step": 135850 }, { "epoch": 4.01786242384811, "grad_norm": 1.0792112350463867, "learning_rate": 2.562696349297561e-06, "loss": 0.0658, "step": 135860 }, { "epoch": 4.018158159342284, "grad_norm": 0.8711427450180054, "learning_rate": 2.562569659378622e-06, "loss": 0.075, "step": 135870 }, { "epoch": 4.018453894836458, "grad_norm": 1.171570897102356, "learning_rate": 2.5624429694596824e-06, "loss": 0.0797, "step": 135880 }, { "epoch": 4.018749630330633, "grad_norm": 0.7505319118499756, "learning_rate": 2.562316279540743e-06, "loss": 0.0764, "step": 135890 }, { "epoch": 4.019045365824806, "grad_norm": 0.902566134929657, "learning_rate": 2.5621895896218035e-06, "loss": 0.0733, "step": 135900 }, { "epoch": 4.019341101318981, "grad_norm": 1.0722622871398926, "learning_rate": 2.5620628997028643e-06, "loss": 0.0709, "step": 135910 }, { "epoch": 4.019636836813154, "grad_norm": 1.6444740295410156, "learning_rate": 2.5619362097839247e-06, "loss": 0.0759, "step": 135920 }, { "epoch": 4.0199325723073285, "grad_norm": 0.7215778231620789, "learning_rate": 2.5618095198649855e-06, "loss": 0.0638, "step": 135930 }, { "epoch": 4.020228307801502, "grad_norm": 0.799961268901825, "learning_rate": 2.561682829946046e-06, "loss": 0.068, "step": 135940 }, { "epoch": 4.020524043295676, "grad_norm": 0.9155943393707275, "learning_rate": 2.561556140027107e-06, "loss": 0.0612, "step": 135950 }, { "epoch": 4.020819778789851, "grad_norm": 0.9121465086936951, "learning_rate": 2.5614294501081674e-06, "loss": 0.06, "step": 135960 }, { "epoch": 4.021115514284024, "grad_norm": 0.9877799153327942, "learning_rate": 2.5613027601892282e-06, "loss": 0.0678, "step": 135970 }, { "epoch": 4.021411249778199, "grad_norm": 0.6254696249961853, "learning_rate": 2.5611760702702886e-06, "loss": 0.0733, "step": 135980 }, { "epoch": 4.021706985272372, "grad_norm": 0.9265708327293396, "learning_rate": 2.5610493803513494e-06, "loss": 0.0626, "step": 135990 }, { "epoch": 4.0220027207665465, "grad_norm": 0.9872322678565979, "learning_rate": 2.5609226904324097e-06, "loss": 0.0676, "step": 136000 }, { "epoch": 4.02229845626072, "grad_norm": 0.6829956769943237, "learning_rate": 2.5607960005134705e-06, "loss": 0.0588, "step": 136010 }, { "epoch": 4.022594191754894, "grad_norm": 0.7765684723854065, "learning_rate": 2.560669310594531e-06, "loss": 0.0679, "step": 136020 }, { "epoch": 4.022889927249069, "grad_norm": 0.7309555411338806, "learning_rate": 2.560542620675592e-06, "loss": 0.0661, "step": 136030 }, { "epoch": 4.023185662743242, "grad_norm": 0.9656543135643005, "learning_rate": 2.5604159307566525e-06, "loss": 0.0516, "step": 136040 }, { "epoch": 4.023481398237417, "grad_norm": 0.4550529718399048, "learning_rate": 2.5602892408377133e-06, "loss": 0.0684, "step": 136050 }, { "epoch": 4.02377713373159, "grad_norm": 0.7651550769805908, "learning_rate": 2.5601625509187736e-06, "loss": 0.0687, "step": 136060 }, { "epoch": 4.024072869225765, "grad_norm": 0.9705538153648376, "learning_rate": 2.5600358609998344e-06, "loss": 0.0703, "step": 136070 }, { "epoch": 4.024368604719938, "grad_norm": 0.6420986652374268, "learning_rate": 2.5599091710808948e-06, "loss": 0.0693, "step": 136080 }, { "epoch": 4.0246643402141125, "grad_norm": 0.7835440635681152, "learning_rate": 2.5597824811619556e-06, "loss": 0.0555, "step": 136090 }, { "epoch": 4.024960075708287, "grad_norm": 1.0979876518249512, "learning_rate": 2.559655791243016e-06, "loss": 0.0569, "step": 136100 }, { "epoch": 4.02525581120246, "grad_norm": 1.4136494398117065, "learning_rate": 2.559529101324077e-06, "loss": 0.0665, "step": 136110 }, { "epoch": 4.025551546696635, "grad_norm": 0.8471550345420837, "learning_rate": 2.5594024114051375e-06, "loss": 0.0743, "step": 136120 }, { "epoch": 4.025847282190808, "grad_norm": 0.8313940167427063, "learning_rate": 2.5592757214861983e-06, "loss": 0.0654, "step": 136130 }, { "epoch": 4.026143017684983, "grad_norm": 0.8874711990356445, "learning_rate": 2.5591490315672587e-06, "loss": 0.0813, "step": 136140 }, { "epoch": 4.026438753179157, "grad_norm": 1.1498565673828125, "learning_rate": 2.5590223416483195e-06, "loss": 0.0579, "step": 136150 }, { "epoch": 4.0267344886733305, "grad_norm": 1.0626838207244873, "learning_rate": 2.55889565172938e-06, "loss": 0.0623, "step": 136160 }, { "epoch": 4.027030224167505, "grad_norm": 0.856101930141449, "learning_rate": 2.5587689618104406e-06, "loss": 0.0606, "step": 136170 }, { "epoch": 4.027325959661678, "grad_norm": 0.9474104642868042, "learning_rate": 2.558642271891501e-06, "loss": 0.0742, "step": 136180 }, { "epoch": 4.027621695155853, "grad_norm": 1.7992268800735474, "learning_rate": 2.558515581972562e-06, "loss": 0.0747, "step": 136190 }, { "epoch": 4.027917430650026, "grad_norm": 0.4862058162689209, "learning_rate": 2.5583888920536226e-06, "loss": 0.0544, "step": 136200 }, { "epoch": 4.028213166144201, "grad_norm": 0.6987475156784058, "learning_rate": 2.5582622021346834e-06, "loss": 0.0598, "step": 136210 }, { "epoch": 4.028508901638375, "grad_norm": 0.7106077671051025, "learning_rate": 2.5581355122157437e-06, "loss": 0.086, "step": 136220 }, { "epoch": 4.0288046371325485, "grad_norm": 0.5574755072593689, "learning_rate": 2.5580088222968045e-06, "loss": 0.0672, "step": 136230 }, { "epoch": 4.029100372626723, "grad_norm": 0.8735749125480652, "learning_rate": 2.557882132377865e-06, "loss": 0.0541, "step": 136240 }, { "epoch": 4.029396108120896, "grad_norm": 0.5283002853393555, "learning_rate": 2.5577554424589252e-06, "loss": 0.0516, "step": 136250 }, { "epoch": 4.029691843615071, "grad_norm": 0.5754572749137878, "learning_rate": 2.557628752539986e-06, "loss": 0.054, "step": 136260 }, { "epoch": 4.029987579109244, "grad_norm": 1.1364350318908691, "learning_rate": 2.557502062621047e-06, "loss": 0.0844, "step": 136270 }, { "epoch": 4.030283314603419, "grad_norm": 1.3275609016418457, "learning_rate": 2.5573753727021076e-06, "loss": 0.0809, "step": 136280 }, { "epoch": 4.030579050097593, "grad_norm": 0.8237524628639221, "learning_rate": 2.557248682783168e-06, "loss": 0.0519, "step": 136290 }, { "epoch": 4.030874785591767, "grad_norm": 1.3621845245361328, "learning_rate": 2.5571219928642288e-06, "loss": 0.065, "step": 136300 }, { "epoch": 4.031170521085941, "grad_norm": 0.927252471446991, "learning_rate": 2.556995302945289e-06, "loss": 0.0689, "step": 136310 }, { "epoch": 4.0314662565801145, "grad_norm": 1.4693522453308105, "learning_rate": 2.55686861302635e-06, "loss": 0.0823, "step": 136320 }, { "epoch": 4.031761992074289, "grad_norm": 1.146956205368042, "learning_rate": 2.5567419231074103e-06, "loss": 0.0665, "step": 136330 }, { "epoch": 4.032057727568462, "grad_norm": 0.5000660419464111, "learning_rate": 2.556615233188471e-06, "loss": 0.0673, "step": 136340 }, { "epoch": 4.032353463062637, "grad_norm": 0.8822066187858582, "learning_rate": 2.556488543269532e-06, "loss": 0.0673, "step": 136350 }, { "epoch": 4.032649198556811, "grad_norm": 0.7849896550178528, "learning_rate": 2.5563618533505927e-06, "loss": 0.0538, "step": 136360 }, { "epoch": 4.032944934050985, "grad_norm": 0.6653276681900024, "learning_rate": 2.556235163431653e-06, "loss": 0.082, "step": 136370 }, { "epoch": 4.033240669545159, "grad_norm": 1.692616581916809, "learning_rate": 2.556108473512714e-06, "loss": 0.0644, "step": 136380 }, { "epoch": 4.0335364050393325, "grad_norm": 0.6653929948806763, "learning_rate": 2.555981783593774e-06, "loss": 0.0621, "step": 136390 }, { "epoch": 4.033832140533507, "grad_norm": 0.75135737657547, "learning_rate": 2.555855093674835e-06, "loss": 0.0604, "step": 136400 }, { "epoch": 4.03412787602768, "grad_norm": 0.5174446702003479, "learning_rate": 2.5557284037558953e-06, "loss": 0.0648, "step": 136410 }, { "epoch": 4.034423611521855, "grad_norm": 0.8479408025741577, "learning_rate": 2.555601713836956e-06, "loss": 0.077, "step": 136420 }, { "epoch": 4.034719347016029, "grad_norm": 0.8871355056762695, "learning_rate": 2.555475023918017e-06, "loss": 0.0711, "step": 136430 }, { "epoch": 4.035015082510203, "grad_norm": 0.5602805614471436, "learning_rate": 2.5553483339990777e-06, "loss": 0.0566, "step": 136440 }, { "epoch": 4.035310818004377, "grad_norm": 0.9252237677574158, "learning_rate": 2.555221644080138e-06, "loss": 0.0722, "step": 136450 }, { "epoch": 4.035606553498551, "grad_norm": 0.819287896156311, "learning_rate": 2.555094954161199e-06, "loss": 0.0599, "step": 136460 }, { "epoch": 4.035902288992725, "grad_norm": 1.0344058275222778, "learning_rate": 2.5549682642422592e-06, "loss": 0.0856, "step": 136470 }, { "epoch": 4.036198024486899, "grad_norm": 0.997689425945282, "learning_rate": 2.55484157432332e-06, "loss": 0.0694, "step": 136480 }, { "epoch": 4.036493759981073, "grad_norm": 1.8996717929840088, "learning_rate": 2.5547148844043804e-06, "loss": 0.0645, "step": 136490 }, { "epoch": 4.036789495475247, "grad_norm": 0.4792247712612152, "learning_rate": 2.554588194485441e-06, "loss": 0.0678, "step": 136500 }, { "epoch": 4.037085230969421, "grad_norm": 1.334993600845337, "learning_rate": 2.554461504566502e-06, "loss": 0.0682, "step": 136510 }, { "epoch": 4.037380966463595, "grad_norm": 1.0459928512573242, "learning_rate": 2.5543348146475627e-06, "loss": 0.0774, "step": 136520 }, { "epoch": 4.037676701957769, "grad_norm": 0.7486528158187866, "learning_rate": 2.554208124728623e-06, "loss": 0.0612, "step": 136530 }, { "epoch": 4.037972437451943, "grad_norm": 1.2678598165512085, "learning_rate": 2.554081434809684e-06, "loss": 0.0914, "step": 136540 }, { "epoch": 4.038268172946117, "grad_norm": 0.9699767231941223, "learning_rate": 2.5539547448907443e-06, "loss": 0.0548, "step": 136550 }, { "epoch": 4.038563908440291, "grad_norm": 1.229335904121399, "learning_rate": 2.553828054971805e-06, "loss": 0.0717, "step": 136560 }, { "epoch": 4.038859643934465, "grad_norm": 0.8482301831245422, "learning_rate": 2.5537013650528654e-06, "loss": 0.0605, "step": 136570 }, { "epoch": 4.039155379428639, "grad_norm": 1.0167553424835205, "learning_rate": 2.5535746751339262e-06, "loss": 0.0819, "step": 136580 }, { "epoch": 4.039451114922813, "grad_norm": 0.6645736694335938, "learning_rate": 2.553447985214987e-06, "loss": 0.0918, "step": 136590 }, { "epoch": 4.039746850416987, "grad_norm": 0.9135688543319702, "learning_rate": 2.553321295296048e-06, "loss": 0.0596, "step": 136600 }, { "epoch": 4.040042585911161, "grad_norm": 1.041513204574585, "learning_rate": 2.553194605377108e-06, "loss": 0.0572, "step": 136610 }, { "epoch": 4.040338321405335, "grad_norm": 1.205647349357605, "learning_rate": 2.553067915458169e-06, "loss": 0.0803, "step": 136620 }, { "epoch": 4.040634056899509, "grad_norm": 0.7909528613090515, "learning_rate": 2.5529412255392293e-06, "loss": 0.0676, "step": 136630 }, { "epoch": 4.040929792393683, "grad_norm": 0.8210251927375793, "learning_rate": 2.55281453562029e-06, "loss": 0.0629, "step": 136640 }, { "epoch": 4.041225527887857, "grad_norm": 0.6557623744010925, "learning_rate": 2.5526878457013505e-06, "loss": 0.0568, "step": 136650 }, { "epoch": 4.041521263382031, "grad_norm": 0.8351110219955444, "learning_rate": 2.552561155782411e-06, "loss": 0.068, "step": 136660 }, { "epoch": 4.041816998876205, "grad_norm": 0.6957122683525085, "learning_rate": 2.552434465863472e-06, "loss": 0.0789, "step": 136670 }, { "epoch": 4.042112734370379, "grad_norm": 1.000478982925415, "learning_rate": 2.5523077759445324e-06, "loss": 0.0655, "step": 136680 }, { "epoch": 4.0424084698645535, "grad_norm": 0.9523271918296814, "learning_rate": 2.552181086025593e-06, "loss": 0.07, "step": 136690 }, { "epoch": 4.042704205358727, "grad_norm": 1.149997591972351, "learning_rate": 2.5520543961066536e-06, "loss": 0.0764, "step": 136700 }, { "epoch": 4.042999940852901, "grad_norm": 0.7579692602157593, "learning_rate": 2.5519277061877144e-06, "loss": 0.066, "step": 136710 }, { "epoch": 4.043295676347075, "grad_norm": 1.0061546564102173, "learning_rate": 2.5518010162687747e-06, "loss": 0.0696, "step": 136720 }, { "epoch": 4.043591411841249, "grad_norm": 0.7174385786056519, "learning_rate": 2.5516743263498355e-06, "loss": 0.0621, "step": 136730 }, { "epoch": 4.043887147335423, "grad_norm": 0.6903408765792847, "learning_rate": 2.551547636430896e-06, "loss": 0.0517, "step": 136740 }, { "epoch": 4.044182882829597, "grad_norm": 0.5374740362167358, "learning_rate": 2.551420946511957e-06, "loss": 0.0705, "step": 136750 }, { "epoch": 4.0444786183237715, "grad_norm": 0.7801252603530884, "learning_rate": 2.5512942565930175e-06, "loss": 0.0497, "step": 136760 }, { "epoch": 4.044774353817945, "grad_norm": 1.019706130027771, "learning_rate": 2.5511675666740782e-06, "loss": 0.0776, "step": 136770 }, { "epoch": 4.045070089312119, "grad_norm": 1.0316798686981201, "learning_rate": 2.5510408767551386e-06, "loss": 0.0695, "step": 136780 }, { "epoch": 4.045365824806293, "grad_norm": 0.9225483536720276, "learning_rate": 2.5509141868361994e-06, "loss": 0.0645, "step": 136790 }, { "epoch": 4.045661560300467, "grad_norm": 1.2567945718765259, "learning_rate": 2.5507874969172598e-06, "loss": 0.0517, "step": 136800 }, { "epoch": 4.045957295794642, "grad_norm": 0.7333462238311768, "learning_rate": 2.5506608069983206e-06, "loss": 0.0633, "step": 136810 }, { "epoch": 4.046253031288815, "grad_norm": 1.4170347452163696, "learning_rate": 2.550534117079381e-06, "loss": 0.0622, "step": 136820 }, { "epoch": 4.04654876678299, "grad_norm": 0.8626725077629089, "learning_rate": 2.550407427160442e-06, "loss": 0.0727, "step": 136830 }, { "epoch": 4.046844502277163, "grad_norm": 0.8729771375656128, "learning_rate": 2.5502807372415025e-06, "loss": 0.0751, "step": 136840 }, { "epoch": 4.0471402377713375, "grad_norm": 1.2943857908248901, "learning_rate": 2.5501540473225633e-06, "loss": 0.0614, "step": 136850 }, { "epoch": 4.047435973265511, "grad_norm": 1.1719121932983398, "learning_rate": 2.5500273574036237e-06, "loss": 0.0553, "step": 136860 }, { "epoch": 4.047731708759685, "grad_norm": 1.6096572875976562, "learning_rate": 2.5499006674846844e-06, "loss": 0.0757, "step": 136870 }, { "epoch": 4.04802744425386, "grad_norm": 0.7410084009170532, "learning_rate": 2.549773977565745e-06, "loss": 0.0715, "step": 136880 }, { "epoch": 4.048323179748033, "grad_norm": 0.8570199012756348, "learning_rate": 2.5496472876468056e-06, "loss": 0.08, "step": 136890 }, { "epoch": 4.048618915242208, "grad_norm": 0.9417272210121155, "learning_rate": 2.549520597727866e-06, "loss": 0.0509, "step": 136900 }, { "epoch": 4.048914650736381, "grad_norm": 1.2103451490402222, "learning_rate": 2.549393907808927e-06, "loss": 0.0741, "step": 136910 }, { "epoch": 4.0492103862305555, "grad_norm": 0.5568766593933105, "learning_rate": 2.5492672178899875e-06, "loss": 0.0803, "step": 136920 }, { "epoch": 4.049506121724729, "grad_norm": 1.112352728843689, "learning_rate": 2.5491405279710483e-06, "loss": 0.0678, "step": 136930 }, { "epoch": 4.049801857218903, "grad_norm": 0.9079926013946533, "learning_rate": 2.5490138380521087e-06, "loss": 0.0648, "step": 136940 }, { "epoch": 4.050097592713078, "grad_norm": 0.659904956817627, "learning_rate": 2.5488871481331695e-06, "loss": 0.0536, "step": 136950 }, { "epoch": 4.050393328207251, "grad_norm": 1.5837794542312622, "learning_rate": 2.54876045821423e-06, "loss": 0.0675, "step": 136960 }, { "epoch": 4.050689063701426, "grad_norm": 1.0367721319198608, "learning_rate": 2.5486337682952906e-06, "loss": 0.0702, "step": 136970 }, { "epoch": 4.050984799195599, "grad_norm": 0.6289398670196533, "learning_rate": 2.548507078376351e-06, "loss": 0.076, "step": 136980 }, { "epoch": 4.051280534689774, "grad_norm": 0.9738603234291077, "learning_rate": 2.5483803884574122e-06, "loss": 0.0734, "step": 136990 }, { "epoch": 4.051576270183947, "grad_norm": 0.6589932441711426, "learning_rate": 2.5482536985384726e-06, "loss": 0.0631, "step": 137000 }, { "epoch": 4.0518720056781214, "grad_norm": 1.165319800376892, "learning_rate": 2.5481270086195334e-06, "loss": 0.0638, "step": 137010 }, { "epoch": 4.052167741172296, "grad_norm": 1.0329971313476562, "learning_rate": 2.5480003187005937e-06, "loss": 0.0898, "step": 137020 }, { "epoch": 4.052463476666469, "grad_norm": 1.0821943283081055, "learning_rate": 2.5478736287816545e-06, "loss": 0.0718, "step": 137030 }, { "epoch": 4.052759212160644, "grad_norm": 0.9170861840248108, "learning_rate": 2.547746938862715e-06, "loss": 0.0862, "step": 137040 }, { "epoch": 4.053054947654817, "grad_norm": 0.7988600134849548, "learning_rate": 2.5476202489437757e-06, "loss": 0.0552, "step": 137050 }, { "epoch": 4.053350683148992, "grad_norm": 1.4524070024490356, "learning_rate": 2.547493559024836e-06, "loss": 0.0636, "step": 137060 }, { "epoch": 4.053646418643165, "grad_norm": 0.8636276125907898, "learning_rate": 2.547366869105897e-06, "loss": 0.0873, "step": 137070 }, { "epoch": 4.0539421541373395, "grad_norm": 0.8340485095977783, "learning_rate": 2.5472401791869576e-06, "loss": 0.0797, "step": 137080 }, { "epoch": 4.054237889631514, "grad_norm": 1.0894460678100586, "learning_rate": 2.547113489268018e-06, "loss": 0.0691, "step": 137090 }, { "epoch": 4.054533625125687, "grad_norm": 1.799857258796692, "learning_rate": 2.546986799349079e-06, "loss": 0.0514, "step": 137100 }, { "epoch": 4.054829360619862, "grad_norm": 1.1099439859390259, "learning_rate": 2.546860109430139e-06, "loss": 0.056, "step": 137110 }, { "epoch": 4.055125096114035, "grad_norm": 0.9410701990127563, "learning_rate": 2.5467334195112e-06, "loss": 0.0693, "step": 137120 }, { "epoch": 4.05542083160821, "grad_norm": 0.8843044638633728, "learning_rate": 2.5466067295922603e-06, "loss": 0.0592, "step": 137130 }, { "epoch": 4.055716567102384, "grad_norm": 1.587022304534912, "learning_rate": 2.546480039673321e-06, "loss": 0.0911, "step": 137140 }, { "epoch": 4.0560123025965575, "grad_norm": 0.7084165811538696, "learning_rate": 2.546353349754382e-06, "loss": 0.0546, "step": 137150 }, { "epoch": 4.056308038090732, "grad_norm": 1.3562427759170532, "learning_rate": 2.5462266598354427e-06, "loss": 0.0713, "step": 137160 }, { "epoch": 4.056603773584905, "grad_norm": 0.6753170490264893, "learning_rate": 2.546099969916503e-06, "loss": 0.0541, "step": 137170 }, { "epoch": 4.05689950907908, "grad_norm": 0.9024345874786377, "learning_rate": 2.545973279997564e-06, "loss": 0.0678, "step": 137180 }, { "epoch": 4.057195244573253, "grad_norm": 0.6428389549255371, "learning_rate": 2.545846590078624e-06, "loss": 0.0664, "step": 137190 }, { "epoch": 4.057490980067428, "grad_norm": 0.7116459608078003, "learning_rate": 2.545719900159685e-06, "loss": 0.0731, "step": 137200 }, { "epoch": 4.057786715561602, "grad_norm": 1.1863064765930176, "learning_rate": 2.5455932102407454e-06, "loss": 0.0626, "step": 137210 }, { "epoch": 4.058082451055776, "grad_norm": 0.7788766622543335, "learning_rate": 2.545466520321806e-06, "loss": 0.062, "step": 137220 }, { "epoch": 4.05837818654995, "grad_norm": 1.1434521675109863, "learning_rate": 2.545339830402867e-06, "loss": 0.0784, "step": 137230 }, { "epoch": 4.0586739220441235, "grad_norm": 0.8017941117286682, "learning_rate": 2.5452131404839277e-06, "loss": 0.0641, "step": 137240 }, { "epoch": 4.058969657538298, "grad_norm": 1.0949000120162964, "learning_rate": 2.545086450564988e-06, "loss": 0.0584, "step": 137250 }, { "epoch": 4.059265393032471, "grad_norm": 0.8753467798233032, "learning_rate": 2.544959760646049e-06, "loss": 0.0673, "step": 137260 }, { "epoch": 4.059561128526646, "grad_norm": 1.5059142112731934, "learning_rate": 2.5448330707271093e-06, "loss": 0.0769, "step": 137270 }, { "epoch": 4.05985686402082, "grad_norm": 1.1495810747146606, "learning_rate": 2.54470638080817e-06, "loss": 0.0718, "step": 137280 }, { "epoch": 4.060152599514994, "grad_norm": 0.8844366073608398, "learning_rate": 2.5445796908892304e-06, "loss": 0.0702, "step": 137290 }, { "epoch": 4.060448335009168, "grad_norm": 0.45752280950546265, "learning_rate": 2.544453000970291e-06, "loss": 0.062, "step": 137300 }, { "epoch": 4.0607440705033415, "grad_norm": 0.6195996403694153, "learning_rate": 2.544326311051352e-06, "loss": 0.0616, "step": 137310 }, { "epoch": 4.061039805997516, "grad_norm": 1.0310032367706299, "learning_rate": 2.5441996211324128e-06, "loss": 0.059, "step": 137320 }, { "epoch": 4.061335541491689, "grad_norm": 0.6250643134117126, "learning_rate": 2.544072931213473e-06, "loss": 0.0751, "step": 137330 }, { "epoch": 4.061631276985864, "grad_norm": 0.2952851951122284, "learning_rate": 2.543946241294534e-06, "loss": 0.0642, "step": 137340 }, { "epoch": 4.061927012480038, "grad_norm": 0.9835371971130371, "learning_rate": 2.5438195513755943e-06, "loss": 0.0559, "step": 137350 }, { "epoch": 4.062222747974212, "grad_norm": 1.5948036909103394, "learning_rate": 2.543692861456655e-06, "loss": 0.0626, "step": 137360 }, { "epoch": 4.062518483468386, "grad_norm": 0.723127543926239, "learning_rate": 2.5435661715377155e-06, "loss": 0.0716, "step": 137370 }, { "epoch": 4.06281421896256, "grad_norm": 1.121026873588562, "learning_rate": 2.5434394816187762e-06, "loss": 0.0639, "step": 137380 }, { "epoch": 4.063109954456734, "grad_norm": 0.7070893049240112, "learning_rate": 2.543312791699837e-06, "loss": 0.0665, "step": 137390 }, { "epoch": 4.063405689950908, "grad_norm": 0.4431189298629761, "learning_rate": 2.543186101780898e-06, "loss": 0.0535, "step": 137400 }, { "epoch": 4.063701425445082, "grad_norm": 1.2364237308502197, "learning_rate": 2.543059411861958e-06, "loss": 0.0669, "step": 137410 }, { "epoch": 4.063997160939256, "grad_norm": 0.7137261629104614, "learning_rate": 2.542932721943019e-06, "loss": 0.0678, "step": 137420 }, { "epoch": 4.06429289643343, "grad_norm": 0.8952222466468811, "learning_rate": 2.5428060320240793e-06, "loss": 0.0655, "step": 137430 }, { "epoch": 4.064588631927604, "grad_norm": 0.8711695671081543, "learning_rate": 2.54267934210514e-06, "loss": 0.07, "step": 137440 }, { "epoch": 4.064884367421778, "grad_norm": 0.8987517356872559, "learning_rate": 2.5425526521862005e-06, "loss": 0.0549, "step": 137450 }, { "epoch": 4.065180102915952, "grad_norm": 0.7722123265266418, "learning_rate": 2.5424259622672613e-06, "loss": 0.0715, "step": 137460 }, { "epoch": 4.065475838410126, "grad_norm": 0.93486487865448, "learning_rate": 2.542299272348322e-06, "loss": 0.0835, "step": 137470 }, { "epoch": 4.0657715739043, "grad_norm": 1.1783264875411987, "learning_rate": 2.5421725824293824e-06, "loss": 0.082, "step": 137480 }, { "epoch": 4.066067309398474, "grad_norm": 1.090914249420166, "learning_rate": 2.5420458925104432e-06, "loss": 0.0731, "step": 137490 }, { "epoch": 4.066363044892648, "grad_norm": 1.3243666887283325, "learning_rate": 2.5419192025915036e-06, "loss": 0.0662, "step": 137500 }, { "epoch": 4.066658780386822, "grad_norm": 0.8757858276367188, "learning_rate": 2.5417925126725644e-06, "loss": 0.0621, "step": 137510 }, { "epoch": 4.066954515880996, "grad_norm": 0.8724662661552429, "learning_rate": 2.5416658227536248e-06, "loss": 0.0826, "step": 137520 }, { "epoch": 4.06725025137517, "grad_norm": 0.8404848575592041, "learning_rate": 2.5415391328346855e-06, "loss": 0.0806, "step": 137530 }, { "epoch": 4.067545986869344, "grad_norm": 0.6021321415901184, "learning_rate": 2.541412442915746e-06, "loss": 0.0625, "step": 137540 }, { "epoch": 4.067841722363518, "grad_norm": 0.9596754908561707, "learning_rate": 2.541285752996807e-06, "loss": 0.0669, "step": 137550 }, { "epoch": 4.068137457857692, "grad_norm": 0.7771168947219849, "learning_rate": 2.5411590630778675e-06, "loss": 0.0693, "step": 137560 }, { "epoch": 4.068433193351866, "grad_norm": 1.0579758882522583, "learning_rate": 2.5410323731589283e-06, "loss": 0.085, "step": 137570 }, { "epoch": 4.06872892884604, "grad_norm": 0.9819709062576294, "learning_rate": 2.5409056832399886e-06, "loss": 0.0613, "step": 137580 }, { "epoch": 4.069024664340214, "grad_norm": 0.5603731870651245, "learning_rate": 2.5407789933210494e-06, "loss": 0.0688, "step": 137590 }, { "epoch": 4.069320399834388, "grad_norm": 0.6134955286979675, "learning_rate": 2.54065230340211e-06, "loss": 0.0661, "step": 137600 }, { "epoch": 4.0696161353285625, "grad_norm": 1.5043514966964722, "learning_rate": 2.5405256134831706e-06, "loss": 0.0739, "step": 137610 }, { "epoch": 4.069911870822736, "grad_norm": 3.682827949523926, "learning_rate": 2.540398923564231e-06, "loss": 0.0817, "step": 137620 }, { "epoch": 4.07020760631691, "grad_norm": 0.9504842758178711, "learning_rate": 2.540272233645292e-06, "loss": 0.0793, "step": 137630 }, { "epoch": 4.070503341811084, "grad_norm": 0.6299290657043457, "learning_rate": 2.5401455437263525e-06, "loss": 0.0666, "step": 137640 }, { "epoch": 4.070799077305258, "grad_norm": 0.6651141047477722, "learning_rate": 2.5400188538074133e-06, "loss": 0.0581, "step": 137650 }, { "epoch": 4.071094812799432, "grad_norm": 0.8578124046325684, "learning_rate": 2.5398921638884737e-06, "loss": 0.0752, "step": 137660 }, { "epoch": 4.071390548293606, "grad_norm": 0.9363489747047424, "learning_rate": 2.5397654739695345e-06, "loss": 0.0958, "step": 137670 }, { "epoch": 4.0716862837877805, "grad_norm": 0.49799543619155884, "learning_rate": 2.539638784050595e-06, "loss": 0.0659, "step": 137680 }, { "epoch": 4.071982019281954, "grad_norm": 0.8487972617149353, "learning_rate": 2.5395120941316556e-06, "loss": 0.0666, "step": 137690 }, { "epoch": 4.072277754776128, "grad_norm": 0.7181388735771179, "learning_rate": 2.539385404212716e-06, "loss": 0.0631, "step": 137700 }, { "epoch": 4.072573490270302, "grad_norm": 0.8888437747955322, "learning_rate": 2.5392587142937772e-06, "loss": 0.0586, "step": 137710 }, { "epoch": 4.072869225764476, "grad_norm": 1.0149109363555908, "learning_rate": 2.5391320243748376e-06, "loss": 0.0737, "step": 137720 }, { "epoch": 4.073164961258651, "grad_norm": 1.7228925228118896, "learning_rate": 2.5390053344558984e-06, "loss": 0.0858, "step": 137730 }, { "epoch": 4.073460696752824, "grad_norm": 1.2621190547943115, "learning_rate": 2.5388786445369587e-06, "loss": 0.067, "step": 137740 }, { "epoch": 4.073756432246999, "grad_norm": 0.4711518883705139, "learning_rate": 2.5387519546180195e-06, "loss": 0.0563, "step": 137750 }, { "epoch": 4.074052167741172, "grad_norm": 1.0916165113449097, "learning_rate": 2.53862526469908e-06, "loss": 0.0652, "step": 137760 }, { "epoch": 4.0743479032353465, "grad_norm": 0.7146779894828796, "learning_rate": 2.5384985747801407e-06, "loss": 0.0661, "step": 137770 }, { "epoch": 4.07464363872952, "grad_norm": 0.4557354748249054, "learning_rate": 2.538371884861201e-06, "loss": 0.0677, "step": 137780 }, { "epoch": 4.074939374223694, "grad_norm": 1.4428482055664062, "learning_rate": 2.5382451949422623e-06, "loss": 0.0734, "step": 137790 }, { "epoch": 4.075235109717869, "grad_norm": 1.4883326292037964, "learning_rate": 2.5381185050233226e-06, "loss": 0.0484, "step": 137800 }, { "epoch": 4.075530845212042, "grad_norm": 0.8559253215789795, "learning_rate": 2.5379918151043834e-06, "loss": 0.063, "step": 137810 }, { "epoch": 4.075826580706217, "grad_norm": 1.713387370109558, "learning_rate": 2.5378651251854438e-06, "loss": 0.0836, "step": 137820 }, { "epoch": 4.07612231620039, "grad_norm": 0.7036530375480652, "learning_rate": 2.5377384352665046e-06, "loss": 0.0646, "step": 137830 }, { "epoch": 4.0764180516945645, "grad_norm": 1.7835747003555298, "learning_rate": 2.537611745347565e-06, "loss": 0.0874, "step": 137840 }, { "epoch": 4.076713787188738, "grad_norm": 1.2890253067016602, "learning_rate": 2.5374850554286257e-06, "loss": 0.055, "step": 137850 }, { "epoch": 4.077009522682912, "grad_norm": 1.0576096773147583, "learning_rate": 2.537358365509686e-06, "loss": 0.0507, "step": 137860 }, { "epoch": 4.077305258177087, "grad_norm": 1.4775549173355103, "learning_rate": 2.5372316755907473e-06, "loss": 0.0803, "step": 137870 }, { "epoch": 4.07760099367126, "grad_norm": 0.6794804930686951, "learning_rate": 2.5371049856718077e-06, "loss": 0.0734, "step": 137880 }, { "epoch": 4.077896729165435, "grad_norm": 0.47373566031455994, "learning_rate": 2.5369782957528685e-06, "loss": 0.0687, "step": 137890 }, { "epoch": 4.078192464659608, "grad_norm": 0.6523892283439636, "learning_rate": 2.536851605833929e-06, "loss": 0.0592, "step": 137900 }, { "epoch": 4.0784882001537826, "grad_norm": 1.04700767993927, "learning_rate": 2.536724915914989e-06, "loss": 0.0694, "step": 137910 }, { "epoch": 4.078783935647956, "grad_norm": 1.0925540924072266, "learning_rate": 2.53659822599605e-06, "loss": 0.0597, "step": 137920 }, { "epoch": 4.07907967114213, "grad_norm": 0.6626037955284119, "learning_rate": 2.5364715360771103e-06, "loss": 0.0758, "step": 137930 }, { "epoch": 4.079375406636305, "grad_norm": 1.4799927473068237, "learning_rate": 2.536344846158171e-06, "loss": 0.0604, "step": 137940 }, { "epoch": 4.079671142130478, "grad_norm": 1.0081311464309692, "learning_rate": 2.536218156239232e-06, "loss": 0.0583, "step": 137950 }, { "epoch": 4.079966877624653, "grad_norm": 0.8361450433731079, "learning_rate": 2.5360914663202927e-06, "loss": 0.0584, "step": 137960 }, { "epoch": 4.080262613118826, "grad_norm": 0.9116685390472412, "learning_rate": 2.535964776401353e-06, "loss": 0.0705, "step": 137970 }, { "epoch": 4.080558348613001, "grad_norm": 0.7419289350509644, "learning_rate": 2.535838086482414e-06, "loss": 0.0745, "step": 137980 }, { "epoch": 4.080854084107175, "grad_norm": 0.6611955165863037, "learning_rate": 2.5357113965634742e-06, "loss": 0.061, "step": 137990 }, { "epoch": 4.0811498196013485, "grad_norm": 0.7414877414703369, "learning_rate": 2.535584706644535e-06, "loss": 0.0585, "step": 138000 }, { "epoch": 4.081445555095523, "grad_norm": 2.2428464889526367, "learning_rate": 2.5354580167255954e-06, "loss": 0.0815, "step": 138010 }, { "epoch": 4.081741290589696, "grad_norm": 1.231346607208252, "learning_rate": 2.535331326806656e-06, "loss": 0.0739, "step": 138020 }, { "epoch": 4.082037026083871, "grad_norm": 1.1224963665008545, "learning_rate": 2.535204636887717e-06, "loss": 0.0709, "step": 138030 }, { "epoch": 4.082332761578044, "grad_norm": 0.7664132118225098, "learning_rate": 2.5350779469687778e-06, "loss": 0.0639, "step": 138040 }, { "epoch": 4.082628497072219, "grad_norm": 0.7761374115943909, "learning_rate": 2.534951257049838e-06, "loss": 0.0512, "step": 138050 }, { "epoch": 4.082924232566393, "grad_norm": 0.8297985196113586, "learning_rate": 2.534824567130899e-06, "loss": 0.07, "step": 138060 }, { "epoch": 4.0832199680605665, "grad_norm": 0.7666140198707581, "learning_rate": 2.5346978772119593e-06, "loss": 0.0656, "step": 138070 }, { "epoch": 4.083515703554741, "grad_norm": 1.1811721324920654, "learning_rate": 2.53457118729302e-06, "loss": 0.0731, "step": 138080 }, { "epoch": 4.083811439048914, "grad_norm": 1.1236947774887085, "learning_rate": 2.5344444973740804e-06, "loss": 0.0761, "step": 138090 }, { "epoch": 4.084107174543089, "grad_norm": 0.7252439856529236, "learning_rate": 2.5343178074551412e-06, "loss": 0.082, "step": 138100 }, { "epoch": 4.084402910037262, "grad_norm": 1.1746487617492676, "learning_rate": 2.534191117536202e-06, "loss": 0.0748, "step": 138110 }, { "epoch": 4.084698645531437, "grad_norm": 0.6294987797737122, "learning_rate": 2.534064427617263e-06, "loss": 0.079, "step": 138120 }, { "epoch": 4.084994381025611, "grad_norm": 0.8219602108001709, "learning_rate": 2.533937737698323e-06, "loss": 0.0634, "step": 138130 }, { "epoch": 4.085290116519785, "grad_norm": 1.2287875413894653, "learning_rate": 2.533811047779384e-06, "loss": 0.0714, "step": 138140 }, { "epoch": 4.085585852013959, "grad_norm": 1.0650209188461304, "learning_rate": 2.5336843578604443e-06, "loss": 0.0522, "step": 138150 }, { "epoch": 4.0858815875081325, "grad_norm": 0.8898959755897522, "learning_rate": 2.533557667941505e-06, "loss": 0.0672, "step": 138160 }, { "epoch": 4.086177323002307, "grad_norm": 0.9354457259178162, "learning_rate": 2.5334309780225655e-06, "loss": 0.0779, "step": 138170 }, { "epoch": 4.08647305849648, "grad_norm": 0.6321344971656799, "learning_rate": 2.5333042881036263e-06, "loss": 0.0766, "step": 138180 }, { "epoch": 4.086768793990655, "grad_norm": 0.8248727321624756, "learning_rate": 2.533177598184687e-06, "loss": 0.0681, "step": 138190 }, { "epoch": 4.087064529484829, "grad_norm": 0.6871725916862488, "learning_rate": 2.533050908265748e-06, "loss": 0.0564, "step": 138200 }, { "epoch": 4.087360264979003, "grad_norm": 0.9965757727622986, "learning_rate": 2.5329242183468082e-06, "loss": 0.0788, "step": 138210 }, { "epoch": 4.087656000473177, "grad_norm": 1.4368164539337158, "learning_rate": 2.532797528427869e-06, "loss": 0.084, "step": 138220 }, { "epoch": 4.0879517359673505, "grad_norm": 0.910284698009491, "learning_rate": 2.5326708385089294e-06, "loss": 0.0699, "step": 138230 }, { "epoch": 4.088247471461525, "grad_norm": 0.6438556909561157, "learning_rate": 2.53254414858999e-06, "loss": 0.0734, "step": 138240 }, { "epoch": 4.088543206955698, "grad_norm": 1.2052959203720093, "learning_rate": 2.5324174586710505e-06, "loss": 0.0618, "step": 138250 }, { "epoch": 4.088838942449873, "grad_norm": 0.9851293563842773, "learning_rate": 2.5322907687521113e-06, "loss": 0.0708, "step": 138260 }, { "epoch": 4.089134677944047, "grad_norm": 0.9948610663414001, "learning_rate": 2.532164078833172e-06, "loss": 0.0764, "step": 138270 }, { "epoch": 4.089430413438221, "grad_norm": 0.7381978034973145, "learning_rate": 2.532037388914233e-06, "loss": 0.0663, "step": 138280 }, { "epoch": 4.089726148932395, "grad_norm": 0.9274241328239441, "learning_rate": 2.5319106989952933e-06, "loss": 0.0742, "step": 138290 }, { "epoch": 4.0900218844265686, "grad_norm": 0.8664215207099915, "learning_rate": 2.531784009076354e-06, "loss": 0.0645, "step": 138300 }, { "epoch": 4.090317619920743, "grad_norm": 0.7880221605300903, "learning_rate": 2.5316573191574144e-06, "loss": 0.0597, "step": 138310 }, { "epoch": 4.090613355414917, "grad_norm": 0.7494462728500366, "learning_rate": 2.5315306292384748e-06, "loss": 0.0879, "step": 138320 }, { "epoch": 4.090909090909091, "grad_norm": 0.7230410575866699, "learning_rate": 2.5314039393195356e-06, "loss": 0.0781, "step": 138330 }, { "epoch": 4.091204826403265, "grad_norm": 0.8301229476928711, "learning_rate": 2.531277249400596e-06, "loss": 0.0657, "step": 138340 }, { "epoch": 4.091500561897439, "grad_norm": 0.6302752494812012, "learning_rate": 2.531150559481657e-06, "loss": 0.0606, "step": 138350 }, { "epoch": 4.091796297391613, "grad_norm": 0.8175095319747925, "learning_rate": 2.5310238695627175e-06, "loss": 0.064, "step": 138360 }, { "epoch": 4.092092032885787, "grad_norm": 1.0985043048858643, "learning_rate": 2.5308971796437783e-06, "loss": 0.0688, "step": 138370 }, { "epoch": 4.092387768379961, "grad_norm": 1.1982502937316895, "learning_rate": 2.5307704897248387e-06, "loss": 0.086, "step": 138380 }, { "epoch": 4.092683503874135, "grad_norm": 0.8378854990005493, "learning_rate": 2.5306437998058995e-06, "loss": 0.0689, "step": 138390 }, { "epoch": 4.092979239368309, "grad_norm": 1.0544506311416626, "learning_rate": 2.53051710988696e-06, "loss": 0.0594, "step": 138400 }, { "epoch": 4.093274974862483, "grad_norm": 1.0816490650177002, "learning_rate": 2.5303904199680206e-06, "loss": 0.0658, "step": 138410 }, { "epoch": 4.093570710356657, "grad_norm": 0.5929749608039856, "learning_rate": 2.530263730049081e-06, "loss": 0.0778, "step": 138420 }, { "epoch": 4.093866445850831, "grad_norm": 0.8086329698562622, "learning_rate": 2.530137040130142e-06, "loss": 0.0596, "step": 138430 }, { "epoch": 4.094162181345005, "grad_norm": 0.9456161260604858, "learning_rate": 2.5300103502112026e-06, "loss": 0.062, "step": 138440 }, { "epoch": 4.094457916839179, "grad_norm": 0.8738975524902344, "learning_rate": 2.5298836602922634e-06, "loss": 0.0561, "step": 138450 }, { "epoch": 4.094753652333353, "grad_norm": 2.4431443214416504, "learning_rate": 2.5297569703733237e-06, "loss": 0.0775, "step": 138460 }, { "epoch": 4.095049387827527, "grad_norm": 0.6831527352333069, "learning_rate": 2.5296302804543845e-06, "loss": 0.0734, "step": 138470 }, { "epoch": 4.095345123321701, "grad_norm": 1.1688461303710938, "learning_rate": 2.529503590535445e-06, "loss": 0.0825, "step": 138480 }, { "epoch": 4.095640858815875, "grad_norm": 1.0352964401245117, "learning_rate": 2.5293769006165057e-06, "loss": 0.0691, "step": 138490 }, { "epoch": 4.095936594310049, "grad_norm": 0.9807374477386475, "learning_rate": 2.529250210697566e-06, "loss": 0.0681, "step": 138500 }, { "epoch": 4.096232329804223, "grad_norm": 1.0846128463745117, "learning_rate": 2.5291235207786272e-06, "loss": 0.0459, "step": 138510 }, { "epoch": 4.096528065298397, "grad_norm": 0.9699088931083679, "learning_rate": 2.5289968308596876e-06, "loss": 0.0887, "step": 138520 }, { "epoch": 4.0968238007925715, "grad_norm": 0.3463415801525116, "learning_rate": 2.5288701409407484e-06, "loss": 0.0718, "step": 138530 }, { "epoch": 4.097119536286745, "grad_norm": 1.551806926727295, "learning_rate": 2.5287434510218088e-06, "loss": 0.0727, "step": 138540 }, { "epoch": 4.097415271780919, "grad_norm": 1.297545075416565, "learning_rate": 2.5286167611028696e-06, "loss": 0.0531, "step": 138550 }, { "epoch": 4.097711007275093, "grad_norm": 1.0579720735549927, "learning_rate": 2.52849007118393e-06, "loss": 0.0608, "step": 138560 }, { "epoch": 4.098006742769267, "grad_norm": 1.0093213319778442, "learning_rate": 2.5283633812649907e-06, "loss": 0.0796, "step": 138570 }, { "epoch": 4.098302478263442, "grad_norm": 0.9311513304710388, "learning_rate": 2.528236691346051e-06, "loss": 0.0782, "step": 138580 }, { "epoch": 4.098598213757615, "grad_norm": 0.6485919952392578, "learning_rate": 2.5281100014271123e-06, "loss": 0.0747, "step": 138590 }, { "epoch": 4.0988939492517895, "grad_norm": 1.2019634246826172, "learning_rate": 2.5279833115081727e-06, "loss": 0.061, "step": 138600 }, { "epoch": 4.099189684745963, "grad_norm": 0.8310196995735168, "learning_rate": 2.5278566215892334e-06, "loss": 0.0686, "step": 138610 }, { "epoch": 4.099485420240137, "grad_norm": 0.7430434823036194, "learning_rate": 2.527729931670294e-06, "loss": 0.0695, "step": 138620 }, { "epoch": 4.099781155734311, "grad_norm": 0.7335312962532043, "learning_rate": 2.5276032417513546e-06, "loss": 0.0709, "step": 138630 }, { "epoch": 4.100076891228485, "grad_norm": 1.2322988510131836, "learning_rate": 2.527476551832415e-06, "loss": 0.0738, "step": 138640 }, { "epoch": 4.10037262672266, "grad_norm": 1.004416584968567, "learning_rate": 2.5273498619134758e-06, "loss": 0.065, "step": 138650 }, { "epoch": 4.100668362216833, "grad_norm": 1.2513892650604248, "learning_rate": 2.527223171994536e-06, "loss": 0.0689, "step": 138660 }, { "epoch": 4.100964097711008, "grad_norm": 0.8617306351661682, "learning_rate": 2.5270964820755973e-06, "loss": 0.0696, "step": 138670 }, { "epoch": 4.101259833205181, "grad_norm": 0.721245288848877, "learning_rate": 2.5269697921566577e-06, "loss": 0.0588, "step": 138680 }, { "epoch": 4.1015555686993554, "grad_norm": 0.42183634638786316, "learning_rate": 2.5268431022377185e-06, "loss": 0.0706, "step": 138690 }, { "epoch": 4.101851304193529, "grad_norm": 0.8216595649719238, "learning_rate": 2.526716412318779e-06, "loss": 0.0416, "step": 138700 }, { "epoch": 4.102147039687703, "grad_norm": 1.0127949714660645, "learning_rate": 2.5265897223998396e-06, "loss": 0.0773, "step": 138710 }, { "epoch": 4.102442775181878, "grad_norm": 1.511623501777649, "learning_rate": 2.5264630324809e-06, "loss": 0.0875, "step": 138720 }, { "epoch": 4.102738510676051, "grad_norm": 0.6197124719619751, "learning_rate": 2.5263363425619604e-06, "loss": 0.0832, "step": 138730 }, { "epoch": 4.103034246170226, "grad_norm": 0.7942157983779907, "learning_rate": 2.526209652643021e-06, "loss": 0.055, "step": 138740 }, { "epoch": 4.103329981664399, "grad_norm": 0.8275613784790039, "learning_rate": 2.526082962724082e-06, "loss": 0.0553, "step": 138750 }, { "epoch": 4.1036257171585735, "grad_norm": 0.5901459455490112, "learning_rate": 2.5259562728051427e-06, "loss": 0.0598, "step": 138760 }, { "epoch": 4.103921452652747, "grad_norm": 0.5955044031143188, "learning_rate": 2.525829582886203e-06, "loss": 0.0729, "step": 138770 }, { "epoch": 4.104217188146921, "grad_norm": 1.0849833488464355, "learning_rate": 2.525702892967264e-06, "loss": 0.0785, "step": 138780 }, { "epoch": 4.104512923641096, "grad_norm": 0.9909160733222961, "learning_rate": 2.5255762030483243e-06, "loss": 0.066, "step": 138790 }, { "epoch": 4.104808659135269, "grad_norm": 1.312649130821228, "learning_rate": 2.525449513129385e-06, "loss": 0.0689, "step": 138800 }, { "epoch": 4.105104394629444, "grad_norm": 0.6776071786880493, "learning_rate": 2.5253228232104454e-06, "loss": 0.0633, "step": 138810 }, { "epoch": 4.105400130123617, "grad_norm": 0.5371289253234863, "learning_rate": 2.525196133291506e-06, "loss": 0.0677, "step": 138820 }, { "epoch": 4.1056958656177915, "grad_norm": 0.8843347430229187, "learning_rate": 2.525069443372567e-06, "loss": 0.0596, "step": 138830 }, { "epoch": 4.105991601111965, "grad_norm": 0.8605115413665771, "learning_rate": 2.524942753453628e-06, "loss": 0.0644, "step": 138840 }, { "epoch": 4.106287336606139, "grad_norm": 0.7184551954269409, "learning_rate": 2.524816063534688e-06, "loss": 0.0576, "step": 138850 }, { "epoch": 4.106583072100314, "grad_norm": 2.5408473014831543, "learning_rate": 2.524689373615749e-06, "loss": 0.0706, "step": 138860 }, { "epoch": 4.106878807594487, "grad_norm": 0.928877592086792, "learning_rate": 2.5245626836968093e-06, "loss": 0.0807, "step": 138870 }, { "epoch": 4.107174543088662, "grad_norm": 0.5341552495956421, "learning_rate": 2.52443599377787e-06, "loss": 0.0688, "step": 138880 }, { "epoch": 4.107470278582835, "grad_norm": 1.198838710784912, "learning_rate": 2.5243093038589305e-06, "loss": 0.0706, "step": 138890 }, { "epoch": 4.10776601407701, "grad_norm": 1.0221880674362183, "learning_rate": 2.5241826139399913e-06, "loss": 0.0533, "step": 138900 }, { "epoch": 4.108061749571183, "grad_norm": 0.982155978679657, "learning_rate": 2.524055924021052e-06, "loss": 0.0629, "step": 138910 }, { "epoch": 4.1083574850653575, "grad_norm": 0.9448078274726868, "learning_rate": 2.523929234102113e-06, "loss": 0.0771, "step": 138920 }, { "epoch": 4.108653220559532, "grad_norm": 1.0428657531738281, "learning_rate": 2.523802544183173e-06, "loss": 0.0827, "step": 138930 }, { "epoch": 4.108948956053705, "grad_norm": 1.3451286554336548, "learning_rate": 2.523675854264234e-06, "loss": 0.0728, "step": 138940 }, { "epoch": 4.10924469154788, "grad_norm": 0.8085564970970154, "learning_rate": 2.5235491643452944e-06, "loss": 0.0582, "step": 138950 }, { "epoch": 4.109540427042053, "grad_norm": 0.7789638042449951, "learning_rate": 2.523422474426355e-06, "loss": 0.0641, "step": 138960 }, { "epoch": 4.109836162536228, "grad_norm": 1.0452044010162354, "learning_rate": 2.5232957845074155e-06, "loss": 0.0985, "step": 138970 }, { "epoch": 4.110131898030402, "grad_norm": 0.6341636776924133, "learning_rate": 2.5231690945884763e-06, "loss": 0.074, "step": 138980 }, { "epoch": 4.1104276335245755, "grad_norm": 0.9507685899734497, "learning_rate": 2.523042404669537e-06, "loss": 0.0684, "step": 138990 }, { "epoch": 4.11072336901875, "grad_norm": 0.8197273015975952, "learning_rate": 2.522915714750598e-06, "loss": 0.0538, "step": 139000 }, { "epoch": 4.111019104512923, "grad_norm": 0.7621573209762573, "learning_rate": 2.5227890248316582e-06, "loss": 0.0576, "step": 139010 }, { "epoch": 4.111314840007098, "grad_norm": 1.369642972946167, "learning_rate": 2.522662334912719e-06, "loss": 0.0625, "step": 139020 }, { "epoch": 4.111610575501271, "grad_norm": 1.5360229015350342, "learning_rate": 2.5225356449937794e-06, "loss": 0.063, "step": 139030 }, { "epoch": 4.111906310995446, "grad_norm": 0.746938169002533, "learning_rate": 2.52240895507484e-06, "loss": 0.0622, "step": 139040 }, { "epoch": 4.11220204648962, "grad_norm": 0.6043264269828796, "learning_rate": 2.5222822651559006e-06, "loss": 0.0716, "step": 139050 }, { "epoch": 4.112497781983794, "grad_norm": 0.6980419158935547, "learning_rate": 2.5221555752369613e-06, "loss": 0.067, "step": 139060 }, { "epoch": 4.112793517477968, "grad_norm": 0.9602250456809998, "learning_rate": 2.522028885318022e-06, "loss": 0.082, "step": 139070 }, { "epoch": 4.1130892529721415, "grad_norm": 1.126561164855957, "learning_rate": 2.521902195399083e-06, "loss": 0.0634, "step": 139080 }, { "epoch": 4.113384988466316, "grad_norm": 1.37200129032135, "learning_rate": 2.5217755054801433e-06, "loss": 0.0645, "step": 139090 }, { "epoch": 4.113680723960489, "grad_norm": 1.119529366493225, "learning_rate": 2.521648815561204e-06, "loss": 0.0579, "step": 139100 }, { "epoch": 4.113976459454664, "grad_norm": 0.9201611280441284, "learning_rate": 2.5215221256422644e-06, "loss": 0.0595, "step": 139110 }, { "epoch": 4.114272194948838, "grad_norm": 1.9823882579803467, "learning_rate": 2.5213954357233252e-06, "loss": 0.0761, "step": 139120 }, { "epoch": 4.114567930443012, "grad_norm": 1.0371769666671753, "learning_rate": 2.5212687458043856e-06, "loss": 0.0764, "step": 139130 }, { "epoch": 4.114863665937186, "grad_norm": 0.7204731106758118, "learning_rate": 2.521142055885446e-06, "loss": 0.0593, "step": 139140 }, { "epoch": 4.1151594014313595, "grad_norm": 1.0544863939285278, "learning_rate": 2.521015365966507e-06, "loss": 0.0653, "step": 139150 }, { "epoch": 4.115455136925534, "grad_norm": 1.0095590353012085, "learning_rate": 2.5208886760475675e-06, "loss": 0.0733, "step": 139160 }, { "epoch": 4.115750872419707, "grad_norm": 0.6320910453796387, "learning_rate": 2.5207619861286283e-06, "loss": 0.0751, "step": 139170 }, { "epoch": 4.116046607913882, "grad_norm": 1.0437242984771729, "learning_rate": 2.5206352962096887e-06, "loss": 0.0692, "step": 139180 }, { "epoch": 4.116342343408056, "grad_norm": 1.0501341819763184, "learning_rate": 2.5205086062907495e-06, "loss": 0.0648, "step": 139190 }, { "epoch": 4.11663807890223, "grad_norm": 0.7261267304420471, "learning_rate": 2.52038191637181e-06, "loss": 0.059, "step": 139200 }, { "epoch": 4.116933814396404, "grad_norm": 1.0071808099746704, "learning_rate": 2.5202552264528706e-06, "loss": 0.0713, "step": 139210 }, { "epoch": 4.1172295498905775, "grad_norm": 1.1385506391525269, "learning_rate": 2.520128536533931e-06, "loss": 0.0705, "step": 139220 }, { "epoch": 4.117525285384752, "grad_norm": 1.0479947328567505, "learning_rate": 2.5200018466149922e-06, "loss": 0.0784, "step": 139230 }, { "epoch": 4.117821020878926, "grad_norm": 0.9867274165153503, "learning_rate": 2.5198751566960526e-06, "loss": 0.0692, "step": 139240 }, { "epoch": 4.1181167563731, "grad_norm": 0.9405547976493835, "learning_rate": 2.5197484667771134e-06, "loss": 0.0642, "step": 139250 }, { "epoch": 4.118412491867274, "grad_norm": 0.6357544660568237, "learning_rate": 2.5196217768581737e-06, "loss": 0.0702, "step": 139260 }, { "epoch": 4.118708227361448, "grad_norm": 0.7740024924278259, "learning_rate": 2.5194950869392345e-06, "loss": 0.0748, "step": 139270 }, { "epoch": 4.119003962855622, "grad_norm": 0.8199096322059631, "learning_rate": 2.519368397020295e-06, "loss": 0.0667, "step": 139280 }, { "epoch": 4.119299698349796, "grad_norm": 1.1937048435211182, "learning_rate": 2.5192417071013557e-06, "loss": 0.058, "step": 139290 }, { "epoch": 4.11959543384397, "grad_norm": 0.7592008709907532, "learning_rate": 2.519115017182416e-06, "loss": 0.0707, "step": 139300 }, { "epoch": 4.119891169338144, "grad_norm": 1.396655559539795, "learning_rate": 2.5189883272634773e-06, "loss": 0.0768, "step": 139310 }, { "epoch": 4.120186904832318, "grad_norm": 0.8440071940422058, "learning_rate": 2.5188616373445376e-06, "loss": 0.0786, "step": 139320 }, { "epoch": 4.120482640326492, "grad_norm": 0.9297073483467102, "learning_rate": 2.5187349474255984e-06, "loss": 0.0719, "step": 139330 }, { "epoch": 4.120778375820666, "grad_norm": 0.9396333694458008, "learning_rate": 2.518608257506659e-06, "loss": 0.0791, "step": 139340 }, { "epoch": 4.12107411131484, "grad_norm": 0.9418594241142273, "learning_rate": 2.5184815675877196e-06, "loss": 0.0607, "step": 139350 }, { "epoch": 4.121369846809014, "grad_norm": 0.8376247882843018, "learning_rate": 2.51835487766878e-06, "loss": 0.0747, "step": 139360 }, { "epoch": 4.121665582303188, "grad_norm": 1.1592060327529907, "learning_rate": 2.5182281877498407e-06, "loss": 0.0753, "step": 139370 }, { "epoch": 4.121961317797362, "grad_norm": 0.6451947093009949, "learning_rate": 2.518101497830901e-06, "loss": 0.0663, "step": 139380 }, { "epoch": 4.122257053291536, "grad_norm": 0.9499331712722778, "learning_rate": 2.5179748079119623e-06, "loss": 0.0636, "step": 139390 }, { "epoch": 4.12255278878571, "grad_norm": 1.2188340425491333, "learning_rate": 2.5178481179930227e-06, "loss": 0.0552, "step": 139400 }, { "epoch": 4.122848524279884, "grad_norm": 1.012707233428955, "learning_rate": 2.5177214280740835e-06, "loss": 0.0815, "step": 139410 }, { "epoch": 4.123144259774058, "grad_norm": 0.6252022385597229, "learning_rate": 2.517594738155144e-06, "loss": 0.0767, "step": 139420 }, { "epoch": 4.123439995268232, "grad_norm": 1.6850829124450684, "learning_rate": 2.5174680482362046e-06, "loss": 0.0754, "step": 139430 }, { "epoch": 4.123735730762406, "grad_norm": 0.6115559935569763, "learning_rate": 2.517341358317265e-06, "loss": 0.0593, "step": 139440 }, { "epoch": 4.1240314662565805, "grad_norm": 0.6645835041999817, "learning_rate": 2.5172146683983258e-06, "loss": 0.0437, "step": 139450 }, { "epoch": 4.124327201750754, "grad_norm": 0.5820252895355225, "learning_rate": 2.517087978479386e-06, "loss": 0.0587, "step": 139460 }, { "epoch": 4.124622937244928, "grad_norm": 0.9167125225067139, "learning_rate": 2.5169612885604474e-06, "loss": 0.0876, "step": 139470 }, { "epoch": 4.124918672739102, "grad_norm": 0.8051365613937378, "learning_rate": 2.5168345986415077e-06, "loss": 0.0731, "step": 139480 }, { "epoch": 4.125214408233276, "grad_norm": 0.5782898664474487, "learning_rate": 2.5167079087225685e-06, "loss": 0.0666, "step": 139490 }, { "epoch": 4.12551014372745, "grad_norm": 0.5671716928482056, "learning_rate": 2.516581218803629e-06, "loss": 0.0654, "step": 139500 }, { "epoch": 4.125805879221624, "grad_norm": 0.7645250558853149, "learning_rate": 2.5164545288846897e-06, "loss": 0.0668, "step": 139510 }, { "epoch": 4.1261016147157985, "grad_norm": 0.809370756149292, "learning_rate": 2.51632783896575e-06, "loss": 0.0682, "step": 139520 }, { "epoch": 4.126397350209972, "grad_norm": 1.4412927627563477, "learning_rate": 2.516201149046811e-06, "loss": 0.0746, "step": 139530 }, { "epoch": 4.126693085704146, "grad_norm": 0.8343147039413452, "learning_rate": 2.516074459127871e-06, "loss": 0.0699, "step": 139540 }, { "epoch": 4.12698882119832, "grad_norm": 0.8133230209350586, "learning_rate": 2.515947769208932e-06, "loss": 0.0643, "step": 139550 }, { "epoch": 4.127284556692494, "grad_norm": 0.9228433966636658, "learning_rate": 2.5158210792899928e-06, "loss": 0.0694, "step": 139560 }, { "epoch": 4.127580292186669, "grad_norm": 0.8165654540061951, "learning_rate": 2.515694389371053e-06, "loss": 0.0827, "step": 139570 }, { "epoch": 4.127876027680842, "grad_norm": 1.4564497470855713, "learning_rate": 2.515567699452114e-06, "loss": 0.0867, "step": 139580 }, { "epoch": 4.1281717631750166, "grad_norm": 1.2135897874832153, "learning_rate": 2.5154410095331743e-06, "loss": 0.0722, "step": 139590 }, { "epoch": 4.12846749866919, "grad_norm": 1.4473884105682373, "learning_rate": 2.515314319614235e-06, "loss": 0.0733, "step": 139600 }, { "epoch": 4.128763234163364, "grad_norm": 1.4703596830368042, "learning_rate": 2.5151876296952955e-06, "loss": 0.051, "step": 139610 }, { "epoch": 4.129058969657538, "grad_norm": 1.225616216659546, "learning_rate": 2.5150609397763562e-06, "loss": 0.0867, "step": 139620 }, { "epoch": 4.129354705151712, "grad_norm": 0.811384379863739, "learning_rate": 2.514934249857417e-06, "loss": 0.0684, "step": 139630 }, { "epoch": 4.129650440645887, "grad_norm": 0.9046415090560913, "learning_rate": 2.514807559938478e-06, "loss": 0.0643, "step": 139640 }, { "epoch": 4.12994617614006, "grad_norm": 0.8099322319030762, "learning_rate": 2.514680870019538e-06, "loss": 0.0548, "step": 139650 }, { "epoch": 4.130241911634235, "grad_norm": 0.9926580786705017, "learning_rate": 2.514554180100599e-06, "loss": 0.07, "step": 139660 }, { "epoch": 4.130537647128408, "grad_norm": 0.56102055311203, "learning_rate": 2.5144274901816593e-06, "loss": 0.0755, "step": 139670 }, { "epoch": 4.1308333826225825, "grad_norm": 0.9590795040130615, "learning_rate": 2.51430080026272e-06, "loss": 0.0677, "step": 139680 }, { "epoch": 4.131129118116756, "grad_norm": 1.1870249509811401, "learning_rate": 2.5141741103437805e-06, "loss": 0.0721, "step": 139690 }, { "epoch": 4.13142485361093, "grad_norm": 0.5562266111373901, "learning_rate": 2.5140474204248413e-06, "loss": 0.0585, "step": 139700 }, { "epoch": 4.131720589105105, "grad_norm": 1.34205961227417, "learning_rate": 2.513920730505902e-06, "loss": 0.0768, "step": 139710 }, { "epoch": 4.132016324599278, "grad_norm": 0.9442832469940186, "learning_rate": 2.513794040586963e-06, "loss": 0.0727, "step": 139720 }, { "epoch": 4.132312060093453, "grad_norm": 1.0683183670043945, "learning_rate": 2.5136673506680232e-06, "loss": 0.0687, "step": 139730 }, { "epoch": 4.132607795587626, "grad_norm": 1.0421043634414673, "learning_rate": 2.513540660749084e-06, "loss": 0.0669, "step": 139740 }, { "epoch": 4.1329035310818005, "grad_norm": 0.6609064936637878, "learning_rate": 2.5134139708301444e-06, "loss": 0.0513, "step": 139750 }, { "epoch": 4.133199266575974, "grad_norm": 1.2180546522140503, "learning_rate": 2.513287280911205e-06, "loss": 0.0723, "step": 139760 }, { "epoch": 4.133495002070148, "grad_norm": 1.1619517803192139, "learning_rate": 2.5131605909922655e-06, "loss": 0.0775, "step": 139770 }, { "epoch": 4.133790737564323, "grad_norm": 0.7486892938613892, "learning_rate": 2.5130339010733263e-06, "loss": 0.0614, "step": 139780 }, { "epoch": 4.134086473058496, "grad_norm": 0.834345281124115, "learning_rate": 2.512907211154387e-06, "loss": 0.0561, "step": 139790 }, { "epoch": 4.134382208552671, "grad_norm": 0.7977767586708069, "learning_rate": 2.512780521235448e-06, "loss": 0.0637, "step": 139800 }, { "epoch": 4.134677944046844, "grad_norm": 1.3862922191619873, "learning_rate": 2.5126538313165083e-06, "loss": 0.0599, "step": 139810 }, { "epoch": 4.134973679541019, "grad_norm": 1.3902318477630615, "learning_rate": 2.512527141397569e-06, "loss": 0.0866, "step": 139820 }, { "epoch": 4.135269415035193, "grad_norm": 0.35724517703056335, "learning_rate": 2.5124004514786294e-06, "loss": 0.0695, "step": 139830 }, { "epoch": 4.1355651505293665, "grad_norm": 0.8726220726966858, "learning_rate": 2.5122737615596902e-06, "loss": 0.0784, "step": 139840 }, { "epoch": 4.135860886023541, "grad_norm": 0.5506006479263306, "learning_rate": 2.5121470716407506e-06, "loss": 0.0603, "step": 139850 }, { "epoch": 4.136156621517714, "grad_norm": 0.9491549730300903, "learning_rate": 2.5120203817218114e-06, "loss": 0.0671, "step": 139860 }, { "epoch": 4.136452357011889, "grad_norm": 1.1615196466445923, "learning_rate": 2.511893691802872e-06, "loss": 0.0666, "step": 139870 }, { "epoch": 4.136748092506062, "grad_norm": 0.8231201171875, "learning_rate": 2.511767001883933e-06, "loss": 0.0718, "step": 139880 }, { "epoch": 4.137043828000237, "grad_norm": 0.7271007299423218, "learning_rate": 2.5116403119649933e-06, "loss": 0.0712, "step": 139890 }, { "epoch": 4.137339563494411, "grad_norm": 0.9600127935409546, "learning_rate": 2.511513622046054e-06, "loss": 0.0662, "step": 139900 }, { "epoch": 4.1376352989885845, "grad_norm": 1.9620405435562134, "learning_rate": 2.5113869321271145e-06, "loss": 0.0735, "step": 139910 }, { "epoch": 4.137931034482759, "grad_norm": 0.987330973148346, "learning_rate": 2.5112602422081753e-06, "loss": 0.0748, "step": 139920 }, { "epoch": 4.138226769976932, "grad_norm": 0.7587239146232605, "learning_rate": 2.5111335522892356e-06, "loss": 0.0671, "step": 139930 }, { "epoch": 4.138522505471107, "grad_norm": 0.5158078074455261, "learning_rate": 2.5110068623702964e-06, "loss": 0.0521, "step": 139940 }, { "epoch": 4.13881824096528, "grad_norm": 1.0371681451797485, "learning_rate": 2.5108801724513572e-06, "loss": 0.06, "step": 139950 }, { "epoch": 4.139113976459455, "grad_norm": 1.0744191408157349, "learning_rate": 2.510753482532418e-06, "loss": 0.0672, "step": 139960 }, { "epoch": 4.139409711953629, "grad_norm": 0.9753370881080627, "learning_rate": 2.5106267926134784e-06, "loss": 0.0731, "step": 139970 }, { "epoch": 4.1397054474478026, "grad_norm": 0.985327422618866, "learning_rate": 2.5105001026945387e-06, "loss": 0.096, "step": 139980 }, { "epoch": 4.140001182941977, "grad_norm": 0.5306456089019775, "learning_rate": 2.5103734127755995e-06, "loss": 0.0677, "step": 139990 }, { "epoch": 4.14029691843615, "grad_norm": 0.7318142056465149, "learning_rate": 2.51024672285666e-06, "loss": 0.0687, "step": 140000 }, { "epoch": 4.140592653930325, "grad_norm": 1.6882988214492798, "learning_rate": 2.5101200329377207e-06, "loss": 0.0671, "step": 140010 }, { "epoch": 4.140888389424498, "grad_norm": 0.5862410068511963, "learning_rate": 2.509993343018781e-06, "loss": 0.0684, "step": 140020 }, { "epoch": 4.141184124918673, "grad_norm": 0.6627609729766846, "learning_rate": 2.5098666530998423e-06, "loss": 0.0605, "step": 140030 }, { "epoch": 4.141479860412847, "grad_norm": 1.2644211053848267, "learning_rate": 2.5097399631809026e-06, "loss": 0.0523, "step": 140040 }, { "epoch": 4.141775595907021, "grad_norm": 0.4825729727745056, "learning_rate": 2.5096132732619634e-06, "loss": 0.0524, "step": 140050 }, { "epoch": 4.142071331401195, "grad_norm": 1.0490833520889282, "learning_rate": 2.5094865833430238e-06, "loss": 0.0663, "step": 140060 }, { "epoch": 4.1423670668953685, "grad_norm": 0.9735134840011597, "learning_rate": 2.5093598934240846e-06, "loss": 0.0686, "step": 140070 }, { "epoch": 4.142662802389543, "grad_norm": 1.2389640808105469, "learning_rate": 2.509233203505145e-06, "loss": 0.0624, "step": 140080 }, { "epoch": 4.142958537883716, "grad_norm": 0.38897866010665894, "learning_rate": 2.5091065135862057e-06, "loss": 0.0679, "step": 140090 }, { "epoch": 4.143254273377891, "grad_norm": 0.6960402131080627, "learning_rate": 2.508979823667266e-06, "loss": 0.0599, "step": 140100 }, { "epoch": 4.143550008872065, "grad_norm": 0.8080918788909912, "learning_rate": 2.5088531337483273e-06, "loss": 0.0521, "step": 140110 }, { "epoch": 4.143845744366239, "grad_norm": 0.8489924669265747, "learning_rate": 2.5087264438293877e-06, "loss": 0.0762, "step": 140120 }, { "epoch": 4.144141479860413, "grad_norm": 1.1443121433258057, "learning_rate": 2.5085997539104485e-06, "loss": 0.0752, "step": 140130 }, { "epoch": 4.1444372153545865, "grad_norm": 0.633622944355011, "learning_rate": 2.508473063991509e-06, "loss": 0.078, "step": 140140 }, { "epoch": 4.144732950848761, "grad_norm": 0.8213728666305542, "learning_rate": 2.5083463740725696e-06, "loss": 0.0626, "step": 140150 }, { "epoch": 4.145028686342935, "grad_norm": 0.5164963603019714, "learning_rate": 2.50821968415363e-06, "loss": 0.0635, "step": 140160 }, { "epoch": 4.145324421837109, "grad_norm": 1.100637435913086, "learning_rate": 2.5080929942346908e-06, "loss": 0.0793, "step": 140170 }, { "epoch": 4.145620157331283, "grad_norm": 1.0830321311950684, "learning_rate": 2.507966304315751e-06, "loss": 0.0706, "step": 140180 }, { "epoch": 4.145915892825457, "grad_norm": 1.0820945501327515, "learning_rate": 2.5078396143968123e-06, "loss": 0.0611, "step": 140190 }, { "epoch": 4.146211628319631, "grad_norm": 0.46061545610427856, "learning_rate": 2.5077129244778727e-06, "loss": 0.0521, "step": 140200 }, { "epoch": 4.146507363813805, "grad_norm": 1.0042731761932373, "learning_rate": 2.5075862345589335e-06, "loss": 0.0628, "step": 140210 }, { "epoch": 4.146803099307979, "grad_norm": 0.6362776160240173, "learning_rate": 2.507459544639994e-06, "loss": 0.0804, "step": 140220 }, { "epoch": 4.147098834802153, "grad_norm": 0.7793088555335999, "learning_rate": 2.5073328547210547e-06, "loss": 0.0774, "step": 140230 }, { "epoch": 4.147394570296327, "grad_norm": 0.9896360635757446, "learning_rate": 2.507206164802115e-06, "loss": 0.062, "step": 140240 }, { "epoch": 4.147690305790501, "grad_norm": 1.0382750034332275, "learning_rate": 2.507079474883176e-06, "loss": 0.0559, "step": 140250 }, { "epoch": 4.147986041284675, "grad_norm": 1.1169179677963257, "learning_rate": 2.506952784964236e-06, "loss": 0.0674, "step": 140260 }, { "epoch": 4.148281776778849, "grad_norm": 0.9630103707313538, "learning_rate": 2.5068260950452974e-06, "loss": 0.0684, "step": 140270 }, { "epoch": 4.148577512273023, "grad_norm": 0.9604938626289368, "learning_rate": 2.5066994051263578e-06, "loss": 0.0878, "step": 140280 }, { "epoch": 4.148873247767197, "grad_norm": 0.8345896601676941, "learning_rate": 2.5065727152074185e-06, "loss": 0.0581, "step": 140290 }, { "epoch": 4.149168983261371, "grad_norm": 0.33564522862434387, "learning_rate": 2.506446025288479e-06, "loss": 0.0561, "step": 140300 }, { "epoch": 4.149464718755545, "grad_norm": 1.6414437294006348, "learning_rate": 2.5063193353695397e-06, "loss": 0.0678, "step": 140310 }, { "epoch": 4.149760454249719, "grad_norm": 1.1056838035583496, "learning_rate": 2.5061926454506e-06, "loss": 0.0734, "step": 140320 }, { "epoch": 4.150056189743893, "grad_norm": 1.081092357635498, "learning_rate": 2.506065955531661e-06, "loss": 0.0729, "step": 140330 }, { "epoch": 4.150351925238067, "grad_norm": 1.0464857816696167, "learning_rate": 2.5059392656127212e-06, "loss": 0.0688, "step": 140340 }, { "epoch": 4.150647660732241, "grad_norm": 0.766893208026886, "learning_rate": 2.5058125756937824e-06, "loss": 0.0665, "step": 140350 }, { "epoch": 4.150943396226415, "grad_norm": 0.8147795796394348, "learning_rate": 2.505685885774843e-06, "loss": 0.0664, "step": 140360 }, { "epoch": 4.1512391317205894, "grad_norm": 0.4988706707954407, "learning_rate": 2.5055591958559036e-06, "loss": 0.0686, "step": 140370 }, { "epoch": 4.151534867214763, "grad_norm": 0.7260753512382507, "learning_rate": 2.505432505936964e-06, "loss": 0.0683, "step": 140380 }, { "epoch": 4.151830602708937, "grad_norm": 1.1088194847106934, "learning_rate": 2.5053058160180243e-06, "loss": 0.0758, "step": 140390 }, { "epoch": 4.152126338203111, "grad_norm": 1.3168222904205322, "learning_rate": 2.505179126099085e-06, "loss": 0.0613, "step": 140400 }, { "epoch": 4.152422073697285, "grad_norm": 1.474531650543213, "learning_rate": 2.5050524361801455e-06, "loss": 0.0672, "step": 140410 }, { "epoch": 4.15271780919146, "grad_norm": 1.0535465478897095, "learning_rate": 2.5049257462612063e-06, "loss": 0.0664, "step": 140420 }, { "epoch": 4.153013544685633, "grad_norm": 0.8122789859771729, "learning_rate": 2.504799056342267e-06, "loss": 0.0682, "step": 140430 }, { "epoch": 4.1533092801798075, "grad_norm": 0.8304623961448669, "learning_rate": 2.504672366423328e-06, "loss": 0.0702, "step": 140440 }, { "epoch": 4.153605015673981, "grad_norm": 1.1382406949996948, "learning_rate": 2.5045456765043882e-06, "loss": 0.0653, "step": 140450 }, { "epoch": 4.153900751168155, "grad_norm": 1.0590147972106934, "learning_rate": 2.504418986585449e-06, "loss": 0.0698, "step": 140460 }, { "epoch": 4.154196486662329, "grad_norm": 0.8844878077507019, "learning_rate": 2.5042922966665094e-06, "loss": 0.0769, "step": 140470 }, { "epoch": 4.154492222156503, "grad_norm": 1.2598989009857178, "learning_rate": 2.50416560674757e-06, "loss": 0.0663, "step": 140480 }, { "epoch": 4.154787957650678, "grad_norm": 0.998442530632019, "learning_rate": 2.5040389168286305e-06, "loss": 0.0655, "step": 140490 }, { "epoch": 4.155083693144851, "grad_norm": 0.7314008474349976, "learning_rate": 2.5039122269096913e-06, "loss": 0.0617, "step": 140500 }, { "epoch": 4.1553794286390255, "grad_norm": 0.5634648203849792, "learning_rate": 2.503785536990752e-06, "loss": 0.0585, "step": 140510 }, { "epoch": 4.155675164133199, "grad_norm": 0.739224910736084, "learning_rate": 2.503658847071813e-06, "loss": 0.0674, "step": 140520 }, { "epoch": 4.155970899627373, "grad_norm": 0.49438732862472534, "learning_rate": 2.5035321571528733e-06, "loss": 0.0683, "step": 140530 }, { "epoch": 4.156266635121547, "grad_norm": 0.9534237384796143, "learning_rate": 2.503405467233934e-06, "loss": 0.063, "step": 140540 }, { "epoch": 4.156562370615721, "grad_norm": 0.7542206048965454, "learning_rate": 2.5032787773149944e-06, "loss": 0.0678, "step": 140550 }, { "epoch": 4.156858106109896, "grad_norm": 0.9218693375587463, "learning_rate": 2.503152087396055e-06, "loss": 0.0583, "step": 140560 }, { "epoch": 4.157153841604069, "grad_norm": 0.5893589854240417, "learning_rate": 2.5030253974771156e-06, "loss": 0.0701, "step": 140570 }, { "epoch": 4.157449577098244, "grad_norm": 0.9815834164619446, "learning_rate": 2.5028987075581764e-06, "loss": 0.0752, "step": 140580 }, { "epoch": 4.157745312592417, "grad_norm": 1.534237265586853, "learning_rate": 2.502772017639237e-06, "loss": 0.064, "step": 140590 }, { "epoch": 4.1580410480865915, "grad_norm": 1.0890986919403076, "learning_rate": 2.502645327720298e-06, "loss": 0.0548, "step": 140600 }, { "epoch": 4.158336783580765, "grad_norm": 1.0192347764968872, "learning_rate": 2.5025186378013583e-06, "loss": 0.0545, "step": 140610 }, { "epoch": 4.158632519074939, "grad_norm": 0.761467456817627, "learning_rate": 2.502391947882419e-06, "loss": 0.0716, "step": 140620 }, { "epoch": 4.158928254569114, "grad_norm": 0.3521746098995209, "learning_rate": 2.5022652579634795e-06, "loss": 0.0772, "step": 140630 }, { "epoch": 4.159223990063287, "grad_norm": 1.3214013576507568, "learning_rate": 2.5021385680445403e-06, "loss": 0.0716, "step": 140640 }, { "epoch": 4.159519725557462, "grad_norm": 0.7395907640457153, "learning_rate": 2.5020118781256006e-06, "loss": 0.0731, "step": 140650 }, { "epoch": 4.159815461051635, "grad_norm": 0.8644816875457764, "learning_rate": 2.5018851882066614e-06, "loss": 0.0781, "step": 140660 }, { "epoch": 4.1601111965458095, "grad_norm": 0.6761685013771057, "learning_rate": 2.501758498287722e-06, "loss": 0.086, "step": 140670 }, { "epoch": 4.160406932039983, "grad_norm": 1.7160344123840332, "learning_rate": 2.501631808368783e-06, "loss": 0.0683, "step": 140680 }, { "epoch": 4.160702667534157, "grad_norm": 0.8721678256988525, "learning_rate": 2.5015051184498434e-06, "loss": 0.0648, "step": 140690 }, { "epoch": 4.160998403028332, "grad_norm": 0.9702358245849609, "learning_rate": 2.501378428530904e-06, "loss": 0.0579, "step": 140700 }, { "epoch": 4.161294138522505, "grad_norm": 0.684283971786499, "learning_rate": 2.5012517386119645e-06, "loss": 0.0523, "step": 140710 }, { "epoch": 4.16158987401668, "grad_norm": 1.0400142669677734, "learning_rate": 2.5011250486930253e-06, "loss": 0.0788, "step": 140720 }, { "epoch": 4.161885609510853, "grad_norm": 1.0178219079971313, "learning_rate": 2.5009983587740857e-06, "loss": 0.0748, "step": 140730 }, { "epoch": 4.162181345005028, "grad_norm": 1.2489039897918701, "learning_rate": 2.5008716688551465e-06, "loss": 0.0744, "step": 140740 }, { "epoch": 4.162477080499201, "grad_norm": 1.008867859840393, "learning_rate": 2.5007449789362072e-06, "loss": 0.0507, "step": 140750 }, { "epoch": 4.1627728159933755, "grad_norm": 1.2266613245010376, "learning_rate": 2.500618289017268e-06, "loss": 0.067, "step": 140760 }, { "epoch": 4.16306855148755, "grad_norm": 0.8224027752876282, "learning_rate": 2.5004915990983284e-06, "loss": 0.078, "step": 140770 }, { "epoch": 4.163364286981723, "grad_norm": 0.9389595985412598, "learning_rate": 2.500364909179389e-06, "loss": 0.072, "step": 140780 }, { "epoch": 4.163660022475898, "grad_norm": 1.5722002983093262, "learning_rate": 2.5002382192604496e-06, "loss": 0.0739, "step": 140790 }, { "epoch": 4.163955757970071, "grad_norm": 2.00057315826416, "learning_rate": 2.50011152934151e-06, "loss": 0.0642, "step": 140800 }, { "epoch": 4.164251493464246, "grad_norm": 0.7764270901679993, "learning_rate": 2.4999848394225707e-06, "loss": 0.0634, "step": 140810 }, { "epoch": 4.16454722895842, "grad_norm": 1.0067346096038818, "learning_rate": 2.499858149503631e-06, "loss": 0.0785, "step": 140820 }, { "epoch": 4.1648429644525935, "grad_norm": 0.8658057451248169, "learning_rate": 2.4997314595846923e-06, "loss": 0.0768, "step": 140830 }, { "epoch": 4.165138699946768, "grad_norm": 0.8712626099586487, "learning_rate": 2.4996047696657527e-06, "loss": 0.0568, "step": 140840 }, { "epoch": 4.165434435440941, "grad_norm": 0.7390337586402893, "learning_rate": 2.4994780797468134e-06, "loss": 0.0603, "step": 140850 }, { "epoch": 4.165730170935116, "grad_norm": 1.0002094507217407, "learning_rate": 2.499351389827874e-06, "loss": 0.0685, "step": 140860 }, { "epoch": 4.166025906429289, "grad_norm": 0.7973560094833374, "learning_rate": 2.4992246999089346e-06, "loss": 0.0783, "step": 140870 }, { "epoch": 4.166321641923464, "grad_norm": 0.9555928707122803, "learning_rate": 2.499098009989995e-06, "loss": 0.0558, "step": 140880 }, { "epoch": 4.166617377417638, "grad_norm": 0.7763690948486328, "learning_rate": 2.4989713200710558e-06, "loss": 0.0572, "step": 140890 }, { "epoch": 4.1669131129118115, "grad_norm": 1.463153600692749, "learning_rate": 2.498844630152116e-06, "loss": 0.0643, "step": 140900 }, { "epoch": 4.167208848405986, "grad_norm": 0.8488304615020752, "learning_rate": 2.4987179402331773e-06, "loss": 0.0645, "step": 140910 }, { "epoch": 4.167504583900159, "grad_norm": 0.5060309171676636, "learning_rate": 2.4985912503142377e-06, "loss": 0.0826, "step": 140920 }, { "epoch": 4.167800319394334, "grad_norm": 0.8669180274009705, "learning_rate": 2.4984645603952985e-06, "loss": 0.066, "step": 140930 }, { "epoch": 4.168096054888507, "grad_norm": 0.8607916235923767, "learning_rate": 2.498337870476359e-06, "loss": 0.077, "step": 140940 }, { "epoch": 4.168391790382682, "grad_norm": 1.1929906606674194, "learning_rate": 2.4982111805574196e-06, "loss": 0.0687, "step": 140950 }, { "epoch": 4.168687525876856, "grad_norm": 0.983194887638092, "learning_rate": 2.49808449063848e-06, "loss": 0.0695, "step": 140960 }, { "epoch": 4.16898326137103, "grad_norm": 0.8633143901824951, "learning_rate": 2.497957800719541e-06, "loss": 0.0623, "step": 140970 }, { "epoch": 4.169278996865204, "grad_norm": 0.6460456252098083, "learning_rate": 2.497831110800601e-06, "loss": 0.0647, "step": 140980 }, { "epoch": 4.1695747323593775, "grad_norm": 0.7547476291656494, "learning_rate": 2.4977044208816624e-06, "loss": 0.0793, "step": 140990 }, { "epoch": 4.169870467853552, "grad_norm": 0.7803981900215149, "learning_rate": 2.4975777309627227e-06, "loss": 0.0447, "step": 141000 }, { "epoch": 4.170166203347725, "grad_norm": 0.6471205353736877, "learning_rate": 2.4974510410437835e-06, "loss": 0.0692, "step": 141010 }, { "epoch": 4.1704619388419, "grad_norm": 0.5842392444610596, "learning_rate": 2.497324351124844e-06, "loss": 0.0979, "step": 141020 }, { "epoch": 4.170757674336074, "grad_norm": 0.6364553570747375, "learning_rate": 2.4971976612059047e-06, "loss": 0.0698, "step": 141030 }, { "epoch": 4.171053409830248, "grad_norm": 0.5397197604179382, "learning_rate": 2.497070971286965e-06, "loss": 0.0819, "step": 141040 }, { "epoch": 4.171349145324422, "grad_norm": 1.4697221517562866, "learning_rate": 2.496944281368026e-06, "loss": 0.0653, "step": 141050 }, { "epoch": 4.1716448808185955, "grad_norm": 1.3683327436447144, "learning_rate": 2.496817591449086e-06, "loss": 0.0636, "step": 141060 }, { "epoch": 4.17194061631277, "grad_norm": 0.6882673501968384, "learning_rate": 2.4966909015301474e-06, "loss": 0.0726, "step": 141070 }, { "epoch": 4.172236351806944, "grad_norm": 0.647419810295105, "learning_rate": 2.496564211611208e-06, "loss": 0.0626, "step": 141080 }, { "epoch": 4.172532087301118, "grad_norm": 1.7467732429504395, "learning_rate": 2.4964375216922686e-06, "loss": 0.0804, "step": 141090 }, { "epoch": 4.172827822795292, "grad_norm": 0.91507887840271, "learning_rate": 2.496310831773329e-06, "loss": 0.0652, "step": 141100 }, { "epoch": 4.173123558289466, "grad_norm": 1.6001330614089966, "learning_rate": 2.4961841418543897e-06, "loss": 0.0581, "step": 141110 }, { "epoch": 4.17341929378364, "grad_norm": 0.8023799657821655, "learning_rate": 2.49605745193545e-06, "loss": 0.0656, "step": 141120 }, { "epoch": 4.173715029277814, "grad_norm": 1.563245177268982, "learning_rate": 2.495930762016511e-06, "loss": 0.0741, "step": 141130 }, { "epoch": 4.174010764771988, "grad_norm": 0.6177402138710022, "learning_rate": 2.4958040720975713e-06, "loss": 0.0737, "step": 141140 }, { "epoch": 4.174306500266162, "grad_norm": 1.2032263278961182, "learning_rate": 2.4956773821786325e-06, "loss": 0.0738, "step": 141150 }, { "epoch": 4.174602235760336, "grad_norm": 1.12896728515625, "learning_rate": 2.495550692259693e-06, "loss": 0.077, "step": 141160 }, { "epoch": 4.17489797125451, "grad_norm": 0.5313147902488708, "learning_rate": 2.4954240023407536e-06, "loss": 0.0604, "step": 141170 }, { "epoch": 4.175193706748684, "grad_norm": 0.5837768316268921, "learning_rate": 2.495297312421814e-06, "loss": 0.0829, "step": 141180 }, { "epoch": 4.175489442242858, "grad_norm": 0.7352539896965027, "learning_rate": 2.4951706225028748e-06, "loss": 0.0694, "step": 141190 }, { "epoch": 4.175785177737032, "grad_norm": 0.9388347864151001, "learning_rate": 2.495043932583935e-06, "loss": 0.0546, "step": 141200 }, { "epoch": 4.176080913231206, "grad_norm": 1.0880824327468872, "learning_rate": 2.4949172426649955e-06, "loss": 0.0737, "step": 141210 }, { "epoch": 4.17637664872538, "grad_norm": 1.1877907514572144, "learning_rate": 2.4947905527460563e-06, "loss": 0.0861, "step": 141220 }, { "epoch": 4.176672384219554, "grad_norm": 0.909975528717041, "learning_rate": 2.494663862827117e-06, "loss": 0.0617, "step": 141230 }, { "epoch": 4.176968119713728, "grad_norm": 0.905031144618988, "learning_rate": 2.494537172908178e-06, "loss": 0.0713, "step": 141240 }, { "epoch": 4.177263855207902, "grad_norm": 0.9094129800796509, "learning_rate": 2.4944104829892382e-06, "loss": 0.0619, "step": 141250 }, { "epoch": 4.177559590702076, "grad_norm": 0.9719632863998413, "learning_rate": 2.494283793070299e-06, "loss": 0.0678, "step": 141260 }, { "epoch": 4.17785532619625, "grad_norm": 1.224213719367981, "learning_rate": 2.4941571031513594e-06, "loss": 0.0833, "step": 141270 }, { "epoch": 4.178151061690424, "grad_norm": 0.9790079593658447, "learning_rate": 2.49403041323242e-06, "loss": 0.0699, "step": 141280 }, { "epoch": 4.178446797184598, "grad_norm": 0.8278059363365173, "learning_rate": 2.4939037233134806e-06, "loss": 0.0681, "step": 141290 }, { "epoch": 4.178742532678772, "grad_norm": 1.7738306522369385, "learning_rate": 2.4937770333945413e-06, "loss": 0.0704, "step": 141300 }, { "epoch": 4.179038268172946, "grad_norm": 0.7035521268844604, "learning_rate": 2.493650343475602e-06, "loss": 0.0582, "step": 141310 }, { "epoch": 4.17933400366712, "grad_norm": 0.6873041391372681, "learning_rate": 2.493523653556663e-06, "loss": 0.0615, "step": 141320 }, { "epoch": 4.179629739161294, "grad_norm": 0.8174028992652893, "learning_rate": 2.4933969636377233e-06, "loss": 0.0786, "step": 141330 }, { "epoch": 4.179925474655468, "grad_norm": 0.7907552719116211, "learning_rate": 2.493270273718784e-06, "loss": 0.0666, "step": 141340 }, { "epoch": 4.180221210149642, "grad_norm": 1.5193530321121216, "learning_rate": 2.4931435837998444e-06, "loss": 0.0496, "step": 141350 }, { "epoch": 4.1805169456438165, "grad_norm": 1.1346466541290283, "learning_rate": 2.4930168938809052e-06, "loss": 0.0765, "step": 141360 }, { "epoch": 4.18081268113799, "grad_norm": 0.9549484252929688, "learning_rate": 2.4928902039619656e-06, "loss": 0.0706, "step": 141370 }, { "epoch": 4.181108416632164, "grad_norm": 0.679643452167511, "learning_rate": 2.4927635140430264e-06, "loss": 0.0764, "step": 141380 }, { "epoch": 4.181404152126338, "grad_norm": 0.6586211323738098, "learning_rate": 2.492636824124087e-06, "loss": 0.0662, "step": 141390 }, { "epoch": 4.181699887620512, "grad_norm": 1.3102575540542603, "learning_rate": 2.492510134205148e-06, "loss": 0.0615, "step": 141400 }, { "epoch": 4.181995623114687, "grad_norm": 1.2490345239639282, "learning_rate": 2.4923834442862083e-06, "loss": 0.06, "step": 141410 }, { "epoch": 4.18229135860886, "grad_norm": 1.3388824462890625, "learning_rate": 2.492256754367269e-06, "loss": 0.0548, "step": 141420 }, { "epoch": 4.1825870941030345, "grad_norm": 0.7429509162902832, "learning_rate": 2.4921300644483295e-06, "loss": 0.0735, "step": 141430 }, { "epoch": 4.182882829597208, "grad_norm": 1.7317728996276855, "learning_rate": 2.4920033745293903e-06, "loss": 0.0727, "step": 141440 }, { "epoch": 4.183178565091382, "grad_norm": 1.2557240724563599, "learning_rate": 2.4918766846104506e-06, "loss": 0.0712, "step": 141450 }, { "epoch": 4.183474300585556, "grad_norm": 1.0809910297393799, "learning_rate": 2.4917499946915114e-06, "loss": 0.0898, "step": 141460 }, { "epoch": 4.18377003607973, "grad_norm": 0.8051312565803528, "learning_rate": 2.4916233047725722e-06, "loss": 0.063, "step": 141470 }, { "epoch": 4.184065771573905, "grad_norm": 1.0555064678192139, "learning_rate": 2.491496614853633e-06, "loss": 0.0604, "step": 141480 }, { "epoch": 4.184361507068078, "grad_norm": 0.6856648325920105, "learning_rate": 2.4913699249346934e-06, "loss": 0.0686, "step": 141490 }, { "epoch": 4.184657242562253, "grad_norm": 0.6259728670120239, "learning_rate": 2.491243235015754e-06, "loss": 0.0698, "step": 141500 }, { "epoch": 4.184952978056426, "grad_norm": 0.8790740966796875, "learning_rate": 2.4911165450968145e-06, "loss": 0.0666, "step": 141510 }, { "epoch": 4.1852487135506005, "grad_norm": 0.9487437605857849, "learning_rate": 2.4909898551778753e-06, "loss": 0.0666, "step": 141520 }, { "epoch": 4.185544449044774, "grad_norm": 1.1906501054763794, "learning_rate": 2.4908631652589357e-06, "loss": 0.0887, "step": 141530 }, { "epoch": 4.185840184538948, "grad_norm": 0.9645311832427979, "learning_rate": 2.4907364753399965e-06, "loss": 0.0714, "step": 141540 }, { "epoch": 4.186135920033123, "grad_norm": 0.7492360472679138, "learning_rate": 2.4906097854210573e-06, "loss": 0.064, "step": 141550 }, { "epoch": 4.186431655527296, "grad_norm": 1.849259614944458, "learning_rate": 2.490483095502118e-06, "loss": 0.0894, "step": 141560 }, { "epoch": 4.186727391021471, "grad_norm": 0.7588711380958557, "learning_rate": 2.4903564055831784e-06, "loss": 0.0876, "step": 141570 }, { "epoch": 4.187023126515644, "grad_norm": 0.8948582410812378, "learning_rate": 2.4902297156642392e-06, "loss": 0.0888, "step": 141580 }, { "epoch": 4.1873188620098185, "grad_norm": 0.5144438743591309, "learning_rate": 2.4901030257452996e-06, "loss": 0.0706, "step": 141590 }, { "epoch": 4.187614597503992, "grad_norm": 1.135354995727539, "learning_rate": 2.4899763358263604e-06, "loss": 0.0666, "step": 141600 }, { "epoch": 4.187910332998166, "grad_norm": 0.9149249196052551, "learning_rate": 2.4898496459074207e-06, "loss": 0.061, "step": 141610 }, { "epoch": 4.188206068492341, "grad_norm": 0.8568065762519836, "learning_rate": 2.489722955988481e-06, "loss": 0.0774, "step": 141620 }, { "epoch": 4.188501803986514, "grad_norm": 0.8752992749214172, "learning_rate": 2.4895962660695423e-06, "loss": 0.0675, "step": 141630 }, { "epoch": 4.188797539480689, "grad_norm": 0.45774027705192566, "learning_rate": 2.4894695761506027e-06, "loss": 0.0693, "step": 141640 }, { "epoch": 4.189093274974862, "grad_norm": 1.022610068321228, "learning_rate": 2.4893428862316635e-06, "loss": 0.0601, "step": 141650 }, { "epoch": 4.189389010469037, "grad_norm": 0.9537646174430847, "learning_rate": 2.489216196312724e-06, "loss": 0.0628, "step": 141660 }, { "epoch": 4.189684745963211, "grad_norm": 0.8014106750488281, "learning_rate": 2.4890895063937846e-06, "loss": 0.0688, "step": 141670 }, { "epoch": 4.189980481457384, "grad_norm": 0.7844513654708862, "learning_rate": 2.488962816474845e-06, "loss": 0.0896, "step": 141680 }, { "epoch": 4.190276216951559, "grad_norm": 0.9106543660163879, "learning_rate": 2.4888361265559058e-06, "loss": 0.0695, "step": 141690 }, { "epoch": 4.190571952445732, "grad_norm": 1.6787375211715698, "learning_rate": 2.488709436636966e-06, "loss": 0.059, "step": 141700 }, { "epoch": 4.190867687939907, "grad_norm": 0.9762713313102722, "learning_rate": 2.4885827467180274e-06, "loss": 0.0542, "step": 141710 }, { "epoch": 4.19116342343408, "grad_norm": 1.1656495332717896, "learning_rate": 2.4884560567990877e-06, "loss": 0.0831, "step": 141720 }, { "epoch": 4.191459158928255, "grad_norm": 0.8032123446464539, "learning_rate": 2.4883293668801485e-06, "loss": 0.0667, "step": 141730 }, { "epoch": 4.191754894422429, "grad_norm": 1.360492467880249, "learning_rate": 2.488202676961209e-06, "loss": 0.0842, "step": 141740 }, { "epoch": 4.1920506299166025, "grad_norm": 1.2552565336227417, "learning_rate": 2.4880759870422697e-06, "loss": 0.0789, "step": 141750 }, { "epoch": 4.192346365410777, "grad_norm": 0.9520595669746399, "learning_rate": 2.48794929712333e-06, "loss": 0.0729, "step": 141760 }, { "epoch": 4.19264210090495, "grad_norm": 1.1193760633468628, "learning_rate": 2.487822607204391e-06, "loss": 0.0739, "step": 141770 }, { "epoch": 4.192937836399125, "grad_norm": 0.9961056709289551, "learning_rate": 2.487695917285451e-06, "loss": 0.0668, "step": 141780 }, { "epoch": 4.193233571893298, "grad_norm": 1.950250267982483, "learning_rate": 2.4875692273665124e-06, "loss": 0.0795, "step": 141790 }, { "epoch": 4.193529307387473, "grad_norm": 0.4874626696109772, "learning_rate": 2.4874425374475728e-06, "loss": 0.0624, "step": 141800 }, { "epoch": 4.193825042881647, "grad_norm": 0.8765453100204468, "learning_rate": 2.4873158475286336e-06, "loss": 0.0698, "step": 141810 }, { "epoch": 4.1941207783758205, "grad_norm": 0.5755470395088196, "learning_rate": 2.487189157609694e-06, "loss": 0.0649, "step": 141820 }, { "epoch": 4.194416513869995, "grad_norm": 1.052215337753296, "learning_rate": 2.4870624676907547e-06, "loss": 0.0682, "step": 141830 }, { "epoch": 4.194712249364168, "grad_norm": 0.7416785359382629, "learning_rate": 2.486935777771815e-06, "loss": 0.0719, "step": 141840 }, { "epoch": 4.195007984858343, "grad_norm": 1.0250307321548462, "learning_rate": 2.486809087852876e-06, "loss": 0.0574, "step": 141850 }, { "epoch": 4.195303720352516, "grad_norm": 0.772357702255249, "learning_rate": 2.4866823979339362e-06, "loss": 0.0653, "step": 141860 }, { "epoch": 4.195599455846691, "grad_norm": 1.7723381519317627, "learning_rate": 2.4865557080149975e-06, "loss": 0.0846, "step": 141870 }, { "epoch": 4.195895191340865, "grad_norm": 0.9032430648803711, "learning_rate": 2.486429018096058e-06, "loss": 0.0779, "step": 141880 }, { "epoch": 4.196190926835039, "grad_norm": 0.615910530090332, "learning_rate": 2.4863023281771186e-06, "loss": 0.0711, "step": 141890 }, { "epoch": 4.196486662329213, "grad_norm": 0.3328149616718292, "learning_rate": 2.486175638258179e-06, "loss": 0.064, "step": 141900 }, { "epoch": 4.1967823978233865, "grad_norm": 1.0131796598434448, "learning_rate": 2.4860489483392398e-06, "loss": 0.0538, "step": 141910 }, { "epoch": 4.197078133317561, "grad_norm": 0.9157229661941528, "learning_rate": 2.4859222584203e-06, "loss": 0.07, "step": 141920 }, { "epoch": 4.197373868811734, "grad_norm": 0.7625077962875366, "learning_rate": 2.485795568501361e-06, "loss": 0.0655, "step": 141930 }, { "epoch": 4.197669604305909, "grad_norm": 0.9653738141059875, "learning_rate": 2.4856688785824213e-06, "loss": 0.0793, "step": 141940 }, { "epoch": 4.197965339800083, "grad_norm": 0.5798012614250183, "learning_rate": 2.4855421886634825e-06, "loss": 0.0577, "step": 141950 }, { "epoch": 4.198261075294257, "grad_norm": 0.8326302766799927, "learning_rate": 2.485415498744543e-06, "loss": 0.0612, "step": 141960 }, { "epoch": 4.198556810788431, "grad_norm": 1.392417073249817, "learning_rate": 2.4852888088256037e-06, "loss": 0.0829, "step": 141970 }, { "epoch": 4.1988525462826045, "grad_norm": 0.8428451418876648, "learning_rate": 2.485162118906664e-06, "loss": 0.074, "step": 141980 }, { "epoch": 4.199148281776779, "grad_norm": 0.9904075860977173, "learning_rate": 2.485035428987725e-06, "loss": 0.0778, "step": 141990 }, { "epoch": 4.199444017270953, "grad_norm": 0.937735915184021, "learning_rate": 2.484908739068785e-06, "loss": 0.0684, "step": 142000 }, { "epoch": 4.199739752765127, "grad_norm": 1.072300672531128, "learning_rate": 2.484782049149846e-06, "loss": 0.0849, "step": 142010 }, { "epoch": 4.200035488259301, "grad_norm": 0.8640601634979248, "learning_rate": 2.4846553592309063e-06, "loss": 0.0835, "step": 142020 }, { "epoch": 4.200331223753475, "grad_norm": 1.1065541505813599, "learning_rate": 2.484528669311967e-06, "loss": 0.0736, "step": 142030 }, { "epoch": 4.200626959247649, "grad_norm": 0.6847318410873413, "learning_rate": 2.484401979393028e-06, "loss": 0.0568, "step": 142040 }, { "epoch": 4.200922694741823, "grad_norm": 0.4896664023399353, "learning_rate": 2.4842752894740883e-06, "loss": 0.0665, "step": 142050 }, { "epoch": 4.201218430235997, "grad_norm": 1.2618862390518188, "learning_rate": 2.484148599555149e-06, "loss": 0.0659, "step": 142060 }, { "epoch": 4.201514165730171, "grad_norm": 0.6418079733848572, "learning_rate": 2.4840219096362094e-06, "loss": 0.0684, "step": 142070 }, { "epoch": 4.201809901224345, "grad_norm": 0.660528302192688, "learning_rate": 2.4838952197172702e-06, "loss": 0.0657, "step": 142080 }, { "epoch": 4.202105636718519, "grad_norm": 2.012349843978882, "learning_rate": 2.4837685297983306e-06, "loss": 0.0676, "step": 142090 }, { "epoch": 4.202401372212693, "grad_norm": 1.4772859811782837, "learning_rate": 2.4836418398793914e-06, "loss": 0.0644, "step": 142100 }, { "epoch": 4.202697107706867, "grad_norm": 0.9398871660232544, "learning_rate": 2.483515149960452e-06, "loss": 0.061, "step": 142110 }, { "epoch": 4.202992843201041, "grad_norm": 0.7704134583473206, "learning_rate": 2.483388460041513e-06, "loss": 0.0565, "step": 142120 }, { "epoch": 4.203288578695215, "grad_norm": 0.6494321227073669, "learning_rate": 2.4832617701225733e-06, "loss": 0.068, "step": 142130 }, { "epoch": 4.203584314189389, "grad_norm": 0.7527545690536499, "learning_rate": 2.483135080203634e-06, "loss": 0.0795, "step": 142140 }, { "epoch": 4.203880049683563, "grad_norm": 1.7381913661956787, "learning_rate": 2.4830083902846945e-06, "loss": 0.0747, "step": 142150 }, { "epoch": 4.204175785177737, "grad_norm": 1.2478857040405273, "learning_rate": 2.4828817003657553e-06, "loss": 0.0953, "step": 142160 }, { "epoch": 4.204471520671911, "grad_norm": 0.9597997069358826, "learning_rate": 2.4827550104468156e-06, "loss": 0.069, "step": 142170 }, { "epoch": 4.204767256166085, "grad_norm": 0.908968985080719, "learning_rate": 2.4826283205278764e-06, "loss": 0.0778, "step": 142180 }, { "epoch": 4.205062991660259, "grad_norm": 1.4273158311843872, "learning_rate": 2.4825016306089372e-06, "loss": 0.0867, "step": 142190 }, { "epoch": 4.205358727154433, "grad_norm": 0.8324891328811646, "learning_rate": 2.482374940689998e-06, "loss": 0.0655, "step": 142200 }, { "epoch": 4.205654462648607, "grad_norm": 1.5211336612701416, "learning_rate": 2.4822482507710584e-06, "loss": 0.0719, "step": 142210 }, { "epoch": 4.205950198142781, "grad_norm": 1.8057355880737305, "learning_rate": 2.482121560852119e-06, "loss": 0.0751, "step": 142220 }, { "epoch": 4.206245933636955, "grad_norm": 1.0698398351669312, "learning_rate": 2.4819948709331795e-06, "loss": 0.0703, "step": 142230 }, { "epoch": 4.206541669131129, "grad_norm": 0.6814219951629639, "learning_rate": 2.4818681810142403e-06, "loss": 0.0629, "step": 142240 }, { "epoch": 4.206837404625303, "grad_norm": 0.6191147565841675, "learning_rate": 2.4817414910953007e-06, "loss": 0.0595, "step": 142250 }, { "epoch": 4.207133140119478, "grad_norm": 1.3966705799102783, "learning_rate": 2.4816148011763615e-06, "loss": 0.0738, "step": 142260 }, { "epoch": 4.207428875613651, "grad_norm": 1.6508938074111938, "learning_rate": 2.4814881112574223e-06, "loss": 0.0889, "step": 142270 }, { "epoch": 4.2077246111078255, "grad_norm": 1.0961703062057495, "learning_rate": 2.481361421338483e-06, "loss": 0.0808, "step": 142280 }, { "epoch": 4.208020346601999, "grad_norm": 0.5711501836776733, "learning_rate": 2.4812347314195434e-06, "loss": 0.058, "step": 142290 }, { "epoch": 4.208316082096173, "grad_norm": 0.7477506995201111, "learning_rate": 2.481108041500604e-06, "loss": 0.0564, "step": 142300 }, { "epoch": 4.208611817590347, "grad_norm": 1.2005832195281982, "learning_rate": 2.4809813515816646e-06, "loss": 0.0701, "step": 142310 }, { "epoch": 4.208907553084521, "grad_norm": 0.8795585632324219, "learning_rate": 2.4808546616627254e-06, "loss": 0.0845, "step": 142320 }, { "epoch": 4.209203288578696, "grad_norm": 1.2106876373291016, "learning_rate": 2.4807279717437857e-06, "loss": 0.0802, "step": 142330 }, { "epoch": 4.209499024072869, "grad_norm": 1.2916005849838257, "learning_rate": 2.4806012818248465e-06, "loss": 0.0609, "step": 142340 }, { "epoch": 4.2097947595670435, "grad_norm": 0.8099114894866943, "learning_rate": 2.4804745919059073e-06, "loss": 0.0628, "step": 142350 }, { "epoch": 4.210090495061217, "grad_norm": 0.9695650935173035, "learning_rate": 2.480347901986968e-06, "loss": 0.076, "step": 142360 }, { "epoch": 4.210386230555391, "grad_norm": 0.8948245644569397, "learning_rate": 2.4802212120680285e-06, "loss": 0.0666, "step": 142370 }, { "epoch": 4.210681966049565, "grad_norm": 0.34832724928855896, "learning_rate": 2.4800945221490892e-06, "loss": 0.0732, "step": 142380 }, { "epoch": 4.210977701543739, "grad_norm": 1.0345067977905273, "learning_rate": 2.4799678322301496e-06, "loss": 0.0711, "step": 142390 }, { "epoch": 4.211273437037914, "grad_norm": 0.9172055125236511, "learning_rate": 2.4798411423112104e-06, "loss": 0.0541, "step": 142400 }, { "epoch": 4.211569172532087, "grad_norm": 0.7256161570549011, "learning_rate": 2.4797144523922708e-06, "loss": 0.0715, "step": 142410 }, { "epoch": 4.211864908026262, "grad_norm": 2.296053647994995, "learning_rate": 2.4795877624733316e-06, "loss": 0.0801, "step": 142420 }, { "epoch": 4.212160643520435, "grad_norm": 0.7308542728424072, "learning_rate": 2.4794610725543923e-06, "loss": 0.0659, "step": 142430 }, { "epoch": 4.2124563790146095, "grad_norm": 0.634495198726654, "learning_rate": 2.479334382635453e-06, "loss": 0.0636, "step": 142440 }, { "epoch": 4.212752114508783, "grad_norm": 0.6216789484024048, "learning_rate": 2.4792076927165135e-06, "loss": 0.0444, "step": 142450 }, { "epoch": 4.213047850002957, "grad_norm": 0.9341400265693665, "learning_rate": 2.479081002797574e-06, "loss": 0.0675, "step": 142460 }, { "epoch": 4.213343585497132, "grad_norm": 0.9815422892570496, "learning_rate": 2.4789543128786347e-06, "loss": 0.0701, "step": 142470 }, { "epoch": 4.213639320991305, "grad_norm": 1.6816905736923218, "learning_rate": 2.478827622959695e-06, "loss": 0.0667, "step": 142480 }, { "epoch": 4.21393505648548, "grad_norm": 0.6570014357566833, "learning_rate": 2.478700933040756e-06, "loss": 0.0807, "step": 142490 }, { "epoch": 4.214230791979653, "grad_norm": 1.538217544555664, "learning_rate": 2.478574243121816e-06, "loss": 0.0607, "step": 142500 }, { "epoch": 4.2145265274738275, "grad_norm": 1.277072787284851, "learning_rate": 2.4784475532028774e-06, "loss": 0.0752, "step": 142510 }, { "epoch": 4.214822262968001, "grad_norm": 1.4253716468811035, "learning_rate": 2.4783208632839378e-06, "loss": 0.0658, "step": 142520 }, { "epoch": 4.215117998462175, "grad_norm": 0.6670449376106262, "learning_rate": 2.4781941733649985e-06, "loss": 0.0605, "step": 142530 }, { "epoch": 4.21541373395635, "grad_norm": 1.340549349784851, "learning_rate": 2.478067483446059e-06, "loss": 0.0713, "step": 142540 }, { "epoch": 4.215709469450523, "grad_norm": 0.6211187243461609, "learning_rate": 2.4779407935271197e-06, "loss": 0.049, "step": 142550 }, { "epoch": 4.216005204944698, "grad_norm": 0.9919421672821045, "learning_rate": 2.47781410360818e-06, "loss": 0.0603, "step": 142560 }, { "epoch": 4.216300940438871, "grad_norm": 0.9537718296051025, "learning_rate": 2.477687413689241e-06, "loss": 0.076, "step": 142570 }, { "epoch": 4.2165966759330455, "grad_norm": 1.1113685369491577, "learning_rate": 2.4775607237703012e-06, "loss": 0.0599, "step": 142580 }, { "epoch": 4.216892411427219, "grad_norm": 0.8096379637718201, "learning_rate": 2.4774340338513624e-06, "loss": 0.0527, "step": 142590 }, { "epoch": 4.217188146921393, "grad_norm": 0.8499545454978943, "learning_rate": 2.477307343932423e-06, "loss": 0.0694, "step": 142600 }, { "epoch": 4.217483882415568, "grad_norm": 1.0860778093338013, "learning_rate": 2.4771806540134836e-06, "loss": 0.0566, "step": 142610 }, { "epoch": 4.217779617909741, "grad_norm": 0.9618926048278809, "learning_rate": 2.477053964094544e-06, "loss": 0.0691, "step": 142620 }, { "epoch": 4.218075353403916, "grad_norm": 0.7234383225440979, "learning_rate": 2.4769272741756047e-06, "loss": 0.0774, "step": 142630 }, { "epoch": 4.218371088898089, "grad_norm": 0.9296359419822693, "learning_rate": 2.476800584256665e-06, "loss": 0.082, "step": 142640 }, { "epoch": 4.218666824392264, "grad_norm": 1.231840968132019, "learning_rate": 2.476673894337726e-06, "loss": 0.0648, "step": 142650 }, { "epoch": 4.218962559886438, "grad_norm": 1.2456368207931519, "learning_rate": 2.4765472044187863e-06, "loss": 0.0614, "step": 142660 }, { "epoch": 4.2192582953806115, "grad_norm": 0.8534766435623169, "learning_rate": 2.4764205144998475e-06, "loss": 0.0736, "step": 142670 }, { "epoch": 4.219554030874786, "grad_norm": 1.360769271850586, "learning_rate": 2.476293824580908e-06, "loss": 0.063, "step": 142680 }, { "epoch": 4.219849766368959, "grad_norm": 1.0483636856079102, "learning_rate": 2.4761671346619686e-06, "loss": 0.07, "step": 142690 }, { "epoch": 4.220145501863134, "grad_norm": 0.8931286931037903, "learning_rate": 2.476040444743029e-06, "loss": 0.0791, "step": 142700 }, { "epoch": 4.220441237357307, "grad_norm": 1.4456793069839478, "learning_rate": 2.47591375482409e-06, "loss": 0.0593, "step": 142710 }, { "epoch": 4.220736972851482, "grad_norm": 0.9015984535217285, "learning_rate": 2.47578706490515e-06, "loss": 0.0738, "step": 142720 }, { "epoch": 4.221032708345656, "grad_norm": 1.1748460531234741, "learning_rate": 2.475660374986211e-06, "loss": 0.0677, "step": 142730 }, { "epoch": 4.2213284438398295, "grad_norm": 1.0580663681030273, "learning_rate": 2.4755336850672713e-06, "loss": 0.0733, "step": 142740 }, { "epoch": 4.221624179334004, "grad_norm": 1.097739815711975, "learning_rate": 2.4754069951483325e-06, "loss": 0.0658, "step": 142750 }, { "epoch": 4.221919914828177, "grad_norm": 1.0597702264785767, "learning_rate": 2.475280305229393e-06, "loss": 0.0634, "step": 142760 }, { "epoch": 4.222215650322352, "grad_norm": 1.06693696975708, "learning_rate": 2.4751536153104537e-06, "loss": 0.0828, "step": 142770 }, { "epoch": 4.222511385816525, "grad_norm": 0.6732436418533325, "learning_rate": 2.475026925391514e-06, "loss": 0.0598, "step": 142780 }, { "epoch": 4.2228071213107, "grad_norm": 0.5675897002220154, "learning_rate": 2.474900235472575e-06, "loss": 0.0786, "step": 142790 }, { "epoch": 4.223102856804874, "grad_norm": 1.4439575672149658, "learning_rate": 2.474773545553635e-06, "loss": 0.0615, "step": 142800 }, { "epoch": 4.223398592299048, "grad_norm": 0.8106903433799744, "learning_rate": 2.474646855634696e-06, "loss": 0.0628, "step": 142810 }, { "epoch": 4.223694327793222, "grad_norm": 0.7081692814826965, "learning_rate": 2.4745201657157564e-06, "loss": 0.0771, "step": 142820 }, { "epoch": 4.2239900632873955, "grad_norm": 0.7389923930168152, "learning_rate": 2.4743934757968176e-06, "loss": 0.0662, "step": 142830 }, { "epoch": 4.22428579878157, "grad_norm": 1.2621362209320068, "learning_rate": 2.474266785877878e-06, "loss": 0.0662, "step": 142840 }, { "epoch": 4.224581534275744, "grad_norm": 0.9718146920204163, "learning_rate": 2.4741400959589387e-06, "loss": 0.0609, "step": 142850 }, { "epoch": 4.224877269769918, "grad_norm": 0.9049882888793945, "learning_rate": 2.474013406039999e-06, "loss": 0.0657, "step": 142860 }, { "epoch": 4.225173005264092, "grad_norm": 0.9452255368232727, "learning_rate": 2.4738867161210595e-06, "loss": 0.0699, "step": 142870 }, { "epoch": 4.225468740758266, "grad_norm": 1.0788938999176025, "learning_rate": 2.4737600262021203e-06, "loss": 0.0614, "step": 142880 }, { "epoch": 4.22576447625244, "grad_norm": 0.6561617851257324, "learning_rate": 2.4736333362831806e-06, "loss": 0.0773, "step": 142890 }, { "epoch": 4.2260602117466135, "grad_norm": 1.668111801147461, "learning_rate": 2.4735066463642414e-06, "loss": 0.0771, "step": 142900 }, { "epoch": 4.226355947240788, "grad_norm": 1.057779312133789, "learning_rate": 2.473379956445302e-06, "loss": 0.0662, "step": 142910 }, { "epoch": 4.226651682734962, "grad_norm": 0.9223595261573792, "learning_rate": 2.473253266526363e-06, "loss": 0.0734, "step": 142920 }, { "epoch": 4.226947418229136, "grad_norm": 0.6534197926521301, "learning_rate": 2.4731265766074234e-06, "loss": 0.0686, "step": 142930 }, { "epoch": 4.22724315372331, "grad_norm": 0.6466550827026367, "learning_rate": 2.472999886688484e-06, "loss": 0.0711, "step": 142940 }, { "epoch": 4.227538889217484, "grad_norm": 0.6563474535942078, "learning_rate": 2.4728731967695445e-06, "loss": 0.0571, "step": 142950 }, { "epoch": 4.227834624711658, "grad_norm": 0.8531776070594788, "learning_rate": 2.4727465068506053e-06, "loss": 0.0818, "step": 142960 }, { "epoch": 4.2281303602058316, "grad_norm": 1.0674879550933838, "learning_rate": 2.4726198169316657e-06, "loss": 0.0835, "step": 142970 }, { "epoch": 4.228426095700006, "grad_norm": 0.7112433910369873, "learning_rate": 2.4724931270127265e-06, "loss": 0.0774, "step": 142980 }, { "epoch": 4.22872183119418, "grad_norm": 1.6878163814544678, "learning_rate": 2.4723664370937872e-06, "loss": 0.0786, "step": 142990 }, { "epoch": 4.229017566688354, "grad_norm": 0.7659482955932617, "learning_rate": 2.472239747174848e-06, "loss": 0.0744, "step": 143000 }, { "epoch": 4.229313302182528, "grad_norm": 0.8599721193313599, "learning_rate": 2.4721130572559084e-06, "loss": 0.0713, "step": 143010 }, { "epoch": 4.229609037676702, "grad_norm": 0.5879963040351868, "learning_rate": 2.471986367336969e-06, "loss": 0.0727, "step": 143020 }, { "epoch": 4.229904773170876, "grad_norm": 0.604611337184906, "learning_rate": 2.4718596774180296e-06, "loss": 0.0667, "step": 143030 }, { "epoch": 4.23020050866505, "grad_norm": 0.6053969860076904, "learning_rate": 2.4717329874990903e-06, "loss": 0.0725, "step": 143040 }, { "epoch": 4.230496244159224, "grad_norm": 0.6226977109909058, "learning_rate": 2.4716062975801507e-06, "loss": 0.0658, "step": 143050 }, { "epoch": 4.230791979653398, "grad_norm": 0.7588201761245728, "learning_rate": 2.4714796076612115e-06, "loss": 0.0732, "step": 143060 }, { "epoch": 4.231087715147572, "grad_norm": 0.8003724813461304, "learning_rate": 2.4713529177422723e-06, "loss": 0.0788, "step": 143070 }, { "epoch": 4.231383450641746, "grad_norm": 1.3100941181182861, "learning_rate": 2.471226227823333e-06, "loss": 0.076, "step": 143080 }, { "epoch": 4.23167918613592, "grad_norm": 0.8196238279342651, "learning_rate": 2.4710995379043934e-06, "loss": 0.0631, "step": 143090 }, { "epoch": 4.231974921630094, "grad_norm": 0.8269856572151184, "learning_rate": 2.4709728479854542e-06, "loss": 0.0619, "step": 143100 }, { "epoch": 4.232270657124268, "grad_norm": 0.6906707882881165, "learning_rate": 2.4708461580665146e-06, "loss": 0.0651, "step": 143110 }, { "epoch": 4.232566392618442, "grad_norm": 1.1563520431518555, "learning_rate": 2.4707194681475754e-06, "loss": 0.0698, "step": 143120 }, { "epoch": 4.232862128112616, "grad_norm": 0.5577437877655029, "learning_rate": 2.4705927782286358e-06, "loss": 0.0729, "step": 143130 }, { "epoch": 4.23315786360679, "grad_norm": 0.7976051568984985, "learning_rate": 2.4704660883096965e-06, "loss": 0.068, "step": 143140 }, { "epoch": 4.233453599100964, "grad_norm": 0.4865734875202179, "learning_rate": 2.4703393983907573e-06, "loss": 0.0546, "step": 143150 }, { "epoch": 4.233749334595138, "grad_norm": 1.8065612316131592, "learning_rate": 2.470212708471818e-06, "loss": 0.0628, "step": 143160 }, { "epoch": 4.234045070089312, "grad_norm": 1.334710955619812, "learning_rate": 2.4700860185528785e-06, "loss": 0.0768, "step": 143170 }, { "epoch": 4.234340805583486, "grad_norm": 1.1121118068695068, "learning_rate": 2.4699593286339393e-06, "loss": 0.0726, "step": 143180 }, { "epoch": 4.23463654107766, "grad_norm": 0.5769183039665222, "learning_rate": 2.4698326387149996e-06, "loss": 0.0606, "step": 143190 }, { "epoch": 4.2349322765718345, "grad_norm": 0.9535248279571533, "learning_rate": 2.4697059487960604e-06, "loss": 0.0519, "step": 143200 }, { "epoch": 4.235228012066008, "grad_norm": 1.4969322681427002, "learning_rate": 2.469579258877121e-06, "loss": 0.0656, "step": 143210 }, { "epoch": 4.235523747560182, "grad_norm": 0.6714465618133545, "learning_rate": 2.4694525689581816e-06, "loss": 0.0722, "step": 143220 }, { "epoch": 4.235819483054356, "grad_norm": 0.6639111638069153, "learning_rate": 2.4693258790392424e-06, "loss": 0.0678, "step": 143230 }, { "epoch": 4.23611521854853, "grad_norm": 0.6281713843345642, "learning_rate": 2.469199189120303e-06, "loss": 0.0649, "step": 143240 }, { "epoch": 4.236410954042705, "grad_norm": 0.9243355393409729, "learning_rate": 2.4690724992013635e-06, "loss": 0.0597, "step": 143250 }, { "epoch": 4.236706689536878, "grad_norm": 0.7524240016937256, "learning_rate": 2.4689458092824243e-06, "loss": 0.0808, "step": 143260 }, { "epoch": 4.2370024250310525, "grad_norm": 0.7991100549697876, "learning_rate": 2.4688191193634847e-06, "loss": 0.0896, "step": 143270 }, { "epoch": 4.237298160525226, "grad_norm": 0.8932698965072632, "learning_rate": 2.468692429444545e-06, "loss": 0.0644, "step": 143280 }, { "epoch": 4.2375938960194, "grad_norm": 0.9710367918014526, "learning_rate": 2.468565739525606e-06, "loss": 0.0799, "step": 143290 }, { "epoch": 4.237889631513574, "grad_norm": 1.1064152717590332, "learning_rate": 2.468439049606666e-06, "loss": 0.0582, "step": 143300 }, { "epoch": 4.238185367007748, "grad_norm": 1.1803276538848877, "learning_rate": 2.4683123596877274e-06, "loss": 0.0679, "step": 143310 }, { "epoch": 4.238481102501923, "grad_norm": 1.4577893018722534, "learning_rate": 2.468185669768788e-06, "loss": 0.0736, "step": 143320 }, { "epoch": 4.238776837996096, "grad_norm": 1.2299338579177856, "learning_rate": 2.4680589798498486e-06, "loss": 0.0641, "step": 143330 }, { "epoch": 4.239072573490271, "grad_norm": 0.7112215757369995, "learning_rate": 2.467932289930909e-06, "loss": 0.0636, "step": 143340 }, { "epoch": 4.239368308984444, "grad_norm": 0.9449148774147034, "learning_rate": 2.4678056000119697e-06, "loss": 0.0638, "step": 143350 }, { "epoch": 4.239664044478618, "grad_norm": 0.6795408725738525, "learning_rate": 2.46767891009303e-06, "loss": 0.0676, "step": 143360 }, { "epoch": 4.239959779972792, "grad_norm": 0.7745653390884399, "learning_rate": 2.467552220174091e-06, "loss": 0.0813, "step": 143370 }, { "epoch": 4.240255515466966, "grad_norm": 1.3865172863006592, "learning_rate": 2.4674255302551513e-06, "loss": 0.0708, "step": 143380 }, { "epoch": 4.240551250961141, "grad_norm": 0.5400403141975403, "learning_rate": 2.4672988403362125e-06, "loss": 0.0546, "step": 143390 }, { "epoch": 4.240846986455314, "grad_norm": 0.8393450975418091, "learning_rate": 2.467172150417273e-06, "loss": 0.0713, "step": 143400 }, { "epoch": 4.241142721949489, "grad_norm": 0.9027358293533325, "learning_rate": 2.4670454604983336e-06, "loss": 0.0718, "step": 143410 }, { "epoch": 4.241438457443662, "grad_norm": 1.0263339281082153, "learning_rate": 2.466918770579394e-06, "loss": 0.0822, "step": 143420 }, { "epoch": 4.2417341929378365, "grad_norm": 0.7681092023849487, "learning_rate": 2.4667920806604548e-06, "loss": 0.0658, "step": 143430 }, { "epoch": 4.24202992843201, "grad_norm": 0.7281895279884338, "learning_rate": 2.466665390741515e-06, "loss": 0.0639, "step": 143440 }, { "epoch": 4.242325663926184, "grad_norm": 0.8209909200668335, "learning_rate": 2.466538700822576e-06, "loss": 0.0655, "step": 143450 }, { "epoch": 4.242621399420359, "grad_norm": 1.8364940881729126, "learning_rate": 2.4664120109036363e-06, "loss": 0.0843, "step": 143460 }, { "epoch": 4.242917134914532, "grad_norm": 2.2857279777526855, "learning_rate": 2.4662853209846975e-06, "loss": 0.089, "step": 143470 }, { "epoch": 4.243212870408707, "grad_norm": 1.6364673376083374, "learning_rate": 2.466158631065758e-06, "loss": 0.0747, "step": 143480 }, { "epoch": 4.24350860590288, "grad_norm": 1.0320971012115479, "learning_rate": 2.4660319411468187e-06, "loss": 0.0779, "step": 143490 }, { "epoch": 4.2438043413970545, "grad_norm": 1.1371005773544312, "learning_rate": 2.465905251227879e-06, "loss": 0.0657, "step": 143500 }, { "epoch": 4.244100076891229, "grad_norm": 1.0744545459747314, "learning_rate": 2.46577856130894e-06, "loss": 0.0783, "step": 143510 }, { "epoch": 4.244395812385402, "grad_norm": 0.6679837703704834, "learning_rate": 2.46565187139e-06, "loss": 0.0738, "step": 143520 }, { "epoch": 4.244691547879577, "grad_norm": 1.038704514503479, "learning_rate": 2.465525181471061e-06, "loss": 0.0677, "step": 143530 }, { "epoch": 4.24498728337375, "grad_norm": 0.7141715288162231, "learning_rate": 2.4653984915521213e-06, "loss": 0.0725, "step": 143540 }, { "epoch": 4.245283018867925, "grad_norm": 1.1641076803207397, "learning_rate": 2.4652718016331826e-06, "loss": 0.0728, "step": 143550 }, { "epoch": 4.245578754362098, "grad_norm": 1.4041955471038818, "learning_rate": 2.465145111714243e-06, "loss": 0.0534, "step": 143560 }, { "epoch": 4.245874489856273, "grad_norm": 0.813837468624115, "learning_rate": 2.4650184217953037e-06, "loss": 0.0792, "step": 143570 }, { "epoch": 4.246170225350447, "grad_norm": 0.6320449709892273, "learning_rate": 2.464891731876364e-06, "loss": 0.0689, "step": 143580 }, { "epoch": 4.2464659608446205, "grad_norm": 1.2872668504714966, "learning_rate": 2.464765041957425e-06, "loss": 0.0802, "step": 143590 }, { "epoch": 4.246761696338795, "grad_norm": 0.8065491318702698, "learning_rate": 2.4646383520384852e-06, "loss": 0.0688, "step": 143600 }, { "epoch": 4.247057431832968, "grad_norm": 1.060483455657959, "learning_rate": 2.464511662119546e-06, "loss": 0.0711, "step": 143610 }, { "epoch": 4.247353167327143, "grad_norm": 0.8002492189407349, "learning_rate": 2.4643849722006064e-06, "loss": 0.0814, "step": 143620 }, { "epoch": 4.247648902821316, "grad_norm": 0.7341111898422241, "learning_rate": 2.4642582822816676e-06, "loss": 0.0584, "step": 143630 }, { "epoch": 4.247944638315491, "grad_norm": 1.8299870491027832, "learning_rate": 2.464131592362728e-06, "loss": 0.1011, "step": 143640 }, { "epoch": 4.248240373809665, "grad_norm": 0.8279021978378296, "learning_rate": 2.4640049024437888e-06, "loss": 0.0571, "step": 143650 }, { "epoch": 4.2485361093038385, "grad_norm": 0.7667034864425659, "learning_rate": 2.463878212524849e-06, "loss": 0.0775, "step": 143660 }, { "epoch": 4.248831844798013, "grad_norm": 0.5202335715293884, "learning_rate": 2.46375152260591e-06, "loss": 0.0773, "step": 143670 }, { "epoch": 4.249127580292186, "grad_norm": 0.7384215593338013, "learning_rate": 2.4636248326869703e-06, "loss": 0.0753, "step": 143680 }, { "epoch": 4.249423315786361, "grad_norm": 0.9076839089393616, "learning_rate": 2.4634981427680306e-06, "loss": 0.0771, "step": 143690 }, { "epoch": 4.249719051280534, "grad_norm": 0.8793022632598877, "learning_rate": 2.4633714528490914e-06, "loss": 0.0625, "step": 143700 }, { "epoch": 4.250014786774709, "grad_norm": 0.918801486492157, "learning_rate": 2.463244762930152e-06, "loss": 0.0729, "step": 143710 }, { "epoch": 4.250310522268883, "grad_norm": 0.7477115988731384, "learning_rate": 2.463118073011213e-06, "loss": 0.0757, "step": 143720 }, { "epoch": 4.250606257763057, "grad_norm": 0.8306111097335815, "learning_rate": 2.4629913830922734e-06, "loss": 0.0659, "step": 143730 }, { "epoch": 4.250901993257231, "grad_norm": 0.8378888368606567, "learning_rate": 2.462864693173334e-06, "loss": 0.0649, "step": 143740 }, { "epoch": 4.2511977287514044, "grad_norm": 0.6133606433868408, "learning_rate": 2.4627380032543945e-06, "loss": 0.0598, "step": 143750 }, { "epoch": 4.251493464245579, "grad_norm": 0.8681881427764893, "learning_rate": 2.4626113133354553e-06, "loss": 0.0599, "step": 143760 }, { "epoch": 4.251789199739752, "grad_norm": 0.7940186262130737, "learning_rate": 2.4624846234165157e-06, "loss": 0.0628, "step": 143770 }, { "epoch": 4.252084935233927, "grad_norm": 0.6191936731338501, "learning_rate": 2.4623579334975765e-06, "loss": 0.0728, "step": 143780 }, { "epoch": 4.252380670728101, "grad_norm": 0.6787294745445251, "learning_rate": 2.4622312435786373e-06, "loss": 0.068, "step": 143790 }, { "epoch": 4.252676406222275, "grad_norm": 0.9280147552490234, "learning_rate": 2.462104553659698e-06, "loss": 0.0704, "step": 143800 }, { "epoch": 4.252972141716449, "grad_norm": 0.9224207997322083, "learning_rate": 2.4619778637407584e-06, "loss": 0.0605, "step": 143810 }, { "epoch": 4.2532678772106225, "grad_norm": 1.4252327680587769, "learning_rate": 2.4618511738218192e-06, "loss": 0.08, "step": 143820 }, { "epoch": 4.253563612704797, "grad_norm": 0.9335815906524658, "learning_rate": 2.4617244839028796e-06, "loss": 0.0716, "step": 143830 }, { "epoch": 4.25385934819897, "grad_norm": 1.8505845069885254, "learning_rate": 2.4615977939839404e-06, "loss": 0.0649, "step": 143840 }, { "epoch": 4.254155083693145, "grad_norm": 1.117702603340149, "learning_rate": 2.4614711040650007e-06, "loss": 0.0597, "step": 143850 }, { "epoch": 4.254450819187319, "grad_norm": 0.9452509880065918, "learning_rate": 2.4613444141460615e-06, "loss": 0.0576, "step": 143860 }, { "epoch": 4.254746554681493, "grad_norm": 2.2536261081695557, "learning_rate": 2.4612177242271223e-06, "loss": 0.0797, "step": 143870 }, { "epoch": 4.255042290175667, "grad_norm": 0.8456873893737793, "learning_rate": 2.461091034308183e-06, "loss": 0.0865, "step": 143880 }, { "epoch": 4.2553380256698405, "grad_norm": 0.6192169189453125, "learning_rate": 2.4609643443892435e-06, "loss": 0.0545, "step": 143890 }, { "epoch": 4.255633761164015, "grad_norm": 0.6511799693107605, "learning_rate": 2.4608376544703043e-06, "loss": 0.0614, "step": 143900 }, { "epoch": 4.255929496658189, "grad_norm": 1.117214560508728, "learning_rate": 2.4607109645513646e-06, "loss": 0.0737, "step": 143910 }, { "epoch": 4.256225232152363, "grad_norm": 1.3122122287750244, "learning_rate": 2.4605842746324254e-06, "loss": 0.07, "step": 143920 }, { "epoch": 4.256520967646537, "grad_norm": 0.799565851688385, "learning_rate": 2.4604575847134858e-06, "loss": 0.0834, "step": 143930 }, { "epoch": 4.256816703140711, "grad_norm": 0.9634721875190735, "learning_rate": 2.4603308947945466e-06, "loss": 0.0797, "step": 143940 }, { "epoch": 4.257112438634885, "grad_norm": 0.543265700340271, "learning_rate": 2.4602042048756074e-06, "loss": 0.0483, "step": 143950 }, { "epoch": 4.257408174129059, "grad_norm": 0.909757673740387, "learning_rate": 2.460077514956668e-06, "loss": 0.0607, "step": 143960 }, { "epoch": 4.257703909623233, "grad_norm": 0.6366667747497559, "learning_rate": 2.4599508250377285e-06, "loss": 0.0702, "step": 143970 }, { "epoch": 4.257999645117407, "grad_norm": 0.8009206652641296, "learning_rate": 2.4598241351187893e-06, "loss": 0.0711, "step": 143980 }, { "epoch": 4.258295380611581, "grad_norm": 0.5210387706756592, "learning_rate": 2.4596974451998497e-06, "loss": 0.0589, "step": 143990 }, { "epoch": 4.258591116105755, "grad_norm": 1.120779275894165, "learning_rate": 2.4595707552809105e-06, "loss": 0.0664, "step": 144000 }, { "epoch": 4.258886851599929, "grad_norm": 1.5679117441177368, "learning_rate": 2.459444065361971e-06, "loss": 0.0773, "step": 144010 }, { "epoch": 4.259182587094103, "grad_norm": 0.9723390936851501, "learning_rate": 2.4593173754430316e-06, "loss": 0.0926, "step": 144020 }, { "epoch": 4.259478322588277, "grad_norm": 1.2092528343200684, "learning_rate": 2.4591906855240924e-06, "loss": 0.0793, "step": 144030 }, { "epoch": 4.259774058082451, "grad_norm": 0.811924934387207, "learning_rate": 2.459063995605153e-06, "loss": 0.0605, "step": 144040 }, { "epoch": 4.260069793576625, "grad_norm": 0.6882352232933044, "learning_rate": 2.4589373056862136e-06, "loss": 0.0745, "step": 144050 }, { "epoch": 4.260365529070799, "grad_norm": 0.7056641578674316, "learning_rate": 2.4588106157672744e-06, "loss": 0.0642, "step": 144060 }, { "epoch": 4.260661264564973, "grad_norm": 0.8625096678733826, "learning_rate": 2.4586839258483347e-06, "loss": 0.0782, "step": 144070 }, { "epoch": 4.260957000059147, "grad_norm": 1.0184030532836914, "learning_rate": 2.4585572359293955e-06, "loss": 0.079, "step": 144080 }, { "epoch": 4.261252735553321, "grad_norm": 0.7901127338409424, "learning_rate": 2.458430546010456e-06, "loss": 0.0728, "step": 144090 }, { "epoch": 4.261548471047496, "grad_norm": 0.9692967534065247, "learning_rate": 2.4583038560915162e-06, "loss": 0.0662, "step": 144100 }, { "epoch": 4.261844206541669, "grad_norm": 1.9906939268112183, "learning_rate": 2.4581771661725775e-06, "loss": 0.0643, "step": 144110 }, { "epoch": 4.2621399420358435, "grad_norm": 0.628618061542511, "learning_rate": 2.458050476253638e-06, "loss": 0.0722, "step": 144120 }, { "epoch": 4.262435677530017, "grad_norm": 2.1075356006622314, "learning_rate": 2.4579237863346986e-06, "loss": 0.0732, "step": 144130 }, { "epoch": 4.262731413024191, "grad_norm": 1.26961350440979, "learning_rate": 2.457797096415759e-06, "loss": 0.0636, "step": 144140 }, { "epoch": 4.263027148518365, "grad_norm": 1.5642223358154297, "learning_rate": 2.4576704064968198e-06, "loss": 0.0693, "step": 144150 }, { "epoch": 4.263322884012539, "grad_norm": 0.8814094066619873, "learning_rate": 2.45754371657788e-06, "loss": 0.0707, "step": 144160 }, { "epoch": 4.263618619506714, "grad_norm": 0.8367817401885986, "learning_rate": 2.457417026658941e-06, "loss": 0.0769, "step": 144170 }, { "epoch": 4.263914355000887, "grad_norm": 1.6247638463974, "learning_rate": 2.4572903367400013e-06, "loss": 0.0742, "step": 144180 }, { "epoch": 4.2642100904950615, "grad_norm": 1.6242084503173828, "learning_rate": 2.4571636468210625e-06, "loss": 0.07, "step": 144190 }, { "epoch": 4.264505825989235, "grad_norm": 0.731078028678894, "learning_rate": 2.457036956902123e-06, "loss": 0.0563, "step": 144200 }, { "epoch": 4.264801561483409, "grad_norm": 0.8338714241981506, "learning_rate": 2.4569102669831837e-06, "loss": 0.0561, "step": 144210 }, { "epoch": 4.265097296977583, "grad_norm": 0.6891733407974243, "learning_rate": 2.456783577064244e-06, "loss": 0.0826, "step": 144220 }, { "epoch": 4.265393032471757, "grad_norm": 0.9588093161582947, "learning_rate": 2.456656887145305e-06, "loss": 0.0721, "step": 144230 }, { "epoch": 4.265688767965932, "grad_norm": 0.7271934747695923, "learning_rate": 2.456530197226365e-06, "loss": 0.0525, "step": 144240 }, { "epoch": 4.265984503460105, "grad_norm": 0.5258010029792786, "learning_rate": 2.456403507307426e-06, "loss": 0.0543, "step": 144250 }, { "epoch": 4.2662802389542795, "grad_norm": 0.9338403940200806, "learning_rate": 2.4562768173884863e-06, "loss": 0.0607, "step": 144260 }, { "epoch": 4.266575974448453, "grad_norm": 0.979846179485321, "learning_rate": 2.4561501274695475e-06, "loss": 0.0618, "step": 144270 }, { "epoch": 4.266871709942627, "grad_norm": 0.7768211364746094, "learning_rate": 2.456023437550608e-06, "loss": 0.0728, "step": 144280 }, { "epoch": 4.267167445436801, "grad_norm": 0.9459567070007324, "learning_rate": 2.4558967476316687e-06, "loss": 0.0729, "step": 144290 }, { "epoch": 4.267463180930975, "grad_norm": 0.8831136226654053, "learning_rate": 2.455770057712729e-06, "loss": 0.0518, "step": 144300 }, { "epoch": 4.26775891642515, "grad_norm": 0.6765552163124084, "learning_rate": 2.45564336779379e-06, "loss": 0.0738, "step": 144310 }, { "epoch": 4.268054651919323, "grad_norm": 0.8457659482955933, "learning_rate": 2.4555166778748502e-06, "loss": 0.0661, "step": 144320 }, { "epoch": 4.268350387413498, "grad_norm": 1.087715983390808, "learning_rate": 2.455389987955911e-06, "loss": 0.0704, "step": 144330 }, { "epoch": 4.268646122907671, "grad_norm": 0.5797798037528992, "learning_rate": 2.4552632980369714e-06, "loss": 0.0622, "step": 144340 }, { "epoch": 4.2689418584018455, "grad_norm": 0.8212094902992249, "learning_rate": 2.4551366081180326e-06, "loss": 0.0494, "step": 144350 }, { "epoch": 4.269237593896019, "grad_norm": 1.127842664718628, "learning_rate": 2.455009918199093e-06, "loss": 0.068, "step": 144360 }, { "epoch": 4.269533329390193, "grad_norm": 1.5140032768249512, "learning_rate": 2.4548832282801537e-06, "loss": 0.0948, "step": 144370 }, { "epoch": 4.269829064884368, "grad_norm": 1.0160633325576782, "learning_rate": 2.454756538361214e-06, "loss": 0.0788, "step": 144380 }, { "epoch": 4.270124800378541, "grad_norm": 0.6740337014198303, "learning_rate": 2.454629848442275e-06, "loss": 0.0514, "step": 144390 }, { "epoch": 4.270420535872716, "grad_norm": 0.9269116520881653, "learning_rate": 2.4545031585233353e-06, "loss": 0.0702, "step": 144400 }, { "epoch": 4.270716271366889, "grad_norm": 1.4726133346557617, "learning_rate": 2.454376468604396e-06, "loss": 0.0678, "step": 144410 }, { "epoch": 4.2710120068610635, "grad_norm": 0.8348960876464844, "learning_rate": 2.4542497786854564e-06, "loss": 0.0659, "step": 144420 }, { "epoch": 4.271307742355237, "grad_norm": 1.0436640977859497, "learning_rate": 2.4541230887665176e-06, "loss": 0.0818, "step": 144430 }, { "epoch": 4.271603477849411, "grad_norm": 1.0072636604309082, "learning_rate": 2.453996398847578e-06, "loss": 0.083, "step": 144440 }, { "epoch": 4.271899213343586, "grad_norm": 0.6573046445846558, "learning_rate": 2.453869708928639e-06, "loss": 0.0564, "step": 144450 }, { "epoch": 4.272194948837759, "grad_norm": 1.3162074089050293, "learning_rate": 2.453743019009699e-06, "loss": 0.0719, "step": 144460 }, { "epoch": 4.272490684331934, "grad_norm": 0.6623771786689758, "learning_rate": 2.45361632909076e-06, "loss": 0.0615, "step": 144470 }, { "epoch": 4.272786419826107, "grad_norm": 0.9150333404541016, "learning_rate": 2.4534896391718203e-06, "loss": 0.0799, "step": 144480 }, { "epoch": 4.273082155320282, "grad_norm": 1.8086960315704346, "learning_rate": 2.453362949252881e-06, "loss": 0.0638, "step": 144490 }, { "epoch": 4.273377890814456, "grad_norm": 1.2056078910827637, "learning_rate": 2.4532362593339415e-06, "loss": 0.0706, "step": 144500 }, { "epoch": 4.2736736263086295, "grad_norm": 0.9495550990104675, "learning_rate": 2.4531095694150027e-06, "loss": 0.0694, "step": 144510 }, { "epoch": 4.273969361802804, "grad_norm": 0.9442737698554993, "learning_rate": 2.452982879496063e-06, "loss": 0.0584, "step": 144520 }, { "epoch": 4.274265097296977, "grad_norm": 0.8061621785163879, "learning_rate": 2.4528561895771234e-06, "loss": 0.0726, "step": 144530 }, { "epoch": 4.274560832791152, "grad_norm": 1.0687566995620728, "learning_rate": 2.452729499658184e-06, "loss": 0.0656, "step": 144540 }, { "epoch": 4.274856568285325, "grad_norm": 0.6903408765792847, "learning_rate": 2.4526028097392446e-06, "loss": 0.0681, "step": 144550 }, { "epoch": 4.2751523037795, "grad_norm": 0.9843521118164062, "learning_rate": 2.4524761198203054e-06, "loss": 0.0704, "step": 144560 }, { "epoch": 4.275448039273674, "grad_norm": 0.7298693060874939, "learning_rate": 2.4523494299013657e-06, "loss": 0.0664, "step": 144570 }, { "epoch": 4.2757437747678475, "grad_norm": 0.8874750137329102, "learning_rate": 2.4522227399824265e-06, "loss": 0.0989, "step": 144580 }, { "epoch": 4.276039510262022, "grad_norm": 0.926384687423706, "learning_rate": 2.452096050063487e-06, "loss": 0.0736, "step": 144590 }, { "epoch": 4.276335245756195, "grad_norm": 0.6744692921638489, "learning_rate": 2.451969360144548e-06, "loss": 0.0646, "step": 144600 }, { "epoch": 4.27663098125037, "grad_norm": 1.1998027563095093, "learning_rate": 2.4518426702256085e-06, "loss": 0.0572, "step": 144610 }, { "epoch": 4.276926716744543, "grad_norm": 0.8803945779800415, "learning_rate": 2.4517159803066692e-06, "loss": 0.0713, "step": 144620 }, { "epoch": 4.277222452238718, "grad_norm": 1.0789109468460083, "learning_rate": 2.4515892903877296e-06, "loss": 0.0678, "step": 144630 }, { "epoch": 4.277518187732892, "grad_norm": 1.6669970750808716, "learning_rate": 2.4514626004687904e-06, "loss": 0.0677, "step": 144640 }, { "epoch": 4.2778139232270656, "grad_norm": 0.6087762117385864, "learning_rate": 2.4513359105498508e-06, "loss": 0.0537, "step": 144650 }, { "epoch": 4.27810965872124, "grad_norm": 1.0058395862579346, "learning_rate": 2.4512092206309116e-06, "loss": 0.0768, "step": 144660 }, { "epoch": 4.278405394215413, "grad_norm": 0.845879077911377, "learning_rate": 2.451082530711972e-06, "loss": 0.0782, "step": 144670 }, { "epoch": 4.278701129709588, "grad_norm": 1.2041020393371582, "learning_rate": 2.450955840793033e-06, "loss": 0.0804, "step": 144680 }, { "epoch": 4.278996865203762, "grad_norm": 0.8816413283348083, "learning_rate": 2.4508291508740935e-06, "loss": 0.0746, "step": 144690 }, { "epoch": 4.279292600697936, "grad_norm": 0.7997035384178162, "learning_rate": 2.4507024609551543e-06, "loss": 0.0453, "step": 144700 }, { "epoch": 4.27958833619211, "grad_norm": 1.048509120941162, "learning_rate": 2.4505757710362147e-06, "loss": 0.0584, "step": 144710 }, { "epoch": 4.279884071686284, "grad_norm": 0.7633653283119202, "learning_rate": 2.4504490811172754e-06, "loss": 0.0736, "step": 144720 }, { "epoch": 4.280179807180458, "grad_norm": 2.736013650894165, "learning_rate": 2.450322391198336e-06, "loss": 0.0674, "step": 144730 }, { "epoch": 4.2804755426746315, "grad_norm": 1.2753866910934448, "learning_rate": 2.4501957012793966e-06, "loss": 0.0642, "step": 144740 }, { "epoch": 4.280771278168806, "grad_norm": 0.905206561088562, "learning_rate": 2.450069011360457e-06, "loss": 0.0564, "step": 144750 }, { "epoch": 4.28106701366298, "grad_norm": 0.6713292598724365, "learning_rate": 2.449942321441518e-06, "loss": 0.0793, "step": 144760 }, { "epoch": 4.281362749157154, "grad_norm": 0.83746737241745, "learning_rate": 2.4498156315225785e-06, "loss": 0.0732, "step": 144770 }, { "epoch": 4.281658484651328, "grad_norm": 1.1584969758987427, "learning_rate": 2.4496889416036393e-06, "loss": 0.0806, "step": 144780 }, { "epoch": 4.281954220145502, "grad_norm": 1.8591400384902954, "learning_rate": 2.4495622516846997e-06, "loss": 0.0757, "step": 144790 }, { "epoch": 4.282249955639676, "grad_norm": 0.34215542674064636, "learning_rate": 2.4494355617657605e-06, "loss": 0.0466, "step": 144800 }, { "epoch": 4.2825456911338495, "grad_norm": 1.5727592706680298, "learning_rate": 2.449308871846821e-06, "loss": 0.0737, "step": 144810 }, { "epoch": 4.282841426628024, "grad_norm": 0.5124838948249817, "learning_rate": 2.4491821819278816e-06, "loss": 0.0686, "step": 144820 }, { "epoch": 4.283137162122198, "grad_norm": 1.08883535861969, "learning_rate": 2.449055492008942e-06, "loss": 0.0685, "step": 144830 }, { "epoch": 4.283432897616372, "grad_norm": 1.800100564956665, "learning_rate": 2.4489288020900032e-06, "loss": 0.0745, "step": 144840 }, { "epoch": 4.283728633110546, "grad_norm": 1.1308369636535645, "learning_rate": 2.4488021121710636e-06, "loss": 0.0624, "step": 144850 }, { "epoch": 4.28402436860472, "grad_norm": 1.534363865852356, "learning_rate": 2.4486754222521244e-06, "loss": 0.072, "step": 144860 }, { "epoch": 4.284320104098894, "grad_norm": 0.5616556406021118, "learning_rate": 2.4485487323331847e-06, "loss": 0.0731, "step": 144870 }, { "epoch": 4.284615839593068, "grad_norm": 0.8100592494010925, "learning_rate": 2.4484220424142455e-06, "loss": 0.0688, "step": 144880 }, { "epoch": 4.284911575087242, "grad_norm": 1.401837706565857, "learning_rate": 2.448295352495306e-06, "loss": 0.0863, "step": 144890 }, { "epoch": 4.285207310581416, "grad_norm": 0.8679303526878357, "learning_rate": 2.4481686625763667e-06, "loss": 0.0604, "step": 144900 }, { "epoch": 4.28550304607559, "grad_norm": 0.7797789573669434, "learning_rate": 2.448041972657427e-06, "loss": 0.0757, "step": 144910 }, { "epoch": 4.285798781569764, "grad_norm": 1.249908685684204, "learning_rate": 2.4479152827384883e-06, "loss": 0.0629, "step": 144920 }, { "epoch": 4.286094517063938, "grad_norm": 0.8557584285736084, "learning_rate": 2.4477885928195486e-06, "loss": 0.0911, "step": 144930 }, { "epoch": 4.286390252558112, "grad_norm": 1.0393285751342773, "learning_rate": 2.447661902900609e-06, "loss": 0.0688, "step": 144940 }, { "epoch": 4.286685988052286, "grad_norm": 0.8330293297767639, "learning_rate": 2.44753521298167e-06, "loss": 0.0506, "step": 144950 }, { "epoch": 4.28698172354646, "grad_norm": 0.6583319902420044, "learning_rate": 2.44740852306273e-06, "loss": 0.0692, "step": 144960 }, { "epoch": 4.287277459040634, "grad_norm": 0.8245336413383484, "learning_rate": 2.447281833143791e-06, "loss": 0.0796, "step": 144970 }, { "epoch": 4.287573194534808, "grad_norm": 0.7597522735595703, "learning_rate": 2.4471551432248513e-06, "loss": 0.0717, "step": 144980 }, { "epoch": 4.287868930028982, "grad_norm": 1.2600576877593994, "learning_rate": 2.447028453305912e-06, "loss": 0.063, "step": 144990 }, { "epoch": 4.288164665523156, "grad_norm": 0.4306749403476715, "learning_rate": 2.446901763386973e-06, "loss": 0.0543, "step": 145000 }, { "epoch": 4.28846040101733, "grad_norm": 0.872155487537384, "learning_rate": 2.4467750734680337e-06, "loss": 0.0711, "step": 145010 }, { "epoch": 4.288756136511504, "grad_norm": 0.8809186220169067, "learning_rate": 2.446648383549094e-06, "loss": 0.0768, "step": 145020 }, { "epoch": 4.289051872005678, "grad_norm": 0.8998440504074097, "learning_rate": 2.446521693630155e-06, "loss": 0.0666, "step": 145030 }, { "epoch": 4.289347607499852, "grad_norm": 0.7108694314956665, "learning_rate": 2.446395003711215e-06, "loss": 0.0775, "step": 145040 }, { "epoch": 4.289643342994026, "grad_norm": 0.6683861017227173, "learning_rate": 2.446268313792276e-06, "loss": 0.0628, "step": 145050 }, { "epoch": 4.2899390784882, "grad_norm": 1.048280954360962, "learning_rate": 2.4461416238733364e-06, "loss": 0.0822, "step": 145060 }, { "epoch": 4.290234813982374, "grad_norm": 0.8624784350395203, "learning_rate": 2.446014933954397e-06, "loss": 0.0734, "step": 145070 }, { "epoch": 4.290530549476548, "grad_norm": 0.9356296062469482, "learning_rate": 2.445888244035458e-06, "loss": 0.0841, "step": 145080 }, { "epoch": 4.290826284970722, "grad_norm": 0.9734877943992615, "learning_rate": 2.4457615541165187e-06, "loss": 0.0714, "step": 145090 }, { "epoch": 4.291122020464896, "grad_norm": 0.8556122779846191, "learning_rate": 2.445634864197579e-06, "loss": 0.0642, "step": 145100 }, { "epoch": 4.2914177559590705, "grad_norm": 0.9972327947616577, "learning_rate": 2.44550817427864e-06, "loss": 0.0586, "step": 145110 }, { "epoch": 4.291713491453244, "grad_norm": 0.9285668134689331, "learning_rate": 2.4453814843597003e-06, "loss": 0.0886, "step": 145120 }, { "epoch": 4.292009226947418, "grad_norm": 0.5972156524658203, "learning_rate": 2.445254794440761e-06, "loss": 0.0648, "step": 145130 }, { "epoch": 4.292304962441592, "grad_norm": 0.7240452766418457, "learning_rate": 2.4451281045218214e-06, "loss": 0.0669, "step": 145140 }, { "epoch": 4.292600697935766, "grad_norm": 0.8130641579627991, "learning_rate": 2.445001414602882e-06, "loss": 0.0577, "step": 145150 }, { "epoch": 4.292896433429941, "grad_norm": 1.3324662446975708, "learning_rate": 2.444874724683943e-06, "loss": 0.0729, "step": 145160 }, { "epoch": 4.293192168924114, "grad_norm": 0.8983137607574463, "learning_rate": 2.4447480347650038e-06, "loss": 0.0764, "step": 145170 }, { "epoch": 4.2934879044182885, "grad_norm": 1.6623369455337524, "learning_rate": 2.444621344846064e-06, "loss": 0.0746, "step": 145180 }, { "epoch": 4.293783639912462, "grad_norm": 0.8807225227355957, "learning_rate": 2.444494654927125e-06, "loss": 0.068, "step": 145190 }, { "epoch": 4.294079375406636, "grad_norm": 0.6491151452064514, "learning_rate": 2.4443679650081853e-06, "loss": 0.0715, "step": 145200 }, { "epoch": 4.29437511090081, "grad_norm": 0.9752838015556335, "learning_rate": 2.444241275089246e-06, "loss": 0.0821, "step": 145210 }, { "epoch": 4.294670846394984, "grad_norm": 0.8276843428611755, "learning_rate": 2.4441145851703065e-06, "loss": 0.0889, "step": 145220 }, { "epoch": 4.294966581889159, "grad_norm": 0.6592323184013367, "learning_rate": 2.4439878952513672e-06, "loss": 0.0603, "step": 145230 }, { "epoch": 4.295262317383332, "grad_norm": 0.6184729337692261, "learning_rate": 2.443861205332428e-06, "loss": 0.0655, "step": 145240 }, { "epoch": 4.295558052877507, "grad_norm": 1.3861572742462158, "learning_rate": 2.443734515413489e-06, "loss": 0.0647, "step": 145250 }, { "epoch": 4.29585378837168, "grad_norm": 0.9914331436157227, "learning_rate": 2.443607825494549e-06, "loss": 0.0721, "step": 145260 }, { "epoch": 4.2961495238658545, "grad_norm": 1.0587794780731201, "learning_rate": 2.44348113557561e-06, "loss": 0.0742, "step": 145270 }, { "epoch": 4.296445259360029, "grad_norm": 1.0239737033843994, "learning_rate": 2.4433544456566703e-06, "loss": 0.0627, "step": 145280 }, { "epoch": 4.296740994854202, "grad_norm": 1.3162531852722168, "learning_rate": 2.443227755737731e-06, "loss": 0.0652, "step": 145290 }, { "epoch": 4.297036730348377, "grad_norm": 1.2462866306304932, "learning_rate": 2.4431010658187915e-06, "loss": 0.0745, "step": 145300 }, { "epoch": 4.29733246584255, "grad_norm": 1.1426210403442383, "learning_rate": 2.4429743758998523e-06, "loss": 0.0771, "step": 145310 }, { "epoch": 4.297628201336725, "grad_norm": 0.8685059547424316, "learning_rate": 2.442847685980913e-06, "loss": 0.0796, "step": 145320 }, { "epoch": 4.297923936830898, "grad_norm": 0.6178910136222839, "learning_rate": 2.442720996061974e-06, "loss": 0.0732, "step": 145330 }, { "epoch": 4.2982196723250725, "grad_norm": 1.4550375938415527, "learning_rate": 2.4425943061430342e-06, "loss": 0.0746, "step": 145340 }, { "epoch": 4.298515407819247, "grad_norm": 1.8584822416305542, "learning_rate": 2.4424676162240946e-06, "loss": 0.0666, "step": 145350 }, { "epoch": 4.29881114331342, "grad_norm": 0.8705131411552429, "learning_rate": 2.4423409263051554e-06, "loss": 0.0593, "step": 145360 }, { "epoch": 4.299106878807595, "grad_norm": 0.7330602407455444, "learning_rate": 2.4422142363862158e-06, "loss": 0.0671, "step": 145370 }, { "epoch": 4.299402614301768, "grad_norm": 1.0690550804138184, "learning_rate": 2.4420875464672765e-06, "loss": 0.0887, "step": 145380 }, { "epoch": 4.299698349795943, "grad_norm": 1.4770349264144897, "learning_rate": 2.441960856548337e-06, "loss": 0.0903, "step": 145390 }, { "epoch": 4.299994085290116, "grad_norm": 1.3697905540466309, "learning_rate": 2.441834166629398e-06, "loss": 0.0497, "step": 145400 }, { "epoch": 4.300289820784291, "grad_norm": 0.6519412994384766, "learning_rate": 2.4417074767104585e-06, "loss": 0.0628, "step": 145410 }, { "epoch": 4.300585556278465, "grad_norm": 0.795789897441864, "learning_rate": 2.4415807867915193e-06, "loss": 0.068, "step": 145420 }, { "epoch": 4.3008812917726384, "grad_norm": 0.7673395872116089, "learning_rate": 2.4414540968725796e-06, "loss": 0.0595, "step": 145430 }, { "epoch": 4.301177027266813, "grad_norm": 0.6222850680351257, "learning_rate": 2.4413274069536404e-06, "loss": 0.0692, "step": 145440 }, { "epoch": 4.301472762760986, "grad_norm": 0.5191056132316589, "learning_rate": 2.441200717034701e-06, "loss": 0.0519, "step": 145450 }, { "epoch": 4.301768498255161, "grad_norm": 0.5680822134017944, "learning_rate": 2.4410740271157616e-06, "loss": 0.0458, "step": 145460 }, { "epoch": 4.302064233749334, "grad_norm": 1.1074024438858032, "learning_rate": 2.440947337196822e-06, "loss": 0.0754, "step": 145470 }, { "epoch": 4.302359969243509, "grad_norm": 0.62424635887146, "learning_rate": 2.440820647277883e-06, "loss": 0.0783, "step": 145480 }, { "epoch": 4.302655704737683, "grad_norm": 1.2461042404174805, "learning_rate": 2.4406939573589435e-06, "loss": 0.0627, "step": 145490 }, { "epoch": 4.3029514402318565, "grad_norm": 0.6392264366149902, "learning_rate": 2.4405672674400043e-06, "loss": 0.0556, "step": 145500 }, { "epoch": 4.303247175726031, "grad_norm": 1.2212204933166504, "learning_rate": 2.4404405775210647e-06, "loss": 0.0645, "step": 145510 }, { "epoch": 4.303542911220204, "grad_norm": 0.6432257890701294, "learning_rate": 2.4403138876021255e-06, "loss": 0.0843, "step": 145520 }, { "epoch": 4.303838646714379, "grad_norm": 0.9158658981323242, "learning_rate": 2.440187197683186e-06, "loss": 0.0777, "step": 145530 }, { "epoch": 4.304134382208552, "grad_norm": 0.9398930668830872, "learning_rate": 2.4400605077642466e-06, "loss": 0.07, "step": 145540 }, { "epoch": 4.304430117702727, "grad_norm": 1.0196117162704468, "learning_rate": 2.439933817845307e-06, "loss": 0.0628, "step": 145550 }, { "epoch": 4.304725853196901, "grad_norm": 0.8450302481651306, "learning_rate": 2.4398071279263682e-06, "loss": 0.0608, "step": 145560 }, { "epoch": 4.3050215886910745, "grad_norm": 0.7081588506698608, "learning_rate": 2.4396804380074286e-06, "loss": 0.0696, "step": 145570 }, { "epoch": 4.305317324185249, "grad_norm": 0.7699721455574036, "learning_rate": 2.4395537480884894e-06, "loss": 0.072, "step": 145580 }, { "epoch": 4.305613059679422, "grad_norm": 0.6810230016708374, "learning_rate": 2.4394270581695497e-06, "loss": 0.058, "step": 145590 }, { "epoch": 4.305908795173597, "grad_norm": 1.0847859382629395, "learning_rate": 2.4393003682506105e-06, "loss": 0.0517, "step": 145600 }, { "epoch": 4.30620453066777, "grad_norm": 1.2148934602737427, "learning_rate": 2.439173678331671e-06, "loss": 0.0633, "step": 145610 }, { "epoch": 4.306500266161945, "grad_norm": 1.162477970123291, "learning_rate": 2.4390469884127317e-06, "loss": 0.061, "step": 145620 }, { "epoch": 4.306796001656119, "grad_norm": 0.9139823913574219, "learning_rate": 2.438920298493792e-06, "loss": 0.0924, "step": 145630 }, { "epoch": 4.307091737150293, "grad_norm": 1.1045445203781128, "learning_rate": 2.4387936085748533e-06, "loss": 0.0715, "step": 145640 }, { "epoch": 4.307387472644467, "grad_norm": 1.2341175079345703, "learning_rate": 2.4386669186559136e-06, "loss": 0.0658, "step": 145650 }, { "epoch": 4.3076832081386405, "grad_norm": 0.3937932550907135, "learning_rate": 2.4385402287369744e-06, "loss": 0.0593, "step": 145660 }, { "epoch": 4.307978943632815, "grad_norm": 1.1177830696105957, "learning_rate": 2.4384135388180348e-06, "loss": 0.0733, "step": 145670 }, { "epoch": 4.308274679126988, "grad_norm": 1.2989041805267334, "learning_rate": 2.4382868488990956e-06, "loss": 0.0717, "step": 145680 }, { "epoch": 4.308570414621163, "grad_norm": 1.4124947786331177, "learning_rate": 2.438160158980156e-06, "loss": 0.0676, "step": 145690 }, { "epoch": 4.308866150115337, "grad_norm": 0.9305050373077393, "learning_rate": 2.4380334690612167e-06, "loss": 0.0575, "step": 145700 }, { "epoch": 4.309161885609511, "grad_norm": 1.1285446882247925, "learning_rate": 2.437906779142277e-06, "loss": 0.0825, "step": 145710 }, { "epoch": 4.309457621103685, "grad_norm": 0.8966696262359619, "learning_rate": 2.4377800892233383e-06, "loss": 0.0685, "step": 145720 }, { "epoch": 4.3097533565978585, "grad_norm": 0.7845652103424072, "learning_rate": 2.4376533993043987e-06, "loss": 0.0793, "step": 145730 }, { "epoch": 4.310049092092033, "grad_norm": 1.0255810022354126, "learning_rate": 2.4375267093854595e-06, "loss": 0.0769, "step": 145740 }, { "epoch": 4.310344827586207, "grad_norm": 0.6524994969367981, "learning_rate": 2.43740001946652e-06, "loss": 0.0506, "step": 145750 }, { "epoch": 4.310640563080381, "grad_norm": 0.9524381756782532, "learning_rate": 2.43727332954758e-06, "loss": 0.0721, "step": 145760 }, { "epoch": 4.310936298574555, "grad_norm": 0.4985221028327942, "learning_rate": 2.437146639628641e-06, "loss": 0.0783, "step": 145770 }, { "epoch": 4.311232034068729, "grad_norm": 0.6518172025680542, "learning_rate": 2.4370199497097013e-06, "loss": 0.0658, "step": 145780 }, { "epoch": 4.311527769562903, "grad_norm": 0.8337422609329224, "learning_rate": 2.436893259790762e-06, "loss": 0.0599, "step": 145790 }, { "epoch": 4.311823505057077, "grad_norm": 0.7917141318321228, "learning_rate": 2.436766569871823e-06, "loss": 0.0574, "step": 145800 }, { "epoch": 4.312119240551251, "grad_norm": 0.5730703473091125, "learning_rate": 2.4366398799528837e-06, "loss": 0.0591, "step": 145810 }, { "epoch": 4.312414976045425, "grad_norm": 0.6671497821807861, "learning_rate": 2.436513190033944e-06, "loss": 0.0724, "step": 145820 }, { "epoch": 4.312710711539599, "grad_norm": 0.6325516104698181, "learning_rate": 2.436386500115005e-06, "loss": 0.0712, "step": 145830 }, { "epoch": 4.313006447033773, "grad_norm": 1.4926258325576782, "learning_rate": 2.4362598101960652e-06, "loss": 0.0595, "step": 145840 }, { "epoch": 4.313302182527947, "grad_norm": 1.3137338161468506, "learning_rate": 2.436133120277126e-06, "loss": 0.0742, "step": 145850 }, { "epoch": 4.313597918022121, "grad_norm": 1.1090316772460938, "learning_rate": 2.4360064303581864e-06, "loss": 0.061, "step": 145860 }, { "epoch": 4.313893653516295, "grad_norm": 1.0047862529754639, "learning_rate": 2.435879740439247e-06, "loss": 0.0809, "step": 145870 }, { "epoch": 4.314189389010469, "grad_norm": 0.8744192719459534, "learning_rate": 2.435753050520308e-06, "loss": 0.0698, "step": 145880 }, { "epoch": 4.314485124504643, "grad_norm": 0.5430483818054199, "learning_rate": 2.4356263606013688e-06, "loss": 0.0625, "step": 145890 }, { "epoch": 4.314780859998817, "grad_norm": 0.6242708563804626, "learning_rate": 2.435499670682429e-06, "loss": 0.0626, "step": 145900 }, { "epoch": 4.315076595492991, "grad_norm": 0.674085795879364, "learning_rate": 2.43537298076349e-06, "loss": 0.0613, "step": 145910 }, { "epoch": 4.315372330987165, "grad_norm": 1.124687910079956, "learning_rate": 2.4352462908445503e-06, "loss": 0.0701, "step": 145920 }, { "epoch": 4.315668066481339, "grad_norm": 1.2549980878829956, "learning_rate": 2.435119600925611e-06, "loss": 0.0698, "step": 145930 }, { "epoch": 4.3159638019755135, "grad_norm": 0.9431156516075134, "learning_rate": 2.4349929110066714e-06, "loss": 0.0706, "step": 145940 }, { "epoch": 4.316259537469687, "grad_norm": 0.5205667018890381, "learning_rate": 2.4348662210877322e-06, "loss": 0.0735, "step": 145950 }, { "epoch": 4.316555272963861, "grad_norm": 0.973635196685791, "learning_rate": 2.434739531168793e-06, "loss": 0.0717, "step": 145960 }, { "epoch": 4.316851008458035, "grad_norm": 2.460130453109741, "learning_rate": 2.434612841249854e-06, "loss": 0.0853, "step": 145970 }, { "epoch": 4.317146743952209, "grad_norm": 1.5042932033538818, "learning_rate": 2.434486151330914e-06, "loss": 0.0773, "step": 145980 }, { "epoch": 4.317442479446383, "grad_norm": 1.2362278699874878, "learning_rate": 2.434359461411975e-06, "loss": 0.067, "step": 145990 }, { "epoch": 4.317738214940557, "grad_norm": 1.4654276371002197, "learning_rate": 2.4342327714930353e-06, "loss": 0.0588, "step": 146000 }, { "epoch": 4.318033950434732, "grad_norm": 0.6784314513206482, "learning_rate": 2.434106081574096e-06, "loss": 0.0696, "step": 146010 }, { "epoch": 4.318329685928905, "grad_norm": 1.3513909578323364, "learning_rate": 2.4339793916551565e-06, "loss": 0.0633, "step": 146020 }, { "epoch": 4.3186254214230795, "grad_norm": 0.5227283239364624, "learning_rate": 2.4338527017362173e-06, "loss": 0.0854, "step": 146030 }, { "epoch": 4.318921156917253, "grad_norm": 0.843650758266449, "learning_rate": 2.433726011817278e-06, "loss": 0.0729, "step": 146040 }, { "epoch": 4.319216892411427, "grad_norm": 0.4988102614879608, "learning_rate": 2.433599321898339e-06, "loss": 0.065, "step": 146050 }, { "epoch": 4.319512627905601, "grad_norm": 1.524160385131836, "learning_rate": 2.4334726319793992e-06, "loss": 0.061, "step": 146060 }, { "epoch": 4.319808363399775, "grad_norm": 0.9163029193878174, "learning_rate": 2.43334594206046e-06, "loss": 0.0713, "step": 146070 }, { "epoch": 4.32010409889395, "grad_norm": 0.9581591486930847, "learning_rate": 2.4332192521415204e-06, "loss": 0.072, "step": 146080 }, { "epoch": 4.320399834388123, "grad_norm": 1.2329776287078857, "learning_rate": 2.433092562222581e-06, "loss": 0.0759, "step": 146090 }, { "epoch": 4.3206955698822975, "grad_norm": 0.5262103080749512, "learning_rate": 2.4329658723036415e-06, "loss": 0.0535, "step": 146100 }, { "epoch": 4.320991305376471, "grad_norm": 1.4186049699783325, "learning_rate": 2.4328391823847023e-06, "loss": 0.0685, "step": 146110 }, { "epoch": 4.321287040870645, "grad_norm": 0.8459762334823608, "learning_rate": 2.432712492465763e-06, "loss": 0.0765, "step": 146120 }, { "epoch": 4.321582776364819, "grad_norm": 1.1793872117996216, "learning_rate": 2.432585802546824e-06, "loss": 0.0766, "step": 146130 }, { "epoch": 4.321878511858993, "grad_norm": 1.24903404712677, "learning_rate": 2.4324591126278843e-06, "loss": 0.0535, "step": 146140 }, { "epoch": 4.322174247353168, "grad_norm": 0.9302078485488892, "learning_rate": 2.432332422708945e-06, "loss": 0.0697, "step": 146150 }, { "epoch": 4.322469982847341, "grad_norm": 1.1562868356704712, "learning_rate": 2.4322057327900054e-06, "loss": 0.0698, "step": 146160 }, { "epoch": 4.322765718341516, "grad_norm": 0.8473076820373535, "learning_rate": 2.4320790428710658e-06, "loss": 0.0764, "step": 146170 }, { "epoch": 4.323061453835689, "grad_norm": 0.8979157209396362, "learning_rate": 2.4319523529521266e-06, "loss": 0.0731, "step": 146180 }, { "epoch": 4.3233571893298635, "grad_norm": 1.3824065923690796, "learning_rate": 2.431825663033187e-06, "loss": 0.066, "step": 146190 }, { "epoch": 4.323652924824037, "grad_norm": 0.8441586494445801, "learning_rate": 2.431698973114248e-06, "loss": 0.067, "step": 146200 }, { "epoch": 4.323948660318211, "grad_norm": 0.6488489508628845, "learning_rate": 2.4315722831953085e-06, "loss": 0.0544, "step": 146210 }, { "epoch": 4.324244395812386, "grad_norm": 1.076907753944397, "learning_rate": 2.4314455932763693e-06, "loss": 0.071, "step": 146220 }, { "epoch": 4.324540131306559, "grad_norm": 0.5919002890586853, "learning_rate": 2.4313189033574297e-06, "loss": 0.065, "step": 146230 }, { "epoch": 4.324835866800734, "grad_norm": 0.6860808730125427, "learning_rate": 2.4311922134384905e-06, "loss": 0.0714, "step": 146240 }, { "epoch": 4.325131602294907, "grad_norm": 0.967830240726471, "learning_rate": 2.431065523519551e-06, "loss": 0.0644, "step": 146250 }, { "epoch": 4.3254273377890815, "grad_norm": 1.732582926750183, "learning_rate": 2.4309388336006116e-06, "loss": 0.0668, "step": 146260 }, { "epoch": 4.325723073283255, "grad_norm": 1.2017672061920166, "learning_rate": 2.430812143681672e-06, "loss": 0.0743, "step": 146270 }, { "epoch": 4.326018808777429, "grad_norm": 0.896941065788269, "learning_rate": 2.430685453762733e-06, "loss": 0.0688, "step": 146280 }, { "epoch": 4.326314544271604, "grad_norm": 0.5952847599983215, "learning_rate": 2.4305587638437936e-06, "loss": 0.0626, "step": 146290 }, { "epoch": 4.326610279765777, "grad_norm": 0.734244704246521, "learning_rate": 2.4304320739248544e-06, "loss": 0.0653, "step": 146300 }, { "epoch": 4.326906015259952, "grad_norm": 0.9501565098762512, "learning_rate": 2.4303053840059147e-06, "loss": 0.0642, "step": 146310 }, { "epoch": 4.327201750754125, "grad_norm": 0.8849769830703735, "learning_rate": 2.4301786940869755e-06, "loss": 0.0743, "step": 146320 }, { "epoch": 4.3274974862482996, "grad_norm": 1.2888761758804321, "learning_rate": 2.430052004168036e-06, "loss": 0.073, "step": 146330 }, { "epoch": 4.327793221742474, "grad_norm": 1.1189531087875366, "learning_rate": 2.4299253142490967e-06, "loss": 0.0619, "step": 146340 }, { "epoch": 4.328088957236647, "grad_norm": 1.5550282001495361, "learning_rate": 2.429798624330157e-06, "loss": 0.0619, "step": 146350 }, { "epoch": 4.328384692730822, "grad_norm": 0.7700496912002563, "learning_rate": 2.4296719344112182e-06, "loss": 0.0703, "step": 146360 }, { "epoch": 4.328680428224995, "grad_norm": 0.7643076181411743, "learning_rate": 2.4295452444922786e-06, "loss": 0.0693, "step": 146370 }, { "epoch": 4.32897616371917, "grad_norm": 0.9601761698722839, "learning_rate": 2.4294185545733394e-06, "loss": 0.0729, "step": 146380 }, { "epoch": 4.329271899213343, "grad_norm": 0.9779094457626343, "learning_rate": 2.4292918646543998e-06, "loss": 0.067, "step": 146390 }, { "epoch": 4.329567634707518, "grad_norm": 0.9177445769309998, "learning_rate": 2.4291651747354606e-06, "loss": 0.0674, "step": 146400 }, { "epoch": 4.329863370201692, "grad_norm": 1.317032814025879, "learning_rate": 2.429038484816521e-06, "loss": 0.0614, "step": 146410 }, { "epoch": 4.3301591056958655, "grad_norm": 0.6293204426765442, "learning_rate": 2.4289117948975817e-06, "loss": 0.086, "step": 146420 }, { "epoch": 4.33045484119004, "grad_norm": 1.3212060928344727, "learning_rate": 2.428785104978642e-06, "loss": 0.0715, "step": 146430 }, { "epoch": 4.330750576684213, "grad_norm": 1.140160083770752, "learning_rate": 2.4286584150597033e-06, "loss": 0.0724, "step": 146440 }, { "epoch": 4.331046312178388, "grad_norm": 0.7175917029380798, "learning_rate": 2.4285317251407637e-06, "loss": 0.0578, "step": 146450 }, { "epoch": 4.331342047672561, "grad_norm": 1.6943351030349731, "learning_rate": 2.4284050352218244e-06, "loss": 0.0727, "step": 146460 }, { "epoch": 4.331637783166736, "grad_norm": 0.6049142479896545, "learning_rate": 2.428278345302885e-06, "loss": 0.076, "step": 146470 }, { "epoch": 4.33193351866091, "grad_norm": 0.8987306952476501, "learning_rate": 2.4281516553839456e-06, "loss": 0.0733, "step": 146480 }, { "epoch": 4.3322292541550835, "grad_norm": 0.6998110413551331, "learning_rate": 2.428024965465006e-06, "loss": 0.0616, "step": 146490 }, { "epoch": 4.332524989649258, "grad_norm": 0.5395822525024414, "learning_rate": 2.4278982755460668e-06, "loss": 0.0515, "step": 146500 }, { "epoch": 4.332820725143431, "grad_norm": 0.5274925231933594, "learning_rate": 2.427771585627127e-06, "loss": 0.0511, "step": 146510 }, { "epoch": 4.333116460637606, "grad_norm": 1.3710428476333618, "learning_rate": 2.4276448957081883e-06, "loss": 0.0792, "step": 146520 }, { "epoch": 4.33341219613178, "grad_norm": 0.7569693922996521, "learning_rate": 2.4275182057892487e-06, "loss": 0.0762, "step": 146530 }, { "epoch": 4.333707931625954, "grad_norm": 0.7522618770599365, "learning_rate": 2.4273915158703095e-06, "loss": 0.068, "step": 146540 }, { "epoch": 4.334003667120128, "grad_norm": 0.9418289065361023, "learning_rate": 2.42726482595137e-06, "loss": 0.0648, "step": 146550 }, { "epoch": 4.334299402614302, "grad_norm": 0.9803860783576965, "learning_rate": 2.4271381360324306e-06, "loss": 0.0659, "step": 146560 }, { "epoch": 4.334595138108476, "grad_norm": 0.7444397211074829, "learning_rate": 2.427011446113491e-06, "loss": 0.0807, "step": 146570 }, { "epoch": 4.3348908736026495, "grad_norm": 0.9768343567848206, "learning_rate": 2.426884756194552e-06, "loss": 0.0639, "step": 146580 }, { "epoch": 4.335186609096824, "grad_norm": 0.5722211599349976, "learning_rate": 2.426758066275612e-06, "loss": 0.0631, "step": 146590 }, { "epoch": 4.335482344590998, "grad_norm": 1.2595703601837158, "learning_rate": 2.426631376356673e-06, "loss": 0.0551, "step": 146600 }, { "epoch": 4.335778080085172, "grad_norm": 2.5436553955078125, "learning_rate": 2.4265046864377337e-06, "loss": 0.0765, "step": 146610 }, { "epoch": 4.336073815579346, "grad_norm": 0.6812506914138794, "learning_rate": 2.426377996518794e-06, "loss": 0.0734, "step": 146620 }, { "epoch": 4.33636955107352, "grad_norm": 0.9295057654380798, "learning_rate": 2.426251306599855e-06, "loss": 0.0866, "step": 146630 }, { "epoch": 4.336665286567694, "grad_norm": 1.8223555088043213, "learning_rate": 2.4261246166809153e-06, "loss": 0.0647, "step": 146640 }, { "epoch": 4.3369610220618675, "grad_norm": 0.6074042320251465, "learning_rate": 2.425997926761976e-06, "loss": 0.0522, "step": 146650 }, { "epoch": 4.337256757556042, "grad_norm": 0.7435128092765808, "learning_rate": 2.4258712368430364e-06, "loss": 0.0673, "step": 146660 }, { "epoch": 4.337552493050216, "grad_norm": 0.7739437818527222, "learning_rate": 2.425744546924097e-06, "loss": 0.0831, "step": 146670 }, { "epoch": 4.33784822854439, "grad_norm": 0.6675230860710144, "learning_rate": 2.425617857005158e-06, "loss": 0.0642, "step": 146680 }, { "epoch": 4.338143964038564, "grad_norm": 2.0148866176605225, "learning_rate": 2.425491167086219e-06, "loss": 0.0876, "step": 146690 }, { "epoch": 4.338439699532738, "grad_norm": 0.3841848075389862, "learning_rate": 2.425364477167279e-06, "loss": 0.0443, "step": 146700 }, { "epoch": 4.338735435026912, "grad_norm": 0.8698272705078125, "learning_rate": 2.42523778724834e-06, "loss": 0.073, "step": 146710 }, { "epoch": 4.339031170521086, "grad_norm": 1.8130041360855103, "learning_rate": 2.4251110973294003e-06, "loss": 0.0716, "step": 146720 }, { "epoch": 4.33932690601526, "grad_norm": 0.8008157014846802, "learning_rate": 2.424984407410461e-06, "loss": 0.0647, "step": 146730 }, { "epoch": 4.339622641509434, "grad_norm": 0.5523982048034668, "learning_rate": 2.4248577174915215e-06, "loss": 0.0629, "step": 146740 }, { "epoch": 4.339918377003608, "grad_norm": 0.438265323638916, "learning_rate": 2.4247310275725823e-06, "loss": 0.0609, "step": 146750 }, { "epoch": 4.340214112497782, "grad_norm": 0.7239845991134644, "learning_rate": 2.424604337653643e-06, "loss": 0.0537, "step": 146760 }, { "epoch": 4.340509847991956, "grad_norm": 1.1220206022262573, "learning_rate": 2.424477647734704e-06, "loss": 0.0614, "step": 146770 }, { "epoch": 4.34080558348613, "grad_norm": 0.5039269924163818, "learning_rate": 2.424350957815764e-06, "loss": 0.0665, "step": 146780 }, { "epoch": 4.341101318980304, "grad_norm": 0.7633408904075623, "learning_rate": 2.424224267896825e-06, "loss": 0.0628, "step": 146790 }, { "epoch": 4.341397054474478, "grad_norm": 0.6688685417175293, "learning_rate": 2.4240975779778854e-06, "loss": 0.0459, "step": 146800 }, { "epoch": 4.341692789968652, "grad_norm": 1.1394554376602173, "learning_rate": 2.423970888058946e-06, "loss": 0.056, "step": 146810 }, { "epoch": 4.341988525462826, "grad_norm": 0.9293957948684692, "learning_rate": 2.4238441981400065e-06, "loss": 0.0781, "step": 146820 }, { "epoch": 4.342284260957, "grad_norm": 0.70061856508255, "learning_rate": 2.4237175082210673e-06, "loss": 0.0728, "step": 146830 }, { "epoch": 4.342579996451174, "grad_norm": 1.4586102962493896, "learning_rate": 2.423590818302128e-06, "loss": 0.0693, "step": 146840 }, { "epoch": 4.342875731945348, "grad_norm": 0.9520909786224365, "learning_rate": 2.423464128383189e-06, "loss": 0.0606, "step": 146850 }, { "epoch": 4.343171467439522, "grad_norm": 1.1516683101654053, "learning_rate": 2.4233374384642492e-06, "loss": 0.0663, "step": 146860 }, { "epoch": 4.343467202933696, "grad_norm": 3.6945078372955322, "learning_rate": 2.42321074854531e-06, "loss": 0.0786, "step": 146870 }, { "epoch": 4.34376293842787, "grad_norm": 0.8638396263122559, "learning_rate": 2.4230840586263704e-06, "loss": 0.0698, "step": 146880 }, { "epoch": 4.344058673922044, "grad_norm": 1.1001038551330566, "learning_rate": 2.422957368707431e-06, "loss": 0.0556, "step": 146890 }, { "epoch": 4.344354409416218, "grad_norm": 2.1859517097473145, "learning_rate": 2.4228306787884916e-06, "loss": 0.0627, "step": 146900 }, { "epoch": 4.344650144910392, "grad_norm": 0.8363476991653442, "learning_rate": 2.4227039888695523e-06, "loss": 0.0527, "step": 146910 }, { "epoch": 4.344945880404566, "grad_norm": 1.669779658317566, "learning_rate": 2.422577298950613e-06, "loss": 0.0743, "step": 146920 }, { "epoch": 4.34524161589874, "grad_norm": 0.9983128309249878, "learning_rate": 2.422450609031674e-06, "loss": 0.0708, "step": 146930 }, { "epoch": 4.345537351392914, "grad_norm": 0.6956465840339661, "learning_rate": 2.4223239191127343e-06, "loss": 0.0556, "step": 146940 }, { "epoch": 4.3458330868870885, "grad_norm": 0.6779259443283081, "learning_rate": 2.422197229193795e-06, "loss": 0.0631, "step": 146950 }, { "epoch": 4.346128822381262, "grad_norm": 1.3815326690673828, "learning_rate": 2.4220705392748554e-06, "loss": 0.0613, "step": 146960 }, { "epoch": 4.346424557875436, "grad_norm": 0.7156379222869873, "learning_rate": 2.4219438493559162e-06, "loss": 0.0626, "step": 146970 }, { "epoch": 4.34672029336961, "grad_norm": 0.3741742968559265, "learning_rate": 2.4218171594369766e-06, "loss": 0.0813, "step": 146980 }, { "epoch": 4.347016028863784, "grad_norm": 1.0228227376937866, "learning_rate": 2.4216904695180374e-06, "loss": 0.0748, "step": 146990 }, { "epoch": 4.347311764357959, "grad_norm": 1.0243197679519653, "learning_rate": 2.421563779599098e-06, "loss": 0.0694, "step": 147000 }, { "epoch": 4.347607499852132, "grad_norm": 1.0407569408416748, "learning_rate": 2.4214370896801585e-06, "loss": 0.0769, "step": 147010 }, { "epoch": 4.3479032353463065, "grad_norm": 1.0959081649780273, "learning_rate": 2.4213103997612193e-06, "loss": 0.0701, "step": 147020 }, { "epoch": 4.34819897084048, "grad_norm": 1.2828449010849, "learning_rate": 2.4211837098422797e-06, "loss": 0.068, "step": 147030 }, { "epoch": 4.348494706334654, "grad_norm": 0.6559280157089233, "learning_rate": 2.4210570199233405e-06, "loss": 0.0713, "step": 147040 }, { "epoch": 4.348790441828828, "grad_norm": 1.1477638483047485, "learning_rate": 2.420930330004401e-06, "loss": 0.0633, "step": 147050 }, { "epoch": 4.349086177323002, "grad_norm": 1.0506961345672607, "learning_rate": 2.4208036400854616e-06, "loss": 0.0621, "step": 147060 }, { "epoch": 4.349381912817177, "grad_norm": 0.5976316928863525, "learning_rate": 2.420676950166522e-06, "loss": 0.0845, "step": 147070 }, { "epoch": 4.34967764831135, "grad_norm": 0.8647435307502747, "learning_rate": 2.4205502602475832e-06, "loss": 0.063, "step": 147080 }, { "epoch": 4.349973383805525, "grad_norm": 1.1649245023727417, "learning_rate": 2.4204235703286436e-06, "loss": 0.073, "step": 147090 }, { "epoch": 4.350269119299698, "grad_norm": 0.978786289691925, "learning_rate": 2.4202968804097044e-06, "loss": 0.055, "step": 147100 }, { "epoch": 4.3505648547938724, "grad_norm": 1.4091829061508179, "learning_rate": 2.4201701904907647e-06, "loss": 0.0729, "step": 147110 }, { "epoch": 4.350860590288047, "grad_norm": 1.1137731075286865, "learning_rate": 2.4200435005718255e-06, "loss": 0.0812, "step": 147120 }, { "epoch": 4.35115632578222, "grad_norm": 0.6240487098693848, "learning_rate": 2.419916810652886e-06, "loss": 0.0769, "step": 147130 }, { "epoch": 4.351452061276395, "grad_norm": 0.6924628615379333, "learning_rate": 2.4197901207339467e-06, "loss": 0.0691, "step": 147140 }, { "epoch": 4.351747796770568, "grad_norm": 0.5265648365020752, "learning_rate": 2.419663430815007e-06, "loss": 0.0575, "step": 147150 }, { "epoch": 4.352043532264743, "grad_norm": 1.119512677192688, "learning_rate": 2.4195367408960683e-06, "loss": 0.0704, "step": 147160 }, { "epoch": 4.352339267758916, "grad_norm": 0.8568527102470398, "learning_rate": 2.4194100509771286e-06, "loss": 0.0957, "step": 147170 }, { "epoch": 4.3526350032530905, "grad_norm": 1.3674513101577759, "learning_rate": 2.4192833610581894e-06, "loss": 0.0667, "step": 147180 }, { "epoch": 4.352930738747265, "grad_norm": 0.6847766041755676, "learning_rate": 2.41915667113925e-06, "loss": 0.0666, "step": 147190 }, { "epoch": 4.353226474241438, "grad_norm": 0.6505435109138489, "learning_rate": 2.4190299812203106e-06, "loss": 0.0632, "step": 147200 }, { "epoch": 4.353522209735613, "grad_norm": 0.9937528967857361, "learning_rate": 2.418903291301371e-06, "loss": 0.0559, "step": 147210 }, { "epoch": 4.353817945229786, "grad_norm": 1.257749319076538, "learning_rate": 2.4187766013824317e-06, "loss": 0.0789, "step": 147220 }, { "epoch": 4.354113680723961, "grad_norm": 0.44315105676651, "learning_rate": 2.418649911463492e-06, "loss": 0.0745, "step": 147230 }, { "epoch": 4.354409416218134, "grad_norm": 0.5650058388710022, "learning_rate": 2.4185232215445533e-06, "loss": 0.0726, "step": 147240 }, { "epoch": 4.3547051517123085, "grad_norm": 0.5152354836463928, "learning_rate": 2.4183965316256137e-06, "loss": 0.0573, "step": 147250 }, { "epoch": 4.355000887206483, "grad_norm": 1.1794193983078003, "learning_rate": 2.4182698417066745e-06, "loss": 0.0531, "step": 147260 }, { "epoch": 4.355296622700656, "grad_norm": 0.8342468738555908, "learning_rate": 2.418143151787735e-06, "loss": 0.0904, "step": 147270 }, { "epoch": 4.355592358194831, "grad_norm": 1.3585928678512573, "learning_rate": 2.4180164618687956e-06, "loss": 0.0694, "step": 147280 }, { "epoch": 4.355888093689004, "grad_norm": 1.0656107664108276, "learning_rate": 2.417889771949856e-06, "loss": 0.0657, "step": 147290 }, { "epoch": 4.356183829183179, "grad_norm": 0.9004132747650146, "learning_rate": 2.4177630820309168e-06, "loss": 0.0517, "step": 147300 }, { "epoch": 4.356479564677352, "grad_norm": 1.065622091293335, "learning_rate": 2.417636392111977e-06, "loss": 0.0592, "step": 147310 }, { "epoch": 4.356775300171527, "grad_norm": 1.5653530359268188, "learning_rate": 2.4175097021930384e-06, "loss": 0.0658, "step": 147320 }, { "epoch": 4.357071035665701, "grad_norm": 0.7573001384735107, "learning_rate": 2.4173830122740987e-06, "loss": 0.0642, "step": 147330 }, { "epoch": 4.3573667711598745, "grad_norm": 0.5836427211761475, "learning_rate": 2.4172563223551595e-06, "loss": 0.0613, "step": 147340 }, { "epoch": 4.357662506654049, "grad_norm": 1.1228227615356445, "learning_rate": 2.41712963243622e-06, "loss": 0.0615, "step": 147350 }, { "epoch": 4.357958242148222, "grad_norm": 1.380625605583191, "learning_rate": 2.4170029425172807e-06, "loss": 0.0816, "step": 147360 }, { "epoch": 4.358253977642397, "grad_norm": 1.5367119312286377, "learning_rate": 2.416876252598341e-06, "loss": 0.0789, "step": 147370 }, { "epoch": 4.35854971313657, "grad_norm": 0.5759344696998596, "learning_rate": 2.416749562679402e-06, "loss": 0.0708, "step": 147380 }, { "epoch": 4.358845448630745, "grad_norm": 1.9606519937515259, "learning_rate": 2.416622872760462e-06, "loss": 0.0557, "step": 147390 }, { "epoch": 4.359141184124919, "grad_norm": 0.8355814218521118, "learning_rate": 2.4164961828415234e-06, "loss": 0.056, "step": 147400 }, { "epoch": 4.3594369196190925, "grad_norm": 0.6282762289047241, "learning_rate": 2.4163694929225838e-06, "loss": 0.0693, "step": 147410 }, { "epoch": 4.359732655113267, "grad_norm": 0.6527140736579895, "learning_rate": 2.416242803003644e-06, "loss": 0.0603, "step": 147420 }, { "epoch": 4.36002839060744, "grad_norm": 0.8175161480903625, "learning_rate": 2.416116113084705e-06, "loss": 0.0877, "step": 147430 }, { "epoch": 4.360324126101615, "grad_norm": 0.6070702075958252, "learning_rate": 2.4159894231657653e-06, "loss": 0.0549, "step": 147440 }, { "epoch": 4.360619861595788, "grad_norm": 2.0096631050109863, "learning_rate": 2.415862733246826e-06, "loss": 0.0621, "step": 147450 }, { "epoch": 4.360915597089963, "grad_norm": 0.924142062664032, "learning_rate": 2.4157360433278865e-06, "loss": 0.0592, "step": 147460 }, { "epoch": 4.361211332584137, "grad_norm": 0.8948819041252136, "learning_rate": 2.4156093534089472e-06, "loss": 0.0727, "step": 147470 }, { "epoch": 4.361507068078311, "grad_norm": 0.8603091835975647, "learning_rate": 2.415482663490008e-06, "loss": 0.0697, "step": 147480 }, { "epoch": 4.361802803572485, "grad_norm": 0.6880483031272888, "learning_rate": 2.415355973571069e-06, "loss": 0.0681, "step": 147490 }, { "epoch": 4.3620985390666585, "grad_norm": 0.6445077657699585, "learning_rate": 2.415229283652129e-06, "loss": 0.0709, "step": 147500 }, { "epoch": 4.362394274560833, "grad_norm": 1.0796382427215576, "learning_rate": 2.41510259373319e-06, "loss": 0.074, "step": 147510 }, { "epoch": 4.362690010055006, "grad_norm": 0.9471040964126587, "learning_rate": 2.4149759038142503e-06, "loss": 0.0635, "step": 147520 }, { "epoch": 4.362985745549181, "grad_norm": 1.044384479522705, "learning_rate": 2.414849213895311e-06, "loss": 0.0898, "step": 147530 }, { "epoch": 4.363281481043355, "grad_norm": 1.3862206935882568, "learning_rate": 2.4147225239763715e-06, "loss": 0.0618, "step": 147540 }, { "epoch": 4.363577216537529, "grad_norm": 1.1330734491348267, "learning_rate": 2.4145958340574323e-06, "loss": 0.0737, "step": 147550 }, { "epoch": 4.363872952031703, "grad_norm": 0.7844101786613464, "learning_rate": 2.414469144138493e-06, "loss": 0.0576, "step": 147560 }, { "epoch": 4.3641686875258765, "grad_norm": 1.2440011501312256, "learning_rate": 2.414342454219554e-06, "loss": 0.0699, "step": 147570 }, { "epoch": 4.364464423020051, "grad_norm": 1.075331687927246, "learning_rate": 2.4142157643006142e-06, "loss": 0.0903, "step": 147580 }, { "epoch": 4.364760158514225, "grad_norm": 0.7944121956825256, "learning_rate": 2.414089074381675e-06, "loss": 0.0649, "step": 147590 }, { "epoch": 4.365055894008399, "grad_norm": 0.884815514087677, "learning_rate": 2.4139623844627354e-06, "loss": 0.0515, "step": 147600 }, { "epoch": 4.365351629502573, "grad_norm": 0.8626195192337036, "learning_rate": 2.413835694543796e-06, "loss": 0.0722, "step": 147610 }, { "epoch": 4.365647364996747, "grad_norm": 2.0391299724578857, "learning_rate": 2.4137090046248565e-06, "loss": 0.0783, "step": 147620 }, { "epoch": 4.365943100490921, "grad_norm": 1.6803724765777588, "learning_rate": 2.4135823147059173e-06, "loss": 0.0801, "step": 147630 }, { "epoch": 4.3662388359850945, "grad_norm": 1.0754960775375366, "learning_rate": 2.413455624786978e-06, "loss": 0.0875, "step": 147640 }, { "epoch": 4.366534571479269, "grad_norm": 0.6686949729919434, "learning_rate": 2.413328934868039e-06, "loss": 0.0572, "step": 147650 }, { "epoch": 4.366830306973443, "grad_norm": 1.1086050271987915, "learning_rate": 2.4132022449490993e-06, "loss": 0.0764, "step": 147660 }, { "epoch": 4.367126042467617, "grad_norm": 0.6326956152915955, "learning_rate": 2.41307555503016e-06, "loss": 0.0662, "step": 147670 }, { "epoch": 4.367421777961791, "grad_norm": 0.8222157955169678, "learning_rate": 2.4129488651112204e-06, "loss": 0.0671, "step": 147680 }, { "epoch": 4.367717513455965, "grad_norm": 0.497043251991272, "learning_rate": 2.4128221751922812e-06, "loss": 0.0649, "step": 147690 }, { "epoch": 4.368013248950139, "grad_norm": 1.0860803127288818, "learning_rate": 2.4126954852733416e-06, "loss": 0.0643, "step": 147700 }, { "epoch": 4.368308984444313, "grad_norm": 0.7830612063407898, "learning_rate": 2.4125687953544024e-06, "loss": 0.0723, "step": 147710 }, { "epoch": 4.368604719938487, "grad_norm": 1.6090589761734009, "learning_rate": 2.412442105435463e-06, "loss": 0.0793, "step": 147720 }, { "epoch": 4.368900455432661, "grad_norm": 1.035398244857788, "learning_rate": 2.412315415516524e-06, "loss": 0.0697, "step": 147730 }, { "epoch": 4.369196190926835, "grad_norm": 0.8015630841255188, "learning_rate": 2.4121887255975843e-06, "loss": 0.0697, "step": 147740 }, { "epoch": 4.369491926421009, "grad_norm": 1.0019495487213135, "learning_rate": 2.412062035678645e-06, "loss": 0.0678, "step": 147750 }, { "epoch": 4.369787661915183, "grad_norm": 1.5114067792892456, "learning_rate": 2.4119353457597055e-06, "loss": 0.0739, "step": 147760 }, { "epoch": 4.370083397409357, "grad_norm": 0.7445114850997925, "learning_rate": 2.4118086558407663e-06, "loss": 0.0684, "step": 147770 }, { "epoch": 4.3703791329035315, "grad_norm": 1.057915210723877, "learning_rate": 2.4116819659218266e-06, "loss": 0.0595, "step": 147780 }, { "epoch": 4.370674868397705, "grad_norm": 1.602527379989624, "learning_rate": 2.4115552760028874e-06, "loss": 0.0674, "step": 147790 }, { "epoch": 4.370970603891879, "grad_norm": 1.2198162078857422, "learning_rate": 2.411428586083948e-06, "loss": 0.0658, "step": 147800 }, { "epoch": 4.371266339386053, "grad_norm": 1.3008737564086914, "learning_rate": 2.411301896165009e-06, "loss": 0.0798, "step": 147810 }, { "epoch": 4.371562074880227, "grad_norm": 0.9514670968055725, "learning_rate": 2.4111752062460694e-06, "loss": 0.0696, "step": 147820 }, { "epoch": 4.371857810374401, "grad_norm": 1.0029408931732178, "learning_rate": 2.4110485163271297e-06, "loss": 0.0875, "step": 147830 }, { "epoch": 4.372153545868575, "grad_norm": 0.7649217844009399, "learning_rate": 2.4109218264081905e-06, "loss": 0.0648, "step": 147840 }, { "epoch": 4.37244928136275, "grad_norm": 0.7679442763328552, "learning_rate": 2.410795136489251e-06, "loss": 0.0585, "step": 147850 }, { "epoch": 4.372745016856923, "grad_norm": 1.111940622329712, "learning_rate": 2.4106684465703117e-06, "loss": 0.0876, "step": 147860 }, { "epoch": 4.3730407523510975, "grad_norm": 2.0234131813049316, "learning_rate": 2.410541756651372e-06, "loss": 0.0828, "step": 147870 }, { "epoch": 4.373336487845271, "grad_norm": 0.7146392464637756, "learning_rate": 2.4104150667324333e-06, "loss": 0.0906, "step": 147880 }, { "epoch": 4.373632223339445, "grad_norm": 0.9077399969100952, "learning_rate": 2.4102883768134936e-06, "loss": 0.0569, "step": 147890 }, { "epoch": 4.373927958833619, "grad_norm": 0.9326209425926208, "learning_rate": 2.4101616868945544e-06, "loss": 0.0551, "step": 147900 }, { "epoch": 4.374223694327793, "grad_norm": 1.1680158376693726, "learning_rate": 2.4100349969756148e-06, "loss": 0.077, "step": 147910 }, { "epoch": 4.374519429821968, "grad_norm": 0.7535377144813538, "learning_rate": 2.4099083070566756e-06, "loss": 0.0683, "step": 147920 }, { "epoch": 4.374815165316141, "grad_norm": 0.7945593595504761, "learning_rate": 2.409781617137736e-06, "loss": 0.0715, "step": 147930 }, { "epoch": 4.3751109008103155, "grad_norm": 0.9115332961082458, "learning_rate": 2.4096549272187967e-06, "loss": 0.0619, "step": 147940 }, { "epoch": 4.375406636304489, "grad_norm": 1.0317293405532837, "learning_rate": 2.409528237299857e-06, "loss": 0.0618, "step": 147950 }, { "epoch": 4.375702371798663, "grad_norm": 0.6873916983604431, "learning_rate": 2.4094015473809183e-06, "loss": 0.0749, "step": 147960 }, { "epoch": 4.375998107292837, "grad_norm": 1.0506203174591064, "learning_rate": 2.4092748574619787e-06, "loss": 0.054, "step": 147970 }, { "epoch": 4.376293842787011, "grad_norm": 0.7796443700790405, "learning_rate": 2.4091481675430395e-06, "loss": 0.0847, "step": 147980 }, { "epoch": 4.376589578281186, "grad_norm": 0.711667001247406, "learning_rate": 2.4090214776241e-06, "loss": 0.0627, "step": 147990 }, { "epoch": 4.376885313775359, "grad_norm": 0.7359992265701294, "learning_rate": 2.4088947877051606e-06, "loss": 0.0692, "step": 148000 }, { "epoch": 4.3771810492695336, "grad_norm": 1.0778300762176514, "learning_rate": 2.408768097786221e-06, "loss": 0.0773, "step": 148010 }, { "epoch": 4.377476784763707, "grad_norm": 1.2074586153030396, "learning_rate": 2.4086414078672818e-06, "loss": 0.0837, "step": 148020 }, { "epoch": 4.377772520257881, "grad_norm": 0.5905874371528625, "learning_rate": 2.408514717948342e-06, "loss": 0.0735, "step": 148030 }, { "epoch": 4.378068255752055, "grad_norm": 0.6461352109909058, "learning_rate": 2.4083880280294033e-06, "loss": 0.0712, "step": 148040 }, { "epoch": 4.378363991246229, "grad_norm": 0.964552104473114, "learning_rate": 2.4082613381104637e-06, "loss": 0.0671, "step": 148050 }, { "epoch": 4.378659726740404, "grad_norm": 0.9944481253623962, "learning_rate": 2.4081346481915245e-06, "loss": 0.0685, "step": 148060 }, { "epoch": 4.378955462234577, "grad_norm": 0.8367555141448975, "learning_rate": 2.408007958272585e-06, "loss": 0.07, "step": 148070 }, { "epoch": 4.379251197728752, "grad_norm": 0.7451063990592957, "learning_rate": 2.4078812683536457e-06, "loss": 0.0753, "step": 148080 }, { "epoch": 4.379546933222925, "grad_norm": 0.9416907429695129, "learning_rate": 2.407754578434706e-06, "loss": 0.0641, "step": 148090 }, { "epoch": 4.3798426687170995, "grad_norm": 1.4224786758422852, "learning_rate": 2.407627888515767e-06, "loss": 0.0711, "step": 148100 }, { "epoch": 4.380138404211273, "grad_norm": 0.8128553032875061, "learning_rate": 2.407501198596827e-06, "loss": 0.0533, "step": 148110 }, { "epoch": 4.380434139705447, "grad_norm": 1.2573217153549194, "learning_rate": 2.4073745086778884e-06, "loss": 0.0859, "step": 148120 }, { "epoch": 4.380729875199622, "grad_norm": 0.7536013126373291, "learning_rate": 2.4072478187589488e-06, "loss": 0.0647, "step": 148130 }, { "epoch": 4.381025610693795, "grad_norm": 0.5350071787834167, "learning_rate": 2.4071211288400095e-06, "loss": 0.0602, "step": 148140 }, { "epoch": 4.38132134618797, "grad_norm": 1.0756185054779053, "learning_rate": 2.40699443892107e-06, "loss": 0.0675, "step": 148150 }, { "epoch": 4.381617081682143, "grad_norm": 0.8496521711349487, "learning_rate": 2.4068677490021307e-06, "loss": 0.0641, "step": 148160 }, { "epoch": 4.3819128171763175, "grad_norm": 1.2841100692749023, "learning_rate": 2.406741059083191e-06, "loss": 0.0687, "step": 148170 }, { "epoch": 4.382208552670492, "grad_norm": 0.9400937557220459, "learning_rate": 2.406614369164252e-06, "loss": 0.0674, "step": 148180 }, { "epoch": 4.382504288164665, "grad_norm": 0.7067880630493164, "learning_rate": 2.4064876792453122e-06, "loss": 0.0681, "step": 148190 }, { "epoch": 4.38280002365884, "grad_norm": 0.8938435316085815, "learning_rate": 2.4063609893263734e-06, "loss": 0.0674, "step": 148200 }, { "epoch": 4.383095759153013, "grad_norm": 1.0140894651412964, "learning_rate": 2.406234299407434e-06, "loss": 0.067, "step": 148210 }, { "epoch": 4.383391494647188, "grad_norm": 0.9609116315841675, "learning_rate": 2.4061076094884946e-06, "loss": 0.0752, "step": 148220 }, { "epoch": 4.383687230141361, "grad_norm": 0.8779718279838562, "learning_rate": 2.405980919569555e-06, "loss": 0.0671, "step": 148230 }, { "epoch": 4.383982965635536, "grad_norm": 0.8687759041786194, "learning_rate": 2.4058542296506153e-06, "loss": 0.0643, "step": 148240 }, { "epoch": 4.38427870112971, "grad_norm": 0.5349277257919312, "learning_rate": 2.405727539731676e-06, "loss": 0.0535, "step": 148250 }, { "epoch": 4.3845744366238835, "grad_norm": 0.7365890145301819, "learning_rate": 2.4056008498127365e-06, "loss": 0.0729, "step": 148260 }, { "epoch": 4.384870172118058, "grad_norm": 0.7384339570999146, "learning_rate": 2.4054741598937973e-06, "loss": 0.078, "step": 148270 }, { "epoch": 4.385165907612231, "grad_norm": 1.0812052488327026, "learning_rate": 2.405347469974858e-06, "loss": 0.0816, "step": 148280 }, { "epoch": 4.385461643106406, "grad_norm": 0.8383809328079224, "learning_rate": 2.405220780055919e-06, "loss": 0.0592, "step": 148290 }, { "epoch": 4.385757378600579, "grad_norm": 0.9004945755004883, "learning_rate": 2.4050940901369792e-06, "loss": 0.0622, "step": 148300 }, { "epoch": 4.386053114094754, "grad_norm": 0.6646020412445068, "learning_rate": 2.40496740021804e-06, "loss": 0.0539, "step": 148310 }, { "epoch": 4.386348849588928, "grad_norm": 1.4906911849975586, "learning_rate": 2.4048407102991004e-06, "loss": 0.0981, "step": 148320 }, { "epoch": 4.3866445850831015, "grad_norm": 0.45504453778266907, "learning_rate": 2.404714020380161e-06, "loss": 0.0645, "step": 148330 }, { "epoch": 4.386940320577276, "grad_norm": 0.6468295454978943, "learning_rate": 2.4045873304612215e-06, "loss": 0.0682, "step": 148340 }, { "epoch": 4.387236056071449, "grad_norm": 0.4845370650291443, "learning_rate": 2.4044606405422823e-06, "loss": 0.0662, "step": 148350 }, { "epoch": 4.387531791565624, "grad_norm": 0.691207766532898, "learning_rate": 2.404333950623343e-06, "loss": 0.0678, "step": 148360 }, { "epoch": 4.387827527059798, "grad_norm": 1.295333981513977, "learning_rate": 2.404207260704404e-06, "loss": 0.0708, "step": 148370 }, { "epoch": 4.388123262553972, "grad_norm": 0.7903343439102173, "learning_rate": 2.4040805707854643e-06, "loss": 0.0716, "step": 148380 }, { "epoch": 4.388418998048146, "grad_norm": 1.7370882034301758, "learning_rate": 2.403953880866525e-06, "loss": 0.0674, "step": 148390 }, { "epoch": 4.38871473354232, "grad_norm": 0.995916485786438, "learning_rate": 2.4038271909475854e-06, "loss": 0.058, "step": 148400 }, { "epoch": 4.389010469036494, "grad_norm": 0.8004736304283142, "learning_rate": 2.403700501028646e-06, "loss": 0.0667, "step": 148410 }, { "epoch": 4.389306204530667, "grad_norm": 0.9469745755195618, "learning_rate": 2.4035738111097066e-06, "loss": 0.0832, "step": 148420 }, { "epoch": 4.389601940024842, "grad_norm": 0.8740888833999634, "learning_rate": 2.4034471211907674e-06, "loss": 0.0864, "step": 148430 }, { "epoch": 4.389897675519016, "grad_norm": 0.7773456573486328, "learning_rate": 2.403320431271828e-06, "loss": 0.0754, "step": 148440 }, { "epoch": 4.39019341101319, "grad_norm": 0.9590563178062439, "learning_rate": 2.403193741352889e-06, "loss": 0.0749, "step": 148450 }, { "epoch": 4.390489146507364, "grad_norm": 1.1473093032836914, "learning_rate": 2.4030670514339493e-06, "loss": 0.0621, "step": 148460 }, { "epoch": 4.390784882001538, "grad_norm": 1.0636407136917114, "learning_rate": 2.40294036151501e-06, "loss": 0.0732, "step": 148470 }, { "epoch": 4.391080617495712, "grad_norm": 0.8413029313087463, "learning_rate": 2.4028136715960705e-06, "loss": 0.0727, "step": 148480 }, { "epoch": 4.3913763529898855, "grad_norm": 0.8267503976821899, "learning_rate": 2.4026869816771313e-06, "loss": 0.0817, "step": 148490 }, { "epoch": 4.39167208848406, "grad_norm": 0.795626163482666, "learning_rate": 2.4025602917581916e-06, "loss": 0.0622, "step": 148500 }, { "epoch": 4.391967823978234, "grad_norm": 1.0280448198318481, "learning_rate": 2.4024336018392524e-06, "loss": 0.0732, "step": 148510 }, { "epoch": 4.392263559472408, "grad_norm": 0.9574740529060364, "learning_rate": 2.402306911920313e-06, "loss": 0.0834, "step": 148520 }, { "epoch": 4.392559294966582, "grad_norm": 1.2472065687179565, "learning_rate": 2.402180222001374e-06, "loss": 0.0642, "step": 148530 }, { "epoch": 4.392855030460756, "grad_norm": 1.2800267934799194, "learning_rate": 2.4020535320824344e-06, "loss": 0.0713, "step": 148540 }, { "epoch": 4.39315076595493, "grad_norm": 1.179341197013855, "learning_rate": 2.401926842163495e-06, "loss": 0.0649, "step": 148550 }, { "epoch": 4.3934465014491035, "grad_norm": 1.1163303852081299, "learning_rate": 2.4018001522445555e-06, "loss": 0.0662, "step": 148560 }, { "epoch": 4.393742236943278, "grad_norm": 1.6352276802062988, "learning_rate": 2.4016734623256163e-06, "loss": 0.0702, "step": 148570 }, { "epoch": 4.394037972437452, "grad_norm": 0.958185613155365, "learning_rate": 2.4015467724066767e-06, "loss": 0.0706, "step": 148580 }, { "epoch": 4.394333707931626, "grad_norm": 1.0244560241699219, "learning_rate": 2.4014200824877375e-06, "loss": 0.0609, "step": 148590 }, { "epoch": 4.3946294434258, "grad_norm": 0.5446419715881348, "learning_rate": 2.4012933925687982e-06, "loss": 0.0627, "step": 148600 }, { "epoch": 4.394925178919974, "grad_norm": 1.1916375160217285, "learning_rate": 2.401166702649859e-06, "loss": 0.0671, "step": 148610 }, { "epoch": 4.395220914414148, "grad_norm": 0.8505241870880127, "learning_rate": 2.4010400127309194e-06, "loss": 0.0672, "step": 148620 }, { "epoch": 4.395516649908322, "grad_norm": 1.225804328918457, "learning_rate": 2.40091332281198e-06, "loss": 0.0734, "step": 148630 }, { "epoch": 4.395812385402496, "grad_norm": 0.5978350639343262, "learning_rate": 2.4007866328930406e-06, "loss": 0.0752, "step": 148640 }, { "epoch": 4.39610812089667, "grad_norm": 0.7081311941146851, "learning_rate": 2.4006599429741013e-06, "loss": 0.0571, "step": 148650 }, { "epoch": 4.396403856390844, "grad_norm": 0.77036052942276, "learning_rate": 2.4005332530551617e-06, "loss": 0.0847, "step": 148660 }, { "epoch": 4.396699591885018, "grad_norm": 0.8881434798240662, "learning_rate": 2.400406563136222e-06, "loss": 0.0793, "step": 148670 }, { "epoch": 4.396995327379192, "grad_norm": 1.0806859731674194, "learning_rate": 2.4002798732172833e-06, "loss": 0.0679, "step": 148680 }, { "epoch": 4.397291062873366, "grad_norm": 0.8423462510108948, "learning_rate": 2.4001531832983437e-06, "loss": 0.057, "step": 148690 }, { "epoch": 4.39758679836754, "grad_norm": 0.8792159557342529, "learning_rate": 2.4000264933794044e-06, "loss": 0.0587, "step": 148700 }, { "epoch": 4.397882533861714, "grad_norm": 0.658271849155426, "learning_rate": 2.399899803460465e-06, "loss": 0.0598, "step": 148710 }, { "epoch": 4.398178269355888, "grad_norm": 0.5832363963127136, "learning_rate": 2.3997731135415256e-06, "loss": 0.0754, "step": 148720 }, { "epoch": 4.398474004850062, "grad_norm": 0.895829439163208, "learning_rate": 2.399646423622586e-06, "loss": 0.0801, "step": 148730 }, { "epoch": 4.398769740344236, "grad_norm": 0.7852542996406555, "learning_rate": 2.3995197337036468e-06, "loss": 0.078, "step": 148740 }, { "epoch": 4.39906547583841, "grad_norm": 1.0513324737548828, "learning_rate": 2.399393043784707e-06, "loss": 0.0578, "step": 148750 }, { "epoch": 4.399361211332584, "grad_norm": 0.5086524486541748, "learning_rate": 2.3992663538657683e-06, "loss": 0.061, "step": 148760 }, { "epoch": 4.399656946826759, "grad_norm": 1.699764370918274, "learning_rate": 2.3991396639468287e-06, "loss": 0.0845, "step": 148770 }, { "epoch": 4.399952682320932, "grad_norm": 0.7588411569595337, "learning_rate": 2.3990129740278895e-06, "loss": 0.0692, "step": 148780 }, { "epoch": 4.4002484178151064, "grad_norm": 1.197020173072815, "learning_rate": 2.39888628410895e-06, "loss": 0.0699, "step": 148790 }, { "epoch": 4.40054415330928, "grad_norm": 1.078066110610962, "learning_rate": 2.3987595941900106e-06, "loss": 0.0658, "step": 148800 }, { "epoch": 4.400839888803454, "grad_norm": 0.8214796781539917, "learning_rate": 2.398632904271071e-06, "loss": 0.0733, "step": 148810 }, { "epoch": 4.401135624297628, "grad_norm": 1.5645085573196411, "learning_rate": 2.398506214352132e-06, "loss": 0.0812, "step": 148820 }, { "epoch": 4.401431359791802, "grad_norm": 0.70768141746521, "learning_rate": 2.398379524433192e-06, "loss": 0.0823, "step": 148830 }, { "epoch": 4.401727095285977, "grad_norm": 1.3351733684539795, "learning_rate": 2.3982528345142534e-06, "loss": 0.0732, "step": 148840 }, { "epoch": 4.40202283078015, "grad_norm": 1.0733177661895752, "learning_rate": 2.3981261445953137e-06, "loss": 0.0666, "step": 148850 }, { "epoch": 4.4023185662743245, "grad_norm": 1.4243957996368408, "learning_rate": 2.3979994546763745e-06, "loss": 0.0659, "step": 148860 }, { "epoch": 4.402614301768498, "grad_norm": 0.5901256203651428, "learning_rate": 2.397872764757435e-06, "loss": 0.0791, "step": 148870 }, { "epoch": 4.402910037262672, "grad_norm": 0.5878445506095886, "learning_rate": 2.3977460748384957e-06, "loss": 0.0706, "step": 148880 }, { "epoch": 4.403205772756846, "grad_norm": 1.8817243576049805, "learning_rate": 2.397619384919556e-06, "loss": 0.0671, "step": 148890 }, { "epoch": 4.40350150825102, "grad_norm": 0.5538864731788635, "learning_rate": 2.397492695000617e-06, "loss": 0.0608, "step": 148900 }, { "epoch": 4.403797243745195, "grad_norm": 1.0779392719268799, "learning_rate": 2.397366005081677e-06, "loss": 0.0578, "step": 148910 }, { "epoch": 4.404092979239368, "grad_norm": 0.5688900351524353, "learning_rate": 2.3972393151627384e-06, "loss": 0.0695, "step": 148920 }, { "epoch": 4.4043887147335425, "grad_norm": 1.0698740482330322, "learning_rate": 2.397112625243799e-06, "loss": 0.0757, "step": 148930 }, { "epoch": 4.404684450227716, "grad_norm": 0.6485323309898376, "learning_rate": 2.3969859353248596e-06, "loss": 0.0611, "step": 148940 }, { "epoch": 4.40498018572189, "grad_norm": 1.496636152267456, "learning_rate": 2.39685924540592e-06, "loss": 0.0727, "step": 148950 }, { "epoch": 4.405275921216065, "grad_norm": 1.20405113697052, "learning_rate": 2.3967325554869807e-06, "loss": 0.0656, "step": 148960 }, { "epoch": 4.405571656710238, "grad_norm": 0.5168381929397583, "learning_rate": 2.396605865568041e-06, "loss": 0.0722, "step": 148970 }, { "epoch": 4.405867392204413, "grad_norm": 0.719154417514801, "learning_rate": 2.396479175649102e-06, "loss": 0.0857, "step": 148980 }, { "epoch": 4.406163127698586, "grad_norm": 0.9638093113899231, "learning_rate": 2.3963524857301623e-06, "loss": 0.0785, "step": 148990 }, { "epoch": 4.406458863192761, "grad_norm": 0.41607263684272766, "learning_rate": 2.3962257958112235e-06, "loss": 0.0435, "step": 149000 }, { "epoch": 4.406754598686934, "grad_norm": 0.7724089026451111, "learning_rate": 2.396099105892284e-06, "loss": 0.0743, "step": 149010 }, { "epoch": 4.4070503341811085, "grad_norm": 1.0493061542510986, "learning_rate": 2.3959724159733446e-06, "loss": 0.0739, "step": 149020 }, { "epoch": 4.407346069675283, "grad_norm": 1.130246877670288, "learning_rate": 2.395845726054405e-06, "loss": 0.0727, "step": 149030 }, { "epoch": 4.407641805169456, "grad_norm": 0.8092392086982727, "learning_rate": 2.3957190361354658e-06, "loss": 0.0606, "step": 149040 }, { "epoch": 4.407937540663631, "grad_norm": 1.022334337234497, "learning_rate": 2.395592346216526e-06, "loss": 0.0596, "step": 149050 }, { "epoch": 4.408233276157804, "grad_norm": 1.0613505840301514, "learning_rate": 2.395465656297587e-06, "loss": 0.0583, "step": 149060 }, { "epoch": 4.408529011651979, "grad_norm": 0.4660550653934479, "learning_rate": 2.3953389663786473e-06, "loss": 0.0736, "step": 149070 }, { "epoch": 4.408824747146152, "grad_norm": 0.5438719391822815, "learning_rate": 2.395212276459708e-06, "loss": 0.0644, "step": 149080 }, { "epoch": 4.4091204826403265, "grad_norm": 0.8577509522438049, "learning_rate": 2.395085586540769e-06, "loss": 0.0589, "step": 149090 }, { "epoch": 4.409416218134501, "grad_norm": 0.6602715849876404, "learning_rate": 2.3949588966218292e-06, "loss": 0.0594, "step": 149100 }, { "epoch": 4.409711953628674, "grad_norm": 1.271741509437561, "learning_rate": 2.39483220670289e-06, "loss": 0.0579, "step": 149110 }, { "epoch": 4.410007689122849, "grad_norm": 0.6593549847602844, "learning_rate": 2.3947055167839504e-06, "loss": 0.0735, "step": 149120 }, { "epoch": 4.410303424617022, "grad_norm": 0.8750271201133728, "learning_rate": 2.394578826865011e-06, "loss": 0.0789, "step": 149130 }, { "epoch": 4.410599160111197, "grad_norm": 0.6533330678939819, "learning_rate": 2.3944521369460716e-06, "loss": 0.0598, "step": 149140 }, { "epoch": 4.41089489560537, "grad_norm": 0.9662790894508362, "learning_rate": 2.3943254470271323e-06, "loss": 0.0554, "step": 149150 }, { "epoch": 4.411190631099545, "grad_norm": 1.195530652999878, "learning_rate": 2.394198757108193e-06, "loss": 0.0653, "step": 149160 }, { "epoch": 4.411486366593719, "grad_norm": 1.8328055143356323, "learning_rate": 2.394072067189254e-06, "loss": 0.0702, "step": 149170 }, { "epoch": 4.4117821020878925, "grad_norm": 1.1861307621002197, "learning_rate": 2.3939453772703143e-06, "loss": 0.0806, "step": 149180 }, { "epoch": 4.412077837582067, "grad_norm": 1.080043077468872, "learning_rate": 2.393818687351375e-06, "loss": 0.0694, "step": 149190 }, { "epoch": 4.41237357307624, "grad_norm": 0.9164556860923767, "learning_rate": 2.3936919974324354e-06, "loss": 0.0621, "step": 149200 }, { "epoch": 4.412669308570415, "grad_norm": 1.2527304887771606, "learning_rate": 2.3935653075134962e-06, "loss": 0.0647, "step": 149210 }, { "epoch": 4.412965044064588, "grad_norm": 0.7872610688209534, "learning_rate": 2.3934386175945566e-06, "loss": 0.0785, "step": 149220 }, { "epoch": 4.413260779558763, "grad_norm": 0.8350162506103516, "learning_rate": 2.3933119276756174e-06, "loss": 0.0642, "step": 149230 }, { "epoch": 4.413556515052937, "grad_norm": 1.0089033842086792, "learning_rate": 2.393185237756678e-06, "loss": 0.0634, "step": 149240 }, { "epoch": 4.4138522505471105, "grad_norm": 1.554247260093689, "learning_rate": 2.393058547837739e-06, "loss": 0.059, "step": 149250 }, { "epoch": 4.414147986041285, "grad_norm": 0.7091280221939087, "learning_rate": 2.3929318579187993e-06, "loss": 0.0703, "step": 149260 }, { "epoch": 4.414443721535458, "grad_norm": 1.1153258085250854, "learning_rate": 2.39280516799986e-06, "loss": 0.0766, "step": 149270 }, { "epoch": 4.414739457029633, "grad_norm": 0.5004022717475891, "learning_rate": 2.3926784780809205e-06, "loss": 0.0625, "step": 149280 }, { "epoch": 4.415035192523806, "grad_norm": 0.8983291983604431, "learning_rate": 2.3925517881619813e-06, "loss": 0.0715, "step": 149290 }, { "epoch": 4.415330928017981, "grad_norm": 0.9887843728065491, "learning_rate": 2.3924250982430416e-06, "loss": 0.0768, "step": 149300 }, { "epoch": 4.415626663512155, "grad_norm": 0.5920582413673401, "learning_rate": 2.3922984083241024e-06, "loss": 0.075, "step": 149310 }, { "epoch": 4.4159223990063285, "grad_norm": 0.9994083642959595, "learning_rate": 2.3921717184051632e-06, "loss": 0.0798, "step": 149320 }, { "epoch": 4.416218134500503, "grad_norm": 1.092774510383606, "learning_rate": 2.392045028486224e-06, "loss": 0.0743, "step": 149330 }, { "epoch": 4.416513869994676, "grad_norm": 0.5678912997245789, "learning_rate": 2.3919183385672844e-06, "loss": 0.0713, "step": 149340 }, { "epoch": 4.416809605488851, "grad_norm": 0.6674422025680542, "learning_rate": 2.391791648648345e-06, "loss": 0.0671, "step": 149350 }, { "epoch": 4.417105340983024, "grad_norm": 0.5912424325942993, "learning_rate": 2.3916649587294055e-06, "loss": 0.0783, "step": 149360 }, { "epoch": 4.417401076477199, "grad_norm": 0.6651091575622559, "learning_rate": 2.3915382688104663e-06, "loss": 0.1001, "step": 149370 }, { "epoch": 4.417696811971373, "grad_norm": 1.1964572668075562, "learning_rate": 2.3914115788915267e-06, "loss": 0.0736, "step": 149380 }, { "epoch": 4.417992547465547, "grad_norm": 1.1757255792617798, "learning_rate": 2.3912848889725875e-06, "loss": 0.0775, "step": 149390 }, { "epoch": 4.418288282959721, "grad_norm": 0.6292234063148499, "learning_rate": 2.3911581990536483e-06, "loss": 0.0618, "step": 149400 }, { "epoch": 4.4185840184538945, "grad_norm": 1.4740544557571411, "learning_rate": 2.391031509134709e-06, "loss": 0.076, "step": 149410 }, { "epoch": 4.418879753948069, "grad_norm": 0.8409475088119507, "learning_rate": 2.3909048192157694e-06, "loss": 0.074, "step": 149420 }, { "epoch": 4.419175489442243, "grad_norm": 0.720960259437561, "learning_rate": 2.3907781292968302e-06, "loss": 0.0667, "step": 149430 }, { "epoch": 4.419471224936417, "grad_norm": 0.672505259513855, "learning_rate": 2.3906514393778906e-06, "loss": 0.0675, "step": 149440 }, { "epoch": 4.419766960430591, "grad_norm": 0.5752819776535034, "learning_rate": 2.3905247494589514e-06, "loss": 0.0608, "step": 149450 }, { "epoch": 4.420062695924765, "grad_norm": 0.7505975365638733, "learning_rate": 2.3903980595400117e-06, "loss": 0.0679, "step": 149460 }, { "epoch": 4.420358431418939, "grad_norm": 0.6867398023605347, "learning_rate": 2.3902713696210725e-06, "loss": 0.0753, "step": 149470 }, { "epoch": 4.4206541669131125, "grad_norm": 0.8361939191818237, "learning_rate": 2.3901446797021333e-06, "loss": 0.0727, "step": 149480 }, { "epoch": 4.420949902407287, "grad_norm": 0.7871248722076416, "learning_rate": 2.3900179897831937e-06, "loss": 0.0705, "step": 149490 }, { "epoch": 4.421245637901461, "grad_norm": 0.6379683017730713, "learning_rate": 2.3898912998642545e-06, "loss": 0.0538, "step": 149500 }, { "epoch": 4.421541373395635, "grad_norm": 0.8173292875289917, "learning_rate": 2.389764609945315e-06, "loss": 0.0643, "step": 149510 }, { "epoch": 4.421837108889809, "grad_norm": 1.187943458557129, "learning_rate": 2.3896379200263756e-06, "loss": 0.0779, "step": 149520 }, { "epoch": 4.422132844383983, "grad_norm": 0.6571731567382812, "learning_rate": 2.389511230107436e-06, "loss": 0.0626, "step": 149530 }, { "epoch": 4.422428579878157, "grad_norm": 1.2055561542510986, "learning_rate": 2.3893845401884968e-06, "loss": 0.0696, "step": 149540 }, { "epoch": 4.422724315372331, "grad_norm": 0.8773232698440552, "learning_rate": 2.389257850269557e-06, "loss": 0.0662, "step": 149550 }, { "epoch": 4.423020050866505, "grad_norm": 0.936718761920929, "learning_rate": 2.3891311603506184e-06, "loss": 0.0711, "step": 149560 }, { "epoch": 4.423315786360679, "grad_norm": 1.4561930894851685, "learning_rate": 2.3890044704316787e-06, "loss": 0.0787, "step": 149570 }, { "epoch": 4.423611521854853, "grad_norm": 1.2865073680877686, "learning_rate": 2.3888777805127395e-06, "loss": 0.076, "step": 149580 }, { "epoch": 4.423907257349027, "grad_norm": 0.9167488813400269, "learning_rate": 2.3887510905938e-06, "loss": 0.0713, "step": 149590 }, { "epoch": 4.424202992843201, "grad_norm": 0.6167562007904053, "learning_rate": 2.3886244006748607e-06, "loss": 0.0561, "step": 149600 }, { "epoch": 4.424498728337375, "grad_norm": 1.017754077911377, "learning_rate": 2.388497710755921e-06, "loss": 0.0861, "step": 149610 }, { "epoch": 4.4247944638315495, "grad_norm": 0.9085825681686401, "learning_rate": 2.388371020836982e-06, "loss": 0.0833, "step": 149620 }, { "epoch": 4.425090199325723, "grad_norm": 1.1289383172988892, "learning_rate": 2.388244330918042e-06, "loss": 0.0694, "step": 149630 }, { "epoch": 4.425385934819897, "grad_norm": 0.8497929573059082, "learning_rate": 2.3881176409991034e-06, "loss": 0.0697, "step": 149640 }, { "epoch": 4.425681670314071, "grad_norm": 1.3411750793457031, "learning_rate": 2.3879909510801638e-06, "loss": 0.0557, "step": 149650 }, { "epoch": 4.425977405808245, "grad_norm": 1.1063181161880493, "learning_rate": 2.3878642611612246e-06, "loss": 0.0563, "step": 149660 }, { "epoch": 4.426273141302419, "grad_norm": 0.8803395628929138, "learning_rate": 2.387737571242285e-06, "loss": 0.0712, "step": 149670 }, { "epoch": 4.426568876796593, "grad_norm": 0.9824532270431519, "learning_rate": 2.3876108813233457e-06, "loss": 0.0699, "step": 149680 }, { "epoch": 4.4268646122907676, "grad_norm": 1.0015932321548462, "learning_rate": 2.387484191404406e-06, "loss": 0.0673, "step": 149690 }, { "epoch": 4.427160347784941, "grad_norm": 0.444802463054657, "learning_rate": 2.387357501485467e-06, "loss": 0.0528, "step": 149700 }, { "epoch": 4.427456083279115, "grad_norm": 0.845429003238678, "learning_rate": 2.3872308115665272e-06, "loss": 0.0553, "step": 149710 }, { "epoch": 4.427751818773289, "grad_norm": 1.1369013786315918, "learning_rate": 2.3871041216475885e-06, "loss": 0.0777, "step": 149720 }, { "epoch": 4.428047554267463, "grad_norm": 0.8141046166419983, "learning_rate": 2.386977431728649e-06, "loss": 0.0644, "step": 149730 }, { "epoch": 4.428343289761637, "grad_norm": 1.0007861852645874, "learning_rate": 2.3868507418097096e-06, "loss": 0.0603, "step": 149740 }, { "epoch": 4.428639025255811, "grad_norm": 0.9523950219154358, "learning_rate": 2.38672405189077e-06, "loss": 0.0588, "step": 149750 }, { "epoch": 4.428934760749986, "grad_norm": 0.6920627951622009, "learning_rate": 2.3865973619718308e-06, "loss": 0.0565, "step": 149760 }, { "epoch": 4.429230496244159, "grad_norm": 0.8728414177894592, "learning_rate": 2.386470672052891e-06, "loss": 0.0751, "step": 149770 }, { "epoch": 4.4295262317383335, "grad_norm": 0.627905547618866, "learning_rate": 2.386343982133952e-06, "loss": 0.07, "step": 149780 }, { "epoch": 4.429821967232507, "grad_norm": 1.108985185623169, "learning_rate": 2.3862172922150123e-06, "loss": 0.0679, "step": 149790 }, { "epoch": 4.430117702726681, "grad_norm": 0.5942022204399109, "learning_rate": 2.3860906022960735e-06, "loss": 0.0581, "step": 149800 }, { "epoch": 4.430413438220855, "grad_norm": 1.1607298851013184, "learning_rate": 2.385963912377134e-06, "loss": 0.071, "step": 149810 }, { "epoch": 4.430709173715029, "grad_norm": 0.7518234848976135, "learning_rate": 2.3858372224581947e-06, "loss": 0.0661, "step": 149820 }, { "epoch": 4.431004909209204, "grad_norm": 0.7617647647857666, "learning_rate": 2.385710532539255e-06, "loss": 0.0715, "step": 149830 }, { "epoch": 4.431300644703377, "grad_norm": 0.7706639170646667, "learning_rate": 2.385583842620316e-06, "loss": 0.0691, "step": 149840 }, { "epoch": 4.4315963801975515, "grad_norm": 0.46669235825538635, "learning_rate": 2.385457152701376e-06, "loss": 0.0531, "step": 149850 }, { "epoch": 4.431892115691725, "grad_norm": 0.8377664089202881, "learning_rate": 2.385330462782437e-06, "loss": 0.0719, "step": 149860 }, { "epoch": 4.432187851185899, "grad_norm": 0.7954787015914917, "learning_rate": 2.3852037728634973e-06, "loss": 0.0723, "step": 149870 }, { "epoch": 4.432483586680073, "grad_norm": 0.5875369906425476, "learning_rate": 2.3850770829445585e-06, "loss": 0.0676, "step": 149880 }, { "epoch": 4.432779322174247, "grad_norm": 1.0172357559204102, "learning_rate": 2.384950393025619e-06, "loss": 0.0616, "step": 149890 }, { "epoch": 4.433075057668422, "grad_norm": 1.0485597848892212, "learning_rate": 2.3848237031066793e-06, "loss": 0.0739, "step": 149900 }, { "epoch": 4.433370793162595, "grad_norm": 0.741753876209259, "learning_rate": 2.38469701318774e-06, "loss": 0.0699, "step": 149910 }, { "epoch": 4.43366652865677, "grad_norm": 0.5398454666137695, "learning_rate": 2.3845703232688004e-06, "loss": 0.0782, "step": 149920 }, { "epoch": 4.433962264150943, "grad_norm": 0.9938831925392151, "learning_rate": 2.3844436333498612e-06, "loss": 0.0594, "step": 149930 }, { "epoch": 4.4342579996451175, "grad_norm": 1.1476424932479858, "learning_rate": 2.3843169434309216e-06, "loss": 0.0563, "step": 149940 }, { "epoch": 4.434553735139291, "grad_norm": 0.8189972043037415, "learning_rate": 2.3841902535119824e-06, "loss": 0.0576, "step": 149950 }, { "epoch": 4.434849470633465, "grad_norm": 0.8681055903434753, "learning_rate": 2.384063563593043e-06, "loss": 0.0609, "step": 149960 }, { "epoch": 4.43514520612764, "grad_norm": 0.8959430456161499, "learning_rate": 2.383936873674104e-06, "loss": 0.0643, "step": 149970 }, { "epoch": 4.435440941621813, "grad_norm": 1.1740847826004028, "learning_rate": 2.3838101837551643e-06, "loss": 0.0742, "step": 149980 }, { "epoch": 4.435736677115988, "grad_norm": 0.5991647839546204, "learning_rate": 2.383683493836225e-06, "loss": 0.0638, "step": 149990 }, { "epoch": 4.436032412610161, "grad_norm": 0.6622368097305298, "learning_rate": 2.3835568039172855e-06, "loss": 0.0594, "step": 150000 }, { "epoch": 4.4363281481043355, "grad_norm": 0.8438031673431396, "learning_rate": 2.3834301139983463e-06, "loss": 0.0751, "step": 150010 }, { "epoch": 4.43662388359851, "grad_norm": 0.9125930666923523, "learning_rate": 2.3833034240794066e-06, "loss": 0.0681, "step": 150020 }, { "epoch": 4.436919619092683, "grad_norm": 0.9328869581222534, "learning_rate": 2.3831767341604674e-06, "loss": 0.0786, "step": 150030 }, { "epoch": 4.437215354586858, "grad_norm": 0.9678542017936707, "learning_rate": 2.383050044241528e-06, "loss": 0.0685, "step": 150040 }, { "epoch": 4.437511090081031, "grad_norm": 0.33015328645706177, "learning_rate": 2.382923354322589e-06, "loss": 0.0476, "step": 150050 }, { "epoch": 4.437806825575206, "grad_norm": 1.2862086296081543, "learning_rate": 2.3827966644036494e-06, "loss": 0.074, "step": 150060 }, { "epoch": 4.438102561069379, "grad_norm": 0.9810709953308105, "learning_rate": 2.38266997448471e-06, "loss": 0.0704, "step": 150070 }, { "epoch": 4.438398296563554, "grad_norm": 1.3882248401641846, "learning_rate": 2.3825432845657705e-06, "loss": 0.076, "step": 150080 }, { "epoch": 4.438694032057728, "grad_norm": 1.2243531942367554, "learning_rate": 2.3824165946468313e-06, "loss": 0.0639, "step": 150090 }, { "epoch": 4.4389897675519014, "grad_norm": 0.7795746922492981, "learning_rate": 2.3822899047278917e-06, "loss": 0.0625, "step": 150100 }, { "epoch": 4.439285503046076, "grad_norm": 1.1501266956329346, "learning_rate": 2.3821632148089525e-06, "loss": 0.0688, "step": 150110 }, { "epoch": 4.439581238540249, "grad_norm": 1.2692872285842896, "learning_rate": 2.3820365248900133e-06, "loss": 0.0619, "step": 150120 }, { "epoch": 4.439876974034424, "grad_norm": 1.3592941761016846, "learning_rate": 2.381909834971074e-06, "loss": 0.063, "step": 150130 }, { "epoch": 4.440172709528597, "grad_norm": 1.100340723991394, "learning_rate": 2.3817831450521344e-06, "loss": 0.0763, "step": 150140 }, { "epoch": 4.440468445022772, "grad_norm": 1.4510929584503174, "learning_rate": 2.381656455133195e-06, "loss": 0.0614, "step": 150150 }, { "epoch": 4.440764180516946, "grad_norm": 0.9047114253044128, "learning_rate": 2.3815297652142556e-06, "loss": 0.0833, "step": 150160 }, { "epoch": 4.4410599160111195, "grad_norm": 0.8410454988479614, "learning_rate": 2.3814030752953164e-06, "loss": 0.0764, "step": 150170 }, { "epoch": 4.441355651505294, "grad_norm": 1.0426393747329712, "learning_rate": 2.3812763853763767e-06, "loss": 0.0644, "step": 150180 }, { "epoch": 4.441651386999467, "grad_norm": 0.47747689485549927, "learning_rate": 2.3811496954574375e-06, "loss": 0.0616, "step": 150190 }, { "epoch": 4.441947122493642, "grad_norm": 1.2163783311843872, "learning_rate": 2.3810230055384983e-06, "loss": 0.0696, "step": 150200 }, { "epoch": 4.442242857987816, "grad_norm": 1.0997217893600464, "learning_rate": 2.380896315619559e-06, "loss": 0.0704, "step": 150210 }, { "epoch": 4.44253859348199, "grad_norm": 1.24382746219635, "learning_rate": 2.3807696257006195e-06, "loss": 0.0653, "step": 150220 }, { "epoch": 4.442834328976164, "grad_norm": 0.8968930244445801, "learning_rate": 2.3806429357816802e-06, "loss": 0.0781, "step": 150230 }, { "epoch": 4.4431300644703375, "grad_norm": 0.9282222986221313, "learning_rate": 2.3805162458627406e-06, "loss": 0.0823, "step": 150240 }, { "epoch": 4.443425799964512, "grad_norm": 0.6434268355369568, "learning_rate": 2.3803895559438014e-06, "loss": 0.0514, "step": 150250 }, { "epoch": 4.443721535458685, "grad_norm": 0.9701704382896423, "learning_rate": 2.3802628660248618e-06, "loss": 0.0655, "step": 150260 }, { "epoch": 4.44401727095286, "grad_norm": 0.9868311285972595, "learning_rate": 2.3801361761059226e-06, "loss": 0.0741, "step": 150270 }, { "epoch": 4.444313006447034, "grad_norm": 1.217084527015686, "learning_rate": 2.3800094861869833e-06, "loss": 0.0711, "step": 150280 }, { "epoch": 4.444608741941208, "grad_norm": 0.7504672408103943, "learning_rate": 2.379882796268044e-06, "loss": 0.0643, "step": 150290 }, { "epoch": 4.444904477435382, "grad_norm": 1.2017866373062134, "learning_rate": 2.3797561063491045e-06, "loss": 0.0686, "step": 150300 }, { "epoch": 4.445200212929556, "grad_norm": 0.9895902276039124, "learning_rate": 2.379629416430165e-06, "loss": 0.0647, "step": 150310 }, { "epoch": 4.44549594842373, "grad_norm": 0.6240612268447876, "learning_rate": 2.3795027265112257e-06, "loss": 0.0802, "step": 150320 }, { "epoch": 4.4457916839179035, "grad_norm": 0.7388050556182861, "learning_rate": 2.379376036592286e-06, "loss": 0.0802, "step": 150330 }, { "epoch": 4.446087419412078, "grad_norm": 1.073366641998291, "learning_rate": 2.379249346673347e-06, "loss": 0.0682, "step": 150340 }, { "epoch": 4.446383154906252, "grad_norm": 0.8596991300582886, "learning_rate": 2.379122656754407e-06, "loss": 0.0614, "step": 150350 }, { "epoch": 4.446678890400426, "grad_norm": 0.7652453780174255, "learning_rate": 2.3789959668354684e-06, "loss": 0.0673, "step": 150360 }, { "epoch": 4.4469746258946, "grad_norm": 1.13181734085083, "learning_rate": 2.3788692769165288e-06, "loss": 0.0726, "step": 150370 }, { "epoch": 4.447270361388774, "grad_norm": 0.9890280961990356, "learning_rate": 2.3787425869975895e-06, "loss": 0.085, "step": 150380 }, { "epoch": 4.447566096882948, "grad_norm": 0.4662114977836609, "learning_rate": 2.37861589707865e-06, "loss": 0.0688, "step": 150390 }, { "epoch": 4.4478618323771215, "grad_norm": 1.1662992238998413, "learning_rate": 2.3784892071597107e-06, "loss": 0.064, "step": 150400 }, { "epoch": 4.448157567871296, "grad_norm": 0.7861268520355225, "learning_rate": 2.378362517240771e-06, "loss": 0.0665, "step": 150410 }, { "epoch": 4.44845330336547, "grad_norm": 0.8846657872200012, "learning_rate": 2.378235827321832e-06, "loss": 0.069, "step": 150420 }, { "epoch": 4.448749038859644, "grad_norm": 0.9687169790267944, "learning_rate": 2.3781091374028922e-06, "loss": 0.0762, "step": 150430 }, { "epoch": 4.449044774353818, "grad_norm": 0.6175808310508728, "learning_rate": 2.3779824474839534e-06, "loss": 0.0691, "step": 150440 }, { "epoch": 4.449340509847992, "grad_norm": 0.8516145944595337, "learning_rate": 2.377855757565014e-06, "loss": 0.0799, "step": 150450 }, { "epoch": 4.449636245342166, "grad_norm": 1.745116949081421, "learning_rate": 2.3777290676460746e-06, "loss": 0.0655, "step": 150460 }, { "epoch": 4.44993198083634, "grad_norm": 1.0032652616500854, "learning_rate": 2.377602377727135e-06, "loss": 0.0771, "step": 150470 }, { "epoch": 4.450227716330514, "grad_norm": 1.2357516288757324, "learning_rate": 2.3774756878081957e-06, "loss": 0.0722, "step": 150480 }, { "epoch": 4.450523451824688, "grad_norm": 0.9757728576660156, "learning_rate": 2.377348997889256e-06, "loss": 0.0692, "step": 150490 }, { "epoch": 4.450819187318862, "grad_norm": 1.5503733158111572, "learning_rate": 2.377222307970317e-06, "loss": 0.0779, "step": 150500 }, { "epoch": 4.451114922813036, "grad_norm": 0.8091546893119812, "learning_rate": 2.3770956180513773e-06, "loss": 0.0627, "step": 150510 }, { "epoch": 4.45141065830721, "grad_norm": 1.2943627834320068, "learning_rate": 2.3769689281324385e-06, "loss": 0.1031, "step": 150520 }, { "epoch": 4.451706393801384, "grad_norm": 0.9151666164398193, "learning_rate": 2.376842238213499e-06, "loss": 0.0795, "step": 150530 }, { "epoch": 4.452002129295558, "grad_norm": 0.671714186668396, "learning_rate": 2.3767155482945596e-06, "loss": 0.0671, "step": 150540 }, { "epoch": 4.452297864789732, "grad_norm": 0.8308204412460327, "learning_rate": 2.37658885837562e-06, "loss": 0.0632, "step": 150550 }, { "epoch": 4.452593600283906, "grad_norm": 0.9760770797729492, "learning_rate": 2.376462168456681e-06, "loss": 0.0799, "step": 150560 }, { "epoch": 4.45288933577808, "grad_norm": 1.2746785879135132, "learning_rate": 2.376335478537741e-06, "loss": 0.072, "step": 150570 }, { "epoch": 4.453185071272254, "grad_norm": 0.8796069025993347, "learning_rate": 2.376208788618802e-06, "loss": 0.075, "step": 150580 }, { "epoch": 4.453480806766428, "grad_norm": 0.8225142955780029, "learning_rate": 2.3760820986998623e-06, "loss": 0.0772, "step": 150590 }, { "epoch": 4.453776542260602, "grad_norm": 0.6329404711723328, "learning_rate": 2.3759554087809235e-06, "loss": 0.0591, "step": 150600 }, { "epoch": 4.4540722777547765, "grad_norm": 1.3876665830612183, "learning_rate": 2.375828718861984e-06, "loss": 0.0565, "step": 150610 }, { "epoch": 4.45436801324895, "grad_norm": 0.7539305686950684, "learning_rate": 2.3757020289430447e-06, "loss": 0.0723, "step": 150620 }, { "epoch": 4.454663748743124, "grad_norm": 0.7295517921447754, "learning_rate": 2.375575339024105e-06, "loss": 0.0825, "step": 150630 }, { "epoch": 4.454959484237298, "grad_norm": 1.0457522869110107, "learning_rate": 2.375448649105166e-06, "loss": 0.0814, "step": 150640 }, { "epoch": 4.455255219731472, "grad_norm": 1.1351886987686157, "learning_rate": 2.375321959186226e-06, "loss": 0.0666, "step": 150650 }, { "epoch": 4.455550955225646, "grad_norm": 1.0186798572540283, "learning_rate": 2.375195269267287e-06, "loss": 0.0626, "step": 150660 }, { "epoch": 4.45584669071982, "grad_norm": 0.9588260054588318, "learning_rate": 2.3750685793483474e-06, "loss": 0.0675, "step": 150670 }, { "epoch": 4.456142426213995, "grad_norm": 1.0221627950668335, "learning_rate": 2.3749418894294086e-06, "loss": 0.0728, "step": 150680 }, { "epoch": 4.456438161708168, "grad_norm": 0.7133147120475769, "learning_rate": 2.374815199510469e-06, "loss": 0.0639, "step": 150690 }, { "epoch": 4.4567338972023425, "grad_norm": 1.6625628471374512, "learning_rate": 2.3746885095915297e-06, "loss": 0.074, "step": 150700 }, { "epoch": 4.457029632696516, "grad_norm": 0.9599630832672119, "learning_rate": 2.37456181967259e-06, "loss": 0.0641, "step": 150710 }, { "epoch": 4.45732536819069, "grad_norm": 0.9017776846885681, "learning_rate": 2.374435129753651e-06, "loss": 0.0809, "step": 150720 }, { "epoch": 4.457621103684864, "grad_norm": 0.9239205718040466, "learning_rate": 2.3743084398347113e-06, "loss": 0.0731, "step": 150730 }, { "epoch": 4.457916839179038, "grad_norm": 0.8669400811195374, "learning_rate": 2.3741817499157716e-06, "loss": 0.0584, "step": 150740 }, { "epoch": 4.458212574673213, "grad_norm": 0.8275465369224548, "learning_rate": 2.3740550599968324e-06, "loss": 0.0695, "step": 150750 }, { "epoch": 4.458508310167386, "grad_norm": 0.7192026972770691, "learning_rate": 2.373928370077893e-06, "loss": 0.0655, "step": 150760 }, { "epoch": 4.4588040456615605, "grad_norm": 0.8596952557563782, "learning_rate": 2.373801680158954e-06, "loss": 0.0792, "step": 150770 }, { "epoch": 4.459099781155734, "grad_norm": 0.7759280800819397, "learning_rate": 2.3736749902400144e-06, "loss": 0.082, "step": 150780 }, { "epoch": 4.459395516649908, "grad_norm": 1.0490621328353882, "learning_rate": 2.373548300321075e-06, "loss": 0.0647, "step": 150790 }, { "epoch": 4.459691252144083, "grad_norm": 0.9325178861618042, "learning_rate": 2.3734216104021355e-06, "loss": 0.0655, "step": 150800 }, { "epoch": 4.459986987638256, "grad_norm": 0.7055909633636475, "learning_rate": 2.3732949204831963e-06, "loss": 0.0713, "step": 150810 }, { "epoch": 4.460282723132431, "grad_norm": 1.4598140716552734, "learning_rate": 2.3731682305642567e-06, "loss": 0.079, "step": 150820 }, { "epoch": 4.460578458626604, "grad_norm": 1.247780442237854, "learning_rate": 2.3730415406453175e-06, "loss": 0.0741, "step": 150830 }, { "epoch": 4.460874194120779, "grad_norm": 0.9096190333366394, "learning_rate": 2.3729148507263782e-06, "loss": 0.0644, "step": 150840 }, { "epoch": 4.461169929614952, "grad_norm": 1.7214432954788208, "learning_rate": 2.372788160807439e-06, "loss": 0.0647, "step": 150850 }, { "epoch": 4.4614656651091265, "grad_norm": 0.7656156420707703, "learning_rate": 2.3726614708884994e-06, "loss": 0.0683, "step": 150860 }, { "epoch": 4.461761400603301, "grad_norm": 1.205881118774414, "learning_rate": 2.37253478096956e-06, "loss": 0.0785, "step": 150870 }, { "epoch": 4.462057136097474, "grad_norm": 0.9030925631523132, "learning_rate": 2.3724080910506206e-06, "loss": 0.0706, "step": 150880 }, { "epoch": 4.462352871591649, "grad_norm": 0.6680166721343994, "learning_rate": 2.3722814011316813e-06, "loss": 0.0661, "step": 150890 }, { "epoch": 4.462648607085822, "grad_norm": 0.8425313234329224, "learning_rate": 2.3721547112127417e-06, "loss": 0.0665, "step": 150900 }, { "epoch": 4.462944342579997, "grad_norm": 1.2188540697097778, "learning_rate": 2.3720280212938025e-06, "loss": 0.0729, "step": 150910 }, { "epoch": 4.46324007807417, "grad_norm": 0.7931913733482361, "learning_rate": 2.3719013313748633e-06, "loss": 0.0863, "step": 150920 }, { "epoch": 4.4635358135683445, "grad_norm": 0.7541639804840088, "learning_rate": 2.371774641455924e-06, "loss": 0.0718, "step": 150930 }, { "epoch": 4.463831549062519, "grad_norm": 1.2102829217910767, "learning_rate": 2.3716479515369844e-06, "loss": 0.066, "step": 150940 }, { "epoch": 4.464127284556692, "grad_norm": 1.0819021463394165, "learning_rate": 2.3715212616180452e-06, "loss": 0.0696, "step": 150950 }, { "epoch": 4.464423020050867, "grad_norm": 0.9881701469421387, "learning_rate": 2.3713945716991056e-06, "loss": 0.0593, "step": 150960 }, { "epoch": 4.46471875554504, "grad_norm": 0.6694579124450684, "learning_rate": 2.3712678817801664e-06, "loss": 0.076, "step": 150970 }, { "epoch": 4.465014491039215, "grad_norm": 1.0670623779296875, "learning_rate": 2.3711411918612268e-06, "loss": 0.0659, "step": 150980 }, { "epoch": 4.465310226533388, "grad_norm": 0.5867268443107605, "learning_rate": 2.3710145019422875e-06, "loss": 0.0707, "step": 150990 }, { "epoch": 4.4656059620275625, "grad_norm": 1.27621591091156, "learning_rate": 2.3708878120233483e-06, "loss": 0.074, "step": 151000 }, { "epoch": 4.465901697521737, "grad_norm": 0.7004911303520203, "learning_rate": 2.370761122104409e-06, "loss": 0.0633, "step": 151010 }, { "epoch": 4.46619743301591, "grad_norm": 1.0948244333267212, "learning_rate": 2.3706344321854695e-06, "loss": 0.0762, "step": 151020 }, { "epoch": 4.466493168510085, "grad_norm": 0.8142155408859253, "learning_rate": 2.3705077422665303e-06, "loss": 0.0819, "step": 151030 }, { "epoch": 4.466788904004258, "grad_norm": 1.050589680671692, "learning_rate": 2.3703810523475906e-06, "loss": 0.0736, "step": 151040 }, { "epoch": 4.467084639498433, "grad_norm": 0.7779101729393005, "learning_rate": 2.3702543624286514e-06, "loss": 0.0721, "step": 151050 }, { "epoch": 4.467380374992606, "grad_norm": 1.2147213220596313, "learning_rate": 2.370127672509712e-06, "loss": 0.0599, "step": 151060 }, { "epoch": 4.467676110486781, "grad_norm": 0.5711904764175415, "learning_rate": 2.3700009825907726e-06, "loss": 0.075, "step": 151070 }, { "epoch": 4.467971845980955, "grad_norm": 1.3032605648040771, "learning_rate": 2.3698742926718334e-06, "loss": 0.0664, "step": 151080 }, { "epoch": 4.4682675814751285, "grad_norm": 0.8423940539360046, "learning_rate": 2.369747602752894e-06, "loss": 0.0626, "step": 151090 }, { "epoch": 4.468563316969303, "grad_norm": 0.9397933483123779, "learning_rate": 2.3696209128339545e-06, "loss": 0.0606, "step": 151100 }, { "epoch": 4.468859052463476, "grad_norm": 0.9965483546257019, "learning_rate": 2.3694942229150153e-06, "loss": 0.0851, "step": 151110 }, { "epoch": 4.469154787957651, "grad_norm": 0.6202232837677002, "learning_rate": 2.3693675329960757e-06, "loss": 0.0948, "step": 151120 }, { "epoch": 4.469450523451824, "grad_norm": 0.9164401292800903, "learning_rate": 2.3692408430771365e-06, "loss": 0.0749, "step": 151130 }, { "epoch": 4.469746258945999, "grad_norm": 1.103088617324829, "learning_rate": 2.369114153158197e-06, "loss": 0.0764, "step": 151140 }, { "epoch": 4.470041994440173, "grad_norm": 0.975701093673706, "learning_rate": 2.368987463239257e-06, "loss": 0.0587, "step": 151150 }, { "epoch": 4.4703377299343465, "grad_norm": 0.9588416218757629, "learning_rate": 2.3688607733203184e-06, "loss": 0.0567, "step": 151160 }, { "epoch": 4.470633465428521, "grad_norm": 1.3203115463256836, "learning_rate": 2.3687340834013788e-06, "loss": 0.0829, "step": 151170 }, { "epoch": 4.470929200922694, "grad_norm": 0.7128233313560486, "learning_rate": 2.3686073934824396e-06, "loss": 0.0696, "step": 151180 }, { "epoch": 4.471224936416869, "grad_norm": 0.9112917184829712, "learning_rate": 2.3684807035635e-06, "loss": 0.0657, "step": 151190 }, { "epoch": 4.471520671911042, "grad_norm": 1.0321623086929321, "learning_rate": 2.3683540136445607e-06, "loss": 0.0465, "step": 151200 }, { "epoch": 4.471816407405217, "grad_norm": 1.1991863250732422, "learning_rate": 2.368227323725621e-06, "loss": 0.0631, "step": 151210 }, { "epoch": 4.472112142899391, "grad_norm": 0.9893758296966553, "learning_rate": 2.368100633806682e-06, "loss": 0.0849, "step": 151220 }, { "epoch": 4.472407878393565, "grad_norm": 0.5428078174591064, "learning_rate": 2.3679739438877423e-06, "loss": 0.0792, "step": 151230 }, { "epoch": 4.472703613887739, "grad_norm": 0.8847444653511047, "learning_rate": 2.3678472539688035e-06, "loss": 0.0828, "step": 151240 }, { "epoch": 4.4729993493819125, "grad_norm": 0.6282227039337158, "learning_rate": 2.367720564049864e-06, "loss": 0.0738, "step": 151250 }, { "epoch": 4.473295084876087, "grad_norm": 1.2261313199996948, "learning_rate": 2.3675938741309246e-06, "loss": 0.0718, "step": 151260 }, { "epoch": 4.473590820370261, "grad_norm": 1.9489246606826782, "learning_rate": 2.367467184211985e-06, "loss": 0.0893, "step": 151270 }, { "epoch": 4.473886555864435, "grad_norm": 0.5989021062850952, "learning_rate": 2.3673404942930458e-06, "loss": 0.0729, "step": 151280 }, { "epoch": 4.474182291358609, "grad_norm": 0.601471483707428, "learning_rate": 2.367213804374106e-06, "loss": 0.08, "step": 151290 }, { "epoch": 4.474478026852783, "grad_norm": 0.6756053566932678, "learning_rate": 2.367087114455167e-06, "loss": 0.0581, "step": 151300 }, { "epoch": 4.474773762346957, "grad_norm": 1.0448867082595825, "learning_rate": 2.3669604245362273e-06, "loss": 0.0857, "step": 151310 }, { "epoch": 4.4750694978411305, "grad_norm": 0.7756848931312561, "learning_rate": 2.3668337346172885e-06, "loss": 0.0713, "step": 151320 }, { "epoch": 4.475365233335305, "grad_norm": 1.1477309465408325, "learning_rate": 2.366707044698349e-06, "loss": 0.073, "step": 151330 }, { "epoch": 4.475660968829479, "grad_norm": 1.1333056688308716, "learning_rate": 2.3665803547794097e-06, "loss": 0.0773, "step": 151340 }, { "epoch": 4.475956704323653, "grad_norm": 0.5800716280937195, "learning_rate": 2.36645366486047e-06, "loss": 0.0488, "step": 151350 }, { "epoch": 4.476252439817827, "grad_norm": 1.0034666061401367, "learning_rate": 2.366326974941531e-06, "loss": 0.0709, "step": 151360 }, { "epoch": 4.476548175312001, "grad_norm": 0.9912508130073547, "learning_rate": 2.366200285022591e-06, "loss": 0.0788, "step": 151370 }, { "epoch": 4.476843910806175, "grad_norm": 1.205556869506836, "learning_rate": 2.366073595103652e-06, "loss": 0.086, "step": 151380 }, { "epoch": 4.4771396463003486, "grad_norm": 0.9444587230682373, "learning_rate": 2.3659469051847123e-06, "loss": 0.0617, "step": 151390 }, { "epoch": 4.477435381794523, "grad_norm": 0.7712187170982361, "learning_rate": 2.3658202152657736e-06, "loss": 0.0754, "step": 151400 }, { "epoch": 4.477731117288697, "grad_norm": 1.0279242992401123, "learning_rate": 2.365693525346834e-06, "loss": 0.0509, "step": 151410 }, { "epoch": 4.478026852782871, "grad_norm": 0.5946908593177795, "learning_rate": 2.3655668354278947e-06, "loss": 0.0674, "step": 151420 }, { "epoch": 4.478322588277045, "grad_norm": 1.1405205726623535, "learning_rate": 2.365440145508955e-06, "loss": 0.0736, "step": 151430 }, { "epoch": 4.478618323771219, "grad_norm": 0.9546360373497009, "learning_rate": 2.365313455590016e-06, "loss": 0.0644, "step": 151440 }, { "epoch": 4.478914059265393, "grad_norm": 0.8871179223060608, "learning_rate": 2.3651867656710762e-06, "loss": 0.0683, "step": 151450 }, { "epoch": 4.4792097947595675, "grad_norm": 0.9849808812141418, "learning_rate": 2.365060075752137e-06, "loss": 0.0697, "step": 151460 }, { "epoch": 4.479505530253741, "grad_norm": 1.0396640300750732, "learning_rate": 2.3649333858331974e-06, "loss": 0.0671, "step": 151470 }, { "epoch": 4.479801265747915, "grad_norm": 1.2085449695587158, "learning_rate": 2.3648066959142586e-06, "loss": 0.0651, "step": 151480 }, { "epoch": 4.480097001242089, "grad_norm": 1.3857593536376953, "learning_rate": 2.364680005995319e-06, "loss": 0.0756, "step": 151490 }, { "epoch": 4.480392736736263, "grad_norm": 0.8073819279670715, "learning_rate": 2.3645533160763798e-06, "loss": 0.0651, "step": 151500 }, { "epoch": 4.480688472230437, "grad_norm": 0.985966145992279, "learning_rate": 2.36442662615744e-06, "loss": 0.0696, "step": 151510 }, { "epoch": 4.480984207724611, "grad_norm": 0.7425276637077332, "learning_rate": 2.364299936238501e-06, "loss": 0.0688, "step": 151520 }, { "epoch": 4.4812799432187855, "grad_norm": 0.7927693724632263, "learning_rate": 2.3641732463195613e-06, "loss": 0.0649, "step": 151530 }, { "epoch": 4.481575678712959, "grad_norm": 0.7187374234199524, "learning_rate": 2.364046556400622e-06, "loss": 0.072, "step": 151540 }, { "epoch": 4.481871414207133, "grad_norm": 0.7757334113121033, "learning_rate": 2.3639198664816824e-06, "loss": 0.0639, "step": 151550 }, { "epoch": 4.482167149701307, "grad_norm": 0.653089165687561, "learning_rate": 2.3637931765627432e-06, "loss": 0.0647, "step": 151560 }, { "epoch": 4.482462885195481, "grad_norm": 1.0888162851333618, "learning_rate": 2.363666486643804e-06, "loss": 0.0811, "step": 151570 }, { "epoch": 4.482758620689655, "grad_norm": 1.1125367879867554, "learning_rate": 2.3635397967248644e-06, "loss": 0.0688, "step": 151580 }, { "epoch": 4.483054356183829, "grad_norm": 0.9118102192878723, "learning_rate": 2.363413106805925e-06, "loss": 0.0745, "step": 151590 }, { "epoch": 4.483350091678004, "grad_norm": 1.0168523788452148, "learning_rate": 2.3632864168869855e-06, "loss": 0.0596, "step": 151600 }, { "epoch": 4.483645827172177, "grad_norm": 2.428314208984375, "learning_rate": 2.3631597269680463e-06, "loss": 0.0609, "step": 151610 }, { "epoch": 4.4839415626663515, "grad_norm": 0.9067090153694153, "learning_rate": 2.3630330370491067e-06, "loss": 0.0706, "step": 151620 }, { "epoch": 4.484237298160525, "grad_norm": 0.9232124090194702, "learning_rate": 2.3629063471301675e-06, "loss": 0.0593, "step": 151630 }, { "epoch": 4.484533033654699, "grad_norm": 0.6259156465530396, "learning_rate": 2.3627796572112283e-06, "loss": 0.0821, "step": 151640 }, { "epoch": 4.484828769148873, "grad_norm": 1.08566153049469, "learning_rate": 2.362652967292289e-06, "loss": 0.0715, "step": 151650 }, { "epoch": 4.485124504643047, "grad_norm": 1.0546637773513794, "learning_rate": 2.3625262773733494e-06, "loss": 0.0678, "step": 151660 }, { "epoch": 4.485420240137222, "grad_norm": 1.389649748802185, "learning_rate": 2.3623995874544102e-06, "loss": 0.0776, "step": 151670 }, { "epoch": 4.485715975631395, "grad_norm": 0.757610023021698, "learning_rate": 2.3622728975354706e-06, "loss": 0.0619, "step": 151680 }, { "epoch": 4.4860117111255695, "grad_norm": 0.7919554710388184, "learning_rate": 2.3621462076165314e-06, "loss": 0.0684, "step": 151690 }, { "epoch": 4.486307446619743, "grad_norm": 0.8125913739204407, "learning_rate": 2.3620195176975917e-06, "loss": 0.0553, "step": 151700 }, { "epoch": 4.486603182113917, "grad_norm": 1.4276535511016846, "learning_rate": 2.3618928277786525e-06, "loss": 0.0558, "step": 151710 }, { "epoch": 4.486898917608091, "grad_norm": 1.1226595640182495, "learning_rate": 2.3617661378597133e-06, "loss": 0.0702, "step": 151720 }, { "epoch": 4.487194653102265, "grad_norm": 0.8408880829811096, "learning_rate": 2.361639447940774e-06, "loss": 0.0782, "step": 151730 }, { "epoch": 4.48749038859644, "grad_norm": 0.7820824384689331, "learning_rate": 2.3615127580218345e-06, "loss": 0.0807, "step": 151740 }, { "epoch": 4.487786124090613, "grad_norm": 0.8146434426307678, "learning_rate": 2.3613860681028953e-06, "loss": 0.0504, "step": 151750 }, { "epoch": 4.488081859584788, "grad_norm": 1.003870964050293, "learning_rate": 2.3612593781839556e-06, "loss": 0.0796, "step": 151760 }, { "epoch": 4.488377595078961, "grad_norm": 0.5547019243240356, "learning_rate": 2.3611326882650164e-06, "loss": 0.0736, "step": 151770 }, { "epoch": 4.4886733305731354, "grad_norm": 0.7126796841621399, "learning_rate": 2.3610059983460768e-06, "loss": 0.0767, "step": 151780 }, { "epoch": 4.488969066067309, "grad_norm": 0.978816032409668, "learning_rate": 2.3608793084271376e-06, "loss": 0.0748, "step": 151790 }, { "epoch": 4.489264801561483, "grad_norm": 0.5972059369087219, "learning_rate": 2.3607526185081984e-06, "loss": 0.0569, "step": 151800 }, { "epoch": 4.489560537055658, "grad_norm": 0.6527246236801147, "learning_rate": 2.360625928589259e-06, "loss": 0.0637, "step": 151810 }, { "epoch": 4.489856272549831, "grad_norm": 0.6292623281478882, "learning_rate": 2.3604992386703195e-06, "loss": 0.0672, "step": 151820 }, { "epoch": 4.490152008044006, "grad_norm": 0.7314724326133728, "learning_rate": 2.3603725487513803e-06, "loss": 0.075, "step": 151830 }, { "epoch": 4.490447743538179, "grad_norm": 1.2602483034133911, "learning_rate": 2.3602458588324407e-06, "loss": 0.0951, "step": 151840 }, { "epoch": 4.4907434790323535, "grad_norm": 0.7504388093948364, "learning_rate": 2.3601191689135015e-06, "loss": 0.0655, "step": 151850 }, { "epoch": 4.491039214526528, "grad_norm": 0.8068950772285461, "learning_rate": 2.359992478994562e-06, "loss": 0.068, "step": 151860 }, { "epoch": 4.491334950020701, "grad_norm": 1.0870518684387207, "learning_rate": 2.3598657890756226e-06, "loss": 0.0698, "step": 151870 }, { "epoch": 4.491630685514876, "grad_norm": 0.8589293956756592, "learning_rate": 2.3597390991566834e-06, "loss": 0.0788, "step": 151880 }, { "epoch": 4.491926421009049, "grad_norm": 0.9463559985160828, "learning_rate": 2.359612409237744e-06, "loss": 0.0678, "step": 151890 }, { "epoch": 4.492222156503224, "grad_norm": 0.851205050945282, "learning_rate": 2.3594857193188046e-06, "loss": 0.0526, "step": 151900 }, { "epoch": 4.492517891997397, "grad_norm": 1.645936131477356, "learning_rate": 2.3593590293998654e-06, "loss": 0.063, "step": 151910 }, { "epoch": 4.4928136274915715, "grad_norm": 0.8293304443359375, "learning_rate": 2.3592323394809257e-06, "loss": 0.0799, "step": 151920 }, { "epoch": 4.493109362985746, "grad_norm": 1.3401381969451904, "learning_rate": 2.3591056495619865e-06, "loss": 0.0803, "step": 151930 }, { "epoch": 4.493405098479919, "grad_norm": 0.9854195713996887, "learning_rate": 2.358978959643047e-06, "loss": 0.0552, "step": 151940 }, { "epoch": 4.493700833974094, "grad_norm": 0.7625505924224854, "learning_rate": 2.3588522697241077e-06, "loss": 0.0577, "step": 151950 }, { "epoch": 4.493996569468267, "grad_norm": 0.9819173812866211, "learning_rate": 2.3587255798051685e-06, "loss": 0.0635, "step": 151960 }, { "epoch": 4.494292304962442, "grad_norm": 1.1345394849777222, "learning_rate": 2.358598889886229e-06, "loss": 0.0823, "step": 151970 }, { "epoch": 4.494588040456615, "grad_norm": 0.8458058834075928, "learning_rate": 2.3584721999672896e-06, "loss": 0.086, "step": 151980 }, { "epoch": 4.49488377595079, "grad_norm": 1.4994033575057983, "learning_rate": 2.35834551004835e-06, "loss": 0.0683, "step": 151990 }, { "epoch": 4.495179511444964, "grad_norm": 0.898061215877533, "learning_rate": 2.3582188201294108e-06, "loss": 0.0636, "step": 152000 }, { "epoch": 4.4954752469391375, "grad_norm": 0.6805923581123352, "learning_rate": 2.358092130210471e-06, "loss": 0.0468, "step": 152010 }, { "epoch": 4.495770982433312, "grad_norm": 1.1692250967025757, "learning_rate": 2.357965440291532e-06, "loss": 0.0758, "step": 152020 }, { "epoch": 4.496066717927485, "grad_norm": 0.8604753613471985, "learning_rate": 2.3578387503725923e-06, "loss": 0.0732, "step": 152030 }, { "epoch": 4.49636245342166, "grad_norm": 0.6913816928863525, "learning_rate": 2.3577120604536535e-06, "loss": 0.0701, "step": 152040 }, { "epoch": 4.496658188915834, "grad_norm": 0.5546200275421143, "learning_rate": 2.357585370534714e-06, "loss": 0.0606, "step": 152050 }, { "epoch": 4.496953924410008, "grad_norm": 0.9171914458274841, "learning_rate": 2.3574586806157747e-06, "loss": 0.0707, "step": 152060 }, { "epoch": 4.497249659904182, "grad_norm": 1.2315001487731934, "learning_rate": 2.357331990696835e-06, "loss": 0.088, "step": 152070 }, { "epoch": 4.4975453953983555, "grad_norm": 0.8176037073135376, "learning_rate": 2.357205300777896e-06, "loss": 0.0712, "step": 152080 }, { "epoch": 4.49784113089253, "grad_norm": 0.848138689994812, "learning_rate": 2.357078610858956e-06, "loss": 0.0678, "step": 152090 }, { "epoch": 4.498136866386703, "grad_norm": 0.8633762001991272, "learning_rate": 2.356951920940017e-06, "loss": 0.0491, "step": 152100 }, { "epoch": 4.498432601880878, "grad_norm": 1.1054307222366333, "learning_rate": 2.3568252310210773e-06, "loss": 0.0604, "step": 152110 }, { "epoch": 4.498728337375052, "grad_norm": 0.7192738056182861, "learning_rate": 2.3566985411021385e-06, "loss": 0.0757, "step": 152120 }, { "epoch": 4.499024072869226, "grad_norm": 1.0802017450332642, "learning_rate": 2.356571851183199e-06, "loss": 0.0714, "step": 152130 }, { "epoch": 4.4993198083634, "grad_norm": 0.7219065427780151, "learning_rate": 2.3564451612642597e-06, "loss": 0.0723, "step": 152140 }, { "epoch": 4.499615543857574, "grad_norm": 0.8936824798583984, "learning_rate": 2.35631847134532e-06, "loss": 0.0582, "step": 152150 }, { "epoch": 4.499911279351748, "grad_norm": 0.9700683951377869, "learning_rate": 2.356191781426381e-06, "loss": 0.0708, "step": 152160 }, { "epoch": 4.5002070148459214, "grad_norm": 0.916650116443634, "learning_rate": 2.3560650915074412e-06, "loss": 0.0947, "step": 152170 }, { "epoch": 4.500502750340096, "grad_norm": 0.9265167117118835, "learning_rate": 2.355938401588502e-06, "loss": 0.0753, "step": 152180 }, { "epoch": 4.50079848583427, "grad_norm": 1.5180556774139404, "learning_rate": 2.3558117116695624e-06, "loss": 0.0728, "step": 152190 }, { "epoch": 4.501094221328444, "grad_norm": 1.174131989479065, "learning_rate": 2.3556850217506236e-06, "loss": 0.0717, "step": 152200 }, { "epoch": 4.501389956822618, "grad_norm": 1.138640284538269, "learning_rate": 2.355558331831684e-06, "loss": 0.0646, "step": 152210 }, { "epoch": 4.501685692316792, "grad_norm": 0.8556693196296692, "learning_rate": 2.3554316419127447e-06, "loss": 0.0658, "step": 152220 }, { "epoch": 4.501981427810966, "grad_norm": 1.263323426246643, "learning_rate": 2.355304951993805e-06, "loss": 0.0658, "step": 152230 }, { "epoch": 4.5022771633051395, "grad_norm": 1.098341703414917, "learning_rate": 2.355178262074866e-06, "loss": 0.0509, "step": 152240 }, { "epoch": 4.502572898799314, "grad_norm": 0.5332863926887512, "learning_rate": 2.3550515721559263e-06, "loss": 0.0813, "step": 152250 }, { "epoch": 4.502868634293488, "grad_norm": 1.1252671480178833, "learning_rate": 2.354924882236987e-06, "loss": 0.0684, "step": 152260 }, { "epoch": 4.503164369787662, "grad_norm": 0.9401429295539856, "learning_rate": 2.3547981923180474e-06, "loss": 0.0834, "step": 152270 }, { "epoch": 4.503460105281836, "grad_norm": 1.0275224447250366, "learning_rate": 2.3546715023991086e-06, "loss": 0.0684, "step": 152280 }, { "epoch": 4.50375584077601, "grad_norm": 1.1481802463531494, "learning_rate": 2.354544812480169e-06, "loss": 0.0752, "step": 152290 }, { "epoch": 4.504051576270184, "grad_norm": 0.9942424893379211, "learning_rate": 2.35441812256123e-06, "loss": 0.057, "step": 152300 }, { "epoch": 4.5043473117643575, "grad_norm": 0.6933721303939819, "learning_rate": 2.35429143264229e-06, "loss": 0.0653, "step": 152310 }, { "epoch": 4.504643047258532, "grad_norm": 1.1137402057647705, "learning_rate": 2.354164742723351e-06, "loss": 0.0724, "step": 152320 }, { "epoch": 4.504938782752706, "grad_norm": 1.04141104221344, "learning_rate": 2.3540380528044113e-06, "loss": 0.0675, "step": 152330 }, { "epoch": 4.50523451824688, "grad_norm": 0.8841942548751831, "learning_rate": 2.353911362885472e-06, "loss": 0.0758, "step": 152340 }, { "epoch": 4.505530253741054, "grad_norm": 0.8535938858985901, "learning_rate": 2.3537846729665325e-06, "loss": 0.0612, "step": 152350 }, { "epoch": 4.505825989235228, "grad_norm": 1.212528944015503, "learning_rate": 2.3536579830475937e-06, "loss": 0.0792, "step": 152360 }, { "epoch": 4.506121724729402, "grad_norm": 0.9114335179328918, "learning_rate": 2.353531293128654e-06, "loss": 0.0747, "step": 152370 }, { "epoch": 4.506417460223576, "grad_norm": 0.6901245713233948, "learning_rate": 2.3534046032097144e-06, "loss": 0.0703, "step": 152380 }, { "epoch": 4.50671319571775, "grad_norm": 1.0810582637786865, "learning_rate": 2.353277913290775e-06, "loss": 0.0763, "step": 152390 }, { "epoch": 4.507008931211924, "grad_norm": 0.4557611048221588, "learning_rate": 2.3531512233718356e-06, "loss": 0.0511, "step": 152400 }, { "epoch": 4.507304666706098, "grad_norm": 0.7574629783630371, "learning_rate": 2.3530245334528964e-06, "loss": 0.0622, "step": 152410 }, { "epoch": 4.507600402200272, "grad_norm": 0.764114499092102, "learning_rate": 2.3528978435339567e-06, "loss": 0.0781, "step": 152420 }, { "epoch": 4.507896137694446, "grad_norm": 0.726080060005188, "learning_rate": 2.3527711536150175e-06, "loss": 0.0655, "step": 152430 }, { "epoch": 4.50819187318862, "grad_norm": 1.1427384614944458, "learning_rate": 2.3526444636960783e-06, "loss": 0.081, "step": 152440 }, { "epoch": 4.508487608682794, "grad_norm": 1.11820387840271, "learning_rate": 2.352517773777139e-06, "loss": 0.0604, "step": 152450 }, { "epoch": 4.508783344176968, "grad_norm": 1.3540834188461304, "learning_rate": 2.3523910838581995e-06, "loss": 0.058, "step": 152460 }, { "epoch": 4.509079079671142, "grad_norm": 0.8634227514266968, "learning_rate": 2.3522643939392602e-06, "loss": 0.0902, "step": 152470 }, { "epoch": 4.509374815165316, "grad_norm": 1.0720288753509521, "learning_rate": 2.3521377040203206e-06, "loss": 0.0889, "step": 152480 }, { "epoch": 4.50967055065949, "grad_norm": 0.7288775444030762, "learning_rate": 2.3520110141013814e-06, "loss": 0.0707, "step": 152490 }, { "epoch": 4.509966286153664, "grad_norm": 1.0083211660385132, "learning_rate": 2.3518843241824418e-06, "loss": 0.0531, "step": 152500 }, { "epoch": 4.510262021647838, "grad_norm": 0.691490888595581, "learning_rate": 2.3517576342635026e-06, "loss": 0.0737, "step": 152510 }, { "epoch": 4.510557757142013, "grad_norm": 0.617281436920166, "learning_rate": 2.3516309443445633e-06, "loss": 0.0633, "step": 152520 }, { "epoch": 4.510853492636186, "grad_norm": 1.4307080507278442, "learning_rate": 2.351504254425624e-06, "loss": 0.0557, "step": 152530 }, { "epoch": 4.5111492281303605, "grad_norm": 0.5580660700798035, "learning_rate": 2.3513775645066845e-06, "loss": 0.0661, "step": 152540 }, { "epoch": 4.511444963624534, "grad_norm": 1.6817554235458374, "learning_rate": 2.3512508745877453e-06, "loss": 0.0585, "step": 152550 }, { "epoch": 4.511740699118708, "grad_norm": 1.3943347930908203, "learning_rate": 2.3511241846688057e-06, "loss": 0.0718, "step": 152560 }, { "epoch": 4.512036434612883, "grad_norm": 1.0514565706253052, "learning_rate": 2.3509974947498664e-06, "loss": 0.0925, "step": 152570 }, { "epoch": 4.512332170107056, "grad_norm": 0.7410578727722168, "learning_rate": 2.350870804830927e-06, "loss": 0.0785, "step": 152580 }, { "epoch": 4.512627905601231, "grad_norm": 1.4507081508636475, "learning_rate": 2.3507441149119876e-06, "loss": 0.0693, "step": 152590 }, { "epoch": 4.512923641095404, "grad_norm": 0.4340618848800659, "learning_rate": 2.3506174249930484e-06, "loss": 0.0497, "step": 152600 }, { "epoch": 4.5132193765895785, "grad_norm": 0.8239379525184631, "learning_rate": 2.350490735074109e-06, "loss": 0.0751, "step": 152610 }, { "epoch": 4.513515112083752, "grad_norm": 0.8334581255912781, "learning_rate": 2.3503640451551695e-06, "loss": 0.0879, "step": 152620 }, { "epoch": 4.513810847577926, "grad_norm": 0.844448983669281, "learning_rate": 2.3502373552362303e-06, "loss": 0.0733, "step": 152630 }, { "epoch": 4.514106583072101, "grad_norm": 0.8497874736785889, "learning_rate": 2.3501106653172907e-06, "loss": 0.0608, "step": 152640 }, { "epoch": 4.514402318566274, "grad_norm": 0.572256326675415, "learning_rate": 2.3499839753983515e-06, "loss": 0.0536, "step": 152650 }, { "epoch": 4.514698054060449, "grad_norm": 0.740257978439331, "learning_rate": 2.349857285479412e-06, "loss": 0.0709, "step": 152660 }, { "epoch": 4.514993789554622, "grad_norm": 1.122471570968628, "learning_rate": 2.3497305955604726e-06, "loss": 0.079, "step": 152670 }, { "epoch": 4.5152895250487965, "grad_norm": 0.44603219628334045, "learning_rate": 2.3496039056415334e-06, "loss": 0.0752, "step": 152680 }, { "epoch": 4.51558526054297, "grad_norm": 0.7084376215934753, "learning_rate": 2.3494772157225942e-06, "loss": 0.0643, "step": 152690 }, { "epoch": 4.515880996037144, "grad_norm": 0.9901527166366577, "learning_rate": 2.3493505258036546e-06, "loss": 0.0534, "step": 152700 }, { "epoch": 4.516176731531319, "grad_norm": 0.7984315752983093, "learning_rate": 2.3492238358847154e-06, "loss": 0.0634, "step": 152710 }, { "epoch": 4.516472467025492, "grad_norm": 0.9014053344726562, "learning_rate": 2.3490971459657757e-06, "loss": 0.0736, "step": 152720 }, { "epoch": 4.516768202519667, "grad_norm": 0.7353307604789734, "learning_rate": 2.3489704560468365e-06, "loss": 0.0681, "step": 152730 }, { "epoch": 4.51706393801384, "grad_norm": 0.3979807198047638, "learning_rate": 2.348843766127897e-06, "loss": 0.072, "step": 152740 }, { "epoch": 4.517359673508015, "grad_norm": 1.0712289810180664, "learning_rate": 2.3487170762089577e-06, "loss": 0.0617, "step": 152750 }, { "epoch": 4.517655409002188, "grad_norm": 1.324432611465454, "learning_rate": 2.3485903862900185e-06, "loss": 0.0727, "step": 152760 }, { "epoch": 4.5179511444963625, "grad_norm": 1.3171992301940918, "learning_rate": 2.3484636963710793e-06, "loss": 0.0752, "step": 152770 }, { "epoch": 4.518246879990537, "grad_norm": 0.646449089050293, "learning_rate": 2.3483370064521396e-06, "loss": 0.0926, "step": 152780 }, { "epoch": 4.51854261548471, "grad_norm": 0.685276985168457, "learning_rate": 2.3482103165332e-06, "loss": 0.0694, "step": 152790 }, { "epoch": 4.518838350978885, "grad_norm": 0.9684839844703674, "learning_rate": 2.348083626614261e-06, "loss": 0.0655, "step": 152800 }, { "epoch": 4.519134086473058, "grad_norm": 1.0164669752120972, "learning_rate": 2.347956936695321e-06, "loss": 0.0699, "step": 152810 }, { "epoch": 4.519429821967233, "grad_norm": 0.8090460300445557, "learning_rate": 2.347830246776382e-06, "loss": 0.0735, "step": 152820 }, { "epoch": 4.519725557461406, "grad_norm": 1.0981471538543701, "learning_rate": 2.3477035568574423e-06, "loss": 0.0718, "step": 152830 }, { "epoch": 4.5200212929555805, "grad_norm": 0.6098825931549072, "learning_rate": 2.3475768669385035e-06, "loss": 0.0664, "step": 152840 }, { "epoch": 4.520317028449755, "grad_norm": 0.7335856556892395, "learning_rate": 2.347450177019564e-06, "loss": 0.0639, "step": 152850 }, { "epoch": 4.520612763943928, "grad_norm": 0.6205036044120789, "learning_rate": 2.3473234871006247e-06, "loss": 0.0541, "step": 152860 }, { "epoch": 4.520908499438103, "grad_norm": 0.525761067867279, "learning_rate": 2.347196797181685e-06, "loss": 0.0675, "step": 152870 }, { "epoch": 4.521204234932276, "grad_norm": 0.8361417055130005, "learning_rate": 2.347070107262746e-06, "loss": 0.0802, "step": 152880 }, { "epoch": 4.521499970426451, "grad_norm": 0.7974849939346313, "learning_rate": 2.346943417343806e-06, "loss": 0.0618, "step": 152890 }, { "epoch": 4.521795705920624, "grad_norm": 0.6966158151626587, "learning_rate": 2.346816727424867e-06, "loss": 0.0554, "step": 152900 }, { "epoch": 4.522091441414799, "grad_norm": 0.7388870120048523, "learning_rate": 2.3466900375059274e-06, "loss": 0.0726, "step": 152910 }, { "epoch": 4.522387176908973, "grad_norm": 1.1299070119857788, "learning_rate": 2.3465633475869886e-06, "loss": 0.074, "step": 152920 }, { "epoch": 4.5226829124031465, "grad_norm": 1.2497066259384155, "learning_rate": 2.346436657668049e-06, "loss": 0.0895, "step": 152930 }, { "epoch": 4.522978647897321, "grad_norm": 0.7301971316337585, "learning_rate": 2.3463099677491097e-06, "loss": 0.075, "step": 152940 }, { "epoch": 4.523274383391494, "grad_norm": 1.7159918546676636, "learning_rate": 2.34618327783017e-06, "loss": 0.0648, "step": 152950 }, { "epoch": 4.523570118885669, "grad_norm": 0.7539898157119751, "learning_rate": 2.346056587911231e-06, "loss": 0.0658, "step": 152960 }, { "epoch": 4.523865854379842, "grad_norm": 0.6568185687065125, "learning_rate": 2.3459298979922912e-06, "loss": 0.0753, "step": 152970 }, { "epoch": 4.524161589874017, "grad_norm": 0.921617865562439, "learning_rate": 2.345803208073352e-06, "loss": 0.0767, "step": 152980 }, { "epoch": 4.524457325368191, "grad_norm": 0.9855047464370728, "learning_rate": 2.3456765181544124e-06, "loss": 0.0678, "step": 152990 }, { "epoch": 4.5247530608623645, "grad_norm": 0.41256633400917053, "learning_rate": 2.3455498282354736e-06, "loss": 0.0515, "step": 153000 }, { "epoch": 4.525048796356539, "grad_norm": 0.8860851526260376, "learning_rate": 2.345423138316534e-06, "loss": 0.062, "step": 153010 }, { "epoch": 4.525344531850712, "grad_norm": 1.2445014715194702, "learning_rate": 2.3452964483975948e-06, "loss": 0.0716, "step": 153020 }, { "epoch": 4.525640267344887, "grad_norm": 1.2605347633361816, "learning_rate": 2.345169758478655e-06, "loss": 0.074, "step": 153030 }, { "epoch": 4.52593600283906, "grad_norm": 1.0574055910110474, "learning_rate": 2.345043068559716e-06, "loss": 0.0903, "step": 153040 }, { "epoch": 4.526231738333235, "grad_norm": 0.7644636631011963, "learning_rate": 2.3449163786407763e-06, "loss": 0.0649, "step": 153050 }, { "epoch": 4.526527473827409, "grad_norm": 0.9864877462387085, "learning_rate": 2.344789688721837e-06, "loss": 0.0635, "step": 153060 }, { "epoch": 4.5268232093215826, "grad_norm": 1.4956655502319336, "learning_rate": 2.3446629988028975e-06, "loss": 0.0677, "step": 153070 }, { "epoch": 4.527118944815757, "grad_norm": 0.8592215776443481, "learning_rate": 2.3445363088839587e-06, "loss": 0.07, "step": 153080 }, { "epoch": 4.52741468030993, "grad_norm": 0.8849050402641296, "learning_rate": 2.344409618965019e-06, "loss": 0.0818, "step": 153090 }, { "epoch": 4.527710415804105, "grad_norm": 0.8973361849784851, "learning_rate": 2.34428292904608e-06, "loss": 0.0544, "step": 153100 }, { "epoch": 4.528006151298278, "grad_norm": 2.3275680541992188, "learning_rate": 2.34415623912714e-06, "loss": 0.0658, "step": 153110 }, { "epoch": 4.528301886792453, "grad_norm": 0.5735364556312561, "learning_rate": 2.344029549208201e-06, "loss": 0.0839, "step": 153120 }, { "epoch": 4.528597622286627, "grad_norm": 0.4560997784137726, "learning_rate": 2.3439028592892613e-06, "loss": 0.0598, "step": 153130 }, { "epoch": 4.528893357780801, "grad_norm": 0.5432168841362, "learning_rate": 2.343776169370322e-06, "loss": 0.0628, "step": 153140 }, { "epoch": 4.529189093274975, "grad_norm": 0.554022490978241, "learning_rate": 2.3436494794513825e-06, "loss": 0.0488, "step": 153150 }, { "epoch": 4.5294848287691485, "grad_norm": 1.0156790018081665, "learning_rate": 2.3435227895324437e-06, "loss": 0.0611, "step": 153160 }, { "epoch": 4.529780564263323, "grad_norm": 0.9610052108764648, "learning_rate": 2.343396099613504e-06, "loss": 0.0672, "step": 153170 }, { "epoch": 4.530076299757497, "grad_norm": 0.8898886442184448, "learning_rate": 2.343269409694565e-06, "loss": 0.0878, "step": 153180 }, { "epoch": 4.530372035251671, "grad_norm": 0.8443955183029175, "learning_rate": 2.3431427197756252e-06, "loss": 0.0618, "step": 153190 }, { "epoch": 4.530667770745845, "grad_norm": 1.059267282485962, "learning_rate": 2.343016029856686e-06, "loss": 0.0514, "step": 153200 }, { "epoch": 4.530963506240019, "grad_norm": 1.1346988677978516, "learning_rate": 2.3428893399377464e-06, "loss": 0.062, "step": 153210 }, { "epoch": 4.531259241734193, "grad_norm": 2.6397573947906494, "learning_rate": 2.3427626500188068e-06, "loss": 0.0768, "step": 153220 }, { "epoch": 4.531554977228367, "grad_norm": 1.363074541091919, "learning_rate": 2.3426359600998675e-06, "loss": 0.0903, "step": 153230 }, { "epoch": 4.531850712722541, "grad_norm": 0.8911600708961487, "learning_rate": 2.3425092701809283e-06, "loss": 0.0791, "step": 153240 }, { "epoch": 4.532146448216715, "grad_norm": 0.8623918294906616, "learning_rate": 2.342382580261989e-06, "loss": 0.0585, "step": 153250 }, { "epoch": 4.532442183710889, "grad_norm": 0.8821555376052856, "learning_rate": 2.3422558903430495e-06, "loss": 0.0672, "step": 153260 }, { "epoch": 4.532737919205063, "grad_norm": 1.0408380031585693, "learning_rate": 2.3421292004241103e-06, "loss": 0.08, "step": 153270 }, { "epoch": 4.533033654699237, "grad_norm": 0.974702775478363, "learning_rate": 2.3420025105051706e-06, "loss": 0.0806, "step": 153280 }, { "epoch": 4.533329390193411, "grad_norm": 0.46406757831573486, "learning_rate": 2.3418758205862314e-06, "loss": 0.0575, "step": 153290 }, { "epoch": 4.5336251256875855, "grad_norm": 1.45486319065094, "learning_rate": 2.341749130667292e-06, "loss": 0.0783, "step": 153300 }, { "epoch": 4.533920861181759, "grad_norm": 1.2108521461486816, "learning_rate": 2.3416224407483526e-06, "loss": 0.0719, "step": 153310 }, { "epoch": 4.534216596675933, "grad_norm": 0.979796826839447, "learning_rate": 2.3414957508294134e-06, "loss": 0.0592, "step": 153320 }, { "epoch": 4.534512332170107, "grad_norm": 0.7577542662620544, "learning_rate": 2.341369060910474e-06, "loss": 0.074, "step": 153330 }, { "epoch": 4.534808067664281, "grad_norm": 1.3438366651535034, "learning_rate": 2.3412423709915345e-06, "loss": 0.0677, "step": 153340 }, { "epoch": 4.535103803158455, "grad_norm": 0.5386213064193726, "learning_rate": 2.3411156810725953e-06, "loss": 0.0747, "step": 153350 }, { "epoch": 4.535399538652629, "grad_norm": 0.8937715291976929, "learning_rate": 2.3409889911536557e-06, "loss": 0.0531, "step": 153360 }, { "epoch": 4.5356952741468035, "grad_norm": 0.9785033464431763, "learning_rate": 2.3408623012347165e-06, "loss": 0.0754, "step": 153370 }, { "epoch": 4.535991009640977, "grad_norm": 0.8947958946228027, "learning_rate": 2.340735611315777e-06, "loss": 0.0718, "step": 153380 }, { "epoch": 4.536286745135151, "grad_norm": 1.3863730430603027, "learning_rate": 2.3406089213968376e-06, "loss": 0.0712, "step": 153390 }, { "epoch": 4.536582480629325, "grad_norm": 0.6202712655067444, "learning_rate": 2.3404822314778984e-06, "loss": 0.0578, "step": 153400 }, { "epoch": 4.536878216123499, "grad_norm": 1.0242805480957031, "learning_rate": 2.340355541558959e-06, "loss": 0.0653, "step": 153410 }, { "epoch": 4.537173951617673, "grad_norm": 0.8035141825675964, "learning_rate": 2.3402288516400196e-06, "loss": 0.0818, "step": 153420 }, { "epoch": 4.537469687111847, "grad_norm": 0.8855798840522766, "learning_rate": 2.3401021617210804e-06, "loss": 0.076, "step": 153430 }, { "epoch": 4.537765422606022, "grad_norm": 0.7472812533378601, "learning_rate": 2.3399754718021407e-06, "loss": 0.0685, "step": 153440 }, { "epoch": 4.538061158100195, "grad_norm": 0.9896199703216553, "learning_rate": 2.3398487818832015e-06, "loss": 0.0657, "step": 153450 }, { "epoch": 4.5383568935943694, "grad_norm": 1.0545064210891724, "learning_rate": 2.339722091964262e-06, "loss": 0.0591, "step": 153460 }, { "epoch": 4.538652629088543, "grad_norm": 1.2295043468475342, "learning_rate": 2.3395954020453227e-06, "loss": 0.072, "step": 153470 }, { "epoch": 4.538948364582717, "grad_norm": 0.7152012586593628, "learning_rate": 2.3394687121263835e-06, "loss": 0.0599, "step": 153480 }, { "epoch": 4.539244100076891, "grad_norm": 0.5956851840019226, "learning_rate": 2.3393420222074443e-06, "loss": 0.0577, "step": 153490 }, { "epoch": 4.539539835571065, "grad_norm": 0.7349935173988342, "learning_rate": 2.3392153322885046e-06, "loss": 0.0673, "step": 153500 }, { "epoch": 4.53983557106524, "grad_norm": 0.8356713056564331, "learning_rate": 2.3390886423695654e-06, "loss": 0.07, "step": 153510 }, { "epoch": 4.540131306559413, "grad_norm": 0.7653278708457947, "learning_rate": 2.3389619524506258e-06, "loss": 0.0582, "step": 153520 }, { "epoch": 4.5404270420535875, "grad_norm": 0.6195743680000305, "learning_rate": 2.3388352625316866e-06, "loss": 0.0754, "step": 153530 }, { "epoch": 4.540722777547761, "grad_norm": 1.279842734336853, "learning_rate": 2.338708572612747e-06, "loss": 0.0588, "step": 153540 }, { "epoch": 4.541018513041935, "grad_norm": 1.4729833602905273, "learning_rate": 2.3385818826938077e-06, "loss": 0.054, "step": 153550 }, { "epoch": 4.541314248536109, "grad_norm": 1.201796531677246, "learning_rate": 2.3384551927748685e-06, "loss": 0.0759, "step": 153560 }, { "epoch": 4.541609984030283, "grad_norm": 1.9609270095825195, "learning_rate": 2.3383285028559293e-06, "loss": 0.0774, "step": 153570 }, { "epoch": 4.541905719524458, "grad_norm": 1.8154667615890503, "learning_rate": 2.3382018129369897e-06, "loss": 0.0742, "step": 153580 }, { "epoch": 4.542201455018631, "grad_norm": 0.885839581489563, "learning_rate": 2.3380751230180505e-06, "loss": 0.0792, "step": 153590 }, { "epoch": 4.5424971905128055, "grad_norm": 0.930261492729187, "learning_rate": 2.337948433099111e-06, "loss": 0.0688, "step": 153600 }, { "epoch": 4.542792926006979, "grad_norm": 1.1254905462265015, "learning_rate": 2.3378217431801716e-06, "loss": 0.082, "step": 153610 }, { "epoch": 4.543088661501153, "grad_norm": 1.0487779378890991, "learning_rate": 2.337695053261232e-06, "loss": 0.086, "step": 153620 }, { "epoch": 4.543384396995327, "grad_norm": 0.7730385065078735, "learning_rate": 2.3375683633422923e-06, "loss": 0.0746, "step": 153630 }, { "epoch": 4.543680132489501, "grad_norm": 0.8640013337135315, "learning_rate": 2.3374416734233536e-06, "loss": 0.0748, "step": 153640 }, { "epoch": 4.543975867983676, "grad_norm": 0.478437602519989, "learning_rate": 2.337314983504414e-06, "loss": 0.0561, "step": 153650 }, { "epoch": 4.544271603477849, "grad_norm": 0.8891376256942749, "learning_rate": 2.3371882935854747e-06, "loss": 0.0771, "step": 153660 }, { "epoch": 4.544567338972024, "grad_norm": 1.0479830503463745, "learning_rate": 2.337061603666535e-06, "loss": 0.0841, "step": 153670 }, { "epoch": 4.544863074466197, "grad_norm": 0.5940801501274109, "learning_rate": 2.336934913747596e-06, "loss": 0.0641, "step": 153680 }, { "epoch": 4.5451588099603715, "grad_norm": 0.6856117248535156, "learning_rate": 2.3368082238286562e-06, "loss": 0.0732, "step": 153690 }, { "epoch": 4.545454545454545, "grad_norm": 1.0048983097076416, "learning_rate": 2.336681533909717e-06, "loss": 0.0619, "step": 153700 }, { "epoch": 4.545750280948719, "grad_norm": 0.5847039818763733, "learning_rate": 2.3365548439907774e-06, "loss": 0.0735, "step": 153710 }, { "epoch": 4.546046016442894, "grad_norm": 0.6553558707237244, "learning_rate": 2.3364281540718386e-06, "loss": 0.0887, "step": 153720 }, { "epoch": 4.546341751937067, "grad_norm": 0.7999834418296814, "learning_rate": 2.336301464152899e-06, "loss": 0.077, "step": 153730 }, { "epoch": 4.546637487431242, "grad_norm": 0.947010338306427, "learning_rate": 2.3361747742339598e-06, "loss": 0.0711, "step": 153740 }, { "epoch": 4.546933222925415, "grad_norm": 0.8253382444381714, "learning_rate": 2.33604808431502e-06, "loss": 0.0684, "step": 153750 }, { "epoch": 4.5472289584195895, "grad_norm": 0.46958211064338684, "learning_rate": 2.335921394396081e-06, "loss": 0.0585, "step": 153760 }, { "epoch": 4.547524693913764, "grad_norm": 0.8486934304237366, "learning_rate": 2.3357947044771413e-06, "loss": 0.0621, "step": 153770 }, { "epoch": 4.547820429407937, "grad_norm": 1.1315375566482544, "learning_rate": 2.335668014558202e-06, "loss": 0.0659, "step": 153780 }, { "epoch": 4.548116164902112, "grad_norm": 0.9375646710395813, "learning_rate": 2.3355413246392624e-06, "loss": 0.074, "step": 153790 }, { "epoch": 4.548411900396285, "grad_norm": 1.0783746242523193, "learning_rate": 2.3354146347203236e-06, "loss": 0.0637, "step": 153800 }, { "epoch": 4.54870763589046, "grad_norm": 0.8420196175575256, "learning_rate": 2.335287944801384e-06, "loss": 0.0559, "step": 153810 }, { "epoch": 4.549003371384634, "grad_norm": 0.9631246328353882, "learning_rate": 2.335161254882445e-06, "loss": 0.0769, "step": 153820 }, { "epoch": 4.549299106878808, "grad_norm": 0.47585466504096985, "learning_rate": 2.335034564963505e-06, "loss": 0.0689, "step": 153830 }, { "epoch": 4.549594842372982, "grad_norm": 0.8303189873695374, "learning_rate": 2.334907875044566e-06, "loss": 0.0762, "step": 153840 }, { "epoch": 4.5498905778671555, "grad_norm": 1.217187762260437, "learning_rate": 2.3347811851256263e-06, "loss": 0.069, "step": 153850 }, { "epoch": 4.55018631336133, "grad_norm": 1.4766676425933838, "learning_rate": 2.334654495206687e-06, "loss": 0.0701, "step": 153860 }, { "epoch": 4.550482048855503, "grad_norm": 1.020984172821045, "learning_rate": 2.3345278052877475e-06, "loss": 0.0817, "step": 153870 }, { "epoch": 4.550777784349678, "grad_norm": 0.9322181344032288, "learning_rate": 2.3344011153688087e-06, "loss": 0.0847, "step": 153880 }, { "epoch": 4.551073519843852, "grad_norm": 0.9198503494262695, "learning_rate": 2.334274425449869e-06, "loss": 0.0662, "step": 153890 }, { "epoch": 4.551369255338026, "grad_norm": 1.2476619482040405, "learning_rate": 2.33414773553093e-06, "loss": 0.0567, "step": 153900 }, { "epoch": 4.5516649908322, "grad_norm": 0.9000142812728882, "learning_rate": 2.3340210456119902e-06, "loss": 0.066, "step": 153910 }, { "epoch": 4.5519607263263735, "grad_norm": 0.46887093782424927, "learning_rate": 2.333894355693051e-06, "loss": 0.0646, "step": 153920 }, { "epoch": 4.552256461820548, "grad_norm": 1.1772340536117554, "learning_rate": 2.3337676657741114e-06, "loss": 0.0765, "step": 153930 }, { "epoch": 4.552552197314721, "grad_norm": 0.5951077938079834, "learning_rate": 2.333640975855172e-06, "loss": 0.0654, "step": 153940 }, { "epoch": 4.552847932808896, "grad_norm": 0.9176315665245056, "learning_rate": 2.3335142859362325e-06, "loss": 0.0636, "step": 153950 }, { "epoch": 4.55314366830307, "grad_norm": 0.8353962302207947, "learning_rate": 2.3333875960172937e-06, "loss": 0.0715, "step": 153960 }, { "epoch": 4.553439403797244, "grad_norm": 0.5605400204658508, "learning_rate": 2.333260906098354e-06, "loss": 0.0715, "step": 153970 }, { "epoch": 4.553735139291418, "grad_norm": 1.0595167875289917, "learning_rate": 2.333134216179415e-06, "loss": 0.0722, "step": 153980 }, { "epoch": 4.5540308747855915, "grad_norm": 0.9061388969421387, "learning_rate": 2.3330075262604753e-06, "loss": 0.0698, "step": 153990 }, { "epoch": 4.554326610279766, "grad_norm": 1.4256200790405273, "learning_rate": 2.332880836341536e-06, "loss": 0.0715, "step": 154000 }, { "epoch": 4.554622345773939, "grad_norm": 0.8531121611595154, "learning_rate": 2.3327541464225964e-06, "loss": 0.0588, "step": 154010 }, { "epoch": 4.554918081268114, "grad_norm": 0.6129953265190125, "learning_rate": 2.332627456503657e-06, "loss": 0.082, "step": 154020 }, { "epoch": 4.555213816762288, "grad_norm": 0.7035289406776428, "learning_rate": 2.3325007665847176e-06, "loss": 0.0699, "step": 154030 }, { "epoch": 4.555509552256462, "grad_norm": 1.2438836097717285, "learning_rate": 2.3323740766657784e-06, "loss": 0.0775, "step": 154040 }, { "epoch": 4.555805287750636, "grad_norm": 0.5393076539039612, "learning_rate": 2.332247386746839e-06, "loss": 0.0461, "step": 154050 }, { "epoch": 4.55610102324481, "grad_norm": 0.8493756055831909, "learning_rate": 2.3321206968278995e-06, "loss": 0.0741, "step": 154060 }, { "epoch": 4.556396758738984, "grad_norm": 0.7919735908508301, "learning_rate": 2.3319940069089603e-06, "loss": 0.0723, "step": 154070 }, { "epoch": 4.5566924942331575, "grad_norm": 0.7816357612609863, "learning_rate": 2.3318673169900207e-06, "loss": 0.0694, "step": 154080 }, { "epoch": 4.556988229727332, "grad_norm": 0.6034833788871765, "learning_rate": 2.3317406270710815e-06, "loss": 0.0583, "step": 154090 }, { "epoch": 4.557283965221506, "grad_norm": 1.062326192855835, "learning_rate": 2.331613937152142e-06, "loss": 0.0621, "step": 154100 }, { "epoch": 4.55757970071568, "grad_norm": 0.6455386281013489, "learning_rate": 2.3314872472332026e-06, "loss": 0.0545, "step": 154110 }, { "epoch": 4.557875436209854, "grad_norm": 0.7411508560180664, "learning_rate": 2.3313605573142634e-06, "loss": 0.0686, "step": 154120 }, { "epoch": 4.558171171704028, "grad_norm": 0.9956860542297363, "learning_rate": 2.331233867395324e-06, "loss": 0.0906, "step": 154130 }, { "epoch": 4.558466907198202, "grad_norm": 0.47994187474250793, "learning_rate": 2.3311071774763846e-06, "loss": 0.0655, "step": 154140 }, { "epoch": 4.5587626426923755, "grad_norm": 0.8280116319656372, "learning_rate": 2.3309804875574454e-06, "loss": 0.0587, "step": 154150 }, { "epoch": 4.55905837818655, "grad_norm": 1.5369435548782349, "learning_rate": 2.3308537976385057e-06, "loss": 0.0601, "step": 154160 }, { "epoch": 4.559354113680724, "grad_norm": 1.2478314638137817, "learning_rate": 2.3307271077195665e-06, "loss": 0.0607, "step": 154170 }, { "epoch": 4.559649849174898, "grad_norm": 0.9029074907302856, "learning_rate": 2.330600417800627e-06, "loss": 0.0793, "step": 154180 }, { "epoch": 4.559945584669072, "grad_norm": 0.7372649312019348, "learning_rate": 2.3304737278816877e-06, "loss": 0.0728, "step": 154190 }, { "epoch": 4.560241320163246, "grad_norm": 1.7358287572860718, "learning_rate": 2.3303470379627485e-06, "loss": 0.0511, "step": 154200 }, { "epoch": 4.56053705565742, "grad_norm": 0.7049252390861511, "learning_rate": 2.3302203480438092e-06, "loss": 0.0603, "step": 154210 }, { "epoch": 4.560832791151594, "grad_norm": 1.0464680194854736, "learning_rate": 2.3300936581248696e-06, "loss": 0.0695, "step": 154220 }, { "epoch": 4.561128526645768, "grad_norm": 0.5232036709785461, "learning_rate": 2.3299669682059304e-06, "loss": 0.0708, "step": 154230 }, { "epoch": 4.561424262139942, "grad_norm": 1.0612306594848633, "learning_rate": 2.3298402782869908e-06, "loss": 0.0802, "step": 154240 }, { "epoch": 4.561719997634116, "grad_norm": 0.9079907536506653, "learning_rate": 2.3297135883680516e-06, "loss": 0.0696, "step": 154250 }, { "epoch": 4.56201573312829, "grad_norm": 0.7982879281044006, "learning_rate": 2.329586898449112e-06, "loss": 0.0753, "step": 154260 }, { "epoch": 4.562311468622464, "grad_norm": 0.6043526530265808, "learning_rate": 2.3294602085301727e-06, "loss": 0.0721, "step": 154270 }, { "epoch": 4.562607204116638, "grad_norm": 0.5638050436973572, "learning_rate": 2.3293335186112335e-06, "loss": 0.0694, "step": 154280 }, { "epoch": 4.562902939610812, "grad_norm": 1.0351698398590088, "learning_rate": 2.3292068286922943e-06, "loss": 0.0809, "step": 154290 }, { "epoch": 4.563198675104986, "grad_norm": 0.7650550007820129, "learning_rate": 2.3290801387733547e-06, "loss": 0.0606, "step": 154300 }, { "epoch": 4.56349441059916, "grad_norm": 1.0935914516448975, "learning_rate": 2.3289534488544154e-06, "loss": 0.0602, "step": 154310 }, { "epoch": 4.563790146093334, "grad_norm": 0.8466583490371704, "learning_rate": 2.328826758935476e-06, "loss": 0.0758, "step": 154320 }, { "epoch": 4.564085881587508, "grad_norm": 0.7587630748748779, "learning_rate": 2.3287000690165366e-06, "loss": 0.0679, "step": 154330 }, { "epoch": 4.564381617081682, "grad_norm": 0.920256495475769, "learning_rate": 2.328573379097597e-06, "loss": 0.0701, "step": 154340 }, { "epoch": 4.564677352575856, "grad_norm": 0.9917835593223572, "learning_rate": 2.3284466891786578e-06, "loss": 0.0582, "step": 154350 }, { "epoch": 4.5649730880700305, "grad_norm": 1.5096274614334106, "learning_rate": 2.3283199992597185e-06, "loss": 0.0631, "step": 154360 }, { "epoch": 4.565268823564204, "grad_norm": 1.2791295051574707, "learning_rate": 2.3281933093407793e-06, "loss": 0.0813, "step": 154370 }, { "epoch": 4.565564559058378, "grad_norm": 0.9288119673728943, "learning_rate": 2.3280666194218397e-06, "loss": 0.0783, "step": 154380 }, { "epoch": 4.565860294552552, "grad_norm": 0.7604818344116211, "learning_rate": 2.3279399295029005e-06, "loss": 0.0567, "step": 154390 }, { "epoch": 4.566156030046726, "grad_norm": 0.49085360765457153, "learning_rate": 2.327813239583961e-06, "loss": 0.0482, "step": 154400 }, { "epoch": 4.566451765540901, "grad_norm": 1.7560029029846191, "learning_rate": 2.3276865496650216e-06, "loss": 0.0731, "step": 154410 }, { "epoch": 4.566747501035074, "grad_norm": 1.3837034702301025, "learning_rate": 2.327559859746082e-06, "loss": 0.08, "step": 154420 }, { "epoch": 4.567043236529249, "grad_norm": 1.2349705696105957, "learning_rate": 2.327433169827143e-06, "loss": 0.0761, "step": 154430 }, { "epoch": 4.567338972023422, "grad_norm": 0.6777758002281189, "learning_rate": 2.3273064799082036e-06, "loss": 0.07, "step": 154440 }, { "epoch": 4.5676347075175965, "grad_norm": 0.8458685874938965, "learning_rate": 2.327179789989264e-06, "loss": 0.0651, "step": 154450 }, { "epoch": 4.56793044301177, "grad_norm": 0.64127117395401, "learning_rate": 2.3270531000703247e-06, "loss": 0.0556, "step": 154460 }, { "epoch": 4.568226178505944, "grad_norm": 0.754542350769043, "learning_rate": 2.326926410151385e-06, "loss": 0.0542, "step": 154470 }, { "epoch": 4.568521914000119, "grad_norm": 0.8537079095840454, "learning_rate": 2.326799720232446e-06, "loss": 0.0477, "step": 154480 }, { "epoch": 4.568817649494292, "grad_norm": 1.0057264566421509, "learning_rate": 2.3266730303135063e-06, "loss": 0.0741, "step": 154490 }, { "epoch": 4.569113384988467, "grad_norm": 1.2880276441574097, "learning_rate": 2.326546340394567e-06, "loss": 0.0513, "step": 154500 }, { "epoch": 4.56940912048264, "grad_norm": 1.0564037561416626, "learning_rate": 2.3264196504756274e-06, "loss": 0.0608, "step": 154510 }, { "epoch": 4.5697048559768145, "grad_norm": 1.3423281908035278, "learning_rate": 2.3262929605566886e-06, "loss": 0.0781, "step": 154520 }, { "epoch": 4.570000591470988, "grad_norm": 0.7397165298461914, "learning_rate": 2.326166270637749e-06, "loss": 0.0713, "step": 154530 }, { "epoch": 4.570296326965162, "grad_norm": 0.5695855617523193, "learning_rate": 2.32603958071881e-06, "loss": 0.0667, "step": 154540 }, { "epoch": 4.570592062459337, "grad_norm": 0.7873933911323547, "learning_rate": 2.32591289079987e-06, "loss": 0.0662, "step": 154550 }, { "epoch": 4.57088779795351, "grad_norm": 1.06882905960083, "learning_rate": 2.325786200880931e-06, "loss": 0.0715, "step": 154560 }, { "epoch": 4.571183533447685, "grad_norm": 1.3739960193634033, "learning_rate": 2.3256595109619913e-06, "loss": 0.0896, "step": 154570 }, { "epoch": 4.571479268941858, "grad_norm": 0.8703485727310181, "learning_rate": 2.325532821043052e-06, "loss": 0.0828, "step": 154580 }, { "epoch": 4.571775004436033, "grad_norm": 1.0086830854415894, "learning_rate": 2.3254061311241125e-06, "loss": 0.0662, "step": 154590 }, { "epoch": 4.572070739930206, "grad_norm": 0.7839275002479553, "learning_rate": 2.3252794412051737e-06, "loss": 0.0539, "step": 154600 }, { "epoch": 4.5723664754243805, "grad_norm": 0.6065524220466614, "learning_rate": 2.325152751286234e-06, "loss": 0.0668, "step": 154610 }, { "epoch": 4.572662210918555, "grad_norm": 0.9303827285766602, "learning_rate": 2.325026061367295e-06, "loss": 0.0774, "step": 154620 }, { "epoch": 4.572957946412728, "grad_norm": 0.7409100532531738, "learning_rate": 2.324899371448355e-06, "loss": 0.0556, "step": 154630 }, { "epoch": 4.573253681906903, "grad_norm": 0.7980684638023376, "learning_rate": 2.324772681529416e-06, "loss": 0.0618, "step": 154640 }, { "epoch": 4.573549417401076, "grad_norm": 0.9891704320907593, "learning_rate": 2.3246459916104764e-06, "loss": 0.0657, "step": 154650 }, { "epoch": 4.573845152895251, "grad_norm": 0.9273456335067749, "learning_rate": 2.324519301691537e-06, "loss": 0.0662, "step": 154660 }, { "epoch": 4.574140888389424, "grad_norm": 1.145216703414917, "learning_rate": 2.3243926117725975e-06, "loss": 0.0741, "step": 154670 }, { "epoch": 4.5744366238835985, "grad_norm": 1.1232465505599976, "learning_rate": 2.3242659218536587e-06, "loss": 0.0733, "step": 154680 }, { "epoch": 4.574732359377773, "grad_norm": 1.3827441930770874, "learning_rate": 2.324139231934719e-06, "loss": 0.073, "step": 154690 }, { "epoch": 4.575028094871946, "grad_norm": 0.8391321897506714, "learning_rate": 2.32401254201578e-06, "loss": 0.0543, "step": 154700 }, { "epoch": 4.575323830366121, "grad_norm": 0.8610594272613525, "learning_rate": 2.3238858520968402e-06, "loss": 0.0669, "step": 154710 }, { "epoch": 4.575619565860294, "grad_norm": 0.8277375102043152, "learning_rate": 2.323759162177901e-06, "loss": 0.0835, "step": 154720 }, { "epoch": 4.575915301354469, "grad_norm": 1.1549861431121826, "learning_rate": 2.3236324722589614e-06, "loss": 0.0681, "step": 154730 }, { "epoch": 4.576211036848642, "grad_norm": 0.9907665848731995, "learning_rate": 2.323505782340022e-06, "loss": 0.0762, "step": 154740 }, { "epoch": 4.5765067723428166, "grad_norm": 1.295858383178711, "learning_rate": 2.3233790924210826e-06, "loss": 0.0622, "step": 154750 }, { "epoch": 4.576802507836991, "grad_norm": 0.5662508010864258, "learning_rate": 2.3232524025021438e-06, "loss": 0.0528, "step": 154760 }, { "epoch": 4.577098243331164, "grad_norm": 1.633702278137207, "learning_rate": 2.323125712583204e-06, "loss": 0.0847, "step": 154770 }, { "epoch": 4.577393978825339, "grad_norm": 1.9871145486831665, "learning_rate": 2.322999022664265e-06, "loss": 0.0883, "step": 154780 }, { "epoch": 4.577689714319512, "grad_norm": 0.7966201901435852, "learning_rate": 2.3228723327453253e-06, "loss": 0.0747, "step": 154790 }, { "epoch": 4.577985449813687, "grad_norm": 0.4941963255405426, "learning_rate": 2.322745642826386e-06, "loss": 0.0659, "step": 154800 }, { "epoch": 4.57828118530786, "grad_norm": 0.650296151638031, "learning_rate": 2.3226189529074464e-06, "loss": 0.0633, "step": 154810 }, { "epoch": 4.578576920802035, "grad_norm": 1.548932433128357, "learning_rate": 2.3224922629885072e-06, "loss": 0.0848, "step": 154820 }, { "epoch": 4.578872656296209, "grad_norm": 0.7444802522659302, "learning_rate": 2.3223655730695676e-06, "loss": 0.087, "step": 154830 }, { "epoch": 4.5791683917903825, "grad_norm": 1.2734538316726685, "learning_rate": 2.322238883150629e-06, "loss": 0.0775, "step": 154840 }, { "epoch": 4.579464127284557, "grad_norm": 0.6552737355232239, "learning_rate": 2.322112193231689e-06, "loss": 0.0703, "step": 154850 }, { "epoch": 4.57975986277873, "grad_norm": 0.6778932809829712, "learning_rate": 2.3219855033127495e-06, "loss": 0.0692, "step": 154860 }, { "epoch": 4.580055598272905, "grad_norm": 0.7170160412788391, "learning_rate": 2.3218588133938103e-06, "loss": 0.0663, "step": 154870 }, { "epoch": 4.580351333767078, "grad_norm": 0.5884621143341064, "learning_rate": 2.3217321234748707e-06, "loss": 0.0622, "step": 154880 }, { "epoch": 4.580647069261253, "grad_norm": 0.8582025766372681, "learning_rate": 2.3216054335559315e-06, "loss": 0.0717, "step": 154890 }, { "epoch": 4.580942804755427, "grad_norm": 0.7052797675132751, "learning_rate": 2.321478743636992e-06, "loss": 0.0599, "step": 154900 }, { "epoch": 4.5812385402496005, "grad_norm": 1.3321893215179443, "learning_rate": 2.3213520537180526e-06, "loss": 0.0726, "step": 154910 }, { "epoch": 4.581534275743775, "grad_norm": 0.7791566848754883, "learning_rate": 2.3212253637991134e-06, "loss": 0.0606, "step": 154920 }, { "epoch": 4.581830011237948, "grad_norm": 1.0796726942062378, "learning_rate": 2.3210986738801742e-06, "loss": 0.0729, "step": 154930 }, { "epoch": 4.582125746732123, "grad_norm": 1.0323076248168945, "learning_rate": 2.3209719839612346e-06, "loss": 0.0686, "step": 154940 }, { "epoch": 4.582421482226296, "grad_norm": 1.0551484823226929, "learning_rate": 2.3208452940422954e-06, "loss": 0.0616, "step": 154950 }, { "epoch": 4.582717217720471, "grad_norm": 1.0722655057907104, "learning_rate": 2.3207186041233557e-06, "loss": 0.0636, "step": 154960 }, { "epoch": 4.583012953214645, "grad_norm": 0.883310854434967, "learning_rate": 2.3205919142044165e-06, "loss": 0.073, "step": 154970 }, { "epoch": 4.583308688708819, "grad_norm": 1.1795624494552612, "learning_rate": 2.320465224285477e-06, "loss": 0.0664, "step": 154980 }, { "epoch": 4.583604424202993, "grad_norm": 1.567552924156189, "learning_rate": 2.3203385343665377e-06, "loss": 0.066, "step": 154990 }, { "epoch": 4.583900159697167, "grad_norm": 0.41767141222953796, "learning_rate": 2.3202118444475985e-06, "loss": 0.0624, "step": 155000 }, { "epoch": 4.584195895191341, "grad_norm": 0.6518895626068115, "learning_rate": 2.3200851545286593e-06, "loss": 0.0664, "step": 155010 }, { "epoch": 4.584491630685515, "grad_norm": 0.832059383392334, "learning_rate": 2.3199584646097196e-06, "loss": 0.0821, "step": 155020 }, { "epoch": 4.584787366179689, "grad_norm": 1.0828561782836914, "learning_rate": 2.3198317746907804e-06, "loss": 0.0783, "step": 155030 }, { "epoch": 4.585083101673863, "grad_norm": 0.596442699432373, "learning_rate": 2.319705084771841e-06, "loss": 0.0687, "step": 155040 }, { "epoch": 4.585378837168037, "grad_norm": 0.8429916501045227, "learning_rate": 2.3195783948529016e-06, "loss": 0.0674, "step": 155050 }, { "epoch": 4.585674572662211, "grad_norm": 0.7832167744636536, "learning_rate": 2.319451704933962e-06, "loss": 0.0843, "step": 155060 }, { "epoch": 4.585970308156385, "grad_norm": 0.6388078927993774, "learning_rate": 2.3193250150150227e-06, "loss": 0.0807, "step": 155070 }, { "epoch": 4.586266043650559, "grad_norm": 0.619781494140625, "learning_rate": 2.3191983250960835e-06, "loss": 0.0682, "step": 155080 }, { "epoch": 4.586561779144733, "grad_norm": 1.4036816358566284, "learning_rate": 2.3190716351771443e-06, "loss": 0.0749, "step": 155090 }, { "epoch": 4.586857514638907, "grad_norm": 0.9513135552406311, "learning_rate": 2.3189449452582047e-06, "loss": 0.0591, "step": 155100 }, { "epoch": 4.587153250133081, "grad_norm": 1.4361757040023804, "learning_rate": 2.3188182553392655e-06, "loss": 0.0687, "step": 155110 }, { "epoch": 4.587448985627255, "grad_norm": 0.8588830232620239, "learning_rate": 2.318691565420326e-06, "loss": 0.0821, "step": 155120 }, { "epoch": 4.587744721121429, "grad_norm": 1.6912305355072021, "learning_rate": 2.3185648755013866e-06, "loss": 0.0868, "step": 155130 }, { "epoch": 4.5880404566156034, "grad_norm": 0.8250070810317993, "learning_rate": 2.318438185582447e-06, "loss": 0.0674, "step": 155140 }, { "epoch": 4.588336192109777, "grad_norm": 0.6335489153862, "learning_rate": 2.3183114956635078e-06, "loss": 0.0609, "step": 155150 }, { "epoch": 4.588631927603951, "grad_norm": 0.8069185018539429, "learning_rate": 2.3181848057445686e-06, "loss": 0.0679, "step": 155160 }, { "epoch": 4.588927663098125, "grad_norm": 0.6870298385620117, "learning_rate": 2.3180581158256294e-06, "loss": 0.0717, "step": 155170 }, { "epoch": 4.589223398592299, "grad_norm": 1.151786208152771, "learning_rate": 2.3179314259066897e-06, "loss": 0.0599, "step": 155180 }, { "epoch": 4.589519134086473, "grad_norm": 1.0516873598098755, "learning_rate": 2.3178047359877505e-06, "loss": 0.0596, "step": 155190 }, { "epoch": 4.589814869580647, "grad_norm": 1.1558974981307983, "learning_rate": 2.317678046068811e-06, "loss": 0.0651, "step": 155200 }, { "epoch": 4.5901106050748215, "grad_norm": 0.46376219391822815, "learning_rate": 2.3175513561498717e-06, "loss": 0.0631, "step": 155210 }, { "epoch": 4.590406340568995, "grad_norm": 1.0093570947647095, "learning_rate": 2.317424666230932e-06, "loss": 0.0737, "step": 155220 }, { "epoch": 4.590702076063169, "grad_norm": 0.7841184139251709, "learning_rate": 2.317297976311993e-06, "loss": 0.0648, "step": 155230 }, { "epoch": 4.590997811557343, "grad_norm": 1.0025057792663574, "learning_rate": 2.3171712863930536e-06, "loss": 0.0617, "step": 155240 }, { "epoch": 4.591293547051517, "grad_norm": 1.4524667263031006, "learning_rate": 2.3170445964741144e-06, "loss": 0.0614, "step": 155250 }, { "epoch": 4.591589282545691, "grad_norm": 1.7720129489898682, "learning_rate": 2.3169179065551748e-06, "loss": 0.0638, "step": 155260 }, { "epoch": 4.591885018039865, "grad_norm": 0.9967120885848999, "learning_rate": 2.3167912166362356e-06, "loss": 0.0856, "step": 155270 }, { "epoch": 4.5921807535340395, "grad_norm": 1.374131202697754, "learning_rate": 2.316664526717296e-06, "loss": 0.0828, "step": 155280 }, { "epoch": 4.592476489028213, "grad_norm": 0.9167153239250183, "learning_rate": 2.3165378367983563e-06, "loss": 0.0632, "step": 155290 }, { "epoch": 4.592772224522387, "grad_norm": 1.008881688117981, "learning_rate": 2.316411146879417e-06, "loss": 0.0488, "step": 155300 }, { "epoch": 4.593067960016561, "grad_norm": 1.3821789026260376, "learning_rate": 2.3162844569604774e-06, "loss": 0.0679, "step": 155310 }, { "epoch": 4.593363695510735, "grad_norm": 0.9387503266334534, "learning_rate": 2.3161577670415387e-06, "loss": 0.0701, "step": 155320 }, { "epoch": 4.593659431004909, "grad_norm": 1.0191653966903687, "learning_rate": 2.316031077122599e-06, "loss": 0.0593, "step": 155330 }, { "epoch": 4.593955166499083, "grad_norm": 1.2696549892425537, "learning_rate": 2.31590438720366e-06, "loss": 0.0651, "step": 155340 }, { "epoch": 4.594250901993258, "grad_norm": 0.4853661060333252, "learning_rate": 2.31577769728472e-06, "loss": 0.0641, "step": 155350 }, { "epoch": 4.594546637487431, "grad_norm": 0.600666880607605, "learning_rate": 2.315651007365781e-06, "loss": 0.0614, "step": 155360 }, { "epoch": 4.5948423729816055, "grad_norm": 1.3111473321914673, "learning_rate": 2.3155243174468413e-06, "loss": 0.0793, "step": 155370 }, { "epoch": 4.595138108475779, "grad_norm": 0.6854016184806824, "learning_rate": 2.315397627527902e-06, "loss": 0.0788, "step": 155380 }, { "epoch": 4.595433843969953, "grad_norm": 0.8250278830528259, "learning_rate": 2.3152709376089625e-06, "loss": 0.0699, "step": 155390 }, { "epoch": 4.595729579464127, "grad_norm": 1.266850233078003, "learning_rate": 2.3151442476900237e-06, "loss": 0.0713, "step": 155400 }, { "epoch": 4.596025314958301, "grad_norm": 0.8901605010032654, "learning_rate": 2.315017557771084e-06, "loss": 0.0721, "step": 155410 }, { "epoch": 4.596321050452476, "grad_norm": 0.9784656763076782, "learning_rate": 2.314890867852145e-06, "loss": 0.0747, "step": 155420 }, { "epoch": 4.596616785946649, "grad_norm": 0.9990247488021851, "learning_rate": 2.3147641779332052e-06, "loss": 0.0695, "step": 155430 }, { "epoch": 4.5969125214408235, "grad_norm": 1.4012088775634766, "learning_rate": 2.314637488014266e-06, "loss": 0.0851, "step": 155440 }, { "epoch": 4.597208256934997, "grad_norm": 0.8390518426895142, "learning_rate": 2.3145107980953264e-06, "loss": 0.0575, "step": 155450 }, { "epoch": 4.597503992429171, "grad_norm": 1.123947024345398, "learning_rate": 2.314384108176387e-06, "loss": 0.0722, "step": 155460 }, { "epoch": 4.597799727923345, "grad_norm": 1.0056240558624268, "learning_rate": 2.3142574182574475e-06, "loss": 0.0726, "step": 155470 }, { "epoch": 4.598095463417519, "grad_norm": 0.7717240452766418, "learning_rate": 2.3141307283385088e-06, "loss": 0.0686, "step": 155480 }, { "epoch": 4.598391198911694, "grad_norm": 0.6621854305267334, "learning_rate": 2.314004038419569e-06, "loss": 0.0543, "step": 155490 }, { "epoch": 4.598686934405867, "grad_norm": 0.8380272388458252, "learning_rate": 2.31387734850063e-06, "loss": 0.0693, "step": 155500 }, { "epoch": 4.598982669900042, "grad_norm": 1.287169337272644, "learning_rate": 2.3137506585816903e-06, "loss": 0.0658, "step": 155510 }, { "epoch": 4.599278405394215, "grad_norm": 0.9798297882080078, "learning_rate": 2.313623968662751e-06, "loss": 0.0789, "step": 155520 }, { "epoch": 4.5995741408883895, "grad_norm": 0.8268445730209351, "learning_rate": 2.3134972787438114e-06, "loss": 0.0779, "step": 155530 }, { "epoch": 4.599869876382563, "grad_norm": 0.876640796661377, "learning_rate": 2.3133705888248722e-06, "loss": 0.0685, "step": 155540 }, { "epoch": 4.600165611876737, "grad_norm": 0.8047987222671509, "learning_rate": 2.3132438989059326e-06, "loss": 0.0613, "step": 155550 }, { "epoch": 4.600461347370912, "grad_norm": 0.5301390886306763, "learning_rate": 2.313117208986994e-06, "loss": 0.0564, "step": 155560 }, { "epoch": 4.600757082865085, "grad_norm": 0.7571027278900146, "learning_rate": 2.312990519068054e-06, "loss": 0.0749, "step": 155570 }, { "epoch": 4.60105281835926, "grad_norm": 0.9646645784378052, "learning_rate": 2.312863829149115e-06, "loss": 0.0666, "step": 155580 }, { "epoch": 4.601348553853433, "grad_norm": 0.9364733695983887, "learning_rate": 2.3127371392301753e-06, "loss": 0.0681, "step": 155590 }, { "epoch": 4.6016442893476075, "grad_norm": 0.7627482414245605, "learning_rate": 2.312610449311236e-06, "loss": 0.0594, "step": 155600 }, { "epoch": 4.601940024841782, "grad_norm": 0.9560323357582092, "learning_rate": 2.3124837593922965e-06, "loss": 0.0546, "step": 155610 }, { "epoch": 4.602235760335955, "grad_norm": 0.7604756355285645, "learning_rate": 2.3123570694733573e-06, "loss": 0.0638, "step": 155620 }, { "epoch": 4.60253149583013, "grad_norm": 1.3760725259780884, "learning_rate": 2.3122303795544176e-06, "loss": 0.0669, "step": 155630 }, { "epoch": 4.602827231324303, "grad_norm": 0.8671414852142334, "learning_rate": 2.312103689635479e-06, "loss": 0.0794, "step": 155640 }, { "epoch": 4.603122966818478, "grad_norm": 0.777256429195404, "learning_rate": 2.311976999716539e-06, "loss": 0.0651, "step": 155650 }, { "epoch": 4.603418702312652, "grad_norm": 0.8767224550247192, "learning_rate": 2.3118503097976e-06, "loss": 0.0569, "step": 155660 }, { "epoch": 4.6037144378068255, "grad_norm": 0.7784066796302795, "learning_rate": 2.3117236198786604e-06, "loss": 0.0769, "step": 155670 }, { "epoch": 4.604010173301, "grad_norm": 0.7661774158477783, "learning_rate": 2.311596929959721e-06, "loss": 0.0726, "step": 155680 }, { "epoch": 4.604305908795173, "grad_norm": 0.8914898037910461, "learning_rate": 2.3114702400407815e-06, "loss": 0.0633, "step": 155690 }, { "epoch": 4.604601644289348, "grad_norm": 0.7525434494018555, "learning_rate": 2.311343550121842e-06, "loss": 0.0668, "step": 155700 }, { "epoch": 4.604897379783521, "grad_norm": 0.6318889856338501, "learning_rate": 2.3112168602029027e-06, "loss": 0.0596, "step": 155710 }, { "epoch": 4.605193115277696, "grad_norm": 0.6082821488380432, "learning_rate": 2.311090170283963e-06, "loss": 0.0848, "step": 155720 }, { "epoch": 4.60548885077187, "grad_norm": 1.5849003791809082, "learning_rate": 2.3109634803650243e-06, "loss": 0.0761, "step": 155730 }, { "epoch": 4.605784586266044, "grad_norm": 0.9611275792121887, "learning_rate": 2.3108367904460846e-06, "loss": 0.0723, "step": 155740 }, { "epoch": 4.606080321760218, "grad_norm": 0.5686185956001282, "learning_rate": 2.3107101005271454e-06, "loss": 0.0564, "step": 155750 }, { "epoch": 4.6063760572543915, "grad_norm": 1.3935860395431519, "learning_rate": 2.3105834106082058e-06, "loss": 0.0615, "step": 155760 }, { "epoch": 4.606671792748566, "grad_norm": 0.41972121596336365, "learning_rate": 2.3104567206892666e-06, "loss": 0.0689, "step": 155770 }, { "epoch": 4.606967528242739, "grad_norm": 1.407842755317688, "learning_rate": 2.310330030770327e-06, "loss": 0.0972, "step": 155780 }, { "epoch": 4.607263263736914, "grad_norm": 1.049089789390564, "learning_rate": 2.3102033408513877e-06, "loss": 0.0855, "step": 155790 }, { "epoch": 4.607558999231088, "grad_norm": 0.686075747013092, "learning_rate": 2.310076650932448e-06, "loss": 0.0466, "step": 155800 }, { "epoch": 4.607854734725262, "grad_norm": 0.8621866703033447, "learning_rate": 2.3099499610135093e-06, "loss": 0.0757, "step": 155810 }, { "epoch": 4.608150470219436, "grad_norm": 0.8347556591033936, "learning_rate": 2.3098232710945697e-06, "loss": 0.0785, "step": 155820 }, { "epoch": 4.6084462057136095, "grad_norm": 1.4045474529266357, "learning_rate": 2.3096965811756305e-06, "loss": 0.0641, "step": 155830 }, { "epoch": 4.608741941207784, "grad_norm": 0.8600298166275024, "learning_rate": 2.309569891256691e-06, "loss": 0.0473, "step": 155840 }, { "epoch": 4.609037676701957, "grad_norm": 1.0264086723327637, "learning_rate": 2.3094432013377516e-06, "loss": 0.0512, "step": 155850 }, { "epoch": 4.609333412196132, "grad_norm": 1.2239996194839478, "learning_rate": 2.309316511418812e-06, "loss": 0.0766, "step": 155860 }, { "epoch": 4.609629147690306, "grad_norm": 0.4663827419281006, "learning_rate": 2.3091898214998728e-06, "loss": 0.0727, "step": 155870 }, { "epoch": 4.60992488318448, "grad_norm": 1.3480960130691528, "learning_rate": 2.309063131580933e-06, "loss": 0.0733, "step": 155880 }, { "epoch": 4.610220618678654, "grad_norm": 1.3317426443099976, "learning_rate": 2.3089364416619943e-06, "loss": 0.0782, "step": 155890 }, { "epoch": 4.610516354172828, "grad_norm": 0.8878673911094666, "learning_rate": 2.3088097517430547e-06, "loss": 0.0488, "step": 155900 }, { "epoch": 4.610812089667002, "grad_norm": 0.5362998247146606, "learning_rate": 2.3086830618241155e-06, "loss": 0.077, "step": 155910 }, { "epoch": 4.6111078251611755, "grad_norm": 1.5209866762161255, "learning_rate": 2.308556371905176e-06, "loss": 0.0676, "step": 155920 }, { "epoch": 4.61140356065535, "grad_norm": 0.8351227641105652, "learning_rate": 2.3084296819862367e-06, "loss": 0.077, "step": 155930 }, { "epoch": 4.611699296149524, "grad_norm": 0.806922972202301, "learning_rate": 2.308302992067297e-06, "loss": 0.0649, "step": 155940 }, { "epoch": 4.611995031643698, "grad_norm": 0.42921748757362366, "learning_rate": 2.308176302148358e-06, "loss": 0.0598, "step": 155950 }, { "epoch": 4.612290767137872, "grad_norm": 1.0720261335372925, "learning_rate": 2.308049612229418e-06, "loss": 0.0496, "step": 155960 }, { "epoch": 4.612586502632046, "grad_norm": 1.0854647159576416, "learning_rate": 2.3079229223104794e-06, "loss": 0.0794, "step": 155970 }, { "epoch": 4.61288223812622, "grad_norm": 0.9729707837104797, "learning_rate": 2.3077962323915398e-06, "loss": 0.0704, "step": 155980 }, { "epoch": 4.6131779736203935, "grad_norm": 0.6718632578849792, "learning_rate": 2.3076695424726005e-06, "loss": 0.0695, "step": 155990 }, { "epoch": 4.613473709114568, "grad_norm": 1.348433017730713, "learning_rate": 2.307542852553661e-06, "loss": 0.0675, "step": 156000 }, { "epoch": 4.613769444608742, "grad_norm": 0.8128876686096191, "learning_rate": 2.3074161626347217e-06, "loss": 0.066, "step": 156010 }, { "epoch": 4.614065180102916, "grad_norm": 0.6418266892433167, "learning_rate": 2.307289472715782e-06, "loss": 0.0727, "step": 156020 }, { "epoch": 4.61436091559709, "grad_norm": 0.8028393387794495, "learning_rate": 2.307162782796843e-06, "loss": 0.0722, "step": 156030 }, { "epoch": 4.614656651091264, "grad_norm": 1.1540766954421997, "learning_rate": 2.3070360928779032e-06, "loss": 0.0719, "step": 156040 }, { "epoch": 4.614952386585438, "grad_norm": 0.5183001160621643, "learning_rate": 2.3069094029589644e-06, "loss": 0.0546, "step": 156050 }, { "epoch": 4.6152481220796115, "grad_norm": 0.750600278377533, "learning_rate": 2.306782713040025e-06, "loss": 0.0607, "step": 156060 }, { "epoch": 4.615543857573786, "grad_norm": 1.0192996263504028, "learning_rate": 2.3066560231210856e-06, "loss": 0.0717, "step": 156070 }, { "epoch": 4.61583959306796, "grad_norm": 1.1999198198318481, "learning_rate": 2.306529333202146e-06, "loss": 0.0667, "step": 156080 }, { "epoch": 4.616135328562134, "grad_norm": 1.3610786199569702, "learning_rate": 2.3064026432832067e-06, "loss": 0.0657, "step": 156090 }, { "epoch": 4.616431064056308, "grad_norm": 1.0362277030944824, "learning_rate": 2.306275953364267e-06, "loss": 0.0614, "step": 156100 }, { "epoch": 4.616726799550482, "grad_norm": 2.135789394378662, "learning_rate": 2.3061492634453275e-06, "loss": 0.0747, "step": 156110 }, { "epoch": 4.617022535044656, "grad_norm": 1.214956521987915, "learning_rate": 2.3060225735263883e-06, "loss": 0.0902, "step": 156120 }, { "epoch": 4.61731827053883, "grad_norm": 1.1505454778671265, "learning_rate": 2.305895883607449e-06, "loss": 0.0755, "step": 156130 }, { "epoch": 4.617614006033004, "grad_norm": 1.2992234230041504, "learning_rate": 2.30576919368851e-06, "loss": 0.09, "step": 156140 }, { "epoch": 4.617909741527178, "grad_norm": 0.6363811492919922, "learning_rate": 2.3056425037695702e-06, "loss": 0.0521, "step": 156150 }, { "epoch": 4.618205477021352, "grad_norm": 0.9535069465637207, "learning_rate": 2.305515813850631e-06, "loss": 0.0743, "step": 156160 }, { "epoch": 4.618501212515526, "grad_norm": 0.9615479707717896, "learning_rate": 2.3053891239316914e-06, "loss": 0.0619, "step": 156170 }, { "epoch": 4.6187969480097, "grad_norm": 1.2276771068572998, "learning_rate": 2.305262434012752e-06, "loss": 0.0788, "step": 156180 }, { "epoch": 4.619092683503874, "grad_norm": 0.8843190670013428, "learning_rate": 2.3051357440938125e-06, "loss": 0.0619, "step": 156190 }, { "epoch": 4.6193884189980485, "grad_norm": 0.8809137940406799, "learning_rate": 2.3050090541748733e-06, "loss": 0.0668, "step": 156200 }, { "epoch": 4.619684154492222, "grad_norm": 1.2029120922088623, "learning_rate": 2.304882364255934e-06, "loss": 0.0586, "step": 156210 }, { "epoch": 4.619979889986396, "grad_norm": 1.4243992567062378, "learning_rate": 2.304755674336995e-06, "loss": 0.0903, "step": 156220 }, { "epoch": 4.62027562548057, "grad_norm": 2.3792426586151123, "learning_rate": 2.3046289844180553e-06, "loss": 0.0728, "step": 156230 }, { "epoch": 4.620571360974744, "grad_norm": 0.8736501932144165, "learning_rate": 2.304502294499116e-06, "loss": 0.0699, "step": 156240 }, { "epoch": 4.620867096468919, "grad_norm": 0.9692328572273254, "learning_rate": 2.3043756045801764e-06, "loss": 0.0566, "step": 156250 }, { "epoch": 4.621162831963092, "grad_norm": 0.8017250895500183, "learning_rate": 2.304248914661237e-06, "loss": 0.067, "step": 156260 }, { "epoch": 4.621458567457267, "grad_norm": 0.7810633778572083, "learning_rate": 2.3041222247422976e-06, "loss": 0.066, "step": 156270 }, { "epoch": 4.62175430295144, "grad_norm": 0.7271156907081604, "learning_rate": 2.3039955348233584e-06, "loss": 0.0804, "step": 156280 }, { "epoch": 4.6220500384456145, "grad_norm": 0.8138699531555176, "learning_rate": 2.303868844904419e-06, "loss": 0.0634, "step": 156290 }, { "epoch": 4.622345773939788, "grad_norm": 1.1120411157608032, "learning_rate": 2.30374215498548e-06, "loss": 0.065, "step": 156300 }, { "epoch": 4.622641509433962, "grad_norm": 0.7865335941314697, "learning_rate": 2.3036154650665403e-06, "loss": 0.0822, "step": 156310 }, { "epoch": 4.622937244928137, "grad_norm": 1.2620103359222412, "learning_rate": 2.303488775147601e-06, "loss": 0.0749, "step": 156320 }, { "epoch": 4.62323298042231, "grad_norm": 0.9379914402961731, "learning_rate": 2.3033620852286615e-06, "loss": 0.0786, "step": 156330 }, { "epoch": 4.623528715916485, "grad_norm": 1.0831410884857178, "learning_rate": 2.3032353953097223e-06, "loss": 0.0614, "step": 156340 }, { "epoch": 4.623824451410658, "grad_norm": 1.448483943939209, "learning_rate": 2.3031087053907826e-06, "loss": 0.0713, "step": 156350 }, { "epoch": 4.6241201869048325, "grad_norm": 0.8671109676361084, "learning_rate": 2.3029820154718434e-06, "loss": 0.0519, "step": 156360 }, { "epoch": 4.624415922399006, "grad_norm": 0.7121971845626831, "learning_rate": 2.302855325552904e-06, "loss": 0.079, "step": 156370 }, { "epoch": 4.62471165789318, "grad_norm": 1.066007375717163, "learning_rate": 2.302728635633965e-06, "loss": 0.056, "step": 156380 }, { "epoch": 4.625007393387355, "grad_norm": 0.8336177468299866, "learning_rate": 2.3026019457150254e-06, "loss": 0.077, "step": 156390 }, { "epoch": 4.625303128881528, "grad_norm": 0.6264138221740723, "learning_rate": 2.302475255796086e-06, "loss": 0.0818, "step": 156400 }, { "epoch": 4.625598864375703, "grad_norm": 1.1685445308685303, "learning_rate": 2.3023485658771465e-06, "loss": 0.0808, "step": 156410 }, { "epoch": 4.625894599869876, "grad_norm": 0.7033259868621826, "learning_rate": 2.3022218759582073e-06, "loss": 0.075, "step": 156420 }, { "epoch": 4.6261903353640506, "grad_norm": 1.1867833137512207, "learning_rate": 2.3020951860392677e-06, "loss": 0.0598, "step": 156430 }, { "epoch": 4.626486070858224, "grad_norm": 0.6678686738014221, "learning_rate": 2.3019684961203285e-06, "loss": 0.0564, "step": 156440 }, { "epoch": 4.626781806352398, "grad_norm": 1.2775342464447021, "learning_rate": 2.3018418062013892e-06, "loss": 0.0605, "step": 156450 }, { "epoch": 4.627077541846573, "grad_norm": 0.8698681592941284, "learning_rate": 2.30171511628245e-06, "loss": 0.0616, "step": 156460 }, { "epoch": 4.627373277340746, "grad_norm": 1.489372730255127, "learning_rate": 2.3015884263635104e-06, "loss": 0.0796, "step": 156470 }, { "epoch": 4.627669012834921, "grad_norm": 0.5999967455863953, "learning_rate": 2.301461736444571e-06, "loss": 0.0719, "step": 156480 }, { "epoch": 4.627964748329094, "grad_norm": 0.5863078832626343, "learning_rate": 2.3013350465256316e-06, "loss": 0.0525, "step": 156490 }, { "epoch": 4.628260483823269, "grad_norm": 0.9068540334701538, "learning_rate": 2.3012083566066923e-06, "loss": 0.0605, "step": 156500 }, { "epoch": 4.628556219317442, "grad_norm": 1.2532111406326294, "learning_rate": 2.3010816666877527e-06, "loss": 0.0695, "step": 156510 }, { "epoch": 4.6288519548116165, "grad_norm": 0.8046987652778625, "learning_rate": 2.300954976768813e-06, "loss": 0.0787, "step": 156520 }, { "epoch": 4.629147690305791, "grad_norm": 0.5506638884544373, "learning_rate": 2.3008282868498743e-06, "loss": 0.0706, "step": 156530 }, { "epoch": 4.629443425799964, "grad_norm": 0.6807615160942078, "learning_rate": 2.3007015969309347e-06, "loss": 0.0697, "step": 156540 }, { "epoch": 4.629739161294139, "grad_norm": 0.6001811623573303, "learning_rate": 2.3005749070119954e-06, "loss": 0.0516, "step": 156550 }, { "epoch": 4.630034896788312, "grad_norm": 0.8306893110275269, "learning_rate": 2.300448217093056e-06, "loss": 0.0625, "step": 156560 }, { "epoch": 4.630330632282487, "grad_norm": 1.0028786659240723, "learning_rate": 2.3003215271741166e-06, "loss": 0.0863, "step": 156570 }, { "epoch": 4.63062636777666, "grad_norm": 0.7755389213562012, "learning_rate": 2.300194837255177e-06, "loss": 0.071, "step": 156580 }, { "epoch": 4.6309221032708345, "grad_norm": 0.7329712510108948, "learning_rate": 2.3000681473362378e-06, "loss": 0.0694, "step": 156590 }, { "epoch": 4.631217838765009, "grad_norm": 1.7729946374893188, "learning_rate": 2.299941457417298e-06, "loss": 0.0778, "step": 156600 }, { "epoch": 4.631513574259182, "grad_norm": 0.987694501876831, "learning_rate": 2.2998147674983593e-06, "loss": 0.0891, "step": 156610 }, { "epoch": 4.631809309753357, "grad_norm": 0.7931014895439148, "learning_rate": 2.2996880775794197e-06, "loss": 0.0688, "step": 156620 }, { "epoch": 4.63210504524753, "grad_norm": 0.97358238697052, "learning_rate": 2.2995613876604805e-06, "loss": 0.0765, "step": 156630 }, { "epoch": 4.632400780741705, "grad_norm": 0.7151036262512207, "learning_rate": 2.299434697741541e-06, "loss": 0.0871, "step": 156640 }, { "epoch": 4.632696516235878, "grad_norm": 0.7266984581947327, "learning_rate": 2.2993080078226016e-06, "loss": 0.0558, "step": 156650 }, { "epoch": 4.632992251730053, "grad_norm": 0.5579700469970703, "learning_rate": 2.299181317903662e-06, "loss": 0.0655, "step": 156660 }, { "epoch": 4.633287987224227, "grad_norm": 0.9660993218421936, "learning_rate": 2.299054627984723e-06, "loss": 0.0701, "step": 156670 }, { "epoch": 4.6335837227184005, "grad_norm": 0.6696252226829529, "learning_rate": 2.298927938065783e-06, "loss": 0.0697, "step": 156680 }, { "epoch": 4.633879458212575, "grad_norm": 1.9218353033065796, "learning_rate": 2.2988012481468444e-06, "loss": 0.0668, "step": 156690 }, { "epoch": 4.634175193706748, "grad_norm": 0.6265743970870972, "learning_rate": 2.2986745582279047e-06, "loss": 0.0562, "step": 156700 }, { "epoch": 4.634470929200923, "grad_norm": 1.347922444343567, "learning_rate": 2.2985478683089655e-06, "loss": 0.0557, "step": 156710 }, { "epoch": 4.634766664695096, "grad_norm": 0.7520106434822083, "learning_rate": 2.298421178390026e-06, "loss": 0.0925, "step": 156720 }, { "epoch": 4.635062400189271, "grad_norm": 0.844863772392273, "learning_rate": 2.2982944884710867e-06, "loss": 0.0849, "step": 156730 }, { "epoch": 4.635358135683445, "grad_norm": 0.8029410243034363, "learning_rate": 2.298167798552147e-06, "loss": 0.0639, "step": 156740 }, { "epoch": 4.6356538711776185, "grad_norm": 0.8217620253562927, "learning_rate": 2.298041108633208e-06, "loss": 0.0592, "step": 156750 }, { "epoch": 4.635949606671793, "grad_norm": 0.3876648247241974, "learning_rate": 2.297914418714268e-06, "loss": 0.0487, "step": 156760 }, { "epoch": 4.636245342165966, "grad_norm": 0.6120260953903198, "learning_rate": 2.2977877287953294e-06, "loss": 0.0785, "step": 156770 }, { "epoch": 4.636541077660141, "grad_norm": 0.9930747747421265, "learning_rate": 2.2976610388763898e-06, "loss": 0.0609, "step": 156780 }, { "epoch": 4.636836813154314, "grad_norm": 1.2334673404693604, "learning_rate": 2.2975343489574506e-06, "loss": 0.0748, "step": 156790 }, { "epoch": 4.637132548648489, "grad_norm": 0.7263911962509155, "learning_rate": 2.297407659038511e-06, "loss": 0.059, "step": 156800 }, { "epoch": 4.637428284142663, "grad_norm": 0.6926511526107788, "learning_rate": 2.2972809691195717e-06, "loss": 0.0634, "step": 156810 }, { "epoch": 4.637724019636837, "grad_norm": 1.4363089799880981, "learning_rate": 2.297154279200632e-06, "loss": 0.0628, "step": 156820 }, { "epoch": 4.638019755131011, "grad_norm": 0.8197410702705383, "learning_rate": 2.297027589281693e-06, "loss": 0.0646, "step": 156830 }, { "epoch": 4.638315490625185, "grad_norm": 0.631834089756012, "learning_rate": 2.2969008993627533e-06, "loss": 0.0862, "step": 156840 }, { "epoch": 4.638611226119359, "grad_norm": 0.9018288850784302, "learning_rate": 2.2967742094438145e-06, "loss": 0.0606, "step": 156850 }, { "epoch": 4.638906961613533, "grad_norm": 1.1063729524612427, "learning_rate": 2.296647519524875e-06, "loss": 0.0571, "step": 156860 }, { "epoch": 4.639202697107707, "grad_norm": 0.7551749348640442, "learning_rate": 2.2965208296059356e-06, "loss": 0.0738, "step": 156870 }, { "epoch": 4.639498432601881, "grad_norm": 1.0367038249969482, "learning_rate": 2.296394139686996e-06, "loss": 0.083, "step": 156880 }, { "epoch": 4.639794168096055, "grad_norm": 1.139752984046936, "learning_rate": 2.2962674497680568e-06, "loss": 0.072, "step": 156890 }, { "epoch": 4.640089903590229, "grad_norm": 0.5985425710678101, "learning_rate": 2.296140759849117e-06, "loss": 0.0796, "step": 156900 }, { "epoch": 4.640385639084403, "grad_norm": 1.7691316604614258, "learning_rate": 2.296014069930178e-06, "loss": 0.074, "step": 156910 }, { "epoch": 4.640681374578577, "grad_norm": 0.8088517189025879, "learning_rate": 2.2958873800112383e-06, "loss": 0.081, "step": 156920 }, { "epoch": 4.640977110072751, "grad_norm": 0.7155810594558716, "learning_rate": 2.295760690092299e-06, "loss": 0.0683, "step": 156930 }, { "epoch": 4.641272845566925, "grad_norm": 1.1287261247634888, "learning_rate": 2.29563400017336e-06, "loss": 0.0683, "step": 156940 }, { "epoch": 4.641568581061099, "grad_norm": 1.0282707214355469, "learning_rate": 2.2955073102544202e-06, "loss": 0.0794, "step": 156950 }, { "epoch": 4.641864316555273, "grad_norm": 0.7226210832595825, "learning_rate": 2.295380620335481e-06, "loss": 0.059, "step": 156960 }, { "epoch": 4.642160052049447, "grad_norm": 1.3256343603134155, "learning_rate": 2.2952539304165414e-06, "loss": 0.0728, "step": 156970 }, { "epoch": 4.642455787543621, "grad_norm": 1.3524738550186157, "learning_rate": 2.295127240497602e-06, "loss": 0.0728, "step": 156980 }, { "epoch": 4.642751523037795, "grad_norm": 0.7498579025268555, "learning_rate": 2.2950005505786626e-06, "loss": 0.0848, "step": 156990 }, { "epoch": 4.643047258531969, "grad_norm": 0.8811725378036499, "learning_rate": 2.2948738606597233e-06, "loss": 0.066, "step": 157000 }, { "epoch": 4.643342994026143, "grad_norm": 1.345290184020996, "learning_rate": 2.294747170740784e-06, "loss": 0.0656, "step": 157010 }, { "epoch": 4.643638729520317, "grad_norm": 0.8689037561416626, "learning_rate": 2.294620480821845e-06, "loss": 0.0775, "step": 157020 }, { "epoch": 4.643934465014491, "grad_norm": 1.5682600736618042, "learning_rate": 2.2944937909029053e-06, "loss": 0.1001, "step": 157030 }, { "epoch": 4.644230200508665, "grad_norm": 0.584349513053894, "learning_rate": 2.294367100983966e-06, "loss": 0.0608, "step": 157040 }, { "epoch": 4.6445259360028395, "grad_norm": 0.8524353504180908, "learning_rate": 2.2942404110650264e-06, "loss": 0.0546, "step": 157050 }, { "epoch": 4.644821671497013, "grad_norm": 0.6956102848052979, "learning_rate": 2.2941137211460872e-06, "loss": 0.0661, "step": 157060 }, { "epoch": 4.645117406991187, "grad_norm": 0.8669999837875366, "learning_rate": 2.2939870312271476e-06, "loss": 0.0711, "step": 157070 }, { "epoch": 4.645413142485361, "grad_norm": 0.7298780679702759, "learning_rate": 2.2938603413082084e-06, "loss": 0.072, "step": 157080 }, { "epoch": 4.645708877979535, "grad_norm": 1.4335490465164185, "learning_rate": 2.293733651389269e-06, "loss": 0.0747, "step": 157090 }, { "epoch": 4.646004613473709, "grad_norm": 0.9303289651870728, "learning_rate": 2.29360696147033e-06, "loss": 0.0716, "step": 157100 }, { "epoch": 4.646300348967883, "grad_norm": 0.811163604259491, "learning_rate": 2.2934802715513903e-06, "loss": 0.0619, "step": 157110 }, { "epoch": 4.6465960844620575, "grad_norm": 0.8932774662971497, "learning_rate": 2.293353581632451e-06, "loss": 0.0819, "step": 157120 }, { "epoch": 4.646891819956231, "grad_norm": 0.8731891512870789, "learning_rate": 2.2932268917135115e-06, "loss": 0.0773, "step": 157130 }, { "epoch": 4.647187555450405, "grad_norm": 0.5993386507034302, "learning_rate": 2.2931002017945723e-06, "loss": 0.0504, "step": 157140 }, { "epoch": 4.647483290944579, "grad_norm": 1.2404619455337524, "learning_rate": 2.2929735118756326e-06, "loss": 0.0692, "step": 157150 }, { "epoch": 4.647779026438753, "grad_norm": 0.8440252542495728, "learning_rate": 2.2928468219566934e-06, "loss": 0.0485, "step": 157160 }, { "epoch": 4.648074761932927, "grad_norm": 0.8317309021949768, "learning_rate": 2.2927201320377542e-06, "loss": 0.0939, "step": 157170 }, { "epoch": 4.648370497427101, "grad_norm": 0.661836564540863, "learning_rate": 2.292593442118815e-06, "loss": 0.0671, "step": 157180 }, { "epoch": 4.648666232921276, "grad_norm": 1.5839314460754395, "learning_rate": 2.2924667521998754e-06, "loss": 0.0675, "step": 157190 }, { "epoch": 4.648961968415449, "grad_norm": 1.4251803159713745, "learning_rate": 2.292340062280936e-06, "loss": 0.0706, "step": 157200 }, { "epoch": 4.6492577039096235, "grad_norm": 0.7127304673194885, "learning_rate": 2.2922133723619965e-06, "loss": 0.0675, "step": 157210 }, { "epoch": 4.649553439403797, "grad_norm": 0.797546923160553, "learning_rate": 2.2920866824430573e-06, "loss": 0.0771, "step": 157220 }, { "epoch": 4.649849174897971, "grad_norm": 1.7683876752853394, "learning_rate": 2.2919599925241177e-06, "loss": 0.089, "step": 157230 }, { "epoch": 4.650144910392145, "grad_norm": 0.890832781791687, "learning_rate": 2.2918333026051785e-06, "loss": 0.0682, "step": 157240 }, { "epoch": 4.650440645886319, "grad_norm": 1.542418122291565, "learning_rate": 2.2917066126862393e-06, "loss": 0.0632, "step": 157250 }, { "epoch": 4.650736381380494, "grad_norm": 1.032792568206787, "learning_rate": 2.2915799227673e-06, "loss": 0.07, "step": 157260 }, { "epoch": 4.651032116874667, "grad_norm": 0.8376660346984863, "learning_rate": 2.2914532328483604e-06, "loss": 0.0857, "step": 157270 }, { "epoch": 4.6513278523688415, "grad_norm": 1.1345546245574951, "learning_rate": 2.2913265429294212e-06, "loss": 0.0794, "step": 157280 }, { "epoch": 4.651623587863015, "grad_norm": 0.5552816987037659, "learning_rate": 2.2911998530104816e-06, "loss": 0.0682, "step": 157290 }, { "epoch": 4.651919323357189, "grad_norm": 0.7116076350212097, "learning_rate": 2.2910731630915424e-06, "loss": 0.0569, "step": 157300 }, { "epoch": 4.652215058851363, "grad_norm": 1.0292632579803467, "learning_rate": 2.2909464731726027e-06, "loss": 0.0611, "step": 157310 }, { "epoch": 4.652510794345537, "grad_norm": 1.0010511875152588, "learning_rate": 2.2908197832536635e-06, "loss": 0.0767, "step": 157320 }, { "epoch": 4.652806529839712, "grad_norm": 0.9798828363418579, "learning_rate": 2.2906930933347243e-06, "loss": 0.0852, "step": 157330 }, { "epoch": 4.653102265333885, "grad_norm": 0.9842633008956909, "learning_rate": 2.290566403415785e-06, "loss": 0.072, "step": 157340 }, { "epoch": 4.6533980008280595, "grad_norm": 0.5857346653938293, "learning_rate": 2.2904397134968455e-06, "loss": 0.071, "step": 157350 }, { "epoch": 4.653693736322233, "grad_norm": 1.1141180992126465, "learning_rate": 2.290313023577906e-06, "loss": 0.0768, "step": 157360 }, { "epoch": 4.653989471816407, "grad_norm": 1.1438521146774292, "learning_rate": 2.2901863336589666e-06, "loss": 0.0718, "step": 157370 }, { "epoch": 4.654285207310581, "grad_norm": 0.7577583193778992, "learning_rate": 2.290059643740027e-06, "loss": 0.0616, "step": 157380 }, { "epoch": 4.654580942804755, "grad_norm": 1.1598050594329834, "learning_rate": 2.2899329538210878e-06, "loss": 0.082, "step": 157390 }, { "epoch": 4.65487667829893, "grad_norm": 0.6511175632476807, "learning_rate": 2.289806263902148e-06, "loss": 0.0592, "step": 157400 }, { "epoch": 4.655172413793103, "grad_norm": 0.7204201221466064, "learning_rate": 2.2896795739832094e-06, "loss": 0.0649, "step": 157410 }, { "epoch": 4.655468149287278, "grad_norm": 0.7094792723655701, "learning_rate": 2.2895528840642697e-06, "loss": 0.0657, "step": 157420 }, { "epoch": 4.655763884781451, "grad_norm": 0.5715508460998535, "learning_rate": 2.2894261941453305e-06, "loss": 0.0568, "step": 157430 }, { "epoch": 4.6560596202756255, "grad_norm": 1.1799439191818237, "learning_rate": 2.289299504226391e-06, "loss": 0.0704, "step": 157440 }, { "epoch": 4.6563553557698, "grad_norm": 0.7141587138175964, "learning_rate": 2.2891728143074517e-06, "loss": 0.0532, "step": 157450 }, { "epoch": 4.656651091263973, "grad_norm": 0.8711400032043457, "learning_rate": 2.289046124388512e-06, "loss": 0.0566, "step": 157460 }, { "epoch": 4.656946826758148, "grad_norm": 0.5559380054473877, "learning_rate": 2.288919434469573e-06, "loss": 0.0794, "step": 157470 }, { "epoch": 4.657242562252321, "grad_norm": 1.326709270477295, "learning_rate": 2.288792744550633e-06, "loss": 0.0808, "step": 157480 }, { "epoch": 4.657538297746496, "grad_norm": 0.7053375840187073, "learning_rate": 2.2886660546316944e-06, "loss": 0.0634, "step": 157490 }, { "epoch": 4.65783403324067, "grad_norm": 1.8569284677505493, "learning_rate": 2.2885393647127548e-06, "loss": 0.0671, "step": 157500 }, { "epoch": 4.6581297687348435, "grad_norm": 1.1664111614227295, "learning_rate": 2.2884126747938156e-06, "loss": 0.0556, "step": 157510 }, { "epoch": 4.658425504229018, "grad_norm": 1.1019660234451294, "learning_rate": 2.288285984874876e-06, "loss": 0.0624, "step": 157520 }, { "epoch": 4.658721239723191, "grad_norm": 0.7521605491638184, "learning_rate": 2.2881592949559367e-06, "loss": 0.0755, "step": 157530 }, { "epoch": 4.659016975217366, "grad_norm": 0.9016842842102051, "learning_rate": 2.288032605036997e-06, "loss": 0.0675, "step": 157540 }, { "epoch": 4.659312710711539, "grad_norm": 0.5793187618255615, "learning_rate": 2.287905915118058e-06, "loss": 0.0656, "step": 157550 }, { "epoch": 4.659608446205714, "grad_norm": 1.1596810817718506, "learning_rate": 2.2877792251991182e-06, "loss": 0.0621, "step": 157560 }, { "epoch": 4.659904181699888, "grad_norm": 0.8662042021751404, "learning_rate": 2.2876525352801795e-06, "loss": 0.0762, "step": 157570 }, { "epoch": 4.660199917194062, "grad_norm": 0.8150057196617126, "learning_rate": 2.28752584536124e-06, "loss": 0.0703, "step": 157580 }, { "epoch": 4.660495652688236, "grad_norm": 0.7826606035232544, "learning_rate": 2.2873991554423006e-06, "loss": 0.07, "step": 157590 }, { "epoch": 4.6607913881824095, "grad_norm": 1.2846347093582153, "learning_rate": 2.287272465523361e-06, "loss": 0.0764, "step": 157600 }, { "epoch": 4.661087123676584, "grad_norm": 1.0433481931686401, "learning_rate": 2.2871457756044218e-06, "loss": 0.0678, "step": 157610 }, { "epoch": 4.661382859170757, "grad_norm": 1.3988780975341797, "learning_rate": 2.287019085685482e-06, "loss": 0.0736, "step": 157620 }, { "epoch": 4.661678594664932, "grad_norm": 0.9680570363998413, "learning_rate": 2.286892395766543e-06, "loss": 0.0805, "step": 157630 }, { "epoch": 4.661974330159106, "grad_norm": 0.5836668014526367, "learning_rate": 2.2867657058476033e-06, "loss": 0.0728, "step": 157640 }, { "epoch": 4.66227006565328, "grad_norm": 1.1074451208114624, "learning_rate": 2.2866390159286645e-06, "loss": 0.072, "step": 157650 }, { "epoch": 4.662565801147454, "grad_norm": 1.0222654342651367, "learning_rate": 2.286512326009725e-06, "loss": 0.0558, "step": 157660 }, { "epoch": 4.6628615366416275, "grad_norm": 0.9322477579116821, "learning_rate": 2.2863856360907857e-06, "loss": 0.0566, "step": 157670 }, { "epoch": 4.663157272135802, "grad_norm": 0.9260908365249634, "learning_rate": 2.286258946171846e-06, "loss": 0.0691, "step": 157680 }, { "epoch": 4.663453007629975, "grad_norm": 0.8954790234565735, "learning_rate": 2.286132256252907e-06, "loss": 0.0635, "step": 157690 }, { "epoch": 4.66374874312415, "grad_norm": 1.4821865558624268, "learning_rate": 2.286005566333967e-06, "loss": 0.0727, "step": 157700 }, { "epoch": 4.664044478618324, "grad_norm": 0.8666923642158508, "learning_rate": 2.285878876415028e-06, "loss": 0.0649, "step": 157710 }, { "epoch": 4.664340214112498, "grad_norm": 0.9061606526374817, "learning_rate": 2.2857521864960883e-06, "loss": 0.0795, "step": 157720 }, { "epoch": 4.664635949606672, "grad_norm": 1.2488261461257935, "learning_rate": 2.2856254965771495e-06, "loss": 0.0917, "step": 157730 }, { "epoch": 4.6649316851008455, "grad_norm": 1.7313741445541382, "learning_rate": 2.28549880665821e-06, "loss": 0.0663, "step": 157740 }, { "epoch": 4.66522742059502, "grad_norm": 0.6050275564193726, "learning_rate": 2.2853721167392707e-06, "loss": 0.0638, "step": 157750 }, { "epoch": 4.665523156089193, "grad_norm": 0.8492250442504883, "learning_rate": 2.285245426820331e-06, "loss": 0.0705, "step": 157760 }, { "epoch": 4.665818891583368, "grad_norm": 0.7502482533454895, "learning_rate": 2.2851187369013914e-06, "loss": 0.0765, "step": 157770 }, { "epoch": 4.666114627077542, "grad_norm": 1.2063606977462769, "learning_rate": 2.2849920469824522e-06, "loss": 0.1006, "step": 157780 }, { "epoch": 4.666410362571716, "grad_norm": 0.7604199647903442, "learning_rate": 2.2848653570635126e-06, "loss": 0.0609, "step": 157790 }, { "epoch": 4.66670609806589, "grad_norm": 1.0318118333816528, "learning_rate": 2.2847386671445734e-06, "loss": 0.0572, "step": 157800 }, { "epoch": 4.667001833560064, "grad_norm": 1.0031017065048218, "learning_rate": 2.284611977225634e-06, "loss": 0.061, "step": 157810 }, { "epoch": 4.667297569054238, "grad_norm": 1.077112078666687, "learning_rate": 2.284485287306695e-06, "loss": 0.0785, "step": 157820 }, { "epoch": 4.6675933045484115, "grad_norm": 1.865561604499817, "learning_rate": 2.2843585973877553e-06, "loss": 0.0781, "step": 157830 }, { "epoch": 4.667889040042586, "grad_norm": 0.8225372433662415, "learning_rate": 2.284231907468816e-06, "loss": 0.0632, "step": 157840 }, { "epoch": 4.66818477553676, "grad_norm": 0.8017961978912354, "learning_rate": 2.2841052175498765e-06, "loss": 0.0558, "step": 157850 }, { "epoch": 4.668480511030934, "grad_norm": 0.8659413456916809, "learning_rate": 2.2839785276309373e-06, "loss": 0.0636, "step": 157860 }, { "epoch": 4.668776246525108, "grad_norm": 0.7783573865890503, "learning_rate": 2.2838518377119976e-06, "loss": 0.0705, "step": 157870 }, { "epoch": 4.669071982019282, "grad_norm": 0.7782026529312134, "learning_rate": 2.2837251477930584e-06, "loss": 0.0694, "step": 157880 }, { "epoch": 4.669367717513456, "grad_norm": 1.1057711839675903, "learning_rate": 2.283598457874119e-06, "loss": 0.0875, "step": 157890 }, { "epoch": 4.6696634530076295, "grad_norm": 0.6609477400779724, "learning_rate": 2.28347176795518e-06, "loss": 0.0639, "step": 157900 }, { "epoch": 4.669959188501804, "grad_norm": 0.7933603525161743, "learning_rate": 2.2833450780362404e-06, "loss": 0.0706, "step": 157910 }, { "epoch": 4.670254923995978, "grad_norm": 1.0484702587127686, "learning_rate": 2.283218388117301e-06, "loss": 0.0771, "step": 157920 }, { "epoch": 4.670550659490152, "grad_norm": 1.2000895738601685, "learning_rate": 2.2830916981983615e-06, "loss": 0.0803, "step": 157930 }, { "epoch": 4.670846394984326, "grad_norm": 0.7224156856536865, "learning_rate": 2.2829650082794223e-06, "loss": 0.0566, "step": 157940 }, { "epoch": 4.6711421304785, "grad_norm": 1.272515892982483, "learning_rate": 2.2828383183604827e-06, "loss": 0.0676, "step": 157950 }, { "epoch": 4.671437865972674, "grad_norm": 0.9088544249534607, "learning_rate": 2.2827116284415435e-06, "loss": 0.0669, "step": 157960 }, { "epoch": 4.671733601466848, "grad_norm": 0.7093479633331299, "learning_rate": 2.2825849385226043e-06, "loss": 0.0708, "step": 157970 }, { "epoch": 4.672029336961022, "grad_norm": 1.070680022239685, "learning_rate": 2.282458248603665e-06, "loss": 0.0813, "step": 157980 }, { "epoch": 4.672325072455196, "grad_norm": 0.9367585182189941, "learning_rate": 2.2823315586847254e-06, "loss": 0.0861, "step": 157990 }, { "epoch": 4.67262080794937, "grad_norm": 1.1493855714797974, "learning_rate": 2.282204868765786e-06, "loss": 0.0684, "step": 158000 }, { "epoch": 4.672916543443544, "grad_norm": 0.916157603263855, "learning_rate": 2.2820781788468466e-06, "loss": 0.0691, "step": 158010 }, { "epoch": 4.673212278937718, "grad_norm": 0.5435064435005188, "learning_rate": 2.2819514889279074e-06, "loss": 0.0733, "step": 158020 }, { "epoch": 4.673508014431892, "grad_norm": 0.7067614197731018, "learning_rate": 2.2818247990089677e-06, "loss": 0.0736, "step": 158030 }, { "epoch": 4.6738037499260665, "grad_norm": 0.818061351776123, "learning_rate": 2.2816981090900285e-06, "loss": 0.0534, "step": 158040 }, { "epoch": 4.67409948542024, "grad_norm": 0.4904542565345764, "learning_rate": 2.2815714191710893e-06, "loss": 0.0594, "step": 158050 }, { "epoch": 4.674395220914414, "grad_norm": 0.9008513689041138, "learning_rate": 2.28144472925215e-06, "loss": 0.0711, "step": 158060 }, { "epoch": 4.674690956408588, "grad_norm": 1.5475791692733765, "learning_rate": 2.2813180393332105e-06, "loss": 0.0848, "step": 158070 }, { "epoch": 4.674986691902762, "grad_norm": 1.786010503768921, "learning_rate": 2.2811913494142712e-06, "loss": 0.0754, "step": 158080 }, { "epoch": 4.675282427396937, "grad_norm": 0.9034402966499329, "learning_rate": 2.2810646594953316e-06, "loss": 0.0816, "step": 158090 }, { "epoch": 4.67557816289111, "grad_norm": 0.9996939897537231, "learning_rate": 2.2809379695763924e-06, "loss": 0.0675, "step": 158100 }, { "epoch": 4.6758738983852846, "grad_norm": 2.1958565711975098, "learning_rate": 2.2808112796574528e-06, "loss": 0.0755, "step": 158110 }, { "epoch": 4.676169633879458, "grad_norm": 0.8006467819213867, "learning_rate": 2.2806845897385136e-06, "loss": 0.065, "step": 158120 }, { "epoch": 4.676465369373632, "grad_norm": 0.7428382635116577, "learning_rate": 2.2805578998195743e-06, "loss": 0.0646, "step": 158130 }, { "epoch": 4.676761104867806, "grad_norm": 0.7412609457969666, "learning_rate": 2.280431209900635e-06, "loss": 0.072, "step": 158140 }, { "epoch": 4.67705684036198, "grad_norm": 1.4691091775894165, "learning_rate": 2.2803045199816955e-06, "loss": 0.069, "step": 158150 }, { "epoch": 4.677352575856155, "grad_norm": 0.8221351504325867, "learning_rate": 2.2801778300627563e-06, "loss": 0.0692, "step": 158160 }, { "epoch": 4.677648311350328, "grad_norm": 1.6355783939361572, "learning_rate": 2.2800511401438167e-06, "loss": 0.0728, "step": 158170 }, { "epoch": 4.677944046844503, "grad_norm": 0.5859116315841675, "learning_rate": 2.279924450224877e-06, "loss": 0.0782, "step": 158180 }, { "epoch": 4.678239782338676, "grad_norm": 0.5886405110359192, "learning_rate": 2.279797760305938e-06, "loss": 0.0769, "step": 158190 }, { "epoch": 4.6785355178328505, "grad_norm": 0.6064978241920471, "learning_rate": 2.279671070386998e-06, "loss": 0.0637, "step": 158200 }, { "epoch": 4.678831253327024, "grad_norm": 1.154212236404419, "learning_rate": 2.2795443804680594e-06, "loss": 0.0709, "step": 158210 }, { "epoch": 4.679126988821198, "grad_norm": 1.1924583911895752, "learning_rate": 2.2794176905491198e-06, "loss": 0.0859, "step": 158220 }, { "epoch": 4.679422724315373, "grad_norm": 0.959173321723938, "learning_rate": 2.2792910006301805e-06, "loss": 0.0782, "step": 158230 }, { "epoch": 4.679718459809546, "grad_norm": 0.6002522110939026, "learning_rate": 2.279164310711241e-06, "loss": 0.0603, "step": 158240 }, { "epoch": 4.680014195303721, "grad_norm": 0.5253111720085144, "learning_rate": 2.2790376207923017e-06, "loss": 0.0514, "step": 158250 }, { "epoch": 4.680309930797894, "grad_norm": 0.8950768113136292, "learning_rate": 2.278910930873362e-06, "loss": 0.0615, "step": 158260 }, { "epoch": 4.6806056662920685, "grad_norm": 1.1992278099060059, "learning_rate": 2.278784240954423e-06, "loss": 0.0746, "step": 158270 }, { "epoch": 4.680901401786242, "grad_norm": 0.7929403781890869, "learning_rate": 2.2786575510354832e-06, "loss": 0.0617, "step": 158280 }, { "epoch": 4.681197137280416, "grad_norm": 2.4643280506134033, "learning_rate": 2.2785308611165444e-06, "loss": 0.0559, "step": 158290 }, { "epoch": 4.681492872774591, "grad_norm": 0.8168185949325562, "learning_rate": 2.278404171197605e-06, "loss": 0.0759, "step": 158300 }, { "epoch": 4.681788608268764, "grad_norm": 0.910224199295044, "learning_rate": 2.2782774812786656e-06, "loss": 0.0593, "step": 158310 }, { "epoch": 4.682084343762939, "grad_norm": 0.8264349699020386, "learning_rate": 2.278150791359726e-06, "loss": 0.0756, "step": 158320 }, { "epoch": 4.682380079257112, "grad_norm": 1.5382118225097656, "learning_rate": 2.2780241014407867e-06, "loss": 0.0828, "step": 158330 }, { "epoch": 4.682675814751287, "grad_norm": 0.7593387961387634, "learning_rate": 2.277897411521847e-06, "loss": 0.0682, "step": 158340 }, { "epoch": 4.68297155024546, "grad_norm": 0.5639930367469788, "learning_rate": 2.277770721602908e-06, "loss": 0.0508, "step": 158350 }, { "epoch": 4.6832672857396345, "grad_norm": 0.7075510025024414, "learning_rate": 2.2776440316839683e-06, "loss": 0.0678, "step": 158360 }, { "epoch": 4.683563021233809, "grad_norm": 0.8953431248664856, "learning_rate": 2.2775173417650295e-06, "loss": 0.0794, "step": 158370 }, { "epoch": 4.683858756727982, "grad_norm": 0.58290034532547, "learning_rate": 2.27739065184609e-06, "loss": 0.0641, "step": 158380 }, { "epoch": 4.684154492222157, "grad_norm": 1.1520447731018066, "learning_rate": 2.2772639619271506e-06, "loss": 0.0814, "step": 158390 }, { "epoch": 4.68445022771633, "grad_norm": 0.6562836170196533, "learning_rate": 2.277137272008211e-06, "loss": 0.0627, "step": 158400 }, { "epoch": 4.684745963210505, "grad_norm": 0.7361546158790588, "learning_rate": 2.277010582089272e-06, "loss": 0.0644, "step": 158410 }, { "epoch": 4.685041698704678, "grad_norm": 0.9407923817634583, "learning_rate": 2.276883892170332e-06, "loss": 0.0699, "step": 158420 }, { "epoch": 4.6853374341988525, "grad_norm": 1.0713030099868774, "learning_rate": 2.276757202251393e-06, "loss": 0.0751, "step": 158430 }, { "epoch": 4.685633169693027, "grad_norm": 0.8672116994857788, "learning_rate": 2.2766305123324533e-06, "loss": 0.0758, "step": 158440 }, { "epoch": 4.6859289051872, "grad_norm": 0.9356014132499695, "learning_rate": 2.2765038224135145e-06, "loss": 0.0711, "step": 158450 }, { "epoch": 4.686224640681375, "grad_norm": 0.8482313752174377, "learning_rate": 2.276377132494575e-06, "loss": 0.0621, "step": 158460 }, { "epoch": 4.686520376175548, "grad_norm": 0.7207359075546265, "learning_rate": 2.2762504425756357e-06, "loss": 0.0809, "step": 158470 }, { "epoch": 4.686816111669723, "grad_norm": 0.8892361521720886, "learning_rate": 2.276123752656696e-06, "loss": 0.0685, "step": 158480 }, { "epoch": 4.687111847163896, "grad_norm": 0.9642463326454163, "learning_rate": 2.275997062737757e-06, "loss": 0.0541, "step": 158490 }, { "epoch": 4.687407582658071, "grad_norm": 0.7325391173362732, "learning_rate": 2.275870372818817e-06, "loss": 0.0553, "step": 158500 }, { "epoch": 4.687703318152245, "grad_norm": 0.971976101398468, "learning_rate": 2.275743682899878e-06, "loss": 0.0629, "step": 158510 }, { "epoch": 4.6879990536464184, "grad_norm": 1.1327546834945679, "learning_rate": 2.2756169929809384e-06, "loss": 0.08, "step": 158520 }, { "epoch": 4.688294789140593, "grad_norm": 0.8523585796356201, "learning_rate": 2.2754903030619996e-06, "loss": 0.0727, "step": 158530 }, { "epoch": 4.688590524634766, "grad_norm": 0.8846010565757751, "learning_rate": 2.27536361314306e-06, "loss": 0.0807, "step": 158540 }, { "epoch": 4.688886260128941, "grad_norm": 0.8439188003540039, "learning_rate": 2.2752369232241207e-06, "loss": 0.0548, "step": 158550 }, { "epoch": 4.689181995623114, "grad_norm": 0.7516354918479919, "learning_rate": 2.275110233305181e-06, "loss": 0.0706, "step": 158560 }, { "epoch": 4.689477731117289, "grad_norm": 0.9343413710594177, "learning_rate": 2.274983543386242e-06, "loss": 0.0801, "step": 158570 }, { "epoch": 4.689773466611463, "grad_norm": 0.9510758519172668, "learning_rate": 2.2748568534673022e-06, "loss": 0.0641, "step": 158580 }, { "epoch": 4.6900692021056365, "grad_norm": 1.0069035291671753, "learning_rate": 2.2747301635483626e-06, "loss": 0.0792, "step": 158590 }, { "epoch": 4.690364937599811, "grad_norm": 0.8456889390945435, "learning_rate": 2.2746034736294234e-06, "loss": 0.0646, "step": 158600 }, { "epoch": 4.690660673093984, "grad_norm": 1.0688259601593018, "learning_rate": 2.274476783710484e-06, "loss": 0.0626, "step": 158610 }, { "epoch": 4.690956408588159, "grad_norm": 0.695771336555481, "learning_rate": 2.274350093791545e-06, "loss": 0.0739, "step": 158620 }, { "epoch": 4.691252144082332, "grad_norm": 0.7415289282798767, "learning_rate": 2.2742234038726053e-06, "loss": 0.0724, "step": 158630 }, { "epoch": 4.691547879576507, "grad_norm": 0.7481408715248108, "learning_rate": 2.274096713953666e-06, "loss": 0.0642, "step": 158640 }, { "epoch": 4.691843615070681, "grad_norm": 0.946611225605011, "learning_rate": 2.2739700240347265e-06, "loss": 0.077, "step": 158650 }, { "epoch": 4.6921393505648545, "grad_norm": 0.7337369918823242, "learning_rate": 2.2738433341157873e-06, "loss": 0.0713, "step": 158660 }, { "epoch": 4.692435086059029, "grad_norm": 0.7973650693893433, "learning_rate": 2.2737166441968477e-06, "loss": 0.0772, "step": 158670 }, { "epoch": 4.692730821553203, "grad_norm": 1.5974324941635132, "learning_rate": 2.2735899542779085e-06, "loss": 0.0877, "step": 158680 }, { "epoch": 4.693026557047377, "grad_norm": 0.8477083444595337, "learning_rate": 2.2734632643589692e-06, "loss": 0.0602, "step": 158690 }, { "epoch": 4.693322292541551, "grad_norm": 1.1903808116912842, "learning_rate": 2.27333657444003e-06, "loss": 0.0722, "step": 158700 }, { "epoch": 4.693618028035725, "grad_norm": 1.3171303272247314, "learning_rate": 2.2732098845210904e-06, "loss": 0.0645, "step": 158710 }, { "epoch": 4.693913763529899, "grad_norm": 0.8387251496315002, "learning_rate": 2.273083194602151e-06, "loss": 0.0779, "step": 158720 }, { "epoch": 4.694209499024073, "grad_norm": 0.6384289264678955, "learning_rate": 2.2729565046832116e-06, "loss": 0.0721, "step": 158730 }, { "epoch": 4.694505234518247, "grad_norm": 1.4077558517456055, "learning_rate": 2.2728298147642723e-06, "loss": 0.0695, "step": 158740 }, { "epoch": 4.694800970012421, "grad_norm": 1.088541865348816, "learning_rate": 2.2727031248453327e-06, "loss": 0.071, "step": 158750 }, { "epoch": 4.695096705506595, "grad_norm": 0.8733797073364258, "learning_rate": 2.2725764349263935e-06, "loss": 0.066, "step": 158760 }, { "epoch": 4.695392441000769, "grad_norm": 1.0071465969085693, "learning_rate": 2.2724497450074543e-06, "loss": 0.0856, "step": 158770 }, { "epoch": 4.695688176494943, "grad_norm": 0.8282217383384705, "learning_rate": 2.272323055088515e-06, "loss": 0.0814, "step": 158780 }, { "epoch": 4.695983911989117, "grad_norm": 0.6446723341941833, "learning_rate": 2.2721963651695754e-06, "loss": 0.0599, "step": 158790 }, { "epoch": 4.696279647483291, "grad_norm": 1.0838048458099365, "learning_rate": 2.2720696752506362e-06, "loss": 0.0531, "step": 158800 }, { "epoch": 4.696575382977465, "grad_norm": 1.0531433820724487, "learning_rate": 2.2719429853316966e-06, "loss": 0.0607, "step": 158810 }, { "epoch": 4.696871118471639, "grad_norm": 1.1271159648895264, "learning_rate": 2.2718162954127574e-06, "loss": 0.0775, "step": 158820 }, { "epoch": 4.697166853965813, "grad_norm": 0.5859450101852417, "learning_rate": 2.2716896054938178e-06, "loss": 0.0739, "step": 158830 }, { "epoch": 4.697462589459987, "grad_norm": 0.6487588286399841, "learning_rate": 2.2715629155748785e-06, "loss": 0.0584, "step": 158840 }, { "epoch": 4.697758324954161, "grad_norm": 0.8273909687995911, "learning_rate": 2.2714362256559393e-06, "loss": 0.0627, "step": 158850 }, { "epoch": 4.698054060448335, "grad_norm": 0.7934644222259521, "learning_rate": 2.271309535737e-06, "loss": 0.0595, "step": 158860 }, { "epoch": 4.698349795942509, "grad_norm": 1.2106348276138306, "learning_rate": 2.2711828458180605e-06, "loss": 0.1008, "step": 158870 }, { "epoch": 4.698645531436683, "grad_norm": 1.1639602184295654, "learning_rate": 2.2710561558991213e-06, "loss": 0.07, "step": 158880 }, { "epoch": 4.6989412669308575, "grad_norm": 0.5673099160194397, "learning_rate": 2.2709294659801816e-06, "loss": 0.0657, "step": 158890 }, { "epoch": 4.699237002425031, "grad_norm": 1.1157375574111938, "learning_rate": 2.2708027760612424e-06, "loss": 0.0592, "step": 158900 }, { "epoch": 4.699532737919205, "grad_norm": 1.0612190961837769, "learning_rate": 2.270676086142303e-06, "loss": 0.0631, "step": 158910 }, { "epoch": 4.699828473413379, "grad_norm": 0.6266502141952515, "learning_rate": 2.2705493962233636e-06, "loss": 0.0758, "step": 158920 }, { "epoch": 4.700124208907553, "grad_norm": 0.814315140247345, "learning_rate": 2.2704227063044244e-06, "loss": 0.0657, "step": 158930 }, { "epoch": 4.700419944401727, "grad_norm": 0.8931986093521118, "learning_rate": 2.270296016385485e-06, "loss": 0.0601, "step": 158940 }, { "epoch": 4.700715679895901, "grad_norm": 2.3704888820648193, "learning_rate": 2.2701693264665455e-06, "loss": 0.0653, "step": 158950 }, { "epoch": 4.7010114153900755, "grad_norm": 0.8327547311782837, "learning_rate": 2.2700426365476063e-06, "loss": 0.0599, "step": 158960 }, { "epoch": 4.701307150884249, "grad_norm": 0.7200551629066467, "learning_rate": 2.2699159466286667e-06, "loss": 0.079, "step": 158970 }, { "epoch": 4.701602886378423, "grad_norm": 0.49174565076828003, "learning_rate": 2.2697892567097275e-06, "loss": 0.0591, "step": 158980 }, { "epoch": 4.701898621872597, "grad_norm": 1.707665205001831, "learning_rate": 2.269662566790788e-06, "loss": 0.0763, "step": 158990 }, { "epoch": 4.702194357366771, "grad_norm": 0.7342385053634644, "learning_rate": 2.269535876871848e-06, "loss": 0.0587, "step": 159000 }, { "epoch": 4.702490092860945, "grad_norm": 1.1129519939422607, "learning_rate": 2.2694091869529094e-06, "loss": 0.0746, "step": 159010 }, { "epoch": 4.702785828355119, "grad_norm": 0.7244213223457336, "learning_rate": 2.2692824970339698e-06, "loss": 0.0759, "step": 159020 }, { "epoch": 4.7030815638492935, "grad_norm": 0.6206242442131042, "learning_rate": 2.2691558071150306e-06, "loss": 0.0723, "step": 159030 }, { "epoch": 4.703377299343467, "grad_norm": 0.9192813634872437, "learning_rate": 2.269029117196091e-06, "loss": 0.0695, "step": 159040 }, { "epoch": 4.703673034837641, "grad_norm": 1.2257784605026245, "learning_rate": 2.2689024272771517e-06, "loss": 0.0739, "step": 159050 }, { "epoch": 4.703968770331815, "grad_norm": 1.061352252960205, "learning_rate": 2.268775737358212e-06, "loss": 0.0618, "step": 159060 }, { "epoch": 4.704264505825989, "grad_norm": 1.1975752115249634, "learning_rate": 2.268649047439273e-06, "loss": 0.0886, "step": 159070 }, { "epoch": 4.704560241320163, "grad_norm": 0.644758403301239, "learning_rate": 2.2685223575203333e-06, "loss": 0.0728, "step": 159080 }, { "epoch": 4.704855976814337, "grad_norm": 0.717074453830719, "learning_rate": 2.2683956676013945e-06, "loss": 0.074, "step": 159090 }, { "epoch": 4.705151712308512, "grad_norm": 0.8908482789993286, "learning_rate": 2.268268977682455e-06, "loss": 0.0615, "step": 159100 }, { "epoch": 4.705447447802685, "grad_norm": 0.7618628740310669, "learning_rate": 2.2681422877635156e-06, "loss": 0.0759, "step": 159110 }, { "epoch": 4.7057431832968595, "grad_norm": 0.653853178024292, "learning_rate": 2.268015597844576e-06, "loss": 0.0702, "step": 159120 }, { "epoch": 4.706038918791033, "grad_norm": 0.7130910158157349, "learning_rate": 2.2678889079256368e-06, "loss": 0.0634, "step": 159130 }, { "epoch": 4.706334654285207, "grad_norm": 0.6467068791389465, "learning_rate": 2.267762218006697e-06, "loss": 0.0735, "step": 159140 }, { "epoch": 4.706630389779381, "grad_norm": 1.0785449743270874, "learning_rate": 2.267635528087758e-06, "loss": 0.0736, "step": 159150 }, { "epoch": 4.706926125273555, "grad_norm": 1.099780797958374, "learning_rate": 2.2675088381688183e-06, "loss": 0.0645, "step": 159160 }, { "epoch": 4.70722186076773, "grad_norm": 0.857572615146637, "learning_rate": 2.2673821482498795e-06, "loss": 0.0796, "step": 159170 }, { "epoch": 4.707517596261903, "grad_norm": 0.9189552068710327, "learning_rate": 2.26725545833094e-06, "loss": 0.0784, "step": 159180 }, { "epoch": 4.7078133317560775, "grad_norm": 0.805836021900177, "learning_rate": 2.2671287684120007e-06, "loss": 0.0721, "step": 159190 }, { "epoch": 4.708109067250251, "grad_norm": 0.82172691822052, "learning_rate": 2.267002078493061e-06, "loss": 0.0655, "step": 159200 }, { "epoch": 4.708404802744425, "grad_norm": 0.8495074510574341, "learning_rate": 2.266875388574122e-06, "loss": 0.0649, "step": 159210 }, { "epoch": 4.708700538238599, "grad_norm": 0.8579456210136414, "learning_rate": 2.266748698655182e-06, "loss": 0.0605, "step": 159220 }, { "epoch": 4.708996273732773, "grad_norm": 1.7190529108047485, "learning_rate": 2.266622008736243e-06, "loss": 0.0742, "step": 159230 }, { "epoch": 4.709292009226948, "grad_norm": 1.3648027181625366, "learning_rate": 2.2664953188173033e-06, "loss": 0.0762, "step": 159240 }, { "epoch": 4.709587744721121, "grad_norm": 1.0322151184082031, "learning_rate": 2.2663686288983646e-06, "loss": 0.0612, "step": 159250 }, { "epoch": 4.709883480215296, "grad_norm": 1.1440794467926025, "learning_rate": 2.266241938979425e-06, "loss": 0.078, "step": 159260 }, { "epoch": 4.710179215709469, "grad_norm": 1.4332706928253174, "learning_rate": 2.2661152490604857e-06, "loss": 0.0723, "step": 159270 }, { "epoch": 4.7104749512036435, "grad_norm": 1.0230401754379272, "learning_rate": 2.265988559141546e-06, "loss": 0.0806, "step": 159280 }, { "epoch": 4.710770686697818, "grad_norm": 0.7686421871185303, "learning_rate": 2.265861869222607e-06, "loss": 0.0718, "step": 159290 }, { "epoch": 4.711066422191991, "grad_norm": 1.2385107278823853, "learning_rate": 2.2657351793036672e-06, "loss": 0.0657, "step": 159300 }, { "epoch": 4.711362157686166, "grad_norm": 0.8180669546127319, "learning_rate": 2.265608489384728e-06, "loss": 0.0715, "step": 159310 }, { "epoch": 4.711657893180339, "grad_norm": 0.8665921092033386, "learning_rate": 2.2654817994657884e-06, "loss": 0.0752, "step": 159320 }, { "epoch": 4.711953628674514, "grad_norm": 0.5624337792396545, "learning_rate": 2.2653551095468496e-06, "loss": 0.0653, "step": 159330 }, { "epoch": 4.712249364168688, "grad_norm": 0.9174962043762207, "learning_rate": 2.26522841962791e-06, "loss": 0.0565, "step": 159340 }, { "epoch": 4.7125450996628615, "grad_norm": 0.8149868249893188, "learning_rate": 2.2651017297089708e-06, "loss": 0.0685, "step": 159350 }, { "epoch": 4.712840835157036, "grad_norm": 1.0310977697372437, "learning_rate": 2.264975039790031e-06, "loss": 0.0674, "step": 159360 }, { "epoch": 4.713136570651209, "grad_norm": 1.6787251234054565, "learning_rate": 2.264848349871092e-06, "loss": 0.0654, "step": 159370 }, { "epoch": 4.713432306145384, "grad_norm": 1.1203209161758423, "learning_rate": 2.2647216599521523e-06, "loss": 0.0785, "step": 159380 }, { "epoch": 4.713728041639557, "grad_norm": 0.6170321702957153, "learning_rate": 2.264594970033213e-06, "loss": 0.054, "step": 159390 }, { "epoch": 4.714023777133732, "grad_norm": 0.9521638751029968, "learning_rate": 2.2644682801142734e-06, "loss": 0.0507, "step": 159400 }, { "epoch": 4.714319512627906, "grad_norm": 0.6124665141105652, "learning_rate": 2.2643415901953346e-06, "loss": 0.0652, "step": 159410 }, { "epoch": 4.7146152481220795, "grad_norm": 1.1816856861114502, "learning_rate": 2.264214900276395e-06, "loss": 0.0855, "step": 159420 }, { "epoch": 4.714910983616254, "grad_norm": 0.9254218339920044, "learning_rate": 2.2640882103574554e-06, "loss": 0.0669, "step": 159430 }, { "epoch": 4.715206719110427, "grad_norm": 1.0916991233825684, "learning_rate": 2.263961520438516e-06, "loss": 0.0636, "step": 159440 }, { "epoch": 4.715502454604602, "grad_norm": 0.9118383526802063, "learning_rate": 2.2638348305195765e-06, "loss": 0.0644, "step": 159450 }, { "epoch": 4.715798190098775, "grad_norm": 1.1291548013687134, "learning_rate": 2.2637081406006373e-06, "loss": 0.062, "step": 159460 }, { "epoch": 4.71609392559295, "grad_norm": 0.9546741843223572, "learning_rate": 2.2635814506816977e-06, "loss": 0.0818, "step": 159470 }, { "epoch": 4.716389661087124, "grad_norm": 1.0422002077102661, "learning_rate": 2.2634547607627585e-06, "loss": 0.082, "step": 159480 }, { "epoch": 4.716685396581298, "grad_norm": 0.9724335670471191, "learning_rate": 2.2633280708438193e-06, "loss": 0.067, "step": 159490 }, { "epoch": 4.716981132075472, "grad_norm": 0.6803432106971741, "learning_rate": 2.26320138092488e-06, "loss": 0.0632, "step": 159500 }, { "epoch": 4.7172768675696455, "grad_norm": 0.9624203443527222, "learning_rate": 2.2630746910059404e-06, "loss": 0.0792, "step": 159510 }, { "epoch": 4.71757260306382, "grad_norm": 0.7583451867103577, "learning_rate": 2.2629480010870012e-06, "loss": 0.0733, "step": 159520 }, { "epoch": 4.717868338557993, "grad_norm": 0.6515427827835083, "learning_rate": 2.2628213111680616e-06, "loss": 0.0626, "step": 159530 }, { "epoch": 4.718164074052168, "grad_norm": 0.780693769454956, "learning_rate": 2.2626946212491224e-06, "loss": 0.0802, "step": 159540 }, { "epoch": 4.718459809546342, "grad_norm": 2.032557487487793, "learning_rate": 2.2625679313301827e-06, "loss": 0.0699, "step": 159550 }, { "epoch": 4.718755545040516, "grad_norm": 0.9460805654525757, "learning_rate": 2.2624412414112435e-06, "loss": 0.0614, "step": 159560 }, { "epoch": 4.71905128053469, "grad_norm": 0.9977855682373047, "learning_rate": 2.2623145514923043e-06, "loss": 0.0824, "step": 159570 }, { "epoch": 4.7193470160288635, "grad_norm": 1.3570502996444702, "learning_rate": 2.262187861573365e-06, "loss": 0.0791, "step": 159580 }, { "epoch": 4.719642751523038, "grad_norm": 0.8675847053527832, "learning_rate": 2.2620611716544255e-06, "loss": 0.057, "step": 159590 }, { "epoch": 4.719938487017211, "grad_norm": 1.1333531141281128, "learning_rate": 2.2619344817354863e-06, "loss": 0.0709, "step": 159600 }, { "epoch": 4.720234222511386, "grad_norm": 1.4460710287094116, "learning_rate": 2.2618077918165466e-06, "loss": 0.0758, "step": 159610 }, { "epoch": 4.72052995800556, "grad_norm": 0.9277278780937195, "learning_rate": 2.2616811018976074e-06, "loss": 0.0777, "step": 159620 }, { "epoch": 4.720825693499734, "grad_norm": 0.7104461193084717, "learning_rate": 2.2615544119786678e-06, "loss": 0.0617, "step": 159630 }, { "epoch": 4.721121428993908, "grad_norm": 1.0150351524353027, "learning_rate": 2.2614277220597286e-06, "loss": 0.0762, "step": 159640 }, { "epoch": 4.721417164488082, "grad_norm": 0.7171101570129395, "learning_rate": 2.2613010321407894e-06, "loss": 0.0576, "step": 159650 }, { "epoch": 4.721712899982256, "grad_norm": 0.8380858302116394, "learning_rate": 2.26117434222185e-06, "loss": 0.0463, "step": 159660 }, { "epoch": 4.7220086354764295, "grad_norm": 1.037177324295044, "learning_rate": 2.2610476523029105e-06, "loss": 0.0673, "step": 159670 }, { "epoch": 4.722304370970604, "grad_norm": 1.051541805267334, "learning_rate": 2.2609209623839713e-06, "loss": 0.0531, "step": 159680 }, { "epoch": 4.722600106464778, "grad_norm": 1.3241071701049805, "learning_rate": 2.2607942724650317e-06, "loss": 0.0628, "step": 159690 }, { "epoch": 4.722895841958952, "grad_norm": 1.1372827291488647, "learning_rate": 2.2606675825460925e-06, "loss": 0.0729, "step": 159700 }, { "epoch": 4.723191577453126, "grad_norm": 1.3004217147827148, "learning_rate": 2.260540892627153e-06, "loss": 0.0745, "step": 159710 }, { "epoch": 4.7234873129473, "grad_norm": 0.9729959964752197, "learning_rate": 2.2604142027082136e-06, "loss": 0.0757, "step": 159720 }, { "epoch": 4.723783048441474, "grad_norm": 2.397350549697876, "learning_rate": 2.2602875127892744e-06, "loss": 0.074, "step": 159730 }, { "epoch": 4.7240787839356475, "grad_norm": 0.8630455136299133, "learning_rate": 2.260160822870335e-06, "loss": 0.0642, "step": 159740 }, { "epoch": 4.724374519429822, "grad_norm": 0.4584854543209076, "learning_rate": 2.2600341329513956e-06, "loss": 0.0677, "step": 159750 }, { "epoch": 4.724670254923996, "grad_norm": 0.8190386891365051, "learning_rate": 2.2599074430324564e-06, "loss": 0.0629, "step": 159760 }, { "epoch": 4.72496599041817, "grad_norm": 0.6683382391929626, "learning_rate": 2.2597807531135167e-06, "loss": 0.0665, "step": 159770 }, { "epoch": 4.725261725912344, "grad_norm": 1.0130963325500488, "learning_rate": 2.2596540631945775e-06, "loss": 0.0783, "step": 159780 }, { "epoch": 4.725557461406518, "grad_norm": 0.4871290624141693, "learning_rate": 2.259527373275638e-06, "loss": 0.0525, "step": 159790 }, { "epoch": 4.725853196900692, "grad_norm": 0.45860373973846436, "learning_rate": 2.2594006833566987e-06, "loss": 0.0653, "step": 159800 }, { "epoch": 4.7261489323948656, "grad_norm": 1.4159739017486572, "learning_rate": 2.2592739934377595e-06, "loss": 0.0591, "step": 159810 }, { "epoch": 4.72644466788904, "grad_norm": 1.0914427042007446, "learning_rate": 2.2591473035188202e-06, "loss": 0.0728, "step": 159820 }, { "epoch": 4.726740403383214, "grad_norm": 1.2087817192077637, "learning_rate": 2.2590206135998806e-06, "loss": 0.0869, "step": 159830 }, { "epoch": 4.727036138877388, "grad_norm": 1.0185974836349487, "learning_rate": 2.258893923680941e-06, "loss": 0.0883, "step": 159840 }, { "epoch": 4.727331874371562, "grad_norm": 0.7482290267944336, "learning_rate": 2.2587672337620018e-06, "loss": 0.0617, "step": 159850 }, { "epoch": 4.727627609865736, "grad_norm": 0.8471884727478027, "learning_rate": 2.258640543843062e-06, "loss": 0.0574, "step": 159860 }, { "epoch": 4.72792334535991, "grad_norm": 0.624765932559967, "learning_rate": 2.258513853924123e-06, "loss": 0.0644, "step": 159870 }, { "epoch": 4.7282190808540845, "grad_norm": 0.8229817748069763, "learning_rate": 2.2583871640051833e-06, "loss": 0.0857, "step": 159880 }, { "epoch": 4.728514816348258, "grad_norm": 0.9489980340003967, "learning_rate": 2.2582604740862445e-06, "loss": 0.0706, "step": 159890 }, { "epoch": 4.728810551842432, "grad_norm": 0.9301343560218811, "learning_rate": 2.258133784167305e-06, "loss": 0.052, "step": 159900 }, { "epoch": 4.729106287336606, "grad_norm": 0.9116297960281372, "learning_rate": 2.2580070942483657e-06, "loss": 0.0664, "step": 159910 }, { "epoch": 4.72940202283078, "grad_norm": 1.4127835035324097, "learning_rate": 2.257880404329426e-06, "loss": 0.0777, "step": 159920 }, { "epoch": 4.729697758324955, "grad_norm": 1.0138580799102783, "learning_rate": 2.257753714410487e-06, "loss": 0.0783, "step": 159930 }, { "epoch": 4.729993493819128, "grad_norm": 0.957044243812561, "learning_rate": 2.257627024491547e-06, "loss": 0.0824, "step": 159940 }, { "epoch": 4.7302892293133025, "grad_norm": 0.6963719725608826, "learning_rate": 2.257500334572608e-06, "loss": 0.0623, "step": 159950 }, { "epoch": 4.730584964807476, "grad_norm": 0.7161269187927246, "learning_rate": 2.2573736446536683e-06, "loss": 0.0575, "step": 159960 }, { "epoch": 4.73088070030165, "grad_norm": 0.7569338083267212, "learning_rate": 2.2572469547347295e-06, "loss": 0.0837, "step": 159970 }, { "epoch": 4.731176435795824, "grad_norm": 1.306800365447998, "learning_rate": 2.25712026481579e-06, "loss": 0.0695, "step": 159980 }, { "epoch": 4.731472171289998, "grad_norm": 1.003599762916565, "learning_rate": 2.2569935748968507e-06, "loss": 0.0702, "step": 159990 }, { "epoch": 4.731767906784173, "grad_norm": 0.689200222492218, "learning_rate": 2.256866884977911e-06, "loss": 0.0606, "step": 160000 }, { "epoch": 4.732063642278346, "grad_norm": 0.5745123028755188, "learning_rate": 2.256740195058972e-06, "loss": 0.0693, "step": 160010 }, { "epoch": 4.732359377772521, "grad_norm": 1.1270796060562134, "learning_rate": 2.2566135051400322e-06, "loss": 0.0783, "step": 160020 }, { "epoch": 4.732655113266694, "grad_norm": 0.7488618493080139, "learning_rate": 2.256486815221093e-06, "loss": 0.0751, "step": 160030 }, { "epoch": 4.7329508487608685, "grad_norm": 0.36717689037323, "learning_rate": 2.2563601253021534e-06, "loss": 0.0616, "step": 160040 }, { "epoch": 4.733246584255042, "grad_norm": 1.129719614982605, "learning_rate": 2.2562334353832146e-06, "loss": 0.0507, "step": 160050 }, { "epoch": 4.733542319749216, "grad_norm": 0.9532378315925598, "learning_rate": 2.256106745464275e-06, "loss": 0.0596, "step": 160060 }, { "epoch": 4.733838055243391, "grad_norm": 0.7873300313949585, "learning_rate": 2.2559800555453357e-06, "loss": 0.0857, "step": 160070 }, { "epoch": 4.734133790737564, "grad_norm": 1.2768921852111816, "learning_rate": 2.255853365626396e-06, "loss": 0.0773, "step": 160080 }, { "epoch": 4.734429526231739, "grad_norm": 0.7887033820152283, "learning_rate": 2.255726675707457e-06, "loss": 0.0735, "step": 160090 }, { "epoch": 4.734725261725912, "grad_norm": 0.7704172730445862, "learning_rate": 2.2555999857885173e-06, "loss": 0.0695, "step": 160100 }, { "epoch": 4.7350209972200865, "grad_norm": 1.3416692018508911, "learning_rate": 2.255473295869578e-06, "loss": 0.0882, "step": 160110 }, { "epoch": 4.73531673271426, "grad_norm": 0.845443069934845, "learning_rate": 2.2553466059506384e-06, "loss": 0.0864, "step": 160120 }, { "epoch": 4.735612468208434, "grad_norm": 1.9293121099472046, "learning_rate": 2.2552199160316996e-06, "loss": 0.0709, "step": 160130 }, { "epoch": 4.735908203702609, "grad_norm": 0.6609821319580078, "learning_rate": 2.25509322611276e-06, "loss": 0.0721, "step": 160140 }, { "epoch": 4.736203939196782, "grad_norm": 0.42428016662597656, "learning_rate": 2.2549665361938208e-06, "loss": 0.0551, "step": 160150 }, { "epoch": 4.736499674690957, "grad_norm": 0.7807528376579285, "learning_rate": 2.254839846274881e-06, "loss": 0.0782, "step": 160160 }, { "epoch": 4.73679541018513, "grad_norm": 1.275065541267395, "learning_rate": 2.254713156355942e-06, "loss": 0.0812, "step": 160170 }, { "epoch": 4.737091145679305, "grad_norm": 1.338069200515747, "learning_rate": 2.2545864664370023e-06, "loss": 0.0829, "step": 160180 }, { "epoch": 4.737386881173478, "grad_norm": 0.5959751009941101, "learning_rate": 2.254459776518063e-06, "loss": 0.065, "step": 160190 }, { "epoch": 4.7376826166676524, "grad_norm": 0.6579450964927673, "learning_rate": 2.2543330865991235e-06, "loss": 0.0613, "step": 160200 }, { "epoch": 4.737978352161827, "grad_norm": 0.7023327946662903, "learning_rate": 2.2542063966801847e-06, "loss": 0.0567, "step": 160210 }, { "epoch": 4.738274087656, "grad_norm": 0.825951099395752, "learning_rate": 2.254079706761245e-06, "loss": 0.0975, "step": 160220 }, { "epoch": 4.738569823150175, "grad_norm": 0.8465480208396912, "learning_rate": 2.253953016842306e-06, "loss": 0.0686, "step": 160230 }, { "epoch": 4.738865558644348, "grad_norm": 0.7287740111351013, "learning_rate": 2.253826326923366e-06, "loss": 0.0677, "step": 160240 }, { "epoch": 4.739161294138523, "grad_norm": 1.1740506887435913, "learning_rate": 2.2536996370044266e-06, "loss": 0.0702, "step": 160250 }, { "epoch": 4.739457029632696, "grad_norm": 0.9716889262199402, "learning_rate": 2.2535729470854874e-06, "loss": 0.0707, "step": 160260 }, { "epoch": 4.7397527651268705, "grad_norm": 0.7831478714942932, "learning_rate": 2.2534462571665477e-06, "loss": 0.0793, "step": 160270 }, { "epoch": 4.740048500621045, "grad_norm": 0.7012069821357727, "learning_rate": 2.2533195672476085e-06, "loss": 0.0652, "step": 160280 }, { "epoch": 4.740344236115218, "grad_norm": 0.7916865348815918, "learning_rate": 2.2531928773286693e-06, "loss": 0.0676, "step": 160290 }, { "epoch": 4.740639971609393, "grad_norm": 0.9114116430282593, "learning_rate": 2.25306618740973e-06, "loss": 0.0602, "step": 160300 }, { "epoch": 4.740935707103566, "grad_norm": 0.9162945747375488, "learning_rate": 2.2529394974907905e-06, "loss": 0.0609, "step": 160310 }, { "epoch": 4.741231442597741, "grad_norm": 1.2973614931106567, "learning_rate": 2.2528128075718512e-06, "loss": 0.0723, "step": 160320 }, { "epoch": 4.741527178091914, "grad_norm": 0.9269322752952576, "learning_rate": 2.2526861176529116e-06, "loss": 0.0734, "step": 160330 }, { "epoch": 4.7418229135860885, "grad_norm": 0.59889817237854, "learning_rate": 2.2525594277339724e-06, "loss": 0.0719, "step": 160340 }, { "epoch": 4.742118649080263, "grad_norm": 1.3753570318222046, "learning_rate": 2.2524327378150328e-06, "loss": 0.0707, "step": 160350 }, { "epoch": 4.742414384574436, "grad_norm": 1.0276864767074585, "learning_rate": 2.2523060478960936e-06, "loss": 0.0704, "step": 160360 }, { "epoch": 4.742710120068611, "grad_norm": 0.6375560164451599, "learning_rate": 2.2521793579771543e-06, "loss": 0.0701, "step": 160370 }, { "epoch": 4.743005855562784, "grad_norm": 0.7800917029380798, "learning_rate": 2.252052668058215e-06, "loss": 0.0678, "step": 160380 }, { "epoch": 4.743301591056959, "grad_norm": 1.5730020999908447, "learning_rate": 2.2519259781392755e-06, "loss": 0.0611, "step": 160390 }, { "epoch": 4.743597326551132, "grad_norm": 0.8668614625930786, "learning_rate": 2.2517992882203363e-06, "loss": 0.0787, "step": 160400 }, { "epoch": 4.743893062045307, "grad_norm": 0.8016195893287659, "learning_rate": 2.2516725983013967e-06, "loss": 0.0736, "step": 160410 }, { "epoch": 4.744188797539481, "grad_norm": 0.8135762214660645, "learning_rate": 2.2515459083824574e-06, "loss": 0.0767, "step": 160420 }, { "epoch": 4.7444845330336545, "grad_norm": 0.868047297000885, "learning_rate": 2.251419218463518e-06, "loss": 0.0631, "step": 160430 }, { "epoch": 4.744780268527829, "grad_norm": 0.8438490629196167, "learning_rate": 2.2512925285445786e-06, "loss": 0.0834, "step": 160440 }, { "epoch": 4.745076004022002, "grad_norm": 0.8679684400558472, "learning_rate": 2.2511658386256394e-06, "loss": 0.0576, "step": 160450 }, { "epoch": 4.745371739516177, "grad_norm": 1.2795912027359009, "learning_rate": 2.2510391487067e-06, "loss": 0.0843, "step": 160460 }, { "epoch": 4.74566747501035, "grad_norm": 0.8101442456245422, "learning_rate": 2.2509124587877605e-06, "loss": 0.0744, "step": 160470 }, { "epoch": 4.745963210504525, "grad_norm": 0.9979054927825928, "learning_rate": 2.2507857688688213e-06, "loss": 0.0665, "step": 160480 }, { "epoch": 4.746258945998699, "grad_norm": 1.3689285516738892, "learning_rate": 2.2506590789498817e-06, "loss": 0.0659, "step": 160490 }, { "epoch": 4.7465546814928725, "grad_norm": 0.6454658508300781, "learning_rate": 2.2505323890309425e-06, "loss": 0.0598, "step": 160500 }, { "epoch": 4.746850416987047, "grad_norm": 2.242586612701416, "learning_rate": 2.250405699112003e-06, "loss": 0.0701, "step": 160510 }, { "epoch": 4.747146152481221, "grad_norm": 1.1645351648330688, "learning_rate": 2.2502790091930636e-06, "loss": 0.0717, "step": 160520 }, { "epoch": 4.747441887975395, "grad_norm": 0.8188661336898804, "learning_rate": 2.2501523192741244e-06, "loss": 0.0761, "step": 160530 }, { "epoch": 4.747737623469569, "grad_norm": 0.6474679112434387, "learning_rate": 2.2500256293551852e-06, "loss": 0.0644, "step": 160540 }, { "epoch": 4.748033358963743, "grad_norm": 0.8741417527198792, "learning_rate": 2.2498989394362456e-06, "loss": 0.0663, "step": 160550 }, { "epoch": 4.748329094457917, "grad_norm": 0.7312256097793579, "learning_rate": 2.2497722495173064e-06, "loss": 0.0514, "step": 160560 }, { "epoch": 4.748624829952091, "grad_norm": 0.7747955918312073, "learning_rate": 2.2496455595983667e-06, "loss": 0.0763, "step": 160570 }, { "epoch": 4.748920565446265, "grad_norm": 1.0135151147842407, "learning_rate": 2.2495188696794275e-06, "loss": 0.0676, "step": 160580 }, { "epoch": 4.749216300940439, "grad_norm": 0.9892878532409668, "learning_rate": 2.249392179760488e-06, "loss": 0.0707, "step": 160590 }, { "epoch": 4.749512036434613, "grad_norm": 1.6727036237716675, "learning_rate": 2.2492654898415487e-06, "loss": 0.0545, "step": 160600 }, { "epoch": 4.749807771928787, "grad_norm": 1.0027976036071777, "learning_rate": 2.2491387999226095e-06, "loss": 0.08, "step": 160610 }, { "epoch": 4.750103507422961, "grad_norm": 0.6841374635696411, "learning_rate": 2.2490121100036703e-06, "loss": 0.0779, "step": 160620 }, { "epoch": 4.750399242917135, "grad_norm": 0.6843174695968628, "learning_rate": 2.2488854200847306e-06, "loss": 0.078, "step": 160630 }, { "epoch": 4.750694978411309, "grad_norm": 0.8181635141372681, "learning_rate": 2.2487587301657914e-06, "loss": 0.0658, "step": 160640 }, { "epoch": 4.750990713905483, "grad_norm": 0.8834363222122192, "learning_rate": 2.248632040246852e-06, "loss": 0.0571, "step": 160650 }, { "epoch": 4.751286449399657, "grad_norm": 1.271567940711975, "learning_rate": 2.248505350327912e-06, "loss": 0.0665, "step": 160660 }, { "epoch": 4.751582184893831, "grad_norm": 1.2253564596176147, "learning_rate": 2.248378660408973e-06, "loss": 0.0816, "step": 160670 }, { "epoch": 4.751877920388005, "grad_norm": 0.8962602019309998, "learning_rate": 2.2482519704900333e-06, "loss": 0.0771, "step": 160680 }, { "epoch": 4.752173655882179, "grad_norm": 0.6831992268562317, "learning_rate": 2.2481252805710945e-06, "loss": 0.0667, "step": 160690 }, { "epoch": 4.752469391376353, "grad_norm": 1.0398777723312378, "learning_rate": 2.247998590652155e-06, "loss": 0.0618, "step": 160700 }, { "epoch": 4.752765126870527, "grad_norm": 0.8797245621681213, "learning_rate": 2.2478719007332157e-06, "loss": 0.0641, "step": 160710 }, { "epoch": 4.753060862364701, "grad_norm": 0.9334527254104614, "learning_rate": 2.247745210814276e-06, "loss": 0.0868, "step": 160720 }, { "epoch": 4.753356597858875, "grad_norm": 1.1897125244140625, "learning_rate": 2.247618520895337e-06, "loss": 0.079, "step": 160730 }, { "epoch": 4.753652333353049, "grad_norm": 0.6382526159286499, "learning_rate": 2.247491830976397e-06, "loss": 0.0668, "step": 160740 }, { "epoch": 4.753948068847223, "grad_norm": 0.7532276511192322, "learning_rate": 2.247365141057458e-06, "loss": 0.0703, "step": 160750 }, { "epoch": 4.754243804341397, "grad_norm": 1.2003649473190308, "learning_rate": 2.2472384511385184e-06, "loss": 0.067, "step": 160760 }, { "epoch": 4.754539539835571, "grad_norm": 1.0162571668624878, "learning_rate": 2.2471117612195796e-06, "loss": 0.0709, "step": 160770 }, { "epoch": 4.754835275329745, "grad_norm": 0.8221041560173035, "learning_rate": 2.24698507130064e-06, "loss": 0.0683, "step": 160780 }, { "epoch": 4.755131010823919, "grad_norm": 1.5804067850112915, "learning_rate": 2.2468583813817007e-06, "loss": 0.0654, "step": 160790 }, { "epoch": 4.7554267463180935, "grad_norm": 3.4770052433013916, "learning_rate": 2.246731691462761e-06, "loss": 0.0626, "step": 160800 }, { "epoch": 4.755722481812267, "grad_norm": 0.6785109639167786, "learning_rate": 2.246605001543822e-06, "loss": 0.0685, "step": 160810 }, { "epoch": 4.756018217306441, "grad_norm": 0.8853347301483154, "learning_rate": 2.2464783116248822e-06, "loss": 0.0848, "step": 160820 }, { "epoch": 4.756313952800615, "grad_norm": 0.7527301907539368, "learning_rate": 2.246351621705943e-06, "loss": 0.0615, "step": 160830 }, { "epoch": 4.756609688294789, "grad_norm": 0.5080953240394592, "learning_rate": 2.2462249317870034e-06, "loss": 0.0755, "step": 160840 }, { "epoch": 4.756905423788963, "grad_norm": 1.0196171998977661, "learning_rate": 2.2460982418680646e-06, "loss": 0.0714, "step": 160850 }, { "epoch": 4.757201159283137, "grad_norm": 1.9066094160079956, "learning_rate": 2.245971551949125e-06, "loss": 0.0791, "step": 160860 }, { "epoch": 4.7574968947773115, "grad_norm": 0.6608265042304993, "learning_rate": 2.2458448620301858e-06, "loss": 0.085, "step": 160870 }, { "epoch": 4.757792630271485, "grad_norm": 0.975715160369873, "learning_rate": 2.245718172111246e-06, "loss": 0.0727, "step": 160880 }, { "epoch": 4.758088365765659, "grad_norm": 1.6081063747406006, "learning_rate": 2.245591482192307e-06, "loss": 0.0678, "step": 160890 }, { "epoch": 4.758384101259833, "grad_norm": 0.661027193069458, "learning_rate": 2.2454647922733673e-06, "loss": 0.0606, "step": 160900 }, { "epoch": 4.758679836754007, "grad_norm": 0.8790421485900879, "learning_rate": 2.245338102354428e-06, "loss": 0.0657, "step": 160910 }, { "epoch": 4.758975572248181, "grad_norm": 0.8948742747306824, "learning_rate": 2.2452114124354884e-06, "loss": 0.0686, "step": 160920 }, { "epoch": 4.759271307742355, "grad_norm": 0.679920494556427, "learning_rate": 2.2450847225165497e-06, "loss": 0.0838, "step": 160930 }, { "epoch": 4.75956704323653, "grad_norm": 0.9312638640403748, "learning_rate": 2.24495803259761e-06, "loss": 0.0691, "step": 160940 }, { "epoch": 4.759862778730703, "grad_norm": 1.0499660968780518, "learning_rate": 2.244831342678671e-06, "loss": 0.0669, "step": 160950 }, { "epoch": 4.7601585142248775, "grad_norm": 0.9647449254989624, "learning_rate": 2.244704652759731e-06, "loss": 0.0714, "step": 160960 }, { "epoch": 4.760454249719051, "grad_norm": 0.8070092797279358, "learning_rate": 2.244577962840792e-06, "loss": 0.0788, "step": 160970 }, { "epoch": 4.760749985213225, "grad_norm": 0.9175090789794922, "learning_rate": 2.2444512729218523e-06, "loss": 0.0622, "step": 160980 }, { "epoch": 4.761045720707399, "grad_norm": 0.6480469107627869, "learning_rate": 2.244324583002913e-06, "loss": 0.0702, "step": 160990 }, { "epoch": 4.761341456201573, "grad_norm": 0.9470797181129456, "learning_rate": 2.2441978930839735e-06, "loss": 0.0426, "step": 161000 }, { "epoch": 4.761637191695748, "grad_norm": 0.8803773522377014, "learning_rate": 2.2440712031650347e-06, "loss": 0.0635, "step": 161010 }, { "epoch": 4.761932927189921, "grad_norm": 1.3286302089691162, "learning_rate": 2.243944513246095e-06, "loss": 0.0735, "step": 161020 }, { "epoch": 4.7622286626840955, "grad_norm": 1.031272053718567, "learning_rate": 2.243817823327156e-06, "loss": 0.0658, "step": 161030 }, { "epoch": 4.762524398178269, "grad_norm": 0.591622531414032, "learning_rate": 2.2436911334082162e-06, "loss": 0.0644, "step": 161040 }, { "epoch": 4.762820133672443, "grad_norm": 1.0142722129821777, "learning_rate": 2.243564443489277e-06, "loss": 0.0675, "step": 161050 }, { "epoch": 4.763115869166617, "grad_norm": 1.402653694152832, "learning_rate": 2.2434377535703374e-06, "loss": 0.0819, "step": 161060 }, { "epoch": 4.763411604660791, "grad_norm": 0.8825052380561829, "learning_rate": 2.2433110636513978e-06, "loss": 0.0718, "step": 161070 }, { "epoch": 4.763707340154966, "grad_norm": 0.6534925103187561, "learning_rate": 2.2431843737324585e-06, "loss": 0.062, "step": 161080 }, { "epoch": 4.764003075649139, "grad_norm": 0.7470347881317139, "learning_rate": 2.2430576838135193e-06, "loss": 0.0683, "step": 161090 }, { "epoch": 4.7642988111433136, "grad_norm": 0.45685404539108276, "learning_rate": 2.24293099389458e-06, "loss": 0.0539, "step": 161100 }, { "epoch": 4.764594546637487, "grad_norm": 1.2064355611801147, "learning_rate": 2.2428043039756405e-06, "loss": 0.0761, "step": 161110 }, { "epoch": 4.764890282131661, "grad_norm": 0.939227819442749, "learning_rate": 2.2426776140567013e-06, "loss": 0.0645, "step": 161120 }, { "epoch": 4.765186017625836, "grad_norm": 0.5267946720123291, "learning_rate": 2.2425509241377616e-06, "loss": 0.0713, "step": 161130 }, { "epoch": 4.765481753120009, "grad_norm": 0.7965307235717773, "learning_rate": 2.2424242342188224e-06, "loss": 0.0722, "step": 161140 }, { "epoch": 4.765777488614184, "grad_norm": 0.6408495903015137, "learning_rate": 2.242297544299883e-06, "loss": 0.0486, "step": 161150 }, { "epoch": 4.766073224108357, "grad_norm": 1.0629897117614746, "learning_rate": 2.2421708543809436e-06, "loss": 0.0601, "step": 161160 }, { "epoch": 4.766368959602532, "grad_norm": 0.9837272763252258, "learning_rate": 2.2420441644620044e-06, "loss": 0.0768, "step": 161170 }, { "epoch": 4.766664695096706, "grad_norm": 0.7913024425506592, "learning_rate": 2.241917474543065e-06, "loss": 0.0646, "step": 161180 }, { "epoch": 4.7669604305908795, "grad_norm": 1.160496473312378, "learning_rate": 2.2417907846241255e-06, "loss": 0.0787, "step": 161190 }, { "epoch": 4.767256166085054, "grad_norm": 0.7631476521492004, "learning_rate": 2.2416640947051863e-06, "loss": 0.056, "step": 161200 }, { "epoch": 4.767551901579227, "grad_norm": 1.3763494491577148, "learning_rate": 2.2415374047862467e-06, "loss": 0.0736, "step": 161210 }, { "epoch": 4.767847637073402, "grad_norm": 0.9052330851554871, "learning_rate": 2.2414107148673075e-06, "loss": 0.0747, "step": 161220 }, { "epoch": 4.768143372567575, "grad_norm": 1.1707786321640015, "learning_rate": 2.241284024948368e-06, "loss": 0.0812, "step": 161230 }, { "epoch": 4.76843910806175, "grad_norm": 0.7653310298919678, "learning_rate": 2.2411573350294286e-06, "loss": 0.0831, "step": 161240 }, { "epoch": 4.768734843555924, "grad_norm": 0.8540863990783691, "learning_rate": 2.2410306451104894e-06, "loss": 0.0659, "step": 161250 }, { "epoch": 4.7690305790500975, "grad_norm": 1.009584903717041, "learning_rate": 2.24090395519155e-06, "loss": 0.0613, "step": 161260 }, { "epoch": 4.769326314544272, "grad_norm": 1.375293493270874, "learning_rate": 2.2407772652726106e-06, "loss": 0.0769, "step": 161270 }, { "epoch": 4.769622050038445, "grad_norm": 1.527741551399231, "learning_rate": 2.2406505753536714e-06, "loss": 0.065, "step": 161280 }, { "epoch": 4.76991778553262, "grad_norm": 0.5440224409103394, "learning_rate": 2.2405238854347317e-06, "loss": 0.0665, "step": 161290 }, { "epoch": 4.770213521026793, "grad_norm": 0.9642772674560547, "learning_rate": 2.2403971955157925e-06, "loss": 0.0537, "step": 161300 }, { "epoch": 4.770509256520968, "grad_norm": 0.9394444227218628, "learning_rate": 2.240270505596853e-06, "loss": 0.0562, "step": 161310 }, { "epoch": 4.770804992015142, "grad_norm": 1.210274577140808, "learning_rate": 2.2401438156779137e-06, "loss": 0.0926, "step": 161320 }, { "epoch": 4.771100727509316, "grad_norm": 0.8636598587036133, "learning_rate": 2.2400171257589745e-06, "loss": 0.0747, "step": 161330 }, { "epoch": 4.77139646300349, "grad_norm": 0.6570806503295898, "learning_rate": 2.2398904358400353e-06, "loss": 0.0628, "step": 161340 }, { "epoch": 4.7716921984976635, "grad_norm": 0.8682769536972046, "learning_rate": 2.2397637459210956e-06, "loss": 0.0641, "step": 161350 }, { "epoch": 4.771987933991838, "grad_norm": 1.2039109468460083, "learning_rate": 2.2396370560021564e-06, "loss": 0.0754, "step": 161360 }, { "epoch": 4.772283669486011, "grad_norm": 1.1540400981903076, "learning_rate": 2.2395103660832168e-06, "loss": 0.0743, "step": 161370 }, { "epoch": 4.772579404980186, "grad_norm": 0.815529465675354, "learning_rate": 2.2393836761642776e-06, "loss": 0.0794, "step": 161380 }, { "epoch": 4.77287514047436, "grad_norm": 0.8817587494850159, "learning_rate": 2.239256986245338e-06, "loss": 0.0548, "step": 161390 }, { "epoch": 4.773170875968534, "grad_norm": 0.5632975697517395, "learning_rate": 2.2391302963263987e-06, "loss": 0.0624, "step": 161400 }, { "epoch": 4.773466611462708, "grad_norm": 0.7918719053268433, "learning_rate": 2.2390036064074595e-06, "loss": 0.0638, "step": 161410 }, { "epoch": 4.7737623469568815, "grad_norm": 0.8509530425071716, "learning_rate": 2.2388769164885203e-06, "loss": 0.0638, "step": 161420 }, { "epoch": 4.774058082451056, "grad_norm": 1.1639374494552612, "learning_rate": 2.2387502265695807e-06, "loss": 0.0729, "step": 161430 }, { "epoch": 4.774353817945229, "grad_norm": 0.8973472714424133, "learning_rate": 2.2386235366506415e-06, "loss": 0.0732, "step": 161440 }, { "epoch": 4.774649553439404, "grad_norm": 1.3171504735946655, "learning_rate": 2.238496846731702e-06, "loss": 0.0747, "step": 161450 }, { "epoch": 4.774945288933578, "grad_norm": 1.0198615789413452, "learning_rate": 2.2383701568127626e-06, "loss": 0.0569, "step": 161460 }, { "epoch": 4.775241024427752, "grad_norm": 1.4129605293273926, "learning_rate": 2.238243466893823e-06, "loss": 0.0785, "step": 161470 }, { "epoch": 4.775536759921926, "grad_norm": 0.8528056740760803, "learning_rate": 2.2381167769748833e-06, "loss": 0.0684, "step": 161480 }, { "epoch": 4.7758324954160996, "grad_norm": 0.835696816444397, "learning_rate": 2.2379900870559446e-06, "loss": 0.0713, "step": 161490 }, { "epoch": 4.776128230910274, "grad_norm": 1.0265288352966309, "learning_rate": 2.237863397137005e-06, "loss": 0.0756, "step": 161500 }, { "epoch": 4.776423966404447, "grad_norm": 1.0152372121810913, "learning_rate": 2.2377367072180657e-06, "loss": 0.0561, "step": 161510 }, { "epoch": 4.776719701898622, "grad_norm": 0.9155030846595764, "learning_rate": 2.237610017299126e-06, "loss": 0.0621, "step": 161520 }, { "epoch": 4.777015437392796, "grad_norm": 0.7673748731613159, "learning_rate": 2.237483327380187e-06, "loss": 0.085, "step": 161530 }, { "epoch": 4.77731117288697, "grad_norm": 1.0308986902236938, "learning_rate": 2.2373566374612472e-06, "loss": 0.0766, "step": 161540 }, { "epoch": 4.777606908381144, "grad_norm": 1.479197382926941, "learning_rate": 2.237229947542308e-06, "loss": 0.052, "step": 161550 }, { "epoch": 4.777902643875318, "grad_norm": 0.9314952492713928, "learning_rate": 2.2371032576233684e-06, "loss": 0.0796, "step": 161560 }, { "epoch": 4.778198379369492, "grad_norm": 0.6535501480102539, "learning_rate": 2.2369765677044296e-06, "loss": 0.0696, "step": 161570 }, { "epoch": 4.7784941148636655, "grad_norm": 0.6439569592475891, "learning_rate": 2.23684987778549e-06, "loss": 0.0613, "step": 161580 }, { "epoch": 4.77878985035784, "grad_norm": 0.5406950116157532, "learning_rate": 2.2367231878665508e-06, "loss": 0.0712, "step": 161590 }, { "epoch": 4.779085585852014, "grad_norm": 0.9233195781707764, "learning_rate": 2.236596497947611e-06, "loss": 0.0672, "step": 161600 }, { "epoch": 4.779381321346188, "grad_norm": 0.8968027234077454, "learning_rate": 2.236469808028672e-06, "loss": 0.0594, "step": 161610 }, { "epoch": 4.779677056840362, "grad_norm": 1.6926133632659912, "learning_rate": 2.2363431181097323e-06, "loss": 0.0881, "step": 161620 }, { "epoch": 4.779972792334536, "grad_norm": 0.9669937491416931, "learning_rate": 2.236216428190793e-06, "loss": 0.0856, "step": 161630 }, { "epoch": 4.78026852782871, "grad_norm": 0.5912455916404724, "learning_rate": 2.2360897382718534e-06, "loss": 0.0609, "step": 161640 }, { "epoch": 4.7805642633228835, "grad_norm": 0.8071913123130798, "learning_rate": 2.2359630483529146e-06, "loss": 0.0623, "step": 161650 }, { "epoch": 4.780859998817058, "grad_norm": 1.2322301864624023, "learning_rate": 2.235836358433975e-06, "loss": 0.0667, "step": 161660 }, { "epoch": 4.781155734311232, "grad_norm": 0.9616738557815552, "learning_rate": 2.235709668515036e-06, "loss": 0.0851, "step": 161670 }, { "epoch": 4.781451469805406, "grad_norm": 1.0544220209121704, "learning_rate": 2.235582978596096e-06, "loss": 0.0684, "step": 161680 }, { "epoch": 4.78174720529958, "grad_norm": 1.2717680931091309, "learning_rate": 2.235456288677157e-06, "loss": 0.0557, "step": 161690 }, { "epoch": 4.782042940793754, "grad_norm": 0.48338064551353455, "learning_rate": 2.2353295987582173e-06, "loss": 0.0518, "step": 161700 }, { "epoch": 4.782338676287928, "grad_norm": 0.8474752902984619, "learning_rate": 2.235202908839278e-06, "loss": 0.0503, "step": 161710 }, { "epoch": 4.7826344117821025, "grad_norm": 0.8897085189819336, "learning_rate": 2.2350762189203385e-06, "loss": 0.097, "step": 161720 }, { "epoch": 4.782930147276276, "grad_norm": 1.065767765045166, "learning_rate": 2.2349495290013997e-06, "loss": 0.067, "step": 161730 }, { "epoch": 4.78322588277045, "grad_norm": 1.1140462160110474, "learning_rate": 2.23482283908246e-06, "loss": 0.0843, "step": 161740 }, { "epoch": 4.783521618264624, "grad_norm": 0.605833888053894, "learning_rate": 2.234696149163521e-06, "loss": 0.0629, "step": 161750 }, { "epoch": 4.783817353758798, "grad_norm": 1.1069313287734985, "learning_rate": 2.2345694592445812e-06, "loss": 0.0777, "step": 161760 }, { "epoch": 4.784113089252973, "grad_norm": 1.0099769830703735, "learning_rate": 2.234442769325642e-06, "loss": 0.0742, "step": 161770 }, { "epoch": 4.784408824747146, "grad_norm": 2.1283621788024902, "learning_rate": 2.2343160794067024e-06, "loss": 0.0669, "step": 161780 }, { "epoch": 4.7847045602413205, "grad_norm": 0.7244351506233215, "learning_rate": 2.234189389487763e-06, "loss": 0.0761, "step": 161790 }, { "epoch": 4.785000295735494, "grad_norm": 1.0565004348754883, "learning_rate": 2.2340626995688235e-06, "loss": 0.0487, "step": 161800 }, { "epoch": 4.785296031229668, "grad_norm": 1.014086365699768, "learning_rate": 2.2339360096498847e-06, "loss": 0.0708, "step": 161810 }, { "epoch": 4.785591766723842, "grad_norm": 1.2499769926071167, "learning_rate": 2.233809319730945e-06, "loss": 0.0793, "step": 161820 }, { "epoch": 4.785887502218016, "grad_norm": 1.2750622034072876, "learning_rate": 2.233682629812006e-06, "loss": 0.0521, "step": 161830 }, { "epoch": 4.786183237712191, "grad_norm": 0.6619217395782471, "learning_rate": 2.2335559398930663e-06, "loss": 0.0584, "step": 161840 }, { "epoch": 4.786478973206364, "grad_norm": 0.7720069885253906, "learning_rate": 2.233429249974127e-06, "loss": 0.062, "step": 161850 }, { "epoch": 4.786774708700539, "grad_norm": 1.1638449430465698, "learning_rate": 2.2333025600551874e-06, "loss": 0.0741, "step": 161860 }, { "epoch": 4.787070444194712, "grad_norm": 0.8831265568733215, "learning_rate": 2.233175870136248e-06, "loss": 0.0754, "step": 161870 }, { "epoch": 4.7873661796888864, "grad_norm": 0.944749653339386, "learning_rate": 2.2330491802173086e-06, "loss": 0.0743, "step": 161880 }, { "epoch": 4.78766191518306, "grad_norm": 1.3529523611068726, "learning_rate": 2.2329224902983698e-06, "loss": 0.071, "step": 161890 }, { "epoch": 4.787957650677234, "grad_norm": 1.044419527053833, "learning_rate": 2.23279580037943e-06, "loss": 0.0571, "step": 161900 }, { "epoch": 4.788253386171409, "grad_norm": 1.15772545337677, "learning_rate": 2.2326691104604905e-06, "loss": 0.0745, "step": 161910 }, { "epoch": 4.788549121665582, "grad_norm": 0.9921742081642151, "learning_rate": 2.2325424205415513e-06, "loss": 0.0785, "step": 161920 }, { "epoch": 4.788844857159757, "grad_norm": 1.0023351907730103, "learning_rate": 2.2324157306226117e-06, "loss": 0.0611, "step": 161930 }, { "epoch": 4.78914059265393, "grad_norm": 0.549588143825531, "learning_rate": 2.2322890407036725e-06, "loss": 0.0622, "step": 161940 }, { "epoch": 4.7894363281481045, "grad_norm": 0.7627002000808716, "learning_rate": 2.232162350784733e-06, "loss": 0.0618, "step": 161950 }, { "epoch": 4.789732063642278, "grad_norm": 0.6736595630645752, "learning_rate": 2.2320356608657936e-06, "loss": 0.07, "step": 161960 }, { "epoch": 4.790027799136452, "grad_norm": 0.7551254034042358, "learning_rate": 2.2319089709468544e-06, "loss": 0.1013, "step": 161970 }, { "epoch": 4.790323534630627, "grad_norm": 1.1563663482666016, "learning_rate": 2.231782281027915e-06, "loss": 0.0642, "step": 161980 }, { "epoch": 4.7906192701248, "grad_norm": 1.3374375104904175, "learning_rate": 2.2316555911089756e-06, "loss": 0.0754, "step": 161990 }, { "epoch": 4.790915005618975, "grad_norm": 0.44180166721343994, "learning_rate": 2.2315289011900364e-06, "loss": 0.0537, "step": 162000 }, { "epoch": 4.791210741113148, "grad_norm": 0.9332523345947266, "learning_rate": 2.2314022112710967e-06, "loss": 0.0605, "step": 162010 }, { "epoch": 4.7915064766073225, "grad_norm": 0.6550812125205994, "learning_rate": 2.2312755213521575e-06, "loss": 0.0828, "step": 162020 }, { "epoch": 4.791802212101496, "grad_norm": 0.5980172753334045, "learning_rate": 2.231148831433218e-06, "loss": 0.0626, "step": 162030 }, { "epoch": 4.79209794759567, "grad_norm": 0.9990801215171814, "learning_rate": 2.2310221415142787e-06, "loss": 0.0514, "step": 162040 }, { "epoch": 4.792393683089845, "grad_norm": 0.641178548336029, "learning_rate": 2.2308954515953395e-06, "loss": 0.051, "step": 162050 }, { "epoch": 4.792689418584018, "grad_norm": 1.114598274230957, "learning_rate": 2.2307687616764002e-06, "loss": 0.0629, "step": 162060 }, { "epoch": 4.792985154078193, "grad_norm": 0.9332212209701538, "learning_rate": 2.2306420717574606e-06, "loss": 0.0678, "step": 162070 }, { "epoch": 4.793280889572366, "grad_norm": 1.1177423000335693, "learning_rate": 2.2305153818385214e-06, "loss": 0.0884, "step": 162080 }, { "epoch": 4.793576625066541, "grad_norm": 1.0373297929763794, "learning_rate": 2.2303886919195818e-06, "loss": 0.0631, "step": 162090 }, { "epoch": 4.793872360560714, "grad_norm": 0.7883139252662659, "learning_rate": 2.2302620020006426e-06, "loss": 0.0568, "step": 162100 }, { "epoch": 4.7941680960548885, "grad_norm": 1.220733642578125, "learning_rate": 2.230135312081703e-06, "loss": 0.0862, "step": 162110 }, { "epoch": 4.794463831549063, "grad_norm": 1.5587034225463867, "learning_rate": 2.2300086221627637e-06, "loss": 0.0815, "step": 162120 }, { "epoch": 4.794759567043236, "grad_norm": 1.6583747863769531, "learning_rate": 2.2298819322438245e-06, "loss": 0.0683, "step": 162130 }, { "epoch": 4.795055302537411, "grad_norm": 0.5890887975692749, "learning_rate": 2.2297552423248853e-06, "loss": 0.0665, "step": 162140 }, { "epoch": 4.795351038031584, "grad_norm": 1.622275948524475, "learning_rate": 2.2296285524059457e-06, "loss": 0.0625, "step": 162150 }, { "epoch": 4.795646773525759, "grad_norm": 0.9774712324142456, "learning_rate": 2.2295018624870064e-06, "loss": 0.075, "step": 162160 }, { "epoch": 4.795942509019932, "grad_norm": 0.5979164838790894, "learning_rate": 2.229375172568067e-06, "loss": 0.0726, "step": 162170 }, { "epoch": 4.7962382445141065, "grad_norm": 0.5825631618499756, "learning_rate": 2.2292484826491276e-06, "loss": 0.0734, "step": 162180 }, { "epoch": 4.796533980008281, "grad_norm": 0.9266547560691833, "learning_rate": 2.229121792730188e-06, "loss": 0.0701, "step": 162190 }, { "epoch": 4.796829715502454, "grad_norm": 0.6869767308235168, "learning_rate": 2.2289951028112488e-06, "loss": 0.07, "step": 162200 }, { "epoch": 4.797125450996629, "grad_norm": 1.0446770191192627, "learning_rate": 2.2288684128923095e-06, "loss": 0.0651, "step": 162210 }, { "epoch": 4.797421186490802, "grad_norm": 1.8625867366790771, "learning_rate": 2.2287417229733703e-06, "loss": 0.082, "step": 162220 }, { "epoch": 4.797716921984977, "grad_norm": 1.2567352056503296, "learning_rate": 2.2286150330544307e-06, "loss": 0.0802, "step": 162230 }, { "epoch": 4.79801265747915, "grad_norm": 1.0566484928131104, "learning_rate": 2.2284883431354915e-06, "loss": 0.0682, "step": 162240 }, { "epoch": 4.798308392973325, "grad_norm": 1.135992407798767, "learning_rate": 2.228361653216552e-06, "loss": 0.0588, "step": 162250 }, { "epoch": 4.798604128467499, "grad_norm": 1.309112310409546, "learning_rate": 2.2282349632976126e-06, "loss": 0.0788, "step": 162260 }, { "epoch": 4.7988998639616725, "grad_norm": 1.2698842287063599, "learning_rate": 2.228108273378673e-06, "loss": 0.0802, "step": 162270 }, { "epoch": 4.799195599455847, "grad_norm": 1.0067613124847412, "learning_rate": 2.227981583459734e-06, "loss": 0.0683, "step": 162280 }, { "epoch": 4.79949133495002, "grad_norm": 0.8786618113517761, "learning_rate": 2.2278548935407946e-06, "loss": 0.0545, "step": 162290 }, { "epoch": 4.799787070444195, "grad_norm": 0.4658370018005371, "learning_rate": 2.2277282036218554e-06, "loss": 0.0541, "step": 162300 }, { "epoch": 4.800082805938368, "grad_norm": 0.7621973156929016, "learning_rate": 2.2276015137029157e-06, "loss": 0.0595, "step": 162310 }, { "epoch": 4.800378541432543, "grad_norm": 1.5412251949310303, "learning_rate": 2.227474823783976e-06, "loss": 0.0831, "step": 162320 }, { "epoch": 4.800674276926717, "grad_norm": 2.0124616622924805, "learning_rate": 2.227348133865037e-06, "loss": 0.0751, "step": 162330 }, { "epoch": 4.8009700124208905, "grad_norm": 1.4442838430404663, "learning_rate": 2.2272214439460973e-06, "loss": 0.0612, "step": 162340 }, { "epoch": 4.801265747915065, "grad_norm": 1.2357929944992065, "learning_rate": 2.227094754027158e-06, "loss": 0.0498, "step": 162350 }, { "epoch": 4.801561483409239, "grad_norm": 1.1845331192016602, "learning_rate": 2.2269680641082184e-06, "loss": 0.0653, "step": 162360 }, { "epoch": 4.801857218903413, "grad_norm": 0.7356383800506592, "learning_rate": 2.2268413741892796e-06, "loss": 0.0864, "step": 162370 }, { "epoch": 4.802152954397587, "grad_norm": 0.7929505109786987, "learning_rate": 2.22671468427034e-06, "loss": 0.0654, "step": 162380 }, { "epoch": 4.802448689891761, "grad_norm": 0.858537495136261, "learning_rate": 2.2265879943514008e-06, "loss": 0.0695, "step": 162390 }, { "epoch": 4.802744425385935, "grad_norm": 0.7284194231033325, "learning_rate": 2.226461304432461e-06, "loss": 0.0521, "step": 162400 }, { "epoch": 4.8030401608801085, "grad_norm": 0.817457377910614, "learning_rate": 2.226334614513522e-06, "loss": 0.0661, "step": 162410 }, { "epoch": 4.803335896374283, "grad_norm": 1.5998111963272095, "learning_rate": 2.2262079245945823e-06, "loss": 0.0623, "step": 162420 }, { "epoch": 4.803631631868457, "grad_norm": 0.7662792801856995, "learning_rate": 2.226081234675643e-06, "loss": 0.082, "step": 162430 }, { "epoch": 4.803927367362631, "grad_norm": 1.0805121660232544, "learning_rate": 2.2259545447567035e-06, "loss": 0.072, "step": 162440 }, { "epoch": 4.804223102856805, "grad_norm": 0.749008297920227, "learning_rate": 2.2258278548377647e-06, "loss": 0.0738, "step": 162450 }, { "epoch": 4.804518838350979, "grad_norm": 1.3250099420547485, "learning_rate": 2.225701164918825e-06, "loss": 0.0722, "step": 162460 }, { "epoch": 4.804814573845153, "grad_norm": 1.0105618238449097, "learning_rate": 2.225574474999886e-06, "loss": 0.0716, "step": 162470 }, { "epoch": 4.805110309339327, "grad_norm": 1.0057724714279175, "learning_rate": 2.225447785080946e-06, "loss": 0.0691, "step": 162480 }, { "epoch": 4.805406044833501, "grad_norm": 0.8448864221572876, "learning_rate": 2.225321095162007e-06, "loss": 0.0721, "step": 162490 }, { "epoch": 4.805701780327675, "grad_norm": 0.7772698998451233, "learning_rate": 2.2251944052430674e-06, "loss": 0.0536, "step": 162500 }, { "epoch": 4.805997515821849, "grad_norm": 0.8280871510505676, "learning_rate": 2.225067715324128e-06, "loss": 0.0639, "step": 162510 }, { "epoch": 4.806293251316023, "grad_norm": 1.3436578512191772, "learning_rate": 2.2249410254051885e-06, "loss": 0.0743, "step": 162520 }, { "epoch": 4.806588986810197, "grad_norm": 0.4707108736038208, "learning_rate": 2.2248143354862497e-06, "loss": 0.0728, "step": 162530 }, { "epoch": 4.806884722304371, "grad_norm": 0.8390052914619446, "learning_rate": 2.22468764556731e-06, "loss": 0.0688, "step": 162540 }, { "epoch": 4.807180457798545, "grad_norm": 1.5004349946975708, "learning_rate": 2.224560955648371e-06, "loss": 0.0576, "step": 162550 }, { "epoch": 4.807476193292719, "grad_norm": 0.5345853567123413, "learning_rate": 2.2244342657294312e-06, "loss": 0.0668, "step": 162560 }, { "epoch": 4.807771928786893, "grad_norm": 0.9552362561225891, "learning_rate": 2.224307575810492e-06, "loss": 0.0905, "step": 162570 }, { "epoch": 4.808067664281067, "grad_norm": 0.8056214451789856, "learning_rate": 2.2241808858915524e-06, "loss": 0.0734, "step": 162580 }, { "epoch": 4.808363399775241, "grad_norm": 1.3210511207580566, "learning_rate": 2.224054195972613e-06, "loss": 0.0702, "step": 162590 }, { "epoch": 4.808659135269415, "grad_norm": 0.5791662335395813, "learning_rate": 2.2239275060536736e-06, "loss": 0.0637, "step": 162600 }, { "epoch": 4.808954870763589, "grad_norm": 1.134187936782837, "learning_rate": 2.2238008161347348e-06, "loss": 0.0761, "step": 162610 }, { "epoch": 4.809250606257763, "grad_norm": 0.95158851146698, "learning_rate": 2.223674126215795e-06, "loss": 0.0807, "step": 162620 }, { "epoch": 4.809546341751937, "grad_norm": 0.965960681438446, "learning_rate": 2.223547436296856e-06, "loss": 0.0625, "step": 162630 }, { "epoch": 4.8098420772461115, "grad_norm": 0.5603660941123962, "learning_rate": 2.2234207463779163e-06, "loss": 0.0639, "step": 162640 }, { "epoch": 4.810137812740285, "grad_norm": 1.06709885597229, "learning_rate": 2.223294056458977e-06, "loss": 0.0559, "step": 162650 }, { "epoch": 4.810433548234459, "grad_norm": 0.6382904052734375, "learning_rate": 2.2231673665400374e-06, "loss": 0.0658, "step": 162660 }, { "epoch": 4.810729283728633, "grad_norm": 1.0078802108764648, "learning_rate": 2.2230406766210982e-06, "loss": 0.074, "step": 162670 }, { "epoch": 4.811025019222807, "grad_norm": 1.0135351419448853, "learning_rate": 2.2229139867021586e-06, "loss": 0.0637, "step": 162680 }, { "epoch": 4.811320754716981, "grad_norm": 0.6988201141357422, "learning_rate": 2.22278729678322e-06, "loss": 0.0658, "step": 162690 }, { "epoch": 4.811616490211155, "grad_norm": 0.48461437225341797, "learning_rate": 2.22266060686428e-06, "loss": 0.0672, "step": 162700 }, { "epoch": 4.8119122257053295, "grad_norm": 1.1623685359954834, "learning_rate": 2.222533916945341e-06, "loss": 0.0812, "step": 162710 }, { "epoch": 4.812207961199503, "grad_norm": 1.0308189392089844, "learning_rate": 2.2224072270264013e-06, "loss": 0.067, "step": 162720 }, { "epoch": 4.812503696693677, "grad_norm": 0.9989097714424133, "learning_rate": 2.2222805371074617e-06, "loss": 0.0758, "step": 162730 }, { "epoch": 4.812799432187851, "grad_norm": 0.9334052205085754, "learning_rate": 2.2221538471885225e-06, "loss": 0.0648, "step": 162740 }, { "epoch": 4.813095167682025, "grad_norm": 0.6501289010047913, "learning_rate": 2.222027157269583e-06, "loss": 0.0732, "step": 162750 }, { "epoch": 4.813390903176199, "grad_norm": 0.9344618916511536, "learning_rate": 2.2219004673506436e-06, "loss": 0.0672, "step": 162760 }, { "epoch": 4.813686638670373, "grad_norm": 0.7364406585693359, "learning_rate": 2.2217737774317044e-06, "loss": 0.0666, "step": 162770 }, { "epoch": 4.8139823741645476, "grad_norm": 0.8140503168106079, "learning_rate": 2.2216470875127652e-06, "loss": 0.0706, "step": 162780 }, { "epoch": 4.814278109658721, "grad_norm": 0.5023892521858215, "learning_rate": 2.2215203975938256e-06, "loss": 0.0687, "step": 162790 }, { "epoch": 4.814573845152895, "grad_norm": 1.3776241540908813, "learning_rate": 2.2213937076748864e-06, "loss": 0.0716, "step": 162800 }, { "epoch": 4.814869580647069, "grad_norm": 0.6211349368095398, "learning_rate": 2.2212670177559467e-06, "loss": 0.05, "step": 162810 }, { "epoch": 4.815165316141243, "grad_norm": 1.1604931354522705, "learning_rate": 2.2211403278370075e-06, "loss": 0.0593, "step": 162820 }, { "epoch": 4.815461051635417, "grad_norm": 1.3077452182769775, "learning_rate": 2.221013637918068e-06, "loss": 0.0707, "step": 162830 }, { "epoch": 4.815756787129591, "grad_norm": 0.7203150987625122, "learning_rate": 2.2208869479991287e-06, "loss": 0.0663, "step": 162840 }, { "epoch": 4.816052522623766, "grad_norm": 0.6534132957458496, "learning_rate": 2.2207602580801895e-06, "loss": 0.055, "step": 162850 }, { "epoch": 4.816348258117939, "grad_norm": 0.9630913734436035, "learning_rate": 2.2206335681612503e-06, "loss": 0.0723, "step": 162860 }, { "epoch": 4.8166439936121135, "grad_norm": 1.3116731643676758, "learning_rate": 2.2205068782423106e-06, "loss": 0.0838, "step": 162870 }, { "epoch": 4.816939729106287, "grad_norm": 0.5312009453773499, "learning_rate": 2.2203801883233714e-06, "loss": 0.0715, "step": 162880 }, { "epoch": 4.817235464600461, "grad_norm": 0.5695411562919617, "learning_rate": 2.220253498404432e-06, "loss": 0.0668, "step": 162890 }, { "epoch": 4.817531200094635, "grad_norm": 1.1163142919540405, "learning_rate": 2.2201268084854926e-06, "loss": 0.0569, "step": 162900 }, { "epoch": 4.817826935588809, "grad_norm": 0.8476214408874512, "learning_rate": 2.220000118566553e-06, "loss": 0.0702, "step": 162910 }, { "epoch": 4.818122671082984, "grad_norm": 1.2073341608047485, "learning_rate": 2.2198734286476137e-06, "loss": 0.0824, "step": 162920 }, { "epoch": 4.818418406577157, "grad_norm": 1.3800413608551025, "learning_rate": 2.2197467387286745e-06, "loss": 0.0708, "step": 162930 }, { "epoch": 4.8187141420713315, "grad_norm": 1.0103679895401, "learning_rate": 2.2196200488097353e-06, "loss": 0.0709, "step": 162940 }, { "epoch": 4.819009877565505, "grad_norm": 1.5333876609802246, "learning_rate": 2.2194933588907957e-06, "loss": 0.0688, "step": 162950 }, { "epoch": 4.819305613059679, "grad_norm": 1.3855797052383423, "learning_rate": 2.2193666689718565e-06, "loss": 0.0726, "step": 162960 }, { "epoch": 4.819601348553854, "grad_norm": 1.0174624919891357, "learning_rate": 2.219239979052917e-06, "loss": 0.0757, "step": 162970 }, { "epoch": 4.819897084048027, "grad_norm": 0.849956750869751, "learning_rate": 2.2191132891339776e-06, "loss": 0.0605, "step": 162980 }, { "epoch": 4.820192819542202, "grad_norm": 0.774887204170227, "learning_rate": 2.218986599215038e-06, "loss": 0.0676, "step": 162990 }, { "epoch": 4.820488555036375, "grad_norm": 0.9498513340950012, "learning_rate": 2.2188599092960988e-06, "loss": 0.0706, "step": 163000 }, { "epoch": 4.82078429053055, "grad_norm": 1.0053365230560303, "learning_rate": 2.2187332193771596e-06, "loss": 0.0662, "step": 163010 }, { "epoch": 4.821080026024724, "grad_norm": 0.8097012042999268, "learning_rate": 2.2186065294582204e-06, "loss": 0.0769, "step": 163020 }, { "epoch": 4.8213757615188975, "grad_norm": 1.3895529508590698, "learning_rate": 2.2184798395392807e-06, "loss": 0.082, "step": 163030 }, { "epoch": 4.821671497013072, "grad_norm": 0.6012275815010071, "learning_rate": 2.2183531496203415e-06, "loss": 0.0672, "step": 163040 }, { "epoch": 4.821967232507245, "grad_norm": 0.7338289618492126, "learning_rate": 2.218226459701402e-06, "loss": 0.0662, "step": 163050 }, { "epoch": 4.82226296800142, "grad_norm": 0.9239739179611206, "learning_rate": 2.2180997697824627e-06, "loss": 0.0569, "step": 163060 }, { "epoch": 4.822558703495593, "grad_norm": 0.8464061617851257, "learning_rate": 2.217973079863523e-06, "loss": 0.0741, "step": 163070 }, { "epoch": 4.822854438989768, "grad_norm": 1.0808863639831543, "learning_rate": 2.217846389944584e-06, "loss": 0.0862, "step": 163080 }, { "epoch": 4.823150174483942, "grad_norm": 0.8277605772018433, "learning_rate": 2.2177197000256446e-06, "loss": 0.0621, "step": 163090 }, { "epoch": 4.8234459099781155, "grad_norm": 0.7165467739105225, "learning_rate": 2.2175930101067054e-06, "loss": 0.0668, "step": 163100 }, { "epoch": 4.82374164547229, "grad_norm": 0.912489116191864, "learning_rate": 2.2174663201877658e-06, "loss": 0.075, "step": 163110 }, { "epoch": 4.824037380966463, "grad_norm": 1.8563499450683594, "learning_rate": 2.2173396302688266e-06, "loss": 0.0721, "step": 163120 }, { "epoch": 4.824333116460638, "grad_norm": 1.1938246488571167, "learning_rate": 2.217212940349887e-06, "loss": 0.0683, "step": 163130 }, { "epoch": 4.824628851954811, "grad_norm": 1.4163013696670532, "learning_rate": 2.2170862504309473e-06, "loss": 0.0615, "step": 163140 }, { "epoch": 4.824924587448986, "grad_norm": 1.2045981884002686, "learning_rate": 2.216959560512008e-06, "loss": 0.0586, "step": 163150 }, { "epoch": 4.82522032294316, "grad_norm": 0.9362525343894958, "learning_rate": 2.2168328705930684e-06, "loss": 0.0822, "step": 163160 }, { "epoch": 4.8255160584373336, "grad_norm": 1.386560320854187, "learning_rate": 2.2167061806741297e-06, "loss": 0.0825, "step": 163170 }, { "epoch": 4.825811793931508, "grad_norm": 0.8256946206092834, "learning_rate": 2.21657949075519e-06, "loss": 0.0665, "step": 163180 }, { "epoch": 4.826107529425681, "grad_norm": 2.3550589084625244, "learning_rate": 2.216452800836251e-06, "loss": 0.09, "step": 163190 }, { "epoch": 4.826403264919856, "grad_norm": 1.0788334608078003, "learning_rate": 2.216326110917311e-06, "loss": 0.058, "step": 163200 }, { "epoch": 4.826699000414029, "grad_norm": 1.7112817764282227, "learning_rate": 2.216199420998372e-06, "loss": 0.0809, "step": 163210 }, { "epoch": 4.826994735908204, "grad_norm": 0.9389986395835876, "learning_rate": 2.2160727310794323e-06, "loss": 0.0836, "step": 163220 }, { "epoch": 4.827290471402378, "grad_norm": 1.7916593551635742, "learning_rate": 2.215946041160493e-06, "loss": 0.09, "step": 163230 }, { "epoch": 4.827586206896552, "grad_norm": 1.612517237663269, "learning_rate": 2.2158193512415535e-06, "loss": 0.079, "step": 163240 }, { "epoch": 4.827881942390726, "grad_norm": 1.189483404159546, "learning_rate": 2.2156926613226147e-06, "loss": 0.0675, "step": 163250 }, { "epoch": 4.8281776778848995, "grad_norm": 1.2021822929382324, "learning_rate": 2.215565971403675e-06, "loss": 0.08, "step": 163260 }, { "epoch": 4.828473413379074, "grad_norm": 0.7241964340209961, "learning_rate": 2.215439281484736e-06, "loss": 0.0579, "step": 163270 }, { "epoch": 4.828769148873247, "grad_norm": 0.6799580454826355, "learning_rate": 2.2153125915657962e-06, "loss": 0.0597, "step": 163280 }, { "epoch": 4.829064884367422, "grad_norm": 1.029502272605896, "learning_rate": 2.215185901646857e-06, "loss": 0.0675, "step": 163290 }, { "epoch": 4.829360619861596, "grad_norm": 1.4604716300964355, "learning_rate": 2.2150592117279174e-06, "loss": 0.0588, "step": 163300 }, { "epoch": 4.82965635535577, "grad_norm": 0.8710196018218994, "learning_rate": 2.214932521808978e-06, "loss": 0.0744, "step": 163310 }, { "epoch": 4.829952090849944, "grad_norm": 0.7438832521438599, "learning_rate": 2.2148058318900385e-06, "loss": 0.0755, "step": 163320 }, { "epoch": 4.8302478263441175, "grad_norm": 0.8132899403572083, "learning_rate": 2.2146791419710998e-06, "loss": 0.06, "step": 163330 }, { "epoch": 4.830543561838292, "grad_norm": 0.6115593314170837, "learning_rate": 2.21455245205216e-06, "loss": 0.0678, "step": 163340 }, { "epoch": 4.830839297332465, "grad_norm": 0.9842869639396667, "learning_rate": 2.214425762133221e-06, "loss": 0.0502, "step": 163350 }, { "epoch": 4.83113503282664, "grad_norm": 1.1628150939941406, "learning_rate": 2.2142990722142813e-06, "loss": 0.0627, "step": 163360 }, { "epoch": 4.831430768320814, "grad_norm": 0.9789555072784424, "learning_rate": 2.214172382295342e-06, "loss": 0.0638, "step": 163370 }, { "epoch": 4.831726503814988, "grad_norm": 0.9337045550346375, "learning_rate": 2.2140456923764024e-06, "loss": 0.0806, "step": 163380 }, { "epoch": 4.832022239309162, "grad_norm": 0.9041203260421753, "learning_rate": 2.2139190024574632e-06, "loss": 0.0597, "step": 163390 }, { "epoch": 4.832317974803336, "grad_norm": 0.8653473258018494, "learning_rate": 2.2137923125385236e-06, "loss": 0.0764, "step": 163400 }, { "epoch": 4.83261371029751, "grad_norm": 1.176237940788269, "learning_rate": 2.213665622619585e-06, "loss": 0.0687, "step": 163410 }, { "epoch": 4.8329094457916835, "grad_norm": 0.5701938271522522, "learning_rate": 2.213538932700645e-06, "loss": 0.0734, "step": 163420 }, { "epoch": 4.833205181285858, "grad_norm": 0.7483301758766174, "learning_rate": 2.213412242781706e-06, "loss": 0.0772, "step": 163430 }, { "epoch": 4.833500916780032, "grad_norm": 1.3578217029571533, "learning_rate": 2.2132855528627663e-06, "loss": 0.0652, "step": 163440 }, { "epoch": 4.833796652274206, "grad_norm": 1.1524118185043335, "learning_rate": 2.213158862943827e-06, "loss": 0.0743, "step": 163450 }, { "epoch": 4.83409238776838, "grad_norm": 1.444137454032898, "learning_rate": 2.2130321730248875e-06, "loss": 0.0692, "step": 163460 }, { "epoch": 4.834388123262554, "grad_norm": 0.6788312792778015, "learning_rate": 2.2129054831059483e-06, "loss": 0.0829, "step": 163470 }, { "epoch": 4.834683858756728, "grad_norm": 1.9373221397399902, "learning_rate": 2.2127787931870086e-06, "loss": 0.0764, "step": 163480 }, { "epoch": 4.8349795942509015, "grad_norm": 0.9281589984893799, "learning_rate": 2.21265210326807e-06, "loss": 0.0678, "step": 163490 }, { "epoch": 4.835275329745076, "grad_norm": 1.244559645652771, "learning_rate": 2.21252541334913e-06, "loss": 0.0605, "step": 163500 }, { "epoch": 4.83557106523925, "grad_norm": 1.532017469406128, "learning_rate": 2.212398723430191e-06, "loss": 0.0677, "step": 163510 }, { "epoch": 4.835866800733424, "grad_norm": 0.8137168288230896, "learning_rate": 2.2122720335112514e-06, "loss": 0.0782, "step": 163520 }, { "epoch": 4.836162536227598, "grad_norm": 0.9834891557693481, "learning_rate": 2.212145343592312e-06, "loss": 0.0689, "step": 163530 }, { "epoch": 4.836458271721772, "grad_norm": 0.938021183013916, "learning_rate": 2.2120186536733725e-06, "loss": 0.0821, "step": 163540 }, { "epoch": 4.836754007215946, "grad_norm": 0.536821186542511, "learning_rate": 2.211891963754433e-06, "loss": 0.0631, "step": 163550 }, { "epoch": 4.8370497427101204, "grad_norm": 0.9691076278686523, "learning_rate": 2.2117652738354937e-06, "loss": 0.0638, "step": 163560 }, { "epoch": 4.837345478204294, "grad_norm": 0.8693137764930725, "learning_rate": 2.2116385839165545e-06, "loss": 0.0896, "step": 163570 }, { "epoch": 4.837641213698468, "grad_norm": 0.731650173664093, "learning_rate": 2.2115118939976153e-06, "loss": 0.0716, "step": 163580 }, { "epoch": 4.837936949192642, "grad_norm": 0.6177934408187866, "learning_rate": 2.2113852040786756e-06, "loss": 0.0686, "step": 163590 }, { "epoch": 4.838232684686816, "grad_norm": 0.5029605627059937, "learning_rate": 2.2112585141597364e-06, "loss": 0.0663, "step": 163600 }, { "epoch": 4.838528420180991, "grad_norm": 0.73558509349823, "learning_rate": 2.2111318242407968e-06, "loss": 0.0604, "step": 163610 }, { "epoch": 4.838824155675164, "grad_norm": 1.0352083444595337, "learning_rate": 2.2110051343218576e-06, "loss": 0.0572, "step": 163620 }, { "epoch": 4.8391198911693385, "grad_norm": 1.1140124797821045, "learning_rate": 2.210878444402918e-06, "loss": 0.084, "step": 163630 }, { "epoch": 4.839415626663512, "grad_norm": 1.0797250270843506, "learning_rate": 2.2107517544839787e-06, "loss": 0.073, "step": 163640 }, { "epoch": 4.839711362157686, "grad_norm": 0.6862106919288635, "learning_rate": 2.2106250645650395e-06, "loss": 0.0639, "step": 163650 }, { "epoch": 4.84000709765186, "grad_norm": 0.943947970867157, "learning_rate": 2.2104983746461003e-06, "loss": 0.097, "step": 163660 }, { "epoch": 4.840302833146034, "grad_norm": 0.9931633472442627, "learning_rate": 2.2103716847271607e-06, "loss": 0.0969, "step": 163670 }, { "epoch": 4.840598568640209, "grad_norm": 0.7260143756866455, "learning_rate": 2.2102449948082215e-06, "loss": 0.0605, "step": 163680 }, { "epoch": 4.840894304134382, "grad_norm": 0.7585265636444092, "learning_rate": 2.210118304889282e-06, "loss": 0.0702, "step": 163690 }, { "epoch": 4.8411900396285565, "grad_norm": 0.530350923538208, "learning_rate": 2.2099916149703426e-06, "loss": 0.0594, "step": 163700 }, { "epoch": 4.84148577512273, "grad_norm": 1.045822262763977, "learning_rate": 2.209864925051403e-06, "loss": 0.0595, "step": 163710 }, { "epoch": 4.841781510616904, "grad_norm": 0.7904530167579651, "learning_rate": 2.2097382351324638e-06, "loss": 0.0738, "step": 163720 }, { "epoch": 4.842077246111078, "grad_norm": 0.7834004163742065, "learning_rate": 2.2096115452135246e-06, "loss": 0.0609, "step": 163730 }, { "epoch": 4.842372981605252, "grad_norm": 0.5588489770889282, "learning_rate": 2.2094848552945853e-06, "loss": 0.0617, "step": 163740 }, { "epoch": 4.842668717099427, "grad_norm": 0.6277708411216736, "learning_rate": 2.2093581653756457e-06, "loss": 0.0662, "step": 163750 }, { "epoch": 4.8429644525936, "grad_norm": 0.9033480286598206, "learning_rate": 2.2092314754567065e-06, "loss": 0.0577, "step": 163760 }, { "epoch": 4.843260188087775, "grad_norm": 0.9682791233062744, "learning_rate": 2.209104785537767e-06, "loss": 0.0877, "step": 163770 }, { "epoch": 4.843555923581948, "grad_norm": 0.9004384279251099, "learning_rate": 2.2089780956188277e-06, "loss": 0.0657, "step": 163780 }, { "epoch": 4.8438516590761225, "grad_norm": 0.45914679765701294, "learning_rate": 2.208851405699888e-06, "loss": 0.0614, "step": 163790 }, { "epoch": 4.844147394570296, "grad_norm": 1.3041473627090454, "learning_rate": 2.208724715780949e-06, "loss": 0.0629, "step": 163800 }, { "epoch": 4.84444313006447, "grad_norm": 1.244682788848877, "learning_rate": 2.2085980258620096e-06, "loss": 0.0584, "step": 163810 }, { "epoch": 4.844738865558645, "grad_norm": 1.110298991203308, "learning_rate": 2.2084713359430704e-06, "loss": 0.088, "step": 163820 }, { "epoch": 4.845034601052818, "grad_norm": 0.3818047344684601, "learning_rate": 2.2083446460241308e-06, "loss": 0.0682, "step": 163830 }, { "epoch": 4.845330336546993, "grad_norm": 0.9731072187423706, "learning_rate": 2.2082179561051915e-06, "loss": 0.0777, "step": 163840 }, { "epoch": 4.845626072041166, "grad_norm": 0.8440766334533691, "learning_rate": 2.208091266186252e-06, "loss": 0.0717, "step": 163850 }, { "epoch": 4.8459218075353405, "grad_norm": 0.5883360505104065, "learning_rate": 2.2079645762673127e-06, "loss": 0.0662, "step": 163860 }, { "epoch": 4.846217543029514, "grad_norm": 1.063125491142273, "learning_rate": 2.207837886348373e-06, "loss": 0.0678, "step": 163870 }, { "epoch": 4.846513278523688, "grad_norm": 2.2669098377227783, "learning_rate": 2.207711196429434e-06, "loss": 0.0641, "step": 163880 }, { "epoch": 4.846809014017863, "grad_norm": 1.1304739713668823, "learning_rate": 2.2075845065104946e-06, "loss": 0.0794, "step": 163890 }, { "epoch": 4.847104749512036, "grad_norm": 1.4666082859039307, "learning_rate": 2.2074578165915554e-06, "loss": 0.0683, "step": 163900 }, { "epoch": 4.847400485006211, "grad_norm": 0.8898372054100037, "learning_rate": 2.207331126672616e-06, "loss": 0.0591, "step": 163910 }, { "epoch": 4.847696220500384, "grad_norm": 1.1674972772598267, "learning_rate": 2.2072044367536766e-06, "loss": 0.0883, "step": 163920 }, { "epoch": 4.847991955994559, "grad_norm": 0.7145665884017944, "learning_rate": 2.207077746834737e-06, "loss": 0.0656, "step": 163930 }, { "epoch": 4.848287691488732, "grad_norm": 1.3543771505355835, "learning_rate": 2.2069510569157977e-06, "loss": 0.0707, "step": 163940 }, { "epoch": 4.8485834269829065, "grad_norm": 0.839004397392273, "learning_rate": 2.206824366996858e-06, "loss": 0.0594, "step": 163950 }, { "epoch": 4.848879162477081, "grad_norm": 1.121158242225647, "learning_rate": 2.206697677077919e-06, "loss": 0.0731, "step": 163960 }, { "epoch": 4.849174897971254, "grad_norm": 0.8263702988624573, "learning_rate": 2.2065709871589797e-06, "loss": 0.0763, "step": 163970 }, { "epoch": 4.849470633465429, "grad_norm": 0.8601177930831909, "learning_rate": 2.20644429724004e-06, "loss": 0.066, "step": 163980 }, { "epoch": 4.849766368959602, "grad_norm": 1.059092402458191, "learning_rate": 2.206317607321101e-06, "loss": 0.0585, "step": 163990 }, { "epoch": 4.850062104453777, "grad_norm": 0.492268830537796, "learning_rate": 2.2061909174021612e-06, "loss": 0.0529, "step": 164000 }, { "epoch": 4.85035783994795, "grad_norm": 0.6974887251853943, "learning_rate": 2.206064227483222e-06, "loss": 0.0688, "step": 164010 }, { "epoch": 4.8506535754421245, "grad_norm": 0.9769245386123657, "learning_rate": 2.2059375375642824e-06, "loss": 0.0751, "step": 164020 }, { "epoch": 4.850949310936299, "grad_norm": 0.7439485788345337, "learning_rate": 2.205810847645343e-06, "loss": 0.0731, "step": 164030 }, { "epoch": 4.851245046430472, "grad_norm": 0.6962452530860901, "learning_rate": 2.2056841577264035e-06, "loss": 0.0594, "step": 164040 }, { "epoch": 4.851540781924647, "grad_norm": 0.7880342602729797, "learning_rate": 2.2055574678074647e-06, "loss": 0.0553, "step": 164050 }, { "epoch": 4.85183651741882, "grad_norm": 1.019406795501709, "learning_rate": 2.205430777888525e-06, "loss": 0.0603, "step": 164060 }, { "epoch": 4.852132252912995, "grad_norm": 1.4242459535598755, "learning_rate": 2.205304087969586e-06, "loss": 0.0749, "step": 164070 }, { "epoch": 4.852427988407168, "grad_norm": 0.9500486850738525, "learning_rate": 2.2051773980506463e-06, "loss": 0.0786, "step": 164080 }, { "epoch": 4.8527237239013425, "grad_norm": 0.6438195109367371, "learning_rate": 2.205050708131707e-06, "loss": 0.0808, "step": 164090 }, { "epoch": 4.853019459395517, "grad_norm": 1.050589919090271, "learning_rate": 2.2049240182127674e-06, "loss": 0.0637, "step": 164100 }, { "epoch": 4.85331519488969, "grad_norm": 0.9425230622291565, "learning_rate": 2.204797328293828e-06, "loss": 0.0749, "step": 164110 }, { "epoch": 4.853610930383865, "grad_norm": 1.0621106624603271, "learning_rate": 2.2046706383748886e-06, "loss": 0.0702, "step": 164120 }, { "epoch": 4.853906665878038, "grad_norm": 1.0757330656051636, "learning_rate": 2.2045439484559498e-06, "loss": 0.0754, "step": 164130 }, { "epoch": 4.854202401372213, "grad_norm": 0.8793514370918274, "learning_rate": 2.20441725853701e-06, "loss": 0.0832, "step": 164140 }, { "epoch": 4.854498136866386, "grad_norm": 0.6263942122459412, "learning_rate": 2.204290568618071e-06, "loss": 0.0552, "step": 164150 }, { "epoch": 4.854793872360561, "grad_norm": 0.5814727544784546, "learning_rate": 2.2041638786991313e-06, "loss": 0.0606, "step": 164160 }, { "epoch": 4.855089607854735, "grad_norm": 1.529119610786438, "learning_rate": 2.204037188780192e-06, "loss": 0.0907, "step": 164170 }, { "epoch": 4.8553853433489085, "grad_norm": 0.5734904408454895, "learning_rate": 2.2039104988612525e-06, "loss": 0.0712, "step": 164180 }, { "epoch": 4.855681078843083, "grad_norm": 0.9637226462364197, "learning_rate": 2.2037838089423132e-06, "loss": 0.0771, "step": 164190 }, { "epoch": 4.855976814337257, "grad_norm": 0.9003956317901611, "learning_rate": 2.2036571190233736e-06, "loss": 0.0751, "step": 164200 }, { "epoch": 4.856272549831431, "grad_norm": 0.6913411021232605, "learning_rate": 2.203530429104435e-06, "loss": 0.0663, "step": 164210 }, { "epoch": 4.856568285325605, "grad_norm": 1.1315099000930786, "learning_rate": 2.203403739185495e-06, "loss": 0.0845, "step": 164220 }, { "epoch": 4.856864020819779, "grad_norm": 1.5840885639190674, "learning_rate": 2.203277049266556e-06, "loss": 0.0849, "step": 164230 }, { "epoch": 4.857159756313953, "grad_norm": 0.8384872674942017, "learning_rate": 2.2031503593476163e-06, "loss": 0.062, "step": 164240 }, { "epoch": 4.8574554918081265, "grad_norm": 0.4695151746273041, "learning_rate": 2.203023669428677e-06, "loss": 0.0692, "step": 164250 }, { "epoch": 4.857751227302301, "grad_norm": 0.9627184867858887, "learning_rate": 2.2028969795097375e-06, "loss": 0.0562, "step": 164260 }, { "epoch": 4.858046962796475, "grad_norm": 1.1018054485321045, "learning_rate": 2.2027702895907983e-06, "loss": 0.0735, "step": 164270 }, { "epoch": 4.858342698290649, "grad_norm": 1.1603270769119263, "learning_rate": 2.2026435996718587e-06, "loss": 0.0898, "step": 164280 }, { "epoch": 4.858638433784823, "grad_norm": 0.5793856978416443, "learning_rate": 2.20251690975292e-06, "loss": 0.0669, "step": 164290 }, { "epoch": 4.858934169278997, "grad_norm": 0.4818252921104431, "learning_rate": 2.2023902198339802e-06, "loss": 0.0653, "step": 164300 }, { "epoch": 4.859229904773171, "grad_norm": 1.4489083290100098, "learning_rate": 2.202263529915041e-06, "loss": 0.0713, "step": 164310 }, { "epoch": 4.859525640267345, "grad_norm": 0.8294099569320679, "learning_rate": 2.2021368399961014e-06, "loss": 0.0768, "step": 164320 }, { "epoch": 4.859821375761519, "grad_norm": 0.8193306922912598, "learning_rate": 2.202010150077162e-06, "loss": 0.0792, "step": 164330 }, { "epoch": 4.860117111255693, "grad_norm": 0.9247968196868896, "learning_rate": 2.2018834601582226e-06, "loss": 0.0658, "step": 164340 }, { "epoch": 4.860412846749867, "grad_norm": 0.9898508191108704, "learning_rate": 2.2017567702392833e-06, "loss": 0.062, "step": 164350 }, { "epoch": 4.860708582244041, "grad_norm": 0.69734787940979, "learning_rate": 2.2016300803203437e-06, "loss": 0.0652, "step": 164360 }, { "epoch": 4.861004317738215, "grad_norm": 0.6105204224586487, "learning_rate": 2.201503390401405e-06, "loss": 0.0768, "step": 164370 }, { "epoch": 4.861300053232389, "grad_norm": 1.0379379987716675, "learning_rate": 2.2013767004824653e-06, "loss": 0.0682, "step": 164380 }, { "epoch": 4.861595788726563, "grad_norm": 0.6525388360023499, "learning_rate": 2.2012500105635257e-06, "loss": 0.0621, "step": 164390 }, { "epoch": 4.861891524220737, "grad_norm": 0.6119933128356934, "learning_rate": 2.2011233206445864e-06, "loss": 0.0643, "step": 164400 }, { "epoch": 4.862187259714911, "grad_norm": 0.8502643704414368, "learning_rate": 2.200996630725647e-06, "loss": 0.0822, "step": 164410 }, { "epoch": 4.862482995209085, "grad_norm": 1.417341709136963, "learning_rate": 2.2008699408067076e-06, "loss": 0.0616, "step": 164420 }, { "epoch": 4.862778730703259, "grad_norm": 0.43463507294654846, "learning_rate": 2.200743250887768e-06, "loss": 0.0784, "step": 164430 }, { "epoch": 4.863074466197433, "grad_norm": 2.247692584991455, "learning_rate": 2.2006165609688288e-06, "loss": 0.0767, "step": 164440 }, { "epoch": 4.863370201691607, "grad_norm": 0.9541464447975159, "learning_rate": 2.2004898710498895e-06, "loss": 0.0725, "step": 164450 }, { "epoch": 4.863665937185781, "grad_norm": 1.2078431844711304, "learning_rate": 2.2003631811309503e-06, "loss": 0.0653, "step": 164460 }, { "epoch": 4.863961672679955, "grad_norm": 0.898030698299408, "learning_rate": 2.2002364912120107e-06, "loss": 0.0612, "step": 164470 }, { "epoch": 4.864257408174129, "grad_norm": 0.390000581741333, "learning_rate": 2.2001098012930715e-06, "loss": 0.0783, "step": 164480 }, { "epoch": 4.864553143668303, "grad_norm": 0.539581298828125, "learning_rate": 2.199983111374132e-06, "loss": 0.0619, "step": 164490 }, { "epoch": 4.864848879162477, "grad_norm": 0.918004035949707, "learning_rate": 2.1998564214551926e-06, "loss": 0.0607, "step": 164500 }, { "epoch": 4.865144614656651, "grad_norm": 0.9815563559532166, "learning_rate": 2.199729731536253e-06, "loss": 0.066, "step": 164510 }, { "epoch": 4.865440350150825, "grad_norm": 0.9112265110015869, "learning_rate": 2.199603041617314e-06, "loss": 0.0706, "step": 164520 }, { "epoch": 4.865736085644999, "grad_norm": 1.1846240758895874, "learning_rate": 2.1994763516983746e-06, "loss": 0.0694, "step": 164530 }, { "epoch": 4.866031821139173, "grad_norm": 0.7684186697006226, "learning_rate": 2.1993496617794354e-06, "loss": 0.0744, "step": 164540 }, { "epoch": 4.8663275566333475, "grad_norm": 0.691590428352356, "learning_rate": 2.1992229718604957e-06, "loss": 0.0522, "step": 164550 }, { "epoch": 4.866623292127521, "grad_norm": 1.2462427616119385, "learning_rate": 2.1990962819415565e-06, "loss": 0.076, "step": 164560 }, { "epoch": 4.866919027621695, "grad_norm": 1.183724284172058, "learning_rate": 2.198969592022617e-06, "loss": 0.0803, "step": 164570 }, { "epoch": 4.867214763115869, "grad_norm": 1.3862756490707397, "learning_rate": 2.1988429021036777e-06, "loss": 0.0744, "step": 164580 }, { "epoch": 4.867510498610043, "grad_norm": 1.3878200054168701, "learning_rate": 2.198716212184738e-06, "loss": 0.0681, "step": 164590 }, { "epoch": 4.867806234104217, "grad_norm": 0.810262143611908, "learning_rate": 2.198589522265799e-06, "loss": 0.0543, "step": 164600 }, { "epoch": 4.868101969598391, "grad_norm": 0.7588873505592346, "learning_rate": 2.1984628323468596e-06, "loss": 0.0608, "step": 164610 }, { "epoch": 4.8683977050925655, "grad_norm": 0.633195698261261, "learning_rate": 2.1983361424279204e-06, "loss": 0.0701, "step": 164620 }, { "epoch": 4.868693440586739, "grad_norm": 0.9925492405891418, "learning_rate": 2.1982094525089808e-06, "loss": 0.0686, "step": 164630 }, { "epoch": 4.868989176080913, "grad_norm": 0.9218592047691345, "learning_rate": 2.1980827625900416e-06, "loss": 0.0651, "step": 164640 }, { "epoch": 4.869284911575087, "grad_norm": 0.504291832447052, "learning_rate": 2.197956072671102e-06, "loss": 0.0478, "step": 164650 }, { "epoch": 4.869580647069261, "grad_norm": 1.5821092128753662, "learning_rate": 2.1978293827521627e-06, "loss": 0.0776, "step": 164660 }, { "epoch": 4.869876382563435, "grad_norm": 1.101431965827942, "learning_rate": 2.197702692833223e-06, "loss": 0.0844, "step": 164670 }, { "epoch": 4.870172118057609, "grad_norm": 1.1907060146331787, "learning_rate": 2.197576002914284e-06, "loss": 0.0717, "step": 164680 }, { "epoch": 4.870467853551784, "grad_norm": 0.8426060676574707, "learning_rate": 2.1974493129953447e-06, "loss": 0.0719, "step": 164690 }, { "epoch": 4.870763589045957, "grad_norm": 0.7002421617507935, "learning_rate": 2.1973226230764055e-06, "loss": 0.0693, "step": 164700 }, { "epoch": 4.8710593245401315, "grad_norm": 0.7307029962539673, "learning_rate": 2.197195933157466e-06, "loss": 0.0654, "step": 164710 }, { "epoch": 4.871355060034305, "grad_norm": 1.2234702110290527, "learning_rate": 2.1970692432385266e-06, "loss": 0.0811, "step": 164720 }, { "epoch": 4.871650795528479, "grad_norm": 0.5470285415649414, "learning_rate": 2.196942553319587e-06, "loss": 0.0613, "step": 164730 }, { "epoch": 4.871946531022653, "grad_norm": 0.6617609262466431, "learning_rate": 2.1968158634006478e-06, "loss": 0.0586, "step": 164740 }, { "epoch": 4.872242266516827, "grad_norm": 1.1589610576629639, "learning_rate": 2.196689173481708e-06, "loss": 0.0619, "step": 164750 }, { "epoch": 4.872538002011002, "grad_norm": 0.9981172680854797, "learning_rate": 2.196562483562769e-06, "loss": 0.0744, "step": 164760 }, { "epoch": 4.872833737505175, "grad_norm": 1.4673762321472168, "learning_rate": 2.1964357936438297e-06, "loss": 0.075, "step": 164770 }, { "epoch": 4.8731294729993495, "grad_norm": 0.8899680972099304, "learning_rate": 2.1963091037248905e-06, "loss": 0.0881, "step": 164780 }, { "epoch": 4.873425208493523, "grad_norm": 1.3245526552200317, "learning_rate": 2.196182413805951e-06, "loss": 0.0748, "step": 164790 }, { "epoch": 4.873720943987697, "grad_norm": 0.643968403339386, "learning_rate": 2.1960557238870112e-06, "loss": 0.0669, "step": 164800 }, { "epoch": 4.874016679481872, "grad_norm": 1.185592532157898, "learning_rate": 2.195929033968072e-06, "loss": 0.0889, "step": 164810 }, { "epoch": 4.874312414976045, "grad_norm": 0.7436279654502869, "learning_rate": 2.1958023440491324e-06, "loss": 0.0808, "step": 164820 }, { "epoch": 4.87460815047022, "grad_norm": 1.315261960029602, "learning_rate": 2.195675654130193e-06, "loss": 0.0647, "step": 164830 }, { "epoch": 4.874903885964393, "grad_norm": 0.6756170392036438, "learning_rate": 2.1955489642112536e-06, "loss": 0.0529, "step": 164840 }, { "epoch": 4.875199621458568, "grad_norm": 1.2452672719955444, "learning_rate": 2.1954222742923148e-06, "loss": 0.0597, "step": 164850 }, { "epoch": 4.875495356952742, "grad_norm": 0.9509993195533752, "learning_rate": 2.195295584373375e-06, "loss": 0.0653, "step": 164860 }, { "epoch": 4.875791092446915, "grad_norm": 0.8221637010574341, "learning_rate": 2.195168894454436e-06, "loss": 0.0631, "step": 164870 }, { "epoch": 4.87608682794109, "grad_norm": 1.0652250051498413, "learning_rate": 2.1950422045354963e-06, "loss": 0.0691, "step": 164880 }, { "epoch": 4.876382563435263, "grad_norm": 0.7238380312919617, "learning_rate": 2.194915514616557e-06, "loss": 0.0689, "step": 164890 }, { "epoch": 4.876678298929438, "grad_norm": 1.5135639905929565, "learning_rate": 2.1947888246976174e-06, "loss": 0.0761, "step": 164900 }, { "epoch": 4.876974034423611, "grad_norm": 0.7018762230873108, "learning_rate": 2.1946621347786782e-06, "loss": 0.0713, "step": 164910 }, { "epoch": 4.877269769917786, "grad_norm": 0.5717940926551819, "learning_rate": 2.1945354448597386e-06, "loss": 0.0694, "step": 164920 }, { "epoch": 4.87756550541196, "grad_norm": 0.5281595587730408, "learning_rate": 2.1944087549408e-06, "loss": 0.0664, "step": 164930 }, { "epoch": 4.8778612409061335, "grad_norm": 0.7471169233322144, "learning_rate": 2.19428206502186e-06, "loss": 0.0804, "step": 164940 }, { "epoch": 4.878156976400308, "grad_norm": 0.9090285301208496, "learning_rate": 2.194155375102921e-06, "loss": 0.057, "step": 164950 }, { "epoch": 4.878452711894481, "grad_norm": 0.7559913992881775, "learning_rate": 2.1940286851839813e-06, "loss": 0.0563, "step": 164960 }, { "epoch": 4.878748447388656, "grad_norm": 1.2923253774642944, "learning_rate": 2.193901995265042e-06, "loss": 0.0795, "step": 164970 }, { "epoch": 4.879044182882829, "grad_norm": 0.9624623656272888, "learning_rate": 2.1937753053461025e-06, "loss": 0.0735, "step": 164980 }, { "epoch": 4.879339918377004, "grad_norm": 0.48801669478416443, "learning_rate": 2.1936486154271633e-06, "loss": 0.0656, "step": 164990 }, { "epoch": 4.879635653871178, "grad_norm": 0.9081778526306152, "learning_rate": 2.1935219255082236e-06, "loss": 0.0756, "step": 165000 }, { "epoch": 4.8799313893653515, "grad_norm": 0.894016683101654, "learning_rate": 2.193395235589285e-06, "loss": 0.0607, "step": 165010 }, { "epoch": 4.880227124859526, "grad_norm": 1.5346471071243286, "learning_rate": 2.1932685456703452e-06, "loss": 0.0794, "step": 165020 }, { "epoch": 4.880522860353699, "grad_norm": 0.8012959361076355, "learning_rate": 2.193141855751406e-06, "loss": 0.0624, "step": 165030 }, { "epoch": 4.880818595847874, "grad_norm": 0.7099512219429016, "learning_rate": 2.1930151658324664e-06, "loss": 0.0742, "step": 165040 }, { "epoch": 4.881114331342047, "grad_norm": 0.6430708169937134, "learning_rate": 2.192888475913527e-06, "loss": 0.0638, "step": 165050 }, { "epoch": 4.881410066836222, "grad_norm": 0.9217867851257324, "learning_rate": 2.1927617859945875e-06, "loss": 0.0616, "step": 165060 }, { "epoch": 4.881705802330396, "grad_norm": 0.7877830266952515, "learning_rate": 2.1926350960756483e-06, "loss": 0.0675, "step": 165070 }, { "epoch": 4.88200153782457, "grad_norm": 1.154126524925232, "learning_rate": 2.1925084061567087e-06, "loss": 0.0747, "step": 165080 }, { "epoch": 4.882297273318744, "grad_norm": 0.6187244653701782, "learning_rate": 2.19238171623777e-06, "loss": 0.0652, "step": 165090 }, { "epoch": 4.8825930088129175, "grad_norm": 0.644697368144989, "learning_rate": 2.1922550263188303e-06, "loss": 0.0451, "step": 165100 }, { "epoch": 4.882888744307092, "grad_norm": 1.6641802787780762, "learning_rate": 2.192128336399891e-06, "loss": 0.0748, "step": 165110 }, { "epoch": 4.883184479801265, "grad_norm": 0.6455410718917847, "learning_rate": 2.1920016464809514e-06, "loss": 0.0739, "step": 165120 }, { "epoch": 4.88348021529544, "grad_norm": 0.796461284160614, "learning_rate": 2.1918749565620122e-06, "loss": 0.0746, "step": 165130 }, { "epoch": 4.883775950789614, "grad_norm": 1.2670660018920898, "learning_rate": 2.1917482666430726e-06, "loss": 0.0694, "step": 165140 }, { "epoch": 4.884071686283788, "grad_norm": 0.985431432723999, "learning_rate": 2.1916215767241334e-06, "loss": 0.0636, "step": 165150 }, { "epoch": 4.884367421777962, "grad_norm": 1.3641602993011475, "learning_rate": 2.1914948868051937e-06, "loss": 0.0665, "step": 165160 }, { "epoch": 4.8846631572721355, "grad_norm": 1.273805856704712, "learning_rate": 2.191368196886255e-06, "loss": 0.0756, "step": 165170 }, { "epoch": 4.88495889276631, "grad_norm": 0.9924094676971436, "learning_rate": 2.1912415069673153e-06, "loss": 0.0772, "step": 165180 }, { "epoch": 4.885254628260483, "grad_norm": 0.8160341382026672, "learning_rate": 2.191114817048376e-06, "loss": 0.0728, "step": 165190 }, { "epoch": 4.885550363754658, "grad_norm": 1.011922001838684, "learning_rate": 2.1909881271294365e-06, "loss": 0.0644, "step": 165200 }, { "epoch": 4.885846099248832, "grad_norm": 1.1323164701461792, "learning_rate": 2.190861437210497e-06, "loss": 0.0713, "step": 165210 }, { "epoch": 4.886141834743006, "grad_norm": 0.7634323239326477, "learning_rate": 2.1907347472915576e-06, "loss": 0.0819, "step": 165220 }, { "epoch": 4.88643757023718, "grad_norm": 2.267172336578369, "learning_rate": 2.190608057372618e-06, "loss": 0.0677, "step": 165230 }, { "epoch": 4.886733305731354, "grad_norm": 1.306837797164917, "learning_rate": 2.1904813674536788e-06, "loss": 0.0738, "step": 165240 }, { "epoch": 4.887029041225528, "grad_norm": 0.5917859673500061, "learning_rate": 2.1903546775347396e-06, "loss": 0.0594, "step": 165250 }, { "epoch": 4.8873247767197014, "grad_norm": 0.996395468711853, "learning_rate": 2.1902279876158004e-06, "loss": 0.0572, "step": 165260 }, { "epoch": 4.887620512213876, "grad_norm": 1.3467522859573364, "learning_rate": 2.1901012976968607e-06, "loss": 0.0752, "step": 165270 }, { "epoch": 4.88791624770805, "grad_norm": 1.013202428817749, "learning_rate": 2.1899746077779215e-06, "loss": 0.0568, "step": 165280 }, { "epoch": 4.888211983202224, "grad_norm": 0.959160327911377, "learning_rate": 2.189847917858982e-06, "loss": 0.0709, "step": 165290 }, { "epoch": 4.888507718696398, "grad_norm": 0.5276565551757812, "learning_rate": 2.1897212279400427e-06, "loss": 0.0587, "step": 165300 }, { "epoch": 4.888803454190572, "grad_norm": 1.7307387590408325, "learning_rate": 2.189594538021103e-06, "loss": 0.0799, "step": 165310 }, { "epoch": 4.889099189684746, "grad_norm": 1.4011378288269043, "learning_rate": 2.189467848102164e-06, "loss": 0.0734, "step": 165320 }, { "epoch": 4.8893949251789195, "grad_norm": 1.073421597480774, "learning_rate": 2.1893411581832246e-06, "loss": 0.0759, "step": 165330 }, { "epoch": 4.889690660673094, "grad_norm": 0.825543224811554, "learning_rate": 2.1892144682642854e-06, "loss": 0.0664, "step": 165340 }, { "epoch": 4.889986396167268, "grad_norm": 1.0546194314956665, "learning_rate": 2.1890877783453458e-06, "loss": 0.0664, "step": 165350 }, { "epoch": 4.890282131661442, "grad_norm": 1.1625639200210571, "learning_rate": 2.1889610884264066e-06, "loss": 0.0576, "step": 165360 }, { "epoch": 4.890577867155616, "grad_norm": 1.9391889572143555, "learning_rate": 2.188834398507467e-06, "loss": 0.0718, "step": 165370 }, { "epoch": 4.89087360264979, "grad_norm": 0.904711127281189, "learning_rate": 2.1887077085885277e-06, "loss": 0.0794, "step": 165380 }, { "epoch": 4.891169338143964, "grad_norm": 1.108487844467163, "learning_rate": 2.188581018669588e-06, "loss": 0.071, "step": 165390 }, { "epoch": 4.891465073638138, "grad_norm": 0.937921404838562, "learning_rate": 2.188454328750649e-06, "loss": 0.0572, "step": 165400 }, { "epoch": 4.891760809132312, "grad_norm": 0.9626737833023071, "learning_rate": 2.1883276388317097e-06, "loss": 0.066, "step": 165410 }, { "epoch": 4.892056544626486, "grad_norm": 1.7331337928771973, "learning_rate": 2.1882009489127705e-06, "loss": 0.0725, "step": 165420 }, { "epoch": 4.89235228012066, "grad_norm": 1.346600890159607, "learning_rate": 2.188074258993831e-06, "loss": 0.0763, "step": 165430 }, { "epoch": 4.892648015614834, "grad_norm": 0.6858683228492737, "learning_rate": 2.1879475690748916e-06, "loss": 0.0805, "step": 165440 }, { "epoch": 4.892943751109009, "grad_norm": 0.557522714138031, "learning_rate": 2.187820879155952e-06, "loss": 0.0557, "step": 165450 }, { "epoch": 4.893239486603182, "grad_norm": 1.3095275163650513, "learning_rate": 2.1876941892370128e-06, "loss": 0.069, "step": 165460 }, { "epoch": 4.8935352220973565, "grad_norm": 0.9970222115516663, "learning_rate": 2.187567499318073e-06, "loss": 0.0554, "step": 165470 }, { "epoch": 4.89383095759153, "grad_norm": 0.892547607421875, "learning_rate": 2.187440809399134e-06, "loss": 0.0786, "step": 165480 }, { "epoch": 4.894126693085704, "grad_norm": 1.1565533876419067, "learning_rate": 2.1873141194801947e-06, "loss": 0.0701, "step": 165490 }, { "epoch": 4.894422428579878, "grad_norm": 0.5596340894699097, "learning_rate": 2.1871874295612555e-06, "loss": 0.0494, "step": 165500 }, { "epoch": 4.894718164074052, "grad_norm": 0.9685291647911072, "learning_rate": 2.187060739642316e-06, "loss": 0.0752, "step": 165510 }, { "epoch": 4.895013899568227, "grad_norm": 1.861221194267273, "learning_rate": 2.1869340497233767e-06, "loss": 0.1005, "step": 165520 }, { "epoch": 4.8953096350624, "grad_norm": 0.655407190322876, "learning_rate": 2.186807359804437e-06, "loss": 0.0594, "step": 165530 }, { "epoch": 4.8956053705565745, "grad_norm": 1.5771164894104004, "learning_rate": 2.186680669885498e-06, "loss": 0.0751, "step": 165540 }, { "epoch": 4.895901106050748, "grad_norm": 1.2486540079116821, "learning_rate": 2.186553979966558e-06, "loss": 0.0694, "step": 165550 }, { "epoch": 4.896196841544922, "grad_norm": 1.1223609447479248, "learning_rate": 2.186427290047619e-06, "loss": 0.0675, "step": 165560 }, { "epoch": 4.896492577039096, "grad_norm": 0.8650273680686951, "learning_rate": 2.1863006001286798e-06, "loss": 0.0751, "step": 165570 }, { "epoch": 4.89678831253327, "grad_norm": 0.7453206777572632, "learning_rate": 2.1861739102097405e-06, "loss": 0.0775, "step": 165580 }, { "epoch": 4.897084048027445, "grad_norm": 1.3601446151733398, "learning_rate": 2.186047220290801e-06, "loss": 0.0645, "step": 165590 }, { "epoch": 4.897379783521618, "grad_norm": 0.9560220241546631, "learning_rate": 2.1859205303718617e-06, "loss": 0.0796, "step": 165600 }, { "epoch": 4.897675519015793, "grad_norm": 1.0075563192367554, "learning_rate": 2.185793840452922e-06, "loss": 0.0745, "step": 165610 }, { "epoch": 4.897971254509966, "grad_norm": 0.5979048013687134, "learning_rate": 2.1856671505339824e-06, "loss": 0.0675, "step": 165620 }, { "epoch": 4.8982669900041405, "grad_norm": 0.5694265961647034, "learning_rate": 2.1855404606150432e-06, "loss": 0.0719, "step": 165630 }, { "epoch": 4.898562725498314, "grad_norm": 0.7463915348052979, "learning_rate": 2.1854137706961036e-06, "loss": 0.0753, "step": 165640 }, { "epoch": 4.898858460992488, "grad_norm": 1.9214634895324707, "learning_rate": 2.185287080777165e-06, "loss": 0.0627, "step": 165650 }, { "epoch": 4.899154196486663, "grad_norm": 1.4909999370574951, "learning_rate": 2.185160390858225e-06, "loss": 0.0673, "step": 165660 }, { "epoch": 4.899449931980836, "grad_norm": 0.8346744179725647, "learning_rate": 2.185033700939286e-06, "loss": 0.0794, "step": 165670 }, { "epoch": 4.899745667475011, "grad_norm": 1.1672230958938599, "learning_rate": 2.1849070110203463e-06, "loss": 0.0755, "step": 165680 }, { "epoch": 4.900041402969184, "grad_norm": 0.6712252497673035, "learning_rate": 2.184780321101407e-06, "loss": 0.0504, "step": 165690 }, { "epoch": 4.9003371384633585, "grad_norm": 0.7428914308547974, "learning_rate": 2.1846536311824675e-06, "loss": 0.062, "step": 165700 }, { "epoch": 4.900632873957532, "grad_norm": 1.0156171321868896, "learning_rate": 2.1845269412635283e-06, "loss": 0.0608, "step": 165710 }, { "epoch": 4.900928609451706, "grad_norm": 1.1609203815460205, "learning_rate": 2.1844002513445886e-06, "loss": 0.0757, "step": 165720 }, { "epoch": 4.901224344945881, "grad_norm": 0.6717706322669983, "learning_rate": 2.18427356142565e-06, "loss": 0.0691, "step": 165730 }, { "epoch": 4.901520080440054, "grad_norm": 0.8433135747909546, "learning_rate": 2.18414687150671e-06, "loss": 0.0886, "step": 165740 }, { "epoch": 4.901815815934229, "grad_norm": 0.7432774305343628, "learning_rate": 2.184020181587771e-06, "loss": 0.0526, "step": 165750 }, { "epoch": 4.902111551428402, "grad_norm": 0.9702721238136292, "learning_rate": 2.1838934916688314e-06, "loss": 0.0604, "step": 165760 }, { "epoch": 4.9024072869225765, "grad_norm": 1.2330132722854614, "learning_rate": 2.183766801749892e-06, "loss": 0.0821, "step": 165770 }, { "epoch": 4.90270302241675, "grad_norm": 1.0065330266952515, "learning_rate": 2.1836401118309525e-06, "loss": 0.0831, "step": 165780 }, { "epoch": 4.902998757910924, "grad_norm": 0.7503649592399597, "learning_rate": 2.1835134219120133e-06, "loss": 0.0573, "step": 165790 }, { "epoch": 4.903294493405099, "grad_norm": 0.46777161955833435, "learning_rate": 2.1833867319930737e-06, "loss": 0.0484, "step": 165800 }, { "epoch": 4.903590228899272, "grad_norm": 1.0675231218338013, "learning_rate": 2.183260042074135e-06, "loss": 0.0721, "step": 165810 }, { "epoch": 4.903885964393447, "grad_norm": 1.2181295156478882, "learning_rate": 2.1831333521551953e-06, "loss": 0.0728, "step": 165820 }, { "epoch": 4.90418169988762, "grad_norm": 0.8515839576721191, "learning_rate": 2.183006662236256e-06, "loss": 0.0742, "step": 165830 }, { "epoch": 4.904477435381795, "grad_norm": 0.7918776273727417, "learning_rate": 2.1828799723173164e-06, "loss": 0.0532, "step": 165840 }, { "epoch": 4.904773170875968, "grad_norm": 1.004264235496521, "learning_rate": 2.182753282398377e-06, "loss": 0.0601, "step": 165850 }, { "epoch": 4.9050689063701425, "grad_norm": 1.1605234146118164, "learning_rate": 2.1826265924794376e-06, "loss": 0.071, "step": 165860 }, { "epoch": 4.905364641864317, "grad_norm": 1.5129153728485107, "learning_rate": 2.1824999025604984e-06, "loss": 0.0748, "step": 165870 }, { "epoch": 4.90566037735849, "grad_norm": 0.8401957154273987, "learning_rate": 2.1823732126415587e-06, "loss": 0.0678, "step": 165880 }, { "epoch": 4.905956112852665, "grad_norm": 0.5353199243545532, "learning_rate": 2.18224652272262e-06, "loss": 0.0717, "step": 165890 }, { "epoch": 4.906251848346838, "grad_norm": 0.6694477200508118, "learning_rate": 2.1821198328036803e-06, "loss": 0.0577, "step": 165900 }, { "epoch": 4.906547583841013, "grad_norm": 0.9070515632629395, "learning_rate": 2.181993142884741e-06, "loss": 0.0716, "step": 165910 }, { "epoch": 4.906843319335186, "grad_norm": 0.757830023765564, "learning_rate": 2.1818664529658015e-06, "loss": 0.0767, "step": 165920 }, { "epoch": 4.9071390548293605, "grad_norm": 0.7200834155082703, "learning_rate": 2.1817397630468622e-06, "loss": 0.0687, "step": 165930 }, { "epoch": 4.907434790323535, "grad_norm": 1.168054461479187, "learning_rate": 2.1816130731279226e-06, "loss": 0.0836, "step": 165940 }, { "epoch": 4.907730525817708, "grad_norm": 1.0153361558914185, "learning_rate": 2.1814863832089834e-06, "loss": 0.053, "step": 165950 }, { "epoch": 4.908026261311883, "grad_norm": 1.1409528255462646, "learning_rate": 2.1813596932900438e-06, "loss": 0.054, "step": 165960 }, { "epoch": 4.908321996806056, "grad_norm": 0.8801071643829346, "learning_rate": 2.181233003371105e-06, "loss": 0.075, "step": 165970 }, { "epoch": 4.908617732300231, "grad_norm": 0.6791715025901794, "learning_rate": 2.1811063134521653e-06, "loss": 0.0757, "step": 165980 }, { "epoch": 4.908913467794405, "grad_norm": 0.8894743323326111, "learning_rate": 2.180979623533226e-06, "loss": 0.0653, "step": 165990 }, { "epoch": 4.909209203288579, "grad_norm": 1.4067986011505127, "learning_rate": 2.1808529336142865e-06, "loss": 0.0572, "step": 166000 }, { "epoch": 4.909504938782753, "grad_norm": 1.2991760969161987, "learning_rate": 2.1807262436953473e-06, "loss": 0.0758, "step": 166010 }, { "epoch": 4.9098006742769265, "grad_norm": 1.0601252317428589, "learning_rate": 2.1805995537764077e-06, "loss": 0.0647, "step": 166020 }, { "epoch": 4.910096409771101, "grad_norm": 0.6702587604522705, "learning_rate": 2.1804728638574684e-06, "loss": 0.0597, "step": 166030 }, { "epoch": 4.910392145265275, "grad_norm": 0.944506824016571, "learning_rate": 2.180346173938529e-06, "loss": 0.0712, "step": 166040 }, { "epoch": 4.910687880759449, "grad_norm": 0.975921630859375, "learning_rate": 2.1802194840195896e-06, "loss": 0.0645, "step": 166050 }, { "epoch": 4.910983616253623, "grad_norm": 0.9670097827911377, "learning_rate": 2.1800927941006504e-06, "loss": 0.0718, "step": 166060 }, { "epoch": 4.911279351747797, "grad_norm": 1.4809050559997559, "learning_rate": 2.1799661041817108e-06, "loss": 0.0799, "step": 166070 }, { "epoch": 4.911575087241971, "grad_norm": 0.7243709564208984, "learning_rate": 2.1798394142627715e-06, "loss": 0.0722, "step": 166080 }, { "epoch": 4.9118708227361445, "grad_norm": 1.0034536123275757, "learning_rate": 2.179712724343832e-06, "loss": 0.0607, "step": 166090 }, { "epoch": 4.912166558230319, "grad_norm": 0.487783282995224, "learning_rate": 2.1795860344248927e-06, "loss": 0.0696, "step": 166100 }, { "epoch": 4.912462293724493, "grad_norm": 0.9429073333740234, "learning_rate": 2.179459344505953e-06, "loss": 0.0674, "step": 166110 }, { "epoch": 4.912758029218667, "grad_norm": 0.9789155721664429, "learning_rate": 2.179332654587014e-06, "loss": 0.077, "step": 166120 }, { "epoch": 4.913053764712841, "grad_norm": 0.9207156300544739, "learning_rate": 2.1792059646680746e-06, "loss": 0.0884, "step": 166130 }, { "epoch": 4.913349500207015, "grad_norm": 0.9822036623954773, "learning_rate": 2.1790792747491354e-06, "loss": 0.0679, "step": 166140 }, { "epoch": 4.913645235701189, "grad_norm": 0.7640188932418823, "learning_rate": 2.178952584830196e-06, "loss": 0.0736, "step": 166150 }, { "epoch": 4.9139409711953626, "grad_norm": 0.7814347743988037, "learning_rate": 2.1788258949112566e-06, "loss": 0.0655, "step": 166160 }, { "epoch": 4.914236706689537, "grad_norm": 0.8342772126197815, "learning_rate": 2.178699204992317e-06, "loss": 0.0746, "step": 166170 }, { "epoch": 4.914532442183711, "grad_norm": 2.1529383659362793, "learning_rate": 2.1785725150733777e-06, "loss": 0.0863, "step": 166180 }, { "epoch": 4.914828177677885, "grad_norm": 0.5383344292640686, "learning_rate": 2.178445825154438e-06, "loss": 0.0622, "step": 166190 }, { "epoch": 4.915123913172059, "grad_norm": 1.048028588294983, "learning_rate": 2.178319135235499e-06, "loss": 0.0657, "step": 166200 }, { "epoch": 4.915419648666233, "grad_norm": 1.1594164371490479, "learning_rate": 2.1781924453165597e-06, "loss": 0.0672, "step": 166210 }, { "epoch": 4.915715384160407, "grad_norm": 1.2106722593307495, "learning_rate": 2.1780657553976205e-06, "loss": 0.0778, "step": 166220 }, { "epoch": 4.916011119654581, "grad_norm": 0.7548385262489319, "learning_rate": 2.177939065478681e-06, "loss": 0.0734, "step": 166230 }, { "epoch": 4.916306855148755, "grad_norm": 1.004098892211914, "learning_rate": 2.1778123755597416e-06, "loss": 0.0683, "step": 166240 }, { "epoch": 4.916602590642929, "grad_norm": 1.397778034210205, "learning_rate": 2.177685685640802e-06, "loss": 0.0569, "step": 166250 }, { "epoch": 4.916898326137103, "grad_norm": 0.5377439260482788, "learning_rate": 2.177558995721863e-06, "loss": 0.0605, "step": 166260 }, { "epoch": 4.917194061631277, "grad_norm": 0.8237380385398865, "learning_rate": 2.177432305802923e-06, "loss": 0.0658, "step": 166270 }, { "epoch": 4.917489797125451, "grad_norm": 1.13063645362854, "learning_rate": 2.177305615883984e-06, "loss": 0.0762, "step": 166280 }, { "epoch": 4.917785532619625, "grad_norm": 0.7933874726295471, "learning_rate": 2.1771789259650447e-06, "loss": 0.0729, "step": 166290 }, { "epoch": 4.918081268113799, "grad_norm": 0.9106969237327576, "learning_rate": 2.1770522360461055e-06, "loss": 0.0623, "step": 166300 }, { "epoch": 4.918377003607973, "grad_norm": 0.8214962482452393, "learning_rate": 2.176925546127166e-06, "loss": 0.0601, "step": 166310 }, { "epoch": 4.918672739102147, "grad_norm": 0.7320809960365295, "learning_rate": 2.1767988562082267e-06, "loss": 0.0645, "step": 166320 }, { "epoch": 4.918968474596321, "grad_norm": 0.8010920286178589, "learning_rate": 2.176672166289287e-06, "loss": 0.0824, "step": 166330 }, { "epoch": 4.919264210090495, "grad_norm": 1.05497145652771, "learning_rate": 2.176545476370348e-06, "loss": 0.057, "step": 166340 }, { "epoch": 4.919559945584669, "grad_norm": 0.6399857401847839, "learning_rate": 2.176418786451408e-06, "loss": 0.0626, "step": 166350 }, { "epoch": 4.919855681078843, "grad_norm": 0.8805009126663208, "learning_rate": 2.176292096532469e-06, "loss": 0.0659, "step": 166360 }, { "epoch": 4.920151416573017, "grad_norm": 0.970991313457489, "learning_rate": 2.1761654066135298e-06, "loss": 0.071, "step": 166370 }, { "epoch": 4.920447152067191, "grad_norm": 0.9675703644752502, "learning_rate": 2.1760387166945906e-06, "loss": 0.0707, "step": 166380 }, { "epoch": 4.9207428875613655, "grad_norm": 1.0119075775146484, "learning_rate": 2.175912026775651e-06, "loss": 0.0689, "step": 166390 }, { "epoch": 4.921038623055539, "grad_norm": 0.9337997436523438, "learning_rate": 2.1757853368567117e-06, "loss": 0.0785, "step": 166400 }, { "epoch": 4.921334358549713, "grad_norm": 1.319506049156189, "learning_rate": 2.175658646937772e-06, "loss": 0.0768, "step": 166410 }, { "epoch": 4.921630094043887, "grad_norm": 1.1953065395355225, "learning_rate": 2.175531957018833e-06, "loss": 0.0828, "step": 166420 }, { "epoch": 4.921925829538061, "grad_norm": 1.0799760818481445, "learning_rate": 2.1754052670998932e-06, "loss": 0.0621, "step": 166430 }, { "epoch": 4.922221565032235, "grad_norm": 1.1320843696594238, "learning_rate": 2.175278577180954e-06, "loss": 0.077, "step": 166440 }, { "epoch": 4.922517300526409, "grad_norm": 0.9732047915458679, "learning_rate": 2.175151887262015e-06, "loss": 0.0675, "step": 166450 }, { "epoch": 4.9228130360205835, "grad_norm": 0.749571681022644, "learning_rate": 2.175025197343075e-06, "loss": 0.0679, "step": 166460 }, { "epoch": 4.923108771514757, "grad_norm": 1.1489758491516113, "learning_rate": 2.174898507424136e-06, "loss": 0.0652, "step": 166470 }, { "epoch": 4.923404507008931, "grad_norm": 1.479258418083191, "learning_rate": 2.1747718175051963e-06, "loss": 0.08, "step": 166480 }, { "epoch": 4.923700242503105, "grad_norm": 0.7606186866760254, "learning_rate": 2.174645127586257e-06, "loss": 0.0561, "step": 166490 }, { "epoch": 4.923995977997279, "grad_norm": 0.6846698522567749, "learning_rate": 2.1745184376673175e-06, "loss": 0.0727, "step": 166500 }, { "epoch": 4.924291713491453, "grad_norm": 0.9291477203369141, "learning_rate": 2.1743917477483783e-06, "loss": 0.0714, "step": 166510 }, { "epoch": 4.924587448985627, "grad_norm": 0.8196431398391724, "learning_rate": 2.1742650578294387e-06, "loss": 0.0792, "step": 166520 }, { "epoch": 4.924883184479802, "grad_norm": 1.2920963764190674, "learning_rate": 2.1741383679105e-06, "loss": 0.0836, "step": 166530 }, { "epoch": 4.925178919973975, "grad_norm": 0.9870489835739136, "learning_rate": 2.1740116779915602e-06, "loss": 0.0567, "step": 166540 }, { "epoch": 4.925474655468149, "grad_norm": 0.855872631072998, "learning_rate": 2.173884988072621e-06, "loss": 0.0635, "step": 166550 }, { "epoch": 4.925770390962323, "grad_norm": 1.7095401287078857, "learning_rate": 2.1737582981536814e-06, "loss": 0.0659, "step": 166560 }, { "epoch": 4.926066126456497, "grad_norm": 0.9529018998146057, "learning_rate": 2.173631608234742e-06, "loss": 0.0925, "step": 166570 }, { "epoch": 4.926361861950671, "grad_norm": 0.9214898943901062, "learning_rate": 2.1735049183158025e-06, "loss": 0.0691, "step": 166580 }, { "epoch": 4.926657597444845, "grad_norm": 0.5566343069076538, "learning_rate": 2.1733782283968633e-06, "loss": 0.0645, "step": 166590 }, { "epoch": 4.92695333293902, "grad_norm": 0.5067952871322632, "learning_rate": 2.1732515384779237e-06, "loss": 0.067, "step": 166600 }, { "epoch": 4.927249068433193, "grad_norm": 0.7973870038986206, "learning_rate": 2.173124848558985e-06, "loss": 0.052, "step": 166610 }, { "epoch": 4.9275448039273675, "grad_norm": 0.6791199445724487, "learning_rate": 2.1729981586400453e-06, "loss": 0.0877, "step": 166620 }, { "epoch": 4.927840539421541, "grad_norm": 0.6355628967285156, "learning_rate": 2.172871468721106e-06, "loss": 0.0653, "step": 166630 }, { "epoch": 4.928136274915715, "grad_norm": 0.6556379795074463, "learning_rate": 2.1727447788021664e-06, "loss": 0.0788, "step": 166640 }, { "epoch": 4.92843201040989, "grad_norm": 1.0580629110336304, "learning_rate": 2.1726180888832272e-06, "loss": 0.0565, "step": 166650 }, { "epoch": 4.928727745904063, "grad_norm": 1.0782647132873535, "learning_rate": 2.1724913989642876e-06, "loss": 0.0596, "step": 166660 }, { "epoch": 4.929023481398238, "grad_norm": 0.7703900337219238, "learning_rate": 2.1723647090453484e-06, "loss": 0.0653, "step": 166670 }, { "epoch": 4.929319216892411, "grad_norm": 0.6812962293624878, "learning_rate": 2.1722380191264088e-06, "loss": 0.0639, "step": 166680 }, { "epoch": 4.9296149523865855, "grad_norm": 1.5236165523529053, "learning_rate": 2.17211132920747e-06, "loss": 0.0647, "step": 166690 }, { "epoch": 4.92991068788076, "grad_norm": 0.6607213616371155, "learning_rate": 2.1719846392885303e-06, "loss": 0.0652, "step": 166700 }, { "epoch": 4.930206423374933, "grad_norm": 0.9791616201400757, "learning_rate": 2.171857949369591e-06, "loss": 0.0831, "step": 166710 }, { "epoch": 4.930502158869108, "grad_norm": 1.7671808004379272, "learning_rate": 2.1717312594506515e-06, "loss": 0.0819, "step": 166720 }, { "epoch": 4.930797894363281, "grad_norm": 0.7137010097503662, "learning_rate": 2.1716045695317123e-06, "loss": 0.0754, "step": 166730 }, { "epoch": 4.931093629857456, "grad_norm": 0.5096627473831177, "learning_rate": 2.1714778796127726e-06, "loss": 0.066, "step": 166740 }, { "epoch": 4.931389365351629, "grad_norm": 0.9308146238327026, "learning_rate": 2.1713511896938334e-06, "loss": 0.0695, "step": 166750 }, { "epoch": 4.931685100845804, "grad_norm": 0.9347020983695984, "learning_rate": 2.171224499774894e-06, "loss": 0.0629, "step": 166760 }, { "epoch": 4.931980836339978, "grad_norm": 2.238246202468872, "learning_rate": 2.171097809855955e-06, "loss": 0.084, "step": 166770 }, { "epoch": 4.9322765718341515, "grad_norm": 0.708345890045166, "learning_rate": 2.1709711199370154e-06, "loss": 0.0661, "step": 166780 }, { "epoch": 4.932572307328326, "grad_norm": 1.118434190750122, "learning_rate": 2.170844430018076e-06, "loss": 0.0696, "step": 166790 }, { "epoch": 4.932868042822499, "grad_norm": 0.6357236504554749, "learning_rate": 2.1707177400991365e-06, "loss": 0.0659, "step": 166800 }, { "epoch": 4.933163778316674, "grad_norm": 0.8579325079917908, "learning_rate": 2.1705910501801973e-06, "loss": 0.0668, "step": 166810 }, { "epoch": 4.933459513810847, "grad_norm": 0.8051676154136658, "learning_rate": 2.1704643602612577e-06, "loss": 0.0967, "step": 166820 }, { "epoch": 4.933755249305022, "grad_norm": 0.7898712754249573, "learning_rate": 2.1703376703423185e-06, "loss": 0.0892, "step": 166830 }, { "epoch": 4.934050984799196, "grad_norm": 1.0925124883651733, "learning_rate": 2.170210980423379e-06, "loss": 0.082, "step": 166840 }, { "epoch": 4.9343467202933695, "grad_norm": 0.7792243957519531, "learning_rate": 2.17008429050444e-06, "loss": 0.0468, "step": 166850 }, { "epoch": 4.934642455787544, "grad_norm": 1.0253918170928955, "learning_rate": 2.1699576005855004e-06, "loss": 0.0828, "step": 166860 }, { "epoch": 4.934938191281717, "grad_norm": 1.1381696462631226, "learning_rate": 2.1698309106665608e-06, "loss": 0.0578, "step": 166870 }, { "epoch": 4.935233926775892, "grad_norm": 0.8449164628982544, "learning_rate": 2.1697042207476216e-06, "loss": 0.0673, "step": 166880 }, { "epoch": 4.935529662270065, "grad_norm": 0.7993143200874329, "learning_rate": 2.169577530828682e-06, "loss": 0.0653, "step": 166890 }, { "epoch": 4.93582539776424, "grad_norm": 1.1763639450073242, "learning_rate": 2.1694508409097427e-06, "loss": 0.0554, "step": 166900 }, { "epoch": 4.936121133258414, "grad_norm": 1.302878499031067, "learning_rate": 2.169324150990803e-06, "loss": 0.0567, "step": 166910 }, { "epoch": 4.936416868752588, "grad_norm": 0.9834997653961182, "learning_rate": 2.169197461071864e-06, "loss": 0.0838, "step": 166920 }, { "epoch": 4.936712604246762, "grad_norm": 0.8305772542953491, "learning_rate": 2.1690707711529247e-06, "loss": 0.0738, "step": 166930 }, { "epoch": 4.9370083397409354, "grad_norm": 0.9863437414169312, "learning_rate": 2.1689440812339855e-06, "loss": 0.0803, "step": 166940 }, { "epoch": 4.93730407523511, "grad_norm": 0.8159177303314209, "learning_rate": 2.168817391315046e-06, "loss": 0.0669, "step": 166950 }, { "epoch": 4.937599810729283, "grad_norm": 0.8100953102111816, "learning_rate": 2.1686907013961066e-06, "loss": 0.0589, "step": 166960 }, { "epoch": 4.937895546223458, "grad_norm": 1.013349175453186, "learning_rate": 2.168564011477167e-06, "loss": 0.068, "step": 166970 }, { "epoch": 4.938191281717632, "grad_norm": 0.6078709363937378, "learning_rate": 2.1684373215582278e-06, "loss": 0.0704, "step": 166980 }, { "epoch": 4.938487017211806, "grad_norm": 1.0798696279525757, "learning_rate": 2.168310631639288e-06, "loss": 0.0792, "step": 166990 }, { "epoch": 4.93878275270598, "grad_norm": 0.6736429333686829, "learning_rate": 2.168183941720349e-06, "loss": 0.0722, "step": 167000 }, { "epoch": 4.9390784882001535, "grad_norm": 0.9037219882011414, "learning_rate": 2.1680572518014097e-06, "loss": 0.0656, "step": 167010 }, { "epoch": 4.939374223694328, "grad_norm": 0.999322772026062, "learning_rate": 2.1679305618824705e-06, "loss": 0.108, "step": 167020 }, { "epoch": 4.939669959188501, "grad_norm": 0.6848104596138, "learning_rate": 2.167803871963531e-06, "loss": 0.0793, "step": 167030 }, { "epoch": 4.939965694682676, "grad_norm": 0.856988787651062, "learning_rate": 2.1676771820445917e-06, "loss": 0.0623, "step": 167040 }, { "epoch": 4.94026143017685, "grad_norm": 1.0790691375732422, "learning_rate": 2.167550492125652e-06, "loss": 0.0516, "step": 167050 }, { "epoch": 4.940557165671024, "grad_norm": 1.0327659845352173, "learning_rate": 2.167423802206713e-06, "loss": 0.0625, "step": 167060 }, { "epoch": 4.940852901165198, "grad_norm": 0.7704865336418152, "learning_rate": 2.167297112287773e-06, "loss": 0.0619, "step": 167070 }, { "epoch": 4.9411486366593715, "grad_norm": 0.7399687767028809, "learning_rate": 2.167170422368834e-06, "loss": 0.0697, "step": 167080 }, { "epoch": 4.941444372153546, "grad_norm": 1.1612907648086548, "learning_rate": 2.1670437324498948e-06, "loss": 0.0675, "step": 167090 }, { "epoch": 4.941740107647719, "grad_norm": 1.291681170463562, "learning_rate": 2.1669170425309556e-06, "loss": 0.0714, "step": 167100 }, { "epoch": 4.942035843141894, "grad_norm": 1.064286470413208, "learning_rate": 2.166790352612016e-06, "loss": 0.0674, "step": 167110 }, { "epoch": 4.942331578636068, "grad_norm": 1.0769319534301758, "learning_rate": 2.1666636626930767e-06, "loss": 0.0778, "step": 167120 }, { "epoch": 4.942627314130242, "grad_norm": 0.8961706757545471, "learning_rate": 2.166536972774137e-06, "loss": 0.0953, "step": 167130 }, { "epoch": 4.942923049624416, "grad_norm": 1.0387276411056519, "learning_rate": 2.166410282855198e-06, "loss": 0.0559, "step": 167140 }, { "epoch": 4.94321878511859, "grad_norm": 0.5288577675819397, "learning_rate": 2.1662835929362582e-06, "loss": 0.0645, "step": 167150 }, { "epoch": 4.943514520612764, "grad_norm": 0.9508105516433716, "learning_rate": 2.166156903017319e-06, "loss": 0.0632, "step": 167160 }, { "epoch": 4.9438102561069375, "grad_norm": 0.7988097667694092, "learning_rate": 2.16603021309838e-06, "loss": 0.0887, "step": 167170 }, { "epoch": 4.944105991601112, "grad_norm": 0.6891403794288635, "learning_rate": 2.1659035231794406e-06, "loss": 0.0736, "step": 167180 }, { "epoch": 4.944401727095286, "grad_norm": 0.6290221214294434, "learning_rate": 2.165776833260501e-06, "loss": 0.0585, "step": 167190 }, { "epoch": 4.94469746258946, "grad_norm": 0.8327507972717285, "learning_rate": 2.1656501433415618e-06, "loss": 0.0549, "step": 167200 }, { "epoch": 4.944993198083634, "grad_norm": 0.8989861011505127, "learning_rate": 2.165523453422622e-06, "loss": 0.0683, "step": 167210 }, { "epoch": 4.945288933577808, "grad_norm": 1.0270590782165527, "learning_rate": 2.165396763503683e-06, "loss": 0.0863, "step": 167220 }, { "epoch": 4.945584669071982, "grad_norm": 0.726977527141571, "learning_rate": 2.1652700735847433e-06, "loss": 0.0673, "step": 167230 }, { "epoch": 4.945880404566156, "grad_norm": 0.5737646222114563, "learning_rate": 2.165143383665804e-06, "loss": 0.0628, "step": 167240 }, { "epoch": 4.94617614006033, "grad_norm": 1.1078698635101318, "learning_rate": 2.165016693746865e-06, "loss": 0.0606, "step": 167250 }, { "epoch": 4.946471875554504, "grad_norm": 1.1043200492858887, "learning_rate": 2.1648900038279256e-06, "loss": 0.0888, "step": 167260 }, { "epoch": 4.946767611048678, "grad_norm": 1.4248580932617188, "learning_rate": 2.164763313908986e-06, "loss": 0.0829, "step": 167270 }, { "epoch": 4.947063346542852, "grad_norm": 0.9996803402900696, "learning_rate": 2.1646366239900464e-06, "loss": 0.0847, "step": 167280 }, { "epoch": 4.947359082037027, "grad_norm": 0.9091197848320007, "learning_rate": 2.164509934071107e-06, "loss": 0.0707, "step": 167290 }, { "epoch": 4.9476548175312, "grad_norm": 0.5751692056655884, "learning_rate": 2.1643832441521675e-06, "loss": 0.0714, "step": 167300 }, { "epoch": 4.9479505530253745, "grad_norm": 0.6764256954193115, "learning_rate": 2.1642565542332283e-06, "loss": 0.0646, "step": 167310 }, { "epoch": 4.948246288519548, "grad_norm": 0.6920072436332703, "learning_rate": 2.1641298643142887e-06, "loss": 0.0777, "step": 167320 }, { "epoch": 4.948542024013722, "grad_norm": 0.5876674056053162, "learning_rate": 2.16400317439535e-06, "loss": 0.067, "step": 167330 }, { "epoch": 4.948837759507896, "grad_norm": 0.9289448261260986, "learning_rate": 2.1638764844764103e-06, "loss": 0.0703, "step": 167340 }, { "epoch": 4.94913349500207, "grad_norm": 1.5994765758514404, "learning_rate": 2.163749794557471e-06, "loss": 0.0576, "step": 167350 }, { "epoch": 4.949429230496245, "grad_norm": 1.0391802787780762, "learning_rate": 2.1636231046385314e-06, "loss": 0.0701, "step": 167360 }, { "epoch": 4.949724965990418, "grad_norm": 0.9607040286064148, "learning_rate": 2.1634964147195922e-06, "loss": 0.0609, "step": 167370 }, { "epoch": 4.9500207014845925, "grad_norm": 1.1434444189071655, "learning_rate": 2.1633697248006526e-06, "loss": 0.0622, "step": 167380 }, { "epoch": 4.950316436978766, "grad_norm": 1.0227766036987305, "learning_rate": 2.1632430348817134e-06, "loss": 0.0728, "step": 167390 }, { "epoch": 4.95061217247294, "grad_norm": 0.9052267074584961, "learning_rate": 2.1631163449627737e-06, "loss": 0.0567, "step": 167400 }, { "epoch": 4.950907907967114, "grad_norm": 1.2160645723342896, "learning_rate": 2.162989655043835e-06, "loss": 0.0706, "step": 167410 }, { "epoch": 4.951203643461288, "grad_norm": 0.744830846786499, "learning_rate": 2.1628629651248953e-06, "loss": 0.0789, "step": 167420 }, { "epoch": 4.951499378955463, "grad_norm": 1.0725966691970825, "learning_rate": 2.162736275205956e-06, "loss": 0.0814, "step": 167430 }, { "epoch": 4.951795114449636, "grad_norm": 1.4831796884536743, "learning_rate": 2.1626095852870165e-06, "loss": 0.0745, "step": 167440 }, { "epoch": 4.9520908499438105, "grad_norm": 1.2995723485946655, "learning_rate": 2.1624828953680773e-06, "loss": 0.0713, "step": 167450 }, { "epoch": 4.952386585437984, "grad_norm": 0.710986316204071, "learning_rate": 2.1623562054491376e-06, "loss": 0.0526, "step": 167460 }, { "epoch": 4.952682320932158, "grad_norm": 1.366996169090271, "learning_rate": 2.1622295155301984e-06, "loss": 0.1008, "step": 167470 }, { "epoch": 4.952978056426332, "grad_norm": 2.277261257171631, "learning_rate": 2.1621028256112588e-06, "loss": 0.0811, "step": 167480 }, { "epoch": 4.953273791920506, "grad_norm": 0.6412124037742615, "learning_rate": 2.16197613569232e-06, "loss": 0.052, "step": 167490 }, { "epoch": 4.953569527414681, "grad_norm": 0.6098716259002686, "learning_rate": 2.1618494457733804e-06, "loss": 0.0626, "step": 167500 }, { "epoch": 4.953865262908854, "grad_norm": 1.0554256439208984, "learning_rate": 2.161722755854441e-06, "loss": 0.068, "step": 167510 }, { "epoch": 4.954160998403029, "grad_norm": 0.626133143901825, "learning_rate": 2.1615960659355015e-06, "loss": 0.0718, "step": 167520 }, { "epoch": 4.954456733897202, "grad_norm": 1.1322412490844727, "learning_rate": 2.1614693760165623e-06, "loss": 0.0747, "step": 167530 }, { "epoch": 4.9547524693913765, "grad_norm": 0.9611213803291321, "learning_rate": 2.1613426860976227e-06, "loss": 0.0687, "step": 167540 }, { "epoch": 4.95504820488555, "grad_norm": 0.6605638861656189, "learning_rate": 2.1612159961786835e-06, "loss": 0.071, "step": 167550 }, { "epoch": 4.955343940379724, "grad_norm": 0.7930222749710083, "learning_rate": 2.161089306259744e-06, "loss": 0.052, "step": 167560 }, { "epoch": 4.955639675873899, "grad_norm": 1.1582024097442627, "learning_rate": 2.160962616340805e-06, "loss": 0.065, "step": 167570 }, { "epoch": 4.955935411368072, "grad_norm": 2.3840138912200928, "learning_rate": 2.1608359264218654e-06, "loss": 0.0759, "step": 167580 }, { "epoch": 4.956231146862247, "grad_norm": 1.2083252668380737, "learning_rate": 2.160709236502926e-06, "loss": 0.0662, "step": 167590 }, { "epoch": 4.95652688235642, "grad_norm": 0.8813917636871338, "learning_rate": 2.1605825465839866e-06, "loss": 0.0814, "step": 167600 }, { "epoch": 4.9568226178505945, "grad_norm": 1.4020456075668335, "learning_rate": 2.1604558566650473e-06, "loss": 0.0643, "step": 167610 }, { "epoch": 4.957118353344768, "grad_norm": 0.9710187315940857, "learning_rate": 2.1603291667461077e-06, "loss": 0.0701, "step": 167620 }, { "epoch": 4.957414088838942, "grad_norm": 0.8368409276008606, "learning_rate": 2.1602024768271685e-06, "loss": 0.0669, "step": 167630 }, { "epoch": 4.957709824333117, "grad_norm": 0.8106716275215149, "learning_rate": 2.160075786908229e-06, "loss": 0.065, "step": 167640 }, { "epoch": 4.95800555982729, "grad_norm": 0.8969361782073975, "learning_rate": 2.15994909698929e-06, "loss": 0.066, "step": 167650 }, { "epoch": 4.958301295321465, "grad_norm": 0.9660314321517944, "learning_rate": 2.1598224070703505e-06, "loss": 0.0643, "step": 167660 }, { "epoch": 4.958597030815638, "grad_norm": 0.8186718225479126, "learning_rate": 2.1596957171514112e-06, "loss": 0.0812, "step": 167670 }, { "epoch": 4.958892766309813, "grad_norm": 2.107106924057007, "learning_rate": 2.1595690272324716e-06, "loss": 0.0839, "step": 167680 }, { "epoch": 4.959188501803986, "grad_norm": 0.8288646936416626, "learning_rate": 2.159442337313532e-06, "loss": 0.0818, "step": 167690 }, { "epoch": 4.9594842372981605, "grad_norm": 0.550622284412384, "learning_rate": 2.1593156473945928e-06, "loss": 0.0615, "step": 167700 }, { "epoch": 4.959779972792335, "grad_norm": 0.8665083050727844, "learning_rate": 2.159188957475653e-06, "loss": 0.0588, "step": 167710 }, { "epoch": 4.960075708286508, "grad_norm": 0.7291003465652466, "learning_rate": 2.159062267556714e-06, "loss": 0.0766, "step": 167720 }, { "epoch": 4.960371443780683, "grad_norm": 0.6216638088226318, "learning_rate": 2.1589355776377743e-06, "loss": 0.0856, "step": 167730 }, { "epoch": 4.960667179274856, "grad_norm": 0.5282840728759766, "learning_rate": 2.1588088877188355e-06, "loss": 0.0629, "step": 167740 }, { "epoch": 4.960962914769031, "grad_norm": 0.7746578454971313, "learning_rate": 2.158682197799896e-06, "loss": 0.072, "step": 167750 }, { "epoch": 4.961258650263204, "grad_norm": 1.0364474058151245, "learning_rate": 2.1585555078809567e-06, "loss": 0.0604, "step": 167760 }, { "epoch": 4.9615543857573785, "grad_norm": 0.6050469875335693, "learning_rate": 2.158428817962017e-06, "loss": 0.075, "step": 167770 }, { "epoch": 4.961850121251553, "grad_norm": 0.9240615963935852, "learning_rate": 2.158302128043078e-06, "loss": 0.0841, "step": 167780 }, { "epoch": 4.962145856745726, "grad_norm": 0.7083909511566162, "learning_rate": 2.158175438124138e-06, "loss": 0.0526, "step": 167790 }, { "epoch": 4.962441592239901, "grad_norm": 0.6182013750076294, "learning_rate": 2.158048748205199e-06, "loss": 0.0583, "step": 167800 }, { "epoch": 4.962737327734074, "grad_norm": 1.0507372617721558, "learning_rate": 2.1579220582862593e-06, "loss": 0.075, "step": 167810 }, { "epoch": 4.963033063228249, "grad_norm": 0.6120525598526001, "learning_rate": 2.1577953683673205e-06, "loss": 0.0769, "step": 167820 }, { "epoch": 4.963328798722423, "grad_norm": 1.7725563049316406, "learning_rate": 2.157668678448381e-06, "loss": 0.0769, "step": 167830 }, { "epoch": 4.9636245342165966, "grad_norm": 0.917849600315094, "learning_rate": 2.1575419885294417e-06, "loss": 0.0654, "step": 167840 }, { "epoch": 4.963920269710771, "grad_norm": 1.019912838935852, "learning_rate": 2.157415298610502e-06, "loss": 0.062, "step": 167850 }, { "epoch": 4.964216005204944, "grad_norm": 0.9151989221572876, "learning_rate": 2.157288608691563e-06, "loss": 0.0731, "step": 167860 }, { "epoch": 4.964511740699119, "grad_norm": 2.1462619304656982, "learning_rate": 2.1571619187726232e-06, "loss": 0.0701, "step": 167870 }, { "epoch": 4.964807476193293, "grad_norm": 1.867829442024231, "learning_rate": 2.157035228853684e-06, "loss": 0.0746, "step": 167880 }, { "epoch": 4.965103211687467, "grad_norm": 0.8133172392845154, "learning_rate": 2.1569085389347444e-06, "loss": 0.0716, "step": 167890 }, { "epoch": 4.965398947181641, "grad_norm": 1.049439787864685, "learning_rate": 2.1567818490158056e-06, "loss": 0.0608, "step": 167900 }, { "epoch": 4.965694682675815, "grad_norm": 1.0214983224868774, "learning_rate": 2.156655159096866e-06, "loss": 0.063, "step": 167910 }, { "epoch": 4.965990418169989, "grad_norm": 1.3772202730178833, "learning_rate": 2.1565284691779267e-06, "loss": 0.0849, "step": 167920 }, { "epoch": 4.9662861536641625, "grad_norm": 0.8855537176132202, "learning_rate": 2.156401779258987e-06, "loss": 0.0803, "step": 167930 }, { "epoch": 4.966581889158337, "grad_norm": 0.8754469752311707, "learning_rate": 2.156275089340048e-06, "loss": 0.0656, "step": 167940 }, { "epoch": 4.966877624652511, "grad_norm": 0.571493923664093, "learning_rate": 2.1561483994211083e-06, "loss": 0.055, "step": 167950 }, { "epoch": 4.967173360146685, "grad_norm": 0.8850283026695251, "learning_rate": 2.156021709502169e-06, "loss": 0.069, "step": 167960 }, { "epoch": 4.967469095640859, "grad_norm": 1.0537824630737305, "learning_rate": 2.1558950195832294e-06, "loss": 0.0833, "step": 167970 }, { "epoch": 4.967764831135033, "grad_norm": 1.1504489183425903, "learning_rate": 2.1557683296642906e-06, "loss": 0.0647, "step": 167980 }, { "epoch": 4.968060566629207, "grad_norm": 1.4680801630020142, "learning_rate": 2.155641639745351e-06, "loss": 0.0663, "step": 167990 }, { "epoch": 4.9683563021233805, "grad_norm": 0.987127959728241, "learning_rate": 2.1555149498264118e-06, "loss": 0.057, "step": 168000 }, { "epoch": 4.968652037617555, "grad_norm": 0.8375971913337708, "learning_rate": 2.155388259907472e-06, "loss": 0.0641, "step": 168010 }, { "epoch": 4.968947773111729, "grad_norm": 0.6046515107154846, "learning_rate": 2.155261569988533e-06, "loss": 0.0815, "step": 168020 }, { "epoch": 4.969243508605903, "grad_norm": 1.1629998683929443, "learning_rate": 2.1551348800695933e-06, "loss": 0.0782, "step": 168030 }, { "epoch": 4.969539244100077, "grad_norm": 0.9206407070159912, "learning_rate": 2.155008190150654e-06, "loss": 0.064, "step": 168040 }, { "epoch": 4.969834979594251, "grad_norm": 0.5703777074813843, "learning_rate": 2.1548815002317145e-06, "loss": 0.0603, "step": 168050 }, { "epoch": 4.970130715088425, "grad_norm": 1.1131880283355713, "learning_rate": 2.1547548103127757e-06, "loss": 0.0667, "step": 168060 }, { "epoch": 4.970426450582599, "grad_norm": 0.7848528623580933, "learning_rate": 2.154628120393836e-06, "loss": 0.073, "step": 168070 }, { "epoch": 4.970722186076773, "grad_norm": 0.8504973649978638, "learning_rate": 2.154501430474897e-06, "loss": 0.0712, "step": 168080 }, { "epoch": 4.971017921570947, "grad_norm": 0.8096691966056824, "learning_rate": 2.154374740555957e-06, "loss": 0.0669, "step": 168090 }, { "epoch": 4.971313657065121, "grad_norm": 0.8840470314025879, "learning_rate": 2.154248050637018e-06, "loss": 0.0657, "step": 168100 }, { "epoch": 4.971609392559295, "grad_norm": 0.831411600112915, "learning_rate": 2.1541213607180784e-06, "loss": 0.0423, "step": 168110 }, { "epoch": 4.971905128053469, "grad_norm": 1.0330721139907837, "learning_rate": 2.1539946707991387e-06, "loss": 0.0713, "step": 168120 }, { "epoch": 4.972200863547643, "grad_norm": 0.7788897156715393, "learning_rate": 2.1538679808801995e-06, "loss": 0.0638, "step": 168130 }, { "epoch": 4.972496599041817, "grad_norm": 0.8697062730789185, "learning_rate": 2.1537412909612603e-06, "loss": 0.0701, "step": 168140 }, { "epoch": 4.972792334535991, "grad_norm": 0.999447226524353, "learning_rate": 2.153614601042321e-06, "loss": 0.0606, "step": 168150 }, { "epoch": 4.973088070030165, "grad_norm": 0.7391831278800964, "learning_rate": 2.1534879111233815e-06, "loss": 0.0598, "step": 168160 }, { "epoch": 4.973383805524339, "grad_norm": 0.7772641181945801, "learning_rate": 2.1533612212044422e-06, "loss": 0.0701, "step": 168170 }, { "epoch": 4.973679541018513, "grad_norm": 0.8422960042953491, "learning_rate": 2.1532345312855026e-06, "loss": 0.0788, "step": 168180 }, { "epoch": 4.973975276512687, "grad_norm": 0.8653672337532043, "learning_rate": 2.1531078413665634e-06, "loss": 0.0724, "step": 168190 }, { "epoch": 4.974271012006861, "grad_norm": 0.43361568450927734, "learning_rate": 2.1529811514476238e-06, "loss": 0.0518, "step": 168200 }, { "epoch": 4.974566747501035, "grad_norm": 0.5306547284126282, "learning_rate": 2.1528544615286846e-06, "loss": 0.0636, "step": 168210 }, { "epoch": 4.974862482995209, "grad_norm": 0.6846599578857422, "learning_rate": 2.1527277716097453e-06, "loss": 0.0833, "step": 168220 }, { "epoch": 4.975158218489383, "grad_norm": 1.0393043756484985, "learning_rate": 2.152601081690806e-06, "loss": 0.0725, "step": 168230 }, { "epoch": 4.975453953983557, "grad_norm": 0.6797126531600952, "learning_rate": 2.1524743917718665e-06, "loss": 0.0724, "step": 168240 }, { "epoch": 4.975749689477731, "grad_norm": 1.5295624732971191, "learning_rate": 2.1523477018529273e-06, "loss": 0.061, "step": 168250 }, { "epoch": 4.976045424971905, "grad_norm": 0.8430253863334656, "learning_rate": 2.1522210119339877e-06, "loss": 0.0561, "step": 168260 }, { "epoch": 4.976341160466079, "grad_norm": 1.612251877784729, "learning_rate": 2.1520943220150484e-06, "loss": 0.0726, "step": 168270 }, { "epoch": 4.976636895960253, "grad_norm": 0.9794027209281921, "learning_rate": 2.151967632096109e-06, "loss": 0.0615, "step": 168280 }, { "epoch": 4.976932631454427, "grad_norm": 0.8894906640052795, "learning_rate": 2.1518409421771696e-06, "loss": 0.0781, "step": 168290 }, { "epoch": 4.9772283669486015, "grad_norm": 0.7758429050445557, "learning_rate": 2.1517142522582304e-06, "loss": 0.0753, "step": 168300 }, { "epoch": 4.977524102442775, "grad_norm": 0.6768582463264465, "learning_rate": 2.151587562339291e-06, "loss": 0.0569, "step": 168310 }, { "epoch": 4.977819837936949, "grad_norm": 0.6783525943756104, "learning_rate": 2.1514608724203515e-06, "loss": 0.0828, "step": 168320 }, { "epoch": 4.978115573431123, "grad_norm": 1.0474580526351929, "learning_rate": 2.1513341825014123e-06, "loss": 0.0708, "step": 168330 }, { "epoch": 4.978411308925297, "grad_norm": 1.3498646020889282, "learning_rate": 2.1512074925824727e-06, "loss": 0.087, "step": 168340 }, { "epoch": 4.978707044419471, "grad_norm": 0.49941524863243103, "learning_rate": 2.1510808026635335e-06, "loss": 0.0572, "step": 168350 }, { "epoch": 4.979002779913645, "grad_norm": 0.8760906457901001, "learning_rate": 2.150954112744594e-06, "loss": 0.0526, "step": 168360 }, { "epoch": 4.9792985154078195, "grad_norm": 0.5194734930992126, "learning_rate": 2.1508274228256546e-06, "loss": 0.0552, "step": 168370 }, { "epoch": 4.979594250901993, "grad_norm": 0.7811402082443237, "learning_rate": 2.1507007329067154e-06, "loss": 0.0729, "step": 168380 }, { "epoch": 4.979889986396167, "grad_norm": 1.204601764678955, "learning_rate": 2.1505740429877762e-06, "loss": 0.069, "step": 168390 }, { "epoch": 4.980185721890341, "grad_norm": 1.007621169090271, "learning_rate": 2.1504473530688366e-06, "loss": 0.0596, "step": 168400 }, { "epoch": 4.980481457384515, "grad_norm": 0.4861728549003601, "learning_rate": 2.1503206631498974e-06, "loss": 0.0467, "step": 168410 }, { "epoch": 4.980777192878689, "grad_norm": 1.2468210458755493, "learning_rate": 2.1501939732309577e-06, "loss": 0.0678, "step": 168420 }, { "epoch": 4.981072928372863, "grad_norm": 0.6305192708969116, "learning_rate": 2.1500672833120185e-06, "loss": 0.0689, "step": 168430 }, { "epoch": 4.981368663867038, "grad_norm": 0.6606158018112183, "learning_rate": 2.149940593393079e-06, "loss": 0.0787, "step": 168440 }, { "epoch": 4.981664399361211, "grad_norm": 0.27835673093795776, "learning_rate": 2.1498139034741397e-06, "loss": 0.05, "step": 168450 }, { "epoch": 4.9819601348553855, "grad_norm": 0.9550458788871765, "learning_rate": 2.1496872135552005e-06, "loss": 0.0639, "step": 168460 }, { "epoch": 4.982255870349559, "grad_norm": 1.068316102027893, "learning_rate": 2.1495605236362613e-06, "loss": 0.0782, "step": 168470 }, { "epoch": 4.982551605843733, "grad_norm": 1.2131983041763306, "learning_rate": 2.1494338337173216e-06, "loss": 0.0784, "step": 168480 }, { "epoch": 4.982847341337908, "grad_norm": 0.8465083837509155, "learning_rate": 2.1493071437983824e-06, "loss": 0.057, "step": 168490 }, { "epoch": 4.983143076832081, "grad_norm": 0.6127386689186096, "learning_rate": 2.149180453879443e-06, "loss": 0.0667, "step": 168500 }, { "epoch": 4.983438812326256, "grad_norm": 1.2938939332962036, "learning_rate": 2.1490537639605036e-06, "loss": 0.0584, "step": 168510 }, { "epoch": 4.983734547820429, "grad_norm": 1.670198678970337, "learning_rate": 2.148927074041564e-06, "loss": 0.0746, "step": 168520 }, { "epoch": 4.9840302833146035, "grad_norm": 1.0376131534576416, "learning_rate": 2.1488003841226243e-06, "loss": 0.0719, "step": 168530 }, { "epoch": 4.984326018808778, "grad_norm": 1.0504504442214966, "learning_rate": 2.1486736942036855e-06, "loss": 0.0787, "step": 168540 }, { "epoch": 4.984621754302951, "grad_norm": 0.653228223323822, "learning_rate": 2.148547004284746e-06, "loss": 0.0646, "step": 168550 }, { "epoch": 4.984917489797126, "grad_norm": 0.667202889919281, "learning_rate": 2.1484203143658067e-06, "loss": 0.0702, "step": 168560 }, { "epoch": 4.985213225291299, "grad_norm": 1.33893620967865, "learning_rate": 2.148293624446867e-06, "loss": 0.0803, "step": 168570 }, { "epoch": 4.985508960785474, "grad_norm": 1.2804527282714844, "learning_rate": 2.148166934527928e-06, "loss": 0.0674, "step": 168580 }, { "epoch": 4.985804696279647, "grad_norm": 0.9759496450424194, "learning_rate": 2.148040244608988e-06, "loss": 0.0694, "step": 168590 }, { "epoch": 4.986100431773822, "grad_norm": 0.6853177547454834, "learning_rate": 2.147913554690049e-06, "loss": 0.0637, "step": 168600 }, { "epoch": 4.986396167267996, "grad_norm": 0.978273868560791, "learning_rate": 2.1477868647711094e-06, "loss": 0.0706, "step": 168610 }, { "epoch": 4.9866919027621694, "grad_norm": 0.8197841644287109, "learning_rate": 2.1476601748521706e-06, "loss": 0.0671, "step": 168620 }, { "epoch": 4.986987638256344, "grad_norm": 0.724186897277832, "learning_rate": 2.147533484933231e-06, "loss": 0.0847, "step": 168630 }, { "epoch": 4.987283373750517, "grad_norm": 0.9061822295188904, "learning_rate": 2.1474067950142917e-06, "loss": 0.0679, "step": 168640 }, { "epoch": 4.987579109244692, "grad_norm": 1.265978217124939, "learning_rate": 2.147280105095352e-06, "loss": 0.0617, "step": 168650 }, { "epoch": 4.987874844738865, "grad_norm": 1.0166295766830444, "learning_rate": 2.147153415176413e-06, "loss": 0.0756, "step": 168660 }, { "epoch": 4.98817058023304, "grad_norm": 1.5010638236999512, "learning_rate": 2.1470267252574732e-06, "loss": 0.0804, "step": 168670 }, { "epoch": 4.988466315727214, "grad_norm": 1.0265109539031982, "learning_rate": 2.146900035338534e-06, "loss": 0.0859, "step": 168680 }, { "epoch": 4.9887620512213875, "grad_norm": 0.855082094669342, "learning_rate": 2.1467733454195944e-06, "loss": 0.0716, "step": 168690 }, { "epoch": 4.989057786715562, "grad_norm": 0.7971784472465515, "learning_rate": 2.1466466555006556e-06, "loss": 0.0646, "step": 168700 }, { "epoch": 4.989353522209735, "grad_norm": 1.3401042222976685, "learning_rate": 2.146519965581716e-06, "loss": 0.0756, "step": 168710 }, { "epoch": 4.98964925770391, "grad_norm": 0.7077468037605286, "learning_rate": 2.1463932756627768e-06, "loss": 0.0631, "step": 168720 }, { "epoch": 4.989944993198083, "grad_norm": 0.978976845741272, "learning_rate": 2.146266585743837e-06, "loss": 0.0832, "step": 168730 }, { "epoch": 4.990240728692258, "grad_norm": 0.7855513095855713, "learning_rate": 2.146139895824898e-06, "loss": 0.0776, "step": 168740 }, { "epoch": 4.990536464186432, "grad_norm": 0.8092512488365173, "learning_rate": 2.1460132059059583e-06, "loss": 0.0608, "step": 168750 }, { "epoch": 4.9908321996806055, "grad_norm": 1.191733479499817, "learning_rate": 2.145886515987019e-06, "loss": 0.0727, "step": 168760 }, { "epoch": 4.99112793517478, "grad_norm": 0.8252249956130981, "learning_rate": 2.1457598260680794e-06, "loss": 0.0688, "step": 168770 }, { "epoch": 4.991423670668953, "grad_norm": 0.7776004076004028, "learning_rate": 2.1456331361491407e-06, "loss": 0.0751, "step": 168780 }, { "epoch": 4.991719406163128, "grad_norm": 0.8630780577659607, "learning_rate": 2.145506446230201e-06, "loss": 0.0685, "step": 168790 }, { "epoch": 4.992015141657301, "grad_norm": 1.026084065437317, "learning_rate": 2.145379756311262e-06, "loss": 0.0681, "step": 168800 }, { "epoch": 4.992310877151476, "grad_norm": 1.1753816604614258, "learning_rate": 2.145253066392322e-06, "loss": 0.0559, "step": 168810 }, { "epoch": 4.99260661264565, "grad_norm": 0.47593221068382263, "learning_rate": 2.145126376473383e-06, "loss": 0.0691, "step": 168820 }, { "epoch": 4.992902348139824, "grad_norm": 1.149303674697876, "learning_rate": 2.1449996865544433e-06, "loss": 0.0799, "step": 168830 }, { "epoch": 4.993198083633998, "grad_norm": 1.4727940559387207, "learning_rate": 2.144872996635504e-06, "loss": 0.0706, "step": 168840 }, { "epoch": 4.9934938191281715, "grad_norm": 0.6791296005249023, "learning_rate": 2.1447463067165645e-06, "loss": 0.0601, "step": 168850 }, { "epoch": 4.993789554622346, "grad_norm": 0.6382099986076355, "learning_rate": 2.1446196167976257e-06, "loss": 0.0732, "step": 168860 }, { "epoch": 4.994085290116519, "grad_norm": 1.0702415704727173, "learning_rate": 2.144492926878686e-06, "loss": 0.0888, "step": 168870 }, { "epoch": 4.994381025610694, "grad_norm": 1.3138327598571777, "learning_rate": 2.144366236959747e-06, "loss": 0.0695, "step": 168880 }, { "epoch": 4.994676761104868, "grad_norm": 0.5154114365577698, "learning_rate": 2.1442395470408072e-06, "loss": 0.0817, "step": 168890 }, { "epoch": 4.994972496599042, "grad_norm": 0.8978919386863708, "learning_rate": 2.144112857121868e-06, "loss": 0.0698, "step": 168900 }, { "epoch": 4.995268232093216, "grad_norm": 0.7263864278793335, "learning_rate": 2.1439861672029284e-06, "loss": 0.0719, "step": 168910 }, { "epoch": 4.9955639675873895, "grad_norm": 1.309022068977356, "learning_rate": 2.143859477283989e-06, "loss": 0.0775, "step": 168920 }, { "epoch": 4.995859703081564, "grad_norm": 0.6171278357505798, "learning_rate": 2.1437327873650495e-06, "loss": 0.0801, "step": 168930 }, { "epoch": 4.996155438575737, "grad_norm": 0.726568877696991, "learning_rate": 2.1436060974461103e-06, "loss": 0.0707, "step": 168940 }, { "epoch": 4.996451174069912, "grad_norm": 1.1940747499465942, "learning_rate": 2.143479407527171e-06, "loss": 0.0557, "step": 168950 }, { "epoch": 4.996746909564086, "grad_norm": 1.0992772579193115, "learning_rate": 2.1433527176082315e-06, "loss": 0.0772, "step": 168960 }, { "epoch": 4.99704264505826, "grad_norm": 0.9782012104988098, "learning_rate": 2.1432260276892923e-06, "loss": 0.0819, "step": 168970 }, { "epoch": 4.997338380552434, "grad_norm": 1.023450255393982, "learning_rate": 2.1430993377703526e-06, "loss": 0.0721, "step": 168980 }, { "epoch": 4.997634116046608, "grad_norm": 0.9986972808837891, "learning_rate": 2.1429726478514134e-06, "loss": 0.0643, "step": 168990 }, { "epoch": 4.997929851540782, "grad_norm": 0.7081349492073059, "learning_rate": 2.142845957932474e-06, "loss": 0.0582, "step": 169000 }, { "epoch": 4.9982255870349555, "grad_norm": 0.7128975987434387, "learning_rate": 2.1427192680135346e-06, "loss": 0.0633, "step": 169010 }, { "epoch": 4.99852132252913, "grad_norm": 1.0882054567337036, "learning_rate": 2.1425925780945954e-06, "loss": 0.082, "step": 169020 }, { "epoch": 4.998817058023304, "grad_norm": 0.8057454228401184, "learning_rate": 2.142465888175656e-06, "loss": 0.0641, "step": 169030 }, { "epoch": 4.999112793517478, "grad_norm": 1.0825973749160767, "learning_rate": 2.1423391982567165e-06, "loss": 0.0835, "step": 169040 }, { "epoch": 4.999408529011652, "grad_norm": 0.7828987240791321, "learning_rate": 2.1422125083377773e-06, "loss": 0.0693, "step": 169050 }, { "epoch": 4.999704264505826, "grad_norm": 2.168497323989868, "learning_rate": 2.1420858184188377e-06, "loss": 0.0677, "step": 169060 }, { "epoch": 5.0, "grad_norm": 0.7790918350219727, "learning_rate": 2.1419591284998985e-06, "loss": 0.072, "step": 169070 }, { "epoch": 5.0, "eval_accuracy": 0.6726552882855907, "eval_animal_abuse/accuracy": 0.9949762118641249, "eval_animal_abuse/f1": 0.7732732732732732, "eval_animal_abuse/fpr": 0.0021707670043415303, "eval_animal_abuse/precision": 0.7996894409937888, "eval_animal_abuse/recall": 0.748546511627907, "eval_animal_abuse/threshold": 0.5455278754234314, "eval_child_abuse/accuracy": 0.9967062581095918, "eval_child_abuse/f1": 0.6934984520123839, "eval_child_abuse/fpr": 0.001488767334102806, "eval_child_abuse/precision": 0.7156549520766773, "eval_child_abuse/recall": 0.6726726726726727, "eval_child_abuse/threshold": 0.4359014928340912, "eval_controversial_topics,politics/accuracy": 0.968127224939282, "eval_controversial_topics,politics/f1": 0.5193176116407426, "eval_controversial_topics,politics/fpr": 0.019031438769906612, "eval_controversial_topics,politics/precision": 0.48274253731343286, "eval_controversial_topics,politics/recall": 0.5618892508143323, "eval_controversial_topics,politics/threshold": 0.3015676736831665, "eval_discrimination,stereotype,injustice/accuracy": 0.954918987257544, "eval_discrimination,stereotype,injustice/f1": 0.7136517328825022, "eval_discrimination,stereotype,injustice/fpr": 0.023620197347019874, "eval_discrimination,stereotype,injustice/precision": 0.7209649871904356, "eval_discrimination,stereotype,injustice/recall": 0.7064853556485355, "eval_discrimination,stereotype,injustice/threshold": 0.39793843030929565, "eval_drug_abuse,weapons,banned_substance/accuracy": 0.9720198289915827, "eval_drug_abuse,weapons,banned_substance/f1": 0.771839392295171, "eval_drug_abuse,weapons,banned_substance/fpr": 0.020113524185587327, "eval_drug_abuse,weapons,banned_substance/precision": 0.7137481184144506, "eval_drug_abuse,weapons,banned_substance/recall": 0.8402244536326049, "eval_drug_abuse,weapons,banned_substance/threshold": 0.35309353470802307, "eval_financial_crime,property_crime,theft/accuracy": 0.9599594104534718, "eval_financial_crime,property_crime,theft/f1": 0.803365738093293, "eval_financial_crime,property_crime,theft/fpr": 0.027145568803788902, "eval_financial_crime,property_crime,theft/precision": 0.7694835680751174, "eval_financial_crime,property_crime,theft/recall": 0.8403691676636472, "eval_financial_crime,property_crime,theft/threshold": 0.3881540596485138, "eval_flagged/accuracy": 0.8536780117776225, "eval_flagged/aucpr": 0.9028255992759388, "eval_flagged/f1": 0.8707117029720434, "eval_flagged/fpr": 0.18617559914488174, "eval_flagged/precision": 0.8564612670965503, "eval_flagged/recall": 0.8854443813338914, "eval_hate_speech,offensive_language/accuracy": 0.9488970955185149, "eval_hate_speech,offensive_language/f1": 0.7020946470131885, "eval_hate_speech,offensive_language/fpr": 0.023899141238808652, "eval_hate_speech,offensive_language/precision": 0.734577922077922, "eval_hate_speech,offensive_language/recall": 0.6723625557206538, "eval_hate_speech,offensive_language/threshold": 0.4248891770839691, "eval_loss": 0.08267437666654587, "eval_macro_f1": 0.6749479635403305, "eval_macro_precision": 0.6622176173074639, "eval_macro_recall": 0.6944232078069069, "eval_micro_f1": 0.7505847442834701, "eval_micro_precision": 0.7370378622971051, "eval_micro_recall": 0.7646389387769618, "eval_misinformation_regarding_ethics,laws_and_safety/accuracy": 0.9781415310909273, "eval_misinformation_regarding_ethics,laws_and_safety/f1": 0.23958333333333334, "eval_misinformation_regarding_ethics,laws_and_safety/fpr": 0.013303470690264867, "eval_misinformation_regarding_ethics,laws_and_safety/precision": 0.20762286860581744, "eval_misinformation_regarding_ethics,laws_and_safety/recall": 0.28317373461012313, "eval_misinformation_regarding_ethics,laws_and_safety/threshold": 0.1276526302099228, "eval_non_violent_unethical_behavior/accuracy": 0.8812755764048308, "eval_non_violent_unethical_behavior/f1": 0.695559442050932, "eval_non_violent_unethical_behavior/fpr": 0.06942368377345941, "eval_non_violent_unethical_behavior/precision": 0.7091415151778725, "eval_non_violent_unethical_behavior/recall": 0.6824878620458731, "eval_non_violent_unethical_behavior/threshold": 0.42536652088165283, "eval_privacy_violation/accuracy": 0.9810027614199687, "eval_privacy_violation/f1": 0.8094125500667557, "eval_privacy_violation/fpr": 0.010516553510184065, "eval_privacy_violation/precision": 0.8013879709187045, "eval_privacy_violation/recall": 0.8175994605529332, "eval_privacy_violation/threshold": 0.5139734148979187, "eval_runtime": 49.4988, "eval_samples_per_second": 1214.453, "eval_self_harm/accuracy": 0.9968060684699072, "eval_self_harm/f1": 0.7506493506493507, "eval_self_harm/fpr": 0.001189200053597747, "eval_self_harm/precision": 0.8027777777777778, "eval_self_harm/recall": 0.7048780487804878, "eval_self_harm/threshold": 0.5096423625946045, "eval_sexually_explicit,adult_content/accuracy": 0.9817014339421766, "eval_sexually_explicit,adult_content/f1": 0.6720333929636255, "eval_sexually_explicit,adult_content/fpr": 0.013295379003528366, "eval_sexually_explicit,adult_content/precision": 0.5909805977975878, "eval_sexually_explicit,adult_content/recall": 0.778852798894264, "eval_sexually_explicit,adult_content/threshold": 0.24364246428012848, "eval_steps_per_second": 18.99, "eval_terrorism,organized_crime/accuracy": 0.9895365472269355, "eval_terrorism,organized_crime/f1": 0.45065502183406114, "eval_terrorism,organized_crime/fpr": 0.006808310834604989, "eval_terrorism,organized_crime/precision": 0.3885542168674699, "eval_terrorism,organized_crime/recall": 0.5363825363825364, "eval_terrorism,organized_crime/threshold": 0.167763352394104, "eval_violence,aiding_and_abetting,incitement/accuracy": 0.9205343181288884, "eval_violence,aiding_and_abetting,incitement/f1": 0.8543375514560146, "eval_violence,aiding_and_abetting,incitement/fpr": 0.063324418657359, "eval_violence,aiding_and_abetting,incitement/precision": 0.8337201690174374, "eval_violence,aiding_and_abetting,incitement/recall": 0.8760005002501251, "eval_violence,aiding_and_abetting,incitement/threshold": 0.44577082991600037, "step": 169070 }, { "epoch": 5.0, "step": 169070, "total_flos": 5.970539301846385e+17, "train_loss": 0.08776032416230509, "train_runtime": 27301.134, "train_samples_per_second": 198.168, "train_steps_per_second": 12.386 } ], "logging_steps": 10, "max_steps": 338140, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.970539301846385e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }