{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 18123, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016553550736633008, "grad_norm": 7.984700679779053, "learning_rate": 4.964147821290679e-08, "loss": 1.0634, "step": 10 }, { "epoch": 0.0033107101473266016, "grad_norm": 6.1845703125, "learning_rate": 1.0479867622724768e-07, "loss": 1.1529, "step": 20 }, { "epoch": 0.004966065220989902, "grad_norm": 7.543320655822754, "learning_rate": 1.5995587424158854e-07, "loss": 1.2636, "step": 30 }, { "epoch": 0.006621420294653203, "grad_norm": 6.893922328948975, "learning_rate": 2.1511307225592942e-07, "loss": 1.2297, "step": 40 }, { "epoch": 0.008276775368316504, "grad_norm": 7.185118675231934, "learning_rate": 2.702702702702703e-07, "loss": 1.1841, "step": 50 }, { "epoch": 0.009932130441979804, "grad_norm": 8.388279914855957, "learning_rate": 3.2542746828461117e-07, "loss": 1.1145, "step": 60 }, { "epoch": 0.011587485515643106, "grad_norm": 6.8379292488098145, "learning_rate": 3.805846662989521e-07, "loss": 1.1385, "step": 70 }, { "epoch": 0.013242840589306406, "grad_norm": 8.465346336364746, "learning_rate": 4.357418643132929e-07, "loss": 1.1756, "step": 80 }, { "epoch": 0.014898195662969706, "grad_norm": 6.7175493240356445, "learning_rate": 4.908990623276337e-07, "loss": 1.029, "step": 90 }, { "epoch": 0.01655355073663301, "grad_norm": 4.6793212890625, "learning_rate": 5.460562603419746e-07, "loss": 1.1213, "step": 100 }, { "epoch": 0.01820890581029631, "grad_norm": 4.11850643157959, "learning_rate": 6.012134583563156e-07, "loss": 1.0167, "step": 110 }, { "epoch": 0.01986426088395961, "grad_norm": 6.64889669418335, "learning_rate": 6.563706563706564e-07, "loss": 1.0059, "step": 120 }, { "epoch": 0.02151961595762291, "grad_norm": 3.417647123336792, "learning_rate": 7.115278543849972e-07, "loss": 1.0292, "step": 130 }, { "epoch": 0.023174971031286212, "grad_norm": 3.204617738723755, "learning_rate": 7.666850523993381e-07, "loss": 0.9241, "step": 140 }, { "epoch": 0.02483032610494951, "grad_norm": 5.174026966094971, "learning_rate": 8.21842250413679e-07, "loss": 0.9826, "step": 150 }, { "epoch": 0.026485681178612813, "grad_norm": 5.526364803314209, "learning_rate": 8.769994484280199e-07, "loss": 0.9245, "step": 160 }, { "epoch": 0.028141036252276114, "grad_norm": 5.8991570472717285, "learning_rate": 9.321566464423607e-07, "loss": 0.9536, "step": 170 }, { "epoch": 0.029796391325939413, "grad_norm": 4.1310248374938965, "learning_rate": 9.873138444567016e-07, "loss": 0.9059, "step": 180 }, { "epoch": 0.031451746399602715, "grad_norm": 4.935130596160889, "learning_rate": 1.0424710424710426e-06, "loss": 0.9174, "step": 190 }, { "epoch": 0.03310710147326602, "grad_norm": 3.474487066268921, "learning_rate": 1.0976282404853834e-06, "loss": 0.9133, "step": 200 }, { "epoch": 0.03476245654692932, "grad_norm": 10.657366752624512, "learning_rate": 1.1527854384997244e-06, "loss": 0.7568, "step": 210 }, { "epoch": 0.03641781162059262, "grad_norm": 3.7891452312469482, "learning_rate": 1.2079426365140651e-06, "loss": 0.8258, "step": 220 }, { "epoch": 0.038073166694255915, "grad_norm": 5.292945861816406, "learning_rate": 1.263099834528406e-06, "loss": 0.784, "step": 230 }, { "epoch": 0.03972852176791922, "grad_norm": 4.9213690757751465, "learning_rate": 1.3182570325427469e-06, "loss": 0.7955, "step": 240 }, { "epoch": 0.04138387684158252, "grad_norm": 4.905694484710693, "learning_rate": 1.3734142305570876e-06, "loss": 0.877, "step": 250 }, { "epoch": 0.04303923191524582, "grad_norm": 3.1677496433258057, "learning_rate": 1.4285714285714286e-06, "loss": 0.8612, "step": 260 }, { "epoch": 0.04469458698890912, "grad_norm": 3.0269603729248047, "learning_rate": 1.4837286265857694e-06, "loss": 0.821, "step": 270 }, { "epoch": 0.046349942062572425, "grad_norm": 3.246804714202881, "learning_rate": 1.5388858246001104e-06, "loss": 0.8345, "step": 280 }, { "epoch": 0.04800529713623572, "grad_norm": 3.3569605350494385, "learning_rate": 1.5940430226144514e-06, "loss": 0.7517, "step": 290 }, { "epoch": 0.04966065220989902, "grad_norm": 3.597111701965332, "learning_rate": 1.6492002206287921e-06, "loss": 0.8384, "step": 300 }, { "epoch": 0.05131600728356232, "grad_norm": 3.895134925842285, "learning_rate": 1.7043574186431331e-06, "loss": 0.8339, "step": 310 }, { "epoch": 0.052971362357225625, "grad_norm": 4.427375316619873, "learning_rate": 1.7595146166574739e-06, "loss": 0.784, "step": 320 }, { "epoch": 0.05462671743088893, "grad_norm": 3.222888708114624, "learning_rate": 1.8146718146718149e-06, "loss": 0.7882, "step": 330 }, { "epoch": 0.05628207250455223, "grad_norm": 3.1282763481140137, "learning_rate": 1.8698290126861556e-06, "loss": 0.7937, "step": 340 }, { "epoch": 0.05793742757821553, "grad_norm": 4.77147102355957, "learning_rate": 1.9249862107004966e-06, "loss": 0.7097, "step": 350 }, { "epoch": 0.059592782651878826, "grad_norm": 4.3493571281433105, "learning_rate": 1.9801434087148376e-06, "loss": 0.6705, "step": 360 }, { "epoch": 0.06124813772554213, "grad_norm": 3.3635661602020264, "learning_rate": 2.035300606729178e-06, "loss": 0.8396, "step": 370 }, { "epoch": 0.06290349279920543, "grad_norm": 4.846203804016113, "learning_rate": 2.090457804743519e-06, "loss": 0.7266, "step": 380 }, { "epoch": 0.06455884787286872, "grad_norm": 4.919671058654785, "learning_rate": 2.14561500275786e-06, "loss": 0.6616, "step": 390 }, { "epoch": 0.06621420294653203, "grad_norm": 3.959589958190918, "learning_rate": 2.200772200772201e-06, "loss": 0.7379, "step": 400 }, { "epoch": 0.06786955802019533, "grad_norm": 4.111237049102783, "learning_rate": 2.255929398786542e-06, "loss": 0.7201, "step": 410 }, { "epoch": 0.06952491309385864, "grad_norm": 4.0857343673706055, "learning_rate": 2.3110865968008826e-06, "loss": 0.7806, "step": 420 }, { "epoch": 0.07118026816752193, "grad_norm": 2.696610689163208, "learning_rate": 2.3662437948152236e-06, "loss": 0.7176, "step": 430 }, { "epoch": 0.07283562324118524, "grad_norm": 3.1696786880493164, "learning_rate": 2.4214009928295646e-06, "loss": 0.7062, "step": 440 }, { "epoch": 0.07449097831484854, "grad_norm": 4.226884365081787, "learning_rate": 2.4765581908439056e-06, "loss": 0.7227, "step": 450 }, { "epoch": 0.07614633338851183, "grad_norm": 2.852306842803955, "learning_rate": 2.531715388858246e-06, "loss": 0.7354, "step": 460 }, { "epoch": 0.07780168846217514, "grad_norm": 3.600233316421509, "learning_rate": 2.5868725868725867e-06, "loss": 0.7057, "step": 470 }, { "epoch": 0.07945704353583843, "grad_norm": 6.20136833190918, "learning_rate": 2.642029784886928e-06, "loss": 0.713, "step": 480 }, { "epoch": 0.08111239860950174, "grad_norm": 5.960463047027588, "learning_rate": 2.6971869829012687e-06, "loss": 0.7262, "step": 490 }, { "epoch": 0.08276775368316504, "grad_norm": 2.7353596687316895, "learning_rate": 2.7523441809156096e-06, "loss": 0.7249, "step": 500 }, { "epoch": 0.08442310875682833, "grad_norm": 2.9528822898864746, "learning_rate": 2.8075013789299506e-06, "loss": 0.7158, "step": 510 }, { "epoch": 0.08607846383049164, "grad_norm": 3.323730230331421, "learning_rate": 2.8626585769442916e-06, "loss": 0.704, "step": 520 }, { "epoch": 0.08773381890415494, "grad_norm": 3.2658655643463135, "learning_rate": 2.917815774958632e-06, "loss": 0.7194, "step": 530 }, { "epoch": 0.08938917397781825, "grad_norm": 3.035658121109009, "learning_rate": 2.9729729729729736e-06, "loss": 0.6506, "step": 540 }, { "epoch": 0.09104452905148154, "grad_norm": 3.595717668533325, "learning_rate": 3.028130170987314e-06, "loss": 0.7421, "step": 550 }, { "epoch": 0.09269988412514485, "grad_norm": 4.5476202964782715, "learning_rate": 3.083287369001655e-06, "loss": 0.704, "step": 560 }, { "epoch": 0.09435523919880814, "grad_norm": 3.4544177055358887, "learning_rate": 3.1384445670159957e-06, "loss": 0.6521, "step": 570 }, { "epoch": 0.09601059427247144, "grad_norm": 3.8915786743164062, "learning_rate": 3.193601765030337e-06, "loss": 0.6756, "step": 580 }, { "epoch": 0.09766594934613475, "grad_norm": 4.146472930908203, "learning_rate": 3.2487589630446776e-06, "loss": 0.7626, "step": 590 }, { "epoch": 0.09932130441979804, "grad_norm": 3.272199869155884, "learning_rate": 3.3039161610590186e-06, "loss": 0.7363, "step": 600 }, { "epoch": 0.10097665949346135, "grad_norm": 2.5908915996551514, "learning_rate": 3.359073359073359e-06, "loss": 0.649, "step": 610 }, { "epoch": 0.10263201456712465, "grad_norm": 3.507157325744629, "learning_rate": 3.4142305570877006e-06, "loss": 0.7292, "step": 620 }, { "epoch": 0.10428736964078796, "grad_norm": 3.2821595668792725, "learning_rate": 3.469387755102041e-06, "loss": 0.7024, "step": 630 }, { "epoch": 0.10594272471445125, "grad_norm": 3.930805206298828, "learning_rate": 3.524544953116382e-06, "loss": 0.7038, "step": 640 }, { "epoch": 0.10759807978811455, "grad_norm": 2.831280469894409, "learning_rate": 3.5797021511307227e-06, "loss": 0.6795, "step": 650 }, { "epoch": 0.10925343486177785, "grad_norm": 2.831242799758911, "learning_rate": 3.634859349145064e-06, "loss": 0.6852, "step": 660 }, { "epoch": 0.11090878993544115, "grad_norm": 2.9769375324249268, "learning_rate": 3.6900165471594046e-06, "loss": 0.6547, "step": 670 }, { "epoch": 0.11256414500910446, "grad_norm": 2.967987060546875, "learning_rate": 3.745173745173745e-06, "loss": 0.7238, "step": 680 }, { "epoch": 0.11421950008276775, "grad_norm": 3.29638671875, "learning_rate": 3.800330943188086e-06, "loss": 0.6848, "step": 690 }, { "epoch": 0.11587485515643106, "grad_norm": 4.40482759475708, "learning_rate": 3.855488141202427e-06, "loss": 0.673, "step": 700 }, { "epoch": 0.11753021023009436, "grad_norm": 3.206632375717163, "learning_rate": 3.910645339216768e-06, "loss": 0.6148, "step": 710 }, { "epoch": 0.11918556530375765, "grad_norm": 3.0711138248443604, "learning_rate": 3.965802537231109e-06, "loss": 0.6717, "step": 720 }, { "epoch": 0.12084092037742096, "grad_norm": 3.105243444442749, "learning_rate": 4.02095973524545e-06, "loss": 0.5857, "step": 730 }, { "epoch": 0.12249627545108425, "grad_norm": 4.079644680023193, "learning_rate": 4.07611693325979e-06, "loss": 0.6482, "step": 740 }, { "epoch": 0.12415163052474756, "grad_norm": 3.890226364135742, "learning_rate": 4.131274131274132e-06, "loss": 0.5871, "step": 750 }, { "epoch": 0.12580698559841086, "grad_norm": 2.5968997478485107, "learning_rate": 4.186431329288472e-06, "loss": 0.6882, "step": 760 }, { "epoch": 0.12746234067207415, "grad_norm": 2.7869668006896973, "learning_rate": 4.241588527302813e-06, "loss": 0.6667, "step": 770 }, { "epoch": 0.12911769574573745, "grad_norm": 3.2500760555267334, "learning_rate": 4.296745725317154e-06, "loss": 0.6381, "step": 780 }, { "epoch": 0.13077305081940077, "grad_norm": 3.1198246479034424, "learning_rate": 4.351902923331495e-06, "loss": 0.6226, "step": 790 }, { "epoch": 0.13242840589306407, "grad_norm": 3.8074584007263184, "learning_rate": 4.407060121345836e-06, "loss": 0.6221, "step": 800 }, { "epoch": 0.13408376096672736, "grad_norm": 2.311058521270752, "learning_rate": 4.462217319360177e-06, "loss": 0.6789, "step": 810 }, { "epoch": 0.13573911604039066, "grad_norm": 2.830004930496216, "learning_rate": 4.517374517374517e-06, "loss": 0.6512, "step": 820 }, { "epoch": 0.13739447111405395, "grad_norm": 3.7663865089416504, "learning_rate": 4.572531715388859e-06, "loss": 0.7057, "step": 830 }, { "epoch": 0.13904982618771727, "grad_norm": 2.8656136989593506, "learning_rate": 4.627688913403199e-06, "loss": 0.6411, "step": 840 }, { "epoch": 0.14070518126138057, "grad_norm": 3.1035988330841064, "learning_rate": 4.68284611141754e-06, "loss": 0.6221, "step": 850 }, { "epoch": 0.14236053633504386, "grad_norm": 3.384016990661621, "learning_rate": 4.738003309431881e-06, "loss": 0.6321, "step": 860 }, { "epoch": 0.14401589140870716, "grad_norm": 3.3458902835845947, "learning_rate": 4.793160507446222e-06, "loss": 0.6163, "step": 870 }, { "epoch": 0.14567124648237048, "grad_norm": 3.6326417922973633, "learning_rate": 4.848317705460563e-06, "loss": 0.6228, "step": 880 }, { "epoch": 0.14732660155603378, "grad_norm": 3.407176971435547, "learning_rate": 4.903474903474904e-06, "loss": 0.675, "step": 890 }, { "epoch": 0.14898195662969707, "grad_norm": 2.779155731201172, "learning_rate": 4.958632101489245e-06, "loss": 0.6416, "step": 900 }, { "epoch": 0.15063731170336037, "grad_norm": 3.1110782623291016, "learning_rate": 5.013789299503585e-06, "loss": 0.5804, "step": 910 }, { "epoch": 0.15229266677702366, "grad_norm": 3.0117592811584473, "learning_rate": 5.068946497517927e-06, "loss": 0.7233, "step": 920 }, { "epoch": 0.15394802185068698, "grad_norm": 3.690901756286621, "learning_rate": 5.124103695532267e-06, "loss": 0.6569, "step": 930 }, { "epoch": 0.15560337692435028, "grad_norm": 3.155195713043213, "learning_rate": 5.179260893546608e-06, "loss": 0.6537, "step": 940 }, { "epoch": 0.15725873199801357, "grad_norm": 3.386868715286255, "learning_rate": 5.234418091560949e-06, "loss": 0.6306, "step": 950 }, { "epoch": 0.15891408707167687, "grad_norm": 4.336483001708984, "learning_rate": 5.28957528957529e-06, "loss": 0.6967, "step": 960 }, { "epoch": 0.16056944214534016, "grad_norm": 3.386204481124878, "learning_rate": 5.344732487589631e-06, "loss": 0.6151, "step": 970 }, { "epoch": 0.16222479721900349, "grad_norm": 3.8770434856414795, "learning_rate": 5.399889685603972e-06, "loss": 0.6667, "step": 980 }, { "epoch": 0.16388015229266678, "grad_norm": 3.62699294090271, "learning_rate": 5.455046883618312e-06, "loss": 0.6323, "step": 990 }, { "epoch": 0.16553550736633008, "grad_norm": 2.923297882080078, "learning_rate": 5.510204081632653e-06, "loss": 0.6294, "step": 1000 }, { "epoch": 0.16719086243999337, "grad_norm": 3.1828577518463135, "learning_rate": 5.565361279646995e-06, "loss": 0.6644, "step": 1010 }, { "epoch": 0.16884621751365667, "grad_norm": 2.8371975421905518, "learning_rate": 5.620518477661335e-06, "loss": 0.6294, "step": 1020 }, { "epoch": 0.17050157258732, "grad_norm": 2.7191975116729736, "learning_rate": 5.675675675675676e-06, "loss": 0.7093, "step": 1030 }, { "epoch": 0.17215692766098328, "grad_norm": 2.4802534580230713, "learning_rate": 5.730832873690016e-06, "loss": 0.5608, "step": 1040 }, { "epoch": 0.17381228273464658, "grad_norm": 5.264163017272949, "learning_rate": 5.785990071704358e-06, "loss": 0.6454, "step": 1050 }, { "epoch": 0.17546763780830987, "grad_norm": 2.8554723262786865, "learning_rate": 5.841147269718699e-06, "loss": 0.6278, "step": 1060 }, { "epoch": 0.1771229928819732, "grad_norm": 3.11765718460083, "learning_rate": 5.896304467733039e-06, "loss": 0.6203, "step": 1070 }, { "epoch": 0.1787783479556365, "grad_norm": 2.88675594329834, "learning_rate": 5.95146166574738e-06, "loss": 0.6984, "step": 1080 }, { "epoch": 0.18043370302929979, "grad_norm": 2.93239688873291, "learning_rate": 6.006618863761722e-06, "loss": 0.6054, "step": 1090 }, { "epoch": 0.18208905810296308, "grad_norm": 3.4710001945495605, "learning_rate": 6.061776061776062e-06, "loss": 0.6106, "step": 1100 }, { "epoch": 0.18374441317662638, "grad_norm": 2.3426156044006348, "learning_rate": 6.116933259790403e-06, "loss": 0.6256, "step": 1110 }, { "epoch": 0.1853997682502897, "grad_norm": 5.0461602210998535, "learning_rate": 6.172090457804743e-06, "loss": 0.6798, "step": 1120 }, { "epoch": 0.187055123323953, "grad_norm": 2.646273374557495, "learning_rate": 6.227247655819085e-06, "loss": 0.5688, "step": 1130 }, { "epoch": 0.1887104783976163, "grad_norm": 3.764864444732666, "learning_rate": 6.282404853833426e-06, "loss": 0.5802, "step": 1140 }, { "epoch": 0.19036583347127958, "grad_norm": 2.7304487228393555, "learning_rate": 6.337562051847766e-06, "loss": 0.6034, "step": 1150 }, { "epoch": 0.19202118854494288, "grad_norm": 2.973935127258301, "learning_rate": 6.392719249862107e-06, "loss": 0.6278, "step": 1160 }, { "epoch": 0.1936765436186062, "grad_norm": 2.750199794769287, "learning_rate": 6.447876447876449e-06, "loss": 0.6995, "step": 1170 }, { "epoch": 0.1953318986922695, "grad_norm": 2.852870225906372, "learning_rate": 6.503033645890789e-06, "loss": 0.6436, "step": 1180 }, { "epoch": 0.1969872537659328, "grad_norm": 3.1429359912872314, "learning_rate": 6.55819084390513e-06, "loss": 0.6216, "step": 1190 }, { "epoch": 0.19864260883959609, "grad_norm": 2.8767406940460205, "learning_rate": 6.61334804191947e-06, "loss": 0.649, "step": 1200 }, { "epoch": 0.20029796391325938, "grad_norm": 4.3968281745910645, "learning_rate": 6.668505239933812e-06, "loss": 0.626, "step": 1210 }, { "epoch": 0.2019533189869227, "grad_norm": 2.600132465362549, "learning_rate": 6.723662437948153e-06, "loss": 0.6757, "step": 1220 }, { "epoch": 0.203608674060586, "grad_norm": 3.5382864475250244, "learning_rate": 6.778819635962493e-06, "loss": 0.6118, "step": 1230 }, { "epoch": 0.2052640291342493, "grad_norm": 2.8111588954925537, "learning_rate": 6.833976833976834e-06, "loss": 0.58, "step": 1240 }, { "epoch": 0.2069193842079126, "grad_norm": 3.0125858783721924, "learning_rate": 6.889134031991176e-06, "loss": 0.6049, "step": 1250 }, { "epoch": 0.2085747392815759, "grad_norm": 3.686331272125244, "learning_rate": 6.944291230005516e-06, "loss": 0.5898, "step": 1260 }, { "epoch": 0.2102300943552392, "grad_norm": 4.72364616394043, "learning_rate": 6.999448428019857e-06, "loss": 0.5901, "step": 1270 }, { "epoch": 0.2118854494289025, "grad_norm": 2.531151294708252, "learning_rate": 7.054605626034198e-06, "loss": 0.6588, "step": 1280 }, { "epoch": 0.2135408045025658, "grad_norm": 3.2532570362091064, "learning_rate": 7.109762824048539e-06, "loss": 0.5635, "step": 1290 }, { "epoch": 0.2151961595762291, "grad_norm": 3.614664316177368, "learning_rate": 7.16492002206288e-06, "loss": 0.5676, "step": 1300 }, { "epoch": 0.2168515146498924, "grad_norm": 4.736164569854736, "learning_rate": 7.22007722007722e-06, "loss": 0.6029, "step": 1310 }, { "epoch": 0.2185068697235557, "grad_norm": 3.391282320022583, "learning_rate": 7.275234418091561e-06, "loss": 0.6341, "step": 1320 }, { "epoch": 0.220162224797219, "grad_norm": 3.4282443523406982, "learning_rate": 7.330391616105902e-06, "loss": 0.6301, "step": 1330 }, { "epoch": 0.2218175798708823, "grad_norm": 2.516094923019409, "learning_rate": 7.385548814120243e-06, "loss": 0.616, "step": 1340 }, { "epoch": 0.2234729349445456, "grad_norm": 2.89660906791687, "learning_rate": 7.440706012134584e-06, "loss": 0.6081, "step": 1350 }, { "epoch": 0.22512829001820892, "grad_norm": 2.6692073345184326, "learning_rate": 7.495863210148925e-06, "loss": 0.5554, "step": 1360 }, { "epoch": 0.2267836450918722, "grad_norm": 2.7826545238494873, "learning_rate": 7.551020408163265e-06, "loss": 0.6513, "step": 1370 }, { "epoch": 0.2284390001655355, "grad_norm": 4.580424785614014, "learning_rate": 7.606177606177607e-06, "loss": 0.6532, "step": 1380 }, { "epoch": 0.2300943552391988, "grad_norm": 2.9366021156311035, "learning_rate": 7.661334804191947e-06, "loss": 0.6284, "step": 1390 }, { "epoch": 0.23174971031286212, "grad_norm": 2.872647762298584, "learning_rate": 7.716492002206288e-06, "loss": 0.65, "step": 1400 }, { "epoch": 0.23340506538652542, "grad_norm": 2.3038113117218018, "learning_rate": 7.77164920022063e-06, "loss": 0.6274, "step": 1410 }, { "epoch": 0.2350604204601887, "grad_norm": 2.807044744491577, "learning_rate": 7.82680639823497e-06, "loss": 0.5892, "step": 1420 }, { "epoch": 0.236715775533852, "grad_norm": 2.3904168605804443, "learning_rate": 7.881963596249311e-06, "loss": 0.6505, "step": 1430 }, { "epoch": 0.2383711306075153, "grad_norm": 2.934279441833496, "learning_rate": 7.937120794263652e-06, "loss": 0.5834, "step": 1440 }, { "epoch": 0.24002648568117863, "grad_norm": 2.8970916271209717, "learning_rate": 7.992277992277993e-06, "loss": 0.6136, "step": 1450 }, { "epoch": 0.24168184075484192, "grad_norm": 2.5647435188293457, "learning_rate": 8.047435190292334e-06, "loss": 0.6831, "step": 1460 }, { "epoch": 0.24333719582850522, "grad_norm": 3.708890438079834, "learning_rate": 8.102592388306675e-06, "loss": 0.6018, "step": 1470 }, { "epoch": 0.2449925509021685, "grad_norm": 3.2741858959198, "learning_rate": 8.157749586321016e-06, "loss": 0.5826, "step": 1480 }, { "epoch": 0.2466479059758318, "grad_norm": 2.7993056774139404, "learning_rate": 8.212906784335355e-06, "loss": 0.6209, "step": 1490 }, { "epoch": 0.24830326104949513, "grad_norm": 2.5464768409729004, "learning_rate": 8.268063982349698e-06, "loss": 0.6195, "step": 1500 }, { "epoch": 0.24995861612315842, "grad_norm": 2.438197612762451, "learning_rate": 8.323221180364039e-06, "loss": 0.607, "step": 1510 }, { "epoch": 0.2516139711968217, "grad_norm": 3.0494797229766846, "learning_rate": 8.378378378378378e-06, "loss": 0.6154, "step": 1520 }, { "epoch": 0.25326932627048504, "grad_norm": 3.6615447998046875, "learning_rate": 8.43353557639272e-06, "loss": 0.6569, "step": 1530 }, { "epoch": 0.2549246813441483, "grad_norm": 4.182124614715576, "learning_rate": 8.488692774407062e-06, "loss": 0.6069, "step": 1540 }, { "epoch": 0.25658003641781163, "grad_norm": 3.966855525970459, "learning_rate": 8.543849972421401e-06, "loss": 0.6061, "step": 1550 }, { "epoch": 0.2582353914914749, "grad_norm": 3.1076576709747314, "learning_rate": 8.599007170435742e-06, "loss": 0.5949, "step": 1560 }, { "epoch": 0.2598907465651382, "grad_norm": 2.6115076541900635, "learning_rate": 8.654164368450083e-06, "loss": 0.5933, "step": 1570 }, { "epoch": 0.26154610163880154, "grad_norm": 2.357980489730835, "learning_rate": 8.709321566464424e-06, "loss": 0.6573, "step": 1580 }, { "epoch": 0.2632014567124648, "grad_norm": 2.8000941276550293, "learning_rate": 8.764478764478765e-06, "loss": 0.6048, "step": 1590 }, { "epoch": 0.26485681178612813, "grad_norm": 2.700944185256958, "learning_rate": 8.819635962493106e-06, "loss": 0.6463, "step": 1600 }, { "epoch": 0.2665121668597914, "grad_norm": 2.6585984230041504, "learning_rate": 8.874793160507447e-06, "loss": 0.6004, "step": 1610 }, { "epoch": 0.2681675219334547, "grad_norm": 2.9849841594696045, "learning_rate": 8.929950358521788e-06, "loss": 0.5947, "step": 1620 }, { "epoch": 0.26982287700711804, "grad_norm": 2.6921303272247314, "learning_rate": 8.985107556536129e-06, "loss": 0.6284, "step": 1630 }, { "epoch": 0.2714782320807813, "grad_norm": 3.0850579738616943, "learning_rate": 9.04026475455047e-06, "loss": 0.5238, "step": 1640 }, { "epoch": 0.27313358715444463, "grad_norm": 3.351339340209961, "learning_rate": 9.09542195256481e-06, "loss": 0.635, "step": 1650 }, { "epoch": 0.2747889422281079, "grad_norm": 2.2097935676574707, "learning_rate": 9.15057915057915e-06, "loss": 0.6214, "step": 1660 }, { "epoch": 0.2764442973017712, "grad_norm": 3.295842170715332, "learning_rate": 9.205736348593493e-06, "loss": 0.6673, "step": 1670 }, { "epoch": 0.27809965237543455, "grad_norm": 3.469433069229126, "learning_rate": 9.260893546607832e-06, "loss": 0.6393, "step": 1680 }, { "epoch": 0.2797550074490978, "grad_norm": 2.8949124813079834, "learning_rate": 9.316050744622173e-06, "loss": 0.6166, "step": 1690 }, { "epoch": 0.28141036252276114, "grad_norm": 3.850156545639038, "learning_rate": 9.371207942636514e-06, "loss": 0.5904, "step": 1700 }, { "epoch": 0.28306571759642446, "grad_norm": 2.344604730606079, "learning_rate": 9.426365140650855e-06, "loss": 0.6567, "step": 1710 }, { "epoch": 0.2847210726700877, "grad_norm": 2.964304208755493, "learning_rate": 9.481522338665196e-06, "loss": 0.6023, "step": 1720 }, { "epoch": 0.28637642774375105, "grad_norm": 3.1953861713409424, "learning_rate": 9.536679536679537e-06, "loss": 0.6116, "step": 1730 }, { "epoch": 0.2880317828174143, "grad_norm": 2.340214967727661, "learning_rate": 9.591836734693878e-06, "loss": 0.5774, "step": 1740 }, { "epoch": 0.28968713789107764, "grad_norm": 2.7652084827423096, "learning_rate": 9.64699393270822e-06, "loss": 0.6332, "step": 1750 }, { "epoch": 0.29134249296474096, "grad_norm": 2.380476713180542, "learning_rate": 9.70215113072256e-06, "loss": 0.5401, "step": 1760 }, { "epoch": 0.29299784803840423, "grad_norm": 2.936915397644043, "learning_rate": 9.757308328736901e-06, "loss": 0.6103, "step": 1770 }, { "epoch": 0.29465320311206755, "grad_norm": 2.580601215362549, "learning_rate": 9.812465526751242e-06, "loss": 0.6312, "step": 1780 }, { "epoch": 0.2963085581857308, "grad_norm": 2.9639976024627686, "learning_rate": 9.867622724765583e-06, "loss": 0.5844, "step": 1790 }, { "epoch": 0.29796391325939414, "grad_norm": 2.4568283557891846, "learning_rate": 9.922779922779924e-06, "loss": 0.6088, "step": 1800 }, { "epoch": 0.29961926833305746, "grad_norm": 2.4476640224456787, "learning_rate": 9.977937120794265e-06, "loss": 0.5739, "step": 1810 }, { "epoch": 0.30127462340672073, "grad_norm": 2.506065845489502, "learning_rate": 9.999996660862644e-06, "loss": 0.6742, "step": 1820 }, { "epoch": 0.30292997848038405, "grad_norm": 2.4358372688293457, "learning_rate": 9.999976255039388e-06, "loss": 0.6403, "step": 1830 }, { "epoch": 0.3045853335540473, "grad_norm": 3.3008499145507812, "learning_rate": 9.999937298544802e-06, "loss": 0.6091, "step": 1840 }, { "epoch": 0.30624068862771064, "grad_norm": 3.557581901550293, "learning_rate": 9.99987979152342e-06, "loss": 0.6313, "step": 1850 }, { "epoch": 0.30789604370137397, "grad_norm": 2.6936228275299072, "learning_rate": 9.999803734188604e-06, "loss": 0.6611, "step": 1860 }, { "epoch": 0.30955139877503723, "grad_norm": 3.0474772453308105, "learning_rate": 9.999709126822536e-06, "loss": 0.5883, "step": 1870 }, { "epoch": 0.31120675384870056, "grad_norm": 2.5758018493652344, "learning_rate": 9.999595969776225e-06, "loss": 0.5915, "step": 1880 }, { "epoch": 0.3128621089223638, "grad_norm": 3.6697728633880615, "learning_rate": 9.999464263469503e-06, "loss": 0.5736, "step": 1890 }, { "epoch": 0.31451746399602715, "grad_norm": 2.458699941635132, "learning_rate": 9.999314008391014e-06, "loss": 0.6448, "step": 1900 }, { "epoch": 0.31617281906969047, "grad_norm": 2.6207146644592285, "learning_rate": 9.999145205098234e-06, "loss": 0.57, "step": 1910 }, { "epoch": 0.31782817414335374, "grad_norm": 1.6553720235824585, "learning_rate": 9.998957854217444e-06, "loss": 0.5705, "step": 1920 }, { "epoch": 0.31948352921701706, "grad_norm": 2.402009963989258, "learning_rate": 9.998751956443748e-06, "loss": 0.6143, "step": 1930 }, { "epoch": 0.3211388842906803, "grad_norm": 2.5485851764678955, "learning_rate": 9.998527512541056e-06, "loss": 0.5782, "step": 1940 }, { "epoch": 0.32279423936434365, "grad_norm": 2.4615025520324707, "learning_rate": 9.998284523342088e-06, "loss": 0.6105, "step": 1950 }, { "epoch": 0.32444959443800697, "grad_norm": 2.0781445503234863, "learning_rate": 9.998022989748371e-06, "loss": 0.5435, "step": 1960 }, { "epoch": 0.32610494951167024, "grad_norm": 2.8392488956451416, "learning_rate": 9.997742912730238e-06, "loss": 0.6502, "step": 1970 }, { "epoch": 0.32776030458533356, "grad_norm": 3.554027557373047, "learning_rate": 9.997444293326812e-06, "loss": 0.6015, "step": 1980 }, { "epoch": 0.32941565965899683, "grad_norm": 2.370697259902954, "learning_rate": 9.99712713264602e-06, "loss": 0.6348, "step": 1990 }, { "epoch": 0.33107101473266015, "grad_norm": 2.2110366821289062, "learning_rate": 9.996791431864577e-06, "loss": 0.5641, "step": 2000 }, { "epoch": 0.3327263698063235, "grad_norm": 2.8644583225250244, "learning_rate": 9.996437192227979e-06, "loss": 0.5921, "step": 2010 }, { "epoch": 0.33438172487998674, "grad_norm": 1.8451977968215942, "learning_rate": 9.996064415050515e-06, "loss": 0.6194, "step": 2020 }, { "epoch": 0.33603707995365006, "grad_norm": 2.718644857406616, "learning_rate": 9.99567310171524e-06, "loss": 0.5981, "step": 2030 }, { "epoch": 0.33769243502731333, "grad_norm": 2.4472815990448, "learning_rate": 9.995263253673989e-06, "loss": 0.6079, "step": 2040 }, { "epoch": 0.33934779010097665, "grad_norm": 2.6010594367980957, "learning_rate": 9.994834872447357e-06, "loss": 0.608, "step": 2050 }, { "epoch": 0.34100314517464, "grad_norm": 3.013094425201416, "learning_rate": 9.994387959624707e-06, "loss": 0.5139, "step": 2060 }, { "epoch": 0.34265850024830324, "grad_norm": 2.3363730907440186, "learning_rate": 9.993922516864154e-06, "loss": 0.5739, "step": 2070 }, { "epoch": 0.34431385532196657, "grad_norm": 2.283562421798706, "learning_rate": 9.993438545892557e-06, "loss": 0.5872, "step": 2080 }, { "epoch": 0.3459692103956299, "grad_norm": 2.672161817550659, "learning_rate": 9.992936048505525e-06, "loss": 0.592, "step": 2090 }, { "epoch": 0.34762456546929316, "grad_norm": 2.5038950443267822, "learning_rate": 9.9924150265674e-06, "loss": 0.5921, "step": 2100 }, { "epoch": 0.3492799205429565, "grad_norm": 2.3302109241485596, "learning_rate": 9.991875482011251e-06, "loss": 0.6143, "step": 2110 }, { "epoch": 0.35093527561661975, "grad_norm": 3.3396990299224854, "learning_rate": 9.991317416838871e-06, "loss": 0.5817, "step": 2120 }, { "epoch": 0.35259063069028307, "grad_norm": 2.076221466064453, "learning_rate": 9.990740833120765e-06, "loss": 0.5941, "step": 2130 }, { "epoch": 0.3542459857639464, "grad_norm": 2.247899293899536, "learning_rate": 9.990145732996149e-06, "loss": 0.6015, "step": 2140 }, { "epoch": 0.35590134083760966, "grad_norm": 2.4962527751922607, "learning_rate": 9.989532118672933e-06, "loss": 0.6052, "step": 2150 }, { "epoch": 0.357556695911273, "grad_norm": 2.5895111560821533, "learning_rate": 9.988899992427719e-06, "loss": 0.6154, "step": 2160 }, { "epoch": 0.35921205098493625, "grad_norm": 2.0279507637023926, "learning_rate": 9.988249356605793e-06, "loss": 0.5683, "step": 2170 }, { "epoch": 0.36086740605859957, "grad_norm": 2.5845603942871094, "learning_rate": 9.98758021362111e-06, "loss": 0.6027, "step": 2180 }, { "epoch": 0.3625227611322629, "grad_norm": 2.4495720863342285, "learning_rate": 9.986892565956296e-06, "loss": 0.6118, "step": 2190 }, { "epoch": 0.36417811620592616, "grad_norm": 2.7284724712371826, "learning_rate": 9.986186416162624e-06, "loss": 0.6262, "step": 2200 }, { "epoch": 0.3658334712795895, "grad_norm": 3.050304412841797, "learning_rate": 9.985461766860021e-06, "loss": 0.5922, "step": 2210 }, { "epoch": 0.36748882635325275, "grad_norm": 2.5775296688079834, "learning_rate": 9.984718620737044e-06, "loss": 0.5898, "step": 2220 }, { "epoch": 0.3691441814269161, "grad_norm": 2.2817842960357666, "learning_rate": 9.983956980550877e-06, "loss": 0.5528, "step": 2230 }, { "epoch": 0.3707995365005794, "grad_norm": 2.512723445892334, "learning_rate": 9.983176849127323e-06, "loss": 0.6015, "step": 2240 }, { "epoch": 0.37245489157424266, "grad_norm": 3.1209611892700195, "learning_rate": 9.982378229360785e-06, "loss": 0.5624, "step": 2250 }, { "epoch": 0.374110246647906, "grad_norm": 2.4052345752716064, "learning_rate": 9.981561124214267e-06, "loss": 0.5279, "step": 2260 }, { "epoch": 0.37576560172156925, "grad_norm": 3.7475814819335938, "learning_rate": 9.980725536719352e-06, "loss": 0.6319, "step": 2270 }, { "epoch": 0.3774209567952326, "grad_norm": 2.4055349826812744, "learning_rate": 9.979871469976197e-06, "loss": 0.5653, "step": 2280 }, { "epoch": 0.3790763118688959, "grad_norm": 2.1154866218566895, "learning_rate": 9.978998927153516e-06, "loss": 0.6503, "step": 2290 }, { "epoch": 0.38073166694255917, "grad_norm": 2.19960618019104, "learning_rate": 9.978107911488581e-06, "loss": 0.5311, "step": 2300 }, { "epoch": 0.3823870220162225, "grad_norm": 2.6673924922943115, "learning_rate": 9.97719842628719e-06, "loss": 0.59, "step": 2310 }, { "epoch": 0.38404237708988576, "grad_norm": 2.573767900466919, "learning_rate": 9.976270474923675e-06, "loss": 0.559, "step": 2320 }, { "epoch": 0.3856977321635491, "grad_norm": 2.6069254875183105, "learning_rate": 9.975324060840874e-06, "loss": 0.6021, "step": 2330 }, { "epoch": 0.3873530872372124, "grad_norm": 2.925135374069214, "learning_rate": 9.97435918755013e-06, "loss": 0.5488, "step": 2340 }, { "epoch": 0.38900844231087567, "grad_norm": 2.631096601486206, "learning_rate": 9.973375858631266e-06, "loss": 0.5977, "step": 2350 }, { "epoch": 0.390663797384539, "grad_norm": 2.7871811389923096, "learning_rate": 9.972374077732585e-06, "loss": 0.6449, "step": 2360 }, { "epoch": 0.39231915245820226, "grad_norm": 1.6462632417678833, "learning_rate": 9.971353848570845e-06, "loss": 0.5528, "step": 2370 }, { "epoch": 0.3939745075318656, "grad_norm": 3.6753463745117188, "learning_rate": 9.97031517493125e-06, "loss": 0.562, "step": 2380 }, { "epoch": 0.3956298626055289, "grad_norm": 2.3143789768218994, "learning_rate": 9.969258060667435e-06, "loss": 0.59, "step": 2390 }, { "epoch": 0.39728521767919217, "grad_norm": 1.9230400323867798, "learning_rate": 9.96818250970146e-06, "loss": 0.5805, "step": 2400 }, { "epoch": 0.3989405727528555, "grad_norm": 2.318477153778076, "learning_rate": 9.96708852602378e-06, "loss": 0.6163, "step": 2410 }, { "epoch": 0.40059592782651876, "grad_norm": 2.983262300491333, "learning_rate": 9.965976113693238e-06, "loss": 0.5384, "step": 2420 }, { "epoch": 0.4022512829001821, "grad_norm": 2.5981662273406982, "learning_rate": 9.964845276837057e-06, "loss": 0.5324, "step": 2430 }, { "epoch": 0.4039066379738454, "grad_norm": 2.5291125774383545, "learning_rate": 9.96369601965081e-06, "loss": 0.5857, "step": 2440 }, { "epoch": 0.4055619930475087, "grad_norm": 2.275705575942993, "learning_rate": 9.962528346398418e-06, "loss": 0.5985, "step": 2450 }, { "epoch": 0.407217348121172, "grad_norm": 3.31367826461792, "learning_rate": 9.961342261412125e-06, "loss": 0.5318, "step": 2460 }, { "epoch": 0.4088727031948353, "grad_norm": 2.098184585571289, "learning_rate": 9.960137769092487e-06, "loss": 0.5643, "step": 2470 }, { "epoch": 0.4105280582684986, "grad_norm": 2.2636635303497314, "learning_rate": 9.958914873908353e-06, "loss": 0.5759, "step": 2480 }, { "epoch": 0.4121834133421619, "grad_norm": 2.2176895141601562, "learning_rate": 9.95767358039685e-06, "loss": 0.5145, "step": 2490 }, { "epoch": 0.4138387684158252, "grad_norm": 2.798469305038452, "learning_rate": 9.956413893163365e-06, "loss": 0.6119, "step": 2500 }, { "epoch": 0.4154941234894885, "grad_norm": 2.1799635887145996, "learning_rate": 9.95513581688153e-06, "loss": 0.5912, "step": 2510 }, { "epoch": 0.4171494785631518, "grad_norm": 3.491344690322876, "learning_rate": 9.953839356293202e-06, "loss": 0.5705, "step": 2520 }, { "epoch": 0.4188048336368151, "grad_norm": 3.2916722297668457, "learning_rate": 9.952524516208447e-06, "loss": 0.5886, "step": 2530 }, { "epoch": 0.4204601887104784, "grad_norm": 2.2452287673950195, "learning_rate": 9.951191301505519e-06, "loss": 0.4922, "step": 2540 }, { "epoch": 0.4221155437841417, "grad_norm": 2.5860681533813477, "learning_rate": 9.949839717130849e-06, "loss": 0.5827, "step": 2550 }, { "epoch": 0.423770898857805, "grad_norm": 2.4785280227661133, "learning_rate": 9.94846976809902e-06, "loss": 0.5541, "step": 2560 }, { "epoch": 0.4254262539314683, "grad_norm": 1.9815374612808228, "learning_rate": 9.947081459492751e-06, "loss": 0.5863, "step": 2570 }, { "epoch": 0.4270816090051316, "grad_norm": 2.4965639114379883, "learning_rate": 9.945674796462879e-06, "loss": 0.5708, "step": 2580 }, { "epoch": 0.4287369640787949, "grad_norm": 3.2701520919799805, "learning_rate": 9.944249784228335e-06, "loss": 0.5784, "step": 2590 }, { "epoch": 0.4303923191524582, "grad_norm": 2.430939197540283, "learning_rate": 9.942806428076132e-06, "loss": 0.5823, "step": 2600 }, { "epoch": 0.4320476742261215, "grad_norm": 2.519144296646118, "learning_rate": 9.941344733361344e-06, "loss": 0.5767, "step": 2610 }, { "epoch": 0.4337030292997848, "grad_norm": 2.906513214111328, "learning_rate": 9.939864705507073e-06, "loss": 0.548, "step": 2620 }, { "epoch": 0.4353583843734481, "grad_norm": 2.306670665740967, "learning_rate": 9.938366350004454e-06, "loss": 0.5714, "step": 2630 }, { "epoch": 0.4370137394471114, "grad_norm": 2.199439525604248, "learning_rate": 9.93684967241261e-06, "loss": 0.5586, "step": 2640 }, { "epoch": 0.4386690945207747, "grad_norm": 2.4085533618927, "learning_rate": 9.935314678358644e-06, "loss": 0.5811, "step": 2650 }, { "epoch": 0.440324449594438, "grad_norm": 2.153773307800293, "learning_rate": 9.933761373537621e-06, "loss": 0.5879, "step": 2660 }, { "epoch": 0.44197980466810133, "grad_norm": 2.2745254039764404, "learning_rate": 9.932189763712537e-06, "loss": 0.5682, "step": 2670 }, { "epoch": 0.4436351597417646, "grad_norm": 2.499537706375122, "learning_rate": 9.9305998547143e-06, "loss": 0.5803, "step": 2680 }, { "epoch": 0.4452905148154279, "grad_norm": 2.2464380264282227, "learning_rate": 9.928991652441717e-06, "loss": 0.5679, "step": 2690 }, { "epoch": 0.4469458698890912, "grad_norm": 2.874124526977539, "learning_rate": 9.92736516286146e-06, "loss": 0.5925, "step": 2700 }, { "epoch": 0.4486012249627545, "grad_norm": 2.706362247467041, "learning_rate": 9.925720392008056e-06, "loss": 0.5674, "step": 2710 }, { "epoch": 0.45025658003641783, "grad_norm": 2.4833574295043945, "learning_rate": 9.924057345983851e-06, "loss": 0.5762, "step": 2720 }, { "epoch": 0.4519119351100811, "grad_norm": 1.9858320951461792, "learning_rate": 9.922376030959e-06, "loss": 0.5669, "step": 2730 }, { "epoch": 0.4535672901837444, "grad_norm": 2.3540823459625244, "learning_rate": 9.920676453171438e-06, "loss": 0.5652, "step": 2740 }, { "epoch": 0.4552226452574077, "grad_norm": 2.1306300163269043, "learning_rate": 9.918958618926855e-06, "loss": 0.5218, "step": 2750 }, { "epoch": 0.456878000331071, "grad_norm": 2.7033262252807617, "learning_rate": 9.917222534598679e-06, "loss": 0.5731, "step": 2760 }, { "epoch": 0.45853335540473433, "grad_norm": 1.9918255805969238, "learning_rate": 9.915468206628046e-06, "loss": 0.5535, "step": 2770 }, { "epoch": 0.4601887104783976, "grad_norm": 2.858841896057129, "learning_rate": 9.913695641523777e-06, "loss": 0.601, "step": 2780 }, { "epoch": 0.4618440655520609, "grad_norm": 2.050906181335449, "learning_rate": 9.91190484586236e-06, "loss": 0.5605, "step": 2790 }, { "epoch": 0.46349942062572425, "grad_norm": 2.51007342338562, "learning_rate": 9.910095826287918e-06, "loss": 0.6002, "step": 2800 }, { "epoch": 0.4651547756993875, "grad_norm": 2.3648529052734375, "learning_rate": 9.908268589512187e-06, "loss": 0.6065, "step": 2810 }, { "epoch": 0.46681013077305084, "grad_norm": 2.8302104473114014, "learning_rate": 9.906423142314497e-06, "loss": 0.6004, "step": 2820 }, { "epoch": 0.4684654858467141, "grad_norm": 2.7113876342773438, "learning_rate": 9.904559491541735e-06, "loss": 0.5727, "step": 2830 }, { "epoch": 0.4701208409203774, "grad_norm": 1.8766558170318604, "learning_rate": 9.902677644108327e-06, "loss": 0.4911, "step": 2840 }, { "epoch": 0.47177619599404075, "grad_norm": 2.576292037963867, "learning_rate": 9.900777606996213e-06, "loss": 0.5602, "step": 2850 }, { "epoch": 0.473431551067704, "grad_norm": 2.50095796585083, "learning_rate": 9.898859387254823e-06, "loss": 0.5659, "step": 2860 }, { "epoch": 0.47508690614136734, "grad_norm": 2.3355982303619385, "learning_rate": 9.89692299200104e-06, "loss": 0.58, "step": 2870 }, { "epoch": 0.4767422612150306, "grad_norm": 2.1642611026763916, "learning_rate": 9.894968428419187e-06, "loss": 0.563, "step": 2880 }, { "epoch": 0.4783976162886939, "grad_norm": 2.645699977874756, "learning_rate": 9.892995703760988e-06, "loss": 0.5911, "step": 2890 }, { "epoch": 0.48005297136235725, "grad_norm": 3.1719911098480225, "learning_rate": 9.891004825345555e-06, "loss": 0.5638, "step": 2900 }, { "epoch": 0.4817083264360205, "grad_norm": 2.728663444519043, "learning_rate": 9.888995800559347e-06, "loss": 0.5808, "step": 2910 }, { "epoch": 0.48336368150968384, "grad_norm": 2.2633676528930664, "learning_rate": 9.886968636856153e-06, "loss": 0.4939, "step": 2920 }, { "epoch": 0.4850190365833471, "grad_norm": 1.8135114908218384, "learning_rate": 9.884923341757056e-06, "loss": 0.5163, "step": 2930 }, { "epoch": 0.48667439165701043, "grad_norm": 2.147280216217041, "learning_rate": 9.882859922850412e-06, "loss": 0.5427, "step": 2940 }, { "epoch": 0.48832974673067375, "grad_norm": 2.248060464859009, "learning_rate": 9.88077838779182e-06, "loss": 0.5357, "step": 2950 }, { "epoch": 0.489985101804337, "grad_norm": 2.9170844554901123, "learning_rate": 9.87867874430409e-06, "loss": 0.5257, "step": 2960 }, { "epoch": 0.49164045687800034, "grad_norm": 2.0134947299957275, "learning_rate": 9.87656100017722e-06, "loss": 0.559, "step": 2970 }, { "epoch": 0.4932958119516636, "grad_norm": 2.3166182041168213, "learning_rate": 9.87442516326836e-06, "loss": 0.5788, "step": 2980 }, { "epoch": 0.49495116702532693, "grad_norm": 2.9038329124450684, "learning_rate": 9.87227124150179e-06, "loss": 0.6224, "step": 2990 }, { "epoch": 0.49660652209899026, "grad_norm": 1.8098182678222656, "learning_rate": 9.870099242868887e-06, "loss": 0.5759, "step": 3000 }, { "epoch": 0.4982618771726535, "grad_norm": 2.722839593887329, "learning_rate": 9.867909175428096e-06, "loss": 0.4706, "step": 3010 }, { "epoch": 0.49991723224631684, "grad_norm": 2.420764446258545, "learning_rate": 9.8657010473049e-06, "loss": 0.577, "step": 3020 }, { "epoch": 0.5015725873199801, "grad_norm": 2.254530906677246, "learning_rate": 9.86347486669179e-06, "loss": 0.5189, "step": 3030 }, { "epoch": 0.5032279423936434, "grad_norm": 2.428337335586548, "learning_rate": 9.861230641848233e-06, "loss": 0.6204, "step": 3040 }, { "epoch": 0.5048832974673068, "grad_norm": 2.543137550354004, "learning_rate": 9.858968381100646e-06, "loss": 0.5617, "step": 3050 }, { "epoch": 0.5065386525409701, "grad_norm": 2.627063751220703, "learning_rate": 9.856688092842357e-06, "loss": 0.5348, "step": 3060 }, { "epoch": 0.5081940076146333, "grad_norm": 2.149599075317383, "learning_rate": 9.854389785533585e-06, "loss": 0.5694, "step": 3070 }, { "epoch": 0.5098493626882966, "grad_norm": 2.343366861343384, "learning_rate": 9.852073467701398e-06, "loss": 0.5729, "step": 3080 }, { "epoch": 0.5115047177619599, "grad_norm": 2.3842127323150635, "learning_rate": 9.849739147939685e-06, "loss": 0.5948, "step": 3090 }, { "epoch": 0.5131600728356233, "grad_norm": 2.269623041152954, "learning_rate": 9.84738683490913e-06, "loss": 0.5486, "step": 3100 }, { "epoch": 0.5148154279092866, "grad_norm": 2.9736173152923584, "learning_rate": 9.845016537337168e-06, "loss": 0.5615, "step": 3110 }, { "epoch": 0.5164707829829498, "grad_norm": 2.21465802192688, "learning_rate": 9.842628264017969e-06, "loss": 0.5914, "step": 3120 }, { "epoch": 0.5181261380566131, "grad_norm": 2.490063428878784, "learning_rate": 9.840222023812383e-06, "loss": 0.5833, "step": 3130 }, { "epoch": 0.5197814931302764, "grad_norm": 2.3841171264648438, "learning_rate": 9.83779782564793e-06, "loss": 0.577, "step": 3140 }, { "epoch": 0.5214368482039398, "grad_norm": 1.9457083940505981, "learning_rate": 9.835355678518754e-06, "loss": 0.5872, "step": 3150 }, { "epoch": 0.5230922032776031, "grad_norm": 2.5294015407562256, "learning_rate": 9.83289559148559e-06, "loss": 0.504, "step": 3160 }, { "epoch": 0.5247475583512663, "grad_norm": 3.2622299194335938, "learning_rate": 9.830417573675737e-06, "loss": 0.5809, "step": 3170 }, { "epoch": 0.5264029134249296, "grad_norm": 2.026658773422241, "learning_rate": 9.827921634283015e-06, "loss": 0.5413, "step": 3180 }, { "epoch": 0.5280582684985929, "grad_norm": 2.9788129329681396, "learning_rate": 9.825407782567738e-06, "loss": 0.5578, "step": 3190 }, { "epoch": 0.5297136235722563, "grad_norm": 2.1878185272216797, "learning_rate": 9.822876027856679e-06, "loss": 0.5203, "step": 3200 }, { "epoch": 0.5313689786459196, "grad_norm": 2.129364013671875, "learning_rate": 9.820326379543032e-06, "loss": 0.5794, "step": 3210 }, { "epoch": 0.5330243337195828, "grad_norm": 2.1982228755950928, "learning_rate": 9.817758847086381e-06, "loss": 0.5827, "step": 3220 }, { "epoch": 0.5346796887932461, "grad_norm": 2.233294725418091, "learning_rate": 9.815173440012657e-06, "loss": 0.5868, "step": 3230 }, { "epoch": 0.5363350438669094, "grad_norm": 2.100982666015625, "learning_rate": 9.81257016791412e-06, "loss": 0.5339, "step": 3240 }, { "epoch": 0.5379903989405728, "grad_norm": 2.1711225509643555, "learning_rate": 9.809949040449298e-06, "loss": 0.5841, "step": 3250 }, { "epoch": 0.5396457540142361, "grad_norm": 2.9437408447265625, "learning_rate": 9.807310067342976e-06, "loss": 0.5521, "step": 3260 }, { "epoch": 0.5413011090878993, "grad_norm": 4.610905170440674, "learning_rate": 9.804653258386145e-06, "loss": 0.5947, "step": 3270 }, { "epoch": 0.5429564641615626, "grad_norm": 2.275070905685425, "learning_rate": 9.801978623435967e-06, "loss": 0.5725, "step": 3280 }, { "epoch": 0.544611819235226, "grad_norm": 2.3280909061431885, "learning_rate": 9.799286172415746e-06, "loss": 0.5722, "step": 3290 }, { "epoch": 0.5462671743088893, "grad_norm": 2.2407679557800293, "learning_rate": 9.796575915314884e-06, "loss": 0.5641, "step": 3300 }, { "epoch": 0.5479225293825526, "grad_norm": 2.3367135524749756, "learning_rate": 9.793847862188848e-06, "loss": 0.5626, "step": 3310 }, { "epoch": 0.5495778844562158, "grad_norm": 1.977503776550293, "learning_rate": 9.791102023159125e-06, "loss": 0.5515, "step": 3320 }, { "epoch": 0.5512332395298791, "grad_norm": 2.1004018783569336, "learning_rate": 9.7883384084132e-06, "loss": 0.5421, "step": 3330 }, { "epoch": 0.5528885946035424, "grad_norm": 2.568941116333008, "learning_rate": 9.7855570282045e-06, "loss": 0.5618, "step": 3340 }, { "epoch": 0.5545439496772058, "grad_norm": 2.1921818256378174, "learning_rate": 9.782757892852367e-06, "loss": 0.5418, "step": 3350 }, { "epoch": 0.5561993047508691, "grad_norm": 2.2598955631256104, "learning_rate": 9.779941012742025e-06, "loss": 0.5643, "step": 3360 }, { "epoch": 0.5578546598245324, "grad_norm": 2.0559098720550537, "learning_rate": 9.77710639832452e-06, "loss": 0.6003, "step": 3370 }, { "epoch": 0.5595100148981956, "grad_norm": 2.366570234298706, "learning_rate": 9.774254060116703e-06, "loss": 0.5208, "step": 3380 }, { "epoch": 0.561165369971859, "grad_norm": 2.092085123062134, "learning_rate": 9.771384008701185e-06, "loss": 0.5987, "step": 3390 }, { "epoch": 0.5628207250455223, "grad_norm": 2.402676820755005, "learning_rate": 9.768496254726293e-06, "loss": 0.5836, "step": 3400 }, { "epoch": 0.5644760801191856, "grad_norm": 2.451547861099243, "learning_rate": 9.765590808906029e-06, "loss": 0.5874, "step": 3410 }, { "epoch": 0.5661314351928489, "grad_norm": 1.9011714458465576, "learning_rate": 9.76266768202004e-06, "loss": 0.495, "step": 3420 }, { "epoch": 0.5677867902665121, "grad_norm": 2.3859992027282715, "learning_rate": 9.759726884913572e-06, "loss": 0.5934, "step": 3430 }, { "epoch": 0.5694421453401755, "grad_norm": 2.100632905960083, "learning_rate": 9.756768428497427e-06, "loss": 0.5215, "step": 3440 }, { "epoch": 0.5710975004138388, "grad_norm": 2.366837501525879, "learning_rate": 9.753792323747928e-06, "loss": 0.4966, "step": 3450 }, { "epoch": 0.5727528554875021, "grad_norm": 2.0204997062683105, "learning_rate": 9.75079858170688e-06, "loss": 0.5586, "step": 3460 }, { "epoch": 0.5744082105611654, "grad_norm": 2.957526683807373, "learning_rate": 9.747787213481511e-06, "loss": 0.5437, "step": 3470 }, { "epoch": 0.5760635656348286, "grad_norm": 2.0730838775634766, "learning_rate": 9.744758230244465e-06, "loss": 0.5447, "step": 3480 }, { "epoch": 0.577718920708492, "grad_norm": 2.0039265155792236, "learning_rate": 9.741711643233724e-06, "loss": 0.5071, "step": 3490 }, { "epoch": 0.5793742757821553, "grad_norm": 1.7638771533966064, "learning_rate": 9.73864746375259e-06, "loss": 0.5415, "step": 3500 }, { "epoch": 0.5810296308558186, "grad_norm": 2.048380136489868, "learning_rate": 9.735565703169634e-06, "loss": 0.5371, "step": 3510 }, { "epoch": 0.5826849859294819, "grad_norm": 2.077610492706299, "learning_rate": 9.732466372918656e-06, "loss": 0.5548, "step": 3520 }, { "epoch": 0.5843403410031451, "grad_norm": 2.1865971088409424, "learning_rate": 9.729349484498642e-06, "loss": 0.5595, "step": 3530 }, { "epoch": 0.5859956960768085, "grad_norm": 2.1196391582489014, "learning_rate": 9.726215049473722e-06, "loss": 0.5333, "step": 3540 }, { "epoch": 0.5876510511504718, "grad_norm": 2.613009452819824, "learning_rate": 9.723063079473124e-06, "loss": 0.5031, "step": 3550 }, { "epoch": 0.5893064062241351, "grad_norm": 2.1916675567626953, "learning_rate": 9.719893586191137e-06, "loss": 0.4728, "step": 3560 }, { "epoch": 0.5909617612977984, "grad_norm": 1.9465389251708984, "learning_rate": 9.716706581387065e-06, "loss": 0.5766, "step": 3570 }, { "epoch": 0.5926171163714616, "grad_norm": 2.581099271774292, "learning_rate": 9.713502076885174e-06, "loss": 0.5806, "step": 3580 }, { "epoch": 0.594272471445125, "grad_norm": 2.1889805793762207, "learning_rate": 9.710280084574667e-06, "loss": 0.5075, "step": 3590 }, { "epoch": 0.5959278265187883, "grad_norm": 1.5537147521972656, "learning_rate": 9.707040616409623e-06, "loss": 0.5388, "step": 3600 }, { "epoch": 0.5975831815924516, "grad_norm": 2.479175329208374, "learning_rate": 9.703783684408961e-06, "loss": 0.5215, "step": 3610 }, { "epoch": 0.5992385366661149, "grad_norm": 2.344923257827759, "learning_rate": 9.700509300656395e-06, "loss": 0.5267, "step": 3620 }, { "epoch": 0.6008938917397781, "grad_norm": 3.7248928546905518, "learning_rate": 9.697217477300385e-06, "loss": 0.5766, "step": 3630 }, { "epoch": 0.6025492468134415, "grad_norm": 2.325535297393799, "learning_rate": 9.693908226554094e-06, "loss": 0.5377, "step": 3640 }, { "epoch": 0.6042046018871048, "grad_norm": 2.5930399894714355, "learning_rate": 9.690581560695346e-06, "loss": 0.5351, "step": 3650 }, { "epoch": 0.6058599569607681, "grad_norm": 2.1282172203063965, "learning_rate": 9.68723749206658e-06, "loss": 0.5148, "step": 3660 }, { "epoch": 0.6075153120344314, "grad_norm": 2.241332530975342, "learning_rate": 9.683876033074793e-06, "loss": 0.5813, "step": 3670 }, { "epoch": 0.6091706671080946, "grad_norm": 1.9913647174835205, "learning_rate": 9.680497196191511e-06, "loss": 0.5826, "step": 3680 }, { "epoch": 0.610826022181758, "grad_norm": 2.073176622390747, "learning_rate": 9.677100993952732e-06, "loss": 0.5039, "step": 3690 }, { "epoch": 0.6124813772554213, "grad_norm": 2.6423120498657227, "learning_rate": 9.673687438958883e-06, "loss": 0.4948, "step": 3700 }, { "epoch": 0.6141367323290846, "grad_norm": 2.061516761779785, "learning_rate": 9.670256543874769e-06, "loss": 0.5831, "step": 3710 }, { "epoch": 0.6157920874027479, "grad_norm": 2.189225196838379, "learning_rate": 9.666808321429534e-06, "loss": 0.5646, "step": 3720 }, { "epoch": 0.6174474424764111, "grad_norm": 2.219355344772339, "learning_rate": 9.663342784416609e-06, "loss": 0.553, "step": 3730 }, { "epoch": 0.6191027975500745, "grad_norm": 2.61342453956604, "learning_rate": 9.659859945693658e-06, "loss": 0.5279, "step": 3740 }, { "epoch": 0.6207581526237378, "grad_norm": 3.1807034015655518, "learning_rate": 9.656359818182543e-06, "loss": 0.5833, "step": 3750 }, { "epoch": 0.6224135076974011, "grad_norm": 2.533095121383667, "learning_rate": 9.65284241486927e-06, "loss": 0.5927, "step": 3760 }, { "epoch": 0.6240688627710644, "grad_norm": 2.798417329788208, "learning_rate": 9.649307748803939e-06, "loss": 0.5413, "step": 3770 }, { "epoch": 0.6257242178447276, "grad_norm": 2.3580000400543213, "learning_rate": 9.645755833100699e-06, "loss": 0.553, "step": 3780 }, { "epoch": 0.627379572918391, "grad_norm": 1.9809789657592773, "learning_rate": 9.642186680937695e-06, "loss": 0.4882, "step": 3790 }, { "epoch": 0.6290349279920543, "grad_norm": 2.5609359741210938, "learning_rate": 9.638600305557025e-06, "loss": 0.5164, "step": 3800 }, { "epoch": 0.6306902830657176, "grad_norm": 2.4713363647460938, "learning_rate": 9.634996720264684e-06, "loss": 0.535, "step": 3810 }, { "epoch": 0.6323456381393809, "grad_norm": 2.4818222522735596, "learning_rate": 9.631375938430525e-06, "loss": 0.5605, "step": 3820 }, { "epoch": 0.6340009932130442, "grad_norm": 2.3091225624084473, "learning_rate": 9.627737973488194e-06, "loss": 0.5705, "step": 3830 }, { "epoch": 0.6356563482867075, "grad_norm": 2.624854803085327, "learning_rate": 9.624082838935096e-06, "loss": 0.4703, "step": 3840 }, { "epoch": 0.6373117033603708, "grad_norm": 1.9231913089752197, "learning_rate": 9.620410548332336e-06, "loss": 0.4887, "step": 3850 }, { "epoch": 0.6389670584340341, "grad_norm": 2.4588680267333984, "learning_rate": 9.616721115304669e-06, "loss": 0.4479, "step": 3860 }, { "epoch": 0.6406224135076974, "grad_norm": 2.4490933418273926, "learning_rate": 9.61301455354045e-06, "loss": 0.5036, "step": 3870 }, { "epoch": 0.6422777685813607, "grad_norm": 2.085653305053711, "learning_rate": 9.609290876791589e-06, "loss": 0.5332, "step": 3880 }, { "epoch": 0.643933123655024, "grad_norm": 2.1635732650756836, "learning_rate": 9.60555009887349e-06, "loss": 0.5819, "step": 3890 }, { "epoch": 0.6455884787286873, "grad_norm": 1.8872817754745483, "learning_rate": 9.601792233665007e-06, "loss": 0.5124, "step": 3900 }, { "epoch": 0.6472438338023506, "grad_norm": 2.5642919540405273, "learning_rate": 9.598017295108395e-06, "loss": 0.5272, "step": 3910 }, { "epoch": 0.6488991888760139, "grad_norm": 2.362342357635498, "learning_rate": 9.594225297209245e-06, "loss": 0.512, "step": 3920 }, { "epoch": 0.6505545439496772, "grad_norm": 2.04681134223938, "learning_rate": 9.590416254036447e-06, "loss": 0.5667, "step": 3930 }, { "epoch": 0.6522098990233405, "grad_norm": 2.4778685569763184, "learning_rate": 9.586590179722131e-06, "loss": 0.5527, "step": 3940 }, { "epoch": 0.6538652540970038, "grad_norm": 2.4283761978149414, "learning_rate": 9.58274708846161e-06, "loss": 0.5567, "step": 3950 }, { "epoch": 0.6555206091706671, "grad_norm": 2.188366413116455, "learning_rate": 9.57888699451334e-06, "loss": 0.5653, "step": 3960 }, { "epoch": 0.6571759642443304, "grad_norm": 1.9464526176452637, "learning_rate": 9.575009912198853e-06, "loss": 0.5482, "step": 3970 }, { "epoch": 0.6588313193179937, "grad_norm": 2.024707555770874, "learning_rate": 9.571115855902715e-06, "loss": 0.5105, "step": 3980 }, { "epoch": 0.660486674391657, "grad_norm": 2.27622127532959, "learning_rate": 9.567204840072466e-06, "loss": 0.5506, "step": 3990 }, { "epoch": 0.6621420294653203, "grad_norm": 1.9838156700134277, "learning_rate": 9.563276879218568e-06, "loss": 0.5619, "step": 4000 }, { "epoch": 0.6637973845389836, "grad_norm": 2.1720376014709473, "learning_rate": 9.559331987914354e-06, "loss": 0.549, "step": 4010 }, { "epoch": 0.665452739612647, "grad_norm": 1.8095346689224243, "learning_rate": 9.555370180795967e-06, "loss": 0.5652, "step": 4020 }, { "epoch": 0.6671080946863102, "grad_norm": 2.1045024394989014, "learning_rate": 9.551391472562316e-06, "loss": 0.5002, "step": 4030 }, { "epoch": 0.6687634497599735, "grad_norm": 2.3684701919555664, "learning_rate": 9.547395877975012e-06, "loss": 0.5148, "step": 4040 }, { "epoch": 0.6704188048336368, "grad_norm": 2.073254108428955, "learning_rate": 9.543383411858318e-06, "loss": 0.5044, "step": 4050 }, { "epoch": 0.6720741599073001, "grad_norm": 2.361924171447754, "learning_rate": 9.539354089099092e-06, "loss": 0.5516, "step": 4060 }, { "epoch": 0.6737295149809635, "grad_norm": 2.4293785095214844, "learning_rate": 9.535307924646735e-06, "loss": 0.5263, "step": 4070 }, { "epoch": 0.6753848700546267, "grad_norm": 1.8119914531707764, "learning_rate": 9.531244933513129e-06, "loss": 0.5128, "step": 4080 }, { "epoch": 0.67704022512829, "grad_norm": 1.9259525537490845, "learning_rate": 9.52716513077259e-06, "loss": 0.4829, "step": 4090 }, { "epoch": 0.6786955802019533, "grad_norm": 2.1588046550750732, "learning_rate": 9.523068531561805e-06, "loss": 0.5347, "step": 4100 }, { "epoch": 0.6803509352756166, "grad_norm": 2.8328020572662354, "learning_rate": 9.518955151079781e-06, "loss": 0.5013, "step": 4110 }, { "epoch": 0.68200629034928, "grad_norm": 2.9788923263549805, "learning_rate": 9.514825004587784e-06, "loss": 0.515, "step": 4120 }, { "epoch": 0.6836616454229433, "grad_norm": 2.3657615184783936, "learning_rate": 9.510678107409282e-06, "loss": 0.5444, "step": 4130 }, { "epoch": 0.6853170004966065, "grad_norm": 2.1405718326568604, "learning_rate": 9.506514474929896e-06, "loss": 0.4501, "step": 4140 }, { "epoch": 0.6869723555702698, "grad_norm": 1.9537779092788696, "learning_rate": 9.502334122597335e-06, "loss": 0.4819, "step": 4150 }, { "epoch": 0.6886277106439331, "grad_norm": 2.405439615249634, "learning_rate": 9.49813706592134e-06, "loss": 0.4746, "step": 4160 }, { "epoch": 0.6902830657175965, "grad_norm": 2.6599020957946777, "learning_rate": 9.493923320473628e-06, "loss": 0.5271, "step": 4170 }, { "epoch": 0.6919384207912598, "grad_norm": 2.166412830352783, "learning_rate": 9.489692901887837e-06, "loss": 0.5563, "step": 4180 }, { "epoch": 0.693593775864923, "grad_norm": 1.994268774986267, "learning_rate": 9.48544582585946e-06, "loss": 0.5235, "step": 4190 }, { "epoch": 0.6952491309385863, "grad_norm": 3.00091814994812, "learning_rate": 9.481182108145798e-06, "loss": 0.4902, "step": 4200 }, { "epoch": 0.6969044860122496, "grad_norm": 2.903369426727295, "learning_rate": 9.476901764565887e-06, "loss": 0.5175, "step": 4210 }, { "epoch": 0.698559841085913, "grad_norm": 2.3588013648986816, "learning_rate": 9.472604811000453e-06, "loss": 0.5236, "step": 4220 }, { "epoch": 0.7002151961595763, "grad_norm": 2.4532833099365234, "learning_rate": 9.468291263391847e-06, "loss": 0.4895, "step": 4230 }, { "epoch": 0.7018705512332395, "grad_norm": 2.1600773334503174, "learning_rate": 9.463961137743986e-06, "loss": 0.5251, "step": 4240 }, { "epoch": 0.7035259063069028, "grad_norm": 1.243881344795227, "learning_rate": 9.459614450122293e-06, "loss": 0.5456, "step": 4250 }, { "epoch": 0.7051812613805661, "grad_norm": 2.715055465698242, "learning_rate": 9.45525121665364e-06, "loss": 0.5019, "step": 4260 }, { "epoch": 0.7068366164542295, "grad_norm": 2.0910329818725586, "learning_rate": 9.450871453526285e-06, "loss": 0.5024, "step": 4270 }, { "epoch": 0.7084919715278928, "grad_norm": 2.3491621017456055, "learning_rate": 9.446475176989816e-06, "loss": 0.5481, "step": 4280 }, { "epoch": 0.710147326601556, "grad_norm": 2.1992132663726807, "learning_rate": 9.442062403355085e-06, "loss": 0.5166, "step": 4290 }, { "epoch": 0.7118026816752193, "grad_norm": 2.599609375, "learning_rate": 9.437633148994154e-06, "loss": 0.5459, "step": 4300 }, { "epoch": 0.7134580367488826, "grad_norm": 1.8802043199539185, "learning_rate": 9.433187430340228e-06, "loss": 0.557, "step": 4310 }, { "epoch": 0.715113391822546, "grad_norm": 1.949325680732727, "learning_rate": 9.428725263887599e-06, "loss": 0.5385, "step": 4320 }, { "epoch": 0.7167687468962093, "grad_norm": 2.2506093978881836, "learning_rate": 9.424246666191582e-06, "loss": 0.5123, "step": 4330 }, { "epoch": 0.7184241019698725, "grad_norm": 2.1846063137054443, "learning_rate": 9.419751653868456e-06, "loss": 0.5491, "step": 4340 }, { "epoch": 0.7200794570435358, "grad_norm": 2.2196507453918457, "learning_rate": 9.415240243595397e-06, "loss": 0.4818, "step": 4350 }, { "epoch": 0.7217348121171991, "grad_norm": 1.7722293138504028, "learning_rate": 9.410712452110425e-06, "loss": 0.5207, "step": 4360 }, { "epoch": 0.7233901671908625, "grad_norm": 1.8259626626968384, "learning_rate": 9.406168296212332e-06, "loss": 0.5789, "step": 4370 }, { "epoch": 0.7250455222645258, "grad_norm": 3.340482711791992, "learning_rate": 9.401607792760628e-06, "loss": 0.5335, "step": 4380 }, { "epoch": 0.726700877338189, "grad_norm": 2.3359949588775635, "learning_rate": 9.397030958675473e-06, "loss": 0.5716, "step": 4390 }, { "epoch": 0.7283562324118523, "grad_norm": 2.8024532794952393, "learning_rate": 9.392437810937615e-06, "loss": 0.5089, "step": 4400 }, { "epoch": 0.7300115874855156, "grad_norm": 2.381237506866455, "learning_rate": 9.387828366588333e-06, "loss": 0.5497, "step": 4410 }, { "epoch": 0.731666942559179, "grad_norm": 2.007678270339966, "learning_rate": 9.383202642729363e-06, "loss": 0.4973, "step": 4420 }, { "epoch": 0.7333222976328423, "grad_norm": 2.0614326000213623, "learning_rate": 9.378560656522845e-06, "loss": 0.5366, "step": 4430 }, { "epoch": 0.7349776527065055, "grad_norm": 2.1177313327789307, "learning_rate": 9.37390242519125e-06, "loss": 0.5676, "step": 4440 }, { "epoch": 0.7366330077801688, "grad_norm": 1.7760319709777832, "learning_rate": 9.369227966017326e-06, "loss": 0.4562, "step": 4450 }, { "epoch": 0.7382883628538321, "grad_norm": 2.2758963108062744, "learning_rate": 9.364537296344029e-06, "loss": 0.5742, "step": 4460 }, { "epoch": 0.7399437179274955, "grad_norm": 1.834656834602356, "learning_rate": 9.359830433574451e-06, "loss": 0.5405, "step": 4470 }, { "epoch": 0.7415990730011588, "grad_norm": 1.730483889579773, "learning_rate": 9.35510739517177e-06, "loss": 0.5217, "step": 4480 }, { "epoch": 0.743254428074822, "grad_norm": 2.00947642326355, "learning_rate": 9.350368198659174e-06, "loss": 0.5132, "step": 4490 }, { "epoch": 0.7449097831484853, "grad_norm": 2.0409324169158936, "learning_rate": 9.345612861619805e-06, "loss": 0.5914, "step": 4500 }, { "epoch": 0.7465651382221486, "grad_norm": 2.4126083850860596, "learning_rate": 9.340841401696686e-06, "loss": 0.5179, "step": 4510 }, { "epoch": 0.748220493295812, "grad_norm": 1.9900450706481934, "learning_rate": 9.336053836592653e-06, "loss": 0.4732, "step": 4520 }, { "epoch": 0.7498758483694753, "grad_norm": 2.0462656021118164, "learning_rate": 9.331250184070307e-06, "loss": 0.4873, "step": 4530 }, { "epoch": 0.7515312034431385, "grad_norm": 2.1668014526367188, "learning_rate": 9.326430461951922e-06, "loss": 0.5338, "step": 4540 }, { "epoch": 0.7531865585168018, "grad_norm": 2.973837375640869, "learning_rate": 9.3215946881194e-06, "loss": 0.5087, "step": 4550 }, { "epoch": 0.7548419135904652, "grad_norm": 1.9659732580184937, "learning_rate": 9.3167428805142e-06, "loss": 0.5518, "step": 4560 }, { "epoch": 0.7564972686641285, "grad_norm": 1.702977180480957, "learning_rate": 9.311875057137263e-06, "loss": 0.5058, "step": 4570 }, { "epoch": 0.7581526237377918, "grad_norm": 1.8664321899414062, "learning_rate": 9.306991236048953e-06, "loss": 0.4974, "step": 4580 }, { "epoch": 0.759807978811455, "grad_norm": 1.9654330015182495, "learning_rate": 9.302091435368988e-06, "loss": 0.5579, "step": 4590 }, { "epoch": 0.7614633338851183, "grad_norm": 2.2180731296539307, "learning_rate": 9.297175673276372e-06, "loss": 0.5048, "step": 4600 }, { "epoch": 0.7631186889587817, "grad_norm": 2.6190831661224365, "learning_rate": 9.292243968009332e-06, "loss": 0.5507, "step": 4610 }, { "epoch": 0.764774044032445, "grad_norm": 2.6507656574249268, "learning_rate": 9.28729633786524e-06, "loss": 0.5322, "step": 4620 }, { "epoch": 0.7664293991061083, "grad_norm": 2.1486687660217285, "learning_rate": 9.282332801200557e-06, "loss": 0.5358, "step": 4630 }, { "epoch": 0.7680847541797715, "grad_norm": 2.3483917713165283, "learning_rate": 9.277353376430758e-06, "loss": 0.5372, "step": 4640 }, { "epoch": 0.7697401092534348, "grad_norm": 1.9544059038162231, "learning_rate": 9.272358082030263e-06, "loss": 0.5375, "step": 4650 }, { "epoch": 0.7713954643270982, "grad_norm": 2.1422975063323975, "learning_rate": 9.267346936532377e-06, "loss": 0.5314, "step": 4660 }, { "epoch": 0.7730508194007615, "grad_norm": 2.089794874191284, "learning_rate": 9.26231995852921e-06, "loss": 0.5177, "step": 4670 }, { "epoch": 0.7747061744744248, "grad_norm": 2.1427338123321533, "learning_rate": 9.25727716667161e-06, "loss": 0.5485, "step": 4680 }, { "epoch": 0.776361529548088, "grad_norm": 1.6319947242736816, "learning_rate": 9.252218579669105e-06, "loss": 0.5343, "step": 4690 }, { "epoch": 0.7780168846217513, "grad_norm": 2.030916452407837, "learning_rate": 9.247144216289821e-06, "loss": 0.5499, "step": 4700 }, { "epoch": 0.7796722396954147, "grad_norm": 2.0921289920806885, "learning_rate": 9.242054095360413e-06, "loss": 0.4607, "step": 4710 }, { "epoch": 0.781327594769078, "grad_norm": 1.9406028985977173, "learning_rate": 9.236948235766004e-06, "loss": 0.5819, "step": 4720 }, { "epoch": 0.7829829498427413, "grad_norm": 2.238834857940674, "learning_rate": 9.231826656450112e-06, "loss": 0.4634, "step": 4730 }, { "epoch": 0.7846383049164045, "grad_norm": 2.383314609527588, "learning_rate": 9.226689376414571e-06, "loss": 0.5393, "step": 4740 }, { "epoch": 0.7862936599900678, "grad_norm": 2.1563191413879395, "learning_rate": 9.221536414719472e-06, "loss": 0.5372, "step": 4750 }, { "epoch": 0.7879490150637312, "grad_norm": 2.5362823009490967, "learning_rate": 9.216367790483085e-06, "loss": 0.5739, "step": 4760 }, { "epoch": 0.7896043701373945, "grad_norm": 2.164787769317627, "learning_rate": 9.211183522881788e-06, "loss": 0.5437, "step": 4770 }, { "epoch": 0.7912597252110578, "grad_norm": 1.8392080068588257, "learning_rate": 9.205983631150005e-06, "loss": 0.5152, "step": 4780 }, { "epoch": 0.792915080284721, "grad_norm": 2.01461124420166, "learning_rate": 9.200768134580124e-06, "loss": 0.5199, "step": 4790 }, { "epoch": 0.7945704353583843, "grad_norm": 1.9222743511199951, "learning_rate": 9.195537052522428e-06, "loss": 0.5363, "step": 4800 }, { "epoch": 0.7962257904320477, "grad_norm": 2.3261756896972656, "learning_rate": 9.190290404385025e-06, "loss": 0.5161, "step": 4810 }, { "epoch": 0.797881145505711, "grad_norm": 1.8926310539245605, "learning_rate": 9.18502820963378e-06, "loss": 0.5086, "step": 4820 }, { "epoch": 0.7995365005793743, "grad_norm": 2.2464699745178223, "learning_rate": 9.179750487792232e-06, "loss": 0.5446, "step": 4830 }, { "epoch": 0.8011918556530375, "grad_norm": 2.444520950317383, "learning_rate": 9.17445725844153e-06, "loss": 0.5071, "step": 4840 }, { "epoch": 0.8028472107267008, "grad_norm": 2.0580263137817383, "learning_rate": 9.169148541220361e-06, "loss": 0.5257, "step": 4850 }, { "epoch": 0.8045025658003642, "grad_norm": 3.0564193725585938, "learning_rate": 9.163824355824871e-06, "loss": 0.5054, "step": 4860 }, { "epoch": 0.8061579208740275, "grad_norm": 2.9118645191192627, "learning_rate": 9.158484722008596e-06, "loss": 0.5212, "step": 4870 }, { "epoch": 0.8078132759476908, "grad_norm": 2.128197193145752, "learning_rate": 9.15312965958239e-06, "loss": 0.4898, "step": 4880 }, { "epoch": 0.8094686310213541, "grad_norm": 2.0659148693084717, "learning_rate": 9.147759188414348e-06, "loss": 0.5409, "step": 4890 }, { "epoch": 0.8111239860950173, "grad_norm": 2.312744140625, "learning_rate": 9.142373328429733e-06, "loss": 0.5638, "step": 4900 }, { "epoch": 0.8127793411686807, "grad_norm": 2.416036367416382, "learning_rate": 9.136972099610901e-06, "loss": 0.5098, "step": 4910 }, { "epoch": 0.814434696242344, "grad_norm": 2.6420984268188477, "learning_rate": 9.131555521997236e-06, "loss": 0.5312, "step": 4920 }, { "epoch": 0.8160900513160073, "grad_norm": 2.3493874073028564, "learning_rate": 9.126123615685061e-06, "loss": 0.5222, "step": 4930 }, { "epoch": 0.8177454063896706, "grad_norm": 2.002620220184326, "learning_rate": 9.120676400827575e-06, "loss": 0.5188, "step": 4940 }, { "epoch": 0.8194007614633338, "grad_norm": 1.8725508451461792, "learning_rate": 9.11521389763477e-06, "loss": 0.5371, "step": 4950 }, { "epoch": 0.8210561165369972, "grad_norm": 2.774479389190674, "learning_rate": 9.109736126373364e-06, "loss": 0.4711, "step": 4960 }, { "epoch": 0.8227114716106605, "grad_norm": 1.985054850578308, "learning_rate": 9.10424310736672e-06, "loss": 0.5487, "step": 4970 }, { "epoch": 0.8243668266843238, "grad_norm": 2.2153406143188477, "learning_rate": 9.098734860994774e-06, "loss": 0.5096, "step": 4980 }, { "epoch": 0.8260221817579871, "grad_norm": 2.1796064376831055, "learning_rate": 9.093211407693954e-06, "loss": 0.5158, "step": 4990 }, { "epoch": 0.8276775368316504, "grad_norm": 2.429814338684082, "learning_rate": 9.087672767957114e-06, "loss": 0.4919, "step": 5000 }, { "epoch": 0.8293328919053137, "grad_norm": 2.0959694385528564, "learning_rate": 9.082118962333445e-06, "loss": 0.4874, "step": 5010 }, { "epoch": 0.830988246978977, "grad_norm": 2.562439203262329, "learning_rate": 9.076550011428415e-06, "loss": 0.4963, "step": 5020 }, { "epoch": 0.8326436020526403, "grad_norm": 2.4915542602539062, "learning_rate": 9.070965935903672e-06, "loss": 0.5219, "step": 5030 }, { "epoch": 0.8342989571263036, "grad_norm": 2.3988428115844727, "learning_rate": 9.065366756476987e-06, "loss": 0.5804, "step": 5040 }, { "epoch": 0.8359543121999669, "grad_norm": 2.0617735385894775, "learning_rate": 9.059752493922165e-06, "loss": 0.5585, "step": 5050 }, { "epoch": 0.8376096672736302, "grad_norm": 2.474543571472168, "learning_rate": 9.054123169068974e-06, "loss": 0.5238, "step": 5060 }, { "epoch": 0.8392650223472935, "grad_norm": 1.58362877368927, "learning_rate": 9.048478802803062e-06, "loss": 0.5623, "step": 5070 }, { "epoch": 0.8409203774209568, "grad_norm": 1.7996039390563965, "learning_rate": 9.042819416065888e-06, "loss": 0.5107, "step": 5080 }, { "epoch": 0.8425757324946201, "grad_norm": 2.7114357948303223, "learning_rate": 9.037145029854637e-06, "loss": 0.4495, "step": 5090 }, { "epoch": 0.8442310875682834, "grad_norm": 2.9728622436523438, "learning_rate": 9.03145566522214e-06, "loss": 0.5361, "step": 5100 }, { "epoch": 0.8458864426419467, "grad_norm": 2.092686891555786, "learning_rate": 9.025751343276806e-06, "loss": 0.4656, "step": 5110 }, { "epoch": 0.84754179771561, "grad_norm": 2.272784471511841, "learning_rate": 9.020032085182535e-06, "loss": 0.546, "step": 5120 }, { "epoch": 0.8491971527892733, "grad_norm": 2.0549049377441406, "learning_rate": 9.014297912158645e-06, "loss": 0.5034, "step": 5130 }, { "epoch": 0.8508525078629366, "grad_norm": 1.5659208297729492, "learning_rate": 9.008548845479787e-06, "loss": 0.57, "step": 5140 }, { "epoch": 0.8525078629365999, "grad_norm": 1.5937261581420898, "learning_rate": 9.002784906475872e-06, "loss": 0.4394, "step": 5150 }, { "epoch": 0.8541632180102632, "grad_norm": 1.9835437536239624, "learning_rate": 8.99700611653199e-06, "loss": 0.492, "step": 5160 }, { "epoch": 0.8558185730839265, "grad_norm": 2.044243574142456, "learning_rate": 8.991212497088328e-06, "loss": 0.5157, "step": 5170 }, { "epoch": 0.8574739281575898, "grad_norm": 2.410057544708252, "learning_rate": 8.985404069640096e-06, "loss": 0.557, "step": 5180 }, { "epoch": 0.8591292832312531, "grad_norm": 1.7656784057617188, "learning_rate": 8.97958085573744e-06, "loss": 0.5092, "step": 5190 }, { "epoch": 0.8607846383049164, "grad_norm": 2.221282958984375, "learning_rate": 8.973742876985369e-06, "loss": 0.5198, "step": 5200 }, { "epoch": 0.8624399933785797, "grad_norm": 1.847607135772705, "learning_rate": 8.967890155043672e-06, "loss": 0.516, "step": 5210 }, { "epoch": 0.864095348452243, "grad_norm": 1.8600164651870728, "learning_rate": 8.962022711626835e-06, "loss": 0.4892, "step": 5220 }, { "epoch": 0.8657507035259063, "grad_norm": 1.9303275346755981, "learning_rate": 8.956140568503964e-06, "loss": 0.5309, "step": 5230 }, { "epoch": 0.8674060585995697, "grad_norm": 2.567194938659668, "learning_rate": 8.950243747498704e-06, "loss": 0.5249, "step": 5240 }, { "epoch": 0.8690614136732329, "grad_norm": 2.3800063133239746, "learning_rate": 8.944332270489156e-06, "loss": 0.4972, "step": 5250 }, { "epoch": 0.8707167687468962, "grad_norm": 1.9158269166946411, "learning_rate": 8.938406159407798e-06, "loss": 0.4903, "step": 5260 }, { "epoch": 0.8723721238205595, "grad_norm": 2.1870522499084473, "learning_rate": 8.932465436241403e-06, "loss": 0.5044, "step": 5270 }, { "epoch": 0.8740274788942228, "grad_norm": 2.8295249938964844, "learning_rate": 8.926510123030955e-06, "loss": 0.5025, "step": 5280 }, { "epoch": 0.8756828339678862, "grad_norm": 2.0139286518096924, "learning_rate": 8.92054024187157e-06, "loss": 0.5451, "step": 5290 }, { "epoch": 0.8773381890415494, "grad_norm": 1.9241939783096313, "learning_rate": 8.914555814912416e-06, "loss": 0.4705, "step": 5300 }, { "epoch": 0.8789935441152127, "grad_norm": 1.9577219486236572, "learning_rate": 8.908556864356625e-06, "loss": 0.5262, "step": 5310 }, { "epoch": 0.880648899188876, "grad_norm": 2.2654361724853516, "learning_rate": 8.902543412461214e-06, "loss": 0.4939, "step": 5320 }, { "epoch": 0.8823042542625393, "grad_norm": 1.9551903009414673, "learning_rate": 8.896515481537004e-06, "loss": 0.5221, "step": 5330 }, { "epoch": 0.8839596093362027, "grad_norm": 2.27404522895813, "learning_rate": 8.890473093948532e-06, "loss": 0.5371, "step": 5340 }, { "epoch": 0.8856149644098659, "grad_norm": 2.3164634704589844, "learning_rate": 8.884416272113977e-06, "loss": 0.5328, "step": 5350 }, { "epoch": 0.8872703194835292, "grad_norm": 1.646775484085083, "learning_rate": 8.878345038505067e-06, "loss": 0.5262, "step": 5360 }, { "epoch": 0.8889256745571925, "grad_norm": 2.1168019771575928, "learning_rate": 8.872259415646998e-06, "loss": 0.5132, "step": 5370 }, { "epoch": 0.8905810296308558, "grad_norm": 1.5265601873397827, "learning_rate": 8.86615942611836e-06, "loss": 0.4874, "step": 5380 }, { "epoch": 0.8922363847045192, "grad_norm": 1.885336995124817, "learning_rate": 8.860045092551034e-06, "loss": 0.4704, "step": 5390 }, { "epoch": 0.8938917397781824, "grad_norm": 2.109318256378174, "learning_rate": 8.853916437630135e-06, "loss": 0.4941, "step": 5400 }, { "epoch": 0.8955470948518457, "grad_norm": 1.7149527072906494, "learning_rate": 8.847773484093896e-06, "loss": 0.5617, "step": 5410 }, { "epoch": 0.897202449925509, "grad_norm": 2.4205265045166016, "learning_rate": 8.84161625473361e-06, "loss": 0.4991, "step": 5420 }, { "epoch": 0.8988578049991723, "grad_norm": 2.258826971054077, "learning_rate": 8.835444772393534e-06, "loss": 0.5105, "step": 5430 }, { "epoch": 0.9005131600728357, "grad_norm": 2.3182597160339355, "learning_rate": 8.829259059970805e-06, "loss": 0.5127, "step": 5440 }, { "epoch": 0.9021685151464989, "grad_norm": 1.9707034826278687, "learning_rate": 8.823059140415355e-06, "loss": 0.4753, "step": 5450 }, { "epoch": 0.9038238702201622, "grad_norm": 1.8292397260665894, "learning_rate": 8.816845036729827e-06, "loss": 0.4998, "step": 5460 }, { "epoch": 0.9054792252938255, "grad_norm": 1.737146258354187, "learning_rate": 8.81061677196949e-06, "loss": 0.4546, "step": 5470 }, { "epoch": 0.9071345803674888, "grad_norm": 1.6696151494979858, "learning_rate": 8.80437436924215e-06, "loss": 0.5443, "step": 5480 }, { "epoch": 0.9087899354411522, "grad_norm": 2.815847635269165, "learning_rate": 8.798117851708072e-06, "loss": 0.5335, "step": 5490 }, { "epoch": 0.9104452905148154, "grad_norm": 1.8941160440444946, "learning_rate": 8.791847242579887e-06, "loss": 0.5121, "step": 5500 }, { "epoch": 0.9121006455884787, "grad_norm": 2.581604242324829, "learning_rate": 8.785562565122505e-06, "loss": 0.5301, "step": 5510 }, { "epoch": 0.913756000662142, "grad_norm": 1.7762013673782349, "learning_rate": 8.779263842653034e-06, "loss": 0.5159, "step": 5520 }, { "epoch": 0.9154113557358053, "grad_norm": 2.0996243953704834, "learning_rate": 8.772951098540692e-06, "loss": 0.5227, "step": 5530 }, { "epoch": 0.9170667108094687, "grad_norm": 2.195647716522217, "learning_rate": 8.76662435620672e-06, "loss": 0.4409, "step": 5540 }, { "epoch": 0.9187220658831319, "grad_norm": 3.163017749786377, "learning_rate": 8.760283639124289e-06, "loss": 0.5603, "step": 5550 }, { "epoch": 0.9203774209567952, "grad_norm": 2.1786811351776123, "learning_rate": 8.753928970818426e-06, "loss": 0.5127, "step": 5560 }, { "epoch": 0.9220327760304585, "grad_norm": 1.9021741151809692, "learning_rate": 8.747560374865913e-06, "loss": 0.5137, "step": 5570 }, { "epoch": 0.9236881311041218, "grad_norm": 2.3319544792175293, "learning_rate": 8.74117787489521e-06, "loss": 0.515, "step": 5580 }, { "epoch": 0.9253434861777852, "grad_norm": 2.084627389907837, "learning_rate": 8.734781494586363e-06, "loss": 0.5457, "step": 5590 }, { "epoch": 0.9269988412514485, "grad_norm": 2.1023311614990234, "learning_rate": 8.72837125767091e-06, "loss": 0.5222, "step": 5600 }, { "epoch": 0.9286541963251117, "grad_norm": 1.880613923072815, "learning_rate": 8.721947187931807e-06, "loss": 0.5315, "step": 5610 }, { "epoch": 0.930309551398775, "grad_norm": 2.219151735305786, "learning_rate": 8.715509309203327e-06, "loss": 0.526, "step": 5620 }, { "epoch": 0.9319649064724383, "grad_norm": 2.4242360591888428, "learning_rate": 8.709057645370977e-06, "loss": 0.4777, "step": 5630 }, { "epoch": 0.9336202615461017, "grad_norm": 1.921246886253357, "learning_rate": 8.702592220371413e-06, "loss": 0.523, "step": 5640 }, { "epoch": 0.935275616619765, "grad_norm": 2.206521987915039, "learning_rate": 8.696113058192341e-06, "loss": 0.5092, "step": 5650 }, { "epoch": 0.9369309716934282, "grad_norm": 1.9734609127044678, "learning_rate": 8.689620182872435e-06, "loss": 0.5065, "step": 5660 }, { "epoch": 0.9385863267670915, "grad_norm": 1.9264705181121826, "learning_rate": 8.683113618501256e-06, "loss": 0.5365, "step": 5670 }, { "epoch": 0.9402416818407549, "grad_norm": 2.4335641860961914, "learning_rate": 8.676593389219137e-06, "loss": 0.4719, "step": 5680 }, { "epoch": 0.9418970369144182, "grad_norm": 2.1883111000061035, "learning_rate": 8.670059519217124e-06, "loss": 0.4727, "step": 5690 }, { "epoch": 0.9435523919880815, "grad_norm": 2.0516345500946045, "learning_rate": 8.663512032736868e-06, "loss": 0.5501, "step": 5700 }, { "epoch": 0.9452077470617447, "grad_norm": 2.072754144668579, "learning_rate": 8.656950954070536e-06, "loss": 0.5249, "step": 5710 }, { "epoch": 0.946863102135408, "grad_norm": 1.922422170639038, "learning_rate": 8.650376307560726e-06, "loss": 0.5304, "step": 5720 }, { "epoch": 0.9485184572090714, "grad_norm": 1.9621427059173584, "learning_rate": 8.643788117600376e-06, "loss": 0.5281, "step": 5730 }, { "epoch": 0.9501738122827347, "grad_norm": 2.2022109031677246, "learning_rate": 8.637186408632673e-06, "loss": 0.4689, "step": 5740 }, { "epoch": 0.951829167356398, "grad_norm": 2.15291690826416, "learning_rate": 8.630571205150957e-06, "loss": 0.5009, "step": 5750 }, { "epoch": 0.9534845224300612, "grad_norm": 2.6725265979766846, "learning_rate": 8.62394253169864e-06, "loss": 0.4522, "step": 5760 }, { "epoch": 0.9551398775037245, "grad_norm": 2.1421377658843994, "learning_rate": 8.617300412869105e-06, "loss": 0.5002, "step": 5770 }, { "epoch": 0.9567952325773879, "grad_norm": 1.951465368270874, "learning_rate": 8.610644873305625e-06, "loss": 0.4692, "step": 5780 }, { "epoch": 0.9584505876510512, "grad_norm": 2.3892083168029785, "learning_rate": 8.60397593770126e-06, "loss": 0.485, "step": 5790 }, { "epoch": 0.9601059427247145, "grad_norm": 2.2727770805358887, "learning_rate": 8.597293630798776e-06, "loss": 0.5378, "step": 5800 }, { "epoch": 0.9617612977983777, "grad_norm": 1.7045542001724243, "learning_rate": 8.590597977390542e-06, "loss": 0.4663, "step": 5810 }, { "epoch": 0.963416652872041, "grad_norm": 2.008007526397705, "learning_rate": 8.583889002318455e-06, "loss": 0.5119, "step": 5820 }, { "epoch": 0.9650720079457044, "grad_norm": 1.846248745918274, "learning_rate": 8.57716673047383e-06, "loss": 0.5105, "step": 5830 }, { "epoch": 0.9667273630193677, "grad_norm": 3.5718061923980713, "learning_rate": 8.570431186797314e-06, "loss": 0.4927, "step": 5840 }, { "epoch": 0.968382718093031, "grad_norm": 2.1415624618530273, "learning_rate": 8.563682396278799e-06, "loss": 0.4978, "step": 5850 }, { "epoch": 0.9700380731666942, "grad_norm": 2.2250213623046875, "learning_rate": 8.556920383957322e-06, "loss": 0.4732, "step": 5860 }, { "epoch": 0.9716934282403575, "grad_norm": 2.6127257347106934, "learning_rate": 8.550145174920977e-06, "loss": 0.4866, "step": 5870 }, { "epoch": 0.9733487833140209, "grad_norm": 2.1522982120513916, "learning_rate": 8.543356794306818e-06, "loss": 0.5143, "step": 5880 }, { "epoch": 0.9750041383876842, "grad_norm": 2.036477565765381, "learning_rate": 8.536555267300766e-06, "loss": 0.5438, "step": 5890 }, { "epoch": 0.9766594934613475, "grad_norm": 2.8033342361450195, "learning_rate": 8.529740619137523e-06, "loss": 0.514, "step": 5900 }, { "epoch": 0.9783148485350107, "grad_norm": 2.0152909755706787, "learning_rate": 8.522912875100467e-06, "loss": 0.5071, "step": 5910 }, { "epoch": 0.979970203608674, "grad_norm": 2.0931289196014404, "learning_rate": 8.516072060521566e-06, "loss": 0.5031, "step": 5920 }, { "epoch": 0.9816255586823374, "grad_norm": 1.396856427192688, "learning_rate": 8.509218200781278e-06, "loss": 0.5602, "step": 5930 }, { "epoch": 0.9832809137560007, "grad_norm": 2.251620054244995, "learning_rate": 8.502351321308468e-06, "loss": 0.5185, "step": 5940 }, { "epoch": 0.984936268829664, "grad_norm": 2.3058903217315674, "learning_rate": 8.4954714475803e-06, "loss": 0.4617, "step": 5950 }, { "epoch": 0.9865916239033272, "grad_norm": 2.428722620010376, "learning_rate": 8.488578605122149e-06, "loss": 0.5123, "step": 5960 }, { "epoch": 0.9882469789769905, "grad_norm": 2.9448606967926025, "learning_rate": 8.48167281950751e-06, "loss": 0.4912, "step": 5970 }, { "epoch": 0.9899023340506539, "grad_norm": 1.9352291822433472, "learning_rate": 8.474754116357895e-06, "loss": 0.4906, "step": 5980 }, { "epoch": 0.9915576891243172, "grad_norm": 2.116455078125, "learning_rate": 8.467822521342744e-06, "loss": 0.4532, "step": 5990 }, { "epoch": 0.9932130441979805, "grad_norm": 1.96929931640625, "learning_rate": 8.460878060179326e-06, "loss": 0.4909, "step": 6000 }, { "epoch": 0.9948683992716437, "grad_norm": 2.103734254837036, "learning_rate": 8.45392075863265e-06, "loss": 0.4986, "step": 6010 }, { "epoch": 0.996523754345307, "grad_norm": 1.8520708084106445, "learning_rate": 8.446950642515359e-06, "loss": 0.4959, "step": 6020 }, { "epoch": 0.9981791094189704, "grad_norm": 1.5398201942443848, "learning_rate": 8.439967737687642e-06, "loss": 0.5061, "step": 6030 }, { "epoch": 0.9998344644926337, "grad_norm": 2.7384068965911865, "learning_rate": 8.432972070057137e-06, "loss": 0.4164, "step": 6040 }, { "epoch": 1.001489819566297, "grad_norm": 1.699214220046997, "learning_rate": 8.425963665578833e-06, "loss": 0.4717, "step": 6050 }, { "epoch": 1.0031451746399602, "grad_norm": 2.7517991065979004, "learning_rate": 8.418942550254978e-06, "loss": 0.4783, "step": 6060 }, { "epoch": 1.0048005297136235, "grad_norm": 2.293407440185547, "learning_rate": 8.411908750134973e-06, "loss": 0.428, "step": 6070 }, { "epoch": 1.0064558847872869, "grad_norm": 2.3699285984039307, "learning_rate": 8.404862291315287e-06, "loss": 0.4529, "step": 6080 }, { "epoch": 1.0081112398609502, "grad_norm": 2.27496337890625, "learning_rate": 8.39780319993935e-06, "loss": 0.4486, "step": 6090 }, { "epoch": 1.0097665949346135, "grad_norm": 2.0127158164978027, "learning_rate": 8.390731502197465e-06, "loss": 0.4529, "step": 6100 }, { "epoch": 1.0114219500082768, "grad_norm": 2.6068265438079834, "learning_rate": 8.383647224326704e-06, "loss": 0.4397, "step": 6110 }, { "epoch": 1.0130773050819402, "grad_norm": 1.9427989721298218, "learning_rate": 8.376550392610813e-06, "loss": 0.4236, "step": 6120 }, { "epoch": 1.0147326601556035, "grad_norm": 2.082646369934082, "learning_rate": 8.369441033380119e-06, "loss": 0.4352, "step": 6130 }, { "epoch": 1.0163880152292666, "grad_norm": 2.0163888931274414, "learning_rate": 8.362319173011421e-06, "loss": 0.4359, "step": 6140 }, { "epoch": 1.01804337030293, "grad_norm": 1.7320841550827026, "learning_rate": 8.355184837927906e-06, "loss": 0.4454, "step": 6150 }, { "epoch": 1.0196987253765932, "grad_norm": 1.9709644317626953, "learning_rate": 8.348038054599037e-06, "loss": 0.4593, "step": 6160 }, { "epoch": 1.0213540804502566, "grad_norm": 2.382404327392578, "learning_rate": 8.340878849540466e-06, "loss": 0.3941, "step": 6170 }, { "epoch": 1.0230094355239199, "grad_norm": 2.566659450531006, "learning_rate": 8.333707249313933e-06, "loss": 0.4474, "step": 6180 }, { "epoch": 1.0246647905975832, "grad_norm": 1.6790319681167603, "learning_rate": 8.326523280527165e-06, "loss": 0.437, "step": 6190 }, { "epoch": 1.0263201456712465, "grad_norm": 1.9605789184570312, "learning_rate": 8.319326969833776e-06, "loss": 0.4346, "step": 6200 }, { "epoch": 1.0279755007449098, "grad_norm": 1.5337015390396118, "learning_rate": 8.312118343933172e-06, "loss": 0.4329, "step": 6210 }, { "epoch": 1.0296308558185732, "grad_norm": 2.0103812217712402, "learning_rate": 8.304897429570448e-06, "loss": 0.4199, "step": 6220 }, { "epoch": 1.0312862108922365, "grad_norm": 3.106372594833374, "learning_rate": 8.297664253536296e-06, "loss": 0.4429, "step": 6230 }, { "epoch": 1.0329415659658996, "grad_norm": 2.3438146114349365, "learning_rate": 8.290418842666894e-06, "loss": 0.4588, "step": 6240 }, { "epoch": 1.034596921039563, "grad_norm": 1.6596814393997192, "learning_rate": 8.28316122384382e-06, "loss": 0.4314, "step": 6250 }, { "epoch": 1.0362522761132262, "grad_norm": 2.1829092502593994, "learning_rate": 8.275891423993943e-06, "loss": 0.4292, "step": 6260 }, { "epoch": 1.0379076311868896, "grad_norm": 1.525658130645752, "learning_rate": 8.268609470089322e-06, "loss": 0.4206, "step": 6270 }, { "epoch": 1.0395629862605529, "grad_norm": 1.846199870109558, "learning_rate": 8.261315389147113e-06, "loss": 0.3781, "step": 6280 }, { "epoch": 1.0412183413342162, "grad_norm": 2.2340259552001953, "learning_rate": 8.254009208229464e-06, "loss": 0.4107, "step": 6290 }, { "epoch": 1.0428736964078795, "grad_norm": 2.0799405574798584, "learning_rate": 8.246690954443416e-06, "loss": 0.3953, "step": 6300 }, { "epoch": 1.0445290514815428, "grad_norm": 2.073094367980957, "learning_rate": 8.239360654940803e-06, "loss": 0.4502, "step": 6310 }, { "epoch": 1.0461844065552062, "grad_norm": 2.092500925064087, "learning_rate": 8.232018336918145e-06, "loss": 0.4067, "step": 6320 }, { "epoch": 1.0478397616288695, "grad_norm": 2.2009036540985107, "learning_rate": 8.224664027616565e-06, "loss": 0.4153, "step": 6330 }, { "epoch": 1.0494951167025326, "grad_norm": 2.1179239749908447, "learning_rate": 8.217297754321661e-06, "loss": 0.3964, "step": 6340 }, { "epoch": 1.051150471776196, "grad_norm": 1.9350873231887817, "learning_rate": 8.209919544363428e-06, "loss": 0.4776, "step": 6350 }, { "epoch": 1.0528058268498592, "grad_norm": 2.0609846115112305, "learning_rate": 8.202529425116145e-06, "loss": 0.4193, "step": 6360 }, { "epoch": 1.0544611819235226, "grad_norm": 2.755305528640747, "learning_rate": 8.195127423998279e-06, "loss": 0.4682, "step": 6370 }, { "epoch": 1.0561165369971859, "grad_norm": 1.6468199491500854, "learning_rate": 8.187713568472375e-06, "loss": 0.4609, "step": 6380 }, { "epoch": 1.0577718920708492, "grad_norm": 2.1090316772460938, "learning_rate": 8.180287886044967e-06, "loss": 0.4468, "step": 6390 }, { "epoch": 1.0594272471445125, "grad_norm": 3.3360707759857178, "learning_rate": 8.172850404266462e-06, "loss": 0.4611, "step": 6400 }, { "epoch": 1.0610826022181759, "grad_norm": 1.8891501426696777, "learning_rate": 8.165401150731045e-06, "loss": 0.42, "step": 6410 }, { "epoch": 1.0627379572918392, "grad_norm": 2.2766528129577637, "learning_rate": 8.157940153076582e-06, "loss": 0.3972, "step": 6420 }, { "epoch": 1.0643933123655025, "grad_norm": 2.1425061225891113, "learning_rate": 8.150467438984507e-06, "loss": 0.4378, "step": 6430 }, { "epoch": 1.0660486674391656, "grad_norm": 2.0477399826049805, "learning_rate": 8.142983036179723e-06, "loss": 0.4627, "step": 6440 }, { "epoch": 1.067704022512829, "grad_norm": 1.6100428104400635, "learning_rate": 8.135486972430502e-06, "loss": 0.415, "step": 6450 }, { "epoch": 1.0693593775864922, "grad_norm": 2.3228981494903564, "learning_rate": 8.127979275548376e-06, "loss": 0.4399, "step": 6460 }, { "epoch": 1.0710147326601556, "grad_norm": 2.075024127960205, "learning_rate": 8.120459973388046e-06, "loss": 0.4679, "step": 6470 }, { "epoch": 1.0726700877338189, "grad_norm": 2.06042218208313, "learning_rate": 8.112929093847262e-06, "loss": 0.3853, "step": 6480 }, { "epoch": 1.0743254428074822, "grad_norm": 2.618030071258545, "learning_rate": 8.105386664866732e-06, "loss": 0.4163, "step": 6490 }, { "epoch": 1.0759807978811455, "grad_norm": 2.011997938156128, "learning_rate": 8.09783271443001e-06, "loss": 0.4632, "step": 6500 }, { "epoch": 1.0776361529548089, "grad_norm": 1.7011245489120483, "learning_rate": 8.090267270563403e-06, "loss": 0.422, "step": 6510 }, { "epoch": 1.0792915080284722, "grad_norm": 2.1394636631011963, "learning_rate": 8.082690361335857e-06, "loss": 0.4856, "step": 6520 }, { "epoch": 1.0809468631021355, "grad_norm": 1.878475308418274, "learning_rate": 8.075102014858854e-06, "loss": 0.4819, "step": 6530 }, { "epoch": 1.0826022181757988, "grad_norm": 2.2121503353118896, "learning_rate": 8.067502259286313e-06, "loss": 0.4312, "step": 6540 }, { "epoch": 1.084257573249462, "grad_norm": 2.6810719966888428, "learning_rate": 8.059891122814481e-06, "loss": 0.4271, "step": 6550 }, { "epoch": 1.0859129283231252, "grad_norm": 2.5359325408935547, "learning_rate": 8.05226863368183e-06, "loss": 0.4325, "step": 6560 }, { "epoch": 1.0875682833967886, "grad_norm": 2.2482380867004395, "learning_rate": 8.044634820168954e-06, "loss": 0.4763, "step": 6570 }, { "epoch": 1.089223638470452, "grad_norm": 1.8877545595169067, "learning_rate": 8.036989710598458e-06, "loss": 0.4688, "step": 6580 }, { "epoch": 1.0908789935441152, "grad_norm": 2.3584036827087402, "learning_rate": 8.029333333334863e-06, "loss": 0.4688, "step": 6590 }, { "epoch": 1.0925343486177785, "grad_norm": 2.0854368209838867, "learning_rate": 8.02166571678449e-06, "loss": 0.4252, "step": 6600 }, { "epoch": 1.0941897036914419, "grad_norm": 1.9038151502609253, "learning_rate": 8.01398688939536e-06, "loss": 0.4127, "step": 6610 }, { "epoch": 1.0958450587651052, "grad_norm": 1.8824822902679443, "learning_rate": 8.00629687965709e-06, "loss": 0.4672, "step": 6620 }, { "epoch": 1.0975004138387685, "grad_norm": 2.430506467819214, "learning_rate": 7.998595716100783e-06, "loss": 0.4449, "step": 6630 }, { "epoch": 1.0991557689124316, "grad_norm": 2.1402385234832764, "learning_rate": 7.990883427298927e-06, "loss": 0.4782, "step": 6640 }, { "epoch": 1.100811123986095, "grad_norm": 2.3543331623077393, "learning_rate": 7.983160041865285e-06, "loss": 0.4212, "step": 6650 }, { "epoch": 1.1024664790597583, "grad_norm": 2.142435312271118, "learning_rate": 7.975425588454788e-06, "loss": 0.4939, "step": 6660 }, { "epoch": 1.1041218341334216, "grad_norm": 2.1134002208709717, "learning_rate": 7.967680095763434e-06, "loss": 0.4463, "step": 6670 }, { "epoch": 1.105777189207085, "grad_norm": 2.716151237487793, "learning_rate": 7.959923592528177e-06, "loss": 0.4561, "step": 6680 }, { "epoch": 1.1074325442807482, "grad_norm": 1.9613409042358398, "learning_rate": 7.952156107526826e-06, "loss": 0.4141, "step": 6690 }, { "epoch": 1.1090878993544115, "grad_norm": 1.8834747076034546, "learning_rate": 7.944377669577924e-06, "loss": 0.4361, "step": 6700 }, { "epoch": 1.1107432544280749, "grad_norm": 1.4934910535812378, "learning_rate": 7.93658830754066e-06, "loss": 0.4413, "step": 6710 }, { "epoch": 1.1123986095017382, "grad_norm": 1.9365501403808594, "learning_rate": 7.928788050314751e-06, "loss": 0.4656, "step": 6720 }, { "epoch": 1.1140539645754015, "grad_norm": 1.9965449571609497, "learning_rate": 7.920976926840334e-06, "loss": 0.4671, "step": 6730 }, { "epoch": 1.1157093196490648, "grad_norm": 1.9242488145828247, "learning_rate": 7.913154966097865e-06, "loss": 0.4345, "step": 6740 }, { "epoch": 1.117364674722728, "grad_norm": 1.4173060655593872, "learning_rate": 7.905322197108006e-06, "loss": 0.4674, "step": 6750 }, { "epoch": 1.1190200297963913, "grad_norm": 1.9419013261795044, "learning_rate": 7.897478648931521e-06, "loss": 0.4157, "step": 6760 }, { "epoch": 1.1206753848700546, "grad_norm": 1.910142421722412, "learning_rate": 7.889624350669162e-06, "loss": 0.4764, "step": 6770 }, { "epoch": 1.122330739943718, "grad_norm": 1.9535759687423706, "learning_rate": 7.88175933146157e-06, "loss": 0.4167, "step": 6780 }, { "epoch": 1.1239860950173812, "grad_norm": 2.105318784713745, "learning_rate": 7.873883620489164e-06, "loss": 0.4169, "step": 6790 }, { "epoch": 1.1256414500910445, "grad_norm": 2.383732318878174, "learning_rate": 7.865997246972023e-06, "loss": 0.4533, "step": 6800 }, { "epoch": 1.1272968051647079, "grad_norm": 1.8368053436279297, "learning_rate": 7.858100240169792e-06, "loss": 0.4594, "step": 6810 }, { "epoch": 1.1289521602383712, "grad_norm": 1.8236784934997559, "learning_rate": 7.850192629381568e-06, "loss": 0.4501, "step": 6820 }, { "epoch": 1.1306075153120345, "grad_norm": 1.8940480947494507, "learning_rate": 7.842274443945785e-06, "loss": 0.3969, "step": 6830 }, { "epoch": 1.1322628703856976, "grad_norm": 2.2269837856292725, "learning_rate": 7.834345713240114e-06, "loss": 0.4762, "step": 6840 }, { "epoch": 1.133918225459361, "grad_norm": 2.336608409881592, "learning_rate": 7.826406466681354e-06, "loss": 0.4974, "step": 6850 }, { "epoch": 1.1355735805330243, "grad_norm": 2.0093603134155273, "learning_rate": 7.81845673372531e-06, "loss": 0.4577, "step": 6860 }, { "epoch": 1.1372289356066876, "grad_norm": 1.781472086906433, "learning_rate": 7.810496543866704e-06, "loss": 0.434, "step": 6870 }, { "epoch": 1.138884290680351, "grad_norm": 3.0077342987060547, "learning_rate": 7.802525926639045e-06, "loss": 0.4651, "step": 6880 }, { "epoch": 1.1405396457540142, "grad_norm": 2.0327229499816895, "learning_rate": 7.794544911614537e-06, "loss": 0.4405, "step": 6890 }, { "epoch": 1.1421950008276776, "grad_norm": 1.7821011543273926, "learning_rate": 7.786553528403954e-06, "loss": 0.4293, "step": 6900 }, { "epoch": 1.1438503559013409, "grad_norm": 2.4968810081481934, "learning_rate": 7.778551806656546e-06, "loss": 0.4273, "step": 6910 }, { "epoch": 1.1455057109750042, "grad_norm": 2.041292905807495, "learning_rate": 7.770539776059914e-06, "loss": 0.4605, "step": 6920 }, { "epoch": 1.1471610660486675, "grad_norm": 2.0464673042297363, "learning_rate": 7.762517466339905e-06, "loss": 0.4407, "step": 6930 }, { "epoch": 1.1488164211223308, "grad_norm": 1.8867651224136353, "learning_rate": 7.754484907260513e-06, "loss": 0.4575, "step": 6940 }, { "epoch": 1.150471776195994, "grad_norm": 1.7853606939315796, "learning_rate": 7.74644212862375e-06, "loss": 0.4158, "step": 6950 }, { "epoch": 1.1521271312696573, "grad_norm": 1.7646677494049072, "learning_rate": 7.738389160269542e-06, "loss": 0.4148, "step": 6960 }, { "epoch": 1.1537824863433206, "grad_norm": 1.729858636856079, "learning_rate": 7.73032603207563e-06, "loss": 0.4153, "step": 6970 }, { "epoch": 1.155437841416984, "grad_norm": 2.1216416358947754, "learning_rate": 7.722252773957442e-06, "loss": 0.4578, "step": 6980 }, { "epoch": 1.1570931964906472, "grad_norm": 2.449660062789917, "learning_rate": 7.714169415867991e-06, "loss": 0.4396, "step": 6990 }, { "epoch": 1.1587485515643106, "grad_norm": 2.7764759063720703, "learning_rate": 7.706075987797767e-06, "loss": 0.4814, "step": 7000 }, { "epoch": 1.1604039066379739, "grad_norm": 1.6688860654830933, "learning_rate": 7.697972519774612e-06, "loss": 0.3879, "step": 7010 }, { "epoch": 1.1620592617116372, "grad_norm": 2.147960662841797, "learning_rate": 7.689859041863628e-06, "loss": 0.4821, "step": 7020 }, { "epoch": 1.1637146167853005, "grad_norm": 1.482928991317749, "learning_rate": 7.681735584167048e-06, "loss": 0.4695, "step": 7030 }, { "epoch": 1.1653699718589636, "grad_norm": 3.1044204235076904, "learning_rate": 7.673602176824134e-06, "loss": 0.4205, "step": 7040 }, { "epoch": 1.167025326932627, "grad_norm": 1.5646145343780518, "learning_rate": 7.665458850011062e-06, "loss": 0.4767, "step": 7050 }, { "epoch": 1.1686806820062903, "grad_norm": 2.1758921146392822, "learning_rate": 7.657305633940816e-06, "loss": 0.4152, "step": 7060 }, { "epoch": 1.1703360370799536, "grad_norm": 2.1989285945892334, "learning_rate": 7.649142558863056e-06, "loss": 0.4307, "step": 7070 }, { "epoch": 1.171991392153617, "grad_norm": 2.195272207260132, "learning_rate": 7.640969655064042e-06, "loss": 0.4449, "step": 7080 }, { "epoch": 1.1736467472272802, "grad_norm": 2.0945467948913574, "learning_rate": 7.63278695286648e-06, "loss": 0.4225, "step": 7090 }, { "epoch": 1.1753021023009436, "grad_norm": 1.938080906867981, "learning_rate": 7.624594482629442e-06, "loss": 0.4001, "step": 7100 }, { "epoch": 1.1769574573746069, "grad_norm": 1.8821436166763306, "learning_rate": 7.616392274748235e-06, "loss": 0.3847, "step": 7110 }, { "epoch": 1.1786128124482702, "grad_norm": 2.344076633453369, "learning_rate": 7.608180359654298e-06, "loss": 0.4675, "step": 7120 }, { "epoch": 1.1802681675219335, "grad_norm": 1.977381944656372, "learning_rate": 7.599958767815081e-06, "loss": 0.4387, "step": 7130 }, { "epoch": 1.1819235225955969, "grad_norm": 2.3042211532592773, "learning_rate": 7.591727529733941e-06, "loss": 0.459, "step": 7140 }, { "epoch": 1.1835788776692602, "grad_norm": 1.93119478225708, "learning_rate": 7.583486675950021e-06, "loss": 0.4625, "step": 7150 }, { "epoch": 1.1852342327429233, "grad_norm": 1.6517666578292847, "learning_rate": 7.575236237038136e-06, "loss": 0.474, "step": 7160 }, { "epoch": 1.1868895878165866, "grad_norm": 1.7987353801727295, "learning_rate": 7.566976243608673e-06, "loss": 0.4182, "step": 7170 }, { "epoch": 1.18854494289025, "grad_norm": 2.2666845321655273, "learning_rate": 7.558706726307459e-06, "loss": 0.43, "step": 7180 }, { "epoch": 1.1902002979639132, "grad_norm": 1.540936827659607, "learning_rate": 7.55042771581566e-06, "loss": 0.4305, "step": 7190 }, { "epoch": 1.1918556530375766, "grad_norm": 1.6909763813018799, "learning_rate": 7.542139242849664e-06, "loss": 0.4059, "step": 7200 }, { "epoch": 1.19351100811124, "grad_norm": 2.2971370220184326, "learning_rate": 7.533841338160963e-06, "loss": 0.4068, "step": 7210 }, { "epoch": 1.1951663631849032, "grad_norm": 1.584033727645874, "learning_rate": 7.525534032536044e-06, "loss": 0.4416, "step": 7220 }, { "epoch": 1.1968217182585665, "grad_norm": 1.8350242376327515, "learning_rate": 7.517217356796272e-06, "loss": 0.4115, "step": 7230 }, { "epoch": 1.1984770733322299, "grad_norm": 1.7758424282073975, "learning_rate": 7.508891341797777e-06, "loss": 0.4243, "step": 7240 }, { "epoch": 1.200132428405893, "grad_norm": 1.4368244409561157, "learning_rate": 7.500556018431342e-06, "loss": 0.4006, "step": 7250 }, { "epoch": 1.2017877834795563, "grad_norm": 2.0176913738250732, "learning_rate": 7.492211417622278e-06, "loss": 0.4484, "step": 7260 }, { "epoch": 1.2034431385532196, "grad_norm": 2.6586618423461914, "learning_rate": 7.483857570330326e-06, "loss": 0.4808, "step": 7270 }, { "epoch": 1.205098493626883, "grad_norm": 2.3201072216033936, "learning_rate": 7.475494507549526e-06, "loss": 0.4636, "step": 7280 }, { "epoch": 1.2067538487005462, "grad_norm": 2.491241693496704, "learning_rate": 7.4671222603081115e-06, "loss": 0.454, "step": 7290 }, { "epoch": 1.2084092037742096, "grad_norm": 2.3110857009887695, "learning_rate": 7.458740859668391e-06, "loss": 0.4583, "step": 7300 }, { "epoch": 1.210064558847873, "grad_norm": 2.2038140296936035, "learning_rate": 7.450350336726635e-06, "loss": 0.4352, "step": 7310 }, { "epoch": 1.2117199139215362, "grad_norm": 1.6321648359298706, "learning_rate": 7.441950722612957e-06, "loss": 0.4219, "step": 7320 }, { "epoch": 1.2133752689951995, "grad_norm": 1.9630392789840698, "learning_rate": 7.433542048491201e-06, "loss": 0.4495, "step": 7330 }, { "epoch": 1.2150306240688629, "grad_norm": 2.2168731689453125, "learning_rate": 7.4251243455588266e-06, "loss": 0.4646, "step": 7340 }, { "epoch": 1.2166859791425262, "grad_norm": 2.295635938644409, "learning_rate": 7.416697645046789e-06, "loss": 0.4543, "step": 7350 }, { "epoch": 1.2183413342161893, "grad_norm": 2.0987188816070557, "learning_rate": 7.408261978219426e-06, "loss": 0.464, "step": 7360 }, { "epoch": 1.2199966892898526, "grad_norm": 2.4366235733032227, "learning_rate": 7.399817376374346e-06, "loss": 0.4494, "step": 7370 }, { "epoch": 1.221652044363516, "grad_norm": 2.054619073867798, "learning_rate": 7.391363870842299e-06, "loss": 0.4304, "step": 7380 }, { "epoch": 1.2233073994371793, "grad_norm": 1.9227901697158813, "learning_rate": 7.3829014929870805e-06, "loss": 0.4561, "step": 7390 }, { "epoch": 1.2249627545108426, "grad_norm": 1.8692800998687744, "learning_rate": 7.374430274205395e-06, "loss": 0.46, "step": 7400 }, { "epoch": 1.226618109584506, "grad_norm": 2.2331535816192627, "learning_rate": 7.3659502459267516e-06, "loss": 0.3858, "step": 7410 }, { "epoch": 1.2282734646581692, "grad_norm": 2.1220543384552, "learning_rate": 7.357461439613341e-06, "loss": 0.4511, "step": 7420 }, { "epoch": 1.2299288197318325, "grad_norm": 1.9789310693740845, "learning_rate": 7.348963886759926e-06, "loss": 0.4326, "step": 7430 }, { "epoch": 1.2315841748054959, "grad_norm": 1.7887349128723145, "learning_rate": 7.340457618893717e-06, "loss": 0.4375, "step": 7440 }, { "epoch": 1.233239529879159, "grad_norm": 2.1581339836120605, "learning_rate": 7.331942667574262e-06, "loss": 0.4178, "step": 7450 }, { "epoch": 1.2348948849528223, "grad_norm": 2.1266531944274902, "learning_rate": 7.323419064393321e-06, "loss": 0.4299, "step": 7460 }, { "epoch": 1.2365502400264856, "grad_norm": 1.832108974456787, "learning_rate": 7.3148868409747585e-06, "loss": 0.429, "step": 7470 }, { "epoch": 1.238205595100149, "grad_norm": 1.735487937927246, "learning_rate": 7.306346028974418e-06, "loss": 0.4511, "step": 7480 }, { "epoch": 1.2398609501738123, "grad_norm": 2.0371644496917725, "learning_rate": 7.297796660080011e-06, "loss": 0.471, "step": 7490 }, { "epoch": 1.2415163052474756, "grad_norm": 2.0136220455169678, "learning_rate": 7.289238766010992e-06, "loss": 0.507, "step": 7500 }, { "epoch": 1.243171660321139, "grad_norm": 1.9500830173492432, "learning_rate": 7.280672378518449e-06, "loss": 0.461, "step": 7510 }, { "epoch": 1.2448270153948022, "grad_norm": 2.5668957233428955, "learning_rate": 7.2720975293849824e-06, "loss": 0.4572, "step": 7520 }, { "epoch": 1.2464823704684656, "grad_norm": 1.870226263999939, "learning_rate": 7.263514250424582e-06, "loss": 0.4433, "step": 7530 }, { "epoch": 1.2481377255421289, "grad_norm": 1.7757699489593506, "learning_rate": 7.254922573482518e-06, "loss": 0.4711, "step": 7540 }, { "epoch": 1.2497930806157922, "grad_norm": 2.230046510696411, "learning_rate": 7.246322530435217e-06, "loss": 0.418, "step": 7550 }, { "epoch": 1.2514484356894555, "grad_norm": 1.9823356866836548, "learning_rate": 7.237714153190143e-06, "loss": 0.4646, "step": 7560 }, { "epoch": 1.2531037907631186, "grad_norm": 2.5432960987091064, "learning_rate": 7.229097473685686e-06, "loss": 0.457, "step": 7570 }, { "epoch": 1.254759145836782, "grad_norm": 1.8689072132110596, "learning_rate": 7.220472523891035e-06, "loss": 0.4398, "step": 7580 }, { "epoch": 1.2564145009104453, "grad_norm": 2.0174477100372314, "learning_rate": 7.211839335806061e-06, "loss": 0.4594, "step": 7590 }, { "epoch": 1.2580698559841086, "grad_norm": 2.0547597408294678, "learning_rate": 7.203197941461206e-06, "loss": 0.3943, "step": 7600 }, { "epoch": 1.259725211057772, "grad_norm": 1.7089883089065552, "learning_rate": 7.194548372917356e-06, "loss": 0.4635, "step": 7610 }, { "epoch": 1.2613805661314352, "grad_norm": 1.6787538528442383, "learning_rate": 7.185890662265721e-06, "loss": 0.3988, "step": 7620 }, { "epoch": 1.2630359212050986, "grad_norm": 2.023766279220581, "learning_rate": 7.177224841627724e-06, "loss": 0.4548, "step": 7630 }, { "epoch": 1.2646912762787617, "grad_norm": 1.8672133684158325, "learning_rate": 7.168550943154877e-06, "loss": 0.4203, "step": 7640 }, { "epoch": 1.266346631352425, "grad_norm": 2.4731225967407227, "learning_rate": 7.159868999028658e-06, "loss": 0.4652, "step": 7650 }, { "epoch": 1.2680019864260883, "grad_norm": 1.876103401184082, "learning_rate": 7.151179041460402e-06, "loss": 0.4101, "step": 7660 }, { "epoch": 1.2696573414997516, "grad_norm": 1.7336941957473755, "learning_rate": 7.142481102691167e-06, "loss": 0.3927, "step": 7670 }, { "epoch": 1.271312696573415, "grad_norm": 1.5859344005584717, "learning_rate": 7.133775214991632e-06, "loss": 0.4542, "step": 7680 }, { "epoch": 1.2729680516470783, "grad_norm": 1.7735705375671387, "learning_rate": 7.125061410661959e-06, "loss": 0.4417, "step": 7690 }, { "epoch": 1.2746234067207416, "grad_norm": 2.3329756259918213, "learning_rate": 7.1163397220316865e-06, "loss": 0.4277, "step": 7700 }, { "epoch": 1.276278761794405, "grad_norm": 2.6831562519073486, "learning_rate": 7.107610181459603e-06, "loss": 0.3803, "step": 7710 }, { "epoch": 1.2779341168680682, "grad_norm": 1.7790484428405762, "learning_rate": 7.098872821333633e-06, "loss": 0.4598, "step": 7720 }, { "epoch": 1.2795894719417316, "grad_norm": 1.8431192636489868, "learning_rate": 7.090127674070707e-06, "loss": 0.426, "step": 7730 }, { "epoch": 1.2812448270153949, "grad_norm": 2.489999532699585, "learning_rate": 7.081374772116652e-06, "loss": 0.4192, "step": 7740 }, { "epoch": 1.2829001820890582, "grad_norm": 1.8376771211624146, "learning_rate": 7.07261414794606e-06, "loss": 0.4184, "step": 7750 }, { "epoch": 1.2845555371627215, "grad_norm": 1.8397380113601685, "learning_rate": 7.063845834062178e-06, "loss": 0.4131, "step": 7760 }, { "epoch": 1.2862108922363846, "grad_norm": 1.7695285081863403, "learning_rate": 7.055069862996786e-06, "loss": 0.424, "step": 7770 }, { "epoch": 1.287866247310048, "grad_norm": 1.8908298015594482, "learning_rate": 7.0462862673100675e-06, "loss": 0.4538, "step": 7780 }, { "epoch": 1.2895216023837113, "grad_norm": 2.012000799179077, "learning_rate": 7.037495079590494e-06, "loss": 0.4166, "step": 7790 }, { "epoch": 1.2911769574573746, "grad_norm": 2.016519784927368, "learning_rate": 7.028696332454712e-06, "loss": 0.4505, "step": 7800 }, { "epoch": 1.292832312531038, "grad_norm": 1.9093087911605835, "learning_rate": 7.0198900585474065e-06, "loss": 0.4142, "step": 7810 }, { "epoch": 1.2944876676047012, "grad_norm": 1.9886523485183716, "learning_rate": 7.01107629054119e-06, "loss": 0.4588, "step": 7820 }, { "epoch": 1.2961430226783646, "grad_norm": 2.125847101211548, "learning_rate": 7.0022550611364835e-06, "loss": 0.4451, "step": 7830 }, { "epoch": 1.2977983777520279, "grad_norm": 2.2600979804992676, "learning_rate": 6.993426403061389e-06, "loss": 0.4324, "step": 7840 }, { "epoch": 1.299453732825691, "grad_norm": 2.3956971168518066, "learning_rate": 6.984590349071564e-06, "loss": 0.4473, "step": 7850 }, { "epoch": 1.3011090878993543, "grad_norm": 2.047929286956787, "learning_rate": 6.975746931950116e-06, "loss": 0.4193, "step": 7860 }, { "epoch": 1.3027644429730176, "grad_norm": 1.9703757762908936, "learning_rate": 6.9668961845074615e-06, "loss": 0.4446, "step": 7870 }, { "epoch": 1.304419798046681, "grad_norm": 2.0574028491973877, "learning_rate": 6.95803813958122e-06, "loss": 0.4543, "step": 7880 }, { "epoch": 1.3060751531203443, "grad_norm": 1.9040168523788452, "learning_rate": 6.949172830036084e-06, "loss": 0.3936, "step": 7890 }, { "epoch": 1.3077305081940076, "grad_norm": 2.8889362812042236, "learning_rate": 6.940300288763697e-06, "loss": 0.4244, "step": 7900 }, { "epoch": 1.309385863267671, "grad_norm": 2.0269064903259277, "learning_rate": 6.931420548682535e-06, "loss": 0.4036, "step": 7910 }, { "epoch": 1.3110412183413342, "grad_norm": 3.0732882022857666, "learning_rate": 6.9225336427377835e-06, "loss": 0.459, "step": 7920 }, { "epoch": 1.3126965734149976, "grad_norm": 2.3372762203216553, "learning_rate": 6.9136396039012125e-06, "loss": 0.4699, "step": 7930 }, { "epoch": 1.314351928488661, "grad_norm": 1.708390474319458, "learning_rate": 6.904738465171058e-06, "loss": 0.412, "step": 7940 }, { "epoch": 1.3160072835623242, "grad_norm": 1.5611287355422974, "learning_rate": 6.895830259571894e-06, "loss": 0.4658, "step": 7950 }, { "epoch": 1.3176626386359875, "grad_norm": 2.084836721420288, "learning_rate": 6.886915020154519e-06, "loss": 0.4428, "step": 7960 }, { "epoch": 1.3193179937096506, "grad_norm": 4.934893608093262, "learning_rate": 6.877992779995825e-06, "loss": 0.4382, "step": 7970 }, { "epoch": 1.320973348783314, "grad_norm": 1.7433263063430786, "learning_rate": 6.869063572198678e-06, "loss": 0.4303, "step": 7980 }, { "epoch": 1.3226287038569773, "grad_norm": 2.039020538330078, "learning_rate": 6.860127429891792e-06, "loss": 0.4334, "step": 7990 }, { "epoch": 1.3242840589306406, "grad_norm": 2.0853657722473145, "learning_rate": 6.851184386229617e-06, "loss": 0.4279, "step": 8000 }, { "epoch": 1.325939414004304, "grad_norm": 2.357513904571533, "learning_rate": 6.842234474392201e-06, "loss": 0.4589, "step": 8010 }, { "epoch": 1.3275947690779673, "grad_norm": 1.799069881439209, "learning_rate": 6.833277727585076e-06, "loss": 0.3904, "step": 8020 }, { "epoch": 1.3292501241516306, "grad_norm": 1.7437762022018433, "learning_rate": 6.8243141790391345e-06, "loss": 0.4397, "step": 8030 }, { "epoch": 1.330905479225294, "grad_norm": 2.2342443466186523, "learning_rate": 6.8153438620105005e-06, "loss": 0.4762, "step": 8040 }, { "epoch": 1.332560834298957, "grad_norm": 3.045323133468628, "learning_rate": 6.806366809780415e-06, "loss": 0.4217, "step": 8050 }, { "epoch": 1.3342161893726203, "grad_norm": 1.7698055505752563, "learning_rate": 6.797383055655105e-06, "loss": 0.4276, "step": 8060 }, { "epoch": 1.3358715444462836, "grad_norm": 1.860013723373413, "learning_rate": 6.788392632965661e-06, "loss": 0.3845, "step": 8070 }, { "epoch": 1.337526899519947, "grad_norm": 2.04567551612854, "learning_rate": 6.779395575067919e-06, "loss": 0.4531, "step": 8080 }, { "epoch": 1.3391822545936103, "grad_norm": 1.8434317111968994, "learning_rate": 6.770391915342329e-06, "loss": 0.4137, "step": 8090 }, { "epoch": 1.3408376096672736, "grad_norm": 2.2670910358428955, "learning_rate": 6.761381687193836e-06, "loss": 0.4245, "step": 8100 }, { "epoch": 1.342492964740937, "grad_norm": 2.1821208000183105, "learning_rate": 6.752364924051757e-06, "loss": 0.4651, "step": 8110 }, { "epoch": 1.3441483198146003, "grad_norm": 1.7987016439437866, "learning_rate": 6.7433416593696485e-06, "loss": 0.4426, "step": 8120 }, { "epoch": 1.3458036748882636, "grad_norm": 2.464167833328247, "learning_rate": 6.734311926625198e-06, "loss": 0.4304, "step": 8130 }, { "epoch": 1.347459029961927, "grad_norm": 2.0317628383636475, "learning_rate": 6.725275759320082e-06, "loss": 0.418, "step": 8140 }, { "epoch": 1.3491143850355902, "grad_norm": 1.8057821989059448, "learning_rate": 6.716233190979855e-06, "loss": 0.4229, "step": 8150 }, { "epoch": 1.3507697401092535, "grad_norm": 2.1078262329101562, "learning_rate": 6.707184255153818e-06, "loss": 0.4275, "step": 8160 }, { "epoch": 1.3524250951829169, "grad_norm": 1.794978380203247, "learning_rate": 6.698128985414899e-06, "loss": 0.4377, "step": 8170 }, { "epoch": 1.35408045025658, "grad_norm": 1.7811648845672607, "learning_rate": 6.689067415359522e-06, "loss": 0.4426, "step": 8180 }, { "epoch": 1.3557358053302433, "grad_norm": 2.394949197769165, "learning_rate": 6.6799995786074916e-06, "loss": 0.4604, "step": 8190 }, { "epoch": 1.3573911604039066, "grad_norm": 2.0753695964813232, "learning_rate": 6.6709255088018545e-06, "loss": 0.4313, "step": 8200 }, { "epoch": 1.35904651547757, "grad_norm": 2.000762939453125, "learning_rate": 6.661845239608792e-06, "loss": 0.3965, "step": 8210 }, { "epoch": 1.3607018705512333, "grad_norm": 2.1908514499664307, "learning_rate": 6.652758804717479e-06, "loss": 0.4589, "step": 8220 }, { "epoch": 1.3623572256248966, "grad_norm": 1.8629430532455444, "learning_rate": 6.643666237839973e-06, "loss": 0.4131, "step": 8230 }, { "epoch": 1.36401258069856, "grad_norm": 1.983957290649414, "learning_rate": 6.6345675727110745e-06, "loss": 0.4443, "step": 8240 }, { "epoch": 1.365667935772223, "grad_norm": 1.5461257696151733, "learning_rate": 6.625462843088214e-06, "loss": 0.3976, "step": 8250 }, { "epoch": 1.3673232908458863, "grad_norm": 2.0671067237854004, "learning_rate": 6.616352082751322e-06, "loss": 0.4613, "step": 8260 }, { "epoch": 1.3689786459195497, "grad_norm": 1.9865403175354004, "learning_rate": 6.607235325502703e-06, "loss": 0.3965, "step": 8270 }, { "epoch": 1.370634000993213, "grad_norm": 1.8592555522918701, "learning_rate": 6.598112605166909e-06, "loss": 0.4623, "step": 8280 }, { "epoch": 1.3722893560668763, "grad_norm": 2.026170253753662, "learning_rate": 6.588983955590622e-06, "loss": 0.4354, "step": 8290 }, { "epoch": 1.3739447111405396, "grad_norm": 1.7887972593307495, "learning_rate": 6.5798494106425155e-06, "loss": 0.4408, "step": 8300 }, { "epoch": 1.375600066214203, "grad_norm": 1.863389253616333, "learning_rate": 6.570709004213139e-06, "loss": 0.4266, "step": 8310 }, { "epoch": 1.3772554212878663, "grad_norm": 1.7692625522613525, "learning_rate": 6.56156277021479e-06, "loss": 0.444, "step": 8320 }, { "epoch": 1.3789107763615296, "grad_norm": 2.5655503273010254, "learning_rate": 6.5524107425813834e-06, "loss": 0.408, "step": 8330 }, { "epoch": 1.380566131435193, "grad_norm": 2.5863282680511475, "learning_rate": 6.543252955268335e-06, "loss": 0.4313, "step": 8340 }, { "epoch": 1.3822214865088562, "grad_norm": 1.7978219985961914, "learning_rate": 6.5340894422524246e-06, "loss": 0.4399, "step": 8350 }, { "epoch": 1.3838768415825196, "grad_norm": 1.9100663661956787, "learning_rate": 6.524920237531678e-06, "loss": 0.4277, "step": 8360 }, { "epoch": 1.3855321966561829, "grad_norm": 1.9516162872314453, "learning_rate": 6.515745375125236e-06, "loss": 0.4612, "step": 8370 }, { "epoch": 1.387187551729846, "grad_norm": 1.9329428672790527, "learning_rate": 6.506564889073233e-06, "loss": 0.4383, "step": 8380 }, { "epoch": 1.3888429068035093, "grad_norm": 1.867749810218811, "learning_rate": 6.497378813436667e-06, "loss": 0.4156, "step": 8390 }, { "epoch": 1.3904982618771726, "grad_norm": 2.531853437423706, "learning_rate": 6.488187182297272e-06, "loss": 0.4459, "step": 8400 }, { "epoch": 1.392153616950836, "grad_norm": 1.8974616527557373, "learning_rate": 6.4789900297573985e-06, "loss": 0.4295, "step": 8410 }, { "epoch": 1.3938089720244993, "grad_norm": 2.2630221843719482, "learning_rate": 6.4697873899398756e-06, "loss": 0.4561, "step": 8420 }, { "epoch": 1.3954643270981626, "grad_norm": 2.2150561809539795, "learning_rate": 6.460579296987899e-06, "loss": 0.4594, "step": 8430 }, { "epoch": 1.397119682171826, "grad_norm": 2.0618362426757812, "learning_rate": 6.451365785064887e-06, "loss": 0.4673, "step": 8440 }, { "epoch": 1.3987750372454892, "grad_norm": 1.7820813655853271, "learning_rate": 6.442146888354373e-06, "loss": 0.4342, "step": 8450 }, { "epoch": 1.4004303923191523, "grad_norm": 1.9812285900115967, "learning_rate": 6.4329226410598625e-06, "loss": 0.4662, "step": 8460 }, { "epoch": 1.4020857473928157, "grad_norm": 2.486772060394287, "learning_rate": 6.423693077404713e-06, "loss": 0.4428, "step": 8470 }, { "epoch": 1.403741102466479, "grad_norm": 2.0029282569885254, "learning_rate": 6.4144582316320085e-06, "loss": 0.4074, "step": 8480 }, { "epoch": 1.4053964575401423, "grad_norm": 1.755991816520691, "learning_rate": 6.405218138004428e-06, "loss": 0.4384, "step": 8490 }, { "epoch": 1.4070518126138056, "grad_norm": 2.043971300125122, "learning_rate": 6.395972830804125e-06, "loss": 0.4143, "step": 8500 }, { "epoch": 1.408707167687469, "grad_norm": 2.047865152359009, "learning_rate": 6.386722344332591e-06, "loss": 0.3758, "step": 8510 }, { "epoch": 1.4103625227611323, "grad_norm": 1.7395375967025757, "learning_rate": 6.3774667129105374e-06, "loss": 0.4224, "step": 8520 }, { "epoch": 1.4120178778347956, "grad_norm": 1.747536063194275, "learning_rate": 6.36820597087776e-06, "loss": 0.4524, "step": 8530 }, { "epoch": 1.413673232908459, "grad_norm": 1.9488874673843384, "learning_rate": 6.358940152593021e-06, "loss": 0.4208, "step": 8540 }, { "epoch": 1.4153285879821222, "grad_norm": 2.2660651206970215, "learning_rate": 6.349669292433913e-06, "loss": 0.426, "step": 8550 }, { "epoch": 1.4169839430557856, "grad_norm": 2.1231184005737305, "learning_rate": 6.340393424796735e-06, "loss": 0.432, "step": 8560 }, { "epoch": 1.418639298129449, "grad_norm": 2.2942655086517334, "learning_rate": 6.331112584096364e-06, "loss": 0.4558, "step": 8570 }, { "epoch": 1.420294653203112, "grad_norm": 2.481250762939453, "learning_rate": 6.3218268047661316e-06, "loss": 0.3894, "step": 8580 }, { "epoch": 1.4219500082767753, "grad_norm": 1.7492702007293701, "learning_rate": 6.312536121257685e-06, "loss": 0.4185, "step": 8590 }, { "epoch": 1.4236053633504386, "grad_norm": 1.907914400100708, "learning_rate": 6.303240568040875e-06, "loss": 0.4708, "step": 8600 }, { "epoch": 1.425260718424102, "grad_norm": 2.4471542835235596, "learning_rate": 6.293940179603614e-06, "loss": 0.3845, "step": 8610 }, { "epoch": 1.4269160734977653, "grad_norm": 2.4658238887786865, "learning_rate": 6.284634990451755e-06, "loss": 0.3813, "step": 8620 }, { "epoch": 1.4285714285714286, "grad_norm": 1.6748656034469604, "learning_rate": 6.275325035108966e-06, "loss": 0.4004, "step": 8630 }, { "epoch": 1.430226783645092, "grad_norm": 2.4648280143737793, "learning_rate": 6.266010348116592e-06, "loss": 0.4744, "step": 8640 }, { "epoch": 1.4318821387187552, "grad_norm": 1.831211805343628, "learning_rate": 6.256690964033537e-06, "loss": 0.4707, "step": 8650 }, { "epoch": 1.4335374937924183, "grad_norm": 1.9654415845870972, "learning_rate": 6.247366917436135e-06, "loss": 0.4265, "step": 8660 }, { "epoch": 1.4351928488660817, "grad_norm": 1.9499164819717407, "learning_rate": 6.238038242918012e-06, "loss": 0.4302, "step": 8670 }, { "epoch": 1.436848203939745, "grad_norm": 1.837054967880249, "learning_rate": 6.228704975089966e-06, "loss": 0.4066, "step": 8680 }, { "epoch": 1.4385035590134083, "grad_norm": 2.007383108139038, "learning_rate": 6.2193671485798414e-06, "loss": 0.4104, "step": 8690 }, { "epoch": 1.4401589140870716, "grad_norm": 1.9571911096572876, "learning_rate": 6.2100247980323925e-06, "loss": 0.4567, "step": 8700 }, { "epoch": 1.441814269160735, "grad_norm": 1.5203781127929688, "learning_rate": 6.200677958109156e-06, "loss": 0.4417, "step": 8710 }, { "epoch": 1.4434696242343983, "grad_norm": 2.683063268661499, "learning_rate": 6.191326663488331e-06, "loss": 0.4542, "step": 8720 }, { "epoch": 1.4451249793080616, "grad_norm": 2.251321792602539, "learning_rate": 6.181970948864637e-06, "loss": 0.45, "step": 8730 }, { "epoch": 1.446780334381725, "grad_norm": 1.943559169769287, "learning_rate": 6.172610848949201e-06, "loss": 0.42, "step": 8740 }, { "epoch": 1.4484356894553883, "grad_norm": 1.8431415557861328, "learning_rate": 6.163246398469413e-06, "loss": 0.4268, "step": 8750 }, { "epoch": 1.4500910445290516, "grad_norm": 1.8626545667648315, "learning_rate": 6.153877632168805e-06, "loss": 0.4441, "step": 8760 }, { "epoch": 1.451746399602715, "grad_norm": 1.4640800952911377, "learning_rate": 6.144504584806924e-06, "loss": 0.4061, "step": 8770 }, { "epoch": 1.4534017546763782, "grad_norm": 2.071951150894165, "learning_rate": 6.135127291159201e-06, "loss": 0.4492, "step": 8780 }, { "epoch": 1.4550571097500413, "grad_norm": 2.0795013904571533, "learning_rate": 6.125745786016818e-06, "loss": 0.4285, "step": 8790 }, { "epoch": 1.4567124648237046, "grad_norm": 1.7705730199813843, "learning_rate": 6.116360104186586e-06, "loss": 0.3959, "step": 8800 }, { "epoch": 1.458367819897368, "grad_norm": 2.478497266769409, "learning_rate": 6.106970280490807e-06, "loss": 0.4817, "step": 8810 }, { "epoch": 1.4600231749710313, "grad_norm": 1.7252123355865479, "learning_rate": 6.097576349767155e-06, "loss": 0.4097, "step": 8820 }, { "epoch": 1.4616785300446946, "grad_norm": 1.9439191818237305, "learning_rate": 6.08817834686854e-06, "loss": 0.422, "step": 8830 }, { "epoch": 1.463333885118358, "grad_norm": 1.86637282371521, "learning_rate": 6.07877630666298e-06, "loss": 0.4097, "step": 8840 }, { "epoch": 1.4649892401920213, "grad_norm": 2.0580270290374756, "learning_rate": 6.069370264033472e-06, "loss": 0.4683, "step": 8850 }, { "epoch": 1.4666445952656844, "grad_norm": 1.9036422967910767, "learning_rate": 6.059960253877861e-06, "loss": 0.4154, "step": 8860 }, { "epoch": 1.4682999503393477, "grad_norm": 2.2205872535705566, "learning_rate": 6.050546311108718e-06, "loss": 0.432, "step": 8870 }, { "epoch": 1.469955305413011, "grad_norm": 1.7262464761734009, "learning_rate": 6.041128470653197e-06, "loss": 0.4073, "step": 8880 }, { "epoch": 1.4716106604866743, "grad_norm": 1.961290955543518, "learning_rate": 6.0317067674529186e-06, "loss": 0.4177, "step": 8890 }, { "epoch": 1.4732660155603376, "grad_norm": 1.9213963747024536, "learning_rate": 6.022281236463829e-06, "loss": 0.4509, "step": 8900 }, { "epoch": 1.474921370634001, "grad_norm": 2.405870199203491, "learning_rate": 6.012851912656084e-06, "loss": 0.4234, "step": 8910 }, { "epoch": 1.4765767257076643, "grad_norm": 1.8768267631530762, "learning_rate": 6.003418831013908e-06, "loss": 0.4133, "step": 8920 }, { "epoch": 1.4782320807813276, "grad_norm": 1.9255216121673584, "learning_rate": 5.993982026535461e-06, "loss": 0.4286, "step": 8930 }, { "epoch": 1.479887435854991, "grad_norm": 1.9891632795333862, "learning_rate": 5.984541534232725e-06, "loss": 0.4363, "step": 8940 }, { "epoch": 1.4815427909286543, "grad_norm": 2.4171807765960693, "learning_rate": 5.97509738913136e-06, "loss": 0.4239, "step": 8950 }, { "epoch": 1.4831981460023176, "grad_norm": 2.0096046924591064, "learning_rate": 5.96564962627058e-06, "loss": 0.4461, "step": 8960 }, { "epoch": 1.484853501075981, "grad_norm": 2.0493736267089844, "learning_rate": 5.956198280703016e-06, "loss": 0.4759, "step": 8970 }, { "epoch": 1.4865088561496442, "grad_norm": 2.407358169555664, "learning_rate": 5.946743387494598e-06, "loss": 0.4372, "step": 8980 }, { "epoch": 1.4881642112233073, "grad_norm": 2.0386157035827637, "learning_rate": 5.937284981724416e-06, "loss": 0.4227, "step": 8990 }, { "epoch": 1.4898195662969707, "grad_norm": 1.7787805795669556, "learning_rate": 5.9278230984845934e-06, "loss": 0.384, "step": 9000 }, { "epoch": 1.491474921370634, "grad_norm": 2.143181085586548, "learning_rate": 5.9183577728801524e-06, "loss": 0.4499, "step": 9010 }, { "epoch": 1.4931302764442973, "grad_norm": 2.1203973293304443, "learning_rate": 5.908889040028887e-06, "loss": 0.4441, "step": 9020 }, { "epoch": 1.4947856315179606, "grad_norm": 1.7282978296279907, "learning_rate": 5.899416935061237e-06, "loss": 0.3813, "step": 9030 }, { "epoch": 1.496440986591624, "grad_norm": 1.6689910888671875, "learning_rate": 5.889941493120151e-06, "loss": 0.4233, "step": 9040 }, { "epoch": 1.4980963416652873, "grad_norm": 1.9057822227478027, "learning_rate": 5.880462749360956e-06, "loss": 0.4192, "step": 9050 }, { "epoch": 1.4997516967389504, "grad_norm": 2.1098954677581787, "learning_rate": 5.8709807389512294e-06, "loss": 0.431, "step": 9060 }, { "epoch": 1.5014070518126137, "grad_norm": 1.6635946035385132, "learning_rate": 5.861495497070675e-06, "loss": 0.4421, "step": 9070 }, { "epoch": 1.503062406886277, "grad_norm": 1.9180066585540771, "learning_rate": 5.8520070589109755e-06, "loss": 0.4599, "step": 9080 }, { "epoch": 1.5047177619599403, "grad_norm": 1.7293899059295654, "learning_rate": 5.842515459675681e-06, "loss": 0.4043, "step": 9090 }, { "epoch": 1.5063731170336037, "grad_norm": 2.8191874027252197, "learning_rate": 5.833020734580065e-06, "loss": 0.4037, "step": 9100 }, { "epoch": 1.508028472107267, "grad_norm": 1.7623112201690674, "learning_rate": 5.823522918851e-06, "loss": 0.4251, "step": 9110 }, { "epoch": 1.5096838271809303, "grad_norm": 2.5156078338623047, "learning_rate": 5.814022047726826e-06, "loss": 0.4173, "step": 9120 }, { "epoch": 1.5113391822545936, "grad_norm": 2.159651279449463, "learning_rate": 5.804518156457216e-06, "loss": 0.4365, "step": 9130 }, { "epoch": 1.512994537328257, "grad_norm": 1.5260064601898193, "learning_rate": 5.7950112803030504e-06, "loss": 0.4707, "step": 9140 }, { "epoch": 1.5146498924019203, "grad_norm": 1.9621107578277588, "learning_rate": 5.785501454536286e-06, "loss": 0.4374, "step": 9150 }, { "epoch": 1.5163052474755836, "grad_norm": 1.5603402853012085, "learning_rate": 5.775988714439817e-06, "loss": 0.457, "step": 9160 }, { "epoch": 1.517960602549247, "grad_norm": 1.9429376125335693, "learning_rate": 5.766473095307357e-06, "loss": 0.4234, "step": 9170 }, { "epoch": 1.5196159576229102, "grad_norm": 1.9809801578521729, "learning_rate": 5.756954632443297e-06, "loss": 0.4448, "step": 9180 }, { "epoch": 1.5212713126965736, "grad_norm": 2.2237918376922607, "learning_rate": 5.747433361162581e-06, "loss": 0.3984, "step": 9190 }, { "epoch": 1.5229266677702367, "grad_norm": 2.021394729614258, "learning_rate": 5.737909316790571e-06, "loss": 0.4425, "step": 9200 }, { "epoch": 1.5245820228439, "grad_norm": 2.0034067630767822, "learning_rate": 5.728382534662917e-06, "loss": 0.4359, "step": 9210 }, { "epoch": 1.5262373779175633, "grad_norm": 1.3382004499435425, "learning_rate": 5.718853050125429e-06, "loss": 0.4135, "step": 9220 }, { "epoch": 1.5278927329912266, "grad_norm": 2.4722602367401123, "learning_rate": 5.709320898533942e-06, "loss": 0.4485, "step": 9230 }, { "epoch": 1.52954808806489, "grad_norm": 1.733469843864441, "learning_rate": 5.699786115254187e-06, "loss": 0.4223, "step": 9240 }, { "epoch": 1.531203443138553, "grad_norm": 2.123274087905884, "learning_rate": 5.690248735661655e-06, "loss": 0.4305, "step": 9250 }, { "epoch": 1.5328587982122164, "grad_norm": 1.6809306144714355, "learning_rate": 5.680708795141478e-06, "loss": 0.4472, "step": 9260 }, { "epoch": 1.5345141532858797, "grad_norm": 1.8373470306396484, "learning_rate": 5.671166329088278e-06, "loss": 0.4031, "step": 9270 }, { "epoch": 1.536169508359543, "grad_norm": 2.4719669818878174, "learning_rate": 5.661621372906058e-06, "loss": 0.3956, "step": 9280 }, { "epoch": 1.5378248634332063, "grad_norm": 2.367678642272949, "learning_rate": 5.652073962008054e-06, "loss": 0.4775, "step": 9290 }, { "epoch": 1.5394802185068697, "grad_norm": 2.1993610858917236, "learning_rate": 5.64252413181661e-06, "loss": 0.4199, "step": 9300 }, { "epoch": 1.541135573580533, "grad_norm": 2.5951197147369385, "learning_rate": 5.632971917763047e-06, "loss": 0.4151, "step": 9310 }, { "epoch": 1.5427909286541963, "grad_norm": 2.3838794231414795, "learning_rate": 5.623417355287532e-06, "loss": 0.4379, "step": 9320 }, { "epoch": 1.5444462837278596, "grad_norm": 2.005969285964966, "learning_rate": 5.61386047983894e-06, "loss": 0.4107, "step": 9330 }, { "epoch": 1.546101638801523, "grad_norm": 2.2176222801208496, "learning_rate": 5.604301326874729e-06, "loss": 0.451, "step": 9340 }, { "epoch": 1.5477569938751863, "grad_norm": 2.1976559162139893, "learning_rate": 5.594739931860812e-06, "loss": 0.4042, "step": 9350 }, { "epoch": 1.5494123489488496, "grad_norm": 2.2575743198394775, "learning_rate": 5.585176330271417e-06, "loss": 0.4359, "step": 9360 }, { "epoch": 1.551067704022513, "grad_norm": 2.771317720413208, "learning_rate": 5.575610557588955e-06, "loss": 0.4128, "step": 9370 }, { "epoch": 1.5527230590961763, "grad_norm": 1.7214008569717407, "learning_rate": 5.566042649303899e-06, "loss": 0.4136, "step": 9380 }, { "epoch": 1.5543784141698396, "grad_norm": 2.055534601211548, "learning_rate": 5.556472640914639e-06, "loss": 0.4742, "step": 9390 }, { "epoch": 1.556033769243503, "grad_norm": 2.1422717571258545, "learning_rate": 5.5469005679273616e-06, "loss": 0.4336, "step": 9400 }, { "epoch": 1.557689124317166, "grad_norm": 2.063735246658325, "learning_rate": 5.537326465855911e-06, "loss": 0.4168, "step": 9410 }, { "epoch": 1.5593444793908293, "grad_norm": 2.0593113899230957, "learning_rate": 5.527750370221661e-06, "loss": 0.4187, "step": 9420 }, { "epoch": 1.5609998344644926, "grad_norm": 2.017012596130371, "learning_rate": 5.518172316553378e-06, "loss": 0.448, "step": 9430 }, { "epoch": 1.562655189538156, "grad_norm": 1.9413402080535889, "learning_rate": 5.5085923403871e-06, "loss": 0.4058, "step": 9440 }, { "epoch": 1.5643105446118193, "grad_norm": 1.7295361757278442, "learning_rate": 5.4990104772659895e-06, "loss": 0.4131, "step": 9450 }, { "epoch": 1.5659658996854824, "grad_norm": 2.06921124458313, "learning_rate": 5.489426762740218e-06, "loss": 0.4081, "step": 9460 }, { "epoch": 1.5676212547591457, "grad_norm": 1.8260921239852905, "learning_rate": 5.479841232366818e-06, "loss": 0.3937, "step": 9470 }, { "epoch": 1.569276609832809, "grad_norm": 2.1799001693725586, "learning_rate": 5.470253921709565e-06, "loss": 0.3873, "step": 9480 }, { "epoch": 1.5709319649064724, "grad_norm": 2.076521158218384, "learning_rate": 5.46066486633884e-06, "loss": 0.4254, "step": 9490 }, { "epoch": 1.5725873199801357, "grad_norm": 2.1282382011413574, "learning_rate": 5.451074101831492e-06, "loss": 0.3999, "step": 9500 }, { "epoch": 1.574242675053799, "grad_norm": 2.0794641971588135, "learning_rate": 5.4414816637707115e-06, "loss": 0.3946, "step": 9510 }, { "epoch": 1.5758980301274623, "grad_norm": 2.05676531791687, "learning_rate": 5.431887587745906e-06, "loss": 0.3978, "step": 9520 }, { "epoch": 1.5775533852011256, "grad_norm": 2.6630096435546875, "learning_rate": 5.422291909352554e-06, "loss": 0.4185, "step": 9530 }, { "epoch": 1.579208740274789, "grad_norm": 2.0524003505706787, "learning_rate": 5.4126946641920766e-06, "loss": 0.4344, "step": 9540 }, { "epoch": 1.5808640953484523, "grad_norm": 2.0351216793060303, "learning_rate": 5.403095887871712e-06, "loss": 0.3743, "step": 9550 }, { "epoch": 1.5825194504221156, "grad_norm": 1.9727466106414795, "learning_rate": 5.393495616004382e-06, "loss": 0.4016, "step": 9560 }, { "epoch": 1.584174805495779, "grad_norm": 2.116448402404785, "learning_rate": 5.383893884208548e-06, "loss": 0.4173, "step": 9570 }, { "epoch": 1.5858301605694423, "grad_norm": 2.624253273010254, "learning_rate": 5.3742907281080956e-06, "loss": 0.4528, "step": 9580 }, { "epoch": 1.5874855156431056, "grad_norm": 1.7432576417922974, "learning_rate": 5.364686183332194e-06, "loss": 0.3978, "step": 9590 }, { "epoch": 1.589140870716769, "grad_norm": 2.323892831802368, "learning_rate": 5.35508028551516e-06, "loss": 0.4039, "step": 9600 }, { "epoch": 1.590796225790432, "grad_norm": 1.9422341585159302, "learning_rate": 5.345473070296337e-06, "loss": 0.42, "step": 9610 }, { "epoch": 1.5924515808640953, "grad_norm": 1.8938024044036865, "learning_rate": 5.335864573319951e-06, "loss": 0.3762, "step": 9620 }, { "epoch": 1.5941069359377587, "grad_norm": 2.0808968544006348, "learning_rate": 5.326254830234984e-06, "loss": 0.4134, "step": 9630 }, { "epoch": 1.595762291011422, "grad_norm": 1.8897898197174072, "learning_rate": 5.316643876695043e-06, "loss": 0.4059, "step": 9640 }, { "epoch": 1.5974176460850853, "grad_norm": 1.8060656785964966, "learning_rate": 5.307031748358227e-06, "loss": 0.4234, "step": 9650 }, { "epoch": 1.5990730011587484, "grad_norm": 1.8971941471099854, "learning_rate": 5.29741848088699e-06, "loss": 0.4222, "step": 9660 }, { "epoch": 1.6007283562324117, "grad_norm": 2.0121896266937256, "learning_rate": 5.2878041099480145e-06, "loss": 0.3959, "step": 9670 }, { "epoch": 1.602383711306075, "grad_norm": 2.076582908630371, "learning_rate": 5.278188671212079e-06, "loss": 0.4592, "step": 9680 }, { "epoch": 1.6040390663797384, "grad_norm": 2.6054913997650146, "learning_rate": 5.2685722003539215e-06, "loss": 0.4128, "step": 9690 }, { "epoch": 1.6056944214534017, "grad_norm": 2.281392812728882, "learning_rate": 5.258954733052109e-06, "loss": 0.4024, "step": 9700 }, { "epoch": 1.607349776527065, "grad_norm": 2.435349225997925, "learning_rate": 5.249336304988904e-06, "loss": 0.4275, "step": 9710 }, { "epoch": 1.6090051316007283, "grad_norm": 1.3645190000534058, "learning_rate": 5.239716951850136e-06, "loss": 0.3877, "step": 9720 }, { "epoch": 1.6106604866743917, "grad_norm": 2.2975997924804688, "learning_rate": 5.230096709325069e-06, "loss": 0.4439, "step": 9730 }, { "epoch": 1.612315841748055, "grad_norm": 2.102614164352417, "learning_rate": 5.220475613106261e-06, "loss": 0.4095, "step": 9740 }, { "epoch": 1.6139711968217183, "grad_norm": 1.812879204750061, "learning_rate": 5.210853698889442e-06, "loss": 0.3996, "step": 9750 }, { "epoch": 1.6156265518953816, "grad_norm": 1.8167248964309692, "learning_rate": 5.201231002373374e-06, "loss": 0.3547, "step": 9760 }, { "epoch": 1.617281906969045, "grad_norm": 1.8382624387741089, "learning_rate": 5.191607559259723e-06, "loss": 0.418, "step": 9770 }, { "epoch": 1.6189372620427083, "grad_norm": 1.9886109828948975, "learning_rate": 5.181983405252925e-06, "loss": 0.4426, "step": 9780 }, { "epoch": 1.6205926171163716, "grad_norm": 2.0963995456695557, "learning_rate": 5.172358576060052e-06, "loss": 0.438, "step": 9790 }, { "epoch": 1.622247972190035, "grad_norm": 2.365187168121338, "learning_rate": 5.162733107390684e-06, "loss": 0.4224, "step": 9800 }, { "epoch": 1.623903327263698, "grad_norm": 2.2666516304016113, "learning_rate": 5.153107034956772e-06, "loss": 0.4314, "step": 9810 }, { "epoch": 1.6255586823373613, "grad_norm": 2.123664379119873, "learning_rate": 5.143480394472504e-06, "loss": 0.409, "step": 9820 }, { "epoch": 1.6272140374110247, "grad_norm": 2.0788767337799072, "learning_rate": 5.13385322165418e-06, "loss": 0.4135, "step": 9830 }, { "epoch": 1.628869392484688, "grad_norm": 2.1448142528533936, "learning_rate": 5.124225552220073e-06, "loss": 0.3914, "step": 9840 }, { "epoch": 1.6305247475583513, "grad_norm": 1.6150662899017334, "learning_rate": 5.114597421890302e-06, "loss": 0.3785, "step": 9850 }, { "epoch": 1.6321801026320144, "grad_norm": 2.1484649181365967, "learning_rate": 5.104968866386687e-06, "loss": 0.4356, "step": 9860 }, { "epoch": 1.6338354577056777, "grad_norm": 1.89021635055542, "learning_rate": 5.095339921432636e-06, "loss": 0.3862, "step": 9870 }, { "epoch": 1.635490812779341, "grad_norm": 2.2360267639160156, "learning_rate": 5.085710622752994e-06, "loss": 0.4052, "step": 9880 }, { "epoch": 1.6371461678530044, "grad_norm": 1.9827935695648193, "learning_rate": 5.076081006073925e-06, "loss": 0.4089, "step": 9890 }, { "epoch": 1.6388015229266677, "grad_norm": 2.240103006362915, "learning_rate": 5.0664511071227676e-06, "loss": 0.3983, "step": 9900 }, { "epoch": 1.640456878000331, "grad_norm": 2.3089191913604736, "learning_rate": 5.0568209616279095e-06, "loss": 0.4148, "step": 9910 }, { "epoch": 1.6421122330739943, "grad_norm": 2.1113412380218506, "learning_rate": 5.047190605318652e-06, "loss": 0.4184, "step": 9920 }, { "epoch": 1.6437675881476577, "grad_norm": 1.8843637704849243, "learning_rate": 5.0375600739250855e-06, "loss": 0.3846, "step": 9930 }, { "epoch": 1.645422943221321, "grad_norm": 2.040696859359741, "learning_rate": 5.027929403177936e-06, "loss": 0.3744, "step": 9940 }, { "epoch": 1.6470782982949843, "grad_norm": 1.737169861793518, "learning_rate": 5.01829862880846e-06, "loss": 0.3958, "step": 9950 }, { "epoch": 1.6487336533686476, "grad_norm": 1.7232611179351807, "learning_rate": 5.008667786548291e-06, "loss": 0.4123, "step": 9960 }, { "epoch": 1.650389008442311, "grad_norm": 2.533484935760498, "learning_rate": 4.999036912129319e-06, "loss": 0.4368, "step": 9970 }, { "epoch": 1.6520443635159743, "grad_norm": 2.971538782119751, "learning_rate": 4.989406041283549e-06, "loss": 0.4178, "step": 9980 }, { "epoch": 1.6536997185896376, "grad_norm": 2.1657238006591797, "learning_rate": 4.9797752097429744e-06, "loss": 0.4145, "step": 9990 }, { "epoch": 1.655355073663301, "grad_norm": 2.1859874725341797, "learning_rate": 4.970144453239443e-06, "loss": 0.4099, "step": 10000 }, { "epoch": 1.657010428736964, "grad_norm": 1.8412467241287231, "learning_rate": 4.960513807504523e-06, "loss": 0.3941, "step": 10010 }, { "epoch": 1.6586657838106273, "grad_norm": 1.6499814987182617, "learning_rate": 4.950883308269378e-06, "loss": 0.4175, "step": 10020 }, { "epoch": 1.6603211388842907, "grad_norm": 2.210827350616455, "learning_rate": 4.941252991264619e-06, "loss": 0.4301, "step": 10030 }, { "epoch": 1.661976493957954, "grad_norm": 1.6489720344543457, "learning_rate": 4.931622892220184e-06, "loss": 0.4519, "step": 10040 }, { "epoch": 1.6636318490316173, "grad_norm": 2.0074875354766846, "learning_rate": 4.921993046865205e-06, "loss": 0.4016, "step": 10050 }, { "epoch": 1.6652872041052804, "grad_norm": 1.9746689796447754, "learning_rate": 4.9123634909278705e-06, "loss": 0.4213, "step": 10060 }, { "epoch": 1.6669425591789437, "grad_norm": 1.6770844459533691, "learning_rate": 4.902734260135293e-06, "loss": 0.3922, "step": 10070 }, { "epoch": 1.668597914252607, "grad_norm": 2.5734076499938965, "learning_rate": 4.893105390213386e-06, "loss": 0.4437, "step": 10080 }, { "epoch": 1.6702532693262704, "grad_norm": 2.2573366165161133, "learning_rate": 4.883476916886716e-06, "loss": 0.4324, "step": 10090 }, { "epoch": 1.6719086243999337, "grad_norm": 1.611232042312622, "learning_rate": 4.8738488758783835e-06, "loss": 0.4148, "step": 10100 }, { "epoch": 1.673563979473597, "grad_norm": 1.7315454483032227, "learning_rate": 4.864221302909882e-06, "loss": 0.4148, "step": 10110 }, { "epoch": 1.6752193345472604, "grad_norm": 1.6441781520843506, "learning_rate": 4.854594233700969e-06, "loss": 0.4179, "step": 10120 }, { "epoch": 1.6768746896209237, "grad_norm": 1.7164368629455566, "learning_rate": 4.844967703969532e-06, "loss": 0.399, "step": 10130 }, { "epoch": 1.678530044694587, "grad_norm": 1.6734899282455444, "learning_rate": 4.835341749431464e-06, "loss": 0.421, "step": 10140 }, { "epoch": 1.6801853997682503, "grad_norm": 1.6350212097167969, "learning_rate": 4.825716405800513e-06, "loss": 0.4089, "step": 10150 }, { "epoch": 1.6818407548419136, "grad_norm": 2.038209915161133, "learning_rate": 4.816091708788168e-06, "loss": 0.4091, "step": 10160 }, { "epoch": 1.683496109915577, "grad_norm": 1.6218624114990234, "learning_rate": 4.806467694103516e-06, "loss": 0.411, "step": 10170 }, { "epoch": 1.6851514649892403, "grad_norm": 1.921259880065918, "learning_rate": 4.796844397453115e-06, "loss": 0.4145, "step": 10180 }, { "epoch": 1.6868068200629036, "grad_norm": 2.2492053508758545, "learning_rate": 4.787221854540853e-06, "loss": 0.4506, "step": 10190 }, { "epoch": 1.688462175136567, "grad_norm": 1.9784705638885498, "learning_rate": 4.7776001010678305e-06, "loss": 0.4293, "step": 10200 }, { "epoch": 1.6901175302102303, "grad_norm": 1.732125997543335, "learning_rate": 4.767979172732212e-06, "loss": 0.412, "step": 10210 }, { "epoch": 1.6917728852838934, "grad_norm": 1.7823375463485718, "learning_rate": 4.758359105229103e-06, "loss": 0.4042, "step": 10220 }, { "epoch": 1.6934282403575567, "grad_norm": 1.6599950790405273, "learning_rate": 4.748739934250416e-06, "loss": 0.3674, "step": 10230 }, { "epoch": 1.69508359543122, "grad_norm": 1.7613921165466309, "learning_rate": 4.739121695484734e-06, "loss": 0.406, "step": 10240 }, { "epoch": 1.6967389505048833, "grad_norm": 1.9368239641189575, "learning_rate": 4.7295044246171865e-06, "loss": 0.4133, "step": 10250 }, { "epoch": 1.6983943055785466, "grad_norm": 2.9349164962768555, "learning_rate": 4.719888157329309e-06, "loss": 0.4141, "step": 10260 }, { "epoch": 1.7000496606522097, "grad_norm": 1.70059335231781, "learning_rate": 4.710272929298912e-06, "loss": 0.3912, "step": 10270 }, { "epoch": 1.701705015725873, "grad_norm": 1.844007968902588, "learning_rate": 4.700658776199952e-06, "loss": 0.4032, "step": 10280 }, { "epoch": 1.7033603707995364, "grad_norm": 1.9813511371612549, "learning_rate": 4.691045733702398e-06, "loss": 0.4067, "step": 10290 }, { "epoch": 1.7050157258731997, "grad_norm": 2.3343167304992676, "learning_rate": 4.681433837472097e-06, "loss": 0.4264, "step": 10300 }, { "epoch": 1.706671080946863, "grad_norm": 2.3405990600585938, "learning_rate": 4.671823123170646e-06, "loss": 0.3879, "step": 10310 }, { "epoch": 1.7083264360205264, "grad_norm": 2.8165476322174072, "learning_rate": 4.662213626455253e-06, "loss": 0.4121, "step": 10320 }, { "epoch": 1.7099817910941897, "grad_norm": 3.3409717082977295, "learning_rate": 4.65260538297861e-06, "loss": 0.4053, "step": 10330 }, { "epoch": 1.711637146167853, "grad_norm": 2.1133646965026855, "learning_rate": 4.642998428388761e-06, "loss": 0.4122, "step": 10340 }, { "epoch": 1.7132925012415163, "grad_norm": 2.1685373783111572, "learning_rate": 4.633392798328966e-06, "loss": 0.4506, "step": 10350 }, { "epoch": 1.7149478563151797, "grad_norm": 2.0110175609588623, "learning_rate": 4.623788528437571e-06, "loss": 0.4, "step": 10360 }, { "epoch": 1.716603211388843, "grad_norm": 2.60198712348938, "learning_rate": 4.614185654347877e-06, "loss": 0.4176, "step": 10370 }, { "epoch": 1.7182585664625063, "grad_norm": 1.7562052011489868, "learning_rate": 4.604584211688004e-06, "loss": 0.3981, "step": 10380 }, { "epoch": 1.7199139215361696, "grad_norm": 2.134413957595825, "learning_rate": 4.594984236080765e-06, "loss": 0.4336, "step": 10390 }, { "epoch": 1.721569276609833, "grad_norm": 2.218580961227417, "learning_rate": 4.585385763143526e-06, "loss": 0.4235, "step": 10400 }, { "epoch": 1.7232246316834963, "grad_norm": 1.7758234739303589, "learning_rate": 4.575788828488078e-06, "loss": 0.4221, "step": 10410 }, { "epoch": 1.7248799867571594, "grad_norm": 1.8942153453826904, "learning_rate": 4.566193467720506e-06, "loss": 0.4035, "step": 10420 }, { "epoch": 1.7265353418308227, "grad_norm": 1.9919090270996094, "learning_rate": 4.556599716441062e-06, "loss": 0.4017, "step": 10430 }, { "epoch": 1.728190696904486, "grad_norm": 2.020827293395996, "learning_rate": 4.547007610244015e-06, "loss": 0.4353, "step": 10440 }, { "epoch": 1.7298460519781493, "grad_norm": 2.077829360961914, "learning_rate": 4.537417184717536e-06, "loss": 0.4721, "step": 10450 }, { "epoch": 1.7315014070518127, "grad_norm": 3.0025055408477783, "learning_rate": 4.527828475443562e-06, "loss": 0.4413, "step": 10460 }, { "epoch": 1.7331567621254758, "grad_norm": 2.1447229385375977, "learning_rate": 4.518241517997657e-06, "loss": 0.4025, "step": 10470 }, { "epoch": 1.734812117199139, "grad_norm": 1.9372047185897827, "learning_rate": 4.508656347948896e-06, "loss": 0.455, "step": 10480 }, { "epoch": 1.7364674722728024, "grad_norm": 2.317401647567749, "learning_rate": 4.4990730008597115e-06, "loss": 0.4008, "step": 10490 }, { "epoch": 1.7381228273464657, "grad_norm": 1.678004264831543, "learning_rate": 4.489491512285776e-06, "loss": 0.4216, "step": 10500 }, { "epoch": 1.739778182420129, "grad_norm": 1.9995408058166504, "learning_rate": 4.47991191777587e-06, "loss": 0.3926, "step": 10510 }, { "epoch": 1.7414335374937924, "grad_norm": 2.1435697078704834, "learning_rate": 4.470334252871743e-06, "loss": 0.398, "step": 10520 }, { "epoch": 1.7430888925674557, "grad_norm": 1.8928353786468506, "learning_rate": 4.4607585531079844e-06, "loss": 0.4211, "step": 10530 }, { "epoch": 1.744744247641119, "grad_norm": 2.309457302093506, "learning_rate": 4.451184854011898e-06, "loss": 0.4261, "step": 10540 }, { "epoch": 1.7463996027147823, "grad_norm": 1.7776364088058472, "learning_rate": 4.44161319110336e-06, "loss": 0.4415, "step": 10550 }, { "epoch": 1.7480549577884457, "grad_norm": 1.907957911491394, "learning_rate": 4.432043599894694e-06, "loss": 0.4271, "step": 10560 }, { "epoch": 1.749710312862109, "grad_norm": 2.8148648738861084, "learning_rate": 4.422476115890537e-06, "loss": 0.4588, "step": 10570 }, { "epoch": 1.7513656679357723, "grad_norm": 1.931691288948059, "learning_rate": 4.412910774587708e-06, "loss": 0.4122, "step": 10580 }, { "epoch": 1.7530210230094356, "grad_norm": 2.7117698192596436, "learning_rate": 4.403347611475073e-06, "loss": 0.442, "step": 10590 }, { "epoch": 1.754676378083099, "grad_norm": 1.596501111984253, "learning_rate": 4.393786662033424e-06, "loss": 0.4256, "step": 10600 }, { "epoch": 1.7563317331567623, "grad_norm": 2.3910441398620605, "learning_rate": 4.384227961735334e-06, "loss": 0.458, "step": 10610 }, { "epoch": 1.7579870882304254, "grad_norm": 2.2331271171569824, "learning_rate": 4.374671546045031e-06, "loss": 0.3796, "step": 10620 }, { "epoch": 1.7596424433040887, "grad_norm": 1.9223552942276, "learning_rate": 4.365117450418274e-06, "loss": 0.4167, "step": 10630 }, { "epoch": 1.761297798377752, "grad_norm": 2.403165578842163, "learning_rate": 4.355565710302203e-06, "loss": 0.4277, "step": 10640 }, { "epoch": 1.7629531534514153, "grad_norm": 2.100135564804077, "learning_rate": 4.346016361135225e-06, "loss": 0.3965, "step": 10650 }, { "epoch": 1.7646085085250787, "grad_norm": 2.3119328022003174, "learning_rate": 4.3364694383468805e-06, "loss": 0.462, "step": 10660 }, { "epoch": 1.7662638635987418, "grad_norm": 1.6545701026916504, "learning_rate": 4.3269249773577e-06, "loss": 0.3747, "step": 10670 }, { "epoch": 1.767919218672405, "grad_norm": 2.4669320583343506, "learning_rate": 4.317383013579084e-06, "loss": 0.3581, "step": 10680 }, { "epoch": 1.7695745737460684, "grad_norm": 1.7150194644927979, "learning_rate": 4.307843582413166e-06, "loss": 0.38, "step": 10690 }, { "epoch": 1.7712299288197317, "grad_norm": 2.031128168106079, "learning_rate": 4.298306719252686e-06, "loss": 0.4136, "step": 10700 }, { "epoch": 1.772885283893395, "grad_norm": 2.0807642936706543, "learning_rate": 4.288772459480854e-06, "loss": 0.4435, "step": 10710 }, { "epoch": 1.7745406389670584, "grad_norm": 2.0820484161376953, "learning_rate": 4.2792408384712245e-06, "loss": 0.4107, "step": 10720 }, { "epoch": 1.7761959940407217, "grad_norm": 2.2544493675231934, "learning_rate": 4.269711891587556e-06, "loss": 0.381, "step": 10730 }, { "epoch": 1.777851349114385, "grad_norm": 1.900412917137146, "learning_rate": 4.260185654183689e-06, "loss": 0.4134, "step": 10740 }, { "epoch": 1.7795067041880483, "grad_norm": 1.8195431232452393, "learning_rate": 4.250662161603414e-06, "loss": 0.4334, "step": 10750 }, { "epoch": 1.7811620592617117, "grad_norm": 1.9061986207962036, "learning_rate": 4.24114144918033e-06, "loss": 0.4173, "step": 10760 }, { "epoch": 1.782817414335375, "grad_norm": 2.0322930812835693, "learning_rate": 4.231623552237731e-06, "loss": 0.3825, "step": 10770 }, { "epoch": 1.7844727694090383, "grad_norm": 1.757241129875183, "learning_rate": 4.222108506088457e-06, "loss": 0.3781, "step": 10780 }, { "epoch": 1.7861281244827016, "grad_norm": 2.6230099201202393, "learning_rate": 4.212596346034778e-06, "loss": 0.4582, "step": 10790 }, { "epoch": 1.787783479556365, "grad_norm": 1.8682286739349365, "learning_rate": 4.20308710736825e-06, "loss": 0.416, "step": 10800 }, { "epoch": 1.7894388346300283, "grad_norm": 1.6394546031951904, "learning_rate": 4.193580825369591e-06, "loss": 0.3774, "step": 10810 }, { "epoch": 1.7910941897036916, "grad_norm": 1.5209989547729492, "learning_rate": 4.184077535308554e-06, "loss": 0.4161, "step": 10820 }, { "epoch": 1.7927495447773547, "grad_norm": 2.6634228229522705, "learning_rate": 4.174577272443788e-06, "loss": 0.3987, "step": 10830 }, { "epoch": 1.794404899851018, "grad_norm": 2.2621631622314453, "learning_rate": 4.165080072022711e-06, "loss": 0.4146, "step": 10840 }, { "epoch": 1.7960602549246814, "grad_norm": 2.3683760166168213, "learning_rate": 4.15558596928138e-06, "loss": 0.4474, "step": 10850 }, { "epoch": 1.7977156099983447, "grad_norm": 2.4090867042541504, "learning_rate": 4.146094999444355e-06, "loss": 0.3767, "step": 10860 }, { "epoch": 1.799370965072008, "grad_norm": 1.345140814781189, "learning_rate": 4.13660719772458e-06, "loss": 0.3677, "step": 10870 }, { "epoch": 1.801026320145671, "grad_norm": 1.5577553510665894, "learning_rate": 4.127122599323235e-06, "loss": 0.4328, "step": 10880 }, { "epoch": 1.8026816752193344, "grad_norm": 2.3623404502868652, "learning_rate": 4.1176412394296265e-06, "loss": 0.3946, "step": 10890 }, { "epoch": 1.8043370302929977, "grad_norm": 1.6678826808929443, "learning_rate": 4.108163153221036e-06, "loss": 0.4202, "step": 10900 }, { "epoch": 1.805992385366661, "grad_norm": 1.8554919958114624, "learning_rate": 4.098688375862605e-06, "loss": 0.4213, "step": 10910 }, { "epoch": 1.8076477404403244, "grad_norm": 2.539498805999756, "learning_rate": 4.089216942507196e-06, "loss": 0.4458, "step": 10920 }, { "epoch": 1.8093030955139877, "grad_norm": 2.0051186084747314, "learning_rate": 4.0797488882952625e-06, "loss": 0.4274, "step": 10930 }, { "epoch": 1.810958450587651, "grad_norm": 2.1093010902404785, "learning_rate": 4.070284248354728e-06, "loss": 0.3565, "step": 10940 }, { "epoch": 1.8126138056613144, "grad_norm": 2.032156229019165, "learning_rate": 4.060823057800842e-06, "loss": 0.4315, "step": 10950 }, { "epoch": 1.8142691607349777, "grad_norm": 1.781123161315918, "learning_rate": 4.05136535173606e-06, "loss": 0.4531, "step": 10960 }, { "epoch": 1.815924515808641, "grad_norm": 2.380314350128174, "learning_rate": 4.041911165249905e-06, "loss": 0.3931, "step": 10970 }, { "epoch": 1.8175798708823043, "grad_norm": 1.8698779344558716, "learning_rate": 4.0324605334188474e-06, "loss": 0.3803, "step": 10980 }, { "epoch": 1.8192352259559676, "grad_norm": 1.6024515628814697, "learning_rate": 4.0230134913061645e-06, "loss": 0.394, "step": 10990 }, { "epoch": 1.820890581029631, "grad_norm": 2.1540608406066895, "learning_rate": 4.0135700739618205e-06, "loss": 0.401, "step": 11000 }, { "epoch": 1.8225459361032943, "grad_norm": 2.3779587745666504, "learning_rate": 4.004130316422327e-06, "loss": 0.4131, "step": 11010 }, { "epoch": 1.8242012911769576, "grad_norm": 1.9901835918426514, "learning_rate": 3.994694253710617e-06, "loss": 0.4113, "step": 11020 }, { "epoch": 1.8258566462506207, "grad_norm": 1.5083597898483276, "learning_rate": 3.985261920835917e-06, "loss": 0.3972, "step": 11030 }, { "epoch": 1.827512001324284, "grad_norm": 2.1913838386535645, "learning_rate": 3.975833352793615e-06, "loss": 0.4391, "step": 11040 }, { "epoch": 1.8291673563979474, "grad_norm": 2.0615127086639404, "learning_rate": 3.966408584565127e-06, "loss": 0.4167, "step": 11050 }, { "epoch": 1.8308227114716107, "grad_norm": 1.6090701818466187, "learning_rate": 3.9569876511177805e-06, "loss": 0.4146, "step": 11060 }, { "epoch": 1.832478066545274, "grad_norm": 2.14567494392395, "learning_rate": 3.947570587404667e-06, "loss": 0.3594, "step": 11070 }, { "epoch": 1.834133421618937, "grad_norm": 1.3794012069702148, "learning_rate": 3.9381574283645215e-06, "loss": 0.4172, "step": 11080 }, { "epoch": 1.8357887766926004, "grad_norm": 1.5953060388565063, "learning_rate": 3.928748208921597e-06, "loss": 0.356, "step": 11090 }, { "epoch": 1.8374441317662638, "grad_norm": 2.281615972518921, "learning_rate": 3.919342963985524e-06, "loss": 0.3627, "step": 11100 }, { "epoch": 1.839099486839927, "grad_norm": 1.8211688995361328, "learning_rate": 3.909941728451188e-06, "loss": 0.4197, "step": 11110 }, { "epoch": 1.8407548419135904, "grad_norm": 2.0925374031066895, "learning_rate": 3.900544537198607e-06, "loss": 0.4497, "step": 11120 }, { "epoch": 1.8424101969872537, "grad_norm": 2.1480743885040283, "learning_rate": 3.891151425092783e-06, "loss": 0.3789, "step": 11130 }, { "epoch": 1.844065552060917, "grad_norm": 1.6905628442764282, "learning_rate": 3.8817624269835904e-06, "loss": 0.4086, "step": 11140 }, { "epoch": 1.8457209071345804, "grad_norm": 1.8402177095413208, "learning_rate": 3.872377577705637e-06, "loss": 0.4026, "step": 11150 }, { "epoch": 1.8473762622082437, "grad_norm": 1.9096100330352783, "learning_rate": 3.862996912078138e-06, "loss": 0.4017, "step": 11160 }, { "epoch": 1.849031617281907, "grad_norm": 2.6157302856445312, "learning_rate": 3.853620464904792e-06, "loss": 0.3848, "step": 11170 }, { "epoch": 1.8506869723555703, "grad_norm": 3.2500317096710205, "learning_rate": 3.844248270973639e-06, "loss": 0.4319, "step": 11180 }, { "epoch": 1.8523423274292337, "grad_norm": 2.082878351211548, "learning_rate": 3.834880365056942e-06, "loss": 0.4486, "step": 11190 }, { "epoch": 1.853997682502897, "grad_norm": 2.111647605895996, "learning_rate": 3.825516781911056e-06, "loss": 0.4077, "step": 11200 }, { "epoch": 1.8556530375765603, "grad_norm": 1.6086077690124512, "learning_rate": 3.816157556276295e-06, "loss": 0.4098, "step": 11210 }, { "epoch": 1.8573083926502236, "grad_norm": 2.252664566040039, "learning_rate": 3.806802722876808e-06, "loss": 0.4283, "step": 11220 }, { "epoch": 1.8589637477238867, "grad_norm": 2.4436564445495605, "learning_rate": 3.79745231642045e-06, "loss": 0.4302, "step": 11230 }, { "epoch": 1.86061910279755, "grad_norm": 2.0436229705810547, "learning_rate": 3.7881063715986466e-06, "loss": 0.4131, "step": 11240 }, { "epoch": 1.8622744578712134, "grad_norm": 1.9353731870651245, "learning_rate": 3.7787649230862746e-06, "loss": 0.3978, "step": 11250 }, { "epoch": 1.8639298129448767, "grad_norm": 1.7772618532180786, "learning_rate": 3.769428005541525e-06, "loss": 0.4193, "step": 11260 }, { "epoch": 1.86558516801854, "grad_norm": 1.960239052772522, "learning_rate": 3.76009565360578e-06, "loss": 0.4456, "step": 11270 }, { "epoch": 1.8672405230922031, "grad_norm": 2.3844010829925537, "learning_rate": 3.7507679019034827e-06, "loss": 0.4229, "step": 11280 }, { "epoch": 1.8688958781658664, "grad_norm": 1.5547751188278198, "learning_rate": 3.7414447850420116e-06, "loss": 0.4223, "step": 11290 }, { "epoch": 1.8705512332395298, "grad_norm": 2.0283639430999756, "learning_rate": 3.732126337611544e-06, "loss": 0.4348, "step": 11300 }, { "epoch": 1.872206588313193, "grad_norm": 2.0857224464416504, "learning_rate": 3.7228125941849347e-06, "loss": 0.4507, "step": 11310 }, { "epoch": 1.8738619433868564, "grad_norm": 1.7453672885894775, "learning_rate": 3.7135035893175873e-06, "loss": 0.4225, "step": 11320 }, { "epoch": 1.8755172984605197, "grad_norm": 2.237485885620117, "learning_rate": 3.7041993575473245e-06, "loss": 0.4036, "step": 11330 }, { "epoch": 1.877172653534183, "grad_norm": 1.7037478685379028, "learning_rate": 3.6948999333942558e-06, "loss": 0.391, "step": 11340 }, { "epoch": 1.8788280086078464, "grad_norm": 2.2421579360961914, "learning_rate": 3.6856053513606615e-06, "loss": 0.3896, "step": 11350 }, { "epoch": 1.8804833636815097, "grad_norm": 1.633567452430725, "learning_rate": 3.676315645930851e-06, "loss": 0.4273, "step": 11360 }, { "epoch": 1.882138718755173, "grad_norm": 1.8429170846939087, "learning_rate": 3.667030851571043e-06, "loss": 0.3861, "step": 11370 }, { "epoch": 1.8837940738288363, "grad_norm": 2.3093669414520264, "learning_rate": 3.657751002729234e-06, "loss": 0.3958, "step": 11380 }, { "epoch": 1.8854494289024997, "grad_norm": 1.9372445344924927, "learning_rate": 3.6484761338350703e-06, "loss": 0.4537, "step": 11390 }, { "epoch": 1.887104783976163, "grad_norm": 2.930211305618286, "learning_rate": 3.6392062792997284e-06, "loss": 0.3707, "step": 11400 }, { "epoch": 1.8887601390498263, "grad_norm": 1.6780815124511719, "learning_rate": 3.6299414735157767e-06, "loss": 0.3917, "step": 11410 }, { "epoch": 1.8904154941234896, "grad_norm": 2.5266661643981934, "learning_rate": 3.620681750857049e-06, "loss": 0.4232, "step": 11420 }, { "epoch": 1.8920708491971527, "grad_norm": 1.8783165216445923, "learning_rate": 3.6114271456785237e-06, "loss": 0.415, "step": 11430 }, { "epoch": 1.893726204270816, "grad_norm": 1.8861968517303467, "learning_rate": 3.6021776923161927e-06, "loss": 0.3868, "step": 11440 }, { "epoch": 1.8953815593444794, "grad_norm": 1.9357882738113403, "learning_rate": 3.5929334250869297e-06, "loss": 0.4461, "step": 11450 }, { "epoch": 1.8970369144181427, "grad_norm": 1.3567824363708496, "learning_rate": 3.5836943782883747e-06, "loss": 0.3844, "step": 11460 }, { "epoch": 1.898692269491806, "grad_norm": 2.8328826427459717, "learning_rate": 3.5744605861987925e-06, "loss": 0.4203, "step": 11470 }, { "epoch": 1.9003476245654691, "grad_norm": 2.007815361022949, "learning_rate": 3.565232083076954e-06, "loss": 0.3869, "step": 11480 }, { "epoch": 1.9020029796391325, "grad_norm": 1.5280500650405884, "learning_rate": 3.556008903162007e-06, "loss": 0.4193, "step": 11490 }, { "epoch": 1.9036583347127958, "grad_norm": 2.2297863960266113, "learning_rate": 3.546791080673351e-06, "loss": 0.408, "step": 11500 }, { "epoch": 1.905313689786459, "grad_norm": 2.147474527359009, "learning_rate": 3.537578649810505e-06, "loss": 0.4071, "step": 11510 }, { "epoch": 1.9069690448601224, "grad_norm": 2.1503307819366455, "learning_rate": 3.52837164475299e-06, "loss": 0.4037, "step": 11520 }, { "epoch": 1.9086243999337857, "grad_norm": 2.1771047115325928, "learning_rate": 3.519170099660192e-06, "loss": 0.3948, "step": 11530 }, { "epoch": 1.910279755007449, "grad_norm": 1.9087363481521606, "learning_rate": 3.509974048671241e-06, "loss": 0.4097, "step": 11540 }, { "epoch": 1.9119351100811124, "grad_norm": 1.270500659942627, "learning_rate": 3.5007835259048818e-06, "loss": 0.395, "step": 11550 }, { "epoch": 1.9135904651547757, "grad_norm": 1.6682640314102173, "learning_rate": 3.491598565459351e-06, "loss": 0.3878, "step": 11560 }, { "epoch": 1.915245820228439, "grad_norm": 1.9983856678009033, "learning_rate": 3.482419201412246e-06, "loss": 0.4232, "step": 11570 }, { "epoch": 1.9169011753021024, "grad_norm": 1.9866315126419067, "learning_rate": 3.473245467820403e-06, "loss": 0.4339, "step": 11580 }, { "epoch": 1.9185565303757657, "grad_norm": 1.7992204427719116, "learning_rate": 3.4640773987197662e-06, "loss": 0.4178, "step": 11590 }, { "epoch": 1.920211885449429, "grad_norm": 1.685878872871399, "learning_rate": 3.4549150281252635e-06, "loss": 0.3865, "step": 11600 }, { "epoch": 1.9218672405230923, "grad_norm": 1.3427140712738037, "learning_rate": 3.4457583900306835e-06, "loss": 0.4346, "step": 11610 }, { "epoch": 1.9235225955967556, "grad_norm": 1.8562428951263428, "learning_rate": 3.4366075184085403e-06, "loss": 0.4133, "step": 11620 }, { "epoch": 1.925177950670419, "grad_norm": 1.8822894096374512, "learning_rate": 3.4274624472099626e-06, "loss": 0.4147, "step": 11630 }, { "epoch": 1.926833305744082, "grad_norm": 1.918009638786316, "learning_rate": 3.4183232103645508e-06, "loss": 0.392, "step": 11640 }, { "epoch": 1.9284886608177454, "grad_norm": 1.8526767492294312, "learning_rate": 3.409189841780263e-06, "loss": 0.4166, "step": 11650 }, { "epoch": 1.9301440158914087, "grad_norm": 2.163173198699951, "learning_rate": 3.400062375343283e-06, "loss": 0.4057, "step": 11660 }, { "epoch": 1.931799370965072, "grad_norm": 2.5304856300354004, "learning_rate": 3.390940844917897e-06, "loss": 0.3884, "step": 11670 }, { "epoch": 1.9334547260387354, "grad_norm": 2.086353302001953, "learning_rate": 3.3818252843463676e-06, "loss": 0.3999, "step": 11680 }, { "epoch": 1.9351100811123985, "grad_norm": 1.9789005517959595, "learning_rate": 3.3727157274488113e-06, "loss": 0.4293, "step": 11690 }, { "epoch": 1.9367654361860618, "grad_norm": 1.9313982725143433, "learning_rate": 3.363612208023068e-06, "loss": 0.3912, "step": 11700 }, { "epoch": 1.938420791259725, "grad_norm": 1.6824219226837158, "learning_rate": 3.354514759844576e-06, "loss": 0.3633, "step": 11710 }, { "epoch": 1.9400761463333884, "grad_norm": 2.064709424972534, "learning_rate": 3.345423416666249e-06, "loss": 0.3762, "step": 11720 }, { "epoch": 1.9417315014070518, "grad_norm": 1.9601470232009888, "learning_rate": 3.336338212218354e-06, "loss": 0.3626, "step": 11730 }, { "epoch": 1.943386856480715, "grad_norm": 1.799816608428955, "learning_rate": 3.327259180208375e-06, "loss": 0.3649, "step": 11740 }, { "epoch": 1.9450422115543784, "grad_norm": 2.4325437545776367, "learning_rate": 3.318186354320905e-06, "loss": 0.3869, "step": 11750 }, { "epoch": 1.9466975666280417, "grad_norm": 1.9126996994018555, "learning_rate": 3.3091197682175023e-06, "loss": 0.4228, "step": 11760 }, { "epoch": 1.948352921701705, "grad_norm": 1.600388526916504, "learning_rate": 3.300059455536579e-06, "loss": 0.3965, "step": 11770 }, { "epoch": 1.9500082767753684, "grad_norm": 2.1479532718658447, "learning_rate": 3.291005449893273e-06, "loss": 0.3816, "step": 11780 }, { "epoch": 1.9516636318490317, "grad_norm": 1.8163764476776123, "learning_rate": 3.281957784879317e-06, "loss": 0.4336, "step": 11790 }, { "epoch": 1.953318986922695, "grad_norm": 2.1194796562194824, "learning_rate": 3.2729164940629264e-06, "loss": 0.4344, "step": 11800 }, { "epoch": 1.9549743419963583, "grad_norm": 1.6033542156219482, "learning_rate": 3.2638816109886604e-06, "loss": 0.3745, "step": 11810 }, { "epoch": 1.9566296970700217, "grad_norm": 2.0563807487487793, "learning_rate": 3.254853169177311e-06, "loss": 0.408, "step": 11820 }, { "epoch": 1.958285052143685, "grad_norm": 2.165522813796997, "learning_rate": 3.2458312021257656e-06, "loss": 0.417, "step": 11830 }, { "epoch": 1.959940407217348, "grad_norm": 1.5800554752349854, "learning_rate": 3.2368157433068916e-06, "loss": 0.4462, "step": 11840 }, { "epoch": 1.9615957622910114, "grad_norm": 1.5213532447814941, "learning_rate": 3.2278068261694106e-06, "loss": 0.3837, "step": 11850 }, { "epoch": 1.9632511173646747, "grad_norm": 1.8690297603607178, "learning_rate": 3.2188044841377773e-06, "loss": 0.3819, "step": 11860 }, { "epoch": 1.964906472438338, "grad_norm": 1.4503165483474731, "learning_rate": 3.2098087506120456e-06, "loss": 0.4349, "step": 11870 }, { "epoch": 1.9665618275120014, "grad_norm": 1.9367406368255615, "learning_rate": 3.2008196589677532e-06, "loss": 0.465, "step": 11880 }, { "epoch": 1.9682171825856645, "grad_norm": 3.2155959606170654, "learning_rate": 3.1918372425557932e-06, "loss": 0.3958, "step": 11890 }, { "epoch": 1.9698725376593278, "grad_norm": 2.281371593475342, "learning_rate": 3.1828615347022984e-06, "loss": 0.4041, "step": 11900 }, { "epoch": 1.9715278927329911, "grad_norm": 1.9571949243545532, "learning_rate": 3.173892568708505e-06, "loss": 0.3746, "step": 11910 }, { "epoch": 1.9731832478066544, "grad_norm": 1.9914932250976562, "learning_rate": 3.1649303778506425e-06, "loss": 0.4203, "step": 11920 }, { "epoch": 1.9748386028803178, "grad_norm": 1.7412394285202026, "learning_rate": 3.1559749953797973e-06, "loss": 0.3784, "step": 11930 }, { "epoch": 1.976493957953981, "grad_norm": 1.6468333005905151, "learning_rate": 3.147026454521801e-06, "loss": 0.4356, "step": 11940 }, { "epoch": 1.9781493130276444, "grad_norm": 1.9956331253051758, "learning_rate": 3.138084788477098e-06, "loss": 0.4486, "step": 11950 }, { "epoch": 1.9798046681013077, "grad_norm": 2.0449624061584473, "learning_rate": 3.1291500304206262e-06, "loss": 0.3309, "step": 11960 }, { "epoch": 1.981460023174971, "grad_norm": 2.1764378547668457, "learning_rate": 3.120222213501697e-06, "loss": 0.4128, "step": 11970 }, { "epoch": 1.9831153782486344, "grad_norm": 1.891964077949524, "learning_rate": 3.1113013708438653e-06, "loss": 0.4075, "step": 11980 }, { "epoch": 1.9847707333222977, "grad_norm": 2.1923632621765137, "learning_rate": 3.1023875355448153e-06, "loss": 0.4233, "step": 11990 }, { "epoch": 1.986426088395961, "grad_norm": 2.2308120727539062, "learning_rate": 3.093480740676228e-06, "loss": 0.4074, "step": 12000 }, { "epoch": 1.9880814434696243, "grad_norm": 2.1118459701538086, "learning_rate": 3.0845810192836645e-06, "loss": 0.423, "step": 12010 }, { "epoch": 1.9897367985432877, "grad_norm": 1.4029134511947632, "learning_rate": 3.075688404386442e-06, "loss": 0.3564, "step": 12020 }, { "epoch": 1.991392153616951, "grad_norm": 1.9621480703353882, "learning_rate": 3.0668029289775163e-06, "loss": 0.3869, "step": 12030 }, { "epoch": 1.993047508690614, "grad_norm": 1.8076422214508057, "learning_rate": 3.0579246260233486e-06, "loss": 0.4299, "step": 12040 }, { "epoch": 1.9947028637642774, "grad_norm": 2.1234450340270996, "learning_rate": 3.04905352846379e-06, "loss": 0.4006, "step": 12050 }, { "epoch": 1.9963582188379407, "grad_norm": 1.8213273286819458, "learning_rate": 3.0401896692119626e-06, "loss": 0.4036, "step": 12060 }, { "epoch": 1.998013573911604, "grad_norm": 2.5499227046966553, "learning_rate": 3.031333081154129e-06, "loss": 0.4424, "step": 12070 }, { "epoch": 1.9996689289852674, "grad_norm": 2.273587942123413, "learning_rate": 3.0224837971495736e-06, "loss": 0.4118, "step": 12080 }, { "epoch": 2.0013242840589305, "grad_norm": 1.7035340070724487, "learning_rate": 3.0136418500304888e-06, "loss": 0.3556, "step": 12090 }, { "epoch": 2.002979639132594, "grad_norm": 1.4942774772644043, "learning_rate": 3.0048072726018386e-06, "loss": 0.3315, "step": 12100 }, { "epoch": 2.004634994206257, "grad_norm": 1.6278843879699707, "learning_rate": 2.995980097641248e-06, "loss": 0.3448, "step": 12110 }, { "epoch": 2.0062903492799204, "grad_norm": 2.276960849761963, "learning_rate": 2.987160357898877e-06, "loss": 0.3363, "step": 12120 }, { "epoch": 2.0079457043535838, "grad_norm": 1.993857502937317, "learning_rate": 2.978348086097298e-06, "loss": 0.3246, "step": 12130 }, { "epoch": 2.009601059427247, "grad_norm": 1.7416425943374634, "learning_rate": 2.9695433149313774e-06, "loss": 0.3325, "step": 12140 }, { "epoch": 2.0112564145009104, "grad_norm": 2.1714820861816406, "learning_rate": 2.960746077068158e-06, "loss": 0.3331, "step": 12150 }, { "epoch": 2.0129117695745737, "grad_norm": 1.8540669679641724, "learning_rate": 2.951956405146725e-06, "loss": 0.3549, "step": 12160 }, { "epoch": 2.014567124648237, "grad_norm": 1.6556892395019531, "learning_rate": 2.9431743317780957e-06, "loss": 0.3138, "step": 12170 }, { "epoch": 2.0162224797219004, "grad_norm": 1.8258768320083618, "learning_rate": 2.934399889545099e-06, "loss": 0.3415, "step": 12180 }, { "epoch": 2.0178778347955637, "grad_norm": 1.6194030046463013, "learning_rate": 2.9256331110022463e-06, "loss": 0.3436, "step": 12190 }, { "epoch": 2.019533189869227, "grad_norm": 2.233779191970825, "learning_rate": 2.9168740286756157e-06, "loss": 0.3653, "step": 12200 }, { "epoch": 2.0211885449428904, "grad_norm": 1.9311305284500122, "learning_rate": 2.9081226750627367e-06, "loss": 0.3901, "step": 12210 }, { "epoch": 2.0228439000165537, "grad_norm": 2.0436785221099854, "learning_rate": 2.89937908263246e-06, "loss": 0.343, "step": 12220 }, { "epoch": 2.024499255090217, "grad_norm": 1.8099346160888672, "learning_rate": 2.890643283824837e-06, "loss": 0.3493, "step": 12230 }, { "epoch": 2.0261546101638803, "grad_norm": 2.596013307571411, "learning_rate": 2.8819153110510147e-06, "loss": 0.3721, "step": 12240 }, { "epoch": 2.0278099652375436, "grad_norm": 2.1353633403778076, "learning_rate": 2.8731951966930917e-06, "loss": 0.361, "step": 12250 }, { "epoch": 2.029465320311207, "grad_norm": 2.3608481884002686, "learning_rate": 2.8644829731040214e-06, "loss": 0.3404, "step": 12260 }, { "epoch": 2.03112067538487, "grad_norm": 1.9163801670074463, "learning_rate": 2.8557786726074755e-06, "loss": 0.3844, "step": 12270 }, { "epoch": 2.032776030458533, "grad_norm": 2.0741047859191895, "learning_rate": 2.84708232749773e-06, "loss": 0.3721, "step": 12280 }, { "epoch": 2.0344313855321965, "grad_norm": 2.1986711025238037, "learning_rate": 2.8383939700395456e-06, "loss": 0.3018, "step": 12290 }, { "epoch": 2.03608674060586, "grad_norm": 1.790903925895691, "learning_rate": 2.8297136324680498e-06, "loss": 0.3716, "step": 12300 }, { "epoch": 2.037742095679523, "grad_norm": 1.8230623006820679, "learning_rate": 2.8210413469886094e-06, "loss": 0.3459, "step": 12310 }, { "epoch": 2.0393974507531865, "grad_norm": 1.7864844799041748, "learning_rate": 2.812377145776724e-06, "loss": 0.3428, "step": 12320 }, { "epoch": 2.04105280582685, "grad_norm": 1.8330215215682983, "learning_rate": 2.8037210609778975e-06, "loss": 0.3533, "step": 12330 }, { "epoch": 2.042708160900513, "grad_norm": 2.0148110389709473, "learning_rate": 2.795073124707518e-06, "loss": 0.3093, "step": 12340 }, { "epoch": 2.0443635159741764, "grad_norm": 1.9366060495376587, "learning_rate": 2.786433369050742e-06, "loss": 0.3289, "step": 12350 }, { "epoch": 2.0460188710478397, "grad_norm": 3.5124638080596924, "learning_rate": 2.777801826062375e-06, "loss": 0.3476, "step": 12360 }, { "epoch": 2.047674226121503, "grad_norm": 2.0207085609436035, "learning_rate": 2.7691785277667506e-06, "loss": 0.3595, "step": 12370 }, { "epoch": 2.0493295811951664, "grad_norm": 1.430230736732483, "learning_rate": 2.7605635061576195e-06, "loss": 0.3462, "step": 12380 }, { "epoch": 2.0509849362688297, "grad_norm": 2.4573211669921875, "learning_rate": 2.7519567931980185e-06, "loss": 0.3782, "step": 12390 }, { "epoch": 2.052640291342493, "grad_norm": 1.84562087059021, "learning_rate": 2.7433584208201577e-06, "loss": 0.3536, "step": 12400 }, { "epoch": 2.0542956464161564, "grad_norm": 1.9053126573562622, "learning_rate": 2.734768420925308e-06, "loss": 0.3187, "step": 12410 }, { "epoch": 2.0559510014898197, "grad_norm": 2.1721508502960205, "learning_rate": 2.726186825383673e-06, "loss": 0.3245, "step": 12420 }, { "epoch": 2.057606356563483, "grad_norm": 2.5854411125183105, "learning_rate": 2.717613666034272e-06, "loss": 0.3681, "step": 12430 }, { "epoch": 2.0592617116371463, "grad_norm": 1.7167317867279053, "learning_rate": 2.7090489746848336e-06, "loss": 0.3286, "step": 12440 }, { "epoch": 2.0609170667108097, "grad_norm": 1.8206017017364502, "learning_rate": 2.7004927831116614e-06, "loss": 0.3294, "step": 12450 }, { "epoch": 2.062572421784473, "grad_norm": 1.7760072946548462, "learning_rate": 2.691945123059525e-06, "loss": 0.3465, "step": 12460 }, { "epoch": 2.064227776858136, "grad_norm": 2.4801549911499023, "learning_rate": 2.6834060262415425e-06, "loss": 0.3392, "step": 12470 }, { "epoch": 2.065883131931799, "grad_norm": 2.1817026138305664, "learning_rate": 2.674875524339057e-06, "loss": 0.3275, "step": 12480 }, { "epoch": 2.0675384870054625, "grad_norm": 1.8844431638717651, "learning_rate": 2.6663536490015284e-06, "loss": 0.3257, "step": 12490 }, { "epoch": 2.069193842079126, "grad_norm": 2.3225162029266357, "learning_rate": 2.6578404318464112e-06, "loss": 0.3575, "step": 12500 }, { "epoch": 2.070849197152789, "grad_norm": 1.740742802619934, "learning_rate": 2.6493359044590317e-06, "loss": 0.3333, "step": 12510 }, { "epoch": 2.0725045522264525, "grad_norm": 2.0152180194854736, "learning_rate": 2.640840098392478e-06, "loss": 0.3601, "step": 12520 }, { "epoch": 2.074159907300116, "grad_norm": 2.1492371559143066, "learning_rate": 2.6323530451674815e-06, "loss": 0.3131, "step": 12530 }, { "epoch": 2.075815262373779, "grad_norm": 2.565166473388672, "learning_rate": 2.623874776272296e-06, "loss": 0.3644, "step": 12540 }, { "epoch": 2.0774706174474424, "grad_norm": 2.024240493774414, "learning_rate": 2.615405323162592e-06, "loss": 0.2927, "step": 12550 }, { "epoch": 2.0791259725211058, "grad_norm": 1.9475926160812378, "learning_rate": 2.606944717261323e-06, "loss": 0.3036, "step": 12560 }, { "epoch": 2.080781327594769, "grad_norm": 2.2539281845092773, "learning_rate": 2.598492989958622e-06, "loss": 0.3522, "step": 12570 }, { "epoch": 2.0824366826684324, "grad_norm": 1.7754216194152832, "learning_rate": 2.590050172611684e-06, "loss": 0.3114, "step": 12580 }, { "epoch": 2.0840920377420957, "grad_norm": 1.9726289510726929, "learning_rate": 2.5816162965446416e-06, "loss": 0.3297, "step": 12590 }, { "epoch": 2.085747392815759, "grad_norm": 1.9303443431854248, "learning_rate": 2.5731913930484543e-06, "loss": 0.3168, "step": 12600 }, { "epoch": 2.0874027478894224, "grad_norm": 3.2593839168548584, "learning_rate": 2.564775493380798e-06, "loss": 0.3679, "step": 12610 }, { "epoch": 2.0890581029630857, "grad_norm": 2.273982524871826, "learning_rate": 2.556368628765935e-06, "loss": 0.3396, "step": 12620 }, { "epoch": 2.090713458036749, "grad_norm": 2.084463119506836, "learning_rate": 2.5479708303946126e-06, "loss": 0.3432, "step": 12630 }, { "epoch": 2.0923688131104123, "grad_norm": 1.9511936902999878, "learning_rate": 2.539582129423934e-06, "loss": 0.3304, "step": 12640 }, { "epoch": 2.0940241681840757, "grad_norm": 3.004499912261963, "learning_rate": 2.531202556977259e-06, "loss": 0.3302, "step": 12650 }, { "epoch": 2.095679523257739, "grad_norm": 2.4408180713653564, "learning_rate": 2.5228321441440705e-06, "loss": 0.3696, "step": 12660 }, { "epoch": 2.0973348783314023, "grad_norm": 1.5785390138626099, "learning_rate": 2.514470921979874e-06, "loss": 0.3248, "step": 12670 }, { "epoch": 2.098990233405065, "grad_norm": 2.020374059677124, "learning_rate": 2.5061189215060733e-06, "loss": 0.3153, "step": 12680 }, { "epoch": 2.1006455884787285, "grad_norm": 4.800167560577393, "learning_rate": 2.497776173709857e-06, "loss": 0.3579, "step": 12690 }, { "epoch": 2.102300943552392, "grad_norm": 2.8129446506500244, "learning_rate": 2.4894427095440883e-06, "loss": 0.3541, "step": 12700 }, { "epoch": 2.103956298626055, "grad_norm": 2.103689670562744, "learning_rate": 2.4811185599271813e-06, "loss": 0.3655, "step": 12710 }, { "epoch": 2.1056116536997185, "grad_norm": 2.179983139038086, "learning_rate": 2.4728037557430012e-06, "loss": 0.381, "step": 12720 }, { "epoch": 2.107267008773382, "grad_norm": 1.8805456161499023, "learning_rate": 2.464498327840729e-06, "loss": 0.3306, "step": 12730 }, { "epoch": 2.108922363847045, "grad_norm": 1.8576078414916992, "learning_rate": 2.4562023070347685e-06, "loss": 0.3927, "step": 12740 }, { "epoch": 2.1105777189207084, "grad_norm": 2.150397300720215, "learning_rate": 2.4479157241046143e-06, "loss": 0.3269, "step": 12750 }, { "epoch": 2.1122330739943718, "grad_norm": 1.8863481283187866, "learning_rate": 2.4396386097947482e-06, "loss": 0.3511, "step": 12760 }, { "epoch": 2.113888429068035, "grad_norm": 1.7663947343826294, "learning_rate": 2.4313709948145193e-06, "loss": 0.3388, "step": 12770 }, { "epoch": 2.1155437841416984, "grad_norm": 2.267031669616699, "learning_rate": 2.423112909838039e-06, "loss": 0.3502, "step": 12780 }, { "epoch": 2.1171991392153617, "grad_norm": 2.0435879230499268, "learning_rate": 2.4148643855040543e-06, "loss": 0.3621, "step": 12790 }, { "epoch": 2.118854494289025, "grad_norm": 2.001474618911743, "learning_rate": 2.406625452415844e-06, "loss": 0.3321, "step": 12800 }, { "epoch": 2.1205098493626884, "grad_norm": 1.835279941558838, "learning_rate": 2.398396141141099e-06, "loss": 0.369, "step": 12810 }, { "epoch": 2.1221652044363517, "grad_norm": 1.7734911441802979, "learning_rate": 2.390176482211818e-06, "loss": 0.34, "step": 12820 }, { "epoch": 2.123820559510015, "grad_norm": 2.4885096549987793, "learning_rate": 2.3819665061241798e-06, "loss": 0.3385, "step": 12830 }, { "epoch": 2.1254759145836783, "grad_norm": 1.844212293624878, "learning_rate": 2.3737662433384474e-06, "loss": 0.3229, "step": 12840 }, { "epoch": 2.1271312696573417, "grad_norm": 1.8578639030456543, "learning_rate": 2.3655757242788385e-06, "loss": 0.354, "step": 12850 }, { "epoch": 2.128786624731005, "grad_norm": 2.1224701404571533, "learning_rate": 2.357394979333423e-06, "loss": 0.3277, "step": 12860 }, { "epoch": 2.130441979804668, "grad_norm": 1.8834820985794067, "learning_rate": 2.349224038854007e-06, "loss": 0.3472, "step": 12870 }, { "epoch": 2.132097334878331, "grad_norm": 1.910531759262085, "learning_rate": 2.3410629331560197e-06, "loss": 0.3553, "step": 12880 }, { "epoch": 2.1337526899519945, "grad_norm": 2.2018206119537354, "learning_rate": 2.332911692518406e-06, "loss": 0.3408, "step": 12890 }, { "epoch": 2.135408045025658, "grad_norm": 1.6149979829788208, "learning_rate": 2.3247703471835028e-06, "loss": 0.3158, "step": 12900 }, { "epoch": 2.137063400099321, "grad_norm": 2.036621570587158, "learning_rate": 2.3166389273569416e-06, "loss": 0.3459, "step": 12910 }, { "epoch": 2.1387187551729845, "grad_norm": 2.2086009979248047, "learning_rate": 2.3085174632075234e-06, "loss": 0.3457, "step": 12920 }, { "epoch": 2.140374110246648, "grad_norm": 1.6859116554260254, "learning_rate": 2.3004059848671133e-06, "loss": 0.3355, "step": 12930 }, { "epoch": 2.142029465320311, "grad_norm": 2.271038770675659, "learning_rate": 2.2923045224305267e-06, "loss": 0.3251, "step": 12940 }, { "epoch": 2.1436848203939745, "grad_norm": 1.8713353872299194, "learning_rate": 2.2842131059554228e-06, "loss": 0.3506, "step": 12950 }, { "epoch": 2.1453401754676378, "grad_norm": 2.219752550125122, "learning_rate": 2.2761317654621853e-06, "loss": 0.3681, "step": 12960 }, { "epoch": 2.146995530541301, "grad_norm": 2.1416406631469727, "learning_rate": 2.2680605309338103e-06, "loss": 0.3645, "step": 12970 }, { "epoch": 2.1486508856149644, "grad_norm": 1.8972928524017334, "learning_rate": 2.2599994323158103e-06, "loss": 0.3625, "step": 12980 }, { "epoch": 2.1503062406886277, "grad_norm": 1.7961077690124512, "learning_rate": 2.2519484995160818e-06, "loss": 0.3336, "step": 12990 }, { "epoch": 2.151961595762291, "grad_norm": 2.188765287399292, "learning_rate": 2.2439077624048074e-06, "loss": 0.3458, "step": 13000 }, { "epoch": 2.1536169508359544, "grad_norm": 1.96406888961792, "learning_rate": 2.2358772508143467e-06, "loss": 0.3536, "step": 13010 }, { "epoch": 2.1552723059096177, "grad_norm": 2.556922435760498, "learning_rate": 2.227856994539115e-06, "loss": 0.3177, "step": 13020 }, { "epoch": 2.156927660983281, "grad_norm": 2.1325490474700928, "learning_rate": 2.2198470233354817e-06, "loss": 0.3601, "step": 13030 }, { "epoch": 2.1585830160569444, "grad_norm": 2.0026392936706543, "learning_rate": 2.2118473669216568e-06, "loss": 0.3097, "step": 13040 }, { "epoch": 2.1602383711306077, "grad_norm": 2.2289645671844482, "learning_rate": 2.203858054977578e-06, "loss": 0.4102, "step": 13050 }, { "epoch": 2.161893726204271, "grad_norm": 1.3882899284362793, "learning_rate": 2.1958791171448083e-06, "loss": 0.3555, "step": 13060 }, { "epoch": 2.1635490812779343, "grad_norm": 1.9401888847351074, "learning_rate": 2.1879105830264213e-06, "loss": 0.3216, "step": 13070 }, { "epoch": 2.1652044363515976, "grad_norm": 2.3579916954040527, "learning_rate": 2.179952482186886e-06, "loss": 0.3729, "step": 13080 }, { "epoch": 2.1668597914252605, "grad_norm": 1.7595404386520386, "learning_rate": 2.1720048441519665e-06, "loss": 0.3457, "step": 13090 }, { "epoch": 2.168515146498924, "grad_norm": 1.9767661094665527, "learning_rate": 2.164067698408606e-06, "loss": 0.3442, "step": 13100 }, { "epoch": 2.170170501572587, "grad_norm": 2.1314504146575928, "learning_rate": 2.1561410744048213e-06, "loss": 0.3674, "step": 13110 }, { "epoch": 2.1718258566462505, "grad_norm": 2.088491439819336, "learning_rate": 2.148225001549589e-06, "loss": 0.3049, "step": 13120 }, { "epoch": 2.173481211719914, "grad_norm": 1.943539023399353, "learning_rate": 2.140319509212746e-06, "loss": 0.3338, "step": 13130 }, { "epoch": 2.175136566793577, "grad_norm": 1.8165936470031738, "learning_rate": 2.1324246267248657e-06, "loss": 0.343, "step": 13140 }, { "epoch": 2.1767919218672405, "grad_norm": 1.5986518859863281, "learning_rate": 2.124540383377165e-06, "loss": 0.3654, "step": 13150 }, { "epoch": 2.178447276940904, "grad_norm": 2.2084920406341553, "learning_rate": 2.1166668084213822e-06, "loss": 0.3188, "step": 13160 }, { "epoch": 2.180102632014567, "grad_norm": 1.8947356939315796, "learning_rate": 2.1088039310696744e-06, "loss": 0.3347, "step": 13170 }, { "epoch": 2.1817579870882304, "grad_norm": 1.6848961114883423, "learning_rate": 2.1009517804945146e-06, "loss": 0.3666, "step": 13180 }, { "epoch": 2.1834133421618938, "grad_norm": 1.5556492805480957, "learning_rate": 2.0931103858285725e-06, "loss": 0.3552, "step": 13190 }, { "epoch": 2.185068697235557, "grad_norm": 1.661304235458374, "learning_rate": 2.0852797761646125e-06, "loss": 0.3119, "step": 13200 }, { "epoch": 2.1867240523092204, "grad_norm": 2.146554470062256, "learning_rate": 2.0774599805553873e-06, "loss": 0.3636, "step": 13210 }, { "epoch": 2.1883794073828837, "grad_norm": 2.547508478164673, "learning_rate": 2.069651028013523e-06, "loss": 0.3619, "step": 13220 }, { "epoch": 2.190034762456547, "grad_norm": 1.6659605503082275, "learning_rate": 2.0618529475114218e-06, "loss": 0.2849, "step": 13230 }, { "epoch": 2.1916901175302104, "grad_norm": 2.031393051147461, "learning_rate": 2.054065767981149e-06, "loss": 0.3449, "step": 13240 }, { "epoch": 2.1933454726038737, "grad_norm": 1.7782469987869263, "learning_rate": 2.0462895183143217e-06, "loss": 0.3301, "step": 13250 }, { "epoch": 2.195000827677537, "grad_norm": 2.75504994392395, "learning_rate": 2.038524227362007e-06, "loss": 0.3668, "step": 13260 }, { "epoch": 2.1966561827512003, "grad_norm": 2.0453379154205322, "learning_rate": 2.0307699239346136e-06, "loss": 0.366, "step": 13270 }, { "epoch": 2.198311537824863, "grad_norm": 2.0684151649475098, "learning_rate": 2.023026636801785e-06, "loss": 0.361, "step": 13280 }, { "epoch": 2.1999668928985265, "grad_norm": 2.427762746810913, "learning_rate": 2.0152943946922904e-06, "loss": 0.3451, "step": 13290 }, { "epoch": 2.20162224797219, "grad_norm": 1.8866089582443237, "learning_rate": 2.007573226293927e-06, "loss": 0.3214, "step": 13300 }, { "epoch": 2.203277603045853, "grad_norm": 2.268911600112915, "learning_rate": 1.999863160253398e-06, "loss": 0.3247, "step": 13310 }, { "epoch": 2.2049329581195165, "grad_norm": 2.1680514812469482, "learning_rate": 1.992164225176223e-06, "loss": 0.3316, "step": 13320 }, { "epoch": 2.20658831319318, "grad_norm": 1.8509600162506104, "learning_rate": 1.9844764496266195e-06, "loss": 0.3393, "step": 13330 }, { "epoch": 2.208243668266843, "grad_norm": 1.7594846487045288, "learning_rate": 1.9767998621274014e-06, "loss": 0.3732, "step": 13340 }, { "epoch": 2.2098990233405065, "grad_norm": 2.6831443309783936, "learning_rate": 1.969134491159873e-06, "loss": 0.3263, "step": 13350 }, { "epoch": 2.21155437841417, "grad_norm": 1.7843753099441528, "learning_rate": 1.961480365163729e-06, "loss": 0.3278, "step": 13360 }, { "epoch": 2.213209733487833, "grad_norm": 2.025347948074341, "learning_rate": 1.953837512536936e-06, "loss": 0.3137, "step": 13370 }, { "epoch": 2.2148650885614964, "grad_norm": 2.1478426456451416, "learning_rate": 1.9462059616356377e-06, "loss": 0.3597, "step": 13380 }, { "epoch": 2.2165204436351598, "grad_norm": 2.198599100112915, "learning_rate": 1.9385857407740504e-06, "loss": 0.3527, "step": 13390 }, { "epoch": 2.218175798708823, "grad_norm": 2.009014844894409, "learning_rate": 1.9309768782243473e-06, "loss": 0.3131, "step": 13400 }, { "epoch": 2.2198311537824864, "grad_norm": 2.736865997314453, "learning_rate": 1.9233794022165674e-06, "loss": 0.3361, "step": 13410 }, { "epoch": 2.2214865088561497, "grad_norm": 1.7309480905532837, "learning_rate": 1.9157933409384993e-06, "loss": 0.3225, "step": 13420 }, { "epoch": 2.223141863929813, "grad_norm": 2.736997127532959, "learning_rate": 1.908218722535582e-06, "loss": 0.3144, "step": 13430 }, { "epoch": 2.2247972190034764, "grad_norm": 1.6784467697143555, "learning_rate": 1.9006555751108001e-06, "loss": 0.34, "step": 13440 }, { "epoch": 2.2264525740771397, "grad_norm": 2.2146949768066406, "learning_rate": 1.8931039267245803e-06, "loss": 0.3752, "step": 13450 }, { "epoch": 2.228107929150803, "grad_norm": 1.708247423171997, "learning_rate": 1.8855638053946823e-06, "loss": 0.3439, "step": 13460 }, { "epoch": 2.2297632842244663, "grad_norm": 2.3849120140075684, "learning_rate": 1.8780352390961042e-06, "loss": 0.3661, "step": 13470 }, { "epoch": 2.2314186392981297, "grad_norm": 2.215123176574707, "learning_rate": 1.8705182557609714e-06, "loss": 0.3341, "step": 13480 }, { "epoch": 2.2330739943717925, "grad_norm": 3.159491777420044, "learning_rate": 1.8630128832784323e-06, "loss": 0.3365, "step": 13490 }, { "epoch": 2.234729349445456, "grad_norm": 1.2012838125228882, "learning_rate": 1.8555191494945586e-06, "loss": 0.3193, "step": 13500 }, { "epoch": 2.236384704519119, "grad_norm": 2.053283452987671, "learning_rate": 1.8480370822122412e-06, "loss": 0.3612, "step": 13510 }, { "epoch": 2.2380400595927825, "grad_norm": 2.0240302085876465, "learning_rate": 1.8405667091910845e-06, "loss": 0.3702, "step": 13520 }, { "epoch": 2.239695414666446, "grad_norm": 2.051102638244629, "learning_rate": 1.83310805814731e-06, "loss": 0.3525, "step": 13530 }, { "epoch": 2.241350769740109, "grad_norm": 2.752277135848999, "learning_rate": 1.8256611567536442e-06, "loss": 0.3599, "step": 13540 }, { "epoch": 2.2430061248137725, "grad_norm": 1.6095753908157349, "learning_rate": 1.8182260326392208e-06, "loss": 0.2926, "step": 13550 }, { "epoch": 2.244661479887436, "grad_norm": 2.0322036743164062, "learning_rate": 1.8108027133894828e-06, "loss": 0.3475, "step": 13560 }, { "epoch": 2.246316834961099, "grad_norm": 1.9392104148864746, "learning_rate": 1.8033912265460695e-06, "loss": 0.3489, "step": 13570 }, { "epoch": 2.2479721900347625, "grad_norm": 1.6775577068328857, "learning_rate": 1.7959915996067256e-06, "loss": 0.3406, "step": 13580 }, { "epoch": 2.2496275451084258, "grad_norm": 1.9670125246047974, "learning_rate": 1.7886038600251888e-06, "loss": 0.3347, "step": 13590 }, { "epoch": 2.251282900182089, "grad_norm": 2.1814675331115723, "learning_rate": 1.7812280352110956e-06, "loss": 0.3371, "step": 13600 }, { "epoch": 2.2529382552557524, "grad_norm": 1.8385521173477173, "learning_rate": 1.7738641525298766e-06, "loss": 0.3531, "step": 13610 }, { "epoch": 2.2545936103294157, "grad_norm": 1.961987018585205, "learning_rate": 1.7665122393026523e-06, "loss": 0.3291, "step": 13620 }, { "epoch": 2.256248965403079, "grad_norm": 2.28271746635437, "learning_rate": 1.759172322806142e-06, "loss": 0.3615, "step": 13630 }, { "epoch": 2.2579043204767424, "grad_norm": 1.908655047416687, "learning_rate": 1.7518444302725467e-06, "loss": 0.3458, "step": 13640 }, { "epoch": 2.2595596755504057, "grad_norm": 2.255382776260376, "learning_rate": 1.7445285888894641e-06, "loss": 0.3442, "step": 13650 }, { "epoch": 2.261215030624069, "grad_norm": 1.4033169746398926, "learning_rate": 1.7372248257997753e-06, "loss": 0.3455, "step": 13660 }, { "epoch": 2.2628703856977324, "grad_norm": 2.0262038707733154, "learning_rate": 1.7299331681015508e-06, "loss": 0.3265, "step": 13670 }, { "epoch": 2.2645257407713952, "grad_norm": 2.379423141479492, "learning_rate": 1.722653642847948e-06, "loss": 0.3533, "step": 13680 }, { "epoch": 2.2661810958450586, "grad_norm": 1.8121516704559326, "learning_rate": 1.7153862770471096e-06, "loss": 0.3274, "step": 13690 }, { "epoch": 2.267836450918722, "grad_norm": 2.4560039043426514, "learning_rate": 1.7081310976620696e-06, "loss": 0.3399, "step": 13700 }, { "epoch": 2.269491805992385, "grad_norm": 1.8034098148345947, "learning_rate": 1.700888131610643e-06, "loss": 0.3056, "step": 13710 }, { "epoch": 2.2711471610660485, "grad_norm": 1.6858197450637817, "learning_rate": 1.6936574057653366e-06, "loss": 0.3334, "step": 13720 }, { "epoch": 2.272802516139712, "grad_norm": 1.8124359846115112, "learning_rate": 1.6864389469532393e-06, "loss": 0.3583, "step": 13730 }, { "epoch": 2.274457871213375, "grad_norm": 1.7046393156051636, "learning_rate": 1.6792327819559313e-06, "loss": 0.332, "step": 13740 }, { "epoch": 2.2761132262870385, "grad_norm": 2.347473382949829, "learning_rate": 1.672038937509376e-06, "loss": 0.4029, "step": 13750 }, { "epoch": 2.277768581360702, "grad_norm": 1.9951478242874146, "learning_rate": 1.6648574403038325e-06, "loss": 0.3345, "step": 13760 }, { "epoch": 2.279423936434365, "grad_norm": 2.5333080291748047, "learning_rate": 1.657688316983746e-06, "loss": 0.3114, "step": 13770 }, { "epoch": 2.2810792915080285, "grad_norm": 1.9064871072769165, "learning_rate": 1.6505315941476507e-06, "loss": 0.3244, "step": 13780 }, { "epoch": 2.282734646581692, "grad_norm": 1.6432058811187744, "learning_rate": 1.6433872983480758e-06, "loss": 0.3372, "step": 13790 }, { "epoch": 2.284390001655355, "grad_norm": 1.8521987199783325, "learning_rate": 1.636255456091444e-06, "loss": 0.3509, "step": 13800 }, { "epoch": 2.2860453567290184, "grad_norm": 1.8956800699234009, "learning_rate": 1.6291360938379752e-06, "loss": 0.3816, "step": 13810 }, { "epoch": 2.2877007118026818, "grad_norm": 2.190884828567505, "learning_rate": 1.622029238001584e-06, "loss": 0.3157, "step": 13820 }, { "epoch": 2.289356066876345, "grad_norm": 2.1423823833465576, "learning_rate": 1.6149349149497833e-06, "loss": 0.2804, "step": 13830 }, { "epoch": 2.2910114219500084, "grad_norm": 2.221036672592163, "learning_rate": 1.607853151003591e-06, "loss": 0.3681, "step": 13840 }, { "epoch": 2.2926667770236717, "grad_norm": 2.1888656616210938, "learning_rate": 1.6007839724374253e-06, "loss": 0.3536, "step": 13850 }, { "epoch": 2.294322132097335, "grad_norm": 2.2025184631347656, "learning_rate": 1.593727405479012e-06, "loss": 0.3667, "step": 13860 }, { "epoch": 2.2959774871709984, "grad_norm": 1.9448472261428833, "learning_rate": 1.5866834763092885e-06, "loss": 0.3342, "step": 13870 }, { "epoch": 2.2976328422446617, "grad_norm": 2.2824110984802246, "learning_rate": 1.5796522110623002e-06, "loss": 0.3581, "step": 13880 }, { "epoch": 2.299288197318325, "grad_norm": 2.112283706665039, "learning_rate": 1.5726336358251104e-06, "loss": 0.3352, "step": 13890 }, { "epoch": 2.300943552391988, "grad_norm": 2.393695592880249, "learning_rate": 1.5656277766376992e-06, "loss": 0.3287, "step": 13900 }, { "epoch": 2.302598907465651, "grad_norm": 2.341130018234253, "learning_rate": 1.5586346594928675e-06, "loss": 0.3384, "step": 13910 }, { "epoch": 2.3042542625393145, "grad_norm": 2.0110530853271484, "learning_rate": 1.5516543103361403e-06, "loss": 0.3276, "step": 13920 }, { "epoch": 2.305909617612978, "grad_norm": 3.0257723331451416, "learning_rate": 1.544686755065677e-06, "loss": 0.3544, "step": 13930 }, { "epoch": 2.307564972686641, "grad_norm": 1.825762391090393, "learning_rate": 1.5377320195321642e-06, "loss": 0.3448, "step": 13940 }, { "epoch": 2.3092203277603045, "grad_norm": 1.9047185182571411, "learning_rate": 1.5307901295387268e-06, "loss": 0.3239, "step": 13950 }, { "epoch": 2.310875682833968, "grad_norm": 2.2745182514190674, "learning_rate": 1.5238611108408292e-06, "loss": 0.3447, "step": 13960 }, { "epoch": 2.312531037907631, "grad_norm": 2.037247896194458, "learning_rate": 1.5169449891461867e-06, "loss": 0.3188, "step": 13970 }, { "epoch": 2.3141863929812945, "grad_norm": 2.3445773124694824, "learning_rate": 1.5100417901146585e-06, "loss": 0.3335, "step": 13980 }, { "epoch": 2.315841748054958, "grad_norm": 2.1629180908203125, "learning_rate": 1.5031515393581642e-06, "loss": 0.3525, "step": 13990 }, { "epoch": 2.317497103128621, "grad_norm": 2.212402820587158, "learning_rate": 1.496274262440579e-06, "loss": 0.3469, "step": 14000 }, { "epoch": 2.3191524582022844, "grad_norm": 2.41933012008667, "learning_rate": 1.4894099848776444e-06, "loss": 0.3755, "step": 14010 }, { "epoch": 2.3208078132759478, "grad_norm": 1.9362802505493164, "learning_rate": 1.482558732136874e-06, "loss": 0.3416, "step": 14020 }, { "epoch": 2.322463168349611, "grad_norm": 1.8047678470611572, "learning_rate": 1.4757205296374532e-06, "loss": 0.3323, "step": 14030 }, { "epoch": 2.3241185234232744, "grad_norm": 1.8480398654937744, "learning_rate": 1.4688954027501545e-06, "loss": 0.3112, "step": 14040 }, { "epoch": 2.3257738784969377, "grad_norm": 2.211897611618042, "learning_rate": 1.4620833767972365e-06, "loss": 0.3252, "step": 14050 }, { "epoch": 2.327429233570601, "grad_norm": 2.3595709800720215, "learning_rate": 1.4552844770523477e-06, "loss": 0.336, "step": 14060 }, { "epoch": 2.3290845886442644, "grad_norm": 3.3694913387298584, "learning_rate": 1.4484987287404407e-06, "loss": 0.3253, "step": 14070 }, { "epoch": 2.3307399437179273, "grad_norm": 1.8503248691558838, "learning_rate": 1.4417261570376701e-06, "loss": 0.3293, "step": 14080 }, { "epoch": 2.3323952987915906, "grad_norm": 2.116312265396118, "learning_rate": 1.4349667870713057e-06, "loss": 0.3472, "step": 14090 }, { "epoch": 2.334050653865254, "grad_norm": 2.352787971496582, "learning_rate": 1.4282206439196395e-06, "loss": 0.3609, "step": 14100 }, { "epoch": 2.335706008938917, "grad_norm": 1.6080447435379028, "learning_rate": 1.4214877526118853e-06, "loss": 0.3355, "step": 14110 }, { "epoch": 2.3373613640125805, "grad_norm": 2.383387565612793, "learning_rate": 1.4147681381280909e-06, "loss": 0.3439, "step": 14120 }, { "epoch": 2.339016719086244, "grad_norm": 2.3709840774536133, "learning_rate": 1.4080618253990502e-06, "loss": 0.3425, "step": 14130 }, { "epoch": 2.340672074159907, "grad_norm": 2.1909902095794678, "learning_rate": 1.4013688393062003e-06, "loss": 0.3428, "step": 14140 }, { "epoch": 2.3423274292335705, "grad_norm": 1.8654948472976685, "learning_rate": 1.3946892046815341e-06, "loss": 0.3237, "step": 14150 }, { "epoch": 2.343982784307234, "grad_norm": 1.9633307456970215, "learning_rate": 1.3880229463075146e-06, "loss": 0.3227, "step": 14160 }, { "epoch": 2.345638139380897, "grad_norm": 2.1636171340942383, "learning_rate": 1.3813700889169707e-06, "loss": 0.3633, "step": 14170 }, { "epoch": 2.3472934944545605, "grad_norm": 2.097670555114746, "learning_rate": 1.3747306571930141e-06, "loss": 0.363, "step": 14180 }, { "epoch": 2.348948849528224, "grad_norm": 2.1034417152404785, "learning_rate": 1.3681046757689448e-06, "loss": 0.3479, "step": 14190 }, { "epoch": 2.350604204601887, "grad_norm": 1.7305580377578735, "learning_rate": 1.3614921692281586e-06, "loss": 0.3351, "step": 14200 }, { "epoch": 2.3522595596755504, "grad_norm": 2.346315860748291, "learning_rate": 1.3548931621040607e-06, "loss": 0.382, "step": 14210 }, { "epoch": 2.3539149147492138, "grad_norm": 2.112192392349243, "learning_rate": 1.3483076788799715e-06, "loss": 0.3498, "step": 14220 }, { "epoch": 2.355570269822877, "grad_norm": 2.4844164848327637, "learning_rate": 1.3417357439890323e-06, "loss": 0.359, "step": 14230 }, { "epoch": 2.3572256248965404, "grad_norm": 2.170315742492676, "learning_rate": 1.33517738181412e-06, "loss": 0.3237, "step": 14240 }, { "epoch": 2.3588809799702037, "grad_norm": 1.9840915203094482, "learning_rate": 1.328632616687754e-06, "loss": 0.3087, "step": 14250 }, { "epoch": 2.360536335043867, "grad_norm": 2.098491907119751, "learning_rate": 1.3221014728920056e-06, "loss": 0.3542, "step": 14260 }, { "epoch": 2.3621916901175304, "grad_norm": 2.413897752761841, "learning_rate": 1.3155839746584138e-06, "loss": 0.3359, "step": 14270 }, { "epoch": 2.3638470451911937, "grad_norm": 2.1317267417907715, "learning_rate": 1.3090801461678848e-06, "loss": 0.3045, "step": 14280 }, { "epoch": 2.365502400264857, "grad_norm": 1.6186710596084595, "learning_rate": 1.3025900115506086e-06, "loss": 0.347, "step": 14290 }, { "epoch": 2.3671577553385204, "grad_norm": 2.192939519882202, "learning_rate": 1.2961135948859737e-06, "loss": 0.4005, "step": 14300 }, { "epoch": 2.3688131104121832, "grad_norm": 2.0540459156036377, "learning_rate": 1.2896509202024682e-06, "loss": 0.3477, "step": 14310 }, { "epoch": 2.3704684654858466, "grad_norm": 2.3481903076171875, "learning_rate": 1.2832020114775951e-06, "loss": 0.3254, "step": 14320 }, { "epoch": 2.37212382055951, "grad_norm": 1.9363404512405396, "learning_rate": 1.2767668926377885e-06, "loss": 0.3361, "step": 14330 }, { "epoch": 2.373779175633173, "grad_norm": 2.1417112350463867, "learning_rate": 1.2703455875583148e-06, "loss": 0.3502, "step": 14340 }, { "epoch": 2.3754345307068365, "grad_norm": 2.0085740089416504, "learning_rate": 1.263938120063191e-06, "loss": 0.3358, "step": 14350 }, { "epoch": 2.3770898857805, "grad_norm": 1.862365961074829, "learning_rate": 1.2575445139250936e-06, "loss": 0.3062, "step": 14360 }, { "epoch": 2.378745240854163, "grad_norm": 1.9572957754135132, "learning_rate": 1.2511647928652754e-06, "loss": 0.3038, "step": 14370 }, { "epoch": 2.3804005959278265, "grad_norm": 2.7064197063446045, "learning_rate": 1.2447989805534677e-06, "loss": 0.3101, "step": 14380 }, { "epoch": 2.38205595100149, "grad_norm": 2.0222458839416504, "learning_rate": 1.2384471006078036e-06, "loss": 0.3894, "step": 14390 }, { "epoch": 2.383711306075153, "grad_norm": 1.8793877363204956, "learning_rate": 1.2321091765947214e-06, "loss": 0.2965, "step": 14400 }, { "epoch": 2.3853666611488165, "grad_norm": 1.7460297346115112, "learning_rate": 1.2257852320288815e-06, "loss": 0.3337, "step": 14410 }, { "epoch": 2.38702201622248, "grad_norm": 2.319016218185425, "learning_rate": 1.219475290373079e-06, "loss": 0.3176, "step": 14420 }, { "epoch": 2.388677371296143, "grad_norm": 2.06717848777771, "learning_rate": 1.2131793750381554e-06, "loss": 0.3919, "step": 14430 }, { "epoch": 2.3903327263698064, "grad_norm": 2.179219961166382, "learning_rate": 1.2068975093829123e-06, "loss": 0.3412, "step": 14440 }, { "epoch": 2.3919880814434697, "grad_norm": 1.629726767539978, "learning_rate": 1.2006297167140257e-06, "loss": 0.307, "step": 14450 }, { "epoch": 2.393643436517133, "grad_norm": 2.2001848220825195, "learning_rate": 1.1943760202859606e-06, "loss": 0.3739, "step": 14460 }, { "epoch": 2.3952987915907964, "grad_norm": 1.8462634086608887, "learning_rate": 1.188136443300879e-06, "loss": 0.3466, "step": 14470 }, { "epoch": 2.3969541466644597, "grad_norm": 1.7766656875610352, "learning_rate": 1.1819110089085595e-06, "loss": 0.3227, "step": 14480 }, { "epoch": 2.3986095017381226, "grad_norm": 2.0982725620269775, "learning_rate": 1.1756997402063069e-06, "loss": 0.3111, "step": 14490 }, { "epoch": 2.400264856811786, "grad_norm": 2.340324878692627, "learning_rate": 1.1695026602388755e-06, "loss": 0.3458, "step": 14500 }, { "epoch": 2.4019202118854492, "grad_norm": 2.2285056114196777, "learning_rate": 1.1633197919983707e-06, "loss": 0.3496, "step": 14510 }, { "epoch": 2.4035755669591126, "grad_norm": 1.9656524658203125, "learning_rate": 1.1571511584241735e-06, "loss": 0.3217, "step": 14520 }, { "epoch": 2.405230922032776, "grad_norm": 2.775507688522339, "learning_rate": 1.1509967824028496e-06, "loss": 0.3409, "step": 14530 }, { "epoch": 2.406886277106439, "grad_norm": 2.0390307903289795, "learning_rate": 1.1448566867680715e-06, "loss": 0.3302, "step": 14540 }, { "epoch": 2.4085416321801025, "grad_norm": 2.3175621032714844, "learning_rate": 1.1387308943005248e-06, "loss": 0.3494, "step": 14550 }, { "epoch": 2.410196987253766, "grad_norm": 2.031985282897949, "learning_rate": 1.132619427727832e-06, "loss": 0.3195, "step": 14560 }, { "epoch": 2.411852342327429, "grad_norm": 1.8072073459625244, "learning_rate": 1.1265223097244604e-06, "loss": 0.3405, "step": 14570 }, { "epoch": 2.4135076974010925, "grad_norm": 2.3952126502990723, "learning_rate": 1.1204395629116445e-06, "loss": 0.3388, "step": 14580 }, { "epoch": 2.415163052474756, "grad_norm": 2.0482804775238037, "learning_rate": 1.114371209857299e-06, "loss": 0.337, "step": 14590 }, { "epoch": 2.416818407548419, "grad_norm": 2.1736388206481934, "learning_rate": 1.1083172730759351e-06, "loss": 0.3629, "step": 14600 }, { "epoch": 2.4184737626220825, "grad_norm": 2.3148396015167236, "learning_rate": 1.1022777750285767e-06, "loss": 0.3523, "step": 14610 }, { "epoch": 2.420129117695746, "grad_norm": 2.455919027328491, "learning_rate": 1.0962527381226795e-06, "loss": 0.3413, "step": 14620 }, { "epoch": 2.421784472769409, "grad_norm": 2.373774766921997, "learning_rate": 1.0902421847120482e-06, "loss": 0.3068, "step": 14630 }, { "epoch": 2.4234398278430724, "grad_norm": 2.160219669342041, "learning_rate": 1.084246137096746e-06, "loss": 0.345, "step": 14640 }, { "epoch": 2.4250951829167358, "grad_norm": 1.6808048486709595, "learning_rate": 1.0782646175230217e-06, "loss": 0.3279, "step": 14650 }, { "epoch": 2.426750537990399, "grad_norm": 1.472030520439148, "learning_rate": 1.0722976481832205e-06, "loss": 0.3158, "step": 14660 }, { "epoch": 2.4284058930640624, "grad_norm": 1.8282170295715332, "learning_rate": 1.0663452512157035e-06, "loss": 0.323, "step": 14670 }, { "epoch": 2.4300612481377257, "grad_norm": 1.9233111143112183, "learning_rate": 1.0604074487047705e-06, "loss": 0.3741, "step": 14680 }, { "epoch": 2.431716603211389, "grad_norm": 1.9156076908111572, "learning_rate": 1.0544842626805684e-06, "loss": 0.3296, "step": 14690 }, { "epoch": 2.4333719582850524, "grad_norm": 2.5253238677978516, "learning_rate": 1.0485757151190195e-06, "loss": 0.3149, "step": 14700 }, { "epoch": 2.4350273133587153, "grad_norm": 1.5516034364700317, "learning_rate": 1.0426818279417306e-06, "loss": 0.3297, "step": 14710 }, { "epoch": 2.4366826684323786, "grad_norm": 1.9635651111602783, "learning_rate": 1.036802623015918e-06, "loss": 0.3331, "step": 14720 }, { "epoch": 2.438338023506042, "grad_norm": 1.9763412475585938, "learning_rate": 1.0309381221543268e-06, "loss": 0.3308, "step": 14730 }, { "epoch": 2.439993378579705, "grad_norm": 1.942713975906372, "learning_rate": 1.0250883471151451e-06, "loss": 0.3429, "step": 14740 }, { "epoch": 2.4416487336533685, "grad_norm": 2.4745335578918457, "learning_rate": 1.0192533196019267e-06, "loss": 0.3287, "step": 14750 }, { "epoch": 2.443304088727032, "grad_norm": 2.276266098022461, "learning_rate": 1.0134330612635101e-06, "loss": 0.3187, "step": 14760 }, { "epoch": 2.444959443800695, "grad_norm": 1.9105377197265625, "learning_rate": 1.0076275936939357e-06, "loss": 0.2986, "step": 14770 }, { "epoch": 2.4466147988743585, "grad_norm": 2.1453704833984375, "learning_rate": 1.0018369384323722e-06, "loss": 0.3588, "step": 14780 }, { "epoch": 2.448270153948022, "grad_norm": 2.0307068824768066, "learning_rate": 9.960611169630308e-07, "loss": 0.3284, "step": 14790 }, { "epoch": 2.449925509021685, "grad_norm": 2.2573883533477783, "learning_rate": 9.90300150715085e-07, "loss": 0.3408, "step": 14800 }, { "epoch": 2.4515808640953485, "grad_norm": 2.224506378173828, "learning_rate": 9.845540610625953e-07, "loss": 0.338, "step": 14810 }, { "epoch": 2.453236219169012, "grad_norm": 2.1470425128936768, "learning_rate": 9.788228693244266e-07, "loss": 0.3116, "step": 14820 }, { "epoch": 2.454891574242675, "grad_norm": 1.8685119152069092, "learning_rate": 9.731065967641712e-07, "loss": 0.3046, "step": 14830 }, { "epoch": 2.4565469293163384, "grad_norm": 2.515958070755005, "learning_rate": 9.67405264590066e-07, "loss": 0.3251, "step": 14840 }, { "epoch": 2.4582022843900018, "grad_norm": 2.4196338653564453, "learning_rate": 9.617188939549232e-07, "loss": 0.318, "step": 14850 }, { "epoch": 2.459857639463665, "grad_norm": 2.0046846866607666, "learning_rate": 9.560475059560388e-07, "loss": 0.325, "step": 14860 }, { "epoch": 2.4615129945373284, "grad_norm": 2.182124376296997, "learning_rate": 9.503911216351252e-07, "loss": 0.3061, "step": 14870 }, { "epoch": 2.4631683496109917, "grad_norm": 2.041778087615967, "learning_rate": 9.447497619782269e-07, "loss": 0.31, "step": 14880 }, { "epoch": 2.4648237046846546, "grad_norm": 2.510472297668457, "learning_rate": 9.391234479156452e-07, "loss": 0.3278, "step": 14890 }, { "epoch": 2.466479059758318, "grad_norm": 1.6733882427215576, "learning_rate": 9.335122003218583e-07, "loss": 0.3061, "step": 14900 }, { "epoch": 2.4681344148319813, "grad_norm": 1.9581342935562134, "learning_rate": 9.279160400154497e-07, "loss": 0.3268, "step": 14910 }, { "epoch": 2.4697897699056446, "grad_norm": 2.368661642074585, "learning_rate": 9.223349877590226e-07, "loss": 0.3642, "step": 14920 }, { "epoch": 2.471445124979308, "grad_norm": 1.5843191146850586, "learning_rate": 9.167690642591287e-07, "loss": 0.3566, "step": 14930 }, { "epoch": 2.4731004800529712, "grad_norm": 1.8079990148544312, "learning_rate": 9.112182901661881e-07, "loss": 0.3455, "step": 14940 }, { "epoch": 2.4747558351266346, "grad_norm": 2.184272289276123, "learning_rate": 9.056826860744178e-07, "loss": 0.3407, "step": 14950 }, { "epoch": 2.476411190200298, "grad_norm": 1.532046914100647, "learning_rate": 9.001622725217495e-07, "loss": 0.31, "step": 14960 }, { "epoch": 2.478066545273961, "grad_norm": 2.5433170795440674, "learning_rate": 8.946570699897566e-07, "loss": 0.3267, "step": 14970 }, { "epoch": 2.4797219003476245, "grad_norm": 2.535883903503418, "learning_rate": 8.89167098903575e-07, "loss": 0.3047, "step": 14980 }, { "epoch": 2.481377255421288, "grad_norm": 1.7732888460159302, "learning_rate": 8.836923796318325e-07, "loss": 0.3173, "step": 14990 }, { "epoch": 2.483032610494951, "grad_norm": 2.467151403427124, "learning_rate": 8.78232932486568e-07, "loss": 0.3411, "step": 15000 }, { "epoch": 2.4846879655686145, "grad_norm": 2.1632003784179688, "learning_rate": 8.727887777231591e-07, "loss": 0.3038, "step": 15010 }, { "epoch": 2.486343320642278, "grad_norm": 2.1071043014526367, "learning_rate": 8.673599355402496e-07, "loss": 0.3386, "step": 15020 }, { "epoch": 2.487998675715941, "grad_norm": 1.7851759195327759, "learning_rate": 8.619464260796651e-07, "loss": 0.3284, "step": 15030 }, { "epoch": 2.4896540307896045, "grad_norm": 2.402301788330078, "learning_rate": 8.565482694263516e-07, "loss": 0.3356, "step": 15040 }, { "epoch": 2.4913093858632678, "grad_norm": 1.905591368675232, "learning_rate": 8.51165485608288e-07, "loss": 0.303, "step": 15050 }, { "epoch": 2.492964740936931, "grad_norm": 2.015292167663574, "learning_rate": 8.4579809459642e-07, "loss": 0.3312, "step": 15060 }, { "epoch": 2.4946200960105944, "grad_norm": 2.36039662361145, "learning_rate": 8.404461163045829e-07, "loss": 0.3066, "step": 15070 }, { "epoch": 2.4962754510842577, "grad_norm": 2.305978536605835, "learning_rate": 8.351095705894308e-07, "loss": 0.3414, "step": 15080 }, { "epoch": 2.497930806157921, "grad_norm": 2.039421558380127, "learning_rate": 8.297884772503578e-07, "loss": 0.3564, "step": 15090 }, { "epoch": 2.4995861612315844, "grad_norm": 2.0474462509155273, "learning_rate": 8.244828560294272e-07, "loss": 0.3178, "step": 15100 }, { "epoch": 2.5012415163052477, "grad_norm": 2.152980089187622, "learning_rate": 8.19192726611302e-07, "loss": 0.3511, "step": 15110 }, { "epoch": 2.502896871378911, "grad_norm": 2.019902467727661, "learning_rate": 8.139181086231651e-07, "loss": 0.3401, "step": 15120 }, { "epoch": 2.504552226452574, "grad_norm": 2.122307777404785, "learning_rate": 8.086590216346479e-07, "loss": 0.3577, "step": 15130 }, { "epoch": 2.5062075815262372, "grad_norm": 2.2243058681488037, "learning_rate": 8.034154851577658e-07, "loss": 0.361, "step": 15140 }, { "epoch": 2.5078629365999006, "grad_norm": 2.078754425048828, "learning_rate": 7.981875186468335e-07, "loss": 0.3345, "step": 15150 }, { "epoch": 2.509518291673564, "grad_norm": 2.2314717769622803, "learning_rate": 7.929751414984011e-07, "loss": 0.3215, "step": 15160 }, { "epoch": 2.511173646747227, "grad_norm": 2.4789726734161377, "learning_rate": 7.87778373051179e-07, "loss": 0.3323, "step": 15170 }, { "epoch": 2.5128290018208905, "grad_norm": 1.7479615211486816, "learning_rate": 7.825972325859671e-07, "loss": 0.3372, "step": 15180 }, { "epoch": 2.514484356894554, "grad_norm": 2.23327374458313, "learning_rate": 7.774317393255837e-07, "loss": 0.3448, "step": 15190 }, { "epoch": 2.516139711968217, "grad_norm": 1.7538516521453857, "learning_rate": 7.722819124347952e-07, "loss": 0.3118, "step": 15200 }, { "epoch": 2.5177950670418805, "grad_norm": 2.249854803085327, "learning_rate": 7.671477710202407e-07, "loss": 0.3393, "step": 15210 }, { "epoch": 2.519450422115544, "grad_norm": 1.923726201057434, "learning_rate": 7.620293341303636e-07, "loss": 0.3565, "step": 15220 }, { "epoch": 2.521105777189207, "grad_norm": 2.8457915782928467, "learning_rate": 7.569266207553427e-07, "loss": 0.3383, "step": 15230 }, { "epoch": 2.5227611322628705, "grad_norm": 2.412850856781006, "learning_rate": 7.518396498270191e-07, "loss": 0.3066, "step": 15240 }, { "epoch": 2.524416487336534, "grad_norm": 2.3531320095062256, "learning_rate": 7.467684402188291e-07, "loss": 0.3237, "step": 15250 }, { "epoch": 2.526071842410197, "grad_norm": 1.6720978021621704, "learning_rate": 7.417130107457293e-07, "loss": 0.3458, "step": 15260 }, { "epoch": 2.5277271974838604, "grad_norm": 2.2779157161712646, "learning_rate": 7.366733801641302e-07, "loss": 0.3091, "step": 15270 }, { "epoch": 2.5293825525575233, "grad_norm": 3.225557804107666, "learning_rate": 7.316495671718293e-07, "loss": 0.3412, "step": 15280 }, { "epoch": 2.5310379076311866, "grad_norm": 2.389375925064087, "learning_rate": 7.266415904079338e-07, "loss": 0.3549, "step": 15290 }, { "epoch": 2.53269326270485, "grad_norm": 2.0392401218414307, "learning_rate": 7.216494684527975e-07, "loss": 0.3286, "step": 15300 }, { "epoch": 2.5343486177785133, "grad_norm": 1.929891586303711, "learning_rate": 7.166732198279535e-07, "loss": 0.3015, "step": 15310 }, { "epoch": 2.5360039728521766, "grad_norm": 1.5978699922561646, "learning_rate": 7.11712862996039e-07, "loss": 0.3161, "step": 15320 }, { "epoch": 2.53765932792584, "grad_norm": 2.1672823429107666, "learning_rate": 7.067684163607308e-07, "loss": 0.3129, "step": 15330 }, { "epoch": 2.5393146829995032, "grad_norm": 1.8573296070098877, "learning_rate": 7.018398982666757e-07, "loss": 0.3553, "step": 15340 }, { "epoch": 2.5409700380731666, "grad_norm": 1.951036810874939, "learning_rate": 6.96927326999427e-07, "loss": 0.3444, "step": 15350 }, { "epoch": 2.54262539314683, "grad_norm": 2.200702428817749, "learning_rate": 6.920307207853683e-07, "loss": 0.3321, "step": 15360 }, { "epoch": 2.544280748220493, "grad_norm": 1.9019625186920166, "learning_rate": 6.87150097791654e-07, "loss": 0.3775, "step": 15370 }, { "epoch": 2.5459361032941565, "grad_norm": 1.8351606130599976, "learning_rate": 6.822854761261355e-07, "loss": 0.3357, "step": 15380 }, { "epoch": 2.54759145836782, "grad_norm": 2.076394557952881, "learning_rate": 6.774368738372988e-07, "loss": 0.3164, "step": 15390 }, { "epoch": 2.549246813441483, "grad_norm": 2.102830171585083, "learning_rate": 6.726043089141943e-07, "loss": 0.3377, "step": 15400 }, { "epoch": 2.5509021685151465, "grad_norm": 2.1659622192382812, "learning_rate": 6.677877992863707e-07, "loss": 0.3157, "step": 15410 }, { "epoch": 2.55255752358881, "grad_norm": 2.069488525390625, "learning_rate": 6.629873628238126e-07, "loss": 0.3388, "step": 15420 }, { "epoch": 2.554212878662473, "grad_norm": 2.4191696643829346, "learning_rate": 6.582030173368664e-07, "loss": 0.3583, "step": 15430 }, { "epoch": 2.5558682337361365, "grad_norm": 2.1078972816467285, "learning_rate": 6.534347805761826e-07, "loss": 0.3732, "step": 15440 }, { "epoch": 2.5575235888098, "grad_norm": 1.9522989988327026, "learning_rate": 6.486826702326426e-07, "loss": 0.289, "step": 15450 }, { "epoch": 2.559178943883463, "grad_norm": 1.9794495105743408, "learning_rate": 6.439467039372971e-07, "loss": 0.3604, "step": 15460 }, { "epoch": 2.5608342989571264, "grad_norm": 2.4447927474975586, "learning_rate": 6.392268992613005e-07, "loss": 0.3306, "step": 15470 }, { "epoch": 2.5624896540307898, "grad_norm": 2.0704166889190674, "learning_rate": 6.345232737158458e-07, "loss": 0.3003, "step": 15480 }, { "epoch": 2.564145009104453, "grad_norm": 2.33072566986084, "learning_rate": 6.298358447520985e-07, "loss": 0.3249, "step": 15490 }, { "epoch": 2.5658003641781164, "grad_norm": 2.0629100799560547, "learning_rate": 6.251646297611308e-07, "loss": 0.3381, "step": 15500 }, { "epoch": 2.5674557192517797, "grad_norm": 3.117828607559204, "learning_rate": 6.205096460738591e-07, "loss": 0.3267, "step": 15510 }, { "epoch": 2.569111074325443, "grad_norm": 1.9236739873886108, "learning_rate": 6.158709109609828e-07, "loss": 0.3281, "step": 15520 }, { "epoch": 2.5707664293991064, "grad_norm": 2.1610283851623535, "learning_rate": 6.112484416329107e-07, "loss": 0.3101, "step": 15530 }, { "epoch": 2.5724217844727693, "grad_norm": 1.9747469425201416, "learning_rate": 6.066422552397083e-07, "loss": 0.3243, "step": 15540 }, { "epoch": 2.5740771395464326, "grad_norm": 2.253056049346924, "learning_rate": 6.020523688710256e-07, "loss": 0.3359, "step": 15550 }, { "epoch": 2.575732494620096, "grad_norm": 2.041837215423584, "learning_rate": 5.974787995560389e-07, "loss": 0.302, "step": 15560 }, { "epoch": 2.5773878496937592, "grad_norm": 1.8018887042999268, "learning_rate": 5.92921564263384e-07, "loss": 0.36, "step": 15570 }, { "epoch": 2.5790432047674225, "grad_norm": 2.423647880554199, "learning_rate": 5.88380679901096e-07, "loss": 0.3439, "step": 15580 }, { "epoch": 2.580698559841086, "grad_norm": 2.334973096847534, "learning_rate": 5.838561633165474e-07, "loss": 0.2951, "step": 15590 }, { "epoch": 2.582353914914749, "grad_norm": 1.6279757022857666, "learning_rate": 5.793480312963789e-07, "loss": 0.3056, "step": 15600 }, { "epoch": 2.5840092699884125, "grad_norm": 1.6940557956695557, "learning_rate": 5.748563005664482e-07, "loss": 0.3447, "step": 15610 }, { "epoch": 2.585664625062076, "grad_norm": 2.257672071456909, "learning_rate": 5.703809877917571e-07, "loss": 0.3755, "step": 15620 }, { "epoch": 2.587319980135739, "grad_norm": 1.9907878637313843, "learning_rate": 5.659221095763955e-07, "loss": 0.3425, "step": 15630 }, { "epoch": 2.5889753352094025, "grad_norm": 1.69442617893219, "learning_rate": 5.614796824634783e-07, "loss": 0.332, "step": 15640 }, { "epoch": 2.590630690283066, "grad_norm": 1.8623377084732056, "learning_rate": 5.570537229350864e-07, "loss": 0.3538, "step": 15650 }, { "epoch": 2.592286045356729, "grad_norm": 2.767336845397949, "learning_rate": 5.526442474122013e-07, "loss": 0.3634, "step": 15660 }, { "epoch": 2.5939414004303925, "grad_norm": 2.1302411556243896, "learning_rate": 5.482512722546468e-07, "loss": 0.3413, "step": 15670 }, { "epoch": 2.5955967555040558, "grad_norm": 1.9488990306854248, "learning_rate": 5.438748137610267e-07, "loss": 0.3044, "step": 15680 }, { "epoch": 2.5972521105777187, "grad_norm": 1.6431246995925903, "learning_rate": 5.395148881686685e-07, "loss": 0.348, "step": 15690 }, { "epoch": 2.598907465651382, "grad_norm": 2.266380548477173, "learning_rate": 5.351715116535571e-07, "loss": 0.3123, "step": 15700 }, { "epoch": 2.6005628207250453, "grad_norm": 2.4466562271118164, "learning_rate": 5.308447003302808e-07, "loss": 0.3468, "step": 15710 }, { "epoch": 2.6022181757987086, "grad_norm": 2.20597505569458, "learning_rate": 5.265344702519654e-07, "loss": 0.3658, "step": 15720 }, { "epoch": 2.603873530872372, "grad_norm": 1.868127703666687, "learning_rate": 5.2224083741022e-07, "loss": 0.3256, "step": 15730 }, { "epoch": 2.6055288859460353, "grad_norm": 2.128237724304199, "learning_rate": 5.179638177350737e-07, "loss": 0.3305, "step": 15740 }, { "epoch": 2.6071842410196986, "grad_norm": 2.1606836318969727, "learning_rate": 5.137034270949182e-07, "loss": 0.3211, "step": 15750 }, { "epoch": 2.608839596093362, "grad_norm": 2.227559804916382, "learning_rate": 5.094596812964525e-07, "loss": 0.3505, "step": 15760 }, { "epoch": 2.6104949511670252, "grad_norm": 1.9581117630004883, "learning_rate": 5.052325960846155e-07, "loss": 0.3398, "step": 15770 }, { "epoch": 2.6121503062406886, "grad_norm": 2.039578914642334, "learning_rate": 5.010221871425375e-07, "loss": 0.3093, "step": 15780 }, { "epoch": 2.613805661314352, "grad_norm": 1.7403851747512817, "learning_rate": 4.968284700914744e-07, "loss": 0.3196, "step": 15790 }, { "epoch": 2.615461016388015, "grad_norm": 1.892404317855835, "learning_rate": 4.926514604907534e-07, "loss": 0.3266, "step": 15800 }, { "epoch": 2.6171163714616785, "grad_norm": 2.2020905017852783, "learning_rate": 4.88491173837713e-07, "loss": 0.3502, "step": 15810 }, { "epoch": 2.618771726535342, "grad_norm": 1.3924425840377808, "learning_rate": 4.843476255676516e-07, "loss": 0.3446, "step": 15820 }, { "epoch": 2.620427081609005, "grad_norm": 2.562934637069702, "learning_rate": 4.802208310537604e-07, "loss": 0.3755, "step": 15830 }, { "epoch": 2.6220824366826685, "grad_norm": 1.5193594694137573, "learning_rate": 4.7611080560707344e-07, "loss": 0.3136, "step": 15840 }, { "epoch": 2.623737791756332, "grad_norm": 2.2128007411956787, "learning_rate": 4.720175644764119e-07, "loss": 0.3413, "step": 15850 }, { "epoch": 2.625393146829995, "grad_norm": 1.999158263206482, "learning_rate": 4.6794112284831995e-07, "loss": 0.3565, "step": 15860 }, { "epoch": 2.6270485019036585, "grad_norm": 2.3605151176452637, "learning_rate": 4.63881495847015e-07, "loss": 0.317, "step": 15870 }, { "epoch": 2.628703856977322, "grad_norm": 2.1883292198181152, "learning_rate": 4.5983869853433174e-07, "loss": 0.317, "step": 15880 }, { "epoch": 2.630359212050985, "grad_norm": 2.075212240219116, "learning_rate": 4.558127459096612e-07, "loss": 0.308, "step": 15890 }, { "epoch": 2.6320145671246484, "grad_norm": 1.9252678155899048, "learning_rate": 4.51803652909899e-07, "loss": 0.3231, "step": 15900 }, { "epoch": 2.6336699221983118, "grad_norm": 2.443986415863037, "learning_rate": 4.4781143440938923e-07, "loss": 0.3437, "step": 15910 }, { "epoch": 2.635325277271975, "grad_norm": 1.7434769868850708, "learning_rate": 4.438361052198675e-07, "loss": 0.3152, "step": 15920 }, { "epoch": 2.6369806323456384, "grad_norm": 2.128065347671509, "learning_rate": 4.3987768009041033e-07, "loss": 0.3374, "step": 15930 }, { "epoch": 2.6386359874193013, "grad_norm": 1.744676113128662, "learning_rate": 4.3593617370737697e-07, "loss": 0.31, "step": 15940 }, { "epoch": 2.6402913424929646, "grad_norm": 1.8740226030349731, "learning_rate": 4.3201160069435367e-07, "loss": 0.3152, "step": 15950 }, { "epoch": 2.641946697566628, "grad_norm": 3.100207567214966, "learning_rate": 4.281039756121025e-07, "loss": 0.3397, "step": 15960 }, { "epoch": 2.6436020526402912, "grad_norm": 1.7563873529434204, "learning_rate": 4.242133129585063e-07, "loss": 0.3383, "step": 15970 }, { "epoch": 2.6452574077139546, "grad_norm": 2.2582356929779053, "learning_rate": 4.2033962716851396e-07, "loss": 0.3247, "step": 15980 }, { "epoch": 2.646912762787618, "grad_norm": 2.0863699913024902, "learning_rate": 4.164829326140873e-07, "loss": 0.335, "step": 15990 }, { "epoch": 2.648568117861281, "grad_norm": 2.127856969833374, "learning_rate": 4.126432436041511e-07, "loss": 0.3716, "step": 16000 }, { "epoch": 2.6502234729349445, "grad_norm": 1.7168405055999756, "learning_rate": 4.0882057438453305e-07, "loss": 0.3074, "step": 16010 }, { "epoch": 2.651878828008608, "grad_norm": 2.110229969024658, "learning_rate": 4.050149391379177e-07, "loss": 0.3661, "step": 16020 }, { "epoch": 2.653534183082271, "grad_norm": 1.760513186454773, "learning_rate": 4.0122635198378943e-07, "loss": 0.2615, "step": 16030 }, { "epoch": 2.6551895381559345, "grad_norm": 1.8273522853851318, "learning_rate": 3.974548269783807e-07, "loss": 0.3055, "step": 16040 }, { "epoch": 2.656844893229598, "grad_norm": 2.403547763824463, "learning_rate": 3.9370037811462424e-07, "loss": 0.341, "step": 16050 }, { "epoch": 2.658500248303261, "grad_norm": 2.5805859565734863, "learning_rate": 3.899630193220949e-07, "loss": 0.3347, "step": 16060 }, { "epoch": 2.6601556033769245, "grad_norm": 2.449556827545166, "learning_rate": 3.8624276446696086e-07, "loss": 0.3419, "step": 16070 }, { "epoch": 2.661810958450588, "grad_norm": 1.9614695310592651, "learning_rate": 3.825396273519322e-07, "loss": 0.3109, "step": 16080 }, { "epoch": 2.663466313524251, "grad_norm": 1.9693225622177124, "learning_rate": 3.78853621716212e-07, "loss": 0.3744, "step": 16090 }, { "epoch": 2.665121668597914, "grad_norm": 2.334308385848999, "learning_rate": 3.751847612354387e-07, "loss": 0.3278, "step": 16100 }, { "epoch": 2.6667770236715773, "grad_norm": 1.722469687461853, "learning_rate": 3.715330595216443e-07, "loss": 0.3293, "step": 16110 }, { "epoch": 2.6684323787452406, "grad_norm": 2.3163626194000244, "learning_rate": 3.678985301231952e-07, "loss": 0.3562, "step": 16120 }, { "epoch": 2.670087733818904, "grad_norm": 1.810492992401123, "learning_rate": 3.6428118652474807e-07, "loss": 0.3117, "step": 16130 }, { "epoch": 2.6717430888925673, "grad_norm": 1.643846869468689, "learning_rate": 3.606810421471973e-07, "loss": 0.3057, "step": 16140 }, { "epoch": 2.6733984439662306, "grad_norm": 2.1098129749298096, "learning_rate": 3.5709811034762456e-07, "loss": 0.3583, "step": 16150 }, { "epoch": 2.675053799039894, "grad_norm": 1.8769261837005615, "learning_rate": 3.535324044192506e-07, "loss": 0.3228, "step": 16160 }, { "epoch": 2.6767091541135573, "grad_norm": 2.2012782096862793, "learning_rate": 3.499839375913872e-07, "loss": 0.3166, "step": 16170 }, { "epoch": 2.6783645091872206, "grad_norm": 2.656381607055664, "learning_rate": 3.464527230293852e-07, "loss": 0.3546, "step": 16180 }, { "epoch": 2.680019864260884, "grad_norm": 2.1426846981048584, "learning_rate": 3.429387738345868e-07, "loss": 0.3262, "step": 16190 }, { "epoch": 2.6816752193345472, "grad_norm": 2.4014816284179688, "learning_rate": 3.3944210304427736e-07, "loss": 0.3479, "step": 16200 }, { "epoch": 2.6833305744082105, "grad_norm": 2.6231253147125244, "learning_rate": 3.3596272363163594e-07, "loss": 0.338, "step": 16210 }, { "epoch": 2.684985929481874, "grad_norm": 2.0805563926696777, "learning_rate": 3.325006485056881e-07, "loss": 0.318, "step": 16220 }, { "epoch": 2.686641284555537, "grad_norm": 1.7813467979431152, "learning_rate": 3.2905589051126065e-07, "loss": 0.336, "step": 16230 }, { "epoch": 2.6882966396292005, "grad_norm": 2.336962938308716, "learning_rate": 3.2562846242892744e-07, "loss": 0.3539, "step": 16240 }, { "epoch": 2.689951994702864, "grad_norm": 2.1561317443847656, "learning_rate": 3.2221837697496597e-07, "loss": 0.329, "step": 16250 }, { "epoch": 2.691607349776527, "grad_norm": 2.0386962890625, "learning_rate": 3.18825646801314e-07, "loss": 0.3483, "step": 16260 }, { "epoch": 2.6932627048501905, "grad_norm": 2.5452029705047607, "learning_rate": 3.1545028449551375e-07, "loss": 0.3314, "step": 16270 }, { "epoch": 2.694918059923854, "grad_norm": 1.8535023927688599, "learning_rate": 3.1209230258067324e-07, "loss": 0.3055, "step": 16280 }, { "epoch": 2.696573414997517, "grad_norm": 2.0997235774993896, "learning_rate": 3.0875171351541497e-07, "loss": 0.3775, "step": 16290 }, { "epoch": 2.6982287700711804, "grad_norm": 2.2901790142059326, "learning_rate": 3.0542852969383196e-07, "loss": 0.3575, "step": 16300 }, { "epoch": 2.6998841251448438, "grad_norm": 1.9664065837860107, "learning_rate": 3.021227634454399e-07, "loss": 0.2803, "step": 16310 }, { "epoch": 2.701539480218507, "grad_norm": 2.0761005878448486, "learning_rate": 2.988344270351351e-07, "loss": 0.3469, "step": 16320 }, { "epoch": 2.7031948352921704, "grad_norm": 1.9783636331558228, "learning_rate": 2.955635326631434e-07, "loss": 0.3665, "step": 16330 }, { "epoch": 2.7048501903658337, "grad_norm": 2.280743360519409, "learning_rate": 2.9231009246498077e-07, "loss": 0.3547, "step": 16340 }, { "epoch": 2.7065055454394966, "grad_norm": 3.1975841522216797, "learning_rate": 2.8907411851140487e-07, "loss": 0.3531, "step": 16350 }, { "epoch": 2.70816090051316, "grad_norm": 2.105062961578369, "learning_rate": 2.8585562280837033e-07, "loss": 0.3093, "step": 16360 }, { "epoch": 2.7098162555868233, "grad_norm": 2.0571696758270264, "learning_rate": 2.826546172969852e-07, "loss": 0.3689, "step": 16370 }, { "epoch": 2.7114716106604866, "grad_norm": 2.0750467777252197, "learning_rate": 2.794711138534656e-07, "loss": 0.3626, "step": 16380 }, { "epoch": 2.71312696573415, "grad_norm": 2.3040149211883545, "learning_rate": 2.7630512428909183e-07, "loss": 0.3472, "step": 16390 }, { "epoch": 2.7147823208078132, "grad_norm": 2.162661552429199, "learning_rate": 2.731566603501684e-07, "loss": 0.305, "step": 16400 }, { "epoch": 2.7164376758814766, "grad_norm": 2.7106826305389404, "learning_rate": 2.7002573371797347e-07, "loss": 0.3489, "step": 16410 }, { "epoch": 2.71809303095514, "grad_norm": 2.0429866313934326, "learning_rate": 2.6691235600872e-07, "loss": 0.3004, "step": 16420 }, { "epoch": 2.719748386028803, "grad_norm": 2.0341339111328125, "learning_rate": 2.638165387735131e-07, "loss": 0.3486, "step": 16430 }, { "epoch": 2.7214037411024665, "grad_norm": 2.3097574710845947, "learning_rate": 2.607382934983044e-07, "loss": 0.3774, "step": 16440 }, { "epoch": 2.72305909617613, "grad_norm": 2.3151650428771973, "learning_rate": 2.5767763160385095e-07, "loss": 0.3031, "step": 16450 }, { "epoch": 2.724714451249793, "grad_norm": 2.07060170173645, "learning_rate": 2.5463456444567436e-07, "loss": 0.3331, "step": 16460 }, { "epoch": 2.7263698063234565, "grad_norm": 1.5830529928207397, "learning_rate": 2.51609103314015e-07, "loss": 0.3127, "step": 16470 }, { "epoch": 2.72802516139712, "grad_norm": 2.311042547225952, "learning_rate": 2.486012594337939e-07, "loss": 0.3813, "step": 16480 }, { "epoch": 2.729680516470783, "grad_norm": 2.337822437286377, "learning_rate": 2.4561104396456815e-07, "loss": 0.3171, "step": 16490 }, { "epoch": 2.731335871544446, "grad_norm": 2.3123905658721924, "learning_rate": 2.426384680004917e-07, "loss": 0.3243, "step": 16500 }, { "epoch": 2.7329912266181093, "grad_norm": 2.2762632369995117, "learning_rate": 2.3968354257027205e-07, "loss": 0.3188, "step": 16510 }, { "epoch": 2.7346465816917727, "grad_norm": 2.021897315979004, "learning_rate": 2.3674627863713273e-07, "loss": 0.3599, "step": 16520 }, { "epoch": 2.736301936765436, "grad_norm": 1.663979172706604, "learning_rate": 2.3382668709876878e-07, "loss": 0.303, "step": 16530 }, { "epoch": 2.7379572918390993, "grad_norm": 2.365117311477661, "learning_rate": 2.3092477878730757e-07, "loss": 0.3329, "step": 16540 }, { "epoch": 2.7396126469127626, "grad_norm": 1.6717923879623413, "learning_rate": 2.280405644692696e-07, "loss": 0.3389, "step": 16550 }, { "epoch": 2.741268001986426, "grad_norm": 1.7539387941360474, "learning_rate": 2.2517405484552778e-07, "loss": 0.3362, "step": 16560 }, { "epoch": 2.7429233570600893, "grad_norm": 2.190721273422241, "learning_rate": 2.2232526055126992e-07, "loss": 0.3732, "step": 16570 }, { "epoch": 2.7445787121337526, "grad_norm": 1.689123272895813, "learning_rate": 2.1949419215595346e-07, "loss": 0.3564, "step": 16580 }, { "epoch": 2.746234067207416, "grad_norm": 2.2224695682525635, "learning_rate": 2.1668086016327415e-07, "loss": 0.3176, "step": 16590 }, { "epoch": 2.7478894222810792, "grad_norm": 2.078242063522339, "learning_rate": 2.1388527501111977e-07, "loss": 0.3233, "step": 16600 }, { "epoch": 2.7495447773547426, "grad_norm": 1.993311882019043, "learning_rate": 2.1110744707153574e-07, "loss": 0.3093, "step": 16610 }, { "epoch": 2.751200132428406, "grad_norm": 2.2646372318267822, "learning_rate": 2.0834738665068576e-07, "loss": 0.3471, "step": 16620 }, { "epoch": 2.752855487502069, "grad_norm": 1.9162275791168213, "learning_rate": 2.056051039888135e-07, "loss": 0.2779, "step": 16630 }, { "epoch": 2.7545108425757325, "grad_norm": 2.898524045944214, "learning_rate": 2.0288060926020425e-07, "loss": 0.3635, "step": 16640 }, { "epoch": 2.756166197649396, "grad_norm": 2.449143648147583, "learning_rate": 2.0017391257314723e-07, "loss": 0.3147, "step": 16650 }, { "epoch": 2.757821552723059, "grad_norm": 2.447744131088257, "learning_rate": 1.9748502396989722e-07, "loss": 0.3296, "step": 16660 }, { "epoch": 2.7594769077967225, "grad_norm": 1.8371167182922363, "learning_rate": 1.9481395342664078e-07, "loss": 0.3355, "step": 16670 }, { "epoch": 2.761132262870386, "grad_norm": 1.8588885068893433, "learning_rate": 1.92160710853454e-07, "loss": 0.3565, "step": 16680 }, { "epoch": 2.762787617944049, "grad_norm": 2.5657360553741455, "learning_rate": 1.8952530609427145e-07, "loss": 0.2915, "step": 16690 }, { "epoch": 2.7644429730177125, "grad_norm": 2.608978033065796, "learning_rate": 1.8690774892684395e-07, "loss": 0.3885, "step": 16700 }, { "epoch": 2.766098328091376, "grad_norm": 2.3337411880493164, "learning_rate": 1.8430804906270638e-07, "loss": 0.2748, "step": 16710 }, { "epoch": 2.767753683165039, "grad_norm": 2.865518808364868, "learning_rate": 1.8172621614714004e-07, "loss": 0.369, "step": 16720 }, { "epoch": 2.7694090382387024, "grad_norm": 2.097165822982788, "learning_rate": 1.7916225975913693e-07, "loss": 0.3264, "step": 16730 }, { "epoch": 2.7710643933123658, "grad_norm": 2.0352957248687744, "learning_rate": 1.766161894113655e-07, "loss": 0.2894, "step": 16740 }, { "epoch": 2.7727197483860286, "grad_norm": 1.8923014402389526, "learning_rate": 1.7408801455013224e-07, "loss": 0.2917, "step": 16750 }, { "epoch": 2.774375103459692, "grad_norm": 2.412301540374756, "learning_rate": 1.715777445553507e-07, "loss": 0.3357, "step": 16760 }, { "epoch": 2.7760304585333553, "grad_norm": 2.138453722000122, "learning_rate": 1.6908538874050417e-07, "loss": 0.3632, "step": 16770 }, { "epoch": 2.7776858136070186, "grad_norm": 2.331045627593994, "learning_rate": 1.6661095635261082e-07, "loss": 0.3331, "step": 16780 }, { "epoch": 2.779341168680682, "grad_norm": 1.8366329669952393, "learning_rate": 1.6415445657218975e-07, "loss": 0.2916, "step": 16790 }, { "epoch": 2.7809965237543453, "grad_norm": 1.9621044397354126, "learning_rate": 1.6171589851322999e-07, "loss": 0.3636, "step": 16800 }, { "epoch": 2.7826518788280086, "grad_norm": 2.012941360473633, "learning_rate": 1.59295291223151e-07, "loss": 0.3652, "step": 16810 }, { "epoch": 2.784307233901672, "grad_norm": 2.185295343399048, "learning_rate": 1.568926436827739e-07, "loss": 0.3466, "step": 16820 }, { "epoch": 2.785962588975335, "grad_norm": 2.6037065982818604, "learning_rate": 1.5450796480628637e-07, "loss": 0.3798, "step": 16830 }, { "epoch": 2.7876179440489985, "grad_norm": 2.552006483078003, "learning_rate": 1.5214126344120894e-07, "loss": 0.3012, "step": 16840 }, { "epoch": 2.789273299122662, "grad_norm": 1.1729012727737427, "learning_rate": 1.4979254836836266e-07, "loss": 0.3451, "step": 16850 }, { "epoch": 2.790928654196325, "grad_norm": 2.044630765914917, "learning_rate": 1.4746182830183809e-07, "loss": 0.338, "step": 16860 }, { "epoch": 2.7925840092699885, "grad_norm": 1.5621954202651978, "learning_rate": 1.4514911188895976e-07, "loss": 0.3422, "step": 16870 }, { "epoch": 2.794239364343652, "grad_norm": 2.243272066116333, "learning_rate": 1.4285440771025784e-07, "loss": 0.3366, "step": 16880 }, { "epoch": 2.795894719417315, "grad_norm": 2.275195598602295, "learning_rate": 1.405777242794326e-07, "loss": 0.3844, "step": 16890 }, { "epoch": 2.7975500744909785, "grad_norm": 2.221266746520996, "learning_rate": 1.3831907004332512e-07, "loss": 0.3765, "step": 16900 }, { "epoch": 2.7992054295646414, "grad_norm": 2.322007894515991, "learning_rate": 1.3607845338188595e-07, "loss": 0.3003, "step": 16910 }, { "epoch": 2.8008607846383047, "grad_norm": 1.8471641540527344, "learning_rate": 1.338558826081443e-07, "loss": 0.3199, "step": 16920 }, { "epoch": 2.802516139711968, "grad_norm": 1.6814085245132446, "learning_rate": 1.3165136596817395e-07, "loss": 0.332, "step": 16930 }, { "epoch": 2.8041714947856313, "grad_norm": 2.1636056900024414, "learning_rate": 1.2946491164106677e-07, "loss": 0.3538, "step": 16940 }, { "epoch": 2.8058268498592946, "grad_norm": 2.366356372833252, "learning_rate": 1.27296527738901e-07, "loss": 0.3084, "step": 16950 }, { "epoch": 2.807482204932958, "grad_norm": 1.5104891061782837, "learning_rate": 1.2514622230670958e-07, "loss": 0.3326, "step": 16960 }, { "epoch": 2.8091375600066213, "grad_norm": 2.3185157775878906, "learning_rate": 1.230140033224525e-07, "loss": 0.3529, "step": 16970 }, { "epoch": 2.8107929150802846, "grad_norm": 2.288813352584839, "learning_rate": 1.2089987869698615e-07, "loss": 0.35, "step": 16980 }, { "epoch": 2.812448270153948, "grad_norm": 1.8733060359954834, "learning_rate": 1.1880385627403345e-07, "loss": 0.3267, "step": 16990 }, { "epoch": 2.8141036252276113, "grad_norm": 2.303619146347046, "learning_rate": 1.1672594383015656e-07, "loss": 0.3233, "step": 17000 }, { "epoch": 2.8157589803012746, "grad_norm": 2.3300998210906982, "learning_rate": 1.146661490747264e-07, "loss": 0.3066, "step": 17010 }, { "epoch": 2.817414335374938, "grad_norm": 2.013810634613037, "learning_rate": 1.1262447964989376e-07, "loss": 0.3384, "step": 17020 }, { "epoch": 2.8190696904486012, "grad_norm": 2.131016254425049, "learning_rate": 1.1060094313056268e-07, "loss": 0.3419, "step": 17030 }, { "epoch": 2.8207250455222646, "grad_norm": 2.358128070831299, "learning_rate": 1.0859554702436104e-07, "loss": 0.3166, "step": 17040 }, { "epoch": 2.822380400595928, "grad_norm": 2.0983312129974365, "learning_rate": 1.0660829877161327e-07, "loss": 0.354, "step": 17050 }, { "epoch": 2.824035755669591, "grad_norm": 1.8471043109893799, "learning_rate": 1.0463920574531106e-07, "loss": 0.3207, "step": 17060 }, { "epoch": 2.8256911107432545, "grad_norm": 1.907423973083496, "learning_rate": 1.0268827525108937e-07, "loss": 0.3158, "step": 17070 }, { "epoch": 2.827346465816918, "grad_norm": 1.6720637083053589, "learning_rate": 1.0075551452719657e-07, "loss": 0.3553, "step": 17080 }, { "epoch": 2.829001820890581, "grad_norm": 1.6659080982208252, "learning_rate": 9.884093074446932e-08, "loss": 0.3111, "step": 17090 }, { "epoch": 2.8306571759642445, "grad_norm": 1.5908949375152588, "learning_rate": 9.694453100630275e-08, "loss": 0.3329, "step": 17100 }, { "epoch": 2.832312531037908, "grad_norm": 1.9934496879577637, "learning_rate": 9.506632234862922e-08, "loss": 0.3384, "step": 17110 }, { "epoch": 2.833967886111571, "grad_norm": 1.8065050840377808, "learning_rate": 9.320631173988737e-08, "loss": 0.3315, "step": 17120 }, { "epoch": 2.8356232411852345, "grad_norm": 2.1966464519500732, "learning_rate": 9.136450608099978e-08, "loss": 0.345, "step": 17130 }, { "epoch": 2.837278596258898, "grad_norm": 1.9803756475448608, "learning_rate": 8.954091220534366e-08, "loss": 0.3242, "step": 17140 }, { "epoch": 2.838933951332561, "grad_norm": 2.167667865753174, "learning_rate": 8.773553687873082e-08, "loss": 0.3268, "step": 17150 }, { "epoch": 2.840589306406224, "grad_norm": 2.3544301986694336, "learning_rate": 8.594838679937712e-08, "loss": 0.3454, "step": 17160 }, { "epoch": 2.8422446614798873, "grad_norm": 1.4057775735855103, "learning_rate": 8.41794685978814e-08, "loss": 0.3432, "step": 17170 }, { "epoch": 2.8439000165535506, "grad_norm": 1.7976477146148682, "learning_rate": 8.242878883719829e-08, "loss": 0.3196, "step": 17180 }, { "epoch": 2.845555371627214, "grad_norm": 2.1721675395965576, "learning_rate": 8.0696354012616e-08, "loss": 0.3821, "step": 17190 }, { "epoch": 2.8472107267008773, "grad_norm": 2.5062599182128906, "learning_rate": 7.898217055173075e-08, "loss": 0.3178, "step": 17200 }, { "epoch": 2.8488660817745406, "grad_norm": 1.7274291515350342, "learning_rate": 7.728624481442348e-08, "loss": 0.3445, "step": 17210 }, { "epoch": 2.850521436848204, "grad_norm": 2.225858211517334, "learning_rate": 7.560858309283658e-08, "loss": 0.3166, "step": 17220 }, { "epoch": 2.8521767919218672, "grad_norm": 1.7840882539749146, "learning_rate": 7.394919161134884e-08, "loss": 0.3327, "step": 17230 }, { "epoch": 2.8538321469955306, "grad_norm": 2.3786237239837646, "learning_rate": 7.230807652655603e-08, "loss": 0.3784, "step": 17240 }, { "epoch": 2.855487502069194, "grad_norm": 1.890674352645874, "learning_rate": 7.068524392724319e-08, "loss": 0.3401, "step": 17250 }, { "epoch": 2.857142857142857, "grad_norm": 2.209174156188965, "learning_rate": 6.908069983436683e-08, "loss": 0.3495, "step": 17260 }, { "epoch": 2.8587982122165205, "grad_norm": 1.9331399202346802, "learning_rate": 6.749445020102884e-08, "loss": 0.3155, "step": 17270 }, { "epoch": 2.860453567290184, "grad_norm": 1.9215776920318604, "learning_rate": 6.5926500912456e-08, "loss": 0.3355, "step": 17280 }, { "epoch": 2.862108922363847, "grad_norm": 2.269278049468994, "learning_rate": 6.437685778597824e-08, "loss": 0.3326, "step": 17290 }, { "epoch": 2.8637642774375105, "grad_norm": 2.3621773719787598, "learning_rate": 6.284552657100761e-08, "loss": 0.3111, "step": 17300 }, { "epoch": 2.8654196325111734, "grad_norm": 2.254394769668579, "learning_rate": 6.133251294901443e-08, "loss": 0.3558, "step": 17310 }, { "epoch": 2.8670749875848367, "grad_norm": 2.3596551418304443, "learning_rate": 5.983782253350944e-08, "loss": 0.3323, "step": 17320 }, { "epoch": 2.8687303426585, "grad_norm": 2.3255999088287354, "learning_rate": 5.836146087002226e-08, "loss": 0.3472, "step": 17330 }, { "epoch": 2.8703856977321633, "grad_norm": 2.21858549118042, "learning_rate": 5.690343343607796e-08, "loss": 0.3458, "step": 17340 }, { "epoch": 2.8720410528058267, "grad_norm": 2.3857204914093018, "learning_rate": 5.54637456411794e-08, "loss": 0.3269, "step": 17350 }, { "epoch": 2.87369640787949, "grad_norm": 2.049962282180786, "learning_rate": 5.4042402826787746e-08, "loss": 0.3314, "step": 17360 }, { "epoch": 2.8753517629531533, "grad_norm": 2.2532308101654053, "learning_rate": 5.2639410266299705e-08, "loss": 0.3339, "step": 17370 }, { "epoch": 2.8770071180268166, "grad_norm": 1.2130613327026367, "learning_rate": 5.1254773165032023e-08, "loss": 0.3056, "step": 17380 }, { "epoch": 2.87866247310048, "grad_norm": 2.2649145126342773, "learning_rate": 4.988849666019757e-08, "loss": 0.3218, "step": 17390 }, { "epoch": 2.8803178281741433, "grad_norm": 1.4370695352554321, "learning_rate": 4.854058582089038e-08, "loss": 0.3054, "step": 17400 }, { "epoch": 2.8819731832478066, "grad_norm": 2.0458033084869385, "learning_rate": 4.7211045648064004e-08, "loss": 0.3226, "step": 17410 }, { "epoch": 2.88362853832147, "grad_norm": 1.8102025985717773, "learning_rate": 4.589988107451482e-08, "loss": 0.3504, "step": 17420 }, { "epoch": 2.8852838933951332, "grad_norm": 1.952150583267212, "learning_rate": 4.4607096964862094e-08, "loss": 0.3291, "step": 17430 }, { "epoch": 2.8869392484687966, "grad_norm": 1.774665355682373, "learning_rate": 4.333269811553187e-08, "loss": 0.3339, "step": 17440 }, { "epoch": 2.88859460354246, "grad_norm": 2.385225772857666, "learning_rate": 4.20766892547364e-08, "loss": 0.3291, "step": 17450 }, { "epoch": 2.890249958616123, "grad_norm": 2.228212356567383, "learning_rate": 4.0839075042460875e-08, "loss": 0.3173, "step": 17460 }, { "epoch": 2.8919053136897865, "grad_norm": 1.6550447940826416, "learning_rate": 3.9619860070440056e-08, "loss": 0.334, "step": 17470 }, { "epoch": 2.89356066876345, "grad_norm": 2.184173822402954, "learning_rate": 3.841904886214831e-08, "loss": 0.3265, "step": 17480 }, { "epoch": 2.895216023837113, "grad_norm": 2.3634161949157715, "learning_rate": 3.723664587277631e-08, "loss": 0.3615, "step": 17490 }, { "epoch": 2.8968713789107765, "grad_norm": 2.1004719734191895, "learning_rate": 3.60726554892199e-08, "loss": 0.3142, "step": 17500 }, { "epoch": 2.89852673398444, "grad_norm": 1.8737444877624512, "learning_rate": 3.492708203006012e-08, "loss": 0.3214, "step": 17510 }, { "epoch": 2.900182089058103, "grad_norm": 1.9953153133392334, "learning_rate": 3.3799929745547685e-08, "loss": 0.3365, "step": 17520 }, { "epoch": 2.9018374441317665, "grad_norm": 1.4854812622070312, "learning_rate": 3.2691202817589086e-08, "loss": 0.3054, "step": 17530 }, { "epoch": 2.90349279920543, "grad_norm": 2.8899099826812744, "learning_rate": 3.160090535972993e-08, "loss": 0.3153, "step": 17540 }, { "epoch": 2.905148154279093, "grad_norm": 2.137659788131714, "learning_rate": 3.052904141713886e-08, "loss": 0.3037, "step": 17550 }, { "epoch": 2.9068035093527564, "grad_norm": 2.0721936225891113, "learning_rate": 2.9475614966594233e-08, "loss": 0.3211, "step": 17560 }, { "epoch": 2.9084588644264193, "grad_norm": 2.115863561630249, "learning_rate": 2.844062991646801e-08, "loss": 0.2805, "step": 17570 }, { "epoch": 2.9101142195000826, "grad_norm": 1.711255431175232, "learning_rate": 2.742409010671243e-08, "loss": 0.3288, "step": 17580 }, { "epoch": 2.911769574573746, "grad_norm": 2.0368499755859375, "learning_rate": 2.642599930884393e-08, "loss": 0.3121, "step": 17590 }, { "epoch": 2.9134249296474093, "grad_norm": 2.2370572090148926, "learning_rate": 2.544636122593147e-08, "loss": 0.3161, "step": 17600 }, { "epoch": 2.9150802847210726, "grad_norm": 2.0604302883148193, "learning_rate": 2.4485179492581e-08, "loss": 0.3285, "step": 17610 }, { "epoch": 2.916735639794736, "grad_norm": 2.018296003341675, "learning_rate": 2.354245767492269e-08, "loss": 0.3635, "step": 17620 }, { "epoch": 2.9183909948683993, "grad_norm": 2.0412917137145996, "learning_rate": 2.2618199270597607e-08, "loss": 0.3264, "step": 17630 }, { "epoch": 2.9200463499420626, "grad_norm": 1.991478681564331, "learning_rate": 2.1712407708744386e-08, "loss": 0.3597, "step": 17640 }, { "epoch": 2.921701705015726, "grad_norm": 1.9765408039093018, "learning_rate": 2.0825086349988145e-08, "loss": 0.3418, "step": 17650 }, { "epoch": 2.9233570600893892, "grad_norm": 2.1275086402893066, "learning_rate": 1.995623848642547e-08, "loss": 0.3345, "step": 17660 }, { "epoch": 2.9250124151630525, "grad_norm": 1.7225489616394043, "learning_rate": 1.9105867341613903e-08, "loss": 0.3135, "step": 17670 }, { "epoch": 2.926667770236716, "grad_norm": 1.9626611471176147, "learning_rate": 1.8273976070559695e-08, "loss": 0.3614, "step": 17680 }, { "epoch": 2.928323125310379, "grad_norm": 2.476229667663574, "learning_rate": 1.7460567759705615e-08, "loss": 0.3594, "step": 17690 }, { "epoch": 2.9299784803840425, "grad_norm": 1.9182066917419434, "learning_rate": 1.6665645426920396e-08, "loss": 0.3197, "step": 17700 }, { "epoch": 2.931633835457706, "grad_norm": 2.101400852203369, "learning_rate": 1.5889212021485966e-08, "loss": 0.3522, "step": 17710 }, { "epoch": 2.9332891905313687, "grad_norm": 1.9222100973129272, "learning_rate": 1.5131270424088573e-08, "loss": 0.3461, "step": 17720 }, { "epoch": 2.934944545605032, "grad_norm": 2.66011905670166, "learning_rate": 1.4391823446807117e-08, "loss": 0.3557, "step": 17730 }, { "epoch": 2.9365999006786954, "grad_norm": 2.2789878845214844, "learning_rate": 1.3670873833101505e-08, "loss": 0.3229, "step": 17740 }, { "epoch": 2.9382552557523587, "grad_norm": 2.3933591842651367, "learning_rate": 1.2968424257804313e-08, "loss": 0.3315, "step": 17750 }, { "epoch": 2.939910610826022, "grad_norm": 2.3596224784851074, "learning_rate": 1.22844773271108e-08, "loss": 0.3439, "step": 17760 }, { "epoch": 2.9415659658996853, "grad_norm": 2.155430316925049, "learning_rate": 1.161903557856725e-08, "loss": 0.3253, "step": 17770 }, { "epoch": 2.9432213209733487, "grad_norm": 2.187922954559326, "learning_rate": 1.0972101481063758e-08, "loss": 0.3758, "step": 17780 }, { "epoch": 2.944876676047012, "grad_norm": 2.4473180770874023, "learning_rate": 1.0343677434824783e-08, "loss": 0.3408, "step": 17790 }, { "epoch": 2.9465320311206753, "grad_norm": 2.050906181335449, "learning_rate": 9.733765771398063e-09, "loss": 0.3365, "step": 17800 }, { "epoch": 2.9481873861943386, "grad_norm": 2.1611318588256836, "learning_rate": 9.142368753649045e-09, "loss": 0.3467, "step": 17810 }, { "epoch": 2.949842741268002, "grad_norm": 2.38580322265625, "learning_rate": 8.56948857575035e-09, "loss": 0.314, "step": 17820 }, { "epoch": 2.9514980963416653, "grad_norm": 1.6547008752822876, "learning_rate": 8.015127363174558e-09, "loss": 0.3313, "step": 17830 }, { "epoch": 2.9531534514153286, "grad_norm": 2.329861640930176, "learning_rate": 7.479287172685867e-09, "loss": 0.3271, "step": 17840 }, { "epoch": 2.954808806488992, "grad_norm": 1.9690601825714111, "learning_rate": 6.96196999233234e-09, "loss": 0.3351, "step": 17850 }, { "epoch": 2.9564641615626552, "grad_norm": 2.030273199081421, "learning_rate": 6.463177741440341e-09, "loss": 0.3587, "step": 17860 }, { "epoch": 2.9581195166363186, "grad_norm": 2.2005035877227783, "learning_rate": 5.982912270603991e-09, "loss": 0.3264, "step": 17870 }, { "epoch": 2.959774871709982, "grad_norm": 2.0540130138397217, "learning_rate": 5.521175361681286e-09, "loss": 0.3387, "step": 17880 }, { "epoch": 2.961430226783645, "grad_norm": 1.6892403364181519, "learning_rate": 5.07796872778632e-09, "loss": 0.2986, "step": 17890 }, { "epoch": 2.9630855818573085, "grad_norm": 1.895839810371399, "learning_rate": 4.653294013283183e-09, "loss": 0.3291, "step": 17900 }, { "epoch": 2.964740936930972, "grad_norm": 1.6137304306030273, "learning_rate": 4.247152793779296e-09, "loss": 0.3265, "step": 17910 }, { "epoch": 2.966396292004635, "grad_norm": 1.9061734676361084, "learning_rate": 3.859546576120421e-09, "loss": 0.334, "step": 17920 }, { "epoch": 2.9680516470782985, "grad_norm": 2.028872013092041, "learning_rate": 3.4904767983851006e-09, "loss": 0.3213, "step": 17930 }, { "epoch": 2.969707002151962, "grad_norm": 1.7429839372634888, "learning_rate": 3.1399448298774503e-09, "loss": 0.3341, "step": 17940 }, { "epoch": 2.971362357225625, "grad_norm": 2.5334107875823975, "learning_rate": 2.8079519711243786e-09, "loss": 0.3251, "step": 17950 }, { "epoch": 2.9730177122992885, "grad_norm": 2.0650694370269775, "learning_rate": 2.4944994538700363e-09, "loss": 0.2856, "step": 17960 }, { "epoch": 2.9746730673729513, "grad_norm": 1.9629024267196655, "learning_rate": 2.1995884410708212e-09, "loss": 0.2924, "step": 17970 }, { "epoch": 2.9763284224466147, "grad_norm": 1.9057166576385498, "learning_rate": 1.9232200268914923e-09, "loss": 0.3179, "step": 17980 }, { "epoch": 2.977983777520278, "grad_norm": 2.323155164718628, "learning_rate": 1.6653952367007287e-09, "loss": 0.3351, "step": 17990 }, { "epoch": 2.9796391325939413, "grad_norm": 1.7210339307785034, "learning_rate": 1.426115027067243e-09, "loss": 0.3269, "step": 18000 }, { "epoch": 2.9812944876676046, "grad_norm": 1.83793044090271, "learning_rate": 1.2053802857581176e-09, "loss": 0.347, "step": 18010 }, { "epoch": 2.982949842741268, "grad_norm": 1.847522258758545, "learning_rate": 1.003191831731587e-09, "loss": 0.2979, "step": 18020 }, { "epoch": 2.9846051978149313, "grad_norm": 1.777446985244751, "learning_rate": 8.195504151381484e-10, "loss": 0.3473, "step": 18030 }, { "epoch": 2.9862605528885946, "grad_norm": 2.9953622817993164, "learning_rate": 6.544567173150107e-10, "loss": 0.3001, "step": 18040 }, { "epoch": 2.987915907962258, "grad_norm": 1.8776600360870361, "learning_rate": 5.079113507849842e-10, "loss": 0.357, "step": 18050 }, { "epoch": 2.9895712630359212, "grad_norm": 2.086153268814087, "learning_rate": 3.7991485925370496e-10, "loss": 0.3004, "step": 18060 }, { "epoch": 2.9912266181095846, "grad_norm": 2.1379270553588867, "learning_rate": 2.704677176079695e-10, "loss": 0.3721, "step": 18070 }, { "epoch": 2.992881973183248, "grad_norm": 2.7157349586486816, "learning_rate": 1.7957033191240425e-10, "loss": 0.3555, "step": 18080 }, { "epoch": 2.994537328256911, "grad_norm": 2.5094964504241943, "learning_rate": 1.0722303941113065e-10, "loss": 0.3435, "step": 18090 }, { "epoch": 2.9961926833305745, "grad_norm": 2.10398268699646, "learning_rate": 5.3426108522769415e-11, "loss": 0.3287, "step": 18100 }, { "epoch": 2.997848038404238, "grad_norm": 3.0506503582000732, "learning_rate": 1.8179738842660777e-11, "loss": 0.3678, "step": 18110 }, { "epoch": 2.9995033934779007, "grad_norm": 1.7245928049087524, "learning_rate": 1.484061139533921e-12, "loss": 0.3259, "step": 18120 }, { "epoch": 3.0, "step": 18123, "total_flos": 1.2130803771160658e+18, "train_loss": 0.45045166753521493, "train_runtime": 7695.2204, "train_samples_per_second": 37.681, "train_steps_per_second": 2.355 } ], "logging_steps": 10, "max_steps": 18123, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2130803771160658e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }