{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998504561088679, "eval_steps": 500, "global_step": 3343, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0002990877822640945, "grad_norm": 6.467864513397217, "learning_rate": 9.900990099009901e-08, "loss": 1.1757, "step": 1 }, { "epoch": 0.000598175564528189, "grad_norm": 6.2132720947265625, "learning_rate": 1.9801980198019803e-07, "loss": 1.2475, "step": 2 }, { "epoch": 0.0008972633467922835, "grad_norm": 5.559680461883545, "learning_rate": 2.9702970297029703e-07, "loss": 1.1239, "step": 3 }, { "epoch": 0.001196351129056378, "grad_norm": 6.208007335662842, "learning_rate": 3.9603960396039606e-07, "loss": 1.2593, "step": 4 }, { "epoch": 0.0014954389113204726, "grad_norm": 4.9004106521606445, "learning_rate": 4.950495049504951e-07, "loss": 1.1393, "step": 5 }, { "epoch": 0.001794526693584567, "grad_norm": 6.61062479019165, "learning_rate": 5.940594059405941e-07, "loss": 1.2601, "step": 6 }, { "epoch": 0.0020936144758486614, "grad_norm": 8.22998046875, "learning_rate": 6.930693069306931e-07, "loss": 1.1671, "step": 7 }, { "epoch": 0.002392702258112756, "grad_norm": 4.591616630554199, "learning_rate": 7.920792079207921e-07, "loss": 1.0995, "step": 8 }, { "epoch": 0.0026917900403768506, "grad_norm": 5.136438369750977, "learning_rate": 8.910891089108911e-07, "loss": 1.274, "step": 9 }, { "epoch": 0.0029908778226409452, "grad_norm": 5.057870864868164, "learning_rate": 9.900990099009902e-07, "loss": 1.2141, "step": 10 }, { "epoch": 0.0032899656049050394, "grad_norm": 6.0782999992370605, "learning_rate": 1.0891089108910893e-06, "loss": 1.2263, "step": 11 }, { "epoch": 0.003589053387169134, "grad_norm": 5.602115631103516, "learning_rate": 1.1881188118811881e-06, "loss": 1.1539, "step": 12 }, { "epoch": 0.0038881411694332286, "grad_norm": 4.9765400886535645, "learning_rate": 1.2871287128712872e-06, "loss": 1.1779, "step": 13 }, { "epoch": 0.004187228951697323, "grad_norm": 4.2855658531188965, "learning_rate": 1.3861386138613863e-06, "loss": 1.1828, "step": 14 }, { "epoch": 0.004486316733961417, "grad_norm": 4.374508380889893, "learning_rate": 1.4851485148514852e-06, "loss": 1.0944, "step": 15 }, { "epoch": 0.004785404516225512, "grad_norm": 4.000790596008301, "learning_rate": 1.5841584158415842e-06, "loss": 1.2483, "step": 16 }, { "epoch": 0.005084492298489607, "grad_norm": 3.4630966186523438, "learning_rate": 1.6831683168316833e-06, "loss": 1.0829, "step": 17 }, { "epoch": 0.005383580080753701, "grad_norm": 3.375501871109009, "learning_rate": 1.7821782178217822e-06, "loss": 1.0541, "step": 18 }, { "epoch": 0.005682667863017796, "grad_norm": 3.564779043197632, "learning_rate": 1.8811881188118813e-06, "loss": 1.1284, "step": 19 }, { "epoch": 0.0059817556452818905, "grad_norm": 3.098766565322876, "learning_rate": 1.9801980198019803e-06, "loss": 1.0915, "step": 20 }, { "epoch": 0.006280843427545985, "grad_norm": 3.012566328048706, "learning_rate": 2.0792079207920794e-06, "loss": 1.1276, "step": 21 }, { "epoch": 0.006579931209810079, "grad_norm": 2.7917869091033936, "learning_rate": 2.1782178217821785e-06, "loss": 1.1107, "step": 22 }, { "epoch": 0.0068790189920741734, "grad_norm": 2.2937748432159424, "learning_rate": 2.2772277227722776e-06, "loss": 1.0941, "step": 23 }, { "epoch": 0.007178106774338268, "grad_norm": 2.7692527770996094, "learning_rate": 2.3762376237623762e-06, "loss": 1.0132, "step": 24 }, { "epoch": 0.007477194556602363, "grad_norm": 2.50726056098938, "learning_rate": 2.4752475247524753e-06, "loss": 1.0982, "step": 25 }, { "epoch": 0.007776282338866457, "grad_norm": 2.7794408798217773, "learning_rate": 2.5742574257425744e-06, "loss": 1.1446, "step": 26 }, { "epoch": 0.008075370121130552, "grad_norm": 2.481109142303467, "learning_rate": 2.6732673267326735e-06, "loss": 0.9962, "step": 27 }, { "epoch": 0.008374457903394646, "grad_norm": 4.485240459442139, "learning_rate": 2.7722772277227726e-06, "loss": 1.0613, "step": 28 }, { "epoch": 0.008673545685658741, "grad_norm": 2.377653121948242, "learning_rate": 2.8712871287128712e-06, "loss": 1.1522, "step": 29 }, { "epoch": 0.008972633467922835, "grad_norm": 2.2561264038085938, "learning_rate": 2.9702970297029703e-06, "loss": 1.0342, "step": 30 }, { "epoch": 0.00927172125018693, "grad_norm": 2.160797595977783, "learning_rate": 3.0693069306930694e-06, "loss": 1.0484, "step": 31 }, { "epoch": 0.009570809032451024, "grad_norm": 2.245884895324707, "learning_rate": 3.1683168316831685e-06, "loss": 1.041, "step": 32 }, { "epoch": 0.00986989681471512, "grad_norm": 2.3002660274505615, "learning_rate": 3.2673267326732676e-06, "loss": 0.9991, "step": 33 }, { "epoch": 0.010168984596979213, "grad_norm": 2.223809242248535, "learning_rate": 3.3663366336633666e-06, "loss": 1.07, "step": 34 }, { "epoch": 0.010468072379243309, "grad_norm": 2.4116625785827637, "learning_rate": 3.4653465346534653e-06, "loss": 1.0397, "step": 35 }, { "epoch": 0.010767160161507403, "grad_norm": 2.0363879203796387, "learning_rate": 3.5643564356435644e-06, "loss": 1.0518, "step": 36 }, { "epoch": 0.011066247943771496, "grad_norm": 2.161952018737793, "learning_rate": 3.6633663366336635e-06, "loss": 1.0384, "step": 37 }, { "epoch": 0.011365335726035592, "grad_norm": 2.2431228160858154, "learning_rate": 3.7623762376237625e-06, "loss": 1.0483, "step": 38 }, { "epoch": 0.011664423508299685, "grad_norm": 2.2927422523498535, "learning_rate": 3.861386138613862e-06, "loss": 1.0237, "step": 39 }, { "epoch": 0.011963511290563781, "grad_norm": 2.3837950229644775, "learning_rate": 3.960396039603961e-06, "loss": 1.0386, "step": 40 }, { "epoch": 0.012262599072827875, "grad_norm": 2.033806085586548, "learning_rate": 4.05940594059406e-06, "loss": 0.9992, "step": 41 }, { "epoch": 0.01256168685509197, "grad_norm": 1.9823940992355347, "learning_rate": 4.158415841584159e-06, "loss": 1.0066, "step": 42 }, { "epoch": 0.012860774637356064, "grad_norm": 2.1727283000946045, "learning_rate": 4.2574257425742575e-06, "loss": 0.9262, "step": 43 }, { "epoch": 0.013159862419620158, "grad_norm": 2.156259775161743, "learning_rate": 4.356435643564357e-06, "loss": 0.9315, "step": 44 }, { "epoch": 0.013458950201884253, "grad_norm": 2.034162759780884, "learning_rate": 4.455445544554456e-06, "loss": 0.9705, "step": 45 }, { "epoch": 0.013758037984148347, "grad_norm": 2.1472127437591553, "learning_rate": 4.554455445544555e-06, "loss": 0.9354, "step": 46 }, { "epoch": 0.014057125766412442, "grad_norm": 2.2290048599243164, "learning_rate": 4.653465346534654e-06, "loss": 1.0146, "step": 47 }, { "epoch": 0.014356213548676536, "grad_norm": 2.0073301792144775, "learning_rate": 4.7524752475247525e-06, "loss": 1.0129, "step": 48 }, { "epoch": 0.014655301330940632, "grad_norm": 2.1298155784606934, "learning_rate": 4.851485148514852e-06, "loss": 1.0159, "step": 49 }, { "epoch": 0.014954389113204725, "grad_norm": 2.1482582092285156, "learning_rate": 4.950495049504951e-06, "loss": 0.9712, "step": 50 }, { "epoch": 0.01525347689546882, "grad_norm": 2.3218939304351807, "learning_rate": 5.04950495049505e-06, "loss": 1.0167, "step": 51 }, { "epoch": 0.015552564677732915, "grad_norm": 2.118840217590332, "learning_rate": 5.148514851485149e-06, "loss": 1.0144, "step": 52 }, { "epoch": 0.01585165245999701, "grad_norm": 2.4467275142669678, "learning_rate": 5.247524752475248e-06, "loss": 0.9837, "step": 53 }, { "epoch": 0.016150740242261104, "grad_norm": 2.127401113510132, "learning_rate": 5.346534653465347e-06, "loss": 0.9954, "step": 54 }, { "epoch": 0.016449828024525198, "grad_norm": 2.3728017807006836, "learning_rate": 5.4455445544554465e-06, "loss": 0.9944, "step": 55 }, { "epoch": 0.01674891580678929, "grad_norm": 2.113921880722046, "learning_rate": 5.544554455445545e-06, "loss": 0.9222, "step": 56 }, { "epoch": 0.01704800358905339, "grad_norm": 2.1111133098602295, "learning_rate": 5.643564356435644e-06, "loss": 0.9973, "step": 57 }, { "epoch": 0.017347091371317482, "grad_norm": 2.293419599533081, "learning_rate": 5.7425742574257425e-06, "loss": 1.0198, "step": 58 }, { "epoch": 0.017646179153581576, "grad_norm": 2.0116126537323, "learning_rate": 5.841584158415842e-06, "loss": 0.9832, "step": 59 }, { "epoch": 0.01794526693584567, "grad_norm": 2.204284906387329, "learning_rate": 5.940594059405941e-06, "loss": 0.9773, "step": 60 }, { "epoch": 0.018244354718109767, "grad_norm": 2.2167224884033203, "learning_rate": 6.03960396039604e-06, "loss": 0.9966, "step": 61 }, { "epoch": 0.01854344250037386, "grad_norm": 2.308530807495117, "learning_rate": 6.138613861386139e-06, "loss": 1.0181, "step": 62 }, { "epoch": 0.018842530282637954, "grad_norm": 2.094149351119995, "learning_rate": 6.237623762376238e-06, "loss": 0.9527, "step": 63 }, { "epoch": 0.019141618064902048, "grad_norm": 2.5540058612823486, "learning_rate": 6.336633663366337e-06, "loss": 0.9869, "step": 64 }, { "epoch": 0.019440705847166142, "grad_norm": 2.045499086380005, "learning_rate": 6.4356435643564364e-06, "loss": 0.9832, "step": 65 }, { "epoch": 0.01973979362943024, "grad_norm": 2.194822311401367, "learning_rate": 6.534653465346535e-06, "loss": 0.9919, "step": 66 }, { "epoch": 0.020038881411694333, "grad_norm": 2.1588592529296875, "learning_rate": 6.633663366336635e-06, "loss": 0.9578, "step": 67 }, { "epoch": 0.020337969193958427, "grad_norm": 2.170806407928467, "learning_rate": 6.732673267326733e-06, "loss": 0.9858, "step": 68 }, { "epoch": 0.02063705697622252, "grad_norm": 2.1518619060516357, "learning_rate": 6.831683168316833e-06, "loss": 0.9942, "step": 69 }, { "epoch": 0.020936144758486618, "grad_norm": 2.9534049034118652, "learning_rate": 6.930693069306931e-06, "loss": 0.9955, "step": 70 }, { "epoch": 0.02123523254075071, "grad_norm": 2.2518513202667236, "learning_rate": 7.02970297029703e-06, "loss": 0.9725, "step": 71 }, { "epoch": 0.021534320323014805, "grad_norm": 1.8818563222885132, "learning_rate": 7.128712871287129e-06, "loss": 0.9243, "step": 72 }, { "epoch": 0.0218334081052789, "grad_norm": 2.2362442016601562, "learning_rate": 7.227722772277228e-06, "loss": 0.9517, "step": 73 }, { "epoch": 0.022132495887542993, "grad_norm": 2.5143346786499023, "learning_rate": 7.326732673267327e-06, "loss": 0.9792, "step": 74 }, { "epoch": 0.02243158366980709, "grad_norm": 2.1341629028320312, "learning_rate": 7.425742574257426e-06, "loss": 0.9537, "step": 75 }, { "epoch": 0.022730671452071183, "grad_norm": 2.255032539367676, "learning_rate": 7.524752475247525e-06, "loss": 0.9367, "step": 76 }, { "epoch": 0.023029759234335277, "grad_norm": 2.3630597591400146, "learning_rate": 7.6237623762376246e-06, "loss": 0.9612, "step": 77 }, { "epoch": 0.02332884701659937, "grad_norm": 2.199533462524414, "learning_rate": 7.722772277227724e-06, "loss": 0.9368, "step": 78 }, { "epoch": 0.023627934798863465, "grad_norm": 2.173064708709717, "learning_rate": 7.821782178217822e-06, "loss": 0.9361, "step": 79 }, { "epoch": 0.023927022581127562, "grad_norm": 2.2709598541259766, "learning_rate": 7.920792079207921e-06, "loss": 0.9695, "step": 80 }, { "epoch": 0.024226110363391656, "grad_norm": 2.1295037269592285, "learning_rate": 8.019801980198021e-06, "loss": 0.9109, "step": 81 }, { "epoch": 0.02452519814565575, "grad_norm": 2.154649019241333, "learning_rate": 8.11881188118812e-06, "loss": 0.9876, "step": 82 }, { "epoch": 0.024824285927919843, "grad_norm": 2.220043897628784, "learning_rate": 8.217821782178218e-06, "loss": 0.8884, "step": 83 }, { "epoch": 0.02512337371018394, "grad_norm": 2.151214361190796, "learning_rate": 8.316831683168318e-06, "loss": 1.0024, "step": 84 }, { "epoch": 0.025422461492448034, "grad_norm": 2.136652946472168, "learning_rate": 8.415841584158416e-06, "loss": 0.9359, "step": 85 }, { "epoch": 0.025721549274712128, "grad_norm": 2.1071877479553223, "learning_rate": 8.514851485148515e-06, "loss": 0.9684, "step": 86 }, { "epoch": 0.02602063705697622, "grad_norm": 2.642867088317871, "learning_rate": 8.613861386138615e-06, "loss": 1.037, "step": 87 }, { "epoch": 0.026319724839240315, "grad_norm": 2.0572142601013184, "learning_rate": 8.712871287128714e-06, "loss": 0.9033, "step": 88 }, { "epoch": 0.026618812621504413, "grad_norm": 1.995407223701477, "learning_rate": 8.811881188118812e-06, "loss": 0.9593, "step": 89 }, { "epoch": 0.026917900403768506, "grad_norm": 2.259364604949951, "learning_rate": 8.910891089108911e-06, "loss": 0.9264, "step": 90 }, { "epoch": 0.0272169881860326, "grad_norm": 2.2154786586761475, "learning_rate": 9.009900990099011e-06, "loss": 0.9587, "step": 91 }, { "epoch": 0.027516075968296694, "grad_norm": 2.2588000297546387, "learning_rate": 9.10891089108911e-06, "loss": 0.9325, "step": 92 }, { "epoch": 0.02781516375056079, "grad_norm": 2.2294507026672363, "learning_rate": 9.20792079207921e-06, "loss": 0.9165, "step": 93 }, { "epoch": 0.028114251532824885, "grad_norm": 2.1310436725616455, "learning_rate": 9.306930693069308e-06, "loss": 0.9703, "step": 94 }, { "epoch": 0.02841333931508898, "grad_norm": 2.1413207054138184, "learning_rate": 9.405940594059405e-06, "loss": 1.0051, "step": 95 }, { "epoch": 0.028712427097353072, "grad_norm": 2.009608030319214, "learning_rate": 9.504950495049505e-06, "loss": 0.9454, "step": 96 }, { "epoch": 0.029011514879617166, "grad_norm": 2.1970038414001465, "learning_rate": 9.603960396039604e-06, "loss": 0.9412, "step": 97 }, { "epoch": 0.029310602661881263, "grad_norm": 2.268655776977539, "learning_rate": 9.702970297029704e-06, "loss": 0.9707, "step": 98 }, { "epoch": 0.029609690444145357, "grad_norm": 2.247079610824585, "learning_rate": 9.801980198019802e-06, "loss": 0.9193, "step": 99 }, { "epoch": 0.02990877822640945, "grad_norm": 2.199341297149658, "learning_rate": 9.900990099009901e-06, "loss": 0.9595, "step": 100 }, { "epoch": 0.030207866008673544, "grad_norm": 2.0435914993286133, "learning_rate": 1e-05, "loss": 0.9012, "step": 101 }, { "epoch": 0.03050695379093764, "grad_norm": 2.5134024620056152, "learning_rate": 9.999997652456228e-06, "loss": 0.9641, "step": 102 }, { "epoch": 0.030806041573201735, "grad_norm": 2.0889689922332764, "learning_rate": 9.999990609827113e-06, "loss": 0.9066, "step": 103 }, { "epoch": 0.03110512935546583, "grad_norm": 2.6673483848571777, "learning_rate": 9.999978872119267e-06, "loss": 0.9272, "step": 104 }, { "epoch": 0.03140421713772992, "grad_norm": 3.133084297180176, "learning_rate": 9.999962439343715e-06, "loss": 1.0145, "step": 105 }, { "epoch": 0.03170330491999402, "grad_norm": 2.107012987136841, "learning_rate": 9.999941311515888e-06, "loss": 0.9509, "step": 106 }, { "epoch": 0.03200239270225811, "grad_norm": 2.1817209720611572, "learning_rate": 9.999915488655623e-06, "loss": 0.9456, "step": 107 }, { "epoch": 0.03230148048452221, "grad_norm": 2.136371374130249, "learning_rate": 9.999884970787168e-06, "loss": 0.954, "step": 108 }, { "epoch": 0.032600568266786305, "grad_norm": 2.0324740409851074, "learning_rate": 9.999849757939182e-06, "loss": 0.9202, "step": 109 }, { "epoch": 0.032899656049050395, "grad_norm": 2.73146915435791, "learning_rate": 9.99980985014473e-06, "loss": 0.9044, "step": 110 }, { "epoch": 0.03319874383131449, "grad_norm": 2.0892693996429443, "learning_rate": 9.999765247441285e-06, "loss": 0.9727, "step": 111 }, { "epoch": 0.03349783161357858, "grad_norm": 2.1814208030700684, "learning_rate": 9.999715949870729e-06, "loss": 0.9648, "step": 112 }, { "epoch": 0.03379691939584268, "grad_norm": 2.3022897243499756, "learning_rate": 9.999661957479354e-06, "loss": 0.9193, "step": 113 }, { "epoch": 0.03409600717810678, "grad_norm": 2.2460272312164307, "learning_rate": 9.999603270317863e-06, "loss": 0.9232, "step": 114 }, { "epoch": 0.03439509496037087, "grad_norm": 2.1780221462249756, "learning_rate": 9.99953988844136e-06, "loss": 0.968, "step": 115 }, { "epoch": 0.034694182742634964, "grad_norm": 2.0386435985565186, "learning_rate": 9.999471811909363e-06, "loss": 0.9059, "step": 116 }, { "epoch": 0.034993270524899055, "grad_norm": 1.967865228652954, "learning_rate": 9.999399040785797e-06, "loss": 0.9539, "step": 117 }, { "epoch": 0.03529235830716315, "grad_norm": 2.624521255493164, "learning_rate": 9.999321575138997e-06, "loss": 1.0018, "step": 118 }, { "epoch": 0.03559144608942725, "grad_norm": 2.0185389518737793, "learning_rate": 9.999239415041701e-06, "loss": 0.933, "step": 119 }, { "epoch": 0.03589053387169134, "grad_norm": 2.4032232761383057, "learning_rate": 9.999152560571064e-06, "loss": 0.915, "step": 120 }, { "epoch": 0.03618962165395544, "grad_norm": 2.118293046951294, "learning_rate": 9.99906101180864e-06, "loss": 0.9672, "step": 121 }, { "epoch": 0.036488709436219534, "grad_norm": 2.1929712295532227, "learning_rate": 9.998964768840393e-06, "loss": 0.9426, "step": 122 }, { "epoch": 0.036787797218483624, "grad_norm": 2.1195485591888428, "learning_rate": 9.998863831756702e-06, "loss": 0.915, "step": 123 }, { "epoch": 0.03708688500074772, "grad_norm": 2.4792752265930176, "learning_rate": 9.998758200652346e-06, "loss": 0.9786, "step": 124 }, { "epoch": 0.03738597278301181, "grad_norm": 2.173375129699707, "learning_rate": 9.998647875626514e-06, "loss": 0.959, "step": 125 }, { "epoch": 0.03768506056527591, "grad_norm": 2.2410991191864014, "learning_rate": 9.998532856782805e-06, "loss": 0.9189, "step": 126 }, { "epoch": 0.037984148347540006, "grad_norm": 2.5221569538116455, "learning_rate": 9.998413144229224e-06, "loss": 1.0464, "step": 127 }, { "epoch": 0.038283236129804096, "grad_norm": 2.026813507080078, "learning_rate": 9.998288738078179e-06, "loss": 0.9348, "step": 128 }, { "epoch": 0.038582323912068194, "grad_norm": 2.2175710201263428, "learning_rate": 9.998159638446495e-06, "loss": 0.9511, "step": 129 }, { "epoch": 0.038881411694332284, "grad_norm": 2.193385362625122, "learning_rate": 9.998025845455394e-06, "loss": 0.9931, "step": 130 }, { "epoch": 0.03918049947659638, "grad_norm": 2.260263442993164, "learning_rate": 9.99788735923051e-06, "loss": 0.9383, "step": 131 }, { "epoch": 0.03947958725886048, "grad_norm": 2.046661376953125, "learning_rate": 9.997744179901891e-06, "loss": 0.8964, "step": 132 }, { "epoch": 0.03977867504112457, "grad_norm": 2.138976573944092, "learning_rate": 9.997596307603979e-06, "loss": 0.9506, "step": 133 }, { "epoch": 0.040077762823388666, "grad_norm": 2.048691749572754, "learning_rate": 9.997443742475628e-06, "loss": 0.9186, "step": 134 }, { "epoch": 0.040376850605652756, "grad_norm": 3.6780455112457275, "learning_rate": 9.997286484660101e-06, "loss": 0.9791, "step": 135 }, { "epoch": 0.04067593838791685, "grad_norm": 2.2234129905700684, "learning_rate": 9.997124534305065e-06, "loss": 0.9321, "step": 136 }, { "epoch": 0.04097502617018095, "grad_norm": 2.337744951248169, "learning_rate": 9.996957891562598e-06, "loss": 0.9187, "step": 137 }, { "epoch": 0.04127411395244504, "grad_norm": 2.133892059326172, "learning_rate": 9.996786556589175e-06, "loss": 0.9527, "step": 138 }, { "epoch": 0.04157320173470914, "grad_norm": 2.163987874984741, "learning_rate": 9.996610529545685e-06, "loss": 0.9675, "step": 139 }, { "epoch": 0.041872289516973235, "grad_norm": 2.3938302993774414, "learning_rate": 9.996429810597421e-06, "loss": 0.9662, "step": 140 }, { "epoch": 0.042171377299237325, "grad_norm": 2.3496336936950684, "learning_rate": 9.996244399914083e-06, "loss": 0.9416, "step": 141 }, { "epoch": 0.04247046508150142, "grad_norm": 2.2904446125030518, "learning_rate": 9.99605429766977e-06, "loss": 0.9394, "step": 142 }, { "epoch": 0.04276955286376551, "grad_norm": 2.2339518070220947, "learning_rate": 9.995859504042994e-06, "loss": 0.8972, "step": 143 }, { "epoch": 0.04306864064602961, "grad_norm": 2.3489906787872314, "learning_rate": 9.99566001921667e-06, "loss": 0.9203, "step": 144 }, { "epoch": 0.04336772842829371, "grad_norm": 2.3359427452087402, "learning_rate": 9.995455843378118e-06, "loss": 0.954, "step": 145 }, { "epoch": 0.0436668162105578, "grad_norm": 2.14128041267395, "learning_rate": 9.995246976719063e-06, "loss": 0.9331, "step": 146 }, { "epoch": 0.043965903992821895, "grad_norm": 2.228297472000122, "learning_rate": 9.995033419435632e-06, "loss": 1.0393, "step": 147 }, { "epoch": 0.044264991775085985, "grad_norm": 2.2302234172821045, "learning_rate": 9.994815171728362e-06, "loss": 1.0035, "step": 148 }, { "epoch": 0.04456407955735008, "grad_norm": 2.0658860206604004, "learning_rate": 9.994592233802189e-06, "loss": 1.0109, "step": 149 }, { "epoch": 0.04486316733961418, "grad_norm": 2.324981212615967, "learning_rate": 9.994364605866455e-06, "loss": 0.9069, "step": 150 }, { "epoch": 0.04516225512187827, "grad_norm": 2.2505886554718018, "learning_rate": 9.99413228813491e-06, "loss": 0.9389, "step": 151 }, { "epoch": 0.04546134290414237, "grad_norm": 2.2258894443511963, "learning_rate": 9.993895280825702e-06, "loss": 0.9485, "step": 152 }, { "epoch": 0.04576043068640646, "grad_norm": 2.2290866374969482, "learning_rate": 9.993653584161387e-06, "loss": 0.9544, "step": 153 }, { "epoch": 0.046059518468670554, "grad_norm": 2.3490583896636963, "learning_rate": 9.993407198368918e-06, "loss": 0.9835, "step": 154 }, { "epoch": 0.04635860625093465, "grad_norm": 2.2370145320892334, "learning_rate": 9.993156123679662e-06, "loss": 0.8909, "step": 155 }, { "epoch": 0.04665769403319874, "grad_norm": 2.120981216430664, "learning_rate": 9.992900360329376e-06, "loss": 0.9373, "step": 156 }, { "epoch": 0.04695678181546284, "grad_norm": 2.1867294311523438, "learning_rate": 9.992639908558232e-06, "loss": 0.9186, "step": 157 }, { "epoch": 0.04725586959772693, "grad_norm": 2.0503461360931396, "learning_rate": 9.992374768610795e-06, "loss": 0.9255, "step": 158 }, { "epoch": 0.04755495737999103, "grad_norm": 1.9152251482009888, "learning_rate": 9.992104940736038e-06, "loss": 0.8708, "step": 159 }, { "epoch": 0.047854045162255124, "grad_norm": 2.078386068344116, "learning_rate": 9.991830425187333e-06, "loss": 0.9274, "step": 160 }, { "epoch": 0.048153132944519214, "grad_norm": 2.34214186668396, "learning_rate": 9.991551222222455e-06, "loss": 1.015, "step": 161 }, { "epoch": 0.04845222072678331, "grad_norm": 2.375265121459961, "learning_rate": 9.99126733210358e-06, "loss": 0.9032, "step": 162 }, { "epoch": 0.04875130850904741, "grad_norm": 2.3650543689727783, "learning_rate": 9.990978755097287e-06, "loss": 0.9462, "step": 163 }, { "epoch": 0.0490503962913115, "grad_norm": 1.8767156600952148, "learning_rate": 9.990685491474555e-06, "loss": 0.8861, "step": 164 }, { "epoch": 0.049349484073575596, "grad_norm": 2.1046345233917236, "learning_rate": 9.990387541510761e-06, "loss": 0.9221, "step": 165 }, { "epoch": 0.049648571855839686, "grad_norm": 2.031039237976074, "learning_rate": 9.990084905485689e-06, "loss": 0.909, "step": 166 }, { "epoch": 0.049947659638103784, "grad_norm": 2.94842791557312, "learning_rate": 9.989777583683517e-06, "loss": 0.9193, "step": 167 }, { "epoch": 0.05024674742036788, "grad_norm": 2.394653797149658, "learning_rate": 9.989465576392828e-06, "loss": 0.9041, "step": 168 }, { "epoch": 0.05054583520263197, "grad_norm": 2.0728888511657715, "learning_rate": 9.989148883906599e-06, "loss": 0.887, "step": 169 }, { "epoch": 0.05084492298489607, "grad_norm": 2.1062378883361816, "learning_rate": 9.988827506522211e-06, "loss": 0.9253, "step": 170 }, { "epoch": 0.05114401076716016, "grad_norm": 2.3772060871124268, "learning_rate": 9.988501444541445e-06, "loss": 0.9388, "step": 171 }, { "epoch": 0.051443098549424256, "grad_norm": 2.204463243484497, "learning_rate": 9.988170698270477e-06, "loss": 0.9043, "step": 172 }, { "epoch": 0.05174218633168835, "grad_norm": 2.0169548988342285, "learning_rate": 9.987835268019883e-06, "loss": 0.9123, "step": 173 }, { "epoch": 0.05204127411395244, "grad_norm": 2.152456760406494, "learning_rate": 9.98749515410464e-06, "loss": 0.9278, "step": 174 }, { "epoch": 0.05234036189621654, "grad_norm": 3.0098843574523926, "learning_rate": 9.987150356844118e-06, "loss": 0.9477, "step": 175 }, { "epoch": 0.05263944967848063, "grad_norm": 2.0842204093933105, "learning_rate": 9.98680087656209e-06, "loss": 0.9096, "step": 176 }, { "epoch": 0.05293853746074473, "grad_norm": 2.136688232421875, "learning_rate": 9.986446713586724e-06, "loss": 0.8548, "step": 177 }, { "epoch": 0.053237625243008825, "grad_norm": 2.4634604454040527, "learning_rate": 9.986087868250584e-06, "loss": 0.986, "step": 178 }, { "epoch": 0.053536713025272915, "grad_norm": 2.356006145477295, "learning_rate": 9.985724340890633e-06, "loss": 0.9686, "step": 179 }, { "epoch": 0.05383580080753701, "grad_norm": 2.1103944778442383, "learning_rate": 9.98535613184823e-06, "loss": 0.9751, "step": 180 }, { "epoch": 0.05413488858980111, "grad_norm": 2.153198719024658, "learning_rate": 9.984983241469129e-06, "loss": 0.9112, "step": 181 }, { "epoch": 0.0544339763720652, "grad_norm": 2.406641960144043, "learning_rate": 9.984605670103478e-06, "loss": 0.9284, "step": 182 }, { "epoch": 0.0547330641543293, "grad_norm": 2.202843427658081, "learning_rate": 9.98422341810583e-06, "loss": 0.9028, "step": 183 }, { "epoch": 0.05503215193659339, "grad_norm": 2.120081901550293, "learning_rate": 9.98383648583512e-06, "loss": 0.9147, "step": 184 }, { "epoch": 0.055331239718857485, "grad_norm": 2.4549012184143066, "learning_rate": 9.983444873654683e-06, "loss": 0.9184, "step": 185 }, { "epoch": 0.05563032750112158, "grad_norm": 2.2914867401123047, "learning_rate": 9.983048581932257e-06, "loss": 0.9128, "step": 186 }, { "epoch": 0.05592941528338567, "grad_norm": 2.092322587966919, "learning_rate": 9.982647611039961e-06, "loss": 0.8701, "step": 187 }, { "epoch": 0.05622850306564977, "grad_norm": 2.4194605350494385, "learning_rate": 9.982241961354317e-06, "loss": 0.8879, "step": 188 }, { "epoch": 0.05652759084791386, "grad_norm": 2.2804901599884033, "learning_rate": 9.981831633256236e-06, "loss": 0.9577, "step": 189 }, { "epoch": 0.05682667863017796, "grad_norm": 2.4391958713531494, "learning_rate": 9.981416627131022e-06, "loss": 0.9778, "step": 190 }, { "epoch": 0.057125766412442054, "grad_norm": 2.4129958152770996, "learning_rate": 9.980996943368373e-06, "loss": 0.8923, "step": 191 }, { "epoch": 0.057424854194706144, "grad_norm": 2.8176677227020264, "learning_rate": 9.98057258236238e-06, "loss": 0.9269, "step": 192 }, { "epoch": 0.05772394197697024, "grad_norm": 2.2508320808410645, "learning_rate": 9.980143544511527e-06, "loss": 0.9844, "step": 193 }, { "epoch": 0.05802302975923433, "grad_norm": 2.243612766265869, "learning_rate": 9.979709830218688e-06, "loss": 0.9216, "step": 194 }, { "epoch": 0.05832211754149843, "grad_norm": 2.1857690811157227, "learning_rate": 9.979271439891125e-06, "loss": 0.9295, "step": 195 }, { "epoch": 0.058621205323762526, "grad_norm": 2.0818893909454346, "learning_rate": 9.978828373940498e-06, "loss": 0.949, "step": 196 }, { "epoch": 0.05892029310602662, "grad_norm": 2.7867891788482666, "learning_rate": 9.97838063278285e-06, "loss": 0.8385, "step": 197 }, { "epoch": 0.059219380888290714, "grad_norm": 2.227369546890259, "learning_rate": 9.977928216838622e-06, "loss": 0.8791, "step": 198 }, { "epoch": 0.05951846867055481, "grad_norm": 2.0494680404663086, "learning_rate": 9.977471126532636e-06, "loss": 0.874, "step": 199 }, { "epoch": 0.0598175564528189, "grad_norm": 1.9946218729019165, "learning_rate": 9.97700936229411e-06, "loss": 0.9387, "step": 200 }, { "epoch": 0.060116644235083, "grad_norm": 2.202942132949829, "learning_rate": 9.976542924556652e-06, "loss": 0.9423, "step": 201 }, { "epoch": 0.06041573201734709, "grad_norm": 2.1691505908966064, "learning_rate": 9.976071813758249e-06, "loss": 1.0364, "step": 202 }, { "epoch": 0.060714819799611186, "grad_norm": 2.0566298961639404, "learning_rate": 9.975596030341287e-06, "loss": 0.8926, "step": 203 }, { "epoch": 0.06101390758187528, "grad_norm": 2.315225124359131, "learning_rate": 9.975115574752532e-06, "loss": 0.9161, "step": 204 }, { "epoch": 0.061312995364139374, "grad_norm": 2.377856731414795, "learning_rate": 9.974630447443142e-06, "loss": 0.9642, "step": 205 }, { "epoch": 0.06161208314640347, "grad_norm": 2.2431962490081787, "learning_rate": 9.974140648868659e-06, "loss": 1.0097, "step": 206 }, { "epoch": 0.06191117092866756, "grad_norm": 2.1251087188720703, "learning_rate": 9.973646179489014e-06, "loss": 0.928, "step": 207 }, { "epoch": 0.06221025871093166, "grad_norm": 2.2884161472320557, "learning_rate": 9.97314703976852e-06, "loss": 0.9527, "step": 208 }, { "epoch": 0.06250934649319576, "grad_norm": 2.5177602767944336, "learning_rate": 9.97264323017588e-06, "loss": 0.9114, "step": 209 }, { "epoch": 0.06280843427545985, "grad_norm": 2.1517698764801025, "learning_rate": 9.97213475118418e-06, "loss": 0.9304, "step": 210 }, { "epoch": 0.06310752205772394, "grad_norm": 2.450845956802368, "learning_rate": 9.971621603270887e-06, "loss": 0.9299, "step": 211 }, { "epoch": 0.06340660983998804, "grad_norm": 2.0499141216278076, "learning_rate": 9.971103786917862e-06, "loss": 0.9119, "step": 212 }, { "epoch": 0.06370569762225213, "grad_norm": 2.826937437057495, "learning_rate": 9.97058130261134e-06, "loss": 0.8891, "step": 213 }, { "epoch": 0.06400478540451622, "grad_norm": 2.5715081691741943, "learning_rate": 9.970054150841942e-06, "loss": 0.9736, "step": 214 }, { "epoch": 0.06430387318678032, "grad_norm": 2.0408411026000977, "learning_rate": 9.969522332104675e-06, "loss": 0.9454, "step": 215 }, { "epoch": 0.06460296096904442, "grad_norm": 2.1611015796661377, "learning_rate": 9.968985846898924e-06, "loss": 0.9509, "step": 216 }, { "epoch": 0.0649020487513085, "grad_norm": 2.7818005084991455, "learning_rate": 9.968444695728461e-06, "loss": 0.9095, "step": 217 }, { "epoch": 0.06520113653357261, "grad_norm": 2.1147513389587402, "learning_rate": 9.967898879101434e-06, "loss": 0.9402, "step": 218 }, { "epoch": 0.0655002243158367, "grad_norm": 1.981581211090088, "learning_rate": 9.967348397530373e-06, "loss": 0.9971, "step": 219 }, { "epoch": 0.06579931209810079, "grad_norm": 2.082632064819336, "learning_rate": 9.966793251532197e-06, "loss": 0.9392, "step": 220 }, { "epoch": 0.06609839988036488, "grad_norm": 1.78872549533844, "learning_rate": 9.966233441628188e-06, "loss": 0.8555, "step": 221 }, { "epoch": 0.06639748766262898, "grad_norm": 2.2452573776245117, "learning_rate": 9.965668968344023e-06, "loss": 0.9192, "step": 222 }, { "epoch": 0.06669657544489307, "grad_norm": 2.1898322105407715, "learning_rate": 9.965099832209753e-06, "loss": 0.9183, "step": 223 }, { "epoch": 0.06699566322715717, "grad_norm": 2.35648775100708, "learning_rate": 9.964526033759803e-06, "loss": 0.9281, "step": 224 }, { "epoch": 0.06729475100942127, "grad_norm": 2.0320119857788086, "learning_rate": 9.963947573532983e-06, "loss": 0.9889, "step": 225 }, { "epoch": 0.06759383879168536, "grad_norm": 1.8740633726119995, "learning_rate": 9.963364452072475e-06, "loss": 0.8873, "step": 226 }, { "epoch": 0.06789292657394945, "grad_norm": 2.160003185272217, "learning_rate": 9.962776669925842e-06, "loss": 0.9334, "step": 227 }, { "epoch": 0.06819201435621355, "grad_norm": 1.975968599319458, "learning_rate": 9.962184227645021e-06, "loss": 0.9182, "step": 228 }, { "epoch": 0.06849110213847764, "grad_norm": 2.3422555923461914, "learning_rate": 9.961587125786328e-06, "loss": 0.9313, "step": 229 }, { "epoch": 0.06879018992074173, "grad_norm": 2.3071324825286865, "learning_rate": 9.960985364910448e-06, "loss": 0.9021, "step": 230 }, { "epoch": 0.06908927770300584, "grad_norm": 2.214460611343384, "learning_rate": 9.960378945582446e-06, "loss": 0.9364, "step": 231 }, { "epoch": 0.06938836548526993, "grad_norm": 2.1151297092437744, "learning_rate": 9.959767868371761e-06, "loss": 0.9116, "step": 232 }, { "epoch": 0.06968745326753402, "grad_norm": 2.335604667663574, "learning_rate": 9.959152133852209e-06, "loss": 0.9874, "step": 233 }, { "epoch": 0.06998654104979811, "grad_norm": 2.126277208328247, "learning_rate": 9.958531742601968e-06, "loss": 0.9191, "step": 234 }, { "epoch": 0.07028562883206221, "grad_norm": 2.0077273845672607, "learning_rate": 9.9579066952036e-06, "loss": 0.9519, "step": 235 }, { "epoch": 0.0705847166143263, "grad_norm": 2.171224594116211, "learning_rate": 9.957276992244039e-06, "loss": 0.9316, "step": 236 }, { "epoch": 0.0708838043965904, "grad_norm": 2.0877857208251953, "learning_rate": 9.956642634314582e-06, "loss": 0.9619, "step": 237 }, { "epoch": 0.0711828921788545, "grad_norm": 2.060274839401245, "learning_rate": 9.956003622010904e-06, "loss": 0.8542, "step": 238 }, { "epoch": 0.07148197996111859, "grad_norm": 2.165142774581909, "learning_rate": 9.955359955933048e-06, "loss": 0.881, "step": 239 }, { "epoch": 0.07178106774338268, "grad_norm": 2.4828040599823, "learning_rate": 9.95471163668543e-06, "loss": 0.9253, "step": 240 }, { "epoch": 0.07208015552564678, "grad_norm": 2.0746188163757324, "learning_rate": 9.954058664876832e-06, "loss": 0.8952, "step": 241 }, { "epoch": 0.07237924330791087, "grad_norm": 2.144286870956421, "learning_rate": 9.953401041120403e-06, "loss": 0.92, "step": 242 }, { "epoch": 0.07267833109017496, "grad_norm": 2.168682336807251, "learning_rate": 9.952738766033668e-06, "loss": 0.9242, "step": 243 }, { "epoch": 0.07297741887243907, "grad_norm": 2.3162496089935303, "learning_rate": 9.952071840238511e-06, "loss": 0.8911, "step": 244 }, { "epoch": 0.07327650665470316, "grad_norm": 2.0696685314178467, "learning_rate": 9.951400264361188e-06, "loss": 0.9631, "step": 245 }, { "epoch": 0.07357559443696725, "grad_norm": 2.1954448223114014, "learning_rate": 9.950724039032324e-06, "loss": 0.9579, "step": 246 }, { "epoch": 0.07387468221923134, "grad_norm": 2.266998529434204, "learning_rate": 9.950043164886902e-06, "loss": 0.9474, "step": 247 }, { "epoch": 0.07417377000149544, "grad_norm": 2.154869556427002, "learning_rate": 9.949357642564275e-06, "loss": 0.9433, "step": 248 }, { "epoch": 0.07447285778375953, "grad_norm": 2.0296430587768555, "learning_rate": 9.948667472708163e-06, "loss": 0.8807, "step": 249 }, { "epoch": 0.07477194556602362, "grad_norm": 2.4973790645599365, "learning_rate": 9.947972655966647e-06, "loss": 0.963, "step": 250 }, { "epoch": 0.07507103334828773, "grad_norm": 2.1504342555999756, "learning_rate": 9.947273192992171e-06, "loss": 0.9579, "step": 251 }, { "epoch": 0.07537012113055182, "grad_norm": 2.368147850036621, "learning_rate": 9.946569084441542e-06, "loss": 0.9696, "step": 252 }, { "epoch": 0.07566920891281591, "grad_norm": 2.0058436393737793, "learning_rate": 9.945860330975933e-06, "loss": 0.927, "step": 253 }, { "epoch": 0.07596829669508001, "grad_norm": 2.1533658504486084, "learning_rate": 9.945146933260876e-06, "loss": 0.9487, "step": 254 }, { "epoch": 0.0762673844773441, "grad_norm": 3.0010688304901123, "learning_rate": 9.94442889196626e-06, "loss": 0.8904, "step": 255 }, { "epoch": 0.07656647225960819, "grad_norm": 2.3670761585235596, "learning_rate": 9.94370620776634e-06, "loss": 0.9388, "step": 256 }, { "epoch": 0.07686556004187228, "grad_norm": 2.0407774448394775, "learning_rate": 9.942978881339732e-06, "loss": 0.9218, "step": 257 }, { "epoch": 0.07716464782413639, "grad_norm": 1.8988912105560303, "learning_rate": 9.942246913369409e-06, "loss": 0.8613, "step": 258 }, { "epoch": 0.07746373560640048, "grad_norm": 2.8930153846740723, "learning_rate": 9.941510304542695e-06, "loss": 0.932, "step": 259 }, { "epoch": 0.07776282338866457, "grad_norm": 2.1022799015045166, "learning_rate": 9.940769055551284e-06, "loss": 0.939, "step": 260 }, { "epoch": 0.07806191117092867, "grad_norm": 2.214641809463501, "learning_rate": 9.940023167091219e-06, "loss": 0.9714, "step": 261 }, { "epoch": 0.07836099895319276, "grad_norm": 2.633821487426758, "learning_rate": 9.939272639862905e-06, "loss": 0.9268, "step": 262 }, { "epoch": 0.07866008673545685, "grad_norm": 1.9098597764968872, "learning_rate": 9.9385174745711e-06, "loss": 0.8925, "step": 263 }, { "epoch": 0.07895917451772096, "grad_norm": 2.169691324234009, "learning_rate": 9.937757671924915e-06, "loss": 0.8804, "step": 264 }, { "epoch": 0.07925826229998505, "grad_norm": 1.957419991493225, "learning_rate": 9.936993232637818e-06, "loss": 0.9201, "step": 265 }, { "epoch": 0.07955735008224914, "grad_norm": 2.055464744567871, "learning_rate": 9.936224157427635e-06, "loss": 0.952, "step": 266 }, { "epoch": 0.07985643786451324, "grad_norm": 2.393570899963379, "learning_rate": 9.935450447016535e-06, "loss": 0.9154, "step": 267 }, { "epoch": 0.08015552564677733, "grad_norm": 2.075162887573242, "learning_rate": 9.934672102131052e-06, "loss": 0.9092, "step": 268 }, { "epoch": 0.08045461342904142, "grad_norm": 2.192011833190918, "learning_rate": 9.933889123502059e-06, "loss": 0.9208, "step": 269 }, { "epoch": 0.08075370121130551, "grad_norm": 2.3772759437561035, "learning_rate": 9.933101511864793e-06, "loss": 0.8847, "step": 270 }, { "epoch": 0.08105278899356962, "grad_norm": 2.3840949535369873, "learning_rate": 9.93230926795883e-06, "loss": 0.9372, "step": 271 }, { "epoch": 0.0813518767758337, "grad_norm": 2.1318485736846924, "learning_rate": 9.931512392528104e-06, "loss": 0.9101, "step": 272 }, { "epoch": 0.0816509645580978, "grad_norm": 1.9969408512115479, "learning_rate": 9.930710886320895e-06, "loss": 0.9651, "step": 273 }, { "epoch": 0.0819500523403619, "grad_norm": 2.073784828186035, "learning_rate": 9.929904750089829e-06, "loss": 0.9077, "step": 274 }, { "epoch": 0.08224914012262599, "grad_norm": 2.5252487659454346, "learning_rate": 9.929093984591884e-06, "loss": 0.9338, "step": 275 }, { "epoch": 0.08254822790489008, "grad_norm": 2.1214280128479004, "learning_rate": 9.928278590588382e-06, "loss": 0.8427, "step": 276 }, { "epoch": 0.08284731568715419, "grad_norm": 2.237415075302124, "learning_rate": 9.927458568844994e-06, "loss": 0.8773, "step": 277 }, { "epoch": 0.08314640346941828, "grad_norm": 2.072422504425049, "learning_rate": 9.926633920131732e-06, "loss": 0.9017, "step": 278 }, { "epoch": 0.08344549125168237, "grad_norm": 2.1608200073242188, "learning_rate": 9.925804645222957e-06, "loss": 0.9885, "step": 279 }, { "epoch": 0.08374457903394647, "grad_norm": 1.961508870124817, "learning_rate": 9.924970744897373e-06, "loss": 0.92, "step": 280 }, { "epoch": 0.08404366681621056, "grad_norm": 2.466007947921753, "learning_rate": 9.924132219938027e-06, "loss": 0.8882, "step": 281 }, { "epoch": 0.08434275459847465, "grad_norm": 2.1545567512512207, "learning_rate": 9.923289071132308e-06, "loss": 0.9652, "step": 282 }, { "epoch": 0.08464184238073874, "grad_norm": 2.409891366958618, "learning_rate": 9.922441299271948e-06, "loss": 0.8491, "step": 283 }, { "epoch": 0.08494093016300285, "grad_norm": 2.2108309268951416, "learning_rate": 9.92158890515302e-06, "loss": 0.935, "step": 284 }, { "epoch": 0.08524001794526694, "grad_norm": 2.7062766551971436, "learning_rate": 9.920731889575935e-06, "loss": 0.9462, "step": 285 }, { "epoch": 0.08553910572753103, "grad_norm": 2.172302007675171, "learning_rate": 9.919870253345446e-06, "loss": 0.9105, "step": 286 }, { "epoch": 0.08583819350979513, "grad_norm": 2.262796401977539, "learning_rate": 9.919003997270648e-06, "loss": 0.9092, "step": 287 }, { "epoch": 0.08613728129205922, "grad_norm": 2.2685511112213135, "learning_rate": 9.918133122164968e-06, "loss": 0.9573, "step": 288 }, { "epoch": 0.08643636907432331, "grad_norm": 2.016366481781006, "learning_rate": 9.917257628846172e-06, "loss": 0.92, "step": 289 }, { "epoch": 0.08673545685658741, "grad_norm": 1.888378620147705, "learning_rate": 9.916377518136367e-06, "loss": 0.8967, "step": 290 }, { "epoch": 0.0870345446388515, "grad_norm": 2.3834683895111084, "learning_rate": 9.915492790861986e-06, "loss": 0.9814, "step": 291 }, { "epoch": 0.0873336324211156, "grad_norm": 1.9368470907211304, "learning_rate": 9.91460344785381e-06, "loss": 0.941, "step": 292 }, { "epoch": 0.08763272020337969, "grad_norm": 2.1432392597198486, "learning_rate": 9.913709489946946e-06, "loss": 0.9651, "step": 293 }, { "epoch": 0.08793180798564379, "grad_norm": 2.1161293983459473, "learning_rate": 9.912810917980834e-06, "loss": 0.9233, "step": 294 }, { "epoch": 0.08823089576790788, "grad_norm": 1.984200119972229, "learning_rate": 9.911907732799251e-06, "loss": 0.9029, "step": 295 }, { "epoch": 0.08852998355017197, "grad_norm": 1.909569263458252, "learning_rate": 9.910999935250302e-06, "loss": 0.9403, "step": 296 }, { "epoch": 0.08882907133243607, "grad_norm": 2.0528907775878906, "learning_rate": 9.910087526186424e-06, "loss": 0.951, "step": 297 }, { "epoch": 0.08912815911470016, "grad_norm": 2.0173420906066895, "learning_rate": 9.909170506464389e-06, "loss": 0.9095, "step": 298 }, { "epoch": 0.08942724689696425, "grad_norm": 1.9984447956085205, "learning_rate": 9.908248876945291e-06, "loss": 0.9653, "step": 299 }, { "epoch": 0.08972633467922836, "grad_norm": 1.9002392292022705, "learning_rate": 9.907322638494558e-06, "loss": 0.8794, "step": 300 }, { "epoch": 0.09002542246149245, "grad_norm": 2.2681002616882324, "learning_rate": 9.906391791981944e-06, "loss": 0.9507, "step": 301 }, { "epoch": 0.09032451024375654, "grad_norm": 2.1023566722869873, "learning_rate": 9.90545633828153e-06, "loss": 0.9478, "step": 302 }, { "epoch": 0.09062359802602064, "grad_norm": 2.0600366592407227, "learning_rate": 9.904516278271721e-06, "loss": 0.868, "step": 303 }, { "epoch": 0.09092268580828473, "grad_norm": 2.0907702445983887, "learning_rate": 9.903571612835254e-06, "loss": 0.8949, "step": 304 }, { "epoch": 0.09122177359054882, "grad_norm": 2.4338979721069336, "learning_rate": 9.902622342859183e-06, "loss": 0.8888, "step": 305 }, { "epoch": 0.09152086137281291, "grad_norm": 1.931557536125183, "learning_rate": 9.901668469234892e-06, "loss": 0.9222, "step": 306 }, { "epoch": 0.09181994915507702, "grad_norm": 2.2555723190307617, "learning_rate": 9.900709992858083e-06, "loss": 0.8826, "step": 307 }, { "epoch": 0.09211903693734111, "grad_norm": 1.951943039894104, "learning_rate": 9.899746914628782e-06, "loss": 0.9395, "step": 308 }, { "epoch": 0.0924181247196052, "grad_norm": 2.0007481575012207, "learning_rate": 9.898779235451337e-06, "loss": 0.9486, "step": 309 }, { "epoch": 0.0927172125018693, "grad_norm": 2.092543840408325, "learning_rate": 9.897806956234417e-06, "loss": 0.8984, "step": 310 }, { "epoch": 0.0930163002841334, "grad_norm": 2.1782257556915283, "learning_rate": 9.896830077891007e-06, "loss": 0.9135, "step": 311 }, { "epoch": 0.09331538806639748, "grad_norm": 2.0207417011260986, "learning_rate": 9.895848601338414e-06, "loss": 0.9148, "step": 312 }, { "epoch": 0.09361447584866159, "grad_norm": 2.085836887359619, "learning_rate": 9.894862527498259e-06, "loss": 0.9336, "step": 313 }, { "epoch": 0.09391356363092568, "grad_norm": 2.2071681022644043, "learning_rate": 9.893871857296487e-06, "loss": 0.9279, "step": 314 }, { "epoch": 0.09421265141318977, "grad_norm": 2.1861345767974854, "learning_rate": 9.892876591663355e-06, "loss": 0.9395, "step": 315 }, { "epoch": 0.09451173919545386, "grad_norm": 2.0706663131713867, "learning_rate": 9.891876731533429e-06, "loss": 0.9329, "step": 316 }, { "epoch": 0.09481082697771796, "grad_norm": 2.1073272228240967, "learning_rate": 9.8908722778456e-06, "loss": 0.9127, "step": 317 }, { "epoch": 0.09510991475998205, "grad_norm": 2.2256219387054443, "learning_rate": 9.889863231543065e-06, "loss": 0.8838, "step": 318 }, { "epoch": 0.09540900254224614, "grad_norm": 2.5397820472717285, "learning_rate": 9.888849593573339e-06, "loss": 0.9267, "step": 319 }, { "epoch": 0.09570809032451025, "grad_norm": 2.0987069606781006, "learning_rate": 9.887831364888243e-06, "loss": 0.9214, "step": 320 }, { "epoch": 0.09600717810677434, "grad_norm": 2.827425718307495, "learning_rate": 9.886808546443914e-06, "loss": 0.8989, "step": 321 }, { "epoch": 0.09630626588903843, "grad_norm": 1.9034327268600464, "learning_rate": 9.885781139200794e-06, "loss": 0.977, "step": 322 }, { "epoch": 0.09660535367130253, "grad_norm": 1.9976065158843994, "learning_rate": 9.88474914412364e-06, "loss": 0.911, "step": 323 }, { "epoch": 0.09690444145356662, "grad_norm": 1.957942247390747, "learning_rate": 9.88371256218151e-06, "loss": 0.8722, "step": 324 }, { "epoch": 0.09720352923583071, "grad_norm": 1.9988555908203125, "learning_rate": 9.882671394347771e-06, "loss": 0.9498, "step": 325 }, { "epoch": 0.09750261701809482, "grad_norm": 2.0863683223724365, "learning_rate": 9.881625641600104e-06, "loss": 0.9531, "step": 326 }, { "epoch": 0.09780170480035891, "grad_norm": 2.006538152694702, "learning_rate": 9.880575304920484e-06, "loss": 0.924, "step": 327 }, { "epoch": 0.098100792582623, "grad_norm": 2.1244921684265137, "learning_rate": 9.879520385295197e-06, "loss": 0.8851, "step": 328 }, { "epoch": 0.09839988036488709, "grad_norm": 1.9595825672149658, "learning_rate": 9.878460883714831e-06, "loss": 0.9317, "step": 329 }, { "epoch": 0.09869896814715119, "grad_norm": 1.9879025220870972, "learning_rate": 9.877396801174277e-06, "loss": 0.8974, "step": 330 }, { "epoch": 0.09899805592941528, "grad_norm": 10.351932525634766, "learning_rate": 9.876328138672726e-06, "loss": 0.9329, "step": 331 }, { "epoch": 0.09929714371167937, "grad_norm": 2.1588828563690186, "learning_rate": 9.875254897213674e-06, "loss": 0.8648, "step": 332 }, { "epoch": 0.09959623149394348, "grad_norm": 1.8147989511489868, "learning_rate": 9.87417707780491e-06, "loss": 0.9022, "step": 333 }, { "epoch": 0.09989531927620757, "grad_norm": 2.3252570629119873, "learning_rate": 9.873094681458525e-06, "loss": 0.9232, "step": 334 }, { "epoch": 0.10019440705847166, "grad_norm": 2.3997271060943604, "learning_rate": 9.87200770919091e-06, "loss": 0.944, "step": 335 }, { "epoch": 0.10049349484073576, "grad_norm": 2.1597177982330322, "learning_rate": 9.870916162022752e-06, "loss": 0.965, "step": 336 }, { "epoch": 0.10079258262299985, "grad_norm": 2.1371512413024902, "learning_rate": 9.86982004097903e-06, "loss": 0.912, "step": 337 }, { "epoch": 0.10109167040526394, "grad_norm": 2.486417293548584, "learning_rate": 9.868719347089024e-06, "loss": 0.9368, "step": 338 }, { "epoch": 0.10139075818752805, "grad_norm": 1.980965256690979, "learning_rate": 9.867614081386302e-06, "loss": 0.9269, "step": 339 }, { "epoch": 0.10168984596979214, "grad_norm": 2.553180694580078, "learning_rate": 9.866504244908728e-06, "loss": 0.9256, "step": 340 }, { "epoch": 0.10198893375205623, "grad_norm": 2.0243916511535645, "learning_rate": 9.86538983869846e-06, "loss": 0.8743, "step": 341 }, { "epoch": 0.10228802153432032, "grad_norm": 2.3391964435577393, "learning_rate": 9.864270863801944e-06, "loss": 0.963, "step": 342 }, { "epoch": 0.10258710931658442, "grad_norm": 2.0203258991241455, "learning_rate": 9.863147321269918e-06, "loss": 0.9536, "step": 343 }, { "epoch": 0.10288619709884851, "grad_norm": 2.2075395584106445, "learning_rate": 9.862019212157406e-06, "loss": 0.9265, "step": 344 }, { "epoch": 0.1031852848811126, "grad_norm": 2.0447275638580322, "learning_rate": 9.860886537523721e-06, "loss": 0.9179, "step": 345 }, { "epoch": 0.1034843726633767, "grad_norm": 2.0379741191864014, "learning_rate": 9.859749298432468e-06, "loss": 0.9286, "step": 346 }, { "epoch": 0.1037834604456408, "grad_norm": 3.1145975589752197, "learning_rate": 9.858607495951534e-06, "loss": 0.9188, "step": 347 }, { "epoch": 0.10408254822790489, "grad_norm": 2.072021007537842, "learning_rate": 9.857461131153089e-06, "loss": 0.9323, "step": 348 }, { "epoch": 0.10438163601016899, "grad_norm": 1.9778414964675903, "learning_rate": 9.856310205113594e-06, "loss": 0.9436, "step": 349 }, { "epoch": 0.10468072379243308, "grad_norm": 1.9100221395492554, "learning_rate": 9.855154718913782e-06, "loss": 0.9195, "step": 350 }, { "epoch": 0.10497981157469717, "grad_norm": 2.101713180541992, "learning_rate": 9.853994673638679e-06, "loss": 0.89, "step": 351 }, { "epoch": 0.10527889935696126, "grad_norm": 2.0407423973083496, "learning_rate": 9.852830070377588e-06, "loss": 0.9182, "step": 352 }, { "epoch": 0.10557798713922537, "grad_norm": 1.9442716836929321, "learning_rate": 9.851660910224092e-06, "loss": 0.9226, "step": 353 }, { "epoch": 0.10587707492148946, "grad_norm": 2.2376959323883057, "learning_rate": 9.85048719427605e-06, "loss": 0.8752, "step": 354 }, { "epoch": 0.10617616270375355, "grad_norm": 2.2486202716827393, "learning_rate": 9.849308923635606e-06, "loss": 0.9804, "step": 355 }, { "epoch": 0.10647525048601765, "grad_norm": 2.205756902694702, "learning_rate": 9.848126099409175e-06, "loss": 1.0002, "step": 356 }, { "epoch": 0.10677433826828174, "grad_norm": 2.0094363689422607, "learning_rate": 9.846938722707446e-06, "loss": 0.8215, "step": 357 }, { "epoch": 0.10707342605054583, "grad_norm": 1.9633045196533203, "learning_rate": 9.845746794645393e-06, "loss": 0.9272, "step": 358 }, { "epoch": 0.10737251383280993, "grad_norm": 2.097144365310669, "learning_rate": 9.844550316342252e-06, "loss": 0.9626, "step": 359 }, { "epoch": 0.10767160161507403, "grad_norm": 2.0629072189331055, "learning_rate": 9.843349288921543e-06, "loss": 0.9262, "step": 360 }, { "epoch": 0.10797068939733812, "grad_norm": 1.9655308723449707, "learning_rate": 9.842143713511044e-06, "loss": 0.9345, "step": 361 }, { "epoch": 0.10826977717960222, "grad_norm": 2.3182992935180664, "learning_rate": 9.840933591242817e-06, "loss": 0.9186, "step": 362 }, { "epoch": 0.10856886496186631, "grad_norm": 1.893142819404602, "learning_rate": 9.839718923253186e-06, "loss": 0.9105, "step": 363 }, { "epoch": 0.1088679527441304, "grad_norm": 2.0374596118927, "learning_rate": 9.838499710682745e-06, "loss": 0.9146, "step": 364 }, { "epoch": 0.10916704052639449, "grad_norm": 2.150848150253296, "learning_rate": 9.837275954676357e-06, "loss": 0.8932, "step": 365 }, { "epoch": 0.1094661283086586, "grad_norm": 2.1626720428466797, "learning_rate": 9.836047656383152e-06, "loss": 0.911, "step": 366 }, { "epoch": 0.10976521609092268, "grad_norm": 2.082512140274048, "learning_rate": 9.834814816956521e-06, "loss": 0.8453, "step": 367 }, { "epoch": 0.11006430387318678, "grad_norm": 2.1975317001342773, "learning_rate": 9.833577437554121e-06, "loss": 0.8756, "step": 368 }, { "epoch": 0.11036339165545088, "grad_norm": 2.156377077102661, "learning_rate": 9.832335519337877e-06, "loss": 0.9661, "step": 369 }, { "epoch": 0.11066247943771497, "grad_norm": 1.9676083326339722, "learning_rate": 9.831089063473967e-06, "loss": 0.8855, "step": 370 }, { "epoch": 0.11096156721997906, "grad_norm": 2.4126598834991455, "learning_rate": 9.82983807113284e-06, "loss": 0.8946, "step": 371 }, { "epoch": 0.11126065500224316, "grad_norm": 2.4758460521698, "learning_rate": 9.828582543489194e-06, "loss": 0.9298, "step": 372 }, { "epoch": 0.11155974278450725, "grad_norm": 2.313663959503174, "learning_rate": 9.827322481721998e-06, "loss": 0.9907, "step": 373 }, { "epoch": 0.11185883056677134, "grad_norm": 1.9861655235290527, "learning_rate": 9.826057887014466e-06, "loss": 0.8911, "step": 374 }, { "epoch": 0.11215791834903545, "grad_norm": 2.2540431022644043, "learning_rate": 9.824788760554078e-06, "loss": 0.9425, "step": 375 }, { "epoch": 0.11245700613129954, "grad_norm": 2.0488195419311523, "learning_rate": 9.823515103532564e-06, "loss": 0.8807, "step": 376 }, { "epoch": 0.11275609391356363, "grad_norm": 2.3662540912628174, "learning_rate": 9.822236917145914e-06, "loss": 0.8759, "step": 377 }, { "epoch": 0.11305518169582772, "grad_norm": 2.3137526512145996, "learning_rate": 9.820954202594362e-06, "loss": 1.0326, "step": 378 }, { "epoch": 0.11335426947809182, "grad_norm": 2.2613751888275146, "learning_rate": 9.819666961082402e-06, "loss": 0.9044, "step": 379 }, { "epoch": 0.11365335726035591, "grad_norm": 2.1440529823303223, "learning_rate": 9.81837519381878e-06, "loss": 0.9281, "step": 380 }, { "epoch": 0.11395244504262, "grad_norm": 2.043168783187866, "learning_rate": 9.817078902016481e-06, "loss": 0.9878, "step": 381 }, { "epoch": 0.11425153282488411, "grad_norm": 1.9130514860153198, "learning_rate": 9.81577808689275e-06, "loss": 0.8108, "step": 382 }, { "epoch": 0.1145506206071482, "grad_norm": 2.1827640533447266, "learning_rate": 9.814472749669076e-06, "loss": 0.9171, "step": 383 }, { "epoch": 0.11484970838941229, "grad_norm": 2.5719187259674072, "learning_rate": 9.813162891571189e-06, "loss": 0.959, "step": 384 }, { "epoch": 0.11514879617167639, "grad_norm": 2.055058002471924, "learning_rate": 9.811848513829074e-06, "loss": 0.9112, "step": 385 }, { "epoch": 0.11544788395394048, "grad_norm": 2.073772668838501, "learning_rate": 9.810529617676952e-06, "loss": 0.9119, "step": 386 }, { "epoch": 0.11574697173620457, "grad_norm": 2.434068202972412, "learning_rate": 9.809206204353289e-06, "loss": 0.9187, "step": 387 }, { "epoch": 0.11604605951846866, "grad_norm": 1.9965574741363525, "learning_rate": 9.807878275100795e-06, "loss": 0.8995, "step": 388 }, { "epoch": 0.11634514730073277, "grad_norm": 2.3138413429260254, "learning_rate": 9.806545831166417e-06, "loss": 0.8858, "step": 389 }, { "epoch": 0.11664423508299686, "grad_norm": 2.789655923843384, "learning_rate": 9.805208873801346e-06, "loss": 0.9162, "step": 390 }, { "epoch": 0.11694332286526095, "grad_norm": 2.394583225250244, "learning_rate": 9.803867404261005e-06, "loss": 0.9331, "step": 391 }, { "epoch": 0.11724241064752505, "grad_norm": 2.4023385047912598, "learning_rate": 9.80252142380506e-06, "loss": 0.8203, "step": 392 }, { "epoch": 0.11754149842978914, "grad_norm": 2.34248685836792, "learning_rate": 9.80117093369741e-06, "loss": 0.8744, "step": 393 }, { "epoch": 0.11784058621205323, "grad_norm": 2.501880407333374, "learning_rate": 9.799815935206187e-06, "loss": 0.9569, "step": 394 }, { "epoch": 0.11813967399431734, "grad_norm": 1.9259960651397705, "learning_rate": 9.798456429603758e-06, "loss": 0.948, "step": 395 }, { "epoch": 0.11843876177658143, "grad_norm": 2.2183659076690674, "learning_rate": 9.797092418166725e-06, "loss": 0.8874, "step": 396 }, { "epoch": 0.11873784955884552, "grad_norm": 1.9987000226974487, "learning_rate": 9.795723902175918e-06, "loss": 0.9344, "step": 397 }, { "epoch": 0.11903693734110962, "grad_norm": 2.164144992828369, "learning_rate": 9.794350882916397e-06, "loss": 0.9386, "step": 398 }, { "epoch": 0.11933602512337371, "grad_norm": 1.9408130645751953, "learning_rate": 9.79297336167745e-06, "loss": 0.973, "step": 399 }, { "epoch": 0.1196351129056378, "grad_norm": 2.1518783569335938, "learning_rate": 9.791591339752596e-06, "loss": 0.9229, "step": 400 }, { "epoch": 0.11993420068790189, "grad_norm": 2.314598798751831, "learning_rate": 9.790204818439576e-06, "loss": 0.9118, "step": 401 }, { "epoch": 0.120233288470166, "grad_norm": 2.3080222606658936, "learning_rate": 9.788813799040358e-06, "loss": 0.9181, "step": 402 }, { "epoch": 0.12053237625243009, "grad_norm": 2.527000904083252, "learning_rate": 9.787418282861135e-06, "loss": 0.9358, "step": 403 }, { "epoch": 0.12083146403469418, "grad_norm": 2.4284911155700684, "learning_rate": 9.786018271212318e-06, "loss": 0.929, "step": 404 }, { "epoch": 0.12113055181695828, "grad_norm": 2.389965295791626, "learning_rate": 9.784613765408546e-06, "loss": 0.9229, "step": 405 }, { "epoch": 0.12142963959922237, "grad_norm": 2.519448757171631, "learning_rate": 9.783204766768672e-06, "loss": 0.9066, "step": 406 }, { "epoch": 0.12172872738148646, "grad_norm": 2.4015398025512695, "learning_rate": 9.781791276615774e-06, "loss": 0.9887, "step": 407 }, { "epoch": 0.12202781516375057, "grad_norm": 2.1792118549346924, "learning_rate": 9.780373296277137e-06, "loss": 0.8932, "step": 408 }, { "epoch": 0.12232690294601466, "grad_norm": 2.125664472579956, "learning_rate": 9.778950827084277e-06, "loss": 0.8232, "step": 409 }, { "epoch": 0.12262599072827875, "grad_norm": 1.7966945171356201, "learning_rate": 9.777523870372913e-06, "loss": 0.8866, "step": 410 }, { "epoch": 0.12292507851054285, "grad_norm": 2.1995155811309814, "learning_rate": 9.776092427482984e-06, "loss": 0.9495, "step": 411 }, { "epoch": 0.12322416629280694, "grad_norm": 2.0255017280578613, "learning_rate": 9.774656499758639e-06, "loss": 0.829, "step": 412 }, { "epoch": 0.12352325407507103, "grad_norm": 2.345156192779541, "learning_rate": 9.77321608854824e-06, "loss": 1.0094, "step": 413 }, { "epoch": 0.12382234185733512, "grad_norm": 1.9600709676742554, "learning_rate": 9.771771195204358e-06, "loss": 0.9223, "step": 414 }, { "epoch": 0.12412142963959923, "grad_norm": 2.002962350845337, "learning_rate": 9.770321821083774e-06, "loss": 0.8854, "step": 415 }, { "epoch": 0.12442051742186332, "grad_norm": 2.1382484436035156, "learning_rate": 9.768867967547472e-06, "loss": 0.9525, "step": 416 }, { "epoch": 0.1247196052041274, "grad_norm": 2.204545736312866, "learning_rate": 9.767409635960653e-06, "loss": 0.929, "step": 417 }, { "epoch": 0.1250186929863915, "grad_norm": 2.369882583618164, "learning_rate": 9.76594682769271e-06, "loss": 0.9076, "step": 418 }, { "epoch": 0.1253177807686556, "grad_norm": 1.8136152029037476, "learning_rate": 9.764479544117247e-06, "loss": 0.8654, "step": 419 }, { "epoch": 0.1256168685509197, "grad_norm": 1.985381841659546, "learning_rate": 9.76300778661207e-06, "loss": 0.917, "step": 420 }, { "epoch": 0.12591595633318378, "grad_norm": 2.0894107818603516, "learning_rate": 9.761531556559183e-06, "loss": 0.9042, "step": 421 }, { "epoch": 0.12621504411544787, "grad_norm": 3.225477933883667, "learning_rate": 9.760050855344795e-06, "loss": 0.8835, "step": 422 }, { "epoch": 0.126514131897712, "grad_norm": 2.0234105587005615, "learning_rate": 9.758565684359307e-06, "loss": 0.9486, "step": 423 }, { "epoch": 0.12681321967997608, "grad_norm": 2.0387184619903564, "learning_rate": 9.757076044997324e-06, "loss": 0.9653, "step": 424 }, { "epoch": 0.12711230746224017, "grad_norm": 2.5875580310821533, "learning_rate": 9.75558193865764e-06, "loss": 0.884, "step": 425 }, { "epoch": 0.12741139524450426, "grad_norm": 1.8660826683044434, "learning_rate": 9.754083366743249e-06, "loss": 0.8559, "step": 426 }, { "epoch": 0.12771048302676835, "grad_norm": 2.215099334716797, "learning_rate": 9.752580330661336e-06, "loss": 0.8741, "step": 427 }, { "epoch": 0.12800957080903244, "grad_norm": 1.8538357019424438, "learning_rate": 9.751072831823279e-06, "loss": 0.8119, "step": 428 }, { "epoch": 0.12830865859129656, "grad_norm": 2.0378899574279785, "learning_rate": 9.749560871644643e-06, "loss": 0.8897, "step": 429 }, { "epoch": 0.12860774637356065, "grad_norm": 2.5482470989227295, "learning_rate": 9.748044451545188e-06, "loss": 0.9485, "step": 430 }, { "epoch": 0.12890683415582474, "grad_norm": 2.2782816886901855, "learning_rate": 9.746523572948857e-06, "loss": 0.8913, "step": 431 }, { "epoch": 0.12920592193808883, "grad_norm": 2.7472846508026123, "learning_rate": 9.744998237283785e-06, "loss": 0.9277, "step": 432 }, { "epoch": 0.12950500972035292, "grad_norm": 2.572721004486084, "learning_rate": 9.743468445982284e-06, "loss": 0.9103, "step": 433 }, { "epoch": 0.129804097502617, "grad_norm": 2.1014654636383057, "learning_rate": 9.741934200480857e-06, "loss": 0.9065, "step": 434 }, { "epoch": 0.1301031852848811, "grad_norm": 2.0371885299682617, "learning_rate": 9.740395502220192e-06, "loss": 0.9803, "step": 435 }, { "epoch": 0.13040227306714522, "grad_norm": 2.1743862628936768, "learning_rate": 9.738852352645145e-06, "loss": 0.9115, "step": 436 }, { "epoch": 0.1307013608494093, "grad_norm": 1.9938279390335083, "learning_rate": 9.737304753204767e-06, "loss": 0.9346, "step": 437 }, { "epoch": 0.1310004486316734, "grad_norm": 2.049802541732788, "learning_rate": 9.735752705352278e-06, "loss": 0.9154, "step": 438 }, { "epoch": 0.1312995364139375, "grad_norm": 2.1544899940490723, "learning_rate": 9.734196210545079e-06, "loss": 0.8902, "step": 439 }, { "epoch": 0.13159862419620158, "grad_norm": 2.108680009841919, "learning_rate": 9.732635270244745e-06, "loss": 0.8939, "step": 440 }, { "epoch": 0.13189771197846567, "grad_norm": 2.638857364654541, "learning_rate": 9.731069885917029e-06, "loss": 0.8587, "step": 441 }, { "epoch": 0.13219679976072976, "grad_norm": 2.073992967605591, "learning_rate": 9.729500059031851e-06, "loss": 0.8961, "step": 442 }, { "epoch": 0.13249588754299388, "grad_norm": 2.0375585556030273, "learning_rate": 9.727925791063306e-06, "loss": 0.9135, "step": 443 }, { "epoch": 0.13279497532525797, "grad_norm": 6.514242172241211, "learning_rate": 9.726347083489661e-06, "loss": 0.8513, "step": 444 }, { "epoch": 0.13309406310752206, "grad_norm": 1.9424322843551636, "learning_rate": 9.724763937793352e-06, "loss": 0.8899, "step": 445 }, { "epoch": 0.13339315088978615, "grad_norm": 2.1400392055511475, "learning_rate": 9.723176355460978e-06, "loss": 0.9055, "step": 446 }, { "epoch": 0.13369223867205024, "grad_norm": 2.0108067989349365, "learning_rate": 9.721584337983303e-06, "loss": 0.8789, "step": 447 }, { "epoch": 0.13399132645431433, "grad_norm": 2.20151424407959, "learning_rate": 9.719987886855264e-06, "loss": 0.8838, "step": 448 }, { "epoch": 0.13429041423657845, "grad_norm": 2.425819158554077, "learning_rate": 9.718387003575957e-06, "loss": 0.8966, "step": 449 }, { "epoch": 0.13458950201884254, "grad_norm": 2.036841630935669, "learning_rate": 9.716781689648638e-06, "loss": 0.8732, "step": 450 }, { "epoch": 0.13488858980110663, "grad_norm": 2.2389724254608154, "learning_rate": 9.715171946580724e-06, "loss": 0.9227, "step": 451 }, { "epoch": 0.13518767758337072, "grad_norm": 2.315850257873535, "learning_rate": 9.713557775883793e-06, "loss": 0.9268, "step": 452 }, { "epoch": 0.1354867653656348, "grad_norm": 2.277662515640259, "learning_rate": 9.71193917907358e-06, "loss": 0.9822, "step": 453 }, { "epoch": 0.1357858531478989, "grad_norm": 3.9649994373321533, "learning_rate": 9.710316157669972e-06, "loss": 0.9136, "step": 454 }, { "epoch": 0.136084940930163, "grad_norm": 1.9879143238067627, "learning_rate": 9.708688713197021e-06, "loss": 0.8252, "step": 455 }, { "epoch": 0.1363840287124271, "grad_norm": 2.0324530601501465, "learning_rate": 9.707056847182921e-06, "loss": 0.981, "step": 456 }, { "epoch": 0.1366831164946912, "grad_norm": 1.916619062423706, "learning_rate": 9.705420561160024e-06, "loss": 0.881, "step": 457 }, { "epoch": 0.1369822042769553, "grad_norm": 1.952560305595398, "learning_rate": 9.703779856664833e-06, "loss": 0.9835, "step": 458 }, { "epoch": 0.13728129205921938, "grad_norm": 2.0453929901123047, "learning_rate": 9.702134735237994e-06, "loss": 0.9172, "step": 459 }, { "epoch": 0.13758037984148347, "grad_norm": 2.246044397354126, "learning_rate": 9.700485198424307e-06, "loss": 0.8492, "step": 460 }, { "epoch": 0.13787946762374756, "grad_norm": 2.027791738510132, "learning_rate": 9.69883124777272e-06, "loss": 0.9302, "step": 461 }, { "epoch": 0.13817855540601168, "grad_norm": 2.3756332397460938, "learning_rate": 9.697172884836315e-06, "loss": 0.8986, "step": 462 }, { "epoch": 0.13847764318827577, "grad_norm": 2.2262258529663086, "learning_rate": 9.695510111172329e-06, "loss": 0.915, "step": 463 }, { "epoch": 0.13877673097053986, "grad_norm": 2.0052430629730225, "learning_rate": 9.693842928342132e-06, "loss": 0.9191, "step": 464 }, { "epoch": 0.13907581875280395, "grad_norm": 2.0470070838928223, "learning_rate": 9.69217133791124e-06, "loss": 0.9042, "step": 465 }, { "epoch": 0.13937490653506804, "grad_norm": 2.083026647567749, "learning_rate": 9.690495341449304e-06, "loss": 0.9028, "step": 466 }, { "epoch": 0.13967399431733213, "grad_norm": 2.1527187824249268, "learning_rate": 9.688814940530115e-06, "loss": 0.9507, "step": 467 }, { "epoch": 0.13997308209959622, "grad_norm": 2.411761999130249, "learning_rate": 9.6871301367316e-06, "loss": 0.9668, "step": 468 }, { "epoch": 0.14027216988186034, "grad_norm": 2.144348382949829, "learning_rate": 9.68544093163582e-06, "loss": 0.8923, "step": 469 }, { "epoch": 0.14057125766412443, "grad_norm": 2.255469799041748, "learning_rate": 9.683747326828962e-06, "loss": 0.8916, "step": 470 }, { "epoch": 0.14087034544638852, "grad_norm": 2.2394800186157227, "learning_rate": 9.682049323901358e-06, "loss": 0.9379, "step": 471 }, { "epoch": 0.1411694332286526, "grad_norm": 1.9188294410705566, "learning_rate": 9.680346924447458e-06, "loss": 0.9105, "step": 472 }, { "epoch": 0.1414685210109167, "grad_norm": 2.2391576766967773, "learning_rate": 9.678640130065846e-06, "loss": 0.9572, "step": 473 }, { "epoch": 0.1417676087931808, "grad_norm": 2.2204818725585938, "learning_rate": 9.676928942359233e-06, "loss": 0.9713, "step": 474 }, { "epoch": 0.1420666965754449, "grad_norm": 2.006396532058716, "learning_rate": 9.675213362934454e-06, "loss": 0.9407, "step": 475 }, { "epoch": 0.142365784357709, "grad_norm": 2.2210874557495117, "learning_rate": 9.673493393402466e-06, "loss": 0.8588, "step": 476 }, { "epoch": 0.1426648721399731, "grad_norm": 2.752349853515625, "learning_rate": 9.671769035378352e-06, "loss": 0.891, "step": 477 }, { "epoch": 0.14296395992223718, "grad_norm": 2.542809247970581, "learning_rate": 9.670040290481315e-06, "loss": 0.9169, "step": 478 }, { "epoch": 0.14326304770450127, "grad_norm": 2.1507561206817627, "learning_rate": 9.668307160334676e-06, "loss": 0.9013, "step": 479 }, { "epoch": 0.14356213548676536, "grad_norm": 2.028667688369751, "learning_rate": 9.666569646565875e-06, "loss": 0.9, "step": 480 }, { "epoch": 0.14386122326902945, "grad_norm": 3.8036603927612305, "learning_rate": 9.664827750806465e-06, "loss": 0.8992, "step": 481 }, { "epoch": 0.14416031105129357, "grad_norm": 1.9214345216751099, "learning_rate": 9.663081474692123e-06, "loss": 0.9374, "step": 482 }, { "epoch": 0.14445939883355766, "grad_norm": 1.7897154092788696, "learning_rate": 9.661330819862626e-06, "loss": 0.9233, "step": 483 }, { "epoch": 0.14475848661582175, "grad_norm": 2.2490832805633545, "learning_rate": 9.659575787961872e-06, "loss": 0.9212, "step": 484 }, { "epoch": 0.14505757439808584, "grad_norm": 2.0244271755218506, "learning_rate": 9.657816380637868e-06, "loss": 0.832, "step": 485 }, { "epoch": 0.14535666218034993, "grad_norm": 2.1289620399475098, "learning_rate": 9.656052599542728e-06, "loss": 0.9329, "step": 486 }, { "epoch": 0.14565574996261402, "grad_norm": 1.986251950263977, "learning_rate": 9.654284446332673e-06, "loss": 0.933, "step": 487 }, { "epoch": 0.14595483774487814, "grad_norm": 2.6963627338409424, "learning_rate": 9.652511922668029e-06, "loss": 0.9226, "step": 488 }, { "epoch": 0.14625392552714223, "grad_norm": 2.0356945991516113, "learning_rate": 9.650735030213228e-06, "loss": 0.8273, "step": 489 }, { "epoch": 0.14655301330940632, "grad_norm": 2.0289361476898193, "learning_rate": 9.648953770636801e-06, "loss": 0.8608, "step": 490 }, { "epoch": 0.1468521010916704, "grad_norm": 2.0835697650909424, "learning_rate": 9.647168145611385e-06, "loss": 0.9083, "step": 491 }, { "epoch": 0.1471511888739345, "grad_norm": 2.1117544174194336, "learning_rate": 9.645378156813709e-06, "loss": 0.9325, "step": 492 }, { "epoch": 0.1474502766561986, "grad_norm": 2.056478500366211, "learning_rate": 9.643583805924608e-06, "loss": 0.8632, "step": 493 }, { "epoch": 0.14774936443846268, "grad_norm": 2.023174524307251, "learning_rate": 9.641785094629008e-06, "loss": 0.9283, "step": 494 }, { "epoch": 0.1480484522207268, "grad_norm": 2.264678955078125, "learning_rate": 9.639982024615928e-06, "loss": 0.93, "step": 495 }, { "epoch": 0.14834754000299089, "grad_norm": 2.2752087116241455, "learning_rate": 9.638174597578486e-06, "loss": 0.981, "step": 496 }, { "epoch": 0.14864662778525498, "grad_norm": 2.543433666229248, "learning_rate": 9.636362815213884e-06, "loss": 0.9396, "step": 497 }, { "epoch": 0.14894571556751907, "grad_norm": 2.147486925125122, "learning_rate": 9.63454667922342e-06, "loss": 0.8901, "step": 498 }, { "epoch": 0.14924480334978316, "grad_norm": 2.172703742980957, "learning_rate": 9.632726191312475e-06, "loss": 0.8328, "step": 499 }, { "epoch": 0.14954389113204725, "grad_norm": 2.3645336627960205, "learning_rate": 9.630901353190522e-06, "loss": 0.9618, "step": 500 }, { "epoch": 0.14984297891431134, "grad_norm": 2.0667076110839844, "learning_rate": 9.629072166571114e-06, "loss": 0.9379, "step": 501 }, { "epoch": 0.15014206669657545, "grad_norm": 2.072631359100342, "learning_rate": 9.627238633171889e-06, "loss": 0.8735, "step": 502 }, { "epoch": 0.15044115447883954, "grad_norm": 2.2678699493408203, "learning_rate": 9.625400754714568e-06, "loss": 0.9697, "step": 503 }, { "epoch": 0.15074024226110364, "grad_norm": 1.9290229082107544, "learning_rate": 9.623558532924952e-06, "loss": 0.8631, "step": 504 }, { "epoch": 0.15103933004336773, "grad_norm": 2.1237919330596924, "learning_rate": 9.621711969532917e-06, "loss": 0.9364, "step": 505 }, { "epoch": 0.15133841782563182, "grad_norm": 2.0870120525360107, "learning_rate": 9.61986106627242e-06, "loss": 0.8735, "step": 506 }, { "epoch": 0.1516375056078959, "grad_norm": 2.251906394958496, "learning_rate": 9.618005824881491e-06, "loss": 0.9068, "step": 507 }, { "epoch": 0.15193659339016002, "grad_norm": 2.124857187271118, "learning_rate": 9.616146247102233e-06, "loss": 0.9219, "step": 508 }, { "epoch": 0.15223568117242411, "grad_norm": 2.1165618896484375, "learning_rate": 9.614282334680827e-06, "loss": 0.8547, "step": 509 }, { "epoch": 0.1525347689546882, "grad_norm": 2.0622432231903076, "learning_rate": 9.612414089367512e-06, "loss": 0.9095, "step": 510 }, { "epoch": 0.1528338567369523, "grad_norm": 2.107755422592163, "learning_rate": 9.61054151291661e-06, "loss": 0.9151, "step": 511 }, { "epoch": 0.15313294451921639, "grad_norm": 2.351363182067871, "learning_rate": 9.608664607086497e-06, "loss": 0.9402, "step": 512 }, { "epoch": 0.15343203230148048, "grad_norm": 2.2824251651763916, "learning_rate": 9.606783373639626e-06, "loss": 0.9654, "step": 513 }, { "epoch": 0.15373112008374457, "grad_norm": 2.2703168392181396, "learning_rate": 9.604897814342504e-06, "loss": 0.9265, "step": 514 }, { "epoch": 0.15403020786600868, "grad_norm": 2.225274085998535, "learning_rate": 9.603007930965706e-06, "loss": 0.9416, "step": 515 }, { "epoch": 0.15432929564827277, "grad_norm": 2.1419920921325684, "learning_rate": 9.601113725283864e-06, "loss": 0.9344, "step": 516 }, { "epoch": 0.15462838343053686, "grad_norm": 1.9632388353347778, "learning_rate": 9.599215199075674e-06, "loss": 0.8984, "step": 517 }, { "epoch": 0.15492747121280095, "grad_norm": 3.509446859359741, "learning_rate": 9.597312354123882e-06, "loss": 0.9048, "step": 518 }, { "epoch": 0.15522655899506504, "grad_norm": 2.132478713989258, "learning_rate": 9.595405192215293e-06, "loss": 0.8974, "step": 519 }, { "epoch": 0.15552564677732914, "grad_norm": 2.0488393306732178, "learning_rate": 9.593493715140767e-06, "loss": 0.9049, "step": 520 }, { "epoch": 0.15582473455959325, "grad_norm": 2.0553998947143555, "learning_rate": 9.591577924695213e-06, "loss": 0.9236, "step": 521 }, { "epoch": 0.15612382234185734, "grad_norm": 2.128246784210205, "learning_rate": 9.589657822677592e-06, "loss": 0.8326, "step": 522 }, { "epoch": 0.15642291012412143, "grad_norm": 2.176199436187744, "learning_rate": 9.587733410890916e-06, "loss": 0.891, "step": 523 }, { "epoch": 0.15672199790638552, "grad_norm": 2.3568549156188965, "learning_rate": 9.585804691142237e-06, "loss": 0.9571, "step": 524 }, { "epoch": 0.15702108568864961, "grad_norm": 2.49210524559021, "learning_rate": 9.583871665242659e-06, "loss": 0.8746, "step": 525 }, { "epoch": 0.1573201734709137, "grad_norm": 3.3880116939544678, "learning_rate": 9.581934335007326e-06, "loss": 0.8414, "step": 526 }, { "epoch": 0.1576192612531778, "grad_norm": 2.1258962154388428, "learning_rate": 9.579992702255428e-06, "loss": 0.8628, "step": 527 }, { "epoch": 0.1579183490354419, "grad_norm": 2.127197742462158, "learning_rate": 9.57804676881019e-06, "loss": 0.9459, "step": 528 }, { "epoch": 0.158217436817706, "grad_norm": 2.2194063663482666, "learning_rate": 9.576096536498875e-06, "loss": 1.0308, "step": 529 }, { "epoch": 0.1585165245999701, "grad_norm": 2.1514530181884766, "learning_rate": 9.574142007152789e-06, "loss": 0.9136, "step": 530 }, { "epoch": 0.15881561238223418, "grad_norm": 1.793345332145691, "learning_rate": 9.572183182607269e-06, "loss": 0.8975, "step": 531 }, { "epoch": 0.15911470016449827, "grad_norm": 1.9824508428573608, "learning_rate": 9.570220064701686e-06, "loss": 0.8628, "step": 532 }, { "epoch": 0.15941378794676236, "grad_norm": 2.2090091705322266, "learning_rate": 9.568252655279438e-06, "loss": 0.8896, "step": 533 }, { "epoch": 0.15971287572902648, "grad_norm": 2.103137254714966, "learning_rate": 9.566280956187961e-06, "loss": 0.9521, "step": 534 }, { "epoch": 0.16001196351129057, "grad_norm": 2.147822618484497, "learning_rate": 9.564304969278714e-06, "loss": 0.921, "step": 535 }, { "epoch": 0.16031105129355466, "grad_norm": 2.009065866470337, "learning_rate": 9.562324696407181e-06, "loss": 0.9283, "step": 536 }, { "epoch": 0.16061013907581875, "grad_norm": 2.088993787765503, "learning_rate": 9.560340139432877e-06, "loss": 0.9135, "step": 537 }, { "epoch": 0.16090922685808284, "grad_norm": 2.1295697689056396, "learning_rate": 9.558351300219335e-06, "loss": 0.9662, "step": 538 }, { "epoch": 0.16120831464034693, "grad_norm": 1.9403108358383179, "learning_rate": 9.556358180634105e-06, "loss": 0.8592, "step": 539 }, { "epoch": 0.16150740242261102, "grad_norm": 2.2445156574249268, "learning_rate": 9.554360782548766e-06, "loss": 0.9059, "step": 540 }, { "epoch": 0.16180649020487514, "grad_norm": 2.686187267303467, "learning_rate": 9.55235910783891e-06, "loss": 0.9262, "step": 541 }, { "epoch": 0.16210557798713923, "grad_norm": 2.2117042541503906, "learning_rate": 9.550353158384142e-06, "loss": 0.8911, "step": 542 }, { "epoch": 0.16240466576940332, "grad_norm": 2.143015146255493, "learning_rate": 9.548342936068085e-06, "loss": 0.938, "step": 543 }, { "epoch": 0.1627037535516674, "grad_norm": 2.17206072807312, "learning_rate": 9.54632844277837e-06, "loss": 0.859, "step": 544 }, { "epoch": 0.1630028413339315, "grad_norm": 2.342336416244507, "learning_rate": 9.544309680406648e-06, "loss": 0.9606, "step": 545 }, { "epoch": 0.1633019291161956, "grad_norm": 2.310330629348755, "learning_rate": 9.542286650848567e-06, "loss": 0.901, "step": 546 }, { "epoch": 0.1636010168984597, "grad_norm": 2.1171629428863525, "learning_rate": 9.540259356003787e-06, "loss": 0.9386, "step": 547 }, { "epoch": 0.1639001046807238, "grad_norm": 1.9653538465499878, "learning_rate": 9.538227797775976e-06, "loss": 0.9182, "step": 548 }, { "epoch": 0.1641991924629879, "grad_norm": 2.1893310546875, "learning_rate": 9.536191978072802e-06, "loss": 0.8713, "step": 549 }, { "epoch": 0.16449828024525198, "grad_norm": 2.3446593284606934, "learning_rate": 9.534151898805934e-06, "loss": 0.9695, "step": 550 }, { "epoch": 0.16479736802751607, "grad_norm": 1.9539304971694946, "learning_rate": 9.532107561891044e-06, "loss": 0.9209, "step": 551 }, { "epoch": 0.16509645580978016, "grad_norm": 2.1393189430236816, "learning_rate": 9.5300589692478e-06, "loss": 0.9147, "step": 552 }, { "epoch": 0.16539554359204425, "grad_norm": 2.023454189300537, "learning_rate": 9.528006122799864e-06, "loss": 0.9058, "step": 553 }, { "epoch": 0.16569463137430837, "grad_norm": 2.2129530906677246, "learning_rate": 9.525949024474897e-06, "loss": 0.8832, "step": 554 }, { "epoch": 0.16599371915657246, "grad_norm": 2.2695486545562744, "learning_rate": 9.52388767620455e-06, "loss": 0.9015, "step": 555 }, { "epoch": 0.16629280693883655, "grad_norm": 2.0867884159088135, "learning_rate": 9.521822079924465e-06, "loss": 0.9587, "step": 556 }, { "epoch": 0.16659189472110064, "grad_norm": 2.057535409927368, "learning_rate": 9.519752237574273e-06, "loss": 0.876, "step": 557 }, { "epoch": 0.16689098250336473, "grad_norm": 2.349473476409912, "learning_rate": 9.517678151097591e-06, "loss": 0.8938, "step": 558 }, { "epoch": 0.16719007028562882, "grad_norm": 1.9976444244384766, "learning_rate": 9.515599822442025e-06, "loss": 0.8787, "step": 559 }, { "epoch": 0.16748915806789294, "grad_norm": 1.8872711658477783, "learning_rate": 9.51351725355916e-06, "loss": 0.8972, "step": 560 }, { "epoch": 0.16778824585015703, "grad_norm": 2.192647933959961, "learning_rate": 9.511430446404566e-06, "loss": 0.8992, "step": 561 }, { "epoch": 0.16808733363242112, "grad_norm": 1.963050127029419, "learning_rate": 9.50933940293779e-06, "loss": 0.8981, "step": 562 }, { "epoch": 0.1683864214146852, "grad_norm": 2.254265069961548, "learning_rate": 9.507244125122358e-06, "loss": 0.9137, "step": 563 }, { "epoch": 0.1686855091969493, "grad_norm": 2.055408477783203, "learning_rate": 9.505144614925776e-06, "loss": 0.8642, "step": 564 }, { "epoch": 0.1689845969792134, "grad_norm": 2.1534032821655273, "learning_rate": 9.503040874319519e-06, "loss": 0.853, "step": 565 }, { "epoch": 0.16928368476147748, "grad_norm": 2.0764992237091064, "learning_rate": 9.500932905279034e-06, "loss": 0.9297, "step": 566 }, { "epoch": 0.1695827725437416, "grad_norm": 2.5408670902252197, "learning_rate": 9.498820709783743e-06, "loss": 0.9017, "step": 567 }, { "epoch": 0.1698818603260057, "grad_norm": 1.9807052612304688, "learning_rate": 9.496704289817035e-06, "loss": 0.9036, "step": 568 }, { "epoch": 0.17018094810826978, "grad_norm": 2.0200202465057373, "learning_rate": 9.494583647366264e-06, "loss": 0.9276, "step": 569 }, { "epoch": 0.17048003589053387, "grad_norm": 2.6969799995422363, "learning_rate": 9.492458784422751e-06, "loss": 0.9299, "step": 570 }, { "epoch": 0.17077912367279796, "grad_norm": 2.13474440574646, "learning_rate": 9.49032970298178e-06, "loss": 0.8605, "step": 571 }, { "epoch": 0.17107821145506205, "grad_norm": 2.113443613052368, "learning_rate": 9.488196405042596e-06, "loss": 0.899, "step": 572 }, { "epoch": 0.17137729923732614, "grad_norm": 2.001905918121338, "learning_rate": 9.486058892608401e-06, "loss": 0.9299, "step": 573 }, { "epoch": 0.17167638701959026, "grad_norm": 2.0649333000183105, "learning_rate": 9.483917167686358e-06, "loss": 0.9047, "step": 574 }, { "epoch": 0.17197547480185435, "grad_norm": 2.1051251888275146, "learning_rate": 9.481771232287585e-06, "loss": 0.9602, "step": 575 }, { "epoch": 0.17227456258411844, "grad_norm": 2.1703364849090576, "learning_rate": 9.479621088427152e-06, "loss": 0.9516, "step": 576 }, { "epoch": 0.17257365036638253, "grad_norm": 2.0370984077453613, "learning_rate": 9.47746673812408e-06, "loss": 0.9349, "step": 577 }, { "epoch": 0.17287273814864662, "grad_norm": 2.011080265045166, "learning_rate": 9.475308183401347e-06, "loss": 0.9044, "step": 578 }, { "epoch": 0.1731718259309107, "grad_norm": 2.316357135772705, "learning_rate": 9.473145426285869e-06, "loss": 0.9124, "step": 579 }, { "epoch": 0.17347091371317483, "grad_norm": 1.9463818073272705, "learning_rate": 9.470978468808514e-06, "loss": 0.8763, "step": 580 }, { "epoch": 0.17377000149543892, "grad_norm": 1.968659520149231, "learning_rate": 9.46880731300409e-06, "loss": 0.9268, "step": 581 }, { "epoch": 0.174069089277703, "grad_norm": 2.0599594116210938, "learning_rate": 9.466631960911358e-06, "loss": 0.9104, "step": 582 }, { "epoch": 0.1743681770599671, "grad_norm": 2.0809824466705322, "learning_rate": 9.464452414573004e-06, "loss": 0.8891, "step": 583 }, { "epoch": 0.1746672648422312, "grad_norm": 2.1312577724456787, "learning_rate": 9.462268676035664e-06, "loss": 0.8817, "step": 584 }, { "epoch": 0.17496635262449528, "grad_norm": 2.0049941539764404, "learning_rate": 9.460080747349907e-06, "loss": 0.9531, "step": 585 }, { "epoch": 0.17526544040675937, "grad_norm": 2.150425910949707, "learning_rate": 9.457888630570234e-06, "loss": 0.9098, "step": 586 }, { "epoch": 0.1755645281890235, "grad_norm": 2.167587995529175, "learning_rate": 9.455692327755082e-06, "loss": 0.9588, "step": 587 }, { "epoch": 0.17586361597128758, "grad_norm": 2.4127707481384277, "learning_rate": 9.45349184096682e-06, "loss": 0.8625, "step": 588 }, { "epoch": 0.17616270375355167, "grad_norm": 2.0511698722839355, "learning_rate": 9.451287172271741e-06, "loss": 0.8822, "step": 589 }, { "epoch": 0.17646179153581576, "grad_norm": 2.5312917232513428, "learning_rate": 9.449078323740066e-06, "loss": 0.8686, "step": 590 }, { "epoch": 0.17676087931807985, "grad_norm": 2.2925243377685547, "learning_rate": 9.446865297445947e-06, "loss": 0.918, "step": 591 }, { "epoch": 0.17705996710034394, "grad_norm": 2.0267927646636963, "learning_rate": 9.444648095467453e-06, "loss": 0.9145, "step": 592 }, { "epoch": 0.17735905488260806, "grad_norm": 2.0410044193267822, "learning_rate": 9.442426719886572e-06, "loss": 0.8859, "step": 593 }, { "epoch": 0.17765814266487215, "grad_norm": 2.196380853652954, "learning_rate": 9.440201172789218e-06, "loss": 0.8862, "step": 594 }, { "epoch": 0.17795723044713624, "grad_norm": 1.9851776361465454, "learning_rate": 9.437971456265218e-06, "loss": 0.902, "step": 595 }, { "epoch": 0.17825631822940033, "grad_norm": 2.451723337173462, "learning_rate": 9.435737572408316e-06, "loss": 0.9367, "step": 596 }, { "epoch": 0.17855540601166442, "grad_norm": 2.0080554485321045, "learning_rate": 9.433499523316165e-06, "loss": 0.8867, "step": 597 }, { "epoch": 0.1788544937939285, "grad_norm": 1.9999934434890747, "learning_rate": 9.431257311090336e-06, "loss": 0.925, "step": 598 }, { "epoch": 0.1791535815761926, "grad_norm": 2.4421370029449463, "learning_rate": 9.429010937836302e-06, "loss": 0.8891, "step": 599 }, { "epoch": 0.17945266935845672, "grad_norm": 2.145138740539551, "learning_rate": 9.426760405663448e-06, "loss": 0.9917, "step": 600 }, { "epoch": 0.1797517571407208, "grad_norm": 2.2078468799591064, "learning_rate": 9.424505716685064e-06, "loss": 0.9157, "step": 601 }, { "epoch": 0.1800508449229849, "grad_norm": 2.0074973106384277, "learning_rate": 9.422246873018343e-06, "loss": 0.9333, "step": 602 }, { "epoch": 0.180349932705249, "grad_norm": 2.1012392044067383, "learning_rate": 9.419983876784378e-06, "loss": 0.8795, "step": 603 }, { "epoch": 0.18064902048751308, "grad_norm": 2.399244785308838, "learning_rate": 9.41771673010816e-06, "loss": 0.9087, "step": 604 }, { "epoch": 0.18094810826977717, "grad_norm": 2.1384222507476807, "learning_rate": 9.415445435118581e-06, "loss": 0.9576, "step": 605 }, { "epoch": 0.1812471960520413, "grad_norm": 2.3688583374023438, "learning_rate": 9.41316999394843e-06, "loss": 0.987, "step": 606 }, { "epoch": 0.18154628383430538, "grad_norm": 2.560091495513916, "learning_rate": 9.410890408734381e-06, "loss": 0.9409, "step": 607 }, { "epoch": 0.18184537161656947, "grad_norm": 2.1901168823242188, "learning_rate": 9.408606681617006e-06, "loss": 0.9444, "step": 608 }, { "epoch": 0.18214445939883356, "grad_norm": 1.9872572422027588, "learning_rate": 9.406318814740767e-06, "loss": 0.8859, "step": 609 }, { "epoch": 0.18244354718109765, "grad_norm": 2.624647378921509, "learning_rate": 9.404026810254007e-06, "loss": 0.89, "step": 610 }, { "epoch": 0.18274263496336174, "grad_norm": 2.0227911472320557, "learning_rate": 9.401730670308963e-06, "loss": 0.9622, "step": 611 }, { "epoch": 0.18304172274562583, "grad_norm": 2.027820587158203, "learning_rate": 9.399430397061746e-06, "loss": 0.9087, "step": 612 }, { "epoch": 0.18334081052788995, "grad_norm": 1.8522744178771973, "learning_rate": 9.397125992672358e-06, "loss": 0.8522, "step": 613 }, { "epoch": 0.18363989831015404, "grad_norm": 2.2822511196136475, "learning_rate": 9.394817459304671e-06, "loss": 0.8938, "step": 614 }, { "epoch": 0.18393898609241813, "grad_norm": 2.043996810913086, "learning_rate": 9.392504799126439e-06, "loss": 0.8616, "step": 615 }, { "epoch": 0.18423807387468222, "grad_norm": 2.0634210109710693, "learning_rate": 9.39018801430929e-06, "loss": 0.947, "step": 616 }, { "epoch": 0.1845371616569463, "grad_norm": 2.423686981201172, "learning_rate": 9.387867107028727e-06, "loss": 0.8957, "step": 617 }, { "epoch": 0.1848362494392104, "grad_norm": 2.285275936126709, "learning_rate": 9.385542079464123e-06, "loss": 0.9417, "step": 618 }, { "epoch": 0.18513533722147452, "grad_norm": 2.142771005630493, "learning_rate": 9.383212933798718e-06, "loss": 0.9038, "step": 619 }, { "epoch": 0.1854344250037386, "grad_norm": 2.4208383560180664, "learning_rate": 9.38087967221962e-06, "loss": 0.9003, "step": 620 }, { "epoch": 0.1857335127860027, "grad_norm": 2.3153443336486816, "learning_rate": 9.378542296917804e-06, "loss": 0.8711, "step": 621 }, { "epoch": 0.1860326005682668, "grad_norm": 2.1042747497558594, "learning_rate": 9.376200810088108e-06, "loss": 0.8473, "step": 622 }, { "epoch": 0.18633168835053088, "grad_norm": 2.0680673122406006, "learning_rate": 9.373855213929227e-06, "loss": 0.9215, "step": 623 }, { "epoch": 0.18663077613279497, "grad_norm": 2.872378349304199, "learning_rate": 9.371505510643714e-06, "loss": 0.8913, "step": 624 }, { "epoch": 0.18692986391505906, "grad_norm": 2.1997485160827637, "learning_rate": 9.369151702437987e-06, "loss": 0.9065, "step": 625 }, { "epoch": 0.18722895169732318, "grad_norm": 2.2325809001922607, "learning_rate": 9.366793791522308e-06, "loss": 0.9608, "step": 626 }, { "epoch": 0.18752803947958727, "grad_norm": 2.0623133182525635, "learning_rate": 9.364431780110801e-06, "loss": 0.8911, "step": 627 }, { "epoch": 0.18782712726185136, "grad_norm": 2.525805711746216, "learning_rate": 9.362065670421434e-06, "loss": 0.9205, "step": 628 }, { "epoch": 0.18812621504411545, "grad_norm": 2.136258840560913, "learning_rate": 9.359695464676025e-06, "loss": 0.8994, "step": 629 }, { "epoch": 0.18842530282637954, "grad_norm": 2.0483880043029785, "learning_rate": 9.35732116510024e-06, "loss": 0.9125, "step": 630 }, { "epoch": 0.18872439060864363, "grad_norm": 2.4517016410827637, "learning_rate": 9.354942773923588e-06, "loss": 0.9408, "step": 631 }, { "epoch": 0.18902347839090772, "grad_norm": 2.0407121181488037, "learning_rate": 9.352560293379417e-06, "loss": 0.8571, "step": 632 }, { "epoch": 0.18932256617317184, "grad_norm": 1.9562433958053589, "learning_rate": 9.350173725704922e-06, "loss": 0.9173, "step": 633 }, { "epoch": 0.18962165395543593, "grad_norm": 2.233436346054077, "learning_rate": 9.34778307314113e-06, "loss": 0.9056, "step": 634 }, { "epoch": 0.18992074173770002, "grad_norm": 2.114581346511841, "learning_rate": 9.345388337932906e-06, "loss": 0.9336, "step": 635 }, { "epoch": 0.1902198295199641, "grad_norm": 1.9790080785751343, "learning_rate": 9.342989522328947e-06, "loss": 0.8843, "step": 636 }, { "epoch": 0.1905189173022282, "grad_norm": 2.2083511352539062, "learning_rate": 9.340586628581783e-06, "loss": 0.9028, "step": 637 }, { "epoch": 0.1908180050844923, "grad_norm": 2.4242773056030273, "learning_rate": 9.338179658947774e-06, "loss": 0.923, "step": 638 }, { "epoch": 0.1911170928667564, "grad_norm": 2.216334104537964, "learning_rate": 9.335768615687108e-06, "loss": 0.8509, "step": 639 }, { "epoch": 0.1914161806490205, "grad_norm": 1.9585851430892944, "learning_rate": 9.333353501063796e-06, "loss": 0.9221, "step": 640 }, { "epoch": 0.19171526843128459, "grad_norm": 2.220524549484253, "learning_rate": 9.330934317345673e-06, "loss": 0.8628, "step": 641 }, { "epoch": 0.19201435621354868, "grad_norm": 2.2784574031829834, "learning_rate": 9.328511066804391e-06, "loss": 0.9249, "step": 642 }, { "epoch": 0.19231344399581277, "grad_norm": 1.8171837329864502, "learning_rate": 9.32608375171543e-06, "loss": 0.8828, "step": 643 }, { "epoch": 0.19261253177807686, "grad_norm": 2.1215155124664307, "learning_rate": 9.32365237435808e-06, "loss": 0.9119, "step": 644 }, { "epoch": 0.19291161956034095, "grad_norm": 1.9350662231445312, "learning_rate": 9.321216937015446e-06, "loss": 0.9257, "step": 645 }, { "epoch": 0.19321070734260506, "grad_norm": 2.0060207843780518, "learning_rate": 9.318777441974446e-06, "loss": 1.0029, "step": 646 }, { "epoch": 0.19350979512486916, "grad_norm": 2.2231523990631104, "learning_rate": 9.316333891525809e-06, "loss": 0.8988, "step": 647 }, { "epoch": 0.19380888290713325, "grad_norm": 2.4177989959716797, "learning_rate": 9.313886287964072e-06, "loss": 0.9359, "step": 648 }, { "epoch": 0.19410797068939734, "grad_norm": 2.2958145141601562, "learning_rate": 9.311434633587577e-06, "loss": 0.9104, "step": 649 }, { "epoch": 0.19440705847166143, "grad_norm": 2.0754222869873047, "learning_rate": 9.308978930698472e-06, "loss": 0.8623, "step": 650 }, { "epoch": 0.19470614625392552, "grad_norm": 2.0365681648254395, "learning_rate": 9.306519181602704e-06, "loss": 0.9348, "step": 651 }, { "epoch": 0.19500523403618963, "grad_norm": 2.428921699523926, "learning_rate": 9.304055388610019e-06, "loss": 0.95, "step": 652 }, { "epoch": 0.19530432181845372, "grad_norm": 2.0489699840545654, "learning_rate": 9.301587554033965e-06, "loss": 0.9669, "step": 653 }, { "epoch": 0.19560340960071781, "grad_norm": 2.162031412124634, "learning_rate": 9.299115680191876e-06, "loss": 0.8918, "step": 654 }, { "epoch": 0.1959024973829819, "grad_norm": 2.3100359439849854, "learning_rate": 9.296639769404892e-06, "loss": 0.9398, "step": 655 }, { "epoch": 0.196201585165246, "grad_norm": 1.9345213174819946, "learning_rate": 9.294159823997933e-06, "loss": 0.8898, "step": 656 }, { "epoch": 0.19650067294751009, "grad_norm": 2.124377727508545, "learning_rate": 9.291675846299711e-06, "loss": 0.8874, "step": 657 }, { "epoch": 0.19679976072977418, "grad_norm": 2.363574743270874, "learning_rate": 9.289187838642724e-06, "loss": 0.9501, "step": 658 }, { "epoch": 0.1970988485120383, "grad_norm": 1.8972347974777222, "learning_rate": 9.286695803363257e-06, "loss": 0.861, "step": 659 }, { "epoch": 0.19739793629430238, "grad_norm": 2.0143730640411377, "learning_rate": 9.284199742801373e-06, "loss": 0.9321, "step": 660 }, { "epoch": 0.19769702407656647, "grad_norm": 2.9841811656951904, "learning_rate": 9.281699659300917e-06, "loss": 0.9511, "step": 661 }, { "epoch": 0.19799611185883056, "grad_norm": 2.1328580379486084, "learning_rate": 9.279195555209513e-06, "loss": 0.8645, "step": 662 }, { "epoch": 0.19829519964109465, "grad_norm": 2.3238632678985596, "learning_rate": 9.276687432878554e-06, "loss": 0.9106, "step": 663 }, { "epoch": 0.19859428742335875, "grad_norm": 2.2906575202941895, "learning_rate": 9.274175294663215e-06, "loss": 0.9089, "step": 664 }, { "epoch": 0.19889337520562286, "grad_norm": 3.4702138900756836, "learning_rate": 9.271659142922438e-06, "loss": 0.9656, "step": 665 }, { "epoch": 0.19919246298788695, "grad_norm": 2.0288960933685303, "learning_rate": 9.26913898001893e-06, "loss": 0.9025, "step": 666 }, { "epoch": 0.19949155077015104, "grad_norm": 2.0889480113983154, "learning_rate": 9.26661480831917e-06, "loss": 0.929, "step": 667 }, { "epoch": 0.19979063855241513, "grad_norm": 2.2982118129730225, "learning_rate": 9.2640866301934e-06, "loss": 0.9058, "step": 668 }, { "epoch": 0.20008972633467922, "grad_norm": 2.013181209564209, "learning_rate": 9.261554448015625e-06, "loss": 0.849, "step": 669 }, { "epoch": 0.20038881411694331, "grad_norm": 2.555504083633423, "learning_rate": 9.259018264163604e-06, "loss": 0.8842, "step": 670 }, { "epoch": 0.2006879018992074, "grad_norm": 2.6550564765930176, "learning_rate": 9.25647808101886e-06, "loss": 0.8784, "step": 671 }, { "epoch": 0.20098698968147152, "grad_norm": 4.694149971008301, "learning_rate": 9.253933900966672e-06, "loss": 0.9527, "step": 672 }, { "epoch": 0.2012860774637356, "grad_norm": 1.9620747566223145, "learning_rate": 9.251385726396065e-06, "loss": 0.9059, "step": 673 }, { "epoch": 0.2015851652459997, "grad_norm": 2.1500370502471924, "learning_rate": 9.248833559699824e-06, "loss": 0.901, "step": 674 }, { "epoch": 0.2018842530282638, "grad_norm": 2.0955703258514404, "learning_rate": 9.246277403274475e-06, "loss": 0.9496, "step": 675 }, { "epoch": 0.20218334081052788, "grad_norm": 1.976662278175354, "learning_rate": 9.243717259520296e-06, "loss": 0.9011, "step": 676 }, { "epoch": 0.20248242859279197, "grad_norm": 1.9662333726882935, "learning_rate": 9.241153130841305e-06, "loss": 0.8184, "step": 677 }, { "epoch": 0.2027815163750561, "grad_norm": 2.040987491607666, "learning_rate": 9.238585019645265e-06, "loss": 0.9173, "step": 678 }, { "epoch": 0.20308060415732018, "grad_norm": 2.007378578186035, "learning_rate": 9.236012928343676e-06, "loss": 0.9494, "step": 679 }, { "epoch": 0.20337969193958427, "grad_norm": 2.031209707260132, "learning_rate": 9.233436859351778e-06, "loss": 0.9096, "step": 680 }, { "epoch": 0.20367877972184836, "grad_norm": 2.1632962226867676, "learning_rate": 9.230856815088546e-06, "loss": 0.8954, "step": 681 }, { "epoch": 0.20397786750411245, "grad_norm": 2.1705482006073, "learning_rate": 9.228272797976685e-06, "loss": 0.9573, "step": 682 }, { "epoch": 0.20427695528637654, "grad_norm": 2.204298734664917, "learning_rate": 9.22568481044263e-06, "loss": 0.9106, "step": 683 }, { "epoch": 0.20457604306864063, "grad_norm": 2.1152007579803467, "learning_rate": 9.223092854916552e-06, "loss": 0.9093, "step": 684 }, { "epoch": 0.20487513085090475, "grad_norm": 2.2549288272857666, "learning_rate": 9.220496933832338e-06, "loss": 0.9678, "step": 685 }, { "epoch": 0.20517421863316884, "grad_norm": 2.1189420223236084, "learning_rate": 9.217897049627605e-06, "loss": 0.8561, "step": 686 }, { "epoch": 0.20547330641543293, "grad_norm": 2.4237892627716064, "learning_rate": 9.21529320474369e-06, "loss": 0.9044, "step": 687 }, { "epoch": 0.20577239419769702, "grad_norm": 2.3180856704711914, "learning_rate": 9.212685401625649e-06, "loss": 0.9676, "step": 688 }, { "epoch": 0.2060714819799611, "grad_norm": 2.0727524757385254, "learning_rate": 9.210073642722256e-06, "loss": 0.8665, "step": 689 }, { "epoch": 0.2063705697622252, "grad_norm": 2.6192445755004883, "learning_rate": 9.207457930485996e-06, "loss": 0.9013, "step": 690 }, { "epoch": 0.20666965754448932, "grad_norm": 2.1228737831115723, "learning_rate": 9.20483826737307e-06, "loss": 0.8938, "step": 691 }, { "epoch": 0.2069687453267534, "grad_norm": 1.8610478639602661, "learning_rate": 9.202214655843386e-06, "loss": 0.9012, "step": 692 }, { "epoch": 0.2072678331090175, "grad_norm": 1.9940825700759888, "learning_rate": 9.199587098360563e-06, "loss": 0.9485, "step": 693 }, { "epoch": 0.2075669208912816, "grad_norm": 1.8049489259719849, "learning_rate": 9.196955597391923e-06, "loss": 0.9028, "step": 694 }, { "epoch": 0.20786600867354568, "grad_norm": 2.1234774589538574, "learning_rate": 9.19432015540849e-06, "loss": 0.9373, "step": 695 }, { "epoch": 0.20816509645580977, "grad_norm": 2.1765198707580566, "learning_rate": 9.191680774884992e-06, "loss": 0.8763, "step": 696 }, { "epoch": 0.20846418423807386, "grad_norm": 2.253967761993408, "learning_rate": 9.189037458299854e-06, "loss": 0.9359, "step": 697 }, { "epoch": 0.20876327202033798, "grad_norm": 2.000401496887207, "learning_rate": 9.186390208135194e-06, "loss": 0.8716, "step": 698 }, { "epoch": 0.20906235980260207, "grad_norm": 2.4362926483154297, "learning_rate": 9.18373902687683e-06, "loss": 0.9285, "step": 699 }, { "epoch": 0.20936144758486616, "grad_norm": 2.2922182083129883, "learning_rate": 9.181083917014262e-06, "loss": 0.8719, "step": 700 }, { "epoch": 0.20966053536713025, "grad_norm": 2.1369149684906006, "learning_rate": 9.17842488104069e-06, "loss": 0.9445, "step": 701 }, { "epoch": 0.20995962314939434, "grad_norm": 2.3500304222106934, "learning_rate": 9.175761921452992e-06, "loss": 0.9054, "step": 702 }, { "epoch": 0.21025871093165843, "grad_norm": 2.1307356357574463, "learning_rate": 9.173095040751738e-06, "loss": 0.9176, "step": 703 }, { "epoch": 0.21055779871392252, "grad_norm": 2.165228843688965, "learning_rate": 9.17042424144117e-06, "loss": 0.9058, "step": 704 }, { "epoch": 0.21085688649618664, "grad_norm": 1.839970350265503, "learning_rate": 9.16774952602922e-06, "loss": 0.8873, "step": 705 }, { "epoch": 0.21115597427845073, "grad_norm": 2.6557986736297607, "learning_rate": 9.165070897027487e-06, "loss": 0.894, "step": 706 }, { "epoch": 0.21145506206071482, "grad_norm": 2.0779685974121094, "learning_rate": 9.162388356951257e-06, "loss": 0.8844, "step": 707 }, { "epoch": 0.2117541498429789, "grad_norm": 2.0606939792633057, "learning_rate": 9.15970190831948e-06, "loss": 0.8626, "step": 708 }, { "epoch": 0.212053237625243, "grad_norm": 2.2478418350219727, "learning_rate": 9.157011553654776e-06, "loss": 0.9526, "step": 709 }, { "epoch": 0.2123523254075071, "grad_norm": 2.3522496223449707, "learning_rate": 9.154317295483437e-06, "loss": 0.9338, "step": 710 }, { "epoch": 0.2126514131897712, "grad_norm": 3.4344680309295654, "learning_rate": 9.151619136335419e-06, "loss": 0.8895, "step": 711 }, { "epoch": 0.2129505009720353, "grad_norm": 2.8587379455566406, "learning_rate": 9.14891707874434e-06, "loss": 0.8849, "step": 712 }, { "epoch": 0.2132495887542994, "grad_norm": 2.454369306564331, "learning_rate": 9.146211125247478e-06, "loss": 0.9164, "step": 713 }, { "epoch": 0.21354867653656348, "grad_norm": 2.435004711151123, "learning_rate": 9.143501278385773e-06, "loss": 0.8673, "step": 714 }, { "epoch": 0.21384776431882757, "grad_norm": 2.065308094024658, "learning_rate": 9.140787540703817e-06, "loss": 0.9281, "step": 715 }, { "epoch": 0.21414685210109166, "grad_norm": 1.918208122253418, "learning_rate": 9.138069914749859e-06, "loss": 0.8485, "step": 716 }, { "epoch": 0.21444593988335575, "grad_norm": 2.1426749229431152, "learning_rate": 9.135348403075795e-06, "loss": 0.9091, "step": 717 }, { "epoch": 0.21474502766561987, "grad_norm": 2.18416690826416, "learning_rate": 9.132623008237174e-06, "loss": 0.8832, "step": 718 }, { "epoch": 0.21504411544788396, "grad_norm": 2.059234142303467, "learning_rate": 9.12989373279319e-06, "loss": 0.9304, "step": 719 }, { "epoch": 0.21534320323014805, "grad_norm": 2.135817766189575, "learning_rate": 9.127160579306678e-06, "loss": 0.9175, "step": 720 }, { "epoch": 0.21564229101241214, "grad_norm": 2.0806386470794678, "learning_rate": 9.124423550344118e-06, "loss": 0.8933, "step": 721 }, { "epoch": 0.21594137879467623, "grad_norm": 2.127081871032715, "learning_rate": 9.12168264847563e-06, "loss": 0.8853, "step": 722 }, { "epoch": 0.21624046657694032, "grad_norm": 2.160672187805176, "learning_rate": 9.118937876274965e-06, "loss": 0.9629, "step": 723 }, { "epoch": 0.21653955435920444, "grad_norm": 2.013505458831787, "learning_rate": 9.116189236319515e-06, "loss": 0.919, "step": 724 }, { "epoch": 0.21683864214146853, "grad_norm": 2.021211624145508, "learning_rate": 9.113436731190302e-06, "loss": 0.9273, "step": 725 }, { "epoch": 0.21713772992373262, "grad_norm": 2.3637237548828125, "learning_rate": 9.110680363471973e-06, "loss": 0.9007, "step": 726 }, { "epoch": 0.2174368177059967, "grad_norm": 2.2751049995422363, "learning_rate": 9.10792013575281e-06, "loss": 0.896, "step": 727 }, { "epoch": 0.2177359054882608, "grad_norm": 2.1057891845703125, "learning_rate": 9.10515605062471e-06, "loss": 0.8683, "step": 728 }, { "epoch": 0.2180349932705249, "grad_norm": 2.2084121704101562, "learning_rate": 9.102388110683201e-06, "loss": 0.8807, "step": 729 }, { "epoch": 0.21833408105278898, "grad_norm": 2.9904234409332275, "learning_rate": 9.099616318527426e-06, "loss": 0.8987, "step": 730 }, { "epoch": 0.2186331688350531, "grad_norm": 2.0737035274505615, "learning_rate": 9.096840676760146e-06, "loss": 0.9013, "step": 731 }, { "epoch": 0.2189322566173172, "grad_norm": 2.003288984298706, "learning_rate": 9.09406118798774e-06, "loss": 0.899, "step": 732 }, { "epoch": 0.21923134439958128, "grad_norm": 2.343313694000244, "learning_rate": 9.091277854820191e-06, "loss": 0.9444, "step": 733 }, { "epoch": 0.21953043218184537, "grad_norm": 2.123699903488159, "learning_rate": 9.088490679871102e-06, "loss": 0.8312, "step": 734 }, { "epoch": 0.21982951996410946, "grad_norm": 2.0852057933807373, "learning_rate": 9.085699665757679e-06, "loss": 0.9505, "step": 735 }, { "epoch": 0.22012860774637355, "grad_norm": 2.132830858230591, "learning_rate": 9.082904815100732e-06, "loss": 0.9226, "step": 736 }, { "epoch": 0.22042769552863767, "grad_norm": 2.2305500507354736, "learning_rate": 9.080106130524675e-06, "loss": 0.8323, "step": 737 }, { "epoch": 0.22072678331090176, "grad_norm": 2.4033730030059814, "learning_rate": 9.07730361465752e-06, "loss": 0.8727, "step": 738 }, { "epoch": 0.22102587109316585, "grad_norm": 2.3713512420654297, "learning_rate": 9.07449727013088e-06, "loss": 0.9116, "step": 739 }, { "epoch": 0.22132495887542994, "grad_norm": 1.92775559425354, "learning_rate": 9.071687099579962e-06, "loss": 0.8953, "step": 740 }, { "epoch": 0.22162404665769403, "grad_norm": 2.0738866329193115, "learning_rate": 9.068873105643565e-06, "loss": 0.8981, "step": 741 }, { "epoch": 0.22192313443995812, "grad_norm": 2.0191781520843506, "learning_rate": 9.066055290964079e-06, "loss": 0.8884, "step": 742 }, { "epoch": 0.2222222222222222, "grad_norm": 2.2283360958099365, "learning_rate": 9.063233658187482e-06, "loss": 0.9338, "step": 743 }, { "epoch": 0.22252131000448633, "grad_norm": 2.0449678897857666, "learning_rate": 9.060408209963334e-06, "loss": 0.8835, "step": 744 }, { "epoch": 0.22282039778675042, "grad_norm": 2.122150421142578, "learning_rate": 9.057578948944783e-06, "loss": 0.8769, "step": 745 }, { "epoch": 0.2231194855690145, "grad_norm": 2.2087879180908203, "learning_rate": 9.054745877788554e-06, "loss": 0.9124, "step": 746 }, { "epoch": 0.2234185733512786, "grad_norm": 2.0811030864715576, "learning_rate": 9.051908999154948e-06, "loss": 0.8783, "step": 747 }, { "epoch": 0.2237176611335427, "grad_norm": 2.3077988624572754, "learning_rate": 9.049068315707847e-06, "loss": 0.9516, "step": 748 }, { "epoch": 0.22401674891580678, "grad_norm": 2.162431240081787, "learning_rate": 9.0462238301147e-06, "loss": 0.9188, "step": 749 }, { "epoch": 0.2243158366980709, "grad_norm": 2.0246407985687256, "learning_rate": 9.04337554504653e-06, "loss": 0.8875, "step": 750 }, { "epoch": 0.224614924480335, "grad_norm": 2.1116244792938232, "learning_rate": 9.040523463177928e-06, "loss": 0.9113, "step": 751 }, { "epoch": 0.22491401226259908, "grad_norm": 2.144303321838379, "learning_rate": 9.037667587187045e-06, "loss": 0.9074, "step": 752 }, { "epoch": 0.22521310004486317, "grad_norm": 1.9941705465316772, "learning_rate": 9.034807919755602e-06, "loss": 0.8994, "step": 753 }, { "epoch": 0.22551218782712726, "grad_norm": 2.0547330379486084, "learning_rate": 9.031944463568877e-06, "loss": 0.9673, "step": 754 }, { "epoch": 0.22581127560939135, "grad_norm": 2.443772554397583, "learning_rate": 9.029077221315703e-06, "loss": 0.8107, "step": 755 }, { "epoch": 0.22611036339165544, "grad_norm": 2.060100555419922, "learning_rate": 9.026206195688472e-06, "loss": 0.9194, "step": 756 }, { "epoch": 0.22640945117391956, "grad_norm": 2.1544454097747803, "learning_rate": 9.023331389383126e-06, "loss": 0.9351, "step": 757 }, { "epoch": 0.22670853895618365, "grad_norm": 2.701286554336548, "learning_rate": 9.02045280509916e-06, "loss": 0.9156, "step": 758 }, { "epoch": 0.22700762673844774, "grad_norm": 1.979743480682373, "learning_rate": 9.017570445539616e-06, "loss": 0.8732, "step": 759 }, { "epoch": 0.22730671452071183, "grad_norm": 2.1412441730499268, "learning_rate": 9.014684313411077e-06, "loss": 0.938, "step": 760 }, { "epoch": 0.22760580230297592, "grad_norm": 2.0477898120880127, "learning_rate": 9.011794411423675e-06, "loss": 0.9067, "step": 761 }, { "epoch": 0.22790489008524, "grad_norm": 2.4362220764160156, "learning_rate": 9.008900742291075e-06, "loss": 0.978, "step": 762 }, { "epoch": 0.2282039778675041, "grad_norm": 1.9870729446411133, "learning_rate": 9.006003308730487e-06, "loss": 0.9025, "step": 763 }, { "epoch": 0.22850306564976822, "grad_norm": 2.1972804069519043, "learning_rate": 9.003102113462647e-06, "loss": 0.9183, "step": 764 }, { "epoch": 0.2288021534320323, "grad_norm": 2.1518304347991943, "learning_rate": 9.000197159211834e-06, "loss": 0.8892, "step": 765 }, { "epoch": 0.2291012412142964, "grad_norm": 2.3985297679901123, "learning_rate": 8.997288448705846e-06, "loss": 0.9101, "step": 766 }, { "epoch": 0.2294003289965605, "grad_norm": 1.9348914623260498, "learning_rate": 8.994375984676014e-06, "loss": 0.9194, "step": 767 }, { "epoch": 0.22969941677882458, "grad_norm": 2.214837074279785, "learning_rate": 8.991459769857195e-06, "loss": 0.8772, "step": 768 }, { "epoch": 0.22999850456108867, "grad_norm": 2.0439629554748535, "learning_rate": 8.988539806987764e-06, "loss": 0.8935, "step": 769 }, { "epoch": 0.23029759234335279, "grad_norm": 2.1672284603118896, "learning_rate": 8.985616098809618e-06, "loss": 0.9364, "step": 770 }, { "epoch": 0.23059668012561688, "grad_norm": 2.020580530166626, "learning_rate": 8.982688648068169e-06, "loss": 0.8815, "step": 771 }, { "epoch": 0.23089576790788097, "grad_norm": 1.9076372385025024, "learning_rate": 8.979757457512347e-06, "loss": 0.9073, "step": 772 }, { "epoch": 0.23119485569014506, "grad_norm": 1.9205021858215332, "learning_rate": 8.976822529894588e-06, "loss": 0.8922, "step": 773 }, { "epoch": 0.23149394347240915, "grad_norm": 2.3895373344421387, "learning_rate": 8.973883867970844e-06, "loss": 0.9155, "step": 774 }, { "epoch": 0.23179303125467324, "grad_norm": 2.0352096557617188, "learning_rate": 8.970941474500565e-06, "loss": 0.895, "step": 775 }, { "epoch": 0.23209211903693733, "grad_norm": 2.4179985523223877, "learning_rate": 8.967995352246714e-06, "loss": 0.9121, "step": 776 }, { "epoch": 0.23239120681920145, "grad_norm": 2.219040632247925, "learning_rate": 8.965045503975752e-06, "loss": 0.8936, "step": 777 }, { "epoch": 0.23269029460146554, "grad_norm": 2.0656609535217285, "learning_rate": 8.962091932457635e-06, "loss": 0.9196, "step": 778 }, { "epoch": 0.23298938238372963, "grad_norm": 2.101527214050293, "learning_rate": 8.959134640465821e-06, "loss": 0.9364, "step": 779 }, { "epoch": 0.23328847016599372, "grad_norm": 1.9811075925827026, "learning_rate": 8.956173630777255e-06, "loss": 0.8896, "step": 780 }, { "epoch": 0.2335875579482578, "grad_norm": 2.0551488399505615, "learning_rate": 8.953208906172384e-06, "loss": 0.9158, "step": 781 }, { "epoch": 0.2338866457305219, "grad_norm": 2.0259034633636475, "learning_rate": 8.95024046943513e-06, "loss": 0.8779, "step": 782 }, { "epoch": 0.23418573351278602, "grad_norm": 2.0874710083007812, "learning_rate": 8.947268323352909e-06, "loss": 0.8564, "step": 783 }, { "epoch": 0.2344848212950501, "grad_norm": 2.325240135192871, "learning_rate": 8.944292470716617e-06, "loss": 0.8661, "step": 784 }, { "epoch": 0.2347839090773142, "grad_norm": 2.1217663288116455, "learning_rate": 8.941312914320636e-06, "loss": 0.9065, "step": 785 }, { "epoch": 0.23508299685957829, "grad_norm": 2.120793104171753, "learning_rate": 8.938329656962818e-06, "loss": 0.8563, "step": 786 }, { "epoch": 0.23538208464184238, "grad_norm": 2.220886707305908, "learning_rate": 8.935342701444495e-06, "loss": 0.9446, "step": 787 }, { "epoch": 0.23568117242410647, "grad_norm": 2.284255266189575, "learning_rate": 8.932352050570467e-06, "loss": 0.8532, "step": 788 }, { "epoch": 0.23598026020637056, "grad_norm": 2.067410707473755, "learning_rate": 8.929357707149014e-06, "loss": 0.9117, "step": 789 }, { "epoch": 0.23627934798863467, "grad_norm": 2.5636417865753174, "learning_rate": 8.926359673991874e-06, "loss": 0.931, "step": 790 }, { "epoch": 0.23657843577089877, "grad_norm": 2.0106894969940186, "learning_rate": 8.92335795391425e-06, "loss": 0.8643, "step": 791 }, { "epoch": 0.23687752355316286, "grad_norm": 2.1435186862945557, "learning_rate": 8.920352549734812e-06, "loss": 0.8904, "step": 792 }, { "epoch": 0.23717661133542695, "grad_norm": 2.0149857997894287, "learning_rate": 8.91734346427569e-06, "loss": 0.9247, "step": 793 }, { "epoch": 0.23747569911769104, "grad_norm": 2.211721897125244, "learning_rate": 8.914330700362461e-06, "loss": 0.8685, "step": 794 }, { "epoch": 0.23777478689995513, "grad_norm": 2.0011179447174072, "learning_rate": 8.91131426082417e-06, "loss": 0.9389, "step": 795 }, { "epoch": 0.23807387468221924, "grad_norm": 2.016836643218994, "learning_rate": 8.908294148493303e-06, "loss": 0.8947, "step": 796 }, { "epoch": 0.23837296246448333, "grad_norm": 2.734013319015503, "learning_rate": 8.905270366205798e-06, "loss": 0.8572, "step": 797 }, { "epoch": 0.23867205024674742, "grad_norm": 2.042064666748047, "learning_rate": 8.902242916801043e-06, "loss": 0.8803, "step": 798 }, { "epoch": 0.23897113802901152, "grad_norm": 2.1740360260009766, "learning_rate": 8.899211803121861e-06, "loss": 0.8652, "step": 799 }, { "epoch": 0.2392702258112756, "grad_norm": 2.180351734161377, "learning_rate": 8.896177028014524e-06, "loss": 0.9383, "step": 800 }, { "epoch": 0.2395693135935397, "grad_norm": 2.105390787124634, "learning_rate": 8.893138594328738e-06, "loss": 0.8802, "step": 801 }, { "epoch": 0.23986840137580379, "grad_norm": 2.106496810913086, "learning_rate": 8.890096504917647e-06, "loss": 0.9185, "step": 802 }, { "epoch": 0.2401674891580679, "grad_norm": 2.313713550567627, "learning_rate": 8.887050762637825e-06, "loss": 0.8836, "step": 803 }, { "epoch": 0.240466576940332, "grad_norm": 2.7017595767974854, "learning_rate": 8.884001370349275e-06, "loss": 0.9712, "step": 804 }, { "epoch": 0.24076566472259608, "grad_norm": 2.4117696285247803, "learning_rate": 8.880948330915435e-06, "loss": 0.9106, "step": 805 }, { "epoch": 0.24106475250486017, "grad_norm": 2.080770254135132, "learning_rate": 8.877891647203157e-06, "loss": 0.8791, "step": 806 }, { "epoch": 0.24136384028712426, "grad_norm": 2.1941232681274414, "learning_rate": 8.874831322082725e-06, "loss": 0.9322, "step": 807 }, { "epoch": 0.24166292806938836, "grad_norm": 2.1072330474853516, "learning_rate": 8.871767358427835e-06, "loss": 0.9023, "step": 808 }, { "epoch": 0.24196201585165247, "grad_norm": 2.2667267322540283, "learning_rate": 8.868699759115604e-06, "loss": 0.874, "step": 809 }, { "epoch": 0.24226110363391656, "grad_norm": 2.0561649799346924, "learning_rate": 8.86562852702656e-06, "loss": 0.9318, "step": 810 }, { "epoch": 0.24256019141618065, "grad_norm": 2.1599912643432617, "learning_rate": 8.862553665044644e-06, "loss": 0.9377, "step": 811 }, { "epoch": 0.24285927919844474, "grad_norm": 2.396315336227417, "learning_rate": 8.859475176057208e-06, "loss": 0.8701, "step": 812 }, { "epoch": 0.24315836698070883, "grad_norm": 1.9555197954177856, "learning_rate": 8.856393062955003e-06, "loss": 0.9093, "step": 813 }, { "epoch": 0.24345745476297292, "grad_norm": 2.0469048023223877, "learning_rate": 8.85330732863219e-06, "loss": 0.9222, "step": 814 }, { "epoch": 0.24375654254523701, "grad_norm": 2.276747465133667, "learning_rate": 8.850217975986326e-06, "loss": 0.833, "step": 815 }, { "epoch": 0.24405563032750113, "grad_norm": 2.2535011768341064, "learning_rate": 8.84712500791837e-06, "loss": 0.8962, "step": 816 }, { "epoch": 0.24435471810976522, "grad_norm": 2.18680477142334, "learning_rate": 8.844028427332667e-06, "loss": 0.9277, "step": 817 }, { "epoch": 0.2446538058920293, "grad_norm": 2.4426980018615723, "learning_rate": 8.840928237136967e-06, "loss": 0.8541, "step": 818 }, { "epoch": 0.2449528936742934, "grad_norm": 2.2209584712982178, "learning_rate": 8.837824440242402e-06, "loss": 0.8937, "step": 819 }, { "epoch": 0.2452519814565575, "grad_norm": 2.2815849781036377, "learning_rate": 8.834717039563488e-06, "loss": 0.8518, "step": 820 }, { "epoch": 0.24555106923882158, "grad_norm": 2.3616483211517334, "learning_rate": 8.83160603801813e-06, "loss": 0.9215, "step": 821 }, { "epoch": 0.2458501570210857, "grad_norm": 2.3371899127960205, "learning_rate": 8.828491438527614e-06, "loss": 0.8579, "step": 822 }, { "epoch": 0.2461492448033498, "grad_norm": 2.2648732662200928, "learning_rate": 8.825373244016604e-06, "loss": 0.9647, "step": 823 }, { "epoch": 0.24644833258561388, "grad_norm": 2.1428871154785156, "learning_rate": 8.822251457413138e-06, "loss": 0.8828, "step": 824 }, { "epoch": 0.24674742036787797, "grad_norm": 2.3218042850494385, "learning_rate": 8.819126081648627e-06, "loss": 0.8897, "step": 825 }, { "epoch": 0.24704650815014206, "grad_norm": 2.179537057876587, "learning_rate": 8.815997119657856e-06, "loss": 0.9118, "step": 826 }, { "epoch": 0.24734559593240615, "grad_norm": 1.9945372343063354, "learning_rate": 8.812864574378974e-06, "loss": 0.9094, "step": 827 }, { "epoch": 0.24764468371467024, "grad_norm": 2.1765918731689453, "learning_rate": 8.809728448753496e-06, "loss": 0.9501, "step": 828 }, { "epoch": 0.24794377149693436, "grad_norm": 2.060837984085083, "learning_rate": 8.8065887457263e-06, "loss": 0.8707, "step": 829 }, { "epoch": 0.24824285927919845, "grad_norm": 2.254173994064331, "learning_rate": 8.803445468245618e-06, "loss": 0.884, "step": 830 }, { "epoch": 0.24854194706146254, "grad_norm": 2.150906562805176, "learning_rate": 8.800298619263047e-06, "loss": 0.9745, "step": 831 }, { "epoch": 0.24884103484372663, "grad_norm": 2.1505417823791504, "learning_rate": 8.797148201733533e-06, "loss": 0.8864, "step": 832 }, { "epoch": 0.24914012262599072, "grad_norm": 2.945585250854492, "learning_rate": 8.793994218615371e-06, "loss": 0.9292, "step": 833 }, { "epoch": 0.2494392104082548, "grad_norm": 2.0824198722839355, "learning_rate": 8.79083667287021e-06, "loss": 0.9218, "step": 834 }, { "epoch": 0.2497382981905189, "grad_norm": 2.111649990081787, "learning_rate": 8.787675567463034e-06, "loss": 0.8939, "step": 835 }, { "epoch": 0.250037385972783, "grad_norm": 2.0960397720336914, "learning_rate": 8.784510905362185e-06, "loss": 0.8383, "step": 836 }, { "epoch": 0.2503364737550471, "grad_norm": 2.036592483520508, "learning_rate": 8.781342689539329e-06, "loss": 0.8753, "step": 837 }, { "epoch": 0.2506355615373112, "grad_norm": 2.0049991607666016, "learning_rate": 8.778170922969478e-06, "loss": 0.86, "step": 838 }, { "epoch": 0.2509346493195753, "grad_norm": 2.3637993335723877, "learning_rate": 8.774995608630979e-06, "loss": 0.9367, "step": 839 }, { "epoch": 0.2512337371018394, "grad_norm": 2.1808557510375977, "learning_rate": 8.771816749505504e-06, "loss": 0.9163, "step": 840 }, { "epoch": 0.2515328248841035, "grad_norm": 2.109550714492798, "learning_rate": 8.768634348578062e-06, "loss": 0.9944, "step": 841 }, { "epoch": 0.25183191266636756, "grad_norm": 2.7592811584472656, "learning_rate": 8.765448408836978e-06, "loss": 0.921, "step": 842 }, { "epoch": 0.2521310004486317, "grad_norm": 1.9245455265045166, "learning_rate": 8.762258933273908e-06, "loss": 0.9161, "step": 843 }, { "epoch": 0.25243008823089574, "grad_norm": 2.0845556259155273, "learning_rate": 8.759065924883827e-06, "loss": 0.9075, "step": 844 }, { "epoch": 0.25272917601315986, "grad_norm": 2.500300645828247, "learning_rate": 8.755869386665022e-06, "loss": 0.9483, "step": 845 }, { "epoch": 0.253028263795424, "grad_norm": 2.093935966491699, "learning_rate": 8.7526693216191e-06, "loss": 0.9163, "step": 846 }, { "epoch": 0.25332735157768804, "grad_norm": 2.179887533187866, "learning_rate": 8.749465732750982e-06, "loss": 0.8703, "step": 847 }, { "epoch": 0.25362643935995216, "grad_norm": 2.100419759750366, "learning_rate": 8.746258623068886e-06, "loss": 0.8415, "step": 848 }, { "epoch": 0.2539255271422162, "grad_norm": 2.605088233947754, "learning_rate": 8.74304799558435e-06, "loss": 0.9267, "step": 849 }, { "epoch": 0.25422461492448034, "grad_norm": 2.1310038566589355, "learning_rate": 8.739833853312208e-06, "loss": 0.9349, "step": 850 }, { "epoch": 0.2545237027067444, "grad_norm": 2.0149736404418945, "learning_rate": 8.736616199270595e-06, "loss": 0.974, "step": 851 }, { "epoch": 0.2548227904890085, "grad_norm": 1.9335001707077026, "learning_rate": 8.733395036480946e-06, "loss": 0.8734, "step": 852 }, { "epoch": 0.25512187827127264, "grad_norm": 2.1064655780792236, "learning_rate": 8.73017036796799e-06, "loss": 0.8586, "step": 853 }, { "epoch": 0.2554209660535367, "grad_norm": 2.258660316467285, "learning_rate": 8.726942196759744e-06, "loss": 0.8796, "step": 854 }, { "epoch": 0.2557200538358008, "grad_norm": 2.093735933303833, "learning_rate": 8.72371052588752e-06, "loss": 0.8972, "step": 855 }, { "epoch": 0.2560191416180649, "grad_norm": 2.077585458755493, "learning_rate": 8.720475358385912e-06, "loss": 0.9144, "step": 856 }, { "epoch": 0.256318229400329, "grad_norm": 2.2719504833221436, "learning_rate": 8.7172366972928e-06, "loss": 0.8895, "step": 857 }, { "epoch": 0.2566173171825931, "grad_norm": 2.113807439804077, "learning_rate": 8.713994545649343e-06, "loss": 0.9035, "step": 858 }, { "epoch": 0.2569164049648572, "grad_norm": 2.243272542953491, "learning_rate": 8.710748906499977e-06, "loss": 0.8668, "step": 859 }, { "epoch": 0.2572154927471213, "grad_norm": 2.1740472316741943, "learning_rate": 8.707499782892414e-06, "loss": 0.8652, "step": 860 }, { "epoch": 0.25751458052938536, "grad_norm": 2.064631223678589, "learning_rate": 8.704247177877643e-06, "loss": 0.8623, "step": 861 }, { "epoch": 0.2578136683116495, "grad_norm": 2.63271427154541, "learning_rate": 8.700991094509909e-06, "loss": 0.9474, "step": 862 }, { "epoch": 0.25811275609391354, "grad_norm": 2.195537805557251, "learning_rate": 8.697731535846739e-06, "loss": 0.8863, "step": 863 }, { "epoch": 0.25841184387617766, "grad_norm": 1.9830275774002075, "learning_rate": 8.69446850494891e-06, "loss": 0.9115, "step": 864 }, { "epoch": 0.2587109316584418, "grad_norm": 2.0274884700775146, "learning_rate": 8.691202004880468e-06, "loss": 0.9447, "step": 865 }, { "epoch": 0.25901001944070584, "grad_norm": 2.0495710372924805, "learning_rate": 8.687932038708712e-06, "loss": 0.8788, "step": 866 }, { "epoch": 0.25930910722296996, "grad_norm": 2.1419687271118164, "learning_rate": 8.684658609504199e-06, "loss": 0.938, "step": 867 }, { "epoch": 0.259608195005234, "grad_norm": 1.9966331720352173, "learning_rate": 8.681381720340736e-06, "loss": 0.8779, "step": 868 }, { "epoch": 0.25990728278749814, "grad_norm": 2.2589597702026367, "learning_rate": 8.67810137429538e-06, "loss": 0.925, "step": 869 }, { "epoch": 0.2602063705697622, "grad_norm": 2.3836519718170166, "learning_rate": 8.674817574448431e-06, "loss": 0.8641, "step": 870 }, { "epoch": 0.2605054583520263, "grad_norm": 2.340545177459717, "learning_rate": 8.671530323883437e-06, "loss": 0.9678, "step": 871 }, { "epoch": 0.26080454613429044, "grad_norm": 2.134702682495117, "learning_rate": 8.668239625687183e-06, "loss": 0.8677, "step": 872 }, { "epoch": 0.2611036339165545, "grad_norm": 1.9206935167312622, "learning_rate": 8.664945482949691e-06, "loss": 0.9216, "step": 873 }, { "epoch": 0.2614027216988186, "grad_norm": 2.222097396850586, "learning_rate": 8.661647898764221e-06, "loss": 0.9449, "step": 874 }, { "epoch": 0.2617018094810827, "grad_norm": 3.886031150817871, "learning_rate": 8.658346876227261e-06, "loss": 0.8875, "step": 875 }, { "epoch": 0.2620008972633468, "grad_norm": 2.1681060791015625, "learning_rate": 8.655042418438529e-06, "loss": 0.8905, "step": 876 }, { "epoch": 0.26229998504561086, "grad_norm": 2.5669548511505127, "learning_rate": 8.651734528500968e-06, "loss": 0.9181, "step": 877 }, { "epoch": 0.262599072827875, "grad_norm": 2.307936906814575, "learning_rate": 8.648423209520746e-06, "loss": 0.8991, "step": 878 }, { "epoch": 0.2628981606101391, "grad_norm": 2.139681577682495, "learning_rate": 8.64510846460725e-06, "loss": 0.946, "step": 879 }, { "epoch": 0.26319724839240316, "grad_norm": 2.059147596359253, "learning_rate": 8.641790296873081e-06, "loss": 0.9303, "step": 880 }, { "epoch": 0.2634963361746673, "grad_norm": 2.465872049331665, "learning_rate": 8.638468709434057e-06, "loss": 0.9405, "step": 881 }, { "epoch": 0.26379542395693134, "grad_norm": 1.9387989044189453, "learning_rate": 8.63514370540921e-06, "loss": 0.8686, "step": 882 }, { "epoch": 0.26409451173919546, "grad_norm": 2.742898464202881, "learning_rate": 8.631815287920773e-06, "loss": 0.8578, "step": 883 }, { "epoch": 0.2643935995214595, "grad_norm": 2.0936670303344727, "learning_rate": 8.62848346009419e-06, "loss": 0.8597, "step": 884 }, { "epoch": 0.26469268730372364, "grad_norm": 1.9758498668670654, "learning_rate": 8.625148225058107e-06, "loss": 0.8469, "step": 885 }, { "epoch": 0.26499177508598776, "grad_norm": 2.2053442001342773, "learning_rate": 8.621809585944366e-06, "loss": 0.8542, "step": 886 }, { "epoch": 0.2652908628682518, "grad_norm": 2.2858664989471436, "learning_rate": 8.61846754588801e-06, "loss": 0.8638, "step": 887 }, { "epoch": 0.26558995065051594, "grad_norm": 2.416813373565674, "learning_rate": 8.61512210802727e-06, "loss": 0.8843, "step": 888 }, { "epoch": 0.26588903843278, "grad_norm": 4.609827518463135, "learning_rate": 8.611773275503572e-06, "loss": 0.8903, "step": 889 }, { "epoch": 0.2661881262150441, "grad_norm": 2.123302698135376, "learning_rate": 8.608421051461529e-06, "loss": 0.9339, "step": 890 }, { "epoch": 0.26648721399730824, "grad_norm": 2.0752909183502197, "learning_rate": 8.605065439048936e-06, "loss": 0.8686, "step": 891 }, { "epoch": 0.2667863017795723, "grad_norm": 2.2354037761688232, "learning_rate": 8.601706441416776e-06, "loss": 0.8468, "step": 892 }, { "epoch": 0.2670853895618364, "grad_norm": 1.9131232500076294, "learning_rate": 8.598344061719204e-06, "loss": 0.9218, "step": 893 }, { "epoch": 0.2673844773441005, "grad_norm": 2.3510284423828125, "learning_rate": 8.594978303113552e-06, "loss": 0.9182, "step": 894 }, { "epoch": 0.2676835651263646, "grad_norm": 2.1390933990478516, "learning_rate": 8.59160916876033e-06, "loss": 0.892, "step": 895 }, { "epoch": 0.26798265290862866, "grad_norm": 2.5494792461395264, "learning_rate": 8.588236661823209e-06, "loss": 0.8502, "step": 896 }, { "epoch": 0.2682817406908928, "grad_norm": 2.170192003250122, "learning_rate": 8.584860785469036e-06, "loss": 0.8868, "step": 897 }, { "epoch": 0.2685808284731569, "grad_norm": 2.391944169998169, "learning_rate": 8.581481542867818e-06, "loss": 0.9062, "step": 898 }, { "epoch": 0.26887991625542096, "grad_norm": 2.8505380153656006, "learning_rate": 8.578098937192723e-06, "loss": 0.9674, "step": 899 }, { "epoch": 0.2691790040376851, "grad_norm": 1.946619987487793, "learning_rate": 8.574712971620075e-06, "loss": 0.8627, "step": 900 }, { "epoch": 0.26947809181994914, "grad_norm": 2.0867037773132324, "learning_rate": 8.571323649329352e-06, "loss": 0.9377, "step": 901 }, { "epoch": 0.26977717960221326, "grad_norm": 2.4951014518737793, "learning_rate": 8.567930973503196e-06, "loss": 0.9318, "step": 902 }, { "epoch": 0.2700762673844773, "grad_norm": 2.284148693084717, "learning_rate": 8.564534947327381e-06, "loss": 0.8068, "step": 903 }, { "epoch": 0.27037535516674144, "grad_norm": 3.0309672355651855, "learning_rate": 8.561135573990839e-06, "loss": 0.8911, "step": 904 }, { "epoch": 0.27067444294900556, "grad_norm": 2.2556300163269043, "learning_rate": 8.55773285668564e-06, "loss": 0.9089, "step": 905 }, { "epoch": 0.2709735307312696, "grad_norm": 2.222870349884033, "learning_rate": 8.554326798606994e-06, "loss": 0.8832, "step": 906 }, { "epoch": 0.27127261851353374, "grad_norm": 2.052119731903076, "learning_rate": 8.55091740295325e-06, "loss": 0.9426, "step": 907 }, { "epoch": 0.2715717062957978, "grad_norm": 2.0690882205963135, "learning_rate": 8.547504672925892e-06, "loss": 0.8684, "step": 908 }, { "epoch": 0.2718707940780619, "grad_norm": 2.1721673011779785, "learning_rate": 8.544088611729533e-06, "loss": 0.9336, "step": 909 }, { "epoch": 0.272169881860326, "grad_norm": 2.013841152191162, "learning_rate": 8.540669222571911e-06, "loss": 0.8821, "step": 910 }, { "epoch": 0.2724689696425901, "grad_norm": 2.150312900543213, "learning_rate": 8.537246508663894e-06, "loss": 0.8975, "step": 911 }, { "epoch": 0.2727680574248542, "grad_norm": 2.2693207263946533, "learning_rate": 8.533820473219472e-06, "loss": 0.8853, "step": 912 }, { "epoch": 0.2730671452071183, "grad_norm": 2.1575584411621094, "learning_rate": 8.53039111945575e-06, "loss": 0.8832, "step": 913 }, { "epoch": 0.2733662329893824, "grad_norm": 2.1844592094421387, "learning_rate": 8.526958450592952e-06, "loss": 0.9601, "step": 914 }, { "epoch": 0.27366532077164646, "grad_norm": 2.13958477973938, "learning_rate": 8.523522469854415e-06, "loss": 0.8805, "step": 915 }, { "epoch": 0.2739644085539106, "grad_norm": 2.262050151824951, "learning_rate": 8.520083180466585e-06, "loss": 0.9301, "step": 916 }, { "epoch": 0.2742634963361747, "grad_norm": 2.0712790489196777, "learning_rate": 8.516640585659012e-06, "loss": 0.9576, "step": 917 }, { "epoch": 0.27456258411843876, "grad_norm": 2.1151576042175293, "learning_rate": 8.513194688664356e-06, "loss": 0.9327, "step": 918 }, { "epoch": 0.2748616719007029, "grad_norm": 2.6473333835601807, "learning_rate": 8.509745492718375e-06, "loss": 0.9087, "step": 919 }, { "epoch": 0.27516075968296694, "grad_norm": 2.1104989051818848, "learning_rate": 8.506293001059922e-06, "loss": 0.891, "step": 920 }, { "epoch": 0.27545984746523106, "grad_norm": 2.2516367435455322, "learning_rate": 8.502837216930947e-06, "loss": 0.8448, "step": 921 }, { "epoch": 0.2757589352474951, "grad_norm": 2.252958059310913, "learning_rate": 8.499378143576496e-06, "loss": 0.9449, "step": 922 }, { "epoch": 0.27605802302975924, "grad_norm": 1.9987386465072632, "learning_rate": 8.495915784244694e-06, "loss": 0.8931, "step": 923 }, { "epoch": 0.27635711081202335, "grad_norm": 1.8049184083938599, "learning_rate": 8.49245014218676e-06, "loss": 0.8604, "step": 924 }, { "epoch": 0.2766561985942874, "grad_norm": 2.1568195819854736, "learning_rate": 8.488981220656993e-06, "loss": 0.9303, "step": 925 }, { "epoch": 0.27695528637655153, "grad_norm": 1.9846700429916382, "learning_rate": 8.48550902291277e-06, "loss": 0.9383, "step": 926 }, { "epoch": 0.2772543741588156, "grad_norm": 2.0359439849853516, "learning_rate": 8.482033552214546e-06, "loss": 0.8619, "step": 927 }, { "epoch": 0.2775534619410797, "grad_norm": 2.3073618412017822, "learning_rate": 8.478554811825846e-06, "loss": 0.8303, "step": 928 }, { "epoch": 0.2778525497233438, "grad_norm": 2.161088466644287, "learning_rate": 8.475072805013274e-06, "loss": 0.9199, "step": 929 }, { "epoch": 0.2781516375056079, "grad_norm": 2.1617698669433594, "learning_rate": 8.471587535046487e-06, "loss": 0.9659, "step": 930 }, { "epoch": 0.278450725287872, "grad_norm": 2.0522353649139404, "learning_rate": 8.468099005198224e-06, "loss": 0.927, "step": 931 }, { "epoch": 0.2787498130701361, "grad_norm": 2.01200270652771, "learning_rate": 8.46460721874427e-06, "loss": 0.9137, "step": 932 }, { "epoch": 0.2790489008524002, "grad_norm": 2.0459954738616943, "learning_rate": 8.461112178963475e-06, "loss": 0.9001, "step": 933 }, { "epoch": 0.27934798863466426, "grad_norm": 2.208639144897461, "learning_rate": 8.45761388913774e-06, "loss": 0.9172, "step": 934 }, { "epoch": 0.2796470764169284, "grad_norm": 8.726059913635254, "learning_rate": 8.454112352552025e-06, "loss": 0.8747, "step": 935 }, { "epoch": 0.27994616419919244, "grad_norm": 1.8563616275787354, "learning_rate": 8.450607572494332e-06, "loss": 0.9064, "step": 936 }, { "epoch": 0.28024525198145656, "grad_norm": 2.2314422130584717, "learning_rate": 8.447099552255708e-06, "loss": 0.8959, "step": 937 }, { "epoch": 0.2805443397637207, "grad_norm": 1.995360016822815, "learning_rate": 8.44358829513025e-06, "loss": 0.8547, "step": 938 }, { "epoch": 0.28084342754598474, "grad_norm": 2.189517021179199, "learning_rate": 8.44007380441509e-06, "loss": 0.8953, "step": 939 }, { "epoch": 0.28114251532824885, "grad_norm": 2.2503349781036377, "learning_rate": 8.436556083410392e-06, "loss": 0.8939, "step": 940 }, { "epoch": 0.2814416031105129, "grad_norm": 2.0672249794006348, "learning_rate": 8.433035135419358e-06, "loss": 0.855, "step": 941 }, { "epoch": 0.28174069089277703, "grad_norm": 2.1229608058929443, "learning_rate": 8.429510963748224e-06, "loss": 0.8767, "step": 942 }, { "epoch": 0.2820397786750411, "grad_norm": 2.0342421531677246, "learning_rate": 8.425983571706247e-06, "loss": 0.8222, "step": 943 }, { "epoch": 0.2823388664573052, "grad_norm": 2.5235116481781006, "learning_rate": 8.422452962605709e-06, "loss": 0.9006, "step": 944 }, { "epoch": 0.28263795423956933, "grad_norm": 2.129249095916748, "learning_rate": 8.418919139761914e-06, "loss": 0.9152, "step": 945 }, { "epoch": 0.2829370420218334, "grad_norm": 2.147489309310913, "learning_rate": 8.415382106493183e-06, "loss": 0.8773, "step": 946 }, { "epoch": 0.2832361298040975, "grad_norm": 2.2819974422454834, "learning_rate": 8.411841866120855e-06, "loss": 0.8954, "step": 947 }, { "epoch": 0.2835352175863616, "grad_norm": 2.1865124702453613, "learning_rate": 8.408298421969275e-06, "loss": 0.9085, "step": 948 }, { "epoch": 0.2838343053686257, "grad_norm": 2.3345165252685547, "learning_rate": 8.4047517773658e-06, "loss": 0.8728, "step": 949 }, { "epoch": 0.2841333931508898, "grad_norm": 2.480372190475464, "learning_rate": 8.40120193564079e-06, "loss": 0.8653, "step": 950 }, { "epoch": 0.2844324809331539, "grad_norm": 2.722959041595459, "learning_rate": 8.39764890012761e-06, "loss": 0.9058, "step": 951 }, { "epoch": 0.284731568715418, "grad_norm": 1.7961044311523438, "learning_rate": 8.394092674162625e-06, "loss": 0.89, "step": 952 }, { "epoch": 0.28503065649768206, "grad_norm": 2.2362101078033447, "learning_rate": 8.390533261085188e-06, "loss": 0.9134, "step": 953 }, { "epoch": 0.2853297442799462, "grad_norm": 2.443239688873291, "learning_rate": 8.386970664237653e-06, "loss": 0.9007, "step": 954 }, { "epoch": 0.28562883206221024, "grad_norm": 2.249608039855957, "learning_rate": 8.383404886965361e-06, "loss": 0.9084, "step": 955 }, { "epoch": 0.28592791984447435, "grad_norm": 2.142319679260254, "learning_rate": 8.37983593261664e-06, "loss": 0.8569, "step": 956 }, { "epoch": 0.28622700762673847, "grad_norm": 2.0231926441192627, "learning_rate": 8.376263804542798e-06, "loss": 0.8741, "step": 957 }, { "epoch": 0.28652609540900253, "grad_norm": 2.0692694187164307, "learning_rate": 8.372688506098128e-06, "loss": 0.9196, "step": 958 }, { "epoch": 0.28682518319126665, "grad_norm": 1.962756872177124, "learning_rate": 8.369110040639899e-06, "loss": 0.9804, "step": 959 }, { "epoch": 0.2871242709735307, "grad_norm": 2.242260456085205, "learning_rate": 8.365528411528348e-06, "loss": 0.8812, "step": 960 }, { "epoch": 0.28742335875579483, "grad_norm": 2.4887044429779053, "learning_rate": 8.361943622126694e-06, "loss": 0.8812, "step": 961 }, { "epoch": 0.2877224465380589, "grad_norm": 2.0006606578826904, "learning_rate": 8.358355675801112e-06, "loss": 0.8989, "step": 962 }, { "epoch": 0.288021534320323, "grad_norm": 2.1003410816192627, "learning_rate": 8.354764575920747e-06, "loss": 0.9217, "step": 963 }, { "epoch": 0.28832062210258713, "grad_norm": 2.1069066524505615, "learning_rate": 8.351170325857705e-06, "loss": 0.8834, "step": 964 }, { "epoch": 0.2886197098848512, "grad_norm": 2.2529613971710205, "learning_rate": 8.347572928987052e-06, "loss": 0.8835, "step": 965 }, { "epoch": 0.2889187976671153, "grad_norm": 2.0404515266418457, "learning_rate": 8.343972388686806e-06, "loss": 0.897, "step": 966 }, { "epoch": 0.2892178854493794, "grad_norm": 2.1792309284210205, "learning_rate": 8.340368708337934e-06, "loss": 0.8892, "step": 967 }, { "epoch": 0.2895169732316435, "grad_norm": 1.9516432285308838, "learning_rate": 8.336761891324357e-06, "loss": 0.8062, "step": 968 }, { "epoch": 0.28981606101390756, "grad_norm": 2.5453972816467285, "learning_rate": 8.333151941032941e-06, "loss": 0.9352, "step": 969 }, { "epoch": 0.2901151487961717, "grad_norm": 2.3865108489990234, "learning_rate": 8.32953886085349e-06, "loss": 0.9061, "step": 970 }, { "epoch": 0.2904142365784358, "grad_norm": 1.9320943355560303, "learning_rate": 8.325922654178752e-06, "loss": 0.9546, "step": 971 }, { "epoch": 0.29071332436069985, "grad_norm": 2.287036657333374, "learning_rate": 8.322303324404408e-06, "loss": 0.96, "step": 972 }, { "epoch": 0.29101241214296397, "grad_norm": 2.293832778930664, "learning_rate": 8.318680874929068e-06, "loss": 0.8994, "step": 973 }, { "epoch": 0.29131149992522803, "grad_norm": 2.0370614528656006, "learning_rate": 8.315055309154283e-06, "loss": 0.8848, "step": 974 }, { "epoch": 0.29161058770749215, "grad_norm": 2.184598684310913, "learning_rate": 8.311426630484513e-06, "loss": 0.8889, "step": 975 }, { "epoch": 0.29190967548975627, "grad_norm": 1.9042229652404785, "learning_rate": 8.30779484232716e-06, "loss": 0.8935, "step": 976 }, { "epoch": 0.29220876327202033, "grad_norm": 2.154979705810547, "learning_rate": 8.304159948092532e-06, "loss": 0.9201, "step": 977 }, { "epoch": 0.29250785105428445, "grad_norm": 2.6963534355163574, "learning_rate": 8.30052195119386e-06, "loss": 0.8768, "step": 978 }, { "epoch": 0.2928069388365485, "grad_norm": 8.801839828491211, "learning_rate": 8.296880855047284e-06, "loss": 0.9113, "step": 979 }, { "epoch": 0.29310602661881263, "grad_norm": 2.0573525428771973, "learning_rate": 8.293236663071859e-06, "loss": 0.9002, "step": 980 }, { "epoch": 0.2934051144010767, "grad_norm": 2.133679151535034, "learning_rate": 8.289589378689548e-06, "loss": 0.9107, "step": 981 }, { "epoch": 0.2937042021833408, "grad_norm": 2.232879400253296, "learning_rate": 8.28593900532521e-06, "loss": 0.9341, "step": 982 }, { "epoch": 0.29400328996560493, "grad_norm": 2.4538958072662354, "learning_rate": 8.28228554640661e-06, "loss": 0.8742, "step": 983 }, { "epoch": 0.294302377747869, "grad_norm": 2.0509963035583496, "learning_rate": 8.278629005364412e-06, "loss": 0.9323, "step": 984 }, { "epoch": 0.2946014655301331, "grad_norm": 2.0842254161834717, "learning_rate": 8.274969385632173e-06, "loss": 0.8791, "step": 985 }, { "epoch": 0.2949005533123972, "grad_norm": 4.286899566650391, "learning_rate": 8.271306690646336e-06, "loss": 0.9247, "step": 986 }, { "epoch": 0.2951996410946613, "grad_norm": 2.1074788570404053, "learning_rate": 8.267640923846242e-06, "loss": 0.9256, "step": 987 }, { "epoch": 0.29549872887692535, "grad_norm": 2.3957602977752686, "learning_rate": 8.263972088674103e-06, "loss": 0.9332, "step": 988 }, { "epoch": 0.29579781665918947, "grad_norm": 2.026813507080078, "learning_rate": 8.260300188575024e-06, "loss": 0.942, "step": 989 }, { "epoch": 0.2960969044414536, "grad_norm": 2.3570709228515625, "learning_rate": 8.256625226996981e-06, "loss": 0.893, "step": 990 }, { "epoch": 0.29639599222371765, "grad_norm": 1.852961778640747, "learning_rate": 8.252947207390832e-06, "loss": 0.8929, "step": 991 }, { "epoch": 0.29669508000598177, "grad_norm": 2.157203197479248, "learning_rate": 8.249266133210296e-06, "loss": 0.8945, "step": 992 }, { "epoch": 0.29699416778824583, "grad_norm": 1.9640438556671143, "learning_rate": 8.245582007911967e-06, "loss": 0.8892, "step": 993 }, { "epoch": 0.29729325557050995, "grad_norm": 2.2982442378997803, "learning_rate": 8.241894834955306e-06, "loss": 0.9542, "step": 994 }, { "epoch": 0.297592343352774, "grad_norm": 1.9362787008285522, "learning_rate": 8.238204617802633e-06, "loss": 0.8634, "step": 995 }, { "epoch": 0.29789143113503813, "grad_norm": 2.2592597007751465, "learning_rate": 8.234511359919125e-06, "loss": 0.905, "step": 996 }, { "epoch": 0.29819051891730225, "grad_norm": 2.103067398071289, "learning_rate": 8.230815064772815e-06, "loss": 0.9087, "step": 997 }, { "epoch": 0.2984896066995663, "grad_norm": 2.058659315109253, "learning_rate": 8.22711573583459e-06, "loss": 0.8571, "step": 998 }, { "epoch": 0.29878869448183043, "grad_norm": 1.987497091293335, "learning_rate": 8.223413376578182e-06, "loss": 0.8527, "step": 999 }, { "epoch": 0.2990877822640945, "grad_norm": 2.179502010345459, "learning_rate": 8.219707990480177e-06, "loss": 0.8747, "step": 1000 }, { "epoch": 0.2993868700463586, "grad_norm": 1.9865261316299438, "learning_rate": 8.215999581019993e-06, "loss": 0.8898, "step": 1001 }, { "epoch": 0.2996859578286227, "grad_norm": 2.3928921222686768, "learning_rate": 8.212288151679892e-06, "loss": 0.9434, "step": 1002 }, { "epoch": 0.2999850456108868, "grad_norm": 1.8100571632385254, "learning_rate": 8.208573705944972e-06, "loss": 0.9056, "step": 1003 }, { "epoch": 0.3002841333931509, "grad_norm": 2.1098434925079346, "learning_rate": 8.204856247303163e-06, "loss": 0.9096, "step": 1004 }, { "epoch": 0.30058322117541497, "grad_norm": 2.145660161972046, "learning_rate": 8.201135779245222e-06, "loss": 0.8974, "step": 1005 }, { "epoch": 0.3008823089576791, "grad_norm": 2.05322527885437, "learning_rate": 8.197412305264735e-06, "loss": 0.8592, "step": 1006 }, { "epoch": 0.30118139673994315, "grad_norm": 1.9729728698730469, "learning_rate": 8.193685828858109e-06, "loss": 0.8873, "step": 1007 }, { "epoch": 0.30148048452220727, "grad_norm": 2.024129629135132, "learning_rate": 8.189956353524568e-06, "loss": 0.8662, "step": 1008 }, { "epoch": 0.3017795723044714, "grad_norm": 2.067401170730591, "learning_rate": 8.18622388276616e-06, "loss": 0.8463, "step": 1009 }, { "epoch": 0.30207866008673545, "grad_norm": 2.049694538116455, "learning_rate": 8.182488420087737e-06, "loss": 0.8727, "step": 1010 }, { "epoch": 0.30237774786899957, "grad_norm": 2.034471035003662, "learning_rate": 8.178749968996965e-06, "loss": 0.8643, "step": 1011 }, { "epoch": 0.30267683565126363, "grad_norm": 1.9820784330368042, "learning_rate": 8.175008533004312e-06, "loss": 0.9672, "step": 1012 }, { "epoch": 0.30297592343352775, "grad_norm": 2.0715978145599365, "learning_rate": 8.171264115623056e-06, "loss": 0.8853, "step": 1013 }, { "epoch": 0.3032750112157918, "grad_norm": 2.16473650932312, "learning_rate": 8.167516720369268e-06, "loss": 0.9186, "step": 1014 }, { "epoch": 0.30357409899805593, "grad_norm": 2.2182767391204834, "learning_rate": 8.163766350761819e-06, "loss": 0.9256, "step": 1015 }, { "epoch": 0.30387318678032005, "grad_norm": 2.2718708515167236, "learning_rate": 8.160013010322372e-06, "loss": 0.8511, "step": 1016 }, { "epoch": 0.3041722745625841, "grad_norm": 2.299290895462036, "learning_rate": 8.156256702575378e-06, "loss": 0.8941, "step": 1017 }, { "epoch": 0.30447136234484823, "grad_norm": 2.2584455013275146, "learning_rate": 8.152497431048076e-06, "loss": 0.8681, "step": 1018 }, { "epoch": 0.3047704501271123, "grad_norm": 2.875140428543091, "learning_rate": 8.148735199270487e-06, "loss": 0.8578, "step": 1019 }, { "epoch": 0.3050695379093764, "grad_norm": 2.481945276260376, "learning_rate": 8.144970010775417e-06, "loss": 0.8814, "step": 1020 }, { "epoch": 0.30536862569164047, "grad_norm": 1.8645161390304565, "learning_rate": 8.141201869098439e-06, "loss": 0.8872, "step": 1021 }, { "epoch": 0.3056677134739046, "grad_norm": 2.1068193912506104, "learning_rate": 8.137430777777904e-06, "loss": 0.8974, "step": 1022 }, { "epoch": 0.3059668012561687, "grad_norm": 2.003674030303955, "learning_rate": 8.133656740354936e-06, "loss": 0.9239, "step": 1023 }, { "epoch": 0.30626588903843277, "grad_norm": 2.966543674468994, "learning_rate": 8.129879760373419e-06, "loss": 0.8955, "step": 1024 }, { "epoch": 0.3065649768206969, "grad_norm": 2.0569214820861816, "learning_rate": 8.126099841380008e-06, "loss": 0.8951, "step": 1025 }, { "epoch": 0.30686406460296095, "grad_norm": 2.4203011989593506, "learning_rate": 8.122316986924108e-06, "loss": 0.8798, "step": 1026 }, { "epoch": 0.30716315238522507, "grad_norm": 2.310976505279541, "learning_rate": 8.118531200557888e-06, "loss": 0.8932, "step": 1027 }, { "epoch": 0.30746224016748913, "grad_norm": 2.332551956176758, "learning_rate": 8.114742485836267e-06, "loss": 0.8628, "step": 1028 }, { "epoch": 0.30776132794975325, "grad_norm": 2.1254634857177734, "learning_rate": 8.110950846316915e-06, "loss": 0.8723, "step": 1029 }, { "epoch": 0.30806041573201737, "grad_norm": 2.114065408706665, "learning_rate": 8.107156285560249e-06, "loss": 0.8685, "step": 1030 }, { "epoch": 0.30835950351428143, "grad_norm": 2.179720163345337, "learning_rate": 8.103358807129424e-06, "loss": 0.9451, "step": 1031 }, { "epoch": 0.30865859129654555, "grad_norm": 2.2249691486358643, "learning_rate": 8.099558414590343e-06, "loss": 0.8712, "step": 1032 }, { "epoch": 0.3089576790788096, "grad_norm": 2.240722179412842, "learning_rate": 8.09575511151164e-06, "loss": 0.896, "step": 1033 }, { "epoch": 0.30925676686107373, "grad_norm": 2.197784185409546, "learning_rate": 8.091948901464683e-06, "loss": 0.9402, "step": 1034 }, { "epoch": 0.30955585464333785, "grad_norm": 2.3850064277648926, "learning_rate": 8.088139788023568e-06, "loss": 0.8963, "step": 1035 }, { "epoch": 0.3098549424256019, "grad_norm": 2.3050801753997803, "learning_rate": 8.084327774765121e-06, "loss": 0.9109, "step": 1036 }, { "epoch": 0.310154030207866, "grad_norm": 2.2620840072631836, "learning_rate": 8.08051286526889e-06, "loss": 0.9026, "step": 1037 }, { "epoch": 0.3104531179901301, "grad_norm": 2.0871269702911377, "learning_rate": 8.076695063117141e-06, "loss": 0.8859, "step": 1038 }, { "epoch": 0.3107522057723942, "grad_norm": 2.022071123123169, "learning_rate": 8.072874371894856e-06, "loss": 0.9005, "step": 1039 }, { "epoch": 0.31105129355465827, "grad_norm": 2.7916696071624756, "learning_rate": 8.069050795189732e-06, "loss": 0.9084, "step": 1040 }, { "epoch": 0.3113503813369224, "grad_norm": 1.8590518236160278, "learning_rate": 8.065224336592175e-06, "loss": 0.9087, "step": 1041 }, { "epoch": 0.3116494691191865, "grad_norm": 2.063265085220337, "learning_rate": 8.061394999695295e-06, "loss": 0.8808, "step": 1042 }, { "epoch": 0.31194855690145057, "grad_norm": 5.70310640335083, "learning_rate": 8.057562788094909e-06, "loss": 0.9192, "step": 1043 }, { "epoch": 0.3122476446837147, "grad_norm": 2.3967173099517822, "learning_rate": 8.053727705389527e-06, "loss": 0.9156, "step": 1044 }, { "epoch": 0.31254673246597875, "grad_norm": 3.5446176528930664, "learning_rate": 8.049889755180363e-06, "loss": 0.931, "step": 1045 }, { "epoch": 0.31284582024824287, "grad_norm": 2.718062400817871, "learning_rate": 8.046048941071316e-06, "loss": 0.9092, "step": 1046 }, { "epoch": 0.31314490803050693, "grad_norm": 2.0808985233306885, "learning_rate": 8.042205266668982e-06, "loss": 0.9488, "step": 1047 }, { "epoch": 0.31344399581277105, "grad_norm": 2.050569772720337, "learning_rate": 8.038358735582632e-06, "loss": 0.9085, "step": 1048 }, { "epoch": 0.31374308359503517, "grad_norm": 2.119657039642334, "learning_rate": 8.034509351424231e-06, "loss": 0.9961, "step": 1049 }, { "epoch": 0.31404217137729923, "grad_norm": 1.8796643018722534, "learning_rate": 8.030657117808415e-06, "loss": 0.8285, "step": 1050 }, { "epoch": 0.31434125915956335, "grad_norm": 2.1362431049346924, "learning_rate": 8.026802038352503e-06, "loss": 0.9086, "step": 1051 }, { "epoch": 0.3146403469418274, "grad_norm": 2.0861964225769043, "learning_rate": 8.02294411667648e-06, "loss": 0.8682, "step": 1052 }, { "epoch": 0.3149394347240915, "grad_norm": 2.6638879776000977, "learning_rate": 8.019083356403002e-06, "loss": 0.9752, "step": 1053 }, { "epoch": 0.3152385225063556, "grad_norm": 2.142808675765991, "learning_rate": 8.015219761157387e-06, "loss": 0.9409, "step": 1054 }, { "epoch": 0.3155376102886197, "grad_norm": 2.3526904582977295, "learning_rate": 8.011353334567625e-06, "loss": 0.86, "step": 1055 }, { "epoch": 0.3158366980708838, "grad_norm": 2.0853757858276367, "learning_rate": 8.007484080264355e-06, "loss": 0.8899, "step": 1056 }, { "epoch": 0.3161357858531479, "grad_norm": 2.6986024379730225, "learning_rate": 8.003612001880872e-06, "loss": 0.8822, "step": 1057 }, { "epoch": 0.316434873635412, "grad_norm": 2.1171698570251465, "learning_rate": 7.99973710305313e-06, "loss": 0.8846, "step": 1058 }, { "epoch": 0.31673396141767607, "grad_norm": 2.4689996242523193, "learning_rate": 7.995859387419726e-06, "loss": 0.8971, "step": 1059 }, { "epoch": 0.3170330491999402, "grad_norm": 2.29070782661438, "learning_rate": 7.9919788586219e-06, "loss": 0.8935, "step": 1060 }, { "epoch": 0.31733213698220425, "grad_norm": 2.1644835472106934, "learning_rate": 7.988095520303539e-06, "loss": 0.9459, "step": 1061 }, { "epoch": 0.31763122476446837, "grad_norm": 2.1110427379608154, "learning_rate": 7.984209376111165e-06, "loss": 0.8766, "step": 1062 }, { "epoch": 0.3179303125467325, "grad_norm": 2.1129724979400635, "learning_rate": 7.980320429693934e-06, "loss": 0.9044, "step": 1063 }, { "epoch": 0.31822940032899655, "grad_norm": 2.2810609340667725, "learning_rate": 7.976428684703637e-06, "loss": 0.8955, "step": 1064 }, { "epoch": 0.31852848811126067, "grad_norm": 2.1541552543640137, "learning_rate": 7.97253414479469e-06, "loss": 0.8913, "step": 1065 }, { "epoch": 0.31882757589352473, "grad_norm": 2.3683271408081055, "learning_rate": 7.968636813624134e-06, "loss": 0.9012, "step": 1066 }, { "epoch": 0.31912666367578885, "grad_norm": 2.910146474838257, "learning_rate": 7.964736694851632e-06, "loss": 0.9507, "step": 1067 }, { "epoch": 0.31942575145805296, "grad_norm": 2.6115987300872803, "learning_rate": 7.960833792139461e-06, "loss": 0.8838, "step": 1068 }, { "epoch": 0.319724839240317, "grad_norm": 2.096325397491455, "learning_rate": 7.95692810915252e-06, "loss": 0.9307, "step": 1069 }, { "epoch": 0.32002392702258114, "grad_norm": 2.157743215560913, "learning_rate": 7.953019649558309e-06, "loss": 0.9106, "step": 1070 }, { "epoch": 0.3203230148048452, "grad_norm": 1.819732666015625, "learning_rate": 7.949108417026941e-06, "loss": 0.8854, "step": 1071 }, { "epoch": 0.3206221025871093, "grad_norm": 2.0978963375091553, "learning_rate": 7.945194415231133e-06, "loss": 0.9262, "step": 1072 }, { "epoch": 0.3209211903693734, "grad_norm": 2.1166536808013916, "learning_rate": 7.9412776478462e-06, "loss": 0.9518, "step": 1073 }, { "epoch": 0.3212202781516375, "grad_norm": 2.0870025157928467, "learning_rate": 7.937358118550058e-06, "loss": 0.8643, "step": 1074 }, { "epoch": 0.3215193659339016, "grad_norm": 1.9905844926834106, "learning_rate": 7.933435831023211e-06, "loss": 0.851, "step": 1075 }, { "epoch": 0.3218184537161657, "grad_norm": 2.0716114044189453, "learning_rate": 7.929510788948755e-06, "loss": 0.9127, "step": 1076 }, { "epoch": 0.3221175414984298, "grad_norm": 2.575514554977417, "learning_rate": 7.925582996012375e-06, "loss": 0.8733, "step": 1077 }, { "epoch": 0.32241662928069387, "grad_norm": 2.340322732925415, "learning_rate": 7.921652455902337e-06, "loss": 0.8499, "step": 1078 }, { "epoch": 0.322715717062958, "grad_norm": 2.0147621631622314, "learning_rate": 7.917719172309487e-06, "loss": 0.8763, "step": 1079 }, { "epoch": 0.32301480484522205, "grad_norm": 2.360645294189453, "learning_rate": 7.913783148927246e-06, "loss": 0.9299, "step": 1080 }, { "epoch": 0.32331389262748617, "grad_norm": 2.543062925338745, "learning_rate": 7.909844389451611e-06, "loss": 0.9558, "step": 1081 }, { "epoch": 0.3236129804097503, "grad_norm": 2.123185873031616, "learning_rate": 7.905902897581145e-06, "loss": 0.9162, "step": 1082 }, { "epoch": 0.32391206819201435, "grad_norm": 2.1914279460906982, "learning_rate": 7.901958677016977e-06, "loss": 0.8507, "step": 1083 }, { "epoch": 0.32421115597427846, "grad_norm": 2.2362060546875, "learning_rate": 7.898011731462801e-06, "loss": 0.9069, "step": 1084 }, { "epoch": 0.3245102437565425, "grad_norm": 2.07302188873291, "learning_rate": 7.894062064624865e-06, "loss": 0.8521, "step": 1085 }, { "epoch": 0.32480933153880664, "grad_norm": 2.4338855743408203, "learning_rate": 7.890109680211979e-06, "loss": 0.9655, "step": 1086 }, { "epoch": 0.3251084193210707, "grad_norm": 2.2268922328948975, "learning_rate": 7.886154581935499e-06, "loss": 0.8766, "step": 1087 }, { "epoch": 0.3254075071033348, "grad_norm": 2.1086525917053223, "learning_rate": 7.88219677350933e-06, "loss": 0.8879, "step": 1088 }, { "epoch": 0.32570659488559894, "grad_norm": 2.2017781734466553, "learning_rate": 7.878236258649927e-06, "loss": 0.9098, "step": 1089 }, { "epoch": 0.326005682667863, "grad_norm": 1.9805959463119507, "learning_rate": 7.874273041076283e-06, "loss": 0.866, "step": 1090 }, { "epoch": 0.3263047704501271, "grad_norm": 2.603660821914673, "learning_rate": 7.870307124509926e-06, "loss": 0.8826, "step": 1091 }, { "epoch": 0.3266038582323912, "grad_norm": 5.160576820373535, "learning_rate": 7.86633851267492e-06, "loss": 0.8682, "step": 1092 }, { "epoch": 0.3269029460146553, "grad_norm": 2.3987326622009277, "learning_rate": 7.862367209297864e-06, "loss": 0.9046, "step": 1093 }, { "epoch": 0.3272020337969194, "grad_norm": 2.1719844341278076, "learning_rate": 7.85839321810788e-06, "loss": 0.8858, "step": 1094 }, { "epoch": 0.3275011215791835, "grad_norm": 2.2696585655212402, "learning_rate": 7.854416542836617e-06, "loss": 0.9156, "step": 1095 }, { "epoch": 0.3278002093614476, "grad_norm": 2.1409432888031006, "learning_rate": 7.85043718721824e-06, "loss": 0.9588, "step": 1096 }, { "epoch": 0.32809929714371167, "grad_norm": 2.1379499435424805, "learning_rate": 7.846455154989437e-06, "loss": 0.836, "step": 1097 }, { "epoch": 0.3283983849259758, "grad_norm": 2.1989331245422363, "learning_rate": 7.842470449889403e-06, "loss": 0.8655, "step": 1098 }, { "epoch": 0.32869747270823985, "grad_norm": 2.1201183795928955, "learning_rate": 7.838483075659846e-06, "loss": 0.94, "step": 1099 }, { "epoch": 0.32899656049050396, "grad_norm": 2.4078776836395264, "learning_rate": 7.83449303604498e-06, "loss": 0.9153, "step": 1100 }, { "epoch": 0.3292956482727681, "grad_norm": 2.291666269302368, "learning_rate": 7.830500334791525e-06, "loss": 0.8775, "step": 1101 }, { "epoch": 0.32959473605503214, "grad_norm": 2.211853504180908, "learning_rate": 7.826504975648696e-06, "loss": 0.8186, "step": 1102 }, { "epoch": 0.32989382383729626, "grad_norm": 2.117403030395508, "learning_rate": 7.822506962368204e-06, "loss": 0.9242, "step": 1103 }, { "epoch": 0.3301929116195603, "grad_norm": 2.8207409381866455, "learning_rate": 7.818506298704254e-06, "loss": 0.9693, "step": 1104 }, { "epoch": 0.33049199940182444, "grad_norm": 2.3084661960601807, "learning_rate": 7.814502988413539e-06, "loss": 0.8818, "step": 1105 }, { "epoch": 0.3307910871840885, "grad_norm": 2.0569798946380615, "learning_rate": 7.810497035255239e-06, "loss": 0.8375, "step": 1106 }, { "epoch": 0.3310901749663526, "grad_norm": 2.1192378997802734, "learning_rate": 7.80648844299101e-06, "loss": 0.8936, "step": 1107 }, { "epoch": 0.33138926274861674, "grad_norm": 2.1618270874023438, "learning_rate": 7.802477215384997e-06, "loss": 0.8866, "step": 1108 }, { "epoch": 0.3316883505308808, "grad_norm": 2.058131694793701, "learning_rate": 7.79846335620381e-06, "loss": 0.8348, "step": 1109 }, { "epoch": 0.3319874383131449, "grad_norm": 2.0043764114379883, "learning_rate": 7.794446869216527e-06, "loss": 0.8737, "step": 1110 }, { "epoch": 0.332286526095409, "grad_norm": 2.0539302825927734, "learning_rate": 7.79042775819471e-06, "loss": 0.8649, "step": 1111 }, { "epoch": 0.3325856138776731, "grad_norm": 2.135998487472534, "learning_rate": 7.786406026912368e-06, "loss": 0.876, "step": 1112 }, { "epoch": 0.33288470165993717, "grad_norm": 2.245650291442871, "learning_rate": 7.782381679145979e-06, "loss": 0.9506, "step": 1113 }, { "epoch": 0.3331837894422013, "grad_norm": 2.231684684753418, "learning_rate": 7.778354718674475e-06, "loss": 0.9227, "step": 1114 }, { "epoch": 0.3334828772244654, "grad_norm": 2.085310697555542, "learning_rate": 7.774325149279243e-06, "loss": 0.8595, "step": 1115 }, { "epoch": 0.33378196500672946, "grad_norm": 2.4437756538391113, "learning_rate": 7.770292974744119e-06, "loss": 0.9083, "step": 1116 }, { "epoch": 0.3340810527889936, "grad_norm": 2.05690336227417, "learning_rate": 7.766258198855386e-06, "loss": 0.8608, "step": 1117 }, { "epoch": 0.33438014057125764, "grad_norm": 2.1497645378112793, "learning_rate": 7.76222082540177e-06, "loss": 0.9469, "step": 1118 }, { "epoch": 0.33467922835352176, "grad_norm": 1.937268614768982, "learning_rate": 7.758180858174434e-06, "loss": 0.8738, "step": 1119 }, { "epoch": 0.3349783161357859, "grad_norm": 2.017754316329956, "learning_rate": 7.754138300966978e-06, "loss": 0.9277, "step": 1120 }, { "epoch": 0.33527740391804994, "grad_norm": 2.316157817840576, "learning_rate": 7.750093157575433e-06, "loss": 0.8303, "step": 1121 }, { "epoch": 0.33557649170031406, "grad_norm": 1.9127347469329834, "learning_rate": 7.746045431798264e-06, "loss": 0.8449, "step": 1122 }, { "epoch": 0.3358755794825781, "grad_norm": 2.504678249359131, "learning_rate": 7.74199512743635e-06, "loss": 0.8706, "step": 1123 }, { "epoch": 0.33617466726484224, "grad_norm": 2.361868143081665, "learning_rate": 7.737942248293001e-06, "loss": 0.8485, "step": 1124 }, { "epoch": 0.3364737550471063, "grad_norm": 2.1803417205810547, "learning_rate": 7.733886798173945e-06, "loss": 0.9411, "step": 1125 }, { "epoch": 0.3367728428293704, "grad_norm": 2.341651439666748, "learning_rate": 7.729828780887313e-06, "loss": 0.8792, "step": 1126 }, { "epoch": 0.33707193061163454, "grad_norm": 3.289027214050293, "learning_rate": 7.72576820024366e-06, "loss": 0.887, "step": 1127 }, { "epoch": 0.3373710183938986, "grad_norm": 2.261749029159546, "learning_rate": 7.72170506005594e-06, "loss": 0.8627, "step": 1128 }, { "epoch": 0.3376701061761627, "grad_norm": 2.0747668743133545, "learning_rate": 7.717639364139514e-06, "loss": 0.8781, "step": 1129 }, { "epoch": 0.3379691939584268, "grad_norm": 2.042966842651367, "learning_rate": 7.713571116312143e-06, "loss": 0.9182, "step": 1130 }, { "epoch": 0.3382682817406909, "grad_norm": 2.126767158508301, "learning_rate": 7.709500320393976e-06, "loss": 0.8372, "step": 1131 }, { "epoch": 0.33856736952295496, "grad_norm": 2.3632869720458984, "learning_rate": 7.70542698020757e-06, "loss": 0.9122, "step": 1132 }, { "epoch": 0.3388664573052191, "grad_norm": 2.106071710586548, "learning_rate": 7.70135109957786e-06, "loss": 0.8712, "step": 1133 }, { "epoch": 0.3391655450874832, "grad_norm": 2.173466205596924, "learning_rate": 7.697272682332168e-06, "loss": 0.8518, "step": 1134 }, { "epoch": 0.33946463286974726, "grad_norm": 3.3881030082702637, "learning_rate": 7.6931917323002e-06, "loss": 0.927, "step": 1135 }, { "epoch": 0.3397637206520114, "grad_norm": 2.172980785369873, "learning_rate": 7.689108253314038e-06, "loss": 0.8534, "step": 1136 }, { "epoch": 0.34006280843427544, "grad_norm": 2.206653594970703, "learning_rate": 7.685022249208142e-06, "loss": 0.8989, "step": 1137 }, { "epoch": 0.34036189621653956, "grad_norm": 2.3624019622802734, "learning_rate": 7.680933723819343e-06, "loss": 0.9005, "step": 1138 }, { "epoch": 0.3406609839988036, "grad_norm": 2.09505295753479, "learning_rate": 7.676842680986836e-06, "loss": 0.8593, "step": 1139 }, { "epoch": 0.34096007178106774, "grad_norm": 2.2383313179016113, "learning_rate": 7.67274912455218e-06, "loss": 0.8974, "step": 1140 }, { "epoch": 0.34125915956333186, "grad_norm": 2.109877347946167, "learning_rate": 7.6686530583593e-06, "loss": 0.9207, "step": 1141 }, { "epoch": 0.3415582473455959, "grad_norm": 2.1144893169403076, "learning_rate": 7.664554486254468e-06, "loss": 0.9756, "step": 1142 }, { "epoch": 0.34185733512786004, "grad_norm": 2.1827242374420166, "learning_rate": 7.660453412086323e-06, "loss": 0.8405, "step": 1143 }, { "epoch": 0.3421564229101241, "grad_norm": 2.243661403656006, "learning_rate": 7.656349839705838e-06, "loss": 0.8686, "step": 1144 }, { "epoch": 0.3424555106923882, "grad_norm": 2.1092488765716553, "learning_rate": 7.652243772966345e-06, "loss": 0.9636, "step": 1145 }, { "epoch": 0.3427545984746523, "grad_norm": 2.1706831455230713, "learning_rate": 7.648135215723511e-06, "loss": 0.932, "step": 1146 }, { "epoch": 0.3430536862569164, "grad_norm": 2.085942506790161, "learning_rate": 7.64402417183534e-06, "loss": 0.8828, "step": 1147 }, { "epoch": 0.3433527740391805, "grad_norm": 1.9786372184753418, "learning_rate": 7.639910645162179e-06, "loss": 0.8429, "step": 1148 }, { "epoch": 0.3436518618214446, "grad_norm": 2.2009172439575195, "learning_rate": 7.635794639566697e-06, "loss": 0.8433, "step": 1149 }, { "epoch": 0.3439509496037087, "grad_norm": 2.125955581665039, "learning_rate": 7.631676158913899e-06, "loss": 0.8367, "step": 1150 }, { "epoch": 0.34425003738597276, "grad_norm": 2.240755081176758, "learning_rate": 7.627555207071108e-06, "loss": 0.9468, "step": 1151 }, { "epoch": 0.3445491251682369, "grad_norm": 2.338275671005249, "learning_rate": 7.623431787907971e-06, "loss": 0.8796, "step": 1152 }, { "epoch": 0.344848212950501, "grad_norm": 2.1945836544036865, "learning_rate": 7.61930590529645e-06, "loss": 0.9221, "step": 1153 }, { "epoch": 0.34514730073276506, "grad_norm": 2.0636916160583496, "learning_rate": 7.6151775631108245e-06, "loss": 0.8659, "step": 1154 }, { "epoch": 0.3454463885150292, "grad_norm": 2.5366384983062744, "learning_rate": 7.611046765227675e-06, "loss": 0.8716, "step": 1155 }, { "epoch": 0.34574547629729324, "grad_norm": 2.136756658554077, "learning_rate": 7.606913515525896e-06, "loss": 0.8756, "step": 1156 }, { "epoch": 0.34604456407955736, "grad_norm": 2.010030508041382, "learning_rate": 7.602777817886678e-06, "loss": 0.8625, "step": 1157 }, { "epoch": 0.3463436518618214, "grad_norm": 2.30840802192688, "learning_rate": 7.59863967619352e-06, "loss": 0.9193, "step": 1158 }, { "epoch": 0.34664273964408554, "grad_norm": 1.8152525424957275, "learning_rate": 7.594499094332204e-06, "loss": 0.902, "step": 1159 }, { "epoch": 0.34694182742634966, "grad_norm": 2.1894428730010986, "learning_rate": 7.59035607619081e-06, "loss": 0.8237, "step": 1160 }, { "epoch": 0.3472409152086137, "grad_norm": 3.0160727500915527, "learning_rate": 7.586210625659707e-06, "loss": 0.9296, "step": 1161 }, { "epoch": 0.34754000299087784, "grad_norm": 2.040940046310425, "learning_rate": 7.582062746631542e-06, "loss": 0.8782, "step": 1162 }, { "epoch": 0.3478390907731419, "grad_norm": 2.0317156314849854, "learning_rate": 7.577912443001247e-06, "loss": 0.8564, "step": 1163 }, { "epoch": 0.348138178555406, "grad_norm": 2.3552584648132324, "learning_rate": 7.573759718666031e-06, "loss": 0.9416, "step": 1164 }, { "epoch": 0.3484372663376701, "grad_norm": 2.030806303024292, "learning_rate": 7.569604577525376e-06, "loss": 0.881, "step": 1165 }, { "epoch": 0.3487363541199342, "grad_norm": 1.9438731670379639, "learning_rate": 7.56544702348103e-06, "loss": 0.9088, "step": 1166 }, { "epoch": 0.3490354419021983, "grad_norm": 2.209420919418335, "learning_rate": 7.5612870604370106e-06, "loss": 0.8258, "step": 1167 }, { "epoch": 0.3493345296844624, "grad_norm": 2.3377366065979004, "learning_rate": 7.557124692299593e-06, "loss": 0.9203, "step": 1168 }, { "epoch": 0.3496336174667265, "grad_norm": 2.6400041580200195, "learning_rate": 7.552959922977317e-06, "loss": 0.896, "step": 1169 }, { "epoch": 0.34993270524899056, "grad_norm": 2.0778143405914307, "learning_rate": 7.548792756380972e-06, "loss": 0.8375, "step": 1170 }, { "epoch": 0.3502317930312547, "grad_norm": 2.2017855644226074, "learning_rate": 7.5446231964236025e-06, "loss": 0.8477, "step": 1171 }, { "epoch": 0.35053088081351874, "grad_norm": 2.0633323192596436, "learning_rate": 7.540451247020495e-06, "loss": 0.9097, "step": 1172 }, { "epoch": 0.35082996859578286, "grad_norm": 1.9257756471633911, "learning_rate": 7.536276912089187e-06, "loss": 0.9236, "step": 1173 }, { "epoch": 0.351129056378047, "grad_norm": 2.0126540660858154, "learning_rate": 7.53210019554945e-06, "loss": 0.9088, "step": 1174 }, { "epoch": 0.35142814416031104, "grad_norm": 2.171578884124756, "learning_rate": 7.527921101323292e-06, "loss": 0.9635, "step": 1175 }, { "epoch": 0.35172723194257516, "grad_norm": 1.8915561437606812, "learning_rate": 7.523739633334959e-06, "loss": 0.9117, "step": 1176 }, { "epoch": 0.3520263197248392, "grad_norm": 2.2859981060028076, "learning_rate": 7.5195557955109225e-06, "loss": 0.8845, "step": 1177 }, { "epoch": 0.35232540750710334, "grad_norm": 2.0096209049224854, "learning_rate": 7.515369591779876e-06, "loss": 0.9028, "step": 1178 }, { "epoch": 0.35262449528936746, "grad_norm": 2.0442938804626465, "learning_rate": 7.511181026072741e-06, "loss": 0.8748, "step": 1179 }, { "epoch": 0.3529235830716315, "grad_norm": 2.679999828338623, "learning_rate": 7.5069901023226545e-06, "loss": 0.9493, "step": 1180 }, { "epoch": 0.35322267085389564, "grad_norm": 2.1136202812194824, "learning_rate": 7.502796824464966e-06, "loss": 0.8618, "step": 1181 }, { "epoch": 0.3535217586361597, "grad_norm": 2.3624465465545654, "learning_rate": 7.498601196437238e-06, "loss": 0.8938, "step": 1182 }, { "epoch": 0.3538208464184238, "grad_norm": 2.252858877182007, "learning_rate": 7.494403222179235e-06, "loss": 0.9307, "step": 1183 }, { "epoch": 0.3541199342006879, "grad_norm": 1.959756851196289, "learning_rate": 7.490202905632933e-06, "loss": 0.9054, "step": 1184 }, { "epoch": 0.354419021982952, "grad_norm": 2.089568853378296, "learning_rate": 7.4860002507425004e-06, "loss": 0.8152, "step": 1185 }, { "epoch": 0.3547181097652161, "grad_norm": 2.2209372520446777, "learning_rate": 7.481795261454304e-06, "loss": 0.9346, "step": 1186 }, { "epoch": 0.3550171975474802, "grad_norm": 1.96195387840271, "learning_rate": 7.477587941716904e-06, "loss": 0.8953, "step": 1187 }, { "epoch": 0.3553162853297443, "grad_norm": 1.9518834352493286, "learning_rate": 7.4733782954810444e-06, "loss": 0.8784, "step": 1188 }, { "epoch": 0.35561537311200836, "grad_norm": 2.2924342155456543, "learning_rate": 7.469166326699658e-06, "loss": 0.9139, "step": 1189 }, { "epoch": 0.3559144608942725, "grad_norm": 1.926132082939148, "learning_rate": 7.4649520393278575e-06, "loss": 0.8736, "step": 1190 }, { "epoch": 0.35621354867653654, "grad_norm": 2.0872929096221924, "learning_rate": 7.460735437322933e-06, "loss": 0.8312, "step": 1191 }, { "epoch": 0.35651263645880066, "grad_norm": 2.3652865886688232, "learning_rate": 7.456516524644347e-06, "loss": 0.9327, "step": 1192 }, { "epoch": 0.3568117242410648, "grad_norm": 2.0446813106536865, "learning_rate": 7.452295305253731e-06, "loss": 0.868, "step": 1193 }, { "epoch": 0.35711081202332884, "grad_norm": 2.0284907817840576, "learning_rate": 7.448071783114887e-06, "loss": 0.8397, "step": 1194 }, { "epoch": 0.35740989980559296, "grad_norm": 1.9754235744476318, "learning_rate": 7.443845962193775e-06, "loss": 0.8364, "step": 1195 }, { "epoch": 0.357708987587857, "grad_norm": 2.0439059734344482, "learning_rate": 7.439617846458513e-06, "loss": 0.8808, "step": 1196 }, { "epoch": 0.35800807537012114, "grad_norm": 2.238527297973633, "learning_rate": 7.435387439879378e-06, "loss": 0.9408, "step": 1197 }, { "epoch": 0.3583071631523852, "grad_norm": 2.151413917541504, "learning_rate": 7.431154746428794e-06, "loss": 0.8905, "step": 1198 }, { "epoch": 0.3586062509346493, "grad_norm": 2.4831395149230957, "learning_rate": 7.4269197700813375e-06, "loss": 0.8881, "step": 1199 }, { "epoch": 0.35890533871691344, "grad_norm": 2.072937488555908, "learning_rate": 7.4226825148137225e-06, "loss": 0.8576, "step": 1200 }, { "epoch": 0.3592044264991775, "grad_norm": 2.2575669288635254, "learning_rate": 7.418442984604805e-06, "loss": 0.965, "step": 1201 }, { "epoch": 0.3595035142814416, "grad_norm": 2.3030834197998047, "learning_rate": 7.414201183435581e-06, "loss": 0.8716, "step": 1202 }, { "epoch": 0.3598026020637057, "grad_norm": 2.022218704223633, "learning_rate": 7.409957115289175e-06, "loss": 0.8812, "step": 1203 }, { "epoch": 0.3601016898459698, "grad_norm": 3.501890182495117, "learning_rate": 7.40571078415084e-06, "loss": 0.904, "step": 1204 }, { "epoch": 0.36040077762823386, "grad_norm": 2.0927236080169678, "learning_rate": 7.401462194007957e-06, "loss": 0.9034, "step": 1205 }, { "epoch": 0.360699865410498, "grad_norm": 2.082144021987915, "learning_rate": 7.397211348850025e-06, "loss": 0.8188, "step": 1206 }, { "epoch": 0.3609989531927621, "grad_norm": 2.34885835647583, "learning_rate": 7.392958252668663e-06, "loss": 0.898, "step": 1207 }, { "epoch": 0.36129804097502616, "grad_norm": 2.5679993629455566, "learning_rate": 7.388702909457603e-06, "loss": 0.9959, "step": 1208 }, { "epoch": 0.3615971287572903, "grad_norm": 1.9691216945648193, "learning_rate": 7.384445323212687e-06, "loss": 0.85, "step": 1209 }, { "epoch": 0.36189621653955434, "grad_norm": 1.9078412055969238, "learning_rate": 7.380185497931862e-06, "loss": 0.8748, "step": 1210 }, { "epoch": 0.36219530432181846, "grad_norm": 2.0111396312713623, "learning_rate": 7.375923437615179e-06, "loss": 0.8607, "step": 1211 }, { "epoch": 0.3624943921040826, "grad_norm": 1.8407340049743652, "learning_rate": 7.371659146264787e-06, "loss": 0.8558, "step": 1212 }, { "epoch": 0.36279347988634664, "grad_norm": 2.1526038646698, "learning_rate": 7.367392627884931e-06, "loss": 0.8572, "step": 1213 }, { "epoch": 0.36309256766861076, "grad_norm": 2.1646926403045654, "learning_rate": 7.363123886481947e-06, "loss": 0.9037, "step": 1214 }, { "epoch": 0.3633916554508748, "grad_norm": 2.0580105781555176, "learning_rate": 7.3588529260642564e-06, "loss": 0.9252, "step": 1215 }, { "epoch": 0.36369074323313894, "grad_norm": 3.504513740539551, "learning_rate": 7.3545797506423655e-06, "loss": 0.863, "step": 1216 }, { "epoch": 0.363989831015403, "grad_norm": 1.9755237102508545, "learning_rate": 7.3503043642288614e-06, "loss": 0.8212, "step": 1217 }, { "epoch": 0.3642889187976671, "grad_norm": 2.4403834342956543, "learning_rate": 7.3460267708384084e-06, "loss": 0.8984, "step": 1218 }, { "epoch": 0.36458800657993123, "grad_norm": 1.9869738817214966, "learning_rate": 7.3417469744877375e-06, "loss": 0.8825, "step": 1219 }, { "epoch": 0.3648870943621953, "grad_norm": 2.0719242095947266, "learning_rate": 7.337464979195658e-06, "loss": 0.8607, "step": 1220 }, { "epoch": 0.3651861821444594, "grad_norm": 2.0225322246551514, "learning_rate": 7.333180788983034e-06, "loss": 0.8798, "step": 1221 }, { "epoch": 0.3654852699267235, "grad_norm": 2.046408176422119, "learning_rate": 7.328894407872797e-06, "loss": 0.932, "step": 1222 }, { "epoch": 0.3657843577089876, "grad_norm": 2.867398262023926, "learning_rate": 7.324605839889936e-06, "loss": 0.9085, "step": 1223 }, { "epoch": 0.36608344549125166, "grad_norm": 2.5240366458892822, "learning_rate": 7.320315089061486e-06, "loss": 0.956, "step": 1224 }, { "epoch": 0.3663825332735158, "grad_norm": 2.6441283226013184, "learning_rate": 7.3160221594165415e-06, "loss": 0.8927, "step": 1225 }, { "epoch": 0.3666816210557799, "grad_norm": 3.0056354999542236, "learning_rate": 7.3117270549862385e-06, "loss": 0.8722, "step": 1226 }, { "epoch": 0.36698070883804396, "grad_norm": 1.8995763063430786, "learning_rate": 7.3074297798037515e-06, "loss": 0.8394, "step": 1227 }, { "epoch": 0.3672797966203081, "grad_norm": 1.9596030712127686, "learning_rate": 7.303130337904303e-06, "loss": 0.861, "step": 1228 }, { "epoch": 0.36757888440257214, "grad_norm": 2.03892183303833, "learning_rate": 7.298828733325138e-06, "loss": 0.9577, "step": 1229 }, { "epoch": 0.36787797218483625, "grad_norm": 1.9954408407211304, "learning_rate": 7.294524970105543e-06, "loss": 0.9529, "step": 1230 }, { "epoch": 0.3681770599671003, "grad_norm": 2.1314857006073, "learning_rate": 7.290219052286826e-06, "loss": 0.8712, "step": 1231 }, { "epoch": 0.36847614774936444, "grad_norm": 1.9649707078933716, "learning_rate": 7.285910983912317e-06, "loss": 0.8842, "step": 1232 }, { "epoch": 0.36877523553162855, "grad_norm": 1.9674016237258911, "learning_rate": 7.281600769027371e-06, "loss": 0.8521, "step": 1233 }, { "epoch": 0.3690743233138926, "grad_norm": 2.0994722843170166, "learning_rate": 7.277288411679352e-06, "loss": 0.8957, "step": 1234 }, { "epoch": 0.36937341109615673, "grad_norm": 3.112894058227539, "learning_rate": 7.272973915917642e-06, "loss": 0.8898, "step": 1235 }, { "epoch": 0.3696724988784208, "grad_norm": 2.205960512161255, "learning_rate": 7.268657285793625e-06, "loss": 0.8636, "step": 1236 }, { "epoch": 0.3699715866606849, "grad_norm": 2.0472023487091064, "learning_rate": 7.264338525360695e-06, "loss": 0.9346, "step": 1237 }, { "epoch": 0.37027067444294903, "grad_norm": 1.9783473014831543, "learning_rate": 7.260017638674244e-06, "loss": 0.8592, "step": 1238 }, { "epoch": 0.3705697622252131, "grad_norm": 1.916250467300415, "learning_rate": 7.255694629791659e-06, "loss": 0.8718, "step": 1239 }, { "epoch": 0.3708688500074772, "grad_norm": 1.9302411079406738, "learning_rate": 7.251369502772318e-06, "loss": 0.851, "step": 1240 }, { "epoch": 0.3711679377897413, "grad_norm": 2.1196632385253906, "learning_rate": 7.247042261677597e-06, "loss": 0.8597, "step": 1241 }, { "epoch": 0.3714670255720054, "grad_norm": 2.077707290649414, "learning_rate": 7.242712910570846e-06, "loss": 0.9129, "step": 1242 }, { "epoch": 0.37176611335426946, "grad_norm": 2.0523624420166016, "learning_rate": 7.238381453517405e-06, "loss": 0.8518, "step": 1243 }, { "epoch": 0.3720652011365336, "grad_norm": 2.268247365951538, "learning_rate": 7.234047894584586e-06, "loss": 0.8988, "step": 1244 }, { "epoch": 0.3723642889187977, "grad_norm": 2.2769861221313477, "learning_rate": 7.229712237841679e-06, "loss": 0.9792, "step": 1245 }, { "epoch": 0.37266337670106175, "grad_norm": 2.2180655002593994, "learning_rate": 7.225374487359937e-06, "loss": 0.8797, "step": 1246 }, { "epoch": 0.3729624644833259, "grad_norm": 2.1580779552459717, "learning_rate": 7.221034647212588e-06, "loss": 0.8424, "step": 1247 }, { "epoch": 0.37326155226558994, "grad_norm": 1.9612467288970947, "learning_rate": 7.216692721474816e-06, "loss": 0.8962, "step": 1248 }, { "epoch": 0.37356064004785405, "grad_norm": 1.9946616888046265, "learning_rate": 7.212348714223767e-06, "loss": 0.8891, "step": 1249 }, { "epoch": 0.3738597278301181, "grad_norm": 2.354027509689331, "learning_rate": 7.208002629538537e-06, "loss": 0.961, "step": 1250 }, { "epoch": 0.37415881561238223, "grad_norm": 2.660604238510132, "learning_rate": 7.203654471500179e-06, "loss": 0.916, "step": 1251 }, { "epoch": 0.37445790339464635, "grad_norm": 2.0297999382019043, "learning_rate": 7.199304244191687e-06, "loss": 0.9119, "step": 1252 }, { "epoch": 0.3747569911769104, "grad_norm": 2.479250907897949, "learning_rate": 7.1949519516980005e-06, "loss": 0.8735, "step": 1253 }, { "epoch": 0.37505607895917453, "grad_norm": 2.062535285949707, "learning_rate": 7.190597598106001e-06, "loss": 0.8888, "step": 1254 }, { "epoch": 0.3753551667414386, "grad_norm": 2.3791897296905518, "learning_rate": 7.186241187504499e-06, "loss": 0.9029, "step": 1255 }, { "epoch": 0.3756542545237027, "grad_norm": 2.263935089111328, "learning_rate": 7.1818827239842446e-06, "loss": 0.8815, "step": 1256 }, { "epoch": 0.3759533423059668, "grad_norm": 2.1895177364349365, "learning_rate": 7.177522211637906e-06, "loss": 0.8636, "step": 1257 }, { "epoch": 0.3762524300882309, "grad_norm": 2.2431154251098633, "learning_rate": 7.173159654560087e-06, "loss": 0.917, "step": 1258 }, { "epoch": 0.376551517870495, "grad_norm": 2.231492280960083, "learning_rate": 7.168795056847301e-06, "loss": 0.8775, "step": 1259 }, { "epoch": 0.3768506056527591, "grad_norm": 3.1531014442443848, "learning_rate": 7.164428422597982e-06, "loss": 0.8927, "step": 1260 }, { "epoch": 0.3771496934350232, "grad_norm": 2.3681740760803223, "learning_rate": 7.1600597559124765e-06, "loss": 0.9022, "step": 1261 }, { "epoch": 0.37744878121728725, "grad_norm": 1.8653111457824707, "learning_rate": 7.155689060893038e-06, "loss": 0.8394, "step": 1262 }, { "epoch": 0.3777478689995514, "grad_norm": 2.056736946105957, "learning_rate": 7.151316341643828e-06, "loss": 0.939, "step": 1263 }, { "epoch": 0.37804695678181544, "grad_norm": 2.360625982284546, "learning_rate": 7.146941602270905e-06, "loss": 0.8903, "step": 1264 }, { "epoch": 0.37834604456407955, "grad_norm": 2.1230721473693848, "learning_rate": 7.142564846882227e-06, "loss": 0.8837, "step": 1265 }, { "epoch": 0.37864513234634367, "grad_norm": 2.260882616043091, "learning_rate": 7.1381860795876415e-06, "loss": 0.8897, "step": 1266 }, { "epoch": 0.37894422012860773, "grad_norm": 2.341684579849243, "learning_rate": 7.13380530449889e-06, "loss": 0.9247, "step": 1267 }, { "epoch": 0.37924330791087185, "grad_norm": 2.3639349937438965, "learning_rate": 7.129422525729594e-06, "loss": 0.8895, "step": 1268 }, { "epoch": 0.3795423956931359, "grad_norm": 2.0694580078125, "learning_rate": 7.125037747395264e-06, "loss": 0.9142, "step": 1269 }, { "epoch": 0.37984148347540003, "grad_norm": 2.2114362716674805, "learning_rate": 7.120650973613279e-06, "loss": 0.8983, "step": 1270 }, { "epoch": 0.38014057125766415, "grad_norm": 2.0884993076324463, "learning_rate": 7.116262208502901e-06, "loss": 0.9299, "step": 1271 }, { "epoch": 0.3804396590399282, "grad_norm": 2.7368602752685547, "learning_rate": 7.111871456185253e-06, "loss": 0.9581, "step": 1272 }, { "epoch": 0.38073874682219233, "grad_norm": 2.0372238159179688, "learning_rate": 7.107478720783332e-06, "loss": 0.8375, "step": 1273 }, { "epoch": 0.3810378346044564, "grad_norm": 2.062199354171753, "learning_rate": 7.1030840064219906e-06, "loss": 0.8586, "step": 1274 }, { "epoch": 0.3813369223867205, "grad_norm": 2.0916006565093994, "learning_rate": 7.098687317227943e-06, "loss": 0.8436, "step": 1275 }, { "epoch": 0.3816360101689846, "grad_norm": 3.2354373931884766, "learning_rate": 7.09428865732976e-06, "loss": 0.8561, "step": 1276 }, { "epoch": 0.3819350979512487, "grad_norm": 2.2136874198913574, "learning_rate": 7.089888030857857e-06, "loss": 0.8683, "step": 1277 }, { "epoch": 0.3822341857335128, "grad_norm": 2.2299437522888184, "learning_rate": 7.0854854419445e-06, "loss": 0.9079, "step": 1278 }, { "epoch": 0.3825332735157769, "grad_norm": 2.0043888092041016, "learning_rate": 7.0810808947237975e-06, "loss": 0.8632, "step": 1279 }, { "epoch": 0.382832361298041, "grad_norm": 2.0726428031921387, "learning_rate": 7.076674393331697e-06, "loss": 0.8782, "step": 1280 }, { "epoch": 0.38313144908030505, "grad_norm": 2.341015577316284, "learning_rate": 7.0722659419059806e-06, "loss": 0.8188, "step": 1281 }, { "epoch": 0.38343053686256917, "grad_norm": 2.1638436317443848, "learning_rate": 7.0678555445862605e-06, "loss": 0.9162, "step": 1282 }, { "epoch": 0.38372962464483323, "grad_norm": 2.074019193649292, "learning_rate": 7.063443205513975e-06, "loss": 0.8906, "step": 1283 }, { "epoch": 0.38402871242709735, "grad_norm": 2.3808162212371826, "learning_rate": 7.059028928832394e-06, "loss": 0.8561, "step": 1284 }, { "epoch": 0.38432780020936147, "grad_norm": 2.4247355461120605, "learning_rate": 7.054612718686593e-06, "loss": 0.8707, "step": 1285 }, { "epoch": 0.38462688799162553, "grad_norm": 2.2808637619018555, "learning_rate": 7.0501945792234776e-06, "loss": 0.9352, "step": 1286 }, { "epoch": 0.38492597577388965, "grad_norm": 2.012786865234375, "learning_rate": 7.045774514591753e-06, "loss": 0.8899, "step": 1287 }, { "epoch": 0.3852250635561537, "grad_norm": 2.1573755741119385, "learning_rate": 7.041352528941939e-06, "loss": 0.8345, "step": 1288 }, { "epoch": 0.38552415133841783, "grad_norm": 2.168395757675171, "learning_rate": 7.036928626426358e-06, "loss": 0.9002, "step": 1289 }, { "epoch": 0.3858232391206819, "grad_norm": 2.1716387271881104, "learning_rate": 7.0325028111991325e-06, "loss": 0.8961, "step": 1290 }, { "epoch": 0.386122326902946, "grad_norm": 2.0443410873413086, "learning_rate": 7.02807508741618e-06, "loss": 0.9371, "step": 1291 }, { "epoch": 0.38642141468521013, "grad_norm": 2.012843370437622, "learning_rate": 7.0236454592352065e-06, "loss": 0.9642, "step": 1292 }, { "epoch": 0.3867205024674742, "grad_norm": 2.497241735458374, "learning_rate": 7.019213930815718e-06, "loss": 0.81, "step": 1293 }, { "epoch": 0.3870195902497383, "grad_norm": 1.9988161325454712, "learning_rate": 7.01478050631899e-06, "loss": 0.8626, "step": 1294 }, { "epoch": 0.3873186780320024, "grad_norm": 2.1592330932617188, "learning_rate": 7.010345189908092e-06, "loss": 0.9268, "step": 1295 }, { "epoch": 0.3876177658142665, "grad_norm": 2.2487714290618896, "learning_rate": 7.0059079857478596e-06, "loss": 0.9445, "step": 1296 }, { "epoch": 0.3879168535965306, "grad_norm": 2.8408148288726807, "learning_rate": 7.001468898004907e-06, "loss": 0.8775, "step": 1297 }, { "epoch": 0.38821594137879467, "grad_norm": 2.17474627494812, "learning_rate": 6.997027930847614e-06, "loss": 0.9132, "step": 1298 }, { "epoch": 0.3885150291610588, "grad_norm": 2.2820117473602295, "learning_rate": 6.992585088446129e-06, "loss": 0.9293, "step": 1299 }, { "epoch": 0.38881411694332285, "grad_norm": 2.313544750213623, "learning_rate": 6.988140374972357e-06, "loss": 0.9317, "step": 1300 }, { "epoch": 0.38911320472558697, "grad_norm": 3.1286370754241943, "learning_rate": 6.983693794599959e-06, "loss": 0.8257, "step": 1301 }, { "epoch": 0.38941229250785103, "grad_norm": 2.1234028339385986, "learning_rate": 6.979245351504358e-06, "loss": 0.873, "step": 1302 }, { "epoch": 0.38971138029011515, "grad_norm": 2.1605453491210938, "learning_rate": 6.974795049862715e-06, "loss": 0.9154, "step": 1303 }, { "epoch": 0.39001046807237927, "grad_norm": 2.0435287952423096, "learning_rate": 6.970342893853943e-06, "loss": 0.8755, "step": 1304 }, { "epoch": 0.39030955585464333, "grad_norm": 2.253572463989258, "learning_rate": 6.965888887658695e-06, "loss": 0.8839, "step": 1305 }, { "epoch": 0.39060864363690745, "grad_norm": 2.8019046783447266, "learning_rate": 6.961433035459361e-06, "loss": 0.8291, "step": 1306 }, { "epoch": 0.3909077314191715, "grad_norm": 2.07292103767395, "learning_rate": 6.956975341440061e-06, "loss": 0.8599, "step": 1307 }, { "epoch": 0.39120681920143563, "grad_norm": 2.2087161540985107, "learning_rate": 6.952515809786652e-06, "loss": 0.7995, "step": 1308 }, { "epoch": 0.3915059069836997, "grad_norm": 2.359589099884033, "learning_rate": 6.948054444686709e-06, "loss": 0.8787, "step": 1309 }, { "epoch": 0.3918049947659638, "grad_norm": 2.228322982788086, "learning_rate": 6.943591250329534e-06, "loss": 0.8435, "step": 1310 }, { "epoch": 0.39210408254822793, "grad_norm": 2.437018632888794, "learning_rate": 6.939126230906144e-06, "loss": 0.9286, "step": 1311 }, { "epoch": 0.392403170330492, "grad_norm": 2.120849370956421, "learning_rate": 6.934659390609271e-06, "loss": 0.9308, "step": 1312 }, { "epoch": 0.3927022581127561, "grad_norm": 2.212224006652832, "learning_rate": 6.930190733633355e-06, "loss": 0.8822, "step": 1313 }, { "epoch": 0.39300134589502017, "grad_norm": 2.5588886737823486, "learning_rate": 6.925720264174543e-06, "loss": 0.9232, "step": 1314 }, { "epoch": 0.3933004336772843, "grad_norm": 1.9439200162887573, "learning_rate": 6.921247986430686e-06, "loss": 0.842, "step": 1315 }, { "epoch": 0.39359952145954835, "grad_norm": 2.1173386573791504, "learning_rate": 6.9167739046013305e-06, "loss": 0.8909, "step": 1316 }, { "epoch": 0.39389860924181247, "grad_norm": 2.2649195194244385, "learning_rate": 6.912298022887716e-06, "loss": 0.9528, "step": 1317 }, { "epoch": 0.3941976970240766, "grad_norm": 2.464510917663574, "learning_rate": 6.907820345492775e-06, "loss": 0.8315, "step": 1318 }, { "epoch": 0.39449678480634065, "grad_norm": 2.4125235080718994, "learning_rate": 6.903340876621125e-06, "loss": 0.8804, "step": 1319 }, { "epoch": 0.39479587258860477, "grad_norm": 2.3536124229431152, "learning_rate": 6.8988596204790655e-06, "loss": 0.89, "step": 1320 }, { "epoch": 0.39509496037086883, "grad_norm": 2.231170177459717, "learning_rate": 6.894376581274578e-06, "loss": 0.944, "step": 1321 }, { "epoch": 0.39539404815313295, "grad_norm": 2.029658555984497, "learning_rate": 6.889891763217307e-06, "loss": 0.9074, "step": 1322 }, { "epoch": 0.39569313593539707, "grad_norm": 2.3207802772521973, "learning_rate": 6.8854051705185825e-06, "loss": 0.9324, "step": 1323 }, { "epoch": 0.39599222371766113, "grad_norm": 2.5107626914978027, "learning_rate": 6.880916807391388e-06, "loss": 0.8503, "step": 1324 }, { "epoch": 0.39629131149992525, "grad_norm": 2.097092866897583, "learning_rate": 6.876426678050379e-06, "loss": 0.8834, "step": 1325 }, { "epoch": 0.3965903992821893, "grad_norm": 2.1833341121673584, "learning_rate": 6.871934786711866e-06, "loss": 0.9051, "step": 1326 }, { "epoch": 0.39688948706445343, "grad_norm": 2.198503255844116, "learning_rate": 6.86744113759381e-06, "loss": 0.853, "step": 1327 }, { "epoch": 0.3971885748467175, "grad_norm": 2.312847852706909, "learning_rate": 6.862945734915829e-06, "loss": 0.8508, "step": 1328 }, { "epoch": 0.3974876626289816, "grad_norm": 1.9972444772720337, "learning_rate": 6.858448582899183e-06, "loss": 0.8712, "step": 1329 }, { "epoch": 0.3977867504112457, "grad_norm": 2.370939016342163, "learning_rate": 6.8539496857667785e-06, "loss": 0.946, "step": 1330 }, { "epoch": 0.3980858381935098, "grad_norm": 2.0037453174591064, "learning_rate": 6.849449047743158e-06, "loss": 0.9128, "step": 1331 }, { "epoch": 0.3983849259757739, "grad_norm": 2.085775375366211, "learning_rate": 6.844946673054498e-06, "loss": 0.9573, "step": 1332 }, { "epoch": 0.39868401375803797, "grad_norm": 2.3400015830993652, "learning_rate": 6.840442565928609e-06, "loss": 0.9057, "step": 1333 }, { "epoch": 0.3989831015403021, "grad_norm": 3.154818296432495, "learning_rate": 6.8359367305949256e-06, "loss": 0.936, "step": 1334 }, { "epoch": 0.39928218932256615, "grad_norm": 2.1518290042877197, "learning_rate": 6.831429171284506e-06, "loss": 0.8431, "step": 1335 }, { "epoch": 0.39958127710483027, "grad_norm": 2.1221060752868652, "learning_rate": 6.8269198922300274e-06, "loss": 0.8526, "step": 1336 }, { "epoch": 0.3998803648870944, "grad_norm": 2.02982234954834, "learning_rate": 6.822408897665782e-06, "loss": 0.8875, "step": 1337 }, { "epoch": 0.40017945266935845, "grad_norm": 2.2443315982818604, "learning_rate": 6.817896191827673e-06, "loss": 0.9422, "step": 1338 }, { "epoch": 0.40047854045162257, "grad_norm": 2.8562281131744385, "learning_rate": 6.81338177895321e-06, "loss": 0.944, "step": 1339 }, { "epoch": 0.40077762823388663, "grad_norm": 2.538736343383789, "learning_rate": 6.808865663281504e-06, "loss": 0.9058, "step": 1340 }, { "epoch": 0.40107671601615075, "grad_norm": 1.9902311563491821, "learning_rate": 6.8043478490532695e-06, "loss": 0.9178, "step": 1341 }, { "epoch": 0.4013758037984148, "grad_norm": 2.3839797973632812, "learning_rate": 6.799828340510811e-06, "loss": 0.9202, "step": 1342 }, { "epoch": 0.4016748915806789, "grad_norm": 2.066250801086426, "learning_rate": 6.795307141898027e-06, "loss": 0.9348, "step": 1343 }, { "epoch": 0.40197397936294305, "grad_norm": 2.2753660678863525, "learning_rate": 6.790784257460403e-06, "loss": 0.8549, "step": 1344 }, { "epoch": 0.4022730671452071, "grad_norm": 2.111600160598755, "learning_rate": 6.786259691445005e-06, "loss": 0.9319, "step": 1345 }, { "epoch": 0.4025721549274712, "grad_norm": 2.012474298477173, "learning_rate": 6.781733448100482e-06, "loss": 0.908, "step": 1346 }, { "epoch": 0.4028712427097353, "grad_norm": 2.0629093647003174, "learning_rate": 6.777205531677052e-06, "loss": 0.9229, "step": 1347 }, { "epoch": 0.4031703304919994, "grad_norm": 2.1048030853271484, "learning_rate": 6.772675946426511e-06, "loss": 0.9399, "step": 1348 }, { "epoch": 0.40346941827426347, "grad_norm": 2.3042640686035156, "learning_rate": 6.768144696602219e-06, "loss": 0.9429, "step": 1349 }, { "epoch": 0.4037685060565276, "grad_norm": 2.3745038509368896, "learning_rate": 6.763611786459097e-06, "loss": 0.8895, "step": 1350 }, { "epoch": 0.4040675938387917, "grad_norm": 2.1725432872772217, "learning_rate": 6.759077220253628e-06, "loss": 0.8788, "step": 1351 }, { "epoch": 0.40436668162105577, "grad_norm": 2.019379138946533, "learning_rate": 6.7545410022438495e-06, "loss": 0.9233, "step": 1352 }, { "epoch": 0.4046657694033199, "grad_norm": 2.4449126720428467, "learning_rate": 6.750003136689349e-06, "loss": 0.9178, "step": 1353 }, { "epoch": 0.40496485718558395, "grad_norm": 2.242352247238159, "learning_rate": 6.745463627851261e-06, "loss": 0.9093, "step": 1354 }, { "epoch": 0.40526394496784807, "grad_norm": 1.9235917329788208, "learning_rate": 6.740922479992264e-06, "loss": 0.8459, "step": 1355 }, { "epoch": 0.4055630327501122, "grad_norm": 2.8722193241119385, "learning_rate": 6.736379697376578e-06, "loss": 0.8775, "step": 1356 }, { "epoch": 0.40586212053237625, "grad_norm": 2.247926712036133, "learning_rate": 6.731835284269952e-06, "loss": 0.8896, "step": 1357 }, { "epoch": 0.40616120831464037, "grad_norm": 2.240703582763672, "learning_rate": 6.727289244939671e-06, "loss": 0.9577, "step": 1358 }, { "epoch": 0.4064602960969044, "grad_norm": 2.1739118099212646, "learning_rate": 6.722741583654545e-06, "loss": 0.9411, "step": 1359 }, { "epoch": 0.40675938387916855, "grad_norm": 2.339280128479004, "learning_rate": 6.718192304684909e-06, "loss": 0.9317, "step": 1360 }, { "epoch": 0.4070584716614326, "grad_norm": 2.0757529735565186, "learning_rate": 6.713641412302614e-06, "loss": 0.8411, "step": 1361 }, { "epoch": 0.4073575594436967, "grad_norm": 2.3165841102600098, "learning_rate": 6.7090889107810275e-06, "loss": 0.8341, "step": 1362 }, { "epoch": 0.40765664722596084, "grad_norm": 2.135087013244629, "learning_rate": 6.704534804395029e-06, "loss": 0.8995, "step": 1363 }, { "epoch": 0.4079557350082249, "grad_norm": 2.035902500152588, "learning_rate": 6.699979097421004e-06, "loss": 0.9223, "step": 1364 }, { "epoch": 0.408254822790489, "grad_norm": 1.9836678504943848, "learning_rate": 6.695421794136843e-06, "loss": 0.8844, "step": 1365 }, { "epoch": 0.4085539105727531, "grad_norm": 2.4977633953094482, "learning_rate": 6.690862898821928e-06, "loss": 0.8862, "step": 1366 }, { "epoch": 0.4088529983550172, "grad_norm": 2.736335515975952, "learning_rate": 6.686302415757149e-06, "loss": 0.8494, "step": 1367 }, { "epoch": 0.40915208613728127, "grad_norm": 2.1245808601379395, "learning_rate": 6.681740349224873e-06, "loss": 0.8969, "step": 1368 }, { "epoch": 0.4094511739195454, "grad_norm": 2.0335264205932617, "learning_rate": 6.677176703508963e-06, "loss": 0.8801, "step": 1369 }, { "epoch": 0.4097502617018095, "grad_norm": 2.0539474487304688, "learning_rate": 6.672611482894763e-06, "loss": 0.8746, "step": 1370 }, { "epoch": 0.41004934948407357, "grad_norm": 2.055691957473755, "learning_rate": 6.668044691669094e-06, "loss": 0.9886, "step": 1371 }, { "epoch": 0.4103484372663377, "grad_norm": 1.9550918340682983, "learning_rate": 6.663476334120254e-06, "loss": 0.8805, "step": 1372 }, { "epoch": 0.41064752504860175, "grad_norm": 2.0324432849884033, "learning_rate": 6.658906414538009e-06, "loss": 0.9073, "step": 1373 }, { "epoch": 0.41094661283086587, "grad_norm": 2.0042645931243896, "learning_rate": 6.6543349372135946e-06, "loss": 0.9355, "step": 1374 }, { "epoch": 0.4112457006131299, "grad_norm": 2.2687790393829346, "learning_rate": 6.649761906439708e-06, "loss": 0.8625, "step": 1375 }, { "epoch": 0.41154478839539405, "grad_norm": 2.0998497009277344, "learning_rate": 6.6451873265105045e-06, "loss": 0.9618, "step": 1376 }, { "epoch": 0.41184387617765816, "grad_norm": 2.288698434829712, "learning_rate": 6.6406112017215966e-06, "loss": 0.8238, "step": 1377 }, { "epoch": 0.4121429639599222, "grad_norm": 2.2064168453216553, "learning_rate": 6.6360335363700435e-06, "loss": 0.9414, "step": 1378 }, { "epoch": 0.41244205174218634, "grad_norm": 2.162803888320923, "learning_rate": 6.631454334754353e-06, "loss": 0.8818, "step": 1379 }, { "epoch": 0.4127411395244504, "grad_norm": 2.384692668914795, "learning_rate": 6.626873601174478e-06, "loss": 0.8921, "step": 1380 }, { "epoch": 0.4130402273067145, "grad_norm": 2.3181583881378174, "learning_rate": 6.622291339931806e-06, "loss": 0.9545, "step": 1381 }, { "epoch": 0.41333931508897864, "grad_norm": 2.6704277992248535, "learning_rate": 6.61770755532916e-06, "loss": 0.8492, "step": 1382 }, { "epoch": 0.4136384028712427, "grad_norm": 2.270303249359131, "learning_rate": 6.613122251670795e-06, "loss": 0.8922, "step": 1383 }, { "epoch": 0.4139374906535068, "grad_norm": 2.2969107627868652, "learning_rate": 6.608535433262391e-06, "loss": 0.8437, "step": 1384 }, { "epoch": 0.4142365784357709, "grad_norm": 2.187208890914917, "learning_rate": 6.60394710441105e-06, "loss": 0.8977, "step": 1385 }, { "epoch": 0.414535666218035, "grad_norm": 2.2451817989349365, "learning_rate": 6.599357269425294e-06, "loss": 0.8971, "step": 1386 }, { "epoch": 0.41483475400029907, "grad_norm": 2.226713180541992, "learning_rate": 6.594765932615059e-06, "loss": 0.8703, "step": 1387 }, { "epoch": 0.4151338417825632, "grad_norm": 2.2107319831848145, "learning_rate": 6.59017309829169e-06, "loss": 0.9104, "step": 1388 }, { "epoch": 0.4154329295648273, "grad_norm": 2.2261645793914795, "learning_rate": 6.585578770767939e-06, "loss": 0.8919, "step": 1389 }, { "epoch": 0.41573201734709136, "grad_norm": 2.304302215576172, "learning_rate": 6.5809829543579595e-06, "loss": 0.9039, "step": 1390 }, { "epoch": 0.4160311051293555, "grad_norm": 1.9284073114395142, "learning_rate": 6.576385653377303e-06, "loss": 0.905, "step": 1391 }, { "epoch": 0.41633019291161955, "grad_norm": 2.3396568298339844, "learning_rate": 6.5717868721429175e-06, "loss": 0.9459, "step": 1392 }, { "epoch": 0.41662928069388366, "grad_norm": 2.1134984493255615, "learning_rate": 6.56718661497314e-06, "loss": 0.8544, "step": 1393 }, { "epoch": 0.4169283684761477, "grad_norm": 2.3801631927490234, "learning_rate": 6.562584886187687e-06, "loss": 0.931, "step": 1394 }, { "epoch": 0.41722745625841184, "grad_norm": 2.030322313308716, "learning_rate": 6.557981690107669e-06, "loss": 0.8776, "step": 1395 }, { "epoch": 0.41752654404067596, "grad_norm": 2.03426194190979, "learning_rate": 6.553377031055564e-06, "loss": 0.8719, "step": 1396 }, { "epoch": 0.41782563182294, "grad_norm": 2.281463861465454, "learning_rate": 6.5487709133552275e-06, "loss": 0.8854, "step": 1397 }, { "epoch": 0.41812471960520414, "grad_norm": 2.1856586933135986, "learning_rate": 6.544163341331886e-06, "loss": 0.8488, "step": 1398 }, { "epoch": 0.4184238073874682, "grad_norm": 2.0283501148223877, "learning_rate": 6.539554319312129e-06, "loss": 0.9136, "step": 1399 }, { "epoch": 0.4187228951697323, "grad_norm": 1.912880539894104, "learning_rate": 6.534943851623911e-06, "loss": 0.8984, "step": 1400 }, { "epoch": 0.4190219829519964, "grad_norm": 2.0314700603485107, "learning_rate": 6.530331942596539e-06, "loss": 0.8596, "step": 1401 }, { "epoch": 0.4193210707342605, "grad_norm": 2.2505738735198975, "learning_rate": 6.525718596560679e-06, "loss": 0.9332, "step": 1402 }, { "epoch": 0.4196201585165246, "grad_norm": 2.1617250442504883, "learning_rate": 6.521103817848342e-06, "loss": 0.8529, "step": 1403 }, { "epoch": 0.4199192462987887, "grad_norm": 2.3623688220977783, "learning_rate": 6.516487610792888e-06, "loss": 0.93, "step": 1404 }, { "epoch": 0.4202183340810528, "grad_norm": 2.873596429824829, "learning_rate": 6.511869979729013e-06, "loss": 0.9487, "step": 1405 }, { "epoch": 0.42051742186331686, "grad_norm": 2.232736587524414, "learning_rate": 6.507250928992757e-06, "loss": 0.8656, "step": 1406 }, { "epoch": 0.420816509645581, "grad_norm": 2.086785316467285, "learning_rate": 6.5026304629214846e-06, "loss": 0.876, "step": 1407 }, { "epoch": 0.42111559742784505, "grad_norm": 2.0438830852508545, "learning_rate": 6.498008585853901e-06, "loss": 0.916, "step": 1408 }, { "epoch": 0.42141468521010916, "grad_norm": 2.1488447189331055, "learning_rate": 6.493385302130023e-06, "loss": 0.9312, "step": 1409 }, { "epoch": 0.4217137729923733, "grad_norm": 2.057352304458618, "learning_rate": 6.488760616091201e-06, "loss": 0.8653, "step": 1410 }, { "epoch": 0.42201286077463734, "grad_norm": 1.9596874713897705, "learning_rate": 6.484134532080091e-06, "loss": 0.8528, "step": 1411 }, { "epoch": 0.42231194855690146, "grad_norm": 2.2429635524749756, "learning_rate": 6.479507054440671e-06, "loss": 0.8606, "step": 1412 }, { "epoch": 0.4226110363391655, "grad_norm": 2.5565907955169678, "learning_rate": 6.474878187518221e-06, "loss": 0.9349, "step": 1413 }, { "epoch": 0.42291012412142964, "grad_norm": 2.156616449356079, "learning_rate": 6.470247935659328e-06, "loss": 0.8731, "step": 1414 }, { "epoch": 0.42320921190369376, "grad_norm": 2.5899970531463623, "learning_rate": 6.465616303211881e-06, "loss": 0.8649, "step": 1415 }, { "epoch": 0.4235082996859578, "grad_norm": 2.1333746910095215, "learning_rate": 6.460983294525064e-06, "loss": 0.9161, "step": 1416 }, { "epoch": 0.42380738746822194, "grad_norm": 2.0510709285736084, "learning_rate": 6.456348913949352e-06, "loss": 0.8875, "step": 1417 }, { "epoch": 0.424106475250486, "grad_norm": 2.54636549949646, "learning_rate": 6.451713165836511e-06, "loss": 0.9173, "step": 1418 }, { "epoch": 0.4244055630327501, "grad_norm": 2.3244433403015137, "learning_rate": 6.447076054539588e-06, "loss": 0.8708, "step": 1419 }, { "epoch": 0.4247046508150142, "grad_norm": 1.9579330682754517, "learning_rate": 6.442437584412912e-06, "loss": 0.8922, "step": 1420 }, { "epoch": 0.4250037385972783, "grad_norm": 2.0923328399658203, "learning_rate": 6.43779775981209e-06, "loss": 0.8883, "step": 1421 }, { "epoch": 0.4253028263795424, "grad_norm": 2.315042734146118, "learning_rate": 6.433156585093994e-06, "loss": 0.9082, "step": 1422 }, { "epoch": 0.4256019141618065, "grad_norm": 2.0330824851989746, "learning_rate": 6.4285140646167735e-06, "loss": 0.9146, "step": 1423 }, { "epoch": 0.4259010019440706, "grad_norm": 2.403721570968628, "learning_rate": 6.423870202739831e-06, "loss": 0.9066, "step": 1424 }, { "epoch": 0.42620008972633466, "grad_norm": 2.4314522743225098, "learning_rate": 6.41922500382384e-06, "loss": 0.8634, "step": 1425 }, { "epoch": 0.4264991775085988, "grad_norm": 2.0979936122894287, "learning_rate": 6.414578472230719e-06, "loss": 0.8801, "step": 1426 }, { "epoch": 0.42679826529086284, "grad_norm": 1.9856864213943481, "learning_rate": 6.409930612323646e-06, "loss": 0.9656, "step": 1427 }, { "epoch": 0.42709735307312696, "grad_norm": 2.62684965133667, "learning_rate": 6.405281428467041e-06, "loss": 0.8707, "step": 1428 }, { "epoch": 0.4273964408553911, "grad_norm": 3.1521048545837402, "learning_rate": 6.400630925026568e-06, "loss": 0.9089, "step": 1429 }, { "epoch": 0.42769552863765514, "grad_norm": 2.0546224117279053, "learning_rate": 6.395979106369132e-06, "loss": 0.8907, "step": 1430 }, { "epoch": 0.42799461641991926, "grad_norm": 1.9532287120819092, "learning_rate": 6.391325976862872e-06, "loss": 0.9723, "step": 1431 }, { "epoch": 0.4282937042021833, "grad_norm": 2.2809252738952637, "learning_rate": 6.386671540877162e-06, "loss": 0.8658, "step": 1432 }, { "epoch": 0.42859279198444744, "grad_norm": 2.072223663330078, "learning_rate": 6.382015802782592e-06, "loss": 0.9325, "step": 1433 }, { "epoch": 0.4288918797667115, "grad_norm": 1.9301838874816895, "learning_rate": 6.377358766950987e-06, "loss": 0.8032, "step": 1434 }, { "epoch": 0.4291909675489756, "grad_norm": 2.3652689456939697, "learning_rate": 6.372700437755381e-06, "loss": 0.8991, "step": 1435 }, { "epoch": 0.42949005533123974, "grad_norm": 2.2283451557159424, "learning_rate": 6.368040819570032e-06, "loss": 0.8964, "step": 1436 }, { "epoch": 0.4297891431135038, "grad_norm": 1.9467560052871704, "learning_rate": 6.3633799167703954e-06, "loss": 0.8489, "step": 1437 }, { "epoch": 0.4300882308957679, "grad_norm": 2.147559642791748, "learning_rate": 6.35871773373315e-06, "loss": 0.8996, "step": 1438 }, { "epoch": 0.430387318678032, "grad_norm": 1.7847281694412231, "learning_rate": 6.3540542748361585e-06, "loss": 0.9095, "step": 1439 }, { "epoch": 0.4306864064602961, "grad_norm": 2.037461757659912, "learning_rate": 6.349389544458497e-06, "loss": 0.9095, "step": 1440 }, { "epoch": 0.4309854942425602, "grad_norm": 2.3741161823272705, "learning_rate": 6.3447235469804255e-06, "loss": 0.9036, "step": 1441 }, { "epoch": 0.4312845820248243, "grad_norm": 2.278449058532715, "learning_rate": 6.3400562867833984e-06, "loss": 0.9152, "step": 1442 }, { "epoch": 0.4315836698070884, "grad_norm": 2.038419246673584, "learning_rate": 6.335387768250054e-06, "loss": 0.8422, "step": 1443 }, { "epoch": 0.43188275758935246, "grad_norm": 1.9891034364700317, "learning_rate": 6.330717995764215e-06, "loss": 0.9252, "step": 1444 }, { "epoch": 0.4321818453716166, "grad_norm": 2.251952886581421, "learning_rate": 6.326046973710878e-06, "loss": 0.9377, "step": 1445 }, { "epoch": 0.43248093315388064, "grad_norm": 2.153702735900879, "learning_rate": 6.321374706476212e-06, "loss": 0.9148, "step": 1446 }, { "epoch": 0.43278002093614476, "grad_norm": 2.0648107528686523, "learning_rate": 6.316701198447562e-06, "loss": 0.9162, "step": 1447 }, { "epoch": 0.4330791087184089, "grad_norm": 2.215205669403076, "learning_rate": 6.312026454013431e-06, "loss": 0.8474, "step": 1448 }, { "epoch": 0.43337819650067294, "grad_norm": 2.30306077003479, "learning_rate": 6.3073504775634884e-06, "loss": 0.8802, "step": 1449 }, { "epoch": 0.43367728428293706, "grad_norm": 2.1706416606903076, "learning_rate": 6.302673273488556e-06, "loss": 0.8553, "step": 1450 }, { "epoch": 0.4339763720652011, "grad_norm": 2.1952643394470215, "learning_rate": 6.297994846180611e-06, "loss": 0.8844, "step": 1451 }, { "epoch": 0.43427545984746524, "grad_norm": 2.567535877227783, "learning_rate": 6.293315200032777e-06, "loss": 0.9084, "step": 1452 }, { "epoch": 0.4345745476297293, "grad_norm": 2.1916451454162598, "learning_rate": 6.288634339439328e-06, "loss": 0.9034, "step": 1453 }, { "epoch": 0.4348736354119934, "grad_norm": 3.826876401901245, "learning_rate": 6.283952268795669e-06, "loss": 0.8492, "step": 1454 }, { "epoch": 0.43517272319425754, "grad_norm": 2.327446699142456, "learning_rate": 6.279268992498349e-06, "loss": 0.8719, "step": 1455 }, { "epoch": 0.4354718109765216, "grad_norm": 2.250972270965576, "learning_rate": 6.274584514945046e-06, "loss": 0.8795, "step": 1456 }, { "epoch": 0.4357708987587857, "grad_norm": 2.085019826889038, "learning_rate": 6.269898840534566e-06, "loss": 0.9179, "step": 1457 }, { "epoch": 0.4360699865410498, "grad_norm": 2.170017957687378, "learning_rate": 6.26521197366684e-06, "loss": 0.9055, "step": 1458 }, { "epoch": 0.4363690743233139, "grad_norm": 2.208730936050415, "learning_rate": 6.2605239187429175e-06, "loss": 0.8905, "step": 1459 }, { "epoch": 0.43666816210557796, "grad_norm": 2.1448724269866943, "learning_rate": 6.255834680164966e-06, "loss": 0.8376, "step": 1460 }, { "epoch": 0.4369672498878421, "grad_norm": 2.2588467597961426, "learning_rate": 6.2511442623362585e-06, "loss": 0.897, "step": 1461 }, { "epoch": 0.4372663376701062, "grad_norm": 2.190514087677002, "learning_rate": 6.246452669661184e-06, "loss": 0.9247, "step": 1462 }, { "epoch": 0.43756542545237026, "grad_norm": 2.1981520652770996, "learning_rate": 6.241759906545226e-06, "loss": 0.9229, "step": 1463 }, { "epoch": 0.4378645132346344, "grad_norm": 3.2648210525512695, "learning_rate": 6.237065977394976e-06, "loss": 0.8587, "step": 1464 }, { "epoch": 0.43816360101689844, "grad_norm": 2.371212959289551, "learning_rate": 6.23237088661811e-06, "loss": 0.8972, "step": 1465 }, { "epoch": 0.43846268879916256, "grad_norm": 2.029698610305786, "learning_rate": 6.227674638623406e-06, "loss": 0.8987, "step": 1466 }, { "epoch": 0.4387617765814266, "grad_norm": 2.2015223503112793, "learning_rate": 6.22297723782072e-06, "loss": 0.887, "step": 1467 }, { "epoch": 0.43906086436369074, "grad_norm": 2.1217586994171143, "learning_rate": 6.218278688620994e-06, "loss": 0.836, "step": 1468 }, { "epoch": 0.43935995214595486, "grad_norm": 2.0924172401428223, "learning_rate": 6.213578995436248e-06, "loss": 0.9043, "step": 1469 }, { "epoch": 0.4396590399282189, "grad_norm": 2.3361141681671143, "learning_rate": 6.208878162679577e-06, "loss": 0.8873, "step": 1470 }, { "epoch": 0.43995812771048304, "grad_norm": 2.0285022258758545, "learning_rate": 6.204176194765143e-06, "loss": 0.8598, "step": 1471 }, { "epoch": 0.4402572154927471, "grad_norm": 2.479367733001709, "learning_rate": 6.199473096108179e-06, "loss": 0.8758, "step": 1472 }, { "epoch": 0.4405563032750112, "grad_norm": 1.9214283227920532, "learning_rate": 6.194768871124976e-06, "loss": 0.8691, "step": 1473 }, { "epoch": 0.44085539105727534, "grad_norm": 2.0834641456604004, "learning_rate": 6.190063524232883e-06, "loss": 0.9145, "step": 1474 }, { "epoch": 0.4411544788395394, "grad_norm": 2.160783290863037, "learning_rate": 6.1853570598503045e-06, "loss": 0.8814, "step": 1475 }, { "epoch": 0.4414535666218035, "grad_norm": 2.0949411392211914, "learning_rate": 6.18064948239669e-06, "loss": 0.9053, "step": 1476 }, { "epoch": 0.4417526544040676, "grad_norm": 2.428309679031372, "learning_rate": 6.175940796292541e-06, "loss": 0.9086, "step": 1477 }, { "epoch": 0.4420517421863317, "grad_norm": 2.3531746864318848, "learning_rate": 6.171231005959393e-06, "loss": 0.9363, "step": 1478 }, { "epoch": 0.44235082996859576, "grad_norm": 2.950242757797241, "learning_rate": 6.166520115819825e-06, "loss": 0.9089, "step": 1479 }, { "epoch": 0.4426499177508599, "grad_norm": 2.270956516265869, "learning_rate": 6.161808130297442e-06, "loss": 0.8693, "step": 1480 }, { "epoch": 0.442949005533124, "grad_norm": 2.2948532104492188, "learning_rate": 6.157095053816882e-06, "loss": 0.8682, "step": 1481 }, { "epoch": 0.44324809331538806, "grad_norm": 2.146024703979492, "learning_rate": 6.152380890803806e-06, "loss": 0.8625, "step": 1482 }, { "epoch": 0.4435471810976522, "grad_norm": 2.1867592334747314, "learning_rate": 6.147665645684897e-06, "loss": 0.901, "step": 1483 }, { "epoch": 0.44384626887991624, "grad_norm": 2.1831977367401123, "learning_rate": 6.142949322887852e-06, "loss": 0.886, "step": 1484 }, { "epoch": 0.44414535666218036, "grad_norm": 2.130953311920166, "learning_rate": 6.138231926841381e-06, "loss": 0.9232, "step": 1485 }, { "epoch": 0.4444444444444444, "grad_norm": 1.9178695678710938, "learning_rate": 6.1335134619751994e-06, "loss": 0.8726, "step": 1486 }, { "epoch": 0.44474353222670854, "grad_norm": 1.9978245496749878, "learning_rate": 6.128793932720031e-06, "loss": 0.8545, "step": 1487 }, { "epoch": 0.44504262000897266, "grad_norm": 1.9284663200378418, "learning_rate": 6.1240733435075946e-06, "loss": 0.8885, "step": 1488 }, { "epoch": 0.4453417077912367, "grad_norm": 2.116771697998047, "learning_rate": 6.119351698770607e-06, "loss": 0.9057, "step": 1489 }, { "epoch": 0.44564079557350084, "grad_norm": 2.0264663696289062, "learning_rate": 6.1146290029427755e-06, "loss": 0.8311, "step": 1490 }, { "epoch": 0.4459398833557649, "grad_norm": 4.236113548278809, "learning_rate": 6.1099052604587935e-06, "loss": 0.8943, "step": 1491 }, { "epoch": 0.446238971138029, "grad_norm": 2.286720037460327, "learning_rate": 6.105180475754341e-06, "loss": 0.891, "step": 1492 }, { "epoch": 0.4465380589202931, "grad_norm": 2.1759912967681885, "learning_rate": 6.100454653266068e-06, "loss": 0.8752, "step": 1493 }, { "epoch": 0.4468371467025572, "grad_norm": 2.1354455947875977, "learning_rate": 6.095727797431607e-06, "loss": 0.9072, "step": 1494 }, { "epoch": 0.4471362344848213, "grad_norm": 2.1325252056121826, "learning_rate": 6.0909999126895605e-06, "loss": 0.942, "step": 1495 }, { "epoch": 0.4474353222670854, "grad_norm": 2.411929130554199, "learning_rate": 6.086271003479492e-06, "loss": 0.9179, "step": 1496 }, { "epoch": 0.4477344100493495, "grad_norm": 1.901544213294983, "learning_rate": 6.081541074241932e-06, "loss": 0.9501, "step": 1497 }, { "epoch": 0.44803349783161356, "grad_norm": 2.337282419204712, "learning_rate": 6.076810129418367e-06, "loss": 0.864, "step": 1498 }, { "epoch": 0.4483325856138777, "grad_norm": 2.062225580215454, "learning_rate": 6.072078173451235e-06, "loss": 0.8825, "step": 1499 }, { "epoch": 0.4486316733961418, "grad_norm": 2.063108205795288, "learning_rate": 6.067345210783927e-06, "loss": 0.8943, "step": 1500 }, { "epoch": 0.44893076117840586, "grad_norm": 2.2403666973114014, "learning_rate": 6.062611245860778e-06, "loss": 0.8677, "step": 1501 }, { "epoch": 0.44922984896067, "grad_norm": 2.1940529346466064, "learning_rate": 6.057876283127062e-06, "loss": 0.9276, "step": 1502 }, { "epoch": 0.44952893674293404, "grad_norm": 2.9306135177612305, "learning_rate": 6.053140327028996e-06, "loss": 0.8423, "step": 1503 }, { "epoch": 0.44982802452519816, "grad_norm": 2.4749915599823, "learning_rate": 6.048403382013721e-06, "loss": 0.945, "step": 1504 }, { "epoch": 0.4501271123074622, "grad_norm": 1.8501983880996704, "learning_rate": 6.043665452529315e-06, "loss": 0.8299, "step": 1505 }, { "epoch": 0.45042620008972634, "grad_norm": 2.3774564266204834, "learning_rate": 6.038926543024774e-06, "loss": 0.8666, "step": 1506 }, { "epoch": 0.45072528787199045, "grad_norm": 2.081108331680298, "learning_rate": 6.034186657950019e-06, "loss": 0.8966, "step": 1507 }, { "epoch": 0.4510243756542545, "grad_norm": 2.2512452602386475, "learning_rate": 6.029445801755884e-06, "loss": 0.8505, "step": 1508 }, { "epoch": 0.45132346343651863, "grad_norm": 1.9043219089508057, "learning_rate": 6.024703978894118e-06, "loss": 0.901, "step": 1509 }, { "epoch": 0.4516225512187827, "grad_norm": 2.054732084274292, "learning_rate": 6.019961193817371e-06, "loss": 0.8604, "step": 1510 }, { "epoch": 0.4519216390010468, "grad_norm": 2.2595772743225098, "learning_rate": 6.015217450979206e-06, "loss": 0.9164, "step": 1511 }, { "epoch": 0.4522207267833109, "grad_norm": 2.7936322689056396, "learning_rate": 6.010472754834078e-06, "loss": 0.8718, "step": 1512 }, { "epoch": 0.452519814565575, "grad_norm": 2.414186716079712, "learning_rate": 6.00572710983734e-06, "loss": 0.8893, "step": 1513 }, { "epoch": 0.4528189023478391, "grad_norm": 2.25728178024292, "learning_rate": 6.000980520445237e-06, "loss": 0.9211, "step": 1514 }, { "epoch": 0.4531179901301032, "grad_norm": 1.917590618133545, "learning_rate": 5.9962329911148985e-06, "loss": 0.794, "step": 1515 }, { "epoch": 0.4534170779123673, "grad_norm": 2.129985809326172, "learning_rate": 5.991484526304338e-06, "loss": 0.8077, "step": 1516 }, { "epoch": 0.45371616569463136, "grad_norm": 2.18475341796875, "learning_rate": 5.986735130472449e-06, "loss": 0.8457, "step": 1517 }, { "epoch": 0.4540152534768955, "grad_norm": 1.9252012968063354, "learning_rate": 5.981984808078993e-06, "loss": 0.8942, "step": 1518 }, { "epoch": 0.45431434125915954, "grad_norm": 2.072746753692627, "learning_rate": 5.97723356358461e-06, "loss": 0.8264, "step": 1519 }, { "epoch": 0.45461342904142366, "grad_norm": 1.9602493047714233, "learning_rate": 5.972481401450798e-06, "loss": 0.9072, "step": 1520 }, { "epoch": 0.4549125168236878, "grad_norm": 2.711942672729492, "learning_rate": 5.967728326139926e-06, "loss": 0.9273, "step": 1521 }, { "epoch": 0.45521160460595184, "grad_norm": 2.292996644973755, "learning_rate": 5.962974342115209e-06, "loss": 0.8913, "step": 1522 }, { "epoch": 0.45551069238821595, "grad_norm": 2.284029722213745, "learning_rate": 5.9582194538407235e-06, "loss": 0.9122, "step": 1523 }, { "epoch": 0.45580978017048, "grad_norm": 2.2466986179351807, "learning_rate": 5.9534636657813935e-06, "loss": 0.8829, "step": 1524 }, { "epoch": 0.45610886795274413, "grad_norm": 2.1907827854156494, "learning_rate": 5.948706982402987e-06, "loss": 0.8878, "step": 1525 }, { "epoch": 0.4564079557350082, "grad_norm": 2.6035373210906982, "learning_rate": 5.9439494081721125e-06, "loss": 0.9104, "step": 1526 }, { "epoch": 0.4567070435172723, "grad_norm": 2.1066620349884033, "learning_rate": 5.939190947556216e-06, "loss": 0.9055, "step": 1527 }, { "epoch": 0.45700613129953643, "grad_norm": 2.4734976291656494, "learning_rate": 5.934431605023575e-06, "loss": 0.8908, "step": 1528 }, { "epoch": 0.4573052190818005, "grad_norm": 1.853533148765564, "learning_rate": 5.929671385043296e-06, "loss": 0.8482, "step": 1529 }, { "epoch": 0.4576043068640646, "grad_norm": 2.5992119312286377, "learning_rate": 5.924910292085308e-06, "loss": 0.8864, "step": 1530 }, { "epoch": 0.4579033946463287, "grad_norm": 2.1858184337615967, "learning_rate": 5.920148330620362e-06, "loss": 0.8939, "step": 1531 }, { "epoch": 0.4582024824285928, "grad_norm": 2.2931652069091797, "learning_rate": 5.915385505120024e-06, "loss": 0.8498, "step": 1532 }, { "epoch": 0.4585015702108569, "grad_norm": 2.16166090965271, "learning_rate": 5.9106218200566646e-06, "loss": 0.863, "step": 1533 }, { "epoch": 0.458800657993121, "grad_norm": 2.1808972358703613, "learning_rate": 5.905857279903475e-06, "loss": 0.8303, "step": 1534 }, { "epoch": 0.4590997457753851, "grad_norm": 2.128483772277832, "learning_rate": 5.9010918891344375e-06, "loss": 0.8307, "step": 1535 }, { "epoch": 0.45939883355764916, "grad_norm": 2.0747058391571045, "learning_rate": 5.896325652224339e-06, "loss": 0.8905, "step": 1536 }, { "epoch": 0.4596979213399133, "grad_norm": 4.306790828704834, "learning_rate": 5.891558573648759e-06, "loss": 0.8308, "step": 1537 }, { "epoch": 0.45999700912217734, "grad_norm": 2.4703779220581055, "learning_rate": 5.886790657884067e-06, "loss": 0.9002, "step": 1538 }, { "epoch": 0.46029609690444145, "grad_norm": 2.567516803741455, "learning_rate": 5.8820219094074215e-06, "loss": 0.897, "step": 1539 }, { "epoch": 0.46059518468670557, "grad_norm": 2.065295934677124, "learning_rate": 5.877252332696759e-06, "loss": 0.9327, "step": 1540 }, { "epoch": 0.46089427246896963, "grad_norm": 2.127746820449829, "learning_rate": 5.8724819322307955e-06, "loss": 0.97, "step": 1541 }, { "epoch": 0.46119336025123375, "grad_norm": 2.509174108505249, "learning_rate": 5.8677107124890206e-06, "loss": 0.8862, "step": 1542 }, { "epoch": 0.4614924480334978, "grad_norm": 2.2266149520874023, "learning_rate": 5.862938677951695e-06, "loss": 0.8893, "step": 1543 }, { "epoch": 0.46179153581576193, "grad_norm": 2.086697816848755, "learning_rate": 5.85816583309984e-06, "loss": 0.8622, "step": 1544 }, { "epoch": 0.462090623598026, "grad_norm": 2.102893590927124, "learning_rate": 5.853392182415244e-06, "loss": 0.9156, "step": 1545 }, { "epoch": 0.4623897113802901, "grad_norm": 2.000528335571289, "learning_rate": 5.848617730380444e-06, "loss": 0.8683, "step": 1546 }, { "epoch": 0.46268879916255423, "grad_norm": 1.9418056011199951, "learning_rate": 5.843842481478739e-06, "loss": 0.8222, "step": 1547 }, { "epoch": 0.4629878869448183, "grad_norm": 2.4673209190368652, "learning_rate": 5.839066440194165e-06, "loss": 0.8768, "step": 1548 }, { "epoch": 0.4632869747270824, "grad_norm": 2.450906991958618, "learning_rate": 5.834289611011515e-06, "loss": 0.8949, "step": 1549 }, { "epoch": 0.4635860625093465, "grad_norm": 1.9984779357910156, "learning_rate": 5.82951199841631e-06, "loss": 0.8483, "step": 1550 }, { "epoch": 0.4638851502916106, "grad_norm": 2.649348497390747, "learning_rate": 5.824733606894818e-06, "loss": 0.9215, "step": 1551 }, { "epoch": 0.46418423807387466, "grad_norm": 2.1253883838653564, "learning_rate": 5.819954440934026e-06, "loss": 0.9684, "step": 1552 }, { "epoch": 0.4644833258561388, "grad_norm": 2.3454360961914062, "learning_rate": 5.815174505021659e-06, "loss": 0.971, "step": 1553 }, { "epoch": 0.4647824136384029, "grad_norm": 2.433222532272339, "learning_rate": 5.810393803646157e-06, "loss": 0.9055, "step": 1554 }, { "epoch": 0.46508150142066695, "grad_norm": 2.321289539337158, "learning_rate": 5.805612341296685e-06, "loss": 0.849, "step": 1555 }, { "epoch": 0.46538058920293107, "grad_norm": 3.190053701400757, "learning_rate": 5.800830122463117e-06, "loss": 0.7739, "step": 1556 }, { "epoch": 0.46567967698519513, "grad_norm": 2.034491777420044, "learning_rate": 5.7960471516360435e-06, "loss": 0.8335, "step": 1557 }, { "epoch": 0.46597876476745925, "grad_norm": 2.247438907623291, "learning_rate": 5.791263433306758e-06, "loss": 0.9304, "step": 1558 }, { "epoch": 0.46627785254972337, "grad_norm": 2.0169715881347656, "learning_rate": 5.786478971967249e-06, "loss": 0.8574, "step": 1559 }, { "epoch": 0.46657694033198743, "grad_norm": 2.1865758895874023, "learning_rate": 5.781693772110219e-06, "loss": 0.8566, "step": 1560 }, { "epoch": 0.46687602811425155, "grad_norm": 2.544795513153076, "learning_rate": 5.776907838229049e-06, "loss": 0.9105, "step": 1561 }, { "epoch": 0.4671751158965156, "grad_norm": 2.385190010070801, "learning_rate": 5.772121174817816e-06, "loss": 0.9065, "step": 1562 }, { "epoch": 0.46747420367877973, "grad_norm": 2.280348300933838, "learning_rate": 5.767333786371279e-06, "loss": 0.8954, "step": 1563 }, { "epoch": 0.4677732914610438, "grad_norm": 2.046884298324585, "learning_rate": 5.762545677384884e-06, "loss": 0.8629, "step": 1564 }, { "epoch": 0.4680723792433079, "grad_norm": 2.0465729236602783, "learning_rate": 5.757756852354743e-06, "loss": 0.9076, "step": 1565 }, { "epoch": 0.46837146702557203, "grad_norm": 2.39312744140625, "learning_rate": 5.752967315777653e-06, "loss": 0.9256, "step": 1566 }, { "epoch": 0.4686705548078361, "grad_norm": 1.9680663347244263, "learning_rate": 5.748177072151068e-06, "loss": 0.9038, "step": 1567 }, { "epoch": 0.4689696425901002, "grad_norm": 2.18277907371521, "learning_rate": 5.743386125973112e-06, "loss": 0.8565, "step": 1568 }, { "epoch": 0.4692687303723643, "grad_norm": 7.092438220977783, "learning_rate": 5.738594481742568e-06, "loss": 0.8857, "step": 1569 }, { "epoch": 0.4695678181546284, "grad_norm": 2.686141014099121, "learning_rate": 5.733802143958872e-06, "loss": 0.9252, "step": 1570 }, { "epoch": 0.46986690593689245, "grad_norm": 2.1322810649871826, "learning_rate": 5.729009117122117e-06, "loss": 0.8607, "step": 1571 }, { "epoch": 0.47016599371915657, "grad_norm": 2.3290140628814697, "learning_rate": 5.724215405733033e-06, "loss": 0.8271, "step": 1572 }, { "epoch": 0.4704650815014207, "grad_norm": 2.125929594039917, "learning_rate": 5.7194210142930065e-06, "loss": 0.8939, "step": 1573 }, { "epoch": 0.47076416928368475, "grad_norm": 2.010760545730591, "learning_rate": 5.714625947304048e-06, "loss": 0.8886, "step": 1574 }, { "epoch": 0.47106325706594887, "grad_norm": 2.104820966720581, "learning_rate": 5.709830209268814e-06, "loss": 0.9192, "step": 1575 }, { "epoch": 0.47136234484821293, "grad_norm": 2.2258405685424805, "learning_rate": 5.705033804690583e-06, "loss": 0.8631, "step": 1576 }, { "epoch": 0.47166143263047705, "grad_norm": 2.6589677333831787, "learning_rate": 5.7002367380732685e-06, "loss": 0.8963, "step": 1577 }, { "epoch": 0.4719605204127411, "grad_norm": 1.9986263513565063, "learning_rate": 5.695439013921391e-06, "loss": 0.9059, "step": 1578 }, { "epoch": 0.47225960819500523, "grad_norm": 2.460282802581787, "learning_rate": 5.6906406367401075e-06, "loss": 0.9167, "step": 1579 }, { "epoch": 0.47255869597726935, "grad_norm": 2.6512248516082764, "learning_rate": 5.6858416110351715e-06, "loss": 0.9445, "step": 1580 }, { "epoch": 0.4728577837595334, "grad_norm": 1.9939566850662231, "learning_rate": 5.681041941312954e-06, "loss": 0.8298, "step": 1581 }, { "epoch": 0.47315687154179753, "grad_norm": 2.1158132553100586, "learning_rate": 5.676241632080429e-06, "loss": 0.9211, "step": 1582 }, { "epoch": 0.4734559593240616, "grad_norm": 2.3486533164978027, "learning_rate": 5.6714406878451715e-06, "loss": 0.881, "step": 1583 }, { "epoch": 0.4737550471063257, "grad_norm": 2.1483192443847656, "learning_rate": 5.666639113115351e-06, "loss": 0.8916, "step": 1584 }, { "epoch": 0.47405413488858983, "grad_norm": 2.354092597961426, "learning_rate": 5.661836912399731e-06, "loss": 0.9639, "step": 1585 }, { "epoch": 0.4743532226708539, "grad_norm": 2.2734217643737793, "learning_rate": 5.657034090207663e-06, "loss": 0.902, "step": 1586 }, { "epoch": 0.474652310453118, "grad_norm": 2.073636770248413, "learning_rate": 5.652230651049077e-06, "loss": 0.8603, "step": 1587 }, { "epoch": 0.47495139823538207, "grad_norm": 2.528332471847534, "learning_rate": 5.647426599434493e-06, "loss": 0.8706, "step": 1588 }, { "epoch": 0.4752504860176462, "grad_norm": 2.0223429203033447, "learning_rate": 5.642621939874995e-06, "loss": 0.9048, "step": 1589 }, { "epoch": 0.47554957379991025, "grad_norm": 2.1750102043151855, "learning_rate": 5.637816676882244e-06, "loss": 0.9017, "step": 1590 }, { "epoch": 0.47584866158217437, "grad_norm": 2.0542757511138916, "learning_rate": 5.633010814968465e-06, "loss": 0.8709, "step": 1591 }, { "epoch": 0.4761477493644385, "grad_norm": 2.0071370601654053, "learning_rate": 5.628204358646448e-06, "loss": 0.8897, "step": 1592 }, { "epoch": 0.47644683714670255, "grad_norm": 1.959465742111206, "learning_rate": 5.623397312429537e-06, "loss": 0.8497, "step": 1593 }, { "epoch": 0.47674592492896667, "grad_norm": 2.3239848613739014, "learning_rate": 5.618589680831636e-06, "loss": 0.9063, "step": 1594 }, { "epoch": 0.47704501271123073, "grad_norm": 2.091277599334717, "learning_rate": 5.6137814683671935e-06, "loss": 0.8296, "step": 1595 }, { "epoch": 0.47734410049349485, "grad_norm": 2.1996347904205322, "learning_rate": 5.608972679551205e-06, "loss": 0.955, "step": 1596 }, { "epoch": 0.4776431882757589, "grad_norm": 2.2801361083984375, "learning_rate": 5.604163318899207e-06, "loss": 0.9746, "step": 1597 }, { "epoch": 0.47794227605802303, "grad_norm": 2.07092022895813, "learning_rate": 5.599353390927275e-06, "loss": 0.858, "step": 1598 }, { "epoch": 0.47824136384028715, "grad_norm": 2.173978328704834, "learning_rate": 5.594542900152015e-06, "loss": 0.8546, "step": 1599 }, { "epoch": 0.4785404516225512, "grad_norm": 2.0472958087921143, "learning_rate": 5.589731851090559e-06, "loss": 0.8302, "step": 1600 }, { "epoch": 0.47883953940481533, "grad_norm": 2.093161106109619, "learning_rate": 5.584920248260572e-06, "loss": 0.9197, "step": 1601 }, { "epoch": 0.4791386271870794, "grad_norm": 2.0454189777374268, "learning_rate": 5.580108096180229e-06, "loss": 0.8613, "step": 1602 }, { "epoch": 0.4794377149693435, "grad_norm": 2.0306379795074463, "learning_rate": 5.575295399368228e-06, "loss": 0.8909, "step": 1603 }, { "epoch": 0.47973680275160757, "grad_norm": 2.4592666625976562, "learning_rate": 5.570482162343772e-06, "loss": 0.8856, "step": 1604 }, { "epoch": 0.4800358905338717, "grad_norm": 2.2339096069335938, "learning_rate": 5.5656683896265786e-06, "loss": 0.8873, "step": 1605 }, { "epoch": 0.4803349783161358, "grad_norm": 2.1850879192352295, "learning_rate": 5.560854085736861e-06, "loss": 0.836, "step": 1606 }, { "epoch": 0.48063406609839987, "grad_norm": 2.143684148788452, "learning_rate": 5.556039255195338e-06, "loss": 0.9102, "step": 1607 }, { "epoch": 0.480933153880664, "grad_norm": 2.4111673831939697, "learning_rate": 5.551223902523218e-06, "loss": 0.8702, "step": 1608 }, { "epoch": 0.48123224166292805, "grad_norm": 2.1444075107574463, "learning_rate": 5.546408032242202e-06, "loss": 0.8558, "step": 1609 }, { "epoch": 0.48153132944519217, "grad_norm": 2.334900379180908, "learning_rate": 5.541591648874476e-06, "loss": 0.8559, "step": 1610 }, { "epoch": 0.48183041722745623, "grad_norm": 2.357182025909424, "learning_rate": 5.53677475694271e-06, "loss": 0.9633, "step": 1611 }, { "epoch": 0.48212950500972035, "grad_norm": 2.5398621559143066, "learning_rate": 5.531957360970048e-06, "loss": 0.8866, "step": 1612 }, { "epoch": 0.48242859279198447, "grad_norm": 2.1764354705810547, "learning_rate": 5.527139465480109e-06, "loss": 0.8559, "step": 1613 }, { "epoch": 0.48272768057424853, "grad_norm": 2.2581992149353027, "learning_rate": 5.5223210749969845e-06, "loss": 0.8821, "step": 1614 }, { "epoch": 0.48302676835651265, "grad_norm": 2.1128408908843994, "learning_rate": 5.5175021940452225e-06, "loss": 0.8149, "step": 1615 }, { "epoch": 0.4833258561387767, "grad_norm": 2.227670431137085, "learning_rate": 5.512682827149841e-06, "loss": 0.8488, "step": 1616 }, { "epoch": 0.48362494392104083, "grad_norm": 2.59446382522583, "learning_rate": 5.507862978836306e-06, "loss": 0.9396, "step": 1617 }, { "epoch": 0.48392403170330495, "grad_norm": 1.9124979972839355, "learning_rate": 5.503042653630543e-06, "loss": 0.8431, "step": 1618 }, { "epoch": 0.484223119485569, "grad_norm": 2.1420774459838867, "learning_rate": 5.49822185605892e-06, "loss": 0.8608, "step": 1619 }, { "epoch": 0.4845222072678331, "grad_norm": 2.473551034927368, "learning_rate": 5.4934005906482525e-06, "loss": 0.8897, "step": 1620 }, { "epoch": 0.4848212950500972, "grad_norm": 2.26887583732605, "learning_rate": 5.488578861925788e-06, "loss": 0.8807, "step": 1621 }, { "epoch": 0.4851203828323613, "grad_norm": 2.6222572326660156, "learning_rate": 5.4837566744192196e-06, "loss": 0.9199, "step": 1622 }, { "epoch": 0.48541947061462537, "grad_norm": 1.9710321426391602, "learning_rate": 5.478934032656663e-06, "loss": 0.8396, "step": 1623 }, { "epoch": 0.4857185583968895, "grad_norm": 2.254929304122925, "learning_rate": 5.4741109411666635e-06, "loss": 0.8915, "step": 1624 }, { "epoch": 0.4860176461791536, "grad_norm": 2.149587869644165, "learning_rate": 5.46928740447819e-06, "loss": 0.8813, "step": 1625 }, { "epoch": 0.48631673396141767, "grad_norm": 2.1627728939056396, "learning_rate": 5.464463427120626e-06, "loss": 0.905, "step": 1626 }, { "epoch": 0.4866158217436818, "grad_norm": 2.2807838916778564, "learning_rate": 5.459639013623772e-06, "loss": 0.8324, "step": 1627 }, { "epoch": 0.48691490952594585, "grad_norm": 2.267528772354126, "learning_rate": 5.454814168517836e-06, "loss": 0.9462, "step": 1628 }, { "epoch": 0.48721399730820997, "grad_norm": 2.0238230228424072, "learning_rate": 5.449988896333431e-06, "loss": 0.8983, "step": 1629 }, { "epoch": 0.48751308509047403, "grad_norm": 2.2757725715637207, "learning_rate": 5.445163201601575e-06, "loss": 0.8912, "step": 1630 }, { "epoch": 0.48781217287273815, "grad_norm": 2.049278497695923, "learning_rate": 5.440337088853679e-06, "loss": 0.9405, "step": 1631 }, { "epoch": 0.48811126065500227, "grad_norm": 2.0862600803375244, "learning_rate": 5.435510562621544e-06, "loss": 0.8788, "step": 1632 }, { "epoch": 0.48841034843726633, "grad_norm": 2.2579948902130127, "learning_rate": 5.4306836274373675e-06, "loss": 0.8856, "step": 1633 }, { "epoch": 0.48870943621953045, "grad_norm": 2.165532112121582, "learning_rate": 5.425856287833723e-06, "loss": 0.9605, "step": 1634 }, { "epoch": 0.4890085240017945, "grad_norm": 2.3782896995544434, "learning_rate": 5.421028548343568e-06, "loss": 0.9167, "step": 1635 }, { "epoch": 0.4893076117840586, "grad_norm": 2.0374510288238525, "learning_rate": 5.4162004135002336e-06, "loss": 0.8821, "step": 1636 }, { "epoch": 0.4896066995663227, "grad_norm": 1.9704091548919678, "learning_rate": 5.411371887837423e-06, "loss": 0.8555, "step": 1637 }, { "epoch": 0.4899057873485868, "grad_norm": 2.108069658279419, "learning_rate": 5.406542975889209e-06, "loss": 0.8923, "step": 1638 }, { "epoch": 0.4902048751308509, "grad_norm": 1.9993451833724976, "learning_rate": 5.40171368219002e-06, "loss": 0.8202, "step": 1639 }, { "epoch": 0.490503962913115, "grad_norm": 1.9073227643966675, "learning_rate": 5.396884011274651e-06, "loss": 0.8429, "step": 1640 }, { "epoch": 0.4908030506953791, "grad_norm": 2.4496750831604004, "learning_rate": 5.3920539676782455e-06, "loss": 0.8965, "step": 1641 }, { "epoch": 0.49110213847764317, "grad_norm": 2.0943257808685303, "learning_rate": 5.387223555936301e-06, "loss": 0.8812, "step": 1642 }, { "epoch": 0.4914012262599073, "grad_norm": 2.1366477012634277, "learning_rate": 5.382392780584655e-06, "loss": 0.8565, "step": 1643 }, { "epoch": 0.4917003140421714, "grad_norm": 2.341965675354004, "learning_rate": 5.377561646159495e-06, "loss": 0.8701, "step": 1644 }, { "epoch": 0.49199940182443547, "grad_norm": 1.8587591648101807, "learning_rate": 5.372730157197338e-06, "loss": 0.843, "step": 1645 }, { "epoch": 0.4922984896066996, "grad_norm": 2.045159339904785, "learning_rate": 5.367898318235037e-06, "loss": 0.8261, "step": 1646 }, { "epoch": 0.49259757738896365, "grad_norm": 1.949501872062683, "learning_rate": 5.363066133809773e-06, "loss": 0.9796, "step": 1647 }, { "epoch": 0.49289666517122777, "grad_norm": 2.1350393295288086, "learning_rate": 5.3582336084590535e-06, "loss": 0.9094, "step": 1648 }, { "epoch": 0.49319575295349183, "grad_norm": 2.0783162117004395, "learning_rate": 5.3534007467207024e-06, "loss": 0.8703, "step": 1649 }, { "epoch": 0.49349484073575595, "grad_norm": 2.1457388401031494, "learning_rate": 5.348567553132862e-06, "loss": 0.9055, "step": 1650 }, { "epoch": 0.49379392851802006, "grad_norm": 2.321758508682251, "learning_rate": 5.343734032233986e-06, "loss": 0.8832, "step": 1651 }, { "epoch": 0.4940930163002841, "grad_norm": 2.4263358116149902, "learning_rate": 5.338900188562836e-06, "loss": 0.8747, "step": 1652 }, { "epoch": 0.49439210408254824, "grad_norm": 2.091339349746704, "learning_rate": 5.334066026658475e-06, "loss": 0.8756, "step": 1653 }, { "epoch": 0.4946911918648123, "grad_norm": 2.2465944290161133, "learning_rate": 5.329231551060264e-06, "loss": 0.9013, "step": 1654 }, { "epoch": 0.4949902796470764, "grad_norm": 2.156437635421753, "learning_rate": 5.324396766307863e-06, "loss": 0.9693, "step": 1655 }, { "epoch": 0.4952893674293405, "grad_norm": 2.312997817993164, "learning_rate": 5.31956167694122e-06, "loss": 0.8966, "step": 1656 }, { "epoch": 0.4955884552116046, "grad_norm": 2.382277011871338, "learning_rate": 5.314726287500565e-06, "loss": 0.9316, "step": 1657 }, { "epoch": 0.4958875429938687, "grad_norm": 2.0583364963531494, "learning_rate": 5.309890602526416e-06, "loss": 0.8375, "step": 1658 }, { "epoch": 0.4961866307761328, "grad_norm": 2.5429179668426514, "learning_rate": 5.305054626559565e-06, "loss": 0.901, "step": 1659 }, { "epoch": 0.4964857185583969, "grad_norm": 1.965752124786377, "learning_rate": 5.30021836414108e-06, "loss": 0.9035, "step": 1660 }, { "epoch": 0.49678480634066097, "grad_norm": 2.134531021118164, "learning_rate": 5.295381819812293e-06, "loss": 0.9101, "step": 1661 }, { "epoch": 0.4970838941229251, "grad_norm": 2.1776933670043945, "learning_rate": 5.290544998114805e-06, "loss": 0.9685, "step": 1662 }, { "epoch": 0.49738298190518915, "grad_norm": 2.0108261108398438, "learning_rate": 5.2857079035904764e-06, "loss": 0.8817, "step": 1663 }, { "epoch": 0.49768206968745327, "grad_norm": 2.3489437103271484, "learning_rate": 5.280870540781425e-06, "loss": 0.9377, "step": 1664 }, { "epoch": 0.4979811574697174, "grad_norm": 2.753484010696411, "learning_rate": 5.2760329142300174e-06, "loss": 0.9054, "step": 1665 }, { "epoch": 0.49828024525198145, "grad_norm": 1.9787284135818481, "learning_rate": 5.271195028478871e-06, "loss": 0.8826, "step": 1666 }, { "epoch": 0.49857933303424556, "grad_norm": 2.215973138809204, "learning_rate": 5.266356888070843e-06, "loss": 0.9201, "step": 1667 }, { "epoch": 0.4988784208165096, "grad_norm": 2.1574788093566895, "learning_rate": 5.261518497549033e-06, "loss": 0.8976, "step": 1668 }, { "epoch": 0.49917750859877374, "grad_norm": 2.4908230304718018, "learning_rate": 5.256679861456776e-06, "loss": 0.9178, "step": 1669 }, { "epoch": 0.4994765963810378, "grad_norm": 2.461660623550415, "learning_rate": 5.251840984337634e-06, "loss": 0.9111, "step": 1670 }, { "epoch": 0.4997756841633019, "grad_norm": 2.195263385772705, "learning_rate": 5.247001870735398e-06, "loss": 0.8479, "step": 1671 }, { "epoch": 0.500074771945566, "grad_norm": 2.0689258575439453, "learning_rate": 5.242162525194082e-06, "loss": 0.8146, "step": 1672 }, { "epoch": 0.5003738597278301, "grad_norm": 2.1679046154022217, "learning_rate": 5.237322952257915e-06, "loss": 0.8671, "step": 1673 }, { "epoch": 0.5006729475100942, "grad_norm": 2.3397462368011475, "learning_rate": 5.232483156471339e-06, "loss": 0.9054, "step": 1674 }, { "epoch": 0.5009720352923583, "grad_norm": 2.35732102394104, "learning_rate": 5.227643142379009e-06, "loss": 0.9131, "step": 1675 }, { "epoch": 0.5012711230746224, "grad_norm": 2.091479778289795, "learning_rate": 5.222802914525782e-06, "loss": 0.8748, "step": 1676 }, { "epoch": 0.5015702108568865, "grad_norm": 2.1426877975463867, "learning_rate": 5.217962477456718e-06, "loss": 0.8588, "step": 1677 }, { "epoch": 0.5018692986391506, "grad_norm": 2.095028877258301, "learning_rate": 5.21312183571707e-06, "loss": 0.8435, "step": 1678 }, { "epoch": 0.5021683864214147, "grad_norm": 2.5602309703826904, "learning_rate": 5.208280993852287e-06, "loss": 0.806, "step": 1679 }, { "epoch": 0.5024674742036788, "grad_norm": 2.273801803588867, "learning_rate": 5.203439956408005e-06, "loss": 0.9354, "step": 1680 }, { "epoch": 0.5027665619859428, "grad_norm": 2.1699748039245605, "learning_rate": 5.198598727930041e-06, "loss": 0.8958, "step": 1681 }, { "epoch": 0.503065649768207, "grad_norm": 2.227309226989746, "learning_rate": 5.193757312964394e-06, "loss": 0.8542, "step": 1682 }, { "epoch": 0.5033647375504711, "grad_norm": 2.489361524581909, "learning_rate": 5.188915716057238e-06, "loss": 0.9201, "step": 1683 }, { "epoch": 0.5036638253327351, "grad_norm": 2.0391042232513428, "learning_rate": 5.184073941754916e-06, "loss": 0.8983, "step": 1684 }, { "epoch": 0.5039629131149993, "grad_norm": 2.2191414833068848, "learning_rate": 5.1792319946039405e-06, "loss": 0.8596, "step": 1685 }, { "epoch": 0.5042620008972634, "grad_norm": 1.9760476350784302, "learning_rate": 5.174389879150985e-06, "loss": 0.852, "step": 1686 }, { "epoch": 0.5045610886795274, "grad_norm": 2.1896417140960693, "learning_rate": 5.169547599942877e-06, "loss": 0.9297, "step": 1687 }, { "epoch": 0.5048601764617915, "grad_norm": 2.273454427719116, "learning_rate": 5.164705161526605e-06, "loss": 0.8604, "step": 1688 }, { "epoch": 0.5051592642440557, "grad_norm": 2.1158807277679443, "learning_rate": 5.159862568449302e-06, "loss": 0.8935, "step": 1689 }, { "epoch": 0.5054583520263197, "grad_norm": 1.9302103519439697, "learning_rate": 5.155019825258251e-06, "loss": 0.8605, "step": 1690 }, { "epoch": 0.5057574398085838, "grad_norm": 2.318484306335449, "learning_rate": 5.1501769365008654e-06, "loss": 0.8656, "step": 1691 }, { "epoch": 0.506056527590848, "grad_norm": 2.084721326828003, "learning_rate": 5.14533390672471e-06, "loss": 0.9658, "step": 1692 }, { "epoch": 0.506355615373112, "grad_norm": 2.0303187370300293, "learning_rate": 5.140490740477471e-06, "loss": 0.9282, "step": 1693 }, { "epoch": 0.5066547031553761, "grad_norm": 2.1841776371002197, "learning_rate": 5.135647442306966e-06, "loss": 0.8597, "step": 1694 }, { "epoch": 0.5069537909376401, "grad_norm": 2.235685348510742, "learning_rate": 5.130804016761138e-06, "loss": 0.8081, "step": 1695 }, { "epoch": 0.5072528787199043, "grad_norm": 2.4768335819244385, "learning_rate": 5.1259604683880485e-06, "loss": 0.858, "step": 1696 }, { "epoch": 0.5075519665021684, "grad_norm": 2.0676746368408203, "learning_rate": 5.121116801735873e-06, "loss": 0.9392, "step": 1697 }, { "epoch": 0.5078510542844324, "grad_norm": 1.9838848114013672, "learning_rate": 5.1162730213529e-06, "loss": 0.8712, "step": 1698 }, { "epoch": 0.5081501420666966, "grad_norm": 2.0639069080352783, "learning_rate": 5.1114291317875244e-06, "loss": 0.937, "step": 1699 }, { "epoch": 0.5084492298489607, "grad_norm": 2.1650733947753906, "learning_rate": 5.1065851375882425e-06, "loss": 0.9256, "step": 1700 }, { "epoch": 0.5087483176312247, "grad_norm": 2.06856369972229, "learning_rate": 5.101741043303651e-06, "loss": 0.8391, "step": 1701 }, { "epoch": 0.5090474054134888, "grad_norm": 2.1647884845733643, "learning_rate": 5.096896853482437e-06, "loss": 0.9528, "step": 1702 }, { "epoch": 0.509346493195753, "grad_norm": 2.096499443054199, "learning_rate": 5.092052572673383e-06, "loss": 0.896, "step": 1703 }, { "epoch": 0.509645580978017, "grad_norm": 2.0767080783843994, "learning_rate": 5.087208205425349e-06, "loss": 0.8342, "step": 1704 }, { "epoch": 0.5099446687602811, "grad_norm": 2.0533864498138428, "learning_rate": 5.082363756287285e-06, "loss": 0.8444, "step": 1705 }, { "epoch": 0.5102437565425453, "grad_norm": 1.9491111040115356, "learning_rate": 5.077519229808211e-06, "loss": 0.8563, "step": 1706 }, { "epoch": 0.5105428443248093, "grad_norm": 2.4588849544525146, "learning_rate": 5.072674630537223e-06, "loss": 0.8779, "step": 1707 }, { "epoch": 0.5108419321070734, "grad_norm": 2.1069846153259277, "learning_rate": 5.067829963023485e-06, "loss": 0.8106, "step": 1708 }, { "epoch": 0.5111410198893375, "grad_norm": 2.1459741592407227, "learning_rate": 5.062985231816225e-06, "loss": 0.8726, "step": 1709 }, { "epoch": 0.5114401076716016, "grad_norm": 2.2034380435943604, "learning_rate": 5.0581404414647276e-06, "loss": 0.8551, "step": 1710 }, { "epoch": 0.5117391954538657, "grad_norm": 2.2226204872131348, "learning_rate": 5.053295596518337e-06, "loss": 0.8249, "step": 1711 }, { "epoch": 0.5120382832361298, "grad_norm": 2.6262190341949463, "learning_rate": 5.04845070152645e-06, "loss": 0.8132, "step": 1712 }, { "epoch": 0.5123373710183939, "grad_norm": 2.5854530334472656, "learning_rate": 5.043605761038505e-06, "loss": 0.8906, "step": 1713 }, { "epoch": 0.512636458800658, "grad_norm": 2.1126480102539062, "learning_rate": 5.038760779603989e-06, "loss": 0.8558, "step": 1714 }, { "epoch": 0.5129355465829221, "grad_norm": 2.065145254135132, "learning_rate": 5.033915761772419e-06, "loss": 0.9153, "step": 1715 }, { "epoch": 0.5132346343651862, "grad_norm": 2.262531042098999, "learning_rate": 5.029070712093357e-06, "loss": 0.8982, "step": 1716 }, { "epoch": 0.5135337221474503, "grad_norm": 2.3323934078216553, "learning_rate": 5.024225635116386e-06, "loss": 0.8511, "step": 1717 }, { "epoch": 0.5138328099297144, "grad_norm": 2.4201579093933105, "learning_rate": 5.01938053539112e-06, "loss": 0.9291, "step": 1718 }, { "epoch": 0.5141318977119784, "grad_norm": 2.267472505569458, "learning_rate": 5.014535417467191e-06, "loss": 0.8826, "step": 1719 }, { "epoch": 0.5144309854942426, "grad_norm": 2.1321566104888916, "learning_rate": 5.009690285894252e-06, "loss": 0.8752, "step": 1720 }, { "epoch": 0.5147300732765067, "grad_norm": 2.258852958679199, "learning_rate": 5.004845145221965e-06, "loss": 0.8928, "step": 1721 }, { "epoch": 0.5150291610587707, "grad_norm": 3.130423069000244, "learning_rate": 5e-06, "loss": 0.8843, "step": 1722 }, { "epoch": 0.5153282488410349, "grad_norm": 2.5138838291168213, "learning_rate": 4.995154854778036e-06, "loss": 0.9203, "step": 1723 }, { "epoch": 0.515627336623299, "grad_norm": 2.0827386379241943, "learning_rate": 4.99030971410575e-06, "loss": 0.8523, "step": 1724 }, { "epoch": 0.515926424405563, "grad_norm": 2.2196617126464844, "learning_rate": 4.9854645825328096e-06, "loss": 0.8361, "step": 1725 }, { "epoch": 0.5162255121878271, "grad_norm": 1.9171820878982544, "learning_rate": 4.980619464608881e-06, "loss": 0.837, "step": 1726 }, { "epoch": 0.5165245999700913, "grad_norm": 2.297672748565674, "learning_rate": 4.975774364883617e-06, "loss": 0.907, "step": 1727 }, { "epoch": 0.5168236877523553, "grad_norm": 2.0687761306762695, "learning_rate": 4.9709292879066464e-06, "loss": 0.88, "step": 1728 }, { "epoch": 0.5171227755346194, "grad_norm": 1.938816785812378, "learning_rate": 4.966084238227582e-06, "loss": 0.8825, "step": 1729 }, { "epoch": 0.5174218633168836, "grad_norm": 2.0830540657043457, "learning_rate": 4.961239220396014e-06, "loss": 0.8914, "step": 1730 }, { "epoch": 0.5177209510991476, "grad_norm": 2.691883087158203, "learning_rate": 4.956394238961497e-06, "loss": 0.8626, "step": 1731 }, { "epoch": 0.5180200388814117, "grad_norm": 2.45129656791687, "learning_rate": 4.951549298473552e-06, "loss": 0.8771, "step": 1732 }, { "epoch": 0.5183191266636757, "grad_norm": 2.1908161640167236, "learning_rate": 4.946704403481663e-06, "loss": 0.9023, "step": 1733 }, { "epoch": 0.5186182144459399, "grad_norm": 2.0779082775115967, "learning_rate": 4.941859558535275e-06, "loss": 0.8637, "step": 1734 }, { "epoch": 0.518917302228204, "grad_norm": 2.3001840114593506, "learning_rate": 4.937014768183778e-06, "loss": 0.9191, "step": 1735 }, { "epoch": 0.519216390010468, "grad_norm": 2.1742446422576904, "learning_rate": 4.9321700369765165e-06, "loss": 0.9017, "step": 1736 }, { "epoch": 0.5195154777927322, "grad_norm": 2.1331582069396973, "learning_rate": 4.927325369462777e-06, "loss": 0.8776, "step": 1737 }, { "epoch": 0.5198145655749963, "grad_norm": 2.370137929916382, "learning_rate": 4.92248077019179e-06, "loss": 0.8607, "step": 1738 }, { "epoch": 0.5201136533572603, "grad_norm": 2.5680811405181885, "learning_rate": 4.917636243712716e-06, "loss": 0.9237, "step": 1739 }, { "epoch": 0.5204127411395244, "grad_norm": 2.489089012145996, "learning_rate": 4.912791794574653e-06, "loss": 0.9059, "step": 1740 }, { "epoch": 0.5207118289217886, "grad_norm": 2.254448890686035, "learning_rate": 4.9079474273266195e-06, "loss": 0.9067, "step": 1741 }, { "epoch": 0.5210109167040526, "grad_norm": 2.3009908199310303, "learning_rate": 4.903103146517564e-06, "loss": 0.9341, "step": 1742 }, { "epoch": 0.5213100044863167, "grad_norm": 2.3644094467163086, "learning_rate": 4.898258956696351e-06, "loss": 0.9, "step": 1743 }, { "epoch": 0.5216090922685809, "grad_norm": 2.1572651863098145, "learning_rate": 4.893414862411759e-06, "loss": 0.8707, "step": 1744 }, { "epoch": 0.5219081800508449, "grad_norm": 2.0835447311401367, "learning_rate": 4.888570868212478e-06, "loss": 0.879, "step": 1745 }, { "epoch": 0.522207267833109, "grad_norm": 2.0927553176879883, "learning_rate": 4.883726978647101e-06, "loss": 0.9055, "step": 1746 }, { "epoch": 0.5225063556153731, "grad_norm": 2.248781681060791, "learning_rate": 4.878883198264129e-06, "loss": 0.9222, "step": 1747 }, { "epoch": 0.5228054433976372, "grad_norm": 1.914617896080017, "learning_rate": 4.874039531611954e-06, "loss": 0.8895, "step": 1748 }, { "epoch": 0.5231045311799013, "grad_norm": 2.2857649326324463, "learning_rate": 4.8691959832388635e-06, "loss": 0.8723, "step": 1749 }, { "epoch": 0.5234036189621654, "grad_norm": 2.1132652759552, "learning_rate": 4.864352557693035e-06, "loss": 0.8217, "step": 1750 }, { "epoch": 0.5237027067444295, "grad_norm": 2.1504569053649902, "learning_rate": 4.859509259522531e-06, "loss": 0.8896, "step": 1751 }, { "epoch": 0.5240017945266936, "grad_norm": 2.2213175296783447, "learning_rate": 4.854666093275291e-06, "loss": 0.8398, "step": 1752 }, { "epoch": 0.5243008823089577, "grad_norm": 3.4005446434020996, "learning_rate": 4.849823063499136e-06, "loss": 0.8971, "step": 1753 }, { "epoch": 0.5245999700912217, "grad_norm": 2.093438148498535, "learning_rate": 4.844980174741752e-06, "loss": 0.9021, "step": 1754 }, { "epoch": 0.5248990578734859, "grad_norm": 2.164844274520874, "learning_rate": 4.840137431550698e-06, "loss": 0.9218, "step": 1755 }, { "epoch": 0.52519814565575, "grad_norm": 2.154075860977173, "learning_rate": 4.835294838473396e-06, "loss": 0.8783, "step": 1756 }, { "epoch": 0.525497233438014, "grad_norm": 2.497143030166626, "learning_rate": 4.8304524000571255e-06, "loss": 0.9348, "step": 1757 }, { "epoch": 0.5257963212202782, "grad_norm": 2.0018651485443115, "learning_rate": 4.825610120849018e-06, "loss": 0.8223, "step": 1758 }, { "epoch": 0.5260954090025423, "grad_norm": 1.9307399988174438, "learning_rate": 4.8207680053960594e-06, "loss": 0.8776, "step": 1759 }, { "epoch": 0.5263944967848063, "grad_norm": 2.275552272796631, "learning_rate": 4.815926058245085e-06, "loss": 0.8515, "step": 1760 }, { "epoch": 0.5266935845670704, "grad_norm": 2.260866403579712, "learning_rate": 4.811084283942764e-06, "loss": 0.8547, "step": 1761 }, { "epoch": 0.5269926723493346, "grad_norm": 2.0270347595214844, "learning_rate": 4.806242687035608e-06, "loss": 0.9457, "step": 1762 }, { "epoch": 0.5272917601315986, "grad_norm": 2.400954484939575, "learning_rate": 4.80140127206996e-06, "loss": 0.9287, "step": 1763 }, { "epoch": 0.5275908479138627, "grad_norm": 2.0988261699676514, "learning_rate": 4.796560043591996e-06, "loss": 0.8846, "step": 1764 }, { "epoch": 0.5278899356961269, "grad_norm": 2.267496109008789, "learning_rate": 4.791719006147714e-06, "loss": 0.8988, "step": 1765 }, { "epoch": 0.5281890234783909, "grad_norm": 2.0478272438049316, "learning_rate": 4.7868781642829326e-06, "loss": 0.8469, "step": 1766 }, { "epoch": 0.528488111260655, "grad_norm": 1.948096752166748, "learning_rate": 4.782037522543283e-06, "loss": 0.8964, "step": 1767 }, { "epoch": 0.528787199042919, "grad_norm": 2.2712175846099854, "learning_rate": 4.777197085474219e-06, "loss": 0.8701, "step": 1768 }, { "epoch": 0.5290862868251832, "grad_norm": 2.0704567432403564, "learning_rate": 4.772356857620992e-06, "loss": 0.9606, "step": 1769 }, { "epoch": 0.5293853746074473, "grad_norm": 2.007783889770508, "learning_rate": 4.767516843528664e-06, "loss": 0.901, "step": 1770 }, { "epoch": 0.5296844623897113, "grad_norm": 2.0680744647979736, "learning_rate": 4.762677047742088e-06, "loss": 0.9192, "step": 1771 }, { "epoch": 0.5299835501719755, "grad_norm": 2.1483588218688965, "learning_rate": 4.757837474805918e-06, "loss": 0.9588, "step": 1772 }, { "epoch": 0.5302826379542396, "grad_norm": 2.1364643573760986, "learning_rate": 4.7529981292646025e-06, "loss": 0.9023, "step": 1773 }, { "epoch": 0.5305817257365036, "grad_norm": 1.96449613571167, "learning_rate": 4.748159015662367e-06, "loss": 0.8863, "step": 1774 }, { "epoch": 0.5308808135187678, "grad_norm": 2.361649513244629, "learning_rate": 4.743320138543225e-06, "loss": 0.8413, "step": 1775 }, { "epoch": 0.5311799013010319, "grad_norm": 2.3085691928863525, "learning_rate": 4.738481502450967e-06, "loss": 0.8751, "step": 1776 }, { "epoch": 0.5314789890832959, "grad_norm": 2.0595881938934326, "learning_rate": 4.733643111929159e-06, "loss": 0.8787, "step": 1777 }, { "epoch": 0.53177807686556, "grad_norm": 2.1065218448638916, "learning_rate": 4.728804971521132e-06, "loss": 0.9186, "step": 1778 }, { "epoch": 0.5320771646478242, "grad_norm": 2.1742680072784424, "learning_rate": 4.723967085769985e-06, "loss": 0.9372, "step": 1779 }, { "epoch": 0.5323762524300882, "grad_norm": 2.3339638710021973, "learning_rate": 4.719129459218575e-06, "loss": 0.8704, "step": 1780 }, { "epoch": 0.5326753402123523, "grad_norm": 2.573352575302124, "learning_rate": 4.714292096409524e-06, "loss": 0.9243, "step": 1781 }, { "epoch": 0.5329744279946165, "grad_norm": 2.1012120246887207, "learning_rate": 4.709455001885196e-06, "loss": 0.9613, "step": 1782 }, { "epoch": 0.5332735157768805, "grad_norm": 2.2039730548858643, "learning_rate": 4.704618180187709e-06, "loss": 0.8925, "step": 1783 }, { "epoch": 0.5335726035591446, "grad_norm": 2.1439778804779053, "learning_rate": 4.699781635858923e-06, "loss": 0.9132, "step": 1784 }, { "epoch": 0.5338716913414087, "grad_norm": 2.0503296852111816, "learning_rate": 4.694945373440435e-06, "loss": 0.9406, "step": 1785 }, { "epoch": 0.5341707791236728, "grad_norm": 2.287712812423706, "learning_rate": 4.690109397473586e-06, "loss": 0.9211, "step": 1786 }, { "epoch": 0.5344698669059369, "grad_norm": 2.1375670433044434, "learning_rate": 4.685273712499436e-06, "loss": 0.913, "step": 1787 }, { "epoch": 0.534768954688201, "grad_norm": 1.86098051071167, "learning_rate": 4.680438323058783e-06, "loss": 0.904, "step": 1788 }, { "epoch": 0.5350680424704651, "grad_norm": 2.2652604579925537, "learning_rate": 4.675603233692137e-06, "loss": 0.917, "step": 1789 }, { "epoch": 0.5353671302527292, "grad_norm": 2.0635061264038086, "learning_rate": 4.670768448939737e-06, "loss": 0.8304, "step": 1790 }, { "epoch": 0.5356662180349933, "grad_norm": 2.3007218837738037, "learning_rate": 4.665933973341527e-06, "loss": 0.8505, "step": 1791 }, { "epoch": 0.5359653058172573, "grad_norm": 2.1822798252105713, "learning_rate": 4.661099811437166e-06, "loss": 0.8863, "step": 1792 }, { "epoch": 0.5362643935995215, "grad_norm": 2.219691753387451, "learning_rate": 4.656265967766014e-06, "loss": 0.8738, "step": 1793 }, { "epoch": 0.5365634813817856, "grad_norm": 1.9920014142990112, "learning_rate": 4.651432446867139e-06, "loss": 0.8647, "step": 1794 }, { "epoch": 0.5368625691640496, "grad_norm": 2.401527166366577, "learning_rate": 4.646599253279299e-06, "loss": 0.8942, "step": 1795 }, { "epoch": 0.5371616569463138, "grad_norm": 2.1516709327697754, "learning_rate": 4.641766391540949e-06, "loss": 0.9231, "step": 1796 }, { "epoch": 0.5374607447285779, "grad_norm": 2.093278169631958, "learning_rate": 4.636933866190228e-06, "loss": 0.8852, "step": 1797 }, { "epoch": 0.5377598325108419, "grad_norm": 2.242121458053589, "learning_rate": 4.632101681764964e-06, "loss": 0.8894, "step": 1798 }, { "epoch": 0.538058920293106, "grad_norm": 1.9730896949768066, "learning_rate": 4.627269842802664e-06, "loss": 0.8845, "step": 1799 }, { "epoch": 0.5383580080753702, "grad_norm": 2.1890223026275635, "learning_rate": 4.622438353840506e-06, "loss": 0.9143, "step": 1800 }, { "epoch": 0.5386570958576342, "grad_norm": 2.2115583419799805, "learning_rate": 4.617607219415346e-06, "loss": 0.8674, "step": 1801 }, { "epoch": 0.5389561836398983, "grad_norm": 1.9598685503005981, "learning_rate": 4.6127764440637e-06, "loss": 0.8371, "step": 1802 }, { "epoch": 0.5392552714221625, "grad_norm": 2.2081544399261475, "learning_rate": 4.607946032321755e-06, "loss": 0.8377, "step": 1803 }, { "epoch": 0.5395543592044265, "grad_norm": 2.1784210205078125, "learning_rate": 4.603115988725351e-06, "loss": 0.8635, "step": 1804 }, { "epoch": 0.5398534469866906, "grad_norm": 1.8666536808013916, "learning_rate": 4.598286317809983e-06, "loss": 0.8995, "step": 1805 }, { "epoch": 0.5401525347689546, "grad_norm": 2.055588722229004, "learning_rate": 4.593457024110792e-06, "loss": 0.9106, "step": 1806 }, { "epoch": 0.5404516225512188, "grad_norm": 2.339383125305176, "learning_rate": 4.588628112162578e-06, "loss": 0.8594, "step": 1807 }, { "epoch": 0.5407507103334829, "grad_norm": 2.1692025661468506, "learning_rate": 4.583799586499768e-06, "loss": 0.8438, "step": 1808 }, { "epoch": 0.5410497981157469, "grad_norm": 2.8493058681488037, "learning_rate": 4.578971451656435e-06, "loss": 0.9072, "step": 1809 }, { "epoch": 0.5413488858980111, "grad_norm": 2.337204694747925, "learning_rate": 4.574143712166279e-06, "loss": 0.8903, "step": 1810 }, { "epoch": 0.5416479736802752, "grad_norm": 2.079573392868042, "learning_rate": 4.569316372562634e-06, "loss": 0.9085, "step": 1811 }, { "epoch": 0.5419470614625392, "grad_norm": 2.478429079055786, "learning_rate": 4.564489437378457e-06, "loss": 0.8831, "step": 1812 }, { "epoch": 0.5422461492448033, "grad_norm": 2.5372555255889893, "learning_rate": 4.559662911146324e-06, "loss": 0.916, "step": 1813 }, { "epoch": 0.5425452370270675, "grad_norm": 2.009634494781494, "learning_rate": 4.554836798398425e-06, "loss": 0.8383, "step": 1814 }, { "epoch": 0.5428443248093315, "grad_norm": 2.2215487957000732, "learning_rate": 4.550011103666568e-06, "loss": 0.8916, "step": 1815 }, { "epoch": 0.5431434125915956, "grad_norm": 2.1458535194396973, "learning_rate": 4.545185831482166e-06, "loss": 0.9218, "step": 1816 }, { "epoch": 0.5434425003738598, "grad_norm": 2.210353374481201, "learning_rate": 4.5403609863762295e-06, "loss": 0.8709, "step": 1817 }, { "epoch": 0.5437415881561238, "grad_norm": 2.183340311050415, "learning_rate": 4.535536572879376e-06, "loss": 0.8868, "step": 1818 }, { "epoch": 0.5440406759383879, "grad_norm": 2.260093927383423, "learning_rate": 4.53071259552181e-06, "loss": 0.9164, "step": 1819 }, { "epoch": 0.544339763720652, "grad_norm": 1.996625304222107, "learning_rate": 4.525889058833337e-06, "loss": 0.891, "step": 1820 }, { "epoch": 0.5446388515029161, "grad_norm": 2.443636178970337, "learning_rate": 4.5210659673433386e-06, "loss": 0.8934, "step": 1821 }, { "epoch": 0.5449379392851802, "grad_norm": 2.18399715423584, "learning_rate": 4.516243325580782e-06, "loss": 0.8174, "step": 1822 }, { "epoch": 0.5452370270674443, "grad_norm": 2.337775230407715, "learning_rate": 4.511421138074213e-06, "loss": 0.8755, "step": 1823 }, { "epoch": 0.5455361148497084, "grad_norm": 2.635383367538452, "learning_rate": 4.50659940935175e-06, "loss": 0.9689, "step": 1824 }, { "epoch": 0.5458352026319725, "grad_norm": 2.012200117111206, "learning_rate": 4.5017781439410806e-06, "loss": 0.9111, "step": 1825 }, { "epoch": 0.5461342904142366, "grad_norm": 2.0822315216064453, "learning_rate": 4.496957346369458e-06, "loss": 0.8937, "step": 1826 }, { "epoch": 0.5464333781965006, "grad_norm": 2.2513535022735596, "learning_rate": 4.492137021163694e-06, "loss": 0.9096, "step": 1827 }, { "epoch": 0.5467324659787648, "grad_norm": 2.1024158000946045, "learning_rate": 4.4873171728501604e-06, "loss": 0.9149, "step": 1828 }, { "epoch": 0.5470315537610289, "grad_norm": 2.2702372074127197, "learning_rate": 4.482497805954779e-06, "loss": 0.8742, "step": 1829 }, { "epoch": 0.5473306415432929, "grad_norm": 2.2831454277038574, "learning_rate": 4.477678925003018e-06, "loss": 0.8678, "step": 1830 }, { "epoch": 0.5476297293255571, "grad_norm": 2.359964370727539, "learning_rate": 4.472860534519893e-06, "loss": 0.8856, "step": 1831 }, { "epoch": 0.5479288171078212, "grad_norm": 2.3626275062561035, "learning_rate": 4.468042639029952e-06, "loss": 0.931, "step": 1832 }, { "epoch": 0.5482279048900852, "grad_norm": 4.257081985473633, "learning_rate": 4.463225243057292e-06, "loss": 0.8964, "step": 1833 }, { "epoch": 0.5485269926723494, "grad_norm": 2.176769733428955, "learning_rate": 4.458408351125525e-06, "loss": 0.8886, "step": 1834 }, { "epoch": 0.5488260804546135, "grad_norm": 2.2299962043762207, "learning_rate": 4.453591967757801e-06, "loss": 0.9231, "step": 1835 }, { "epoch": 0.5491251682368775, "grad_norm": 2.0839271545410156, "learning_rate": 4.4487760974767835e-06, "loss": 0.8444, "step": 1836 }, { "epoch": 0.5494242560191416, "grad_norm": 2.201155424118042, "learning_rate": 4.4439607448046636e-06, "loss": 0.8662, "step": 1837 }, { "epoch": 0.5497233438014058, "grad_norm": 2.2098374366760254, "learning_rate": 4.43914591426314e-06, "loss": 0.8436, "step": 1838 }, { "epoch": 0.5500224315836698, "grad_norm": 2.0816283226013184, "learning_rate": 4.434331610373424e-06, "loss": 0.8502, "step": 1839 }, { "epoch": 0.5503215193659339, "grad_norm": 2.1641764640808105, "learning_rate": 4.4295178376562285e-06, "loss": 0.9283, "step": 1840 }, { "epoch": 0.550620607148198, "grad_norm": 2.002650022506714, "learning_rate": 4.424704600631774e-06, "loss": 0.8712, "step": 1841 }, { "epoch": 0.5509196949304621, "grad_norm": 2.139785051345825, "learning_rate": 4.419891903819773e-06, "loss": 0.8395, "step": 1842 }, { "epoch": 0.5512187827127262, "grad_norm": 2.210139751434326, "learning_rate": 4.4150797517394295e-06, "loss": 0.8746, "step": 1843 }, { "epoch": 0.5515178704949902, "grad_norm": 2.27203369140625, "learning_rate": 4.410268148909441e-06, "loss": 0.9129, "step": 1844 }, { "epoch": 0.5518169582772544, "grad_norm": 2.1294922828674316, "learning_rate": 4.405457099847986e-06, "loss": 0.8998, "step": 1845 }, { "epoch": 0.5521160460595185, "grad_norm": 2.281787395477295, "learning_rate": 4.400646609072727e-06, "loss": 0.8969, "step": 1846 }, { "epoch": 0.5524151338417825, "grad_norm": 2.0267505645751953, "learning_rate": 4.395836681100794e-06, "loss": 0.8861, "step": 1847 }, { "epoch": 0.5527142216240467, "grad_norm": 2.3561384677886963, "learning_rate": 4.391027320448798e-06, "loss": 0.8722, "step": 1848 }, { "epoch": 0.5530133094063108, "grad_norm": 2.024650812149048, "learning_rate": 4.386218531632808e-06, "loss": 0.8702, "step": 1849 }, { "epoch": 0.5533123971885748, "grad_norm": 2.1831765174865723, "learning_rate": 4.3814103191683655e-06, "loss": 0.8455, "step": 1850 }, { "epoch": 0.5536114849708389, "grad_norm": 2.337229013442993, "learning_rate": 4.376602687570464e-06, "loss": 0.8948, "step": 1851 }, { "epoch": 0.5539105727531031, "grad_norm": 2.597687244415283, "learning_rate": 4.371795641353555e-06, "loss": 0.8989, "step": 1852 }, { "epoch": 0.5542096605353671, "grad_norm": 2.4540083408355713, "learning_rate": 4.366989185031536e-06, "loss": 0.8474, "step": 1853 }, { "epoch": 0.5545087483176312, "grad_norm": 2.1036911010742188, "learning_rate": 4.362183323117757e-06, "loss": 0.867, "step": 1854 }, { "epoch": 0.5548078360998954, "grad_norm": 2.263775587081909, "learning_rate": 4.357378060125007e-06, "loss": 0.9134, "step": 1855 }, { "epoch": 0.5551069238821594, "grad_norm": 2.0517518520355225, "learning_rate": 4.3525734005655085e-06, "loss": 0.8724, "step": 1856 }, { "epoch": 0.5554060116644235, "grad_norm": 2.48331880569458, "learning_rate": 4.347769348950922e-06, "loss": 0.9702, "step": 1857 }, { "epoch": 0.5557050994466876, "grad_norm": 2.097397565841675, "learning_rate": 4.342965909792338e-06, "loss": 0.8871, "step": 1858 }, { "epoch": 0.5560041872289517, "grad_norm": 2.226527214050293, "learning_rate": 4.338163087600271e-06, "loss": 0.9314, "step": 1859 }, { "epoch": 0.5563032750112158, "grad_norm": 2.3147385120391846, "learning_rate": 4.33336088688465e-06, "loss": 0.9051, "step": 1860 }, { "epoch": 0.5566023627934799, "grad_norm": 1.837152123451233, "learning_rate": 4.328559312154831e-06, "loss": 0.9042, "step": 1861 }, { "epoch": 0.556901450575744, "grad_norm": 2.0995495319366455, "learning_rate": 4.323758367919572e-06, "loss": 0.9572, "step": 1862 }, { "epoch": 0.5572005383580081, "grad_norm": 1.9725443124771118, "learning_rate": 4.318958058687047e-06, "loss": 0.9364, "step": 1863 }, { "epoch": 0.5574996261402722, "grad_norm": 2.2355737686157227, "learning_rate": 4.31415838896483e-06, "loss": 0.8225, "step": 1864 }, { "epoch": 0.5577987139225362, "grad_norm": 2.124415397644043, "learning_rate": 4.309359363259895e-06, "loss": 0.8785, "step": 1865 }, { "epoch": 0.5580978017048004, "grad_norm": 2.1749050617218018, "learning_rate": 4.304560986078609e-06, "loss": 0.8785, "step": 1866 }, { "epoch": 0.5583968894870645, "grad_norm": 2.02217698097229, "learning_rate": 4.299763261926734e-06, "loss": 0.9135, "step": 1867 }, { "epoch": 0.5586959772693285, "grad_norm": 2.075554370880127, "learning_rate": 4.294966195309418e-06, "loss": 0.9385, "step": 1868 }, { "epoch": 0.5589950650515927, "grad_norm": 2.51904559135437, "learning_rate": 4.2901697907311876e-06, "loss": 0.9064, "step": 1869 }, { "epoch": 0.5592941528338568, "grad_norm": 2.1687233448028564, "learning_rate": 4.285374052695953e-06, "loss": 0.8607, "step": 1870 }, { "epoch": 0.5595932406161208, "grad_norm": 2.2533366680145264, "learning_rate": 4.280578985706995e-06, "loss": 0.895, "step": 1871 }, { "epoch": 0.5598923283983849, "grad_norm": 2.0120036602020264, "learning_rate": 4.2757845942669674e-06, "loss": 0.8538, "step": 1872 }, { "epoch": 0.560191416180649, "grad_norm": 2.2549288272857666, "learning_rate": 4.270990882877885e-06, "loss": 0.8636, "step": 1873 }, { "epoch": 0.5604905039629131, "grad_norm": 2.252396583557129, "learning_rate": 4.2661978560411274e-06, "loss": 0.88, "step": 1874 }, { "epoch": 0.5607895917451772, "grad_norm": 2.7459611892700195, "learning_rate": 4.261405518257434e-06, "loss": 0.9051, "step": 1875 }, { "epoch": 0.5610886795274413, "grad_norm": 2.0326454639434814, "learning_rate": 4.25661387402689e-06, "loss": 0.8389, "step": 1876 }, { "epoch": 0.5613877673097054, "grad_norm": 2.269606590270996, "learning_rate": 4.251822927848934e-06, "loss": 0.8971, "step": 1877 }, { "epoch": 0.5616868550919695, "grad_norm": 2.537938356399536, "learning_rate": 4.24703268422235e-06, "loss": 0.87, "step": 1878 }, { "epoch": 0.5619859428742335, "grad_norm": 2.1836159229278564, "learning_rate": 4.242243147645257e-06, "loss": 0.845, "step": 1879 }, { "epoch": 0.5622850306564977, "grad_norm": 2.0076863765716553, "learning_rate": 4.237454322615118e-06, "loss": 0.8579, "step": 1880 }, { "epoch": 0.5625841184387618, "grad_norm": 2.089862108230591, "learning_rate": 4.232666213628722e-06, "loss": 0.8719, "step": 1881 }, { "epoch": 0.5628832062210258, "grad_norm": 2.086724042892456, "learning_rate": 4.227878825182186e-06, "loss": 0.8063, "step": 1882 }, { "epoch": 0.56318229400329, "grad_norm": 2.5090909004211426, "learning_rate": 4.223092161770952e-06, "loss": 0.8479, "step": 1883 }, { "epoch": 0.5634813817855541, "grad_norm": 2.0167312622070312, "learning_rate": 4.218306227889782e-06, "loss": 0.9103, "step": 1884 }, { "epoch": 0.5637804695678181, "grad_norm": 2.080106735229492, "learning_rate": 4.213521028032751e-06, "loss": 0.9081, "step": 1885 }, { "epoch": 0.5640795573500822, "grad_norm": 2.357802629470825, "learning_rate": 4.2087365666932456e-06, "loss": 0.9338, "step": 1886 }, { "epoch": 0.5643786451323464, "grad_norm": 2.353455066680908, "learning_rate": 4.203952848363957e-06, "loss": 0.8344, "step": 1887 }, { "epoch": 0.5646777329146104, "grad_norm": 2.1348564624786377, "learning_rate": 4.199169877536884e-06, "loss": 0.8681, "step": 1888 }, { "epoch": 0.5649768206968745, "grad_norm": 2.248995304107666, "learning_rate": 4.194387658703317e-06, "loss": 0.9595, "step": 1889 }, { "epoch": 0.5652759084791387, "grad_norm": 2.0024280548095703, "learning_rate": 4.189606196353844e-06, "loss": 0.8556, "step": 1890 }, { "epoch": 0.5655749962614027, "grad_norm": 2.239835739135742, "learning_rate": 4.184825494978342e-06, "loss": 0.8702, "step": 1891 }, { "epoch": 0.5658740840436668, "grad_norm": 2.216196060180664, "learning_rate": 4.180045559065974e-06, "loss": 0.8684, "step": 1892 }, { "epoch": 0.566173171825931, "grad_norm": 2.1433985233306885, "learning_rate": 4.175266393105183e-06, "loss": 0.8847, "step": 1893 }, { "epoch": 0.566472259608195, "grad_norm": 2.0503642559051514, "learning_rate": 4.1704880015836905e-06, "loss": 0.8972, "step": 1894 }, { "epoch": 0.5667713473904591, "grad_norm": 2.086357831954956, "learning_rate": 4.165710388988487e-06, "loss": 0.8812, "step": 1895 }, { "epoch": 0.5670704351727232, "grad_norm": 5.001316070556641, "learning_rate": 4.1609335598058355e-06, "loss": 0.8242, "step": 1896 }, { "epoch": 0.5673695229549873, "grad_norm": 2.1502492427825928, "learning_rate": 4.156157518521264e-06, "loss": 0.8515, "step": 1897 }, { "epoch": 0.5676686107372514, "grad_norm": 2.1116724014282227, "learning_rate": 4.151382269619558e-06, "loss": 0.8725, "step": 1898 }, { "epoch": 0.5679676985195155, "grad_norm": 2.2542366981506348, "learning_rate": 4.146607817584759e-06, "loss": 0.9963, "step": 1899 }, { "epoch": 0.5682667863017796, "grad_norm": 2.0145456790924072, "learning_rate": 4.14183416690016e-06, "loss": 0.8369, "step": 1900 }, { "epoch": 0.5685658740840437, "grad_norm": 2.4876906871795654, "learning_rate": 4.137061322048307e-06, "loss": 0.8458, "step": 1901 }, { "epoch": 0.5688649618663078, "grad_norm": 1.978898525238037, "learning_rate": 4.13228928751098e-06, "loss": 0.8302, "step": 1902 }, { "epoch": 0.5691640496485718, "grad_norm": 1.9800769090652466, "learning_rate": 4.127518067769206e-06, "loss": 0.832, "step": 1903 }, { "epoch": 0.569463137430836, "grad_norm": 2.0714869499206543, "learning_rate": 4.122747667303242e-06, "loss": 0.9122, "step": 1904 }, { "epoch": 0.5697622252131, "grad_norm": 2.211766004562378, "learning_rate": 4.11797809059258e-06, "loss": 0.918, "step": 1905 }, { "epoch": 0.5700613129953641, "grad_norm": 2.18353533744812, "learning_rate": 4.1132093421159335e-06, "loss": 0.8877, "step": 1906 }, { "epoch": 0.5703604007776283, "grad_norm": 2.0971720218658447, "learning_rate": 4.108441426351243e-06, "loss": 0.8414, "step": 1907 }, { "epoch": 0.5706594885598923, "grad_norm": 2.211693286895752, "learning_rate": 4.103674347775663e-06, "loss": 0.8165, "step": 1908 }, { "epoch": 0.5709585763421564, "grad_norm": 1.8863152265548706, "learning_rate": 4.098908110865563e-06, "loss": 0.8867, "step": 1909 }, { "epoch": 0.5712576641244205, "grad_norm": 1.9861804246902466, "learning_rate": 4.094142720096526e-06, "loss": 0.8287, "step": 1910 }, { "epoch": 0.5715567519066846, "grad_norm": 2.0787179470062256, "learning_rate": 4.089378179943336e-06, "loss": 0.7899, "step": 1911 }, { "epoch": 0.5718558396889487, "grad_norm": 2.3846776485443115, "learning_rate": 4.084614494879979e-06, "loss": 0.9112, "step": 1912 }, { "epoch": 0.5721549274712128, "grad_norm": 2.2289583683013916, "learning_rate": 4.079851669379638e-06, "loss": 0.8568, "step": 1913 }, { "epoch": 0.5724540152534769, "grad_norm": 2.0268211364746094, "learning_rate": 4.0750897079146924e-06, "loss": 0.842, "step": 1914 }, { "epoch": 0.572753103035741, "grad_norm": 2.3111181259155273, "learning_rate": 4.070328614956705e-06, "loss": 0.8863, "step": 1915 }, { "epoch": 0.5730521908180051, "grad_norm": 2.245413303375244, "learning_rate": 4.065568394976426e-06, "loss": 0.8196, "step": 1916 }, { "epoch": 0.5733512786002691, "grad_norm": 2.165268898010254, "learning_rate": 4.060809052443784e-06, "loss": 0.9181, "step": 1917 }, { "epoch": 0.5736503663825333, "grad_norm": 2.3247413635253906, "learning_rate": 4.056050591827888e-06, "loss": 0.8969, "step": 1918 }, { "epoch": 0.5739494541647974, "grad_norm": 1.9621397256851196, "learning_rate": 4.051293017597014e-06, "loss": 0.8874, "step": 1919 }, { "epoch": 0.5742485419470614, "grad_norm": 3.304976463317871, "learning_rate": 4.046536334218609e-06, "loss": 0.8769, "step": 1920 }, { "epoch": 0.5745476297293256, "grad_norm": 2.0234527587890625, "learning_rate": 4.0417805461592764e-06, "loss": 0.8709, "step": 1921 }, { "epoch": 0.5748467175115897, "grad_norm": 2.151723623275757, "learning_rate": 4.037025657884793e-06, "loss": 0.8335, "step": 1922 }, { "epoch": 0.5751458052938537, "grad_norm": 2.0981791019439697, "learning_rate": 4.032271673860077e-06, "loss": 0.9171, "step": 1923 }, { "epoch": 0.5754448930761178, "grad_norm": 2.3272647857666016, "learning_rate": 4.0275185985492025e-06, "loss": 0.8986, "step": 1924 }, { "epoch": 0.575743980858382, "grad_norm": 2.221472978591919, "learning_rate": 4.022766436415392e-06, "loss": 0.8459, "step": 1925 }, { "epoch": 0.576043068640646, "grad_norm": 2.1581995487213135, "learning_rate": 4.018015191921008e-06, "loss": 0.8733, "step": 1926 }, { "epoch": 0.5763421564229101, "grad_norm": 2.1372601985931396, "learning_rate": 4.013264869527553e-06, "loss": 0.8821, "step": 1927 }, { "epoch": 0.5766412442051743, "grad_norm": 2.214317560195923, "learning_rate": 4.008515473695663e-06, "loss": 0.8566, "step": 1928 }, { "epoch": 0.5769403319874383, "grad_norm": 2.17333722114563, "learning_rate": 4.003767008885102e-06, "loss": 0.8826, "step": 1929 }, { "epoch": 0.5772394197697024, "grad_norm": 2.098672389984131, "learning_rate": 3.999019479554764e-06, "loss": 0.8645, "step": 1930 }, { "epoch": 0.5775385075519665, "grad_norm": 2.1901638507843018, "learning_rate": 3.9942728901626605e-06, "loss": 0.8536, "step": 1931 }, { "epoch": 0.5778375953342306, "grad_norm": 2.131582021713257, "learning_rate": 3.989527245165924e-06, "loss": 0.9431, "step": 1932 }, { "epoch": 0.5781366831164947, "grad_norm": 2.3762731552124023, "learning_rate": 3.984782549020797e-06, "loss": 0.8578, "step": 1933 }, { "epoch": 0.5784357708987587, "grad_norm": 2.298271894454956, "learning_rate": 3.980038806182629e-06, "loss": 0.9347, "step": 1934 }, { "epoch": 0.5787348586810229, "grad_norm": 2.777587413787842, "learning_rate": 3.975296021105885e-06, "loss": 0.8991, "step": 1935 }, { "epoch": 0.579033946463287, "grad_norm": 1.849566102027893, "learning_rate": 3.970554198244116e-06, "loss": 0.8442, "step": 1936 }, { "epoch": 0.579333034245551, "grad_norm": 2.26509690284729, "learning_rate": 3.965813342049983e-06, "loss": 0.8311, "step": 1937 }, { "epoch": 0.5796321220278151, "grad_norm": 2.247152805328369, "learning_rate": 3.961073456975227e-06, "loss": 0.8194, "step": 1938 }, { "epoch": 0.5799312098100793, "grad_norm": 2.0943026542663574, "learning_rate": 3.956334547470686e-06, "loss": 0.8644, "step": 1939 }, { "epoch": 0.5802302975923433, "grad_norm": 2.2379202842712402, "learning_rate": 3.95159661798628e-06, "loss": 0.8326, "step": 1940 }, { "epoch": 0.5805293853746074, "grad_norm": 2.2189743518829346, "learning_rate": 3.946859672971006e-06, "loss": 0.8987, "step": 1941 }, { "epoch": 0.5808284731568716, "grad_norm": 1.9391677379608154, "learning_rate": 3.9421237168729386e-06, "loss": 0.8633, "step": 1942 }, { "epoch": 0.5811275609391356, "grad_norm": 2.2374305725097656, "learning_rate": 3.937388754139223e-06, "loss": 0.8742, "step": 1943 }, { "epoch": 0.5814266487213997, "grad_norm": 1.9644733667373657, "learning_rate": 3.9326547892160746e-06, "loss": 0.8552, "step": 1944 }, { "epoch": 0.5817257365036638, "grad_norm": 2.043062925338745, "learning_rate": 3.927921826548767e-06, "loss": 0.8408, "step": 1945 }, { "epoch": 0.5820248242859279, "grad_norm": 2.1690549850463867, "learning_rate": 3.923189870581636e-06, "loss": 0.8966, "step": 1946 }, { "epoch": 0.582323912068192, "grad_norm": 2.093601703643799, "learning_rate": 3.918458925758068e-06, "loss": 0.8711, "step": 1947 }, { "epoch": 0.5826229998504561, "grad_norm": 2.1196494102478027, "learning_rate": 3.9137289965205086e-06, "loss": 0.8697, "step": 1948 }, { "epoch": 0.5829220876327202, "grad_norm": 2.245084524154663, "learning_rate": 3.909000087310441e-06, "loss": 0.8971, "step": 1949 }, { "epoch": 0.5832211754149843, "grad_norm": 2.1915507316589355, "learning_rate": 3.9042722025683945e-06, "loss": 0.8953, "step": 1950 }, { "epoch": 0.5835202631972484, "grad_norm": 2.5376031398773193, "learning_rate": 3.899545346733933e-06, "loss": 0.8811, "step": 1951 }, { "epoch": 0.5838193509795125, "grad_norm": 2.5270888805389404, "learning_rate": 3.894819524245661e-06, "loss": 0.9031, "step": 1952 }, { "epoch": 0.5841184387617766, "grad_norm": 2.1583974361419678, "learning_rate": 3.890094739541207e-06, "loss": 0.8337, "step": 1953 }, { "epoch": 0.5844175265440407, "grad_norm": 2.1285641193389893, "learning_rate": 3.885370997057225e-06, "loss": 0.8575, "step": 1954 }, { "epoch": 0.5847166143263047, "grad_norm": 1.9795979261398315, "learning_rate": 3.880648301229394e-06, "loss": 0.9084, "step": 1955 }, { "epoch": 0.5850157021085689, "grad_norm": 2.1139540672302246, "learning_rate": 3.875926656492406e-06, "loss": 0.8989, "step": 1956 }, { "epoch": 0.585314789890833, "grad_norm": 5.300511360168457, "learning_rate": 3.871206067279971e-06, "loss": 0.8356, "step": 1957 }, { "epoch": 0.585613877673097, "grad_norm": 1.9972556829452515, "learning_rate": 3.866486538024802e-06, "loss": 0.9266, "step": 1958 }, { "epoch": 0.5859129654553612, "grad_norm": 2.4035391807556152, "learning_rate": 3.861768073158623e-06, "loss": 0.909, "step": 1959 }, { "epoch": 0.5862120532376253, "grad_norm": 2.617302894592285, "learning_rate": 3.8570506771121484e-06, "loss": 0.9152, "step": 1960 }, { "epoch": 0.5865111410198893, "grad_norm": 3.034252405166626, "learning_rate": 3.852334354315104e-06, "loss": 0.8452, "step": 1961 }, { "epoch": 0.5868102288021534, "grad_norm": 2.104186773300171, "learning_rate": 3.847619109196195e-06, "loss": 0.9022, "step": 1962 }, { "epoch": 0.5871093165844176, "grad_norm": 2.7769033908843994, "learning_rate": 3.842904946183121e-06, "loss": 0.9437, "step": 1963 }, { "epoch": 0.5874084043666816, "grad_norm": 2.015623092651367, "learning_rate": 3.83819186970256e-06, "loss": 0.8843, "step": 1964 }, { "epoch": 0.5877074921489457, "grad_norm": 2.481797218322754, "learning_rate": 3.833479884180177e-06, "loss": 0.8777, "step": 1965 }, { "epoch": 0.5880065799312099, "grad_norm": 2.2545876502990723, "learning_rate": 3.828768994040608e-06, "loss": 0.8376, "step": 1966 }, { "epoch": 0.5883056677134739, "grad_norm": 2.3460800647735596, "learning_rate": 3.824059203707461e-06, "loss": 0.9099, "step": 1967 }, { "epoch": 0.588604755495738, "grad_norm": 2.1770646572113037, "learning_rate": 3.81935051760331e-06, "loss": 0.8297, "step": 1968 }, { "epoch": 0.588903843278002, "grad_norm": 2.089175224304199, "learning_rate": 3.8146429401496963e-06, "loss": 0.8356, "step": 1969 }, { "epoch": 0.5892029310602662, "grad_norm": 2.408475875854492, "learning_rate": 3.8099364757671188e-06, "loss": 0.8531, "step": 1970 }, { "epoch": 0.5895020188425303, "grad_norm": 2.1678035259246826, "learning_rate": 3.8052311288750255e-06, "loss": 0.8301, "step": 1971 }, { "epoch": 0.5898011066247943, "grad_norm": 2.0845117568969727, "learning_rate": 3.800526903891823e-06, "loss": 0.8219, "step": 1972 }, { "epoch": 0.5901001944070585, "grad_norm": 2.373549699783325, "learning_rate": 3.795823805234857e-06, "loss": 0.9357, "step": 1973 }, { "epoch": 0.5903992821893226, "grad_norm": 1.9555548429489136, "learning_rate": 3.791121837320425e-06, "loss": 0.8903, "step": 1974 }, { "epoch": 0.5906983699715866, "grad_norm": 1.9118921756744385, "learning_rate": 3.786421004563753e-06, "loss": 0.981, "step": 1975 }, { "epoch": 0.5909974577538507, "grad_norm": 2.890841007232666, "learning_rate": 3.7817213113790088e-06, "loss": 0.8438, "step": 1976 }, { "epoch": 0.5912965455361149, "grad_norm": 2.2857666015625, "learning_rate": 3.7770227621792815e-06, "loss": 0.9148, "step": 1977 }, { "epoch": 0.5915956333183789, "grad_norm": 2.1184494495391846, "learning_rate": 3.7723253613765954e-06, "loss": 0.8722, "step": 1978 }, { "epoch": 0.591894721100643, "grad_norm": 2.13742733001709, "learning_rate": 3.767629113381891e-06, "loss": 0.9501, "step": 1979 }, { "epoch": 0.5921938088829072, "grad_norm": 2.1198818683624268, "learning_rate": 3.762934022605027e-06, "loss": 0.8365, "step": 1980 }, { "epoch": 0.5924928966651712, "grad_norm": 2.192431926727295, "learning_rate": 3.758240093454775e-06, "loss": 0.8695, "step": 1981 }, { "epoch": 0.5927919844474353, "grad_norm": 1.8918261528015137, "learning_rate": 3.7535473303388175e-06, "loss": 0.8621, "step": 1982 }, { "epoch": 0.5930910722296994, "grad_norm": 2.61814546585083, "learning_rate": 3.7488557376637436e-06, "loss": 0.862, "step": 1983 }, { "epoch": 0.5933901600119635, "grad_norm": 2.802997589111328, "learning_rate": 3.744165319835037e-06, "loss": 0.9086, "step": 1984 }, { "epoch": 0.5936892477942276, "grad_norm": 2.055457353591919, "learning_rate": 3.739476081257085e-06, "loss": 0.825, "step": 1985 }, { "epoch": 0.5939883355764917, "grad_norm": 2.1942102909088135, "learning_rate": 3.7347880263331603e-06, "loss": 0.8657, "step": 1986 }, { "epoch": 0.5942874233587558, "grad_norm": 1.933117151260376, "learning_rate": 3.730101159465435e-06, "loss": 0.853, "step": 1987 }, { "epoch": 0.5945865111410199, "grad_norm": 2.233274459838867, "learning_rate": 3.725415485054955e-06, "loss": 0.847, "step": 1988 }, { "epoch": 0.594885598923284, "grad_norm": 2.3443949222564697, "learning_rate": 3.7207310075016533e-06, "loss": 0.836, "step": 1989 }, { "epoch": 0.595184686705548, "grad_norm": 2.0376861095428467, "learning_rate": 3.716047731204332e-06, "loss": 0.8533, "step": 1990 }, { "epoch": 0.5954837744878122, "grad_norm": 2.4184892177581787, "learning_rate": 3.711365660560674e-06, "loss": 0.8925, "step": 1991 }, { "epoch": 0.5957828622700763, "grad_norm": 2.433119297027588, "learning_rate": 3.706684799967224e-06, "loss": 0.8818, "step": 1992 }, { "epoch": 0.5960819500523403, "grad_norm": 2.318798065185547, "learning_rate": 3.702005153819391e-06, "loss": 0.9054, "step": 1993 }, { "epoch": 0.5963810378346045, "grad_norm": 2.084498167037964, "learning_rate": 3.6973267265114456e-06, "loss": 0.8918, "step": 1994 }, { "epoch": 0.5966801256168686, "grad_norm": 2.2021448612213135, "learning_rate": 3.6926495224365124e-06, "loss": 0.8702, "step": 1995 }, { "epoch": 0.5969792133991326, "grad_norm": 2.815838575363159, "learning_rate": 3.6879735459865708e-06, "loss": 0.9247, "step": 1996 }, { "epoch": 0.5972783011813967, "grad_norm": 2.327505350112915, "learning_rate": 3.68329880155244e-06, "loss": 0.8884, "step": 1997 }, { "epoch": 0.5975773889636609, "grad_norm": 2.029386281967163, "learning_rate": 3.6786252935237886e-06, "loss": 0.9214, "step": 1998 }, { "epoch": 0.5978764767459249, "grad_norm": 2.1360924243927, "learning_rate": 3.6739530262891245e-06, "loss": 0.8984, "step": 1999 }, { "epoch": 0.598175564528189, "grad_norm": 2.2430031299591064, "learning_rate": 3.669282004235787e-06, "loss": 0.9155, "step": 2000 }, { "epoch": 0.5984746523104532, "grad_norm": 2.069047689437866, "learning_rate": 3.6646122317499465e-06, "loss": 0.9004, "step": 2001 }, { "epoch": 0.5987737400927172, "grad_norm": 2.0386481285095215, "learning_rate": 3.6599437132166036e-06, "loss": 0.9003, "step": 2002 }, { "epoch": 0.5990728278749813, "grad_norm": 2.2522926330566406, "learning_rate": 3.655276453019575e-06, "loss": 0.8335, "step": 2003 }, { "epoch": 0.5993719156572453, "grad_norm": 2.183220148086548, "learning_rate": 3.650610455541504e-06, "loss": 0.9037, "step": 2004 }, { "epoch": 0.5996710034395095, "grad_norm": 2.1519722938537598, "learning_rate": 3.6459457251638423e-06, "loss": 0.9624, "step": 2005 }, { "epoch": 0.5999700912217736, "grad_norm": 2.3248963356018066, "learning_rate": 3.641282266266853e-06, "loss": 0.8812, "step": 2006 }, { "epoch": 0.6002691790040376, "grad_norm": 2.4997427463531494, "learning_rate": 3.636620083229604e-06, "loss": 0.9136, "step": 2007 }, { "epoch": 0.6005682667863018, "grad_norm": 2.2200355529785156, "learning_rate": 3.6319591804299703e-06, "loss": 0.927, "step": 2008 }, { "epoch": 0.6008673545685659, "grad_norm": 2.1341354846954346, "learning_rate": 3.6272995622446204e-06, "loss": 0.847, "step": 2009 }, { "epoch": 0.6011664423508299, "grad_norm": 2.342993974685669, "learning_rate": 3.622641233049016e-06, "loss": 0.9647, "step": 2010 }, { "epoch": 0.6014655301330941, "grad_norm": 2.2252047061920166, "learning_rate": 3.617984197217409e-06, "loss": 0.8996, "step": 2011 }, { "epoch": 0.6017646179153582, "grad_norm": 2.1814093589782715, "learning_rate": 3.6133284591228403e-06, "loss": 0.8613, "step": 2012 }, { "epoch": 0.6020637056976222, "grad_norm": 2.379979133605957, "learning_rate": 3.608674023137129e-06, "loss": 0.9226, "step": 2013 }, { "epoch": 0.6023627934798863, "grad_norm": 2.2555723190307617, "learning_rate": 3.6040208936308697e-06, "loss": 0.9284, "step": 2014 }, { "epoch": 0.6026618812621505, "grad_norm": 2.2628698348999023, "learning_rate": 3.599369074973433e-06, "loss": 0.922, "step": 2015 }, { "epoch": 0.6029609690444145, "grad_norm": 2.1673474311828613, "learning_rate": 3.5947185715329614e-06, "loss": 0.8711, "step": 2016 }, { "epoch": 0.6032600568266786, "grad_norm": 2.2329235076904297, "learning_rate": 3.5900693876763556e-06, "loss": 0.8421, "step": 2017 }, { "epoch": 0.6035591446089428, "grad_norm": 2.047427177429199, "learning_rate": 3.585421527769283e-06, "loss": 0.8237, "step": 2018 }, { "epoch": 0.6038582323912068, "grad_norm": 2.392745018005371, "learning_rate": 3.580774996176162e-06, "loss": 0.8732, "step": 2019 }, { "epoch": 0.6041573201734709, "grad_norm": 2.2860796451568604, "learning_rate": 3.5761297972601695e-06, "loss": 0.8583, "step": 2020 }, { "epoch": 0.604456407955735, "grad_norm": 2.2142395973205566, "learning_rate": 3.5714859353832286e-06, "loss": 0.8715, "step": 2021 }, { "epoch": 0.6047554957379991, "grad_norm": 2.1415741443634033, "learning_rate": 3.5668434149060076e-06, "loss": 0.8671, "step": 2022 }, { "epoch": 0.6050545835202632, "grad_norm": 2.354175329208374, "learning_rate": 3.562202240187913e-06, "loss": 0.8467, "step": 2023 }, { "epoch": 0.6053536713025273, "grad_norm": 2.2972540855407715, "learning_rate": 3.5575624155870885e-06, "loss": 0.8817, "step": 2024 }, { "epoch": 0.6056527590847914, "grad_norm": 2.037583351135254, "learning_rate": 3.552923945460413e-06, "loss": 0.8606, "step": 2025 }, { "epoch": 0.6059518468670555, "grad_norm": 2.1058173179626465, "learning_rate": 3.548286834163491e-06, "loss": 0.9246, "step": 2026 }, { "epoch": 0.6062509346493196, "grad_norm": 2.1884920597076416, "learning_rate": 3.543651086050649e-06, "loss": 0.845, "step": 2027 }, { "epoch": 0.6065500224315836, "grad_norm": 2.0341477394104004, "learning_rate": 3.5390167054749363e-06, "loss": 0.9421, "step": 2028 }, { "epoch": 0.6068491102138478, "grad_norm": 2.354527473449707, "learning_rate": 3.5343836967881194e-06, "loss": 0.9026, "step": 2029 }, { "epoch": 0.6071481979961119, "grad_norm": 2.743260622024536, "learning_rate": 3.529752064340673e-06, "loss": 0.8825, "step": 2030 }, { "epoch": 0.6074472857783759, "grad_norm": 2.0844125747680664, "learning_rate": 3.5251218124817803e-06, "loss": 0.8824, "step": 2031 }, { "epoch": 0.6077463735606401, "grad_norm": 2.7602717876434326, "learning_rate": 3.5204929455593316e-06, "loss": 0.8743, "step": 2032 }, { "epoch": 0.6080454613429042, "grad_norm": 2.3618717193603516, "learning_rate": 3.51586546791991e-06, "loss": 0.91, "step": 2033 }, { "epoch": 0.6083445491251682, "grad_norm": 2.223449945449829, "learning_rate": 3.511239383908801e-06, "loss": 0.9256, "step": 2034 }, { "epoch": 0.6086436369074323, "grad_norm": 2.1135735511779785, "learning_rate": 3.5066146978699785e-06, "loss": 0.8728, "step": 2035 }, { "epoch": 0.6089427246896965, "grad_norm": 2.7295336723327637, "learning_rate": 3.501991414146102e-06, "loss": 0.8941, "step": 2036 }, { "epoch": 0.6092418124719605, "grad_norm": 2.3350441455841064, "learning_rate": 3.4973695370785154e-06, "loss": 0.9657, "step": 2037 }, { "epoch": 0.6095409002542246, "grad_norm": 2.2375574111938477, "learning_rate": 3.4927490710072454e-06, "loss": 0.8743, "step": 2038 }, { "epoch": 0.6098399880364888, "grad_norm": 2.2993810176849365, "learning_rate": 3.488130020270989e-06, "loss": 0.8156, "step": 2039 }, { "epoch": 0.6101390758187528, "grad_norm": 2.0180747509002686, "learning_rate": 3.4835123892071145e-06, "loss": 0.8582, "step": 2040 }, { "epoch": 0.6104381636010169, "grad_norm": 2.0405259132385254, "learning_rate": 3.4788961821516576e-06, "loss": 0.8559, "step": 2041 }, { "epoch": 0.6107372513832809, "grad_norm": 2.3782012462615967, "learning_rate": 3.4742814034393224e-06, "loss": 0.83, "step": 2042 }, { "epoch": 0.6110363391655451, "grad_norm": 2.1717348098754883, "learning_rate": 3.4696680574034613e-06, "loss": 0.8796, "step": 2043 }, { "epoch": 0.6113354269478092, "grad_norm": 2.0850863456726074, "learning_rate": 3.46505614837609e-06, "loss": 0.8726, "step": 2044 }, { "epoch": 0.6116345147300732, "grad_norm": 2.1314446926116943, "learning_rate": 3.4604456806878704e-06, "loss": 0.9222, "step": 2045 }, { "epoch": 0.6119336025123374, "grad_norm": 2.169613838195801, "learning_rate": 3.4558366586681152e-06, "loss": 0.8801, "step": 2046 }, { "epoch": 0.6122326902946015, "grad_norm": 2.5643310546875, "learning_rate": 3.451229086644774e-06, "loss": 0.8916, "step": 2047 }, { "epoch": 0.6125317780768655, "grad_norm": 2.040142774581909, "learning_rate": 3.4466229689444384e-06, "loss": 0.8496, "step": 2048 }, { "epoch": 0.6128308658591296, "grad_norm": 1.933982491493225, "learning_rate": 3.442018309892333e-06, "loss": 0.8353, "step": 2049 }, { "epoch": 0.6131299536413938, "grad_norm": 2.455533742904663, "learning_rate": 3.4374151138123135e-06, "loss": 0.8133, "step": 2050 }, { "epoch": 0.6134290414236578, "grad_norm": 2.1020865440368652, "learning_rate": 3.432813385026862e-06, "loss": 0.8666, "step": 2051 }, { "epoch": 0.6137281292059219, "grad_norm": 2.522787094116211, "learning_rate": 3.4282131278570833e-06, "loss": 0.909, "step": 2052 }, { "epoch": 0.6140272169881861, "grad_norm": 2.5017073154449463, "learning_rate": 3.423614346622698e-06, "loss": 0.8183, "step": 2053 }, { "epoch": 0.6143263047704501, "grad_norm": 3.269373655319214, "learning_rate": 3.4190170456420413e-06, "loss": 0.8343, "step": 2054 }, { "epoch": 0.6146253925527142, "grad_norm": 2.277688503265381, "learning_rate": 3.4144212292320634e-06, "loss": 0.8661, "step": 2055 }, { "epoch": 0.6149244803349783, "grad_norm": 2.149608612060547, "learning_rate": 3.409826901708312e-06, "loss": 0.8503, "step": 2056 }, { "epoch": 0.6152235681172424, "grad_norm": 2.3871357440948486, "learning_rate": 3.4052340673849426e-06, "loss": 0.9379, "step": 2057 }, { "epoch": 0.6155226558995065, "grad_norm": 1.959328055381775, "learning_rate": 3.400642730574706e-06, "loss": 0.9084, "step": 2058 }, { "epoch": 0.6158217436817706, "grad_norm": 2.3856008052825928, "learning_rate": 3.3960528955889516e-06, "loss": 0.849, "step": 2059 }, { "epoch": 0.6161208314640347, "grad_norm": 2.0209243297576904, "learning_rate": 3.391464566737611e-06, "loss": 0.9202, "step": 2060 }, { "epoch": 0.6164199192462988, "grad_norm": 2.1328628063201904, "learning_rate": 3.386877748329208e-06, "loss": 0.7689, "step": 2061 }, { "epoch": 0.6167190070285629, "grad_norm": 2.3902482986450195, "learning_rate": 3.382292444670843e-06, "loss": 0.9055, "step": 2062 }, { "epoch": 0.6170180948108269, "grad_norm": 2.0580952167510986, "learning_rate": 3.3777086600681954e-06, "loss": 0.8695, "step": 2063 }, { "epoch": 0.6173171825930911, "grad_norm": 2.4447991847991943, "learning_rate": 3.3731263988255223e-06, "loss": 0.9267, "step": 2064 }, { "epoch": 0.6176162703753552, "grad_norm": 2.2077865600585938, "learning_rate": 3.3685456652456484e-06, "loss": 0.8853, "step": 2065 }, { "epoch": 0.6179153581576192, "grad_norm": 2.214366912841797, "learning_rate": 3.3639664636299586e-06, "loss": 0.8624, "step": 2066 }, { "epoch": 0.6182144459398834, "grad_norm": 2.0661075115203857, "learning_rate": 3.3593887982784047e-06, "loss": 0.92, "step": 2067 }, { "epoch": 0.6185135337221475, "grad_norm": 2.197662115097046, "learning_rate": 3.354812673489497e-06, "loss": 0.8838, "step": 2068 }, { "epoch": 0.6188126215044115, "grad_norm": 2.399949073791504, "learning_rate": 3.3502380935602942e-06, "loss": 0.8782, "step": 2069 }, { "epoch": 0.6191117092866757, "grad_norm": 2.1273229122161865, "learning_rate": 3.3456650627864075e-06, "loss": 0.8714, "step": 2070 }, { "epoch": 0.6194107970689398, "grad_norm": 2.1092450618743896, "learning_rate": 3.341093585461992e-06, "loss": 0.901, "step": 2071 }, { "epoch": 0.6197098848512038, "grad_norm": 2.700355291366577, "learning_rate": 3.336523665879748e-06, "loss": 0.9562, "step": 2072 }, { "epoch": 0.6200089726334679, "grad_norm": 2.193138360977173, "learning_rate": 3.331955308330907e-06, "loss": 0.9029, "step": 2073 }, { "epoch": 0.620308060415732, "grad_norm": 2.4678070545196533, "learning_rate": 3.327388517105239e-06, "loss": 0.9375, "step": 2074 }, { "epoch": 0.6206071481979961, "grad_norm": 2.0483779907226562, "learning_rate": 3.3228232964910377e-06, "loss": 0.8764, "step": 2075 }, { "epoch": 0.6209062359802602, "grad_norm": 2.1261301040649414, "learning_rate": 3.3182596507751288e-06, "loss": 0.8714, "step": 2076 }, { "epoch": 0.6212053237625244, "grad_norm": 2.254021644592285, "learning_rate": 3.313697584242853e-06, "loss": 0.9339, "step": 2077 }, { "epoch": 0.6215044115447884, "grad_norm": 2.2678346633911133, "learning_rate": 3.309137101178073e-06, "loss": 0.9232, "step": 2078 }, { "epoch": 0.6218034993270525, "grad_norm": 2.2676916122436523, "learning_rate": 3.3045782058631597e-06, "loss": 0.8935, "step": 2079 }, { "epoch": 0.6221025871093165, "grad_norm": 2.700670003890991, "learning_rate": 3.3000209025789965e-06, "loss": 0.8698, "step": 2080 }, { "epoch": 0.6224016748915807, "grad_norm": 2.0257534980773926, "learning_rate": 3.295465195604972e-06, "loss": 0.8544, "step": 2081 }, { "epoch": 0.6227007626738448, "grad_norm": 2.400681734085083, "learning_rate": 3.2909110892189745e-06, "loss": 0.8842, "step": 2082 }, { "epoch": 0.6229998504561088, "grad_norm": 2.3320424556732178, "learning_rate": 3.286358587697388e-06, "loss": 0.9473, "step": 2083 }, { "epoch": 0.623298938238373, "grad_norm": 2.114976406097412, "learning_rate": 3.2818076953150917e-06, "loss": 0.9078, "step": 2084 }, { "epoch": 0.6235980260206371, "grad_norm": 2.944469928741455, "learning_rate": 3.277258416345456e-06, "loss": 0.8491, "step": 2085 }, { "epoch": 0.6238971138029011, "grad_norm": 2.2292284965515137, "learning_rate": 3.2727107550603305e-06, "loss": 0.9443, "step": 2086 }, { "epoch": 0.6241962015851652, "grad_norm": 2.538411855697632, "learning_rate": 3.26816471573005e-06, "loss": 0.858, "step": 2087 }, { "epoch": 0.6244952893674294, "grad_norm": 2.190159320831299, "learning_rate": 3.2636203026234236e-06, "loss": 0.8784, "step": 2088 }, { "epoch": 0.6247943771496934, "grad_norm": 2.168684959411621, "learning_rate": 3.2590775200077364e-06, "loss": 0.9051, "step": 2089 }, { "epoch": 0.6250934649319575, "grad_norm": 2.8205857276916504, "learning_rate": 3.25453637214874e-06, "loss": 0.8487, "step": 2090 }, { "epoch": 0.6253925527142217, "grad_norm": 2.7196335792541504, "learning_rate": 3.249996863310654e-06, "loss": 0.8452, "step": 2091 }, { "epoch": 0.6256916404964857, "grad_norm": 2.1768152713775635, "learning_rate": 3.2454589977561513e-06, "loss": 0.9214, "step": 2092 }, { "epoch": 0.6259907282787498, "grad_norm": 2.082453966140747, "learning_rate": 3.2409227797463727e-06, "loss": 0.8735, "step": 2093 }, { "epoch": 0.6262898160610139, "grad_norm": 2.403716564178467, "learning_rate": 3.236388213540904e-06, "loss": 0.9189, "step": 2094 }, { "epoch": 0.626588903843278, "grad_norm": 2.0129692554473877, "learning_rate": 3.231855303397783e-06, "loss": 0.8273, "step": 2095 }, { "epoch": 0.6268879916255421, "grad_norm": 2.1648035049438477, "learning_rate": 3.2273240535734895e-06, "loss": 0.783, "step": 2096 }, { "epoch": 0.6271870794078062, "grad_norm": 2.62131404876709, "learning_rate": 3.2227944683229484e-06, "loss": 0.8897, "step": 2097 }, { "epoch": 0.6274861671900703, "grad_norm": 2.337027072906494, "learning_rate": 3.2182665518995203e-06, "loss": 0.8964, "step": 2098 }, { "epoch": 0.6277852549723344, "grad_norm": 2.10566782951355, "learning_rate": 3.2137403085549962e-06, "loss": 0.903, "step": 2099 }, { "epoch": 0.6280843427545985, "grad_norm": 2.10101580619812, "learning_rate": 3.2092157425395996e-06, "loss": 0.9052, "step": 2100 }, { "epoch": 0.6283834305368625, "grad_norm": 2.1335198879241943, "learning_rate": 3.2046928581019744e-06, "loss": 0.8996, "step": 2101 }, { "epoch": 0.6286825183191267, "grad_norm": 2.111341953277588, "learning_rate": 3.20017165948919e-06, "loss": 0.8119, "step": 2102 }, { "epoch": 0.6289816061013908, "grad_norm": 2.061383008956909, "learning_rate": 3.195652150946732e-06, "loss": 0.8691, "step": 2103 }, { "epoch": 0.6292806938836548, "grad_norm": 2.2480685710906982, "learning_rate": 3.1911343367184977e-06, "loss": 0.9346, "step": 2104 }, { "epoch": 0.629579781665919, "grad_norm": 2.557262420654297, "learning_rate": 3.1866182210467923e-06, "loss": 0.8524, "step": 2105 }, { "epoch": 0.629878869448183, "grad_norm": 1.9662914276123047, "learning_rate": 3.1821038081723283e-06, "loss": 0.8389, "step": 2106 }, { "epoch": 0.6301779572304471, "grad_norm": 2.6402640342712402, "learning_rate": 3.1775911023342197e-06, "loss": 0.9183, "step": 2107 }, { "epoch": 0.6304770450127112, "grad_norm": 2.2866876125335693, "learning_rate": 3.1730801077699747e-06, "loss": 0.8743, "step": 2108 }, { "epoch": 0.6307761327949754, "grad_norm": 2.001377820968628, "learning_rate": 3.168570828715496e-06, "loss": 0.8531, "step": 2109 }, { "epoch": 0.6310752205772394, "grad_norm": 2.2467100620269775, "learning_rate": 3.1640632694050753e-06, "loss": 0.8912, "step": 2110 }, { "epoch": 0.6313743083595035, "grad_norm": 2.1197097301483154, "learning_rate": 3.159557434071393e-06, "loss": 0.902, "step": 2111 }, { "epoch": 0.6316733961417677, "grad_norm": 2.5869805812835693, "learning_rate": 3.155053326945503e-06, "loss": 0.9128, "step": 2112 }, { "epoch": 0.6319724839240317, "grad_norm": 2.31728458404541, "learning_rate": 3.1505509522568444e-06, "loss": 0.9047, "step": 2113 }, { "epoch": 0.6322715717062958, "grad_norm": 2.689345359802246, "learning_rate": 3.1460503142332227e-06, "loss": 0.9221, "step": 2114 }, { "epoch": 0.6325706594885598, "grad_norm": 2.044860601425171, "learning_rate": 3.1415514171008176e-06, "loss": 0.8976, "step": 2115 }, { "epoch": 0.632869747270824, "grad_norm": 3.061042547225952, "learning_rate": 3.137054265084173e-06, "loss": 0.8969, "step": 2116 }, { "epoch": 0.6331688350530881, "grad_norm": 2.0528945922851562, "learning_rate": 3.1325588624061925e-06, "loss": 0.8319, "step": 2117 }, { "epoch": 0.6334679228353521, "grad_norm": 2.152280569076538, "learning_rate": 3.128065213288136e-06, "loss": 0.875, "step": 2118 }, { "epoch": 0.6337670106176163, "grad_norm": 2.2574961185455322, "learning_rate": 3.123573321949621e-06, "loss": 0.8977, "step": 2119 }, { "epoch": 0.6340660983998804, "grad_norm": 1.969578504562378, "learning_rate": 3.119083192608614e-06, "loss": 0.9114, "step": 2120 }, { "epoch": 0.6343651861821444, "grad_norm": 2.3324341773986816, "learning_rate": 3.114594829481421e-06, "loss": 0.9038, "step": 2121 }, { "epoch": 0.6346642739644085, "grad_norm": 2.381709575653076, "learning_rate": 3.110108236782694e-06, "loss": 0.9337, "step": 2122 }, { "epoch": 0.6349633617466727, "grad_norm": 2.109720230102539, "learning_rate": 3.105623418725424e-06, "loss": 0.8829, "step": 2123 }, { "epoch": 0.6352624495289367, "grad_norm": 2.122097969055176, "learning_rate": 3.101140379520935e-06, "loss": 0.8422, "step": 2124 }, { "epoch": 0.6355615373112008, "grad_norm": 2.31754207611084, "learning_rate": 3.0966591233788757e-06, "loss": 0.9072, "step": 2125 }, { "epoch": 0.635860625093465, "grad_norm": 2.0631825923919678, "learning_rate": 3.092179654507227e-06, "loss": 0.8462, "step": 2126 }, { "epoch": 0.636159712875729, "grad_norm": 2.1121208667755127, "learning_rate": 3.0877019771122848e-06, "loss": 0.8745, "step": 2127 }, { "epoch": 0.6364588006579931, "grad_norm": 2.054725170135498, "learning_rate": 3.0832260953986716e-06, "loss": 0.8839, "step": 2128 }, { "epoch": 0.6367578884402573, "grad_norm": 2.3422980308532715, "learning_rate": 3.078752013569315e-06, "loss": 0.9247, "step": 2129 }, { "epoch": 0.6370569762225213, "grad_norm": 2.1753101348876953, "learning_rate": 3.0742797358254584e-06, "loss": 0.8886, "step": 2130 }, { "epoch": 0.6373560640047854, "grad_norm": 2.0806379318237305, "learning_rate": 3.069809266366647e-06, "loss": 0.8706, "step": 2131 }, { "epoch": 0.6376551517870495, "grad_norm": 2.2395029067993164, "learning_rate": 3.06534060939073e-06, "loss": 0.8762, "step": 2132 }, { "epoch": 0.6379542395693136, "grad_norm": 2.0198051929473877, "learning_rate": 3.060873769093858e-06, "loss": 0.8821, "step": 2133 }, { "epoch": 0.6382533273515777, "grad_norm": 2.0550272464752197, "learning_rate": 3.0564087496704676e-06, "loss": 0.9118, "step": 2134 }, { "epoch": 0.6385524151338418, "grad_norm": 2.024930477142334, "learning_rate": 3.0519455553132914e-06, "loss": 0.8612, "step": 2135 }, { "epoch": 0.6388515029161059, "grad_norm": 1.963162899017334, "learning_rate": 3.047484190213349e-06, "loss": 0.84, "step": 2136 }, { "epoch": 0.63915059069837, "grad_norm": 2.2551920413970947, "learning_rate": 3.0430246585599402e-06, "loss": 0.8415, "step": 2137 }, { "epoch": 0.639449678480634, "grad_norm": 2.8535356521606445, "learning_rate": 3.0385669645406413e-06, "loss": 0.8674, "step": 2138 }, { "epoch": 0.6397487662628981, "grad_norm": 2.354712963104248, "learning_rate": 3.034111112341307e-06, "loss": 0.8916, "step": 2139 }, { "epoch": 0.6400478540451623, "grad_norm": 2.017209529876709, "learning_rate": 3.029657106146057e-06, "loss": 0.8675, "step": 2140 }, { "epoch": 0.6403469418274264, "grad_norm": 1.9967602491378784, "learning_rate": 3.025204950137286e-06, "loss": 0.8732, "step": 2141 }, { "epoch": 0.6406460296096904, "grad_norm": 2.409188985824585, "learning_rate": 3.020754648495644e-06, "loss": 0.9167, "step": 2142 }, { "epoch": 0.6409451173919546, "grad_norm": 2.1850333213806152, "learning_rate": 3.0163062054000424e-06, "loss": 0.853, "step": 2143 }, { "epoch": 0.6412442051742187, "grad_norm": 2.5124995708465576, "learning_rate": 3.0118596250276453e-06, "loss": 0.8528, "step": 2144 }, { "epoch": 0.6415432929564827, "grad_norm": 2.196676254272461, "learning_rate": 3.0074149115538725e-06, "loss": 0.8273, "step": 2145 }, { "epoch": 0.6418423807387468, "grad_norm": 3.4819087982177734, "learning_rate": 3.0029720691523873e-06, "loss": 0.8991, "step": 2146 }, { "epoch": 0.642141468521011, "grad_norm": 2.251174211502075, "learning_rate": 2.9985311019950945e-06, "loss": 0.8858, "step": 2147 }, { "epoch": 0.642440556303275, "grad_norm": 2.12601637840271, "learning_rate": 2.9940920142521413e-06, "loss": 0.8574, "step": 2148 }, { "epoch": 0.6427396440855391, "grad_norm": 2.038717746734619, "learning_rate": 2.9896548100919087e-06, "loss": 0.9118, "step": 2149 }, { "epoch": 0.6430387318678032, "grad_norm": 2.113351583480835, "learning_rate": 2.985219493681011e-06, "loss": 0.8856, "step": 2150 }, { "epoch": 0.6433378196500673, "grad_norm": 2.7094943523406982, "learning_rate": 2.980786069184285e-06, "loss": 0.9012, "step": 2151 }, { "epoch": 0.6436369074323314, "grad_norm": 2.1588261127471924, "learning_rate": 2.976354540764793e-06, "loss": 0.86, "step": 2152 }, { "epoch": 0.6439359952145954, "grad_norm": 2.177229166030884, "learning_rate": 2.971924912583822e-06, "loss": 0.8246, "step": 2153 }, { "epoch": 0.6442350829968596, "grad_norm": 2.0308444499969482, "learning_rate": 2.9674971888008696e-06, "loss": 0.8295, "step": 2154 }, { "epoch": 0.6445341707791237, "grad_norm": 2.11539363861084, "learning_rate": 2.9630713735736428e-06, "loss": 0.8106, "step": 2155 }, { "epoch": 0.6448332585613877, "grad_norm": 2.1649842262268066, "learning_rate": 2.9586474710580627e-06, "loss": 0.8967, "step": 2156 }, { "epoch": 0.6451323463436519, "grad_norm": 2.1138525009155273, "learning_rate": 2.954225485408248e-06, "loss": 0.888, "step": 2157 }, { "epoch": 0.645431434125916, "grad_norm": 2.051419258117676, "learning_rate": 2.9498054207765237e-06, "loss": 0.9037, "step": 2158 }, { "epoch": 0.64573052190818, "grad_norm": 2.7698121070861816, "learning_rate": 2.945387281313408e-06, "loss": 0.9505, "step": 2159 }, { "epoch": 0.6460296096904441, "grad_norm": 2.169983386993408, "learning_rate": 2.940971071167608e-06, "loss": 0.8825, "step": 2160 }, { "epoch": 0.6463286974727083, "grad_norm": 2.2105801105499268, "learning_rate": 2.936556794486024e-06, "loss": 0.8793, "step": 2161 }, { "epoch": 0.6466277852549723, "grad_norm": 2.088625431060791, "learning_rate": 2.932144455413741e-06, "loss": 0.8645, "step": 2162 }, { "epoch": 0.6469268730372364, "grad_norm": 2.283196449279785, "learning_rate": 2.9277340580940215e-06, "loss": 0.9149, "step": 2163 }, { "epoch": 0.6472259608195006, "grad_norm": 2.30704665184021, "learning_rate": 2.9233256066683047e-06, "loss": 0.8801, "step": 2164 }, { "epoch": 0.6475250486017646, "grad_norm": 2.224004030227661, "learning_rate": 2.9189191052762038e-06, "loss": 0.8516, "step": 2165 }, { "epoch": 0.6478241363840287, "grad_norm": 2.2132761478424072, "learning_rate": 2.914514558055502e-06, "loss": 0.9119, "step": 2166 }, { "epoch": 0.6481232241662928, "grad_norm": 2.1937100887298584, "learning_rate": 2.9101119691421453e-06, "loss": 0.8674, "step": 2167 }, { "epoch": 0.6484223119485569, "grad_norm": 2.4667510986328125, "learning_rate": 2.905711342670242e-06, "loss": 0.8958, "step": 2168 }, { "epoch": 0.648721399730821, "grad_norm": 2.0729968547821045, "learning_rate": 2.901312682772058e-06, "loss": 0.8535, "step": 2169 }, { "epoch": 0.649020487513085, "grad_norm": 2.433744430541992, "learning_rate": 2.896915993578011e-06, "loss": 0.8714, "step": 2170 }, { "epoch": 0.6493195752953492, "grad_norm": 2.580764055252075, "learning_rate": 2.8925212792166694e-06, "loss": 0.8817, "step": 2171 }, { "epoch": 0.6496186630776133, "grad_norm": 2.1338179111480713, "learning_rate": 2.8881285438147477e-06, "loss": 0.8747, "step": 2172 }, { "epoch": 0.6499177508598774, "grad_norm": 2.182663917541504, "learning_rate": 2.8837377914971003e-06, "loss": 0.9105, "step": 2173 }, { "epoch": 0.6502168386421414, "grad_norm": 2.1944401264190674, "learning_rate": 2.8793490263867212e-06, "loss": 0.8817, "step": 2174 }, { "epoch": 0.6505159264244056, "grad_norm": 2.3527581691741943, "learning_rate": 2.8749622526047373e-06, "loss": 0.8725, "step": 2175 }, { "epoch": 0.6508150142066697, "grad_norm": 2.200590133666992, "learning_rate": 2.8705774742704063e-06, "loss": 0.9803, "step": 2176 }, { "epoch": 0.6511141019889337, "grad_norm": 2.152148723602295, "learning_rate": 2.8661946955011145e-06, "loss": 0.851, "step": 2177 }, { "epoch": 0.6514131897711979, "grad_norm": 2.568113088607788, "learning_rate": 2.8618139204123597e-06, "loss": 0.8905, "step": 2178 }, { "epoch": 0.651712277553462, "grad_norm": 2.1110610961914062, "learning_rate": 2.8574351531177747e-06, "loss": 0.9089, "step": 2179 }, { "epoch": 0.652011365335726, "grad_norm": 2.1888177394866943, "learning_rate": 2.853058397729095e-06, "loss": 0.8972, "step": 2180 }, { "epoch": 0.6523104531179902, "grad_norm": 2.3264682292938232, "learning_rate": 2.8486836583561737e-06, "loss": 0.8481, "step": 2181 }, { "epoch": 0.6526095409002542, "grad_norm": 2.327993154525757, "learning_rate": 2.8443109391069616e-06, "loss": 0.8962, "step": 2182 }, { "epoch": 0.6529086286825183, "grad_norm": 2.2710065841674805, "learning_rate": 2.8399402440875248e-06, "loss": 0.9121, "step": 2183 }, { "epoch": 0.6532077164647824, "grad_norm": 2.2205522060394287, "learning_rate": 2.835571577402021e-06, "loss": 0.914, "step": 2184 }, { "epoch": 0.6535068042470465, "grad_norm": 2.2944436073303223, "learning_rate": 2.831204943152701e-06, "loss": 0.8794, "step": 2185 }, { "epoch": 0.6538058920293106, "grad_norm": 1.9966936111450195, "learning_rate": 2.8268403454399154e-06, "loss": 0.826, "step": 2186 }, { "epoch": 0.6541049798115747, "grad_norm": 2.2246253490448, "learning_rate": 2.8224777883620926e-06, "loss": 0.8319, "step": 2187 }, { "epoch": 0.6544040675938388, "grad_norm": 2.46073055267334, "learning_rate": 2.8181172760157575e-06, "loss": 0.8771, "step": 2188 }, { "epoch": 0.6547031553761029, "grad_norm": 2.216554880142212, "learning_rate": 2.8137588124955017e-06, "loss": 0.8915, "step": 2189 }, { "epoch": 0.655002243158367, "grad_norm": 2.1172542572021484, "learning_rate": 2.8094024018940012e-06, "loss": 0.8811, "step": 2190 }, { "epoch": 0.655301330940631, "grad_norm": 2.6877548694610596, "learning_rate": 2.8050480483020003e-06, "loss": 0.8651, "step": 2191 }, { "epoch": 0.6556004187228952, "grad_norm": 1.9714378118515015, "learning_rate": 2.8006957558083147e-06, "loss": 0.8947, "step": 2192 }, { "epoch": 0.6558995065051593, "grad_norm": 2.80487322807312, "learning_rate": 2.7963455284998225e-06, "loss": 0.865, "step": 2193 }, { "epoch": 0.6561985942874233, "grad_norm": 2.0462050437927246, "learning_rate": 2.7919973704614632e-06, "loss": 0.8921, "step": 2194 }, { "epoch": 0.6564976820696875, "grad_norm": 2.1239638328552246, "learning_rate": 2.7876512857762343e-06, "loss": 0.8805, "step": 2195 }, { "epoch": 0.6567967698519516, "grad_norm": 2.3182482719421387, "learning_rate": 2.7833072785251846e-06, "loss": 0.9464, "step": 2196 }, { "epoch": 0.6570958576342156, "grad_norm": 2.3101584911346436, "learning_rate": 2.778965352787413e-06, "loss": 0.9117, "step": 2197 }, { "epoch": 0.6573949454164797, "grad_norm": 2.3579821586608887, "learning_rate": 2.774625512640064e-06, "loss": 0.833, "step": 2198 }, { "epoch": 0.6576940331987439, "grad_norm": 2.13297176361084, "learning_rate": 2.7702877621583234e-06, "loss": 0.8838, "step": 2199 }, { "epoch": 0.6579931209810079, "grad_norm": 2.3779900074005127, "learning_rate": 2.7659521054154147e-06, "loss": 0.9331, "step": 2200 }, { "epoch": 0.658292208763272, "grad_norm": 2.322476387023926, "learning_rate": 2.7616185464825963e-06, "loss": 0.8815, "step": 2201 }, { "epoch": 0.6585912965455362, "grad_norm": 2.039630889892578, "learning_rate": 2.7572870894291542e-06, "loss": 0.8952, "step": 2202 }, { "epoch": 0.6588903843278002, "grad_norm": 2.116926908493042, "learning_rate": 2.752957738322406e-06, "loss": 0.8832, "step": 2203 }, { "epoch": 0.6591894721100643, "grad_norm": 2.687983274459839, "learning_rate": 2.748630497227682e-06, "loss": 0.828, "step": 2204 }, { "epoch": 0.6594885598923284, "grad_norm": 2.552121877670288, "learning_rate": 2.744305370208342e-06, "loss": 0.9018, "step": 2205 }, { "epoch": 0.6597876476745925, "grad_norm": 2.375343084335327, "learning_rate": 2.7399823613257565e-06, "loss": 0.8526, "step": 2206 }, { "epoch": 0.6600867354568566, "grad_norm": 1.9568301439285278, "learning_rate": 2.7356614746393063e-06, "loss": 0.8414, "step": 2207 }, { "epoch": 0.6603858232391207, "grad_norm": 2.6433401107788086, "learning_rate": 2.7313427142063742e-06, "loss": 0.8396, "step": 2208 }, { "epoch": 0.6606849110213848, "grad_norm": 2.885094165802002, "learning_rate": 2.7270260840823588e-06, "loss": 0.8995, "step": 2209 }, { "epoch": 0.6609839988036489, "grad_norm": 2.268428325653076, "learning_rate": 2.72271158832065e-06, "loss": 0.9041, "step": 2210 }, { "epoch": 0.661283086585913, "grad_norm": 2.2827062606811523, "learning_rate": 2.718399230972632e-06, "loss": 0.8761, "step": 2211 }, { "epoch": 0.661582174368177, "grad_norm": 1.9970813989639282, "learning_rate": 2.714089016087683e-06, "loss": 0.8586, "step": 2212 }, { "epoch": 0.6618812621504412, "grad_norm": 2.074535369873047, "learning_rate": 2.7097809477131754e-06, "loss": 0.8821, "step": 2213 }, { "epoch": 0.6621803499327052, "grad_norm": 2.236701250076294, "learning_rate": 2.705475029894459e-06, "loss": 0.9545, "step": 2214 }, { "epoch": 0.6624794377149693, "grad_norm": 2.35321044921875, "learning_rate": 2.7011712666748636e-06, "loss": 0.878, "step": 2215 }, { "epoch": 0.6627785254972335, "grad_norm": 2.2745068073272705, "learning_rate": 2.696869662095698e-06, "loss": 0.8911, "step": 2216 }, { "epoch": 0.6630776132794975, "grad_norm": 2.94311261177063, "learning_rate": 2.6925702201962493e-06, "loss": 0.8936, "step": 2217 }, { "epoch": 0.6633767010617616, "grad_norm": 2.17956805229187, "learning_rate": 2.6882729450137636e-06, "loss": 0.8581, "step": 2218 }, { "epoch": 0.6636757888440257, "grad_norm": 2.5199880599975586, "learning_rate": 2.6839778405834593e-06, "loss": 0.8466, "step": 2219 }, { "epoch": 0.6639748766262898, "grad_norm": 2.1823675632476807, "learning_rate": 2.6796849109385147e-06, "loss": 0.9046, "step": 2220 }, { "epoch": 0.6642739644085539, "grad_norm": 1.9741711616516113, "learning_rate": 2.6753941601100662e-06, "loss": 0.8744, "step": 2221 }, { "epoch": 0.664573052190818, "grad_norm": 2.3845901489257812, "learning_rate": 2.6711055921272033e-06, "loss": 0.9287, "step": 2222 }, { "epoch": 0.6648721399730821, "grad_norm": 2.1966824531555176, "learning_rate": 2.6668192110169664e-06, "loss": 0.9372, "step": 2223 }, { "epoch": 0.6651712277553462, "grad_norm": 2.088907241821289, "learning_rate": 2.6625350208043432e-06, "loss": 0.8615, "step": 2224 }, { "epoch": 0.6654703155376103, "grad_norm": 2.072026252746582, "learning_rate": 2.658253025512263e-06, "loss": 0.8907, "step": 2225 }, { "epoch": 0.6657694033198743, "grad_norm": 2.9769232273101807, "learning_rate": 2.6539732291615937e-06, "loss": 0.9184, "step": 2226 }, { "epoch": 0.6660684911021385, "grad_norm": 2.1704623699188232, "learning_rate": 2.6496956357711402e-06, "loss": 0.8631, "step": 2227 }, { "epoch": 0.6663675788844026, "grad_norm": 2.048449993133545, "learning_rate": 2.6454202493576366e-06, "loss": 0.871, "step": 2228 }, { "epoch": 0.6666666666666666, "grad_norm": 2.2354230880737305, "learning_rate": 2.641147073935746e-06, "loss": 0.8719, "step": 2229 }, { "epoch": 0.6669657544489308, "grad_norm": 2.383943557739258, "learning_rate": 2.6368761135180544e-06, "loss": 0.8599, "step": 2230 }, { "epoch": 0.6672648422311949, "grad_norm": 2.1751673221588135, "learning_rate": 2.632607372115069e-06, "loss": 0.8531, "step": 2231 }, { "epoch": 0.6675639300134589, "grad_norm": 2.5418906211853027, "learning_rate": 2.628340853735213e-06, "loss": 0.8825, "step": 2232 }, { "epoch": 0.667863017795723, "grad_norm": 2.2111854553222656, "learning_rate": 2.624076562384823e-06, "loss": 0.852, "step": 2233 }, { "epoch": 0.6681621055779872, "grad_norm": 1.9267024993896484, "learning_rate": 2.619814502068139e-06, "loss": 0.8968, "step": 2234 }, { "epoch": 0.6684611933602512, "grad_norm": 1.9770667552947998, "learning_rate": 2.6155546767873136e-06, "loss": 0.8402, "step": 2235 }, { "epoch": 0.6687602811425153, "grad_norm": 2.175518274307251, "learning_rate": 2.611297090542399e-06, "loss": 0.8792, "step": 2236 }, { "epoch": 0.6690593689247795, "grad_norm": 2.148965835571289, "learning_rate": 2.607041747331339e-06, "loss": 0.9141, "step": 2237 }, { "epoch": 0.6693584567070435, "grad_norm": 2.1132564544677734, "learning_rate": 2.6027886511499756e-06, "loss": 0.9389, "step": 2238 }, { "epoch": 0.6696575444893076, "grad_norm": 2.1933014392852783, "learning_rate": 2.598537805992044e-06, "loss": 0.9036, "step": 2239 }, { "epoch": 0.6699566322715718, "grad_norm": 2.0660884380340576, "learning_rate": 2.5942892158491626e-06, "loss": 0.8896, "step": 2240 }, { "epoch": 0.6702557200538358, "grad_norm": 2.0717625617980957, "learning_rate": 2.590042884710828e-06, "loss": 0.8735, "step": 2241 }, { "epoch": 0.6705548078360999, "grad_norm": 2.1274282932281494, "learning_rate": 2.585798816564419e-06, "loss": 0.8666, "step": 2242 }, { "epoch": 0.670853895618364, "grad_norm": 2.314661741256714, "learning_rate": 2.5815570153951942e-06, "loss": 0.8498, "step": 2243 }, { "epoch": 0.6711529834006281, "grad_norm": 2.0810320377349854, "learning_rate": 2.5773174851862796e-06, "loss": 0.8563, "step": 2244 }, { "epoch": 0.6714520711828922, "grad_norm": 2.238508701324463, "learning_rate": 2.573080229918664e-06, "loss": 0.8797, "step": 2245 }, { "epoch": 0.6717511589651562, "grad_norm": 2.1937456130981445, "learning_rate": 2.568845253571204e-06, "loss": 0.9252, "step": 2246 }, { "epoch": 0.6720502467474204, "grad_norm": 2.0478591918945312, "learning_rate": 2.564612560120623e-06, "loss": 0.9001, "step": 2247 }, { "epoch": 0.6723493345296845, "grad_norm": 2.037818431854248, "learning_rate": 2.5603821535414874e-06, "loss": 0.8998, "step": 2248 }, { "epoch": 0.6726484223119485, "grad_norm": 2.1550304889678955, "learning_rate": 2.556154037806226e-06, "loss": 0.8604, "step": 2249 }, { "epoch": 0.6729475100942126, "grad_norm": 2.086076498031616, "learning_rate": 2.5519282168851134e-06, "loss": 0.8673, "step": 2250 }, { "epoch": 0.6732465978764768, "grad_norm": 2.365283727645874, "learning_rate": 2.547704694746269e-06, "loss": 0.8673, "step": 2251 }, { "epoch": 0.6735456856587408, "grad_norm": 2.117530107498169, "learning_rate": 2.543483475355654e-06, "loss": 0.8486, "step": 2252 }, { "epoch": 0.6738447734410049, "grad_norm": 2.4912757873535156, "learning_rate": 2.5392645626770686e-06, "loss": 0.8539, "step": 2253 }, { "epoch": 0.6741438612232691, "grad_norm": 2.6631879806518555, "learning_rate": 2.5350479606721433e-06, "loss": 0.9042, "step": 2254 }, { "epoch": 0.6744429490055331, "grad_norm": 2.2374603748321533, "learning_rate": 2.5308336733003435e-06, "loss": 0.8723, "step": 2255 }, { "epoch": 0.6747420367877972, "grad_norm": 2.157712936401367, "learning_rate": 2.5266217045189572e-06, "loss": 0.9886, "step": 2256 }, { "epoch": 0.6750411245700613, "grad_norm": 2.154038190841675, "learning_rate": 2.522412058283098e-06, "loss": 0.9787, "step": 2257 }, { "epoch": 0.6753402123523254, "grad_norm": 2.3433175086975098, "learning_rate": 2.5182047385456967e-06, "loss": 0.8482, "step": 2258 }, { "epoch": 0.6756393001345895, "grad_norm": 2.020611047744751, "learning_rate": 2.513999749257501e-06, "loss": 0.8886, "step": 2259 }, { "epoch": 0.6759383879168536, "grad_norm": 2.529794931411743, "learning_rate": 2.509797094367068e-06, "loss": 0.8455, "step": 2260 }, { "epoch": 0.6762374756991177, "grad_norm": 2.3906443119049072, "learning_rate": 2.505596777820766e-06, "loss": 0.8802, "step": 2261 }, { "epoch": 0.6765365634813818, "grad_norm": 2.1177563667297363, "learning_rate": 2.5013988035627656e-06, "loss": 0.8206, "step": 2262 }, { "epoch": 0.6768356512636459, "grad_norm": 2.165881872177124, "learning_rate": 2.4972031755350366e-06, "loss": 0.8595, "step": 2263 }, { "epoch": 0.6771347390459099, "grad_norm": 1.971887230873108, "learning_rate": 2.493009897677346e-06, "loss": 0.8189, "step": 2264 }, { "epoch": 0.6774338268281741, "grad_norm": 2.054041624069214, "learning_rate": 2.4888189739272587e-06, "loss": 0.8827, "step": 2265 }, { "epoch": 0.6777329146104382, "grad_norm": 2.1844773292541504, "learning_rate": 2.484630408220126e-06, "loss": 0.8471, "step": 2266 }, { "epoch": 0.6780320023927022, "grad_norm": 2.305363655090332, "learning_rate": 2.480444204489081e-06, "loss": 0.9007, "step": 2267 }, { "epoch": 0.6783310901749664, "grad_norm": 2.0399584770202637, "learning_rate": 2.476260366665041e-06, "loss": 0.8978, "step": 2268 }, { "epoch": 0.6786301779572305, "grad_norm": 2.2741260528564453, "learning_rate": 2.472078898676708e-06, "loss": 0.9054, "step": 2269 }, { "epoch": 0.6789292657394945, "grad_norm": 2.2694923877716064, "learning_rate": 2.467899804450553e-06, "loss": 0.9256, "step": 2270 }, { "epoch": 0.6792283535217586, "grad_norm": 2.995882272720337, "learning_rate": 2.463723087910815e-06, "loss": 0.9191, "step": 2271 }, { "epoch": 0.6795274413040228, "grad_norm": 2.3508338928222656, "learning_rate": 2.4595487529795044e-06, "loss": 0.9323, "step": 2272 }, { "epoch": 0.6798265290862868, "grad_norm": 2.2119076251983643, "learning_rate": 2.4553768035763996e-06, "loss": 0.8627, "step": 2273 }, { "epoch": 0.6801256168685509, "grad_norm": 2.3081140518188477, "learning_rate": 2.451207243619029e-06, "loss": 0.8864, "step": 2274 }, { "epoch": 0.6804247046508151, "grad_norm": 2.361081600189209, "learning_rate": 2.447040077022685e-06, "loss": 0.8573, "step": 2275 }, { "epoch": 0.6807237924330791, "grad_norm": 1.9348764419555664, "learning_rate": 2.4428753077004067e-06, "loss": 0.8358, "step": 2276 }, { "epoch": 0.6810228802153432, "grad_norm": 2.2727043628692627, "learning_rate": 2.438712939562992e-06, "loss": 0.8701, "step": 2277 }, { "epoch": 0.6813219679976072, "grad_norm": 2.310985803604126, "learning_rate": 2.434552976518971e-06, "loss": 0.832, "step": 2278 }, { "epoch": 0.6816210557798714, "grad_norm": 2.574718713760376, "learning_rate": 2.430395422474625e-06, "loss": 0.8878, "step": 2279 }, { "epoch": 0.6819201435621355, "grad_norm": 2.0244836807250977, "learning_rate": 2.426240281333969e-06, "loss": 0.8873, "step": 2280 }, { "epoch": 0.6822192313443995, "grad_norm": 2.1363983154296875, "learning_rate": 2.422087556998754e-06, "loss": 0.8704, "step": 2281 }, { "epoch": 0.6825183191266637, "grad_norm": 1.9917606115341187, "learning_rate": 2.41793725336846e-06, "loss": 0.9273, "step": 2282 }, { "epoch": 0.6828174069089278, "grad_norm": 2.1035218238830566, "learning_rate": 2.4137893743402954e-06, "loss": 0.8395, "step": 2283 }, { "epoch": 0.6831164946911918, "grad_norm": 2.639920949935913, "learning_rate": 2.409643923809191e-06, "loss": 0.9375, "step": 2284 }, { "epoch": 0.6834155824734559, "grad_norm": 2.121453285217285, "learning_rate": 2.4055009056677977e-06, "loss": 0.9179, "step": 2285 }, { "epoch": 0.6837146702557201, "grad_norm": 2.074108839035034, "learning_rate": 2.4013603238064814e-06, "loss": 0.8872, "step": 2286 }, { "epoch": 0.6840137580379841, "grad_norm": 2.0953140258789062, "learning_rate": 2.397222182113322e-06, "loss": 0.9086, "step": 2287 }, { "epoch": 0.6843128458202482, "grad_norm": 1.9929487705230713, "learning_rate": 2.393086484474108e-06, "loss": 0.9064, "step": 2288 }, { "epoch": 0.6846119336025124, "grad_norm": 2.4442834854125977, "learning_rate": 2.3889532347723266e-06, "loss": 0.8592, "step": 2289 }, { "epoch": 0.6849110213847764, "grad_norm": 1.973323106765747, "learning_rate": 2.384822436889177e-06, "loss": 0.8511, "step": 2290 }, { "epoch": 0.6852101091670405, "grad_norm": 2.3557655811309814, "learning_rate": 2.3806940947035497e-06, "loss": 0.8953, "step": 2291 }, { "epoch": 0.6855091969493046, "grad_norm": 2.1282827854156494, "learning_rate": 2.3765682120920315e-06, "loss": 0.9155, "step": 2292 }, { "epoch": 0.6858082847315687, "grad_norm": 2.113818407058716, "learning_rate": 2.3724447929288925e-06, "loss": 0.8885, "step": 2293 }, { "epoch": 0.6861073725138328, "grad_norm": 2.1710355281829834, "learning_rate": 2.368323841086102e-06, "loss": 0.8408, "step": 2294 }, { "epoch": 0.6864064602960969, "grad_norm": 2.154489755630493, "learning_rate": 2.3642053604333032e-06, "loss": 0.8874, "step": 2295 }, { "epoch": 0.686705548078361, "grad_norm": 2.1459734439849854, "learning_rate": 2.3600893548378238e-06, "loss": 0.8695, "step": 2296 }, { "epoch": 0.6870046358606251, "grad_norm": 2.0105531215667725, "learning_rate": 2.3559758281646615e-06, "loss": 0.8887, "step": 2297 }, { "epoch": 0.6873037236428892, "grad_norm": 2.0394234657287598, "learning_rate": 2.35186478427649e-06, "loss": 0.9398, "step": 2298 }, { "epoch": 0.6876028114251533, "grad_norm": 2.4991533756256104, "learning_rate": 2.3477562270336564e-06, "loss": 0.9013, "step": 2299 }, { "epoch": 0.6879018992074174, "grad_norm": 2.248499870300293, "learning_rate": 2.343650160294163e-06, "loss": 0.935, "step": 2300 }, { "epoch": 0.6882009869896815, "grad_norm": 2.7260384559631348, "learning_rate": 2.3395465879136795e-06, "loss": 0.8298, "step": 2301 }, { "epoch": 0.6885000747719455, "grad_norm": 1.9342490434646606, "learning_rate": 2.3354455137455312e-06, "loss": 0.8921, "step": 2302 }, { "epoch": 0.6887991625542097, "grad_norm": 2.3423519134521484, "learning_rate": 2.3313469416407037e-06, "loss": 0.8663, "step": 2303 }, { "epoch": 0.6890982503364738, "grad_norm": 1.9074724912643433, "learning_rate": 2.3272508754478224e-06, "loss": 0.8912, "step": 2304 }, { "epoch": 0.6893973381187378, "grad_norm": 2.1662356853485107, "learning_rate": 2.3231573190131666e-06, "loss": 0.8435, "step": 2305 }, { "epoch": 0.689696425901002, "grad_norm": 2.3417203426361084, "learning_rate": 2.3190662761806586e-06, "loss": 0.8548, "step": 2306 }, { "epoch": 0.6899955136832661, "grad_norm": 2.1350154876708984, "learning_rate": 2.3149777507918587e-06, "loss": 0.8563, "step": 2307 }, { "epoch": 0.6902946014655301, "grad_norm": 2.0379533767700195, "learning_rate": 2.310891746685963e-06, "loss": 0.8625, "step": 2308 }, { "epoch": 0.6905936892477942, "grad_norm": 9.555312156677246, "learning_rate": 2.3068082676998022e-06, "loss": 0.8915, "step": 2309 }, { "epoch": 0.6908927770300584, "grad_norm": 2.577141523361206, "learning_rate": 2.3027273176678337e-06, "loss": 0.8958, "step": 2310 }, { "epoch": 0.6911918648123224, "grad_norm": 2.220254421234131, "learning_rate": 2.298648900422141e-06, "loss": 0.9123, "step": 2311 }, { "epoch": 0.6914909525945865, "grad_norm": 2.2773566246032715, "learning_rate": 2.2945730197924303e-06, "loss": 0.9663, "step": 2312 }, { "epoch": 0.6917900403768507, "grad_norm": 2.501680850982666, "learning_rate": 2.2904996796060243e-06, "loss": 0.8462, "step": 2313 }, { "epoch": 0.6920891281591147, "grad_norm": 2.5097310543060303, "learning_rate": 2.2864288836878616e-06, "loss": 0.8844, "step": 2314 }, { "epoch": 0.6923882159413788, "grad_norm": 2.1761839389801025, "learning_rate": 2.2823606358604868e-06, "loss": 0.8821, "step": 2315 }, { "epoch": 0.6926873037236428, "grad_norm": 2.588975667953491, "learning_rate": 2.278294939944061e-06, "loss": 0.9329, "step": 2316 }, { "epoch": 0.692986391505907, "grad_norm": 2.1174376010894775, "learning_rate": 2.2742317997563407e-06, "loss": 0.9142, "step": 2317 }, { "epoch": 0.6932854792881711, "grad_norm": 2.5007643699645996, "learning_rate": 2.2701712191126895e-06, "loss": 0.8518, "step": 2318 }, { "epoch": 0.6935845670704351, "grad_norm": 2.266716957092285, "learning_rate": 2.266113201826057e-06, "loss": 0.849, "step": 2319 }, { "epoch": 0.6938836548526993, "grad_norm": 2.1000795364379883, "learning_rate": 2.2620577517069986e-06, "loss": 0.8834, "step": 2320 }, { "epoch": 0.6941827426349634, "grad_norm": 3.1675398349761963, "learning_rate": 2.2580048725636506e-06, "loss": 0.912, "step": 2321 }, { "epoch": 0.6944818304172274, "grad_norm": 2.591334581375122, "learning_rate": 2.2539545682017394e-06, "loss": 0.8957, "step": 2322 }, { "epoch": 0.6947809181994915, "grad_norm": 2.2974982261657715, "learning_rate": 2.2499068424245667e-06, "loss": 0.8916, "step": 2323 }, { "epoch": 0.6950800059817557, "grad_norm": 2.2458508014678955, "learning_rate": 2.245861699033023e-06, "loss": 0.9242, "step": 2324 }, { "epoch": 0.6953790937640197, "grad_norm": 3.0967886447906494, "learning_rate": 2.2418191418255684e-06, "loss": 0.9034, "step": 2325 }, { "epoch": 0.6956781815462838, "grad_norm": 2.3070569038391113, "learning_rate": 2.2377791745982323e-06, "loss": 0.8788, "step": 2326 }, { "epoch": 0.695977269328548, "grad_norm": 1.9950950145721436, "learning_rate": 2.2337418011446154e-06, "loss": 0.9054, "step": 2327 }, { "epoch": 0.696276357110812, "grad_norm": 2.125906467437744, "learning_rate": 2.229707025255881e-06, "loss": 0.8849, "step": 2328 }, { "epoch": 0.6965754448930761, "grad_norm": 2.147915840148926, "learning_rate": 2.225674850720759e-06, "loss": 0.8248, "step": 2329 }, { "epoch": 0.6968745326753402, "grad_norm": 2.0654754638671875, "learning_rate": 2.2216452813255273e-06, "loss": 0.9069, "step": 2330 }, { "epoch": 0.6971736204576043, "grad_norm": 2.1700148582458496, "learning_rate": 2.2176183208540236e-06, "loss": 0.8646, "step": 2331 }, { "epoch": 0.6974727082398684, "grad_norm": 1.9871684312820435, "learning_rate": 2.2135939730876344e-06, "loss": 0.8794, "step": 2332 }, { "epoch": 0.6977717960221325, "grad_norm": 2.292957067489624, "learning_rate": 2.2095722418052916e-06, "loss": 0.844, "step": 2333 }, { "epoch": 0.6980708838043966, "grad_norm": 2.0291545391082764, "learning_rate": 2.2055531307834734e-06, "loss": 0.8628, "step": 2334 }, { "epoch": 0.6983699715866607, "grad_norm": 2.2500343322753906, "learning_rate": 2.2015366437961932e-06, "loss": 0.855, "step": 2335 }, { "epoch": 0.6986690593689248, "grad_norm": 2.2295093536376953, "learning_rate": 2.197522784615004e-06, "loss": 0.8512, "step": 2336 }, { "epoch": 0.6989681471511888, "grad_norm": 3.1951372623443604, "learning_rate": 2.1935115570089897e-06, "loss": 0.9426, "step": 2337 }, { "epoch": 0.699267234933453, "grad_norm": 2.300417184829712, "learning_rate": 2.189502964744763e-06, "loss": 0.8083, "step": 2338 }, { "epoch": 0.6995663227157171, "grad_norm": 1.9576852321624756, "learning_rate": 2.1854970115864623e-06, "loss": 0.859, "step": 2339 }, { "epoch": 0.6998654104979811, "grad_norm": 2.5813400745391846, "learning_rate": 2.1814937012957476e-06, "loss": 0.9243, "step": 2340 }, { "epoch": 0.7001644982802453, "grad_norm": 2.1125824451446533, "learning_rate": 2.1774930376317976e-06, "loss": 0.8415, "step": 2341 }, { "epoch": 0.7004635860625094, "grad_norm": 2.047724962234497, "learning_rate": 2.1734950243513054e-06, "loss": 0.8572, "step": 2342 }, { "epoch": 0.7007626738447734, "grad_norm": 2.078596830368042, "learning_rate": 2.1694996652084752e-06, "loss": 0.8764, "step": 2343 }, { "epoch": 0.7010617616270375, "grad_norm": 2.3263676166534424, "learning_rate": 2.165506963955022e-06, "loss": 0.8757, "step": 2344 }, { "epoch": 0.7013608494093017, "grad_norm": 2.0914480686187744, "learning_rate": 2.1615169243401557e-06, "loss": 0.8674, "step": 2345 }, { "epoch": 0.7016599371915657, "grad_norm": 2.796513557434082, "learning_rate": 2.1575295501105987e-06, "loss": 0.8775, "step": 2346 }, { "epoch": 0.7019590249738298, "grad_norm": 2.089794635772705, "learning_rate": 2.1535448450105644e-06, "loss": 0.8479, "step": 2347 }, { "epoch": 0.702258112756094, "grad_norm": 2.137242555618286, "learning_rate": 2.1495628127817618e-06, "loss": 0.8707, "step": 2348 }, { "epoch": 0.702557200538358, "grad_norm": 2.3532001972198486, "learning_rate": 2.1455834571633836e-06, "loss": 0.896, "step": 2349 }, { "epoch": 0.7028562883206221, "grad_norm": 2.1625113487243652, "learning_rate": 2.14160678189212e-06, "loss": 0.9124, "step": 2350 }, { "epoch": 0.7031553761028861, "grad_norm": 2.121356725692749, "learning_rate": 2.1376327907021385e-06, "loss": 0.9052, "step": 2351 }, { "epoch": 0.7034544638851503, "grad_norm": 2.488501787185669, "learning_rate": 2.133661487325082e-06, "loss": 0.9012, "step": 2352 }, { "epoch": 0.7037535516674144, "grad_norm": 2.201873302459717, "learning_rate": 2.1296928754900753e-06, "loss": 0.9296, "step": 2353 }, { "epoch": 0.7040526394496784, "grad_norm": 1.9190095663070679, "learning_rate": 2.125726958923718e-06, "loss": 0.9258, "step": 2354 }, { "epoch": 0.7043517272319426, "grad_norm": 2.183237075805664, "learning_rate": 2.1217637413500735e-06, "loss": 0.8909, "step": 2355 }, { "epoch": 0.7046508150142067, "grad_norm": 1.9382081031799316, "learning_rate": 2.1178032264906704e-06, "loss": 0.8679, "step": 2356 }, { "epoch": 0.7049499027964707, "grad_norm": 2.026721239089966, "learning_rate": 2.1138454180645035e-06, "loss": 0.9091, "step": 2357 }, { "epoch": 0.7052489905787349, "grad_norm": 2.277359962463379, "learning_rate": 2.109890319788023e-06, "loss": 0.8833, "step": 2358 }, { "epoch": 0.705548078360999, "grad_norm": 1.9638880491256714, "learning_rate": 2.105937935375136e-06, "loss": 0.8269, "step": 2359 }, { "epoch": 0.705847166143263, "grad_norm": 3.33099102973938, "learning_rate": 2.1019882685372016e-06, "loss": 0.901, "step": 2360 }, { "epoch": 0.7061462539255271, "grad_norm": 2.451127767562866, "learning_rate": 2.0980413229830248e-06, "loss": 0.8676, "step": 2361 }, { "epoch": 0.7064453417077913, "grad_norm": 2.2041873931884766, "learning_rate": 2.094097102418857e-06, "loss": 0.8902, "step": 2362 }, { "epoch": 0.7067444294900553, "grad_norm": 2.067532777786255, "learning_rate": 2.09015561054839e-06, "loss": 0.8235, "step": 2363 }, { "epoch": 0.7070435172723194, "grad_norm": 2.662902593612671, "learning_rate": 2.0862168510727545e-06, "loss": 0.869, "step": 2364 }, { "epoch": 0.7073426050545836, "grad_norm": 2.18502140045166, "learning_rate": 2.0822808276905144e-06, "loss": 0.8726, "step": 2365 }, { "epoch": 0.7076416928368476, "grad_norm": 2.2000820636749268, "learning_rate": 2.0783475440976635e-06, "loss": 0.8945, "step": 2366 }, { "epoch": 0.7079407806191117, "grad_norm": 3.1673035621643066, "learning_rate": 2.0744170039876255e-06, "loss": 0.8305, "step": 2367 }, { "epoch": 0.7082398684013758, "grad_norm": 2.4609410762786865, "learning_rate": 2.0704892110512458e-06, "loss": 0.9244, "step": 2368 }, { "epoch": 0.7085389561836399, "grad_norm": 2.5117573738098145, "learning_rate": 2.0665641689767902e-06, "loss": 0.8748, "step": 2369 }, { "epoch": 0.708838043965904, "grad_norm": 2.437666654586792, "learning_rate": 2.0626418814499428e-06, "loss": 0.8567, "step": 2370 }, { "epoch": 0.7091371317481681, "grad_norm": 2.166670560836792, "learning_rate": 2.0587223521537996e-06, "loss": 0.8505, "step": 2371 }, { "epoch": 0.7094362195304322, "grad_norm": 2.1977739334106445, "learning_rate": 2.0548055847688676e-06, "loss": 0.8882, "step": 2372 }, { "epoch": 0.7097353073126963, "grad_norm": 2.1289451122283936, "learning_rate": 2.0508915829730595e-06, "loss": 0.8692, "step": 2373 }, { "epoch": 0.7100343950949604, "grad_norm": 2.536137342453003, "learning_rate": 2.046980350441694e-06, "loss": 0.9089, "step": 2374 }, { "epoch": 0.7103334828772244, "grad_norm": 2.285073757171631, "learning_rate": 2.0430718908474813e-06, "loss": 0.8918, "step": 2375 }, { "epoch": 0.7106325706594886, "grad_norm": 2.3916730880737305, "learning_rate": 2.0391662078605383e-06, "loss": 0.8289, "step": 2376 }, { "epoch": 0.7109316584417527, "grad_norm": 2.1734509468078613, "learning_rate": 2.0352633051483705e-06, "loss": 0.8738, "step": 2377 }, { "epoch": 0.7112307462240167, "grad_norm": 2.3059258460998535, "learning_rate": 2.0313631863758677e-06, "loss": 0.8738, "step": 2378 }, { "epoch": 0.7115298340062809, "grad_norm": 2.143587827682495, "learning_rate": 2.02746585520531e-06, "loss": 0.843, "step": 2379 }, { "epoch": 0.711828921788545, "grad_norm": 2.469245672225952, "learning_rate": 2.0235713152963627e-06, "loss": 0.9636, "step": 2380 }, { "epoch": 0.712128009570809, "grad_norm": 4.539871692657471, "learning_rate": 2.019679570306068e-06, "loss": 0.8437, "step": 2381 }, { "epoch": 0.7124270973530731, "grad_norm": 2.313947916030884, "learning_rate": 2.0157906238888376e-06, "loss": 0.8729, "step": 2382 }, { "epoch": 0.7127261851353373, "grad_norm": 1.9761375188827515, "learning_rate": 2.0119044796964614e-06, "loss": 0.8814, "step": 2383 }, { "epoch": 0.7130252729176013, "grad_norm": 2.395420551300049, "learning_rate": 2.008021141378102e-06, "loss": 0.8861, "step": 2384 }, { "epoch": 0.7133243606998654, "grad_norm": 2.107828378677368, "learning_rate": 2.0041406125802764e-06, "loss": 0.9371, "step": 2385 }, { "epoch": 0.7136234484821296, "grad_norm": 2.098628520965576, "learning_rate": 2.0002628969468713e-06, "loss": 0.9016, "step": 2386 }, { "epoch": 0.7139225362643936, "grad_norm": 2.481363534927368, "learning_rate": 1.9963879981191288e-06, "loss": 0.8636, "step": 2387 }, { "epoch": 0.7142216240466577, "grad_norm": 2.144247531890869, "learning_rate": 1.9925159197356475e-06, "loss": 0.8897, "step": 2388 }, { "epoch": 0.7145207118289217, "grad_norm": 2.031421661376953, "learning_rate": 1.9886466654323765e-06, "loss": 0.8641, "step": 2389 }, { "epoch": 0.7148197996111859, "grad_norm": 2.150635004043579, "learning_rate": 1.9847802388426137e-06, "loss": 0.8529, "step": 2390 }, { "epoch": 0.71511888739345, "grad_norm": 2.0397963523864746, "learning_rate": 1.9809166435970006e-06, "loss": 0.8965, "step": 2391 }, { "epoch": 0.715417975175714, "grad_norm": 2.0846383571624756, "learning_rate": 1.9770558833235215e-06, "loss": 0.8593, "step": 2392 }, { "epoch": 0.7157170629579782, "grad_norm": 2.3858819007873535, "learning_rate": 1.973197961647498e-06, "loss": 0.8695, "step": 2393 }, { "epoch": 0.7160161507402423, "grad_norm": 2.2018251419067383, "learning_rate": 1.969342882191585e-06, "loss": 0.8497, "step": 2394 }, { "epoch": 0.7163152385225063, "grad_norm": 3.125469207763672, "learning_rate": 1.9654906485757707e-06, "loss": 0.894, "step": 2395 }, { "epoch": 0.7166143263047704, "grad_norm": 3.5392098426818848, "learning_rate": 1.9616412644173697e-06, "loss": 0.8269, "step": 2396 }, { "epoch": 0.7169134140870346, "grad_norm": 2.02653169631958, "learning_rate": 1.957794733331021e-06, "loss": 0.796, "step": 2397 }, { "epoch": 0.7172125018692986, "grad_norm": 2.3991315364837646, "learning_rate": 1.9539510589286848e-06, "loss": 0.8317, "step": 2398 }, { "epoch": 0.7175115896515627, "grad_norm": 2.066763162612915, "learning_rate": 1.950110244819638e-06, "loss": 0.8709, "step": 2399 }, { "epoch": 0.7178106774338269, "grad_norm": 2.3594281673431396, "learning_rate": 1.9462722946104727e-06, "loss": 0.915, "step": 2400 }, { "epoch": 0.7181097652160909, "grad_norm": 2.204791784286499, "learning_rate": 1.942437211905092e-06, "loss": 0.9061, "step": 2401 }, { "epoch": 0.718408852998355, "grad_norm": 2.520059823989868, "learning_rate": 1.9386050003047047e-06, "loss": 0.9391, "step": 2402 }, { "epoch": 0.7187079407806191, "grad_norm": 2.038604736328125, "learning_rate": 1.9347756634078273e-06, "loss": 0.7532, "step": 2403 }, { "epoch": 0.7190070285628832, "grad_norm": 2.034946918487549, "learning_rate": 1.93094920481027e-06, "loss": 0.9495, "step": 2404 }, { "epoch": 0.7193061163451473, "grad_norm": 2.3602540493011475, "learning_rate": 1.9271256281051443e-06, "loss": 0.8822, "step": 2405 }, { "epoch": 0.7196052041274114, "grad_norm": 2.321197748184204, "learning_rate": 1.92330493688286e-06, "loss": 0.8868, "step": 2406 }, { "epoch": 0.7199042919096755, "grad_norm": 2.6159892082214355, "learning_rate": 1.9194871347311115e-06, "loss": 0.9035, "step": 2407 }, { "epoch": 0.7202033796919396, "grad_norm": 2.7899022102355957, "learning_rate": 1.91567222523488e-06, "loss": 0.8331, "step": 2408 }, { "epoch": 0.7205024674742037, "grad_norm": 2.1793365478515625, "learning_rate": 1.9118602119764325e-06, "loss": 0.8968, "step": 2409 }, { "epoch": 0.7208015552564677, "grad_norm": 2.333775281906128, "learning_rate": 1.90805109853532e-06, "loss": 0.8984, "step": 2410 }, { "epoch": 0.7211006430387319, "grad_norm": 2.538916826248169, "learning_rate": 1.9042448884883618e-06, "loss": 0.9354, "step": 2411 }, { "epoch": 0.721399730820996, "grad_norm": 2.527853012084961, "learning_rate": 1.9004415854096586e-06, "loss": 0.8713, "step": 2412 }, { "epoch": 0.72169881860326, "grad_norm": 2.1618001461029053, "learning_rate": 1.8966411928705757e-06, "loss": 0.8901, "step": 2413 }, { "epoch": 0.7219979063855242, "grad_norm": 2.152045965194702, "learning_rate": 1.8928437144397538e-06, "loss": 0.8022, "step": 2414 }, { "epoch": 0.7222969941677883, "grad_norm": 2.2687647342681885, "learning_rate": 1.8890491536830863e-06, "loss": 0.9239, "step": 2415 }, { "epoch": 0.7225960819500523, "grad_norm": 2.7253506183624268, "learning_rate": 1.8852575141637347e-06, "loss": 0.8572, "step": 2416 }, { "epoch": 0.7228951697323165, "grad_norm": 2.2509765625, "learning_rate": 1.8814687994421138e-06, "loss": 0.859, "step": 2417 }, { "epoch": 0.7231942575145806, "grad_norm": 2.038696527481079, "learning_rate": 1.8776830130758939e-06, "loss": 0.8463, "step": 2418 }, { "epoch": 0.7234933452968446, "grad_norm": 2.2514755725860596, "learning_rate": 1.873900158619994e-06, "loss": 0.861, "step": 2419 }, { "epoch": 0.7237924330791087, "grad_norm": 2.2760226726531982, "learning_rate": 1.8701202396265815e-06, "loss": 0.8813, "step": 2420 }, { "epoch": 0.7240915208613729, "grad_norm": 2.405346155166626, "learning_rate": 1.866343259645066e-06, "loss": 0.9069, "step": 2421 }, { "epoch": 0.7243906086436369, "grad_norm": 2.0037643909454346, "learning_rate": 1.8625692222220977e-06, "loss": 0.8547, "step": 2422 }, { "epoch": 0.724689696425901, "grad_norm": 2.110745668411255, "learning_rate": 1.8587981309015635e-06, "loss": 0.9143, "step": 2423 }, { "epoch": 0.7249887842081651, "grad_norm": 2.136705160140991, "learning_rate": 1.8550299892245854e-06, "loss": 0.9168, "step": 2424 }, { "epoch": 0.7252878719904292, "grad_norm": 2.01470947265625, "learning_rate": 1.851264800729513e-06, "loss": 0.9212, "step": 2425 }, { "epoch": 0.7255869597726933, "grad_norm": 2.0562658309936523, "learning_rate": 1.8475025689519256e-06, "loss": 0.864, "step": 2426 }, { "epoch": 0.7258860475549573, "grad_norm": 2.0080339908599854, "learning_rate": 1.8437432974246238e-06, "loss": 0.8265, "step": 2427 }, { "epoch": 0.7261851353372215, "grad_norm": 2.613705635070801, "learning_rate": 1.8399869896776296e-06, "loss": 0.8829, "step": 2428 }, { "epoch": 0.7264842231194856, "grad_norm": 2.3634259700775146, "learning_rate": 1.8362336492381832e-06, "loss": 0.8154, "step": 2429 }, { "epoch": 0.7267833109017496, "grad_norm": 1.9769039154052734, "learning_rate": 1.8324832796307323e-06, "loss": 0.8733, "step": 2430 }, { "epoch": 0.7270823986840138, "grad_norm": 2.4055018424987793, "learning_rate": 1.8287358843769448e-06, "loss": 0.8399, "step": 2431 }, { "epoch": 0.7273814864662779, "grad_norm": 2.247570514678955, "learning_rate": 1.8249914669956886e-06, "loss": 0.9222, "step": 2432 }, { "epoch": 0.7276805742485419, "grad_norm": 2.070514678955078, "learning_rate": 1.8212500310030385e-06, "loss": 0.8755, "step": 2433 }, { "epoch": 0.727979662030806, "grad_norm": 2.1435415744781494, "learning_rate": 1.8175115799122656e-06, "loss": 0.868, "step": 2434 }, { "epoch": 0.7282787498130702, "grad_norm": 2.191434383392334, "learning_rate": 1.8137761172338404e-06, "loss": 0.9162, "step": 2435 }, { "epoch": 0.7285778375953342, "grad_norm": 2.050172805786133, "learning_rate": 1.810043646475431e-06, "loss": 0.9256, "step": 2436 }, { "epoch": 0.7288769253775983, "grad_norm": 2.717047691345215, "learning_rate": 1.8063141711418941e-06, "loss": 0.8889, "step": 2437 }, { "epoch": 0.7291760131598625, "grad_norm": 2.1351065635681152, "learning_rate": 1.8025876947352677e-06, "loss": 0.9078, "step": 2438 }, { "epoch": 0.7294751009421265, "grad_norm": 2.059396505355835, "learning_rate": 1.7988642207547784e-06, "loss": 0.8572, "step": 2439 }, { "epoch": 0.7297741887243906, "grad_norm": 2.208294630050659, "learning_rate": 1.795143752696839e-06, "loss": 0.8301, "step": 2440 }, { "epoch": 0.7300732765066547, "grad_norm": 2.3328773975372314, "learning_rate": 1.7914262940550292e-06, "loss": 0.7654, "step": 2441 }, { "epoch": 0.7303723642889188, "grad_norm": 2.2177000045776367, "learning_rate": 1.7877118483201095e-06, "loss": 0.9303, "step": 2442 }, { "epoch": 0.7306714520711829, "grad_norm": 2.029184341430664, "learning_rate": 1.784000418980007e-06, "loss": 0.8582, "step": 2443 }, { "epoch": 0.730970539853447, "grad_norm": 2.5888640880584717, "learning_rate": 1.7802920095198246e-06, "loss": 0.8337, "step": 2444 }, { "epoch": 0.7312696276357111, "grad_norm": 2.2251977920532227, "learning_rate": 1.7765866234218187e-06, "loss": 0.89, "step": 2445 }, { "epoch": 0.7315687154179752, "grad_norm": 2.163874387741089, "learning_rate": 1.7728842641654125e-06, "loss": 0.8326, "step": 2446 }, { "epoch": 0.7318678032002393, "grad_norm": 1.8732348680496216, "learning_rate": 1.7691849352271872e-06, "loss": 0.8508, "step": 2447 }, { "epoch": 0.7321668909825033, "grad_norm": 2.052471399307251, "learning_rate": 1.7654886400808774e-06, "loss": 0.8552, "step": 2448 }, { "epoch": 0.7324659787647675, "grad_norm": 2.5560250282287598, "learning_rate": 1.7617953821973682e-06, "loss": 0.8857, "step": 2449 }, { "epoch": 0.7327650665470316, "grad_norm": 2.212775230407715, "learning_rate": 1.758105165044694e-06, "loss": 0.8464, "step": 2450 }, { "epoch": 0.7330641543292956, "grad_norm": 2.2558443546295166, "learning_rate": 1.7544179920880333e-06, "loss": 0.8361, "step": 2451 }, { "epoch": 0.7333632421115598, "grad_norm": 2.2941172122955322, "learning_rate": 1.7507338667897062e-06, "loss": 0.856, "step": 2452 }, { "epoch": 0.7336623298938239, "grad_norm": 2.3680474758148193, "learning_rate": 1.7470527926091702e-06, "loss": 0.8907, "step": 2453 }, { "epoch": 0.7339614176760879, "grad_norm": 2.255455493927002, "learning_rate": 1.7433747730030188e-06, "loss": 0.888, "step": 2454 }, { "epoch": 0.734260505458352, "grad_norm": 2.3735716342926025, "learning_rate": 1.7396998114249786e-06, "loss": 0.9284, "step": 2455 }, { "epoch": 0.7345595932406161, "grad_norm": 2.3371143341064453, "learning_rate": 1.7360279113258977e-06, "loss": 0.8572, "step": 2456 }, { "epoch": 0.7348586810228802, "grad_norm": 2.4179999828338623, "learning_rate": 1.7323590761537595e-06, "loss": 0.8937, "step": 2457 }, { "epoch": 0.7351577688051443, "grad_norm": 1.977907657623291, "learning_rate": 1.7286933093536634e-06, "loss": 0.8772, "step": 2458 }, { "epoch": 0.7354568565874084, "grad_norm": 2.903635025024414, "learning_rate": 1.7250306143678292e-06, "loss": 0.925, "step": 2459 }, { "epoch": 0.7357559443696725, "grad_norm": 2.2103970050811768, "learning_rate": 1.7213709946355879e-06, "loss": 0.9089, "step": 2460 }, { "epoch": 0.7360550321519366, "grad_norm": 2.2252800464630127, "learning_rate": 1.7177144535933903e-06, "loss": 0.9388, "step": 2461 }, { "epoch": 0.7363541199342006, "grad_norm": 2.7722010612487793, "learning_rate": 1.7140609946747915e-06, "loss": 0.8528, "step": 2462 }, { "epoch": 0.7366532077164648, "grad_norm": 2.1417887210845947, "learning_rate": 1.7104106213104554e-06, "loss": 0.8834, "step": 2463 }, { "epoch": 0.7369522954987289, "grad_norm": 2.0783870220184326, "learning_rate": 1.7067633369281422e-06, "loss": 0.8734, "step": 2464 }, { "epoch": 0.7372513832809929, "grad_norm": 2.1798501014709473, "learning_rate": 1.7031191449527162e-06, "loss": 0.8441, "step": 2465 }, { "epoch": 0.7375504710632571, "grad_norm": 2.140900135040283, "learning_rate": 1.699478048806143e-06, "loss": 0.8991, "step": 2466 }, { "epoch": 0.7378495588455212, "grad_norm": 2.2197113037109375, "learning_rate": 1.6958400519074696e-06, "loss": 0.8163, "step": 2467 }, { "epoch": 0.7381486466277852, "grad_norm": 2.250652313232422, "learning_rate": 1.6922051576728415e-06, "loss": 0.8421, "step": 2468 }, { "epoch": 0.7384477344100493, "grad_norm": 2.1507351398468018, "learning_rate": 1.6885733695154855e-06, "loss": 0.8963, "step": 2469 }, { "epoch": 0.7387468221923135, "grad_norm": 2.12145733833313, "learning_rate": 1.6849446908457201e-06, "loss": 0.8697, "step": 2470 }, { "epoch": 0.7390459099745775, "grad_norm": 3.2319490909576416, "learning_rate": 1.6813191250709326e-06, "loss": 0.9206, "step": 2471 }, { "epoch": 0.7393449977568416, "grad_norm": 2.200773239135742, "learning_rate": 1.6776966755955941e-06, "loss": 0.8513, "step": 2472 }, { "epoch": 0.7396440855391058, "grad_norm": 2.234835386276245, "learning_rate": 1.674077345821249e-06, "loss": 0.9338, "step": 2473 }, { "epoch": 0.7399431733213698, "grad_norm": 2.3337056636810303, "learning_rate": 1.6704611391465103e-06, "loss": 0.9007, "step": 2474 }, { "epoch": 0.7402422611036339, "grad_norm": 2.0154037475585938, "learning_rate": 1.6668480589670604e-06, "loss": 0.9081, "step": 2475 }, { "epoch": 0.7405413488858981, "grad_norm": 1.8512455224990845, "learning_rate": 1.6632381086756439e-06, "loss": 0.867, "step": 2476 }, { "epoch": 0.7408404366681621, "grad_norm": 2.2643096446990967, "learning_rate": 1.6596312916620677e-06, "loss": 0.8951, "step": 2477 }, { "epoch": 0.7411395244504262, "grad_norm": 1.9458816051483154, "learning_rate": 1.6560276113131968e-06, "loss": 0.8322, "step": 2478 }, { "epoch": 0.7414386122326903, "grad_norm": 3.6378161907196045, "learning_rate": 1.6524270710129491e-06, "loss": 0.9182, "step": 2479 }, { "epoch": 0.7417377000149544, "grad_norm": 2.2936315536499023, "learning_rate": 1.6488296741422955e-06, "loss": 0.9009, "step": 2480 }, { "epoch": 0.7420367877972185, "grad_norm": 2.272062063217163, "learning_rate": 1.6452354240792561e-06, "loss": 0.8858, "step": 2481 }, { "epoch": 0.7423358755794826, "grad_norm": 2.655031442642212, "learning_rate": 1.64164432419889e-06, "loss": 0.8818, "step": 2482 }, { "epoch": 0.7426349633617467, "grad_norm": 2.1829984188079834, "learning_rate": 1.6380563778733078e-06, "loss": 0.85, "step": 2483 }, { "epoch": 0.7429340511440108, "grad_norm": 2.1928906440734863, "learning_rate": 1.6344715884716517e-06, "loss": 0.8872, "step": 2484 }, { "epoch": 0.7432331389262749, "grad_norm": 2.008911609649658, "learning_rate": 1.630889959360104e-06, "loss": 0.8676, "step": 2485 }, { "epoch": 0.7435322267085389, "grad_norm": 1.8788466453552246, "learning_rate": 1.627311493901872e-06, "loss": 0.8101, "step": 2486 }, { "epoch": 0.7438313144908031, "grad_norm": 2.1914150714874268, "learning_rate": 1.6237361954572023e-06, "loss": 0.869, "step": 2487 }, { "epoch": 0.7441304022730671, "grad_norm": 2.0450246334075928, "learning_rate": 1.6201640673833613e-06, "loss": 0.8408, "step": 2488 }, { "epoch": 0.7444294900553312, "grad_norm": 2.541006088256836, "learning_rate": 1.6165951130346408e-06, "loss": 0.9302, "step": 2489 }, { "epoch": 0.7447285778375954, "grad_norm": 2.477607488632202, "learning_rate": 1.6130293357623473e-06, "loss": 0.9312, "step": 2490 }, { "epoch": 0.7450276656198594, "grad_norm": 2.0380399227142334, "learning_rate": 1.6094667389148128e-06, "loss": 0.8759, "step": 2491 }, { "epoch": 0.7453267534021235, "grad_norm": 2.543776750564575, "learning_rate": 1.605907325837378e-06, "loss": 0.9179, "step": 2492 }, { "epoch": 0.7456258411843876, "grad_norm": 2.4036781787872314, "learning_rate": 1.6023510998723906e-06, "loss": 0.9026, "step": 2493 }, { "epoch": 0.7459249289666517, "grad_norm": 2.118663787841797, "learning_rate": 1.598798064359211e-06, "loss": 0.8631, "step": 2494 }, { "epoch": 0.7462240167489158, "grad_norm": 2.625515937805176, "learning_rate": 1.5952482226342003e-06, "loss": 0.8725, "step": 2495 }, { "epoch": 0.7465231045311799, "grad_norm": 2.2113704681396484, "learning_rate": 1.5917015780307265e-06, "loss": 0.9283, "step": 2496 }, { "epoch": 0.746822192313444, "grad_norm": 1.9584674835205078, "learning_rate": 1.5881581338791462e-06, "loss": 0.8542, "step": 2497 }, { "epoch": 0.7471212800957081, "grad_norm": 2.002887487411499, "learning_rate": 1.5846178935068173e-06, "loss": 0.8376, "step": 2498 }, { "epoch": 0.7474203678779722, "grad_norm": 2.428758382797241, "learning_rate": 1.5810808602380872e-06, "loss": 0.8804, "step": 2499 }, { "epoch": 0.7477194556602362, "grad_norm": 2.171161413192749, "learning_rate": 1.5775470373942926e-06, "loss": 0.8608, "step": 2500 }, { "epoch": 0.7480185434425004, "grad_norm": 2.349574327468872, "learning_rate": 1.5740164282937548e-06, "loss": 0.9126, "step": 2501 }, { "epoch": 0.7483176312247645, "grad_norm": 2.3179171085357666, "learning_rate": 1.5704890362517772e-06, "loss": 0.9417, "step": 2502 }, { "epoch": 0.7486167190070285, "grad_norm": 2.0808465480804443, "learning_rate": 1.5669648645806428e-06, "loss": 0.8029, "step": 2503 }, { "epoch": 0.7489158067892927, "grad_norm": 2.6368112564086914, "learning_rate": 1.5634439165896103e-06, "loss": 0.8446, "step": 2504 }, { "epoch": 0.7492148945715568, "grad_norm": 2.1701889038085938, "learning_rate": 1.5599261955849126e-06, "loss": 0.874, "step": 2505 }, { "epoch": 0.7495139823538208, "grad_norm": 2.1937646865844727, "learning_rate": 1.5564117048697503e-06, "loss": 0.874, "step": 2506 }, { "epoch": 0.7498130701360849, "grad_norm": 2.584517478942871, "learning_rate": 1.5529004477442921e-06, "loss": 0.8316, "step": 2507 }, { "epoch": 0.7501121579183491, "grad_norm": 1.907037615776062, "learning_rate": 1.5493924275056699e-06, "loss": 0.8608, "step": 2508 }, { "epoch": 0.7504112457006131, "grad_norm": 2.210529327392578, "learning_rate": 1.5458876474479757e-06, "loss": 0.8815, "step": 2509 }, { "epoch": 0.7507103334828772, "grad_norm": 2.2447450160980225, "learning_rate": 1.5423861108622601e-06, "loss": 0.9035, "step": 2510 }, { "epoch": 0.7510094212651414, "grad_norm": 2.5012800693511963, "learning_rate": 1.5388878210365283e-06, "loss": 0.8515, "step": 2511 }, { "epoch": 0.7513085090474054, "grad_norm": 2.096802234649658, "learning_rate": 1.5353927812557306e-06, "loss": 0.8352, "step": 2512 }, { "epoch": 0.7516075968296695, "grad_norm": 2.1098127365112305, "learning_rate": 1.5319009948017765e-06, "loss": 0.8741, "step": 2513 }, { "epoch": 0.7519066846119336, "grad_norm": 2.256786584854126, "learning_rate": 1.528412464953512e-06, "loss": 0.8219, "step": 2514 }, { "epoch": 0.7522057723941977, "grad_norm": 2.247316598892212, "learning_rate": 1.5249271949867294e-06, "loss": 0.9365, "step": 2515 }, { "epoch": 0.7525048601764618, "grad_norm": 2.795905113220215, "learning_rate": 1.5214451881741544e-06, "loss": 0.9707, "step": 2516 }, { "epoch": 0.7528039479587258, "grad_norm": 2.0600247383117676, "learning_rate": 1.5179664477854556e-06, "loss": 0.9097, "step": 2517 }, { "epoch": 0.75310303574099, "grad_norm": 2.1702637672424316, "learning_rate": 1.5144909770872324e-06, "loss": 0.8701, "step": 2518 }, { "epoch": 0.7534021235232541, "grad_norm": 1.98870849609375, "learning_rate": 1.5110187793430086e-06, "loss": 0.837, "step": 2519 }, { "epoch": 0.7537012113055181, "grad_norm": 2.0421955585479736, "learning_rate": 1.5075498578132398e-06, "loss": 0.8633, "step": 2520 }, { "epoch": 0.7540002990877822, "grad_norm": 2.0912203788757324, "learning_rate": 1.504084215755306e-06, "loss": 0.851, "step": 2521 }, { "epoch": 0.7542993868700464, "grad_norm": 2.779526710510254, "learning_rate": 1.5006218564235058e-06, "loss": 0.952, "step": 2522 }, { "epoch": 0.7545984746523104, "grad_norm": 2.1173150539398193, "learning_rate": 1.4971627830690533e-06, "loss": 0.8855, "step": 2523 }, { "epoch": 0.7548975624345745, "grad_norm": 2.5345187187194824, "learning_rate": 1.4937069989400782e-06, "loss": 0.9279, "step": 2524 }, { "epoch": 0.7551966502168387, "grad_norm": 2.140146255493164, "learning_rate": 1.4902545072816266e-06, "loss": 0.7961, "step": 2525 }, { "epoch": 0.7554957379991027, "grad_norm": 2.370677947998047, "learning_rate": 1.4868053113356446e-06, "loss": 0.8856, "step": 2526 }, { "epoch": 0.7557948257813668, "grad_norm": 2.2060608863830566, "learning_rate": 1.483359414340989e-06, "loss": 0.8945, "step": 2527 }, { "epoch": 0.7560939135636309, "grad_norm": 2.1616148948669434, "learning_rate": 1.4799168195334174e-06, "loss": 0.9352, "step": 2528 }, { "epoch": 0.756393001345895, "grad_norm": 1.9926570653915405, "learning_rate": 1.4764775301455859e-06, "loss": 0.8484, "step": 2529 }, { "epoch": 0.7566920891281591, "grad_norm": 2.1725032329559326, "learning_rate": 1.4730415494070482e-06, "loss": 0.8727, "step": 2530 }, { "epoch": 0.7569911769104232, "grad_norm": 2.206610918045044, "learning_rate": 1.4696088805442505e-06, "loss": 0.815, "step": 2531 }, { "epoch": 0.7572902646926873, "grad_norm": 2.2442948818206787, "learning_rate": 1.466179526780529e-06, "loss": 0.9054, "step": 2532 }, { "epoch": 0.7575893524749514, "grad_norm": 2.195108652114868, "learning_rate": 1.4627534913361064e-06, "loss": 0.852, "step": 2533 }, { "epoch": 0.7578884402572155, "grad_norm": 2.2842259407043457, "learning_rate": 1.4593307774280895e-06, "loss": 0.8574, "step": 2534 }, { "epoch": 0.7581875280394796, "grad_norm": 2.026488780975342, "learning_rate": 1.4559113882704683e-06, "loss": 0.9131, "step": 2535 }, { "epoch": 0.7584866158217437, "grad_norm": 2.2049691677093506, "learning_rate": 1.4524953270741077e-06, "loss": 0.9024, "step": 2536 }, { "epoch": 0.7587857036040078, "grad_norm": 2.145599603652954, "learning_rate": 1.4490825970467493e-06, "loss": 0.8372, "step": 2537 }, { "epoch": 0.7590847913862718, "grad_norm": 2.2673819065093994, "learning_rate": 1.4456732013930064e-06, "loss": 0.9135, "step": 2538 }, { "epoch": 0.759383879168536, "grad_norm": 3.023399829864502, "learning_rate": 1.442267143314361e-06, "loss": 0.8454, "step": 2539 }, { "epoch": 0.7596829669508001, "grad_norm": 2.198761463165283, "learning_rate": 1.4388644260091617e-06, "loss": 0.8372, "step": 2540 }, { "epoch": 0.7599820547330641, "grad_norm": 2.0331013202667236, "learning_rate": 1.435465052672621e-06, "loss": 0.9055, "step": 2541 }, { "epoch": 0.7602811425153283, "grad_norm": 2.1219210624694824, "learning_rate": 1.432069026496805e-06, "loss": 0.8792, "step": 2542 }, { "epoch": 0.7605802302975924, "grad_norm": 2.1041626930236816, "learning_rate": 1.4286763506706474e-06, "loss": 0.9068, "step": 2543 }, { "epoch": 0.7608793180798564, "grad_norm": 2.100292921066284, "learning_rate": 1.425287028379929e-06, "loss": 0.8943, "step": 2544 }, { "epoch": 0.7611784058621205, "grad_norm": 2.222883462905884, "learning_rate": 1.4219010628072806e-06, "loss": 0.907, "step": 2545 }, { "epoch": 0.7614774936443847, "grad_norm": 2.4814963340759277, "learning_rate": 1.418518457132182e-06, "loss": 0.9156, "step": 2546 }, { "epoch": 0.7617765814266487, "grad_norm": 2.127368450164795, "learning_rate": 1.4151392145309634e-06, "loss": 0.8951, "step": 2547 }, { "epoch": 0.7620756692089128, "grad_norm": 2.1272127628326416, "learning_rate": 1.4117633381767925e-06, "loss": 0.9194, "step": 2548 }, { "epoch": 0.762374756991177, "grad_norm": 2.3973605632781982, "learning_rate": 1.4083908312396727e-06, "loss": 0.8698, "step": 2549 }, { "epoch": 0.762673844773441, "grad_norm": 1.9183708429336548, "learning_rate": 1.4050216968864477e-06, "loss": 0.876, "step": 2550 }, { "epoch": 0.7629729325557051, "grad_norm": 2.0971062183380127, "learning_rate": 1.401655938280798e-06, "loss": 0.8592, "step": 2551 }, { "epoch": 0.7632720203379691, "grad_norm": 2.2127127647399902, "learning_rate": 1.3982935585832253e-06, "loss": 0.8032, "step": 2552 }, { "epoch": 0.7635711081202333, "grad_norm": 2.624629259109497, "learning_rate": 1.3949345609510645e-06, "loss": 0.919, "step": 2553 }, { "epoch": 0.7638701959024974, "grad_norm": 2.133455753326416, "learning_rate": 1.3915789485384718e-06, "loss": 0.8585, "step": 2554 }, { "epoch": 0.7641692836847614, "grad_norm": 2.150991201400757, "learning_rate": 1.3882267244964304e-06, "loss": 0.8795, "step": 2555 }, { "epoch": 0.7644683714670256, "grad_norm": 2.388026237487793, "learning_rate": 1.3848778919727324e-06, "loss": 0.8825, "step": 2556 }, { "epoch": 0.7647674592492897, "grad_norm": 2.1143929958343506, "learning_rate": 1.3815324541119924e-06, "loss": 0.8871, "step": 2557 }, { "epoch": 0.7650665470315537, "grad_norm": 2.211317300796509, "learning_rate": 1.3781904140556352e-06, "loss": 0.8618, "step": 2558 }, { "epoch": 0.7653656348138178, "grad_norm": 2.0483245849609375, "learning_rate": 1.3748517749418944e-06, "loss": 0.8256, "step": 2559 }, { "epoch": 0.765664722596082, "grad_norm": 2.1039726734161377, "learning_rate": 1.3715165399058106e-06, "loss": 0.8817, "step": 2560 }, { "epoch": 0.765963810378346, "grad_norm": 2.352363348007202, "learning_rate": 1.368184712079228e-06, "loss": 0.8786, "step": 2561 }, { "epoch": 0.7662628981606101, "grad_norm": 2.235250473022461, "learning_rate": 1.3648562945907916e-06, "loss": 0.9079, "step": 2562 }, { "epoch": 0.7665619859428743, "grad_norm": 4.489948749542236, "learning_rate": 1.3615312905659434e-06, "loss": 0.9051, "step": 2563 }, { "epoch": 0.7668610737251383, "grad_norm": 2.0706374645233154, "learning_rate": 1.3582097031269208e-06, "loss": 0.8752, "step": 2564 }, { "epoch": 0.7671601615074024, "grad_norm": 2.262972831726074, "learning_rate": 1.3548915353927516e-06, "loss": 0.8761, "step": 2565 }, { "epoch": 0.7674592492896665, "grad_norm": 2.417384386062622, "learning_rate": 1.3515767904792548e-06, "loss": 0.8232, "step": 2566 }, { "epoch": 0.7677583370719306, "grad_norm": 2.887690782546997, "learning_rate": 1.3482654714990323e-06, "loss": 0.9114, "step": 2567 }, { "epoch": 0.7680574248541947, "grad_norm": 2.1481332778930664, "learning_rate": 1.3449575815614719e-06, "loss": 0.8954, "step": 2568 }, { "epoch": 0.7683565126364588, "grad_norm": 2.073227643966675, "learning_rate": 1.3416531237727398e-06, "loss": 0.8495, "step": 2569 }, { "epoch": 0.7686556004187229, "grad_norm": 2.245189905166626, "learning_rate": 1.338352101235781e-06, "loss": 0.9276, "step": 2570 }, { "epoch": 0.768954688200987, "grad_norm": 2.4253644943237305, "learning_rate": 1.3350545170503087e-06, "loss": 0.8729, "step": 2571 }, { "epoch": 0.7692537759832511, "grad_norm": 1.930167555809021, "learning_rate": 1.3317603743128177e-06, "loss": 0.8215, "step": 2572 }, { "epoch": 0.7695528637655151, "grad_norm": 2.059185743331909, "learning_rate": 1.3284696761165634e-06, "loss": 0.8867, "step": 2573 }, { "epoch": 0.7698519515477793, "grad_norm": 1.9882283210754395, "learning_rate": 1.3251824255515704e-06, "loss": 0.8305, "step": 2574 }, { "epoch": 0.7701510393300434, "grad_norm": 2.3725180625915527, "learning_rate": 1.3218986257046217e-06, "loss": 0.9261, "step": 2575 }, { "epoch": 0.7704501271123074, "grad_norm": 1.931646466255188, "learning_rate": 1.3186182796592634e-06, "loss": 0.8986, "step": 2576 }, { "epoch": 0.7707492148945716, "grad_norm": 2.237041711807251, "learning_rate": 1.3153413904958024e-06, "loss": 0.8333, "step": 2577 }, { "epoch": 0.7710483026768357, "grad_norm": 1.9796706438064575, "learning_rate": 1.3120679612912896e-06, "loss": 0.8491, "step": 2578 }, { "epoch": 0.7713473904590997, "grad_norm": 2.0857818126678467, "learning_rate": 1.308797995119534e-06, "loss": 0.8162, "step": 2579 }, { "epoch": 0.7716464782413638, "grad_norm": 1.942095160484314, "learning_rate": 1.30553149505109e-06, "loss": 0.8511, "step": 2580 }, { "epoch": 0.771945566023628, "grad_norm": 2.355480909347534, "learning_rate": 1.302268464153263e-06, "loss": 0.8347, "step": 2581 }, { "epoch": 0.772244653805892, "grad_norm": 2.017162799835205, "learning_rate": 1.2990089054900918e-06, "loss": 0.8894, "step": 2582 }, { "epoch": 0.7725437415881561, "grad_norm": 2.447188377380371, "learning_rate": 1.2957528221223591e-06, "loss": 0.8625, "step": 2583 }, { "epoch": 0.7728428293704203, "grad_norm": 2.300422191619873, "learning_rate": 1.2925002171075846e-06, "loss": 0.9022, "step": 2584 }, { "epoch": 0.7731419171526843, "grad_norm": 2.1977531909942627, "learning_rate": 1.2892510935000252e-06, "loss": 0.8547, "step": 2585 }, { "epoch": 0.7734410049349484, "grad_norm": 2.483591079711914, "learning_rate": 1.2860054543506595e-06, "loss": 0.8645, "step": 2586 }, { "epoch": 0.7737400927172124, "grad_norm": 2.201073408126831, "learning_rate": 1.2827633027072017e-06, "loss": 0.8857, "step": 2587 }, { "epoch": 0.7740391804994766, "grad_norm": 2.1661407947540283, "learning_rate": 1.2795246416140895e-06, "loss": 0.8844, "step": 2588 }, { "epoch": 0.7743382682817407, "grad_norm": 2.5535428524017334, "learning_rate": 1.2762894741124814e-06, "loss": 0.8766, "step": 2589 }, { "epoch": 0.7746373560640047, "grad_norm": 2.1524014472961426, "learning_rate": 1.273057803240257e-06, "loss": 0.8872, "step": 2590 }, { "epoch": 0.7749364438462689, "grad_norm": 2.147458791732788, "learning_rate": 1.2698296320320113e-06, "loss": 0.8709, "step": 2591 }, { "epoch": 0.775235531628533, "grad_norm": 2.020853281021118, "learning_rate": 1.2666049635190535e-06, "loss": 0.8348, "step": 2592 }, { "epoch": 0.775534619410797, "grad_norm": 2.049917221069336, "learning_rate": 1.2633838007294048e-06, "loss": 0.8641, "step": 2593 }, { "epoch": 0.7758337071930612, "grad_norm": 2.315091133117676, "learning_rate": 1.260166146687793e-06, "loss": 0.8773, "step": 2594 }, { "epoch": 0.7761327949753253, "grad_norm": 2.323155641555786, "learning_rate": 1.2569520044156509e-06, "loss": 0.8965, "step": 2595 }, { "epoch": 0.7764318827575893, "grad_norm": 2.325894832611084, "learning_rate": 1.2537413769311163e-06, "loss": 0.9338, "step": 2596 }, { "epoch": 0.7767309705398534, "grad_norm": 2.932382106781006, "learning_rate": 1.25053426724902e-06, "loss": 0.8512, "step": 2597 }, { "epoch": 0.7770300583221176, "grad_norm": 2.556591272354126, "learning_rate": 1.247330678380899e-06, "loss": 0.8634, "step": 2598 }, { "epoch": 0.7773291461043816, "grad_norm": 2.2658884525299072, "learning_rate": 1.2441306133349785e-06, "loss": 0.8579, "step": 2599 }, { "epoch": 0.7776282338866457, "grad_norm": 2.2724556922912598, "learning_rate": 1.2409340751161753e-06, "loss": 0.8756, "step": 2600 }, { "epoch": 0.7779273216689099, "grad_norm": 2.667576789855957, "learning_rate": 1.2377410667260914e-06, "loss": 0.8762, "step": 2601 }, { "epoch": 0.7782264094511739, "grad_norm": 2.144519567489624, "learning_rate": 1.2345515911630223e-06, "loss": 0.925, "step": 2602 }, { "epoch": 0.778525497233438, "grad_norm": 2.0485308170318604, "learning_rate": 1.2313656514219408e-06, "loss": 0.8938, "step": 2603 }, { "epoch": 0.7788245850157021, "grad_norm": 2.426257371902466, "learning_rate": 1.2281832504944967e-06, "loss": 0.8807, "step": 2604 }, { "epoch": 0.7791236727979662, "grad_norm": 2.3869338035583496, "learning_rate": 1.2250043913690235e-06, "loss": 0.9317, "step": 2605 }, { "epoch": 0.7794227605802303, "grad_norm": 2.2160322666168213, "learning_rate": 1.2218290770305218e-06, "loss": 0.9239, "step": 2606 }, { "epoch": 0.7797218483624944, "grad_norm": 2.298067569732666, "learning_rate": 1.2186573104606735e-06, "loss": 0.9021, "step": 2607 }, { "epoch": 0.7800209361447585, "grad_norm": 1.941989779472351, "learning_rate": 1.2154890946378178e-06, "loss": 0.8917, "step": 2608 }, { "epoch": 0.7803200239270226, "grad_norm": 2.1306607723236084, "learning_rate": 1.2123244325369665e-06, "loss": 0.9489, "step": 2609 }, { "epoch": 0.7806191117092867, "grad_norm": 2.450730323791504, "learning_rate": 1.2091633271297916e-06, "loss": 0.8966, "step": 2610 }, { "epoch": 0.7809181994915507, "grad_norm": 2.227607011795044, "learning_rate": 1.20600578138463e-06, "loss": 0.964, "step": 2611 }, { "epoch": 0.7812172872738149, "grad_norm": 2.3694071769714355, "learning_rate": 1.2028517982664683e-06, "loss": 0.8921, "step": 2612 }, { "epoch": 0.781516375056079, "grad_norm": 2.409167528152466, "learning_rate": 1.1997013807369535e-06, "loss": 0.8245, "step": 2613 }, { "epoch": 0.781815462838343, "grad_norm": 2.1788597106933594, "learning_rate": 1.196554531754383e-06, "loss": 0.8802, "step": 2614 }, { "epoch": 0.7821145506206072, "grad_norm": 2.177119731903076, "learning_rate": 1.193411254273703e-06, "loss": 0.8077, "step": 2615 }, { "epoch": 0.7824136384028713, "grad_norm": 2.1945149898529053, "learning_rate": 1.1902715512465057e-06, "loss": 0.8969, "step": 2616 }, { "epoch": 0.7827127261851353, "grad_norm": 2.3304121494293213, "learning_rate": 1.1871354256210277e-06, "loss": 0.9133, "step": 2617 }, { "epoch": 0.7830118139673994, "grad_norm": 1.9412496089935303, "learning_rate": 1.1840028803421455e-06, "loss": 0.9354, "step": 2618 }, { "epoch": 0.7833109017496636, "grad_norm": 2.061119318008423, "learning_rate": 1.1808739183513745e-06, "loss": 0.8861, "step": 2619 }, { "epoch": 0.7836099895319276, "grad_norm": 2.4102911949157715, "learning_rate": 1.1777485425868639e-06, "loss": 0.8804, "step": 2620 }, { "epoch": 0.7839090773141917, "grad_norm": 2.1446495056152344, "learning_rate": 1.1746267559833973e-06, "loss": 0.8046, "step": 2621 }, { "epoch": 0.7842081650964559, "grad_norm": 2.227285385131836, "learning_rate": 1.1715085614723881e-06, "loss": 0.8308, "step": 2622 }, { "epoch": 0.7845072528787199, "grad_norm": 2.350365400314331, "learning_rate": 1.1683939619818708e-06, "loss": 0.8851, "step": 2623 }, { "epoch": 0.784806340660984, "grad_norm": 2.1954972743988037, "learning_rate": 1.1652829604365135e-06, "loss": 0.8318, "step": 2624 }, { "epoch": 0.785105428443248, "grad_norm": 2.406616449356079, "learning_rate": 1.1621755597575996e-06, "loss": 0.8969, "step": 2625 }, { "epoch": 0.7854045162255122, "grad_norm": 2.3221819400787354, "learning_rate": 1.1590717628630337e-06, "loss": 0.8756, "step": 2626 }, { "epoch": 0.7857036040077763, "grad_norm": 2.653132200241089, "learning_rate": 1.155971572667332e-06, "loss": 0.871, "step": 2627 }, { "epoch": 0.7860026917900403, "grad_norm": 2.0156147480010986, "learning_rate": 1.1528749920816319e-06, "loss": 0.8092, "step": 2628 }, { "epoch": 0.7863017795723045, "grad_norm": 2.1336748600006104, "learning_rate": 1.1497820240136753e-06, "loss": 0.8784, "step": 2629 }, { "epoch": 0.7866008673545686, "grad_norm": 2.165308952331543, "learning_rate": 1.1466926713678117e-06, "loss": 0.8952, "step": 2630 }, { "epoch": 0.7868999551368326, "grad_norm": 2.1592838764190674, "learning_rate": 1.143606937044997e-06, "loss": 0.9448, "step": 2631 }, { "epoch": 0.7871990429190967, "grad_norm": 2.1174044609069824, "learning_rate": 1.140524823942793e-06, "loss": 0.8685, "step": 2632 }, { "epoch": 0.7874981307013609, "grad_norm": 2.2701683044433594, "learning_rate": 1.137446334955357e-06, "loss": 0.9053, "step": 2633 }, { "epoch": 0.7877972184836249, "grad_norm": 2.148639678955078, "learning_rate": 1.1343714729734424e-06, "loss": 0.8767, "step": 2634 }, { "epoch": 0.788096306265889, "grad_norm": 3.3655192852020264, "learning_rate": 1.1313002408843986e-06, "loss": 0.894, "step": 2635 }, { "epoch": 0.7883953940481532, "grad_norm": 2.363206148147583, "learning_rate": 1.1282326415721657e-06, "loss": 0.9146, "step": 2636 }, { "epoch": 0.7886944818304172, "grad_norm": 2.1575839519500732, "learning_rate": 1.1251686779172772e-06, "loss": 0.825, "step": 2637 }, { "epoch": 0.7889935696126813, "grad_norm": 2.1943087577819824, "learning_rate": 1.122108352796844e-06, "loss": 0.8366, "step": 2638 }, { "epoch": 0.7892926573949454, "grad_norm": 2.7709505558013916, "learning_rate": 1.119051669084567e-06, "loss": 0.8814, "step": 2639 }, { "epoch": 0.7895917451772095, "grad_norm": 2.139381170272827, "learning_rate": 1.1159986296507259e-06, "loss": 0.8821, "step": 2640 }, { "epoch": 0.7898908329594736, "grad_norm": 2.560561418533325, "learning_rate": 1.112949237362177e-06, "loss": 0.8492, "step": 2641 }, { "epoch": 0.7901899207417377, "grad_norm": 2.324193000793457, "learning_rate": 1.1099034950823539e-06, "loss": 0.8877, "step": 2642 }, { "epoch": 0.7904890085240018, "grad_norm": 3.1617431640625, "learning_rate": 1.1068614056712624e-06, "loss": 0.9428, "step": 2643 }, { "epoch": 0.7907880963062659, "grad_norm": 2.341543674468994, "learning_rate": 1.103822971985477e-06, "loss": 0.8431, "step": 2644 }, { "epoch": 0.79108718408853, "grad_norm": 2.1462252140045166, "learning_rate": 1.1007881968781403e-06, "loss": 0.8626, "step": 2645 }, { "epoch": 0.7913862718707941, "grad_norm": 2.212362766265869, "learning_rate": 1.0977570831989593e-06, "loss": 0.9157, "step": 2646 }, { "epoch": 0.7916853596530582, "grad_norm": 2.108914375305176, "learning_rate": 1.0947296337942026e-06, "loss": 0.8421, "step": 2647 }, { "epoch": 0.7919844474353223, "grad_norm": 2.404512405395508, "learning_rate": 1.091705851506698e-06, "loss": 0.8562, "step": 2648 }, { "epoch": 0.7922835352175863, "grad_norm": 1.971222162246704, "learning_rate": 1.088685739175831e-06, "loss": 0.8428, "step": 2649 }, { "epoch": 0.7925826229998505, "grad_norm": 2.078317642211914, "learning_rate": 1.085669299637539e-06, "loss": 0.9152, "step": 2650 }, { "epoch": 0.7928817107821146, "grad_norm": 1.9506429433822632, "learning_rate": 1.0826565357243125e-06, "loss": 0.8391, "step": 2651 }, { "epoch": 0.7931807985643786, "grad_norm": 2.5235705375671387, "learning_rate": 1.0796474502651893e-06, "loss": 0.8763, "step": 2652 }, { "epoch": 0.7934798863466428, "grad_norm": 1.9295462369918823, "learning_rate": 1.0766420460857507e-06, "loss": 0.8457, "step": 2653 }, { "epoch": 0.7937789741289069, "grad_norm": 2.1573355197906494, "learning_rate": 1.0736403260081279e-06, "loss": 0.919, "step": 2654 }, { "epoch": 0.7940780619111709, "grad_norm": 2.134714126586914, "learning_rate": 1.070642292850987e-06, "loss": 0.8476, "step": 2655 }, { "epoch": 0.794377149693435, "grad_norm": 2.2278120517730713, "learning_rate": 1.067647949429534e-06, "loss": 0.86, "step": 2656 }, { "epoch": 0.7946762374756992, "grad_norm": 2.285137891769409, "learning_rate": 1.0646572985555071e-06, "loss": 0.8658, "step": 2657 }, { "epoch": 0.7949753252579632, "grad_norm": 2.0630249977111816, "learning_rate": 1.0616703430371833e-06, "loss": 0.851, "step": 2658 }, { "epoch": 0.7952744130402273, "grad_norm": 2.0970423221588135, "learning_rate": 1.0586870856793657e-06, "loss": 0.8427, "step": 2659 }, { "epoch": 0.7955735008224915, "grad_norm": 2.070500612258911, "learning_rate": 1.0557075292833836e-06, "loss": 0.9308, "step": 2660 }, { "epoch": 0.7958725886047555, "grad_norm": 2.071791648864746, "learning_rate": 1.052731676647092e-06, "loss": 0.925, "step": 2661 }, { "epoch": 0.7961716763870196, "grad_norm": 2.0589165687561035, "learning_rate": 1.049759530564871e-06, "loss": 0.938, "step": 2662 }, { "epoch": 0.7964707641692836, "grad_norm": 2.338690757751465, "learning_rate": 1.0467910938276182e-06, "loss": 0.8973, "step": 2663 }, { "epoch": 0.7967698519515478, "grad_norm": 2.236173152923584, "learning_rate": 1.0438263692227452e-06, "loss": 0.9023, "step": 2664 }, { "epoch": 0.7970689397338119, "grad_norm": 2.655717611312866, "learning_rate": 1.0408653595341812e-06, "loss": 0.8179, "step": 2665 }, { "epoch": 0.7973680275160759, "grad_norm": 2.1323041915893555, "learning_rate": 1.0379080675423664e-06, "loss": 0.8668, "step": 2666 }, { "epoch": 0.7976671152983401, "grad_norm": 2.3019919395446777, "learning_rate": 1.0349544960242496e-06, "loss": 0.9381, "step": 2667 }, { "epoch": 0.7979662030806042, "grad_norm": 2.4085683822631836, "learning_rate": 1.0320046477532864e-06, "loss": 0.8827, "step": 2668 }, { "epoch": 0.7982652908628682, "grad_norm": 3.0013515949249268, "learning_rate": 1.0290585254994356e-06, "loss": 0.8578, "step": 2669 }, { "epoch": 0.7985643786451323, "grad_norm": 2.7492516040802, "learning_rate": 1.0261161320291586e-06, "loss": 0.8395, "step": 2670 }, { "epoch": 0.7988634664273965, "grad_norm": 2.261178970336914, "learning_rate": 1.0231774701054126e-06, "loss": 0.8738, "step": 2671 }, { "epoch": 0.7991625542096605, "grad_norm": 2.0806801319122314, "learning_rate": 1.020242542487654e-06, "loss": 0.8427, "step": 2672 }, { "epoch": 0.7994616419919246, "grad_norm": 2.9425690174102783, "learning_rate": 1.017311351931831e-06, "loss": 0.859, "step": 2673 }, { "epoch": 0.7997607297741888, "grad_norm": 2.347458839416504, "learning_rate": 1.0143839011903822e-06, "loss": 0.8946, "step": 2674 }, { "epoch": 0.8000598175564528, "grad_norm": 2.27180552482605, "learning_rate": 1.0114601930122363e-06, "loss": 0.8873, "step": 2675 }, { "epoch": 0.8003589053387169, "grad_norm": 2.3010804653167725, "learning_rate": 1.0085402301428055e-06, "loss": 0.9657, "step": 2676 }, { "epoch": 0.800657993120981, "grad_norm": 2.4730207920074463, "learning_rate": 1.005624015323986e-06, "loss": 0.9074, "step": 2677 }, { "epoch": 0.8009570809032451, "grad_norm": 2.8061442375183105, "learning_rate": 1.0027115512941549e-06, "loss": 0.8725, "step": 2678 }, { "epoch": 0.8012561686855092, "grad_norm": 2.0415520668029785, "learning_rate": 9.998028407881672e-07, "loss": 0.8699, "step": 2679 }, { "epoch": 0.8015552564677733, "grad_norm": 1.9404364824295044, "learning_rate": 9.96897886537353e-07, "loss": 0.8893, "step": 2680 }, { "epoch": 0.8018543442500374, "grad_norm": 1.9787859916687012, "learning_rate": 9.939966912695143e-07, "loss": 0.8595, "step": 2681 }, { "epoch": 0.8021534320323015, "grad_norm": 2.109189987182617, "learning_rate": 9.910992577089269e-07, "loss": 0.882, "step": 2682 }, { "epoch": 0.8024525198145656, "grad_norm": 2.1873385906219482, "learning_rate": 9.882055885763264e-07, "loss": 0.8949, "step": 2683 }, { "epoch": 0.8027516075968296, "grad_norm": 2.4482386112213135, "learning_rate": 9.853156865889234e-07, "loss": 0.8785, "step": 2684 }, { "epoch": 0.8030506953790938, "grad_norm": 2.6369004249572754, "learning_rate": 9.824295544603863e-07, "loss": 0.8679, "step": 2685 }, { "epoch": 0.8033497831613579, "grad_norm": 2.052980422973633, "learning_rate": 9.795471949008411e-07, "loss": 0.9309, "step": 2686 }, { "epoch": 0.8036488709436219, "grad_norm": 2.225369453430176, "learning_rate": 9.766686106168744e-07, "loss": 0.9386, "step": 2687 }, { "epoch": 0.8039479587258861, "grad_norm": 2.252957820892334, "learning_rate": 9.73793804311529e-07, "loss": 0.9286, "step": 2688 }, { "epoch": 0.8042470465081502, "grad_norm": 2.0903658866882324, "learning_rate": 9.70922778684299e-07, "loss": 0.8778, "step": 2689 }, { "epoch": 0.8045461342904142, "grad_norm": 2.1804277896881104, "learning_rate": 9.680555364311251e-07, "loss": 0.9015, "step": 2690 }, { "epoch": 0.8048452220726783, "grad_norm": 2.710968017578125, "learning_rate": 9.651920802443971e-07, "loss": 0.8984, "step": 2691 }, { "epoch": 0.8051443098549425, "grad_norm": 2.252894878387451, "learning_rate": 9.623324128129557e-07, "loss": 0.8578, "step": 2692 }, { "epoch": 0.8054433976372065, "grad_norm": 2.4322805404663086, "learning_rate": 9.594765368220737e-07, "loss": 0.8803, "step": 2693 }, { "epoch": 0.8057424854194706, "grad_norm": 2.470078706741333, "learning_rate": 9.56624454953471e-07, "loss": 0.8901, "step": 2694 }, { "epoch": 0.8060415732017348, "grad_norm": 2.0892691612243652, "learning_rate": 9.537761698853016e-07, "loss": 0.9125, "step": 2695 }, { "epoch": 0.8063406609839988, "grad_norm": 2.031531572341919, "learning_rate": 9.509316842921551e-07, "loss": 0.8303, "step": 2696 }, { "epoch": 0.8066397487662629, "grad_norm": 2.1519904136657715, "learning_rate": 9.480910008450534e-07, "loss": 0.9098, "step": 2697 }, { "epoch": 0.8069388365485269, "grad_norm": 2.9573562145233154, "learning_rate": 9.452541222114481e-07, "loss": 0.8429, "step": 2698 }, { "epoch": 0.8072379243307911, "grad_norm": 2.2425944805145264, "learning_rate": 9.424210510552179e-07, "loss": 0.9453, "step": 2699 }, { "epoch": 0.8075370121130552, "grad_norm": 2.3245341777801514, "learning_rate": 9.395917900366663e-07, "loss": 0.8768, "step": 2700 }, { "epoch": 0.8078360998953192, "grad_norm": 2.2278664112091064, "learning_rate": 9.36766341812519e-07, "loss": 0.8998, "step": 2701 }, { "epoch": 0.8081351876775834, "grad_norm": 2.483635425567627, "learning_rate": 9.33944709035921e-07, "loss": 0.8737, "step": 2702 }, { "epoch": 0.8084342754598475, "grad_norm": 2.3165130615234375, "learning_rate": 9.31126894356435e-07, "loss": 0.8337, "step": 2703 }, { "epoch": 0.8087333632421115, "grad_norm": 1.9392845630645752, "learning_rate": 9.283129004200381e-07, "loss": 0.8312, "step": 2704 }, { "epoch": 0.8090324510243757, "grad_norm": 2.3056113719940186, "learning_rate": 9.255027298691205e-07, "loss": 0.9174, "step": 2705 }, { "epoch": 0.8093315388066398, "grad_norm": 2.300879955291748, "learning_rate": 9.226963853424815e-07, "loss": 0.9223, "step": 2706 }, { "epoch": 0.8096306265889038, "grad_norm": 1.9955374002456665, "learning_rate": 9.198938694753268e-07, "loss": 0.8259, "step": 2707 }, { "epoch": 0.8099297143711679, "grad_norm": 2.1008522510528564, "learning_rate": 9.170951848992693e-07, "loss": 0.8557, "step": 2708 }, { "epoch": 0.8102288021534321, "grad_norm": 2.0037474632263184, "learning_rate": 9.143003342423212e-07, "loss": 0.8496, "step": 2709 }, { "epoch": 0.8105278899356961, "grad_norm": 2.5364463329315186, "learning_rate": 9.115093201288977e-07, "loss": 0.9257, "step": 2710 }, { "epoch": 0.8108269777179602, "grad_norm": 1.8757383823394775, "learning_rate": 9.0872214517981e-07, "loss": 0.8338, "step": 2711 }, { "epoch": 0.8111260655002244, "grad_norm": 2.65364146232605, "learning_rate": 9.059388120122626e-07, "loss": 0.8921, "step": 2712 }, { "epoch": 0.8114251532824884, "grad_norm": 2.0527827739715576, "learning_rate": 9.031593232398539e-07, "loss": 0.862, "step": 2713 }, { "epoch": 0.8117242410647525, "grad_norm": 2.5519402027130127, "learning_rate": 9.003836814725742e-07, "loss": 0.9241, "step": 2714 }, { "epoch": 0.8120233288470166, "grad_norm": 2.1984663009643555, "learning_rate": 8.976118893168006e-07, "loss": 0.8725, "step": 2715 }, { "epoch": 0.8123224166292807, "grad_norm": 2.1712191104888916, "learning_rate": 8.94843949375292e-07, "loss": 0.8799, "step": 2716 }, { "epoch": 0.8126215044115448, "grad_norm": 1.9761104583740234, "learning_rate": 8.920798642471918e-07, "loss": 0.8253, "step": 2717 }, { "epoch": 0.8129205921938089, "grad_norm": 1.9534170627593994, "learning_rate": 8.893196365280282e-07, "loss": 0.8182, "step": 2718 }, { "epoch": 0.813219679976073, "grad_norm": 2.321129322052002, "learning_rate": 8.865632688097004e-07, "loss": 0.85, "step": 2719 }, { "epoch": 0.8135187677583371, "grad_norm": 2.2126221656799316, "learning_rate": 8.83810763680486e-07, "loss": 0.8938, "step": 2720 }, { "epoch": 0.8138178555406012, "grad_norm": 2.107250213623047, "learning_rate": 8.810621237250355e-07, "loss": 0.8961, "step": 2721 }, { "epoch": 0.8141169433228652, "grad_norm": 2.3630170822143555, "learning_rate": 8.783173515243725e-07, "loss": 0.8391, "step": 2722 }, { "epoch": 0.8144160311051294, "grad_norm": 2.5453169345855713, "learning_rate": 8.755764496558838e-07, "loss": 0.9081, "step": 2723 }, { "epoch": 0.8147151188873935, "grad_norm": 2.153806209564209, "learning_rate": 8.728394206933239e-07, "loss": 0.8723, "step": 2724 }, { "epoch": 0.8150142066696575, "grad_norm": 2.3862967491149902, "learning_rate": 8.701062672068122e-07, "loss": 0.8976, "step": 2725 }, { "epoch": 0.8153132944519217, "grad_norm": 2.1461236476898193, "learning_rate": 8.673769917628272e-07, "loss": 0.8376, "step": 2726 }, { "epoch": 0.8156123822341858, "grad_norm": 2.4138875007629395, "learning_rate": 8.646515969242065e-07, "loss": 0.8371, "step": 2727 }, { "epoch": 0.8159114700164498, "grad_norm": 2.0282957553863525, "learning_rate": 8.619300852501427e-07, "loss": 0.9394, "step": 2728 }, { "epoch": 0.8162105577987139, "grad_norm": 2.1673169136047363, "learning_rate": 8.592124592961843e-07, "loss": 0.9405, "step": 2729 }, { "epoch": 0.816509645580978, "grad_norm": 2.489934206008911, "learning_rate": 8.56498721614229e-07, "loss": 0.8471, "step": 2730 }, { "epoch": 0.8168087333632421, "grad_norm": 2.0569229125976562, "learning_rate": 8.537888747525236e-07, "loss": 0.8568, "step": 2731 }, { "epoch": 0.8171078211455062, "grad_norm": 2.1921114921569824, "learning_rate": 8.51082921255662e-07, "loss": 0.9092, "step": 2732 }, { "epoch": 0.8174069089277703, "grad_norm": 2.573490858078003, "learning_rate": 8.483808636645824e-07, "loss": 0.9617, "step": 2733 }, { "epoch": 0.8177059967100344, "grad_norm": 2.2590672969818115, "learning_rate": 8.456827045165638e-07, "loss": 0.8436, "step": 2734 }, { "epoch": 0.8180050844922985, "grad_norm": 2.312244176864624, "learning_rate": 8.429884463452248e-07, "loss": 0.8654, "step": 2735 }, { "epoch": 0.8183041722745625, "grad_norm": 2.600752115249634, "learning_rate": 8.402980916805215e-07, "loss": 0.9099, "step": 2736 }, { "epoch": 0.8186032600568267, "grad_norm": 2.068455457687378, "learning_rate": 8.376116430487441e-07, "loss": 0.8893, "step": 2737 }, { "epoch": 0.8189023478390908, "grad_norm": 2.6557464599609375, "learning_rate": 8.349291029725126e-07, "loss": 0.8743, "step": 2738 }, { "epoch": 0.8192014356213548, "grad_norm": 2.4469785690307617, "learning_rate": 8.322504739707821e-07, "loss": 0.8626, "step": 2739 }, { "epoch": 0.819500523403619, "grad_norm": 2.0078771114349365, "learning_rate": 8.295757585588304e-07, "loss": 0.86, "step": 2740 }, { "epoch": 0.8197996111858831, "grad_norm": 2.0724434852600098, "learning_rate": 8.269049592482648e-07, "loss": 0.9205, "step": 2741 }, { "epoch": 0.8200986989681471, "grad_norm": 2.443103313446045, "learning_rate": 8.242380785470088e-07, "loss": 0.8703, "step": 2742 }, { "epoch": 0.8203977867504112, "grad_norm": 2.173558473587036, "learning_rate": 8.215751189593107e-07, "loss": 0.8338, "step": 2743 }, { "epoch": 0.8206968745326754, "grad_norm": 2.472893714904785, "learning_rate": 8.189160829857396e-07, "loss": 0.8528, "step": 2744 }, { "epoch": 0.8209959623149394, "grad_norm": 2.3536782264709473, "learning_rate": 8.16260973123173e-07, "loss": 0.8687, "step": 2745 }, { "epoch": 0.8212950500972035, "grad_norm": 2.12846040725708, "learning_rate": 8.136097918648073e-07, "loss": 0.9178, "step": 2746 }, { "epoch": 0.8215941378794677, "grad_norm": 2.540383815765381, "learning_rate": 8.109625417001465e-07, "loss": 0.8036, "step": 2747 }, { "epoch": 0.8218932256617317, "grad_norm": 2.408097743988037, "learning_rate": 8.08319225115009e-07, "loss": 0.9237, "step": 2748 }, { "epoch": 0.8221923134439958, "grad_norm": 2.1759941577911377, "learning_rate": 8.056798445915115e-07, "loss": 0.8427, "step": 2749 }, { "epoch": 0.8224914012262599, "grad_norm": 2.3616888523101807, "learning_rate": 8.030444026080791e-07, "loss": 0.8368, "step": 2750 }, { "epoch": 0.822790489008524, "grad_norm": 2.402254581451416, "learning_rate": 8.004129016394374e-07, "loss": 0.9003, "step": 2751 }, { "epoch": 0.8230895767907881, "grad_norm": 2.160900115966797, "learning_rate": 7.977853441566152e-07, "loss": 0.9184, "step": 2752 }, { "epoch": 0.8233886645730522, "grad_norm": 2.3219950199127197, "learning_rate": 7.951617326269318e-07, "loss": 0.8662, "step": 2753 }, { "epoch": 0.8236877523553163, "grad_norm": 2.030374765396118, "learning_rate": 7.925420695140052e-07, "loss": 0.8708, "step": 2754 }, { "epoch": 0.8239868401375804, "grad_norm": 2.2049622535705566, "learning_rate": 7.899263572777454e-07, "loss": 0.8399, "step": 2755 }, { "epoch": 0.8242859279198445, "grad_norm": 2.437258005142212, "learning_rate": 7.873145983743513e-07, "loss": 0.8999, "step": 2756 }, { "epoch": 0.8245850157021085, "grad_norm": 3.1788647174835205, "learning_rate": 7.847067952563103e-07, "loss": 0.8381, "step": 2757 }, { "epoch": 0.8248841034843727, "grad_norm": 2.246718168258667, "learning_rate": 7.821029503723959e-07, "loss": 0.8419, "step": 2758 }, { "epoch": 0.8251831912666368, "grad_norm": 2.6384546756744385, "learning_rate": 7.795030661676633e-07, "loss": 0.9214, "step": 2759 }, { "epoch": 0.8254822790489008, "grad_norm": 2.426116466522217, "learning_rate": 7.769071450834498e-07, "loss": 0.8798, "step": 2760 }, { "epoch": 0.825781366831165, "grad_norm": 2.282867908477783, "learning_rate": 7.743151895573703e-07, "loss": 0.8838, "step": 2761 }, { "epoch": 0.826080454613429, "grad_norm": 2.318795919418335, "learning_rate": 7.717272020233169e-07, "loss": 0.9268, "step": 2762 }, { "epoch": 0.8263795423956931, "grad_norm": 2.4017651081085205, "learning_rate": 7.691431849114561e-07, "loss": 0.8742, "step": 2763 }, { "epoch": 0.8266786301779573, "grad_norm": 2.2882306575775146, "learning_rate": 7.665631406482216e-07, "loss": 0.8274, "step": 2764 }, { "epoch": 0.8269777179602213, "grad_norm": 2.3405394554138184, "learning_rate": 7.639870716563236e-07, "loss": 0.8975, "step": 2765 }, { "epoch": 0.8272768057424854, "grad_norm": 2.1887495517730713, "learning_rate": 7.614149803547354e-07, "loss": 0.9463, "step": 2766 }, { "epoch": 0.8275758935247495, "grad_norm": 2.217355966567993, "learning_rate": 7.588468691586964e-07, "loss": 0.8758, "step": 2767 }, { "epoch": 0.8278749813070136, "grad_norm": 2.5191285610198975, "learning_rate": 7.562827404797046e-07, "loss": 0.8363, "step": 2768 }, { "epoch": 0.8281740690892777, "grad_norm": 1.9491705894470215, "learning_rate": 7.537225967255252e-07, "loss": 0.9573, "step": 2769 }, { "epoch": 0.8284731568715418, "grad_norm": 2.400925397872925, "learning_rate": 7.511664403001778e-07, "loss": 0.8495, "step": 2770 }, { "epoch": 0.828772244653806, "grad_norm": 2.0974984169006348, "learning_rate": 7.486142736039364e-07, "loss": 0.865, "step": 2771 }, { "epoch": 0.82907133243607, "grad_norm": 2.328169584274292, "learning_rate": 7.460660990333307e-07, "loss": 0.8922, "step": 2772 }, { "epoch": 0.8293704202183341, "grad_norm": 2.1485674381256104, "learning_rate": 7.435219189811404e-07, "loss": 0.9449, "step": 2773 }, { "epoch": 0.8296695080005981, "grad_norm": 3.1017277240753174, "learning_rate": 7.409817358363986e-07, "loss": 0.8657, "step": 2774 }, { "epoch": 0.8299685957828623, "grad_norm": 2.140037775039673, "learning_rate": 7.38445551984378e-07, "loss": 0.8423, "step": 2775 }, { "epoch": 0.8302676835651264, "grad_norm": 2.7003860473632812, "learning_rate": 7.359133698066012e-07, "loss": 0.8528, "step": 2776 }, { "epoch": 0.8305667713473904, "grad_norm": 2.204752206802368, "learning_rate": 7.333851916808298e-07, "loss": 0.8729, "step": 2777 }, { "epoch": 0.8308658591296546, "grad_norm": 2.2413570880889893, "learning_rate": 7.308610199810717e-07, "loss": 0.86, "step": 2778 }, { "epoch": 0.8311649469119187, "grad_norm": 2.08685302734375, "learning_rate": 7.28340857077564e-07, "loss": 0.9243, "step": 2779 }, { "epoch": 0.8314640346941827, "grad_norm": 2.24499249458313, "learning_rate": 7.258247053367856e-07, "loss": 0.8581, "step": 2780 }, { "epoch": 0.8317631224764468, "grad_norm": 2.4123265743255615, "learning_rate": 7.233125671214469e-07, "loss": 0.8727, "step": 2781 }, { "epoch": 0.832062210258711, "grad_norm": 2.2246782779693604, "learning_rate": 7.208044447904893e-07, "loss": 0.937, "step": 2782 }, { "epoch": 0.832361298040975, "grad_norm": 2.2538633346557617, "learning_rate": 7.183003406990841e-07, "loss": 0.8664, "step": 2783 }, { "epoch": 0.8326603858232391, "grad_norm": 2.5097694396972656, "learning_rate": 7.158002571986283e-07, "loss": 0.8824, "step": 2784 }, { "epoch": 0.8329594736055033, "grad_norm": 2.4429426193237305, "learning_rate": 7.133041966367443e-07, "loss": 0.8897, "step": 2785 }, { "epoch": 0.8332585613877673, "grad_norm": 2.2659294605255127, "learning_rate": 7.108121613572771e-07, "loss": 0.8803, "step": 2786 }, { "epoch": 0.8335576491700314, "grad_norm": 2.3177225589752197, "learning_rate": 7.083241537002905e-07, "loss": 0.8855, "step": 2787 }, { "epoch": 0.8338567369522955, "grad_norm": 2.419252395629883, "learning_rate": 7.058401760020689e-07, "loss": 0.9384, "step": 2788 }, { "epoch": 0.8341558247345596, "grad_norm": 2.0986757278442383, "learning_rate": 7.033602305951104e-07, "loss": 0.934, "step": 2789 }, { "epoch": 0.8344549125168237, "grad_norm": 2.067105770111084, "learning_rate": 7.008843198081239e-07, "loss": 0.8803, "step": 2790 }, { "epoch": 0.8347540002990878, "grad_norm": 2.180145502090454, "learning_rate": 6.984124459660374e-07, "loss": 0.8715, "step": 2791 }, { "epoch": 0.8350530880813519, "grad_norm": 2.6787703037261963, "learning_rate": 6.95944611389982e-07, "loss": 0.8408, "step": 2792 }, { "epoch": 0.835352175863616, "grad_norm": 2.226616621017456, "learning_rate": 6.934808183972986e-07, "loss": 0.9028, "step": 2793 }, { "epoch": 0.83565126364588, "grad_norm": 2.2424962520599365, "learning_rate": 6.910210693015285e-07, "loss": 0.8924, "step": 2794 }, { "epoch": 0.8359503514281441, "grad_norm": 2.3864219188690186, "learning_rate": 6.885653664124226e-07, "loss": 0.9297, "step": 2795 }, { "epoch": 0.8362494392104083, "grad_norm": 2.365290880203247, "learning_rate": 6.861137120359296e-07, "loss": 0.8565, "step": 2796 }, { "epoch": 0.8365485269926723, "grad_norm": 2.2204227447509766, "learning_rate": 6.836661084741924e-07, "loss": 0.8225, "step": 2797 }, { "epoch": 0.8368476147749364, "grad_norm": 2.1759049892425537, "learning_rate": 6.812225580255549e-07, "loss": 0.7956, "step": 2798 }, { "epoch": 0.8371467025572006, "grad_norm": 2.1433522701263428, "learning_rate": 6.787830629845549e-07, "loss": 0.8949, "step": 2799 }, { "epoch": 0.8374457903394646, "grad_norm": 2.112456798553467, "learning_rate": 6.763476256419215e-07, "loss": 0.8713, "step": 2800 }, { "epoch": 0.8377448781217287, "grad_norm": 2.598015785217285, "learning_rate": 6.739162482845707e-07, "loss": 0.9244, "step": 2801 }, { "epoch": 0.8380439659039928, "grad_norm": 2.1184399127960205, "learning_rate": 6.714889331956087e-07, "loss": 0.8258, "step": 2802 }, { "epoch": 0.838343053686257, "grad_norm": 2.3327112197875977, "learning_rate": 6.690656826543285e-07, "loss": 0.9136, "step": 2803 }, { "epoch": 0.838642141468521, "grad_norm": 2.110128879547119, "learning_rate": 6.666464989362054e-07, "loss": 0.7871, "step": 2804 }, { "epoch": 0.8389412292507851, "grad_norm": 4.017728805541992, "learning_rate": 6.642313843128922e-07, "loss": 0.904, "step": 2805 }, { "epoch": 0.8392403170330492, "grad_norm": 2.6760265827178955, "learning_rate": 6.618203410522262e-07, "loss": 0.8869, "step": 2806 }, { "epoch": 0.8395394048153133, "grad_norm": 2.142077684402466, "learning_rate": 6.594133714182178e-07, "loss": 0.8507, "step": 2807 }, { "epoch": 0.8398384925975774, "grad_norm": 2.6661417484283447, "learning_rate": 6.570104776710551e-07, "loss": 0.845, "step": 2808 }, { "epoch": 0.8401375803798414, "grad_norm": 2.440425395965576, "learning_rate": 6.546116620670961e-07, "loss": 0.8556, "step": 2809 }, { "epoch": 0.8404366681621056, "grad_norm": 2.29923677444458, "learning_rate": 6.522169268588713e-07, "loss": 0.9096, "step": 2810 }, { "epoch": 0.8407357559443697, "grad_norm": 2.079932928085327, "learning_rate": 6.49826274295079e-07, "loss": 0.8714, "step": 2811 }, { "epoch": 0.8410348437266337, "grad_norm": 2.259460687637329, "learning_rate": 6.474397066205834e-07, "loss": 0.8857, "step": 2812 }, { "epoch": 0.8413339315088979, "grad_norm": 2.151576042175293, "learning_rate": 6.450572260764137e-07, "loss": 0.8455, "step": 2813 }, { "epoch": 0.841633019291162, "grad_norm": 2.717508554458618, "learning_rate": 6.42678834899761e-07, "loss": 0.8569, "step": 2814 }, { "epoch": 0.841932107073426, "grad_norm": 2.1216423511505127, "learning_rate": 6.403045353239757e-07, "loss": 0.8288, "step": 2815 }, { "epoch": 0.8422311948556901, "grad_norm": 2.016479969024658, "learning_rate": 6.379343295785673e-07, "loss": 0.8551, "step": 2816 }, { "epoch": 0.8425302826379543, "grad_norm": 2.473445177078247, "learning_rate": 6.355682198892005e-07, "loss": 0.9029, "step": 2817 }, { "epoch": 0.8428293704202183, "grad_norm": 2.130262851715088, "learning_rate": 6.33206208477693e-07, "loss": 0.8859, "step": 2818 }, { "epoch": 0.8431284582024824, "grad_norm": 2.3167624473571777, "learning_rate": 6.308482975620161e-07, "loss": 0.846, "step": 2819 }, { "epoch": 0.8434275459847466, "grad_norm": 2.489748477935791, "learning_rate": 6.284944893562872e-07, "loss": 0.8896, "step": 2820 }, { "epoch": 0.8437266337670106, "grad_norm": 2.5923454761505127, "learning_rate": 6.261447860707753e-07, "loss": 0.8688, "step": 2821 }, { "epoch": 0.8440257215492747, "grad_norm": 2.1276121139526367, "learning_rate": 6.23799189911894e-07, "loss": 0.856, "step": 2822 }, { "epoch": 0.8443248093315389, "grad_norm": 2.3296916484832764, "learning_rate": 6.214577030821967e-07, "loss": 0.8753, "step": 2823 }, { "epoch": 0.8446238971138029, "grad_norm": 2.326270580291748, "learning_rate": 6.191203277803798e-07, "loss": 0.8625, "step": 2824 }, { "epoch": 0.844922984896067, "grad_norm": 2.046274423599243, "learning_rate": 6.167870662012831e-07, "loss": 0.848, "step": 2825 }, { "epoch": 0.845222072678331, "grad_norm": 4.75659704208374, "learning_rate": 6.144579205358786e-07, "loss": 0.9437, "step": 2826 }, { "epoch": 0.8455211604605952, "grad_norm": 2.232623815536499, "learning_rate": 6.121328929712739e-07, "loss": 0.9092, "step": 2827 }, { "epoch": 0.8458202482428593, "grad_norm": 2.171138048171997, "learning_rate": 6.098119856907103e-07, "loss": 0.7842, "step": 2828 }, { "epoch": 0.8461193360251233, "grad_norm": 2.0148868560791016, "learning_rate": 6.074952008735624e-07, "loss": 0.8861, "step": 2829 }, { "epoch": 0.8464184238073875, "grad_norm": 2.032039165496826, "learning_rate": 6.051825406953316e-07, "loss": 0.9126, "step": 2830 }, { "epoch": 0.8467175115896516, "grad_norm": 2.1330437660217285, "learning_rate": 6.02874007327644e-07, "loss": 0.8873, "step": 2831 }, { "epoch": 0.8470165993719156, "grad_norm": 2.240612268447876, "learning_rate": 6.005696029382535e-07, "loss": 0.8318, "step": 2832 }, { "epoch": 0.8473156871541797, "grad_norm": 2.439371109008789, "learning_rate": 5.982693296910386e-07, "loss": 0.8906, "step": 2833 }, { "epoch": 0.8476147749364439, "grad_norm": 2.236402988433838, "learning_rate": 5.959731897459936e-07, "loss": 0.9375, "step": 2834 }, { "epoch": 0.847913862718708, "grad_norm": 2.4336323738098145, "learning_rate": 5.93681185259235e-07, "loss": 0.8861, "step": 2835 }, { "epoch": 0.848212950500972, "grad_norm": 2.013319730758667, "learning_rate": 5.91393318382995e-07, "loss": 0.8941, "step": 2836 }, { "epoch": 0.8485120382832362, "grad_norm": 2.425534248352051, "learning_rate": 5.891095912656208e-07, "loss": 0.8513, "step": 2837 }, { "epoch": 0.8488111260655002, "grad_norm": 2.1257028579711914, "learning_rate": 5.86830006051572e-07, "loss": 0.9416, "step": 2838 }, { "epoch": 0.8491102138477643, "grad_norm": 2.1715691089630127, "learning_rate": 5.845545648814188e-07, "loss": 0.8338, "step": 2839 }, { "epoch": 0.8494093016300284, "grad_norm": 2.4937121868133545, "learning_rate": 5.822832698918413e-07, "loss": 0.9481, "step": 2840 }, { "epoch": 0.8497083894122925, "grad_norm": 2.7325961589813232, "learning_rate": 5.800161232156238e-07, "loss": 0.9021, "step": 2841 }, { "epoch": 0.8500074771945566, "grad_norm": 2.1711673736572266, "learning_rate": 5.777531269816577e-07, "loss": 0.8769, "step": 2842 }, { "epoch": 0.8503065649768207, "grad_norm": 4.01958703994751, "learning_rate": 5.754942833149363e-07, "loss": 0.8573, "step": 2843 }, { "epoch": 0.8506056527590848, "grad_norm": 2.436004161834717, "learning_rate": 5.732395943365526e-07, "loss": 0.8865, "step": 2844 }, { "epoch": 0.8509047405413489, "grad_norm": 2.0554921627044678, "learning_rate": 5.709890621636993e-07, "loss": 0.8772, "step": 2845 }, { "epoch": 0.851203828323613, "grad_norm": 2.379852294921875, "learning_rate": 5.687426889096659e-07, "loss": 0.9314, "step": 2846 }, { "epoch": 0.851502916105877, "grad_norm": 2.166990041732788, "learning_rate": 5.665004766838356e-07, "loss": 0.8657, "step": 2847 }, { "epoch": 0.8518020038881412, "grad_norm": 2.680401086807251, "learning_rate": 5.642624275916852e-07, "loss": 0.8517, "step": 2848 }, { "epoch": 0.8521010916704053, "grad_norm": 2.3779635429382324, "learning_rate": 5.620285437347834e-07, "loss": 0.9088, "step": 2849 }, { "epoch": 0.8524001794526693, "grad_norm": 2.306083917617798, "learning_rate": 5.597988272107824e-07, "loss": 0.9157, "step": 2850 }, { "epoch": 0.8526992672349335, "grad_norm": 2.137026786804199, "learning_rate": 5.575732801134287e-07, "loss": 0.8692, "step": 2851 }, { "epoch": 0.8529983550171976, "grad_norm": 2.2428903579711914, "learning_rate": 5.553519045325501e-07, "loss": 0.8738, "step": 2852 }, { "epoch": 0.8532974427994616, "grad_norm": 1.9872409105300903, "learning_rate": 5.531347025540546e-07, "loss": 0.8414, "step": 2853 }, { "epoch": 0.8535965305817257, "grad_norm": 2.1094954013824463, "learning_rate": 5.509216762599339e-07, "loss": 0.889, "step": 2854 }, { "epoch": 0.8538956183639899, "grad_norm": 2.3629074096679688, "learning_rate": 5.487128277282605e-07, "loss": 0.8333, "step": 2855 }, { "epoch": 0.8541947061462539, "grad_norm": 2.7219908237457275, "learning_rate": 5.465081590331817e-07, "loss": 0.8649, "step": 2856 }, { "epoch": 0.854493793928518, "grad_norm": 2.388812303543091, "learning_rate": 5.443076722449186e-07, "loss": 0.8905, "step": 2857 }, { "epoch": 0.8547928817107822, "grad_norm": 2.5600762367248535, "learning_rate": 5.421113694297664e-07, "loss": 0.8364, "step": 2858 }, { "epoch": 0.8550919694930462, "grad_norm": 2.198455572128296, "learning_rate": 5.399192526500946e-07, "loss": 0.8618, "step": 2859 }, { "epoch": 0.8553910572753103, "grad_norm": 2.1065590381622314, "learning_rate": 5.377313239643367e-07, "loss": 0.8642, "step": 2860 }, { "epoch": 0.8556901450575743, "grad_norm": 2.1320383548736572, "learning_rate": 5.355475854269964e-07, "loss": 0.9812, "step": 2861 }, { "epoch": 0.8559892328398385, "grad_norm": 2.035574436187744, "learning_rate": 5.333680390886426e-07, "loss": 0.9105, "step": 2862 }, { "epoch": 0.8562883206221026, "grad_norm": 2.0718226432800293, "learning_rate": 5.311926869959094e-07, "loss": 0.8442, "step": 2863 }, { "epoch": 0.8565874084043666, "grad_norm": 2.03843092918396, "learning_rate": 5.290215311914881e-07, "loss": 0.8555, "step": 2864 }, { "epoch": 0.8568864961866308, "grad_norm": 2.646315813064575, "learning_rate": 5.268545737141323e-07, "loss": 0.8787, "step": 2865 }, { "epoch": 0.8571855839688949, "grad_norm": 2.284095287322998, "learning_rate": 5.246918165986537e-07, "loss": 0.8392, "step": 2866 }, { "epoch": 0.8574846717511589, "grad_norm": 2.6678733825683594, "learning_rate": 5.225332618759193e-07, "loss": 0.8968, "step": 2867 }, { "epoch": 0.857783759533423, "grad_norm": 2.2946438789367676, "learning_rate": 5.203789115728486e-07, "loss": 0.8743, "step": 2868 }, { "epoch": 0.8580828473156872, "grad_norm": 2.366504430770874, "learning_rate": 5.182287677124159e-07, "loss": 0.8676, "step": 2869 }, { "epoch": 0.8583819350979512, "grad_norm": 2.594679355621338, "learning_rate": 5.160828323136424e-07, "loss": 0.8419, "step": 2870 }, { "epoch": 0.8586810228802153, "grad_norm": 2.2412564754486084, "learning_rate": 5.139411073916001e-07, "loss": 0.8391, "step": 2871 }, { "epoch": 0.8589801106624795, "grad_norm": 1.99708890914917, "learning_rate": 5.118035949574057e-07, "loss": 0.9075, "step": 2872 }, { "epoch": 0.8592791984447435, "grad_norm": 4.057124614715576, "learning_rate": 5.096702970182204e-07, "loss": 0.8728, "step": 2873 }, { "epoch": 0.8595782862270076, "grad_norm": 2.0839643478393555, "learning_rate": 5.075412155772492e-07, "loss": 0.8949, "step": 2874 }, { "epoch": 0.8598773740092717, "grad_norm": 2.168790102005005, "learning_rate": 5.054163526337364e-07, "loss": 0.9047, "step": 2875 }, { "epoch": 0.8601764617915358, "grad_norm": 2.2221004962921143, "learning_rate": 5.032957101829661e-07, "loss": 0.9439, "step": 2876 }, { "epoch": 0.8604755495737999, "grad_norm": 2.5564372539520264, "learning_rate": 5.011792902162572e-07, "loss": 0.8869, "step": 2877 }, { "epoch": 0.860774637356064, "grad_norm": 2.3539485931396484, "learning_rate": 4.990670947209675e-07, "loss": 0.8927, "step": 2878 }, { "epoch": 0.8610737251383281, "grad_norm": 2.2060546875, "learning_rate": 4.969591256804824e-07, "loss": 0.8967, "step": 2879 }, { "epoch": 0.8613728129205922, "grad_norm": 2.5200107097625732, "learning_rate": 4.948553850742238e-07, "loss": 0.8919, "step": 2880 }, { "epoch": 0.8616719007028563, "grad_norm": 2.3758132457733154, "learning_rate": 4.927558748776412e-07, "loss": 0.8459, "step": 2881 }, { "epoch": 0.8619709884851204, "grad_norm": 2.2135157585144043, "learning_rate": 4.906605970622114e-07, "loss": 0.9193, "step": 2882 }, { "epoch": 0.8622700762673845, "grad_norm": 2.078861713409424, "learning_rate": 4.885695535954361e-07, "loss": 0.8607, "step": 2883 }, { "epoch": 0.8625691640496486, "grad_norm": 2.139439344406128, "learning_rate": 4.8648274644084e-07, "loss": 0.8551, "step": 2884 }, { "epoch": 0.8628682518319126, "grad_norm": 2.116248607635498, "learning_rate": 4.844001775579766e-07, "loss": 0.8562, "step": 2885 }, { "epoch": 0.8631673396141768, "grad_norm": 1.9701558351516724, "learning_rate": 4.8232184890241e-07, "loss": 0.8287, "step": 2886 }, { "epoch": 0.8634664273964409, "grad_norm": 2.1050848960876465, "learning_rate": 4.802477624257285e-07, "loss": 0.8422, "step": 2887 }, { "epoch": 0.8637655151787049, "grad_norm": 2.0514731407165527, "learning_rate": 4.781779200755354e-07, "loss": 0.8611, "step": 2888 }, { "epoch": 0.8640646029609691, "grad_norm": 2.298006057739258, "learning_rate": 4.7611232379545124e-07, "loss": 0.8576, "step": 2889 }, { "epoch": 0.8643636907432332, "grad_norm": 2.2259793281555176, "learning_rate": 4.740509755251038e-07, "loss": 0.9236, "step": 2890 }, { "epoch": 0.8646627785254972, "grad_norm": 2.173405170440674, "learning_rate": 4.71993877200137e-07, "loss": 0.9383, "step": 2891 }, { "epoch": 0.8649618663077613, "grad_norm": 2.6789562702178955, "learning_rate": 4.6994103075220175e-07, "loss": 0.8419, "step": 2892 }, { "epoch": 0.8652609540900255, "grad_norm": 2.4259514808654785, "learning_rate": 4.678924381089567e-07, "loss": 0.8502, "step": 2893 }, { "epoch": 0.8655600418722895, "grad_norm": 2.2012765407562256, "learning_rate": 4.658481011940663e-07, "loss": 0.9043, "step": 2894 }, { "epoch": 0.8658591296545536, "grad_norm": 2.5422286987304688, "learning_rate": 4.63808021927199e-07, "loss": 0.9245, "step": 2895 }, { "epoch": 0.8661582174368178, "grad_norm": 2.020296573638916, "learning_rate": 4.617722022240245e-07, "loss": 0.8659, "step": 2896 }, { "epoch": 0.8664573052190818, "grad_norm": 2.1232242584228516, "learning_rate": 4.597406439962138e-07, "loss": 0.9116, "step": 2897 }, { "epoch": 0.8667563930013459, "grad_norm": 2.135260820388794, "learning_rate": 4.5771334915143516e-07, "loss": 0.907, "step": 2898 }, { "epoch": 0.8670554807836099, "grad_norm": 2.2807416915893555, "learning_rate": 4.5569031959335374e-07, "loss": 0.9085, "step": 2899 }, { "epoch": 0.8673545685658741, "grad_norm": 2.207956552505493, "learning_rate": 4.536715572216299e-07, "loss": 0.8111, "step": 2900 }, { "epoch": 0.8676536563481382, "grad_norm": 2.5183634757995605, "learning_rate": 4.5165706393191676e-07, "loss": 0.8989, "step": 2901 }, { "epoch": 0.8679527441304022, "grad_norm": 2.3604602813720703, "learning_rate": 4.496468416158595e-07, "loss": 0.8244, "step": 2902 }, { "epoch": 0.8682518319126664, "grad_norm": 2.276991128921509, "learning_rate": 4.4764089216109144e-07, "loss": 0.8354, "step": 2903 }, { "epoch": 0.8685509196949305, "grad_norm": 2.172828435897827, "learning_rate": 4.456392174512347e-07, "loss": 0.9049, "step": 2904 }, { "epoch": 0.8688500074771945, "grad_norm": 2.4117698669433594, "learning_rate": 4.4364181936589536e-07, "loss": 0.8815, "step": 2905 }, { "epoch": 0.8691490952594586, "grad_norm": 1.9693514108657837, "learning_rate": 4.4164869978066684e-07, "loss": 0.8689, "step": 2906 }, { "epoch": 0.8694481830417228, "grad_norm": 2.159576416015625, "learning_rate": 4.3965986056712316e-07, "loss": 0.8624, "step": 2907 }, { "epoch": 0.8697472708239868, "grad_norm": 2.144148111343384, "learning_rate": 4.376753035928194e-07, "loss": 0.8747, "step": 2908 }, { "epoch": 0.8700463586062509, "grad_norm": 2.133045196533203, "learning_rate": 4.3569503072128703e-07, "loss": 0.8228, "step": 2909 }, { "epoch": 0.8703454463885151, "grad_norm": 2.2448630332946777, "learning_rate": 4.3371904381203976e-07, "loss": 0.791, "step": 2910 }, { "epoch": 0.8706445341707791, "grad_norm": 2.0865049362182617, "learning_rate": 4.3174734472056334e-07, "loss": 0.8544, "step": 2911 }, { "epoch": 0.8709436219530432, "grad_norm": 2.2423324584960938, "learning_rate": 4.2977993529831675e-07, "loss": 0.8619, "step": 2912 }, { "epoch": 0.8712427097353073, "grad_norm": 2.1426150798797607, "learning_rate": 4.278168173927322e-07, "loss": 0.9254, "step": 2913 }, { "epoch": 0.8715417975175714, "grad_norm": 4.742659568786621, "learning_rate": 4.258579928472106e-07, "loss": 0.889, "step": 2914 }, { "epoch": 0.8718408852998355, "grad_norm": 2.340486764907837, "learning_rate": 4.2390346350112634e-07, "loss": 0.8534, "step": 2915 }, { "epoch": 0.8721399730820996, "grad_norm": 2.4764037132263184, "learning_rate": 4.219532311898128e-07, "loss": 0.8706, "step": 2916 }, { "epoch": 0.8724390608643637, "grad_norm": 1.9425103664398193, "learning_rate": 4.200072977445735e-07, "loss": 0.8469, "step": 2917 }, { "epoch": 0.8727381486466278, "grad_norm": 2.2308945655822754, "learning_rate": 4.180656649926745e-07, "loss": 0.8868, "step": 2918 }, { "epoch": 0.8730372364288919, "grad_norm": 4.015642166137695, "learning_rate": 4.161283347573425e-07, "loss": 0.8881, "step": 2919 }, { "epoch": 0.8733363242111559, "grad_norm": 2.193537712097168, "learning_rate": 4.141953088577644e-07, "loss": 0.8858, "step": 2920 }, { "epoch": 0.8736354119934201, "grad_norm": 2.083890914916992, "learning_rate": 4.12266589109086e-07, "loss": 0.8796, "step": 2921 }, { "epoch": 0.8739344997756842, "grad_norm": 2.0323750972747803, "learning_rate": 4.103421773224081e-07, "loss": 0.8421, "step": 2922 }, { "epoch": 0.8742335875579482, "grad_norm": 2.256521224975586, "learning_rate": 4.0842207530478793e-07, "loss": 0.8157, "step": 2923 }, { "epoch": 0.8745326753402124, "grad_norm": 2.091134786605835, "learning_rate": 4.0650628485923385e-07, "loss": 0.8704, "step": 2924 }, { "epoch": 0.8748317631224765, "grad_norm": 2.3844668865203857, "learning_rate": 4.0459480778470786e-07, "loss": 0.9053, "step": 2925 }, { "epoch": 0.8751308509047405, "grad_norm": 2.123310089111328, "learning_rate": 4.026876458761192e-07, "loss": 0.8719, "step": 2926 }, { "epoch": 0.8754299386870046, "grad_norm": 2.789513349533081, "learning_rate": 4.0078480092432705e-07, "loss": 0.9695, "step": 2927 }, { "epoch": 0.8757290264692688, "grad_norm": 2.268672227859497, "learning_rate": 3.9888627471613595e-07, "loss": 0.9375, "step": 2928 }, { "epoch": 0.8760281142515328, "grad_norm": 2.2620387077331543, "learning_rate": 3.969920690342954e-07, "loss": 0.8822, "step": 2929 }, { "epoch": 0.8763272020337969, "grad_norm": 2.4726479053497314, "learning_rate": 3.9510218565749823e-07, "loss": 0.915, "step": 2930 }, { "epoch": 0.876626289816061, "grad_norm": 2.2077600955963135, "learning_rate": 3.9321662636037537e-07, "loss": 0.8405, "step": 2931 }, { "epoch": 0.8769253775983251, "grad_norm": 2.0676846504211426, "learning_rate": 3.913353929135033e-07, "loss": 0.8504, "step": 2932 }, { "epoch": 0.8772244653805892, "grad_norm": 2.172403573989868, "learning_rate": 3.8945848708339173e-07, "loss": 0.8249, "step": 2933 }, { "epoch": 0.8775235531628532, "grad_norm": 2.5080461502075195, "learning_rate": 3.8758591063248864e-07, "loss": 0.8282, "step": 2934 }, { "epoch": 0.8778226409451174, "grad_norm": 2.256213665008545, "learning_rate": 3.8571766531917466e-07, "loss": 0.8884, "step": 2935 }, { "epoch": 0.8781217287273815, "grad_norm": 2.319688081741333, "learning_rate": 3.838537528977659e-07, "loss": 0.8294, "step": 2936 }, { "epoch": 0.8784208165096455, "grad_norm": 2.651042938232422, "learning_rate": 3.8199417511851023e-07, "loss": 0.9436, "step": 2937 }, { "epoch": 0.8787199042919097, "grad_norm": 5.0999226570129395, "learning_rate": 3.8013893372758125e-07, "loss": 0.843, "step": 2938 }, { "epoch": 0.8790189920741738, "grad_norm": 2.491968870162964, "learning_rate": 3.782880304670833e-07, "loss": 0.892, "step": 2939 }, { "epoch": 0.8793180798564378, "grad_norm": 2.235929012298584, "learning_rate": 3.7644146707504826e-07, "loss": 0.9175, "step": 2940 }, { "epoch": 0.879617167638702, "grad_norm": 2.0128121376037598, "learning_rate": 3.7459924528543247e-07, "loss": 0.8631, "step": 2941 }, { "epoch": 0.8799162554209661, "grad_norm": 2.1288256645202637, "learning_rate": 3.727613668281116e-07, "loss": 0.9348, "step": 2942 }, { "epoch": 0.8802153432032301, "grad_norm": 2.23844051361084, "learning_rate": 3.709278334288874e-07, "loss": 0.8651, "step": 2943 }, { "epoch": 0.8805144309854942, "grad_norm": 2.454387664794922, "learning_rate": 3.6909864680947815e-07, "loss": 0.8766, "step": 2944 }, { "epoch": 0.8808135187677584, "grad_norm": 2.123145818710327, "learning_rate": 3.672738086875255e-07, "loss": 0.8622, "step": 2945 }, { "epoch": 0.8811126065500224, "grad_norm": 3.5420477390289307, "learning_rate": 3.6545332077658146e-07, "loss": 0.8819, "step": 2946 }, { "epoch": 0.8814116943322865, "grad_norm": 2.1183855533599854, "learning_rate": 3.63637184786117e-07, "loss": 0.9216, "step": 2947 }, { "epoch": 0.8817107821145507, "grad_norm": 2.4358959197998047, "learning_rate": 3.618254024215156e-07, "loss": 0.8831, "step": 2948 }, { "epoch": 0.8820098698968147, "grad_norm": 2.1747829914093018, "learning_rate": 3.6001797538407214e-07, "loss": 0.885, "step": 2949 }, { "epoch": 0.8823089576790788, "grad_norm": 1.9437754154205322, "learning_rate": 3.582149053709932e-07, "loss": 0.8898, "step": 2950 }, { "epoch": 0.8826080454613429, "grad_norm": 2.1242709159851074, "learning_rate": 3.564161940753924e-07, "loss": 0.8648, "step": 2951 }, { "epoch": 0.882907133243607, "grad_norm": 2.1740305423736572, "learning_rate": 3.5462184318629134e-07, "loss": 0.9246, "step": 2952 }, { "epoch": 0.8832062210258711, "grad_norm": 2.5916147232055664, "learning_rate": 3.528318543886172e-07, "loss": 0.9003, "step": 2953 }, { "epoch": 0.8835053088081352, "grad_norm": 2.0557801723480225, "learning_rate": 3.510462293632e-07, "loss": 0.8792, "step": 2954 }, { "epoch": 0.8838043965903993, "grad_norm": 2.1113929748535156, "learning_rate": 3.4926496978677393e-07, "loss": 0.9031, "step": 2955 }, { "epoch": 0.8841034843726634, "grad_norm": 2.0313100814819336, "learning_rate": 3.4748807733197223e-07, "loss": 0.839, "step": 2956 }, { "epoch": 0.8844025721549275, "grad_norm": 2.0204265117645264, "learning_rate": 3.457155536673279e-07, "loss": 0.8414, "step": 2957 }, { "epoch": 0.8847016599371915, "grad_norm": 2.171154737472534, "learning_rate": 3.439474004572724e-07, "loss": 0.905, "step": 2958 }, { "epoch": 0.8850007477194557, "grad_norm": 2.1605353355407715, "learning_rate": 3.4218361936213195e-07, "loss": 0.9076, "step": 2959 }, { "epoch": 0.8852998355017198, "grad_norm": 2.2206335067749023, "learning_rate": 3.4042421203812904e-07, "loss": 0.9016, "step": 2960 }, { "epoch": 0.8855989232839838, "grad_norm": 2.0372326374053955, "learning_rate": 3.386691801373754e-07, "loss": 0.8717, "step": 2961 }, { "epoch": 0.885898011066248, "grad_norm": 2.7249221801757812, "learning_rate": 3.369185253078794e-07, "loss": 0.8831, "step": 2962 }, { "epoch": 0.886197098848512, "grad_norm": 2.075125217437744, "learning_rate": 3.3517224919353555e-07, "loss": 0.8581, "step": 2963 }, { "epoch": 0.8864961866307761, "grad_norm": 2.3395864963531494, "learning_rate": 3.334303534341277e-07, "loss": 0.8449, "step": 2964 }, { "epoch": 0.8867952744130402, "grad_norm": 2.3829920291900635, "learning_rate": 3.3169283966532517e-07, "loss": 0.8845, "step": 2965 }, { "epoch": 0.8870943621953044, "grad_norm": 2.0599918365478516, "learning_rate": 3.2995970951868574e-07, "loss": 0.9131, "step": 2966 }, { "epoch": 0.8873934499775684, "grad_norm": 2.271725654602051, "learning_rate": 3.2823096462164915e-07, "loss": 0.8773, "step": 2967 }, { "epoch": 0.8876925377598325, "grad_norm": 2.3235039710998535, "learning_rate": 3.265066065975353e-07, "loss": 0.8829, "step": 2968 }, { "epoch": 0.8879916255420967, "grad_norm": 2.047645330429077, "learning_rate": 3.2478663706554724e-07, "loss": 0.8673, "step": 2969 }, { "epoch": 0.8882907133243607, "grad_norm": 2.1083030700683594, "learning_rate": 3.2307105764076694e-07, "loss": 0.8473, "step": 2970 }, { "epoch": 0.8885898011066248, "grad_norm": 2.319465160369873, "learning_rate": 3.213598699341547e-07, "loss": 0.9224, "step": 2971 }, { "epoch": 0.8888888888888888, "grad_norm": 2.1071813106536865, "learning_rate": 3.1965307555254343e-07, "loss": 0.8851, "step": 2972 }, { "epoch": 0.889187976671153, "grad_norm": 2.1072685718536377, "learning_rate": 3.1795067609864395e-07, "loss": 0.8546, "step": 2973 }, { "epoch": 0.8894870644534171, "grad_norm": 2.3492536544799805, "learning_rate": 3.162526731710386e-07, "loss": 0.9208, "step": 2974 }, { "epoch": 0.8897861522356811, "grad_norm": 2.1848936080932617, "learning_rate": 3.14559068364183e-07, "loss": 0.8841, "step": 2975 }, { "epoch": 0.8900852400179453, "grad_norm": 1.875930905342102, "learning_rate": 3.1286986326840076e-07, "loss": 0.8407, "step": 2976 }, { "epoch": 0.8903843278002094, "grad_norm": 2.38502836227417, "learning_rate": 3.1118505946988506e-07, "loss": 0.8107, "step": 2977 }, { "epoch": 0.8906834155824734, "grad_norm": 2.3817250728607178, "learning_rate": 3.095046585506967e-07, "loss": 0.9121, "step": 2978 }, { "epoch": 0.8909825033647375, "grad_norm": 2.5586397647857666, "learning_rate": 3.0782866208876163e-07, "loss": 0.8955, "step": 2979 }, { "epoch": 0.8912815911470017, "grad_norm": 2.3530220985412598, "learning_rate": 3.0615707165786937e-07, "loss": 0.8861, "step": 2980 }, { "epoch": 0.8915806789292657, "grad_norm": 2.422950267791748, "learning_rate": 3.044898888276726e-07, "loss": 0.8339, "step": 2981 }, { "epoch": 0.8918797667115298, "grad_norm": 2.5520896911621094, "learning_rate": 3.0282711516368524e-07, "loss": 0.8827, "step": 2982 }, { "epoch": 0.892178854493794, "grad_norm": 1.9717011451721191, "learning_rate": 3.011687522272816e-07, "loss": 0.9277, "step": 2983 }, { "epoch": 0.892477942276058, "grad_norm": 2.230227470397949, "learning_rate": 2.995148015756927e-07, "loss": 0.8914, "step": 2984 }, { "epoch": 0.8927770300583221, "grad_norm": 2.13443922996521, "learning_rate": 2.978652647620073e-07, "loss": 0.8269, "step": 2985 }, { "epoch": 0.8930761178405862, "grad_norm": 2.1153147220611572, "learning_rate": 2.962201433351697e-07, "loss": 0.8786, "step": 2986 }, { "epoch": 0.8933752056228503, "grad_norm": 2.1392316818237305, "learning_rate": 2.9457943883997696e-07, "loss": 0.8784, "step": 2987 }, { "epoch": 0.8936742934051144, "grad_norm": 2.3595526218414307, "learning_rate": 2.929431528170801e-07, "loss": 0.8652, "step": 2988 }, { "epoch": 0.8939733811873785, "grad_norm": 2.24009370803833, "learning_rate": 2.91311286802981e-07, "loss": 0.8466, "step": 2989 }, { "epoch": 0.8942724689696426, "grad_norm": 2.346776008605957, "learning_rate": 2.8968384233002855e-07, "loss": 0.8821, "step": 2990 }, { "epoch": 0.8945715567519067, "grad_norm": 1.892758846282959, "learning_rate": 2.8806082092642186e-07, "loss": 0.7848, "step": 2991 }, { "epoch": 0.8948706445341708, "grad_norm": 2.147080421447754, "learning_rate": 2.8644222411620793e-07, "loss": 0.9254, "step": 2992 }, { "epoch": 0.8951697323164348, "grad_norm": 2.356502056121826, "learning_rate": 2.848280534192777e-07, "loss": 0.874, "step": 2993 }, { "epoch": 0.895468820098699, "grad_norm": 1.928270697593689, "learning_rate": 2.832183103513636e-07, "loss": 0.8858, "step": 2994 }, { "epoch": 0.895767907880963, "grad_norm": 2.3531572818756104, "learning_rate": 2.816129964240433e-07, "loss": 0.8603, "step": 2995 }, { "epoch": 0.8960669956632271, "grad_norm": 2.3523988723754883, "learning_rate": 2.800121131447353e-07, "loss": 0.8868, "step": 2996 }, { "epoch": 0.8963660834454913, "grad_norm": 2.175767660140991, "learning_rate": 2.784156620166983e-07, "loss": 0.826, "step": 2997 }, { "epoch": 0.8966651712277554, "grad_norm": 2.1794474124908447, "learning_rate": 2.7682364453902487e-07, "loss": 0.8065, "step": 2998 }, { "epoch": 0.8969642590100194, "grad_norm": 2.3044538497924805, "learning_rate": 2.7523606220664854e-07, "loss": 0.828, "step": 2999 }, { "epoch": 0.8972633467922836, "grad_norm": 2.036825180053711, "learning_rate": 2.736529165103385e-07, "loss": 0.892, "step": 3000 }, { "epoch": 0.8975624345745477, "grad_norm": 2.101569652557373, "learning_rate": 2.7207420893669455e-07, "loss": 0.8831, "step": 3001 }, { "epoch": 0.8978615223568117, "grad_norm": 2.1484687328338623, "learning_rate": 2.704999409681508e-07, "loss": 0.883, "step": 3002 }, { "epoch": 0.8981606101390758, "grad_norm": 2.6164681911468506, "learning_rate": 2.6893011408297196e-07, "loss": 0.8895, "step": 3003 }, { "epoch": 0.89845969792134, "grad_norm": 2.3043830394744873, "learning_rate": 2.6736472975525564e-07, "loss": 0.9056, "step": 3004 }, { "epoch": 0.898758785703604, "grad_norm": 3.3272082805633545, "learning_rate": 2.65803789454922e-07, "loss": 0.8908, "step": 3005 }, { "epoch": 0.8990578734858681, "grad_norm": 2.664940357208252, "learning_rate": 2.6424729464772316e-07, "loss": 0.9109, "step": 3006 }, { "epoch": 0.8993569612681322, "grad_norm": 2.7418742179870605, "learning_rate": 2.626952467952343e-07, "loss": 0.9101, "step": 3007 }, { "epoch": 0.8996560490503963, "grad_norm": 2.0758965015411377, "learning_rate": 2.611476473548552e-07, "loss": 0.9327, "step": 3008 }, { "epoch": 0.8999551368326604, "grad_norm": 2.5876264572143555, "learning_rate": 2.596044977798101e-07, "loss": 0.8502, "step": 3009 }, { "epoch": 0.9002542246149244, "grad_norm": 1.9810014963150024, "learning_rate": 2.5806579951914214e-07, "loss": 0.8886, "step": 3010 }, { "epoch": 0.9005533123971886, "grad_norm": 6.275445938110352, "learning_rate": 2.5653155401771655e-07, "loss": 0.9099, "step": 3011 }, { "epoch": 0.9008524001794527, "grad_norm": 2.1561450958251953, "learning_rate": 2.550017627162166e-07, "loss": 0.8039, "step": 3012 }, { "epoch": 0.9011514879617167, "grad_norm": 2.364910125732422, "learning_rate": 2.534764270511431e-07, "loss": 0.8203, "step": 3013 }, { "epoch": 0.9014505757439809, "grad_norm": 2.3724257946014404, "learning_rate": 2.5195554845481306e-07, "loss": 0.8224, "step": 3014 }, { "epoch": 0.901749663526245, "grad_norm": 2.410355806350708, "learning_rate": 2.5043912835535867e-07, "loss": 0.8474, "step": 3015 }, { "epoch": 0.902048751308509, "grad_norm": 2.050471782684326, "learning_rate": 2.4892716817672304e-07, "loss": 0.9108, "step": 3016 }, { "epoch": 0.9023478390907731, "grad_norm": 2.17974591255188, "learning_rate": 2.474196693386649e-07, "loss": 0.8572, "step": 3017 }, { "epoch": 0.9026469268730373, "grad_norm": 2.5696873664855957, "learning_rate": 2.45916633256752e-07, "loss": 0.8351, "step": 3018 }, { "epoch": 0.9029460146553013, "grad_norm": 2.2004027366638184, "learning_rate": 2.4441806134236137e-07, "loss": 0.8779, "step": 3019 }, { "epoch": 0.9032451024375654, "grad_norm": 2.191739320755005, "learning_rate": 2.4292395500267796e-07, "loss": 0.9152, "step": 3020 }, { "epoch": 0.9035441902198296, "grad_norm": 2.2246885299682617, "learning_rate": 2.4143431564069344e-07, "loss": 0.8526, "step": 3021 }, { "epoch": 0.9038432780020936, "grad_norm": 2.4087464809417725, "learning_rate": 2.39949144655206e-07, "loss": 0.9128, "step": 3022 }, { "epoch": 0.9041423657843577, "grad_norm": 2.1534271240234375, "learning_rate": 2.38468443440818e-07, "loss": 0.8357, "step": 3023 }, { "epoch": 0.9044414535666218, "grad_norm": 2.4536380767822266, "learning_rate": 2.3699221338793155e-07, "loss": 0.862, "step": 3024 }, { "epoch": 0.9047405413488859, "grad_norm": 2.1809070110321045, "learning_rate": 2.355204558827534e-07, "loss": 0.8531, "step": 3025 }, { "epoch": 0.90503962913115, "grad_norm": 1.9712307453155518, "learning_rate": 2.340531723072914e-07, "loss": 0.8783, "step": 3026 }, { "epoch": 0.905338716913414, "grad_norm": 2.34480881690979, "learning_rate": 2.3259036403934843e-07, "loss": 0.8921, "step": 3027 }, { "epoch": 0.9056378046956782, "grad_norm": 2.6001765727996826, "learning_rate": 2.3113203245252734e-07, "loss": 0.919, "step": 3028 }, { "epoch": 0.9059368924779423, "grad_norm": 2.0059497356414795, "learning_rate": 2.2967817891622724e-07, "loss": 0.884, "step": 3029 }, { "epoch": 0.9062359802602064, "grad_norm": 2.799612283706665, "learning_rate": 2.2822880479564325e-07, "loss": 0.8686, "step": 3030 }, { "epoch": 0.9065350680424704, "grad_norm": 2.0856387615203857, "learning_rate": 2.2678391145176115e-07, "loss": 0.8503, "step": 3031 }, { "epoch": 0.9068341558247346, "grad_norm": 2.0391693115234375, "learning_rate": 2.2534350024136232e-07, "loss": 0.7968, "step": 3032 }, { "epoch": 0.9071332436069987, "grad_norm": 2.1602671146392822, "learning_rate": 2.2390757251701756e-07, "loss": 0.8915, "step": 3033 }, { "epoch": 0.9074323313892627, "grad_norm": 2.58516263961792, "learning_rate": 2.224761296270883e-07, "loss": 0.8649, "step": 3034 }, { "epoch": 0.9077314191715269, "grad_norm": 2.4945385456085205, "learning_rate": 2.2104917291572435e-07, "loss": 0.8291, "step": 3035 }, { "epoch": 0.908030506953791, "grad_norm": 2.350116729736328, "learning_rate": 2.196267037228633e-07, "loss": 0.833, "step": 3036 }, { "epoch": 0.908329594736055, "grad_norm": 1.9256478548049927, "learning_rate": 2.1820872338422838e-07, "loss": 0.8816, "step": 3037 }, { "epoch": 0.9086286825183191, "grad_norm": 2.4590001106262207, "learning_rate": 2.1679523323132835e-07, "loss": 0.8846, "step": 3038 }, { "epoch": 0.9089277703005832, "grad_norm": 2.239070415496826, "learning_rate": 2.153862345914548e-07, "loss": 0.8277, "step": 3039 }, { "epoch": 0.9092268580828473, "grad_norm": 1.9271856546401978, "learning_rate": 2.139817287876822e-07, "loss": 0.8955, "step": 3040 }, { "epoch": 0.9095259458651114, "grad_norm": 2.2852203845977783, "learning_rate": 2.125817171388672e-07, "loss": 0.8704, "step": 3041 }, { "epoch": 0.9098250336473755, "grad_norm": 2.42573881149292, "learning_rate": 2.111862009596427e-07, "loss": 0.8654, "step": 3042 }, { "epoch": 0.9101241214296396, "grad_norm": 2.1434457302093506, "learning_rate": 2.097951815604249e-07, "loss": 0.8584, "step": 3043 }, { "epoch": 0.9104232092119037, "grad_norm": 1.9392791986465454, "learning_rate": 2.0840866024740502e-07, "loss": 0.8923, "step": 3044 }, { "epoch": 0.9107222969941677, "grad_norm": 2.2277088165283203, "learning_rate": 2.070266383225511e-07, "loss": 0.8629, "step": 3045 }, { "epoch": 0.9110213847764319, "grad_norm": 2.0353071689605713, "learning_rate": 2.0564911708360447e-07, "loss": 0.8734, "step": 3046 }, { "epoch": 0.911320472558696, "grad_norm": 2.1576738357543945, "learning_rate": 2.0427609782408265e-07, "loss": 0.9037, "step": 3047 }, { "epoch": 0.91161956034096, "grad_norm": 2.2746191024780273, "learning_rate": 2.029075818332754e-07, "loss": 0.9984, "step": 3048 }, { "epoch": 0.9119186481232242, "grad_norm": 2.5469157695770264, "learning_rate": 2.0154357039624317e-07, "loss": 0.8573, "step": 3049 }, { "epoch": 0.9122177359054883, "grad_norm": 2.3097524642944336, "learning_rate": 2.0018406479381525e-07, "loss": 0.9365, "step": 3050 }, { "epoch": 0.9125168236877523, "grad_norm": 2.197268486022949, "learning_rate": 1.9882906630259158e-07, "loss": 0.8515, "step": 3051 }, { "epoch": 0.9128159114700164, "grad_norm": 2.122631311416626, "learning_rate": 1.9747857619494105e-07, "loss": 0.8634, "step": 3052 }, { "epoch": 0.9131149992522806, "grad_norm": 2.227604389190674, "learning_rate": 1.961325957389959e-07, "loss": 0.9197, "step": 3053 }, { "epoch": 0.9134140870345446, "grad_norm": 2.354135274887085, "learning_rate": 1.9479112619865513e-07, "loss": 0.8778, "step": 3054 }, { "epoch": 0.9137131748168087, "grad_norm": 2.8159165382385254, "learning_rate": 1.934541688335828e-07, "loss": 0.8818, "step": 3055 }, { "epoch": 0.9140122625990729, "grad_norm": 2.0981738567352295, "learning_rate": 1.9212172489920632e-07, "loss": 0.8947, "step": 3056 }, { "epoch": 0.9143113503813369, "grad_norm": 2.213423013687134, "learning_rate": 1.9079379564671207e-07, "loss": 0.8706, "step": 3057 }, { "epoch": 0.914610438163601, "grad_norm": 2.3026528358459473, "learning_rate": 1.8947038232304981e-07, "loss": 0.951, "step": 3058 }, { "epoch": 0.9149095259458652, "grad_norm": 2.3802030086517334, "learning_rate": 1.8815148617092772e-07, "loss": 0.8482, "step": 3059 }, { "epoch": 0.9152086137281292, "grad_norm": 2.0675158500671387, "learning_rate": 1.8683710842881174e-07, "loss": 0.8993, "step": 3060 }, { "epoch": 0.9155077015103933, "grad_norm": 2.3052334785461426, "learning_rate": 1.8552725033092635e-07, "loss": 0.8587, "step": 3061 }, { "epoch": 0.9158067892926574, "grad_norm": 1.8757573366165161, "learning_rate": 1.8422191310725147e-07, "loss": 0.8243, "step": 3062 }, { "epoch": 0.9161058770749215, "grad_norm": 3.4307289123535156, "learning_rate": 1.8292109798352054e-07, "loss": 0.8587, "step": 3063 }, { "epoch": 0.9164049648571856, "grad_norm": 2.1494503021240234, "learning_rate": 1.816248061812226e-07, "loss": 0.8867, "step": 3064 }, { "epoch": 0.9167040526394497, "grad_norm": 2.195208787918091, "learning_rate": 1.8033303891759835e-07, "loss": 0.8731, "step": 3065 }, { "epoch": 0.9170031404217138, "grad_norm": 1.9997055530548096, "learning_rate": 1.7904579740563921e-07, "loss": 0.817, "step": 3066 }, { "epoch": 0.9173022282039779, "grad_norm": 2.1471199989318848, "learning_rate": 1.7776308285408826e-07, "loss": 0.8012, "step": 3067 }, { "epoch": 0.917601315986242, "grad_norm": 2.096287727355957, "learning_rate": 1.7648489646743648e-07, "loss": 0.8595, "step": 3068 }, { "epoch": 0.917900403768506, "grad_norm": 2.173039197921753, "learning_rate": 1.752112394459232e-07, "loss": 0.9354, "step": 3069 }, { "epoch": 0.9181994915507702, "grad_norm": 1.762874960899353, "learning_rate": 1.7394211298553508e-07, "loss": 0.8141, "step": 3070 }, { "epoch": 0.9184985793330342, "grad_norm": 2.2695858478546143, "learning_rate": 1.726775182780044e-07, "loss": 0.9024, "step": 3071 }, { "epoch": 0.9187976671152983, "grad_norm": 2.0510189533233643, "learning_rate": 1.7141745651080565e-07, "loss": 0.8649, "step": 3072 }, { "epoch": 0.9190967548975625, "grad_norm": 2.170546054840088, "learning_rate": 1.7016192886716132e-07, "loss": 0.8914, "step": 3073 }, { "epoch": 0.9193958426798265, "grad_norm": 2.138303756713867, "learning_rate": 1.689109365260333e-07, "loss": 0.8363, "step": 3074 }, { "epoch": 0.9196949304620906, "grad_norm": 1.9595030546188354, "learning_rate": 1.676644806621247e-07, "loss": 0.885, "step": 3075 }, { "epoch": 0.9199940182443547, "grad_norm": 2.0969228744506836, "learning_rate": 1.664225624458793e-07, "loss": 0.7819, "step": 3076 }, { "epoch": 0.9202931060266188, "grad_norm": 2.252119541168213, "learning_rate": 1.651851830434803e-07, "loss": 0.9165, "step": 3077 }, { "epoch": 0.9205921938088829, "grad_norm": 2.1225643157958984, "learning_rate": 1.6395234361684943e-07, "loss": 0.8525, "step": 3078 }, { "epoch": 0.920891281591147, "grad_norm": 2.2565529346466064, "learning_rate": 1.6272404532364337e-07, "loss": 0.8503, "step": 3079 }, { "epoch": 0.9211903693734111, "grad_norm": 2.354475498199463, "learning_rate": 1.615002893172557e-07, "loss": 0.864, "step": 3080 }, { "epoch": 0.9214894571556752, "grad_norm": 2.4039580821990967, "learning_rate": 1.6028107674681547e-07, "loss": 0.9398, "step": 3081 }, { "epoch": 0.9217885449379393, "grad_norm": 2.1877338886260986, "learning_rate": 1.5906640875718525e-07, "loss": 0.8869, "step": 3082 }, { "epoch": 0.9220876327202033, "grad_norm": 2.081979751586914, "learning_rate": 1.5785628648895767e-07, "loss": 0.8504, "step": 3083 }, { "epoch": 0.9223867205024675, "grad_norm": 2.0050604343414307, "learning_rate": 1.5665071107845987e-07, "loss": 0.8094, "step": 3084 }, { "epoch": 0.9226858082847316, "grad_norm": 2.274786949157715, "learning_rate": 1.5544968365774792e-07, "loss": 0.905, "step": 3085 }, { "epoch": 0.9229848960669956, "grad_norm": 2.213257312774658, "learning_rate": 1.542532053546081e-07, "loss": 0.8517, "step": 3086 }, { "epoch": 0.9232839838492598, "grad_norm": 2.173444986343384, "learning_rate": 1.5306127729255382e-07, "loss": 0.8496, "step": 3087 }, { "epoch": 0.9235830716315239, "grad_norm": 2.43509840965271, "learning_rate": 1.5187390059082706e-07, "loss": 0.8414, "step": 3088 }, { "epoch": 0.9238821594137879, "grad_norm": 2.2792136669158936, "learning_rate": 1.5069107636439484e-07, "loss": 0.8528, "step": 3089 }, { "epoch": 0.924181247196052, "grad_norm": 2.0251007080078125, "learning_rate": 1.4951280572394977e-07, "loss": 0.8929, "step": 3090 }, { "epoch": 0.9244803349783162, "grad_norm": 2.352944850921631, "learning_rate": 1.483390897759096e-07, "loss": 0.9345, "step": 3091 }, { "epoch": 0.9247794227605802, "grad_norm": 2.184062957763672, "learning_rate": 1.4716992962241272e-07, "loss": 0.8505, "step": 3092 }, { "epoch": 0.9250785105428443, "grad_norm": 2.255427360534668, "learning_rate": 1.4600532636132147e-07, "loss": 0.8272, "step": 3093 }, { "epoch": 0.9253775983251085, "grad_norm": 2.3590776920318604, "learning_rate": 1.4484528108621942e-07, "loss": 0.839, "step": 3094 }, { "epoch": 0.9256766861073725, "grad_norm": 2.5181002616882324, "learning_rate": 1.4368979488640855e-07, "loss": 0.8629, "step": 3095 }, { "epoch": 0.9259757738896366, "grad_norm": 2.410407304763794, "learning_rate": 1.4253886884691148e-07, "loss": 0.9012, "step": 3096 }, { "epoch": 0.9262748616719007, "grad_norm": 1.9667772054672241, "learning_rate": 1.4139250404846704e-07, "loss": 0.8571, "step": 3097 }, { "epoch": 0.9265739494541648, "grad_norm": 2.3049490451812744, "learning_rate": 1.4025070156753196e-07, "loss": 0.8829, "step": 3098 }, { "epoch": 0.9268730372364289, "grad_norm": 2.4515442848205566, "learning_rate": 1.391134624762791e-07, "loss": 0.898, "step": 3099 }, { "epoch": 0.927172125018693, "grad_norm": 2.2549936771392822, "learning_rate": 1.3798078784259594e-07, "loss": 0.8761, "step": 3100 }, { "epoch": 0.9274712128009571, "grad_norm": 2.9878904819488525, "learning_rate": 1.368526787300839e-07, "loss": 0.854, "step": 3101 }, { "epoch": 0.9277703005832212, "grad_norm": 2.534759759902954, "learning_rate": 1.3572913619805616e-07, "loss": 0.9293, "step": 3102 }, { "epoch": 0.9280693883654852, "grad_norm": 2.327846050262451, "learning_rate": 1.3461016130153993e-07, "loss": 0.9124, "step": 3103 }, { "epoch": 0.9283684761477493, "grad_norm": 2.0768375396728516, "learning_rate": 1.3349575509127244e-07, "loss": 0.8296, "step": 3104 }, { "epoch": 0.9286675639300135, "grad_norm": 2.0695509910583496, "learning_rate": 1.3238591861369943e-07, "loss": 0.826, "step": 3105 }, { "epoch": 0.9289666517122775, "grad_norm": 2.0236189365386963, "learning_rate": 1.3128065291097724e-07, "loss": 0.7957, "step": 3106 }, { "epoch": 0.9292657394945416, "grad_norm": 2.3005383014678955, "learning_rate": 1.3017995902097013e-07, "loss": 0.9105, "step": 3107 }, { "epoch": 0.9295648272768058, "grad_norm": 2.805222749710083, "learning_rate": 1.290838379772491e-07, "loss": 0.8906, "step": 3108 }, { "epoch": 0.9298639150590698, "grad_norm": 2.5039520263671875, "learning_rate": 1.2799229080909026e-07, "loss": 0.8456, "step": 3109 }, { "epoch": 0.9301630028413339, "grad_norm": 1.918710708618164, "learning_rate": 1.2690531854147537e-07, "loss": 0.8912, "step": 3110 }, { "epoch": 0.9304620906235981, "grad_norm": 2.0471928119659424, "learning_rate": 1.2582292219509184e-07, "loss": 0.8629, "step": 3111 }, { "epoch": 0.9307611784058621, "grad_norm": 2.3498172760009766, "learning_rate": 1.2474510278632779e-07, "loss": 0.8958, "step": 3112 }, { "epoch": 0.9310602661881262, "grad_norm": 2.304612874984741, "learning_rate": 1.2367186132727415e-07, "loss": 0.8424, "step": 3113 }, { "epoch": 0.9313593539703903, "grad_norm": 2.3293817043304443, "learning_rate": 1.2260319882572425e-07, "loss": 0.8736, "step": 3114 }, { "epoch": 0.9316584417526544, "grad_norm": 2.1251609325408936, "learning_rate": 1.2153911628517036e-07, "loss": 0.8849, "step": 3115 }, { "epoch": 0.9319575295349185, "grad_norm": 2.4893758296966553, "learning_rate": 1.2047961470480485e-07, "loss": 0.8765, "step": 3116 }, { "epoch": 0.9322566173171826, "grad_norm": 2.8706533908843994, "learning_rate": 1.1942469507951803e-07, "loss": 0.9192, "step": 3117 }, { "epoch": 0.9325557050994467, "grad_norm": 3.054084300994873, "learning_rate": 1.1837435839989808e-07, "loss": 0.8608, "step": 3118 }, { "epoch": 0.9328547928817108, "grad_norm": 2.2071659564971924, "learning_rate": 1.1732860565222936e-07, "loss": 0.8838, "step": 3119 }, { "epoch": 0.9331538806639749, "grad_norm": 2.7025411128997803, "learning_rate": 1.162874378184925e-07, "loss": 0.8791, "step": 3120 }, { "epoch": 0.9334529684462389, "grad_norm": 2.2749032974243164, "learning_rate": 1.1525085587636209e-07, "loss": 0.9004, "step": 3121 }, { "epoch": 0.9337520562285031, "grad_norm": 2.0823028087615967, "learning_rate": 1.1421886079920619e-07, "loss": 0.8398, "step": 3122 }, { "epoch": 0.9340511440107672, "grad_norm": 2.5491020679473877, "learning_rate": 1.1319145355608684e-07, "loss": 0.8428, "step": 3123 }, { "epoch": 0.9343502317930312, "grad_norm": 2.091749429702759, "learning_rate": 1.1216863511175736e-07, "loss": 0.8829, "step": 3124 }, { "epoch": 0.9346493195752954, "grad_norm": 1.9744535684585571, "learning_rate": 1.111504064266622e-07, "loss": 0.8626, "step": 3125 }, { "epoch": 0.9349484073575595, "grad_norm": 2.148308038711548, "learning_rate": 1.1013676845693544e-07, "loss": 0.8978, "step": 3126 }, { "epoch": 0.9352474951398235, "grad_norm": 2.270429849624634, "learning_rate": 1.0912772215440182e-07, "loss": 0.9031, "step": 3127 }, { "epoch": 0.9355465829220876, "grad_norm": 2.096259593963623, "learning_rate": 1.0812326846657228e-07, "loss": 0.8672, "step": 3128 }, { "epoch": 0.9358456707043518, "grad_norm": 2.2482872009277344, "learning_rate": 1.0712340833664737e-07, "loss": 0.9353, "step": 3129 }, { "epoch": 0.9361447584866158, "grad_norm": 2.049384832382202, "learning_rate": 1.0612814270351324e-07, "loss": 0.8707, "step": 3130 }, { "epoch": 0.9364438462688799, "grad_norm": 2.4463841915130615, "learning_rate": 1.0513747250174123e-07, "loss": 0.8912, "step": 3131 }, { "epoch": 0.9367429340511441, "grad_norm": 2.4143035411834717, "learning_rate": 1.0415139866158774e-07, "loss": 0.8558, "step": 3132 }, { "epoch": 0.9370420218334081, "grad_norm": 2.3216850757598877, "learning_rate": 1.0316992210899435e-07, "loss": 0.8732, "step": 3133 }, { "epoch": 0.9373411096156722, "grad_norm": 2.182877779006958, "learning_rate": 1.0219304376558492e-07, "loss": 0.8637, "step": 3134 }, { "epoch": 0.9376401973979362, "grad_norm": 2.114579677581787, "learning_rate": 1.0122076454866347e-07, "loss": 0.8972, "step": 3135 }, { "epoch": 0.9379392851802004, "grad_norm": 2.8472323417663574, "learning_rate": 1.0025308537121859e-07, "loss": 0.9353, "step": 3136 }, { "epoch": 0.9382383729624645, "grad_norm": 2.277369499206543, "learning_rate": 9.929000714191838e-08, "loss": 0.7837, "step": 3137 }, { "epoch": 0.9385374607447285, "grad_norm": 2.1701290607452393, "learning_rate": 9.833153076510893e-08, "loss": 0.8199, "step": 3138 }, { "epoch": 0.9388365485269927, "grad_norm": 2.087203025817871, "learning_rate": 9.737765714081748e-08, "loss": 0.8451, "step": 3139 }, { "epoch": 0.9391356363092568, "grad_norm": 2.452965021133423, "learning_rate": 9.642838716474645e-08, "loss": 0.8746, "step": 3140 }, { "epoch": 0.9394347240915208, "grad_norm": 2.205457925796509, "learning_rate": 9.548372172827946e-08, "loss": 0.9043, "step": 3141 }, { "epoch": 0.9397338118737849, "grad_norm": 2.230736255645752, "learning_rate": 9.454366171847196e-08, "loss": 0.8123, "step": 3142 }, { "epoch": 0.9400328996560491, "grad_norm": 2.3335249423980713, "learning_rate": 9.360820801805726e-08, "loss": 0.9415, "step": 3143 }, { "epoch": 0.9403319874383131, "grad_norm": 2.261219024658203, "learning_rate": 9.267736150544271e-08, "loss": 0.862, "step": 3144 }, { "epoch": 0.9406310752205772, "grad_norm": 2.2626776695251465, "learning_rate": 9.175112305470913e-08, "loss": 0.9013, "step": 3145 }, { "epoch": 0.9409301630028414, "grad_norm": 2.2166965007781982, "learning_rate": 9.082949353561187e-08, "loss": 0.8734, "step": 3146 }, { "epoch": 0.9412292507851054, "grad_norm": 2.2106010913848877, "learning_rate": 8.991247381357593e-08, "loss": 0.8722, "step": 3147 }, { "epoch": 0.9415283385673695, "grad_norm": 2.184335947036743, "learning_rate": 8.900006474969913e-08, "loss": 0.8511, "step": 3148 }, { "epoch": 0.9418274263496336, "grad_norm": 2.23926043510437, "learning_rate": 8.809226720075059e-08, "loss": 0.9227, "step": 3149 }, { "epoch": 0.9421265141318977, "grad_norm": 2.0699987411499023, "learning_rate": 8.718908201916676e-08, "loss": 0.8577, "step": 3150 }, { "epoch": 0.9424256019141618, "grad_norm": 2.1489603519439697, "learning_rate": 8.629051005305478e-08, "loss": 0.8725, "step": 3151 }, { "epoch": 0.9427246896964259, "grad_norm": 2.144702911376953, "learning_rate": 8.539655214618969e-08, "loss": 0.8633, "step": 3152 }, { "epoch": 0.94302377747869, "grad_norm": 2.1517012119293213, "learning_rate": 8.450720913801336e-08, "loss": 0.8525, "step": 3153 }, { "epoch": 0.9433228652609541, "grad_norm": 2.7166996002197266, "learning_rate": 8.362248186363441e-08, "loss": 0.8661, "step": 3154 }, { "epoch": 0.9436219530432182, "grad_norm": 2.5095560550689697, "learning_rate": 8.274237115382777e-08, "loss": 0.8899, "step": 3155 }, { "epoch": 0.9439210408254822, "grad_norm": 1.9585520029067993, "learning_rate": 8.186687783503289e-08, "loss": 0.8388, "step": 3156 }, { "epoch": 0.9442201286077464, "grad_norm": 1.9611221551895142, "learning_rate": 8.09960027293516e-08, "loss": 0.865, "step": 3157 }, { "epoch": 0.9445192163900105, "grad_norm": 2.1907198429107666, "learning_rate": 8.012974665455308e-08, "loss": 0.8459, "step": 3158 }, { "epoch": 0.9448183041722745, "grad_norm": 1.9760518074035645, "learning_rate": 7.926811042406557e-08, "loss": 0.8583, "step": 3159 }, { "epoch": 0.9451173919545387, "grad_norm": 2.5390756130218506, "learning_rate": 7.841109484698184e-08, "loss": 0.8441, "step": 3160 }, { "epoch": 0.9454164797368028, "grad_norm": 2.3796050548553467, "learning_rate": 7.755870072805316e-08, "loss": 0.9199, "step": 3161 }, { "epoch": 0.9457155675190668, "grad_norm": 2.2138900756835938, "learning_rate": 7.67109288676926e-08, "loss": 0.8615, "step": 3162 }, { "epoch": 0.9460146553013309, "grad_norm": 2.243096113204956, "learning_rate": 7.586778006197337e-08, "loss": 0.8794, "step": 3163 }, { "epoch": 0.9463137430835951, "grad_norm": 2.174543857574463, "learning_rate": 7.50292551026277e-08, "loss": 0.8506, "step": 3164 }, { "epoch": 0.9466128308658591, "grad_norm": 2.1588523387908936, "learning_rate": 7.419535477704354e-08, "loss": 0.893, "step": 3165 }, { "epoch": 0.9469119186481232, "grad_norm": 2.700003147125244, "learning_rate": 7.336607986826839e-08, "loss": 0.8948, "step": 3166 }, { "epoch": 0.9472110064303874, "grad_norm": 2.2450125217437744, "learning_rate": 7.254143115500711e-08, "loss": 0.9378, "step": 3167 }, { "epoch": 0.9475100942126514, "grad_norm": 2.1712613105773926, "learning_rate": 7.17214094116181e-08, "loss": 0.9032, "step": 3168 }, { "epoch": 0.9478091819949155, "grad_norm": 2.061169385910034, "learning_rate": 7.090601540811648e-08, "loss": 0.8385, "step": 3169 }, { "epoch": 0.9481082697771797, "grad_norm": 2.3107004165649414, "learning_rate": 7.009524991017091e-08, "loss": 0.8972, "step": 3170 }, { "epoch": 0.9484073575594437, "grad_norm": 2.130436420440674, "learning_rate": 6.928911367910573e-08, "loss": 0.8358, "step": 3171 }, { "epoch": 0.9487064453417078, "grad_norm": 1.8804676532745361, "learning_rate": 6.848760747189598e-08, "loss": 0.8303, "step": 3172 }, { "epoch": 0.9490055331239718, "grad_norm": 2.337007522583008, "learning_rate": 6.769073204117016e-08, "loss": 0.8639, "step": 3173 }, { "epoch": 0.949304620906236, "grad_norm": 2.2401368618011475, "learning_rate": 6.689848813520805e-08, "loss": 0.8593, "step": 3174 }, { "epoch": 0.9496037086885001, "grad_norm": 2.1149163246154785, "learning_rate": 6.611087649794124e-08, "loss": 0.9015, "step": 3175 }, { "epoch": 0.9499027964707641, "grad_norm": 1.9981048107147217, "learning_rate": 6.532789786895033e-08, "loss": 0.8349, "step": 3176 }, { "epoch": 0.9502018842530283, "grad_norm": 2.1590218544006348, "learning_rate": 6.454955298346555e-08, "loss": 0.8133, "step": 3177 }, { "epoch": 0.9505009720352924, "grad_norm": 2.1845970153808594, "learning_rate": 6.377584257236724e-08, "loss": 0.8245, "step": 3178 }, { "epoch": 0.9508000598175564, "grad_norm": 2.182326555252075, "learning_rate": 6.300676736218258e-08, "loss": 0.8393, "step": 3179 }, { "epoch": 0.9510991475998205, "grad_norm": 2.257434129714966, "learning_rate": 6.224232807508667e-08, "loss": 0.8856, "step": 3180 }, { "epoch": 0.9513982353820847, "grad_norm": 2.1183433532714844, "learning_rate": 6.148252542890198e-08, "loss": 0.8869, "step": 3181 }, { "epoch": 0.9516973231643487, "grad_norm": 2.2285399436950684, "learning_rate": 6.072736013709557e-08, "loss": 0.8094, "step": 3182 }, { "epoch": 0.9519964109466128, "grad_norm": 3.4585511684417725, "learning_rate": 5.997683290878131e-08, "loss": 0.8941, "step": 3183 }, { "epoch": 0.952295498728877, "grad_norm": 1.9991477727890015, "learning_rate": 5.923094444871713e-08, "loss": 0.8084, "step": 3184 }, { "epoch": 0.952594586511141, "grad_norm": 2.074406385421753, "learning_rate": 5.848969545730554e-08, "loss": 0.9, "step": 3185 }, { "epoch": 0.9528936742934051, "grad_norm": 2.9580602645874023, "learning_rate": 5.775308663059309e-08, "loss": 0.9007, "step": 3186 }, { "epoch": 0.9531927620756692, "grad_norm": 2.017364263534546, "learning_rate": 5.702111866026705e-08, "loss": 0.8059, "step": 3187 }, { "epoch": 0.9534918498579333, "grad_norm": 2.1277098655700684, "learning_rate": 5.629379223365872e-08, "loss": 0.8706, "step": 3188 }, { "epoch": 0.9537909376401974, "grad_norm": 2.059542179107666, "learning_rate": 5.557110803374066e-08, "loss": 0.9001, "step": 3189 }, { "epoch": 0.9540900254224615, "grad_norm": 2.0158653259277344, "learning_rate": 5.485306673912616e-08, "loss": 0.8406, "step": 3190 }, { "epoch": 0.9543891132047256, "grad_norm": 1.9723950624465942, "learning_rate": 5.413966902406753e-08, "loss": 0.8643, "step": 3191 }, { "epoch": 0.9546882009869897, "grad_norm": 3.0057272911071777, "learning_rate": 5.343091555845781e-08, "loss": 0.8974, "step": 3192 }, { "epoch": 0.9549872887692538, "grad_norm": 2.209505558013916, "learning_rate": 5.272680700783073e-08, "loss": 0.8664, "step": 3193 }, { "epoch": 0.9552863765515178, "grad_norm": 2.107618808746338, "learning_rate": 5.2027344033354077e-08, "loss": 0.8866, "step": 3194 }, { "epoch": 0.955585464333782, "grad_norm": 2.018967390060425, "learning_rate": 5.1332527291837465e-08, "loss": 0.919, "step": 3195 }, { "epoch": 0.9558845521160461, "grad_norm": 2.184631586074829, "learning_rate": 5.06423574357251e-08, "loss": 0.8654, "step": 3196 }, { "epoch": 0.9561836398983101, "grad_norm": 2.344130039215088, "learning_rate": 4.9956835113099676e-08, "loss": 0.8607, "step": 3197 }, { "epoch": 0.9564827276805743, "grad_norm": 2.4150784015655518, "learning_rate": 4.927596096767795e-08, "loss": 0.8443, "step": 3198 }, { "epoch": 0.9567818154628384, "grad_norm": 2.020977020263672, "learning_rate": 4.8599735638812373e-08, "loss": 0.8749, "step": 3199 }, { "epoch": 0.9570809032451024, "grad_norm": 2.0778136253356934, "learning_rate": 4.7928159761490566e-08, "loss": 0.813, "step": 3200 }, { "epoch": 0.9573799910273665, "grad_norm": 2.6476290225982666, "learning_rate": 4.7261233966334196e-08, "loss": 0.8716, "step": 3201 }, { "epoch": 0.9576790788096307, "grad_norm": 2.015989065170288, "learning_rate": 4.659895887959787e-08, "loss": 0.9, "step": 3202 }, { "epoch": 0.9579781665918947, "grad_norm": 2.1230900287628174, "learning_rate": 4.594133512317023e-08, "loss": 0.8899, "step": 3203 }, { "epoch": 0.9582772543741588, "grad_norm": 2.5393989086151123, "learning_rate": 4.528836331457065e-08, "loss": 0.94, "step": 3204 }, { "epoch": 0.958576342156423, "grad_norm": 2.0683248043060303, "learning_rate": 4.4640044066951994e-08, "loss": 0.8335, "step": 3205 }, { "epoch": 0.958875429938687, "grad_norm": 2.0269734859466553, "learning_rate": 4.399637798909673e-08, "loss": 0.8482, "step": 3206 }, { "epoch": 0.9591745177209511, "grad_norm": 2.2300469875335693, "learning_rate": 4.335736568541915e-08, "loss": 0.8583, "step": 3207 }, { "epoch": 0.9594736055032151, "grad_norm": 2.7580301761627197, "learning_rate": 4.272300775596205e-08, "loss": 0.8787, "step": 3208 }, { "epoch": 0.9597726932854793, "grad_norm": 2.472670316696167, "learning_rate": 4.2093304796399504e-08, "loss": 0.8016, "step": 3209 }, { "epoch": 0.9600717810677434, "grad_norm": 1.9419887065887451, "learning_rate": 4.146825739803295e-08, "loss": 0.8602, "step": 3210 }, { "epoch": 0.9603708688500074, "grad_norm": 2.8324737548828125, "learning_rate": 4.084786614779346e-08, "loss": 0.8474, "step": 3211 }, { "epoch": 0.9606699566322716, "grad_norm": 1.9186546802520752, "learning_rate": 4.023213162823947e-08, "loss": 0.8736, "step": 3212 }, { "epoch": 0.9609690444145357, "grad_norm": 2.470005512237549, "learning_rate": 3.962105441755515e-08, "loss": 0.8415, "step": 3213 }, { "epoch": 0.9612681321967997, "grad_norm": 1.9938799142837524, "learning_rate": 3.9014635089554274e-08, "loss": 0.8663, "step": 3214 }, { "epoch": 0.9615672199790638, "grad_norm": 2.137734889984131, "learning_rate": 3.841287421367412e-08, "loss": 0.8875, "step": 3215 }, { "epoch": 0.961866307761328, "grad_norm": 2.36425518989563, "learning_rate": 3.781577235497935e-08, "loss": 0.9099, "step": 3216 }, { "epoch": 0.962165395543592, "grad_norm": 2.015622138977051, "learning_rate": 3.7223330074158126e-08, "loss": 0.8516, "step": 3217 }, { "epoch": 0.9624644833258561, "grad_norm": 2.941516399383545, "learning_rate": 3.663554792752544e-08, "loss": 0.913, "step": 3218 }, { "epoch": 0.9627635711081203, "grad_norm": 2.2365057468414307, "learning_rate": 3.605242646701812e-08, "loss": 0.8452, "step": 3219 }, { "epoch": 0.9630626588903843, "grad_norm": 2.125317335128784, "learning_rate": 3.547396624019817e-08, "loss": 0.8905, "step": 3220 }, { "epoch": 0.9633617466726484, "grad_norm": 2.34462571144104, "learning_rate": 3.490016779024885e-08, "loss": 0.9041, "step": 3221 }, { "epoch": 0.9636608344549125, "grad_norm": 2.0733444690704346, "learning_rate": 3.4331031655976955e-08, "loss": 0.8914, "step": 3222 }, { "epoch": 0.9639599222371766, "grad_norm": 2.1476993560791016, "learning_rate": 3.3766558371812754e-08, "loss": 0.9052, "step": 3223 }, { "epoch": 0.9642590100194407, "grad_norm": 2.0451300144195557, "learning_rate": 3.320674846780503e-08, "loss": 0.8362, "step": 3224 }, { "epoch": 0.9645580978017048, "grad_norm": 2.096048355102539, "learning_rate": 3.265160246962607e-08, "loss": 0.8508, "step": 3225 }, { "epoch": 0.9648571855839689, "grad_norm": 2.395622968673706, "learning_rate": 3.210112089856721e-08, "loss": 0.8917, "step": 3226 }, { "epoch": 0.965156273366233, "grad_norm": 2.2442121505737305, "learning_rate": 3.155530427153997e-08, "loss": 0.8722, "step": 3227 }, { "epoch": 0.9654553611484971, "grad_norm": 2.2104945182800293, "learning_rate": 3.1014153101076026e-08, "loss": 0.8423, "step": 3228 }, { "epoch": 0.9657544489307612, "grad_norm": 2.2236835956573486, "learning_rate": 3.0477667895326133e-08, "loss": 0.8588, "step": 3229 }, { "epoch": 0.9660535367130253, "grad_norm": 2.039926528930664, "learning_rate": 2.994584915805898e-08, "loss": 0.8648, "step": 3230 }, { "epoch": 0.9663526244952894, "grad_norm": 2.332326889038086, "learning_rate": 2.9418697388661766e-08, "loss": 0.8737, "step": 3231 }, { "epoch": 0.9666517122775534, "grad_norm": 2.3232364654541016, "learning_rate": 2.889621308213908e-08, "loss": 0.8522, "step": 3232 }, { "epoch": 0.9669508000598176, "grad_norm": 2.167475700378418, "learning_rate": 2.8378396729113466e-08, "loss": 0.8963, "step": 3233 }, { "epoch": 0.9672498878420817, "grad_norm": 2.342928886413574, "learning_rate": 2.7865248815822087e-08, "loss": 0.8424, "step": 3234 }, { "epoch": 0.9675489756243457, "grad_norm": 2.205280065536499, "learning_rate": 2.7356769824121166e-08, "loss": 0.8615, "step": 3235 }, { "epoch": 0.9678480634066099, "grad_norm": 2.075317144393921, "learning_rate": 2.6852960231480985e-08, "loss": 0.81, "step": 3236 }, { "epoch": 0.968147151188874, "grad_norm": 2.279663562774658, "learning_rate": 2.635382051098756e-08, "loss": 0.9396, "step": 3237 }, { "epoch": 0.968446238971138, "grad_norm": 2.3573215007781982, "learning_rate": 2.585935113134208e-08, "loss": 0.8941, "step": 3238 }, { "epoch": 0.9687453267534021, "grad_norm": 2.175955057144165, "learning_rate": 2.5369552556859243e-08, "loss": 0.9078, "step": 3239 }, { "epoch": 0.9690444145356663, "grad_norm": 2.253535747528076, "learning_rate": 2.4884425247468924e-08, "loss": 0.8542, "step": 3240 }, { "epoch": 0.9693435023179303, "grad_norm": 1.9835529327392578, "learning_rate": 2.44039696587145e-08, "loss": 0.8612, "step": 3241 }, { "epoch": 0.9696425901001944, "grad_norm": 2.2870469093322754, "learning_rate": 2.392818624175175e-08, "loss": 0.7867, "step": 3242 }, { "epoch": 0.9699416778824586, "grad_norm": 2.7104461193084717, "learning_rate": 2.345707544334941e-08, "loss": 0.9091, "step": 3243 }, { "epoch": 0.9702407656647226, "grad_norm": 2.0905916690826416, "learning_rate": 2.2990637705889717e-08, "loss": 0.8745, "step": 3244 }, { "epoch": 0.9705398534469867, "grad_norm": 2.2265257835388184, "learning_rate": 2.2528873467365098e-08, "loss": 0.9318, "step": 3245 }, { "epoch": 0.9708389412292507, "grad_norm": 2.2952864170074463, "learning_rate": 2.2071783161379812e-08, "loss": 0.8689, "step": 3246 }, { "epoch": 0.9711380290115149, "grad_norm": 3.5819203853607178, "learning_rate": 2.1619367217150522e-08, "loss": 0.8751, "step": 3247 }, { "epoch": 0.971437116793779, "grad_norm": 2.2800533771514893, "learning_rate": 2.1171626059503514e-08, "loss": 0.9078, "step": 3248 }, { "epoch": 0.971736204576043, "grad_norm": 1.9460597038269043, "learning_rate": 2.0728560108875807e-08, "loss": 0.817, "step": 3249 }, { "epoch": 0.9720352923583072, "grad_norm": 2.279193162918091, "learning_rate": 2.0290169781313483e-08, "loss": 0.8571, "step": 3250 }, { "epoch": 0.9723343801405713, "grad_norm": 2.4967823028564453, "learning_rate": 1.985645548847337e-08, "loss": 0.8939, "step": 3251 }, { "epoch": 0.9726334679228353, "grad_norm": 2.2954893112182617, "learning_rate": 1.9427417637619685e-08, "loss": 0.8802, "step": 3252 }, { "epoch": 0.9729325557050994, "grad_norm": 2.1584811210632324, "learning_rate": 1.9003056631627935e-08, "loss": 0.8801, "step": 3253 }, { "epoch": 0.9732316434873636, "grad_norm": 2.1160054206848145, "learning_rate": 1.8583372868979933e-08, "loss": 0.8819, "step": 3254 }, { "epoch": 0.9735307312696276, "grad_norm": 2.0774731636047363, "learning_rate": 1.8168366743765432e-08, "loss": 0.8657, "step": 3255 }, { "epoch": 0.9738298190518917, "grad_norm": 2.046501398086548, "learning_rate": 1.775803864568326e-08, "loss": 0.8849, "step": 3256 }, { "epoch": 0.9741289068341559, "grad_norm": 2.0868680477142334, "learning_rate": 1.7352388960038548e-08, "loss": 0.8667, "step": 3257 }, { "epoch": 0.9744279946164199, "grad_norm": 2.187516689300537, "learning_rate": 1.695141806774325e-08, "loss": 0.8095, "step": 3258 }, { "epoch": 0.974727082398684, "grad_norm": 2.1774449348449707, "learning_rate": 1.6555126345316197e-08, "loss": 0.8315, "step": 3259 }, { "epoch": 0.9750261701809481, "grad_norm": 2.25331974029541, "learning_rate": 1.6163514164882486e-08, "loss": 0.8704, "step": 3260 }, { "epoch": 0.9753252579632122, "grad_norm": 2.5866637229919434, "learning_rate": 1.577658189417186e-08, "loss": 0.846, "step": 3261 }, { "epoch": 0.9756243457454763, "grad_norm": 2.187948703765869, "learning_rate": 1.539432989652201e-08, "loss": 0.8565, "step": 3262 }, { "epoch": 0.9759234335277404, "grad_norm": 2.208529472351074, "learning_rate": 1.5016758530873033e-08, "loss": 0.8382, "step": 3263 }, { "epoch": 0.9762225213100045, "grad_norm": 2.3600127696990967, "learning_rate": 1.4643868151771323e-08, "loss": 0.9202, "step": 3264 }, { "epoch": 0.9765216090922686, "grad_norm": 2.182473659515381, "learning_rate": 1.4275659109367346e-08, "loss": 0.838, "step": 3265 }, { "epoch": 0.9768206968745327, "grad_norm": 2.2560346126556396, "learning_rate": 1.3912131749416746e-08, "loss": 0.9284, "step": 3266 }, { "epoch": 0.9771197846567967, "grad_norm": 2.134216070175171, "learning_rate": 1.3553286413277022e-08, "loss": 0.8703, "step": 3267 }, { "epoch": 0.9774188724390609, "grad_norm": 4.3539042472839355, "learning_rate": 1.3199123437910855e-08, "loss": 0.9051, "step": 3268 }, { "epoch": 0.977717960221325, "grad_norm": 2.304198980331421, "learning_rate": 1.2849643155882773e-08, "loss": 0.8729, "step": 3269 }, { "epoch": 0.978017048003589, "grad_norm": 2.2716002464294434, "learning_rate": 1.2504845895361384e-08, "loss": 0.9185, "step": 3270 }, { "epoch": 0.9783161357858532, "grad_norm": 2.0267622470855713, "learning_rate": 1.2164731980117694e-08, "loss": 0.9101, "step": 3271 }, { "epoch": 0.9786152235681173, "grad_norm": 2.2934250831604004, "learning_rate": 1.1829301729524567e-08, "loss": 0.8632, "step": 3272 }, { "epoch": 0.9789143113503813, "grad_norm": 2.3421738147735596, "learning_rate": 1.1498555458555604e-08, "loss": 0.8805, "step": 3273 }, { "epoch": 0.9792133991326454, "grad_norm": 2.1171538829803467, "learning_rate": 1.1172493477789037e-08, "loss": 0.8767, "step": 3274 }, { "epoch": 0.9795124869149096, "grad_norm": 2.153130054473877, "learning_rate": 1.085111609340217e-08, "loss": 0.8558, "step": 3275 }, { "epoch": 0.9798115746971736, "grad_norm": 2.2506422996520996, "learning_rate": 1.0534423607173604e-08, "loss": 0.8322, "step": 3276 }, { "epoch": 0.9801106624794377, "grad_norm": 2.4605002403259277, "learning_rate": 1.022241631648324e-08, "loss": 0.8454, "step": 3277 }, { "epoch": 0.9804097502617019, "grad_norm": 2.1318957805633545, "learning_rate": 9.915094514311719e-09, "loss": 0.8565, "step": 3278 }, { "epoch": 0.9807088380439659, "grad_norm": 2.369452714920044, "learning_rate": 9.612458489239308e-09, "loss": 0.8862, "step": 3279 }, { "epoch": 0.98100792582623, "grad_norm": 2.514833450317383, "learning_rate": 9.314508525446464e-09, "loss": 0.9544, "step": 3280 }, { "epoch": 0.981307013608494, "grad_norm": 2.2565410137176514, "learning_rate": 9.021244902713833e-09, "loss": 0.8913, "step": 3281 }, { "epoch": 0.9816061013907582, "grad_norm": 2.0018913745880127, "learning_rate": 8.732667896421131e-09, "loss": 0.8773, "step": 3282 }, { "epoch": 0.9819051891730223, "grad_norm": 2.0850577354431152, "learning_rate": 8.448777777546601e-09, "loss": 0.8949, "step": 3283 }, { "epoch": 0.9822042769552863, "grad_norm": 2.3372585773468018, "learning_rate": 8.169574812668668e-09, "loss": 0.8483, "step": 3284 }, { "epoch": 0.9825033647375505, "grad_norm": 1.9928077459335327, "learning_rate": 7.895059263963168e-09, "loss": 0.9348, "step": 3285 }, { "epoch": 0.9828024525198146, "grad_norm": 2.2848310470581055, "learning_rate": 7.625231389205567e-09, "loss": 0.8666, "step": 3286 }, { "epoch": 0.9831015403020786, "grad_norm": 1.9941561222076416, "learning_rate": 7.360091441768746e-09, "loss": 0.8288, "step": 3287 }, { "epoch": 0.9834006280843428, "grad_norm": 2.3706982135772705, "learning_rate": 7.099639670623548e-09, "loss": 0.9208, "step": 3288 }, { "epoch": 0.9836997158666069, "grad_norm": 2.658630847930908, "learning_rate": 6.8438763203393375e-09, "loss": 0.8032, "step": 3289 }, { "epoch": 0.9839988036488709, "grad_norm": 1.986126184463501, "learning_rate": 6.59280163108178e-09, "loss": 0.8724, "step": 3290 }, { "epoch": 0.984297891431135, "grad_norm": 2.062664031982422, "learning_rate": 6.346415838614506e-09, "loss": 0.8366, "step": 3291 }, { "epoch": 0.9845969792133992, "grad_norm": 1.888380765914917, "learning_rate": 6.104719174298557e-09, "loss": 0.9002, "step": 3292 }, { "epoch": 0.9848960669956632, "grad_norm": 3.4002184867858887, "learning_rate": 5.867711865090719e-09, "loss": 0.8661, "step": 3293 }, { "epoch": 0.9851951547779273, "grad_norm": 2.4136531352996826, "learning_rate": 5.635394133545191e-09, "loss": 0.8746, "step": 3294 }, { "epoch": 0.9854942425601915, "grad_norm": 2.5578107833862305, "learning_rate": 5.40776619781247e-09, "loss": 0.9392, "step": 3295 }, { "epoch": 0.9857933303424555, "grad_norm": 2.1634738445281982, "learning_rate": 5.184828271639353e-09, "loss": 0.8397, "step": 3296 }, { "epoch": 0.9860924181247196, "grad_norm": 2.191891670227051, "learning_rate": 4.966580564368384e-09, "loss": 0.8547, "step": 3297 }, { "epoch": 0.9863915059069837, "grad_norm": 2.167189121246338, "learning_rate": 4.7530232809378515e-09, "loss": 0.8501, "step": 3298 }, { "epoch": 0.9866905936892478, "grad_norm": 2.3678951263427734, "learning_rate": 4.54415662188179e-09, "loss": 0.9479, "step": 3299 }, { "epoch": 0.9869896814715119, "grad_norm": 2.134716033935547, "learning_rate": 4.339980783329423e-09, "loss": 0.8629, "step": 3300 }, { "epoch": 0.987288769253776, "grad_norm": 1.9066070318222046, "learning_rate": 4.140495957006274e-09, "loss": 0.8793, "step": 3301 }, { "epoch": 0.9875878570360401, "grad_norm": 2.0578203201293945, "learning_rate": 3.945702330230839e-09, "loss": 0.8509, "step": 3302 }, { "epoch": 0.9878869448183042, "grad_norm": 2.1208982467651367, "learning_rate": 3.755600085918465e-09, "loss": 0.8685, "step": 3303 }, { "epoch": 0.9881860326005683, "grad_norm": 1.9957811832427979, "learning_rate": 3.5701894025791383e-09, "loss": 0.8566, "step": 3304 }, { "epoch": 0.9884851203828323, "grad_norm": 2.5351362228393555, "learning_rate": 3.3894704543152578e-09, "loss": 0.9037, "step": 3305 }, { "epoch": 0.9887842081650965, "grad_norm": 2.1809449195861816, "learning_rate": 3.213443410826078e-09, "loss": 0.8229, "step": 3306 }, { "epoch": 0.9890832959473606, "grad_norm": 2.5041513442993164, "learning_rate": 3.0421084374038234e-09, "loss": 0.9496, "step": 3307 }, { "epoch": 0.9893823837296246, "grad_norm": 2.3867907524108887, "learning_rate": 2.875465694935353e-09, "loss": 0.8871, "step": 3308 }, { "epoch": 0.9896814715118888, "grad_norm": 2.1462981700897217, "learning_rate": 2.7135153399004967e-09, "loss": 0.8073, "step": 3309 }, { "epoch": 0.9899805592941529, "grad_norm": 2.2028048038482666, "learning_rate": 2.5562575243737176e-09, "loss": 0.9162, "step": 3310 }, { "epoch": 0.9902796470764169, "grad_norm": 2.1644530296325684, "learning_rate": 2.4036923960230053e-09, "loss": 0.892, "step": 3311 }, { "epoch": 0.990578734858681, "grad_norm": 2.3289718627929688, "learning_rate": 2.255820098109873e-09, "loss": 0.8553, "step": 3312 }, { "epoch": 0.9908778226409451, "grad_norm": 2.3625288009643555, "learning_rate": 2.112640769488805e-09, "loss": 0.8388, "step": 3313 }, { "epoch": 0.9911769104232092, "grad_norm": 2.3640336990356445, "learning_rate": 1.974154544607254e-09, "loss": 0.9169, "step": 3314 }, { "epoch": 0.9914759982054733, "grad_norm": 2.1946287155151367, "learning_rate": 1.8403615535067531e-09, "loss": 0.8475, "step": 3315 }, { "epoch": 0.9917750859877374, "grad_norm": 1.7235758304595947, "learning_rate": 1.71126192182125e-09, "loss": 0.8747, "step": 3316 }, { "epoch": 0.9920741737700015, "grad_norm": 2.4838268756866455, "learning_rate": 1.586855770777107e-09, "loss": 0.8965, "step": 3317 }, { "epoch": 0.9923732615522656, "grad_norm": 2.2230114936828613, "learning_rate": 1.4671432171947663e-09, "loss": 0.8951, "step": 3318 }, { "epoch": 0.9926723493345296, "grad_norm": 2.82185959815979, "learning_rate": 1.3521243734854195e-09, "loss": 0.8694, "step": 3319 }, { "epoch": 0.9929714371167938, "grad_norm": 2.4217398166656494, "learning_rate": 1.2417993476543377e-09, "loss": 0.8902, "step": 3320 }, { "epoch": 0.9932705248990579, "grad_norm": 2.3155174255371094, "learning_rate": 1.136168243298097e-09, "loss": 0.8946, "step": 3321 }, { "epoch": 0.9935696126813219, "grad_norm": 2.05985951423645, "learning_rate": 1.0352311596067976e-09, "loss": 0.8661, "step": 3322 }, { "epoch": 0.9938687004635861, "grad_norm": 2.117481231689453, "learning_rate": 9.389881913618448e-10, "loss": 0.9423, "step": 3323 }, { "epoch": 0.9941677882458502, "grad_norm": 2.3174641132354736, "learning_rate": 8.474394289376131e-10, "loss": 0.8454, "step": 3324 }, { "epoch": 0.9944668760281142, "grad_norm": 2.2463605403900146, "learning_rate": 7.605849582986713e-10, "loss": 0.8676, "step": 3325 }, { "epoch": 0.9947659638103783, "grad_norm": 2.367037773132324, "learning_rate": 6.784248610042232e-10, "loss": 0.8872, "step": 3326 }, { "epoch": 0.9950650515926425, "grad_norm": 2.2137022018432617, "learning_rate": 6.009592142036669e-10, "loss": 0.8525, "step": 3327 }, { "epoch": 0.9953641393749065, "grad_norm": 2.406859874725342, "learning_rate": 5.281880906382597e-10, "loss": 0.9492, "step": 3328 }, { "epoch": 0.9956632271571706, "grad_norm": 2.1485042572021484, "learning_rate": 4.6011155864111865e-10, "loss": 0.8922, "step": 3329 }, { "epoch": 0.9959623149394348, "grad_norm": 2.139531373977661, "learning_rate": 3.967296821383304e-10, "loss": 0.9225, "step": 3330 }, { "epoch": 0.9962614027216988, "grad_norm": 2.1426920890808105, "learning_rate": 3.380425206461757e-10, "loss": 0.819, "step": 3331 }, { "epoch": 0.9965604905039629, "grad_norm": 2.1145718097686768, "learning_rate": 2.8405012927223975e-10, "loss": 0.9141, "step": 3332 }, { "epoch": 0.996859578286227, "grad_norm": 2.5318174362182617, "learning_rate": 2.3475255871707737e-10, "loss": 0.9157, "step": 3333 }, { "epoch": 0.9971586660684911, "grad_norm": 2.0791304111480713, "learning_rate": 1.901498552714376e-10, "loss": 0.8596, "step": 3334 }, { "epoch": 0.9974577538507552, "grad_norm": 2.4466896057128906, "learning_rate": 1.5024206081848401e-10, "loss": 0.9098, "step": 3335 }, { "epoch": 0.9977568416330193, "grad_norm": 2.1766715049743652, "learning_rate": 1.1502921283212953e-10, "loss": 0.8288, "step": 3336 }, { "epoch": 0.9980559294152834, "grad_norm": 2.363459825515747, "learning_rate": 8.451134437814646e-11, "loss": 0.8494, "step": 3337 }, { "epoch": 0.9983550171975475, "grad_norm": 2.2856664657592773, "learning_rate": 5.86884841130564e-11, "loss": 0.8695, "step": 3338 }, { "epoch": 0.9986541049798116, "grad_norm": 2.4433987140655518, "learning_rate": 3.7560656284685305e-11, "loss": 0.8352, "step": 3339 }, { "epoch": 0.9989531927620756, "grad_norm": 2.2234396934509277, "learning_rate": 2.1127880733273764e-11, "loss": 0.8998, "step": 3340 }, { "epoch": 0.9992522805443398, "grad_norm": 2.3582518100738525, "learning_rate": 9.390172888701366e-12, "loss": 0.8689, "step": 3341 }, { "epoch": 0.9995513683266039, "grad_norm": 2.0257132053375244, "learning_rate": 2.347543773262295e-12, "loss": 0.8228, "step": 3342 }, { "epoch": 0.9998504561088679, "grad_norm": 2.217982769012451, "learning_rate": 0.0, "loss": 0.8942, "step": 3343 }, { "epoch": 0.9998504561088679, "step": 3343, "total_flos": 3.98916672639866e+19, "train_loss": 0.0894987233163638, "train_runtime": 1485.7442, "train_samples_per_second": 576.074, "train_steps_per_second": 2.25 } ], "logging_steps": 1.0, "max_steps": 3343, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.98916672639866e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }