{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7265216736983586, "eval_steps": 500, "global_step": 17500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.151552421133477e-05, "grad_norm": 10.459966659545898, "learning_rate": 1.3831258644536654e-08, "loss": 0.9989, "step": 1 }, { "epoch": 8.303104842266955e-05, "grad_norm": 7.495995998382568, "learning_rate": 2.766251728907331e-08, "loss": 1.0182, "step": 2 }, { "epoch": 0.00012454657263400432, "grad_norm": 7.587473392486572, "learning_rate": 4.1493775933609963e-08, "loss": 0.9817, "step": 3 }, { "epoch": 0.0001660620968453391, "grad_norm": 11.5823392868042, "learning_rate": 5.532503457814662e-08, "loss": 1.1408, "step": 4 }, { "epoch": 0.00020757762105667388, "grad_norm": 9.059793472290039, "learning_rate": 6.915629322268327e-08, "loss": 0.9812, "step": 5 }, { "epoch": 0.00024909314526800865, "grad_norm": 7.0719313621521, "learning_rate": 8.298755186721993e-08, "loss": 0.7679, "step": 6 }, { "epoch": 0.0002906086694793434, "grad_norm": 6.830554962158203, "learning_rate": 9.681881051175658e-08, "loss": 0.6992, "step": 7 }, { "epoch": 0.0003321241936906782, "grad_norm": 7.079957008361816, "learning_rate": 1.1065006915629324e-07, "loss": 0.9608, "step": 8 }, { "epoch": 0.000373639717902013, "grad_norm": 6.279632568359375, "learning_rate": 1.2448132780082988e-07, "loss": 0.9148, "step": 9 }, { "epoch": 0.00041515524211334777, "grad_norm": 10.110675811767578, "learning_rate": 1.3831258644536654e-07, "loss": 0.8335, "step": 10 }, { "epoch": 0.00045667076632468253, "grad_norm": 7.7138519287109375, "learning_rate": 1.5214384508990319e-07, "loss": 0.9647, "step": 11 }, { "epoch": 0.0004981862905360173, "grad_norm": 6.553215980529785, "learning_rate": 1.6597510373443985e-07, "loss": 1.1426, "step": 12 }, { "epoch": 0.0005397018147473521, "grad_norm": 8.081541061401367, "learning_rate": 1.798063623789765e-07, "loss": 0.9249, "step": 13 }, { "epoch": 0.0005812173389586868, "grad_norm": 11.821453094482422, "learning_rate": 1.9363762102351316e-07, "loss": 0.9532, "step": 14 }, { "epoch": 0.0006227328631700217, "grad_norm": 7.968032360076904, "learning_rate": 2.074688796680498e-07, "loss": 0.937, "step": 15 }, { "epoch": 0.0006642483873813564, "grad_norm": 13.394933700561523, "learning_rate": 2.2130013831258647e-07, "loss": 1.0037, "step": 16 }, { "epoch": 0.0007057639115926912, "grad_norm": 11.775710105895996, "learning_rate": 2.351313969571231e-07, "loss": 0.7922, "step": 17 }, { "epoch": 0.000747279435804026, "grad_norm": 8.327851295471191, "learning_rate": 2.4896265560165975e-07, "loss": 1.0704, "step": 18 }, { "epoch": 0.0007887949600153607, "grad_norm": 9.59449577331543, "learning_rate": 2.627939142461964e-07, "loss": 0.9043, "step": 19 }, { "epoch": 0.0008303104842266955, "grad_norm": 8.607476234436035, "learning_rate": 2.766251728907331e-07, "loss": 1.0481, "step": 20 }, { "epoch": 0.0008718260084380302, "grad_norm": 8.110937118530273, "learning_rate": 2.9045643153526976e-07, "loss": 0.8726, "step": 21 }, { "epoch": 0.0009133415326493651, "grad_norm": 9.439693450927734, "learning_rate": 3.0428769017980637e-07, "loss": 0.9999, "step": 22 }, { "epoch": 0.0009548570568606999, "grad_norm": 6.296890735626221, "learning_rate": 3.1811894882434304e-07, "loss": 1.0071, "step": 23 }, { "epoch": 0.0009963725810720346, "grad_norm": 8.541991233825684, "learning_rate": 3.319502074688797e-07, "loss": 0.7093, "step": 24 }, { "epoch": 0.0010378881052833693, "grad_norm": 8.28548812866211, "learning_rate": 3.457814661134164e-07, "loss": 0.9939, "step": 25 }, { "epoch": 0.0010794036294947042, "grad_norm": 9.304506301879883, "learning_rate": 3.59612724757953e-07, "loss": 0.8888, "step": 26 }, { "epoch": 0.001120919153706039, "grad_norm": 8.388057708740234, "learning_rate": 3.7344398340248966e-07, "loss": 0.9351, "step": 27 }, { "epoch": 0.0011624346779173737, "grad_norm": 10.595147132873535, "learning_rate": 3.872752420470263e-07, "loss": 0.9096, "step": 28 }, { "epoch": 0.0012039502021287086, "grad_norm": 5.902129650115967, "learning_rate": 4.01106500691563e-07, "loss": 0.8922, "step": 29 }, { "epoch": 0.0012454657263400433, "grad_norm": 7.745982646942139, "learning_rate": 4.149377593360996e-07, "loss": 0.9292, "step": 30 }, { "epoch": 0.001286981250551378, "grad_norm": 7.590599536895752, "learning_rate": 4.287690179806363e-07, "loss": 1.0559, "step": 31 }, { "epoch": 0.0013284967747627127, "grad_norm": 7.6458024978637695, "learning_rate": 4.4260027662517294e-07, "loss": 0.8797, "step": 32 }, { "epoch": 0.0013700122989740477, "grad_norm": 6.893218040466309, "learning_rate": 4.5643153526970956e-07, "loss": 0.7902, "step": 33 }, { "epoch": 0.0014115278231853824, "grad_norm": 8.018237113952637, "learning_rate": 4.702627939142462e-07, "loss": 1.0042, "step": 34 }, { "epoch": 0.001453043347396717, "grad_norm": 6.685311317443848, "learning_rate": 4.84094052558783e-07, "loss": 0.7523, "step": 35 }, { "epoch": 0.001494558871608052, "grad_norm": 6.356356143951416, "learning_rate": 4.979253112033195e-07, "loss": 0.7494, "step": 36 }, { "epoch": 0.0015360743958193867, "grad_norm": 6.726548194885254, "learning_rate": 5.117565698478562e-07, "loss": 0.9493, "step": 37 }, { "epoch": 0.0015775899200307214, "grad_norm": 7.512827396392822, "learning_rate": 5.255878284923928e-07, "loss": 0.972, "step": 38 }, { "epoch": 0.0016191054442420564, "grad_norm": 7.606923580169678, "learning_rate": 5.394190871369295e-07, "loss": 0.8421, "step": 39 }, { "epoch": 0.001660620968453391, "grad_norm": 6.021209239959717, "learning_rate": 5.532503457814662e-07, "loss": 0.8168, "step": 40 }, { "epoch": 0.0017021364926647258, "grad_norm": 8.463929176330566, "learning_rate": 5.670816044260028e-07, "loss": 0.9642, "step": 41 }, { "epoch": 0.0017436520168760605, "grad_norm": 6.313794136047363, "learning_rate": 5.809128630705395e-07, "loss": 0.7497, "step": 42 }, { "epoch": 0.0017851675410873954, "grad_norm": 7.121931076049805, "learning_rate": 5.947441217150761e-07, "loss": 0.8268, "step": 43 }, { "epoch": 0.0018266830652987301, "grad_norm": 9.545415878295898, "learning_rate": 6.085753803596127e-07, "loss": 0.862, "step": 44 }, { "epoch": 0.0018681985895100648, "grad_norm": 5.1650390625, "learning_rate": 6.224066390041494e-07, "loss": 0.8234, "step": 45 }, { "epoch": 0.0019097141137213998, "grad_norm": 3.3615853786468506, "learning_rate": 6.362378976486861e-07, "loss": 0.7902, "step": 46 }, { "epoch": 0.0019512296379327345, "grad_norm": 5.279922008514404, "learning_rate": 6.500691562932227e-07, "loss": 0.8008, "step": 47 }, { "epoch": 0.001992745162144069, "grad_norm": 5.0936994552612305, "learning_rate": 6.639004149377594e-07, "loss": 0.6372, "step": 48 }, { "epoch": 0.002034260686355404, "grad_norm": 6.204946041107178, "learning_rate": 6.77731673582296e-07, "loss": 0.7849, "step": 49 }, { "epoch": 0.0020757762105667386, "grad_norm": 4.517428874969482, "learning_rate": 6.915629322268328e-07, "loss": 0.6864, "step": 50 }, { "epoch": 0.0021172917347780738, "grad_norm": 4.420582294464111, "learning_rate": 7.053941908713693e-07, "loss": 0.7688, "step": 51 }, { "epoch": 0.0021588072589894085, "grad_norm": 4.804570198059082, "learning_rate": 7.19225449515906e-07, "loss": 0.7715, "step": 52 }, { "epoch": 0.002200322783200743, "grad_norm": 4.341346740722656, "learning_rate": 7.330567081604426e-07, "loss": 0.9211, "step": 53 }, { "epoch": 0.002241838307412078, "grad_norm": 3.590824604034424, "learning_rate": 7.468879668049793e-07, "loss": 0.7054, "step": 54 }, { "epoch": 0.0022833538316234126, "grad_norm": 4.4935455322265625, "learning_rate": 7.607192254495159e-07, "loss": 0.9132, "step": 55 }, { "epoch": 0.0023248693558347473, "grad_norm": 5.045493125915527, "learning_rate": 7.745504840940527e-07, "loss": 0.6732, "step": 56 }, { "epoch": 0.002366384880046082, "grad_norm": 3.7436625957489014, "learning_rate": 7.883817427385892e-07, "loss": 0.6864, "step": 57 }, { "epoch": 0.002407900404257417, "grad_norm": 3.0505523681640625, "learning_rate": 8.02213001383126e-07, "loss": 0.5603, "step": 58 }, { "epoch": 0.002449415928468752, "grad_norm": 4.019108772277832, "learning_rate": 8.160442600276625e-07, "loss": 0.5943, "step": 59 }, { "epoch": 0.0024909314526800866, "grad_norm": 4.114040374755859, "learning_rate": 8.298755186721992e-07, "loss": 0.7025, "step": 60 }, { "epoch": 0.0025324469768914213, "grad_norm": 3.746091365814209, "learning_rate": 8.43706777316736e-07, "loss": 0.7848, "step": 61 }, { "epoch": 0.002573962501102756, "grad_norm": 3.9986462593078613, "learning_rate": 8.575380359612726e-07, "loss": 0.7592, "step": 62 }, { "epoch": 0.0026154780253140907, "grad_norm": 4.223106384277344, "learning_rate": 8.713692946058091e-07, "loss": 0.6808, "step": 63 }, { "epoch": 0.0026569935495254255, "grad_norm": 3.885176420211792, "learning_rate": 8.852005532503459e-07, "loss": 0.69, "step": 64 }, { "epoch": 0.0026985090737367606, "grad_norm": 4.953071594238281, "learning_rate": 8.990318118948826e-07, "loss": 0.658, "step": 65 }, { "epoch": 0.0027400245979480953, "grad_norm": 3.3597326278686523, "learning_rate": 9.128630705394191e-07, "loss": 0.6352, "step": 66 }, { "epoch": 0.00278154012215943, "grad_norm": 3.284468412399292, "learning_rate": 9.266943291839559e-07, "loss": 0.5275, "step": 67 }, { "epoch": 0.0028230556463707647, "grad_norm": 3.7373857498168945, "learning_rate": 9.405255878284925e-07, "loss": 0.6311, "step": 68 }, { "epoch": 0.0028645711705820994, "grad_norm": 3.879077434539795, "learning_rate": 9.54356846473029e-07, "loss": 0.8368, "step": 69 }, { "epoch": 0.002906086694793434, "grad_norm": 3.5331387519836426, "learning_rate": 9.68188105117566e-07, "loss": 0.5653, "step": 70 }, { "epoch": 0.0029476022190047693, "grad_norm": 3.9076988697052, "learning_rate": 9.820193637621024e-07, "loss": 0.6267, "step": 71 }, { "epoch": 0.002989117743216104, "grad_norm": 4.001111030578613, "learning_rate": 9.95850622406639e-07, "loss": 0.8554, "step": 72 }, { "epoch": 0.0030306332674274387, "grad_norm": 3.8818676471710205, "learning_rate": 1.0096818810511757e-06, "loss": 0.6549, "step": 73 }, { "epoch": 0.0030721487916387734, "grad_norm": 3.787411689758301, "learning_rate": 1.0235131396957124e-06, "loss": 0.657, "step": 74 }, { "epoch": 0.003113664315850108, "grad_norm": 4.85059928894043, "learning_rate": 1.037344398340249e-06, "loss": 0.6251, "step": 75 }, { "epoch": 0.003155179840061443, "grad_norm": 3.95936918258667, "learning_rate": 1.0511756569847857e-06, "loss": 0.7357, "step": 76 }, { "epoch": 0.0031966953642727776, "grad_norm": 3.2045938968658447, "learning_rate": 1.0650069156293224e-06, "loss": 0.6315, "step": 77 }, { "epoch": 0.0032382108884841127, "grad_norm": 3.2165396213531494, "learning_rate": 1.078838174273859e-06, "loss": 0.6403, "step": 78 }, { "epoch": 0.0032797264126954474, "grad_norm": 3.7616231441497803, "learning_rate": 1.0926694329183957e-06, "loss": 0.6552, "step": 79 }, { "epoch": 0.003321241936906782, "grad_norm": 3.250486373901367, "learning_rate": 1.1065006915629324e-06, "loss": 0.6663, "step": 80 }, { "epoch": 0.003362757461118117, "grad_norm": 3.449876070022583, "learning_rate": 1.120331950207469e-06, "loss": 0.7149, "step": 81 }, { "epoch": 0.0034042729853294516, "grad_norm": 3.343782424926758, "learning_rate": 1.1341632088520057e-06, "loss": 0.6388, "step": 82 }, { "epoch": 0.0034457885095407863, "grad_norm": 3.988534688949585, "learning_rate": 1.1479944674965422e-06, "loss": 0.7894, "step": 83 }, { "epoch": 0.003487304033752121, "grad_norm": 3.1469573974609375, "learning_rate": 1.161825726141079e-06, "loss": 0.5589, "step": 84 }, { "epoch": 0.003528819557963456, "grad_norm": 3.2567152976989746, "learning_rate": 1.1756569847856155e-06, "loss": 0.7658, "step": 85 }, { "epoch": 0.003570335082174791, "grad_norm": 3.2737083435058594, "learning_rate": 1.1894882434301522e-06, "loss": 0.6775, "step": 86 }, { "epoch": 0.0036118506063861256, "grad_norm": 3.9176297187805176, "learning_rate": 1.2033195020746888e-06, "loss": 0.5191, "step": 87 }, { "epoch": 0.0036533661305974603, "grad_norm": 3.8002545833587646, "learning_rate": 1.2171507607192255e-06, "loss": 0.7094, "step": 88 }, { "epoch": 0.003694881654808795, "grad_norm": 2.992441177368164, "learning_rate": 1.2309820193637624e-06, "loss": 0.615, "step": 89 }, { "epoch": 0.0037363971790201297, "grad_norm": 3.913115978240967, "learning_rate": 1.2448132780082988e-06, "loss": 0.5693, "step": 90 }, { "epoch": 0.0037779127032314644, "grad_norm": 3.374166488647461, "learning_rate": 1.2586445366528355e-06, "loss": 0.6125, "step": 91 }, { "epoch": 0.0038194282274427995, "grad_norm": 4.049959659576416, "learning_rate": 1.2724757952973722e-06, "loss": 0.5872, "step": 92 }, { "epoch": 0.0038609437516541343, "grad_norm": 3.214226007461548, "learning_rate": 1.2863070539419086e-06, "loss": 0.643, "step": 93 }, { "epoch": 0.003902459275865469, "grad_norm": 3.063330888748169, "learning_rate": 1.3001383125864455e-06, "loss": 0.6227, "step": 94 }, { "epoch": 0.003943974800076804, "grad_norm": 4.605921745300293, "learning_rate": 1.3139695712309822e-06, "loss": 0.6831, "step": 95 }, { "epoch": 0.003985490324288138, "grad_norm": 3.9362192153930664, "learning_rate": 1.3278008298755188e-06, "loss": 0.6137, "step": 96 }, { "epoch": 0.004027005848499473, "grad_norm": 3.6910037994384766, "learning_rate": 1.3416320885200553e-06, "loss": 0.7095, "step": 97 }, { "epoch": 0.004068521372710808, "grad_norm": 2.993161201477051, "learning_rate": 1.355463347164592e-06, "loss": 0.7739, "step": 98 }, { "epoch": 0.0041100368969221425, "grad_norm": 3.2794554233551025, "learning_rate": 1.3692946058091288e-06, "loss": 0.6773, "step": 99 }, { "epoch": 0.004151552421133477, "grad_norm": 4.085877895355225, "learning_rate": 1.3831258644536655e-06, "loss": 0.4511, "step": 100 }, { "epoch": 0.004193067945344812, "grad_norm": 3.2344589233398438, "learning_rate": 1.3969571230982022e-06, "loss": 0.6212, "step": 101 }, { "epoch": 0.0042345834695561475, "grad_norm": 3.2105753421783447, "learning_rate": 1.4107883817427386e-06, "loss": 0.6852, "step": 102 }, { "epoch": 0.004276098993767482, "grad_norm": 3.6191928386688232, "learning_rate": 1.4246196403872753e-06, "loss": 0.4912, "step": 103 }, { "epoch": 0.004317614517978817, "grad_norm": 3.040008544921875, "learning_rate": 1.438450899031812e-06, "loss": 0.6236, "step": 104 }, { "epoch": 0.004359130042190152, "grad_norm": 3.685814142227173, "learning_rate": 1.4522821576763488e-06, "loss": 0.7447, "step": 105 }, { "epoch": 0.004400645566401486, "grad_norm": 3.4420769214630127, "learning_rate": 1.4661134163208853e-06, "loss": 0.7081, "step": 106 }, { "epoch": 0.004442161090612821, "grad_norm": 3.290405511856079, "learning_rate": 1.479944674965422e-06, "loss": 0.5822, "step": 107 }, { "epoch": 0.004483676614824156, "grad_norm": 3.576735496520996, "learning_rate": 1.4937759336099586e-06, "loss": 0.5789, "step": 108 }, { "epoch": 0.0045251921390354905, "grad_norm": 3.611250638961792, "learning_rate": 1.5076071922544953e-06, "loss": 0.5537, "step": 109 }, { "epoch": 0.004566707663246825, "grad_norm": 3.9324469566345215, "learning_rate": 1.5214384508990318e-06, "loss": 0.6766, "step": 110 }, { "epoch": 0.00460822318745816, "grad_norm": 3.336915969848633, "learning_rate": 1.5352697095435686e-06, "loss": 0.5702, "step": 111 }, { "epoch": 0.004649738711669495, "grad_norm": 3.213373899459839, "learning_rate": 1.5491009681881053e-06, "loss": 0.7122, "step": 112 }, { "epoch": 0.004691254235880829, "grad_norm": 3.1955461502075195, "learning_rate": 1.562932226832642e-06, "loss": 0.5333, "step": 113 }, { "epoch": 0.004732769760092164, "grad_norm": 3.673494815826416, "learning_rate": 1.5767634854771784e-06, "loss": 0.4927, "step": 114 }, { "epoch": 0.0047742852843035, "grad_norm": 2.871079921722412, "learning_rate": 1.590594744121715e-06, "loss": 0.5686, "step": 115 }, { "epoch": 0.004815800808514834, "grad_norm": 3.034919261932373, "learning_rate": 1.604426002766252e-06, "loss": 0.655, "step": 116 }, { "epoch": 0.004857316332726169, "grad_norm": 3.4824373722076416, "learning_rate": 1.6182572614107886e-06, "loss": 0.7424, "step": 117 }, { "epoch": 0.004898831856937504, "grad_norm": 3.419607162475586, "learning_rate": 1.632088520055325e-06, "loss": 0.6375, "step": 118 }, { "epoch": 0.0049403473811488385, "grad_norm": 2.8002994060516357, "learning_rate": 1.6459197786998618e-06, "loss": 0.5502, "step": 119 }, { "epoch": 0.004981862905360173, "grad_norm": 3.254840850830078, "learning_rate": 1.6597510373443984e-06, "loss": 0.6457, "step": 120 }, { "epoch": 0.005023378429571508, "grad_norm": 3.4771344661712646, "learning_rate": 1.673582295988935e-06, "loss": 0.6395, "step": 121 }, { "epoch": 0.005064893953782843, "grad_norm": 3.2211029529571533, "learning_rate": 1.687413554633472e-06, "loss": 0.485, "step": 122 }, { "epoch": 0.005106409477994177, "grad_norm": 3.3522980213165283, "learning_rate": 1.7012448132780084e-06, "loss": 0.5612, "step": 123 }, { "epoch": 0.005147925002205512, "grad_norm": 2.9894979000091553, "learning_rate": 1.715076071922545e-06, "loss": 0.5711, "step": 124 }, { "epoch": 0.005189440526416847, "grad_norm": 3.6795578002929688, "learning_rate": 1.7289073305670818e-06, "loss": 0.644, "step": 125 }, { "epoch": 0.0052309560506281815, "grad_norm": 3.3459973335266113, "learning_rate": 1.7427385892116182e-06, "loss": 0.5747, "step": 126 }, { "epoch": 0.005272471574839516, "grad_norm": 2.832531213760376, "learning_rate": 1.756569847856155e-06, "loss": 0.5888, "step": 127 }, { "epoch": 0.005313987099050851, "grad_norm": 3.935823917388916, "learning_rate": 1.7704011065006918e-06, "loss": 0.5576, "step": 128 }, { "epoch": 0.0053555026232621865, "grad_norm": 3.166489362716675, "learning_rate": 1.7842323651452284e-06, "loss": 0.6972, "step": 129 }, { "epoch": 0.005397018147473521, "grad_norm": 2.8468682765960693, "learning_rate": 1.7980636237897651e-06, "loss": 0.5734, "step": 130 }, { "epoch": 0.005438533671684856, "grad_norm": 2.906363010406494, "learning_rate": 1.8118948824343016e-06, "loss": 0.4186, "step": 131 }, { "epoch": 0.005480049195896191, "grad_norm": 3.332674980163574, "learning_rate": 1.8257261410788382e-06, "loss": 0.558, "step": 132 }, { "epoch": 0.005521564720107525, "grad_norm": 2.8327555656433105, "learning_rate": 1.8395573997233751e-06, "loss": 0.6198, "step": 133 }, { "epoch": 0.00556308024431886, "grad_norm": 3.2951042652130127, "learning_rate": 1.8533886583679118e-06, "loss": 0.6438, "step": 134 }, { "epoch": 0.005604595768530195, "grad_norm": 3.1163461208343506, "learning_rate": 1.8672199170124482e-06, "loss": 0.5889, "step": 135 }, { "epoch": 0.0056461112927415295, "grad_norm": 2.8606560230255127, "learning_rate": 1.881051175656985e-06, "loss": 0.4473, "step": 136 }, { "epoch": 0.005687626816952864, "grad_norm": 3.2484145164489746, "learning_rate": 1.8948824343015216e-06, "loss": 0.6646, "step": 137 }, { "epoch": 0.005729142341164199, "grad_norm": 4.125248432159424, "learning_rate": 1.908713692946058e-06, "loss": 0.6454, "step": 138 }, { "epoch": 0.005770657865375534, "grad_norm": 3.4270987510681152, "learning_rate": 1.922544951590595e-06, "loss": 0.6188, "step": 139 }, { "epoch": 0.005812173389586868, "grad_norm": 2.9100327491760254, "learning_rate": 1.936376210235132e-06, "loss": 0.5641, "step": 140 }, { "epoch": 0.005853688913798203, "grad_norm": 3.1299550533294678, "learning_rate": 1.9502074688796682e-06, "loss": 0.4828, "step": 141 }, { "epoch": 0.005895204438009539, "grad_norm": 3.3593904972076416, "learning_rate": 1.9640387275242047e-06, "loss": 0.677, "step": 142 }, { "epoch": 0.005936719962220873, "grad_norm": 3.3583483695983887, "learning_rate": 1.9778699861687416e-06, "loss": 0.6075, "step": 143 }, { "epoch": 0.005978235486432208, "grad_norm": 3.572324275970459, "learning_rate": 1.991701244813278e-06, "loss": 0.6012, "step": 144 }, { "epoch": 0.006019751010643543, "grad_norm": 4.075873851776123, "learning_rate": 2.005532503457815e-06, "loss": 0.645, "step": 145 }, { "epoch": 0.0060612665348548775, "grad_norm": 3.446446180343628, "learning_rate": 2.0193637621023514e-06, "loss": 0.5539, "step": 146 }, { "epoch": 0.006102782059066212, "grad_norm": 3.2743327617645264, "learning_rate": 2.0331950207468883e-06, "loss": 0.5658, "step": 147 }, { "epoch": 0.006144297583277547, "grad_norm": 3.1205976009368896, "learning_rate": 2.0470262793914247e-06, "loss": 0.6558, "step": 148 }, { "epoch": 0.006185813107488882, "grad_norm": 3.801539659500122, "learning_rate": 2.060857538035961e-06, "loss": 0.6553, "step": 149 }, { "epoch": 0.006227328631700216, "grad_norm": 4.184968948364258, "learning_rate": 2.074688796680498e-06, "loss": 0.6259, "step": 150 }, { "epoch": 0.006268844155911551, "grad_norm": 2.8705456256866455, "learning_rate": 2.088520055325035e-06, "loss": 0.5508, "step": 151 }, { "epoch": 0.006310359680122886, "grad_norm": 3.4174914360046387, "learning_rate": 2.1023513139695714e-06, "loss": 0.7156, "step": 152 }, { "epoch": 0.00635187520433422, "grad_norm": 3.0978262424468994, "learning_rate": 2.116182572614108e-06, "loss": 0.6996, "step": 153 }, { "epoch": 0.006393390728545555, "grad_norm": 3.2389233112335205, "learning_rate": 2.1300138312586447e-06, "loss": 0.6348, "step": 154 }, { "epoch": 0.00643490625275689, "grad_norm": 2.9004411697387695, "learning_rate": 2.143845089903181e-06, "loss": 0.5634, "step": 155 }, { "epoch": 0.0064764217769682254, "grad_norm": 2.993525981903076, "learning_rate": 2.157676348547718e-06, "loss": 0.6061, "step": 156 }, { "epoch": 0.00651793730117956, "grad_norm": 2.8440351486206055, "learning_rate": 2.171507607192255e-06, "loss": 0.67, "step": 157 }, { "epoch": 0.006559452825390895, "grad_norm": 2.8681094646453857, "learning_rate": 2.1853388658367914e-06, "loss": 0.5085, "step": 158 }, { "epoch": 0.00660096834960223, "grad_norm": 2.7327322959899902, "learning_rate": 2.199170124481328e-06, "loss": 0.6921, "step": 159 }, { "epoch": 0.006642483873813564, "grad_norm": 3.6202938556671143, "learning_rate": 2.2130013831258647e-06, "loss": 0.7741, "step": 160 }, { "epoch": 0.006683999398024899, "grad_norm": 4.092203617095947, "learning_rate": 2.2268326417704016e-06, "loss": 0.6844, "step": 161 }, { "epoch": 0.006725514922236234, "grad_norm": 4.065115451812744, "learning_rate": 2.240663900414938e-06, "loss": 0.7369, "step": 162 }, { "epoch": 0.006767030446447568, "grad_norm": 3.823734760284424, "learning_rate": 2.2544951590594745e-06, "loss": 0.7835, "step": 163 }, { "epoch": 0.006808545970658903, "grad_norm": 3.3332719802856445, "learning_rate": 2.2683264177040114e-06, "loss": 0.6496, "step": 164 }, { "epoch": 0.006850061494870238, "grad_norm": 2.845404863357544, "learning_rate": 2.282157676348548e-06, "loss": 0.6392, "step": 165 }, { "epoch": 0.0068915770190815726, "grad_norm": 3.1901509761810303, "learning_rate": 2.2959889349930843e-06, "loss": 0.5252, "step": 166 }, { "epoch": 0.006933092543292907, "grad_norm": 2.839006185531616, "learning_rate": 2.309820193637621e-06, "loss": 0.5606, "step": 167 }, { "epoch": 0.006974608067504242, "grad_norm": 3.3386640548706055, "learning_rate": 2.323651452282158e-06, "loss": 0.6856, "step": 168 }, { "epoch": 0.0070161235917155776, "grad_norm": 3.4259679317474365, "learning_rate": 2.3374827109266945e-06, "loss": 0.6301, "step": 169 }, { "epoch": 0.007057639115926912, "grad_norm": 2.8164772987365723, "learning_rate": 2.351313969571231e-06, "loss": 0.5074, "step": 170 }, { "epoch": 0.007099154640138247, "grad_norm": 3.1209237575531006, "learning_rate": 2.365145228215768e-06, "loss": 0.6008, "step": 171 }, { "epoch": 0.007140670164349582, "grad_norm": 3.305873394012451, "learning_rate": 2.3789764868603043e-06, "loss": 0.5616, "step": 172 }, { "epoch": 0.007182185688560916, "grad_norm": 3.0092244148254395, "learning_rate": 2.392807745504841e-06, "loss": 0.5951, "step": 173 }, { "epoch": 0.007223701212772251, "grad_norm": 5.045073986053467, "learning_rate": 2.4066390041493776e-06, "loss": 0.5017, "step": 174 }, { "epoch": 0.007265216736983586, "grad_norm": 2.7848739624023438, "learning_rate": 2.4204702627939145e-06, "loss": 0.4709, "step": 175 }, { "epoch": 0.0073067322611949205, "grad_norm": 2.6316373348236084, "learning_rate": 2.434301521438451e-06, "loss": 0.4527, "step": 176 }, { "epoch": 0.007348247785406255, "grad_norm": 3.2943239212036133, "learning_rate": 2.448132780082988e-06, "loss": 0.6568, "step": 177 }, { "epoch": 0.00738976330961759, "grad_norm": 3.330371856689453, "learning_rate": 2.4619640387275247e-06, "loss": 0.6161, "step": 178 }, { "epoch": 0.007431278833828925, "grad_norm": 2.7765607833862305, "learning_rate": 2.475795297372061e-06, "loss": 0.6371, "step": 179 }, { "epoch": 0.007472794358040259, "grad_norm": 2.5282583236694336, "learning_rate": 2.4896265560165977e-06, "loss": 0.5627, "step": 180 }, { "epoch": 0.007514309882251594, "grad_norm": 2.608863115310669, "learning_rate": 2.5034578146611345e-06, "loss": 0.6382, "step": 181 }, { "epoch": 0.007555825406462929, "grad_norm": 2.6973230838775635, "learning_rate": 2.517289073305671e-06, "loss": 0.5561, "step": 182 }, { "epoch": 0.007597340930674264, "grad_norm": 3.4375925064086914, "learning_rate": 2.5311203319502074e-06, "loss": 0.5041, "step": 183 }, { "epoch": 0.007638856454885599, "grad_norm": 3.638913631439209, "learning_rate": 2.5449515905947443e-06, "loss": 0.5431, "step": 184 }, { "epoch": 0.007680371979096934, "grad_norm": 3.1181092262268066, "learning_rate": 2.5587828492392808e-06, "loss": 0.6374, "step": 185 }, { "epoch": 0.0077218875033082685, "grad_norm": 3.1798925399780273, "learning_rate": 2.5726141078838172e-06, "loss": 0.5738, "step": 186 }, { "epoch": 0.007763403027519603, "grad_norm": 3.6046125888824463, "learning_rate": 2.5864453665283545e-06, "loss": 0.5064, "step": 187 }, { "epoch": 0.007804918551730938, "grad_norm": 3.0860960483551025, "learning_rate": 2.600276625172891e-06, "loss": 0.6542, "step": 188 }, { "epoch": 0.007846434075942273, "grad_norm": 2.601212501525879, "learning_rate": 2.614107883817428e-06, "loss": 0.5393, "step": 189 }, { "epoch": 0.007887949600153607, "grad_norm": 2.995072841644287, "learning_rate": 2.6279391424619643e-06, "loss": 0.7267, "step": 190 }, { "epoch": 0.007929465124364942, "grad_norm": 3.2160251140594482, "learning_rate": 2.6417704011065008e-06, "loss": 0.6841, "step": 191 }, { "epoch": 0.007970980648576277, "grad_norm": 3.022284746170044, "learning_rate": 2.6556016597510377e-06, "loss": 0.6924, "step": 192 }, { "epoch": 0.008012496172787612, "grad_norm": 3.5574488639831543, "learning_rate": 2.669432918395574e-06, "loss": 0.5025, "step": 193 }, { "epoch": 0.008054011696998946, "grad_norm": 2.9458134174346924, "learning_rate": 2.6832641770401106e-06, "loss": 0.5409, "step": 194 }, { "epoch": 0.008095527221210281, "grad_norm": 4.338388442993164, "learning_rate": 2.6970954356846475e-06, "loss": 0.5429, "step": 195 }, { "epoch": 0.008137042745421616, "grad_norm": 3.2586705684661865, "learning_rate": 2.710926694329184e-06, "loss": 0.5567, "step": 196 }, { "epoch": 0.00817855826963295, "grad_norm": 2.8139169216156006, "learning_rate": 2.724757952973721e-06, "loss": 0.5279, "step": 197 }, { "epoch": 0.008220073793844285, "grad_norm": 2.977264165878296, "learning_rate": 2.7385892116182577e-06, "loss": 0.6239, "step": 198 }, { "epoch": 0.00826158931805562, "grad_norm": 3.2197110652923584, "learning_rate": 2.7524204702627945e-06, "loss": 0.5113, "step": 199 }, { "epoch": 0.008303104842266954, "grad_norm": 2.4322924613952637, "learning_rate": 2.766251728907331e-06, "loss": 0.5298, "step": 200 }, { "epoch": 0.00834462036647829, "grad_norm": 3.151395082473755, "learning_rate": 2.7800829875518675e-06, "loss": 0.6537, "step": 201 }, { "epoch": 0.008386135890689624, "grad_norm": 3.046276330947876, "learning_rate": 2.7939142461964043e-06, "loss": 0.692, "step": 202 }, { "epoch": 0.00842765141490096, "grad_norm": 3.1704955101013184, "learning_rate": 2.807745504840941e-06, "loss": 0.6474, "step": 203 }, { "epoch": 0.008469166939112295, "grad_norm": 2.5712549686431885, "learning_rate": 2.8215767634854773e-06, "loss": 0.5656, "step": 204 }, { "epoch": 0.00851068246332363, "grad_norm": 3.458798408508301, "learning_rate": 2.835408022130014e-06, "loss": 0.6208, "step": 205 }, { "epoch": 0.008552197987534964, "grad_norm": 2.8905487060546875, "learning_rate": 2.8492392807745506e-06, "loss": 0.7057, "step": 206 }, { "epoch": 0.0085937135117463, "grad_norm": 2.751073122024536, "learning_rate": 2.863070539419087e-06, "loss": 0.654, "step": 207 }, { "epoch": 0.008635229035957634, "grad_norm": 4.320907115936279, "learning_rate": 2.876901798063624e-06, "loss": 0.6027, "step": 208 }, { "epoch": 0.008676744560168969, "grad_norm": 3.103193759918213, "learning_rate": 2.890733056708161e-06, "loss": 0.5129, "step": 209 }, { "epoch": 0.008718260084380303, "grad_norm": 3.0708186626434326, "learning_rate": 2.9045643153526977e-06, "loss": 0.5186, "step": 210 }, { "epoch": 0.008759775608591638, "grad_norm": 3.1115024089813232, "learning_rate": 2.918395573997234e-06, "loss": 0.5155, "step": 211 }, { "epoch": 0.008801291132802973, "grad_norm": 2.9362735748291016, "learning_rate": 2.9322268326417706e-06, "loss": 0.5808, "step": 212 }, { "epoch": 0.008842806657014307, "grad_norm": 3.4261646270751953, "learning_rate": 2.9460580912863075e-06, "loss": 0.5399, "step": 213 }, { "epoch": 0.008884322181225642, "grad_norm": 3.0638489723205566, "learning_rate": 2.959889349930844e-06, "loss": 0.7653, "step": 214 }, { "epoch": 0.008925837705436977, "grad_norm": 4.2414774894714355, "learning_rate": 2.9737206085753804e-06, "loss": 0.4766, "step": 215 }, { "epoch": 0.008967353229648312, "grad_norm": 5.447144508361816, "learning_rate": 2.9875518672199173e-06, "loss": 0.7083, "step": 216 }, { "epoch": 0.009008868753859646, "grad_norm": 2.9829330444335938, "learning_rate": 3.0013831258644537e-06, "loss": 0.6958, "step": 217 }, { "epoch": 0.009050384278070981, "grad_norm": 2.484302282333374, "learning_rate": 3.0152143845089906e-06, "loss": 0.5993, "step": 218 }, { "epoch": 0.009091899802282316, "grad_norm": 3.331953763961792, "learning_rate": 3.029045643153527e-06, "loss": 0.5505, "step": 219 }, { "epoch": 0.00913341532649365, "grad_norm": 3.2379279136657715, "learning_rate": 3.0428769017980635e-06, "loss": 0.667, "step": 220 }, { "epoch": 0.009174930850704985, "grad_norm": 3.142256259918213, "learning_rate": 3.056708160442601e-06, "loss": 0.5954, "step": 221 }, { "epoch": 0.00921644637491632, "grad_norm": 3.092782974243164, "learning_rate": 3.0705394190871373e-06, "loss": 0.4995, "step": 222 }, { "epoch": 0.009257961899127655, "grad_norm": 4.033438682556152, "learning_rate": 3.084370677731674e-06, "loss": 0.4754, "step": 223 }, { "epoch": 0.00929947742333899, "grad_norm": 3.1011598110198975, "learning_rate": 3.0982019363762106e-06, "loss": 0.5463, "step": 224 }, { "epoch": 0.009340992947550324, "grad_norm": 2.660299301147461, "learning_rate": 3.112033195020747e-06, "loss": 0.4942, "step": 225 }, { "epoch": 0.009382508471761659, "grad_norm": 3.015780210494995, "learning_rate": 3.125864453665284e-06, "loss": 0.5958, "step": 226 }, { "epoch": 0.009424023995972993, "grad_norm": 2.6997580528259277, "learning_rate": 3.1396957123098204e-06, "loss": 0.5984, "step": 227 }, { "epoch": 0.009465539520184328, "grad_norm": 3.5918312072753906, "learning_rate": 3.153526970954357e-06, "loss": 0.7439, "step": 228 }, { "epoch": 0.009507055044395663, "grad_norm": 3.141852855682373, "learning_rate": 3.1673582295988937e-06, "loss": 0.5779, "step": 229 }, { "epoch": 0.009548570568607, "grad_norm": 2.6438605785369873, "learning_rate": 3.18118948824343e-06, "loss": 0.5691, "step": 230 }, { "epoch": 0.009590086092818334, "grad_norm": 3.078070640563965, "learning_rate": 3.1950207468879666e-06, "loss": 0.6932, "step": 231 }, { "epoch": 0.009631601617029669, "grad_norm": 2.943774938583374, "learning_rate": 3.208852005532504e-06, "loss": 0.6174, "step": 232 }, { "epoch": 0.009673117141241003, "grad_norm": 2.6616737842559814, "learning_rate": 3.2226832641770404e-06, "loss": 0.497, "step": 233 }, { "epoch": 0.009714632665452338, "grad_norm": 2.9314284324645996, "learning_rate": 3.2365145228215773e-06, "loss": 0.5513, "step": 234 }, { "epoch": 0.009756148189663673, "grad_norm": 4.116387367248535, "learning_rate": 3.2503457814661137e-06, "loss": 0.5395, "step": 235 }, { "epoch": 0.009797663713875008, "grad_norm": 3.0858500003814697, "learning_rate": 3.26417704011065e-06, "loss": 0.387, "step": 236 }, { "epoch": 0.009839179238086342, "grad_norm": 2.5961990356445312, "learning_rate": 3.278008298755187e-06, "loss": 0.3329, "step": 237 }, { "epoch": 0.009880694762297677, "grad_norm": 3.4722955226898193, "learning_rate": 3.2918395573997235e-06, "loss": 0.4933, "step": 238 }, { "epoch": 0.009922210286509012, "grad_norm": 2.8867790699005127, "learning_rate": 3.3056708160442604e-06, "loss": 0.4601, "step": 239 }, { "epoch": 0.009963725810720346, "grad_norm": 2.8061060905456543, "learning_rate": 3.319502074688797e-06, "loss": 0.5416, "step": 240 }, { "epoch": 0.010005241334931681, "grad_norm": 3.2625184059143066, "learning_rate": 3.3333333333333333e-06, "loss": 0.493, "step": 241 }, { "epoch": 0.010046756859143016, "grad_norm": 2.995713472366333, "learning_rate": 3.34716459197787e-06, "loss": 0.6108, "step": 242 }, { "epoch": 0.01008827238335435, "grad_norm": 3.5481393337249756, "learning_rate": 3.360995850622407e-06, "loss": 0.5565, "step": 243 }, { "epoch": 0.010129787907565685, "grad_norm": 2.690784215927124, "learning_rate": 3.374827109266944e-06, "loss": 0.5283, "step": 244 }, { "epoch": 0.01017130343177702, "grad_norm": 2.806718349456787, "learning_rate": 3.3886583679114804e-06, "loss": 0.5212, "step": 245 }, { "epoch": 0.010212818955988355, "grad_norm": 2.717437267303467, "learning_rate": 3.402489626556017e-06, "loss": 0.4189, "step": 246 }, { "epoch": 0.01025433448019969, "grad_norm": 3.0017120838165283, "learning_rate": 3.4163208852005538e-06, "loss": 0.6314, "step": 247 }, { "epoch": 0.010295850004411024, "grad_norm": 3.80369234085083, "learning_rate": 3.43015214384509e-06, "loss": 0.6043, "step": 248 }, { "epoch": 0.010337365528622359, "grad_norm": 3.245269298553467, "learning_rate": 3.4439834024896267e-06, "loss": 0.4516, "step": 249 }, { "epoch": 0.010378881052833694, "grad_norm": 3.231433868408203, "learning_rate": 3.4578146611341635e-06, "loss": 0.6043, "step": 250 }, { "epoch": 0.010420396577045028, "grad_norm": 3.502758264541626, "learning_rate": 3.4716459197787e-06, "loss": 0.5461, "step": 251 }, { "epoch": 0.010461912101256363, "grad_norm": 3.693596363067627, "learning_rate": 3.4854771784232365e-06, "loss": 0.6328, "step": 252 }, { "epoch": 0.010503427625467698, "grad_norm": 2.922269582748413, "learning_rate": 3.4993084370677733e-06, "loss": 0.5425, "step": 253 }, { "epoch": 0.010544943149679032, "grad_norm": 3.3168439865112305, "learning_rate": 3.51313969571231e-06, "loss": 0.6486, "step": 254 }, { "epoch": 0.010586458673890367, "grad_norm": 3.024984836578369, "learning_rate": 3.526970954356847e-06, "loss": 0.7509, "step": 255 }, { "epoch": 0.010627974198101702, "grad_norm": 2.73795747756958, "learning_rate": 3.5408022130013836e-06, "loss": 0.5081, "step": 256 }, { "epoch": 0.010669489722313038, "grad_norm": 3.2240664958953857, "learning_rate": 3.55463347164592e-06, "loss": 0.5776, "step": 257 }, { "epoch": 0.010711005246524373, "grad_norm": 3.9837093353271484, "learning_rate": 3.568464730290457e-06, "loss": 0.5546, "step": 258 }, { "epoch": 0.010752520770735708, "grad_norm": 3.340139389038086, "learning_rate": 3.5822959889349933e-06, "loss": 0.559, "step": 259 }, { "epoch": 0.010794036294947042, "grad_norm": 3.901780843734741, "learning_rate": 3.5961272475795302e-06, "loss": 0.5896, "step": 260 }, { "epoch": 0.010835551819158377, "grad_norm": 3.214529514312744, "learning_rate": 3.6099585062240667e-06, "loss": 0.5621, "step": 261 }, { "epoch": 0.010877067343369712, "grad_norm": 3.059025526046753, "learning_rate": 3.623789764868603e-06, "loss": 0.5645, "step": 262 }, { "epoch": 0.010918582867581047, "grad_norm": 3.6346018314361572, "learning_rate": 3.63762102351314e-06, "loss": 0.3888, "step": 263 }, { "epoch": 0.010960098391792381, "grad_norm": 3.2187442779541016, "learning_rate": 3.6514522821576765e-06, "loss": 0.5738, "step": 264 }, { "epoch": 0.011001613916003716, "grad_norm": 3.9278745651245117, "learning_rate": 3.665283540802213e-06, "loss": 0.4509, "step": 265 }, { "epoch": 0.01104312944021505, "grad_norm": 3.1043529510498047, "learning_rate": 3.6791147994467502e-06, "loss": 0.6108, "step": 266 }, { "epoch": 0.011084644964426385, "grad_norm": 2.8592169284820557, "learning_rate": 3.6929460580912867e-06, "loss": 0.5419, "step": 267 }, { "epoch": 0.01112616048863772, "grad_norm": 3.0895135402679443, "learning_rate": 3.7067773167358236e-06, "loss": 0.5187, "step": 268 }, { "epoch": 0.011167676012849055, "grad_norm": 3.9174959659576416, "learning_rate": 3.72060857538036e-06, "loss": 0.5159, "step": 269 }, { "epoch": 0.01120919153706039, "grad_norm": 2.891540765762329, "learning_rate": 3.7344398340248965e-06, "loss": 0.6018, "step": 270 }, { "epoch": 0.011250707061271724, "grad_norm": 4.038370609283447, "learning_rate": 3.7482710926694334e-06, "loss": 0.5437, "step": 271 }, { "epoch": 0.011292222585483059, "grad_norm": 3.4221436977386475, "learning_rate": 3.76210235131397e-06, "loss": 0.5731, "step": 272 }, { "epoch": 0.011333738109694394, "grad_norm": 2.465074062347412, "learning_rate": 3.7759336099585063e-06, "loss": 0.5877, "step": 273 }, { "epoch": 0.011375253633905728, "grad_norm": 2.5629987716674805, "learning_rate": 3.789764868603043e-06, "loss": 0.5644, "step": 274 }, { "epoch": 0.011416769158117063, "grad_norm": 3.742922306060791, "learning_rate": 3.8035961272475796e-06, "loss": 0.5679, "step": 275 }, { "epoch": 0.011458284682328398, "grad_norm": 3.1597185134887695, "learning_rate": 3.817427385892116e-06, "loss": 0.5275, "step": 276 }, { "epoch": 0.011499800206539732, "grad_norm": 3.0944578647613525, "learning_rate": 3.831258644536653e-06, "loss": 0.6233, "step": 277 }, { "epoch": 0.011541315730751067, "grad_norm": 2.8710243701934814, "learning_rate": 3.84508990318119e-06, "loss": 0.4624, "step": 278 }, { "epoch": 0.011582831254962402, "grad_norm": 3.0089385509490967, "learning_rate": 3.858921161825726e-06, "loss": 0.372, "step": 279 }, { "epoch": 0.011624346779173737, "grad_norm": 3.4758031368255615, "learning_rate": 3.872752420470264e-06, "loss": 0.5543, "step": 280 }, { "epoch": 0.011665862303385071, "grad_norm": 3.2562546730041504, "learning_rate": 3.8865836791148e-06, "loss": 0.5408, "step": 281 }, { "epoch": 0.011707377827596406, "grad_norm": 2.727703809738159, "learning_rate": 3.9004149377593365e-06, "loss": 0.4195, "step": 282 }, { "epoch": 0.01174889335180774, "grad_norm": 2.62381649017334, "learning_rate": 3.914246196403873e-06, "loss": 0.4446, "step": 283 }, { "epoch": 0.011790408876019077, "grad_norm": 2.722885847091675, "learning_rate": 3.928077455048409e-06, "loss": 0.544, "step": 284 }, { "epoch": 0.011831924400230412, "grad_norm": 3.3394622802734375, "learning_rate": 3.941908713692946e-06, "loss": 0.4608, "step": 285 }, { "epoch": 0.011873439924441747, "grad_norm": 2.9234721660614014, "learning_rate": 3.955739972337483e-06, "loss": 0.5377, "step": 286 }, { "epoch": 0.011914955448653081, "grad_norm": 3.338905096054077, "learning_rate": 3.96957123098202e-06, "loss": 0.5704, "step": 287 }, { "epoch": 0.011956470972864416, "grad_norm": 3.0853307247161865, "learning_rate": 3.983402489626556e-06, "loss": 0.6097, "step": 288 }, { "epoch": 0.01199798649707575, "grad_norm": 2.7639729976654053, "learning_rate": 3.997233748271093e-06, "loss": 0.4987, "step": 289 }, { "epoch": 0.012039502021287085, "grad_norm": 3.364974021911621, "learning_rate": 4.01106500691563e-06, "loss": 0.5404, "step": 290 }, { "epoch": 0.01208101754549842, "grad_norm": 3.217263698577881, "learning_rate": 4.024896265560166e-06, "loss": 0.5993, "step": 291 }, { "epoch": 0.012122533069709755, "grad_norm": 3.0117413997650146, "learning_rate": 4.038727524204703e-06, "loss": 0.5403, "step": 292 }, { "epoch": 0.01216404859392109, "grad_norm": 3.188035011291504, "learning_rate": 4.05255878284924e-06, "loss": 0.5644, "step": 293 }, { "epoch": 0.012205564118132424, "grad_norm": 3.4293830394744873, "learning_rate": 4.0663900414937765e-06, "loss": 0.5535, "step": 294 }, { "epoch": 0.012247079642343759, "grad_norm": 2.4705851078033447, "learning_rate": 4.080221300138313e-06, "loss": 0.4566, "step": 295 }, { "epoch": 0.012288595166555094, "grad_norm": 2.7189266681671143, "learning_rate": 4.094052558782849e-06, "loss": 0.4692, "step": 296 }, { "epoch": 0.012330110690766428, "grad_norm": 2.9848430156707764, "learning_rate": 4.107883817427386e-06, "loss": 0.5863, "step": 297 }, { "epoch": 0.012371626214977763, "grad_norm": 4.063648700714111, "learning_rate": 4.121715076071922e-06, "loss": 0.5341, "step": 298 }, { "epoch": 0.012413141739189098, "grad_norm": 3.002403974533081, "learning_rate": 4.13554633471646e-06, "loss": 0.5911, "step": 299 }, { "epoch": 0.012454657263400433, "grad_norm": 3.642374277114868, "learning_rate": 4.149377593360996e-06, "loss": 0.4871, "step": 300 }, { "epoch": 0.012496172787611767, "grad_norm": 3.8945205211639404, "learning_rate": 4.163208852005533e-06, "loss": 0.4914, "step": 301 }, { "epoch": 0.012537688311823102, "grad_norm": 3.385343074798584, "learning_rate": 4.17704011065007e-06, "loss": 0.5224, "step": 302 }, { "epoch": 0.012579203836034437, "grad_norm": 2.8692452907562256, "learning_rate": 4.190871369294606e-06, "loss": 0.578, "step": 303 }, { "epoch": 0.012620719360245771, "grad_norm": 3.0042824745178223, "learning_rate": 4.204702627939143e-06, "loss": 0.4315, "step": 304 }, { "epoch": 0.012662234884457106, "grad_norm": 3.6564769744873047, "learning_rate": 4.218533886583679e-06, "loss": 0.6005, "step": 305 }, { "epoch": 0.01270375040866844, "grad_norm": 3.071967124938965, "learning_rate": 4.232365145228216e-06, "loss": 0.4987, "step": 306 }, { "epoch": 0.012745265932879776, "grad_norm": 2.6459243297576904, "learning_rate": 4.246196403872753e-06, "loss": 0.4483, "step": 307 }, { "epoch": 0.01278678145709111, "grad_norm": 3.3904824256896973, "learning_rate": 4.2600276625172894e-06, "loss": 0.5652, "step": 308 }, { "epoch": 0.012828296981302445, "grad_norm": 3.0711679458618164, "learning_rate": 4.273858921161826e-06, "loss": 0.49, "step": 309 }, { "epoch": 0.01286981250551378, "grad_norm": 3.088371992111206, "learning_rate": 4.287690179806362e-06, "loss": 0.6703, "step": 310 }, { "epoch": 0.012911328029725116, "grad_norm": 3.123436689376831, "learning_rate": 4.3015214384509e-06, "loss": 0.5068, "step": 311 }, { "epoch": 0.012952843553936451, "grad_norm": 2.676908254623413, "learning_rate": 4.315352697095436e-06, "loss": 0.5294, "step": 312 }, { "epoch": 0.012994359078147786, "grad_norm": 3.7913742065429688, "learning_rate": 4.3291839557399726e-06, "loss": 0.4763, "step": 313 }, { "epoch": 0.01303587460235912, "grad_norm": 2.958742618560791, "learning_rate": 4.34301521438451e-06, "loss": 0.4088, "step": 314 }, { "epoch": 0.013077390126570455, "grad_norm": 2.777979850769043, "learning_rate": 4.356846473029046e-06, "loss": 0.4548, "step": 315 }, { "epoch": 0.01311890565078179, "grad_norm": 2.776768684387207, "learning_rate": 4.370677731673583e-06, "loss": 0.5901, "step": 316 }, { "epoch": 0.013160421174993124, "grad_norm": 2.767091989517212, "learning_rate": 4.384508990318119e-06, "loss": 0.6661, "step": 317 }, { "epoch": 0.01320193669920446, "grad_norm": 2.765432357788086, "learning_rate": 4.398340248962656e-06, "loss": 0.3861, "step": 318 }, { "epoch": 0.013243452223415794, "grad_norm": 3.3584866523742676, "learning_rate": 4.412171507607192e-06, "loss": 0.4627, "step": 319 }, { "epoch": 0.013284967747627129, "grad_norm": 2.9380240440368652, "learning_rate": 4.4260027662517294e-06, "loss": 0.589, "step": 320 }, { "epoch": 0.013326483271838463, "grad_norm": 2.8727924823760986, "learning_rate": 4.439834024896266e-06, "loss": 0.512, "step": 321 }, { "epoch": 0.013367998796049798, "grad_norm": 2.5671870708465576, "learning_rate": 4.453665283540803e-06, "loss": 0.5891, "step": 322 }, { "epoch": 0.013409514320261133, "grad_norm": 2.9528250694274902, "learning_rate": 4.46749654218534e-06, "loss": 0.4941, "step": 323 }, { "epoch": 0.013451029844472467, "grad_norm": 2.6887571811676025, "learning_rate": 4.481327800829876e-06, "loss": 0.4638, "step": 324 }, { "epoch": 0.013492545368683802, "grad_norm": 4.1926727294921875, "learning_rate": 4.4951590594744126e-06, "loss": 0.6843, "step": 325 }, { "epoch": 0.013534060892895137, "grad_norm": 3.147386312484741, "learning_rate": 4.508990318118949e-06, "loss": 0.6525, "step": 326 }, { "epoch": 0.013575576417106472, "grad_norm": 3.5330960750579834, "learning_rate": 4.5228215767634855e-06, "loss": 0.6495, "step": 327 }, { "epoch": 0.013617091941317806, "grad_norm": 3.1638660430908203, "learning_rate": 4.536652835408023e-06, "loss": 0.4268, "step": 328 }, { "epoch": 0.013658607465529141, "grad_norm": 3.474837064743042, "learning_rate": 4.550484094052559e-06, "loss": 0.4819, "step": 329 }, { "epoch": 0.013700122989740476, "grad_norm": 3.428387403488159, "learning_rate": 4.564315352697096e-06, "loss": 0.6208, "step": 330 }, { "epoch": 0.01374163851395181, "grad_norm": 3.7199409008026123, "learning_rate": 4.578146611341632e-06, "loss": 0.7124, "step": 331 }, { "epoch": 0.013783154038163145, "grad_norm": 2.9733917713165283, "learning_rate": 4.591977869986169e-06, "loss": 0.5823, "step": 332 }, { "epoch": 0.01382466956237448, "grad_norm": 3.0391509532928467, "learning_rate": 4.605809128630706e-06, "loss": 0.5386, "step": 333 }, { "epoch": 0.013866185086585815, "grad_norm": 3.251690149307251, "learning_rate": 4.619640387275242e-06, "loss": 0.5841, "step": 334 }, { "epoch": 0.01390770061079715, "grad_norm": 3.581230878829956, "learning_rate": 4.63347164591978e-06, "loss": 0.6383, "step": 335 }, { "epoch": 0.013949216135008484, "grad_norm": 2.858675241470337, "learning_rate": 4.647302904564316e-06, "loss": 0.6152, "step": 336 }, { "epoch": 0.013990731659219819, "grad_norm": 2.803469657897949, "learning_rate": 4.661134163208853e-06, "loss": 0.5241, "step": 337 }, { "epoch": 0.014032247183431155, "grad_norm": 4.285501956939697, "learning_rate": 4.674965421853389e-06, "loss": 0.5921, "step": 338 }, { "epoch": 0.01407376270764249, "grad_norm": 3.4219799041748047, "learning_rate": 4.6887966804979255e-06, "loss": 0.542, "step": 339 }, { "epoch": 0.014115278231853825, "grad_norm": 2.413447618484497, "learning_rate": 4.702627939142462e-06, "loss": 0.4609, "step": 340 }, { "epoch": 0.01415679375606516, "grad_norm": 2.816197156906128, "learning_rate": 4.716459197786999e-06, "loss": 0.4018, "step": 341 }, { "epoch": 0.014198309280276494, "grad_norm": 3.442103385925293, "learning_rate": 4.730290456431536e-06, "loss": 0.4952, "step": 342 }, { "epoch": 0.014239824804487829, "grad_norm": 4.025960922241211, "learning_rate": 4.744121715076072e-06, "loss": 0.5959, "step": 343 }, { "epoch": 0.014281340328699163, "grad_norm": 2.9238574504852295, "learning_rate": 4.757952973720609e-06, "loss": 0.6246, "step": 344 }, { "epoch": 0.014322855852910498, "grad_norm": 2.6860644817352295, "learning_rate": 4.771784232365146e-06, "loss": 0.5906, "step": 345 }, { "epoch": 0.014364371377121833, "grad_norm": 3.0648250579833984, "learning_rate": 4.785615491009682e-06, "loss": 0.4505, "step": 346 }, { "epoch": 0.014405886901333168, "grad_norm": 2.3918306827545166, "learning_rate": 4.799446749654219e-06, "loss": 0.4527, "step": 347 }, { "epoch": 0.014447402425544502, "grad_norm": 2.744127035140991, "learning_rate": 4.813278008298755e-06, "loss": 0.4527, "step": 348 }, { "epoch": 0.014488917949755837, "grad_norm": 2.752089500427246, "learning_rate": 4.827109266943293e-06, "loss": 0.4386, "step": 349 }, { "epoch": 0.014530433473967172, "grad_norm": 2.946578025817871, "learning_rate": 4.840940525587829e-06, "loss": 0.4238, "step": 350 }, { "epoch": 0.014571948998178506, "grad_norm": 3.048161268234253, "learning_rate": 4.8547717842323655e-06, "loss": 0.4152, "step": 351 }, { "epoch": 0.014613464522389841, "grad_norm": 4.07288932800293, "learning_rate": 4.868603042876902e-06, "loss": 0.6468, "step": 352 }, { "epoch": 0.014654980046601176, "grad_norm": 3.277064085006714, "learning_rate": 4.882434301521438e-06, "loss": 0.6495, "step": 353 }, { "epoch": 0.01469649557081251, "grad_norm": 3.1008694171905518, "learning_rate": 4.896265560165976e-06, "loss": 0.7205, "step": 354 }, { "epoch": 0.014738011095023845, "grad_norm": 3.2905783653259277, "learning_rate": 4.910096818810512e-06, "loss": 0.4092, "step": 355 }, { "epoch": 0.01477952661923518, "grad_norm": 3.519928455352783, "learning_rate": 4.9239280774550495e-06, "loss": 0.4649, "step": 356 }, { "epoch": 0.014821042143446515, "grad_norm": 3.101219892501831, "learning_rate": 4.937759336099586e-06, "loss": 0.4616, "step": 357 }, { "epoch": 0.01486255766765785, "grad_norm": 3.250734567642212, "learning_rate": 4.951590594744122e-06, "loss": 0.4443, "step": 358 }, { "epoch": 0.014904073191869184, "grad_norm": 2.6317665576934814, "learning_rate": 4.965421853388659e-06, "loss": 0.4859, "step": 359 }, { "epoch": 0.014945588716080519, "grad_norm": 2.9152190685272217, "learning_rate": 4.979253112033195e-06, "loss": 0.5507, "step": 360 }, { "epoch": 0.014987104240291853, "grad_norm": 3.014798641204834, "learning_rate": 4.993084370677732e-06, "loss": 0.5444, "step": 361 }, { "epoch": 0.015028619764503188, "grad_norm": 3.8227241039276123, "learning_rate": 5.006915629322269e-06, "loss": 0.5878, "step": 362 }, { "epoch": 0.015070135288714523, "grad_norm": 3.4045326709747314, "learning_rate": 5.0207468879668055e-06, "loss": 0.514, "step": 363 }, { "epoch": 0.015111650812925858, "grad_norm": 2.9630701541900635, "learning_rate": 5.034578146611342e-06, "loss": 0.6705, "step": 364 }, { "epoch": 0.015153166337137194, "grad_norm": 2.8422393798828125, "learning_rate": 5.0484094052558784e-06, "loss": 0.4874, "step": 365 }, { "epoch": 0.015194681861348529, "grad_norm": 3.009655475616455, "learning_rate": 5.062240663900415e-06, "loss": 0.6482, "step": 366 }, { "epoch": 0.015236197385559863, "grad_norm": 3.2132620811462402, "learning_rate": 5.076071922544951e-06, "loss": 0.568, "step": 367 }, { "epoch": 0.015277712909771198, "grad_norm": 4.0302557945251465, "learning_rate": 5.089903181189489e-06, "loss": 0.609, "step": 368 }, { "epoch": 0.015319228433982533, "grad_norm": 2.919079303741455, "learning_rate": 5.103734439834025e-06, "loss": 0.4988, "step": 369 }, { "epoch": 0.015360743958193868, "grad_norm": 3.091975688934326, "learning_rate": 5.1175656984785616e-06, "loss": 0.5457, "step": 370 }, { "epoch": 0.015402259482405202, "grad_norm": 2.6801741123199463, "learning_rate": 5.131396957123098e-06, "loss": 0.3963, "step": 371 }, { "epoch": 0.015443775006616537, "grad_norm": 3.5842175483703613, "learning_rate": 5.1452282157676345e-06, "loss": 0.585, "step": 372 }, { "epoch": 0.015485290530827872, "grad_norm": 2.3941051959991455, "learning_rate": 5.159059474412173e-06, "loss": 0.5818, "step": 373 }, { "epoch": 0.015526806055039206, "grad_norm": 3.257728099822998, "learning_rate": 5.172890733056709e-06, "loss": 0.6378, "step": 374 }, { "epoch": 0.015568321579250541, "grad_norm": 2.7220308780670166, "learning_rate": 5.1867219917012455e-06, "loss": 0.6143, "step": 375 }, { "epoch": 0.015609837103461876, "grad_norm": 3.239837646484375, "learning_rate": 5.200553250345782e-06, "loss": 0.541, "step": 376 }, { "epoch": 0.01565135262767321, "grad_norm": 3.564047336578369, "learning_rate": 5.214384508990319e-06, "loss": 0.5607, "step": 377 }, { "epoch": 0.015692868151884545, "grad_norm": 2.932293176651001, "learning_rate": 5.228215767634856e-06, "loss": 0.598, "step": 378 }, { "epoch": 0.015734383676095882, "grad_norm": 2.620748519897461, "learning_rate": 5.242047026279392e-06, "loss": 0.4846, "step": 379 }, { "epoch": 0.015775899200307215, "grad_norm": 3.2048521041870117, "learning_rate": 5.255878284923929e-06, "loss": 0.5881, "step": 380 }, { "epoch": 0.01581741472451855, "grad_norm": 3.225799322128296, "learning_rate": 5.269709543568465e-06, "loss": 0.5082, "step": 381 }, { "epoch": 0.015858930248729884, "grad_norm": 2.3641602993011475, "learning_rate": 5.2835408022130016e-06, "loss": 0.5388, "step": 382 }, { "epoch": 0.01590044577294122, "grad_norm": 2.8053972721099854, "learning_rate": 5.297372060857539e-06, "loss": 0.5912, "step": 383 }, { "epoch": 0.015941961297152554, "grad_norm": 3.0996501445770264, "learning_rate": 5.311203319502075e-06, "loss": 0.561, "step": 384 }, { "epoch": 0.01598347682136389, "grad_norm": 2.796316146850586, "learning_rate": 5.325034578146612e-06, "loss": 0.6054, "step": 385 }, { "epoch": 0.016024992345575223, "grad_norm": 3.579057216644287, "learning_rate": 5.338865836791148e-06, "loss": 0.6679, "step": 386 }, { "epoch": 0.01606650786978656, "grad_norm": 3.0613372325897217, "learning_rate": 5.352697095435685e-06, "loss": 0.6178, "step": 387 }, { "epoch": 0.016108023393997892, "grad_norm": 2.848320484161377, "learning_rate": 5.366528354080221e-06, "loss": 0.4984, "step": 388 }, { "epoch": 0.01614953891820923, "grad_norm": 2.8282015323638916, "learning_rate": 5.3803596127247585e-06, "loss": 0.7191, "step": 389 }, { "epoch": 0.016191054442420562, "grad_norm": 2.6741974353790283, "learning_rate": 5.394190871369295e-06, "loss": 0.5563, "step": 390 }, { "epoch": 0.0162325699666319, "grad_norm": 2.794434070587158, "learning_rate": 5.408022130013831e-06, "loss": 0.3697, "step": 391 }, { "epoch": 0.01627408549084323, "grad_norm": 3.495966672897339, "learning_rate": 5.421853388658368e-06, "loss": 0.6092, "step": 392 }, { "epoch": 0.016315601015054568, "grad_norm": 2.5805397033691406, "learning_rate": 5.435684647302904e-06, "loss": 0.5645, "step": 393 }, { "epoch": 0.0163571165392659, "grad_norm": 2.6652629375457764, "learning_rate": 5.449515905947442e-06, "loss": 0.4871, "step": 394 }, { "epoch": 0.016398632063477237, "grad_norm": 2.9578053951263428, "learning_rate": 5.463347164591978e-06, "loss": 0.6068, "step": 395 }, { "epoch": 0.01644014758768857, "grad_norm": 3.577380418777466, "learning_rate": 5.477178423236515e-06, "loss": 0.4784, "step": 396 }, { "epoch": 0.016481663111899907, "grad_norm": 3.237849712371826, "learning_rate": 5.491009681881052e-06, "loss": 0.5862, "step": 397 }, { "epoch": 0.01652317863611124, "grad_norm": 2.8934834003448486, "learning_rate": 5.504840940525589e-06, "loss": 0.5899, "step": 398 }, { "epoch": 0.016564694160322576, "grad_norm": 2.9801812171936035, "learning_rate": 5.5186721991701256e-06, "loss": 0.658, "step": 399 }, { "epoch": 0.01660620968453391, "grad_norm": 3.402980089187622, "learning_rate": 5.532503457814662e-06, "loss": 0.5684, "step": 400 }, { "epoch": 0.016647725208745245, "grad_norm": 3.1019604206085205, "learning_rate": 5.5463347164591985e-06, "loss": 0.5898, "step": 401 }, { "epoch": 0.01668924073295658, "grad_norm": 2.8211960792541504, "learning_rate": 5.560165975103735e-06, "loss": 0.5509, "step": 402 }, { "epoch": 0.016730756257167915, "grad_norm": 4.052555561065674, "learning_rate": 5.573997233748271e-06, "loss": 0.6039, "step": 403 }, { "epoch": 0.016772271781379248, "grad_norm": 2.46396803855896, "learning_rate": 5.587828492392809e-06, "loss": 0.4535, "step": 404 }, { "epoch": 0.016813787305590584, "grad_norm": 3.0954437255859375, "learning_rate": 5.601659751037345e-06, "loss": 0.5307, "step": 405 }, { "epoch": 0.01685530282980192, "grad_norm": 2.904167413711548, "learning_rate": 5.615491009681882e-06, "loss": 0.558, "step": 406 }, { "epoch": 0.016896818354013254, "grad_norm": 3.168890953063965, "learning_rate": 5.629322268326418e-06, "loss": 0.4592, "step": 407 }, { "epoch": 0.01693833387822459, "grad_norm": 3.26910662651062, "learning_rate": 5.6431535269709545e-06, "loss": 0.6057, "step": 408 }, { "epoch": 0.016979849402435923, "grad_norm": 3.1760406494140625, "learning_rate": 5.656984785615491e-06, "loss": 0.4995, "step": 409 }, { "epoch": 0.01702136492664726, "grad_norm": 3.158069133758545, "learning_rate": 5.670816044260028e-06, "loss": 0.5598, "step": 410 }, { "epoch": 0.017062880450858593, "grad_norm": 3.224097728729248, "learning_rate": 5.684647302904565e-06, "loss": 0.5463, "step": 411 }, { "epoch": 0.01710439597506993, "grad_norm": 2.809896945953369, "learning_rate": 5.698478561549101e-06, "loss": 0.457, "step": 412 }, { "epoch": 0.017145911499281262, "grad_norm": 3.4677140712738037, "learning_rate": 5.712309820193638e-06, "loss": 0.5301, "step": 413 }, { "epoch": 0.0171874270234926, "grad_norm": 2.9901185035705566, "learning_rate": 5.726141078838174e-06, "loss": 0.6258, "step": 414 }, { "epoch": 0.01722894254770393, "grad_norm": 2.9609591960906982, "learning_rate": 5.739972337482711e-06, "loss": 0.5587, "step": 415 }, { "epoch": 0.017270458071915268, "grad_norm": 2.7476112842559814, "learning_rate": 5.753803596127248e-06, "loss": 0.6904, "step": 416 }, { "epoch": 0.0173119735961266, "grad_norm": 2.6822099685668945, "learning_rate": 5.767634854771784e-06, "loss": 0.4506, "step": 417 }, { "epoch": 0.017353489120337937, "grad_norm": 3.1075427532196045, "learning_rate": 5.781466113416322e-06, "loss": 0.4031, "step": 418 }, { "epoch": 0.01739500464454927, "grad_norm": 2.889071226119995, "learning_rate": 5.795297372060859e-06, "loss": 0.5986, "step": 419 }, { "epoch": 0.017436520168760607, "grad_norm": 2.923687696456909, "learning_rate": 5.809128630705395e-06, "loss": 0.4639, "step": 420 }, { "epoch": 0.01747803569297194, "grad_norm": 3.585698366165161, "learning_rate": 5.822959889349932e-06, "loss": 0.5422, "step": 421 }, { "epoch": 0.017519551217183276, "grad_norm": 3.472992420196533, "learning_rate": 5.836791147994468e-06, "loss": 0.5666, "step": 422 }, { "epoch": 0.01756106674139461, "grad_norm": 2.9659230709075928, "learning_rate": 5.850622406639005e-06, "loss": 0.5064, "step": 423 }, { "epoch": 0.017602582265605946, "grad_norm": 2.8675856590270996, "learning_rate": 5.864453665283541e-06, "loss": 0.531, "step": 424 }, { "epoch": 0.01764409778981728, "grad_norm": 3.013176679611206, "learning_rate": 5.8782849239280785e-06, "loss": 0.6911, "step": 425 }, { "epoch": 0.017685613314028615, "grad_norm": 3.081146240234375, "learning_rate": 5.892116182572615e-06, "loss": 0.5499, "step": 426 }, { "epoch": 0.017727128838239948, "grad_norm": 3.242591619491577, "learning_rate": 5.905947441217151e-06, "loss": 0.5634, "step": 427 }, { "epoch": 0.017768644362451284, "grad_norm": 2.7928802967071533, "learning_rate": 5.919778699861688e-06, "loss": 0.5913, "step": 428 }, { "epoch": 0.017810159886662617, "grad_norm": 2.82369327545166, "learning_rate": 5.933609958506224e-06, "loss": 0.5594, "step": 429 }, { "epoch": 0.017851675410873954, "grad_norm": 3.065826416015625, "learning_rate": 5.947441217150761e-06, "loss": 0.4566, "step": 430 }, { "epoch": 0.017893190935085287, "grad_norm": 2.987293243408203, "learning_rate": 5.961272475795298e-06, "loss": 0.411, "step": 431 }, { "epoch": 0.017934706459296623, "grad_norm": 2.870295524597168, "learning_rate": 5.9751037344398345e-06, "loss": 0.509, "step": 432 }, { "epoch": 0.01797622198350796, "grad_norm": 3.503751754760742, "learning_rate": 5.988934993084371e-06, "loss": 0.4865, "step": 433 }, { "epoch": 0.018017737507719293, "grad_norm": 2.9665911197662354, "learning_rate": 6.0027662517289074e-06, "loss": 0.4649, "step": 434 }, { "epoch": 0.01805925303193063, "grad_norm": 2.7526824474334717, "learning_rate": 6.016597510373444e-06, "loss": 0.6323, "step": 435 }, { "epoch": 0.018100768556141962, "grad_norm": 2.6278510093688965, "learning_rate": 6.030428769017981e-06, "loss": 0.5517, "step": 436 }, { "epoch": 0.0181422840803533, "grad_norm": 4.806606292724609, "learning_rate": 6.044260027662518e-06, "loss": 0.5264, "step": 437 }, { "epoch": 0.01818379960456463, "grad_norm": 3.212374210357666, "learning_rate": 6.058091286307054e-06, "loss": 0.5796, "step": 438 }, { "epoch": 0.018225315128775968, "grad_norm": 3.491468667984009, "learning_rate": 6.0719225449515906e-06, "loss": 0.6518, "step": 439 }, { "epoch": 0.0182668306529873, "grad_norm": 2.975454807281494, "learning_rate": 6.085753803596127e-06, "loss": 0.4364, "step": 440 }, { "epoch": 0.018308346177198637, "grad_norm": 3.8690595626831055, "learning_rate": 6.099585062240665e-06, "loss": 0.3891, "step": 441 }, { "epoch": 0.01834986170140997, "grad_norm": 2.5661630630493164, "learning_rate": 6.113416320885202e-06, "loss": 0.5548, "step": 442 }, { "epoch": 0.018391377225621307, "grad_norm": 3.822277069091797, "learning_rate": 6.127247579529738e-06, "loss": 0.5098, "step": 443 }, { "epoch": 0.01843289274983264, "grad_norm": 2.572784185409546, "learning_rate": 6.1410788381742745e-06, "loss": 0.4261, "step": 444 }, { "epoch": 0.018474408274043976, "grad_norm": 3.026252269744873, "learning_rate": 6.154910096818811e-06, "loss": 0.5654, "step": 445 }, { "epoch": 0.01851592379825531, "grad_norm": 3.7399003505706787, "learning_rate": 6.168741355463348e-06, "loss": 0.5209, "step": 446 }, { "epoch": 0.018557439322466646, "grad_norm": 3.096705913543701, "learning_rate": 6.182572614107885e-06, "loss": 0.528, "step": 447 }, { "epoch": 0.01859895484667798, "grad_norm": 2.5256636142730713, "learning_rate": 6.196403872752421e-06, "loss": 0.6153, "step": 448 }, { "epoch": 0.018640470370889315, "grad_norm": 2.4912703037261963, "learning_rate": 6.210235131396958e-06, "loss": 0.5051, "step": 449 }, { "epoch": 0.018681985895100648, "grad_norm": 2.922581672668457, "learning_rate": 6.224066390041494e-06, "loss": 0.3994, "step": 450 }, { "epoch": 0.018723501419311984, "grad_norm": 2.744086503982544, "learning_rate": 6.237897648686031e-06, "loss": 0.4399, "step": 451 }, { "epoch": 0.018765016943523317, "grad_norm": 3.815863847732544, "learning_rate": 6.251728907330568e-06, "loss": 0.7422, "step": 452 }, { "epoch": 0.018806532467734654, "grad_norm": 3.0962727069854736, "learning_rate": 6.265560165975104e-06, "loss": 0.6091, "step": 453 }, { "epoch": 0.018848047991945987, "grad_norm": 2.4808828830718994, "learning_rate": 6.279391424619641e-06, "loss": 0.5707, "step": 454 }, { "epoch": 0.018889563516157323, "grad_norm": 3.6529653072357178, "learning_rate": 6.293222683264177e-06, "loss": 0.5031, "step": 455 }, { "epoch": 0.018931079040368656, "grad_norm": 2.884275436401367, "learning_rate": 6.307053941908714e-06, "loss": 0.4958, "step": 456 }, { "epoch": 0.018972594564579993, "grad_norm": 3.206301689147949, "learning_rate": 6.320885200553251e-06, "loss": 0.5478, "step": 457 }, { "epoch": 0.019014110088791326, "grad_norm": 3.259263753890991, "learning_rate": 6.3347164591977875e-06, "loss": 0.6393, "step": 458 }, { "epoch": 0.019055625613002662, "grad_norm": 2.8891677856445312, "learning_rate": 6.348547717842324e-06, "loss": 0.5083, "step": 459 }, { "epoch": 0.019097141137214, "grad_norm": 2.5557498931884766, "learning_rate": 6.36237897648686e-06, "loss": 0.5923, "step": 460 }, { "epoch": 0.01913865666142533, "grad_norm": 3.1770923137664795, "learning_rate": 6.376210235131397e-06, "loss": 0.588, "step": 461 }, { "epoch": 0.019180172185636668, "grad_norm": 2.2692182064056396, "learning_rate": 6.390041493775933e-06, "loss": 0.4158, "step": 462 }, { "epoch": 0.019221687709848, "grad_norm": 3.06131649017334, "learning_rate": 6.403872752420471e-06, "loss": 0.4838, "step": 463 }, { "epoch": 0.019263203234059337, "grad_norm": 2.1308982372283936, "learning_rate": 6.417704011065008e-06, "loss": 0.462, "step": 464 }, { "epoch": 0.01930471875827067, "grad_norm": 2.789630889892578, "learning_rate": 6.431535269709544e-06, "loss": 0.5754, "step": 465 }, { "epoch": 0.019346234282482007, "grad_norm": 3.228976011276245, "learning_rate": 6.445366528354081e-06, "loss": 0.6116, "step": 466 }, { "epoch": 0.01938774980669334, "grad_norm": 3.5753705501556396, "learning_rate": 6.459197786998618e-06, "loss": 0.4803, "step": 467 }, { "epoch": 0.019429265330904676, "grad_norm": 2.3710992336273193, "learning_rate": 6.4730290456431546e-06, "loss": 0.4487, "step": 468 }, { "epoch": 0.01947078085511601, "grad_norm": 2.8556277751922607, "learning_rate": 6.486860304287691e-06, "loss": 0.492, "step": 469 }, { "epoch": 0.019512296379327346, "grad_norm": 4.3397955894470215, "learning_rate": 6.5006915629322275e-06, "loss": 0.5595, "step": 470 }, { "epoch": 0.01955381190353868, "grad_norm": 2.3879024982452393, "learning_rate": 6.514522821576764e-06, "loss": 0.47, "step": 471 }, { "epoch": 0.019595327427750015, "grad_norm": 2.5694899559020996, "learning_rate": 6.5283540802213e-06, "loss": 0.5501, "step": 472 }, { "epoch": 0.019636842951961348, "grad_norm": 2.8867268562316895, "learning_rate": 6.542185338865838e-06, "loss": 0.5215, "step": 473 }, { "epoch": 0.019678358476172685, "grad_norm": 3.4267592430114746, "learning_rate": 6.556016597510374e-06, "loss": 0.6158, "step": 474 }, { "epoch": 0.019719874000384018, "grad_norm": 3.452503204345703, "learning_rate": 6.569847856154911e-06, "loss": 0.5747, "step": 475 }, { "epoch": 0.019761389524595354, "grad_norm": 3.406935930252075, "learning_rate": 6.583679114799447e-06, "loss": 0.5034, "step": 476 }, { "epoch": 0.019802905048806687, "grad_norm": 3.5686728954315186, "learning_rate": 6.5975103734439835e-06, "loss": 0.3599, "step": 477 }, { "epoch": 0.019844420573018023, "grad_norm": 2.4496846199035645, "learning_rate": 6.611341632088521e-06, "loss": 0.499, "step": 478 }, { "epoch": 0.019885936097229356, "grad_norm": 2.429445743560791, "learning_rate": 6.625172890733057e-06, "loss": 0.418, "step": 479 }, { "epoch": 0.019927451621440693, "grad_norm": 3.200239896774292, "learning_rate": 6.639004149377594e-06, "loss": 0.6624, "step": 480 }, { "epoch": 0.019968967145652026, "grad_norm": 3.1705448627471924, "learning_rate": 6.65283540802213e-06, "loss": 0.5589, "step": 481 }, { "epoch": 0.020010482669863362, "grad_norm": 3.1231300830841064, "learning_rate": 6.666666666666667e-06, "loss": 0.6177, "step": 482 }, { "epoch": 0.020051998194074695, "grad_norm": 2.5370380878448486, "learning_rate": 6.680497925311203e-06, "loss": 0.5747, "step": 483 }, { "epoch": 0.02009351371828603, "grad_norm": 2.6173007488250732, "learning_rate": 6.69432918395574e-06, "loss": 0.4906, "step": 484 }, { "epoch": 0.020135029242497365, "grad_norm": 2.943448781967163, "learning_rate": 6.708160442600277e-06, "loss": 0.4787, "step": 485 }, { "epoch": 0.0201765447667087, "grad_norm": 3.069031000137329, "learning_rate": 6.721991701244814e-06, "loss": 0.5903, "step": 486 }, { "epoch": 0.020218060290920038, "grad_norm": 3.324537515640259, "learning_rate": 6.735822959889351e-06, "loss": 0.5845, "step": 487 }, { "epoch": 0.02025957581513137, "grad_norm": 2.738145589828491, "learning_rate": 6.749654218533888e-06, "loss": 0.6999, "step": 488 }, { "epoch": 0.020301091339342707, "grad_norm": 3.0008349418640137, "learning_rate": 6.763485477178424e-06, "loss": 0.6535, "step": 489 }, { "epoch": 0.02034260686355404, "grad_norm": 2.6525115966796875, "learning_rate": 6.777316735822961e-06, "loss": 0.4856, "step": 490 }, { "epoch": 0.020384122387765376, "grad_norm": 3.0553033351898193, "learning_rate": 6.791147994467497e-06, "loss": 0.5474, "step": 491 }, { "epoch": 0.02042563791197671, "grad_norm": 2.890136480331421, "learning_rate": 6.804979253112034e-06, "loss": 0.4408, "step": 492 }, { "epoch": 0.020467153436188046, "grad_norm": 3.619598150253296, "learning_rate": 6.81881051175657e-06, "loss": 0.6452, "step": 493 }, { "epoch": 0.02050866896039938, "grad_norm": 3.3779795169830322, "learning_rate": 6.8326417704011075e-06, "loss": 0.5762, "step": 494 }, { "epoch": 0.020550184484610715, "grad_norm": 3.5213608741760254, "learning_rate": 6.846473029045644e-06, "loss": 0.6504, "step": 495 }, { "epoch": 0.020591700008822048, "grad_norm": 3.1827757358551025, "learning_rate": 6.86030428769018e-06, "loss": 0.5018, "step": 496 }, { "epoch": 0.020633215533033385, "grad_norm": 2.724945545196533, "learning_rate": 6.874135546334717e-06, "loss": 0.5703, "step": 497 }, { "epoch": 0.020674731057244718, "grad_norm": 2.960106134414673, "learning_rate": 6.887966804979253e-06, "loss": 0.4724, "step": 498 }, { "epoch": 0.020716246581456054, "grad_norm": 2.7234702110290527, "learning_rate": 6.901798063623791e-06, "loss": 0.4432, "step": 499 }, { "epoch": 0.020757762105667387, "grad_norm": 3.073042392730713, "learning_rate": 6.915629322268327e-06, "loss": 0.5937, "step": 500 }, { "epoch": 0.020799277629878724, "grad_norm": 2.7129621505737305, "learning_rate": 6.9294605809128635e-06, "loss": 0.4917, "step": 501 }, { "epoch": 0.020840793154090057, "grad_norm": 2.872666358947754, "learning_rate": 6.9432918395574e-06, "loss": 0.5183, "step": 502 }, { "epoch": 0.020882308678301393, "grad_norm": 3.0399513244628906, "learning_rate": 6.9571230982019365e-06, "loss": 0.6323, "step": 503 }, { "epoch": 0.020923824202512726, "grad_norm": 2.9214680194854736, "learning_rate": 6.970954356846473e-06, "loss": 0.3814, "step": 504 }, { "epoch": 0.020965339726724062, "grad_norm": 3.303886890411377, "learning_rate": 6.98478561549101e-06, "loss": 0.6505, "step": 505 }, { "epoch": 0.021006855250935395, "grad_norm": 2.5806760787963867, "learning_rate": 6.998616874135547e-06, "loss": 0.403, "step": 506 }, { "epoch": 0.021048370775146732, "grad_norm": 3.1168360710144043, "learning_rate": 7.012448132780083e-06, "loss": 0.6101, "step": 507 }, { "epoch": 0.021089886299358065, "grad_norm": 3.1640982627868652, "learning_rate": 7.02627939142462e-06, "loss": 0.4579, "step": 508 }, { "epoch": 0.0211314018235694, "grad_norm": 2.5141077041625977, "learning_rate": 7.040110650069158e-06, "loss": 0.5667, "step": 509 }, { "epoch": 0.021172917347780734, "grad_norm": 2.9691085815429688, "learning_rate": 7.053941908713694e-06, "loss": 0.5932, "step": 510 }, { "epoch": 0.02121443287199207, "grad_norm": 3.1242284774780273, "learning_rate": 7.067773167358231e-06, "loss": 0.4161, "step": 511 }, { "epoch": 0.021255948396203404, "grad_norm": 2.721057176589966, "learning_rate": 7.081604426002767e-06, "loss": 0.4542, "step": 512 }, { "epoch": 0.02129746392041474, "grad_norm": 3.216341733932495, "learning_rate": 7.0954356846473036e-06, "loss": 0.6045, "step": 513 }, { "epoch": 0.021338979444626077, "grad_norm": 3.0969228744506836, "learning_rate": 7.10926694329184e-06, "loss": 0.4543, "step": 514 }, { "epoch": 0.02138049496883741, "grad_norm": 2.9397847652435303, "learning_rate": 7.123098201936377e-06, "loss": 0.6007, "step": 515 }, { "epoch": 0.021422010493048746, "grad_norm": 3.322984218597412, "learning_rate": 7.136929460580914e-06, "loss": 0.4538, "step": 516 }, { "epoch": 0.02146352601726008, "grad_norm": 3.3466806411743164, "learning_rate": 7.15076071922545e-06, "loss": 0.4704, "step": 517 }, { "epoch": 0.021505041541471415, "grad_norm": 2.796905755996704, "learning_rate": 7.164591977869987e-06, "loss": 0.6017, "step": 518 }, { "epoch": 0.02154655706568275, "grad_norm": 2.5928609371185303, "learning_rate": 7.178423236514523e-06, "loss": 0.5835, "step": 519 }, { "epoch": 0.021588072589894085, "grad_norm": 2.2468230724334717, "learning_rate": 7.1922544951590604e-06, "loss": 0.54, "step": 520 }, { "epoch": 0.021629588114105418, "grad_norm": 2.628056764602661, "learning_rate": 7.206085753803597e-06, "loss": 0.5856, "step": 521 }, { "epoch": 0.021671103638316754, "grad_norm": 2.4238860607147217, "learning_rate": 7.219917012448133e-06, "loss": 0.5261, "step": 522 }, { "epoch": 0.021712619162528087, "grad_norm": 3.1750195026397705, "learning_rate": 7.23374827109267e-06, "loss": 0.6128, "step": 523 }, { "epoch": 0.021754134686739424, "grad_norm": 2.2545371055603027, "learning_rate": 7.247579529737206e-06, "loss": 0.5088, "step": 524 }, { "epoch": 0.021795650210950757, "grad_norm": 2.7617552280426025, "learning_rate": 7.261410788381743e-06, "loss": 0.5932, "step": 525 }, { "epoch": 0.021837165735162093, "grad_norm": 2.6956448554992676, "learning_rate": 7.27524204702628e-06, "loss": 0.5006, "step": 526 }, { "epoch": 0.021878681259373426, "grad_norm": 3.1873550415039062, "learning_rate": 7.2890733056708165e-06, "loss": 0.5616, "step": 527 }, { "epoch": 0.021920196783584762, "grad_norm": 2.8252744674682617, "learning_rate": 7.302904564315353e-06, "loss": 0.445, "step": 528 }, { "epoch": 0.021961712307796095, "grad_norm": 2.8114004135131836, "learning_rate": 7.316735822959889e-06, "loss": 0.4693, "step": 529 }, { "epoch": 0.022003227832007432, "grad_norm": 2.9640421867370605, "learning_rate": 7.330567081604426e-06, "loss": 0.5069, "step": 530 }, { "epoch": 0.022044743356218765, "grad_norm": 2.6145129203796387, "learning_rate": 7.344398340248964e-06, "loss": 0.4694, "step": 531 }, { "epoch": 0.0220862588804301, "grad_norm": 3.1056911945343018, "learning_rate": 7.3582295988935005e-06, "loss": 0.5906, "step": 532 }, { "epoch": 0.022127774404641434, "grad_norm": 3.054353713989258, "learning_rate": 7.372060857538037e-06, "loss": 0.5335, "step": 533 }, { "epoch": 0.02216928992885277, "grad_norm": 3.0268406867980957, "learning_rate": 7.385892116182573e-06, "loss": 0.7352, "step": 534 }, { "epoch": 0.022210805453064104, "grad_norm": 3.1123478412628174, "learning_rate": 7.39972337482711e-06, "loss": 0.4496, "step": 535 }, { "epoch": 0.02225232097727544, "grad_norm": 2.642106056213379, "learning_rate": 7.413554633471647e-06, "loss": 0.5553, "step": 536 }, { "epoch": 0.022293836501486773, "grad_norm": 3.4253292083740234, "learning_rate": 7.427385892116184e-06, "loss": 0.6077, "step": 537 }, { "epoch": 0.02233535202569811, "grad_norm": 2.588682174682617, "learning_rate": 7.44121715076072e-06, "loss": 0.5048, "step": 538 }, { "epoch": 0.022376867549909443, "grad_norm": 3.495633125305176, "learning_rate": 7.4550484094052565e-06, "loss": 0.5545, "step": 539 }, { "epoch": 0.02241838307412078, "grad_norm": 2.5629494190216064, "learning_rate": 7.468879668049793e-06, "loss": 0.5901, "step": 540 }, { "epoch": 0.022459898598332115, "grad_norm": 2.711869239807129, "learning_rate": 7.48271092669433e-06, "loss": 0.4625, "step": 541 }, { "epoch": 0.02250141412254345, "grad_norm": 3.1166763305664062, "learning_rate": 7.496542185338867e-06, "loss": 0.613, "step": 542 }, { "epoch": 0.022542929646754785, "grad_norm": 2.150953769683838, "learning_rate": 7.510373443983403e-06, "loss": 0.5019, "step": 543 }, { "epoch": 0.022584445170966118, "grad_norm": 2.9549930095672607, "learning_rate": 7.52420470262794e-06, "loss": 0.6615, "step": 544 }, { "epoch": 0.022625960695177454, "grad_norm": 3.099341869354248, "learning_rate": 7.538035961272476e-06, "loss": 0.5614, "step": 545 }, { "epoch": 0.022667476219388787, "grad_norm": 3.362013339996338, "learning_rate": 7.5518672199170125e-06, "loss": 0.3823, "step": 546 }, { "epoch": 0.022708991743600124, "grad_norm": 2.4195566177368164, "learning_rate": 7.56569847856155e-06, "loss": 0.5271, "step": 547 }, { "epoch": 0.022750507267811457, "grad_norm": 3.4508049488067627, "learning_rate": 7.579529737206086e-06, "loss": 0.5343, "step": 548 }, { "epoch": 0.022792022792022793, "grad_norm": 3.0493521690368652, "learning_rate": 7.593360995850623e-06, "loss": 0.5091, "step": 549 }, { "epoch": 0.022833538316234126, "grad_norm": 4.913128852844238, "learning_rate": 7.607192254495159e-06, "loss": 0.5265, "step": 550 }, { "epoch": 0.022875053840445463, "grad_norm": 2.0952980518341064, "learning_rate": 7.621023513139696e-06, "loss": 0.495, "step": 551 }, { "epoch": 0.022916569364656796, "grad_norm": 4.43227481842041, "learning_rate": 7.634854771784232e-06, "loss": 0.5066, "step": 552 }, { "epoch": 0.022958084888868132, "grad_norm": 3.1451523303985596, "learning_rate": 7.64868603042877e-06, "loss": 0.4829, "step": 553 }, { "epoch": 0.022999600413079465, "grad_norm": 3.411574602127075, "learning_rate": 7.662517289073307e-06, "loss": 0.5408, "step": 554 }, { "epoch": 0.0230411159372908, "grad_norm": 2.7121856212615967, "learning_rate": 7.676348547717844e-06, "loss": 0.5527, "step": 555 }, { "epoch": 0.023082631461502134, "grad_norm": 2.859473943710327, "learning_rate": 7.69017980636238e-06, "loss": 0.4584, "step": 556 }, { "epoch": 0.02312414698571347, "grad_norm": 2.678196907043457, "learning_rate": 7.704011065006917e-06, "loss": 0.5742, "step": 557 }, { "epoch": 0.023165662509924804, "grad_norm": 2.5243735313415527, "learning_rate": 7.717842323651453e-06, "loss": 0.4748, "step": 558 }, { "epoch": 0.02320717803413614, "grad_norm": 3.052114248275757, "learning_rate": 7.73167358229599e-06, "loss": 0.551, "step": 559 }, { "epoch": 0.023248693558347473, "grad_norm": 2.9193363189697266, "learning_rate": 7.745504840940527e-06, "loss": 0.6075, "step": 560 }, { "epoch": 0.02329020908255881, "grad_norm": 3.1275174617767334, "learning_rate": 7.759336099585063e-06, "loss": 0.6287, "step": 561 }, { "epoch": 0.023331724606770143, "grad_norm": 3.127761125564575, "learning_rate": 7.7731673582296e-06, "loss": 0.4901, "step": 562 }, { "epoch": 0.02337324013098148, "grad_norm": 2.9048092365264893, "learning_rate": 7.786998616874136e-06, "loss": 0.4835, "step": 563 }, { "epoch": 0.023414755655192812, "grad_norm": 2.975688934326172, "learning_rate": 7.800829875518673e-06, "loss": 0.559, "step": 564 }, { "epoch": 0.02345627117940415, "grad_norm": 3.365473508834839, "learning_rate": 7.81466113416321e-06, "loss": 0.5685, "step": 565 }, { "epoch": 0.02349778670361548, "grad_norm": 2.6483166217803955, "learning_rate": 7.828492392807746e-06, "loss": 0.5791, "step": 566 }, { "epoch": 0.023539302227826818, "grad_norm": 3.134085178375244, "learning_rate": 7.842323651452283e-06, "loss": 0.5152, "step": 567 }, { "epoch": 0.023580817752038154, "grad_norm": 2.775531053543091, "learning_rate": 7.856154910096819e-06, "loss": 0.516, "step": 568 }, { "epoch": 0.023622333276249487, "grad_norm": 2.659790277481079, "learning_rate": 7.869986168741356e-06, "loss": 0.4571, "step": 569 }, { "epoch": 0.023663848800460824, "grad_norm": 3.263636827468872, "learning_rate": 7.883817427385892e-06, "loss": 0.5881, "step": 570 }, { "epoch": 0.023705364324672157, "grad_norm": 3.1920275688171387, "learning_rate": 7.897648686030429e-06, "loss": 0.5825, "step": 571 }, { "epoch": 0.023746879848883493, "grad_norm": 2.579608201980591, "learning_rate": 7.911479944674966e-06, "loss": 0.4875, "step": 572 }, { "epoch": 0.023788395373094826, "grad_norm": 2.955181360244751, "learning_rate": 7.925311203319502e-06, "loss": 0.6115, "step": 573 }, { "epoch": 0.023829910897306163, "grad_norm": 2.954576015472412, "learning_rate": 7.93914246196404e-06, "loss": 0.4863, "step": 574 }, { "epoch": 0.023871426421517496, "grad_norm": 2.832728385925293, "learning_rate": 7.952973720608575e-06, "loss": 0.4434, "step": 575 }, { "epoch": 0.023912941945728832, "grad_norm": 2.756777763366699, "learning_rate": 7.966804979253112e-06, "loss": 0.4498, "step": 576 }, { "epoch": 0.023954457469940165, "grad_norm": 2.826185464859009, "learning_rate": 7.98063623789765e-06, "loss": 0.5913, "step": 577 }, { "epoch": 0.0239959729941515, "grad_norm": 2.2916111946105957, "learning_rate": 7.994467496542187e-06, "loss": 0.498, "step": 578 }, { "epoch": 0.024037488518362835, "grad_norm": 3.039341926574707, "learning_rate": 8.008298755186722e-06, "loss": 0.606, "step": 579 }, { "epoch": 0.02407900404257417, "grad_norm": 2.736116647720337, "learning_rate": 8.02213001383126e-06, "loss": 0.6124, "step": 580 }, { "epoch": 0.024120519566785504, "grad_norm": 3.0679330825805664, "learning_rate": 8.035961272475797e-06, "loss": 0.501, "step": 581 }, { "epoch": 0.02416203509099684, "grad_norm": 3.037959575653076, "learning_rate": 8.049792531120333e-06, "loss": 0.7014, "step": 582 }, { "epoch": 0.024203550615208173, "grad_norm": 2.473731279373169, "learning_rate": 8.06362378976487e-06, "loss": 0.6587, "step": 583 }, { "epoch": 0.02424506613941951, "grad_norm": 3.057157278060913, "learning_rate": 8.077455048409405e-06, "loss": 0.5842, "step": 584 }, { "epoch": 0.024286581663630843, "grad_norm": 3.039693593978882, "learning_rate": 8.091286307053943e-06, "loss": 0.4326, "step": 585 }, { "epoch": 0.02432809718784218, "grad_norm": 2.437666654586792, "learning_rate": 8.10511756569848e-06, "loss": 0.5423, "step": 586 }, { "epoch": 0.024369612712053512, "grad_norm": 3.7906653881073, "learning_rate": 8.118948824343016e-06, "loss": 0.5817, "step": 587 }, { "epoch": 0.02441112823626485, "grad_norm": 2.96321964263916, "learning_rate": 8.132780082987553e-06, "loss": 0.4067, "step": 588 }, { "epoch": 0.02445264376047618, "grad_norm": 2.5873799324035645, "learning_rate": 8.146611341632089e-06, "loss": 0.5719, "step": 589 }, { "epoch": 0.024494159284687518, "grad_norm": 2.725888252258301, "learning_rate": 8.160442600276626e-06, "loss": 0.4669, "step": 590 }, { "epoch": 0.02453567480889885, "grad_norm": 3.0059397220611572, "learning_rate": 8.174273858921162e-06, "loss": 0.4316, "step": 591 }, { "epoch": 0.024577190333110187, "grad_norm": 2.738990545272827, "learning_rate": 8.188105117565699e-06, "loss": 0.608, "step": 592 }, { "epoch": 0.02461870585732152, "grad_norm": 2.574019432067871, "learning_rate": 8.201936376210236e-06, "loss": 0.6254, "step": 593 }, { "epoch": 0.024660221381532857, "grad_norm": 2.526790142059326, "learning_rate": 8.215767634854772e-06, "loss": 0.4346, "step": 594 }, { "epoch": 0.024701736905744193, "grad_norm": 2.848613739013672, "learning_rate": 8.229598893499309e-06, "loss": 0.4179, "step": 595 }, { "epoch": 0.024743252429955526, "grad_norm": 3.033494234085083, "learning_rate": 8.243430152143845e-06, "loss": 0.7483, "step": 596 }, { "epoch": 0.024784767954166863, "grad_norm": 2.6395905017852783, "learning_rate": 8.257261410788382e-06, "loss": 0.3667, "step": 597 }, { "epoch": 0.024826283478378196, "grad_norm": 2.8745100498199463, "learning_rate": 8.27109266943292e-06, "loss": 0.5029, "step": 598 }, { "epoch": 0.024867799002589532, "grad_norm": 2.779742479324341, "learning_rate": 8.284923928077457e-06, "loss": 0.5419, "step": 599 }, { "epoch": 0.024909314526800865, "grad_norm": 2.637371063232422, "learning_rate": 8.298755186721992e-06, "loss": 0.5531, "step": 600 }, { "epoch": 0.0249508300510122, "grad_norm": 3.075530529022217, "learning_rate": 8.31258644536653e-06, "loss": 0.4384, "step": 601 }, { "epoch": 0.024992345575223535, "grad_norm": 3.0656323432922363, "learning_rate": 8.326417704011067e-06, "loss": 0.5589, "step": 602 }, { "epoch": 0.02503386109943487, "grad_norm": 2.6319234371185303, "learning_rate": 8.340248962655602e-06, "loss": 0.5437, "step": 603 }, { "epoch": 0.025075376623646204, "grad_norm": 2.855272054672241, "learning_rate": 8.35408022130014e-06, "loss": 0.5509, "step": 604 }, { "epoch": 0.02511689214785754, "grad_norm": 3.5140533447265625, "learning_rate": 8.367911479944675e-06, "loss": 0.6098, "step": 605 }, { "epoch": 0.025158407672068873, "grad_norm": 3.3085713386535645, "learning_rate": 8.381742738589213e-06, "loss": 0.5561, "step": 606 }, { "epoch": 0.02519992319628021, "grad_norm": 2.560692548751831, "learning_rate": 8.39557399723375e-06, "loss": 0.6127, "step": 607 }, { "epoch": 0.025241438720491543, "grad_norm": 2.5854270458221436, "learning_rate": 8.409405255878286e-06, "loss": 0.4898, "step": 608 }, { "epoch": 0.02528295424470288, "grad_norm": 2.804008722305298, "learning_rate": 8.423236514522823e-06, "loss": 0.5298, "step": 609 }, { "epoch": 0.025324469768914212, "grad_norm": 2.868598222732544, "learning_rate": 8.437067773167358e-06, "loss": 0.5884, "step": 610 }, { "epoch": 0.02536598529312555, "grad_norm": 2.9103140830993652, "learning_rate": 8.450899031811896e-06, "loss": 0.4758, "step": 611 }, { "epoch": 0.02540750081733688, "grad_norm": 3.8003997802734375, "learning_rate": 8.464730290456431e-06, "loss": 0.4963, "step": 612 }, { "epoch": 0.025449016341548218, "grad_norm": 3.5374481678009033, "learning_rate": 8.478561549100969e-06, "loss": 0.4466, "step": 613 }, { "epoch": 0.02549053186575955, "grad_norm": 2.4259073734283447, "learning_rate": 8.492392807745506e-06, "loss": 0.5424, "step": 614 }, { "epoch": 0.025532047389970888, "grad_norm": 2.7850301265716553, "learning_rate": 8.506224066390042e-06, "loss": 0.5855, "step": 615 }, { "epoch": 0.02557356291418222, "grad_norm": 2.939199686050415, "learning_rate": 8.520055325034579e-06, "loss": 0.5421, "step": 616 }, { "epoch": 0.025615078438393557, "grad_norm": 3.0114879608154297, "learning_rate": 8.533886583679114e-06, "loss": 0.6025, "step": 617 }, { "epoch": 0.02565659396260489, "grad_norm": 2.7538552284240723, "learning_rate": 8.547717842323652e-06, "loss": 0.6589, "step": 618 }, { "epoch": 0.025698109486816226, "grad_norm": 3.144937515258789, "learning_rate": 8.561549100968189e-06, "loss": 0.4736, "step": 619 }, { "epoch": 0.02573962501102756, "grad_norm": 3.186443328857422, "learning_rate": 8.575380359612725e-06, "loss": 0.646, "step": 620 }, { "epoch": 0.025781140535238896, "grad_norm": 3.277801990509033, "learning_rate": 8.589211618257262e-06, "loss": 0.6673, "step": 621 }, { "epoch": 0.025822656059450232, "grad_norm": 3.330237865447998, "learning_rate": 8.6030428769018e-06, "loss": 0.6406, "step": 622 }, { "epoch": 0.025864171583661565, "grad_norm": 2.4915127754211426, "learning_rate": 8.616874135546337e-06, "loss": 0.3971, "step": 623 }, { "epoch": 0.025905687107872902, "grad_norm": 2.987844705581665, "learning_rate": 8.630705394190872e-06, "loss": 0.5339, "step": 624 }, { "epoch": 0.025947202632084235, "grad_norm": 2.683227062225342, "learning_rate": 8.64453665283541e-06, "loss": 0.5806, "step": 625 }, { "epoch": 0.02598871815629557, "grad_norm": 2.5215232372283936, "learning_rate": 8.658367911479945e-06, "loss": 0.4791, "step": 626 }, { "epoch": 0.026030233680506904, "grad_norm": 3.188260793685913, "learning_rate": 8.672199170124482e-06, "loss": 0.6896, "step": 627 }, { "epoch": 0.02607174920471824, "grad_norm": 3.1718082427978516, "learning_rate": 8.68603042876902e-06, "loss": 0.3901, "step": 628 }, { "epoch": 0.026113264728929574, "grad_norm": 3.2634053230285645, "learning_rate": 8.699861687413555e-06, "loss": 0.5251, "step": 629 }, { "epoch": 0.02615478025314091, "grad_norm": 2.7320716381073, "learning_rate": 8.713692946058093e-06, "loss": 0.5934, "step": 630 }, { "epoch": 0.026196295777352243, "grad_norm": 3.1114470958709717, "learning_rate": 8.727524204702628e-06, "loss": 0.5862, "step": 631 }, { "epoch": 0.02623781130156358, "grad_norm": 2.563279867172241, "learning_rate": 8.741355463347166e-06, "loss": 0.6508, "step": 632 }, { "epoch": 0.026279326825774912, "grad_norm": 2.6859443187713623, "learning_rate": 8.755186721991701e-06, "loss": 0.4517, "step": 633 }, { "epoch": 0.02632084234998625, "grad_norm": 2.669159173965454, "learning_rate": 8.769017980636238e-06, "loss": 0.4442, "step": 634 }, { "epoch": 0.026362357874197582, "grad_norm": 2.8788657188415527, "learning_rate": 8.782849239280776e-06, "loss": 0.5555, "step": 635 }, { "epoch": 0.02640387339840892, "grad_norm": 3.2465450763702393, "learning_rate": 8.796680497925311e-06, "loss": 0.6381, "step": 636 }, { "epoch": 0.02644538892262025, "grad_norm": 2.6608712673187256, "learning_rate": 8.810511756569849e-06, "loss": 0.5181, "step": 637 }, { "epoch": 0.026486904446831588, "grad_norm": 2.9378044605255127, "learning_rate": 8.824343015214384e-06, "loss": 0.565, "step": 638 }, { "epoch": 0.02652841997104292, "grad_norm": 3.3351850509643555, "learning_rate": 8.838174273858922e-06, "loss": 0.5607, "step": 639 }, { "epoch": 0.026569935495254257, "grad_norm": 3.1037750244140625, "learning_rate": 8.852005532503459e-06, "loss": 0.4933, "step": 640 }, { "epoch": 0.02661145101946559, "grad_norm": 3.2273733615875244, "learning_rate": 8.865836791147994e-06, "loss": 0.5507, "step": 641 }, { "epoch": 0.026652966543676927, "grad_norm": 2.875530242919922, "learning_rate": 8.879668049792532e-06, "loss": 0.5862, "step": 642 }, { "epoch": 0.02669448206788826, "grad_norm": 2.938900947570801, "learning_rate": 8.893499308437067e-06, "loss": 0.55, "step": 643 }, { "epoch": 0.026735997592099596, "grad_norm": 3.4708304405212402, "learning_rate": 8.907330567081606e-06, "loss": 0.4931, "step": 644 }, { "epoch": 0.02677751311631093, "grad_norm": 2.9316952228546143, "learning_rate": 8.921161825726142e-06, "loss": 0.4327, "step": 645 }, { "epoch": 0.026819028640522265, "grad_norm": 3.328627586364746, "learning_rate": 8.93499308437068e-06, "loss": 0.7191, "step": 646 }, { "epoch": 0.0268605441647336, "grad_norm": 2.9759671688079834, "learning_rate": 8.948824343015215e-06, "loss": 0.4613, "step": 647 }, { "epoch": 0.026902059688944935, "grad_norm": 2.7322685718536377, "learning_rate": 8.962655601659752e-06, "loss": 0.3496, "step": 648 }, { "epoch": 0.02694357521315627, "grad_norm": 2.6084070205688477, "learning_rate": 8.97648686030429e-06, "loss": 0.4233, "step": 649 }, { "epoch": 0.026985090737367604, "grad_norm": 2.882765531539917, "learning_rate": 8.990318118948825e-06, "loss": 0.4784, "step": 650 }, { "epoch": 0.02702660626157894, "grad_norm": 3.545409679412842, "learning_rate": 9.004149377593362e-06, "loss": 0.4603, "step": 651 }, { "epoch": 0.027068121785790274, "grad_norm": 2.725064277648926, "learning_rate": 9.017980636237898e-06, "loss": 0.5555, "step": 652 }, { "epoch": 0.02710963731000161, "grad_norm": 2.86013126373291, "learning_rate": 9.031811894882435e-06, "loss": 0.5858, "step": 653 }, { "epoch": 0.027151152834212943, "grad_norm": 3.145111083984375, "learning_rate": 9.045643153526971e-06, "loss": 0.6221, "step": 654 }, { "epoch": 0.02719266835842428, "grad_norm": 3.372627019882202, "learning_rate": 9.059474412171508e-06, "loss": 0.5194, "step": 655 }, { "epoch": 0.027234183882635613, "grad_norm": 2.762148380279541, "learning_rate": 9.073305670816046e-06, "loss": 0.5629, "step": 656 }, { "epoch": 0.02727569940684695, "grad_norm": 3.6672475337982178, "learning_rate": 9.087136929460581e-06, "loss": 0.4436, "step": 657 }, { "epoch": 0.027317214931058282, "grad_norm": 3.1459357738494873, "learning_rate": 9.100968188105118e-06, "loss": 0.6459, "step": 658 }, { "epoch": 0.02735873045526962, "grad_norm": 3.3290414810180664, "learning_rate": 9.114799446749654e-06, "loss": 0.5296, "step": 659 }, { "epoch": 0.02740024597948095, "grad_norm": 3.3154942989349365, "learning_rate": 9.128630705394191e-06, "loss": 0.5451, "step": 660 }, { "epoch": 0.027441761503692288, "grad_norm": 2.844195604324341, "learning_rate": 9.142461964038729e-06, "loss": 0.5039, "step": 661 }, { "epoch": 0.02748327702790362, "grad_norm": 3.0529959201812744, "learning_rate": 9.156293222683264e-06, "loss": 0.4766, "step": 662 }, { "epoch": 0.027524792552114957, "grad_norm": 2.541442632675171, "learning_rate": 9.170124481327802e-06, "loss": 0.5063, "step": 663 }, { "epoch": 0.02756630807632629, "grad_norm": 2.513502836227417, "learning_rate": 9.183955739972337e-06, "loss": 0.5251, "step": 664 }, { "epoch": 0.027607823600537627, "grad_norm": 2.9902520179748535, "learning_rate": 9.197786998616875e-06, "loss": 0.4729, "step": 665 }, { "epoch": 0.02764933912474896, "grad_norm": 2.765794038772583, "learning_rate": 9.211618257261412e-06, "loss": 0.4996, "step": 666 }, { "epoch": 0.027690854648960296, "grad_norm": 3.2294082641601562, "learning_rate": 9.225449515905949e-06, "loss": 0.7163, "step": 667 }, { "epoch": 0.02773237017317163, "grad_norm": 2.87190580368042, "learning_rate": 9.239280774550485e-06, "loss": 0.4384, "step": 668 }, { "epoch": 0.027773885697382966, "grad_norm": 2.4990439414978027, "learning_rate": 9.253112033195022e-06, "loss": 0.5051, "step": 669 }, { "epoch": 0.0278154012215943, "grad_norm": 2.9574341773986816, "learning_rate": 9.26694329183956e-06, "loss": 0.6173, "step": 670 }, { "epoch": 0.027856916745805635, "grad_norm": 2.917137622833252, "learning_rate": 9.280774550484095e-06, "loss": 0.5887, "step": 671 }, { "epoch": 0.027898432270016968, "grad_norm": 3.2120275497436523, "learning_rate": 9.294605809128632e-06, "loss": 0.4132, "step": 672 }, { "epoch": 0.027939947794228304, "grad_norm": 2.9105305671691895, "learning_rate": 9.308437067773168e-06, "loss": 0.599, "step": 673 }, { "epoch": 0.027981463318439637, "grad_norm": 3.4902844429016113, "learning_rate": 9.322268326417705e-06, "loss": 0.7676, "step": 674 }, { "epoch": 0.028022978842650974, "grad_norm": 3.1556222438812256, "learning_rate": 9.33609958506224e-06, "loss": 0.526, "step": 675 }, { "epoch": 0.02806449436686231, "grad_norm": 2.6463799476623535, "learning_rate": 9.349930843706778e-06, "loss": 0.4932, "step": 676 }, { "epoch": 0.028106009891073643, "grad_norm": 3.0498175621032715, "learning_rate": 9.363762102351315e-06, "loss": 0.6446, "step": 677 }, { "epoch": 0.02814752541528498, "grad_norm": 3.0870120525360107, "learning_rate": 9.377593360995851e-06, "loss": 0.5335, "step": 678 }, { "epoch": 0.028189040939496313, "grad_norm": 2.573516368865967, "learning_rate": 9.391424619640388e-06, "loss": 0.7372, "step": 679 }, { "epoch": 0.02823055646370765, "grad_norm": 2.745041608810425, "learning_rate": 9.405255878284924e-06, "loss": 0.5155, "step": 680 }, { "epoch": 0.028272071987918982, "grad_norm": 3.176398992538452, "learning_rate": 9.419087136929461e-06, "loss": 0.5295, "step": 681 }, { "epoch": 0.02831358751213032, "grad_norm": 2.990642786026001, "learning_rate": 9.432918395573998e-06, "loss": 0.6104, "step": 682 }, { "epoch": 0.02835510303634165, "grad_norm": 3.146812677383423, "learning_rate": 9.446749654218534e-06, "loss": 0.5994, "step": 683 }, { "epoch": 0.028396618560552988, "grad_norm": 2.7196381092071533, "learning_rate": 9.460580912863071e-06, "loss": 0.4489, "step": 684 }, { "epoch": 0.02843813408476432, "grad_norm": 3.2695069313049316, "learning_rate": 9.474412171507607e-06, "loss": 0.7031, "step": 685 }, { "epoch": 0.028479649608975657, "grad_norm": 4.009891510009766, "learning_rate": 9.488243430152144e-06, "loss": 0.6217, "step": 686 }, { "epoch": 0.02852116513318699, "grad_norm": 2.6900854110717773, "learning_rate": 9.502074688796682e-06, "loss": 0.6495, "step": 687 }, { "epoch": 0.028562680657398327, "grad_norm": 3.064884662628174, "learning_rate": 9.515905947441217e-06, "loss": 0.4828, "step": 688 }, { "epoch": 0.02860419618160966, "grad_norm": 3.71828031539917, "learning_rate": 9.529737206085755e-06, "loss": 0.6114, "step": 689 }, { "epoch": 0.028645711705820996, "grad_norm": 3.457075834274292, "learning_rate": 9.543568464730292e-06, "loss": 0.5172, "step": 690 }, { "epoch": 0.02868722723003233, "grad_norm": 3.194540500640869, "learning_rate": 9.557399723374829e-06, "loss": 0.5678, "step": 691 }, { "epoch": 0.028728742754243666, "grad_norm": 3.2634949684143066, "learning_rate": 9.571230982019365e-06, "loss": 0.4901, "step": 692 }, { "epoch": 0.028770258278455, "grad_norm": 2.7581892013549805, "learning_rate": 9.585062240663902e-06, "loss": 0.5401, "step": 693 }, { "epoch": 0.028811773802666335, "grad_norm": 2.6080570220947266, "learning_rate": 9.598893499308438e-06, "loss": 0.5126, "step": 694 }, { "epoch": 0.028853289326877668, "grad_norm": 2.9308247566223145, "learning_rate": 9.612724757952975e-06, "loss": 0.4147, "step": 695 }, { "epoch": 0.028894804851089004, "grad_norm": 2.807155132293701, "learning_rate": 9.62655601659751e-06, "loss": 0.6127, "step": 696 }, { "epoch": 0.028936320375300337, "grad_norm": 2.9169650077819824, "learning_rate": 9.640387275242048e-06, "loss": 0.5144, "step": 697 }, { "epoch": 0.028977835899511674, "grad_norm": 2.8406901359558105, "learning_rate": 9.654218533886585e-06, "loss": 0.5689, "step": 698 }, { "epoch": 0.029019351423723007, "grad_norm": 3.2116665840148926, "learning_rate": 9.66804979253112e-06, "loss": 0.4635, "step": 699 }, { "epoch": 0.029060866947934343, "grad_norm": 2.7650065422058105, "learning_rate": 9.681881051175658e-06, "loss": 0.5295, "step": 700 }, { "epoch": 0.029102382472145676, "grad_norm": 3.0481297969818115, "learning_rate": 9.695712309820194e-06, "loss": 0.4491, "step": 701 }, { "epoch": 0.029143897996357013, "grad_norm": 2.663404941558838, "learning_rate": 9.709543568464731e-06, "loss": 0.6858, "step": 702 }, { "epoch": 0.02918541352056835, "grad_norm": 2.4847774505615234, "learning_rate": 9.723374827109268e-06, "loss": 0.5286, "step": 703 }, { "epoch": 0.029226929044779682, "grad_norm": 2.727870464324951, "learning_rate": 9.737206085753804e-06, "loss": 0.6776, "step": 704 }, { "epoch": 0.02926844456899102, "grad_norm": 2.7579963207244873, "learning_rate": 9.751037344398341e-06, "loss": 0.614, "step": 705 }, { "epoch": 0.02930996009320235, "grad_norm": 3.4509482383728027, "learning_rate": 9.764868603042877e-06, "loss": 0.659, "step": 706 }, { "epoch": 0.029351475617413688, "grad_norm": 2.897150754928589, "learning_rate": 9.778699861687414e-06, "loss": 0.7166, "step": 707 }, { "epoch": 0.02939299114162502, "grad_norm": 3.2869880199432373, "learning_rate": 9.792531120331951e-06, "loss": 0.7109, "step": 708 }, { "epoch": 0.029434506665836357, "grad_norm": 3.0441908836364746, "learning_rate": 9.806362378976487e-06, "loss": 0.5277, "step": 709 }, { "epoch": 0.02947602219004769, "grad_norm": 2.9375205039978027, "learning_rate": 9.820193637621024e-06, "loss": 0.4118, "step": 710 }, { "epoch": 0.029517537714259027, "grad_norm": 2.866809606552124, "learning_rate": 9.83402489626556e-06, "loss": 0.6547, "step": 711 }, { "epoch": 0.02955905323847036, "grad_norm": 2.490107536315918, "learning_rate": 9.847856154910099e-06, "loss": 0.5641, "step": 712 }, { "epoch": 0.029600568762681696, "grad_norm": 2.6739141941070557, "learning_rate": 9.861687413554635e-06, "loss": 0.5138, "step": 713 }, { "epoch": 0.02964208428689303, "grad_norm": 2.8573081493377686, "learning_rate": 9.875518672199172e-06, "loss": 0.5613, "step": 714 }, { "epoch": 0.029683599811104366, "grad_norm": 3.690401077270508, "learning_rate": 9.889349930843707e-06, "loss": 0.5947, "step": 715 }, { "epoch": 0.0297251153353157, "grad_norm": 2.7564895153045654, "learning_rate": 9.903181189488245e-06, "loss": 0.5179, "step": 716 }, { "epoch": 0.029766630859527035, "grad_norm": 2.6528384685516357, "learning_rate": 9.91701244813278e-06, "loss": 0.496, "step": 717 }, { "epoch": 0.029808146383738368, "grad_norm": 2.6016125679016113, "learning_rate": 9.930843706777318e-06, "loss": 0.5298, "step": 718 }, { "epoch": 0.029849661907949705, "grad_norm": 2.5318398475646973, "learning_rate": 9.944674965421855e-06, "loss": 0.6234, "step": 719 }, { "epoch": 0.029891177432161038, "grad_norm": 2.5884954929351807, "learning_rate": 9.95850622406639e-06, "loss": 0.4839, "step": 720 }, { "epoch": 0.029932692956372374, "grad_norm": 2.869488477706909, "learning_rate": 9.972337482710928e-06, "loss": 0.5587, "step": 721 }, { "epoch": 0.029974208480583707, "grad_norm": 3.70536732673645, "learning_rate": 9.986168741355464e-06, "loss": 0.5297, "step": 722 }, { "epoch": 0.030015724004795043, "grad_norm": 2.820354461669922, "learning_rate": 1e-05, "loss": 0.5179, "step": 723 }, { "epoch": 0.030057239529006376, "grad_norm": 3.1387345790863037, "learning_rate": 9.99999995479928e-06, "loss": 0.4703, "step": 724 }, { "epoch": 0.030098755053217713, "grad_norm": 2.7293283939361572, "learning_rate": 9.99999981919712e-06, "loss": 0.5433, "step": 725 }, { "epoch": 0.030140270577429046, "grad_norm": 2.595386028289795, "learning_rate": 9.99999959319352e-06, "loss": 0.3503, "step": 726 }, { "epoch": 0.030181786101640382, "grad_norm": 3.130852699279785, "learning_rate": 9.999999276788488e-06, "loss": 0.5624, "step": 727 }, { "epoch": 0.030223301625851715, "grad_norm": 2.493776321411133, "learning_rate": 9.999998869982028e-06, "loss": 0.5019, "step": 728 }, { "epoch": 0.03026481715006305, "grad_norm": 2.941215991973877, "learning_rate": 9.999998372774145e-06, "loss": 0.4546, "step": 729 }, { "epoch": 0.030306332674274388, "grad_norm": 2.4875590801239014, "learning_rate": 9.999997785164852e-06, "loss": 0.4509, "step": 730 }, { "epoch": 0.03034784819848572, "grad_norm": 3.1638355255126953, "learning_rate": 9.999997107154158e-06, "loss": 0.3188, "step": 731 }, { "epoch": 0.030389363722697058, "grad_norm": 2.5937981605529785, "learning_rate": 9.999996338742075e-06, "loss": 0.4394, "step": 732 }, { "epoch": 0.03043087924690839, "grad_norm": 2.4473609924316406, "learning_rate": 9.999995479928618e-06, "loss": 0.4576, "step": 733 }, { "epoch": 0.030472394771119727, "grad_norm": 3.0080156326293945, "learning_rate": 9.999994530713798e-06, "loss": 0.3703, "step": 734 }, { "epoch": 0.03051391029533106, "grad_norm": 2.297788381576538, "learning_rate": 9.99999349109764e-06, "loss": 0.5024, "step": 735 }, { "epoch": 0.030555425819542396, "grad_norm": 3.0523838996887207, "learning_rate": 9.999992361080158e-06, "loss": 0.7172, "step": 736 }, { "epoch": 0.03059694134375373, "grad_norm": 3.701601266860962, "learning_rate": 9.99999114066137e-06, "loss": 0.6735, "step": 737 }, { "epoch": 0.030638456867965066, "grad_norm": 3.0128769874572754, "learning_rate": 9.999989829841303e-06, "loss": 0.7344, "step": 738 }, { "epoch": 0.0306799723921764, "grad_norm": 2.7563045024871826, "learning_rate": 9.999988428619979e-06, "loss": 0.5622, "step": 739 }, { "epoch": 0.030721487916387735, "grad_norm": 2.981039524078369, "learning_rate": 9.999986936997422e-06, "loss": 0.521, "step": 740 }, { "epoch": 0.030763003440599068, "grad_norm": 3.2518444061279297, "learning_rate": 9.999985354973661e-06, "loss": 0.4988, "step": 741 }, { "epoch": 0.030804518964810405, "grad_norm": 3.2952487468719482, "learning_rate": 9.999983682548723e-06, "loss": 0.6034, "step": 742 }, { "epoch": 0.030846034489021738, "grad_norm": 2.6441030502319336, "learning_rate": 9.999981919722639e-06, "loss": 0.6111, "step": 743 }, { "epoch": 0.030887550013233074, "grad_norm": 2.943635940551758, "learning_rate": 9.99998006649544e-06, "loss": 0.5854, "step": 744 }, { "epoch": 0.030929065537444407, "grad_norm": 3.2783994674682617, "learning_rate": 9.999978122867163e-06, "loss": 0.4749, "step": 745 }, { "epoch": 0.030970581061655744, "grad_norm": 2.9262897968292236, "learning_rate": 9.999976088837837e-06, "loss": 0.5039, "step": 746 }, { "epoch": 0.031012096585867076, "grad_norm": 3.986151933670044, "learning_rate": 9.999973964407504e-06, "loss": 0.7691, "step": 747 }, { "epoch": 0.031053612110078413, "grad_norm": 3.6043155193328857, "learning_rate": 9.9999717495762e-06, "loss": 0.5322, "step": 748 }, { "epoch": 0.031095127634289746, "grad_norm": 2.659541606903076, "learning_rate": 9.999969444343966e-06, "loss": 0.5902, "step": 749 }, { "epoch": 0.031136643158501082, "grad_norm": 3.1488559246063232, "learning_rate": 9.999967048710844e-06, "loss": 0.645, "step": 750 }, { "epoch": 0.031178158682712415, "grad_norm": 2.6096694469451904, "learning_rate": 9.999964562676874e-06, "loss": 0.5123, "step": 751 }, { "epoch": 0.031219674206923752, "grad_norm": 2.543435573577881, "learning_rate": 9.999961986242107e-06, "loss": 0.4461, "step": 752 }, { "epoch": 0.031261189731135085, "grad_norm": 2.28853440284729, "learning_rate": 9.999959319406584e-06, "loss": 0.5268, "step": 753 }, { "epoch": 0.03130270525534642, "grad_norm": 2.918225049972534, "learning_rate": 9.999956562170355e-06, "loss": 0.6025, "step": 754 }, { "epoch": 0.03134422077955776, "grad_norm": 2.3995189666748047, "learning_rate": 9.999953714533471e-06, "loss": 0.4996, "step": 755 }, { "epoch": 0.03138573630376909, "grad_norm": 3.223435401916504, "learning_rate": 9.999950776495983e-06, "loss": 0.5754, "step": 756 }, { "epoch": 0.031427251827980424, "grad_norm": 3.239018201828003, "learning_rate": 9.999947748057943e-06, "loss": 0.6738, "step": 757 }, { "epoch": 0.031468767352191764, "grad_norm": 3.1116843223571777, "learning_rate": 9.999944629219407e-06, "loss": 0.5903, "step": 758 }, { "epoch": 0.031510282876403096, "grad_norm": 2.9152703285217285, "learning_rate": 9.999941419980432e-06, "loss": 0.4596, "step": 759 }, { "epoch": 0.03155179840061443, "grad_norm": 3.4328231811523438, "learning_rate": 9.999938120341074e-06, "loss": 0.3011, "step": 760 }, { "epoch": 0.03159331392482576, "grad_norm": 2.8227579593658447, "learning_rate": 9.999934730301392e-06, "loss": 0.6313, "step": 761 }, { "epoch": 0.0316348294490371, "grad_norm": 2.8920280933380127, "learning_rate": 9.999931249861452e-06, "loss": 0.4119, "step": 762 }, { "epoch": 0.031676344973248435, "grad_norm": 2.5476691722869873, "learning_rate": 9.999927679021312e-06, "loss": 0.4245, "step": 763 }, { "epoch": 0.03171786049745977, "grad_norm": 2.857635259628296, "learning_rate": 9.99992401778104e-06, "loss": 0.5999, "step": 764 }, { "epoch": 0.0317593760216711, "grad_norm": 2.948702096939087, "learning_rate": 9.999920266140698e-06, "loss": 0.5452, "step": 765 }, { "epoch": 0.03180089154588244, "grad_norm": 3.164933204650879, "learning_rate": 9.99991642410036e-06, "loss": 0.4918, "step": 766 }, { "epoch": 0.031842407070093774, "grad_norm": 2.893876791000366, "learning_rate": 9.999912491660088e-06, "loss": 0.5722, "step": 767 }, { "epoch": 0.03188392259430511, "grad_norm": 2.433075428009033, "learning_rate": 9.99990846881996e-06, "loss": 0.5961, "step": 768 }, { "epoch": 0.03192543811851644, "grad_norm": 2.9083473682403564, "learning_rate": 9.999904355580045e-06, "loss": 0.4735, "step": 769 }, { "epoch": 0.03196695364272778, "grad_norm": 2.2003514766693115, "learning_rate": 9.999900151940418e-06, "loss": 0.4687, "step": 770 }, { "epoch": 0.03200846916693911, "grad_norm": 2.746760606765747, "learning_rate": 9.999895857901154e-06, "loss": 0.4531, "step": 771 }, { "epoch": 0.032049984691150446, "grad_norm": 2.8983914852142334, "learning_rate": 9.999891473462333e-06, "loss": 0.515, "step": 772 }, { "epoch": 0.03209150021536178, "grad_norm": 3.101701498031616, "learning_rate": 9.999886998624034e-06, "loss": 0.5451, "step": 773 }, { "epoch": 0.03213301573957312, "grad_norm": 2.6274054050445557, "learning_rate": 9.999882433386335e-06, "loss": 0.6605, "step": 774 }, { "epoch": 0.03217453126378445, "grad_norm": 2.863650321960449, "learning_rate": 9.99987777774932e-06, "loss": 0.5051, "step": 775 }, { "epoch": 0.032216046787995785, "grad_norm": 3.0561609268188477, "learning_rate": 9.999873031713076e-06, "loss": 0.594, "step": 776 }, { "epoch": 0.03225756231220712, "grad_norm": 2.421323537826538, "learning_rate": 9.999868195277684e-06, "loss": 0.4802, "step": 777 }, { "epoch": 0.03229907783641846, "grad_norm": 2.7964911460876465, "learning_rate": 9.999863268443236e-06, "loss": 0.6144, "step": 778 }, { "epoch": 0.03234059336062979, "grad_norm": 3.015399932861328, "learning_rate": 9.999858251209819e-06, "loss": 0.476, "step": 779 }, { "epoch": 0.032382108884841124, "grad_norm": 2.7114131450653076, "learning_rate": 9.999853143577523e-06, "loss": 0.4954, "step": 780 }, { "epoch": 0.03242362440905246, "grad_norm": 3.5894315242767334, "learning_rate": 9.99984794554644e-06, "loss": 0.585, "step": 781 }, { "epoch": 0.0324651399332638, "grad_norm": 2.9348864555358887, "learning_rate": 9.999842657116667e-06, "loss": 0.4602, "step": 782 }, { "epoch": 0.03250665545747513, "grad_norm": 2.5807785987854004, "learning_rate": 9.999837278288296e-06, "loss": 0.3687, "step": 783 }, { "epoch": 0.03254817098168646, "grad_norm": 2.8096024990081787, "learning_rate": 9.999831809061426e-06, "loss": 0.4154, "step": 784 }, { "epoch": 0.0325896865058978, "grad_norm": 2.6953125, "learning_rate": 9.999826249436158e-06, "loss": 0.536, "step": 785 }, { "epoch": 0.032631202030109135, "grad_norm": 3.3573904037475586, "learning_rate": 9.999820599412587e-06, "loss": 0.6113, "step": 786 }, { "epoch": 0.03267271755432047, "grad_norm": 2.8161513805389404, "learning_rate": 9.99981485899082e-06, "loss": 0.579, "step": 787 }, { "epoch": 0.0327142330785318, "grad_norm": 3.245950698852539, "learning_rate": 9.999809028170959e-06, "loss": 0.6933, "step": 788 }, { "epoch": 0.03275574860274314, "grad_norm": 2.8255937099456787, "learning_rate": 9.99980310695311e-06, "loss": 0.4082, "step": 789 }, { "epoch": 0.032797264126954474, "grad_norm": 3.0259597301483154, "learning_rate": 9.99979709533738e-06, "loss": 0.6653, "step": 790 }, { "epoch": 0.03283877965116581, "grad_norm": 2.4492104053497314, "learning_rate": 9.999790993323879e-06, "loss": 0.525, "step": 791 }, { "epoch": 0.03288029517537714, "grad_norm": 2.4978535175323486, "learning_rate": 9.999784800912713e-06, "loss": 0.5079, "step": 792 }, { "epoch": 0.03292181069958848, "grad_norm": 2.6086065769195557, "learning_rate": 9.999778518103998e-06, "loss": 0.5051, "step": 793 }, { "epoch": 0.03296332622379981, "grad_norm": 3.1872637271881104, "learning_rate": 9.999772144897846e-06, "loss": 0.6201, "step": 794 }, { "epoch": 0.033004841748011146, "grad_norm": 3.350464344024658, "learning_rate": 9.999765681294371e-06, "loss": 0.4268, "step": 795 }, { "epoch": 0.03304635727222248, "grad_norm": 2.585033893585205, "learning_rate": 9.999759127293693e-06, "loss": 0.5111, "step": 796 }, { "epoch": 0.03308787279643382, "grad_norm": 3.005439519882202, "learning_rate": 9.999752482895929e-06, "loss": 0.5017, "step": 797 }, { "epoch": 0.03312938832064515, "grad_norm": 2.829624652862549, "learning_rate": 9.9997457481012e-06, "loss": 0.6394, "step": 798 }, { "epoch": 0.033170903844856485, "grad_norm": 3.0066978931427, "learning_rate": 9.999738922909626e-06, "loss": 0.5213, "step": 799 }, { "epoch": 0.03321241936906782, "grad_norm": 3.188082456588745, "learning_rate": 9.99973200732133e-06, "loss": 0.6575, "step": 800 }, { "epoch": 0.03325393489327916, "grad_norm": 3.3363897800445557, "learning_rate": 9.999725001336437e-06, "loss": 0.3964, "step": 801 }, { "epoch": 0.03329545041749049, "grad_norm": 2.641979932785034, "learning_rate": 9.999717904955078e-06, "loss": 0.4656, "step": 802 }, { "epoch": 0.033336965941701824, "grad_norm": 3.301231622695923, "learning_rate": 9.999710718177378e-06, "loss": 0.6269, "step": 803 }, { "epoch": 0.03337848146591316, "grad_norm": 3.6059727668762207, "learning_rate": 9.999703441003465e-06, "loss": 0.4873, "step": 804 }, { "epoch": 0.0334199969901245, "grad_norm": 3.0651416778564453, "learning_rate": 9.999696073433475e-06, "loss": 0.5272, "step": 805 }, { "epoch": 0.03346151251433583, "grad_norm": 2.549229145050049, "learning_rate": 9.999688615467537e-06, "loss": 0.5252, "step": 806 }, { "epoch": 0.03350302803854716, "grad_norm": 3.264371871948242, "learning_rate": 9.999681067105789e-06, "loss": 0.6686, "step": 807 }, { "epoch": 0.033544543562758496, "grad_norm": 2.5046725273132324, "learning_rate": 9.999673428348366e-06, "loss": 0.6036, "step": 808 }, { "epoch": 0.033586059086969836, "grad_norm": 2.9826385974884033, "learning_rate": 9.999665699195407e-06, "loss": 0.5183, "step": 809 }, { "epoch": 0.03362757461118117, "grad_norm": 3.0277156829833984, "learning_rate": 9.999657879647049e-06, "loss": 0.4114, "step": 810 }, { "epoch": 0.0336690901353925, "grad_norm": 2.762138843536377, "learning_rate": 9.999649969703437e-06, "loss": 0.5085, "step": 811 }, { "epoch": 0.03371060565960384, "grad_norm": 2.781982421875, "learning_rate": 9.999641969364712e-06, "loss": 0.6017, "step": 812 }, { "epoch": 0.033752121183815174, "grad_norm": 3.0633256435394287, "learning_rate": 9.99963387863102e-06, "loss": 0.6854, "step": 813 }, { "epoch": 0.03379363670802651, "grad_norm": 2.9634451866149902, "learning_rate": 9.999625697502506e-06, "loss": 0.518, "step": 814 }, { "epoch": 0.03383515223223784, "grad_norm": 3.0075292587280273, "learning_rate": 9.999617425979318e-06, "loss": 0.4788, "step": 815 }, { "epoch": 0.03387666775644918, "grad_norm": 2.6686079502105713, "learning_rate": 9.999609064061606e-06, "loss": 0.5366, "step": 816 }, { "epoch": 0.03391818328066051, "grad_norm": 2.62614107131958, "learning_rate": 9.999600611749522e-06, "loss": 0.4622, "step": 817 }, { "epoch": 0.033959698804871846, "grad_norm": 2.9226131439208984, "learning_rate": 9.999592069043217e-06, "loss": 0.4984, "step": 818 }, { "epoch": 0.03400121432908318, "grad_norm": 2.7778067588806152, "learning_rate": 9.999583435942845e-06, "loss": 0.6575, "step": 819 }, { "epoch": 0.03404272985329452, "grad_norm": 4.057272911071777, "learning_rate": 9.999574712448565e-06, "loss": 0.5512, "step": 820 }, { "epoch": 0.03408424537750585, "grad_norm": 2.7410333156585693, "learning_rate": 9.999565898560536e-06, "loss": 0.6018, "step": 821 }, { "epoch": 0.034125760901717185, "grad_norm": 3.4587597846984863, "learning_rate": 9.99955699427891e-06, "loss": 0.5529, "step": 822 }, { "epoch": 0.03416727642592852, "grad_norm": 3.4149603843688965, "learning_rate": 9.999547999603854e-06, "loss": 0.5982, "step": 823 }, { "epoch": 0.03420879195013986, "grad_norm": 2.7487261295318604, "learning_rate": 9.999538914535529e-06, "loss": 0.6375, "step": 824 }, { "epoch": 0.03425030747435119, "grad_norm": 3.6340713500976562, "learning_rate": 9.9995297390741e-06, "loss": 0.7275, "step": 825 }, { "epoch": 0.034291822998562524, "grad_norm": 2.797713279724121, "learning_rate": 9.999520473219731e-06, "loss": 0.513, "step": 826 }, { "epoch": 0.03433333852277386, "grad_norm": 2.8697474002838135, "learning_rate": 9.999511116972592e-06, "loss": 0.5525, "step": 827 }, { "epoch": 0.0343748540469852, "grad_norm": 2.5921761989593506, "learning_rate": 9.999501670332851e-06, "loss": 0.527, "step": 828 }, { "epoch": 0.03441636957119653, "grad_norm": 2.4435646533966064, "learning_rate": 9.99949213330068e-06, "loss": 0.5002, "step": 829 }, { "epoch": 0.03445788509540786, "grad_norm": 2.633958578109741, "learning_rate": 9.999482505876247e-06, "loss": 0.603, "step": 830 }, { "epoch": 0.034499400619619196, "grad_norm": 3.009981155395508, "learning_rate": 9.999472788059732e-06, "loss": 0.652, "step": 831 }, { "epoch": 0.034540916143830536, "grad_norm": 3.011897325515747, "learning_rate": 9.999462979851307e-06, "loss": 0.4254, "step": 832 }, { "epoch": 0.03458243166804187, "grad_norm": 3.1706700325012207, "learning_rate": 9.999453081251149e-06, "loss": 0.636, "step": 833 }, { "epoch": 0.0346239471922532, "grad_norm": 2.776866912841797, "learning_rate": 9.99944309225944e-06, "loss": 0.5501, "step": 834 }, { "epoch": 0.034665462716464535, "grad_norm": 2.42569899559021, "learning_rate": 9.99943301287636e-06, "loss": 0.5166, "step": 835 }, { "epoch": 0.034706978240675875, "grad_norm": 2.3657796382904053, "learning_rate": 9.999422843102087e-06, "loss": 0.4353, "step": 836 }, { "epoch": 0.03474849376488721, "grad_norm": 2.878016471862793, "learning_rate": 9.999412582936811e-06, "loss": 0.5304, "step": 837 }, { "epoch": 0.03479000928909854, "grad_norm": 2.8234481811523438, "learning_rate": 9.999402232380712e-06, "loss": 0.4457, "step": 838 }, { "epoch": 0.03483152481330988, "grad_norm": 2.7247087955474854, "learning_rate": 9.99939179143398e-06, "loss": 0.5538, "step": 839 }, { "epoch": 0.03487304033752121, "grad_norm": 2.846123218536377, "learning_rate": 9.999381260096805e-06, "loss": 0.3972, "step": 840 }, { "epoch": 0.034914555861732546, "grad_norm": 2.997006416320801, "learning_rate": 9.999370638369377e-06, "loss": 0.5221, "step": 841 }, { "epoch": 0.03495607138594388, "grad_norm": 3.437483310699463, "learning_rate": 9.999359926251884e-06, "loss": 0.6257, "step": 842 }, { "epoch": 0.03499758691015522, "grad_norm": 2.8257815837860107, "learning_rate": 9.999349123744524e-06, "loss": 0.5494, "step": 843 }, { "epoch": 0.03503910243436655, "grad_norm": 3.1807847023010254, "learning_rate": 9.999338230847491e-06, "loss": 0.5578, "step": 844 }, { "epoch": 0.035080617958577885, "grad_norm": 2.3759608268737793, "learning_rate": 9.999327247560981e-06, "loss": 0.5584, "step": 845 }, { "epoch": 0.03512213348278922, "grad_norm": 3.833526134490967, "learning_rate": 9.999316173885193e-06, "loss": 0.6216, "step": 846 }, { "epoch": 0.03516364900700056, "grad_norm": 2.758885622024536, "learning_rate": 9.999305009820329e-06, "loss": 0.6305, "step": 847 }, { "epoch": 0.03520516453121189, "grad_norm": 2.4550609588623047, "learning_rate": 9.999293755366588e-06, "loss": 0.5352, "step": 848 }, { "epoch": 0.035246680055423224, "grad_norm": 2.8306326866149902, "learning_rate": 9.999282410524176e-06, "loss": 0.6133, "step": 849 }, { "epoch": 0.03528819557963456, "grad_norm": 2.9624035358428955, "learning_rate": 9.999270975293296e-06, "loss": 0.61, "step": 850 }, { "epoch": 0.0353297111038459, "grad_norm": 2.5067121982574463, "learning_rate": 9.999259449674158e-06, "loss": 0.4292, "step": 851 }, { "epoch": 0.03537122662805723, "grad_norm": 2.648488759994507, "learning_rate": 9.999247833666967e-06, "loss": 0.5625, "step": 852 }, { "epoch": 0.03541274215226856, "grad_norm": 2.712160110473633, "learning_rate": 9.999236127271934e-06, "loss": 0.4163, "step": 853 }, { "epoch": 0.035454257676479896, "grad_norm": 3.095217227935791, "learning_rate": 9.99922433048927e-06, "loss": 0.6942, "step": 854 }, { "epoch": 0.035495773200691236, "grad_norm": 2.8324761390686035, "learning_rate": 9.999212443319191e-06, "loss": 0.5018, "step": 855 }, { "epoch": 0.03553728872490257, "grad_norm": 2.539767026901245, "learning_rate": 9.999200465761909e-06, "loss": 0.4626, "step": 856 }, { "epoch": 0.0355788042491139, "grad_norm": 2.33957839012146, "learning_rate": 9.999188397817641e-06, "loss": 0.4807, "step": 857 }, { "epoch": 0.035620319773325235, "grad_norm": 2.548095703125, "learning_rate": 9.999176239486608e-06, "loss": 0.5809, "step": 858 }, { "epoch": 0.035661835297536575, "grad_norm": 2.820237159729004, "learning_rate": 9.999163990769025e-06, "loss": 0.5851, "step": 859 }, { "epoch": 0.03570335082174791, "grad_norm": 2.8563973903656006, "learning_rate": 9.999151651665118e-06, "loss": 0.4356, "step": 860 }, { "epoch": 0.03574486634595924, "grad_norm": 2.7604849338531494, "learning_rate": 9.999139222175108e-06, "loss": 0.6452, "step": 861 }, { "epoch": 0.035786381870170574, "grad_norm": 2.702580690383911, "learning_rate": 9.99912670229922e-06, "loss": 0.4542, "step": 862 }, { "epoch": 0.03582789739438191, "grad_norm": 3.4364774227142334, "learning_rate": 9.999114092037679e-06, "loss": 0.4625, "step": 863 }, { "epoch": 0.035869412918593246, "grad_norm": 2.9853785037994385, "learning_rate": 9.999101391390716e-06, "loss": 0.5265, "step": 864 }, { "epoch": 0.03591092844280458, "grad_norm": 3.003113031387329, "learning_rate": 9.999088600358558e-06, "loss": 0.5621, "step": 865 }, { "epoch": 0.03595244396701592, "grad_norm": 2.4996843338012695, "learning_rate": 9.999075718941437e-06, "loss": 0.5216, "step": 866 }, { "epoch": 0.03599395949122725, "grad_norm": 2.5397396087646484, "learning_rate": 9.999062747139587e-06, "loss": 0.4822, "step": 867 }, { "epoch": 0.036035475015438585, "grad_norm": 2.841240406036377, "learning_rate": 9.999049684953241e-06, "loss": 0.548, "step": 868 }, { "epoch": 0.03607699053964992, "grad_norm": 2.588554620742798, "learning_rate": 9.999036532382637e-06, "loss": 0.4603, "step": 869 }, { "epoch": 0.03611850606386126, "grad_norm": 2.817641496658325, "learning_rate": 9.99902328942801e-06, "loss": 0.646, "step": 870 }, { "epoch": 0.03616002158807259, "grad_norm": 2.9789962768554688, "learning_rate": 9.999009956089603e-06, "loss": 0.4742, "step": 871 }, { "epoch": 0.036201537112283924, "grad_norm": 2.6284406185150146, "learning_rate": 9.998996532367654e-06, "loss": 0.6766, "step": 872 }, { "epoch": 0.03624305263649526, "grad_norm": 2.3107516765594482, "learning_rate": 9.998983018262405e-06, "loss": 0.4006, "step": 873 }, { "epoch": 0.0362845681607066, "grad_norm": 2.6342391967773438, "learning_rate": 9.998969413774104e-06, "loss": 0.4914, "step": 874 }, { "epoch": 0.03632608368491793, "grad_norm": 2.582092761993408, "learning_rate": 9.998955718902996e-06, "loss": 0.644, "step": 875 }, { "epoch": 0.03636759920912926, "grad_norm": 3.1311097145080566, "learning_rate": 9.998941933649326e-06, "loss": 0.6095, "step": 876 }, { "epoch": 0.036409114733340596, "grad_norm": 2.5186638832092285, "learning_rate": 9.998928058013346e-06, "loss": 0.5582, "step": 877 }, { "epoch": 0.036450630257551936, "grad_norm": 2.6961281299591064, "learning_rate": 9.998914091995305e-06, "loss": 0.523, "step": 878 }, { "epoch": 0.03649214578176327, "grad_norm": 3.3397810459136963, "learning_rate": 9.998900035595459e-06, "loss": 0.4754, "step": 879 }, { "epoch": 0.0365336613059746, "grad_norm": 3.0512728691101074, "learning_rate": 9.998885888814056e-06, "loss": 0.5204, "step": 880 }, { "epoch": 0.036575176830185935, "grad_norm": 2.2840476036071777, "learning_rate": 9.998871651651356e-06, "loss": 0.5051, "step": 881 }, { "epoch": 0.036616692354397275, "grad_norm": 2.912152051925659, "learning_rate": 9.998857324107617e-06, "loss": 0.5628, "step": 882 }, { "epoch": 0.03665820787860861, "grad_norm": 3.09948992729187, "learning_rate": 9.998842906183097e-06, "loss": 0.4901, "step": 883 }, { "epoch": 0.03669972340281994, "grad_norm": 2.9974420070648193, "learning_rate": 9.998828397878055e-06, "loss": 0.5061, "step": 884 }, { "epoch": 0.036741238927031274, "grad_norm": 2.6265974044799805, "learning_rate": 9.998813799192756e-06, "loss": 0.5589, "step": 885 }, { "epoch": 0.036782754451242614, "grad_norm": 2.742664337158203, "learning_rate": 9.998799110127462e-06, "loss": 0.7146, "step": 886 }, { "epoch": 0.03682426997545395, "grad_norm": 2.9154489040374756, "learning_rate": 9.998784330682438e-06, "loss": 0.4231, "step": 887 }, { "epoch": 0.03686578549966528, "grad_norm": 3.1469154357910156, "learning_rate": 9.998769460857955e-06, "loss": 0.7651, "step": 888 }, { "epoch": 0.03690730102387661, "grad_norm": 2.612518787384033, "learning_rate": 9.99875450065428e-06, "loss": 0.4387, "step": 889 }, { "epoch": 0.03694881654808795, "grad_norm": 2.5871987342834473, "learning_rate": 9.99873945007168e-06, "loss": 0.5212, "step": 890 }, { "epoch": 0.036990332072299285, "grad_norm": 2.6051852703094482, "learning_rate": 9.99872430911043e-06, "loss": 0.5507, "step": 891 }, { "epoch": 0.03703184759651062, "grad_norm": 2.9346439838409424, "learning_rate": 9.998709077770805e-06, "loss": 0.5129, "step": 892 }, { "epoch": 0.03707336312072196, "grad_norm": 3.1150076389312744, "learning_rate": 9.99869375605308e-06, "loss": 0.5892, "step": 893 }, { "epoch": 0.03711487864493329, "grad_norm": 2.9211390018463135, "learning_rate": 9.998678343957528e-06, "loss": 0.5464, "step": 894 }, { "epoch": 0.037156394169144624, "grad_norm": 2.5961599349975586, "learning_rate": 9.998662841484435e-06, "loss": 0.5126, "step": 895 }, { "epoch": 0.03719790969335596, "grad_norm": 3.1359689235687256, "learning_rate": 9.998647248634073e-06, "loss": 0.6337, "step": 896 }, { "epoch": 0.0372394252175673, "grad_norm": 3.322390556335449, "learning_rate": 9.99863156540673e-06, "loss": 0.5908, "step": 897 }, { "epoch": 0.03728094074177863, "grad_norm": 3.6606481075286865, "learning_rate": 9.998615791802688e-06, "loss": 0.6121, "step": 898 }, { "epoch": 0.03732245626598996, "grad_norm": 2.7647223472595215, "learning_rate": 9.99859992782223e-06, "loss": 0.6001, "step": 899 }, { "epoch": 0.037363971790201296, "grad_norm": 2.847050189971924, "learning_rate": 9.998583973465647e-06, "loss": 0.4745, "step": 900 }, { "epoch": 0.037405487314412636, "grad_norm": 2.680637836456299, "learning_rate": 9.998567928733223e-06, "loss": 0.522, "step": 901 }, { "epoch": 0.03744700283862397, "grad_norm": 2.67802095413208, "learning_rate": 9.99855179362525e-06, "loss": 0.5042, "step": 902 }, { "epoch": 0.0374885183628353, "grad_norm": 3.31915545463562, "learning_rate": 9.99853556814202e-06, "loss": 0.5712, "step": 903 }, { "epoch": 0.037530033887046635, "grad_norm": 2.7169735431671143, "learning_rate": 9.998519252283826e-06, "loss": 0.5939, "step": 904 }, { "epoch": 0.037571549411257975, "grad_norm": 2.8753111362457275, "learning_rate": 9.998502846050966e-06, "loss": 0.5937, "step": 905 }, { "epoch": 0.03761306493546931, "grad_norm": 2.9639508724212646, "learning_rate": 9.998486349443731e-06, "loss": 0.5985, "step": 906 }, { "epoch": 0.03765458045968064, "grad_norm": 2.5194315910339355, "learning_rate": 9.998469762462424e-06, "loss": 0.6187, "step": 907 }, { "epoch": 0.037696095983891974, "grad_norm": 2.890453815460205, "learning_rate": 9.99845308510734e-06, "loss": 0.5313, "step": 908 }, { "epoch": 0.037737611508103314, "grad_norm": 2.6488494873046875, "learning_rate": 9.998436317378786e-06, "loss": 0.5831, "step": 909 }, { "epoch": 0.03777912703231465, "grad_norm": 2.886925458908081, "learning_rate": 9.998419459277062e-06, "loss": 0.4496, "step": 910 }, { "epoch": 0.03782064255652598, "grad_norm": 3.069944143295288, "learning_rate": 9.998402510802475e-06, "loss": 0.6084, "step": 911 }, { "epoch": 0.03786215808073731, "grad_norm": 2.7746047973632812, "learning_rate": 9.998385471955328e-06, "loss": 0.6382, "step": 912 }, { "epoch": 0.03790367360494865, "grad_norm": 3.2183568477630615, "learning_rate": 9.99836834273593e-06, "loss": 0.5599, "step": 913 }, { "epoch": 0.037945189129159985, "grad_norm": 2.9795377254486084, "learning_rate": 9.998351123144594e-06, "loss": 0.3489, "step": 914 }, { "epoch": 0.03798670465337132, "grad_norm": 2.8410840034484863, "learning_rate": 9.998333813181627e-06, "loss": 0.511, "step": 915 }, { "epoch": 0.03802822017758265, "grad_norm": 3.8549933433532715, "learning_rate": 9.998316412847346e-06, "loss": 0.6608, "step": 916 }, { "epoch": 0.03806973570179399, "grad_norm": 2.89497447013855, "learning_rate": 9.998298922142062e-06, "loss": 0.6592, "step": 917 }, { "epoch": 0.038111251226005324, "grad_norm": 3.075900077819824, "learning_rate": 9.998281341066094e-06, "loss": 0.4947, "step": 918 }, { "epoch": 0.03815276675021666, "grad_norm": 2.5499143600463867, "learning_rate": 9.998263669619758e-06, "loss": 0.4957, "step": 919 }, { "epoch": 0.038194282274428, "grad_norm": 3.3848583698272705, "learning_rate": 9.998245907803375e-06, "loss": 0.5791, "step": 920 }, { "epoch": 0.03823579779863933, "grad_norm": 3.115295171737671, "learning_rate": 9.998228055617264e-06, "loss": 0.5462, "step": 921 }, { "epoch": 0.03827731332285066, "grad_norm": 2.291433334350586, "learning_rate": 9.998210113061749e-06, "loss": 0.4519, "step": 922 }, { "epoch": 0.038318828847061996, "grad_norm": 2.8556363582611084, "learning_rate": 9.998192080137155e-06, "loss": 0.5325, "step": 923 }, { "epoch": 0.038360344371273336, "grad_norm": 2.4486565589904785, "learning_rate": 9.99817395684381e-06, "loss": 0.5391, "step": 924 }, { "epoch": 0.03840185989548467, "grad_norm": 2.56258487701416, "learning_rate": 9.998155743182036e-06, "loss": 0.5453, "step": 925 }, { "epoch": 0.038443375419696, "grad_norm": 2.562664270401001, "learning_rate": 9.998137439152166e-06, "loss": 0.506, "step": 926 }, { "epoch": 0.038484890943907335, "grad_norm": 3.520057201385498, "learning_rate": 9.998119044754531e-06, "loss": 0.4958, "step": 927 }, { "epoch": 0.038526406468118675, "grad_norm": 2.9385788440704346, "learning_rate": 9.998100559989463e-06, "loss": 0.5723, "step": 928 }, { "epoch": 0.03856792199233001, "grad_norm": 2.8117828369140625, "learning_rate": 9.998081984857298e-06, "loss": 0.5552, "step": 929 }, { "epoch": 0.03860943751654134, "grad_norm": 2.9145660400390625, "learning_rate": 9.998063319358367e-06, "loss": 0.3852, "step": 930 }, { "epoch": 0.038650953040752674, "grad_norm": 2.781374216079712, "learning_rate": 9.998044563493012e-06, "loss": 0.4197, "step": 931 }, { "epoch": 0.038692468564964014, "grad_norm": 2.5936310291290283, "learning_rate": 9.998025717261572e-06, "loss": 0.4188, "step": 932 }, { "epoch": 0.03873398408917535, "grad_norm": 2.752606153488159, "learning_rate": 9.998006780664385e-06, "loss": 0.5321, "step": 933 }, { "epoch": 0.03877549961338668, "grad_norm": 2.987100124359131, "learning_rate": 9.997987753701795e-06, "loss": 0.4747, "step": 934 }, { "epoch": 0.03881701513759801, "grad_norm": 3.5844380855560303, "learning_rate": 9.997968636374146e-06, "loss": 0.7132, "step": 935 }, { "epoch": 0.03885853066180935, "grad_norm": 2.649580240249634, "learning_rate": 9.997949428681784e-06, "loss": 0.464, "step": 936 }, { "epoch": 0.038900046186020686, "grad_norm": 2.4475507736206055, "learning_rate": 9.997930130625054e-06, "loss": 0.5338, "step": 937 }, { "epoch": 0.03894156171023202, "grad_norm": 2.768909215927124, "learning_rate": 9.99791074220431e-06, "loss": 0.4849, "step": 938 }, { "epoch": 0.03898307723444335, "grad_norm": 3.1503021717071533, "learning_rate": 9.997891263419896e-06, "loss": 0.5268, "step": 939 }, { "epoch": 0.03902459275865469, "grad_norm": 3.248183012008667, "learning_rate": 9.997871694272168e-06, "loss": 0.673, "step": 940 }, { "epoch": 0.039066108282866024, "grad_norm": 3.1626455783843994, "learning_rate": 9.99785203476148e-06, "loss": 0.4806, "step": 941 }, { "epoch": 0.03910762380707736, "grad_norm": 2.5253818035125732, "learning_rate": 9.997832284888185e-06, "loss": 0.5546, "step": 942 }, { "epoch": 0.03914913933128869, "grad_norm": 2.4835472106933594, "learning_rate": 9.997812444652643e-06, "loss": 0.4372, "step": 943 }, { "epoch": 0.03919065485550003, "grad_norm": 2.82613205909729, "learning_rate": 9.997792514055213e-06, "loss": 0.5633, "step": 944 }, { "epoch": 0.03923217037971136, "grad_norm": 2.4681808948516846, "learning_rate": 9.997772493096252e-06, "loss": 0.456, "step": 945 }, { "epoch": 0.039273685903922696, "grad_norm": 2.6633126735687256, "learning_rate": 9.997752381776125e-06, "loss": 0.5567, "step": 946 }, { "epoch": 0.039315201428134036, "grad_norm": 3.0769736766815186, "learning_rate": 9.997732180095193e-06, "loss": 0.5529, "step": 947 }, { "epoch": 0.03935671695234537, "grad_norm": 2.924543619155884, "learning_rate": 9.997711888053824e-06, "loss": 0.5488, "step": 948 }, { "epoch": 0.0393982324765567, "grad_norm": 2.823460817337036, "learning_rate": 9.997691505652382e-06, "loss": 0.5611, "step": 949 }, { "epoch": 0.039439748000768035, "grad_norm": 3.0030012130737305, "learning_rate": 9.997671032891238e-06, "loss": 0.4828, "step": 950 }, { "epoch": 0.039481263524979375, "grad_norm": 7.071869373321533, "learning_rate": 9.997650469770762e-06, "loss": 0.4448, "step": 951 }, { "epoch": 0.03952277904919071, "grad_norm": 2.723505735397339, "learning_rate": 9.997629816291325e-06, "loss": 0.4433, "step": 952 }, { "epoch": 0.03956429457340204, "grad_norm": 2.4052743911743164, "learning_rate": 9.9976090724533e-06, "loss": 0.45, "step": 953 }, { "epoch": 0.039605810097613374, "grad_norm": 2.861773729324341, "learning_rate": 9.99758823825706e-06, "loss": 0.6417, "step": 954 }, { "epoch": 0.039647325621824714, "grad_norm": 3.3252198696136475, "learning_rate": 9.997567313702987e-06, "loss": 0.6442, "step": 955 }, { "epoch": 0.03968884114603605, "grad_norm": 2.963974952697754, "learning_rate": 9.997546298791456e-06, "loss": 0.4986, "step": 956 }, { "epoch": 0.03973035667024738, "grad_norm": 2.5968472957611084, "learning_rate": 9.997525193522848e-06, "loss": 0.5163, "step": 957 }, { "epoch": 0.03977187219445871, "grad_norm": 2.399275064468384, "learning_rate": 9.997503997897544e-06, "loss": 0.5618, "step": 958 }, { "epoch": 0.03981338771867005, "grad_norm": 3.2715041637420654, "learning_rate": 9.997482711915926e-06, "loss": 0.539, "step": 959 }, { "epoch": 0.039854903242881386, "grad_norm": 3.0863447189331055, "learning_rate": 9.99746133557838e-06, "loss": 0.3877, "step": 960 }, { "epoch": 0.03989641876709272, "grad_norm": 2.97116756439209, "learning_rate": 9.997439868885295e-06, "loss": 0.4651, "step": 961 }, { "epoch": 0.03993793429130405, "grad_norm": 2.3893773555755615, "learning_rate": 9.997418311837054e-06, "loss": 0.509, "step": 962 }, { "epoch": 0.03997944981551539, "grad_norm": 2.594224691390991, "learning_rate": 9.997396664434051e-06, "loss": 0.4568, "step": 963 }, { "epoch": 0.040020965339726725, "grad_norm": 2.85867977142334, "learning_rate": 9.997374926676678e-06, "loss": 0.6491, "step": 964 }, { "epoch": 0.04006248086393806, "grad_norm": 2.7007884979248047, "learning_rate": 9.997353098565323e-06, "loss": 0.5572, "step": 965 }, { "epoch": 0.04010399638814939, "grad_norm": 2.909611940383911, "learning_rate": 9.997331180100384e-06, "loss": 0.6711, "step": 966 }, { "epoch": 0.04014551191236073, "grad_norm": 3.1442079544067383, "learning_rate": 9.997309171282257e-06, "loss": 0.4947, "step": 967 }, { "epoch": 0.04018702743657206, "grad_norm": 3.679658889770508, "learning_rate": 9.99728707211134e-06, "loss": 0.4738, "step": 968 }, { "epoch": 0.040228542960783396, "grad_norm": 4.051383972167969, "learning_rate": 9.997264882588031e-06, "loss": 0.5675, "step": 969 }, { "epoch": 0.04027005848499473, "grad_norm": 2.9157958030700684, "learning_rate": 9.997242602712736e-06, "loss": 0.4824, "step": 970 }, { "epoch": 0.04031157400920607, "grad_norm": 4.502997875213623, "learning_rate": 9.99722023248585e-06, "loss": 0.5254, "step": 971 }, { "epoch": 0.0403530895334174, "grad_norm": 3.55525803565979, "learning_rate": 9.997197771907786e-06, "loss": 0.6492, "step": 972 }, { "epoch": 0.040394605057628735, "grad_norm": 2.929495096206665, "learning_rate": 9.997175220978942e-06, "loss": 0.6611, "step": 973 }, { "epoch": 0.040436120581840075, "grad_norm": 2.6801583766937256, "learning_rate": 9.997152579699732e-06, "loss": 0.642, "step": 974 }, { "epoch": 0.04047763610605141, "grad_norm": 2.7407824993133545, "learning_rate": 9.997129848070563e-06, "loss": 0.5094, "step": 975 }, { "epoch": 0.04051915163026274, "grad_norm": 3.01904034614563, "learning_rate": 9.997107026091846e-06, "loss": 0.6725, "step": 976 }, { "epoch": 0.040560667154474074, "grad_norm": 3.0786235332489014, "learning_rate": 9.997084113763993e-06, "loss": 0.599, "step": 977 }, { "epoch": 0.040602182678685414, "grad_norm": 2.3971776962280273, "learning_rate": 9.99706111108742e-06, "loss": 0.5571, "step": 978 }, { "epoch": 0.04064369820289675, "grad_norm": 2.829338312149048, "learning_rate": 9.997038018062542e-06, "loss": 0.5586, "step": 979 }, { "epoch": 0.04068521372710808, "grad_norm": 2.8137617111206055, "learning_rate": 9.997014834689774e-06, "loss": 0.512, "step": 980 }, { "epoch": 0.04072672925131941, "grad_norm": 2.9036576747894287, "learning_rate": 9.99699156096954e-06, "loss": 0.5898, "step": 981 }, { "epoch": 0.04076824477553075, "grad_norm": 2.874208688735962, "learning_rate": 9.996968196902257e-06, "loss": 0.6652, "step": 982 }, { "epoch": 0.040809760299742086, "grad_norm": 2.8487746715545654, "learning_rate": 9.996944742488347e-06, "loss": 0.5247, "step": 983 }, { "epoch": 0.04085127582395342, "grad_norm": 2.344898223876953, "learning_rate": 9.996921197728239e-06, "loss": 0.4729, "step": 984 }, { "epoch": 0.04089279134816475, "grad_norm": 3.4011337757110596, "learning_rate": 9.996897562622354e-06, "loss": 0.6073, "step": 985 }, { "epoch": 0.04093430687237609, "grad_norm": 2.59018611907959, "learning_rate": 9.99687383717112e-06, "loss": 0.5996, "step": 986 }, { "epoch": 0.040975822396587425, "grad_norm": 2.5743625164031982, "learning_rate": 9.996850021374969e-06, "loss": 0.4289, "step": 987 }, { "epoch": 0.04101733792079876, "grad_norm": 3.3543591499328613, "learning_rate": 9.996826115234328e-06, "loss": 0.58, "step": 988 }, { "epoch": 0.04105885344501009, "grad_norm": 2.8088245391845703, "learning_rate": 9.996802118749631e-06, "loss": 0.4947, "step": 989 }, { "epoch": 0.04110036896922143, "grad_norm": 2.789224624633789, "learning_rate": 9.996778031921311e-06, "loss": 0.5529, "step": 990 }, { "epoch": 0.041141884493432763, "grad_norm": 2.6044557094573975, "learning_rate": 9.996753854749804e-06, "loss": 0.5217, "step": 991 }, { "epoch": 0.041183400017644096, "grad_norm": 2.4929680824279785, "learning_rate": 9.996729587235548e-06, "loss": 0.5571, "step": 992 }, { "epoch": 0.04122491554185543, "grad_norm": 2.1175525188446045, "learning_rate": 9.99670522937898e-06, "loss": 0.4623, "step": 993 }, { "epoch": 0.04126643106606677, "grad_norm": 2.6349244117736816, "learning_rate": 9.99668078118054e-06, "loss": 0.5435, "step": 994 }, { "epoch": 0.0413079465902781, "grad_norm": 2.9540674686431885, "learning_rate": 9.996656242640674e-06, "loss": 0.426, "step": 995 }, { "epoch": 0.041349462114489435, "grad_norm": 2.8687143325805664, "learning_rate": 9.996631613759821e-06, "loss": 0.4836, "step": 996 }, { "epoch": 0.04139097763870077, "grad_norm": 3.0379931926727295, "learning_rate": 9.996606894538427e-06, "loss": 0.5768, "step": 997 }, { "epoch": 0.04143249316291211, "grad_norm": 2.710510015487671, "learning_rate": 9.996582084976943e-06, "loss": 0.5176, "step": 998 }, { "epoch": 0.04147400868712344, "grad_norm": 2.5375537872314453, "learning_rate": 9.996557185075816e-06, "loss": 0.4098, "step": 999 }, { "epoch": 0.041515524211334774, "grad_norm": 3.3991522789001465, "learning_rate": 9.996532194835492e-06, "loss": 0.6433, "step": 1000 }, { "epoch": 0.041557039735546114, "grad_norm": 3.2447011470794678, "learning_rate": 9.996507114256424e-06, "loss": 0.4675, "step": 1001 }, { "epoch": 0.04159855525975745, "grad_norm": 2.788660764694214, "learning_rate": 9.99648194333907e-06, "loss": 0.4677, "step": 1002 }, { "epoch": 0.04164007078396878, "grad_norm": 3.4285433292388916, "learning_rate": 9.996456682083883e-06, "loss": 0.4388, "step": 1003 }, { "epoch": 0.04168158630818011, "grad_norm": 2.2345452308654785, "learning_rate": 9.996431330491317e-06, "loss": 0.4454, "step": 1004 }, { "epoch": 0.04172310183239145, "grad_norm": 2.151315450668335, "learning_rate": 9.996405888561833e-06, "loss": 0.2984, "step": 1005 }, { "epoch": 0.041764617356602786, "grad_norm": 3.2773241996765137, "learning_rate": 9.996380356295891e-06, "loss": 0.5531, "step": 1006 }, { "epoch": 0.04180613288081412, "grad_norm": 3.0632781982421875, "learning_rate": 9.996354733693953e-06, "loss": 0.5593, "step": 1007 }, { "epoch": 0.04184764840502545, "grad_norm": 2.9946324825286865, "learning_rate": 9.996329020756478e-06, "loss": 0.5572, "step": 1008 }, { "epoch": 0.04188916392923679, "grad_norm": 3.2798757553100586, "learning_rate": 9.996303217483938e-06, "loss": 0.7155, "step": 1009 }, { "epoch": 0.041930679453448125, "grad_norm": 2.6735239028930664, "learning_rate": 9.996277323876795e-06, "loss": 0.5379, "step": 1010 }, { "epoch": 0.04197219497765946, "grad_norm": 2.948888063430786, "learning_rate": 9.996251339935517e-06, "loss": 0.5572, "step": 1011 }, { "epoch": 0.04201371050187079, "grad_norm": 3.005378007888794, "learning_rate": 9.996225265660574e-06, "loss": 0.6128, "step": 1012 }, { "epoch": 0.04205522602608213, "grad_norm": 2.7316746711730957, "learning_rate": 9.996199101052438e-06, "loss": 0.6886, "step": 1013 }, { "epoch": 0.042096741550293464, "grad_norm": 2.9516987800598145, "learning_rate": 9.996172846111583e-06, "loss": 0.4982, "step": 1014 }, { "epoch": 0.0421382570745048, "grad_norm": 2.5710506439208984, "learning_rate": 9.996146500838484e-06, "loss": 0.6193, "step": 1015 }, { "epoch": 0.04217977259871613, "grad_norm": 2.78690767288208, "learning_rate": 9.996120065233614e-06, "loss": 0.5201, "step": 1016 }, { "epoch": 0.04222128812292747, "grad_norm": 2.9768030643463135, "learning_rate": 9.996093539297456e-06, "loss": 0.4335, "step": 1017 }, { "epoch": 0.0422628036471388, "grad_norm": 3.107349395751953, "learning_rate": 9.996066923030484e-06, "loss": 0.3724, "step": 1018 }, { "epoch": 0.042304319171350135, "grad_norm": 2.7963263988494873, "learning_rate": 9.996040216433183e-06, "loss": 0.5902, "step": 1019 }, { "epoch": 0.04234583469556147, "grad_norm": 2.849611282348633, "learning_rate": 9.996013419506035e-06, "loss": 0.3606, "step": 1020 }, { "epoch": 0.04238735021977281, "grad_norm": 2.483672618865967, "learning_rate": 9.995986532249524e-06, "loss": 0.4661, "step": 1021 }, { "epoch": 0.04242886574398414, "grad_norm": 2.851928234100342, "learning_rate": 9.995959554664137e-06, "loss": 0.6446, "step": 1022 }, { "epoch": 0.042470381268195474, "grad_norm": 2.58646821975708, "learning_rate": 9.99593248675036e-06, "loss": 0.6255, "step": 1023 }, { "epoch": 0.04251189679240681, "grad_norm": 2.879540205001831, "learning_rate": 9.995905328508687e-06, "loss": 0.4222, "step": 1024 }, { "epoch": 0.04255341231661815, "grad_norm": 2.817063570022583, "learning_rate": 9.995878079939602e-06, "loss": 0.7472, "step": 1025 }, { "epoch": 0.04259492784082948, "grad_norm": 2.697528839111328, "learning_rate": 9.995850741043603e-06, "loss": 0.5691, "step": 1026 }, { "epoch": 0.04263644336504081, "grad_norm": 2.983365774154663, "learning_rate": 9.995823311821183e-06, "loss": 0.5963, "step": 1027 }, { "epoch": 0.04267795888925215, "grad_norm": 2.9163360595703125, "learning_rate": 9.995795792272836e-06, "loss": 0.5343, "step": 1028 }, { "epoch": 0.042719474413463486, "grad_norm": 3.381298303604126, "learning_rate": 9.995768182399063e-06, "loss": 0.5972, "step": 1029 }, { "epoch": 0.04276098993767482, "grad_norm": 3.001899003982544, "learning_rate": 9.99574048220036e-06, "loss": 0.4767, "step": 1030 }, { "epoch": 0.04280250546188615, "grad_norm": 2.7015507221221924, "learning_rate": 9.995712691677231e-06, "loss": 0.5866, "step": 1031 }, { "epoch": 0.04284402098609749, "grad_norm": 2.9834046363830566, "learning_rate": 9.995684810830174e-06, "loss": 0.5555, "step": 1032 }, { "epoch": 0.042885536510308825, "grad_norm": 2.517461061477661, "learning_rate": 9.995656839659698e-06, "loss": 0.4311, "step": 1033 }, { "epoch": 0.04292705203452016, "grad_norm": 2.493016481399536, "learning_rate": 9.995628778166304e-06, "loss": 0.6445, "step": 1034 }, { "epoch": 0.04296856755873149, "grad_norm": 5.096752643585205, "learning_rate": 9.995600626350503e-06, "loss": 0.4997, "step": 1035 }, { "epoch": 0.04301008308294283, "grad_norm": 2.722102165222168, "learning_rate": 9.995572384212805e-06, "loss": 0.4927, "step": 1036 }, { "epoch": 0.043051598607154164, "grad_norm": 3.2529489994049072, "learning_rate": 9.995544051753715e-06, "loss": 0.5018, "step": 1037 }, { "epoch": 0.0430931141313655, "grad_norm": 2.6411588191986084, "learning_rate": 9.995515628973751e-06, "loss": 0.4976, "step": 1038 }, { "epoch": 0.04313462965557683, "grad_norm": 2.952420473098755, "learning_rate": 9.995487115873423e-06, "loss": 0.6588, "step": 1039 }, { "epoch": 0.04317614517978817, "grad_norm": 3.125514268875122, "learning_rate": 9.995458512453249e-06, "loss": 0.5661, "step": 1040 }, { "epoch": 0.0432176607039995, "grad_norm": 2.7146685123443604, "learning_rate": 9.995429818713744e-06, "loss": 0.5302, "step": 1041 }, { "epoch": 0.043259176228210836, "grad_norm": 2.8111813068389893, "learning_rate": 9.99540103465543e-06, "loss": 0.6621, "step": 1042 }, { "epoch": 0.04330069175242217, "grad_norm": 2.875852346420288, "learning_rate": 9.995372160278824e-06, "loss": 0.6199, "step": 1043 }, { "epoch": 0.04334220727663351, "grad_norm": 2.8291513919830322, "learning_rate": 9.995343195584451e-06, "loss": 0.4873, "step": 1044 }, { "epoch": 0.04338372280084484, "grad_norm": 2.6204450130462646, "learning_rate": 9.995314140572831e-06, "loss": 0.4662, "step": 1045 }, { "epoch": 0.043425238325056174, "grad_norm": 2.8203415870666504, "learning_rate": 9.995284995244492e-06, "loss": 0.4281, "step": 1046 }, { "epoch": 0.04346675384926751, "grad_norm": 3.164187431335449, "learning_rate": 9.995255759599963e-06, "loss": 0.6201, "step": 1047 }, { "epoch": 0.04350826937347885, "grad_norm": 2.4061825275421143, "learning_rate": 9.995226433639767e-06, "loss": 0.6052, "step": 1048 }, { "epoch": 0.04354978489769018, "grad_norm": 2.9828691482543945, "learning_rate": 9.99519701736444e-06, "loss": 0.6286, "step": 1049 }, { "epoch": 0.04359130042190151, "grad_norm": 2.841949462890625, "learning_rate": 9.995167510774508e-06, "loss": 0.5557, "step": 1050 }, { "epoch": 0.043632815946112846, "grad_norm": 2.6579980850219727, "learning_rate": 9.99513791387051e-06, "loss": 0.4751, "step": 1051 }, { "epoch": 0.043674331470324186, "grad_norm": 2.8134124279022217, "learning_rate": 9.995108226652978e-06, "loss": 0.5211, "step": 1052 }, { "epoch": 0.04371584699453552, "grad_norm": 2.8215889930725098, "learning_rate": 9.99507844912245e-06, "loss": 0.6793, "step": 1053 }, { "epoch": 0.04375736251874685, "grad_norm": 2.497373342514038, "learning_rate": 9.995048581279463e-06, "loss": 0.5514, "step": 1054 }, { "epoch": 0.04379887804295819, "grad_norm": 2.7092576026916504, "learning_rate": 9.995018623124559e-06, "loss": 0.5994, "step": 1055 }, { "epoch": 0.043840393567169525, "grad_norm": 2.183279514312744, "learning_rate": 9.994988574658277e-06, "loss": 0.3713, "step": 1056 }, { "epoch": 0.04388190909138086, "grad_norm": 2.892103672027588, "learning_rate": 9.994958435881164e-06, "loss": 0.5866, "step": 1057 }, { "epoch": 0.04392342461559219, "grad_norm": 3.49843692779541, "learning_rate": 9.994928206793761e-06, "loss": 0.6471, "step": 1058 }, { "epoch": 0.04396494013980353, "grad_norm": 2.7478654384613037, "learning_rate": 9.994897887396617e-06, "loss": 0.5491, "step": 1059 }, { "epoch": 0.044006455664014864, "grad_norm": 2.3897085189819336, "learning_rate": 9.99486747769028e-06, "loss": 0.537, "step": 1060 }, { "epoch": 0.0440479711882262, "grad_norm": 3.4798948764801025, "learning_rate": 9.994836977675299e-06, "loss": 0.5627, "step": 1061 }, { "epoch": 0.04408948671243753, "grad_norm": 2.605233669281006, "learning_rate": 9.994806387352227e-06, "loss": 0.437, "step": 1062 }, { "epoch": 0.04413100223664887, "grad_norm": 2.7061853408813477, "learning_rate": 9.994775706721614e-06, "loss": 0.369, "step": 1063 }, { "epoch": 0.0441725177608602, "grad_norm": 2.8385233879089355, "learning_rate": 9.994744935784017e-06, "loss": 0.4259, "step": 1064 }, { "epoch": 0.044214033285071536, "grad_norm": 2.9447131156921387, "learning_rate": 9.994714074539991e-06, "loss": 0.5869, "step": 1065 }, { "epoch": 0.04425554880928287, "grad_norm": 2.454296350479126, "learning_rate": 9.994683122990097e-06, "loss": 0.4596, "step": 1066 }, { "epoch": 0.04429706433349421, "grad_norm": 2.8988678455352783, "learning_rate": 9.99465208113489e-06, "loss": 0.6021, "step": 1067 }, { "epoch": 0.04433857985770554, "grad_norm": 2.412776231765747, "learning_rate": 9.994620948974935e-06, "loss": 0.3963, "step": 1068 }, { "epoch": 0.044380095381916874, "grad_norm": 2.3591129779815674, "learning_rate": 9.994589726510795e-06, "loss": 0.4146, "step": 1069 }, { "epoch": 0.04442161090612821, "grad_norm": 2.4012982845306396, "learning_rate": 9.994558413743032e-06, "loss": 0.4887, "step": 1070 }, { "epoch": 0.04446312643033955, "grad_norm": 2.598647117614746, "learning_rate": 9.994527010672213e-06, "loss": 0.5139, "step": 1071 }, { "epoch": 0.04450464195455088, "grad_norm": 2.5224101543426514, "learning_rate": 9.994495517298906e-06, "loss": 0.5861, "step": 1072 }, { "epoch": 0.04454615747876221, "grad_norm": 2.418461799621582, "learning_rate": 9.994463933623681e-06, "loss": 0.5879, "step": 1073 }, { "epoch": 0.044587673002973546, "grad_norm": 2.9496724605560303, "learning_rate": 9.99443225964711e-06, "loss": 0.5079, "step": 1074 }, { "epoch": 0.044629188527184886, "grad_norm": 3.122380256652832, "learning_rate": 9.99440049536976e-06, "loss": 0.5162, "step": 1075 }, { "epoch": 0.04467070405139622, "grad_norm": 2.420039653778076, "learning_rate": 9.994368640792213e-06, "loss": 0.4757, "step": 1076 }, { "epoch": 0.04471221957560755, "grad_norm": 2.6272783279418945, "learning_rate": 9.994336695915041e-06, "loss": 0.5274, "step": 1077 }, { "epoch": 0.044753735099818885, "grad_norm": 2.28511118888855, "learning_rate": 9.99430466073882e-06, "loss": 0.4142, "step": 1078 }, { "epoch": 0.044795250624030225, "grad_norm": 2.668351888656616, "learning_rate": 9.994272535264133e-06, "loss": 0.6225, "step": 1079 }, { "epoch": 0.04483676614824156, "grad_norm": 3.3809661865234375, "learning_rate": 9.994240319491559e-06, "loss": 0.5794, "step": 1080 }, { "epoch": 0.04487828167245289, "grad_norm": 3.228276491165161, "learning_rate": 9.994208013421681e-06, "loss": 0.4729, "step": 1081 }, { "epoch": 0.04491979719666423, "grad_norm": 2.284912109375, "learning_rate": 9.994175617055082e-06, "loss": 0.5894, "step": 1082 }, { "epoch": 0.044961312720875564, "grad_norm": 2.877546548843384, "learning_rate": 9.99414313039235e-06, "loss": 0.6163, "step": 1083 }, { "epoch": 0.0450028282450869, "grad_norm": 2.596768379211426, "learning_rate": 9.994110553434067e-06, "loss": 0.5333, "step": 1084 }, { "epoch": 0.04504434376929823, "grad_norm": 2.2824313640594482, "learning_rate": 9.994077886180828e-06, "loss": 0.5533, "step": 1085 }, { "epoch": 0.04508585929350957, "grad_norm": 3.727182149887085, "learning_rate": 9.994045128633221e-06, "loss": 0.5418, "step": 1086 }, { "epoch": 0.0451273748177209, "grad_norm": 2.7216126918792725, "learning_rate": 9.994012280791838e-06, "loss": 0.6629, "step": 1087 }, { "epoch": 0.045168890341932236, "grad_norm": 3.323674440383911, "learning_rate": 9.993979342657276e-06, "loss": 0.5468, "step": 1088 }, { "epoch": 0.04521040586614357, "grad_norm": 2.536961555480957, "learning_rate": 9.993946314230125e-06, "loss": 0.4344, "step": 1089 }, { "epoch": 0.04525192139035491, "grad_norm": 2.5930862426757812, "learning_rate": 9.993913195510988e-06, "loss": 0.4004, "step": 1090 }, { "epoch": 0.04529343691456624, "grad_norm": 2.3813161849975586, "learning_rate": 9.99387998650046e-06, "loss": 0.4173, "step": 1091 }, { "epoch": 0.045334952438777575, "grad_norm": 2.8440968990325928, "learning_rate": 9.99384668719914e-06, "loss": 0.45, "step": 1092 }, { "epoch": 0.04537646796298891, "grad_norm": 2.4720704555511475, "learning_rate": 9.993813297607635e-06, "loss": 0.5193, "step": 1093 }, { "epoch": 0.04541798348720025, "grad_norm": 3.4085328578948975, "learning_rate": 9.993779817726546e-06, "loss": 0.4488, "step": 1094 }, { "epoch": 0.04545949901141158, "grad_norm": 3.284459352493286, "learning_rate": 9.993746247556478e-06, "loss": 0.5571, "step": 1095 }, { "epoch": 0.04550101453562291, "grad_norm": 2.5782415866851807, "learning_rate": 9.99371258709804e-06, "loss": 0.4006, "step": 1096 }, { "epoch": 0.045542530059834246, "grad_norm": 2.6739635467529297, "learning_rate": 9.993678836351837e-06, "loss": 0.4035, "step": 1097 }, { "epoch": 0.045584045584045586, "grad_norm": 2.806891918182373, "learning_rate": 9.993644995318483e-06, "loss": 0.5519, "step": 1098 }, { "epoch": 0.04562556110825692, "grad_norm": 2.375427484512329, "learning_rate": 9.993611063998588e-06, "loss": 0.6253, "step": 1099 }, { "epoch": 0.04566707663246825, "grad_norm": 2.7560150623321533, "learning_rate": 9.993577042392763e-06, "loss": 0.6944, "step": 1100 }, { "epoch": 0.045708592156679585, "grad_norm": 2.901536464691162, "learning_rate": 9.993542930501629e-06, "loss": 0.5617, "step": 1101 }, { "epoch": 0.045750107680890925, "grad_norm": 2.5173161029815674, "learning_rate": 9.993508728325798e-06, "loss": 0.5466, "step": 1102 }, { "epoch": 0.04579162320510226, "grad_norm": 2.605506181716919, "learning_rate": 9.993474435865891e-06, "loss": 0.4086, "step": 1103 }, { "epoch": 0.04583313872931359, "grad_norm": 2.8991124629974365, "learning_rate": 9.993440053122524e-06, "loss": 0.5345, "step": 1104 }, { "epoch": 0.045874654253524924, "grad_norm": 2.966907024383545, "learning_rate": 9.993405580096325e-06, "loss": 0.5275, "step": 1105 }, { "epoch": 0.045916169777736264, "grad_norm": 2.8092575073242188, "learning_rate": 9.993371016787912e-06, "loss": 0.5298, "step": 1106 }, { "epoch": 0.0459576853019476, "grad_norm": 2.5013625621795654, "learning_rate": 9.993336363197912e-06, "loss": 0.4619, "step": 1107 }, { "epoch": 0.04599920082615893, "grad_norm": 2.9406676292419434, "learning_rate": 9.993301619326953e-06, "loss": 0.5402, "step": 1108 }, { "epoch": 0.04604071635037027, "grad_norm": 2.9033191204071045, "learning_rate": 9.99326678517566e-06, "loss": 0.4528, "step": 1109 }, { "epoch": 0.0460822318745816, "grad_norm": 2.470456838607788, "learning_rate": 9.993231860744665e-06, "loss": 0.4982, "step": 1110 }, { "epoch": 0.046123747398792936, "grad_norm": 3.1718525886535645, "learning_rate": 9.993196846034599e-06, "loss": 0.5851, "step": 1111 }, { "epoch": 0.04616526292300427, "grad_norm": 3.3253936767578125, "learning_rate": 9.993161741046094e-06, "loss": 0.675, "step": 1112 }, { "epoch": 0.04620677844721561, "grad_norm": 2.5919694900512695, "learning_rate": 9.993126545779785e-06, "loss": 0.4496, "step": 1113 }, { "epoch": 0.04624829397142694, "grad_norm": 2.305966377258301, "learning_rate": 9.993091260236309e-06, "loss": 0.4957, "step": 1114 }, { "epoch": 0.046289809495638275, "grad_norm": 3.4109058380126953, "learning_rate": 9.993055884416304e-06, "loss": 0.4422, "step": 1115 }, { "epoch": 0.04633132501984961, "grad_norm": 3.242922782897949, "learning_rate": 9.99302041832041e-06, "loss": 0.5573, "step": 1116 }, { "epoch": 0.04637284054406095, "grad_norm": 2.7396750450134277, "learning_rate": 9.992984861949267e-06, "loss": 0.6266, "step": 1117 }, { "epoch": 0.04641435606827228, "grad_norm": 2.8085153102874756, "learning_rate": 9.99294921530352e-06, "loss": 0.7546, "step": 1118 }, { "epoch": 0.046455871592483614, "grad_norm": 2.7615902423858643, "learning_rate": 9.99291347838381e-06, "loss": 0.489, "step": 1119 }, { "epoch": 0.046497387116694947, "grad_norm": 2.8158106803894043, "learning_rate": 9.992877651190787e-06, "loss": 0.6525, "step": 1120 }, { "epoch": 0.046538902640906286, "grad_norm": 3.6974241733551025, "learning_rate": 9.992841733725097e-06, "loss": 0.6174, "step": 1121 }, { "epoch": 0.04658041816511762, "grad_norm": 3.137939453125, "learning_rate": 9.992805725987388e-06, "loss": 0.5668, "step": 1122 }, { "epoch": 0.04662193368932895, "grad_norm": 2.7338132858276367, "learning_rate": 9.992769627978313e-06, "loss": 0.5267, "step": 1123 }, { "epoch": 0.046663449213540285, "grad_norm": 2.591792345046997, "learning_rate": 9.992733439698523e-06, "loss": 0.5251, "step": 1124 }, { "epoch": 0.046704964737751625, "grad_norm": 2.595224380493164, "learning_rate": 9.992697161148674e-06, "loss": 0.4647, "step": 1125 }, { "epoch": 0.04674648026196296, "grad_norm": 2.8020288944244385, "learning_rate": 9.992660792329422e-06, "loss": 0.4574, "step": 1126 }, { "epoch": 0.04678799578617429, "grad_norm": 2.5582401752471924, "learning_rate": 9.992624333241425e-06, "loss": 0.5719, "step": 1127 }, { "epoch": 0.046829511310385624, "grad_norm": 2.8703503608703613, "learning_rate": 9.99258778388534e-06, "loss": 0.5508, "step": 1128 }, { "epoch": 0.046871026834596964, "grad_norm": 2.302812337875366, "learning_rate": 9.992551144261828e-06, "loss": 0.4319, "step": 1129 }, { "epoch": 0.0469125423588083, "grad_norm": 2.6072142124176025, "learning_rate": 9.992514414371553e-06, "loss": 0.517, "step": 1130 }, { "epoch": 0.04695405788301963, "grad_norm": 2.6455979347229004, "learning_rate": 9.992477594215178e-06, "loss": 0.4846, "step": 1131 }, { "epoch": 0.04699557340723096, "grad_norm": 3.0354514122009277, "learning_rate": 9.992440683793368e-06, "loss": 0.5764, "step": 1132 }, { "epoch": 0.0470370889314423, "grad_norm": 2.662092685699463, "learning_rate": 9.992403683106792e-06, "loss": 0.5176, "step": 1133 }, { "epoch": 0.047078604455653636, "grad_norm": 2.450716495513916, "learning_rate": 9.99236659215612e-06, "loss": 0.5714, "step": 1134 }, { "epoch": 0.04712011997986497, "grad_norm": 3.9453632831573486, "learning_rate": 9.99232941094202e-06, "loss": 0.4744, "step": 1135 }, { "epoch": 0.04716163550407631, "grad_norm": 2.6831114292144775, "learning_rate": 9.992292139465166e-06, "loss": 0.2936, "step": 1136 }, { "epoch": 0.04720315102828764, "grad_norm": 2.287010431289673, "learning_rate": 9.992254777726231e-06, "loss": 0.4956, "step": 1137 }, { "epoch": 0.047244666552498975, "grad_norm": 2.7011685371398926, "learning_rate": 9.99221732572589e-06, "loss": 0.5396, "step": 1138 }, { "epoch": 0.04728618207671031, "grad_norm": 2.705427408218384, "learning_rate": 9.992179783464822e-06, "loss": 0.6155, "step": 1139 }, { "epoch": 0.04732769760092165, "grad_norm": 2.8967642784118652, "learning_rate": 9.992142150943703e-06, "loss": 0.6186, "step": 1140 }, { "epoch": 0.04736921312513298, "grad_norm": 2.849891185760498, "learning_rate": 9.992104428163216e-06, "loss": 0.4722, "step": 1141 }, { "epoch": 0.047410728649344314, "grad_norm": 2.722501516342163, "learning_rate": 9.992066615124041e-06, "loss": 0.4966, "step": 1142 }, { "epoch": 0.04745224417355565, "grad_norm": 2.8574936389923096, "learning_rate": 9.992028711826865e-06, "loss": 0.6925, "step": 1143 }, { "epoch": 0.047493759697766987, "grad_norm": 2.462158679962158, "learning_rate": 9.991990718272368e-06, "loss": 0.4516, "step": 1144 }, { "epoch": 0.04753527522197832, "grad_norm": 2.533472776412964, "learning_rate": 9.991952634461243e-06, "loss": 0.5245, "step": 1145 }, { "epoch": 0.04757679074618965, "grad_norm": 2.6738412380218506, "learning_rate": 9.991914460394174e-06, "loss": 0.6024, "step": 1146 }, { "epoch": 0.047618306270400985, "grad_norm": 2.919407606124878, "learning_rate": 9.991876196071852e-06, "loss": 0.5468, "step": 1147 }, { "epoch": 0.047659821794612325, "grad_norm": 2.2184770107269287, "learning_rate": 9.99183784149497e-06, "loss": 0.4597, "step": 1148 }, { "epoch": 0.04770133731882366, "grad_norm": 2.838475227355957, "learning_rate": 9.991799396664223e-06, "loss": 0.4419, "step": 1149 }, { "epoch": 0.04774285284303499, "grad_norm": 2.559274196624756, "learning_rate": 9.991760861580301e-06, "loss": 0.4278, "step": 1150 }, { "epoch": 0.047784368367246324, "grad_norm": 2.892815113067627, "learning_rate": 9.991722236243908e-06, "loss": 0.4903, "step": 1151 }, { "epoch": 0.047825883891457664, "grad_norm": 3.5596072673797607, "learning_rate": 9.991683520655735e-06, "loss": 0.5164, "step": 1152 }, { "epoch": 0.047867399415669, "grad_norm": 2.723719835281372, "learning_rate": 9.991644714816486e-06, "loss": 0.469, "step": 1153 }, { "epoch": 0.04790891493988033, "grad_norm": 3.1803784370422363, "learning_rate": 9.991605818726861e-06, "loss": 0.5465, "step": 1154 }, { "epoch": 0.04795043046409166, "grad_norm": 3.14420485496521, "learning_rate": 9.991566832387564e-06, "loss": 0.4297, "step": 1155 }, { "epoch": 0.047991945988303, "grad_norm": 2.911708116531372, "learning_rate": 9.991527755799303e-06, "loss": 0.385, "step": 1156 }, { "epoch": 0.048033461512514336, "grad_norm": 3.205512285232544, "learning_rate": 9.991488588962777e-06, "loss": 0.6583, "step": 1157 }, { "epoch": 0.04807497703672567, "grad_norm": 2.6129961013793945, "learning_rate": 9.991449331878701e-06, "loss": 0.5916, "step": 1158 }, { "epoch": 0.048116492560937, "grad_norm": 1.9188286066055298, "learning_rate": 9.991409984547783e-06, "loss": 0.3591, "step": 1159 }, { "epoch": 0.04815800808514834, "grad_norm": 1.6413317918777466, "learning_rate": 9.991370546970733e-06, "loss": 0.3204, "step": 1160 }, { "epoch": 0.048199523609359675, "grad_norm": 2.153527021408081, "learning_rate": 9.991331019148264e-06, "loss": 0.4351, "step": 1161 }, { "epoch": 0.04824103913357101, "grad_norm": 2.7612197399139404, "learning_rate": 9.991291401081092e-06, "loss": 0.4979, "step": 1162 }, { "epoch": 0.04828255465778235, "grad_norm": 3.678022623062134, "learning_rate": 9.991251692769933e-06, "loss": 0.5968, "step": 1163 }, { "epoch": 0.04832407018199368, "grad_norm": 2.9461042881011963, "learning_rate": 9.991211894215505e-06, "loss": 0.4892, "step": 1164 }, { "epoch": 0.048365585706205014, "grad_norm": 2.6268296241760254, "learning_rate": 9.991172005418528e-06, "loss": 0.3672, "step": 1165 }, { "epoch": 0.04840710123041635, "grad_norm": 3.002466917037964, "learning_rate": 9.991132026379722e-06, "loss": 0.4827, "step": 1166 }, { "epoch": 0.04844861675462769, "grad_norm": 2.6605277061462402, "learning_rate": 9.991091957099809e-06, "loss": 0.5341, "step": 1167 }, { "epoch": 0.04849013227883902, "grad_norm": 3.0745091438293457, "learning_rate": 9.991051797579516e-06, "loss": 0.5539, "step": 1168 }, { "epoch": 0.04853164780305035, "grad_norm": 2.7409417629241943, "learning_rate": 9.991011547819568e-06, "loss": 0.4507, "step": 1169 }, { "epoch": 0.048573163327261686, "grad_norm": 2.9246890544891357, "learning_rate": 9.990971207820693e-06, "loss": 0.6643, "step": 1170 }, { "epoch": 0.048614678851473025, "grad_norm": 2.345292329788208, "learning_rate": 9.990930777583618e-06, "loss": 0.4453, "step": 1171 }, { "epoch": 0.04865619437568436, "grad_norm": 3.6830263137817383, "learning_rate": 9.990890257109078e-06, "loss": 0.56, "step": 1172 }, { "epoch": 0.04869770989989569, "grad_norm": 2.620513439178467, "learning_rate": 9.990849646397803e-06, "loss": 0.624, "step": 1173 }, { "epoch": 0.048739225424107024, "grad_norm": 3.2248613834381104, "learning_rate": 9.990808945450528e-06, "loss": 0.5769, "step": 1174 }, { "epoch": 0.048780740948318364, "grad_norm": 2.3259971141815186, "learning_rate": 9.99076815426799e-06, "loss": 0.4737, "step": 1175 }, { "epoch": 0.0488222564725297, "grad_norm": 3.562102794647217, "learning_rate": 9.990727272850923e-06, "loss": 0.5971, "step": 1176 }, { "epoch": 0.04886377199674103, "grad_norm": 3.9042065143585205, "learning_rate": 9.99068630120007e-06, "loss": 0.5255, "step": 1177 }, { "epoch": 0.04890528752095236, "grad_norm": 2.7561187744140625, "learning_rate": 9.99064523931617e-06, "loss": 0.4407, "step": 1178 }, { "epoch": 0.0489468030451637, "grad_norm": 2.70215106010437, "learning_rate": 9.990604087199965e-06, "loss": 0.6999, "step": 1179 }, { "epoch": 0.048988318569375036, "grad_norm": 2.4666965007781982, "learning_rate": 9.9905628448522e-06, "loss": 0.5444, "step": 1180 }, { "epoch": 0.04902983409358637, "grad_norm": 2.6714210510253906, "learning_rate": 9.99052151227362e-06, "loss": 0.5524, "step": 1181 }, { "epoch": 0.0490713496177977, "grad_norm": 2.726109266281128, "learning_rate": 9.990480089464974e-06, "loss": 0.7176, "step": 1182 }, { "epoch": 0.04911286514200904, "grad_norm": 2.703010320663452, "learning_rate": 9.990438576427007e-06, "loss": 0.5491, "step": 1183 }, { "epoch": 0.049154380666220375, "grad_norm": 3.3107693195343018, "learning_rate": 9.990396973160474e-06, "loss": 0.6244, "step": 1184 }, { "epoch": 0.04919589619043171, "grad_norm": 2.8303472995758057, "learning_rate": 9.990355279666124e-06, "loss": 0.6177, "step": 1185 }, { "epoch": 0.04923741171464304, "grad_norm": 2.8618245124816895, "learning_rate": 9.990313495944715e-06, "loss": 0.5804, "step": 1186 }, { "epoch": 0.04927892723885438, "grad_norm": 2.487126588821411, "learning_rate": 9.990271621996997e-06, "loss": 0.546, "step": 1187 }, { "epoch": 0.049320442763065714, "grad_norm": 2.8457090854644775, "learning_rate": 9.99022965782373e-06, "loss": 0.5012, "step": 1188 }, { "epoch": 0.04936195828727705, "grad_norm": 2.7811031341552734, "learning_rate": 9.990187603425673e-06, "loss": 0.5654, "step": 1189 }, { "epoch": 0.04940347381148839, "grad_norm": 3.4550540447235107, "learning_rate": 9.990145458803586e-06, "loss": 0.5019, "step": 1190 }, { "epoch": 0.04944498933569972, "grad_norm": 3.040510892868042, "learning_rate": 9.99010322395823e-06, "loss": 0.6102, "step": 1191 }, { "epoch": 0.04948650485991105, "grad_norm": 3.841487169265747, "learning_rate": 9.990060898890371e-06, "loss": 0.5124, "step": 1192 }, { "epoch": 0.049528020384122386, "grad_norm": 2.7100470066070557, "learning_rate": 9.990018483600771e-06, "loss": 0.628, "step": 1193 }, { "epoch": 0.049569535908333726, "grad_norm": 2.6350817680358887, "learning_rate": 9.989975978090199e-06, "loss": 0.6464, "step": 1194 }, { "epoch": 0.04961105143254506, "grad_norm": 2.810297727584839, "learning_rate": 9.989933382359423e-06, "loss": 0.5495, "step": 1195 }, { "epoch": 0.04965256695675639, "grad_norm": 2.791334390640259, "learning_rate": 9.989890696409212e-06, "loss": 0.4655, "step": 1196 }, { "epoch": 0.049694082480967725, "grad_norm": 2.8416852951049805, "learning_rate": 9.98984792024034e-06, "loss": 0.481, "step": 1197 }, { "epoch": 0.049735598005179064, "grad_norm": 2.6509408950805664, "learning_rate": 9.989805053853581e-06, "loss": 0.5607, "step": 1198 }, { "epoch": 0.0497771135293904, "grad_norm": 2.7649447917938232, "learning_rate": 9.989762097249708e-06, "loss": 0.6217, "step": 1199 }, { "epoch": 0.04981862905360173, "grad_norm": 2.433206558227539, "learning_rate": 9.989719050429496e-06, "loss": 0.5822, "step": 1200 }, { "epoch": 0.04986014457781306, "grad_norm": 2.8459434509277344, "learning_rate": 9.989675913393727e-06, "loss": 0.5057, "step": 1201 }, { "epoch": 0.0499016601020244, "grad_norm": 2.899988889694214, "learning_rate": 9.989632686143178e-06, "loss": 0.5065, "step": 1202 }, { "epoch": 0.049943175626235736, "grad_norm": 2.8549091815948486, "learning_rate": 9.989589368678634e-06, "loss": 0.5777, "step": 1203 }, { "epoch": 0.04998469115044707, "grad_norm": 2.4181387424468994, "learning_rate": 9.989545961000874e-06, "loss": 0.4852, "step": 1204 }, { "epoch": 0.0500262066746584, "grad_norm": 3.0693209171295166, "learning_rate": 9.989502463110687e-06, "loss": 0.5903, "step": 1205 }, { "epoch": 0.05006772219886974, "grad_norm": 2.5759923458099365, "learning_rate": 9.989458875008855e-06, "loss": 0.5317, "step": 1206 }, { "epoch": 0.050109237723081075, "grad_norm": 2.915687322616577, "learning_rate": 9.989415196696169e-06, "loss": 0.5997, "step": 1207 }, { "epoch": 0.05015075324729241, "grad_norm": 3.14159893989563, "learning_rate": 9.989371428173419e-06, "loss": 0.4353, "step": 1208 }, { "epoch": 0.05019226877150374, "grad_norm": 2.4214415550231934, "learning_rate": 9.989327569441395e-06, "loss": 0.4697, "step": 1209 }, { "epoch": 0.05023378429571508, "grad_norm": 2.4325029850006104, "learning_rate": 9.989283620500892e-06, "loss": 0.4377, "step": 1210 }, { "epoch": 0.050275299819926414, "grad_norm": 2.9225337505340576, "learning_rate": 9.9892395813527e-06, "loss": 0.5258, "step": 1211 }, { "epoch": 0.05031681534413775, "grad_norm": 2.647333860397339, "learning_rate": 9.98919545199762e-06, "loss": 0.6378, "step": 1212 }, { "epoch": 0.05035833086834908, "grad_norm": 2.6332449913024902, "learning_rate": 9.989151232436447e-06, "loss": 0.4305, "step": 1213 }, { "epoch": 0.05039984639256042, "grad_norm": 2.9116175174713135, "learning_rate": 9.989106922669985e-06, "loss": 0.5683, "step": 1214 }, { "epoch": 0.05044136191677175, "grad_norm": 3.2968361377716064, "learning_rate": 9.98906252269903e-06, "loss": 0.6731, "step": 1215 }, { "epoch": 0.050482877440983086, "grad_norm": 3.726910352706909, "learning_rate": 9.989018032524384e-06, "loss": 0.5601, "step": 1216 }, { "epoch": 0.050524392965194426, "grad_norm": 2.5146877765655518, "learning_rate": 9.988973452146858e-06, "loss": 0.3618, "step": 1217 }, { "epoch": 0.05056590848940576, "grad_norm": 2.698444366455078, "learning_rate": 9.988928781567251e-06, "loss": 0.488, "step": 1218 }, { "epoch": 0.05060742401361709, "grad_norm": 2.8247082233428955, "learning_rate": 9.988884020786376e-06, "loss": 0.429, "step": 1219 }, { "epoch": 0.050648939537828425, "grad_norm": 2.6502888202667236, "learning_rate": 9.988839169805039e-06, "loss": 0.5434, "step": 1220 }, { "epoch": 0.050690455062039765, "grad_norm": 2.163590431213379, "learning_rate": 9.98879422862405e-06, "loss": 0.4756, "step": 1221 }, { "epoch": 0.0507319705862511, "grad_norm": 2.844175338745117, "learning_rate": 9.988749197244224e-06, "loss": 0.5314, "step": 1222 }, { "epoch": 0.05077348611046243, "grad_norm": 2.6896800994873047, "learning_rate": 9.988704075666377e-06, "loss": 0.5439, "step": 1223 }, { "epoch": 0.05081500163467376, "grad_norm": 2.462900400161743, "learning_rate": 9.98865886389132e-06, "loss": 0.5996, "step": 1224 }, { "epoch": 0.0508565171588851, "grad_norm": 4.518937587738037, "learning_rate": 9.988613561919875e-06, "loss": 0.6047, "step": 1225 }, { "epoch": 0.050898032683096436, "grad_norm": 2.898909091949463, "learning_rate": 9.988568169752859e-06, "loss": 0.5025, "step": 1226 }, { "epoch": 0.05093954820730777, "grad_norm": 2.8182411193847656, "learning_rate": 9.988522687391092e-06, "loss": 0.6743, "step": 1227 }, { "epoch": 0.0509810637315191, "grad_norm": 2.739208221435547, "learning_rate": 9.988477114835396e-06, "loss": 0.61, "step": 1228 }, { "epoch": 0.05102257925573044, "grad_norm": 2.69588041305542, "learning_rate": 9.988431452086596e-06, "loss": 0.3467, "step": 1229 }, { "epoch": 0.051064094779941775, "grad_norm": 2.6417202949523926, "learning_rate": 9.988385699145518e-06, "loss": 0.4972, "step": 1230 }, { "epoch": 0.05110561030415311, "grad_norm": 2.5817577838897705, "learning_rate": 9.988339856012988e-06, "loss": 0.5534, "step": 1231 }, { "epoch": 0.05114712582836444, "grad_norm": 3.197244644165039, "learning_rate": 9.988293922689837e-06, "loss": 0.4665, "step": 1232 }, { "epoch": 0.05118864135257578, "grad_norm": 2.9733784198760986, "learning_rate": 9.988247899176892e-06, "loss": 0.649, "step": 1233 }, { "epoch": 0.051230156876787114, "grad_norm": 2.961649179458618, "learning_rate": 9.988201785474988e-06, "loss": 0.54, "step": 1234 }, { "epoch": 0.05127167240099845, "grad_norm": 3.1341612339019775, "learning_rate": 9.98815558158496e-06, "loss": 0.6354, "step": 1235 }, { "epoch": 0.05131318792520978, "grad_norm": 3.1261279582977295, "learning_rate": 9.988109287507641e-06, "loss": 0.5441, "step": 1236 }, { "epoch": 0.05135470344942112, "grad_norm": 2.5746612548828125, "learning_rate": 9.988062903243867e-06, "loss": 0.536, "step": 1237 }, { "epoch": 0.05139621897363245, "grad_norm": 3.234860897064209, "learning_rate": 9.988016428794479e-06, "loss": 0.6294, "step": 1238 }, { "epoch": 0.051437734497843786, "grad_norm": 2.3794217109680176, "learning_rate": 9.987969864160314e-06, "loss": 0.4806, "step": 1239 }, { "epoch": 0.05147925002205512, "grad_norm": 2.939286708831787, "learning_rate": 9.987923209342219e-06, "loss": 0.4159, "step": 1240 }, { "epoch": 0.05152076554626646, "grad_norm": 3.1503729820251465, "learning_rate": 9.987876464341033e-06, "loss": 0.6107, "step": 1241 }, { "epoch": 0.05156228107047779, "grad_norm": 2.7020530700683594, "learning_rate": 9.987829629157603e-06, "loss": 0.5871, "step": 1242 }, { "epoch": 0.051603796594689125, "grad_norm": 2.659313440322876, "learning_rate": 9.987782703792776e-06, "loss": 0.6174, "step": 1243 }, { "epoch": 0.051645312118900465, "grad_norm": 2.9730544090270996, "learning_rate": 9.9877356882474e-06, "loss": 0.5732, "step": 1244 }, { "epoch": 0.0516868276431118, "grad_norm": 2.8191285133361816, "learning_rate": 9.987688582522325e-06, "loss": 0.4214, "step": 1245 }, { "epoch": 0.05172834316732313, "grad_norm": 2.547510862350464, "learning_rate": 9.987641386618403e-06, "loss": 0.5778, "step": 1246 }, { "epoch": 0.051769858691534464, "grad_norm": 2.9461357593536377, "learning_rate": 9.987594100536486e-06, "loss": 0.4507, "step": 1247 }, { "epoch": 0.051811374215745803, "grad_norm": 3.284649133682251, "learning_rate": 9.987546724277432e-06, "loss": 0.544, "step": 1248 }, { "epoch": 0.051852889739957136, "grad_norm": 2.6318113803863525, "learning_rate": 9.987499257842095e-06, "loss": 0.4763, "step": 1249 }, { "epoch": 0.05189440526416847, "grad_norm": 2.449782371520996, "learning_rate": 9.987451701231335e-06, "loss": 0.5686, "step": 1250 }, { "epoch": 0.0519359207883798, "grad_norm": 2.400346517562866, "learning_rate": 9.987404054446009e-06, "loss": 0.4612, "step": 1251 }, { "epoch": 0.05197743631259114, "grad_norm": 2.480652093887329, "learning_rate": 9.98735631748698e-06, "loss": 0.5223, "step": 1252 }, { "epoch": 0.052018951836802475, "grad_norm": 2.5485241413116455, "learning_rate": 9.987308490355112e-06, "loss": 0.5369, "step": 1253 }, { "epoch": 0.05206046736101381, "grad_norm": 2.050963878631592, "learning_rate": 9.987260573051268e-06, "loss": 0.4617, "step": 1254 }, { "epoch": 0.05210198288522514, "grad_norm": 2.6821060180664062, "learning_rate": 9.987212565576316e-06, "loss": 0.5366, "step": 1255 }, { "epoch": 0.05214349840943648, "grad_norm": 2.9955966472625732, "learning_rate": 9.987164467931124e-06, "loss": 0.5267, "step": 1256 }, { "epoch": 0.052185013933647814, "grad_norm": 3.239630699157715, "learning_rate": 9.98711628011656e-06, "loss": 0.5475, "step": 1257 }, { "epoch": 0.05222652945785915, "grad_norm": 2.586138963699341, "learning_rate": 9.987068002133497e-06, "loss": 0.4179, "step": 1258 }, { "epoch": 0.05226804498207048, "grad_norm": 2.6661376953125, "learning_rate": 9.987019633982807e-06, "loss": 0.4607, "step": 1259 }, { "epoch": 0.05230956050628182, "grad_norm": 2.921903610229492, "learning_rate": 9.986971175665365e-06, "loss": 0.4825, "step": 1260 }, { "epoch": 0.05235107603049315, "grad_norm": 2.333522081375122, "learning_rate": 9.986922627182047e-06, "loss": 0.5907, "step": 1261 }, { "epoch": 0.052392591554704486, "grad_norm": 3.046841859817505, "learning_rate": 9.986873988533728e-06, "loss": 0.5578, "step": 1262 }, { "epoch": 0.05243410707891582, "grad_norm": 2.9021055698394775, "learning_rate": 9.986825259721292e-06, "loss": 0.5892, "step": 1263 }, { "epoch": 0.05247562260312716, "grad_norm": 2.8246843814849854, "learning_rate": 9.986776440745618e-06, "loss": 0.5286, "step": 1264 }, { "epoch": 0.05251713812733849, "grad_norm": 2.5872890949249268, "learning_rate": 9.986727531607587e-06, "loss": 0.4315, "step": 1265 }, { "epoch": 0.052558653651549825, "grad_norm": 2.784306764602661, "learning_rate": 9.986678532308088e-06, "loss": 0.4998, "step": 1266 }, { "epoch": 0.05260016917576116, "grad_norm": 2.7964231967926025, "learning_rate": 9.986629442848001e-06, "loss": 0.4325, "step": 1267 }, { "epoch": 0.0526416846999725, "grad_norm": 3.0338151454925537, "learning_rate": 9.986580263228216e-06, "loss": 0.5568, "step": 1268 }, { "epoch": 0.05268320022418383, "grad_norm": 3.042436122894287, "learning_rate": 9.986530993449626e-06, "loss": 0.4903, "step": 1269 }, { "epoch": 0.052724715748395164, "grad_norm": 2.7160568237304688, "learning_rate": 9.986481633513115e-06, "loss": 0.6547, "step": 1270 }, { "epoch": 0.052766231272606504, "grad_norm": 2.6630477905273438, "learning_rate": 9.986432183419581e-06, "loss": 0.5746, "step": 1271 }, { "epoch": 0.05280774679681784, "grad_norm": 2.9330856800079346, "learning_rate": 9.986382643169914e-06, "loss": 0.4567, "step": 1272 }, { "epoch": 0.05284926232102917, "grad_norm": 3.050846815109253, "learning_rate": 9.986333012765013e-06, "loss": 0.5857, "step": 1273 }, { "epoch": 0.0528907778452405, "grad_norm": 3.1770801544189453, "learning_rate": 9.986283292205773e-06, "loss": 0.6179, "step": 1274 }, { "epoch": 0.05293229336945184, "grad_norm": 2.5253207683563232, "learning_rate": 9.986233481493093e-06, "loss": 0.4779, "step": 1275 }, { "epoch": 0.052973808893663175, "grad_norm": 2.5119926929473877, "learning_rate": 9.986183580627878e-06, "loss": 0.5155, "step": 1276 }, { "epoch": 0.05301532441787451, "grad_norm": 3.4755406379699707, "learning_rate": 9.986133589611023e-06, "loss": 0.5073, "step": 1277 }, { "epoch": 0.05305683994208584, "grad_norm": 2.4468812942504883, "learning_rate": 9.986083508443437e-06, "loss": 0.5006, "step": 1278 }, { "epoch": 0.05309835546629718, "grad_norm": 2.7121589183807373, "learning_rate": 9.986033337126024e-06, "loss": 0.4897, "step": 1279 }, { "epoch": 0.053139870990508514, "grad_norm": 2.8957669734954834, "learning_rate": 9.98598307565969e-06, "loss": 0.4708, "step": 1280 }, { "epoch": 0.05318138651471985, "grad_norm": 2.8920252323150635, "learning_rate": 9.985932724045347e-06, "loss": 0.4185, "step": 1281 }, { "epoch": 0.05322290203893118, "grad_norm": 2.9167943000793457, "learning_rate": 9.985882282283904e-06, "loss": 0.4954, "step": 1282 }, { "epoch": 0.05326441756314252, "grad_norm": 2.519716501235962, "learning_rate": 9.98583175037627e-06, "loss": 0.5603, "step": 1283 }, { "epoch": 0.05330593308735385, "grad_norm": 2.5835859775543213, "learning_rate": 9.98578112832336e-06, "loss": 0.5591, "step": 1284 }, { "epoch": 0.053347448611565186, "grad_norm": 2.6712958812713623, "learning_rate": 9.985730416126091e-06, "loss": 0.6167, "step": 1285 }, { "epoch": 0.05338896413577652, "grad_norm": 2.6221776008605957, "learning_rate": 9.985679613785379e-06, "loss": 0.5503, "step": 1286 }, { "epoch": 0.05343047965998786, "grad_norm": 3.8456838130950928, "learning_rate": 9.985628721302144e-06, "loss": 0.4894, "step": 1287 }, { "epoch": 0.05347199518419919, "grad_norm": 2.3318991661071777, "learning_rate": 9.985577738677303e-06, "loss": 0.6218, "step": 1288 }, { "epoch": 0.053513510708410525, "grad_norm": 3.179309368133545, "learning_rate": 9.985526665911783e-06, "loss": 0.4307, "step": 1289 }, { "epoch": 0.05355502623262186, "grad_norm": 2.8779685497283936, "learning_rate": 9.9854755030065e-06, "loss": 0.607, "step": 1290 }, { "epoch": 0.0535965417568332, "grad_norm": 3.171372413635254, "learning_rate": 9.985424249962384e-06, "loss": 0.3287, "step": 1291 }, { "epoch": 0.05363805728104453, "grad_norm": 2.7653462886810303, "learning_rate": 9.98537290678036e-06, "loss": 0.4739, "step": 1292 }, { "epoch": 0.053679572805255864, "grad_norm": 2.3795015811920166, "learning_rate": 9.985321473461358e-06, "loss": 0.4893, "step": 1293 }, { "epoch": 0.0537210883294672, "grad_norm": 2.8752806186676025, "learning_rate": 9.985269950006307e-06, "loss": 0.3975, "step": 1294 }, { "epoch": 0.05376260385367854, "grad_norm": 2.7362656593322754, "learning_rate": 9.985218336416138e-06, "loss": 0.5327, "step": 1295 }, { "epoch": 0.05380411937788987, "grad_norm": 2.9273526668548584, "learning_rate": 9.985166632691784e-06, "loss": 0.6068, "step": 1296 }, { "epoch": 0.0538456349021012, "grad_norm": 3.1282122135162354, "learning_rate": 9.98511483883418e-06, "loss": 0.5822, "step": 1297 }, { "epoch": 0.05388715042631254, "grad_norm": 2.785400629043579, "learning_rate": 9.985062954844264e-06, "loss": 0.5456, "step": 1298 }, { "epoch": 0.053928665950523876, "grad_norm": 2.5636348724365234, "learning_rate": 9.985010980722974e-06, "loss": 0.5044, "step": 1299 }, { "epoch": 0.05397018147473521, "grad_norm": 2.526829481124878, "learning_rate": 9.984958916471247e-06, "loss": 0.478, "step": 1300 }, { "epoch": 0.05401169699894654, "grad_norm": 3.310216188430786, "learning_rate": 9.984906762090025e-06, "loss": 0.4802, "step": 1301 }, { "epoch": 0.05405321252315788, "grad_norm": 2.84871506690979, "learning_rate": 9.984854517580254e-06, "loss": 0.4329, "step": 1302 }, { "epoch": 0.054094728047369214, "grad_norm": 3.087371826171875, "learning_rate": 9.984802182942876e-06, "loss": 0.53, "step": 1303 }, { "epoch": 0.05413624357158055, "grad_norm": 2.847860813140869, "learning_rate": 9.984749758178837e-06, "loss": 0.5094, "step": 1304 }, { "epoch": 0.05417775909579188, "grad_norm": 3.1137917041778564, "learning_rate": 9.984697243289085e-06, "loss": 0.541, "step": 1305 }, { "epoch": 0.05421927462000322, "grad_norm": 3.494417667388916, "learning_rate": 9.984644638274572e-06, "loss": 0.601, "step": 1306 }, { "epoch": 0.05426079014421455, "grad_norm": 3.242075204849243, "learning_rate": 9.984591943136246e-06, "loss": 0.4539, "step": 1307 }, { "epoch": 0.054302305668425886, "grad_norm": 3.364079475402832, "learning_rate": 9.984539157875062e-06, "loss": 0.4883, "step": 1308 }, { "epoch": 0.05434382119263722, "grad_norm": 3.1876540184020996, "learning_rate": 9.984486282491972e-06, "loss": 0.6179, "step": 1309 }, { "epoch": 0.05438533671684856, "grad_norm": 4.052188873291016, "learning_rate": 9.984433316987934e-06, "loss": 0.6862, "step": 1310 }, { "epoch": 0.05442685224105989, "grad_norm": 3.203523635864258, "learning_rate": 9.984380261363906e-06, "loss": 0.5436, "step": 1311 }, { "epoch": 0.054468367765271225, "grad_norm": 2.543627977371216, "learning_rate": 9.984327115620845e-06, "loss": 0.5544, "step": 1312 }, { "epoch": 0.05450988328948256, "grad_norm": 2.4956586360931396, "learning_rate": 9.984273879759713e-06, "loss": 0.5365, "step": 1313 }, { "epoch": 0.0545513988136939, "grad_norm": 3.3794212341308594, "learning_rate": 9.984220553781472e-06, "loss": 0.6423, "step": 1314 }, { "epoch": 0.05459291433790523, "grad_norm": 2.8705036640167236, "learning_rate": 9.98416713768709e-06, "loss": 0.6343, "step": 1315 }, { "epoch": 0.054634429862116564, "grad_norm": 2.9579920768737793, "learning_rate": 9.984113631477525e-06, "loss": 0.724, "step": 1316 }, { "epoch": 0.0546759453863279, "grad_norm": 3.340312957763672, "learning_rate": 9.984060035153752e-06, "loss": 0.5745, "step": 1317 }, { "epoch": 0.05471746091053924, "grad_norm": 2.4813175201416016, "learning_rate": 9.984006348716737e-06, "loss": 0.6141, "step": 1318 }, { "epoch": 0.05475897643475057, "grad_norm": 3.4081099033355713, "learning_rate": 9.983952572167447e-06, "loss": 0.4937, "step": 1319 }, { "epoch": 0.0548004919589619, "grad_norm": 3.417515277862549, "learning_rate": 9.983898705506863e-06, "loss": 0.6755, "step": 1320 }, { "epoch": 0.054842007483173236, "grad_norm": 3.466935873031616, "learning_rate": 9.98384474873595e-06, "loss": 0.6391, "step": 1321 }, { "epoch": 0.054883523007384576, "grad_norm": 2.4866724014282227, "learning_rate": 9.983790701855691e-06, "loss": 0.4987, "step": 1322 }, { "epoch": 0.05492503853159591, "grad_norm": 2.423903226852417, "learning_rate": 9.983736564867058e-06, "loss": 0.5141, "step": 1323 }, { "epoch": 0.05496655405580724, "grad_norm": 2.6608569622039795, "learning_rate": 9.98368233777103e-06, "loss": 0.547, "step": 1324 }, { "epoch": 0.05500806958001858, "grad_norm": 2.7292544841766357, "learning_rate": 9.98362802056859e-06, "loss": 0.6305, "step": 1325 }, { "epoch": 0.055049585104229914, "grad_norm": 2.6381375789642334, "learning_rate": 9.98357361326072e-06, "loss": 0.4165, "step": 1326 }, { "epoch": 0.05509110062844125, "grad_norm": 2.5078041553497314, "learning_rate": 9.983519115848402e-06, "loss": 0.4277, "step": 1327 }, { "epoch": 0.05513261615265258, "grad_norm": 3.19389009475708, "learning_rate": 9.98346452833262e-06, "loss": 0.5584, "step": 1328 }, { "epoch": 0.05517413167686392, "grad_norm": 2.7300190925598145, "learning_rate": 9.983409850714366e-06, "loss": 0.6442, "step": 1329 }, { "epoch": 0.05521564720107525, "grad_norm": 2.8311095237731934, "learning_rate": 9.983355082994623e-06, "loss": 0.4777, "step": 1330 }, { "epoch": 0.055257162725286586, "grad_norm": 2.608116865158081, "learning_rate": 9.983300225174386e-06, "loss": 0.5324, "step": 1331 }, { "epoch": 0.05529867824949792, "grad_norm": 2.931671380996704, "learning_rate": 9.983245277254642e-06, "loss": 0.5007, "step": 1332 }, { "epoch": 0.05534019377370926, "grad_norm": 2.7876110076904297, "learning_rate": 9.98319023923639e-06, "loss": 0.5436, "step": 1333 }, { "epoch": 0.05538170929792059, "grad_norm": 2.903301477432251, "learning_rate": 9.983135111120621e-06, "loss": 0.5149, "step": 1334 }, { "epoch": 0.055423224822131925, "grad_norm": 2.937424421310425, "learning_rate": 9.983079892908332e-06, "loss": 0.6915, "step": 1335 }, { "epoch": 0.05546474034634326, "grad_norm": 2.6974270343780518, "learning_rate": 9.983024584600522e-06, "loss": 0.5603, "step": 1336 }, { "epoch": 0.0555062558705546, "grad_norm": 2.938610076904297, "learning_rate": 9.982969186198192e-06, "loss": 0.5326, "step": 1337 }, { "epoch": 0.05554777139476593, "grad_norm": 2.5675251483917236, "learning_rate": 9.982913697702344e-06, "loss": 0.56, "step": 1338 }, { "epoch": 0.055589286918977264, "grad_norm": 2.6768743991851807, "learning_rate": 9.98285811911398e-06, "loss": 0.6031, "step": 1339 }, { "epoch": 0.0556308024431886, "grad_norm": 2.7667431831359863, "learning_rate": 9.982802450434104e-06, "loss": 0.5452, "step": 1340 }, { "epoch": 0.05567231796739994, "grad_norm": 2.799269199371338, "learning_rate": 9.982746691663723e-06, "loss": 0.6406, "step": 1341 }, { "epoch": 0.05571383349161127, "grad_norm": 2.976231813430786, "learning_rate": 9.982690842803847e-06, "loss": 0.6151, "step": 1342 }, { "epoch": 0.0557553490158226, "grad_norm": 2.7602837085723877, "learning_rate": 9.982634903855483e-06, "loss": 0.3995, "step": 1343 }, { "epoch": 0.055796864540033936, "grad_norm": 2.5152416229248047, "learning_rate": 9.982578874819644e-06, "loss": 0.3262, "step": 1344 }, { "epoch": 0.055838380064245276, "grad_norm": 2.0638928413391113, "learning_rate": 9.982522755697344e-06, "loss": 0.4052, "step": 1345 }, { "epoch": 0.05587989558845661, "grad_norm": 2.276738405227661, "learning_rate": 9.982466546489596e-06, "loss": 0.4223, "step": 1346 }, { "epoch": 0.05592141111266794, "grad_norm": 2.6062238216400146, "learning_rate": 9.982410247197416e-06, "loss": 0.4257, "step": 1347 }, { "epoch": 0.055962926636879275, "grad_norm": 3.019031524658203, "learning_rate": 9.982353857821822e-06, "loss": 0.4883, "step": 1348 }, { "epoch": 0.056004442161090615, "grad_norm": 3.063845157623291, "learning_rate": 9.982297378363837e-06, "loss": 0.5332, "step": 1349 }, { "epoch": 0.05604595768530195, "grad_norm": 2.8912322521209717, "learning_rate": 9.982240808824477e-06, "loss": 0.5657, "step": 1350 }, { "epoch": 0.05608747320951328, "grad_norm": 3.165194034576416, "learning_rate": 9.982184149204768e-06, "loss": 0.5881, "step": 1351 }, { "epoch": 0.05612898873372462, "grad_norm": 2.7542483806610107, "learning_rate": 9.982127399505732e-06, "loss": 0.5022, "step": 1352 }, { "epoch": 0.05617050425793595, "grad_norm": 2.6858882904052734, "learning_rate": 9.982070559728398e-06, "loss": 0.5225, "step": 1353 }, { "epoch": 0.056212019782147286, "grad_norm": 2.750077247619629, "learning_rate": 9.982013629873792e-06, "loss": 0.6256, "step": 1354 }, { "epoch": 0.05625353530635862, "grad_norm": 2.5256433486938477, "learning_rate": 9.981956609942945e-06, "loss": 0.5747, "step": 1355 }, { "epoch": 0.05629505083056996, "grad_norm": 2.352062702178955, "learning_rate": 9.981899499936885e-06, "loss": 0.593, "step": 1356 }, { "epoch": 0.05633656635478129, "grad_norm": 3.332383394241333, "learning_rate": 9.981842299856646e-06, "loss": 0.6458, "step": 1357 }, { "epoch": 0.056378081878992625, "grad_norm": 2.879474401473999, "learning_rate": 9.981785009703263e-06, "loss": 0.5748, "step": 1358 }, { "epoch": 0.05641959740320396, "grad_norm": 3.2890214920043945, "learning_rate": 9.981727629477771e-06, "loss": 0.557, "step": 1359 }, { "epoch": 0.0564611129274153, "grad_norm": 2.8663392066955566, "learning_rate": 9.981670159181207e-06, "loss": 0.664, "step": 1360 }, { "epoch": 0.05650262845162663, "grad_norm": 3.217648506164551, "learning_rate": 9.981612598814612e-06, "loss": 0.39, "step": 1361 }, { "epoch": 0.056544143975837964, "grad_norm": 3.5746500492095947, "learning_rate": 9.981554948379023e-06, "loss": 0.597, "step": 1362 }, { "epoch": 0.0565856595000493, "grad_norm": 2.3702549934387207, "learning_rate": 9.981497207875488e-06, "loss": 0.5515, "step": 1363 }, { "epoch": 0.05662717502426064, "grad_norm": 2.815784454345703, "learning_rate": 9.981439377305045e-06, "loss": 0.5889, "step": 1364 }, { "epoch": 0.05666869054847197, "grad_norm": 4.211533546447754, "learning_rate": 9.981381456668744e-06, "loss": 0.5614, "step": 1365 }, { "epoch": 0.0567102060726833, "grad_norm": 3.1364035606384277, "learning_rate": 9.981323445967629e-06, "loss": 0.5513, "step": 1366 }, { "epoch": 0.056751721596894636, "grad_norm": 2.777294397354126, "learning_rate": 9.981265345202751e-06, "loss": 0.4899, "step": 1367 }, { "epoch": 0.056793237121105976, "grad_norm": 3.0730323791503906, "learning_rate": 9.981207154375159e-06, "loss": 0.6486, "step": 1368 }, { "epoch": 0.05683475264531731, "grad_norm": 2.923792839050293, "learning_rate": 9.981148873485906e-06, "loss": 0.5669, "step": 1369 }, { "epoch": 0.05687626816952864, "grad_norm": 2.8471198081970215, "learning_rate": 9.981090502536048e-06, "loss": 0.5994, "step": 1370 }, { "epoch": 0.056917783693739975, "grad_norm": 2.405683755874634, "learning_rate": 9.981032041526635e-06, "loss": 0.5394, "step": 1371 }, { "epoch": 0.056959299217951315, "grad_norm": 2.4264142513275146, "learning_rate": 9.980973490458728e-06, "loss": 0.3749, "step": 1372 }, { "epoch": 0.05700081474216265, "grad_norm": 3.10575795173645, "learning_rate": 9.980914849333385e-06, "loss": 0.4654, "step": 1373 }, { "epoch": 0.05704233026637398, "grad_norm": 3.2180116176605225, "learning_rate": 9.980856118151666e-06, "loss": 0.5456, "step": 1374 }, { "epoch": 0.057083845790585314, "grad_norm": 2.2612826824188232, "learning_rate": 9.980797296914633e-06, "loss": 0.5784, "step": 1375 }, { "epoch": 0.057125361314796654, "grad_norm": 2.98664927482605, "learning_rate": 9.980738385623348e-06, "loss": 0.5212, "step": 1376 }, { "epoch": 0.057166876839007986, "grad_norm": 2.4636709690093994, "learning_rate": 9.980679384278879e-06, "loss": 0.5092, "step": 1377 }, { "epoch": 0.05720839236321932, "grad_norm": 2.147782325744629, "learning_rate": 9.98062029288229e-06, "loss": 0.4442, "step": 1378 }, { "epoch": 0.05724990788743066, "grad_norm": 3.0775134563446045, "learning_rate": 9.98056111143465e-06, "loss": 0.6643, "step": 1379 }, { "epoch": 0.05729142341164199, "grad_norm": 2.7433855533599854, "learning_rate": 9.980501839937032e-06, "loss": 0.6266, "step": 1380 }, { "epoch": 0.057332938935853325, "grad_norm": 2.449488401412964, "learning_rate": 9.980442478390502e-06, "loss": 0.5529, "step": 1381 }, { "epoch": 0.05737445446006466, "grad_norm": 3.2071337699890137, "learning_rate": 9.980383026796138e-06, "loss": 0.499, "step": 1382 }, { "epoch": 0.057415969984276, "grad_norm": 4.26848840713501, "learning_rate": 9.980323485155013e-06, "loss": 0.6463, "step": 1383 }, { "epoch": 0.05745748550848733, "grad_norm": 2.641014337539673, "learning_rate": 9.980263853468204e-06, "loss": 0.4191, "step": 1384 }, { "epoch": 0.057499001032698664, "grad_norm": 2.819591999053955, "learning_rate": 9.98020413173679e-06, "loss": 0.4295, "step": 1385 }, { "epoch": 0.05754051655691, "grad_norm": 3.471954345703125, "learning_rate": 9.980144319961849e-06, "loss": 0.6484, "step": 1386 }, { "epoch": 0.05758203208112134, "grad_norm": 2.6631596088409424, "learning_rate": 9.980084418144464e-06, "loss": 0.5773, "step": 1387 }, { "epoch": 0.05762354760533267, "grad_norm": 3.266390800476074, "learning_rate": 9.980024426285717e-06, "loss": 0.4883, "step": 1388 }, { "epoch": 0.057665063129544, "grad_norm": 3.4264461994171143, "learning_rate": 9.979964344386692e-06, "loss": 0.4705, "step": 1389 }, { "epoch": 0.057706578653755336, "grad_norm": 2.8551101684570312, "learning_rate": 9.979904172448478e-06, "loss": 0.5969, "step": 1390 }, { "epoch": 0.057748094177966676, "grad_norm": 2.979843854904175, "learning_rate": 9.97984391047216e-06, "loss": 0.6161, "step": 1391 }, { "epoch": 0.05778960970217801, "grad_norm": 2.8243777751922607, "learning_rate": 9.979783558458828e-06, "loss": 0.5435, "step": 1392 }, { "epoch": 0.05783112522638934, "grad_norm": 2.5700173377990723, "learning_rate": 9.979723116409576e-06, "loss": 0.4188, "step": 1393 }, { "epoch": 0.057872640750600675, "grad_norm": 2.8326404094696045, "learning_rate": 9.979662584325494e-06, "loss": 0.5107, "step": 1394 }, { "epoch": 0.057914156274812015, "grad_norm": 2.599573850631714, "learning_rate": 9.979601962207677e-06, "loss": 0.6129, "step": 1395 }, { "epoch": 0.05795567179902335, "grad_norm": 2.9155805110931396, "learning_rate": 9.979541250057222e-06, "loss": 0.5777, "step": 1396 }, { "epoch": 0.05799718732323468, "grad_norm": 3.0087966918945312, "learning_rate": 9.979480447875222e-06, "loss": 0.4558, "step": 1397 }, { "epoch": 0.058038702847446014, "grad_norm": 2.3537333011627197, "learning_rate": 9.979419555662783e-06, "loss": 0.5829, "step": 1398 }, { "epoch": 0.058080218371657354, "grad_norm": 2.734325408935547, "learning_rate": 9.979358573421005e-06, "loss": 0.4759, "step": 1399 }, { "epoch": 0.05812173389586869, "grad_norm": 2.473372220993042, "learning_rate": 9.979297501150987e-06, "loss": 0.468, "step": 1400 }, { "epoch": 0.05816324942008002, "grad_norm": 2.9265944957733154, "learning_rate": 9.979236338853834e-06, "loss": 0.5399, "step": 1401 }, { "epoch": 0.05820476494429135, "grad_norm": 3.088974952697754, "learning_rate": 9.979175086530654e-06, "loss": 0.5458, "step": 1402 }, { "epoch": 0.05824628046850269, "grad_norm": 3.072084903717041, "learning_rate": 9.979113744182554e-06, "loss": 0.5493, "step": 1403 }, { "epoch": 0.058287795992714025, "grad_norm": 2.5003364086151123, "learning_rate": 9.979052311810642e-06, "loss": 0.4787, "step": 1404 }, { "epoch": 0.05832931151692536, "grad_norm": 3.74812388420105, "learning_rate": 9.97899078941603e-06, "loss": 0.6837, "step": 1405 }, { "epoch": 0.0583708270411367, "grad_norm": 2.5338501930236816, "learning_rate": 9.978929176999828e-06, "loss": 0.4702, "step": 1406 }, { "epoch": 0.05841234256534803, "grad_norm": 2.942903757095337, "learning_rate": 9.978867474563151e-06, "loss": 0.6036, "step": 1407 }, { "epoch": 0.058453858089559364, "grad_norm": 2.7670857906341553, "learning_rate": 9.978805682107115e-06, "loss": 0.4767, "step": 1408 }, { "epoch": 0.0584953736137707, "grad_norm": 2.639744281768799, "learning_rate": 9.97874379963284e-06, "loss": 0.5842, "step": 1409 }, { "epoch": 0.05853688913798204, "grad_norm": 2.512972831726074, "learning_rate": 9.978681827141439e-06, "loss": 0.5869, "step": 1410 }, { "epoch": 0.05857840466219337, "grad_norm": 3.6295876502990723, "learning_rate": 9.978619764634036e-06, "loss": 0.5503, "step": 1411 }, { "epoch": 0.0586199201864047, "grad_norm": 2.6767537593841553, "learning_rate": 9.978557612111753e-06, "loss": 0.5656, "step": 1412 }, { "epoch": 0.058661435710616036, "grad_norm": 3.141876220703125, "learning_rate": 9.978495369575714e-06, "loss": 0.5707, "step": 1413 }, { "epoch": 0.058702951234827376, "grad_norm": 3.8686294555664062, "learning_rate": 9.978433037027044e-06, "loss": 0.5372, "step": 1414 }, { "epoch": 0.05874446675903871, "grad_norm": 2.4693753719329834, "learning_rate": 9.978370614466867e-06, "loss": 0.4309, "step": 1415 }, { "epoch": 0.05878598228325004, "grad_norm": 2.395141124725342, "learning_rate": 9.978308101896318e-06, "loss": 0.358, "step": 1416 }, { "epoch": 0.058827497807461375, "grad_norm": 2.7597570419311523, "learning_rate": 9.97824549931652e-06, "loss": 0.625, "step": 1417 }, { "epoch": 0.058869013331672715, "grad_norm": 3.649895191192627, "learning_rate": 9.978182806728612e-06, "loss": 0.6312, "step": 1418 }, { "epoch": 0.05891052885588405, "grad_norm": 4.111197471618652, "learning_rate": 9.978120024133724e-06, "loss": 0.5946, "step": 1419 }, { "epoch": 0.05895204438009538, "grad_norm": 2.467313528060913, "learning_rate": 9.978057151532989e-06, "loss": 0.4421, "step": 1420 }, { "epoch": 0.058993559904306714, "grad_norm": 3.1902077198028564, "learning_rate": 9.977994188927546e-06, "loss": 0.4447, "step": 1421 }, { "epoch": 0.059035075428518054, "grad_norm": 3.0973803997039795, "learning_rate": 9.977931136318534e-06, "loss": 0.5732, "step": 1422 }, { "epoch": 0.05907659095272939, "grad_norm": 3.0916507244110107, "learning_rate": 9.977867993707091e-06, "loss": 0.5419, "step": 1423 }, { "epoch": 0.05911810647694072, "grad_norm": 3.5773677825927734, "learning_rate": 9.977804761094362e-06, "loss": 0.5271, "step": 1424 }, { "epoch": 0.05915962200115205, "grad_norm": 2.6308226585388184, "learning_rate": 9.977741438481487e-06, "loss": 0.3879, "step": 1425 }, { "epoch": 0.05920113752536339, "grad_norm": 2.8266117572784424, "learning_rate": 9.977678025869612e-06, "loss": 0.5427, "step": 1426 }, { "epoch": 0.059242653049574726, "grad_norm": 2.7755162715911865, "learning_rate": 9.977614523259884e-06, "loss": 0.6374, "step": 1427 }, { "epoch": 0.05928416857378606, "grad_norm": 2.7660117149353027, "learning_rate": 9.97755093065345e-06, "loss": 0.6004, "step": 1428 }, { "epoch": 0.05932568409799739, "grad_norm": 3.703146457672119, "learning_rate": 9.97748724805146e-06, "loss": 0.4415, "step": 1429 }, { "epoch": 0.05936719962220873, "grad_norm": 3.5589888095855713, "learning_rate": 9.977423475455069e-06, "loss": 0.5078, "step": 1430 }, { "epoch": 0.059408715146420064, "grad_norm": 2.918365240097046, "learning_rate": 9.977359612865424e-06, "loss": 0.6071, "step": 1431 }, { "epoch": 0.0594502306706314, "grad_norm": 2.5555574893951416, "learning_rate": 9.977295660283683e-06, "loss": 0.5144, "step": 1432 }, { "epoch": 0.05949174619484274, "grad_norm": 2.3337745666503906, "learning_rate": 9.977231617711003e-06, "loss": 0.4661, "step": 1433 }, { "epoch": 0.05953326171905407, "grad_norm": 3.1178059577941895, "learning_rate": 9.977167485148539e-06, "loss": 0.4852, "step": 1434 }, { "epoch": 0.0595747772432654, "grad_norm": 2.5410685539245605, "learning_rate": 9.977103262597454e-06, "loss": 0.5741, "step": 1435 }, { "epoch": 0.059616292767476736, "grad_norm": 2.965765953063965, "learning_rate": 9.977038950058906e-06, "loss": 0.5863, "step": 1436 }, { "epoch": 0.059657808291688076, "grad_norm": 3.1757469177246094, "learning_rate": 9.976974547534059e-06, "loss": 0.6755, "step": 1437 }, { "epoch": 0.05969932381589941, "grad_norm": 3.086893320083618, "learning_rate": 9.97691005502408e-06, "loss": 0.7006, "step": 1438 }, { "epoch": 0.05974083934011074, "grad_norm": 3.3291268348693848, "learning_rate": 9.97684547253013e-06, "loss": 0.6123, "step": 1439 }, { "epoch": 0.059782354864322075, "grad_norm": 2.984846830368042, "learning_rate": 9.976780800053382e-06, "loss": 0.4251, "step": 1440 }, { "epoch": 0.059823870388533415, "grad_norm": 2.678128480911255, "learning_rate": 9.976716037595e-06, "loss": 0.6727, "step": 1441 }, { "epoch": 0.05986538591274475, "grad_norm": 2.867098093032837, "learning_rate": 9.97665118515616e-06, "loss": 0.518, "step": 1442 }, { "epoch": 0.05990690143695608, "grad_norm": 2.2921864986419678, "learning_rate": 9.976586242738032e-06, "loss": 0.5146, "step": 1443 }, { "epoch": 0.059948416961167414, "grad_norm": 2.7573482990264893, "learning_rate": 9.976521210341787e-06, "loss": 0.6999, "step": 1444 }, { "epoch": 0.059989932485378754, "grad_norm": 3.102935314178467, "learning_rate": 9.976456087968608e-06, "loss": 0.5687, "step": 1445 }, { "epoch": 0.06003144800959009, "grad_norm": 2.334202289581299, "learning_rate": 9.976390875619666e-06, "loss": 0.3283, "step": 1446 }, { "epoch": 0.06007296353380142, "grad_norm": 3.327752113342285, "learning_rate": 9.976325573296144e-06, "loss": 0.5926, "step": 1447 }, { "epoch": 0.06011447905801275, "grad_norm": 2.552271842956543, "learning_rate": 9.976260180999222e-06, "loss": 0.5358, "step": 1448 }, { "epoch": 0.06015599458222409, "grad_norm": 2.564924955368042, "learning_rate": 9.97619469873008e-06, "loss": 0.5653, "step": 1449 }, { "epoch": 0.060197510106435426, "grad_norm": 3.1483821868896484, "learning_rate": 9.976129126489904e-06, "loss": 0.5343, "step": 1450 }, { "epoch": 0.06023902563064676, "grad_norm": 2.8391146659851074, "learning_rate": 9.97606346427988e-06, "loss": 0.4747, "step": 1451 }, { "epoch": 0.06028054115485809, "grad_norm": 2.7840163707733154, "learning_rate": 9.975997712101193e-06, "loss": 0.6273, "step": 1452 }, { "epoch": 0.06032205667906943, "grad_norm": 2.8227384090423584, "learning_rate": 9.975931869955033e-06, "loss": 0.5355, "step": 1453 }, { "epoch": 0.060363572203280765, "grad_norm": 2.280423879623413, "learning_rate": 9.975865937842592e-06, "loss": 0.4919, "step": 1454 }, { "epoch": 0.0604050877274921, "grad_norm": 2.868567943572998, "learning_rate": 9.97579991576506e-06, "loss": 0.5731, "step": 1455 }, { "epoch": 0.06044660325170343, "grad_norm": 2.494875907897949, "learning_rate": 9.975733803723631e-06, "loss": 0.3713, "step": 1456 }, { "epoch": 0.06048811877591477, "grad_norm": 2.425668716430664, "learning_rate": 9.975667601719502e-06, "loss": 0.4633, "step": 1457 }, { "epoch": 0.0605296343001261, "grad_norm": 3.207099199295044, "learning_rate": 9.975601309753866e-06, "loss": 0.5774, "step": 1458 }, { "epoch": 0.060571149824337436, "grad_norm": 3.0462872982025146, "learning_rate": 9.975534927827926e-06, "loss": 0.4391, "step": 1459 }, { "epoch": 0.060612665348548776, "grad_norm": 3.005115270614624, "learning_rate": 9.97546845594288e-06, "loss": 0.4915, "step": 1460 }, { "epoch": 0.06065418087276011, "grad_norm": 2.5711116790771484, "learning_rate": 9.975401894099933e-06, "loss": 0.5978, "step": 1461 }, { "epoch": 0.06069569639697144, "grad_norm": 2.363044023513794, "learning_rate": 9.975335242300281e-06, "loss": 0.5872, "step": 1462 }, { "epoch": 0.060737211921182775, "grad_norm": 3.022514581680298, "learning_rate": 9.975268500545137e-06, "loss": 0.4724, "step": 1463 }, { "epoch": 0.060778727445394115, "grad_norm": 2.870847702026367, "learning_rate": 9.975201668835703e-06, "loss": 0.4903, "step": 1464 }, { "epoch": 0.06082024296960545, "grad_norm": 3.3923370838165283, "learning_rate": 9.975134747173189e-06, "loss": 0.4491, "step": 1465 }, { "epoch": 0.06086175849381678, "grad_norm": 2.631991147994995, "learning_rate": 9.975067735558805e-06, "loss": 0.4723, "step": 1466 }, { "epoch": 0.060903274018028114, "grad_norm": 3.3237600326538086, "learning_rate": 9.975000633993761e-06, "loss": 0.5114, "step": 1467 }, { "epoch": 0.060944789542239454, "grad_norm": 3.053476572036743, "learning_rate": 9.974933442479273e-06, "loss": 0.5873, "step": 1468 }, { "epoch": 0.06098630506645079, "grad_norm": 3.0409770011901855, "learning_rate": 9.974866161016556e-06, "loss": 0.6969, "step": 1469 }, { "epoch": 0.06102782059066212, "grad_norm": 2.1967275142669678, "learning_rate": 9.974798789606823e-06, "loss": 0.4611, "step": 1470 }, { "epoch": 0.06106933611487345, "grad_norm": 2.4310600757598877, "learning_rate": 9.974731328251294e-06, "loss": 0.499, "step": 1471 }, { "epoch": 0.06111085163908479, "grad_norm": 2.596785545349121, "learning_rate": 9.97466377695119e-06, "loss": 0.5449, "step": 1472 }, { "epoch": 0.061152367163296126, "grad_norm": 2.8181312084198, "learning_rate": 9.974596135707728e-06, "loss": 0.5048, "step": 1473 }, { "epoch": 0.06119388268750746, "grad_norm": 3.0187408924102783, "learning_rate": 9.974528404522137e-06, "loss": 0.5459, "step": 1474 }, { "epoch": 0.06123539821171879, "grad_norm": 3.0300307273864746, "learning_rate": 9.974460583395638e-06, "loss": 0.6117, "step": 1475 }, { "epoch": 0.06127691373593013, "grad_norm": 4.435058116912842, "learning_rate": 9.974392672329456e-06, "loss": 0.414, "step": 1476 }, { "epoch": 0.061318429260141465, "grad_norm": 2.478430986404419, "learning_rate": 9.974324671324823e-06, "loss": 0.5961, "step": 1477 }, { "epoch": 0.0613599447843528, "grad_norm": 2.5223307609558105, "learning_rate": 9.974256580382966e-06, "loss": 0.4817, "step": 1478 }, { "epoch": 0.06140146030856413, "grad_norm": 2.3044686317443848, "learning_rate": 9.974188399505116e-06, "loss": 0.5035, "step": 1479 }, { "epoch": 0.06144297583277547, "grad_norm": 2.7016382217407227, "learning_rate": 9.974120128692505e-06, "loss": 0.4199, "step": 1480 }, { "epoch": 0.0614844913569868, "grad_norm": 2.6520345211029053, "learning_rate": 9.97405176794637e-06, "loss": 0.5616, "step": 1481 }, { "epoch": 0.061526006881198136, "grad_norm": 2.4538726806640625, "learning_rate": 9.973983317267944e-06, "loss": 0.5166, "step": 1482 }, { "epoch": 0.06156752240540947, "grad_norm": 2.7738821506500244, "learning_rate": 9.973914776658466e-06, "loss": 0.4888, "step": 1483 }, { "epoch": 0.06160903792962081, "grad_norm": 2.795552968978882, "learning_rate": 9.973846146119175e-06, "loss": 0.5304, "step": 1484 }, { "epoch": 0.06165055345383214, "grad_norm": 3.0464162826538086, "learning_rate": 9.973777425651315e-06, "loss": 0.5184, "step": 1485 }, { "epoch": 0.061692068978043475, "grad_norm": 2.627384662628174, "learning_rate": 9.973708615256121e-06, "loss": 0.5318, "step": 1486 }, { "epoch": 0.061733584502254815, "grad_norm": 2.4823977947235107, "learning_rate": 9.973639714934844e-06, "loss": 0.6223, "step": 1487 }, { "epoch": 0.06177510002646615, "grad_norm": 5.306682109832764, "learning_rate": 9.973570724688728e-06, "loss": 0.5104, "step": 1488 }, { "epoch": 0.06181661555067748, "grad_norm": 2.7875001430511475, "learning_rate": 9.97350164451902e-06, "loss": 0.4667, "step": 1489 }, { "epoch": 0.061858131074888814, "grad_norm": 2.8461670875549316, "learning_rate": 9.973432474426968e-06, "loss": 0.4843, "step": 1490 }, { "epoch": 0.061899646599100154, "grad_norm": 2.660881519317627, "learning_rate": 9.973363214413823e-06, "loss": 0.5422, "step": 1491 }, { "epoch": 0.06194116212331149, "grad_norm": 3.4799578189849854, "learning_rate": 9.973293864480837e-06, "loss": 0.5517, "step": 1492 }, { "epoch": 0.06198267764752282, "grad_norm": 2.595547676086426, "learning_rate": 9.973224424629265e-06, "loss": 0.5487, "step": 1493 }, { "epoch": 0.06202419317173415, "grad_norm": 2.5593252182006836, "learning_rate": 9.973154894860362e-06, "loss": 0.5784, "step": 1494 }, { "epoch": 0.06206570869594549, "grad_norm": 2.7151973247528076, "learning_rate": 9.973085275175385e-06, "loss": 0.4182, "step": 1495 }, { "epoch": 0.062107224220156826, "grad_norm": 2.5789289474487305, "learning_rate": 9.973015565575595e-06, "loss": 0.4657, "step": 1496 }, { "epoch": 0.06214873974436816, "grad_norm": 3.2341978549957275, "learning_rate": 9.972945766062248e-06, "loss": 0.668, "step": 1497 }, { "epoch": 0.06219025526857949, "grad_norm": 2.4677531719207764, "learning_rate": 9.972875876636607e-06, "loss": 0.5756, "step": 1498 }, { "epoch": 0.06223177079279083, "grad_norm": 2.505566120147705, "learning_rate": 9.97280589729994e-06, "loss": 0.5343, "step": 1499 }, { "epoch": 0.062273286317002165, "grad_norm": 2.9009552001953125, "learning_rate": 9.972735828053506e-06, "loss": 0.5114, "step": 1500 }, { "epoch": 0.0623148018412135, "grad_norm": 3.488723039627075, "learning_rate": 9.972665668898577e-06, "loss": 0.5393, "step": 1501 }, { "epoch": 0.06235631736542483, "grad_norm": 2.506131410598755, "learning_rate": 9.972595419836418e-06, "loss": 0.563, "step": 1502 }, { "epoch": 0.06239783288963617, "grad_norm": 2.7739999294281006, "learning_rate": 9.972525080868302e-06, "loss": 0.4383, "step": 1503 }, { "epoch": 0.062439348413847504, "grad_norm": 2.7123961448669434, "learning_rate": 9.972454651995497e-06, "loss": 0.5367, "step": 1504 }, { "epoch": 0.06248086393805884, "grad_norm": 2.6540908813476562, "learning_rate": 9.972384133219281e-06, "loss": 0.5462, "step": 1505 }, { "epoch": 0.06252237946227017, "grad_norm": 2.1364731788635254, "learning_rate": 9.972313524540926e-06, "loss": 0.4695, "step": 1506 }, { "epoch": 0.0625638949864815, "grad_norm": 3.1761627197265625, "learning_rate": 9.97224282596171e-06, "loss": 0.4896, "step": 1507 }, { "epoch": 0.06260541051069284, "grad_norm": 2.8337905406951904, "learning_rate": 9.97217203748291e-06, "loss": 0.6219, "step": 1508 }, { "epoch": 0.06264692603490418, "grad_norm": 2.9085700511932373, "learning_rate": 9.972101159105807e-06, "loss": 0.6931, "step": 1509 }, { "epoch": 0.06268844155911552, "grad_norm": 2.5976884365081787, "learning_rate": 9.97203019083168e-06, "loss": 0.5361, "step": 1510 }, { "epoch": 0.06272995708332685, "grad_norm": 2.642961025238037, "learning_rate": 9.971959132661816e-06, "loss": 0.5124, "step": 1511 }, { "epoch": 0.06277147260753818, "grad_norm": 3.0177981853485107, "learning_rate": 9.971887984597498e-06, "loss": 0.6592, "step": 1512 }, { "epoch": 0.06281298813174951, "grad_norm": 2.303459644317627, "learning_rate": 9.971816746640009e-06, "loss": 0.3862, "step": 1513 }, { "epoch": 0.06285450365596085, "grad_norm": 2.9557526111602783, "learning_rate": 9.971745418790644e-06, "loss": 0.6114, "step": 1514 }, { "epoch": 0.06289601918017218, "grad_norm": 2.8554418087005615, "learning_rate": 9.971674001050687e-06, "loss": 0.468, "step": 1515 }, { "epoch": 0.06293753470438353, "grad_norm": 2.788081645965576, "learning_rate": 9.97160249342143e-06, "loss": 0.5387, "step": 1516 }, { "epoch": 0.06297905022859486, "grad_norm": 2.8725221157073975, "learning_rate": 9.97153089590417e-06, "loss": 0.5737, "step": 1517 }, { "epoch": 0.06302056575280619, "grad_norm": 3.1464600563049316, "learning_rate": 9.971459208500196e-06, "loss": 0.5403, "step": 1518 }, { "epoch": 0.06306208127701753, "grad_norm": 2.7299537658691406, "learning_rate": 9.971387431210808e-06, "loss": 0.4839, "step": 1519 }, { "epoch": 0.06310359680122886, "grad_norm": 3.1074812412261963, "learning_rate": 9.9713155640373e-06, "loss": 0.5671, "step": 1520 }, { "epoch": 0.06314511232544019, "grad_norm": 2.488659143447876, "learning_rate": 9.971243606980975e-06, "loss": 0.5001, "step": 1521 }, { "epoch": 0.06318662784965152, "grad_norm": 3.350057363510132, "learning_rate": 9.971171560043134e-06, "loss": 0.5185, "step": 1522 }, { "epoch": 0.06322814337386286, "grad_norm": 2.730184316635132, "learning_rate": 9.971099423225076e-06, "loss": 0.4934, "step": 1523 }, { "epoch": 0.0632696588980742, "grad_norm": 3.1269400119781494, "learning_rate": 9.97102719652811e-06, "loss": 0.4758, "step": 1524 }, { "epoch": 0.06331117442228554, "grad_norm": 3.899235725402832, "learning_rate": 9.970954879953539e-06, "loss": 0.702, "step": 1525 }, { "epoch": 0.06335268994649687, "grad_norm": 2.816473960876465, "learning_rate": 9.97088247350267e-06, "loss": 0.5591, "step": 1526 }, { "epoch": 0.0633942054707082, "grad_norm": 2.8133256435394287, "learning_rate": 9.970809977176814e-06, "loss": 0.5181, "step": 1527 }, { "epoch": 0.06343572099491954, "grad_norm": 3.0403764247894287, "learning_rate": 9.970737390977282e-06, "loss": 0.594, "step": 1528 }, { "epoch": 0.06347723651913087, "grad_norm": 3.2810113430023193, "learning_rate": 9.970664714905383e-06, "loss": 0.5996, "step": 1529 }, { "epoch": 0.0635187520433422, "grad_norm": 2.345338821411133, "learning_rate": 9.970591948962433e-06, "loss": 0.5231, "step": 1530 }, { "epoch": 0.06356026756755354, "grad_norm": 2.747661590576172, "learning_rate": 9.970519093149749e-06, "loss": 0.5873, "step": 1531 }, { "epoch": 0.06360178309176488, "grad_norm": 3.192495822906494, "learning_rate": 9.970446147468647e-06, "loss": 0.5481, "step": 1532 }, { "epoch": 0.06364329861597622, "grad_norm": 2.6930863857269287, "learning_rate": 9.970373111920447e-06, "loss": 0.5051, "step": 1533 }, { "epoch": 0.06368481414018755, "grad_norm": 2.943619966506958, "learning_rate": 9.970299986506467e-06, "loss": 0.7414, "step": 1534 }, { "epoch": 0.06372632966439888, "grad_norm": 2.9413630962371826, "learning_rate": 9.970226771228031e-06, "loss": 0.5257, "step": 1535 }, { "epoch": 0.06376784518861021, "grad_norm": 2.720374822616577, "learning_rate": 9.970153466086464e-06, "loss": 0.5725, "step": 1536 }, { "epoch": 0.06380936071282155, "grad_norm": 2.4574081897735596, "learning_rate": 9.970080071083086e-06, "loss": 0.4796, "step": 1537 }, { "epoch": 0.06385087623703288, "grad_norm": 2.71346378326416, "learning_rate": 9.97000658621923e-06, "loss": 0.6819, "step": 1538 }, { "epoch": 0.06389239176124423, "grad_norm": 2.511146306991577, "learning_rate": 9.969933011496224e-06, "loss": 0.5151, "step": 1539 }, { "epoch": 0.06393390728545556, "grad_norm": 3.0528087615966797, "learning_rate": 9.969859346915395e-06, "loss": 0.6193, "step": 1540 }, { "epoch": 0.0639754228096669, "grad_norm": 3.4396705627441406, "learning_rate": 9.969785592478076e-06, "loss": 0.4889, "step": 1541 }, { "epoch": 0.06401693833387823, "grad_norm": 2.7453839778900146, "learning_rate": 9.969711748185602e-06, "loss": 0.3073, "step": 1542 }, { "epoch": 0.06405845385808956, "grad_norm": 3.4059255123138428, "learning_rate": 9.969637814039308e-06, "loss": 0.5281, "step": 1543 }, { "epoch": 0.06409996938230089, "grad_norm": 3.189826488494873, "learning_rate": 9.96956379004053e-06, "loss": 0.5782, "step": 1544 }, { "epoch": 0.06414148490651223, "grad_norm": 3.049079179763794, "learning_rate": 9.969489676190603e-06, "loss": 0.6386, "step": 1545 }, { "epoch": 0.06418300043072356, "grad_norm": 2.896681547164917, "learning_rate": 9.969415472490873e-06, "loss": 0.5555, "step": 1546 }, { "epoch": 0.0642245159549349, "grad_norm": 2.5734615325927734, "learning_rate": 9.969341178942678e-06, "loss": 0.556, "step": 1547 }, { "epoch": 0.06426603147914624, "grad_norm": 2.906558036804199, "learning_rate": 9.969266795547364e-06, "loss": 0.7487, "step": 1548 }, { "epoch": 0.06430754700335757, "grad_norm": 2.240534543991089, "learning_rate": 9.969192322306271e-06, "loss": 0.4928, "step": 1549 }, { "epoch": 0.0643490625275689, "grad_norm": 2.6120316982269287, "learning_rate": 9.96911775922075e-06, "loss": 0.4258, "step": 1550 }, { "epoch": 0.06439057805178024, "grad_norm": 2.485964775085449, "learning_rate": 9.969043106292149e-06, "loss": 0.4779, "step": 1551 }, { "epoch": 0.06443209357599157, "grad_norm": 3.2619528770446777, "learning_rate": 9.968968363521814e-06, "loss": 0.5084, "step": 1552 }, { "epoch": 0.0644736091002029, "grad_norm": 2.786620616912842, "learning_rate": 9.968893530911101e-06, "loss": 0.491, "step": 1553 }, { "epoch": 0.06451512462441424, "grad_norm": 3.1124038696289062, "learning_rate": 9.968818608461358e-06, "loss": 0.4366, "step": 1554 }, { "epoch": 0.06455664014862558, "grad_norm": 2.6961915493011475, "learning_rate": 9.968743596173946e-06, "loss": 0.5843, "step": 1555 }, { "epoch": 0.06459815567283692, "grad_norm": 3.1352603435516357, "learning_rate": 9.968668494050216e-06, "loss": 0.5978, "step": 1556 }, { "epoch": 0.06463967119704825, "grad_norm": 2.2359936237335205, "learning_rate": 9.968593302091526e-06, "loss": 0.396, "step": 1557 }, { "epoch": 0.06468118672125958, "grad_norm": 3.3356285095214844, "learning_rate": 9.968518020299238e-06, "loss": 0.6092, "step": 1558 }, { "epoch": 0.06472270224547091, "grad_norm": 2.80210018157959, "learning_rate": 9.968442648674713e-06, "loss": 0.46, "step": 1559 }, { "epoch": 0.06476421776968225, "grad_norm": 3.0204896926879883, "learning_rate": 9.968367187219312e-06, "loss": 0.3802, "step": 1560 }, { "epoch": 0.06480573329389358, "grad_norm": 2.60347318649292, "learning_rate": 9.968291635934401e-06, "loss": 0.5564, "step": 1561 }, { "epoch": 0.06484724881810491, "grad_norm": 2.4516069889068604, "learning_rate": 9.968215994821345e-06, "loss": 0.5219, "step": 1562 }, { "epoch": 0.06488876434231626, "grad_norm": 3.0182018280029297, "learning_rate": 9.968140263881512e-06, "loss": 0.4811, "step": 1563 }, { "epoch": 0.0649302798665276, "grad_norm": 3.2979753017425537, "learning_rate": 9.968064443116269e-06, "loss": 0.6349, "step": 1564 }, { "epoch": 0.06497179539073893, "grad_norm": 2.527456521987915, "learning_rate": 9.96798853252699e-06, "loss": 0.5602, "step": 1565 }, { "epoch": 0.06501331091495026, "grad_norm": 2.6456189155578613, "learning_rate": 9.967912532115048e-06, "loss": 0.4947, "step": 1566 }, { "epoch": 0.06505482643916159, "grad_norm": 3.126300811767578, "learning_rate": 9.967836441881815e-06, "loss": 0.4092, "step": 1567 }, { "epoch": 0.06509634196337293, "grad_norm": 4.906215190887451, "learning_rate": 9.967760261828667e-06, "loss": 0.5209, "step": 1568 }, { "epoch": 0.06513785748758426, "grad_norm": 2.9258382320404053, "learning_rate": 9.96768399195698e-06, "loss": 0.576, "step": 1569 }, { "epoch": 0.0651793730117956, "grad_norm": 2.0635244846343994, "learning_rate": 9.967607632268137e-06, "loss": 0.5561, "step": 1570 }, { "epoch": 0.06522088853600694, "grad_norm": 2.560473680496216, "learning_rate": 9.967531182763515e-06, "loss": 0.6853, "step": 1571 }, { "epoch": 0.06526240406021827, "grad_norm": 3.1296226978302, "learning_rate": 9.967454643444496e-06, "loss": 0.5283, "step": 1572 }, { "epoch": 0.0653039195844296, "grad_norm": 3.525744915008545, "learning_rate": 9.967378014312468e-06, "loss": 0.4793, "step": 1573 }, { "epoch": 0.06534543510864094, "grad_norm": 2.859269618988037, "learning_rate": 9.967301295368811e-06, "loss": 0.5039, "step": 1574 }, { "epoch": 0.06538695063285227, "grad_norm": 3.4127166271209717, "learning_rate": 9.967224486614916e-06, "loss": 0.5062, "step": 1575 }, { "epoch": 0.0654284661570636, "grad_norm": 2.4808528423309326, "learning_rate": 9.967147588052171e-06, "loss": 0.5701, "step": 1576 }, { "epoch": 0.06546998168127494, "grad_norm": 2.7466366291046143, "learning_rate": 9.967070599681967e-06, "loss": 0.6196, "step": 1577 }, { "epoch": 0.06551149720548628, "grad_norm": 2.5295045375823975, "learning_rate": 9.966993521505694e-06, "loss": 0.4422, "step": 1578 }, { "epoch": 0.06555301272969762, "grad_norm": 2.576122522354126, "learning_rate": 9.966916353524745e-06, "loss": 0.542, "step": 1579 }, { "epoch": 0.06559452825390895, "grad_norm": 3.4166083335876465, "learning_rate": 9.96683909574052e-06, "loss": 0.671, "step": 1580 }, { "epoch": 0.06563604377812028, "grad_norm": 3.1755244731903076, "learning_rate": 9.96676174815441e-06, "loss": 0.5253, "step": 1581 }, { "epoch": 0.06567755930233161, "grad_norm": 2.425067901611328, "learning_rate": 9.966684310767815e-06, "loss": 0.5171, "step": 1582 }, { "epoch": 0.06571907482654295, "grad_norm": 2.9098703861236572, "learning_rate": 9.966606783582138e-06, "loss": 0.8359, "step": 1583 }, { "epoch": 0.06576059035075428, "grad_norm": 2.636916160583496, "learning_rate": 9.966529166598778e-06, "loss": 0.5438, "step": 1584 }, { "epoch": 0.06580210587496561, "grad_norm": 3.201064109802246, "learning_rate": 9.96645145981914e-06, "loss": 0.5584, "step": 1585 }, { "epoch": 0.06584362139917696, "grad_norm": 2.7960588932037354, "learning_rate": 9.966373663244629e-06, "loss": 0.5354, "step": 1586 }, { "epoch": 0.0658851369233883, "grad_norm": 2.4074513912200928, "learning_rate": 9.966295776876648e-06, "loss": 0.4735, "step": 1587 }, { "epoch": 0.06592665244759963, "grad_norm": 2.4413723945617676, "learning_rate": 9.966217800716609e-06, "loss": 0.4614, "step": 1588 }, { "epoch": 0.06596816797181096, "grad_norm": 3.3442928791046143, "learning_rate": 9.96613973476592e-06, "loss": 0.4695, "step": 1589 }, { "epoch": 0.06600968349602229, "grad_norm": 2.7426156997680664, "learning_rate": 9.966061579025996e-06, "loss": 0.6367, "step": 1590 }, { "epoch": 0.06605119902023363, "grad_norm": 2.823260545730591, "learning_rate": 9.965983333498246e-06, "loss": 0.5848, "step": 1591 }, { "epoch": 0.06609271454444496, "grad_norm": 2.2422385215759277, "learning_rate": 9.965904998184084e-06, "loss": 0.5351, "step": 1592 }, { "epoch": 0.0661342300686563, "grad_norm": 2.669462203979492, "learning_rate": 9.96582657308493e-06, "loss": 0.5547, "step": 1593 }, { "epoch": 0.06617574559286764, "grad_norm": 2.3870651721954346, "learning_rate": 9.9657480582022e-06, "loss": 0.3901, "step": 1594 }, { "epoch": 0.06621726111707897, "grad_norm": 3.199615001678467, "learning_rate": 9.965669453537314e-06, "loss": 0.5009, "step": 1595 }, { "epoch": 0.0662587766412903, "grad_norm": 3.656731128692627, "learning_rate": 9.965590759091694e-06, "loss": 0.5901, "step": 1596 }, { "epoch": 0.06630029216550164, "grad_norm": 2.8083293437957764, "learning_rate": 9.96551197486676e-06, "loss": 0.523, "step": 1597 }, { "epoch": 0.06634180768971297, "grad_norm": 3.391631603240967, "learning_rate": 9.96543310086394e-06, "loss": 0.6436, "step": 1598 }, { "epoch": 0.0663833232139243, "grad_norm": 2.418911933898926, "learning_rate": 9.965354137084656e-06, "loss": 0.6238, "step": 1599 }, { "epoch": 0.06642483873813564, "grad_norm": 2.502007246017456, "learning_rate": 9.96527508353034e-06, "loss": 0.5769, "step": 1600 }, { "epoch": 0.06646635426234698, "grad_norm": 2.8731844425201416, "learning_rate": 9.96519594020242e-06, "loss": 0.6438, "step": 1601 }, { "epoch": 0.06650786978655832, "grad_norm": 2.607666015625, "learning_rate": 9.965116707102324e-06, "loss": 0.5399, "step": 1602 }, { "epoch": 0.06654938531076965, "grad_norm": 2.568575859069824, "learning_rate": 9.965037384231487e-06, "loss": 0.5142, "step": 1603 }, { "epoch": 0.06659090083498098, "grad_norm": 2.78145694732666, "learning_rate": 9.964957971591345e-06, "loss": 0.5928, "step": 1604 }, { "epoch": 0.06663241635919231, "grad_norm": 2.9055657386779785, "learning_rate": 9.96487846918333e-06, "loss": 0.589, "step": 1605 }, { "epoch": 0.06667393188340365, "grad_norm": 3.2268500328063965, "learning_rate": 9.96479887700888e-06, "loss": 0.4811, "step": 1606 }, { "epoch": 0.06671544740761498, "grad_norm": 2.6471991539001465, "learning_rate": 9.96471919506944e-06, "loss": 0.5533, "step": 1607 }, { "epoch": 0.06675696293182631, "grad_norm": 2.8600547313690186, "learning_rate": 9.964639423366442e-06, "loss": 0.4771, "step": 1608 }, { "epoch": 0.06679847845603766, "grad_norm": 2.613184928894043, "learning_rate": 9.964559561901334e-06, "loss": 0.5728, "step": 1609 }, { "epoch": 0.066839993980249, "grad_norm": 3.1031954288482666, "learning_rate": 9.964479610675557e-06, "loss": 0.6043, "step": 1610 }, { "epoch": 0.06688150950446033, "grad_norm": 2.939727544784546, "learning_rate": 9.96439956969056e-06, "loss": 0.5253, "step": 1611 }, { "epoch": 0.06692302502867166, "grad_norm": 2.90814208984375, "learning_rate": 9.964319438947785e-06, "loss": 0.5985, "step": 1612 }, { "epoch": 0.06696454055288299, "grad_norm": 3.4113969802856445, "learning_rate": 9.964239218448684e-06, "loss": 0.6452, "step": 1613 }, { "epoch": 0.06700605607709433, "grad_norm": 2.634145736694336, "learning_rate": 9.964158908194708e-06, "loss": 0.6149, "step": 1614 }, { "epoch": 0.06704757160130566, "grad_norm": 2.8088953495025635, "learning_rate": 9.96407850818731e-06, "loss": 0.4996, "step": 1615 }, { "epoch": 0.06708908712551699, "grad_norm": 2.386127233505249, "learning_rate": 9.963998018427938e-06, "loss": 0.5108, "step": 1616 }, { "epoch": 0.06713060264972834, "grad_norm": 2.6863346099853516, "learning_rate": 9.963917438918054e-06, "loss": 0.3784, "step": 1617 }, { "epoch": 0.06717211817393967, "grad_norm": 2.2465522289276123, "learning_rate": 9.96383676965911e-06, "loss": 0.4138, "step": 1618 }, { "epoch": 0.067213633698151, "grad_norm": 2.315047264099121, "learning_rate": 9.963756010652568e-06, "loss": 0.5152, "step": 1619 }, { "epoch": 0.06725514922236234, "grad_norm": 3.1479475498199463, "learning_rate": 9.963675161899886e-06, "loss": 0.5772, "step": 1620 }, { "epoch": 0.06729666474657367, "grad_norm": 2.853854179382324, "learning_rate": 9.963594223402527e-06, "loss": 0.5305, "step": 1621 }, { "epoch": 0.067338180270785, "grad_norm": 2.7168843746185303, "learning_rate": 9.963513195161952e-06, "loss": 0.4938, "step": 1622 }, { "epoch": 0.06737969579499634, "grad_norm": 4.014892101287842, "learning_rate": 9.963432077179629e-06, "loss": 0.6372, "step": 1623 }, { "epoch": 0.06742121131920768, "grad_norm": 2.7251460552215576, "learning_rate": 9.963350869457023e-06, "loss": 0.6444, "step": 1624 }, { "epoch": 0.06746272684341902, "grad_norm": 3.0040276050567627, "learning_rate": 9.963269571995605e-06, "loss": 0.5825, "step": 1625 }, { "epoch": 0.06750424236763035, "grad_norm": 2.4657344818115234, "learning_rate": 9.96318818479684e-06, "loss": 0.4229, "step": 1626 }, { "epoch": 0.06754575789184168, "grad_norm": 3.3430349826812744, "learning_rate": 9.963106707862203e-06, "loss": 0.5262, "step": 1627 }, { "epoch": 0.06758727341605301, "grad_norm": 2.440906286239624, "learning_rate": 9.963025141193165e-06, "loss": 0.6, "step": 1628 }, { "epoch": 0.06762878894026435, "grad_norm": 2.583533763885498, "learning_rate": 9.962943484791203e-06, "loss": 0.5589, "step": 1629 }, { "epoch": 0.06767030446447568, "grad_norm": 2.5935070514678955, "learning_rate": 9.962861738657791e-06, "loss": 0.4573, "step": 1630 }, { "epoch": 0.06771181998868701, "grad_norm": 2.3182709217071533, "learning_rate": 9.96277990279441e-06, "loss": 0.4764, "step": 1631 }, { "epoch": 0.06775333551289836, "grad_norm": 3.5041329860687256, "learning_rate": 9.962697977202538e-06, "loss": 0.6386, "step": 1632 }, { "epoch": 0.0677948510371097, "grad_norm": 2.717890501022339, "learning_rate": 9.962615961883655e-06, "loss": 0.482, "step": 1633 }, { "epoch": 0.06783636656132103, "grad_norm": 3.0175349712371826, "learning_rate": 9.962533856839246e-06, "loss": 0.617, "step": 1634 }, { "epoch": 0.06787788208553236, "grad_norm": 2.7782487869262695, "learning_rate": 9.962451662070793e-06, "loss": 0.5308, "step": 1635 }, { "epoch": 0.06791939760974369, "grad_norm": 2.8509178161621094, "learning_rate": 9.962369377579786e-06, "loss": 0.4647, "step": 1636 }, { "epoch": 0.06796091313395503, "grad_norm": 2.7919864654541016, "learning_rate": 9.962287003367709e-06, "loss": 0.6458, "step": 1637 }, { "epoch": 0.06800242865816636, "grad_norm": 2.406826972961426, "learning_rate": 9.962204539436053e-06, "loss": 0.576, "step": 1638 }, { "epoch": 0.06804394418237769, "grad_norm": 2.5673987865448, "learning_rate": 9.962121985786309e-06, "loss": 0.4823, "step": 1639 }, { "epoch": 0.06808545970658904, "grad_norm": 2.791191577911377, "learning_rate": 9.962039342419968e-06, "loss": 0.4952, "step": 1640 }, { "epoch": 0.06812697523080037, "grad_norm": 2.7369544506073, "learning_rate": 9.961956609338526e-06, "loss": 0.4779, "step": 1641 }, { "epoch": 0.0681684907550117, "grad_norm": 2.597504138946533, "learning_rate": 9.961873786543478e-06, "loss": 0.5917, "step": 1642 }, { "epoch": 0.06821000627922304, "grad_norm": 2.719818115234375, "learning_rate": 9.961790874036323e-06, "loss": 0.5372, "step": 1643 }, { "epoch": 0.06825152180343437, "grad_norm": 2.7154624462127686, "learning_rate": 9.961707871818558e-06, "loss": 0.4547, "step": 1644 }, { "epoch": 0.0682930373276457, "grad_norm": 2.7766754627227783, "learning_rate": 9.961624779891685e-06, "loss": 0.4436, "step": 1645 }, { "epoch": 0.06833455285185704, "grad_norm": 2.8404910564422607, "learning_rate": 9.961541598257206e-06, "loss": 0.551, "step": 1646 }, { "epoch": 0.06837606837606838, "grad_norm": 2.6792123317718506, "learning_rate": 9.961458326916624e-06, "loss": 0.6826, "step": 1647 }, { "epoch": 0.06841758390027972, "grad_norm": 2.7075648307800293, "learning_rate": 9.961374965871446e-06, "loss": 0.4373, "step": 1648 }, { "epoch": 0.06845909942449105, "grad_norm": 3.263911485671997, "learning_rate": 9.961291515123178e-06, "loss": 0.603, "step": 1649 }, { "epoch": 0.06850061494870238, "grad_norm": 2.469589948654175, "learning_rate": 9.96120797467333e-06, "loss": 0.575, "step": 1650 }, { "epoch": 0.06854213047291371, "grad_norm": 2.7604739665985107, "learning_rate": 9.961124344523412e-06, "loss": 0.653, "step": 1651 }, { "epoch": 0.06858364599712505, "grad_norm": 3.334516763687134, "learning_rate": 9.961040624674934e-06, "loss": 0.5761, "step": 1652 }, { "epoch": 0.06862516152133638, "grad_norm": 2.6524975299835205, "learning_rate": 9.960956815129414e-06, "loss": 0.4858, "step": 1653 }, { "epoch": 0.06866667704554771, "grad_norm": 2.4118072986602783, "learning_rate": 9.960872915888364e-06, "loss": 0.409, "step": 1654 }, { "epoch": 0.06870819256975906, "grad_norm": 2.793879508972168, "learning_rate": 9.960788926953303e-06, "loss": 0.5424, "step": 1655 }, { "epoch": 0.0687497080939704, "grad_norm": 2.8979599475860596, "learning_rate": 9.960704848325747e-06, "loss": 0.4825, "step": 1656 }, { "epoch": 0.06879122361818173, "grad_norm": 3.3603107929229736, "learning_rate": 9.960620680007215e-06, "loss": 0.5045, "step": 1657 }, { "epoch": 0.06883273914239306, "grad_norm": 2.481863260269165, "learning_rate": 9.960536421999237e-06, "loss": 0.5066, "step": 1658 }, { "epoch": 0.06887425466660439, "grad_norm": 2.8055288791656494, "learning_rate": 9.960452074303327e-06, "loss": 0.5638, "step": 1659 }, { "epoch": 0.06891577019081573, "grad_norm": 3.0022237300872803, "learning_rate": 9.960367636921014e-06, "loss": 0.4685, "step": 1660 }, { "epoch": 0.06895728571502706, "grad_norm": 3.456005811691284, "learning_rate": 9.960283109853824e-06, "loss": 0.4731, "step": 1661 }, { "epoch": 0.06899880123923839, "grad_norm": 2.9636666774749756, "learning_rate": 9.960198493103287e-06, "loss": 0.5268, "step": 1662 }, { "epoch": 0.06904031676344974, "grad_norm": 2.8466997146606445, "learning_rate": 9.96011378667093e-06, "loss": 0.6194, "step": 1663 }, { "epoch": 0.06908183228766107, "grad_norm": 3.1414012908935547, "learning_rate": 9.960028990558289e-06, "loss": 0.6038, "step": 1664 }, { "epoch": 0.0691233478118724, "grad_norm": 3.1048147678375244, "learning_rate": 9.959944104766893e-06, "loss": 0.6754, "step": 1665 }, { "epoch": 0.06916486333608374, "grad_norm": 2.4421474933624268, "learning_rate": 9.959859129298275e-06, "loss": 0.4733, "step": 1666 }, { "epoch": 0.06920637886029507, "grad_norm": 2.4234237670898438, "learning_rate": 9.959774064153977e-06, "loss": 0.446, "step": 1667 }, { "epoch": 0.0692478943845064, "grad_norm": 2.7856574058532715, "learning_rate": 9.959688909335533e-06, "loss": 0.5209, "step": 1668 }, { "epoch": 0.06928940990871774, "grad_norm": 3.045595169067383, "learning_rate": 9.959603664844487e-06, "loss": 0.6265, "step": 1669 }, { "epoch": 0.06933092543292907, "grad_norm": 2.2709598541259766, "learning_rate": 9.959518330682373e-06, "loss": 0.5765, "step": 1670 }, { "epoch": 0.06937244095714042, "grad_norm": 2.526381254196167, "learning_rate": 9.959432906850742e-06, "loss": 0.4072, "step": 1671 }, { "epoch": 0.06941395648135175, "grad_norm": 2.929678440093994, "learning_rate": 9.959347393351133e-06, "loss": 0.548, "step": 1672 }, { "epoch": 0.06945547200556308, "grad_norm": 2.9961631298065186, "learning_rate": 9.959261790185093e-06, "loss": 0.6298, "step": 1673 }, { "epoch": 0.06949698752977441, "grad_norm": 2.7507524490356445, "learning_rate": 9.959176097354171e-06, "loss": 0.3808, "step": 1674 }, { "epoch": 0.06953850305398575, "grad_norm": 3.3185722827911377, "learning_rate": 9.959090314859916e-06, "loss": 0.5949, "step": 1675 }, { "epoch": 0.06958001857819708, "grad_norm": 3.249709129333496, "learning_rate": 9.959004442703879e-06, "loss": 0.6056, "step": 1676 }, { "epoch": 0.06962153410240841, "grad_norm": 2.4414443969726562, "learning_rate": 9.958918480887612e-06, "loss": 0.5466, "step": 1677 }, { "epoch": 0.06966304962661976, "grad_norm": 3.5192902088165283, "learning_rate": 9.95883242941267e-06, "loss": 0.626, "step": 1678 }, { "epoch": 0.0697045651508311, "grad_norm": 2.8972411155700684, "learning_rate": 9.958746288280607e-06, "loss": 0.5929, "step": 1679 }, { "epoch": 0.06974608067504243, "grad_norm": 2.882434129714966, "learning_rate": 9.958660057492982e-06, "loss": 0.6278, "step": 1680 }, { "epoch": 0.06978759619925376, "grad_norm": 2.23990535736084, "learning_rate": 9.958573737051356e-06, "loss": 0.4505, "step": 1681 }, { "epoch": 0.06982911172346509, "grad_norm": 2.6981663703918457, "learning_rate": 9.958487326957285e-06, "loss": 0.6303, "step": 1682 }, { "epoch": 0.06987062724767643, "grad_norm": 2.4420604705810547, "learning_rate": 9.958400827212335e-06, "loss": 0.4298, "step": 1683 }, { "epoch": 0.06991214277188776, "grad_norm": 2.3547308444976807, "learning_rate": 9.95831423781807e-06, "loss": 0.4713, "step": 1684 }, { "epoch": 0.06995365829609909, "grad_norm": 2.7882838249206543, "learning_rate": 9.958227558776052e-06, "loss": 0.6583, "step": 1685 }, { "epoch": 0.06999517382031044, "grad_norm": 2.852262258529663, "learning_rate": 9.958140790087854e-06, "loss": 0.4058, "step": 1686 }, { "epoch": 0.07003668934452177, "grad_norm": 2.6643126010894775, "learning_rate": 9.958053931755038e-06, "loss": 0.4872, "step": 1687 }, { "epoch": 0.0700782048687331, "grad_norm": 2.6493775844573975, "learning_rate": 9.95796698377918e-06, "loss": 0.5734, "step": 1688 }, { "epoch": 0.07011972039294444, "grad_norm": 2.5892841815948486, "learning_rate": 9.95787994616185e-06, "loss": 0.5932, "step": 1689 }, { "epoch": 0.07016123591715577, "grad_norm": 2.634521484375, "learning_rate": 9.95779281890462e-06, "loss": 0.5876, "step": 1690 }, { "epoch": 0.0702027514413671, "grad_norm": 2.6689555644989014, "learning_rate": 9.957705602009069e-06, "loss": 0.5397, "step": 1691 }, { "epoch": 0.07024426696557844, "grad_norm": 2.4935171604156494, "learning_rate": 9.95761829547677e-06, "loss": 0.4291, "step": 1692 }, { "epoch": 0.07028578248978977, "grad_norm": 3.604530096054077, "learning_rate": 9.957530899309303e-06, "loss": 0.5047, "step": 1693 }, { "epoch": 0.07032729801400112, "grad_norm": 3.568268060684204, "learning_rate": 9.957443413508249e-06, "loss": 0.5167, "step": 1694 }, { "epoch": 0.07036881353821245, "grad_norm": 2.866187572479248, "learning_rate": 9.957355838075188e-06, "loss": 0.4961, "step": 1695 }, { "epoch": 0.07041032906242378, "grad_norm": 2.7316787242889404, "learning_rate": 9.957268173011707e-06, "loss": 0.5878, "step": 1696 }, { "epoch": 0.07045184458663512, "grad_norm": 2.6213555335998535, "learning_rate": 9.957180418319388e-06, "loss": 0.4882, "step": 1697 }, { "epoch": 0.07049336011084645, "grad_norm": 3.3009092807769775, "learning_rate": 9.957092573999816e-06, "loss": 0.4966, "step": 1698 }, { "epoch": 0.07053487563505778, "grad_norm": 2.7688751220703125, "learning_rate": 9.957004640054584e-06, "loss": 0.5208, "step": 1699 }, { "epoch": 0.07057639115926911, "grad_norm": 2.7467973232269287, "learning_rate": 9.95691661648528e-06, "loss": 0.4667, "step": 1700 }, { "epoch": 0.07061790668348046, "grad_norm": 2.4475600719451904, "learning_rate": 9.956828503293492e-06, "loss": 0.6445, "step": 1701 }, { "epoch": 0.0706594222076918, "grad_norm": 2.619866371154785, "learning_rate": 9.956740300480818e-06, "loss": 0.4909, "step": 1702 }, { "epoch": 0.07070093773190313, "grad_norm": 3.257035255432129, "learning_rate": 9.95665200804885e-06, "loss": 0.7014, "step": 1703 }, { "epoch": 0.07074245325611446, "grad_norm": 2.526355028152466, "learning_rate": 9.956563625999186e-06, "loss": 0.504, "step": 1704 }, { "epoch": 0.07078396878032579, "grad_norm": 2.8413639068603516, "learning_rate": 9.95647515433342e-06, "loss": 0.6659, "step": 1705 }, { "epoch": 0.07082548430453713, "grad_norm": 2.7353506088256836, "learning_rate": 9.956386593053157e-06, "loss": 0.5547, "step": 1706 }, { "epoch": 0.07086699982874846, "grad_norm": 2.4756031036376953, "learning_rate": 9.956297942159995e-06, "loss": 0.4743, "step": 1707 }, { "epoch": 0.07090851535295979, "grad_norm": 3.089055299758911, "learning_rate": 9.956209201655539e-06, "loss": 0.4573, "step": 1708 }, { "epoch": 0.07095003087717114, "grad_norm": 2.7023496627807617, "learning_rate": 9.956120371541391e-06, "loss": 0.6031, "step": 1709 }, { "epoch": 0.07099154640138247, "grad_norm": 2.3867034912109375, "learning_rate": 9.956031451819159e-06, "loss": 0.5909, "step": 1710 }, { "epoch": 0.0710330619255938, "grad_norm": 2.089561939239502, "learning_rate": 9.95594244249045e-06, "loss": 0.4686, "step": 1711 }, { "epoch": 0.07107457744980514, "grad_norm": 2.4017937183380127, "learning_rate": 9.955853343556872e-06, "loss": 0.4767, "step": 1712 }, { "epoch": 0.07111609297401647, "grad_norm": 2.3107264041900635, "learning_rate": 9.955764155020037e-06, "loss": 0.3542, "step": 1713 }, { "epoch": 0.0711576084982278, "grad_norm": 2.7459559440612793, "learning_rate": 9.955674876881557e-06, "loss": 0.3935, "step": 1714 }, { "epoch": 0.07119912402243914, "grad_norm": 3.063619375228882, "learning_rate": 9.955585509143048e-06, "loss": 0.553, "step": 1715 }, { "epoch": 0.07124063954665047, "grad_norm": 2.8994414806365967, "learning_rate": 9.955496051806126e-06, "loss": 0.5518, "step": 1716 }, { "epoch": 0.07128215507086182, "grad_norm": 2.6342341899871826, "learning_rate": 9.955406504872405e-06, "loss": 0.5295, "step": 1717 }, { "epoch": 0.07132367059507315, "grad_norm": 2.6584153175354004, "learning_rate": 9.955316868343508e-06, "loss": 0.4253, "step": 1718 }, { "epoch": 0.07136518611928448, "grad_norm": 2.723733425140381, "learning_rate": 9.955227142221051e-06, "loss": 0.5326, "step": 1719 }, { "epoch": 0.07140670164349582, "grad_norm": 2.5651657581329346, "learning_rate": 9.955137326506661e-06, "loss": 0.5021, "step": 1720 }, { "epoch": 0.07144821716770715, "grad_norm": 3.3075149059295654, "learning_rate": 9.95504742120196e-06, "loss": 0.6681, "step": 1721 }, { "epoch": 0.07148973269191848, "grad_norm": 2.333441972732544, "learning_rate": 9.954957426308573e-06, "loss": 0.6134, "step": 1722 }, { "epoch": 0.07153124821612981, "grad_norm": 2.52140474319458, "learning_rate": 9.954867341828127e-06, "loss": 0.6573, "step": 1723 }, { "epoch": 0.07157276374034115, "grad_norm": 2.5248732566833496, "learning_rate": 9.954777167762253e-06, "loss": 0.5296, "step": 1724 }, { "epoch": 0.0716142792645525, "grad_norm": 2.5936119556427, "learning_rate": 9.954686904112578e-06, "loss": 0.6806, "step": 1725 }, { "epoch": 0.07165579478876383, "grad_norm": 2.5019545555114746, "learning_rate": 9.954596550880735e-06, "loss": 0.4778, "step": 1726 }, { "epoch": 0.07169731031297516, "grad_norm": 2.630718946456909, "learning_rate": 9.95450610806836e-06, "loss": 0.5577, "step": 1727 }, { "epoch": 0.07173882583718649, "grad_norm": 2.438852310180664, "learning_rate": 9.954415575677086e-06, "loss": 0.4836, "step": 1728 }, { "epoch": 0.07178034136139783, "grad_norm": 2.6114771366119385, "learning_rate": 9.95432495370855e-06, "loss": 0.4967, "step": 1729 }, { "epoch": 0.07182185688560916, "grad_norm": 2.4340314865112305, "learning_rate": 9.954234242164392e-06, "loss": 0.5629, "step": 1730 }, { "epoch": 0.07186337240982049, "grad_norm": 3.405306100845337, "learning_rate": 9.95414344104625e-06, "loss": 0.5805, "step": 1731 }, { "epoch": 0.07190488793403184, "grad_norm": 2.752594232559204, "learning_rate": 9.954052550355767e-06, "loss": 0.6637, "step": 1732 }, { "epoch": 0.07194640345824317, "grad_norm": 3.375434637069702, "learning_rate": 9.953961570094587e-06, "loss": 0.4215, "step": 1733 }, { "epoch": 0.0719879189824545, "grad_norm": 2.84220027923584, "learning_rate": 9.953870500264351e-06, "loss": 0.4744, "step": 1734 }, { "epoch": 0.07202943450666584, "grad_norm": 2.8339085578918457, "learning_rate": 9.95377934086671e-06, "loss": 0.6667, "step": 1735 }, { "epoch": 0.07207095003087717, "grad_norm": 2.715125799179077, "learning_rate": 9.95368809190331e-06, "loss": 0.5698, "step": 1736 }, { "epoch": 0.0721124655550885, "grad_norm": 3.2878494262695312, "learning_rate": 9.953596753375804e-06, "loss": 0.4429, "step": 1737 }, { "epoch": 0.07215398107929984, "grad_norm": 2.8864974975585938, "learning_rate": 9.95350532528584e-06, "loss": 0.5728, "step": 1738 }, { "epoch": 0.07219549660351117, "grad_norm": 3.53131365776062, "learning_rate": 9.95341380763507e-06, "loss": 0.5622, "step": 1739 }, { "epoch": 0.07223701212772252, "grad_norm": 2.509976625442505, "learning_rate": 9.953322200425153e-06, "loss": 0.5338, "step": 1740 }, { "epoch": 0.07227852765193385, "grad_norm": 2.686026096343994, "learning_rate": 9.95323050365774e-06, "loss": 0.5283, "step": 1741 }, { "epoch": 0.07232004317614518, "grad_norm": 2.9009547233581543, "learning_rate": 9.953138717334494e-06, "loss": 0.5758, "step": 1742 }, { "epoch": 0.07236155870035652, "grad_norm": 2.4895284175872803, "learning_rate": 9.953046841457072e-06, "loss": 0.4249, "step": 1743 }, { "epoch": 0.07240307422456785, "grad_norm": 2.6589128971099854, "learning_rate": 9.952954876027136e-06, "loss": 0.5177, "step": 1744 }, { "epoch": 0.07244458974877918, "grad_norm": 2.61650013923645, "learning_rate": 9.952862821046348e-06, "loss": 0.6063, "step": 1745 }, { "epoch": 0.07248610527299051, "grad_norm": 2.5056557655334473, "learning_rate": 9.952770676516372e-06, "loss": 0.5201, "step": 1746 }, { "epoch": 0.07252762079720185, "grad_norm": 3.031214475631714, "learning_rate": 9.952678442438875e-06, "loss": 0.584, "step": 1747 }, { "epoch": 0.0725691363214132, "grad_norm": 2.7796883583068848, "learning_rate": 9.952586118815524e-06, "loss": 0.7088, "step": 1748 }, { "epoch": 0.07261065184562453, "grad_norm": 3.0775420665740967, "learning_rate": 9.952493705647989e-06, "loss": 0.514, "step": 1749 }, { "epoch": 0.07265216736983586, "grad_norm": 2.701500415802002, "learning_rate": 9.95240120293794e-06, "loss": 0.4966, "step": 1750 }, { "epoch": 0.07269368289404719, "grad_norm": 2.2994370460510254, "learning_rate": 9.95230861068705e-06, "loss": 0.5448, "step": 1751 }, { "epoch": 0.07273519841825853, "grad_norm": 2.429629325866699, "learning_rate": 9.952215928896993e-06, "loss": 0.5226, "step": 1752 }, { "epoch": 0.07277671394246986, "grad_norm": 3.125091791152954, "learning_rate": 9.952123157569445e-06, "loss": 0.5475, "step": 1753 }, { "epoch": 0.07281822946668119, "grad_norm": 2.9333770275115967, "learning_rate": 9.952030296706081e-06, "loss": 0.6064, "step": 1754 }, { "epoch": 0.07285974499089254, "grad_norm": 2.6919925212860107, "learning_rate": 9.951937346308585e-06, "loss": 0.6514, "step": 1755 }, { "epoch": 0.07290126051510387, "grad_norm": 2.0528616905212402, "learning_rate": 9.951844306378631e-06, "loss": 0.3198, "step": 1756 }, { "epoch": 0.0729427760393152, "grad_norm": 2.929480791091919, "learning_rate": 9.951751176917906e-06, "loss": 0.4739, "step": 1757 }, { "epoch": 0.07298429156352654, "grad_norm": 2.757885217666626, "learning_rate": 9.951657957928094e-06, "loss": 0.4841, "step": 1758 }, { "epoch": 0.07302580708773787, "grad_norm": 2.571847438812256, "learning_rate": 9.951564649410877e-06, "loss": 0.5651, "step": 1759 }, { "epoch": 0.0730673226119492, "grad_norm": 3.575662612915039, "learning_rate": 9.951471251367946e-06, "loss": 0.4893, "step": 1760 }, { "epoch": 0.07310883813616054, "grad_norm": 2.2672016620635986, "learning_rate": 9.951377763800986e-06, "loss": 0.4808, "step": 1761 }, { "epoch": 0.07315035366037187, "grad_norm": 2.6099166870117188, "learning_rate": 9.95128418671169e-06, "loss": 0.5989, "step": 1762 }, { "epoch": 0.07319186918458322, "grad_norm": 2.5209033489227295, "learning_rate": 9.951190520101746e-06, "loss": 0.5353, "step": 1763 }, { "epoch": 0.07323338470879455, "grad_norm": 2.5804648399353027, "learning_rate": 9.951096763972853e-06, "loss": 0.4917, "step": 1764 }, { "epoch": 0.07327490023300588, "grad_norm": 3.0064537525177, "learning_rate": 9.951002918326701e-06, "loss": 0.5185, "step": 1765 }, { "epoch": 0.07331641575721722, "grad_norm": 2.3976757526397705, "learning_rate": 9.950908983164993e-06, "loss": 0.4953, "step": 1766 }, { "epoch": 0.07335793128142855, "grad_norm": 2.614516019821167, "learning_rate": 9.950814958489421e-06, "loss": 0.4601, "step": 1767 }, { "epoch": 0.07339944680563988, "grad_norm": 2.734846830368042, "learning_rate": 9.950720844301689e-06, "loss": 0.531, "step": 1768 }, { "epoch": 0.07344096232985121, "grad_norm": 3.227992296218872, "learning_rate": 9.950626640603495e-06, "loss": 0.5389, "step": 1769 }, { "epoch": 0.07348247785406255, "grad_norm": 2.6348888874053955, "learning_rate": 9.950532347396547e-06, "loss": 0.5378, "step": 1770 }, { "epoch": 0.0735239933782739, "grad_norm": 2.772592544555664, "learning_rate": 9.950437964682544e-06, "loss": 0.4825, "step": 1771 }, { "epoch": 0.07356550890248523, "grad_norm": 2.4118897914886475, "learning_rate": 9.9503434924632e-06, "loss": 0.5152, "step": 1772 }, { "epoch": 0.07360702442669656, "grad_norm": 3.051983594894409, "learning_rate": 9.950248930740216e-06, "loss": 0.6228, "step": 1773 }, { "epoch": 0.0736485399509079, "grad_norm": 2.5077641010284424, "learning_rate": 9.950154279515305e-06, "loss": 0.5265, "step": 1774 }, { "epoch": 0.07369005547511923, "grad_norm": 2.583108425140381, "learning_rate": 9.950059538790178e-06, "loss": 0.4637, "step": 1775 }, { "epoch": 0.07373157099933056, "grad_norm": 3.033522129058838, "learning_rate": 9.949964708566546e-06, "loss": 0.5521, "step": 1776 }, { "epoch": 0.07377308652354189, "grad_norm": 2.3725099563598633, "learning_rate": 9.949869788846127e-06, "loss": 0.5328, "step": 1777 }, { "epoch": 0.07381460204775322, "grad_norm": 2.336435556411743, "learning_rate": 9.949774779630637e-06, "loss": 0.3682, "step": 1778 }, { "epoch": 0.07385611757196457, "grad_norm": 3.2345035076141357, "learning_rate": 9.94967968092179e-06, "loss": 0.6266, "step": 1779 }, { "epoch": 0.0738976330961759, "grad_norm": 2.7254185676574707, "learning_rate": 9.94958449272131e-06, "loss": 0.5901, "step": 1780 }, { "epoch": 0.07393914862038724, "grad_norm": 2.4787540435791016, "learning_rate": 9.949489215030913e-06, "loss": 0.5389, "step": 1781 }, { "epoch": 0.07398066414459857, "grad_norm": 3.4740443229675293, "learning_rate": 9.949393847852325e-06, "loss": 0.4604, "step": 1782 }, { "epoch": 0.0740221796688099, "grad_norm": 3.161247730255127, "learning_rate": 9.949298391187272e-06, "loss": 0.4306, "step": 1783 }, { "epoch": 0.07406369519302124, "grad_norm": 3.3997385501861572, "learning_rate": 9.949202845037475e-06, "loss": 0.6066, "step": 1784 }, { "epoch": 0.07410521071723257, "grad_norm": 2.9016330242156982, "learning_rate": 9.949107209404664e-06, "loss": 0.5772, "step": 1785 }, { "epoch": 0.07414672624144392, "grad_norm": 2.8787147998809814, "learning_rate": 9.94901148429057e-06, "loss": 0.6393, "step": 1786 }, { "epoch": 0.07418824176565525, "grad_norm": 2.6298608779907227, "learning_rate": 9.948915669696921e-06, "loss": 0.3963, "step": 1787 }, { "epoch": 0.07422975728986658, "grad_norm": 2.2854204177856445, "learning_rate": 9.94881976562545e-06, "loss": 0.4191, "step": 1788 }, { "epoch": 0.07427127281407792, "grad_norm": 3.3201241493225098, "learning_rate": 9.94872377207789e-06, "loss": 0.5305, "step": 1789 }, { "epoch": 0.07431278833828925, "grad_norm": 2.6310887336730957, "learning_rate": 9.94862768905598e-06, "loss": 0.6931, "step": 1790 }, { "epoch": 0.07435430386250058, "grad_norm": 3.214045763015747, "learning_rate": 9.948531516561455e-06, "loss": 0.4845, "step": 1791 }, { "epoch": 0.07439581938671191, "grad_norm": 3.1912901401519775, "learning_rate": 9.948435254596054e-06, "loss": 0.5712, "step": 1792 }, { "epoch": 0.07443733491092325, "grad_norm": 2.4689884185791016, "learning_rate": 9.948338903161516e-06, "loss": 0.6569, "step": 1793 }, { "epoch": 0.0744788504351346, "grad_norm": 3.0089006423950195, "learning_rate": 9.948242462259585e-06, "loss": 0.5421, "step": 1794 }, { "epoch": 0.07452036595934593, "grad_norm": 2.9258217811584473, "learning_rate": 9.948145931892003e-06, "loss": 0.4025, "step": 1795 }, { "epoch": 0.07456188148355726, "grad_norm": 2.5344808101654053, "learning_rate": 9.948049312060517e-06, "loss": 0.5632, "step": 1796 }, { "epoch": 0.0746033970077686, "grad_norm": 2.49090313911438, "learning_rate": 9.947952602766874e-06, "loss": 0.6092, "step": 1797 }, { "epoch": 0.07464491253197993, "grad_norm": 2.85707950592041, "learning_rate": 9.947855804012822e-06, "loss": 0.6501, "step": 1798 }, { "epoch": 0.07468642805619126, "grad_norm": 2.6994733810424805, "learning_rate": 9.94775891580011e-06, "loss": 0.6349, "step": 1799 }, { "epoch": 0.07472794358040259, "grad_norm": 2.5672147274017334, "learning_rate": 9.94766193813049e-06, "loss": 0.4526, "step": 1800 }, { "epoch": 0.07476945910461393, "grad_norm": 2.67649245262146, "learning_rate": 9.947564871005717e-06, "loss": 0.5389, "step": 1801 }, { "epoch": 0.07481097462882527, "grad_norm": 2.719968557357788, "learning_rate": 9.947467714427546e-06, "loss": 0.6725, "step": 1802 }, { "epoch": 0.0748524901530366, "grad_norm": 2.550511598587036, "learning_rate": 9.947370468397731e-06, "loss": 0.6614, "step": 1803 }, { "epoch": 0.07489400567724794, "grad_norm": 2.820765972137451, "learning_rate": 9.947273132918035e-06, "loss": 0.5416, "step": 1804 }, { "epoch": 0.07493552120145927, "grad_norm": 3.1879444122314453, "learning_rate": 9.94717570799021e-06, "loss": 0.6533, "step": 1805 }, { "epoch": 0.0749770367256706, "grad_norm": 2.213289499282837, "learning_rate": 9.947078193616026e-06, "loss": 0.4356, "step": 1806 }, { "epoch": 0.07501855224988194, "grad_norm": 2.5991735458374023, "learning_rate": 9.946980589797242e-06, "loss": 0.5855, "step": 1807 }, { "epoch": 0.07506006777409327, "grad_norm": 3.102623462677002, "learning_rate": 9.946882896535622e-06, "loss": 0.45, "step": 1808 }, { "epoch": 0.07510158329830462, "grad_norm": 3.0833747386932373, "learning_rate": 9.946785113832932e-06, "loss": 0.526, "step": 1809 }, { "epoch": 0.07514309882251595, "grad_norm": 2.540799617767334, "learning_rate": 9.946687241690945e-06, "loss": 0.5419, "step": 1810 }, { "epoch": 0.07518461434672728, "grad_norm": 2.6081295013427734, "learning_rate": 9.946589280111423e-06, "loss": 0.4611, "step": 1811 }, { "epoch": 0.07522612987093862, "grad_norm": 2.383045196533203, "learning_rate": 9.946491229096143e-06, "loss": 0.4135, "step": 1812 }, { "epoch": 0.07526764539514995, "grad_norm": 3.03827166557312, "learning_rate": 9.946393088646876e-06, "loss": 0.5353, "step": 1813 }, { "epoch": 0.07530916091936128, "grad_norm": 2.2522900104522705, "learning_rate": 9.946294858765396e-06, "loss": 0.5333, "step": 1814 }, { "epoch": 0.07535067644357261, "grad_norm": 2.4934139251708984, "learning_rate": 9.946196539453479e-06, "loss": 0.4168, "step": 1815 }, { "epoch": 0.07539219196778395, "grad_norm": 2.3939034938812256, "learning_rate": 9.946098130712904e-06, "loss": 0.5441, "step": 1816 }, { "epoch": 0.0754337074919953, "grad_norm": 2.4274415969848633, "learning_rate": 9.945999632545448e-06, "loss": 0.5457, "step": 1817 }, { "epoch": 0.07547522301620663, "grad_norm": 2.3353798389434814, "learning_rate": 9.945901044952894e-06, "loss": 0.5911, "step": 1818 }, { "epoch": 0.07551673854041796, "grad_norm": 2.464677095413208, "learning_rate": 9.945802367937024e-06, "loss": 0.429, "step": 1819 }, { "epoch": 0.0755582540646293, "grad_norm": 2.9400599002838135, "learning_rate": 9.94570360149962e-06, "loss": 0.5059, "step": 1820 }, { "epoch": 0.07559976958884063, "grad_norm": 2.4035439491271973, "learning_rate": 9.94560474564247e-06, "loss": 0.3949, "step": 1821 }, { "epoch": 0.07564128511305196, "grad_norm": 2.73635196685791, "learning_rate": 9.94550580036736e-06, "loss": 0.5492, "step": 1822 }, { "epoch": 0.07568280063726329, "grad_norm": 2.4568545818328857, "learning_rate": 9.94540676567608e-06, "loss": 0.4686, "step": 1823 }, { "epoch": 0.07572431616147463, "grad_norm": 2.4998621940612793, "learning_rate": 9.94530764157042e-06, "loss": 0.468, "step": 1824 }, { "epoch": 0.07576583168568597, "grad_norm": 2.8023481369018555, "learning_rate": 9.945208428052174e-06, "loss": 0.5392, "step": 1825 }, { "epoch": 0.0758073472098973, "grad_norm": 2.205942153930664, "learning_rate": 9.945109125123133e-06, "loss": 0.4218, "step": 1826 }, { "epoch": 0.07584886273410864, "grad_norm": 2.7017970085144043, "learning_rate": 9.945009732785094e-06, "loss": 0.6479, "step": 1827 }, { "epoch": 0.07589037825831997, "grad_norm": 2.7043402194976807, "learning_rate": 9.944910251039855e-06, "loss": 0.5427, "step": 1828 }, { "epoch": 0.0759318937825313, "grad_norm": 2.948385715484619, "learning_rate": 9.944810679889212e-06, "loss": 0.615, "step": 1829 }, { "epoch": 0.07597340930674264, "grad_norm": 2.324906826019287, "learning_rate": 9.944711019334967e-06, "loss": 0.5033, "step": 1830 }, { "epoch": 0.07601492483095397, "grad_norm": 2.5789828300476074, "learning_rate": 9.944611269378922e-06, "loss": 0.4475, "step": 1831 }, { "epoch": 0.0760564403551653, "grad_norm": 2.3624608516693115, "learning_rate": 9.944511430022879e-06, "loss": 0.4478, "step": 1832 }, { "epoch": 0.07609795587937665, "grad_norm": 2.521817922592163, "learning_rate": 9.944411501268644e-06, "loss": 0.4927, "step": 1833 }, { "epoch": 0.07613947140358798, "grad_norm": 2.8557369709014893, "learning_rate": 9.944311483118024e-06, "loss": 0.4742, "step": 1834 }, { "epoch": 0.07618098692779932, "grad_norm": 2.22358775138855, "learning_rate": 9.944211375572827e-06, "loss": 0.4181, "step": 1835 }, { "epoch": 0.07622250245201065, "grad_norm": 2.8300654888153076, "learning_rate": 9.944111178634865e-06, "loss": 0.5699, "step": 1836 }, { "epoch": 0.07626401797622198, "grad_norm": 2.765866994857788, "learning_rate": 9.944010892305948e-06, "loss": 0.5682, "step": 1837 }, { "epoch": 0.07630553350043331, "grad_norm": 3.4917328357696533, "learning_rate": 9.943910516587887e-06, "loss": 0.6686, "step": 1838 }, { "epoch": 0.07634704902464465, "grad_norm": 2.309282064437866, "learning_rate": 9.9438100514825e-06, "loss": 0.5282, "step": 1839 }, { "epoch": 0.076388564548856, "grad_norm": 3.4673476219177246, "learning_rate": 9.943709496991602e-06, "loss": 0.6783, "step": 1840 }, { "epoch": 0.07643008007306733, "grad_norm": 2.358736276626587, "learning_rate": 9.943608853117011e-06, "loss": 0.4481, "step": 1841 }, { "epoch": 0.07647159559727866, "grad_norm": 2.984506607055664, "learning_rate": 9.943508119860548e-06, "loss": 0.674, "step": 1842 }, { "epoch": 0.07651311112149, "grad_norm": 2.871396541595459, "learning_rate": 9.943407297224032e-06, "loss": 0.556, "step": 1843 }, { "epoch": 0.07655462664570133, "grad_norm": 2.673699378967285, "learning_rate": 9.94330638520929e-06, "loss": 0.4851, "step": 1844 }, { "epoch": 0.07659614216991266, "grad_norm": 2.5239531993865967, "learning_rate": 9.943205383818142e-06, "loss": 0.4854, "step": 1845 }, { "epoch": 0.07663765769412399, "grad_norm": 3.0149118900299072, "learning_rate": 9.943104293052414e-06, "loss": 0.4365, "step": 1846 }, { "epoch": 0.07667917321833533, "grad_norm": 2.689399480819702, "learning_rate": 9.943003112913938e-06, "loss": 0.4401, "step": 1847 }, { "epoch": 0.07672068874254667, "grad_norm": 2.8707690238952637, "learning_rate": 9.94290184340454e-06, "loss": 0.5924, "step": 1848 }, { "epoch": 0.076762204266758, "grad_norm": 2.810020685195923, "learning_rate": 9.942800484526054e-06, "loss": 0.7935, "step": 1849 }, { "epoch": 0.07680371979096934, "grad_norm": 2.3412530422210693, "learning_rate": 9.942699036280308e-06, "loss": 0.4975, "step": 1850 }, { "epoch": 0.07684523531518067, "grad_norm": 3.2943336963653564, "learning_rate": 9.94259749866914e-06, "loss": 0.6083, "step": 1851 }, { "epoch": 0.076886750839392, "grad_norm": 2.9589345455169678, "learning_rate": 9.942495871694384e-06, "loss": 0.5128, "step": 1852 }, { "epoch": 0.07692826636360334, "grad_norm": 2.563688039779663, "learning_rate": 9.942394155357879e-06, "loss": 0.4329, "step": 1853 }, { "epoch": 0.07696978188781467, "grad_norm": 2.413346767425537, "learning_rate": 9.942292349661461e-06, "loss": 0.5686, "step": 1854 }, { "epoch": 0.077011297412026, "grad_norm": 2.2818965911865234, "learning_rate": 9.942190454606974e-06, "loss": 0.6387, "step": 1855 }, { "epoch": 0.07705281293623735, "grad_norm": 2.6953835487365723, "learning_rate": 9.942088470196261e-06, "loss": 0.4384, "step": 1856 }, { "epoch": 0.07709432846044868, "grad_norm": 2.6079049110412598, "learning_rate": 9.941986396431161e-06, "loss": 0.4934, "step": 1857 }, { "epoch": 0.07713584398466002, "grad_norm": 2.947380781173706, "learning_rate": 9.941884233313525e-06, "loss": 0.5926, "step": 1858 }, { "epoch": 0.07717735950887135, "grad_norm": 2.2788476943969727, "learning_rate": 9.941781980845195e-06, "loss": 0.5125, "step": 1859 }, { "epoch": 0.07721887503308268, "grad_norm": 2.5572152137756348, "learning_rate": 9.941679639028026e-06, "loss": 0.5363, "step": 1860 }, { "epoch": 0.07726039055729401, "grad_norm": 2.946317434310913, "learning_rate": 9.941577207863862e-06, "loss": 0.5329, "step": 1861 }, { "epoch": 0.07730190608150535, "grad_norm": 2.2847437858581543, "learning_rate": 9.941474687354557e-06, "loss": 0.4939, "step": 1862 }, { "epoch": 0.0773434216057167, "grad_norm": 2.584846258163452, "learning_rate": 9.941372077501967e-06, "loss": 0.5772, "step": 1863 }, { "epoch": 0.07738493712992803, "grad_norm": 3.738391399383545, "learning_rate": 9.941269378307946e-06, "loss": 0.5527, "step": 1864 }, { "epoch": 0.07742645265413936, "grad_norm": 3.204652786254883, "learning_rate": 9.94116658977435e-06, "loss": 0.4642, "step": 1865 }, { "epoch": 0.0774679681783507, "grad_norm": 2.489657163619995, "learning_rate": 9.941063711903038e-06, "loss": 0.5184, "step": 1866 }, { "epoch": 0.07750948370256203, "grad_norm": 2.604128360748291, "learning_rate": 9.940960744695869e-06, "loss": 0.6131, "step": 1867 }, { "epoch": 0.07755099922677336, "grad_norm": 2.8081960678100586, "learning_rate": 9.940857688154707e-06, "loss": 0.4185, "step": 1868 }, { "epoch": 0.07759251475098469, "grad_norm": 2.825058698654175, "learning_rate": 9.940754542281412e-06, "loss": 0.4405, "step": 1869 }, { "epoch": 0.07763403027519603, "grad_norm": 2.2024550437927246, "learning_rate": 9.940651307077853e-06, "loss": 0.4959, "step": 1870 }, { "epoch": 0.07767554579940737, "grad_norm": 2.6411821842193604, "learning_rate": 9.940547982545892e-06, "loss": 0.4582, "step": 1871 }, { "epoch": 0.0777170613236187, "grad_norm": 3.2969491481781006, "learning_rate": 9.940444568687402e-06, "loss": 0.7102, "step": 1872 }, { "epoch": 0.07775857684783004, "grad_norm": 2.6294214725494385, "learning_rate": 9.940341065504249e-06, "loss": 0.6076, "step": 1873 }, { "epoch": 0.07780009237204137, "grad_norm": 2.5082383155822754, "learning_rate": 9.940237472998305e-06, "loss": 0.506, "step": 1874 }, { "epoch": 0.0778416078962527, "grad_norm": 3.135464906692505, "learning_rate": 9.940133791171445e-06, "loss": 0.5672, "step": 1875 }, { "epoch": 0.07788312342046404, "grad_norm": 2.519962787628174, "learning_rate": 9.94003002002554e-06, "loss": 0.4914, "step": 1876 }, { "epoch": 0.07792463894467537, "grad_norm": 3.3004350662231445, "learning_rate": 9.93992615956247e-06, "loss": 0.7685, "step": 1877 }, { "epoch": 0.0779661544688867, "grad_norm": 2.941524028778076, "learning_rate": 9.93982220978411e-06, "loss": 0.5337, "step": 1878 }, { "epoch": 0.07800766999309805, "grad_norm": 3.2897047996520996, "learning_rate": 9.939718170692341e-06, "loss": 0.6029, "step": 1879 }, { "epoch": 0.07804918551730938, "grad_norm": 3.1822869777679443, "learning_rate": 9.939614042289045e-06, "loss": 0.4722, "step": 1880 }, { "epoch": 0.07809070104152072, "grad_norm": 2.7044124603271484, "learning_rate": 9.939509824576103e-06, "loss": 0.4321, "step": 1881 }, { "epoch": 0.07813221656573205, "grad_norm": 2.1644577980041504, "learning_rate": 9.939405517555398e-06, "loss": 0.4713, "step": 1882 }, { "epoch": 0.07817373208994338, "grad_norm": 2.1203887462615967, "learning_rate": 9.93930112122882e-06, "loss": 0.4644, "step": 1883 }, { "epoch": 0.07821524761415471, "grad_norm": 2.799781322479248, "learning_rate": 9.939196635598252e-06, "loss": 0.4453, "step": 1884 }, { "epoch": 0.07825676313836605, "grad_norm": 2.742264986038208, "learning_rate": 9.939092060665586e-06, "loss": 0.5691, "step": 1885 }, { "epoch": 0.07829827866257738, "grad_norm": 2.429455518722534, "learning_rate": 9.93898739643271e-06, "loss": 0.4534, "step": 1886 }, { "epoch": 0.07833979418678873, "grad_norm": 3.1531951427459717, "learning_rate": 9.938882642901522e-06, "loss": 0.5612, "step": 1887 }, { "epoch": 0.07838130971100006, "grad_norm": 3.766714334487915, "learning_rate": 9.93877780007391e-06, "loss": 0.6252, "step": 1888 }, { "epoch": 0.0784228252352114, "grad_norm": 2.7817394733428955, "learning_rate": 9.938672867951773e-06, "loss": 0.6236, "step": 1889 }, { "epoch": 0.07846434075942273, "grad_norm": 3.1176013946533203, "learning_rate": 9.938567846537006e-06, "loss": 0.6356, "step": 1890 }, { "epoch": 0.07850585628363406, "grad_norm": 2.905938148498535, "learning_rate": 9.938462735831508e-06, "loss": 0.5197, "step": 1891 }, { "epoch": 0.07854737180784539, "grad_norm": 2.94857120513916, "learning_rate": 9.938357535837181e-06, "loss": 0.4651, "step": 1892 }, { "epoch": 0.07858888733205673, "grad_norm": 2.6484920978546143, "learning_rate": 9.938252246555929e-06, "loss": 0.5535, "step": 1893 }, { "epoch": 0.07863040285626807, "grad_norm": 3.6264002323150635, "learning_rate": 9.93814686798965e-06, "loss": 0.3939, "step": 1894 }, { "epoch": 0.0786719183804794, "grad_norm": 2.7130930423736572, "learning_rate": 9.938041400140254e-06, "loss": 0.5693, "step": 1895 }, { "epoch": 0.07871343390469074, "grad_norm": 3.0342864990234375, "learning_rate": 9.937935843009645e-06, "loss": 0.5137, "step": 1896 }, { "epoch": 0.07875494942890207, "grad_norm": 2.650603771209717, "learning_rate": 9.937830196599734e-06, "loss": 0.4551, "step": 1897 }, { "epoch": 0.0787964649531134, "grad_norm": 2.4564051628112793, "learning_rate": 9.937724460912428e-06, "loss": 0.5223, "step": 1898 }, { "epoch": 0.07883798047732474, "grad_norm": 2.8179306983947754, "learning_rate": 9.937618635949642e-06, "loss": 0.6065, "step": 1899 }, { "epoch": 0.07887949600153607, "grad_norm": 2.37351393699646, "learning_rate": 9.937512721713286e-06, "loss": 0.3868, "step": 1900 }, { "epoch": 0.0789210115257474, "grad_norm": 2.9324803352355957, "learning_rate": 9.937406718205281e-06, "loss": 0.5577, "step": 1901 }, { "epoch": 0.07896252704995875, "grad_norm": 2.938283920288086, "learning_rate": 9.937300625427535e-06, "loss": 0.5728, "step": 1902 }, { "epoch": 0.07900404257417008, "grad_norm": 2.937645196914673, "learning_rate": 9.937194443381972e-06, "loss": 0.6468, "step": 1903 }, { "epoch": 0.07904555809838142, "grad_norm": 2.9398136138916016, "learning_rate": 9.937088172070512e-06, "loss": 0.4577, "step": 1904 }, { "epoch": 0.07908707362259275, "grad_norm": 2.5865724086761475, "learning_rate": 9.936981811495073e-06, "loss": 0.4529, "step": 1905 }, { "epoch": 0.07912858914680408, "grad_norm": 2.861041784286499, "learning_rate": 9.936875361657581e-06, "loss": 0.5997, "step": 1906 }, { "epoch": 0.07917010467101541, "grad_norm": 2.988290548324585, "learning_rate": 9.936768822559958e-06, "loss": 0.6222, "step": 1907 }, { "epoch": 0.07921162019522675, "grad_norm": 2.5605945587158203, "learning_rate": 9.936662194204132e-06, "loss": 0.7068, "step": 1908 }, { "epoch": 0.07925313571943808, "grad_norm": 2.233499050140381, "learning_rate": 9.936555476592032e-06, "loss": 0.3327, "step": 1909 }, { "epoch": 0.07929465124364943, "grad_norm": 2.9473137855529785, "learning_rate": 9.936448669725585e-06, "loss": 0.5228, "step": 1910 }, { "epoch": 0.07933616676786076, "grad_norm": 3.2618250846862793, "learning_rate": 9.936341773606723e-06, "loss": 0.6026, "step": 1911 }, { "epoch": 0.0793776822920721, "grad_norm": 2.533108949661255, "learning_rate": 9.93623478823738e-06, "loss": 0.498, "step": 1912 }, { "epoch": 0.07941919781628343, "grad_norm": 2.445624589920044, "learning_rate": 9.936127713619488e-06, "loss": 0.5146, "step": 1913 }, { "epoch": 0.07946071334049476, "grad_norm": 2.486072540283203, "learning_rate": 9.936020549754985e-06, "loss": 0.5059, "step": 1914 }, { "epoch": 0.07950222886470609, "grad_norm": 2.359107732772827, "learning_rate": 9.935913296645808e-06, "loss": 0.5479, "step": 1915 }, { "epoch": 0.07954374438891743, "grad_norm": 2.328892946243286, "learning_rate": 9.935805954293896e-06, "loss": 0.4504, "step": 1916 }, { "epoch": 0.07958525991312877, "grad_norm": 2.8663101196289062, "learning_rate": 9.935698522701189e-06, "loss": 0.4861, "step": 1917 }, { "epoch": 0.0796267754373401, "grad_norm": 2.9641222953796387, "learning_rate": 9.935591001869631e-06, "loss": 0.5175, "step": 1918 }, { "epoch": 0.07966829096155144, "grad_norm": 2.945267915725708, "learning_rate": 9.935483391801163e-06, "loss": 0.491, "step": 1919 }, { "epoch": 0.07970980648576277, "grad_norm": 2.488996744155884, "learning_rate": 9.935375692497735e-06, "loss": 0.5824, "step": 1920 }, { "epoch": 0.0797513220099741, "grad_norm": 2.909759759902954, "learning_rate": 9.935267903961292e-06, "loss": 0.5015, "step": 1921 }, { "epoch": 0.07979283753418544, "grad_norm": 2.9348809719085693, "learning_rate": 9.935160026193782e-06, "loss": 0.4773, "step": 1922 }, { "epoch": 0.07983435305839677, "grad_norm": 3.018172264099121, "learning_rate": 9.935052059197157e-06, "loss": 0.5345, "step": 1923 }, { "epoch": 0.0798758685826081, "grad_norm": 2.315500259399414, "learning_rate": 9.934944002973366e-06, "loss": 0.5876, "step": 1924 }, { "epoch": 0.07991738410681945, "grad_norm": 2.4142098426818848, "learning_rate": 9.934835857524368e-06, "loss": 0.5187, "step": 1925 }, { "epoch": 0.07995889963103078, "grad_norm": 3.2136390209198, "learning_rate": 9.934727622852112e-06, "loss": 0.6731, "step": 1926 }, { "epoch": 0.08000041515524212, "grad_norm": 2.562185764312744, "learning_rate": 9.934619298958561e-06, "loss": 0.6792, "step": 1927 }, { "epoch": 0.08004193067945345, "grad_norm": 3.0060982704162598, "learning_rate": 9.934510885845669e-06, "loss": 0.5095, "step": 1928 }, { "epoch": 0.08008344620366478, "grad_norm": 3.026505708694458, "learning_rate": 9.9344023835154e-06, "loss": 0.6316, "step": 1929 }, { "epoch": 0.08012496172787612, "grad_norm": 2.8262298107147217, "learning_rate": 9.934293791969712e-06, "loss": 0.5645, "step": 1930 }, { "epoch": 0.08016647725208745, "grad_norm": 2.9388368129730225, "learning_rate": 9.93418511121057e-06, "loss": 0.5653, "step": 1931 }, { "epoch": 0.08020799277629878, "grad_norm": 3.016765832901001, "learning_rate": 9.93407634123994e-06, "loss": 0.5945, "step": 1932 }, { "epoch": 0.08024950830051013, "grad_norm": 2.542283773422241, "learning_rate": 9.933967482059787e-06, "loss": 0.5432, "step": 1933 }, { "epoch": 0.08029102382472146, "grad_norm": 2.5604043006896973, "learning_rate": 9.93385853367208e-06, "loss": 0.541, "step": 1934 }, { "epoch": 0.0803325393489328, "grad_norm": 2.6949005126953125, "learning_rate": 9.933749496078788e-06, "loss": 0.5065, "step": 1935 }, { "epoch": 0.08037405487314413, "grad_norm": 2.2832653522491455, "learning_rate": 9.933640369281884e-06, "loss": 0.5242, "step": 1936 }, { "epoch": 0.08041557039735546, "grad_norm": 2.760106325149536, "learning_rate": 9.93353115328334e-06, "loss": 0.5948, "step": 1937 }, { "epoch": 0.08045708592156679, "grad_norm": 2.4131882190704346, "learning_rate": 9.93342184808513e-06, "loss": 0.3843, "step": 1938 }, { "epoch": 0.08049860144577813, "grad_norm": 2.971191167831421, "learning_rate": 9.933312453689232e-06, "loss": 0.5972, "step": 1939 }, { "epoch": 0.08054011696998946, "grad_norm": 2.161240577697754, "learning_rate": 9.933202970097621e-06, "loss": 0.3827, "step": 1940 }, { "epoch": 0.0805816324942008, "grad_norm": 2.3953027725219727, "learning_rate": 9.933093397312282e-06, "loss": 0.713, "step": 1941 }, { "epoch": 0.08062314801841214, "grad_norm": 2.562922954559326, "learning_rate": 9.932983735335188e-06, "loss": 0.6832, "step": 1942 }, { "epoch": 0.08066466354262347, "grad_norm": 3.352691411972046, "learning_rate": 9.932873984168329e-06, "loss": 0.5949, "step": 1943 }, { "epoch": 0.0807061790668348, "grad_norm": 2.6963677406311035, "learning_rate": 9.932764143813686e-06, "loss": 0.4891, "step": 1944 }, { "epoch": 0.08074769459104614, "grad_norm": 2.6689341068267822, "learning_rate": 9.932654214273245e-06, "loss": 0.5332, "step": 1945 }, { "epoch": 0.08078921011525747, "grad_norm": 2.1251792907714844, "learning_rate": 9.932544195548995e-06, "loss": 0.4848, "step": 1946 }, { "epoch": 0.0808307256394688, "grad_norm": 2.6187257766723633, "learning_rate": 9.932434087642924e-06, "loss": 0.5723, "step": 1947 }, { "epoch": 0.08087224116368015, "grad_norm": 2.3226664066314697, "learning_rate": 9.932323890557023e-06, "loss": 0.4868, "step": 1948 }, { "epoch": 0.08091375668789148, "grad_norm": 2.815521717071533, "learning_rate": 9.932213604293284e-06, "loss": 0.5324, "step": 1949 }, { "epoch": 0.08095527221210282, "grad_norm": 3.0380544662475586, "learning_rate": 9.932103228853702e-06, "loss": 0.4859, "step": 1950 }, { "epoch": 0.08099678773631415, "grad_norm": 2.9059934616088867, "learning_rate": 9.931992764240271e-06, "loss": 0.6131, "step": 1951 }, { "epoch": 0.08103830326052548, "grad_norm": 3.212571859359741, "learning_rate": 9.93188221045499e-06, "loss": 0.4546, "step": 1952 }, { "epoch": 0.08107981878473682, "grad_norm": 2.799938440322876, "learning_rate": 9.931771567499856e-06, "loss": 0.4148, "step": 1953 }, { "epoch": 0.08112133430894815, "grad_norm": 3.6904454231262207, "learning_rate": 9.931660835376873e-06, "loss": 0.6608, "step": 1954 }, { "epoch": 0.08116284983315948, "grad_norm": 2.7428643703460693, "learning_rate": 9.93155001408804e-06, "loss": 0.6289, "step": 1955 }, { "epoch": 0.08120436535737083, "grad_norm": 3.7965283393859863, "learning_rate": 9.931439103635362e-06, "loss": 0.5108, "step": 1956 }, { "epoch": 0.08124588088158216, "grad_norm": 2.650113344192505, "learning_rate": 9.93132810402084e-06, "loss": 0.4943, "step": 1957 }, { "epoch": 0.0812873964057935, "grad_norm": 2.701333522796631, "learning_rate": 9.931217015246487e-06, "loss": 0.5521, "step": 1958 }, { "epoch": 0.08132891193000483, "grad_norm": 2.4854490756988525, "learning_rate": 9.931105837314309e-06, "loss": 0.602, "step": 1959 }, { "epoch": 0.08137042745421616, "grad_norm": 2.824965476989746, "learning_rate": 9.930994570226316e-06, "loss": 0.5745, "step": 1960 }, { "epoch": 0.08141194297842749, "grad_norm": 2.724067449569702, "learning_rate": 9.93088321398452e-06, "loss": 0.6111, "step": 1961 }, { "epoch": 0.08145345850263883, "grad_norm": 2.1255996227264404, "learning_rate": 9.930771768590934e-06, "loss": 0.3646, "step": 1962 }, { "epoch": 0.08149497402685016, "grad_norm": 2.7753047943115234, "learning_rate": 9.930660234047574e-06, "loss": 0.6775, "step": 1963 }, { "epoch": 0.0815364895510615, "grad_norm": 2.51779842376709, "learning_rate": 9.930548610356453e-06, "loss": 0.455, "step": 1964 }, { "epoch": 0.08157800507527284, "grad_norm": 2.6735591888427734, "learning_rate": 9.930436897519595e-06, "loss": 0.5889, "step": 1965 }, { "epoch": 0.08161952059948417, "grad_norm": 2.8830695152282715, "learning_rate": 9.930325095539017e-06, "loss": 0.3228, "step": 1966 }, { "epoch": 0.0816610361236955, "grad_norm": 2.4912238121032715, "learning_rate": 9.930213204416739e-06, "loss": 0.4299, "step": 1967 }, { "epoch": 0.08170255164790684, "grad_norm": 2.7920713424682617, "learning_rate": 9.930101224154785e-06, "loss": 0.4871, "step": 1968 }, { "epoch": 0.08174406717211817, "grad_norm": 2.980313301086426, "learning_rate": 9.929989154755179e-06, "loss": 0.5001, "step": 1969 }, { "epoch": 0.0817855826963295, "grad_norm": 2.811966896057129, "learning_rate": 9.92987699621995e-06, "loss": 0.4523, "step": 1970 }, { "epoch": 0.08182709822054085, "grad_norm": 2.5203254222869873, "learning_rate": 9.929764748551123e-06, "loss": 0.4853, "step": 1971 }, { "epoch": 0.08186861374475218, "grad_norm": 2.693061351776123, "learning_rate": 9.929652411750728e-06, "loss": 0.5169, "step": 1972 }, { "epoch": 0.08191012926896352, "grad_norm": 2.4671475887298584, "learning_rate": 9.929539985820797e-06, "loss": 0.6026, "step": 1973 }, { "epoch": 0.08195164479317485, "grad_norm": 2.3863675594329834, "learning_rate": 9.929427470763364e-06, "loss": 0.4737, "step": 1974 }, { "epoch": 0.08199316031738618, "grad_norm": 2.9007883071899414, "learning_rate": 9.92931486658046e-06, "loss": 0.594, "step": 1975 }, { "epoch": 0.08203467584159752, "grad_norm": 2.5661282539367676, "learning_rate": 9.929202173274122e-06, "loss": 0.572, "step": 1976 }, { "epoch": 0.08207619136580885, "grad_norm": 2.453052520751953, "learning_rate": 9.929089390846389e-06, "loss": 0.4627, "step": 1977 }, { "epoch": 0.08211770689002018, "grad_norm": 2.5620524883270264, "learning_rate": 9.928976519299298e-06, "loss": 0.5329, "step": 1978 }, { "epoch": 0.08215922241423153, "grad_norm": 2.4966256618499756, "learning_rate": 9.928863558634891e-06, "loss": 0.4765, "step": 1979 }, { "epoch": 0.08220073793844286, "grad_norm": 2.754979372024536, "learning_rate": 9.92875050885521e-06, "loss": 0.6127, "step": 1980 }, { "epoch": 0.0822422534626542, "grad_norm": 3.241434097290039, "learning_rate": 9.9286373699623e-06, "loss": 0.4648, "step": 1981 }, { "epoch": 0.08228376898686553, "grad_norm": 2.508913278579712, "learning_rate": 9.928524141958204e-06, "loss": 0.4808, "step": 1982 }, { "epoch": 0.08232528451107686, "grad_norm": 2.202235460281372, "learning_rate": 9.928410824844974e-06, "loss": 0.4316, "step": 1983 }, { "epoch": 0.08236680003528819, "grad_norm": 2.635518789291382, "learning_rate": 9.928297418624655e-06, "loss": 0.5651, "step": 1984 }, { "epoch": 0.08240831555949953, "grad_norm": 2.753992795944214, "learning_rate": 9.928183923299297e-06, "loss": 0.5283, "step": 1985 }, { "epoch": 0.08244983108371086, "grad_norm": 2.5897083282470703, "learning_rate": 9.928070338870952e-06, "loss": 0.5164, "step": 1986 }, { "epoch": 0.0824913466079222, "grad_norm": 2.714099407196045, "learning_rate": 9.927956665341676e-06, "loss": 0.6301, "step": 1987 }, { "epoch": 0.08253286213213354, "grad_norm": 2.837691068649292, "learning_rate": 9.927842902713525e-06, "loss": 0.5507, "step": 1988 }, { "epoch": 0.08257437765634487, "grad_norm": 2.458998203277588, "learning_rate": 9.927729050988553e-06, "loss": 0.5003, "step": 1989 }, { "epoch": 0.0826158931805562, "grad_norm": 2.5420210361480713, "learning_rate": 9.927615110168817e-06, "loss": 0.4695, "step": 1990 }, { "epoch": 0.08265740870476754, "grad_norm": 3.0748353004455566, "learning_rate": 9.927501080256382e-06, "loss": 0.5373, "step": 1991 }, { "epoch": 0.08269892422897887, "grad_norm": 2.3497607707977295, "learning_rate": 9.927386961253307e-06, "loss": 0.5924, "step": 1992 }, { "epoch": 0.0827404397531902, "grad_norm": 2.714777946472168, "learning_rate": 9.927272753161655e-06, "loss": 0.5459, "step": 1993 }, { "epoch": 0.08278195527740154, "grad_norm": 2.7516262531280518, "learning_rate": 9.927158455983493e-06, "loss": 0.5264, "step": 1994 }, { "epoch": 0.08282347080161288, "grad_norm": 2.657984495162964, "learning_rate": 9.927044069720883e-06, "loss": 0.533, "step": 1995 }, { "epoch": 0.08286498632582422, "grad_norm": 2.719388008117676, "learning_rate": 9.926929594375898e-06, "loss": 0.5896, "step": 1996 }, { "epoch": 0.08290650185003555, "grad_norm": 2.879822015762329, "learning_rate": 9.926815029950605e-06, "loss": 0.5713, "step": 1997 }, { "epoch": 0.08294801737424688, "grad_norm": 2.6540727615356445, "learning_rate": 9.926700376447076e-06, "loss": 0.5514, "step": 1998 }, { "epoch": 0.08298953289845822, "grad_norm": 2.2141218185424805, "learning_rate": 9.926585633867385e-06, "loss": 0.4185, "step": 1999 }, { "epoch": 0.08303104842266955, "grad_norm": 2.7675399780273438, "learning_rate": 9.926470802213605e-06, "loss": 0.424, "step": 2000 }, { "epoch": 0.08307256394688088, "grad_norm": 2.550023317337036, "learning_rate": 9.926355881487815e-06, "loss": 0.4865, "step": 2001 }, { "epoch": 0.08311407947109223, "grad_norm": 2.3237006664276123, "learning_rate": 9.926240871692088e-06, "loss": 0.6086, "step": 2002 }, { "epoch": 0.08315559499530356, "grad_norm": 2.363237142562866, "learning_rate": 9.926125772828507e-06, "loss": 0.4495, "step": 2003 }, { "epoch": 0.0831971105195149, "grad_norm": 2.684175491333008, "learning_rate": 9.926010584899152e-06, "loss": 0.4422, "step": 2004 }, { "epoch": 0.08323862604372623, "grad_norm": 3.1946229934692383, "learning_rate": 9.925895307906108e-06, "loss": 0.5895, "step": 2005 }, { "epoch": 0.08328014156793756, "grad_norm": 3.149698495864868, "learning_rate": 9.925779941851455e-06, "loss": 0.5287, "step": 2006 }, { "epoch": 0.08332165709214889, "grad_norm": 2.3591599464416504, "learning_rate": 9.92566448673728e-06, "loss": 0.4187, "step": 2007 }, { "epoch": 0.08336317261636023, "grad_norm": 3.383164405822754, "learning_rate": 9.925548942565675e-06, "loss": 0.6856, "step": 2008 }, { "epoch": 0.08340468814057156, "grad_norm": 2.2047619819641113, "learning_rate": 9.925433309338723e-06, "loss": 0.4097, "step": 2009 }, { "epoch": 0.0834462036647829, "grad_norm": 2.3046486377716064, "learning_rate": 9.925317587058516e-06, "loss": 0.4526, "step": 2010 }, { "epoch": 0.08348771918899424, "grad_norm": 3.059513807296753, "learning_rate": 9.92520177572715e-06, "loss": 0.4888, "step": 2011 }, { "epoch": 0.08352923471320557, "grad_norm": 2.4766509532928467, "learning_rate": 9.925085875346714e-06, "loss": 0.5678, "step": 2012 }, { "epoch": 0.0835707502374169, "grad_norm": 3.242905616760254, "learning_rate": 9.924969885919307e-06, "loss": 0.4858, "step": 2013 }, { "epoch": 0.08361226576162824, "grad_norm": 2.615933418273926, "learning_rate": 9.924853807447024e-06, "loss": 0.5409, "step": 2014 }, { "epoch": 0.08365378128583957, "grad_norm": 2.44991135597229, "learning_rate": 9.924737639931965e-06, "loss": 0.5378, "step": 2015 }, { "epoch": 0.0836952968100509, "grad_norm": 3.202993631362915, "learning_rate": 9.92462138337623e-06, "loss": 0.4307, "step": 2016 }, { "epoch": 0.08373681233426224, "grad_norm": 3.0567667484283447, "learning_rate": 9.924505037781921e-06, "loss": 0.469, "step": 2017 }, { "epoch": 0.08377832785847358, "grad_norm": 2.920004367828369, "learning_rate": 9.924388603151143e-06, "loss": 0.4862, "step": 2018 }, { "epoch": 0.08381984338268492, "grad_norm": 2.7309634685516357, "learning_rate": 9.924272079485996e-06, "loss": 0.5268, "step": 2019 }, { "epoch": 0.08386135890689625, "grad_norm": 2.7755720615386963, "learning_rate": 9.924155466788594e-06, "loss": 0.5188, "step": 2020 }, { "epoch": 0.08390287443110758, "grad_norm": 2.523468494415283, "learning_rate": 9.924038765061042e-06, "loss": 0.4691, "step": 2021 }, { "epoch": 0.08394438995531892, "grad_norm": 3.828007459640503, "learning_rate": 9.923921974305449e-06, "loss": 0.5209, "step": 2022 }, { "epoch": 0.08398590547953025, "grad_norm": 2.6074790954589844, "learning_rate": 9.923805094523927e-06, "loss": 0.5506, "step": 2023 }, { "epoch": 0.08402742100374158, "grad_norm": 2.813868761062622, "learning_rate": 9.923688125718588e-06, "loss": 0.5835, "step": 2024 }, { "epoch": 0.08406893652795293, "grad_norm": 3.0593464374542236, "learning_rate": 9.92357106789155e-06, "loss": 0.5533, "step": 2025 }, { "epoch": 0.08411045205216426, "grad_norm": 3.567763090133667, "learning_rate": 9.92345392104493e-06, "loss": 0.3685, "step": 2026 }, { "epoch": 0.0841519675763756, "grad_norm": 2.714963912963867, "learning_rate": 9.923336685180842e-06, "loss": 0.6021, "step": 2027 }, { "epoch": 0.08419348310058693, "grad_norm": 2.4765758514404297, "learning_rate": 9.923219360301407e-06, "loss": 0.4586, "step": 2028 }, { "epoch": 0.08423499862479826, "grad_norm": 2.4913644790649414, "learning_rate": 9.92310194640875e-06, "loss": 0.4927, "step": 2029 }, { "epoch": 0.0842765141490096, "grad_norm": 2.6479177474975586, "learning_rate": 9.922984443504987e-06, "loss": 0.4912, "step": 2030 }, { "epoch": 0.08431802967322093, "grad_norm": 2.6958274841308594, "learning_rate": 9.922866851592248e-06, "loss": 0.4326, "step": 2031 }, { "epoch": 0.08435954519743226, "grad_norm": 2.822638511657715, "learning_rate": 9.922749170672658e-06, "loss": 0.5694, "step": 2032 }, { "epoch": 0.0844010607216436, "grad_norm": 2.6767032146453857, "learning_rate": 9.922631400748344e-06, "loss": 0.6151, "step": 2033 }, { "epoch": 0.08444257624585494, "grad_norm": 3.0287363529205322, "learning_rate": 9.922513541821434e-06, "loss": 0.674, "step": 2034 }, { "epoch": 0.08448409177006627, "grad_norm": 3.0293128490448, "learning_rate": 9.92239559389406e-06, "loss": 0.4999, "step": 2035 }, { "epoch": 0.0845256072942776, "grad_norm": 3.1434593200683594, "learning_rate": 9.922277556968358e-06, "loss": 0.5221, "step": 2036 }, { "epoch": 0.08456712281848894, "grad_norm": 2.5647354125976562, "learning_rate": 9.922159431046457e-06, "loss": 0.5378, "step": 2037 }, { "epoch": 0.08460863834270027, "grad_norm": 2.8030171394348145, "learning_rate": 9.922041216130495e-06, "loss": 0.4484, "step": 2038 }, { "epoch": 0.0846501538669116, "grad_norm": 3.040311574935913, "learning_rate": 9.92192291222261e-06, "loss": 0.5194, "step": 2039 }, { "epoch": 0.08469166939112294, "grad_norm": 2.5229666233062744, "learning_rate": 9.921804519324938e-06, "loss": 0.6028, "step": 2040 }, { "epoch": 0.08473318491533428, "grad_norm": 2.501986265182495, "learning_rate": 9.921686037439622e-06, "loss": 0.5107, "step": 2041 }, { "epoch": 0.08477470043954562, "grad_norm": 2.5284602642059326, "learning_rate": 9.921567466568805e-06, "loss": 0.4493, "step": 2042 }, { "epoch": 0.08481621596375695, "grad_norm": 3.2481496334075928, "learning_rate": 9.92144880671463e-06, "loss": 0.5224, "step": 2043 }, { "epoch": 0.08485773148796828, "grad_norm": 3.168912649154663, "learning_rate": 9.921330057879242e-06, "loss": 0.5006, "step": 2044 }, { "epoch": 0.08489924701217962, "grad_norm": 2.7817904949188232, "learning_rate": 9.921211220064787e-06, "loss": 0.5033, "step": 2045 }, { "epoch": 0.08494076253639095, "grad_norm": 2.796182632446289, "learning_rate": 9.921092293273415e-06, "loss": 0.5691, "step": 2046 }, { "epoch": 0.08498227806060228, "grad_norm": 2.404567241668701, "learning_rate": 9.920973277507277e-06, "loss": 0.4302, "step": 2047 }, { "epoch": 0.08502379358481361, "grad_norm": 2.568233013153076, "learning_rate": 9.920854172768522e-06, "loss": 0.5881, "step": 2048 }, { "epoch": 0.08506530910902496, "grad_norm": 2.4713759422302246, "learning_rate": 9.920734979059306e-06, "loss": 0.6374, "step": 2049 }, { "epoch": 0.0851068246332363, "grad_norm": 2.841127634048462, "learning_rate": 9.920615696381784e-06, "loss": 0.5763, "step": 2050 }, { "epoch": 0.08514834015744763, "grad_norm": 2.4507076740264893, "learning_rate": 9.920496324738112e-06, "loss": 0.5081, "step": 2051 }, { "epoch": 0.08518985568165896, "grad_norm": 2.7041378021240234, "learning_rate": 9.920376864130446e-06, "loss": 0.4209, "step": 2052 }, { "epoch": 0.0852313712058703, "grad_norm": 2.927299976348877, "learning_rate": 9.92025731456095e-06, "loss": 0.5191, "step": 2053 }, { "epoch": 0.08527288673008163, "grad_norm": 2.7375900745391846, "learning_rate": 9.920137676031784e-06, "loss": 0.5198, "step": 2054 }, { "epoch": 0.08531440225429296, "grad_norm": 2.873145341873169, "learning_rate": 9.920017948545109e-06, "loss": 0.5613, "step": 2055 }, { "epoch": 0.0853559177785043, "grad_norm": 2.8306658267974854, "learning_rate": 9.919898132103092e-06, "loss": 0.4848, "step": 2056 }, { "epoch": 0.08539743330271564, "grad_norm": 2.724144458770752, "learning_rate": 9.919778226707898e-06, "loss": 0.5606, "step": 2057 }, { "epoch": 0.08543894882692697, "grad_norm": 2.4438400268554688, "learning_rate": 9.919658232361698e-06, "loss": 0.5312, "step": 2058 }, { "epoch": 0.0854804643511383, "grad_norm": 2.990053415298462, "learning_rate": 9.919538149066658e-06, "loss": 0.5502, "step": 2059 }, { "epoch": 0.08552197987534964, "grad_norm": 2.905545949935913, "learning_rate": 9.91941797682495e-06, "loss": 0.5454, "step": 2060 }, { "epoch": 0.08556349539956097, "grad_norm": 2.800898313522339, "learning_rate": 9.919297715638747e-06, "loss": 0.5325, "step": 2061 }, { "epoch": 0.0856050109237723, "grad_norm": 3.0478618144989014, "learning_rate": 9.919177365510224e-06, "loss": 0.5373, "step": 2062 }, { "epoch": 0.08564652644798364, "grad_norm": 2.203582525253296, "learning_rate": 9.919056926441555e-06, "loss": 0.398, "step": 2063 }, { "epoch": 0.08568804197219498, "grad_norm": 2.896421432495117, "learning_rate": 9.91893639843492e-06, "loss": 0.5497, "step": 2064 }, { "epoch": 0.08572955749640632, "grad_norm": 2.527357816696167, "learning_rate": 9.918815781492497e-06, "loss": 0.4622, "step": 2065 }, { "epoch": 0.08577107302061765, "grad_norm": 2.434882164001465, "learning_rate": 9.918695075616468e-06, "loss": 0.5557, "step": 2066 }, { "epoch": 0.08581258854482898, "grad_norm": 2.8685476779937744, "learning_rate": 9.918574280809012e-06, "loss": 0.526, "step": 2067 }, { "epoch": 0.08585410406904032, "grad_norm": 2.7426774501800537, "learning_rate": 9.918453397072317e-06, "loss": 0.3435, "step": 2068 }, { "epoch": 0.08589561959325165, "grad_norm": 3.2041709423065186, "learning_rate": 9.918332424408566e-06, "loss": 0.4337, "step": 2069 }, { "epoch": 0.08593713511746298, "grad_norm": 3.0900659561157227, "learning_rate": 9.918211362819947e-06, "loss": 0.604, "step": 2070 }, { "epoch": 0.08597865064167431, "grad_norm": 2.6000289916992188, "learning_rate": 9.91809021230865e-06, "loss": 0.5205, "step": 2071 }, { "epoch": 0.08602016616588566, "grad_norm": 2.914365768432617, "learning_rate": 9.917968972876864e-06, "loss": 0.5006, "step": 2072 }, { "epoch": 0.086061681690097, "grad_norm": 2.758763313293457, "learning_rate": 9.91784764452678e-06, "loss": 0.44, "step": 2073 }, { "epoch": 0.08610319721430833, "grad_norm": 2.8739371299743652, "learning_rate": 9.917726227260596e-06, "loss": 0.557, "step": 2074 }, { "epoch": 0.08614471273851966, "grad_norm": 3.1462018489837646, "learning_rate": 9.917604721080504e-06, "loss": 0.5115, "step": 2075 }, { "epoch": 0.086186228262731, "grad_norm": 2.5627896785736084, "learning_rate": 9.9174831259887e-06, "loss": 0.5749, "step": 2076 }, { "epoch": 0.08622774378694233, "grad_norm": 2.9592854976654053, "learning_rate": 9.917361441987383e-06, "loss": 0.3599, "step": 2077 }, { "epoch": 0.08626925931115366, "grad_norm": 2.9686360359191895, "learning_rate": 9.917239669078756e-06, "loss": 0.5165, "step": 2078 }, { "epoch": 0.086310774835365, "grad_norm": 2.655402660369873, "learning_rate": 9.917117807265016e-06, "loss": 0.6598, "step": 2079 }, { "epoch": 0.08635229035957634, "grad_norm": 2.726823091506958, "learning_rate": 9.916995856548371e-06, "loss": 0.5416, "step": 2080 }, { "epoch": 0.08639380588378767, "grad_norm": 3.094644069671631, "learning_rate": 9.916873816931022e-06, "loss": 0.4708, "step": 2081 }, { "epoch": 0.086435321407999, "grad_norm": 2.5992114543914795, "learning_rate": 9.916751688415178e-06, "loss": 0.587, "step": 2082 }, { "epoch": 0.08647683693221034, "grad_norm": 2.329698324203491, "learning_rate": 9.916629471003045e-06, "loss": 0.5294, "step": 2083 }, { "epoch": 0.08651835245642167, "grad_norm": 2.677600860595703, "learning_rate": 9.916507164696835e-06, "loss": 0.6099, "step": 2084 }, { "epoch": 0.086559867980633, "grad_norm": 2.592723846435547, "learning_rate": 9.91638476949876e-06, "loss": 0.4805, "step": 2085 }, { "epoch": 0.08660138350484434, "grad_norm": 2.502376079559326, "learning_rate": 9.91626228541103e-06, "loss": 0.5073, "step": 2086 }, { "epoch": 0.08664289902905568, "grad_norm": 2.6320316791534424, "learning_rate": 9.91613971243586e-06, "loss": 0.4731, "step": 2087 }, { "epoch": 0.08668441455326702, "grad_norm": 3.15401291847229, "learning_rate": 9.916017050575468e-06, "loss": 0.6458, "step": 2088 }, { "epoch": 0.08672593007747835, "grad_norm": 2.3944787979125977, "learning_rate": 9.91589429983207e-06, "loss": 0.5908, "step": 2089 }, { "epoch": 0.08676744560168968, "grad_norm": 2.8485069274902344, "learning_rate": 9.915771460207888e-06, "loss": 0.5479, "step": 2090 }, { "epoch": 0.08680896112590102, "grad_norm": 2.4205923080444336, "learning_rate": 9.91564853170514e-06, "loss": 0.4996, "step": 2091 }, { "epoch": 0.08685047665011235, "grad_norm": 2.3833067417144775, "learning_rate": 9.915525514326048e-06, "loss": 0.4857, "step": 2092 }, { "epoch": 0.08689199217432368, "grad_norm": 2.9593560695648193, "learning_rate": 9.915402408072839e-06, "loss": 0.4606, "step": 2093 }, { "epoch": 0.08693350769853501, "grad_norm": 2.1889235973358154, "learning_rate": 9.91527921294774e-06, "loss": 0.5048, "step": 2094 }, { "epoch": 0.08697502322274636, "grad_norm": 2.6018292903900146, "learning_rate": 9.915155928952973e-06, "loss": 0.5377, "step": 2095 }, { "epoch": 0.0870165387469577, "grad_norm": 2.409663677215576, "learning_rate": 9.91503255609077e-06, "loss": 0.4605, "step": 2096 }, { "epoch": 0.08705805427116903, "grad_norm": 2.635505437850952, "learning_rate": 9.914909094363363e-06, "loss": 0.412, "step": 2097 }, { "epoch": 0.08709956979538036, "grad_norm": 3.672804117202759, "learning_rate": 9.914785543772985e-06, "loss": 0.4741, "step": 2098 }, { "epoch": 0.0871410853195917, "grad_norm": 2.9382898807525635, "learning_rate": 9.914661904321865e-06, "loss": 0.478, "step": 2099 }, { "epoch": 0.08718260084380303, "grad_norm": 2.5699026584625244, "learning_rate": 9.914538176012242e-06, "loss": 0.5142, "step": 2100 }, { "epoch": 0.08722411636801436, "grad_norm": 3.235842227935791, "learning_rate": 9.914414358846351e-06, "loss": 0.5918, "step": 2101 }, { "epoch": 0.08726563189222569, "grad_norm": 2.7280304431915283, "learning_rate": 9.914290452826435e-06, "loss": 0.6115, "step": 2102 }, { "epoch": 0.08730714741643704, "grad_norm": 2.291396379470825, "learning_rate": 9.914166457954729e-06, "loss": 0.5036, "step": 2103 }, { "epoch": 0.08734866294064837, "grad_norm": 2.6719229221343994, "learning_rate": 9.914042374233476e-06, "loss": 0.5939, "step": 2104 }, { "epoch": 0.0873901784648597, "grad_norm": 2.9023778438568115, "learning_rate": 9.913918201664922e-06, "loss": 0.5819, "step": 2105 }, { "epoch": 0.08743169398907104, "grad_norm": 2.7277238368988037, "learning_rate": 9.91379394025131e-06, "loss": 0.5494, "step": 2106 }, { "epoch": 0.08747320951328237, "grad_norm": 2.4311470985412598, "learning_rate": 9.913669589994887e-06, "loss": 0.3707, "step": 2107 }, { "epoch": 0.0875147250374937, "grad_norm": 2.7301454544067383, "learning_rate": 9.913545150897904e-06, "loss": 0.5795, "step": 2108 }, { "epoch": 0.08755624056170504, "grad_norm": 2.134005069732666, "learning_rate": 9.913420622962606e-06, "loss": 0.4527, "step": 2109 }, { "epoch": 0.08759775608591638, "grad_norm": 2.923013210296631, "learning_rate": 9.913296006191246e-06, "loss": 0.605, "step": 2110 }, { "epoch": 0.08763927161012772, "grad_norm": 2.3329977989196777, "learning_rate": 9.913171300586081e-06, "loss": 0.3895, "step": 2111 }, { "epoch": 0.08768078713433905, "grad_norm": 2.5714988708496094, "learning_rate": 9.913046506149361e-06, "loss": 0.4616, "step": 2112 }, { "epoch": 0.08772230265855038, "grad_norm": 2.924259901046753, "learning_rate": 9.912921622883346e-06, "loss": 0.5197, "step": 2113 }, { "epoch": 0.08776381818276172, "grad_norm": 2.7527859210968018, "learning_rate": 9.912796650790289e-06, "loss": 0.4353, "step": 2114 }, { "epoch": 0.08780533370697305, "grad_norm": 2.4916255474090576, "learning_rate": 9.912671589872453e-06, "loss": 0.3351, "step": 2115 }, { "epoch": 0.08784684923118438, "grad_norm": 2.7996411323547363, "learning_rate": 9.9125464401321e-06, "loss": 0.5827, "step": 2116 }, { "epoch": 0.08788836475539571, "grad_norm": 2.2177348136901855, "learning_rate": 9.912421201571491e-06, "loss": 0.411, "step": 2117 }, { "epoch": 0.08792988027960706, "grad_norm": 2.6471681594848633, "learning_rate": 9.91229587419289e-06, "loss": 0.4208, "step": 2118 }, { "epoch": 0.0879713958038184, "grad_norm": 2.647141218185425, "learning_rate": 9.912170457998564e-06, "loss": 0.4957, "step": 2119 }, { "epoch": 0.08801291132802973, "grad_norm": 2.9916467666625977, "learning_rate": 9.912044952990782e-06, "loss": 0.5642, "step": 2120 }, { "epoch": 0.08805442685224106, "grad_norm": 2.616589307785034, "learning_rate": 9.91191935917181e-06, "loss": 0.4514, "step": 2121 }, { "epoch": 0.0880959423764524, "grad_norm": 2.566528558731079, "learning_rate": 9.91179367654392e-06, "loss": 0.4805, "step": 2122 }, { "epoch": 0.08813745790066373, "grad_norm": 2.7063064575195312, "learning_rate": 9.911667905109385e-06, "loss": 0.4871, "step": 2123 }, { "epoch": 0.08817897342487506, "grad_norm": 2.5480051040649414, "learning_rate": 9.91154204487048e-06, "loss": 0.6474, "step": 2124 }, { "epoch": 0.08822048894908639, "grad_norm": 3.1732308864593506, "learning_rate": 9.911416095829477e-06, "loss": 0.6473, "step": 2125 }, { "epoch": 0.08826200447329774, "grad_norm": 2.5543100833892822, "learning_rate": 9.911290057988657e-06, "loss": 0.6725, "step": 2126 }, { "epoch": 0.08830351999750907, "grad_norm": 2.858482599258423, "learning_rate": 9.911163931350296e-06, "loss": 0.7189, "step": 2127 }, { "epoch": 0.0883450355217204, "grad_norm": 2.4583709239959717, "learning_rate": 9.911037715916675e-06, "loss": 0.5469, "step": 2128 }, { "epoch": 0.08838655104593174, "grad_norm": 3.31756329536438, "learning_rate": 9.910911411690078e-06, "loss": 0.5852, "step": 2129 }, { "epoch": 0.08842806657014307, "grad_norm": 2.3180792331695557, "learning_rate": 9.910785018672786e-06, "loss": 0.4884, "step": 2130 }, { "epoch": 0.0884695820943544, "grad_norm": 2.7048730850219727, "learning_rate": 9.910658536867087e-06, "loss": 0.4921, "step": 2131 }, { "epoch": 0.08851109761856574, "grad_norm": 2.2012217044830322, "learning_rate": 9.910531966275265e-06, "loss": 0.4383, "step": 2132 }, { "epoch": 0.08855261314277708, "grad_norm": 2.253305435180664, "learning_rate": 9.91040530689961e-06, "loss": 0.4254, "step": 2133 }, { "epoch": 0.08859412866698842, "grad_norm": 3.5683672428131104, "learning_rate": 9.910278558742411e-06, "loss": 0.6625, "step": 2134 }, { "epoch": 0.08863564419119975, "grad_norm": 2.627969264984131, "learning_rate": 9.910151721805961e-06, "loss": 0.4744, "step": 2135 }, { "epoch": 0.08867715971541108, "grad_norm": 2.1111791133880615, "learning_rate": 9.910024796092552e-06, "loss": 0.5424, "step": 2136 }, { "epoch": 0.08871867523962242, "grad_norm": 3.0536744594573975, "learning_rate": 9.90989778160448e-06, "loss": 0.567, "step": 2137 }, { "epoch": 0.08876019076383375, "grad_norm": 3.874415397644043, "learning_rate": 9.909770678344041e-06, "loss": 0.6644, "step": 2138 }, { "epoch": 0.08880170628804508, "grad_norm": 2.4919965267181396, "learning_rate": 9.909643486313533e-06, "loss": 0.6401, "step": 2139 }, { "epoch": 0.08884322181225641, "grad_norm": 2.4196808338165283, "learning_rate": 9.909516205515258e-06, "loss": 0.4976, "step": 2140 }, { "epoch": 0.08888473733646776, "grad_norm": 2.318814754486084, "learning_rate": 9.909388835951513e-06, "loss": 0.4886, "step": 2141 }, { "epoch": 0.0889262528606791, "grad_norm": 2.638291120529175, "learning_rate": 9.909261377624601e-06, "loss": 0.5987, "step": 2142 }, { "epoch": 0.08896776838489043, "grad_norm": 2.858893394470215, "learning_rate": 9.90913383053683e-06, "loss": 0.5714, "step": 2143 }, { "epoch": 0.08900928390910176, "grad_norm": 2.470836639404297, "learning_rate": 9.909006194690505e-06, "loss": 0.459, "step": 2144 }, { "epoch": 0.0890507994333131, "grad_norm": 2.9557745456695557, "learning_rate": 9.908878470087931e-06, "loss": 0.5172, "step": 2145 }, { "epoch": 0.08909231495752443, "grad_norm": 2.8881492614746094, "learning_rate": 9.908750656731421e-06, "loss": 0.374, "step": 2146 }, { "epoch": 0.08913383048173576, "grad_norm": 2.452699661254883, "learning_rate": 9.908622754623285e-06, "loss": 0.6113, "step": 2147 }, { "epoch": 0.08917534600594709, "grad_norm": 2.7407147884368896, "learning_rate": 9.908494763765833e-06, "loss": 0.5014, "step": 2148 }, { "epoch": 0.08921686153015844, "grad_norm": 2.366473436355591, "learning_rate": 9.908366684161382e-06, "loss": 0.4603, "step": 2149 }, { "epoch": 0.08925837705436977, "grad_norm": 3.059034824371338, "learning_rate": 9.908238515812245e-06, "loss": 0.6243, "step": 2150 }, { "epoch": 0.0892998925785811, "grad_norm": 2.1517117023468018, "learning_rate": 9.908110258720742e-06, "loss": 0.4396, "step": 2151 }, { "epoch": 0.08934140810279244, "grad_norm": 2.7307026386260986, "learning_rate": 9.90798191288919e-06, "loss": 0.5134, "step": 2152 }, { "epoch": 0.08938292362700377, "grad_norm": 2.597836494445801, "learning_rate": 9.907853478319912e-06, "loss": 0.4409, "step": 2153 }, { "epoch": 0.0894244391512151, "grad_norm": 2.6548268795013428, "learning_rate": 9.907724955015227e-06, "loss": 0.5346, "step": 2154 }, { "epoch": 0.08946595467542644, "grad_norm": 2.51560640335083, "learning_rate": 9.907596342977461e-06, "loss": 0.4662, "step": 2155 }, { "epoch": 0.08950747019963777, "grad_norm": 2.9100308418273926, "learning_rate": 9.907467642208938e-06, "loss": 0.5221, "step": 2156 }, { "epoch": 0.08954898572384912, "grad_norm": 2.5898234844207764, "learning_rate": 9.907338852711988e-06, "loss": 0.3535, "step": 2157 }, { "epoch": 0.08959050124806045, "grad_norm": 2.6841793060302734, "learning_rate": 9.907209974488934e-06, "loss": 0.5531, "step": 2158 }, { "epoch": 0.08963201677227178, "grad_norm": 2.6937832832336426, "learning_rate": 9.907081007542112e-06, "loss": 0.5336, "step": 2159 }, { "epoch": 0.08967353229648312, "grad_norm": 2.968397855758667, "learning_rate": 9.906951951873848e-06, "loss": 0.5775, "step": 2160 }, { "epoch": 0.08971504782069445, "grad_norm": 2.848701238632202, "learning_rate": 9.90682280748648e-06, "loss": 0.3691, "step": 2161 }, { "epoch": 0.08975656334490578, "grad_norm": 2.7015182971954346, "learning_rate": 9.906693574382342e-06, "loss": 0.4328, "step": 2162 }, { "epoch": 0.08979807886911712, "grad_norm": 3.187800407409668, "learning_rate": 9.906564252563769e-06, "loss": 0.5073, "step": 2163 }, { "epoch": 0.08983959439332846, "grad_norm": 2.244229793548584, "learning_rate": 9.906434842033102e-06, "loss": 0.6298, "step": 2164 }, { "epoch": 0.0898811099175398, "grad_norm": 2.883744955062866, "learning_rate": 9.906305342792676e-06, "loss": 0.5271, "step": 2165 }, { "epoch": 0.08992262544175113, "grad_norm": 2.855637311935425, "learning_rate": 9.906175754844836e-06, "loss": 0.597, "step": 2166 }, { "epoch": 0.08996414096596246, "grad_norm": 3.0240230560302734, "learning_rate": 9.906046078191924e-06, "loss": 0.6392, "step": 2167 }, { "epoch": 0.0900056564901738, "grad_norm": 2.9307940006256104, "learning_rate": 9.905916312836286e-06, "loss": 0.4143, "step": 2168 }, { "epoch": 0.09004717201438513, "grad_norm": 2.3740131855010986, "learning_rate": 9.905786458780267e-06, "loss": 0.4661, "step": 2169 }, { "epoch": 0.09008868753859646, "grad_norm": 3.0534017086029053, "learning_rate": 9.905656516026214e-06, "loss": 0.529, "step": 2170 }, { "epoch": 0.09013020306280779, "grad_norm": 2.4715192317962646, "learning_rate": 9.905526484576478e-06, "loss": 0.4647, "step": 2171 }, { "epoch": 0.09017171858701914, "grad_norm": 3.3726580142974854, "learning_rate": 9.905396364433409e-06, "loss": 0.4202, "step": 2172 }, { "epoch": 0.09021323411123047, "grad_norm": 2.9461584091186523, "learning_rate": 9.90526615559936e-06, "loss": 0.4264, "step": 2173 }, { "epoch": 0.0902547496354418, "grad_norm": 2.6940784454345703, "learning_rate": 9.905135858076685e-06, "loss": 0.5429, "step": 2174 }, { "epoch": 0.09029626515965314, "grad_norm": 2.5397090911865234, "learning_rate": 9.90500547186774e-06, "loss": 0.4778, "step": 2175 }, { "epoch": 0.09033778068386447, "grad_norm": 2.470494508743286, "learning_rate": 9.90487499697488e-06, "loss": 0.7038, "step": 2176 }, { "epoch": 0.0903792962080758, "grad_norm": 2.5945756435394287, "learning_rate": 9.904744433400468e-06, "loss": 0.5219, "step": 2177 }, { "epoch": 0.09042081173228714, "grad_norm": 2.8230979442596436, "learning_rate": 9.904613781146863e-06, "loss": 0.5584, "step": 2178 }, { "epoch": 0.09046232725649847, "grad_norm": 2.7029385566711426, "learning_rate": 9.904483040216428e-06, "loss": 0.4328, "step": 2179 }, { "epoch": 0.09050384278070982, "grad_norm": 2.4445290565490723, "learning_rate": 9.904352210611525e-06, "loss": 0.6437, "step": 2180 }, { "epoch": 0.09054535830492115, "grad_norm": 2.3861441612243652, "learning_rate": 9.904221292334521e-06, "loss": 0.503, "step": 2181 }, { "epoch": 0.09058687382913248, "grad_norm": 2.928870916366577, "learning_rate": 9.904090285387782e-06, "loss": 0.5559, "step": 2182 }, { "epoch": 0.09062838935334382, "grad_norm": 2.7447893619537354, "learning_rate": 9.903959189773678e-06, "loss": 0.4817, "step": 2183 }, { "epoch": 0.09066990487755515, "grad_norm": 4.137704849243164, "learning_rate": 9.903828005494577e-06, "loss": 0.5707, "step": 2184 }, { "epoch": 0.09071142040176648, "grad_norm": 2.1478052139282227, "learning_rate": 9.903696732552853e-06, "loss": 0.4646, "step": 2185 }, { "epoch": 0.09075293592597782, "grad_norm": 2.6803836822509766, "learning_rate": 9.903565370950878e-06, "loss": 0.5862, "step": 2186 }, { "epoch": 0.09079445145018916, "grad_norm": 2.420983076095581, "learning_rate": 9.903433920691028e-06, "loss": 0.5428, "step": 2187 }, { "epoch": 0.0908359669744005, "grad_norm": 2.707629919052124, "learning_rate": 9.903302381775682e-06, "loss": 0.5135, "step": 2188 }, { "epoch": 0.09087748249861183, "grad_norm": 2.4065802097320557, "learning_rate": 9.903170754207211e-06, "loss": 0.5016, "step": 2189 }, { "epoch": 0.09091899802282316, "grad_norm": 2.4015932083129883, "learning_rate": 9.903039037988003e-06, "loss": 0.5442, "step": 2190 }, { "epoch": 0.0909605135470345, "grad_norm": 2.582972764968872, "learning_rate": 9.902907233120435e-06, "loss": 0.5286, "step": 2191 }, { "epoch": 0.09100202907124583, "grad_norm": 3.547797441482544, "learning_rate": 9.902775339606892e-06, "loss": 0.5223, "step": 2192 }, { "epoch": 0.09104354459545716, "grad_norm": 3.478764772415161, "learning_rate": 9.902643357449755e-06, "loss": 0.5922, "step": 2193 }, { "epoch": 0.09108506011966849, "grad_norm": 2.4797985553741455, "learning_rate": 9.902511286651415e-06, "loss": 0.4508, "step": 2194 }, { "epoch": 0.09112657564387984, "grad_norm": 2.686075210571289, "learning_rate": 9.902379127214258e-06, "loss": 0.5827, "step": 2195 }, { "epoch": 0.09116809116809117, "grad_norm": 2.8064181804656982, "learning_rate": 9.902246879140674e-06, "loss": 0.4638, "step": 2196 }, { "epoch": 0.0912096066923025, "grad_norm": 2.901331901550293, "learning_rate": 9.902114542433052e-06, "loss": 0.4765, "step": 2197 }, { "epoch": 0.09125112221651384, "grad_norm": 2.5520739555358887, "learning_rate": 9.901982117093786e-06, "loss": 0.5271, "step": 2198 }, { "epoch": 0.09129263774072517, "grad_norm": 3.355832576751709, "learning_rate": 9.901849603125271e-06, "loss": 0.4862, "step": 2199 }, { "epoch": 0.0913341532649365, "grad_norm": 2.637272596359253, "learning_rate": 9.901717000529902e-06, "loss": 0.5619, "step": 2200 }, { "epoch": 0.09137566878914784, "grad_norm": 3.0847790241241455, "learning_rate": 9.901584309310078e-06, "loss": 0.5813, "step": 2201 }, { "epoch": 0.09141718431335917, "grad_norm": 3.364659547805786, "learning_rate": 9.901451529468196e-06, "loss": 0.6634, "step": 2202 }, { "epoch": 0.09145869983757052, "grad_norm": 2.9363582134246826, "learning_rate": 9.901318661006657e-06, "loss": 0.6014, "step": 2203 }, { "epoch": 0.09150021536178185, "grad_norm": 3.0357773303985596, "learning_rate": 9.901185703927864e-06, "loss": 0.4717, "step": 2204 }, { "epoch": 0.09154173088599318, "grad_norm": 2.7435901165008545, "learning_rate": 9.90105265823422e-06, "loss": 0.5561, "step": 2205 }, { "epoch": 0.09158324641020452, "grad_norm": 2.5295262336730957, "learning_rate": 9.900919523928134e-06, "loss": 0.5964, "step": 2206 }, { "epoch": 0.09162476193441585, "grad_norm": 3.447384834289551, "learning_rate": 9.900786301012008e-06, "loss": 0.4765, "step": 2207 }, { "epoch": 0.09166627745862718, "grad_norm": 2.76142954826355, "learning_rate": 9.900652989488255e-06, "loss": 0.6712, "step": 2208 }, { "epoch": 0.09170779298283852, "grad_norm": 2.657634735107422, "learning_rate": 9.900519589359284e-06, "loss": 0.4853, "step": 2209 }, { "epoch": 0.09174930850704985, "grad_norm": 4.5143256187438965, "learning_rate": 9.900386100627503e-06, "loss": 0.4771, "step": 2210 }, { "epoch": 0.0917908240312612, "grad_norm": 2.4957680702209473, "learning_rate": 9.900252523295331e-06, "loss": 0.5738, "step": 2211 }, { "epoch": 0.09183233955547253, "grad_norm": 2.3263211250305176, "learning_rate": 9.90011885736518e-06, "loss": 0.5933, "step": 2212 }, { "epoch": 0.09187385507968386, "grad_norm": 2.4587624073028564, "learning_rate": 9.899985102839469e-06, "loss": 0.5416, "step": 2213 }, { "epoch": 0.0919153706038952, "grad_norm": 3.148601770401001, "learning_rate": 9.899851259720614e-06, "loss": 0.5992, "step": 2214 }, { "epoch": 0.09195688612810653, "grad_norm": 2.326841354370117, "learning_rate": 9.899717328011036e-06, "loss": 0.527, "step": 2215 }, { "epoch": 0.09199840165231786, "grad_norm": 2.66778302192688, "learning_rate": 9.899583307713157e-06, "loss": 0.5255, "step": 2216 }, { "epoch": 0.09203991717652919, "grad_norm": 2.344637155532837, "learning_rate": 9.8994491988294e-06, "loss": 0.5856, "step": 2217 }, { "epoch": 0.09208143270074054, "grad_norm": 2.4109370708465576, "learning_rate": 9.899315001362188e-06, "loss": 0.5147, "step": 2218 }, { "epoch": 0.09212294822495187, "grad_norm": 2.7842373847961426, "learning_rate": 9.899180715313949e-06, "loss": 0.4888, "step": 2219 }, { "epoch": 0.0921644637491632, "grad_norm": 3.8186793327331543, "learning_rate": 9.899046340687111e-06, "loss": 0.6258, "step": 2220 }, { "epoch": 0.09220597927337454, "grad_norm": 2.687680244445801, "learning_rate": 9.898911877484103e-06, "loss": 0.5241, "step": 2221 }, { "epoch": 0.09224749479758587, "grad_norm": 2.4252512454986572, "learning_rate": 9.898777325707358e-06, "loss": 0.4512, "step": 2222 }, { "epoch": 0.0922890103217972, "grad_norm": 2.948518991470337, "learning_rate": 9.898642685359305e-06, "loss": 0.5035, "step": 2223 }, { "epoch": 0.09233052584600854, "grad_norm": 2.53312087059021, "learning_rate": 9.89850795644238e-06, "loss": 0.6141, "step": 2224 }, { "epoch": 0.09237204137021987, "grad_norm": 2.3007254600524902, "learning_rate": 9.898373138959019e-06, "loss": 0.4838, "step": 2225 }, { "epoch": 0.09241355689443122, "grad_norm": 2.8342745304107666, "learning_rate": 9.898238232911662e-06, "loss": 0.5017, "step": 2226 }, { "epoch": 0.09245507241864255, "grad_norm": 3.445845365524292, "learning_rate": 9.898103238302743e-06, "loss": 0.6387, "step": 2227 }, { "epoch": 0.09249658794285388, "grad_norm": 2.8888132572174072, "learning_rate": 9.897968155134708e-06, "loss": 0.6948, "step": 2228 }, { "epoch": 0.09253810346706522, "grad_norm": 2.5751540660858154, "learning_rate": 9.897832983409996e-06, "loss": 0.3731, "step": 2229 }, { "epoch": 0.09257961899127655, "grad_norm": 3.23237943649292, "learning_rate": 9.897697723131052e-06, "loss": 0.4712, "step": 2230 }, { "epoch": 0.09262113451548788, "grad_norm": 2.24954891204834, "learning_rate": 9.897562374300321e-06, "loss": 0.457, "step": 2231 }, { "epoch": 0.09266265003969922, "grad_norm": 3.465200424194336, "learning_rate": 9.897426936920252e-06, "loss": 0.4825, "step": 2232 }, { "epoch": 0.09270416556391055, "grad_norm": 2.530299425125122, "learning_rate": 9.897291410993291e-06, "loss": 0.6237, "step": 2233 }, { "epoch": 0.0927456810881219, "grad_norm": 2.3004679679870605, "learning_rate": 9.897155796521892e-06, "loss": 0.497, "step": 2234 }, { "epoch": 0.09278719661233323, "grad_norm": 2.202364921569824, "learning_rate": 9.897020093508502e-06, "loss": 0.5307, "step": 2235 }, { "epoch": 0.09282871213654456, "grad_norm": 2.7131123542785645, "learning_rate": 9.896884301955578e-06, "loss": 0.4256, "step": 2236 }, { "epoch": 0.0928702276607559, "grad_norm": 2.646132469177246, "learning_rate": 9.896748421865573e-06, "loss": 0.41, "step": 2237 }, { "epoch": 0.09291174318496723, "grad_norm": 2.6660635471343994, "learning_rate": 9.896612453240947e-06, "loss": 0.5108, "step": 2238 }, { "epoch": 0.09295325870917856, "grad_norm": 2.400125741958618, "learning_rate": 9.896476396084158e-06, "loss": 0.5144, "step": 2239 }, { "epoch": 0.09299477423338989, "grad_norm": 3.0478200912475586, "learning_rate": 9.896340250397662e-06, "loss": 0.5883, "step": 2240 }, { "epoch": 0.09303628975760124, "grad_norm": 3.2439181804656982, "learning_rate": 9.896204016183924e-06, "loss": 0.6405, "step": 2241 }, { "epoch": 0.09307780528181257, "grad_norm": 2.880051374435425, "learning_rate": 9.896067693445406e-06, "loss": 0.4841, "step": 2242 }, { "epoch": 0.0931193208060239, "grad_norm": 2.801706552505493, "learning_rate": 9.895931282184574e-06, "loss": 0.6122, "step": 2243 }, { "epoch": 0.09316083633023524, "grad_norm": 2.6609644889831543, "learning_rate": 9.89579478240389e-06, "loss": 0.631, "step": 2244 }, { "epoch": 0.09320235185444657, "grad_norm": 2.9119250774383545, "learning_rate": 9.895658194105829e-06, "loss": 0.6567, "step": 2245 }, { "epoch": 0.0932438673786579, "grad_norm": 3.2250704765319824, "learning_rate": 9.895521517292854e-06, "loss": 0.4892, "step": 2246 }, { "epoch": 0.09328538290286924, "grad_norm": 3.765510082244873, "learning_rate": 9.89538475196744e-06, "loss": 0.4377, "step": 2247 }, { "epoch": 0.09332689842708057, "grad_norm": 2.340522527694702, "learning_rate": 9.895247898132058e-06, "loss": 0.4462, "step": 2248 }, { "epoch": 0.09336841395129192, "grad_norm": 2.2617669105529785, "learning_rate": 9.895110955789184e-06, "loss": 0.5551, "step": 2249 }, { "epoch": 0.09340992947550325, "grad_norm": 2.941365957260132, "learning_rate": 9.894973924941293e-06, "loss": 0.5809, "step": 2250 }, { "epoch": 0.09345144499971458, "grad_norm": 3.298083543777466, "learning_rate": 9.894836805590863e-06, "loss": 0.3836, "step": 2251 }, { "epoch": 0.09349296052392592, "grad_norm": 2.888561964035034, "learning_rate": 9.89469959774037e-06, "loss": 0.4771, "step": 2252 }, { "epoch": 0.09353447604813725, "grad_norm": 2.9564871788024902, "learning_rate": 9.894562301392301e-06, "loss": 0.6247, "step": 2253 }, { "epoch": 0.09357599157234858, "grad_norm": 3.1122078895568848, "learning_rate": 9.894424916549133e-06, "loss": 0.6068, "step": 2254 }, { "epoch": 0.09361750709655992, "grad_norm": 2.83252215385437, "learning_rate": 9.894287443213353e-06, "loss": 0.4831, "step": 2255 }, { "epoch": 0.09365902262077125, "grad_norm": 2.1972451210021973, "learning_rate": 9.894149881387443e-06, "loss": 0.5324, "step": 2256 }, { "epoch": 0.0937005381449826, "grad_norm": 2.7438242435455322, "learning_rate": 9.894012231073895e-06, "loss": 0.5319, "step": 2257 }, { "epoch": 0.09374205366919393, "grad_norm": 2.9767751693725586, "learning_rate": 9.893874492275192e-06, "loss": 0.5468, "step": 2258 }, { "epoch": 0.09378356919340526, "grad_norm": 2.2056708335876465, "learning_rate": 9.893736664993832e-06, "loss": 0.4465, "step": 2259 }, { "epoch": 0.0938250847176166, "grad_norm": 2.917665958404541, "learning_rate": 9.8935987492323e-06, "loss": 0.6365, "step": 2260 }, { "epoch": 0.09386660024182793, "grad_norm": 3.8816096782684326, "learning_rate": 9.893460744993092e-06, "loss": 0.5612, "step": 2261 }, { "epoch": 0.09390811576603926, "grad_norm": 2.9469985961914062, "learning_rate": 9.893322652278705e-06, "loss": 0.6371, "step": 2262 }, { "epoch": 0.0939496312902506, "grad_norm": 2.534605026245117, "learning_rate": 9.893184471091631e-06, "loss": 0.5069, "step": 2263 }, { "epoch": 0.09399114681446193, "grad_norm": 2.8383102416992188, "learning_rate": 9.893046201434373e-06, "loss": 0.4667, "step": 2264 }, { "epoch": 0.09403266233867327, "grad_norm": 2.7596375942230225, "learning_rate": 9.89290784330943e-06, "loss": 0.5956, "step": 2265 }, { "epoch": 0.0940741778628846, "grad_norm": 2.491428852081299, "learning_rate": 9.892769396719302e-06, "loss": 0.5906, "step": 2266 }, { "epoch": 0.09411569338709594, "grad_norm": 3.0061912536621094, "learning_rate": 9.892630861666495e-06, "loss": 0.6464, "step": 2267 }, { "epoch": 0.09415720891130727, "grad_norm": 2.717750072479248, "learning_rate": 9.89249223815351e-06, "loss": 0.5573, "step": 2268 }, { "epoch": 0.0941987244355186, "grad_norm": 2.443984031677246, "learning_rate": 9.892353526182854e-06, "loss": 0.5009, "step": 2269 }, { "epoch": 0.09424023995972994, "grad_norm": 2.220763921737671, "learning_rate": 9.892214725757038e-06, "loss": 0.5154, "step": 2270 }, { "epoch": 0.09428175548394127, "grad_norm": 3.081686019897461, "learning_rate": 9.89207583687857e-06, "loss": 0.5114, "step": 2271 }, { "epoch": 0.09432327100815262, "grad_norm": 2.9070065021514893, "learning_rate": 9.891936859549959e-06, "loss": 0.6108, "step": 2272 }, { "epoch": 0.09436478653236395, "grad_norm": 2.178736925125122, "learning_rate": 9.89179779377372e-06, "loss": 0.5574, "step": 2273 }, { "epoch": 0.09440630205657528, "grad_norm": 2.5278732776641846, "learning_rate": 9.891658639552368e-06, "loss": 0.4452, "step": 2274 }, { "epoch": 0.09444781758078662, "grad_norm": 3.0041775703430176, "learning_rate": 9.891519396888418e-06, "loss": 0.5518, "step": 2275 }, { "epoch": 0.09448933310499795, "grad_norm": 2.2278528213500977, "learning_rate": 9.891380065784387e-06, "loss": 0.5192, "step": 2276 }, { "epoch": 0.09453084862920928, "grad_norm": 2.1342804431915283, "learning_rate": 9.891240646242795e-06, "loss": 0.5188, "step": 2277 }, { "epoch": 0.09457236415342062, "grad_norm": 2.7138631343841553, "learning_rate": 9.891101138266162e-06, "loss": 0.4477, "step": 2278 }, { "epoch": 0.09461387967763195, "grad_norm": 2.712834119796753, "learning_rate": 9.89096154185701e-06, "loss": 0.4532, "step": 2279 }, { "epoch": 0.0946553952018433, "grad_norm": 3.053663492202759, "learning_rate": 9.890821857017862e-06, "loss": 0.6437, "step": 2280 }, { "epoch": 0.09469691072605463, "grad_norm": 3.0248005390167236, "learning_rate": 9.890682083751249e-06, "loss": 0.5959, "step": 2281 }, { "epoch": 0.09473842625026596, "grad_norm": 3.0594711303710938, "learning_rate": 9.890542222059692e-06, "loss": 0.6048, "step": 2282 }, { "epoch": 0.0947799417744773, "grad_norm": 2.8660736083984375, "learning_rate": 9.890402271945723e-06, "loss": 0.5531, "step": 2283 }, { "epoch": 0.09482145729868863, "grad_norm": 2.834289073944092, "learning_rate": 9.890262233411869e-06, "loss": 0.5483, "step": 2284 }, { "epoch": 0.09486297282289996, "grad_norm": 3.5039162635803223, "learning_rate": 9.890122106460666e-06, "loss": 0.4611, "step": 2285 }, { "epoch": 0.0949044883471113, "grad_norm": 2.235024929046631, "learning_rate": 9.889981891094647e-06, "loss": 0.5294, "step": 2286 }, { "epoch": 0.09494600387132263, "grad_norm": 3.059877872467041, "learning_rate": 9.889841587316344e-06, "loss": 0.4982, "step": 2287 }, { "epoch": 0.09498751939553397, "grad_norm": 2.627592086791992, "learning_rate": 9.889701195128297e-06, "loss": 0.5871, "step": 2288 }, { "epoch": 0.0950290349197453, "grad_norm": 2.9322237968444824, "learning_rate": 9.889560714533043e-06, "loss": 0.5893, "step": 2289 }, { "epoch": 0.09507055044395664, "grad_norm": 2.2580857276916504, "learning_rate": 9.889420145533122e-06, "loss": 0.5443, "step": 2290 }, { "epoch": 0.09511206596816797, "grad_norm": 3.390927314758301, "learning_rate": 9.889279488131074e-06, "loss": 0.5677, "step": 2291 }, { "epoch": 0.0951535814923793, "grad_norm": 3.0619068145751953, "learning_rate": 9.889138742329445e-06, "loss": 0.6083, "step": 2292 }, { "epoch": 0.09519509701659064, "grad_norm": 2.594783306121826, "learning_rate": 9.888997908130777e-06, "loss": 0.4907, "step": 2293 }, { "epoch": 0.09523661254080197, "grad_norm": 2.9194889068603516, "learning_rate": 9.888856985537619e-06, "loss": 0.4314, "step": 2294 }, { "epoch": 0.09527812806501332, "grad_norm": 2.614504337310791, "learning_rate": 9.88871597455252e-06, "loss": 0.6295, "step": 2295 }, { "epoch": 0.09531964358922465, "grad_norm": 2.761096715927124, "learning_rate": 9.888574875178022e-06, "loss": 0.5223, "step": 2296 }, { "epoch": 0.09536115911343598, "grad_norm": 2.084444284439087, "learning_rate": 9.888433687416683e-06, "loss": 0.4052, "step": 2297 }, { "epoch": 0.09540267463764732, "grad_norm": 2.407268524169922, "learning_rate": 9.888292411271055e-06, "loss": 0.5539, "step": 2298 }, { "epoch": 0.09544419016185865, "grad_norm": 2.24179744720459, "learning_rate": 9.88815104674369e-06, "loss": 0.5345, "step": 2299 }, { "epoch": 0.09548570568606998, "grad_norm": 2.548844337463379, "learning_rate": 9.888009593837146e-06, "loss": 0.5748, "step": 2300 }, { "epoch": 0.09552722121028132, "grad_norm": 2.764214277267456, "learning_rate": 9.88786805255398e-06, "loss": 0.4761, "step": 2301 }, { "epoch": 0.09556873673449265, "grad_norm": 2.679675579071045, "learning_rate": 9.88772642289675e-06, "loss": 0.4451, "step": 2302 }, { "epoch": 0.095610252258704, "grad_norm": 1.8724333047866821, "learning_rate": 9.887584704868018e-06, "loss": 0.4705, "step": 2303 }, { "epoch": 0.09565176778291533, "grad_norm": 2.2613868713378906, "learning_rate": 9.887442898470345e-06, "loss": 0.422, "step": 2304 }, { "epoch": 0.09569328330712666, "grad_norm": 2.9417569637298584, "learning_rate": 9.887301003706298e-06, "loss": 0.6776, "step": 2305 }, { "epoch": 0.095734798831338, "grad_norm": 3.2091851234436035, "learning_rate": 9.887159020578437e-06, "loss": 0.6788, "step": 2306 }, { "epoch": 0.09577631435554933, "grad_norm": 2.1563072204589844, "learning_rate": 9.887016949089334e-06, "loss": 0.4092, "step": 2307 }, { "epoch": 0.09581782987976066, "grad_norm": 3.2718985080718994, "learning_rate": 9.886874789241557e-06, "loss": 0.6799, "step": 2308 }, { "epoch": 0.095859345403972, "grad_norm": 3.0761215686798096, "learning_rate": 9.886732541037673e-06, "loss": 0.3842, "step": 2309 }, { "epoch": 0.09590086092818333, "grad_norm": 2.6480023860931396, "learning_rate": 9.886590204480257e-06, "loss": 0.4238, "step": 2310 }, { "epoch": 0.09594237645239467, "grad_norm": 2.988715887069702, "learning_rate": 9.886447779571882e-06, "loss": 0.5685, "step": 2311 }, { "epoch": 0.095983891976606, "grad_norm": 2.661764144897461, "learning_rate": 9.886305266315122e-06, "loss": 0.602, "step": 2312 }, { "epoch": 0.09602540750081734, "grad_norm": 3.1732802391052246, "learning_rate": 9.886162664712556e-06, "loss": 0.4395, "step": 2313 }, { "epoch": 0.09606692302502867, "grad_norm": 3.1882143020629883, "learning_rate": 9.886019974766758e-06, "loss": 0.5517, "step": 2314 }, { "epoch": 0.09610843854924, "grad_norm": 2.917198419570923, "learning_rate": 9.885877196480312e-06, "loss": 0.5658, "step": 2315 }, { "epoch": 0.09614995407345134, "grad_norm": 3.1925957202911377, "learning_rate": 9.885734329855798e-06, "loss": 0.4394, "step": 2316 }, { "epoch": 0.09619146959766267, "grad_norm": 2.8691210746765137, "learning_rate": 9.8855913748958e-06, "loss": 0.5483, "step": 2317 }, { "epoch": 0.096232985121874, "grad_norm": 2.6680221557617188, "learning_rate": 9.8854483316029e-06, "loss": 0.6311, "step": 2318 }, { "epoch": 0.09627450064608535, "grad_norm": 3.1388702392578125, "learning_rate": 9.885305199979687e-06, "loss": 0.5682, "step": 2319 }, { "epoch": 0.09631601617029668, "grad_norm": 2.809584140777588, "learning_rate": 9.885161980028748e-06, "loss": 0.5374, "step": 2320 }, { "epoch": 0.09635753169450802, "grad_norm": 4.317324161529541, "learning_rate": 9.885018671752673e-06, "loss": 0.6805, "step": 2321 }, { "epoch": 0.09639904721871935, "grad_norm": 2.547283887863159, "learning_rate": 9.884875275154051e-06, "loss": 0.3559, "step": 2322 }, { "epoch": 0.09644056274293068, "grad_norm": 3.163609743118286, "learning_rate": 9.884731790235477e-06, "loss": 0.5185, "step": 2323 }, { "epoch": 0.09648207826714202, "grad_norm": 3.0820469856262207, "learning_rate": 9.884588216999544e-06, "loss": 0.5299, "step": 2324 }, { "epoch": 0.09652359379135335, "grad_norm": 3.1675782203674316, "learning_rate": 9.884444555448848e-06, "loss": 0.6261, "step": 2325 }, { "epoch": 0.0965651093155647, "grad_norm": 2.6680331230163574, "learning_rate": 9.884300805585987e-06, "loss": 0.5848, "step": 2326 }, { "epoch": 0.09660662483977603, "grad_norm": 2.6566877365112305, "learning_rate": 9.884156967413558e-06, "loss": 0.4402, "step": 2327 }, { "epoch": 0.09664814036398736, "grad_norm": 2.8909499645233154, "learning_rate": 9.884013040934166e-06, "loss": 0.6042, "step": 2328 }, { "epoch": 0.0966896558881987, "grad_norm": 2.2763543128967285, "learning_rate": 9.883869026150408e-06, "loss": 0.4921, "step": 2329 }, { "epoch": 0.09673117141241003, "grad_norm": 2.7306222915649414, "learning_rate": 9.883724923064892e-06, "loss": 0.4904, "step": 2330 }, { "epoch": 0.09677268693662136, "grad_norm": 2.658944845199585, "learning_rate": 9.883580731680221e-06, "loss": 0.4481, "step": 2331 }, { "epoch": 0.0968142024608327, "grad_norm": 2.900400161743164, "learning_rate": 9.883436451999003e-06, "loss": 0.6539, "step": 2332 }, { "epoch": 0.09685571798504403, "grad_norm": 2.9953744411468506, "learning_rate": 9.883292084023846e-06, "loss": 0.5901, "step": 2333 }, { "epoch": 0.09689723350925537, "grad_norm": 3.2986793518066406, "learning_rate": 9.883147627757361e-06, "loss": 0.4718, "step": 2334 }, { "epoch": 0.0969387490334667, "grad_norm": 2.574697732925415, "learning_rate": 9.883003083202161e-06, "loss": 0.4726, "step": 2335 }, { "epoch": 0.09698026455767804, "grad_norm": 3.143345355987549, "learning_rate": 9.882858450360855e-06, "loss": 0.5128, "step": 2336 }, { "epoch": 0.09702178008188937, "grad_norm": 2.450073003768921, "learning_rate": 9.882713729236062e-06, "loss": 0.4615, "step": 2337 }, { "epoch": 0.0970632956061007, "grad_norm": 2.8823678493499756, "learning_rate": 9.882568919830398e-06, "loss": 0.5601, "step": 2338 }, { "epoch": 0.09710481113031204, "grad_norm": 2.826274871826172, "learning_rate": 9.882424022146479e-06, "loss": 0.7057, "step": 2339 }, { "epoch": 0.09714632665452337, "grad_norm": 2.5406997203826904, "learning_rate": 9.882279036186927e-06, "loss": 0.4988, "step": 2340 }, { "epoch": 0.0971878421787347, "grad_norm": 2.7654030323028564, "learning_rate": 9.882133961954364e-06, "loss": 0.5372, "step": 2341 }, { "epoch": 0.09722935770294605, "grad_norm": 2.4097697734832764, "learning_rate": 9.881988799451412e-06, "loss": 0.4594, "step": 2342 }, { "epoch": 0.09727087322715738, "grad_norm": 2.5843918323516846, "learning_rate": 9.881843548680694e-06, "loss": 0.5056, "step": 2343 }, { "epoch": 0.09731238875136872, "grad_norm": 2.5169646739959717, "learning_rate": 9.881698209644837e-06, "loss": 0.5352, "step": 2344 }, { "epoch": 0.09735390427558005, "grad_norm": 2.724215269088745, "learning_rate": 9.88155278234647e-06, "loss": 0.4424, "step": 2345 }, { "epoch": 0.09739541979979138, "grad_norm": 3.1677889823913574, "learning_rate": 9.881407266788223e-06, "loss": 0.6677, "step": 2346 }, { "epoch": 0.09743693532400272, "grad_norm": 2.7475342750549316, "learning_rate": 9.881261662972725e-06, "loss": 0.4874, "step": 2347 }, { "epoch": 0.09747845084821405, "grad_norm": 2.789746046066284, "learning_rate": 9.881115970902609e-06, "loss": 0.546, "step": 2348 }, { "epoch": 0.0975199663724254, "grad_norm": 2.713017463684082, "learning_rate": 9.880970190580508e-06, "loss": 0.4935, "step": 2349 }, { "epoch": 0.09756148189663673, "grad_norm": 3.180522918701172, "learning_rate": 9.880824322009061e-06, "loss": 0.5609, "step": 2350 }, { "epoch": 0.09760299742084806, "grad_norm": 2.415286064147949, "learning_rate": 9.880678365190903e-06, "loss": 0.58, "step": 2351 }, { "epoch": 0.0976445129450594, "grad_norm": 2.65511155128479, "learning_rate": 9.880532320128673e-06, "loss": 0.4989, "step": 2352 }, { "epoch": 0.09768602846927073, "grad_norm": 3.1623289585113525, "learning_rate": 9.880386186825012e-06, "loss": 0.5509, "step": 2353 }, { "epoch": 0.09772754399348206, "grad_norm": 3.084972620010376, "learning_rate": 9.88023996528256e-06, "loss": 0.5184, "step": 2354 }, { "epoch": 0.0977690595176934, "grad_norm": 2.6378390789031982, "learning_rate": 9.880093655503966e-06, "loss": 0.5727, "step": 2355 }, { "epoch": 0.09781057504190473, "grad_norm": 2.8607401847839355, "learning_rate": 9.87994725749187e-06, "loss": 0.4987, "step": 2356 }, { "epoch": 0.09785209056611607, "grad_norm": 2.8769357204437256, "learning_rate": 9.879800771248925e-06, "loss": 0.383, "step": 2357 }, { "epoch": 0.0978936060903274, "grad_norm": 2.833045721054077, "learning_rate": 9.87965419677777e-06, "loss": 0.4554, "step": 2358 }, { "epoch": 0.09793512161453874, "grad_norm": 3.235056161880493, "learning_rate": 9.879507534081065e-06, "loss": 0.5972, "step": 2359 }, { "epoch": 0.09797663713875007, "grad_norm": 3.500980854034424, "learning_rate": 9.879360783161455e-06, "loss": 0.5035, "step": 2360 }, { "epoch": 0.0980181526629614, "grad_norm": 3.203761100769043, "learning_rate": 9.879213944021597e-06, "loss": 0.7124, "step": 2361 }, { "epoch": 0.09805966818717274, "grad_norm": 2.3103771209716797, "learning_rate": 9.879067016664143e-06, "loss": 0.4762, "step": 2362 }, { "epoch": 0.09810118371138407, "grad_norm": 3.2313199043273926, "learning_rate": 9.878920001091752e-06, "loss": 0.4971, "step": 2363 }, { "epoch": 0.0981426992355954, "grad_norm": 3.047945022583008, "learning_rate": 9.878772897307081e-06, "loss": 0.4455, "step": 2364 }, { "epoch": 0.09818421475980675, "grad_norm": 2.653528928756714, "learning_rate": 9.878625705312789e-06, "loss": 0.5644, "step": 2365 }, { "epoch": 0.09822573028401808, "grad_norm": 2.859612226486206, "learning_rate": 9.87847842511154e-06, "loss": 0.5161, "step": 2366 }, { "epoch": 0.09826724580822942, "grad_norm": 2.961092710494995, "learning_rate": 9.878331056705993e-06, "loss": 0.5396, "step": 2367 }, { "epoch": 0.09830876133244075, "grad_norm": 2.881798028945923, "learning_rate": 9.878183600098814e-06, "loss": 0.4994, "step": 2368 }, { "epoch": 0.09835027685665208, "grad_norm": 3.103361129760742, "learning_rate": 9.878036055292672e-06, "loss": 0.48, "step": 2369 }, { "epoch": 0.09839179238086342, "grad_norm": 2.528876781463623, "learning_rate": 9.877888422290232e-06, "loss": 0.6049, "step": 2370 }, { "epoch": 0.09843330790507475, "grad_norm": 2.8935556411743164, "learning_rate": 9.877740701094161e-06, "loss": 0.5986, "step": 2371 }, { "epoch": 0.09847482342928608, "grad_norm": 3.212444305419922, "learning_rate": 9.877592891707133e-06, "loss": 0.6839, "step": 2372 }, { "epoch": 0.09851633895349743, "grad_norm": 3.2017555236816406, "learning_rate": 9.87744499413182e-06, "loss": 0.517, "step": 2373 }, { "epoch": 0.09855785447770876, "grad_norm": 2.454819679260254, "learning_rate": 9.877297008370896e-06, "loss": 0.381, "step": 2374 }, { "epoch": 0.0985993700019201, "grad_norm": 2.6043198108673096, "learning_rate": 9.877148934427037e-06, "loss": 0.582, "step": 2375 }, { "epoch": 0.09864088552613143, "grad_norm": 2.341460943222046, "learning_rate": 9.877000772302919e-06, "loss": 0.4772, "step": 2376 }, { "epoch": 0.09868240105034276, "grad_norm": 2.8875532150268555, "learning_rate": 9.876852522001219e-06, "loss": 0.4571, "step": 2377 }, { "epoch": 0.0987239165745541, "grad_norm": 2.5486204624176025, "learning_rate": 9.876704183524622e-06, "loss": 0.5518, "step": 2378 }, { "epoch": 0.09876543209876543, "grad_norm": 3.194272518157959, "learning_rate": 9.876555756875807e-06, "loss": 0.6684, "step": 2379 }, { "epoch": 0.09880694762297677, "grad_norm": 3.2279112339019775, "learning_rate": 9.876407242057459e-06, "loss": 0.6957, "step": 2380 }, { "epoch": 0.0988484631471881, "grad_norm": 2.8424456119537354, "learning_rate": 9.876258639072263e-06, "loss": 0.5527, "step": 2381 }, { "epoch": 0.09888997867139944, "grad_norm": 2.2806427478790283, "learning_rate": 9.876109947922904e-06, "loss": 0.515, "step": 2382 }, { "epoch": 0.09893149419561077, "grad_norm": 2.5626490116119385, "learning_rate": 9.87596116861207e-06, "loss": 0.4961, "step": 2383 }, { "epoch": 0.0989730097198221, "grad_norm": 2.47599720954895, "learning_rate": 9.875812301142457e-06, "loss": 0.6176, "step": 2384 }, { "epoch": 0.09901452524403344, "grad_norm": 2.2920796871185303, "learning_rate": 9.87566334551675e-06, "loss": 0.555, "step": 2385 }, { "epoch": 0.09905604076824477, "grad_norm": 2.703679084777832, "learning_rate": 9.875514301737644e-06, "loss": 0.6165, "step": 2386 }, { "epoch": 0.0990975562924561, "grad_norm": 2.7847766876220703, "learning_rate": 9.875365169807835e-06, "loss": 0.6074, "step": 2387 }, { "epoch": 0.09913907181666745, "grad_norm": 2.9983816146850586, "learning_rate": 9.875215949730018e-06, "loss": 0.7438, "step": 2388 }, { "epoch": 0.09918058734087878, "grad_norm": 2.766108989715576, "learning_rate": 9.875066641506893e-06, "loss": 0.6193, "step": 2389 }, { "epoch": 0.09922210286509012, "grad_norm": 3.463567018508911, "learning_rate": 9.874917245141156e-06, "loss": 0.4722, "step": 2390 }, { "epoch": 0.09926361838930145, "grad_norm": 2.9083147048950195, "learning_rate": 9.874767760635511e-06, "loss": 0.784, "step": 2391 }, { "epoch": 0.09930513391351278, "grad_norm": 2.530941963195801, "learning_rate": 9.874618187992659e-06, "loss": 0.5006, "step": 2392 }, { "epoch": 0.09934664943772412, "grad_norm": 2.8025269508361816, "learning_rate": 9.874468527215305e-06, "loss": 0.5699, "step": 2393 }, { "epoch": 0.09938816496193545, "grad_norm": 3.0701937675476074, "learning_rate": 9.874318778306155e-06, "loss": 0.4517, "step": 2394 }, { "epoch": 0.09942968048614678, "grad_norm": 2.5567750930786133, "learning_rate": 9.87416894126792e-06, "loss": 0.5917, "step": 2395 }, { "epoch": 0.09947119601035813, "grad_norm": 2.820343017578125, "learning_rate": 9.874019016103302e-06, "loss": 0.4719, "step": 2396 }, { "epoch": 0.09951271153456946, "grad_norm": 2.6814260482788086, "learning_rate": 9.873869002815015e-06, "loss": 0.4626, "step": 2397 }, { "epoch": 0.0995542270587808, "grad_norm": 2.566377639770508, "learning_rate": 9.873718901405774e-06, "loss": 0.5456, "step": 2398 }, { "epoch": 0.09959574258299213, "grad_norm": 2.4340763092041016, "learning_rate": 9.873568711878289e-06, "loss": 0.4172, "step": 2399 }, { "epoch": 0.09963725810720346, "grad_norm": 2.858013153076172, "learning_rate": 9.873418434235276e-06, "loss": 0.4288, "step": 2400 }, { "epoch": 0.0996787736314148, "grad_norm": 2.7666280269622803, "learning_rate": 9.873268068479456e-06, "loss": 0.6401, "step": 2401 }, { "epoch": 0.09972028915562613, "grad_norm": 2.648561716079712, "learning_rate": 9.873117614613543e-06, "loss": 0.5676, "step": 2402 }, { "epoch": 0.09976180467983747, "grad_norm": 3.1989643573760986, "learning_rate": 9.872967072640258e-06, "loss": 0.6139, "step": 2403 }, { "epoch": 0.0998033202040488, "grad_norm": 3.460651397705078, "learning_rate": 9.872816442562325e-06, "loss": 0.3956, "step": 2404 }, { "epoch": 0.09984483572826014, "grad_norm": 2.3726625442504883, "learning_rate": 9.872665724382467e-06, "loss": 0.5021, "step": 2405 }, { "epoch": 0.09988635125247147, "grad_norm": 3.2820358276367188, "learning_rate": 9.872514918103407e-06, "loss": 0.4909, "step": 2406 }, { "epoch": 0.0999278667766828, "grad_norm": 2.75640869140625, "learning_rate": 9.872364023727873e-06, "loss": 0.4613, "step": 2407 }, { "epoch": 0.09996938230089414, "grad_norm": 3.116654396057129, "learning_rate": 9.872213041258594e-06, "loss": 0.5239, "step": 2408 }, { "epoch": 0.10001089782510547, "grad_norm": 2.733708620071411, "learning_rate": 9.872061970698297e-06, "loss": 0.4364, "step": 2409 }, { "epoch": 0.1000524133493168, "grad_norm": 2.422980546951294, "learning_rate": 9.871910812049716e-06, "loss": 0.4697, "step": 2410 }, { "epoch": 0.10009392887352815, "grad_norm": 2.599863052368164, "learning_rate": 9.871759565315585e-06, "loss": 0.5175, "step": 2411 }, { "epoch": 0.10013544439773948, "grad_norm": 2.8807265758514404, "learning_rate": 9.871608230498635e-06, "loss": 0.5268, "step": 2412 }, { "epoch": 0.10017695992195082, "grad_norm": 2.4005138874053955, "learning_rate": 9.871456807601605e-06, "loss": 0.626, "step": 2413 }, { "epoch": 0.10021847544616215, "grad_norm": 2.766695261001587, "learning_rate": 9.871305296627232e-06, "loss": 0.4396, "step": 2414 }, { "epoch": 0.10025999097037348, "grad_norm": 2.725454807281494, "learning_rate": 9.871153697578254e-06, "loss": 0.4978, "step": 2415 }, { "epoch": 0.10030150649458482, "grad_norm": 2.5168616771698, "learning_rate": 9.871002010457413e-06, "loss": 0.6656, "step": 2416 }, { "epoch": 0.10034302201879615, "grad_norm": 2.5595693588256836, "learning_rate": 9.870850235267452e-06, "loss": 0.3991, "step": 2417 }, { "epoch": 0.10038453754300748, "grad_norm": 2.862529993057251, "learning_rate": 9.870698372011115e-06, "loss": 0.4457, "step": 2418 }, { "epoch": 0.10042605306721883, "grad_norm": 2.8147151470184326, "learning_rate": 9.87054642069115e-06, "loss": 0.5167, "step": 2419 }, { "epoch": 0.10046756859143016, "grad_norm": 2.8640830516815186, "learning_rate": 9.870394381310299e-06, "loss": 0.5426, "step": 2420 }, { "epoch": 0.1005090841156415, "grad_norm": 2.6434595584869385, "learning_rate": 9.870242253871315e-06, "loss": 0.4804, "step": 2421 }, { "epoch": 0.10055059963985283, "grad_norm": 3.0234241485595703, "learning_rate": 9.870090038376947e-06, "loss": 0.4871, "step": 2422 }, { "epoch": 0.10059211516406416, "grad_norm": 2.6152734756469727, "learning_rate": 9.869937734829947e-06, "loss": 0.4841, "step": 2423 }, { "epoch": 0.1006336306882755, "grad_norm": 4.211944580078125, "learning_rate": 9.869785343233069e-06, "loss": 0.7368, "step": 2424 }, { "epoch": 0.10067514621248683, "grad_norm": 2.7942068576812744, "learning_rate": 9.869632863589069e-06, "loss": 0.4636, "step": 2425 }, { "epoch": 0.10071666173669816, "grad_norm": 2.7701590061187744, "learning_rate": 9.869480295900702e-06, "loss": 0.5726, "step": 2426 }, { "epoch": 0.1007581772609095, "grad_norm": 3.5645394325256348, "learning_rate": 9.869327640170728e-06, "loss": 0.5205, "step": 2427 }, { "epoch": 0.10079969278512084, "grad_norm": 2.5605461597442627, "learning_rate": 9.869174896401908e-06, "loss": 0.4813, "step": 2428 }, { "epoch": 0.10084120830933217, "grad_norm": 3.2740085124969482, "learning_rate": 9.869022064597001e-06, "loss": 0.5265, "step": 2429 }, { "epoch": 0.1008827238335435, "grad_norm": 2.464700222015381, "learning_rate": 9.868869144758772e-06, "loss": 0.445, "step": 2430 }, { "epoch": 0.10092423935775484, "grad_norm": 2.8789894580841064, "learning_rate": 9.868716136889987e-06, "loss": 0.6627, "step": 2431 }, { "epoch": 0.10096575488196617, "grad_norm": 2.778235912322998, "learning_rate": 9.86856304099341e-06, "loss": 0.53, "step": 2432 }, { "epoch": 0.1010072704061775, "grad_norm": 2.777268648147583, "learning_rate": 9.86840985707181e-06, "loss": 0.4382, "step": 2433 }, { "epoch": 0.10104878593038885, "grad_norm": 2.855987548828125, "learning_rate": 9.868256585127956e-06, "loss": 0.5024, "step": 2434 }, { "epoch": 0.10109030145460018, "grad_norm": 2.3235976696014404, "learning_rate": 9.86810322516462e-06, "loss": 0.4319, "step": 2435 }, { "epoch": 0.10113181697881152, "grad_norm": 3.1885762214660645, "learning_rate": 9.867949777184574e-06, "loss": 0.4383, "step": 2436 }, { "epoch": 0.10117333250302285, "grad_norm": 2.5069587230682373, "learning_rate": 9.867796241190595e-06, "loss": 0.6977, "step": 2437 }, { "epoch": 0.10121484802723418, "grad_norm": 2.5715479850769043, "learning_rate": 9.867642617185456e-06, "loss": 0.5185, "step": 2438 }, { "epoch": 0.10125636355144552, "grad_norm": 2.6522552967071533, "learning_rate": 9.867488905171934e-06, "loss": 0.5262, "step": 2439 }, { "epoch": 0.10129787907565685, "grad_norm": 2.4318454265594482, "learning_rate": 9.867335105152812e-06, "loss": 0.5613, "step": 2440 }, { "epoch": 0.10133939459986818, "grad_norm": 2.304391384124756, "learning_rate": 9.867181217130866e-06, "loss": 0.5154, "step": 2441 }, { "epoch": 0.10138091012407953, "grad_norm": 3.4364254474639893, "learning_rate": 9.867027241108882e-06, "loss": 0.4908, "step": 2442 }, { "epoch": 0.10142242564829086, "grad_norm": 2.8058056831359863, "learning_rate": 9.866873177089642e-06, "loss": 0.5516, "step": 2443 }, { "epoch": 0.1014639411725022, "grad_norm": 3.7777838706970215, "learning_rate": 9.866719025075933e-06, "loss": 0.6455, "step": 2444 }, { "epoch": 0.10150545669671353, "grad_norm": 2.5158815383911133, "learning_rate": 9.86656478507054e-06, "loss": 0.5662, "step": 2445 }, { "epoch": 0.10154697222092486, "grad_norm": 2.607893943786621, "learning_rate": 9.866410457076254e-06, "loss": 0.5006, "step": 2446 }, { "epoch": 0.1015884877451362, "grad_norm": 2.7643563747406006, "learning_rate": 9.866256041095864e-06, "loss": 0.6564, "step": 2447 }, { "epoch": 0.10163000326934753, "grad_norm": 2.3731677532196045, "learning_rate": 9.866101537132162e-06, "loss": 0.5367, "step": 2448 }, { "epoch": 0.10167151879355886, "grad_norm": 2.245180130004883, "learning_rate": 9.865946945187942e-06, "loss": 0.4312, "step": 2449 }, { "epoch": 0.1017130343177702, "grad_norm": 2.987740993499756, "learning_rate": 9.865792265265999e-06, "loss": 0.4051, "step": 2450 }, { "epoch": 0.10175454984198154, "grad_norm": 2.8898401260375977, "learning_rate": 9.86563749736913e-06, "loss": 0.484, "step": 2451 }, { "epoch": 0.10179606536619287, "grad_norm": 3.277087926864624, "learning_rate": 9.86548264150013e-06, "loss": 0.4923, "step": 2452 }, { "epoch": 0.1018375808904042, "grad_norm": 2.621004581451416, "learning_rate": 9.865327697661803e-06, "loss": 0.4435, "step": 2453 }, { "epoch": 0.10187909641461554, "grad_norm": 2.483691692352295, "learning_rate": 9.86517266585695e-06, "loss": 0.4823, "step": 2454 }, { "epoch": 0.10192061193882687, "grad_norm": 3.2505922317504883, "learning_rate": 9.865017546088371e-06, "loss": 0.4219, "step": 2455 }, { "epoch": 0.1019621274630382, "grad_norm": 2.382474184036255, "learning_rate": 9.864862338358873e-06, "loss": 0.4266, "step": 2456 }, { "epoch": 0.10200364298724955, "grad_norm": 2.522481679916382, "learning_rate": 9.864707042671263e-06, "loss": 0.3744, "step": 2457 }, { "epoch": 0.10204515851146088, "grad_norm": 2.684384822845459, "learning_rate": 9.864551659028348e-06, "loss": 0.5136, "step": 2458 }, { "epoch": 0.10208667403567222, "grad_norm": 2.5879364013671875, "learning_rate": 9.864396187432934e-06, "loss": 0.4906, "step": 2459 }, { "epoch": 0.10212818955988355, "grad_norm": 2.757373809814453, "learning_rate": 9.864240627887837e-06, "loss": 0.4907, "step": 2460 }, { "epoch": 0.10216970508409488, "grad_norm": 2.8523149490356445, "learning_rate": 9.864084980395868e-06, "loss": 0.5281, "step": 2461 }, { "epoch": 0.10221122060830622, "grad_norm": 3.066152334213257, "learning_rate": 9.863929244959838e-06, "loss": 0.5415, "step": 2462 }, { "epoch": 0.10225273613251755, "grad_norm": 2.893681764602661, "learning_rate": 9.863773421582567e-06, "loss": 0.5988, "step": 2463 }, { "epoch": 0.10229425165672888, "grad_norm": 2.113419532775879, "learning_rate": 9.863617510266869e-06, "loss": 0.3406, "step": 2464 }, { "epoch": 0.10233576718094023, "grad_norm": 2.612010955810547, "learning_rate": 9.863461511015568e-06, "loss": 0.5319, "step": 2465 }, { "epoch": 0.10237728270515156, "grad_norm": 2.933122396469116, "learning_rate": 9.863305423831478e-06, "loss": 0.5309, "step": 2466 }, { "epoch": 0.1024187982293629, "grad_norm": 2.837628126144409, "learning_rate": 9.863149248717425e-06, "loss": 0.4577, "step": 2467 }, { "epoch": 0.10246031375357423, "grad_norm": 3.4665462970733643, "learning_rate": 9.862992985676232e-06, "loss": 0.5039, "step": 2468 }, { "epoch": 0.10250182927778556, "grad_norm": 3.985841751098633, "learning_rate": 9.862836634710724e-06, "loss": 0.5998, "step": 2469 }, { "epoch": 0.1025433448019969, "grad_norm": 2.4537298679351807, "learning_rate": 9.862680195823728e-06, "loss": 0.541, "step": 2470 }, { "epoch": 0.10258486032620823, "grad_norm": 2.518176555633545, "learning_rate": 9.862523669018074e-06, "loss": 0.5065, "step": 2471 }, { "epoch": 0.10262637585041956, "grad_norm": 2.4944136142730713, "learning_rate": 9.86236705429659e-06, "loss": 0.5125, "step": 2472 }, { "epoch": 0.1026678913746309, "grad_norm": 2.6953561305999756, "learning_rate": 9.862210351662107e-06, "loss": 0.3875, "step": 2473 }, { "epoch": 0.10270940689884224, "grad_norm": 2.3685221672058105, "learning_rate": 9.86205356111746e-06, "loss": 0.504, "step": 2474 }, { "epoch": 0.10275092242305357, "grad_norm": 2.570297956466675, "learning_rate": 9.861896682665482e-06, "loss": 0.4112, "step": 2475 }, { "epoch": 0.1027924379472649, "grad_norm": 2.4109339714050293, "learning_rate": 9.861739716309012e-06, "loss": 0.6245, "step": 2476 }, { "epoch": 0.10283395347147624, "grad_norm": 2.96170711517334, "learning_rate": 9.861582662050885e-06, "loss": 0.4912, "step": 2477 }, { "epoch": 0.10287546899568757, "grad_norm": 2.4780097007751465, "learning_rate": 9.861425519893944e-06, "loss": 0.7131, "step": 2478 }, { "epoch": 0.1029169845198989, "grad_norm": 2.3259270191192627, "learning_rate": 9.861268289841028e-06, "loss": 0.5739, "step": 2479 }, { "epoch": 0.10295850004411024, "grad_norm": 2.5372862815856934, "learning_rate": 9.86111097189498e-06, "loss": 0.4954, "step": 2480 }, { "epoch": 0.10300001556832158, "grad_norm": 2.76953387260437, "learning_rate": 9.860953566058645e-06, "loss": 0.4269, "step": 2481 }, { "epoch": 0.10304153109253292, "grad_norm": 3.0187740325927734, "learning_rate": 9.860796072334868e-06, "loss": 0.6194, "step": 2482 }, { "epoch": 0.10308304661674425, "grad_norm": 2.830641508102417, "learning_rate": 9.860638490726497e-06, "loss": 0.6524, "step": 2483 }, { "epoch": 0.10312456214095558, "grad_norm": 3.268982172012329, "learning_rate": 9.860480821236381e-06, "loss": 0.6353, "step": 2484 }, { "epoch": 0.10316607766516692, "grad_norm": 2.7928569316864014, "learning_rate": 9.860323063867373e-06, "loss": 0.571, "step": 2485 }, { "epoch": 0.10320759318937825, "grad_norm": 2.4726881980895996, "learning_rate": 9.86016521862232e-06, "loss": 0.4295, "step": 2486 }, { "epoch": 0.10324910871358958, "grad_norm": 2.5441057682037354, "learning_rate": 9.860007285504079e-06, "loss": 0.5909, "step": 2487 }, { "epoch": 0.10329062423780093, "grad_norm": 3.3971118927001953, "learning_rate": 9.859849264515507e-06, "loss": 0.6451, "step": 2488 }, { "epoch": 0.10333213976201226, "grad_norm": 2.5810954570770264, "learning_rate": 9.859691155659458e-06, "loss": 0.488, "step": 2489 }, { "epoch": 0.1033736552862236, "grad_norm": 2.3774337768554688, "learning_rate": 9.859532958938793e-06, "loss": 0.4322, "step": 2490 }, { "epoch": 0.10341517081043493, "grad_norm": 2.7715861797332764, "learning_rate": 9.859374674356372e-06, "loss": 0.4951, "step": 2491 }, { "epoch": 0.10345668633464626, "grad_norm": 2.924400568008423, "learning_rate": 9.859216301915057e-06, "loss": 0.6139, "step": 2492 }, { "epoch": 0.1034982018588576, "grad_norm": 3.0495214462280273, "learning_rate": 9.859057841617709e-06, "loss": 0.5237, "step": 2493 }, { "epoch": 0.10353971738306893, "grad_norm": 2.269944190979004, "learning_rate": 9.858899293467196e-06, "loss": 0.3754, "step": 2494 }, { "epoch": 0.10358123290728026, "grad_norm": 2.8377575874328613, "learning_rate": 9.858740657466381e-06, "loss": 0.6227, "step": 2495 }, { "epoch": 0.10362274843149161, "grad_norm": 3.207331895828247, "learning_rate": 9.858581933618136e-06, "loss": 0.4034, "step": 2496 }, { "epoch": 0.10366426395570294, "grad_norm": 2.7218310832977295, "learning_rate": 9.85842312192533e-06, "loss": 0.556, "step": 2497 }, { "epoch": 0.10370577947991427, "grad_norm": 2.367396831512451, "learning_rate": 9.858264222390833e-06, "loss": 0.4494, "step": 2498 }, { "epoch": 0.1037472950041256, "grad_norm": 2.750302314758301, "learning_rate": 9.85810523501752e-06, "loss": 0.5591, "step": 2499 }, { "epoch": 0.10378881052833694, "grad_norm": 2.291440725326538, "learning_rate": 9.857946159808261e-06, "loss": 0.504, "step": 2500 }, { "epoch": 0.10383032605254827, "grad_norm": 2.497620105743408, "learning_rate": 9.857786996765937e-06, "loss": 0.5383, "step": 2501 }, { "epoch": 0.1038718415767596, "grad_norm": 2.593987226486206, "learning_rate": 9.857627745893423e-06, "loss": 0.4675, "step": 2502 }, { "epoch": 0.10391335710097094, "grad_norm": 2.4460911750793457, "learning_rate": 9.8574684071936e-06, "loss": 0.5622, "step": 2503 }, { "epoch": 0.10395487262518228, "grad_norm": 2.705523729324341, "learning_rate": 9.857308980669349e-06, "loss": 0.4078, "step": 2504 }, { "epoch": 0.10399638814939362, "grad_norm": 2.542567729949951, "learning_rate": 9.85714946632355e-06, "loss": 0.6763, "step": 2505 }, { "epoch": 0.10403790367360495, "grad_norm": 2.4795329570770264, "learning_rate": 9.85698986415909e-06, "loss": 0.6078, "step": 2506 }, { "epoch": 0.10407941919781628, "grad_norm": 2.526533603668213, "learning_rate": 9.856830174178853e-06, "loss": 0.5106, "step": 2507 }, { "epoch": 0.10412093472202762, "grad_norm": 2.737058162689209, "learning_rate": 9.856670396385725e-06, "loss": 0.4325, "step": 2508 }, { "epoch": 0.10416245024623895, "grad_norm": 2.1858229637145996, "learning_rate": 9.856510530782599e-06, "loss": 0.5443, "step": 2509 }, { "epoch": 0.10420396577045028, "grad_norm": 3.0509402751922607, "learning_rate": 9.85635057737236e-06, "loss": 0.5834, "step": 2510 }, { "epoch": 0.10424548129466163, "grad_norm": 2.8405652046203613, "learning_rate": 9.856190536157907e-06, "loss": 0.6452, "step": 2511 }, { "epoch": 0.10428699681887296, "grad_norm": 2.6828536987304688, "learning_rate": 9.856030407142125e-06, "loss": 0.4459, "step": 2512 }, { "epoch": 0.1043285123430843, "grad_norm": 2.9054925441741943, "learning_rate": 9.855870190327915e-06, "loss": 0.6059, "step": 2513 }, { "epoch": 0.10437002786729563, "grad_norm": 2.9335930347442627, "learning_rate": 9.855709885718173e-06, "loss": 0.4933, "step": 2514 }, { "epoch": 0.10441154339150696, "grad_norm": 2.402698040008545, "learning_rate": 9.855549493315797e-06, "loss": 0.5216, "step": 2515 }, { "epoch": 0.1044530589157183, "grad_norm": 3.095158100128174, "learning_rate": 9.855389013123683e-06, "loss": 0.6072, "step": 2516 }, { "epoch": 0.10449457443992963, "grad_norm": 2.7306478023529053, "learning_rate": 9.855228445144742e-06, "loss": 0.5235, "step": 2517 }, { "epoch": 0.10453608996414096, "grad_norm": 2.341299533843994, "learning_rate": 9.855067789381867e-06, "loss": 0.6381, "step": 2518 }, { "epoch": 0.10457760548835231, "grad_norm": 2.5765182971954346, "learning_rate": 9.85490704583797e-06, "loss": 0.4577, "step": 2519 }, { "epoch": 0.10461912101256364, "grad_norm": 2.567413330078125, "learning_rate": 9.85474621451595e-06, "loss": 0.4581, "step": 2520 }, { "epoch": 0.10466063653677497, "grad_norm": 2.772080421447754, "learning_rate": 9.854585295418723e-06, "loss": 0.5422, "step": 2521 }, { "epoch": 0.1047021520609863, "grad_norm": 2.4838383197784424, "learning_rate": 9.854424288549194e-06, "loss": 0.6306, "step": 2522 }, { "epoch": 0.10474366758519764, "grad_norm": 2.5062673091888428, "learning_rate": 9.854263193910274e-06, "loss": 0.4976, "step": 2523 }, { "epoch": 0.10478518310940897, "grad_norm": 2.7168290615081787, "learning_rate": 9.854102011504876e-06, "loss": 0.4907, "step": 2524 }, { "epoch": 0.1048266986336203, "grad_norm": 2.3051271438598633, "learning_rate": 9.853940741335914e-06, "loss": 0.4198, "step": 2525 }, { "epoch": 0.10486821415783164, "grad_norm": 2.668034076690674, "learning_rate": 9.853779383406305e-06, "loss": 0.5856, "step": 2526 }, { "epoch": 0.10490972968204298, "grad_norm": 2.4832961559295654, "learning_rate": 9.853617937718966e-06, "loss": 0.437, "step": 2527 }, { "epoch": 0.10495124520625432, "grad_norm": 2.766049861907959, "learning_rate": 9.853456404276816e-06, "loss": 0.5628, "step": 2528 }, { "epoch": 0.10499276073046565, "grad_norm": 3.4448039531707764, "learning_rate": 9.853294783082776e-06, "loss": 0.5811, "step": 2529 }, { "epoch": 0.10503427625467698, "grad_norm": 2.4802896976470947, "learning_rate": 9.853133074139766e-06, "loss": 0.5301, "step": 2530 }, { "epoch": 0.10507579177888832, "grad_norm": 3.2854137420654297, "learning_rate": 9.852971277450712e-06, "loss": 0.4897, "step": 2531 }, { "epoch": 0.10511730730309965, "grad_norm": 2.670212984085083, "learning_rate": 9.852809393018537e-06, "loss": 0.6206, "step": 2532 }, { "epoch": 0.10515882282731098, "grad_norm": 2.319226026535034, "learning_rate": 9.852647420846171e-06, "loss": 0.6083, "step": 2533 }, { "epoch": 0.10520033835152232, "grad_norm": 2.752779960632324, "learning_rate": 9.852485360936541e-06, "loss": 0.6294, "step": 2534 }, { "epoch": 0.10524185387573366, "grad_norm": 2.1883344650268555, "learning_rate": 9.852323213292576e-06, "loss": 0.5535, "step": 2535 }, { "epoch": 0.105283369399945, "grad_norm": 2.6015188694000244, "learning_rate": 9.85216097791721e-06, "loss": 0.5769, "step": 2536 }, { "epoch": 0.10532488492415633, "grad_norm": 3.062696933746338, "learning_rate": 9.851998654813375e-06, "loss": 0.4003, "step": 2537 }, { "epoch": 0.10536640044836766, "grad_norm": 2.6597983837127686, "learning_rate": 9.851836243984005e-06, "loss": 0.5353, "step": 2538 }, { "epoch": 0.105407915972579, "grad_norm": 2.5196237564086914, "learning_rate": 9.851673745432037e-06, "loss": 0.5021, "step": 2539 }, { "epoch": 0.10544943149679033, "grad_norm": 2.677020311355591, "learning_rate": 9.85151115916041e-06, "loss": 0.5713, "step": 2540 }, { "epoch": 0.10549094702100166, "grad_norm": 2.4100069999694824, "learning_rate": 9.85134848517206e-06, "loss": 0.5869, "step": 2541 }, { "epoch": 0.10553246254521301, "grad_norm": 2.4883334636688232, "learning_rate": 9.851185723469936e-06, "loss": 0.461, "step": 2542 }, { "epoch": 0.10557397806942434, "grad_norm": 2.7925221920013428, "learning_rate": 9.851022874056972e-06, "loss": 0.6832, "step": 2543 }, { "epoch": 0.10561549359363567, "grad_norm": 2.483811855316162, "learning_rate": 9.850859936936118e-06, "loss": 0.5068, "step": 2544 }, { "epoch": 0.105657009117847, "grad_norm": 2.1175029277801514, "learning_rate": 9.850696912110319e-06, "loss": 0.5341, "step": 2545 }, { "epoch": 0.10569852464205834, "grad_norm": 2.685300350189209, "learning_rate": 9.850533799582521e-06, "loss": 0.4901, "step": 2546 }, { "epoch": 0.10574004016626967, "grad_norm": 2.6832897663116455, "learning_rate": 9.850370599355671e-06, "loss": 0.5896, "step": 2547 }, { "epoch": 0.105781555690481, "grad_norm": 2.892392873764038, "learning_rate": 9.850207311432727e-06, "loss": 0.4583, "step": 2548 }, { "epoch": 0.10582307121469234, "grad_norm": 2.6187167167663574, "learning_rate": 9.850043935816633e-06, "loss": 0.4494, "step": 2549 }, { "epoch": 0.10586458673890368, "grad_norm": 2.6815578937530518, "learning_rate": 9.849880472510349e-06, "loss": 0.6269, "step": 2550 }, { "epoch": 0.10590610226311502, "grad_norm": 3.018460273742676, "learning_rate": 9.849716921516827e-06, "loss": 0.5407, "step": 2551 }, { "epoch": 0.10594761778732635, "grad_norm": 3.085087537765503, "learning_rate": 9.849553282839025e-06, "loss": 0.6897, "step": 2552 }, { "epoch": 0.10598913331153768, "grad_norm": 2.5937511920928955, "learning_rate": 9.849389556479905e-06, "loss": 0.4374, "step": 2553 }, { "epoch": 0.10603064883574902, "grad_norm": 2.6197824478149414, "learning_rate": 9.84922574244242e-06, "loss": 0.4087, "step": 2554 }, { "epoch": 0.10607216435996035, "grad_norm": 2.1970996856689453, "learning_rate": 9.849061840729538e-06, "loss": 0.5318, "step": 2555 }, { "epoch": 0.10611367988417168, "grad_norm": 3.170354127883911, "learning_rate": 9.848897851344221e-06, "loss": 0.5533, "step": 2556 }, { "epoch": 0.10615519540838302, "grad_norm": 2.8355767726898193, "learning_rate": 9.848733774289432e-06, "loss": 0.5575, "step": 2557 }, { "epoch": 0.10619671093259436, "grad_norm": 3.066544771194458, "learning_rate": 9.848569609568139e-06, "loss": 0.658, "step": 2558 }, { "epoch": 0.1062382264568057, "grad_norm": 2.982248306274414, "learning_rate": 9.84840535718331e-06, "loss": 0.4978, "step": 2559 }, { "epoch": 0.10627974198101703, "grad_norm": 2.585451126098633, "learning_rate": 9.848241017137915e-06, "loss": 0.5432, "step": 2560 }, { "epoch": 0.10632125750522836, "grad_norm": 2.530663013458252, "learning_rate": 9.848076589434925e-06, "loss": 0.5157, "step": 2561 }, { "epoch": 0.1063627730294397, "grad_norm": 2.6975553035736084, "learning_rate": 9.847912074077313e-06, "loss": 0.5685, "step": 2562 }, { "epoch": 0.10640428855365103, "grad_norm": 2.5372447967529297, "learning_rate": 9.847747471068052e-06, "loss": 0.4273, "step": 2563 }, { "epoch": 0.10644580407786236, "grad_norm": 2.659285068511963, "learning_rate": 9.84758278041012e-06, "loss": 0.5219, "step": 2564 }, { "epoch": 0.10648731960207371, "grad_norm": 2.3461177349090576, "learning_rate": 9.847418002106494e-06, "loss": 0.514, "step": 2565 }, { "epoch": 0.10652883512628504, "grad_norm": 2.705028772354126, "learning_rate": 9.847253136160154e-06, "loss": 0.6316, "step": 2566 }, { "epoch": 0.10657035065049637, "grad_norm": 2.659743309020996, "learning_rate": 9.84708818257408e-06, "loss": 0.4259, "step": 2567 }, { "epoch": 0.1066118661747077, "grad_norm": 2.5845096111297607, "learning_rate": 9.846923141351254e-06, "loss": 0.618, "step": 2568 }, { "epoch": 0.10665338169891904, "grad_norm": 2.7570695877075195, "learning_rate": 9.846758012494661e-06, "loss": 0.4982, "step": 2569 }, { "epoch": 0.10669489722313037, "grad_norm": 2.5753979682922363, "learning_rate": 9.846592796007287e-06, "loss": 0.5186, "step": 2570 }, { "epoch": 0.1067364127473417, "grad_norm": 2.404428005218506, "learning_rate": 9.846427491892117e-06, "loss": 0.5737, "step": 2571 }, { "epoch": 0.10677792827155304, "grad_norm": 2.8220129013061523, "learning_rate": 9.846262100152143e-06, "loss": 0.4802, "step": 2572 }, { "epoch": 0.10681944379576438, "grad_norm": 2.3624730110168457, "learning_rate": 9.84609662079035e-06, "loss": 0.4613, "step": 2573 }, { "epoch": 0.10686095931997572, "grad_norm": 2.205739736557007, "learning_rate": 9.845931053809736e-06, "loss": 0.4336, "step": 2574 }, { "epoch": 0.10690247484418705, "grad_norm": 2.573603868484497, "learning_rate": 9.845765399213292e-06, "loss": 0.5449, "step": 2575 }, { "epoch": 0.10694399036839838, "grad_norm": 3.541135311126709, "learning_rate": 9.845599657004013e-06, "loss": 0.5353, "step": 2576 }, { "epoch": 0.10698550589260972, "grad_norm": 3.271294355392456, "learning_rate": 9.845433827184894e-06, "loss": 0.7087, "step": 2577 }, { "epoch": 0.10702702141682105, "grad_norm": 2.516327142715454, "learning_rate": 9.845267909758936e-06, "loss": 0.5479, "step": 2578 }, { "epoch": 0.10706853694103238, "grad_norm": 2.25054669380188, "learning_rate": 9.845101904729135e-06, "loss": 0.3857, "step": 2579 }, { "epoch": 0.10711005246524372, "grad_norm": 2.4592978954315186, "learning_rate": 9.844935812098497e-06, "loss": 0.6202, "step": 2580 }, { "epoch": 0.10715156798945506, "grad_norm": 2.4631175994873047, "learning_rate": 9.844769631870024e-06, "loss": 0.4838, "step": 2581 }, { "epoch": 0.1071930835136664, "grad_norm": 2.8110885620117188, "learning_rate": 9.84460336404672e-06, "loss": 0.6088, "step": 2582 }, { "epoch": 0.10723459903787773, "grad_norm": 2.7069449424743652, "learning_rate": 9.844437008631589e-06, "loss": 0.4415, "step": 2583 }, { "epoch": 0.10727611456208906, "grad_norm": 2.8214712142944336, "learning_rate": 9.844270565627641e-06, "loss": 0.4598, "step": 2584 }, { "epoch": 0.1073176300863004, "grad_norm": 2.3199315071105957, "learning_rate": 9.844104035037884e-06, "loss": 0.5687, "step": 2585 }, { "epoch": 0.10735914561051173, "grad_norm": 2.3004655838012695, "learning_rate": 9.84393741686533e-06, "loss": 0.5256, "step": 2586 }, { "epoch": 0.10740066113472306, "grad_norm": 2.3724513053894043, "learning_rate": 9.843770711112994e-06, "loss": 0.5395, "step": 2587 }, { "epoch": 0.1074421766589344, "grad_norm": 3.3004565238952637, "learning_rate": 9.843603917783885e-06, "loss": 0.6117, "step": 2588 }, { "epoch": 0.10748369218314574, "grad_norm": 2.483248710632324, "learning_rate": 9.843437036881022e-06, "loss": 0.4795, "step": 2589 }, { "epoch": 0.10752520770735707, "grad_norm": 2.8030874729156494, "learning_rate": 9.84327006840742e-06, "loss": 0.5524, "step": 2590 }, { "epoch": 0.1075667232315684, "grad_norm": 2.876589298248291, "learning_rate": 9.8431030123661e-06, "loss": 0.6359, "step": 2591 }, { "epoch": 0.10760823875577974, "grad_norm": 2.4874818325042725, "learning_rate": 9.842935868760084e-06, "loss": 0.5257, "step": 2592 }, { "epoch": 0.10764975427999107, "grad_norm": 2.521278142929077, "learning_rate": 9.842768637592388e-06, "loss": 0.45, "step": 2593 }, { "epoch": 0.1076912698042024, "grad_norm": 2.8652219772338867, "learning_rate": 9.842601318866042e-06, "loss": 0.5121, "step": 2594 }, { "epoch": 0.10773278532841374, "grad_norm": 3.298882246017456, "learning_rate": 9.842433912584066e-06, "loss": 0.544, "step": 2595 }, { "epoch": 0.10777430085262509, "grad_norm": 3.01308012008667, "learning_rate": 9.84226641874949e-06, "loss": 0.5127, "step": 2596 }, { "epoch": 0.10781581637683642, "grad_norm": 2.5382239818573, "learning_rate": 9.842098837365341e-06, "loss": 0.5356, "step": 2597 }, { "epoch": 0.10785733190104775, "grad_norm": 2.9585206508636475, "learning_rate": 9.841931168434651e-06, "loss": 0.5027, "step": 2598 }, { "epoch": 0.10789884742525908, "grad_norm": 2.397733211517334, "learning_rate": 9.841763411960447e-06, "loss": 0.4607, "step": 2599 }, { "epoch": 0.10794036294947042, "grad_norm": 2.4114973545074463, "learning_rate": 9.841595567945768e-06, "loss": 0.588, "step": 2600 }, { "epoch": 0.10798187847368175, "grad_norm": 2.8059020042419434, "learning_rate": 9.841427636393645e-06, "loss": 0.457, "step": 2601 }, { "epoch": 0.10802339399789308, "grad_norm": 3.0844521522521973, "learning_rate": 9.841259617307113e-06, "loss": 0.4542, "step": 2602 }, { "epoch": 0.10806490952210442, "grad_norm": 2.852180242538452, "learning_rate": 9.841091510689213e-06, "loss": 0.4451, "step": 2603 }, { "epoch": 0.10810642504631576, "grad_norm": 2.513364553451538, "learning_rate": 9.840923316542984e-06, "loss": 0.4543, "step": 2604 }, { "epoch": 0.1081479405705271, "grad_norm": 2.85451078414917, "learning_rate": 9.840755034871465e-06, "loss": 0.6387, "step": 2605 }, { "epoch": 0.10818945609473843, "grad_norm": 2.493598461151123, "learning_rate": 9.8405866656777e-06, "loss": 0.5028, "step": 2606 }, { "epoch": 0.10823097161894976, "grad_norm": 2.596311569213867, "learning_rate": 9.840418208964733e-06, "loss": 0.533, "step": 2607 }, { "epoch": 0.1082724871431611, "grad_norm": 2.3290958404541016, "learning_rate": 9.84024966473561e-06, "loss": 0.5212, "step": 2608 }, { "epoch": 0.10831400266737243, "grad_norm": 2.5025153160095215, "learning_rate": 9.840081032993377e-06, "loss": 0.5123, "step": 2609 }, { "epoch": 0.10835551819158376, "grad_norm": 2.179698944091797, "learning_rate": 9.839912313741085e-06, "loss": 0.484, "step": 2610 }, { "epoch": 0.1083970337157951, "grad_norm": 2.4545018672943115, "learning_rate": 9.839743506981783e-06, "loss": 0.5436, "step": 2611 }, { "epoch": 0.10843854924000644, "grad_norm": 2.8604962825775146, "learning_rate": 9.839574612718522e-06, "loss": 0.5618, "step": 2612 }, { "epoch": 0.10848006476421777, "grad_norm": 2.7834084033966064, "learning_rate": 9.839405630954358e-06, "loss": 0.4361, "step": 2613 }, { "epoch": 0.1085215802884291, "grad_norm": 3.5243284702301025, "learning_rate": 9.839236561692346e-06, "loss": 0.5081, "step": 2614 }, { "epoch": 0.10856309581264044, "grad_norm": 2.5676727294921875, "learning_rate": 9.839067404935542e-06, "loss": 0.5928, "step": 2615 }, { "epoch": 0.10860461133685177, "grad_norm": 2.654569625854492, "learning_rate": 9.838898160687003e-06, "loss": 0.5432, "step": 2616 }, { "epoch": 0.1086461268610631, "grad_norm": 2.294323205947876, "learning_rate": 9.838728828949793e-06, "loss": 0.4698, "step": 2617 }, { "epoch": 0.10868764238527444, "grad_norm": 3.605962038040161, "learning_rate": 9.83855940972697e-06, "loss": 0.7964, "step": 2618 }, { "epoch": 0.10872915790948579, "grad_norm": 2.5182998180389404, "learning_rate": 9.838389903021597e-06, "loss": 0.3558, "step": 2619 }, { "epoch": 0.10877067343369712, "grad_norm": 2.6175663471221924, "learning_rate": 9.838220308836742e-06, "loss": 0.5282, "step": 2620 }, { "epoch": 0.10881218895790845, "grad_norm": 2.8353111743927, "learning_rate": 9.838050627175468e-06, "loss": 0.473, "step": 2621 }, { "epoch": 0.10885370448211978, "grad_norm": 2.871870756149292, "learning_rate": 9.837880858040845e-06, "loss": 0.672, "step": 2622 }, { "epoch": 0.10889522000633112, "grad_norm": 2.261589527130127, "learning_rate": 9.837711001435942e-06, "loss": 0.4768, "step": 2623 }, { "epoch": 0.10893673553054245, "grad_norm": 2.946991205215454, "learning_rate": 9.837541057363828e-06, "loss": 0.5102, "step": 2624 }, { "epoch": 0.10897825105475378, "grad_norm": 2.702392578125, "learning_rate": 9.837371025827579e-06, "loss": 0.5379, "step": 2625 }, { "epoch": 0.10901976657896512, "grad_norm": 2.639784097671509, "learning_rate": 9.837200906830266e-06, "loss": 0.5921, "step": 2626 }, { "epoch": 0.10906128210317646, "grad_norm": 2.8689866065979004, "learning_rate": 9.837030700374967e-06, "loss": 0.5092, "step": 2627 }, { "epoch": 0.1091027976273878, "grad_norm": 2.5550732612609863, "learning_rate": 9.836860406464758e-06, "loss": 0.6134, "step": 2628 }, { "epoch": 0.10914431315159913, "grad_norm": 2.6174607276916504, "learning_rate": 9.83669002510272e-06, "loss": 0.5697, "step": 2629 }, { "epoch": 0.10918582867581046, "grad_norm": 3.096302032470703, "learning_rate": 9.836519556291932e-06, "loss": 0.492, "step": 2630 }, { "epoch": 0.1092273442000218, "grad_norm": 2.708488702774048, "learning_rate": 9.836349000035477e-06, "loss": 0.5545, "step": 2631 }, { "epoch": 0.10926885972423313, "grad_norm": 2.5037877559661865, "learning_rate": 9.836178356336438e-06, "loss": 0.5246, "step": 2632 }, { "epoch": 0.10931037524844446, "grad_norm": 2.632868528366089, "learning_rate": 9.8360076251979e-06, "loss": 0.578, "step": 2633 }, { "epoch": 0.1093518907726558, "grad_norm": 2.714186429977417, "learning_rate": 9.835836806622948e-06, "loss": 0.5015, "step": 2634 }, { "epoch": 0.10939340629686714, "grad_norm": 2.875241994857788, "learning_rate": 9.835665900614677e-06, "loss": 0.4865, "step": 2635 }, { "epoch": 0.10943492182107847, "grad_norm": 2.543868064880371, "learning_rate": 9.835494907176169e-06, "loss": 0.5495, "step": 2636 }, { "epoch": 0.1094764373452898, "grad_norm": 2.7271101474761963, "learning_rate": 9.835323826310522e-06, "loss": 0.6513, "step": 2637 }, { "epoch": 0.10951795286950114, "grad_norm": 2.474839925765991, "learning_rate": 9.835152658020825e-06, "loss": 0.4854, "step": 2638 }, { "epoch": 0.10955946839371247, "grad_norm": 2.2715249061584473, "learning_rate": 9.834981402310175e-06, "loss": 0.4954, "step": 2639 }, { "epoch": 0.1096009839179238, "grad_norm": 2.7759592533111572, "learning_rate": 9.834810059181667e-06, "loss": 0.5444, "step": 2640 }, { "epoch": 0.10964249944213514, "grad_norm": 3.040233612060547, "learning_rate": 9.8346386286384e-06, "loss": 0.4772, "step": 2641 }, { "epoch": 0.10968401496634647, "grad_norm": 2.431401014328003, "learning_rate": 9.834467110683473e-06, "loss": 0.4933, "step": 2642 }, { "epoch": 0.10972553049055782, "grad_norm": 2.7722949981689453, "learning_rate": 9.834295505319986e-06, "loss": 0.4222, "step": 2643 }, { "epoch": 0.10976704601476915, "grad_norm": 2.6862142086029053, "learning_rate": 9.834123812551045e-06, "loss": 0.6107, "step": 2644 }, { "epoch": 0.10980856153898048, "grad_norm": 2.556536912918091, "learning_rate": 9.833952032379752e-06, "loss": 0.5848, "step": 2645 }, { "epoch": 0.10985007706319182, "grad_norm": 2.419771432876587, "learning_rate": 9.83378016480921e-06, "loss": 0.5871, "step": 2646 }, { "epoch": 0.10989159258740315, "grad_norm": 2.452890634536743, "learning_rate": 9.833608209842533e-06, "loss": 0.53, "step": 2647 }, { "epoch": 0.10993310811161448, "grad_norm": 2.764427900314331, "learning_rate": 9.833436167482825e-06, "loss": 0.5022, "step": 2648 }, { "epoch": 0.10997462363582582, "grad_norm": 2.441103935241699, "learning_rate": 9.833264037733198e-06, "loss": 0.5358, "step": 2649 }, { "epoch": 0.11001613916003716, "grad_norm": 2.6835315227508545, "learning_rate": 9.833091820596762e-06, "loss": 0.4806, "step": 2650 }, { "epoch": 0.1100576546842485, "grad_norm": 2.489769220352173, "learning_rate": 9.832919516076637e-06, "loss": 0.5272, "step": 2651 }, { "epoch": 0.11009917020845983, "grad_norm": 2.782073974609375, "learning_rate": 9.832747124175931e-06, "loss": 0.535, "step": 2652 }, { "epoch": 0.11014068573267116, "grad_norm": 2.419597625732422, "learning_rate": 9.832574644897766e-06, "loss": 0.4809, "step": 2653 }, { "epoch": 0.1101822012568825, "grad_norm": 2.8719422817230225, "learning_rate": 9.832402078245256e-06, "loss": 0.6226, "step": 2654 }, { "epoch": 0.11022371678109383, "grad_norm": 2.126950979232788, "learning_rate": 9.832229424221527e-06, "loss": 0.4842, "step": 2655 }, { "epoch": 0.11026523230530516, "grad_norm": 2.605358600616455, "learning_rate": 9.832056682829693e-06, "loss": 0.6324, "step": 2656 }, { "epoch": 0.1103067478295165, "grad_norm": 2.8135218620300293, "learning_rate": 9.831883854072885e-06, "loss": 0.5014, "step": 2657 }, { "epoch": 0.11034826335372784, "grad_norm": 2.8452157974243164, "learning_rate": 9.831710937954223e-06, "loss": 0.6295, "step": 2658 }, { "epoch": 0.11038977887793917, "grad_norm": 2.3458364009857178, "learning_rate": 9.831537934476834e-06, "loss": 0.4036, "step": 2659 }, { "epoch": 0.1104312944021505, "grad_norm": 2.6217479705810547, "learning_rate": 9.831364843643847e-06, "loss": 0.6207, "step": 2660 }, { "epoch": 0.11047280992636184, "grad_norm": 2.3954508304595947, "learning_rate": 9.831191665458391e-06, "loss": 0.4237, "step": 2661 }, { "epoch": 0.11051432545057317, "grad_norm": 2.8193857669830322, "learning_rate": 9.831018399923598e-06, "loss": 0.4818, "step": 2662 }, { "epoch": 0.1105558409747845, "grad_norm": 2.283719539642334, "learning_rate": 9.8308450470426e-06, "loss": 0.5058, "step": 2663 }, { "epoch": 0.11059735649899584, "grad_norm": 3.282559633255005, "learning_rate": 9.83067160681853e-06, "loss": 0.5424, "step": 2664 }, { "epoch": 0.11063887202320717, "grad_norm": 3.266902446746826, "learning_rate": 9.830498079254525e-06, "loss": 0.8229, "step": 2665 }, { "epoch": 0.11068038754741852, "grad_norm": 3.6203229427337646, "learning_rate": 9.830324464353721e-06, "loss": 0.5651, "step": 2666 }, { "epoch": 0.11072190307162985, "grad_norm": 2.607811450958252, "learning_rate": 9.83015076211926e-06, "loss": 0.5567, "step": 2667 }, { "epoch": 0.11076341859584118, "grad_norm": 2.589946746826172, "learning_rate": 9.829976972554282e-06, "loss": 0.4782, "step": 2668 }, { "epoch": 0.11080493412005252, "grad_norm": 2.359819173812866, "learning_rate": 9.82980309566193e-06, "loss": 0.615, "step": 2669 }, { "epoch": 0.11084644964426385, "grad_norm": 3.09389591217041, "learning_rate": 9.829629131445342e-06, "loss": 0.4022, "step": 2670 }, { "epoch": 0.11088796516847518, "grad_norm": 2.6577157974243164, "learning_rate": 9.829455079907669e-06, "loss": 0.6164, "step": 2671 }, { "epoch": 0.11092948069268652, "grad_norm": 3.288827896118164, "learning_rate": 9.829280941052056e-06, "loss": 0.6124, "step": 2672 }, { "epoch": 0.11097099621689786, "grad_norm": 2.209573984146118, "learning_rate": 9.82910671488165e-06, "loss": 0.5613, "step": 2673 }, { "epoch": 0.1110125117411092, "grad_norm": 2.778596878051758, "learning_rate": 9.828932401399604e-06, "loss": 0.3497, "step": 2674 }, { "epoch": 0.11105402726532053, "grad_norm": 2.6701090335845947, "learning_rate": 9.828758000609069e-06, "loss": 0.4916, "step": 2675 }, { "epoch": 0.11109554278953186, "grad_norm": 2.770005226135254, "learning_rate": 9.828583512513198e-06, "loss": 0.6373, "step": 2676 }, { "epoch": 0.1111370583137432, "grad_norm": 2.5289769172668457, "learning_rate": 9.828408937115144e-06, "loss": 0.5075, "step": 2677 }, { "epoch": 0.11117857383795453, "grad_norm": 2.3682987689971924, "learning_rate": 9.828234274418065e-06, "loss": 0.5125, "step": 2678 }, { "epoch": 0.11122008936216586, "grad_norm": 2.7980055809020996, "learning_rate": 9.82805952442512e-06, "loss": 0.5167, "step": 2679 }, { "epoch": 0.1112616048863772, "grad_norm": 2.801124095916748, "learning_rate": 9.827884687139465e-06, "loss": 0.5962, "step": 2680 }, { "epoch": 0.11130312041058854, "grad_norm": 2.9384210109710693, "learning_rate": 9.827709762564264e-06, "loss": 0.4976, "step": 2681 }, { "epoch": 0.11134463593479987, "grad_norm": 2.108748435974121, "learning_rate": 9.827534750702679e-06, "loss": 0.4611, "step": 2682 }, { "epoch": 0.1113861514590112, "grad_norm": 2.9746522903442383, "learning_rate": 9.827359651557875e-06, "loss": 0.5952, "step": 2683 }, { "epoch": 0.11142766698322254, "grad_norm": 2.720932722091675, "learning_rate": 9.827184465133018e-06, "loss": 0.5627, "step": 2684 }, { "epoch": 0.11146918250743387, "grad_norm": 2.507249355316162, "learning_rate": 9.827009191431271e-06, "loss": 0.5072, "step": 2685 }, { "epoch": 0.1115106980316452, "grad_norm": 2.682213544845581, "learning_rate": 9.826833830455809e-06, "loss": 0.5096, "step": 2686 }, { "epoch": 0.11155221355585654, "grad_norm": 2.476752996444702, "learning_rate": 9.8266583822098e-06, "loss": 0.4852, "step": 2687 }, { "epoch": 0.11159372908006787, "grad_norm": 2.5618574619293213, "learning_rate": 9.826482846696417e-06, "loss": 0.6117, "step": 2688 }, { "epoch": 0.11163524460427922, "grad_norm": 2.7924840450286865, "learning_rate": 9.826307223918832e-06, "loss": 0.4827, "step": 2689 }, { "epoch": 0.11167676012849055, "grad_norm": 2.1621973514556885, "learning_rate": 9.82613151388022e-06, "loss": 0.4, "step": 2690 }, { "epoch": 0.11171827565270188, "grad_norm": 2.5681095123291016, "learning_rate": 9.825955716583762e-06, "loss": 0.472, "step": 2691 }, { "epoch": 0.11175979117691322, "grad_norm": 2.969656467437744, "learning_rate": 9.825779832032632e-06, "loss": 0.5602, "step": 2692 }, { "epoch": 0.11180130670112455, "grad_norm": 2.674269199371338, "learning_rate": 9.825603860230011e-06, "loss": 0.7659, "step": 2693 }, { "epoch": 0.11184282222533588, "grad_norm": 1.9914528131484985, "learning_rate": 9.825427801179081e-06, "loss": 0.4323, "step": 2694 }, { "epoch": 0.11188433774954722, "grad_norm": 2.5081541538238525, "learning_rate": 9.825251654883028e-06, "loss": 0.497, "step": 2695 }, { "epoch": 0.11192585327375855, "grad_norm": 3.5542919635772705, "learning_rate": 9.825075421345032e-06, "loss": 0.6529, "step": 2696 }, { "epoch": 0.1119673687979699, "grad_norm": 2.882643699645996, "learning_rate": 9.824899100568283e-06, "loss": 0.64, "step": 2697 }, { "epoch": 0.11200888432218123, "grad_norm": 3.0705435276031494, "learning_rate": 9.824722692555969e-06, "loss": 0.5199, "step": 2698 }, { "epoch": 0.11205039984639256, "grad_norm": 2.22678542137146, "learning_rate": 9.824546197311275e-06, "loss": 0.5154, "step": 2699 }, { "epoch": 0.1120919153706039, "grad_norm": 2.448667049407959, "learning_rate": 9.824369614837397e-06, "loss": 0.5659, "step": 2700 }, { "epoch": 0.11213343089481523, "grad_norm": 2.5734269618988037, "learning_rate": 9.824192945137524e-06, "loss": 0.5768, "step": 2701 }, { "epoch": 0.11217494641902656, "grad_norm": 2.5432374477386475, "learning_rate": 9.824016188214854e-06, "loss": 0.4002, "step": 2702 }, { "epoch": 0.1122164619432379, "grad_norm": 2.756770372390747, "learning_rate": 9.823839344072582e-06, "loss": 0.3788, "step": 2703 }, { "epoch": 0.11225797746744924, "grad_norm": 2.539537191390991, "learning_rate": 9.823662412713901e-06, "loss": 0.5732, "step": 2704 }, { "epoch": 0.11229949299166057, "grad_norm": 3.019587993621826, "learning_rate": 9.823485394142015e-06, "loss": 0.6011, "step": 2705 }, { "epoch": 0.1123410085158719, "grad_norm": 2.763338804244995, "learning_rate": 9.823308288360122e-06, "loss": 0.6422, "step": 2706 }, { "epoch": 0.11238252404008324, "grad_norm": 2.5787301063537598, "learning_rate": 9.823131095371425e-06, "loss": 0.4074, "step": 2707 }, { "epoch": 0.11242403956429457, "grad_norm": 2.768226146697998, "learning_rate": 9.822953815179128e-06, "loss": 0.5247, "step": 2708 }, { "epoch": 0.1124655550885059, "grad_norm": 2.681149959564209, "learning_rate": 9.822776447786437e-06, "loss": 0.3993, "step": 2709 }, { "epoch": 0.11250707061271724, "grad_norm": 2.480166435241699, "learning_rate": 9.822598993196557e-06, "loss": 0.4107, "step": 2710 }, { "epoch": 0.11254858613692857, "grad_norm": 2.6142771244049072, "learning_rate": 9.822421451412697e-06, "loss": 0.5889, "step": 2711 }, { "epoch": 0.11259010166113992, "grad_norm": 2.353870391845703, "learning_rate": 9.822243822438066e-06, "loss": 0.5459, "step": 2712 }, { "epoch": 0.11263161718535125, "grad_norm": 2.8663604259490967, "learning_rate": 9.822066106275878e-06, "loss": 0.5668, "step": 2713 }, { "epoch": 0.11267313270956258, "grad_norm": 2.076655864715576, "learning_rate": 9.821888302929345e-06, "loss": 0.4205, "step": 2714 }, { "epoch": 0.11271464823377392, "grad_norm": 2.9748482704162598, "learning_rate": 9.821710412401682e-06, "loss": 0.4691, "step": 2715 }, { "epoch": 0.11275616375798525, "grad_norm": 2.619947671890259, "learning_rate": 9.821532434696104e-06, "loss": 0.4405, "step": 2716 }, { "epoch": 0.11279767928219658, "grad_norm": 2.5810186862945557, "learning_rate": 9.821354369815831e-06, "loss": 0.4747, "step": 2717 }, { "epoch": 0.11283919480640792, "grad_norm": 2.3885228633880615, "learning_rate": 9.821176217764081e-06, "loss": 0.5291, "step": 2718 }, { "epoch": 0.11288071033061925, "grad_norm": 2.2831711769104004, "learning_rate": 9.820997978544075e-06, "loss": 0.5248, "step": 2719 }, { "epoch": 0.1129222258548306, "grad_norm": 2.5562686920166016, "learning_rate": 9.820819652159036e-06, "loss": 0.6269, "step": 2720 }, { "epoch": 0.11296374137904193, "grad_norm": 2.412569046020508, "learning_rate": 9.820641238612187e-06, "loss": 0.6601, "step": 2721 }, { "epoch": 0.11300525690325326, "grad_norm": 2.6923720836639404, "learning_rate": 9.820462737906758e-06, "loss": 0.5934, "step": 2722 }, { "epoch": 0.1130467724274646, "grad_norm": 2.9533329010009766, "learning_rate": 9.82028415004597e-06, "loss": 0.5327, "step": 2723 }, { "epoch": 0.11308828795167593, "grad_norm": 2.9543700218200684, "learning_rate": 9.820105475033058e-06, "loss": 0.5932, "step": 2724 }, { "epoch": 0.11312980347588726, "grad_norm": 2.252769708633423, "learning_rate": 9.819926712871248e-06, "loss": 0.5639, "step": 2725 }, { "epoch": 0.1131713190000986, "grad_norm": 2.8798282146453857, "learning_rate": 9.819747863563773e-06, "loss": 0.4494, "step": 2726 }, { "epoch": 0.11321283452430994, "grad_norm": 2.9586851596832275, "learning_rate": 9.819568927113869e-06, "loss": 0.6012, "step": 2727 }, { "epoch": 0.11325435004852127, "grad_norm": 2.7426974773406982, "learning_rate": 9.819389903524769e-06, "loss": 0.5565, "step": 2728 }, { "epoch": 0.11329586557273261, "grad_norm": 3.1167356967926025, "learning_rate": 9.819210792799711e-06, "loss": 0.6274, "step": 2729 }, { "epoch": 0.11333738109694394, "grad_norm": 2.9453489780426025, "learning_rate": 9.819031594941932e-06, "loss": 0.592, "step": 2730 }, { "epoch": 0.11337889662115527, "grad_norm": 2.692891836166382, "learning_rate": 9.818852309954674e-06, "loss": 0.5559, "step": 2731 }, { "epoch": 0.1134204121453666, "grad_norm": 2.3653080463409424, "learning_rate": 9.818672937841176e-06, "loss": 0.5668, "step": 2732 }, { "epoch": 0.11346192766957794, "grad_norm": 2.844468832015991, "learning_rate": 9.818493478604683e-06, "loss": 0.3596, "step": 2733 }, { "epoch": 0.11350344319378927, "grad_norm": 2.9389967918395996, "learning_rate": 9.81831393224844e-06, "loss": 0.4517, "step": 2734 }, { "epoch": 0.11354495871800062, "grad_norm": 2.6107771396636963, "learning_rate": 9.81813429877569e-06, "loss": 0.4203, "step": 2735 }, { "epoch": 0.11358647424221195, "grad_norm": 2.544760227203369, "learning_rate": 9.817954578189686e-06, "loss": 0.5118, "step": 2736 }, { "epoch": 0.11362798976642328, "grad_norm": 3.1160435676574707, "learning_rate": 9.817774770493673e-06, "loss": 0.4543, "step": 2737 }, { "epoch": 0.11366950529063462, "grad_norm": 2.798072338104248, "learning_rate": 9.817594875690904e-06, "loss": 0.6651, "step": 2738 }, { "epoch": 0.11371102081484595, "grad_norm": 1.9219197034835815, "learning_rate": 9.81741489378463e-06, "loss": 0.4879, "step": 2739 }, { "epoch": 0.11375253633905728, "grad_norm": 2.679161787033081, "learning_rate": 9.817234824778106e-06, "loss": 0.3589, "step": 2740 }, { "epoch": 0.11379405186326862, "grad_norm": 2.63763689994812, "learning_rate": 9.817054668674589e-06, "loss": 0.4754, "step": 2741 }, { "epoch": 0.11383556738747995, "grad_norm": 3.377047300338745, "learning_rate": 9.816874425477334e-06, "loss": 0.6652, "step": 2742 }, { "epoch": 0.1138770829116913, "grad_norm": 3.568999767303467, "learning_rate": 9.816694095189602e-06, "loss": 0.5513, "step": 2743 }, { "epoch": 0.11391859843590263, "grad_norm": 2.90226149559021, "learning_rate": 9.816513677814654e-06, "loss": 0.5891, "step": 2744 }, { "epoch": 0.11396011396011396, "grad_norm": 2.440406322479248, "learning_rate": 9.816333173355748e-06, "loss": 0.6125, "step": 2745 }, { "epoch": 0.1140016294843253, "grad_norm": 2.171377420425415, "learning_rate": 9.81615258181615e-06, "loss": 0.513, "step": 2746 }, { "epoch": 0.11404314500853663, "grad_norm": 3.0029561519622803, "learning_rate": 9.815971903199126e-06, "loss": 0.5601, "step": 2747 }, { "epoch": 0.11408466053274796, "grad_norm": 2.604231834411621, "learning_rate": 9.815791137507942e-06, "loss": 0.6459, "step": 2748 }, { "epoch": 0.1141261760569593, "grad_norm": 3.034783363342285, "learning_rate": 9.815610284745865e-06, "loss": 0.5706, "step": 2749 }, { "epoch": 0.11416769158117063, "grad_norm": 3.067817449569702, "learning_rate": 9.815429344916167e-06, "loss": 0.4829, "step": 2750 }, { "epoch": 0.11420920710538197, "grad_norm": 2.853400707244873, "learning_rate": 9.815248318022118e-06, "loss": 0.6592, "step": 2751 }, { "epoch": 0.11425072262959331, "grad_norm": 2.782989025115967, "learning_rate": 9.815067204066993e-06, "loss": 0.5151, "step": 2752 }, { "epoch": 0.11429223815380464, "grad_norm": 2.3412926197052, "learning_rate": 9.814886003054065e-06, "loss": 0.481, "step": 2753 }, { "epoch": 0.11433375367801597, "grad_norm": 2.5814931392669678, "learning_rate": 9.81470471498661e-06, "loss": 0.6061, "step": 2754 }, { "epoch": 0.1143752692022273, "grad_norm": 2.5787813663482666, "learning_rate": 9.814523339867905e-06, "loss": 0.4242, "step": 2755 }, { "epoch": 0.11441678472643864, "grad_norm": 2.876849889755249, "learning_rate": 9.81434187770123e-06, "loss": 0.3536, "step": 2756 }, { "epoch": 0.11445830025064997, "grad_norm": 2.807889223098755, "learning_rate": 9.814160328489867e-06, "loss": 0.4978, "step": 2757 }, { "epoch": 0.11449981577486132, "grad_norm": 2.4239068031311035, "learning_rate": 9.813978692237098e-06, "loss": 0.587, "step": 2758 }, { "epoch": 0.11454133129907265, "grad_norm": 3.3805224895477295, "learning_rate": 9.813796968946206e-06, "loss": 0.3644, "step": 2759 }, { "epoch": 0.11458284682328398, "grad_norm": 2.586884021759033, "learning_rate": 9.813615158620477e-06, "loss": 0.4407, "step": 2760 }, { "epoch": 0.11462436234749532, "grad_norm": 2.1673500537872314, "learning_rate": 9.8134332612632e-06, "loss": 0.517, "step": 2761 }, { "epoch": 0.11466587787170665, "grad_norm": 2.7793452739715576, "learning_rate": 9.81325127687766e-06, "loss": 0.6139, "step": 2762 }, { "epoch": 0.11470739339591798, "grad_norm": 2.519571304321289, "learning_rate": 9.813069205467151e-06, "loss": 0.4457, "step": 2763 }, { "epoch": 0.11474890892012932, "grad_norm": 2.3267757892608643, "learning_rate": 9.812887047034965e-06, "loss": 0.6145, "step": 2764 }, { "epoch": 0.11479042444434065, "grad_norm": 3.1043460369110107, "learning_rate": 9.812704801584391e-06, "loss": 0.6658, "step": 2765 }, { "epoch": 0.114831939968552, "grad_norm": 2.5613207817077637, "learning_rate": 9.812522469118728e-06, "loss": 0.5582, "step": 2766 }, { "epoch": 0.11487345549276333, "grad_norm": 2.511338949203491, "learning_rate": 9.812340049641274e-06, "loss": 0.5178, "step": 2767 }, { "epoch": 0.11491497101697466, "grad_norm": 2.57399582862854, "learning_rate": 9.812157543155322e-06, "loss": 0.5379, "step": 2768 }, { "epoch": 0.114956486541186, "grad_norm": 2.713139772415161, "learning_rate": 9.811974949664176e-06, "loss": 0.4811, "step": 2769 }, { "epoch": 0.11499800206539733, "grad_norm": 2.5441324710845947, "learning_rate": 9.811792269171134e-06, "loss": 0.5444, "step": 2770 }, { "epoch": 0.11503951758960866, "grad_norm": 2.569746255874634, "learning_rate": 9.811609501679502e-06, "loss": 0.6492, "step": 2771 }, { "epoch": 0.11508103311382, "grad_norm": 3.020733594894409, "learning_rate": 9.811426647192584e-06, "loss": 0.553, "step": 2772 }, { "epoch": 0.11512254863803133, "grad_norm": 3.465506076812744, "learning_rate": 9.811243705713685e-06, "loss": 0.5529, "step": 2773 }, { "epoch": 0.11516406416224267, "grad_norm": 2.3417675495147705, "learning_rate": 9.811060677246113e-06, "loss": 0.5226, "step": 2774 }, { "epoch": 0.11520557968645401, "grad_norm": 2.536651134490967, "learning_rate": 9.810877561793178e-06, "loss": 0.647, "step": 2775 }, { "epoch": 0.11524709521066534, "grad_norm": 2.562720537185669, "learning_rate": 9.81069435935819e-06, "loss": 0.5224, "step": 2776 }, { "epoch": 0.11528861073487667, "grad_norm": 2.547063112258911, "learning_rate": 9.81051106994446e-06, "loss": 0.5661, "step": 2777 }, { "epoch": 0.115330126259088, "grad_norm": 2.7887470722198486, "learning_rate": 9.810327693555304e-06, "loss": 0.4323, "step": 2778 }, { "epoch": 0.11537164178329934, "grad_norm": 2.6986963748931885, "learning_rate": 9.810144230194036e-06, "loss": 0.612, "step": 2779 }, { "epoch": 0.11541315730751067, "grad_norm": 2.8088417053222656, "learning_rate": 9.809960679863976e-06, "loss": 0.4465, "step": 2780 }, { "epoch": 0.11545467283172202, "grad_norm": 2.744112014770508, "learning_rate": 9.80977704256844e-06, "loss": 0.5582, "step": 2781 }, { "epoch": 0.11549618835593335, "grad_norm": 2.5572946071624756, "learning_rate": 9.809593318310747e-06, "loss": 0.6556, "step": 2782 }, { "epoch": 0.11553770388014468, "grad_norm": 2.520066499710083, "learning_rate": 9.809409507094222e-06, "loss": 0.5449, "step": 2783 }, { "epoch": 0.11557921940435602, "grad_norm": 2.3104405403137207, "learning_rate": 9.809225608922187e-06, "loss": 0.5017, "step": 2784 }, { "epoch": 0.11562073492856735, "grad_norm": 2.8567817211151123, "learning_rate": 9.809041623797966e-06, "loss": 0.5537, "step": 2785 }, { "epoch": 0.11566225045277868, "grad_norm": 2.6982004642486572, "learning_rate": 9.808857551724886e-06, "loss": 0.4764, "step": 2786 }, { "epoch": 0.11570376597699002, "grad_norm": 2.6869266033172607, "learning_rate": 9.808673392706276e-06, "loss": 0.4985, "step": 2787 }, { "epoch": 0.11574528150120135, "grad_norm": 2.7912557125091553, "learning_rate": 9.808489146745466e-06, "loss": 0.6148, "step": 2788 }, { "epoch": 0.1157867970254127, "grad_norm": 2.9743008613586426, "learning_rate": 9.808304813845786e-06, "loss": 0.5406, "step": 2789 }, { "epoch": 0.11582831254962403, "grad_norm": 2.356778860092163, "learning_rate": 9.808120394010567e-06, "loss": 0.4566, "step": 2790 }, { "epoch": 0.11586982807383536, "grad_norm": 2.785207748413086, "learning_rate": 9.807935887243147e-06, "loss": 0.6, "step": 2791 }, { "epoch": 0.1159113435980467, "grad_norm": 2.4587461948394775, "learning_rate": 9.807751293546861e-06, "loss": 0.5129, "step": 2792 }, { "epoch": 0.11595285912225803, "grad_norm": 2.319113254547119, "learning_rate": 9.807566612925044e-06, "loss": 0.5301, "step": 2793 }, { "epoch": 0.11599437464646936, "grad_norm": 2.792325735092163, "learning_rate": 9.807381845381038e-06, "loss": 0.6865, "step": 2794 }, { "epoch": 0.1160358901706807, "grad_norm": 2.7209415435791016, "learning_rate": 9.807196990918184e-06, "loss": 0.5294, "step": 2795 }, { "epoch": 0.11607740569489203, "grad_norm": 2.760619640350342, "learning_rate": 9.807012049539822e-06, "loss": 0.5217, "step": 2796 }, { "epoch": 0.11611892121910337, "grad_norm": 2.530602216720581, "learning_rate": 9.806827021249295e-06, "loss": 0.4873, "step": 2797 }, { "epoch": 0.11616043674331471, "grad_norm": 3.3139960765838623, "learning_rate": 9.806641906049952e-06, "loss": 0.4849, "step": 2798 }, { "epoch": 0.11620195226752604, "grad_norm": 2.845773220062256, "learning_rate": 9.806456703945137e-06, "loss": 0.6112, "step": 2799 }, { "epoch": 0.11624346779173737, "grad_norm": 3.456141710281372, "learning_rate": 9.806271414938199e-06, "loss": 0.5407, "step": 2800 }, { "epoch": 0.1162849833159487, "grad_norm": 3.158907890319824, "learning_rate": 9.806086039032489e-06, "loss": 0.5873, "step": 2801 }, { "epoch": 0.11632649884016004, "grad_norm": 2.6149823665618896, "learning_rate": 9.805900576231358e-06, "loss": 0.5836, "step": 2802 }, { "epoch": 0.11636801436437137, "grad_norm": 2.596395492553711, "learning_rate": 9.805715026538158e-06, "loss": 0.4959, "step": 2803 }, { "epoch": 0.1164095298885827, "grad_norm": 2.8525359630584717, "learning_rate": 9.805529389956248e-06, "loss": 0.4749, "step": 2804 }, { "epoch": 0.11645104541279405, "grad_norm": 2.9856390953063965, "learning_rate": 9.805343666488981e-06, "loss": 0.5852, "step": 2805 }, { "epoch": 0.11649256093700538, "grad_norm": 2.136337995529175, "learning_rate": 9.805157856139713e-06, "loss": 0.495, "step": 2806 }, { "epoch": 0.11653407646121672, "grad_norm": 2.7076399326324463, "learning_rate": 9.80497195891181e-06, "loss": 0.5051, "step": 2807 }, { "epoch": 0.11657559198542805, "grad_norm": 2.286522388458252, "learning_rate": 9.804785974808623e-06, "loss": 0.4099, "step": 2808 }, { "epoch": 0.11661710750963938, "grad_norm": 2.7043039798736572, "learning_rate": 9.804599903833525e-06, "loss": 0.5471, "step": 2809 }, { "epoch": 0.11665862303385072, "grad_norm": 2.8899824619293213, "learning_rate": 9.804413745989876e-06, "loss": 0.4755, "step": 2810 }, { "epoch": 0.11670013855806205, "grad_norm": 2.9711835384368896, "learning_rate": 9.804227501281041e-06, "loss": 0.5681, "step": 2811 }, { "epoch": 0.1167416540822734, "grad_norm": 2.6321499347686768, "learning_rate": 9.804041169710388e-06, "loss": 0.544, "step": 2812 }, { "epoch": 0.11678316960648473, "grad_norm": 2.415409803390503, "learning_rate": 9.803854751281285e-06, "loss": 0.4117, "step": 2813 }, { "epoch": 0.11682468513069606, "grad_norm": 2.3733675479888916, "learning_rate": 9.803668245997104e-06, "loss": 0.475, "step": 2814 }, { "epoch": 0.1168662006549074, "grad_norm": 2.5347635746002197, "learning_rate": 9.803481653861217e-06, "loss": 0.5233, "step": 2815 }, { "epoch": 0.11690771617911873, "grad_norm": 2.384546995162964, "learning_rate": 9.803294974876997e-06, "loss": 0.587, "step": 2816 }, { "epoch": 0.11694923170333006, "grad_norm": 2.6117875576019287, "learning_rate": 9.80310820904782e-06, "loss": 0.4977, "step": 2817 }, { "epoch": 0.1169907472275414, "grad_norm": 2.724602222442627, "learning_rate": 9.802921356377062e-06, "loss": 0.5841, "step": 2818 }, { "epoch": 0.11703226275175273, "grad_norm": 2.325421094894409, "learning_rate": 9.802734416868102e-06, "loss": 0.5522, "step": 2819 }, { "epoch": 0.11707377827596407, "grad_norm": 2.520617961883545, "learning_rate": 9.802547390524317e-06, "loss": 0.5208, "step": 2820 }, { "epoch": 0.11711529380017541, "grad_norm": 2.7656917572021484, "learning_rate": 9.802360277349094e-06, "loss": 0.5687, "step": 2821 }, { "epoch": 0.11715680932438674, "grad_norm": 2.3516592979431152, "learning_rate": 9.802173077345811e-06, "loss": 0.5813, "step": 2822 }, { "epoch": 0.11719832484859807, "grad_norm": 2.9473423957824707, "learning_rate": 9.801985790517856e-06, "loss": 0.6817, "step": 2823 }, { "epoch": 0.1172398403728094, "grad_norm": 2.7587740421295166, "learning_rate": 9.801798416868613e-06, "loss": 0.4957, "step": 2824 }, { "epoch": 0.11728135589702074, "grad_norm": 2.77721905708313, "learning_rate": 9.80161095640147e-06, "loss": 0.5949, "step": 2825 }, { "epoch": 0.11732287142123207, "grad_norm": 2.551403045654297, "learning_rate": 9.801423409119817e-06, "loss": 0.5367, "step": 2826 }, { "epoch": 0.1173643869454434, "grad_norm": 2.8182642459869385, "learning_rate": 9.801235775027047e-06, "loss": 0.5712, "step": 2827 }, { "epoch": 0.11740590246965475, "grad_norm": 2.348986864089966, "learning_rate": 9.801048054126547e-06, "loss": 0.3636, "step": 2828 }, { "epoch": 0.11744741799386609, "grad_norm": 2.3767507076263428, "learning_rate": 9.800860246421717e-06, "loss": 0.4395, "step": 2829 }, { "epoch": 0.11748893351807742, "grad_norm": 2.134706974029541, "learning_rate": 9.80067235191595e-06, "loss": 0.4541, "step": 2830 }, { "epoch": 0.11753044904228875, "grad_norm": 3.071812391281128, "learning_rate": 9.800484370612642e-06, "loss": 0.3898, "step": 2831 }, { "epoch": 0.11757196456650008, "grad_norm": 2.6976635456085205, "learning_rate": 9.800296302515194e-06, "loss": 0.526, "step": 2832 }, { "epoch": 0.11761348009071142, "grad_norm": 2.6968231201171875, "learning_rate": 9.800108147627004e-06, "loss": 0.415, "step": 2833 }, { "epoch": 0.11765499561492275, "grad_norm": 2.4005982875823975, "learning_rate": 9.799919905951474e-06, "loss": 0.6134, "step": 2834 }, { "epoch": 0.1176965111391341, "grad_norm": 2.537700653076172, "learning_rate": 9.79973157749201e-06, "loss": 0.679, "step": 2835 }, { "epoch": 0.11773802666334543, "grad_norm": 2.9581024646759033, "learning_rate": 9.799543162252016e-06, "loss": 0.5787, "step": 2836 }, { "epoch": 0.11777954218755676, "grad_norm": 2.164153575897217, "learning_rate": 9.799354660234898e-06, "loss": 0.3783, "step": 2837 }, { "epoch": 0.1178210577117681, "grad_norm": 2.5850679874420166, "learning_rate": 9.799166071444066e-06, "loss": 0.5423, "step": 2838 }, { "epoch": 0.11786257323597943, "grad_norm": 3.3295748233795166, "learning_rate": 9.798977395882926e-06, "loss": 0.5177, "step": 2839 }, { "epoch": 0.11790408876019076, "grad_norm": 2.923797607421875, "learning_rate": 9.798788633554892e-06, "loss": 0.4911, "step": 2840 }, { "epoch": 0.1179456042844021, "grad_norm": 2.688791513442993, "learning_rate": 9.798599784463376e-06, "loss": 0.5603, "step": 2841 }, { "epoch": 0.11798711980861343, "grad_norm": 2.584376335144043, "learning_rate": 9.798410848611795e-06, "loss": 0.5243, "step": 2842 }, { "epoch": 0.11802863533282477, "grad_norm": 2.617943286895752, "learning_rate": 9.79822182600356e-06, "loss": 0.5413, "step": 2843 }, { "epoch": 0.11807015085703611, "grad_norm": 2.629509210586548, "learning_rate": 9.798032716642092e-06, "loss": 0.4083, "step": 2844 }, { "epoch": 0.11811166638124744, "grad_norm": 2.896056652069092, "learning_rate": 9.79784352053081e-06, "loss": 0.4987, "step": 2845 }, { "epoch": 0.11815318190545877, "grad_norm": 3.4974329471588135, "learning_rate": 9.797654237673136e-06, "loss": 0.6717, "step": 2846 }, { "epoch": 0.1181946974296701, "grad_norm": 2.7589917182922363, "learning_rate": 9.797464868072489e-06, "loss": 0.6433, "step": 2847 }, { "epoch": 0.11823621295388144, "grad_norm": 2.8208141326904297, "learning_rate": 9.797275411732294e-06, "loss": 0.5379, "step": 2848 }, { "epoch": 0.11827772847809277, "grad_norm": 2.539079189300537, "learning_rate": 9.797085868655976e-06, "loss": 0.3932, "step": 2849 }, { "epoch": 0.1183192440023041, "grad_norm": 2.916073799133301, "learning_rate": 9.796896238846964e-06, "loss": 0.5518, "step": 2850 }, { "epoch": 0.11836075952651545, "grad_norm": 2.255007743835449, "learning_rate": 9.796706522308686e-06, "loss": 0.4421, "step": 2851 }, { "epoch": 0.11840227505072679, "grad_norm": 2.396298408508301, "learning_rate": 9.79651671904457e-06, "loss": 0.5021, "step": 2852 }, { "epoch": 0.11844379057493812, "grad_norm": 2.611372709274292, "learning_rate": 9.79632682905805e-06, "loss": 0.5169, "step": 2853 }, { "epoch": 0.11848530609914945, "grad_norm": 2.5337321758270264, "learning_rate": 9.796136852352558e-06, "loss": 0.5391, "step": 2854 }, { "epoch": 0.11852682162336078, "grad_norm": 2.886423110961914, "learning_rate": 9.79594678893153e-06, "loss": 0.6014, "step": 2855 }, { "epoch": 0.11856833714757212, "grad_norm": 3.0724761486053467, "learning_rate": 9.795756638798401e-06, "loss": 0.4226, "step": 2856 }, { "epoch": 0.11860985267178345, "grad_norm": 2.882234573364258, "learning_rate": 9.79556640195661e-06, "loss": 0.43, "step": 2857 }, { "epoch": 0.11865136819599478, "grad_norm": 2.8125791549682617, "learning_rate": 9.795376078409598e-06, "loss": 0.4722, "step": 2858 }, { "epoch": 0.11869288372020613, "grad_norm": 2.427187204360962, "learning_rate": 9.795185668160803e-06, "loss": 0.571, "step": 2859 }, { "epoch": 0.11873439924441746, "grad_norm": 2.9197518825531006, "learning_rate": 9.794995171213668e-06, "loss": 0.5739, "step": 2860 }, { "epoch": 0.1187759147686288, "grad_norm": 2.4904637336730957, "learning_rate": 9.794804587571638e-06, "loss": 0.5147, "step": 2861 }, { "epoch": 0.11881743029284013, "grad_norm": 3.137661933898926, "learning_rate": 9.794613917238162e-06, "loss": 0.5768, "step": 2862 }, { "epoch": 0.11885894581705146, "grad_norm": 2.6787493228912354, "learning_rate": 9.794423160216682e-06, "loss": 0.5399, "step": 2863 }, { "epoch": 0.1189004613412628, "grad_norm": 3.631600856781006, "learning_rate": 9.79423231651065e-06, "loss": 0.5341, "step": 2864 }, { "epoch": 0.11894197686547413, "grad_norm": 2.858733654022217, "learning_rate": 9.794041386123517e-06, "loss": 0.4178, "step": 2865 }, { "epoch": 0.11898349238968547, "grad_norm": 2.6288158893585205, "learning_rate": 9.793850369058731e-06, "loss": 0.4501, "step": 2866 }, { "epoch": 0.11902500791389681, "grad_norm": 2.8187365531921387, "learning_rate": 9.79365926531975e-06, "loss": 0.6165, "step": 2867 }, { "epoch": 0.11906652343810814, "grad_norm": 2.553069591522217, "learning_rate": 9.793468074910028e-06, "loss": 0.4829, "step": 2868 }, { "epoch": 0.11910803896231947, "grad_norm": 2.8986973762512207, "learning_rate": 9.793276797833023e-06, "loss": 0.5276, "step": 2869 }, { "epoch": 0.1191495544865308, "grad_norm": 3.151914358139038, "learning_rate": 9.793085434092188e-06, "loss": 0.472, "step": 2870 }, { "epoch": 0.11919107001074214, "grad_norm": 2.529589891433716, "learning_rate": 9.792893983690991e-06, "loss": 0.4715, "step": 2871 }, { "epoch": 0.11923258553495347, "grad_norm": 3.191443681716919, "learning_rate": 9.792702446632888e-06, "loss": 0.5564, "step": 2872 }, { "epoch": 0.1192741010591648, "grad_norm": 3.2495014667510986, "learning_rate": 9.792510822921343e-06, "loss": 0.5768, "step": 2873 }, { "epoch": 0.11931561658337615, "grad_norm": 2.624131441116333, "learning_rate": 9.792319112559821e-06, "loss": 0.499, "step": 2874 }, { "epoch": 0.11935713210758749, "grad_norm": 2.325711488723755, "learning_rate": 9.792127315551789e-06, "loss": 0.5127, "step": 2875 }, { "epoch": 0.11939864763179882, "grad_norm": 2.4559435844421387, "learning_rate": 9.791935431900712e-06, "loss": 0.447, "step": 2876 }, { "epoch": 0.11944016315601015, "grad_norm": 2.4525082111358643, "learning_rate": 9.791743461610062e-06, "loss": 0.56, "step": 2877 }, { "epoch": 0.11948167868022148, "grad_norm": 2.2915141582489014, "learning_rate": 9.79155140468331e-06, "loss": 0.5912, "step": 2878 }, { "epoch": 0.11952319420443282, "grad_norm": 2.8259897232055664, "learning_rate": 9.791359261123927e-06, "loss": 0.5085, "step": 2879 }, { "epoch": 0.11956470972864415, "grad_norm": 2.3277440071105957, "learning_rate": 9.791167030935387e-06, "loss": 0.5263, "step": 2880 }, { "epoch": 0.11960622525285548, "grad_norm": 2.79777455329895, "learning_rate": 9.790974714121166e-06, "loss": 0.6151, "step": 2881 }, { "epoch": 0.11964774077706683, "grad_norm": 2.3377668857574463, "learning_rate": 9.790782310684743e-06, "loss": 0.6029, "step": 2882 }, { "epoch": 0.11968925630127816, "grad_norm": 2.93155837059021, "learning_rate": 9.790589820629594e-06, "loss": 0.606, "step": 2883 }, { "epoch": 0.1197307718254895, "grad_norm": 3.2602596282958984, "learning_rate": 9.7903972439592e-06, "loss": 0.4149, "step": 2884 }, { "epoch": 0.11977228734970083, "grad_norm": 2.817826747894287, "learning_rate": 9.790204580677043e-06, "loss": 0.657, "step": 2885 }, { "epoch": 0.11981380287391216, "grad_norm": 2.4822676181793213, "learning_rate": 9.790011830786606e-06, "loss": 0.4349, "step": 2886 }, { "epoch": 0.1198553183981235, "grad_norm": 2.5469446182250977, "learning_rate": 9.789818994291375e-06, "loss": 0.5786, "step": 2887 }, { "epoch": 0.11989683392233483, "grad_norm": 2.495469093322754, "learning_rate": 9.789626071194835e-06, "loss": 0.6137, "step": 2888 }, { "epoch": 0.11993834944654617, "grad_norm": 2.682619571685791, "learning_rate": 9.789433061500477e-06, "loss": 0.4357, "step": 2889 }, { "epoch": 0.11997986497075751, "grad_norm": 2.448871374130249, "learning_rate": 9.789239965211788e-06, "loss": 0.682, "step": 2890 }, { "epoch": 0.12002138049496884, "grad_norm": 2.2845301628112793, "learning_rate": 9.78904678233226e-06, "loss": 0.4818, "step": 2891 }, { "epoch": 0.12006289601918017, "grad_norm": 2.7250468730926514, "learning_rate": 9.788853512865387e-06, "loss": 0.4875, "step": 2892 }, { "epoch": 0.1201044115433915, "grad_norm": 2.8129684925079346, "learning_rate": 9.78866015681466e-06, "loss": 0.596, "step": 2893 }, { "epoch": 0.12014592706760284, "grad_norm": 2.598823070526123, "learning_rate": 9.788466714183578e-06, "loss": 0.5058, "step": 2894 }, { "epoch": 0.12018744259181417, "grad_norm": 2.5511667728424072, "learning_rate": 9.788273184975638e-06, "loss": 0.5414, "step": 2895 }, { "epoch": 0.1202289581160255, "grad_norm": 2.7557156085968018, "learning_rate": 9.78807956919434e-06, "loss": 0.4804, "step": 2896 }, { "epoch": 0.12027047364023685, "grad_norm": 2.1988749504089355, "learning_rate": 9.787885866843182e-06, "loss": 0.443, "step": 2897 }, { "epoch": 0.12031198916444819, "grad_norm": 2.336840867996216, "learning_rate": 9.787692077925665e-06, "loss": 0.4624, "step": 2898 }, { "epoch": 0.12035350468865952, "grad_norm": 2.7052268981933594, "learning_rate": 9.787498202445298e-06, "loss": 0.5353, "step": 2899 }, { "epoch": 0.12039502021287085, "grad_norm": 2.027329444885254, "learning_rate": 9.787304240405584e-06, "loss": 0.3892, "step": 2900 }, { "epoch": 0.12043653573708218, "grad_norm": 2.5242302417755127, "learning_rate": 9.787110191810027e-06, "loss": 0.3293, "step": 2901 }, { "epoch": 0.12047805126129352, "grad_norm": 2.5059261322021484, "learning_rate": 9.786916056662142e-06, "loss": 0.47, "step": 2902 }, { "epoch": 0.12051956678550485, "grad_norm": 3.0546042919158936, "learning_rate": 9.786721834965431e-06, "loss": 0.4689, "step": 2903 }, { "epoch": 0.12056108230971618, "grad_norm": 3.04665207862854, "learning_rate": 9.786527526723412e-06, "loss": 0.6366, "step": 2904 }, { "epoch": 0.12060259783392753, "grad_norm": 2.454413652420044, "learning_rate": 9.786333131939593e-06, "loss": 0.5793, "step": 2905 }, { "epoch": 0.12064411335813886, "grad_norm": 2.648599863052368, "learning_rate": 9.786138650617494e-06, "loss": 0.4616, "step": 2906 }, { "epoch": 0.1206856288823502, "grad_norm": 2.7983222007751465, "learning_rate": 9.785944082760627e-06, "loss": 0.5483, "step": 2907 }, { "epoch": 0.12072714440656153, "grad_norm": 2.895383358001709, "learning_rate": 9.785749428372513e-06, "loss": 0.5472, "step": 2908 }, { "epoch": 0.12076865993077286, "grad_norm": 2.3301005363464355, "learning_rate": 9.78555468745667e-06, "loss": 0.4685, "step": 2909 }, { "epoch": 0.1208101754549842, "grad_norm": 2.510883092880249, "learning_rate": 9.78535986001662e-06, "loss": 0.4218, "step": 2910 }, { "epoch": 0.12085169097919553, "grad_norm": 2.4058213233947754, "learning_rate": 9.78516494605588e-06, "loss": 0.4733, "step": 2911 }, { "epoch": 0.12089320650340686, "grad_norm": 2.7033534049987793, "learning_rate": 9.784969945577982e-06, "loss": 0.6243, "step": 2912 }, { "epoch": 0.12093472202761821, "grad_norm": 2.6367454528808594, "learning_rate": 9.784774858586448e-06, "loss": 0.6059, "step": 2913 }, { "epoch": 0.12097623755182954, "grad_norm": 2.7722373008728027, "learning_rate": 9.784579685084805e-06, "loss": 0.4587, "step": 2914 }, { "epoch": 0.12101775307604087, "grad_norm": 2.7843594551086426, "learning_rate": 9.784384425076582e-06, "loss": 0.5173, "step": 2915 }, { "epoch": 0.1210592686002522, "grad_norm": 2.4567036628723145, "learning_rate": 9.78418907856531e-06, "loss": 0.5221, "step": 2916 }, { "epoch": 0.12110078412446354, "grad_norm": 2.588470697402954, "learning_rate": 9.78399364555452e-06, "loss": 0.6092, "step": 2917 }, { "epoch": 0.12114229964867487, "grad_norm": 2.492170810699463, "learning_rate": 9.783798126047746e-06, "loss": 0.5277, "step": 2918 }, { "epoch": 0.1211838151728862, "grad_norm": 2.567647933959961, "learning_rate": 9.783602520048524e-06, "loss": 0.527, "step": 2919 }, { "epoch": 0.12122533069709755, "grad_norm": 2.330111503601074, "learning_rate": 9.783406827560388e-06, "loss": 0.4638, "step": 2920 }, { "epoch": 0.12126684622130889, "grad_norm": 2.6298017501831055, "learning_rate": 9.783211048586879e-06, "loss": 0.6074, "step": 2921 }, { "epoch": 0.12130836174552022, "grad_norm": 2.7034077644348145, "learning_rate": 9.783015183131534e-06, "loss": 0.4238, "step": 2922 }, { "epoch": 0.12134987726973155, "grad_norm": 2.5394070148468018, "learning_rate": 9.782819231197898e-06, "loss": 0.5646, "step": 2923 }, { "epoch": 0.12139139279394288, "grad_norm": 2.442915916442871, "learning_rate": 9.782623192789511e-06, "loss": 0.5523, "step": 2924 }, { "epoch": 0.12143290831815422, "grad_norm": 3.53918719291687, "learning_rate": 9.782427067909918e-06, "loss": 0.454, "step": 2925 }, { "epoch": 0.12147442384236555, "grad_norm": 2.322469711303711, "learning_rate": 9.782230856562664e-06, "loss": 0.481, "step": 2926 }, { "epoch": 0.12151593936657688, "grad_norm": 2.269378900527954, "learning_rate": 9.7820345587513e-06, "loss": 0.4046, "step": 2927 }, { "epoch": 0.12155745489078823, "grad_norm": 3.1678102016448975, "learning_rate": 9.781838174479372e-06, "loss": 0.656, "step": 2928 }, { "epoch": 0.12159897041499956, "grad_norm": 2.6700329780578613, "learning_rate": 9.781641703750431e-06, "loss": 0.5339, "step": 2929 }, { "epoch": 0.1216404859392109, "grad_norm": 2.6582441329956055, "learning_rate": 9.781445146568029e-06, "loss": 0.6183, "step": 2930 }, { "epoch": 0.12168200146342223, "grad_norm": 2.488219738006592, "learning_rate": 9.781248502935723e-06, "loss": 0.4496, "step": 2931 }, { "epoch": 0.12172351698763356, "grad_norm": 2.305734157562256, "learning_rate": 9.781051772857064e-06, "loss": 0.5014, "step": 2932 }, { "epoch": 0.1217650325118449, "grad_norm": 2.284926176071167, "learning_rate": 9.780854956335613e-06, "loss": 0.5133, "step": 2933 }, { "epoch": 0.12180654803605623, "grad_norm": 2.591334104537964, "learning_rate": 9.780658053374923e-06, "loss": 0.3978, "step": 2934 }, { "epoch": 0.12184806356026756, "grad_norm": 3.1655449867248535, "learning_rate": 9.78046106397856e-06, "loss": 0.4616, "step": 2935 }, { "epoch": 0.12188957908447891, "grad_norm": 2.5437779426574707, "learning_rate": 9.780263988150082e-06, "loss": 0.5768, "step": 2936 }, { "epoch": 0.12193109460869024, "grad_norm": 3.188997745513916, "learning_rate": 9.780066825893055e-06, "loss": 0.5157, "step": 2937 }, { "epoch": 0.12197261013290157, "grad_norm": 2.8942620754241943, "learning_rate": 9.77986957721104e-06, "loss": 0.4764, "step": 2938 }, { "epoch": 0.1220141256571129, "grad_norm": 3.0787127017974854, "learning_rate": 9.779672242107608e-06, "loss": 0.5585, "step": 2939 }, { "epoch": 0.12205564118132424, "grad_norm": 2.2283670902252197, "learning_rate": 9.779474820586321e-06, "loss": 0.6471, "step": 2940 }, { "epoch": 0.12209715670553557, "grad_norm": 2.296212673187256, "learning_rate": 9.779277312650752e-06, "loss": 0.5356, "step": 2941 }, { "epoch": 0.1221386722297469, "grad_norm": 2.359855890274048, "learning_rate": 9.779079718304473e-06, "loss": 0.5923, "step": 2942 }, { "epoch": 0.12218018775395825, "grad_norm": 2.649111747741699, "learning_rate": 9.778882037551056e-06, "loss": 0.6121, "step": 2943 }, { "epoch": 0.12222170327816959, "grad_norm": 2.424142599105835, "learning_rate": 9.778684270394072e-06, "loss": 0.5839, "step": 2944 }, { "epoch": 0.12226321880238092, "grad_norm": 2.1734869480133057, "learning_rate": 9.7784864168371e-06, "loss": 0.5438, "step": 2945 }, { "epoch": 0.12230473432659225, "grad_norm": 3.800710678100586, "learning_rate": 9.778288476883716e-06, "loss": 0.5466, "step": 2946 }, { "epoch": 0.12234624985080358, "grad_norm": 2.4155287742614746, "learning_rate": 9.7780904505375e-06, "loss": 0.3874, "step": 2947 }, { "epoch": 0.12238776537501492, "grad_norm": 3.2854464054107666, "learning_rate": 9.777892337802033e-06, "loss": 0.3587, "step": 2948 }, { "epoch": 0.12242928089922625, "grad_norm": 2.4695048332214355, "learning_rate": 9.777694138680893e-06, "loss": 0.5123, "step": 2949 }, { "epoch": 0.12247079642343758, "grad_norm": 2.6969587802886963, "learning_rate": 9.777495853177665e-06, "loss": 0.5131, "step": 2950 }, { "epoch": 0.12251231194764893, "grad_norm": 2.9779369831085205, "learning_rate": 9.777297481295937e-06, "loss": 0.5129, "step": 2951 }, { "epoch": 0.12255382747186026, "grad_norm": 2.624800682067871, "learning_rate": 9.777099023039293e-06, "loss": 0.471, "step": 2952 }, { "epoch": 0.1225953429960716, "grad_norm": 2.6667654514312744, "learning_rate": 9.776900478411322e-06, "loss": 0.5046, "step": 2953 }, { "epoch": 0.12263685852028293, "grad_norm": 2.915877103805542, "learning_rate": 9.776701847415613e-06, "loss": 0.4261, "step": 2954 }, { "epoch": 0.12267837404449426, "grad_norm": 2.521939754486084, "learning_rate": 9.776503130055758e-06, "loss": 0.6374, "step": 2955 }, { "epoch": 0.1227198895687056, "grad_norm": 3.41819429397583, "learning_rate": 9.77630432633535e-06, "loss": 0.4991, "step": 2956 }, { "epoch": 0.12276140509291693, "grad_norm": 2.381333827972412, "learning_rate": 9.776105436257982e-06, "loss": 0.4342, "step": 2957 }, { "epoch": 0.12280292061712826, "grad_norm": 2.3119471073150635, "learning_rate": 9.775906459827252e-06, "loss": 0.5424, "step": 2958 }, { "epoch": 0.12284443614133961, "grad_norm": 2.6740026473999023, "learning_rate": 9.775707397046756e-06, "loss": 0.5097, "step": 2959 }, { "epoch": 0.12288595166555094, "grad_norm": 2.1691219806671143, "learning_rate": 9.775508247920094e-06, "loss": 0.4782, "step": 2960 }, { "epoch": 0.12292746718976227, "grad_norm": 2.7727596759796143, "learning_rate": 9.775309012450867e-06, "loss": 0.7308, "step": 2961 }, { "epoch": 0.1229689827139736, "grad_norm": 2.4540088176727295, "learning_rate": 9.775109690642676e-06, "loss": 0.4323, "step": 2962 }, { "epoch": 0.12301049823818494, "grad_norm": 2.952864646911621, "learning_rate": 9.774910282499126e-06, "loss": 0.5814, "step": 2963 }, { "epoch": 0.12305201376239627, "grad_norm": 3.144115924835205, "learning_rate": 9.774710788023823e-06, "loss": 0.641, "step": 2964 }, { "epoch": 0.1230935292866076, "grad_norm": 2.19804310798645, "learning_rate": 9.774511207220369e-06, "loss": 0.5091, "step": 2965 }, { "epoch": 0.12313504481081894, "grad_norm": 2.56143856048584, "learning_rate": 9.77431154009238e-06, "loss": 0.5945, "step": 2966 }, { "epoch": 0.12317656033503029, "grad_norm": 3.3241734504699707, "learning_rate": 9.77411178664346e-06, "loss": 0.6084, "step": 2967 }, { "epoch": 0.12321807585924162, "grad_norm": 2.3613970279693604, "learning_rate": 9.773911946877223e-06, "loss": 0.4637, "step": 2968 }, { "epoch": 0.12325959138345295, "grad_norm": 2.522777557373047, "learning_rate": 9.773712020797283e-06, "loss": 0.4703, "step": 2969 }, { "epoch": 0.12330110690766428, "grad_norm": 2.5287394523620605, "learning_rate": 9.773512008407254e-06, "loss": 0.5061, "step": 2970 }, { "epoch": 0.12334262243187562, "grad_norm": 2.4240305423736572, "learning_rate": 9.773311909710751e-06, "loss": 0.497, "step": 2971 }, { "epoch": 0.12338413795608695, "grad_norm": 2.574803113937378, "learning_rate": 9.773111724711394e-06, "loss": 0.4651, "step": 2972 }, { "epoch": 0.12342565348029828, "grad_norm": 2.757314443588257, "learning_rate": 9.7729114534128e-06, "loss": 0.5334, "step": 2973 }, { "epoch": 0.12346716900450963, "grad_norm": 2.9297351837158203, "learning_rate": 9.772711095818592e-06, "loss": 0.4588, "step": 2974 }, { "epoch": 0.12350868452872096, "grad_norm": 3.010810375213623, "learning_rate": 9.772510651932392e-06, "loss": 0.4496, "step": 2975 }, { "epoch": 0.1235502000529323, "grad_norm": 2.4538726806640625, "learning_rate": 9.772310121757822e-06, "loss": 0.4437, "step": 2976 }, { "epoch": 0.12359171557714363, "grad_norm": 2.3643925189971924, "learning_rate": 9.772109505298511e-06, "loss": 0.5106, "step": 2977 }, { "epoch": 0.12363323110135496, "grad_norm": 3.224104642868042, "learning_rate": 9.771908802558085e-06, "loss": 0.6264, "step": 2978 }, { "epoch": 0.1236747466255663, "grad_norm": 2.347201347351074, "learning_rate": 9.77170801354017e-06, "loss": 0.5349, "step": 2979 }, { "epoch": 0.12371626214977763, "grad_norm": 3.4931867122650146, "learning_rate": 9.771507138248402e-06, "loss": 0.5185, "step": 2980 }, { "epoch": 0.12375777767398896, "grad_norm": 2.5449044704437256, "learning_rate": 9.771306176686408e-06, "loss": 0.4449, "step": 2981 }, { "epoch": 0.12379929319820031, "grad_norm": 3.00561785697937, "learning_rate": 9.771105128857824e-06, "loss": 0.6053, "step": 2982 }, { "epoch": 0.12384080872241164, "grad_norm": 2.587413787841797, "learning_rate": 9.770903994766283e-06, "loss": 0.4577, "step": 2983 }, { "epoch": 0.12388232424662297, "grad_norm": 2.9417455196380615, "learning_rate": 9.770702774415424e-06, "loss": 0.5658, "step": 2984 }, { "epoch": 0.12392383977083431, "grad_norm": 2.923578977584839, "learning_rate": 9.770501467808882e-06, "loss": 0.6557, "step": 2985 }, { "epoch": 0.12396535529504564, "grad_norm": 2.830894947052002, "learning_rate": 9.7703000749503e-06, "loss": 0.4504, "step": 2986 }, { "epoch": 0.12400687081925697, "grad_norm": 3.025904655456543, "learning_rate": 9.770098595843315e-06, "loss": 0.6699, "step": 2987 }, { "epoch": 0.1240483863434683, "grad_norm": 2.550783395767212, "learning_rate": 9.769897030491576e-06, "loss": 0.6638, "step": 2988 }, { "epoch": 0.12408990186767964, "grad_norm": 2.6400351524353027, "learning_rate": 9.769695378898722e-06, "loss": 0.4988, "step": 2989 }, { "epoch": 0.12413141739189099, "grad_norm": 2.569809913635254, "learning_rate": 9.769493641068402e-06, "loss": 0.534, "step": 2990 }, { "epoch": 0.12417293291610232, "grad_norm": 2.637308359146118, "learning_rate": 9.76929181700426e-06, "loss": 0.6116, "step": 2991 }, { "epoch": 0.12421444844031365, "grad_norm": 3.6592884063720703, "learning_rate": 9.76908990670995e-06, "loss": 0.4619, "step": 2992 }, { "epoch": 0.12425596396452498, "grad_norm": 2.7860496044158936, "learning_rate": 9.768887910189118e-06, "loss": 0.5819, "step": 2993 }, { "epoch": 0.12429747948873632, "grad_norm": 2.392418146133423, "learning_rate": 9.76868582744542e-06, "loss": 0.5148, "step": 2994 }, { "epoch": 0.12433899501294765, "grad_norm": 2.6885080337524414, "learning_rate": 9.768483658482507e-06, "loss": 0.506, "step": 2995 }, { "epoch": 0.12438051053715898, "grad_norm": 2.6271984577178955, "learning_rate": 9.768281403304032e-06, "loss": 0.5291, "step": 2996 }, { "epoch": 0.12442202606137033, "grad_norm": 2.725623369216919, "learning_rate": 9.768079061913658e-06, "loss": 0.525, "step": 2997 }, { "epoch": 0.12446354158558166, "grad_norm": 3.250016212463379, "learning_rate": 9.767876634315038e-06, "loss": 0.6444, "step": 2998 }, { "epoch": 0.124505057109793, "grad_norm": 2.1334385871887207, "learning_rate": 9.767674120511836e-06, "loss": 0.517, "step": 2999 }, { "epoch": 0.12454657263400433, "grad_norm": 3.1357998847961426, "learning_rate": 9.767471520507713e-06, "loss": 0.5486, "step": 3000 }, { "epoch": 0.12458808815821566, "grad_norm": 3.070227861404419, "learning_rate": 9.76726883430633e-06, "loss": 0.6633, "step": 3001 }, { "epoch": 0.124629603682427, "grad_norm": 2.1374378204345703, "learning_rate": 9.76706606191135e-06, "loss": 0.3837, "step": 3002 }, { "epoch": 0.12467111920663833, "grad_norm": 2.4464609622955322, "learning_rate": 9.766863203326444e-06, "loss": 0.4723, "step": 3003 }, { "epoch": 0.12471263473084966, "grad_norm": 2.467491626739502, "learning_rate": 9.766660258555275e-06, "loss": 0.6291, "step": 3004 }, { "epoch": 0.12475415025506101, "grad_norm": 2.30350661277771, "learning_rate": 9.766457227601518e-06, "loss": 0.5758, "step": 3005 }, { "epoch": 0.12479566577927234, "grad_norm": 2.8281009197235107, "learning_rate": 9.766254110468838e-06, "loss": 0.5194, "step": 3006 }, { "epoch": 0.12483718130348367, "grad_norm": 2.4071218967437744, "learning_rate": 9.76605090716091e-06, "loss": 0.4581, "step": 3007 }, { "epoch": 0.12487869682769501, "grad_norm": 2.6477127075195312, "learning_rate": 9.765847617681408e-06, "loss": 0.572, "step": 3008 }, { "epoch": 0.12492021235190634, "grad_norm": 2.967844247817993, "learning_rate": 9.765644242034009e-06, "loss": 0.5447, "step": 3009 }, { "epoch": 0.12496172787611767, "grad_norm": 2.583292245864868, "learning_rate": 9.765440780222387e-06, "loss": 0.4078, "step": 3010 }, { "epoch": 0.125003243400329, "grad_norm": 2.715287446975708, "learning_rate": 9.765237232250223e-06, "loss": 0.4221, "step": 3011 }, { "epoch": 0.12504475892454034, "grad_norm": 2.511211633682251, "learning_rate": 9.765033598121195e-06, "loss": 0.5205, "step": 3012 }, { "epoch": 0.12508627444875167, "grad_norm": 2.807020664215088, "learning_rate": 9.764829877838988e-06, "loss": 0.556, "step": 3013 }, { "epoch": 0.125127789972963, "grad_norm": 2.326329469680786, "learning_rate": 9.764626071407282e-06, "loss": 0.4633, "step": 3014 }, { "epoch": 0.12516930549717434, "grad_norm": 2.6001696586608887, "learning_rate": 9.764422178829763e-06, "loss": 0.3897, "step": 3015 }, { "epoch": 0.12521082102138567, "grad_norm": 2.5611400604248047, "learning_rate": 9.764218200110119e-06, "loss": 0.5934, "step": 3016 }, { "epoch": 0.12525233654559703, "grad_norm": 2.9869179725646973, "learning_rate": 9.764014135252036e-06, "loss": 0.5682, "step": 3017 }, { "epoch": 0.12529385206980836, "grad_norm": 2.9000864028930664, "learning_rate": 9.763809984259204e-06, "loss": 0.4628, "step": 3018 }, { "epoch": 0.1253353675940197, "grad_norm": 2.9515998363494873, "learning_rate": 9.763605747135316e-06, "loss": 0.5708, "step": 3019 }, { "epoch": 0.12537688311823103, "grad_norm": 2.3778316974639893, "learning_rate": 9.763401423884062e-06, "loss": 0.5589, "step": 3020 }, { "epoch": 0.12541839864244236, "grad_norm": 2.4967041015625, "learning_rate": 9.763197014509136e-06, "loss": 0.3942, "step": 3021 }, { "epoch": 0.1254599141666537, "grad_norm": 2.3430521488189697, "learning_rate": 9.762992519014235e-06, "loss": 0.4837, "step": 3022 }, { "epoch": 0.12550142969086503, "grad_norm": 2.636582374572754, "learning_rate": 9.76278793740306e-06, "loss": 0.417, "step": 3023 }, { "epoch": 0.12554294521507636, "grad_norm": 2.6703386306762695, "learning_rate": 9.762583269679304e-06, "loss": 0.6645, "step": 3024 }, { "epoch": 0.1255844607392877, "grad_norm": 3.0026893615722656, "learning_rate": 9.76237851584667e-06, "loss": 0.4291, "step": 3025 }, { "epoch": 0.12562597626349903, "grad_norm": 2.615299701690674, "learning_rate": 9.762173675908859e-06, "loss": 0.4934, "step": 3026 }, { "epoch": 0.12566749178771036, "grad_norm": 2.4233803749084473, "learning_rate": 9.761968749869576e-06, "loss": 0.498, "step": 3027 }, { "epoch": 0.1257090073119217, "grad_norm": 3.0055320262908936, "learning_rate": 9.761763737732526e-06, "loss": 0.4219, "step": 3028 }, { "epoch": 0.12575052283613303, "grad_norm": 2.566061496734619, "learning_rate": 9.761558639501413e-06, "loss": 0.5521, "step": 3029 }, { "epoch": 0.12579203836034436, "grad_norm": 2.997408151626587, "learning_rate": 9.76135345517995e-06, "loss": 0.5633, "step": 3030 }, { "epoch": 0.1258335538845557, "grad_norm": 4.116724491119385, "learning_rate": 9.761148184771842e-06, "loss": 0.4229, "step": 3031 }, { "epoch": 0.12587506940876705, "grad_norm": 2.5089218616485596, "learning_rate": 9.760942828280803e-06, "loss": 0.4472, "step": 3032 }, { "epoch": 0.1259165849329784, "grad_norm": 2.664902925491333, "learning_rate": 9.760737385710546e-06, "loss": 0.5318, "step": 3033 }, { "epoch": 0.12595810045718972, "grad_norm": 2.823639154434204, "learning_rate": 9.760531857064786e-06, "loss": 0.4196, "step": 3034 }, { "epoch": 0.12599961598140105, "grad_norm": 3.1217448711395264, "learning_rate": 9.760326242347236e-06, "loss": 0.6552, "step": 3035 }, { "epoch": 0.12604113150561239, "grad_norm": 2.438354730606079, "learning_rate": 9.760120541561617e-06, "loss": 0.5325, "step": 3036 }, { "epoch": 0.12608264702982372, "grad_norm": 2.5730531215667725, "learning_rate": 9.759914754711646e-06, "loss": 0.6277, "step": 3037 }, { "epoch": 0.12612416255403505, "grad_norm": 2.732266902923584, "learning_rate": 9.759708881801044e-06, "loss": 0.549, "step": 3038 }, { "epoch": 0.12616567807824638, "grad_norm": 2.27001953125, "learning_rate": 9.759502922833532e-06, "loss": 0.3569, "step": 3039 }, { "epoch": 0.12620719360245772, "grad_norm": 3.1841652393341064, "learning_rate": 9.759296877812838e-06, "loss": 0.4702, "step": 3040 }, { "epoch": 0.12624870912666905, "grad_norm": 3.845393657684326, "learning_rate": 9.759090746742683e-06, "loss": 0.6228, "step": 3041 }, { "epoch": 0.12629022465088038, "grad_norm": 2.2237114906311035, "learning_rate": 9.758884529626797e-06, "loss": 0.5583, "step": 3042 }, { "epoch": 0.12633174017509172, "grad_norm": 2.768226385116577, "learning_rate": 9.758678226468905e-06, "loss": 0.5755, "step": 3043 }, { "epoch": 0.12637325569930305, "grad_norm": 2.7558674812316895, "learning_rate": 9.758471837272741e-06, "loss": 0.4626, "step": 3044 }, { "epoch": 0.12641477122351438, "grad_norm": 2.5739450454711914, "learning_rate": 9.758265362042035e-06, "loss": 0.6094, "step": 3045 }, { "epoch": 0.12645628674772572, "grad_norm": 2.439697504043579, "learning_rate": 9.758058800780517e-06, "loss": 0.4682, "step": 3046 }, { "epoch": 0.12649780227193705, "grad_norm": 2.4672670364379883, "learning_rate": 9.757852153491927e-06, "loss": 0.571, "step": 3047 }, { "epoch": 0.1265393177961484, "grad_norm": 2.245598077774048, "learning_rate": 9.757645420179998e-06, "loss": 0.4808, "step": 3048 }, { "epoch": 0.12658083332035974, "grad_norm": 2.625544786453247, "learning_rate": 9.757438600848467e-06, "loss": 0.5271, "step": 3049 }, { "epoch": 0.12662234884457108, "grad_norm": 2.780565023422241, "learning_rate": 9.757231695501077e-06, "loss": 0.5868, "step": 3050 }, { "epoch": 0.1266638643687824, "grad_norm": 2.902885675430298, "learning_rate": 9.757024704141566e-06, "loss": 0.5828, "step": 3051 }, { "epoch": 0.12670537989299374, "grad_norm": 2.5596187114715576, "learning_rate": 9.756817626773679e-06, "loss": 0.5979, "step": 3052 }, { "epoch": 0.12674689541720507, "grad_norm": 2.587960958480835, "learning_rate": 9.756610463401154e-06, "loss": 0.5242, "step": 3053 }, { "epoch": 0.1267884109414164, "grad_norm": 2.353271484375, "learning_rate": 9.756403214027743e-06, "loss": 0.511, "step": 3054 }, { "epoch": 0.12682992646562774, "grad_norm": 2.464365005493164, "learning_rate": 9.756195878657191e-06, "loss": 0.5154, "step": 3055 }, { "epoch": 0.12687144198983907, "grad_norm": 2.6408722400665283, "learning_rate": 9.755988457293249e-06, "loss": 0.4085, "step": 3056 }, { "epoch": 0.1269129575140504, "grad_norm": 2.48992919921875, "learning_rate": 9.755780949939662e-06, "loss": 0.5444, "step": 3057 }, { "epoch": 0.12695447303826174, "grad_norm": 2.5597128868103027, "learning_rate": 9.755573356600186e-06, "loss": 0.5523, "step": 3058 }, { "epoch": 0.12699598856247307, "grad_norm": 2.5978987216949463, "learning_rate": 9.755365677278572e-06, "loss": 0.5558, "step": 3059 }, { "epoch": 0.1270375040866844, "grad_norm": 2.462552070617676, "learning_rate": 9.755157911978575e-06, "loss": 0.5349, "step": 3060 }, { "epoch": 0.12707901961089574, "grad_norm": 2.5484142303466797, "learning_rate": 9.754950060703953e-06, "loss": 0.5137, "step": 3061 }, { "epoch": 0.12712053513510707, "grad_norm": 2.355090856552124, "learning_rate": 9.754742123458465e-06, "loss": 0.6009, "step": 3062 }, { "epoch": 0.12716205065931843, "grad_norm": 2.6545088291168213, "learning_rate": 9.754534100245867e-06, "loss": 0.5822, "step": 3063 }, { "epoch": 0.12720356618352976, "grad_norm": 2.648465633392334, "learning_rate": 9.754325991069923e-06, "loss": 0.4096, "step": 3064 }, { "epoch": 0.1272450817077411, "grad_norm": 2.838294267654419, "learning_rate": 9.754117795934397e-06, "loss": 0.5995, "step": 3065 }, { "epoch": 0.12728659723195243, "grad_norm": 2.2398335933685303, "learning_rate": 9.753909514843047e-06, "loss": 0.4611, "step": 3066 }, { "epoch": 0.12732811275616376, "grad_norm": 3.1344974040985107, "learning_rate": 9.753701147799645e-06, "loss": 0.5419, "step": 3067 }, { "epoch": 0.1273696282803751, "grad_norm": 2.052855968475342, "learning_rate": 9.753492694807956e-06, "loss": 0.5097, "step": 3068 }, { "epoch": 0.12741114380458643, "grad_norm": 2.790034770965576, "learning_rate": 9.753284155871748e-06, "loss": 0.5437, "step": 3069 }, { "epoch": 0.12745265932879776, "grad_norm": 2.3617289066314697, "learning_rate": 9.753075530994793e-06, "loss": 0.5881, "step": 3070 }, { "epoch": 0.1274941748530091, "grad_norm": 2.471203327178955, "learning_rate": 9.752866820180862e-06, "loss": 0.6348, "step": 3071 }, { "epoch": 0.12753569037722043, "grad_norm": 2.6003286838531494, "learning_rate": 9.75265802343373e-06, "loss": 0.4689, "step": 3072 }, { "epoch": 0.12757720590143176, "grad_norm": 2.55340838432312, "learning_rate": 9.75244914075717e-06, "loss": 0.508, "step": 3073 }, { "epoch": 0.1276187214256431, "grad_norm": 2.9336180686950684, "learning_rate": 9.752240172154961e-06, "loss": 0.4837, "step": 3074 }, { "epoch": 0.12766023694985443, "grad_norm": 2.9176905155181885, "learning_rate": 9.75203111763088e-06, "loss": 0.5625, "step": 3075 }, { "epoch": 0.12770175247406576, "grad_norm": 2.844251871109009, "learning_rate": 9.751821977188705e-06, "loss": 0.5628, "step": 3076 }, { "epoch": 0.1277432679982771, "grad_norm": 3.2745633125305176, "learning_rate": 9.75161275083222e-06, "loss": 0.5885, "step": 3077 }, { "epoch": 0.12778478352248845, "grad_norm": 2.5410327911376953, "learning_rate": 9.751403438565206e-06, "loss": 0.5041, "step": 3078 }, { "epoch": 0.1278262990466998, "grad_norm": 3.1562814712524414, "learning_rate": 9.75119404039145e-06, "loss": 0.6451, "step": 3079 }, { "epoch": 0.12786781457091112, "grad_norm": 2.445486068725586, "learning_rate": 9.750984556314736e-06, "loss": 0.6268, "step": 3080 }, { "epoch": 0.12790933009512245, "grad_norm": 2.3420603275299072, "learning_rate": 9.750774986338851e-06, "loss": 0.3631, "step": 3081 }, { "epoch": 0.1279508456193338, "grad_norm": 2.624824285507202, "learning_rate": 9.750565330467584e-06, "loss": 0.4518, "step": 3082 }, { "epoch": 0.12799236114354512, "grad_norm": 2.8741672039031982, "learning_rate": 9.750355588704728e-06, "loss": 0.5476, "step": 3083 }, { "epoch": 0.12803387666775645, "grad_norm": 3.0475711822509766, "learning_rate": 9.750145761054073e-06, "loss": 0.5226, "step": 3084 }, { "epoch": 0.12807539219196779, "grad_norm": 2.496137857437134, "learning_rate": 9.749935847519414e-06, "loss": 0.5304, "step": 3085 }, { "epoch": 0.12811690771617912, "grad_norm": 2.510075569152832, "learning_rate": 9.749725848104545e-06, "loss": 0.5121, "step": 3086 }, { "epoch": 0.12815842324039045, "grad_norm": 2.492396831512451, "learning_rate": 9.749515762813266e-06, "loss": 0.5966, "step": 3087 }, { "epoch": 0.12819993876460178, "grad_norm": 2.416588306427002, "learning_rate": 9.74930559164937e-06, "loss": 0.5315, "step": 3088 }, { "epoch": 0.12824145428881312, "grad_norm": 2.4297773838043213, "learning_rate": 9.749095334616663e-06, "loss": 0.5886, "step": 3089 }, { "epoch": 0.12828296981302445, "grad_norm": 2.6537070274353027, "learning_rate": 9.748884991718941e-06, "loss": 0.5373, "step": 3090 }, { "epoch": 0.12832448533723578, "grad_norm": 2.2067384719848633, "learning_rate": 9.74867456296001e-06, "loss": 0.3992, "step": 3091 }, { "epoch": 0.12836600086144712, "grad_norm": 2.9281134605407715, "learning_rate": 9.748464048343675e-06, "loss": 0.5415, "step": 3092 }, { "epoch": 0.12840751638565845, "grad_norm": 2.6172800064086914, "learning_rate": 9.74825344787374e-06, "loss": 0.5966, "step": 3093 }, { "epoch": 0.1284490319098698, "grad_norm": 2.650969982147217, "learning_rate": 9.748042761554018e-06, "loss": 0.5483, "step": 3094 }, { "epoch": 0.12849054743408114, "grad_norm": 2.226346969604492, "learning_rate": 9.74783198938831e-06, "loss": 0.5427, "step": 3095 }, { "epoch": 0.12853206295829248, "grad_norm": 2.3293631076812744, "learning_rate": 9.747621131380433e-06, "loss": 0.4919, "step": 3096 }, { "epoch": 0.1285735784825038, "grad_norm": 2.2444515228271484, "learning_rate": 9.747410187534197e-06, "loss": 0.4916, "step": 3097 }, { "epoch": 0.12861509400671514, "grad_norm": 2.572200059890747, "learning_rate": 9.747199157853417e-06, "loss": 0.5229, "step": 3098 }, { "epoch": 0.12865660953092647, "grad_norm": 3.0145363807678223, "learning_rate": 9.746988042341907e-06, "loss": 0.5161, "step": 3099 }, { "epoch": 0.1286981250551378, "grad_norm": 2.7032148838043213, "learning_rate": 9.746776841003486e-06, "loss": 0.5625, "step": 3100 }, { "epoch": 0.12873964057934914, "grad_norm": 2.8045034408569336, "learning_rate": 9.746565553841971e-06, "loss": 0.5298, "step": 3101 }, { "epoch": 0.12878115610356047, "grad_norm": 2.7163045406341553, "learning_rate": 9.746354180861183e-06, "loss": 0.5825, "step": 3102 }, { "epoch": 0.1288226716277718, "grad_norm": 3.3864479064941406, "learning_rate": 9.746142722064943e-06, "loss": 0.5402, "step": 3103 }, { "epoch": 0.12886418715198314, "grad_norm": 2.831432580947876, "learning_rate": 9.745931177457076e-06, "loss": 0.5447, "step": 3104 }, { "epoch": 0.12890570267619447, "grad_norm": 2.4672300815582275, "learning_rate": 9.745719547041403e-06, "loss": 0.478, "step": 3105 }, { "epoch": 0.1289472182004058, "grad_norm": 2.7878754138946533, "learning_rate": 9.745507830821754e-06, "loss": 0.5024, "step": 3106 }, { "epoch": 0.12898873372461714, "grad_norm": 2.9904189109802246, "learning_rate": 9.745296028801955e-06, "loss": 0.4485, "step": 3107 }, { "epoch": 0.12903024924882847, "grad_norm": 2.5306339263916016, "learning_rate": 9.745084140985836e-06, "loss": 0.4332, "step": 3108 }, { "epoch": 0.12907176477303983, "grad_norm": 2.243941068649292, "learning_rate": 9.744872167377228e-06, "loss": 0.498, "step": 3109 }, { "epoch": 0.12911328029725117, "grad_norm": 2.6127943992614746, "learning_rate": 9.744660107979966e-06, "loss": 0.5604, "step": 3110 }, { "epoch": 0.1291547958214625, "grad_norm": 2.870715379714966, "learning_rate": 9.74444796279788e-06, "loss": 0.3304, "step": 3111 }, { "epoch": 0.12919631134567383, "grad_norm": 2.7423903942108154, "learning_rate": 9.744235731834806e-06, "loss": 0.4637, "step": 3112 }, { "epoch": 0.12923782686988516, "grad_norm": 3.1544413566589355, "learning_rate": 9.744023415094584e-06, "loss": 0.503, "step": 3113 }, { "epoch": 0.1292793423940965, "grad_norm": 2.9235401153564453, "learning_rate": 9.74381101258105e-06, "loss": 0.7179, "step": 3114 }, { "epoch": 0.12932085791830783, "grad_norm": 3.2559573650360107, "learning_rate": 9.743598524298048e-06, "loss": 0.4726, "step": 3115 }, { "epoch": 0.12936237344251916, "grad_norm": 2.495084762573242, "learning_rate": 9.743385950249415e-06, "loss": 0.5787, "step": 3116 }, { "epoch": 0.1294038889667305, "grad_norm": 2.485349178314209, "learning_rate": 9.743173290438998e-06, "loss": 0.6412, "step": 3117 }, { "epoch": 0.12944540449094183, "grad_norm": 2.4039218425750732, "learning_rate": 9.74296054487064e-06, "loss": 0.6075, "step": 3118 }, { "epoch": 0.12948692001515316, "grad_norm": 2.4393062591552734, "learning_rate": 9.742747713548191e-06, "loss": 0.5373, "step": 3119 }, { "epoch": 0.1295284355393645, "grad_norm": 2.506046772003174, "learning_rate": 9.742534796475494e-06, "loss": 0.5218, "step": 3120 }, { "epoch": 0.12956995106357583, "grad_norm": 2.466336488723755, "learning_rate": 9.7423217936564e-06, "loss": 0.6969, "step": 3121 }, { "epoch": 0.12961146658778716, "grad_norm": 2.6519529819488525, "learning_rate": 9.742108705094764e-06, "loss": 0.4744, "step": 3122 }, { "epoch": 0.1296529821119985, "grad_norm": 2.5427074432373047, "learning_rate": 9.741895530794433e-06, "loss": 0.598, "step": 3123 }, { "epoch": 0.12969449763620983, "grad_norm": 3.0285582542419434, "learning_rate": 9.741682270759265e-06, "loss": 0.5557, "step": 3124 }, { "epoch": 0.1297360131604212, "grad_norm": 2.338279962539673, "learning_rate": 9.741468924993115e-06, "loss": 0.4737, "step": 3125 }, { "epoch": 0.12977752868463252, "grad_norm": 2.359020948410034, "learning_rate": 9.74125549349984e-06, "loss": 0.499, "step": 3126 }, { "epoch": 0.12981904420884385, "grad_norm": 2.496058940887451, "learning_rate": 9.741041976283298e-06, "loss": 0.5608, "step": 3127 }, { "epoch": 0.1298605597330552, "grad_norm": 4.542051792144775, "learning_rate": 9.740828373347352e-06, "loss": 0.3929, "step": 3128 }, { "epoch": 0.12990207525726652, "grad_norm": 3.2311127185821533, "learning_rate": 9.740614684695864e-06, "loss": 0.5486, "step": 3129 }, { "epoch": 0.12994359078147785, "grad_norm": 2.873392105102539, "learning_rate": 9.740400910332694e-06, "loss": 0.5685, "step": 3130 }, { "epoch": 0.12998510630568919, "grad_norm": 2.7479114532470703, "learning_rate": 9.740187050261708e-06, "loss": 0.6069, "step": 3131 }, { "epoch": 0.13002662182990052, "grad_norm": 2.4775588512420654, "learning_rate": 9.739973104486777e-06, "loss": 0.4934, "step": 3132 }, { "epoch": 0.13006813735411185, "grad_norm": 2.517594575881958, "learning_rate": 9.739759073011764e-06, "loss": 0.6141, "step": 3133 }, { "epoch": 0.13010965287832318, "grad_norm": 2.9930973052978516, "learning_rate": 9.739544955840542e-06, "loss": 0.5614, "step": 3134 }, { "epoch": 0.13015116840253452, "grad_norm": 3.6926791667938232, "learning_rate": 9.739330752976981e-06, "loss": 0.544, "step": 3135 }, { "epoch": 0.13019268392674585, "grad_norm": 2.4102070331573486, "learning_rate": 9.739116464424953e-06, "loss": 0.5734, "step": 3136 }, { "epoch": 0.13023419945095718, "grad_norm": 2.3792636394500732, "learning_rate": 9.738902090188334e-06, "loss": 0.4694, "step": 3137 }, { "epoch": 0.13027571497516852, "grad_norm": 2.7563111782073975, "learning_rate": 9.738687630270999e-06, "loss": 0.4522, "step": 3138 }, { "epoch": 0.13031723049937985, "grad_norm": 2.5703117847442627, "learning_rate": 9.738473084676824e-06, "loss": 0.4878, "step": 3139 }, { "epoch": 0.1303587460235912, "grad_norm": 2.4511830806732178, "learning_rate": 9.738258453409694e-06, "loss": 0.4787, "step": 3140 }, { "epoch": 0.13040026154780254, "grad_norm": 2.5868911743164062, "learning_rate": 9.738043736473482e-06, "loss": 0.5191, "step": 3141 }, { "epoch": 0.13044177707201388, "grad_norm": 2.3844785690307617, "learning_rate": 9.737828933872076e-06, "loss": 0.5244, "step": 3142 }, { "epoch": 0.1304832925962252, "grad_norm": 2.776468276977539, "learning_rate": 9.737614045609356e-06, "loss": 0.5911, "step": 3143 }, { "epoch": 0.13052480812043654, "grad_norm": 2.677872657775879, "learning_rate": 9.73739907168921e-06, "loss": 0.5492, "step": 3144 }, { "epoch": 0.13056632364464787, "grad_norm": 2.6311604976654053, "learning_rate": 9.73718401211552e-06, "loss": 0.5159, "step": 3145 }, { "epoch": 0.1306078391688592, "grad_norm": 2.076565742492676, "learning_rate": 9.73696886689218e-06, "loss": 0.4848, "step": 3146 }, { "epoch": 0.13064935469307054, "grad_norm": 2.3314905166625977, "learning_rate": 9.736753636023077e-06, "loss": 0.4364, "step": 3147 }, { "epoch": 0.13069087021728187, "grad_norm": 2.600147247314453, "learning_rate": 9.736538319512103e-06, "loss": 0.5571, "step": 3148 }, { "epoch": 0.1307323857414932, "grad_norm": 2.9011032581329346, "learning_rate": 9.736322917363152e-06, "loss": 0.455, "step": 3149 }, { "epoch": 0.13077390126570454, "grad_norm": 2.358698606491089, "learning_rate": 9.736107429580116e-06, "loss": 0.5572, "step": 3150 }, { "epoch": 0.13081541678991587, "grad_norm": 2.3001837730407715, "learning_rate": 9.735891856166893e-06, "loss": 0.5391, "step": 3151 }, { "epoch": 0.1308569323141272, "grad_norm": 2.8121113777160645, "learning_rate": 9.735676197127382e-06, "loss": 0.4108, "step": 3152 }, { "epoch": 0.13089844783833854, "grad_norm": 2.658658742904663, "learning_rate": 9.735460452465477e-06, "loss": 0.433, "step": 3153 }, { "epoch": 0.13093996336254987, "grad_norm": 2.5715136528015137, "learning_rate": 9.735244622185085e-06, "loss": 0.4877, "step": 3154 }, { "epoch": 0.1309814788867612, "grad_norm": 2.6969237327575684, "learning_rate": 9.735028706290104e-06, "loss": 0.6962, "step": 3155 }, { "epoch": 0.13102299441097257, "grad_norm": 2.891505002975464, "learning_rate": 9.73481270478444e-06, "loss": 0.5912, "step": 3156 }, { "epoch": 0.1310645099351839, "grad_norm": 2.6540942192077637, "learning_rate": 9.734596617671996e-06, "loss": 0.6841, "step": 3157 }, { "epoch": 0.13110602545939523, "grad_norm": 2.4634687900543213, "learning_rate": 9.734380444956681e-06, "loss": 0.4775, "step": 3158 }, { "epoch": 0.13114754098360656, "grad_norm": 2.7168161869049072, "learning_rate": 9.734164186642404e-06, "loss": 0.6765, "step": 3159 }, { "epoch": 0.1311890565078179, "grad_norm": 2.3322980403900146, "learning_rate": 9.733947842733073e-06, "loss": 0.5372, "step": 3160 }, { "epoch": 0.13123057203202923, "grad_norm": 2.5145208835601807, "learning_rate": 9.733731413232602e-06, "loss": 0.6267, "step": 3161 }, { "epoch": 0.13127208755624056, "grad_norm": 2.1717722415924072, "learning_rate": 9.733514898144902e-06, "loss": 0.4949, "step": 3162 }, { "epoch": 0.1313136030804519, "grad_norm": 3.039585590362549, "learning_rate": 9.733298297473888e-06, "loss": 0.38, "step": 3163 }, { "epoch": 0.13135511860466323, "grad_norm": 2.65727162361145, "learning_rate": 9.733081611223476e-06, "loss": 0.495, "step": 3164 }, { "epoch": 0.13139663412887456, "grad_norm": 2.685849666595459, "learning_rate": 9.732864839397585e-06, "loss": 0.6167, "step": 3165 }, { "epoch": 0.1314381496530859, "grad_norm": 2.4591522216796875, "learning_rate": 9.732647982000133e-06, "loss": 0.6369, "step": 3166 }, { "epoch": 0.13147966517729723, "grad_norm": 2.30633282661438, "learning_rate": 9.732431039035042e-06, "loss": 0.5365, "step": 3167 }, { "epoch": 0.13152118070150856, "grad_norm": 2.646374464035034, "learning_rate": 9.732214010506234e-06, "loss": 0.5536, "step": 3168 }, { "epoch": 0.1315626962257199, "grad_norm": 2.7518043518066406, "learning_rate": 9.731996896417633e-06, "loss": 0.7278, "step": 3169 }, { "epoch": 0.13160421174993123, "grad_norm": 3.273581027984619, "learning_rate": 9.731779696773163e-06, "loss": 0.417, "step": 3170 }, { "epoch": 0.1316457272741426, "grad_norm": 2.2879395484924316, "learning_rate": 9.731562411576751e-06, "loss": 0.5802, "step": 3171 }, { "epoch": 0.13168724279835392, "grad_norm": 2.657001256942749, "learning_rate": 9.73134504083233e-06, "loss": 0.6016, "step": 3172 }, { "epoch": 0.13172875832256525, "grad_norm": 2.718661308288574, "learning_rate": 9.731127584543825e-06, "loss": 0.4929, "step": 3173 }, { "epoch": 0.1317702738467766, "grad_norm": 2.5110692977905273, "learning_rate": 9.73091004271517e-06, "loss": 0.4664, "step": 3174 }, { "epoch": 0.13181178937098792, "grad_norm": 2.45967173576355, "learning_rate": 9.730692415350298e-06, "loss": 0.5321, "step": 3175 }, { "epoch": 0.13185330489519925, "grad_norm": 2.799670934677124, "learning_rate": 9.730474702453142e-06, "loss": 0.5867, "step": 3176 }, { "epoch": 0.13189482041941059, "grad_norm": 2.734619140625, "learning_rate": 9.730256904027641e-06, "loss": 0.4013, "step": 3177 }, { "epoch": 0.13193633594362192, "grad_norm": 2.291428804397583, "learning_rate": 9.730039020077734e-06, "loss": 0.5506, "step": 3178 }, { "epoch": 0.13197785146783325, "grad_norm": 2.6810996532440186, "learning_rate": 9.729821050607356e-06, "loss": 0.5311, "step": 3179 }, { "epoch": 0.13201936699204458, "grad_norm": 2.994168281555176, "learning_rate": 9.729602995620449e-06, "loss": 0.5651, "step": 3180 }, { "epoch": 0.13206088251625592, "grad_norm": 2.7720108032226562, "learning_rate": 9.72938485512096e-06, "loss": 0.4533, "step": 3181 }, { "epoch": 0.13210239804046725, "grad_norm": 3.329864978790283, "learning_rate": 9.729166629112827e-06, "loss": 0.6211, "step": 3182 }, { "epoch": 0.13214391356467858, "grad_norm": 2.9738385677337646, "learning_rate": 9.728948317599998e-06, "loss": 0.58, "step": 3183 }, { "epoch": 0.13218542908888992, "grad_norm": 2.535737991333008, "learning_rate": 9.728729920586422e-06, "loss": 0.4602, "step": 3184 }, { "epoch": 0.13222694461310125, "grad_norm": 2.64463472366333, "learning_rate": 9.728511438076045e-06, "loss": 0.6196, "step": 3185 }, { "epoch": 0.1322684601373126, "grad_norm": 2.511470079421997, "learning_rate": 9.728292870072819e-06, "loss": 0.423, "step": 3186 }, { "epoch": 0.13230997566152394, "grad_norm": 2.5801823139190674, "learning_rate": 9.728074216580693e-06, "loss": 0.4789, "step": 3187 }, { "epoch": 0.13235149118573528, "grad_norm": 2.4091522693634033, "learning_rate": 9.727855477603626e-06, "loss": 0.4137, "step": 3188 }, { "epoch": 0.1323930067099466, "grad_norm": 2.5557098388671875, "learning_rate": 9.727636653145567e-06, "loss": 0.425, "step": 3189 }, { "epoch": 0.13243452223415794, "grad_norm": 2.602853536605835, "learning_rate": 9.727417743210475e-06, "loss": 0.4617, "step": 3190 }, { "epoch": 0.13247603775836927, "grad_norm": 2.600045919418335, "learning_rate": 9.727198747802308e-06, "loss": 0.4847, "step": 3191 }, { "epoch": 0.1325175532825806, "grad_norm": 3.2918336391448975, "learning_rate": 9.726979666925024e-06, "loss": 0.5462, "step": 3192 }, { "epoch": 0.13255906880679194, "grad_norm": 2.475818634033203, "learning_rate": 9.726760500582586e-06, "loss": 0.3471, "step": 3193 }, { "epoch": 0.13260058433100327, "grad_norm": 2.4314515590667725, "learning_rate": 9.726541248778955e-06, "loss": 0.4756, "step": 3194 }, { "epoch": 0.1326420998552146, "grad_norm": 2.2468128204345703, "learning_rate": 9.726321911518097e-06, "loss": 0.543, "step": 3195 }, { "epoch": 0.13268361537942594, "grad_norm": 2.7038772106170654, "learning_rate": 9.726102488803976e-06, "loss": 0.5391, "step": 3196 }, { "epoch": 0.13272513090363727, "grad_norm": 3.9523346424102783, "learning_rate": 9.72588298064056e-06, "loss": 0.5464, "step": 3197 }, { "epoch": 0.1327666464278486, "grad_norm": 2.983773946762085, "learning_rate": 9.725663387031818e-06, "loss": 0.4437, "step": 3198 }, { "epoch": 0.13280816195205994, "grad_norm": 2.9452762603759766, "learning_rate": 9.72544370798172e-06, "loss": 0.5041, "step": 3199 }, { "epoch": 0.13284967747627127, "grad_norm": 2.6288654804229736, "learning_rate": 9.725223943494238e-06, "loss": 0.5158, "step": 3200 }, { "epoch": 0.1328911930004826, "grad_norm": 2.3378255367279053, "learning_rate": 9.725004093573343e-06, "loss": 0.5089, "step": 3201 }, { "epoch": 0.13293270852469397, "grad_norm": 2.53106689453125, "learning_rate": 9.724784158223014e-06, "loss": 0.579, "step": 3202 }, { "epoch": 0.1329742240489053, "grad_norm": 3.2976484298706055, "learning_rate": 9.724564137447225e-06, "loss": 0.5664, "step": 3203 }, { "epoch": 0.13301573957311663, "grad_norm": 2.918416976928711, "learning_rate": 9.724344031249956e-06, "loss": 0.514, "step": 3204 }, { "epoch": 0.13305725509732796, "grad_norm": 2.3878884315490723, "learning_rate": 9.724123839635182e-06, "loss": 0.5879, "step": 3205 }, { "epoch": 0.1330987706215393, "grad_norm": 2.3508334159851074, "learning_rate": 9.723903562606888e-06, "loss": 0.4115, "step": 3206 }, { "epoch": 0.13314028614575063, "grad_norm": 2.4180872440338135, "learning_rate": 9.723683200169059e-06, "loss": 0.414, "step": 3207 }, { "epoch": 0.13318180166996196, "grad_norm": 2.5490338802337646, "learning_rate": 9.723462752325674e-06, "loss": 0.5543, "step": 3208 }, { "epoch": 0.1332233171941733, "grad_norm": 2.4986824989318848, "learning_rate": 9.72324221908072e-06, "loss": 0.582, "step": 3209 }, { "epoch": 0.13326483271838463, "grad_norm": 2.7578442096710205, "learning_rate": 9.723021600438187e-06, "loss": 0.5424, "step": 3210 }, { "epoch": 0.13330634824259596, "grad_norm": 2.1018693447113037, "learning_rate": 9.722800896402062e-06, "loss": 0.4916, "step": 3211 }, { "epoch": 0.1333478637668073, "grad_norm": 2.1746134757995605, "learning_rate": 9.722580106976335e-06, "loss": 0.4606, "step": 3212 }, { "epoch": 0.13338937929101863, "grad_norm": 2.742249011993408, "learning_rate": 9.722359232165e-06, "loss": 0.4559, "step": 3213 }, { "epoch": 0.13343089481522996, "grad_norm": 2.7287535667419434, "learning_rate": 9.722138271972047e-06, "loss": 0.5547, "step": 3214 }, { "epoch": 0.1334724103394413, "grad_norm": 2.735334873199463, "learning_rate": 9.721917226401474e-06, "loss": 0.5449, "step": 3215 }, { "epoch": 0.13351392586365263, "grad_norm": 2.89475154876709, "learning_rate": 9.721696095457275e-06, "loss": 0.6174, "step": 3216 }, { "epoch": 0.133555441387864, "grad_norm": 2.935866355895996, "learning_rate": 9.721474879143452e-06, "loss": 0.3907, "step": 3217 }, { "epoch": 0.13359695691207532, "grad_norm": 2.630899667739868, "learning_rate": 9.721253577464001e-06, "loss": 0.5239, "step": 3218 }, { "epoch": 0.13363847243628665, "grad_norm": 2.63905668258667, "learning_rate": 9.721032190422925e-06, "loss": 0.4505, "step": 3219 }, { "epoch": 0.133679987960498, "grad_norm": 3.208442211151123, "learning_rate": 9.720810718024226e-06, "loss": 0.5995, "step": 3220 }, { "epoch": 0.13372150348470932, "grad_norm": 2.102412700653076, "learning_rate": 9.72058916027191e-06, "loss": 0.4335, "step": 3221 }, { "epoch": 0.13376301900892065, "grad_norm": 2.926417112350464, "learning_rate": 9.720367517169978e-06, "loss": 0.6903, "step": 3222 }, { "epoch": 0.13380453453313199, "grad_norm": 2.7302284240722656, "learning_rate": 9.720145788722444e-06, "loss": 0.718, "step": 3223 }, { "epoch": 0.13384605005734332, "grad_norm": 3.1168816089630127, "learning_rate": 9.719923974933312e-06, "loss": 0.6809, "step": 3224 }, { "epoch": 0.13388756558155465, "grad_norm": 3.009335994720459, "learning_rate": 9.719702075806594e-06, "loss": 0.5701, "step": 3225 }, { "epoch": 0.13392908110576598, "grad_norm": 2.867727518081665, "learning_rate": 9.719480091346302e-06, "loss": 0.6732, "step": 3226 }, { "epoch": 0.13397059662997732, "grad_norm": 2.910306930541992, "learning_rate": 9.71925802155645e-06, "loss": 0.5765, "step": 3227 }, { "epoch": 0.13401211215418865, "grad_norm": 2.4227077960968018, "learning_rate": 9.719035866441053e-06, "loss": 0.656, "step": 3228 }, { "epoch": 0.13405362767839998, "grad_norm": 2.5136826038360596, "learning_rate": 9.718813626004128e-06, "loss": 0.5279, "step": 3229 }, { "epoch": 0.13409514320261132, "grad_norm": 3.087655782699585, "learning_rate": 9.71859130024969e-06, "loss": 0.5643, "step": 3230 }, { "epoch": 0.13413665872682265, "grad_norm": 2.6136474609375, "learning_rate": 9.718368889181763e-06, "loss": 0.4325, "step": 3231 }, { "epoch": 0.13417817425103398, "grad_norm": 2.6648945808410645, "learning_rate": 9.718146392804369e-06, "loss": 0.5801, "step": 3232 }, { "epoch": 0.13421968977524534, "grad_norm": 3.3038148880004883, "learning_rate": 9.717923811121525e-06, "loss": 0.6198, "step": 3233 }, { "epoch": 0.13426120529945668, "grad_norm": 2.694098472595215, "learning_rate": 9.71770114413726e-06, "loss": 0.569, "step": 3234 }, { "epoch": 0.134302720823668, "grad_norm": 2.872711420059204, "learning_rate": 9.717478391855596e-06, "loss": 0.4465, "step": 3235 }, { "epoch": 0.13434423634787934, "grad_norm": 2.4708497524261475, "learning_rate": 9.717255554280567e-06, "loss": 0.4384, "step": 3236 }, { "epoch": 0.13438575187209068, "grad_norm": 2.445598840713501, "learning_rate": 9.717032631416194e-06, "loss": 0.5854, "step": 3237 }, { "epoch": 0.134427267396302, "grad_norm": 2.6891584396362305, "learning_rate": 9.716809623266514e-06, "loss": 0.5462, "step": 3238 }, { "epoch": 0.13446878292051334, "grad_norm": 2.402308702468872, "learning_rate": 9.716586529835557e-06, "loss": 0.5149, "step": 3239 }, { "epoch": 0.13451029844472467, "grad_norm": 2.8697352409362793, "learning_rate": 9.716363351127354e-06, "loss": 0.4595, "step": 3240 }, { "epoch": 0.134551813968936, "grad_norm": 2.536315679550171, "learning_rate": 9.716140087145942e-06, "loss": 0.5684, "step": 3241 }, { "epoch": 0.13459332949314734, "grad_norm": 3.008466958999634, "learning_rate": 9.715916737895361e-06, "loss": 0.6376, "step": 3242 }, { "epoch": 0.13463484501735867, "grad_norm": 2.9385159015655518, "learning_rate": 9.715693303379643e-06, "loss": 0.5763, "step": 3243 }, { "epoch": 0.13467636054157, "grad_norm": 3.4082190990448, "learning_rate": 9.715469783602831e-06, "loss": 0.5749, "step": 3244 }, { "epoch": 0.13471787606578134, "grad_norm": 2.5306713581085205, "learning_rate": 9.715246178568967e-06, "loss": 0.5092, "step": 3245 }, { "epoch": 0.13475939158999267, "grad_norm": 2.4387214183807373, "learning_rate": 9.715022488282093e-06, "loss": 0.531, "step": 3246 }, { "epoch": 0.134800907114204, "grad_norm": 2.3101322650909424, "learning_rate": 9.714798712746255e-06, "loss": 0.5543, "step": 3247 }, { "epoch": 0.13484242263841537, "grad_norm": 2.7960565090179443, "learning_rate": 9.714574851965495e-06, "loss": 0.5704, "step": 3248 }, { "epoch": 0.1348839381626267, "grad_norm": 3.1812756061553955, "learning_rate": 9.714350905943863e-06, "loss": 0.5127, "step": 3249 }, { "epoch": 0.13492545368683803, "grad_norm": 2.8640010356903076, "learning_rate": 9.71412687468541e-06, "loss": 0.6111, "step": 3250 }, { "epoch": 0.13496696921104936, "grad_norm": 2.7077057361602783, "learning_rate": 9.713902758194183e-06, "loss": 0.4407, "step": 3251 }, { "epoch": 0.1350084847352607, "grad_norm": 3.238816499710083, "learning_rate": 9.713678556474234e-06, "loss": 0.5081, "step": 3252 }, { "epoch": 0.13505000025947203, "grad_norm": 2.4137394428253174, "learning_rate": 9.71345426952962e-06, "loss": 0.4055, "step": 3253 }, { "epoch": 0.13509151578368336, "grad_norm": 2.261890172958374, "learning_rate": 9.713229897364394e-06, "loss": 0.5132, "step": 3254 }, { "epoch": 0.1351330313078947, "grad_norm": 2.4720301628112793, "learning_rate": 9.713005439982612e-06, "loss": 0.5349, "step": 3255 }, { "epoch": 0.13517454683210603, "grad_norm": 2.5171141624450684, "learning_rate": 9.712780897388332e-06, "loss": 0.4345, "step": 3256 }, { "epoch": 0.13521606235631736, "grad_norm": 2.520847797393799, "learning_rate": 9.712556269585619e-06, "loss": 0.5092, "step": 3257 }, { "epoch": 0.1352575778805287, "grad_norm": 2.215106964111328, "learning_rate": 9.712331556578528e-06, "loss": 0.467, "step": 3258 }, { "epoch": 0.13529909340474003, "grad_norm": 2.641777276992798, "learning_rate": 9.712106758371123e-06, "loss": 0.5732, "step": 3259 }, { "epoch": 0.13534060892895136, "grad_norm": 2.7567856311798096, "learning_rate": 9.711881874967471e-06, "loss": 0.6747, "step": 3260 }, { "epoch": 0.1353821244531627, "grad_norm": 2.593378782272339, "learning_rate": 9.711656906371636e-06, "loss": 0.3821, "step": 3261 }, { "epoch": 0.13542363997737403, "grad_norm": 2.778155565261841, "learning_rate": 9.711431852587687e-06, "loss": 0.5594, "step": 3262 }, { "epoch": 0.13546515550158536, "grad_norm": 2.715588092803955, "learning_rate": 9.711206713619692e-06, "loss": 0.4996, "step": 3263 }, { "epoch": 0.13550667102579672, "grad_norm": 2.3629519939422607, "learning_rate": 9.710981489471721e-06, "loss": 0.4015, "step": 3264 }, { "epoch": 0.13554818655000805, "grad_norm": 2.6885342597961426, "learning_rate": 9.710756180147846e-06, "loss": 0.5059, "step": 3265 }, { "epoch": 0.1355897020742194, "grad_norm": 2.7825188636779785, "learning_rate": 9.710530785652144e-06, "loss": 0.6, "step": 3266 }, { "epoch": 0.13563121759843072, "grad_norm": 3.4666402339935303, "learning_rate": 9.710305305988685e-06, "loss": 0.5348, "step": 3267 }, { "epoch": 0.13567273312264205, "grad_norm": 2.719933271408081, "learning_rate": 9.710079741161552e-06, "loss": 0.4983, "step": 3268 }, { "epoch": 0.13571424864685339, "grad_norm": 2.3177199363708496, "learning_rate": 9.709854091174817e-06, "loss": 0.4991, "step": 3269 }, { "epoch": 0.13575576417106472, "grad_norm": 2.722604751586914, "learning_rate": 9.709628356032562e-06, "loss": 0.6021, "step": 3270 }, { "epoch": 0.13579727969527605, "grad_norm": 2.7340919971466064, "learning_rate": 9.70940253573887e-06, "loss": 0.6228, "step": 3271 }, { "epoch": 0.13583879521948738, "grad_norm": 2.416404962539673, "learning_rate": 9.709176630297822e-06, "loss": 0.5034, "step": 3272 }, { "epoch": 0.13588031074369872, "grad_norm": 2.803863525390625, "learning_rate": 9.708950639713503e-06, "loss": 0.5517, "step": 3273 }, { "epoch": 0.13592182626791005, "grad_norm": 2.7531166076660156, "learning_rate": 9.708724563990001e-06, "loss": 0.42, "step": 3274 }, { "epoch": 0.13596334179212138, "grad_norm": 2.632817506790161, "learning_rate": 9.7084984031314e-06, "loss": 0.6173, "step": 3275 }, { "epoch": 0.13600485731633272, "grad_norm": 2.6047842502593994, "learning_rate": 9.708272157141791e-06, "loss": 0.6101, "step": 3276 }, { "epoch": 0.13604637284054405, "grad_norm": 3.0076632499694824, "learning_rate": 9.708045826025265e-06, "loss": 0.6341, "step": 3277 }, { "epoch": 0.13608788836475538, "grad_norm": 2.3410890102386475, "learning_rate": 9.707819409785914e-06, "loss": 0.3875, "step": 3278 }, { "epoch": 0.13612940388896674, "grad_norm": 3.1857001781463623, "learning_rate": 9.70759290842783e-06, "loss": 0.5237, "step": 3279 }, { "epoch": 0.13617091941317808, "grad_norm": 3.068281888961792, "learning_rate": 9.707366321955109e-06, "loss": 0.6389, "step": 3280 }, { "epoch": 0.1362124349373894, "grad_norm": 2.5932369232177734, "learning_rate": 9.70713965037185e-06, "loss": 0.5966, "step": 3281 }, { "epoch": 0.13625395046160074, "grad_norm": 2.6238863468170166, "learning_rate": 9.706912893682148e-06, "loss": 0.5183, "step": 3282 }, { "epoch": 0.13629546598581208, "grad_norm": 2.7205138206481934, "learning_rate": 9.706686051890104e-06, "loss": 0.5889, "step": 3283 }, { "epoch": 0.1363369815100234, "grad_norm": 2.641528367996216, "learning_rate": 9.70645912499982e-06, "loss": 0.489, "step": 3284 }, { "epoch": 0.13637849703423474, "grad_norm": 2.753140926361084, "learning_rate": 9.706232113015398e-06, "loss": 0.6897, "step": 3285 }, { "epoch": 0.13642001255844607, "grad_norm": 2.592949390411377, "learning_rate": 9.706005015940942e-06, "loss": 0.5428, "step": 3286 }, { "epoch": 0.1364615280826574, "grad_norm": 2.3029980659484863, "learning_rate": 9.705777833780561e-06, "loss": 0.5633, "step": 3287 }, { "epoch": 0.13650304360686874, "grad_norm": 3.049668073654175, "learning_rate": 9.70555056653836e-06, "loss": 0.6586, "step": 3288 }, { "epoch": 0.13654455913108007, "grad_norm": 3.1647212505340576, "learning_rate": 9.705323214218447e-06, "loss": 0.6324, "step": 3289 }, { "epoch": 0.1365860746552914, "grad_norm": 2.681079387664795, "learning_rate": 9.705095776824935e-06, "loss": 0.5074, "step": 3290 }, { "epoch": 0.13662759017950274, "grad_norm": 2.651179790496826, "learning_rate": 9.704868254361936e-06, "loss": 0.4193, "step": 3291 }, { "epoch": 0.13666910570371407, "grad_norm": 3.033473014831543, "learning_rate": 9.704640646833562e-06, "loss": 0.588, "step": 3292 }, { "epoch": 0.1367106212279254, "grad_norm": 2.7225825786590576, "learning_rate": 9.70441295424393e-06, "loss": 0.6081, "step": 3293 }, { "epoch": 0.13675213675213677, "grad_norm": 2.7833380699157715, "learning_rate": 9.704185176597154e-06, "loss": 0.4365, "step": 3294 }, { "epoch": 0.1367936522763481, "grad_norm": 2.9262330532073975, "learning_rate": 9.703957313897357e-06, "loss": 0.6114, "step": 3295 }, { "epoch": 0.13683516780055943, "grad_norm": 2.606473445892334, "learning_rate": 9.703729366148653e-06, "loss": 0.4966, "step": 3296 }, { "epoch": 0.13687668332477076, "grad_norm": 2.6072018146514893, "learning_rate": 9.703501333355167e-06, "loss": 0.5735, "step": 3297 }, { "epoch": 0.1369181988489821, "grad_norm": 2.6893558502197266, "learning_rate": 9.703273215521022e-06, "loss": 0.5327, "step": 3298 }, { "epoch": 0.13695971437319343, "grad_norm": 2.5360045433044434, "learning_rate": 9.70304501265034e-06, "loss": 0.5533, "step": 3299 }, { "epoch": 0.13700122989740476, "grad_norm": 2.5910260677337646, "learning_rate": 9.70281672474725e-06, "loss": 0.4751, "step": 3300 }, { "epoch": 0.1370427454216161, "grad_norm": 2.2497012615203857, "learning_rate": 9.702588351815878e-06, "loss": 0.5147, "step": 3301 }, { "epoch": 0.13708426094582743, "grad_norm": 2.7645888328552246, "learning_rate": 9.702359893860354e-06, "loss": 0.5765, "step": 3302 }, { "epoch": 0.13712577647003876, "grad_norm": 2.7108161449432373, "learning_rate": 9.702131350884807e-06, "loss": 0.4637, "step": 3303 }, { "epoch": 0.1371672919942501, "grad_norm": 2.9872188568115234, "learning_rate": 9.70190272289337e-06, "loss": 0.6204, "step": 3304 }, { "epoch": 0.13720880751846143, "grad_norm": 2.0878279209136963, "learning_rate": 9.701674009890176e-06, "loss": 0.619, "step": 3305 }, { "epoch": 0.13725032304267276, "grad_norm": 2.431615114212036, "learning_rate": 9.70144521187936e-06, "loss": 0.4797, "step": 3306 }, { "epoch": 0.1372918385668841, "grad_norm": 2.378502130508423, "learning_rate": 9.701216328865062e-06, "loss": 0.5081, "step": 3307 }, { "epoch": 0.13733335409109543, "grad_norm": 2.638948917388916, "learning_rate": 9.700987360851414e-06, "loss": 0.5965, "step": 3308 }, { "epoch": 0.13737486961530676, "grad_norm": 2.499162197113037, "learning_rate": 9.700758307842563e-06, "loss": 0.52, "step": 3309 }, { "epoch": 0.13741638513951812, "grad_norm": 2.2220823764801025, "learning_rate": 9.700529169842646e-06, "loss": 0.3951, "step": 3310 }, { "epoch": 0.13745790066372945, "grad_norm": 2.88323712348938, "learning_rate": 9.700299946855807e-06, "loss": 0.5441, "step": 3311 }, { "epoch": 0.1374994161879408, "grad_norm": 3.239222526550293, "learning_rate": 9.70007063888619e-06, "loss": 0.5169, "step": 3312 }, { "epoch": 0.13754093171215212, "grad_norm": 2.7981724739074707, "learning_rate": 9.69984124593794e-06, "loss": 0.4724, "step": 3313 }, { "epoch": 0.13758244723636345, "grad_norm": 2.9780895709991455, "learning_rate": 9.699611768015207e-06, "loss": 0.5836, "step": 3314 }, { "epoch": 0.1376239627605748, "grad_norm": 2.8784115314483643, "learning_rate": 9.699382205122138e-06, "loss": 0.7041, "step": 3315 }, { "epoch": 0.13766547828478612, "grad_norm": 2.7492430210113525, "learning_rate": 9.699152557262883e-06, "loss": 0.5959, "step": 3316 }, { "epoch": 0.13770699380899745, "grad_norm": 2.58441162109375, "learning_rate": 9.698922824441597e-06, "loss": 0.5953, "step": 3317 }, { "epoch": 0.13774850933320879, "grad_norm": 2.335130453109741, "learning_rate": 9.698693006662433e-06, "loss": 0.3637, "step": 3318 }, { "epoch": 0.13779002485742012, "grad_norm": 2.1960082054138184, "learning_rate": 9.698463103929542e-06, "loss": 0.4992, "step": 3319 }, { "epoch": 0.13783154038163145, "grad_norm": 2.4311728477478027, "learning_rate": 9.698233116247086e-06, "loss": 0.5493, "step": 3320 }, { "epoch": 0.13787305590584278, "grad_norm": 3.781444549560547, "learning_rate": 9.698003043619221e-06, "loss": 0.5816, "step": 3321 }, { "epoch": 0.13791457143005412, "grad_norm": 2.4314658641815186, "learning_rate": 9.697772886050106e-06, "loss": 0.4833, "step": 3322 }, { "epoch": 0.13795608695426545, "grad_norm": 2.5538766384124756, "learning_rate": 9.697542643543905e-06, "loss": 0.5049, "step": 3323 }, { "epoch": 0.13799760247847678, "grad_norm": 2.790904998779297, "learning_rate": 9.697312316104777e-06, "loss": 0.5523, "step": 3324 }, { "epoch": 0.13803911800268814, "grad_norm": 2.633582830429077, "learning_rate": 9.697081903736889e-06, "loss": 0.5323, "step": 3325 }, { "epoch": 0.13808063352689948, "grad_norm": 3.228442907333374, "learning_rate": 9.696851406444406e-06, "loss": 0.486, "step": 3326 }, { "epoch": 0.1381221490511108, "grad_norm": 2.7423698902130127, "learning_rate": 9.696620824231496e-06, "loss": 0.566, "step": 3327 }, { "epoch": 0.13816366457532214, "grad_norm": 2.341245412826538, "learning_rate": 9.696390157102328e-06, "loss": 0.4635, "step": 3328 }, { "epoch": 0.13820518009953348, "grad_norm": 2.674164295196533, "learning_rate": 9.696159405061072e-06, "loss": 0.5332, "step": 3329 }, { "epoch": 0.1382466956237448, "grad_norm": 2.621338367462158, "learning_rate": 9.6959285681119e-06, "loss": 0.5294, "step": 3330 }, { "epoch": 0.13828821114795614, "grad_norm": 3.0745811462402344, "learning_rate": 9.695697646258987e-06, "loss": 0.6732, "step": 3331 }, { "epoch": 0.13832972667216747, "grad_norm": 2.8569343090057373, "learning_rate": 9.695466639506508e-06, "loss": 0.564, "step": 3332 }, { "epoch": 0.1383712421963788, "grad_norm": 2.3745710849761963, "learning_rate": 9.695235547858638e-06, "loss": 0.5918, "step": 3333 }, { "epoch": 0.13841275772059014, "grad_norm": 2.8801050186157227, "learning_rate": 9.695004371319554e-06, "loss": 0.7327, "step": 3334 }, { "epoch": 0.13845427324480147, "grad_norm": 2.4193480014801025, "learning_rate": 9.69477310989344e-06, "loss": 0.5636, "step": 3335 }, { "epoch": 0.1384957887690128, "grad_norm": 2.5173676013946533, "learning_rate": 9.694541763584475e-06, "loss": 0.607, "step": 3336 }, { "epoch": 0.13853730429322414, "grad_norm": 2.970336437225342, "learning_rate": 9.694310332396842e-06, "loss": 0.5431, "step": 3337 }, { "epoch": 0.13857881981743547, "grad_norm": 3.1666781902313232, "learning_rate": 9.694078816334724e-06, "loss": 0.527, "step": 3338 }, { "epoch": 0.1386203353416468, "grad_norm": 2.5898845195770264, "learning_rate": 9.693847215402309e-06, "loss": 0.4757, "step": 3339 }, { "epoch": 0.13866185086585814, "grad_norm": 2.222001314163208, "learning_rate": 9.693615529603782e-06, "loss": 0.4687, "step": 3340 }, { "epoch": 0.1387033663900695, "grad_norm": 2.2731821537017822, "learning_rate": 9.693383758943333e-06, "loss": 0.5989, "step": 3341 }, { "epoch": 0.13874488191428083, "grad_norm": 2.491393804550171, "learning_rate": 9.693151903425153e-06, "loss": 0.4555, "step": 3342 }, { "epoch": 0.13878639743849217, "grad_norm": 2.98595929145813, "learning_rate": 9.692919963053436e-06, "loss": 0.4508, "step": 3343 }, { "epoch": 0.1388279129627035, "grad_norm": 3.1965861320495605, "learning_rate": 9.692687937832373e-06, "loss": 0.5338, "step": 3344 }, { "epoch": 0.13886942848691483, "grad_norm": 2.4594364166259766, "learning_rate": 9.692455827766159e-06, "loss": 0.577, "step": 3345 }, { "epoch": 0.13891094401112616, "grad_norm": 2.9133718013763428, "learning_rate": 9.69222363285899e-06, "loss": 0.5618, "step": 3346 }, { "epoch": 0.1389524595353375, "grad_norm": 2.461177349090576, "learning_rate": 9.691991353115066e-06, "loss": 0.5134, "step": 3347 }, { "epoch": 0.13899397505954883, "grad_norm": 3.015535354614258, "learning_rate": 9.691758988538586e-06, "loss": 0.5351, "step": 3348 }, { "epoch": 0.13903549058376016, "grad_norm": 3.025224447250366, "learning_rate": 9.691526539133752e-06, "loss": 0.5342, "step": 3349 }, { "epoch": 0.1390770061079715, "grad_norm": 2.568080425262451, "learning_rate": 9.691294004904765e-06, "loss": 0.5274, "step": 3350 }, { "epoch": 0.13911852163218283, "grad_norm": 2.7373783588409424, "learning_rate": 9.69106138585583e-06, "loss": 0.4598, "step": 3351 }, { "epoch": 0.13916003715639416, "grad_norm": 2.9594926834106445, "learning_rate": 9.690828681991153e-06, "loss": 0.5197, "step": 3352 }, { "epoch": 0.1392015526806055, "grad_norm": 2.6845884323120117, "learning_rate": 9.690595893314942e-06, "loss": 0.5284, "step": 3353 }, { "epoch": 0.13924306820481683, "grad_norm": 3.4027020931243896, "learning_rate": 9.690363019831405e-06, "loss": 0.6138, "step": 3354 }, { "epoch": 0.13928458372902816, "grad_norm": 2.913041353225708, "learning_rate": 9.690130061544753e-06, "loss": 0.4972, "step": 3355 }, { "epoch": 0.13932609925323952, "grad_norm": 2.5059096813201904, "learning_rate": 9.689897018459198e-06, "loss": 0.3692, "step": 3356 }, { "epoch": 0.13936761477745085, "grad_norm": 2.6474711894989014, "learning_rate": 9.68966389057895e-06, "loss": 0.5631, "step": 3357 }, { "epoch": 0.1394091303016622, "grad_norm": 3.224468231201172, "learning_rate": 9.689430677908231e-06, "loss": 0.469, "step": 3358 }, { "epoch": 0.13945064582587352, "grad_norm": 2.3927111625671387, "learning_rate": 9.689197380451252e-06, "loss": 0.5211, "step": 3359 }, { "epoch": 0.13949216135008485, "grad_norm": 2.987027645111084, "learning_rate": 9.688963998212233e-06, "loss": 0.471, "step": 3360 }, { "epoch": 0.1395336768742962, "grad_norm": 2.6564183235168457, "learning_rate": 9.688730531195394e-06, "loss": 0.5319, "step": 3361 }, { "epoch": 0.13957519239850752, "grad_norm": 2.4460597038269043, "learning_rate": 9.688496979404955e-06, "loss": 0.5851, "step": 3362 }, { "epoch": 0.13961670792271885, "grad_norm": 3.102038860321045, "learning_rate": 9.68826334284514e-06, "loss": 0.6581, "step": 3363 }, { "epoch": 0.13965822344693019, "grad_norm": 2.9348301887512207, "learning_rate": 9.688029621520171e-06, "loss": 0.652, "step": 3364 }, { "epoch": 0.13969973897114152, "grad_norm": 2.21090030670166, "learning_rate": 9.687795815434278e-06, "loss": 0.3654, "step": 3365 }, { "epoch": 0.13974125449535285, "grad_norm": 2.500839948654175, "learning_rate": 9.687561924591682e-06, "loss": 0.4996, "step": 3366 }, { "epoch": 0.13978277001956418, "grad_norm": 3.1925785541534424, "learning_rate": 9.687327948996617e-06, "loss": 0.5576, "step": 3367 }, { "epoch": 0.13982428554377552, "grad_norm": 2.385284423828125, "learning_rate": 9.687093888653312e-06, "loss": 0.6201, "step": 3368 }, { "epoch": 0.13986580106798685, "grad_norm": 2.8968632221221924, "learning_rate": 9.686859743565997e-06, "loss": 0.5514, "step": 3369 }, { "epoch": 0.13990731659219818, "grad_norm": 2.6681911945343018, "learning_rate": 9.68662551373891e-06, "loss": 0.4699, "step": 3370 }, { "epoch": 0.13994883211640952, "grad_norm": 2.298293113708496, "learning_rate": 9.686391199176279e-06, "loss": 0.5175, "step": 3371 }, { "epoch": 0.13999034764062088, "grad_norm": 3.1033716201782227, "learning_rate": 9.686156799882347e-06, "loss": 0.4575, "step": 3372 }, { "epoch": 0.1400318631648322, "grad_norm": 2.511247158050537, "learning_rate": 9.685922315861349e-06, "loss": 0.5611, "step": 3373 }, { "epoch": 0.14007337868904354, "grad_norm": 2.39418888092041, "learning_rate": 9.685687747117524e-06, "loss": 0.4021, "step": 3374 }, { "epoch": 0.14011489421325488, "grad_norm": 2.762653350830078, "learning_rate": 9.685453093655115e-06, "loss": 0.3957, "step": 3375 }, { "epoch": 0.1401564097374662, "grad_norm": 3.317577362060547, "learning_rate": 9.685218355478363e-06, "loss": 0.5802, "step": 3376 }, { "epoch": 0.14019792526167754, "grad_norm": 2.5609562397003174, "learning_rate": 9.684983532591511e-06, "loss": 0.438, "step": 3377 }, { "epoch": 0.14023944078588887, "grad_norm": 2.8784496784210205, "learning_rate": 9.68474862499881e-06, "loss": 0.5461, "step": 3378 }, { "epoch": 0.1402809563101002, "grad_norm": 2.2930591106414795, "learning_rate": 9.684513632704502e-06, "loss": 0.5476, "step": 3379 }, { "epoch": 0.14032247183431154, "grad_norm": 2.321960926055908, "learning_rate": 9.684278555712836e-06, "loss": 0.4998, "step": 3380 }, { "epoch": 0.14036398735852287, "grad_norm": 3.514554977416992, "learning_rate": 9.684043394028063e-06, "loss": 0.5139, "step": 3381 }, { "epoch": 0.1404055028827342, "grad_norm": 2.0102415084838867, "learning_rate": 9.683808147654438e-06, "loss": 0.4146, "step": 3382 }, { "epoch": 0.14044701840694554, "grad_norm": 3.1569454669952393, "learning_rate": 9.683572816596213e-06, "loss": 0.5775, "step": 3383 }, { "epoch": 0.14048853393115687, "grad_norm": 2.3996376991271973, "learning_rate": 9.683337400857638e-06, "loss": 0.4699, "step": 3384 }, { "epoch": 0.1405300494553682, "grad_norm": 2.0148770809173584, "learning_rate": 9.683101900442972e-06, "loss": 0.4731, "step": 3385 }, { "epoch": 0.14057156497957954, "grad_norm": 3.219494581222534, "learning_rate": 9.682866315356477e-06, "loss": 0.348, "step": 3386 }, { "epoch": 0.1406130805037909, "grad_norm": 2.784195899963379, "learning_rate": 9.682630645602409e-06, "loss": 0.437, "step": 3387 }, { "epoch": 0.14065459602800223, "grad_norm": 2.5869364738464355, "learning_rate": 9.682394891185027e-06, "loss": 0.5204, "step": 3388 }, { "epoch": 0.14069611155221357, "grad_norm": 2.216365337371826, "learning_rate": 9.682159052108598e-06, "loss": 0.4399, "step": 3389 }, { "epoch": 0.1407376270764249, "grad_norm": 2.803129196166992, "learning_rate": 9.681923128377382e-06, "loss": 0.4746, "step": 3390 }, { "epoch": 0.14077914260063623, "grad_norm": 2.972079277038574, "learning_rate": 9.681687119995648e-06, "loss": 0.5895, "step": 3391 }, { "epoch": 0.14082065812484756, "grad_norm": 2.6358985900878906, "learning_rate": 9.68145102696766e-06, "loss": 0.3558, "step": 3392 }, { "epoch": 0.1408621736490589, "grad_norm": 2.5154268741607666, "learning_rate": 9.68121484929769e-06, "loss": 0.5315, "step": 3393 }, { "epoch": 0.14090368917327023, "grad_norm": 2.5727009773254395, "learning_rate": 9.680978586990005e-06, "loss": 0.4259, "step": 3394 }, { "epoch": 0.14094520469748156, "grad_norm": 2.6889915466308594, "learning_rate": 9.680742240048877e-06, "loss": 0.6222, "step": 3395 }, { "epoch": 0.1409867202216929, "grad_norm": 2.6775288581848145, "learning_rate": 9.680505808478583e-06, "loss": 0.5345, "step": 3396 }, { "epoch": 0.14102823574590423, "grad_norm": 2.1842916011810303, "learning_rate": 9.68026929228339e-06, "loss": 0.5085, "step": 3397 }, { "epoch": 0.14106975127011556, "grad_norm": 2.7541990280151367, "learning_rate": 9.680032691467584e-06, "loss": 0.6892, "step": 3398 }, { "epoch": 0.1411112667943269, "grad_norm": 3.01796555519104, "learning_rate": 9.679796006035436e-06, "loss": 0.5232, "step": 3399 }, { "epoch": 0.14115278231853823, "grad_norm": 2.6089766025543213, "learning_rate": 9.679559235991227e-06, "loss": 0.5504, "step": 3400 }, { "epoch": 0.14119429784274956, "grad_norm": 2.697057008743286, "learning_rate": 9.67932238133924e-06, "loss": 0.4718, "step": 3401 }, { "epoch": 0.14123581336696092, "grad_norm": 2.7955691814422607, "learning_rate": 9.679085442083753e-06, "loss": 0.4574, "step": 3402 }, { "epoch": 0.14127732889117225, "grad_norm": 2.5862483978271484, "learning_rate": 9.678848418229054e-06, "loss": 0.6017, "step": 3403 }, { "epoch": 0.1413188444153836, "grad_norm": 2.404343366622925, "learning_rate": 9.678611309779428e-06, "loss": 0.5355, "step": 3404 }, { "epoch": 0.14136035993959492, "grad_norm": 3.0051472187042236, "learning_rate": 9.678374116739159e-06, "loss": 0.6413, "step": 3405 }, { "epoch": 0.14140187546380625, "grad_norm": 2.9043734073638916, "learning_rate": 9.67813683911254e-06, "loss": 0.5124, "step": 3406 }, { "epoch": 0.1414433909880176, "grad_norm": 2.99481463432312, "learning_rate": 9.677899476903857e-06, "loss": 0.495, "step": 3407 }, { "epoch": 0.14148490651222892, "grad_norm": 3.102370500564575, "learning_rate": 9.677662030117403e-06, "loss": 0.4809, "step": 3408 }, { "epoch": 0.14152642203644025, "grad_norm": 2.6436102390289307, "learning_rate": 9.677424498757472e-06, "loss": 0.4828, "step": 3409 }, { "epoch": 0.14156793756065159, "grad_norm": 2.6362826824188232, "learning_rate": 9.677186882828357e-06, "loss": 0.6493, "step": 3410 }, { "epoch": 0.14160945308486292, "grad_norm": 2.836907386779785, "learning_rate": 9.676949182334355e-06, "loss": 0.5578, "step": 3411 }, { "epoch": 0.14165096860907425, "grad_norm": 2.222043991088867, "learning_rate": 9.676711397279764e-06, "loss": 0.4457, "step": 3412 }, { "epoch": 0.14169248413328558, "grad_norm": 2.459251880645752, "learning_rate": 9.676473527668884e-06, "loss": 0.5032, "step": 3413 }, { "epoch": 0.14173399965749692, "grad_norm": 2.263298988342285, "learning_rate": 9.676235573506015e-06, "loss": 0.4879, "step": 3414 }, { "epoch": 0.14177551518170825, "grad_norm": 2.3540313243865967, "learning_rate": 9.675997534795456e-06, "loss": 0.5899, "step": 3415 }, { "epoch": 0.14181703070591958, "grad_norm": 2.586313247680664, "learning_rate": 9.675759411541516e-06, "loss": 0.4406, "step": 3416 }, { "epoch": 0.14185854623013092, "grad_norm": 2.8357620239257812, "learning_rate": 9.675521203748498e-06, "loss": 0.5421, "step": 3417 }, { "epoch": 0.14190006175434228, "grad_norm": 2.553964853286743, "learning_rate": 9.675282911420708e-06, "loss": 0.5533, "step": 3418 }, { "epoch": 0.1419415772785536, "grad_norm": 2.6704227924346924, "learning_rate": 9.675044534562456e-06, "loss": 0.5156, "step": 3419 }, { "epoch": 0.14198309280276494, "grad_norm": 2.4346532821655273, "learning_rate": 9.674806073178052e-06, "loss": 0.4593, "step": 3420 }, { "epoch": 0.14202460832697628, "grad_norm": 2.5966646671295166, "learning_rate": 9.674567527271806e-06, "loss": 0.5766, "step": 3421 }, { "epoch": 0.1420661238511876, "grad_norm": 2.207411289215088, "learning_rate": 9.674328896848034e-06, "loss": 0.5105, "step": 3422 }, { "epoch": 0.14210763937539894, "grad_norm": 2.610724687576294, "learning_rate": 9.674090181911044e-06, "loss": 0.5379, "step": 3423 }, { "epoch": 0.14214915489961027, "grad_norm": 2.2114245891571045, "learning_rate": 9.67385138246516e-06, "loss": 0.5342, "step": 3424 }, { "epoch": 0.1421906704238216, "grad_norm": 2.8509342670440674, "learning_rate": 9.673612498514695e-06, "loss": 0.4819, "step": 3425 }, { "epoch": 0.14223218594803294, "grad_norm": 2.607006549835205, "learning_rate": 9.673373530063968e-06, "loss": 0.5949, "step": 3426 }, { "epoch": 0.14227370147224427, "grad_norm": 2.712015151977539, "learning_rate": 9.673134477117302e-06, "loss": 0.6123, "step": 3427 }, { "epoch": 0.1423152169964556, "grad_norm": 2.656738758087158, "learning_rate": 9.672895339679014e-06, "loss": 0.6038, "step": 3428 }, { "epoch": 0.14235673252066694, "grad_norm": 2.742802143096924, "learning_rate": 9.672656117753435e-06, "loss": 0.5691, "step": 3429 }, { "epoch": 0.14239824804487827, "grad_norm": 2.9007833003997803, "learning_rate": 9.672416811344886e-06, "loss": 0.521, "step": 3430 }, { "epoch": 0.1424397635690896, "grad_norm": 2.754862070083618, "learning_rate": 9.672177420457693e-06, "loss": 0.4585, "step": 3431 }, { "epoch": 0.14248127909330094, "grad_norm": 3.161743640899658, "learning_rate": 9.671937945096189e-06, "loss": 0.5599, "step": 3432 }, { "epoch": 0.1425227946175123, "grad_norm": 2.1329855918884277, "learning_rate": 9.671698385264697e-06, "loss": 0.5062, "step": 3433 }, { "epoch": 0.14256431014172363, "grad_norm": 2.6391711235046387, "learning_rate": 9.671458740967552e-06, "loss": 0.4388, "step": 3434 }, { "epoch": 0.14260582566593497, "grad_norm": 2.8561642169952393, "learning_rate": 9.671219012209087e-06, "loss": 0.5036, "step": 3435 }, { "epoch": 0.1426473411901463, "grad_norm": 2.6348438262939453, "learning_rate": 9.670979198993635e-06, "loss": 0.6165, "step": 3436 }, { "epoch": 0.14268885671435763, "grad_norm": 2.3876559734344482, "learning_rate": 9.670739301325534e-06, "loss": 0.5376, "step": 3437 }, { "epoch": 0.14273037223856896, "grad_norm": 2.465928792953491, "learning_rate": 9.67049931920912e-06, "loss": 0.4918, "step": 3438 }, { "epoch": 0.1427718877627803, "grad_norm": 2.8089966773986816, "learning_rate": 9.670259252648733e-06, "loss": 0.4361, "step": 3439 }, { "epoch": 0.14281340328699163, "grad_norm": 2.666409492492676, "learning_rate": 9.670019101648712e-06, "loss": 0.5835, "step": 3440 }, { "epoch": 0.14285491881120296, "grad_norm": 2.5952231884002686, "learning_rate": 9.669778866213397e-06, "loss": 0.5208, "step": 3441 }, { "epoch": 0.1428964343354143, "grad_norm": 2.810312509536743, "learning_rate": 9.669538546347137e-06, "loss": 0.6062, "step": 3442 }, { "epoch": 0.14293794985962563, "grad_norm": 2.9023070335388184, "learning_rate": 9.669298142054273e-06, "loss": 0.5104, "step": 3443 }, { "epoch": 0.14297946538383696, "grad_norm": 2.618335485458374, "learning_rate": 9.669057653339153e-06, "loss": 0.4529, "step": 3444 }, { "epoch": 0.1430209809080483, "grad_norm": 2.8622207641601562, "learning_rate": 9.668817080206125e-06, "loss": 0.5447, "step": 3445 }, { "epoch": 0.14306249643225963, "grad_norm": 2.2507827281951904, "learning_rate": 9.668576422659539e-06, "loss": 0.5465, "step": 3446 }, { "epoch": 0.14310401195647096, "grad_norm": 2.354602813720703, "learning_rate": 9.668335680703746e-06, "loss": 0.5711, "step": 3447 }, { "epoch": 0.1431455274806823, "grad_norm": 2.780315399169922, "learning_rate": 9.668094854343097e-06, "loss": 0.5146, "step": 3448 }, { "epoch": 0.14318704300489365, "grad_norm": 3.1106643676757812, "learning_rate": 9.667853943581948e-06, "loss": 0.5339, "step": 3449 }, { "epoch": 0.143228558529105, "grad_norm": 2.090634346008301, "learning_rate": 9.667612948424654e-06, "loss": 0.4759, "step": 3450 }, { "epoch": 0.14327007405331632, "grad_norm": 2.6809182167053223, "learning_rate": 9.667371868875575e-06, "loss": 0.4272, "step": 3451 }, { "epoch": 0.14331158957752765, "grad_norm": 2.718350410461426, "learning_rate": 9.667130704939065e-06, "loss": 0.6834, "step": 3452 }, { "epoch": 0.143353105101739, "grad_norm": 2.7202558517456055, "learning_rate": 9.666889456619486e-06, "loss": 0.6525, "step": 3453 }, { "epoch": 0.14339462062595032, "grad_norm": 2.6296234130859375, "learning_rate": 9.666648123921201e-06, "loss": 0.5557, "step": 3454 }, { "epoch": 0.14343613615016165, "grad_norm": 2.7896342277526855, "learning_rate": 9.666406706848575e-06, "loss": 0.5482, "step": 3455 }, { "epoch": 0.14347765167437299, "grad_norm": 2.3689749240875244, "learning_rate": 9.666165205405969e-06, "loss": 0.5206, "step": 3456 }, { "epoch": 0.14351916719858432, "grad_norm": 2.2650949954986572, "learning_rate": 9.66592361959775e-06, "loss": 0.5978, "step": 3457 }, { "epoch": 0.14356068272279565, "grad_norm": 2.4566426277160645, "learning_rate": 9.665681949428288e-06, "loss": 0.6099, "step": 3458 }, { "epoch": 0.14360219824700698, "grad_norm": 2.8991334438323975, "learning_rate": 9.665440194901951e-06, "loss": 0.6302, "step": 3459 }, { "epoch": 0.14364371377121832, "grad_norm": 2.6227922439575195, "learning_rate": 9.665198356023113e-06, "loss": 0.5709, "step": 3460 }, { "epoch": 0.14368522929542965, "grad_norm": 2.6143598556518555, "learning_rate": 9.664956432796143e-06, "loss": 0.5192, "step": 3461 }, { "epoch": 0.14372674481964098, "grad_norm": 2.9330735206604004, "learning_rate": 9.664714425225414e-06, "loss": 0.649, "step": 3462 }, { "epoch": 0.14376826034385232, "grad_norm": 5.283153533935547, "learning_rate": 9.664472333315305e-06, "loss": 0.4395, "step": 3463 }, { "epoch": 0.14380977586806368, "grad_norm": 2.2961342334747314, "learning_rate": 9.664230157070192e-06, "loss": 0.4845, "step": 3464 }, { "epoch": 0.143851291392275, "grad_norm": 2.3655238151550293, "learning_rate": 9.663987896494454e-06, "loss": 0.5167, "step": 3465 }, { "epoch": 0.14389280691648634, "grad_norm": 2.3148367404937744, "learning_rate": 9.66374555159247e-06, "loss": 0.5289, "step": 3466 }, { "epoch": 0.14393432244069768, "grad_norm": 2.5229716300964355, "learning_rate": 9.663503122368622e-06, "loss": 0.4312, "step": 3467 }, { "epoch": 0.143975837964909, "grad_norm": 2.4308218955993652, "learning_rate": 9.663260608827294e-06, "loss": 0.5185, "step": 3468 }, { "epoch": 0.14401735348912034, "grad_norm": 2.2977726459503174, "learning_rate": 9.66301801097287e-06, "loss": 0.5152, "step": 3469 }, { "epoch": 0.14405886901333168, "grad_norm": 3.293992042541504, "learning_rate": 9.662775328809735e-06, "loss": 0.4016, "step": 3470 }, { "epoch": 0.144100384537543, "grad_norm": 2.8075313568115234, "learning_rate": 9.662532562342279e-06, "loss": 0.5353, "step": 3471 }, { "epoch": 0.14414190006175434, "grad_norm": 3.26554012298584, "learning_rate": 9.66228971157489e-06, "loss": 0.5361, "step": 3472 }, { "epoch": 0.14418341558596567, "grad_norm": 3.2511348724365234, "learning_rate": 9.66204677651196e-06, "loss": 0.5693, "step": 3473 }, { "epoch": 0.144224931110177, "grad_norm": 2.7698473930358887, "learning_rate": 9.661803757157879e-06, "loss": 0.4653, "step": 3474 }, { "epoch": 0.14426644663438834, "grad_norm": 2.7824504375457764, "learning_rate": 9.661560653517044e-06, "loss": 0.5154, "step": 3475 }, { "epoch": 0.14430796215859967, "grad_norm": 3.7557413578033447, "learning_rate": 9.661317465593848e-06, "loss": 0.5596, "step": 3476 }, { "epoch": 0.144349477682811, "grad_norm": 2.627319574356079, "learning_rate": 9.661074193392689e-06, "loss": 0.6246, "step": 3477 }, { "epoch": 0.14439099320702234, "grad_norm": 2.7231078147888184, "learning_rate": 9.660830836917966e-06, "loss": 0.5549, "step": 3478 }, { "epoch": 0.14443250873123367, "grad_norm": 2.768397092819214, "learning_rate": 9.660587396174077e-06, "loss": 0.5103, "step": 3479 }, { "epoch": 0.14447402425544503, "grad_norm": 2.506343364715576, "learning_rate": 9.660343871165425e-06, "loss": 0.5327, "step": 3480 }, { "epoch": 0.14451553977965637, "grad_norm": 2.5140013694763184, "learning_rate": 9.660100261896412e-06, "loss": 0.5275, "step": 3481 }, { "epoch": 0.1445570553038677, "grad_norm": 2.6341540813446045, "learning_rate": 9.659856568371442e-06, "loss": 0.5094, "step": 3482 }, { "epoch": 0.14459857082807903, "grad_norm": 2.9342832565307617, "learning_rate": 9.659612790594924e-06, "loss": 0.5403, "step": 3483 }, { "epoch": 0.14464008635229036, "grad_norm": 2.577876567840576, "learning_rate": 9.659368928571265e-06, "loss": 0.3255, "step": 3484 }, { "epoch": 0.1446816018765017, "grad_norm": 2.3405489921569824, "learning_rate": 9.65912498230487e-06, "loss": 0.504, "step": 3485 }, { "epoch": 0.14472311740071303, "grad_norm": 2.727339506149292, "learning_rate": 9.658880951800154e-06, "loss": 0.5555, "step": 3486 }, { "epoch": 0.14476463292492436, "grad_norm": 2.3171403408050537, "learning_rate": 9.658636837061526e-06, "loss": 0.3358, "step": 3487 }, { "epoch": 0.1448061484491357, "grad_norm": 2.4946351051330566, "learning_rate": 9.658392638093403e-06, "loss": 0.4104, "step": 3488 }, { "epoch": 0.14484766397334703, "grad_norm": 2.799666404724121, "learning_rate": 9.658148354900196e-06, "loss": 0.5878, "step": 3489 }, { "epoch": 0.14488917949755836, "grad_norm": 2.2646234035491943, "learning_rate": 9.657903987486325e-06, "loss": 0.5575, "step": 3490 }, { "epoch": 0.1449306950217697, "grad_norm": 3.6459624767303467, "learning_rate": 9.657659535856209e-06, "loss": 0.5863, "step": 3491 }, { "epoch": 0.14497221054598103, "grad_norm": 2.626460552215576, "learning_rate": 9.657415000014265e-06, "loss": 0.6195, "step": 3492 }, { "epoch": 0.14501372607019236, "grad_norm": 2.567941665649414, "learning_rate": 9.657170379964915e-06, "loss": 0.5594, "step": 3493 }, { "epoch": 0.1450552415944037, "grad_norm": 3.208390474319458, "learning_rate": 9.656925675712584e-06, "loss": 0.5957, "step": 3494 }, { "epoch": 0.14509675711861506, "grad_norm": 2.7442069053649902, "learning_rate": 9.656680887261693e-06, "loss": 0.4304, "step": 3495 }, { "epoch": 0.1451382726428264, "grad_norm": 2.739487409591675, "learning_rate": 9.65643601461667e-06, "loss": 0.5411, "step": 3496 }, { "epoch": 0.14517978816703772, "grad_norm": 2.9700875282287598, "learning_rate": 9.656191057781941e-06, "loss": 0.5997, "step": 3497 }, { "epoch": 0.14522130369124905, "grad_norm": 2.410339832305908, "learning_rate": 9.655946016761936e-06, "loss": 0.5861, "step": 3498 }, { "epoch": 0.1452628192154604, "grad_norm": 2.0487115383148193, "learning_rate": 9.655700891561084e-06, "loss": 0.523, "step": 3499 }, { "epoch": 0.14530433473967172, "grad_norm": 2.182323455810547, "learning_rate": 9.65545568218382e-06, "loss": 0.496, "step": 3500 }, { "epoch": 0.14534585026388305, "grad_norm": 2.7611405849456787, "learning_rate": 9.655210388634575e-06, "loss": 0.5261, "step": 3501 }, { "epoch": 0.14538736578809439, "grad_norm": 2.108067035675049, "learning_rate": 9.654965010917783e-06, "loss": 0.4295, "step": 3502 }, { "epoch": 0.14542888131230572, "grad_norm": 2.571002960205078, "learning_rate": 9.654719549037882e-06, "loss": 0.5773, "step": 3503 }, { "epoch": 0.14547039683651705, "grad_norm": 2.4520177841186523, "learning_rate": 9.65447400299931e-06, "loss": 0.5128, "step": 3504 }, { "epoch": 0.14551191236072838, "grad_norm": 2.41208553314209, "learning_rate": 9.654228372806508e-06, "loss": 0.4553, "step": 3505 }, { "epoch": 0.14555342788493972, "grad_norm": 2.4489619731903076, "learning_rate": 9.653982658463915e-06, "loss": 0.5194, "step": 3506 }, { "epoch": 0.14559494340915105, "grad_norm": 2.7652411460876465, "learning_rate": 9.653736859975972e-06, "loss": 0.6335, "step": 3507 }, { "epoch": 0.14563645893336238, "grad_norm": 3.606668472290039, "learning_rate": 9.653490977347129e-06, "loss": 0.4873, "step": 3508 }, { "epoch": 0.14567797445757372, "grad_norm": 2.9766902923583984, "learning_rate": 9.653245010581824e-06, "loss": 0.5756, "step": 3509 }, { "epoch": 0.14571948998178508, "grad_norm": 3.2332470417022705, "learning_rate": 9.652998959684511e-06, "loss": 0.5401, "step": 3510 }, { "epoch": 0.1457610055059964, "grad_norm": 3.4605743885040283, "learning_rate": 9.652752824659635e-06, "loss": 0.5135, "step": 3511 }, { "epoch": 0.14580252103020774, "grad_norm": 2.8510138988494873, "learning_rate": 9.652506605511644e-06, "loss": 0.5724, "step": 3512 }, { "epoch": 0.14584403655441908, "grad_norm": 2.843128204345703, "learning_rate": 9.652260302244996e-06, "loss": 0.6748, "step": 3513 }, { "epoch": 0.1458855520786304, "grad_norm": 3.2165961265563965, "learning_rate": 9.65201391486414e-06, "loss": 0.5263, "step": 3514 }, { "epoch": 0.14592706760284174, "grad_norm": 2.550689220428467, "learning_rate": 9.65176744337353e-06, "loss": 0.5426, "step": 3515 }, { "epoch": 0.14596858312705308, "grad_norm": 2.7413570880889893, "learning_rate": 9.651520887777626e-06, "loss": 0.5531, "step": 3516 }, { "epoch": 0.1460100986512644, "grad_norm": 3.065857172012329, "learning_rate": 9.651274248080882e-06, "loss": 0.4622, "step": 3517 }, { "epoch": 0.14605161417547574, "grad_norm": 2.390005350112915, "learning_rate": 9.65102752428776e-06, "loss": 0.6108, "step": 3518 }, { "epoch": 0.14609312969968707, "grad_norm": 2.92122483253479, "learning_rate": 9.650780716402721e-06, "loss": 0.5028, "step": 3519 }, { "epoch": 0.1461346452238984, "grad_norm": 2.2192654609680176, "learning_rate": 9.650533824430224e-06, "loss": 0.5859, "step": 3520 }, { "epoch": 0.14617616074810974, "grad_norm": 2.8190882205963135, "learning_rate": 9.650286848374735e-06, "loss": 0.4145, "step": 3521 }, { "epoch": 0.14621767627232107, "grad_norm": 2.1231043338775635, "learning_rate": 9.65003978824072e-06, "loss": 0.4272, "step": 3522 }, { "epoch": 0.1462591917965324, "grad_norm": 2.4263854026794434, "learning_rate": 9.649792644032645e-06, "loss": 0.4159, "step": 3523 }, { "epoch": 0.14630070732074374, "grad_norm": 2.3717145919799805, "learning_rate": 9.64954541575498e-06, "loss": 0.4374, "step": 3524 }, { "epoch": 0.14634222284495507, "grad_norm": 3.050823211669922, "learning_rate": 9.649298103412193e-06, "loss": 0.6177, "step": 3525 }, { "epoch": 0.14638373836916643, "grad_norm": 2.5446434020996094, "learning_rate": 9.649050707008755e-06, "loss": 0.5257, "step": 3526 }, { "epoch": 0.14642525389337777, "grad_norm": 2.6086552143096924, "learning_rate": 9.648803226549141e-06, "loss": 0.5416, "step": 3527 }, { "epoch": 0.1464667694175891, "grad_norm": 2.599971294403076, "learning_rate": 9.648555662037826e-06, "loss": 0.5586, "step": 3528 }, { "epoch": 0.14650828494180043, "grad_norm": 2.7865700721740723, "learning_rate": 9.648308013479283e-06, "loss": 0.5513, "step": 3529 }, { "epoch": 0.14654980046601176, "grad_norm": 2.818532705307007, "learning_rate": 9.648060280877993e-06, "loss": 0.6562, "step": 3530 }, { "epoch": 0.1465913159902231, "grad_norm": 2.430802345275879, "learning_rate": 9.647812464238434e-06, "loss": 0.5099, "step": 3531 }, { "epoch": 0.14663283151443443, "grad_norm": 2.6705493927001953, "learning_rate": 9.647564563565085e-06, "loss": 0.5186, "step": 3532 }, { "epoch": 0.14667434703864576, "grad_norm": 2.8553974628448486, "learning_rate": 9.647316578862427e-06, "loss": 0.4762, "step": 3533 }, { "epoch": 0.1467158625628571, "grad_norm": 3.0085864067077637, "learning_rate": 9.647068510134947e-06, "loss": 0.3978, "step": 3534 }, { "epoch": 0.14675737808706843, "grad_norm": 3.107097864151001, "learning_rate": 9.64682035738713e-06, "loss": 0.4767, "step": 3535 }, { "epoch": 0.14679889361127976, "grad_norm": 2.935701847076416, "learning_rate": 9.64657212062346e-06, "loss": 0.591, "step": 3536 }, { "epoch": 0.1468404091354911, "grad_norm": 3.3462836742401123, "learning_rate": 9.646323799848429e-06, "loss": 0.4828, "step": 3537 }, { "epoch": 0.14688192465970243, "grad_norm": 2.8198623657226562, "learning_rate": 9.646075395066523e-06, "loss": 0.5251, "step": 3538 }, { "epoch": 0.14692344018391376, "grad_norm": 2.395163059234619, "learning_rate": 9.645826906282234e-06, "loss": 0.5337, "step": 3539 }, { "epoch": 0.1469649557081251, "grad_norm": 2.297607660293579, "learning_rate": 9.645578333500056e-06, "loss": 0.3951, "step": 3540 }, { "epoch": 0.14700647123233646, "grad_norm": 2.7411630153656006, "learning_rate": 9.645329676724485e-06, "loss": 0.5624, "step": 3541 }, { "epoch": 0.1470479867565478, "grad_norm": 1.9840209484100342, "learning_rate": 9.645080935960011e-06, "loss": 0.5973, "step": 3542 }, { "epoch": 0.14708950228075912, "grad_norm": 2.8813138008117676, "learning_rate": 9.644832111211137e-06, "loss": 0.4754, "step": 3543 }, { "epoch": 0.14713101780497045, "grad_norm": 2.68320369720459, "learning_rate": 9.64458320248236e-06, "loss": 0.483, "step": 3544 }, { "epoch": 0.1471725333291818, "grad_norm": 2.905430555343628, "learning_rate": 9.644334209778179e-06, "loss": 0.5358, "step": 3545 }, { "epoch": 0.14721404885339312, "grad_norm": 2.8148298263549805, "learning_rate": 9.644085133103096e-06, "loss": 0.6043, "step": 3546 }, { "epoch": 0.14725556437760445, "grad_norm": 2.7416880130767822, "learning_rate": 9.643835972461616e-06, "loss": 0.4762, "step": 3547 }, { "epoch": 0.1472970799018158, "grad_norm": 2.5017902851104736, "learning_rate": 9.643586727858243e-06, "loss": 0.6227, "step": 3548 }, { "epoch": 0.14733859542602712, "grad_norm": 2.675276041030884, "learning_rate": 9.643337399297485e-06, "loss": 0.4702, "step": 3549 }, { "epoch": 0.14738011095023845, "grad_norm": 2.7494235038757324, "learning_rate": 9.643087986783846e-06, "loss": 0.564, "step": 3550 }, { "epoch": 0.14742162647444979, "grad_norm": 2.783039093017578, "learning_rate": 9.64283849032184e-06, "loss": 0.5533, "step": 3551 }, { "epoch": 0.14746314199866112, "grad_norm": 2.3310623168945312, "learning_rate": 9.642588909915976e-06, "loss": 0.543, "step": 3552 }, { "epoch": 0.14750465752287245, "grad_norm": 3.026965379714966, "learning_rate": 9.642339245570765e-06, "loss": 0.5655, "step": 3553 }, { "epoch": 0.14754617304708378, "grad_norm": 2.489696741104126, "learning_rate": 9.642089497290723e-06, "loss": 0.488, "step": 3554 }, { "epoch": 0.14758768857129512, "grad_norm": 2.1830174922943115, "learning_rate": 9.641839665080363e-06, "loss": 0.6003, "step": 3555 }, { "epoch": 0.14762920409550645, "grad_norm": 2.6629583835601807, "learning_rate": 9.641589748944207e-06, "loss": 0.5429, "step": 3556 }, { "epoch": 0.1476707196197178, "grad_norm": 2.769731044769287, "learning_rate": 9.641339748886769e-06, "loss": 0.5589, "step": 3557 }, { "epoch": 0.14771223514392914, "grad_norm": 2.480966806411743, "learning_rate": 9.641089664912571e-06, "loss": 0.5495, "step": 3558 }, { "epoch": 0.14775375066814048, "grad_norm": 2.959031343460083, "learning_rate": 9.640839497026134e-06, "loss": 0.4958, "step": 3559 }, { "epoch": 0.1477952661923518, "grad_norm": 2.7769253253936768, "learning_rate": 9.64058924523198e-06, "loss": 0.6806, "step": 3560 }, { "epoch": 0.14783678171656314, "grad_norm": 2.561218738555908, "learning_rate": 9.640338909534636e-06, "loss": 0.5661, "step": 3561 }, { "epoch": 0.14787829724077448, "grad_norm": 2.5690932273864746, "learning_rate": 9.640088489938627e-06, "loss": 0.5532, "step": 3562 }, { "epoch": 0.1479198127649858, "grad_norm": 2.8924918174743652, "learning_rate": 9.639837986448482e-06, "loss": 0.6311, "step": 3563 }, { "epoch": 0.14796132828919714, "grad_norm": 2.774214267730713, "learning_rate": 9.639587399068725e-06, "loss": 0.448, "step": 3564 }, { "epoch": 0.14800284381340847, "grad_norm": 2.9679930210113525, "learning_rate": 9.639336727803895e-06, "loss": 0.6331, "step": 3565 }, { "epoch": 0.1480443593376198, "grad_norm": 2.386357545852661, "learning_rate": 9.639085972658517e-06, "loss": 0.567, "step": 3566 }, { "epoch": 0.14808587486183114, "grad_norm": 2.994342803955078, "learning_rate": 9.638835133637129e-06, "loss": 0.6167, "step": 3567 }, { "epoch": 0.14812739038604247, "grad_norm": 2.947097063064575, "learning_rate": 9.638584210744265e-06, "loss": 0.6453, "step": 3568 }, { "epoch": 0.1481689059102538, "grad_norm": 2.538506507873535, "learning_rate": 9.63833320398446e-06, "loss": 0.5756, "step": 3569 }, { "epoch": 0.14821042143446514, "grad_norm": 2.3834288120269775, "learning_rate": 9.638082113362254e-06, "loss": 0.383, "step": 3570 }, { "epoch": 0.14825193695867647, "grad_norm": 2.5665037631988525, "learning_rate": 9.637830938882186e-06, "loss": 0.5356, "step": 3571 }, { "epoch": 0.14829345248288783, "grad_norm": 2.553818464279175, "learning_rate": 9.6375796805488e-06, "loss": 0.5763, "step": 3572 }, { "epoch": 0.14833496800709917, "grad_norm": 2.934068441390991, "learning_rate": 9.637328338366635e-06, "loss": 0.4688, "step": 3573 }, { "epoch": 0.1483764835313105, "grad_norm": 2.73612642288208, "learning_rate": 9.637076912340237e-06, "loss": 0.5889, "step": 3574 }, { "epoch": 0.14841799905552183, "grad_norm": 2.678678512573242, "learning_rate": 9.636825402474153e-06, "loss": 0.6109, "step": 3575 }, { "epoch": 0.14845951457973317, "grad_norm": 2.992879629135132, "learning_rate": 9.63657380877293e-06, "loss": 0.5113, "step": 3576 }, { "epoch": 0.1485010301039445, "grad_norm": 2.695017099380493, "learning_rate": 9.636322131241114e-06, "loss": 0.6596, "step": 3577 }, { "epoch": 0.14854254562815583, "grad_norm": 2.363402843475342, "learning_rate": 9.636070369883258e-06, "loss": 0.5776, "step": 3578 }, { "epoch": 0.14858406115236716, "grad_norm": 3.590787410736084, "learning_rate": 9.635818524703913e-06, "loss": 0.5022, "step": 3579 }, { "epoch": 0.1486255766765785, "grad_norm": 2.3654541969299316, "learning_rate": 9.635566595707633e-06, "loss": 0.391, "step": 3580 }, { "epoch": 0.14866709220078983, "grad_norm": 2.0338683128356934, "learning_rate": 9.635314582898974e-06, "loss": 0.4164, "step": 3581 }, { "epoch": 0.14870860772500116, "grad_norm": 3.0805375576019287, "learning_rate": 9.63506248628249e-06, "loss": 0.5943, "step": 3582 }, { "epoch": 0.1487501232492125, "grad_norm": 2.389012575149536, "learning_rate": 9.63481030586274e-06, "loss": 0.613, "step": 3583 }, { "epoch": 0.14879163877342383, "grad_norm": 2.3816475868225098, "learning_rate": 9.634558041644286e-06, "loss": 0.4919, "step": 3584 }, { "epoch": 0.14883315429763516, "grad_norm": 2.1882331371307373, "learning_rate": 9.634305693631686e-06, "loss": 0.4702, "step": 3585 }, { "epoch": 0.1488746698218465, "grad_norm": 2.5968337059020996, "learning_rate": 9.634053261829503e-06, "loss": 0.3618, "step": 3586 }, { "epoch": 0.14891618534605783, "grad_norm": 2.856776237487793, "learning_rate": 9.6338007462423e-06, "loss": 0.6058, "step": 3587 }, { "epoch": 0.1489577008702692, "grad_norm": 2.2260375022888184, "learning_rate": 9.633548146874645e-06, "loss": 0.5499, "step": 3588 }, { "epoch": 0.14899921639448052, "grad_norm": 2.2664482593536377, "learning_rate": 9.633295463731105e-06, "loss": 0.5019, "step": 3589 }, { "epoch": 0.14904073191869185, "grad_norm": 2.7379963397979736, "learning_rate": 9.633042696816246e-06, "loss": 0.5546, "step": 3590 }, { "epoch": 0.1490822474429032, "grad_norm": 2.8532168865203857, "learning_rate": 9.63278984613464e-06, "loss": 0.4991, "step": 3591 }, { "epoch": 0.14912376296711452, "grad_norm": 2.6107118129730225, "learning_rate": 9.63253691169086e-06, "loss": 0.4469, "step": 3592 }, { "epoch": 0.14916527849132585, "grad_norm": 2.2627906799316406, "learning_rate": 9.632283893489477e-06, "loss": 0.5635, "step": 3593 }, { "epoch": 0.1492067940155372, "grad_norm": 2.107633113861084, "learning_rate": 9.632030791535063e-06, "loss": 0.4265, "step": 3594 }, { "epoch": 0.14924830953974852, "grad_norm": 2.4276435375213623, "learning_rate": 9.6317776058322e-06, "loss": 0.5605, "step": 3595 }, { "epoch": 0.14928982506395985, "grad_norm": 2.475433349609375, "learning_rate": 9.631524336385462e-06, "loss": 0.4203, "step": 3596 }, { "epoch": 0.14933134058817119, "grad_norm": 1.874914526939392, "learning_rate": 9.63127098319943e-06, "loss": 0.4399, "step": 3597 }, { "epoch": 0.14937285611238252, "grad_norm": 2.6615777015686035, "learning_rate": 9.631017546278681e-06, "loss": 0.4329, "step": 3598 }, { "epoch": 0.14941437163659385, "grad_norm": 2.3169639110565186, "learning_rate": 9.630764025627803e-06, "loss": 0.517, "step": 3599 }, { "epoch": 0.14945588716080518, "grad_norm": 3.463191509246826, "learning_rate": 9.630510421251375e-06, "loss": 0.49, "step": 3600 }, { "epoch": 0.14949740268501652, "grad_norm": 2.5820138454437256, "learning_rate": 9.630256733153983e-06, "loss": 0.5044, "step": 3601 }, { "epoch": 0.14953891820922785, "grad_norm": 2.7435197830200195, "learning_rate": 9.630002961340216e-06, "loss": 0.5154, "step": 3602 }, { "epoch": 0.1495804337334392, "grad_norm": 2.424013137817383, "learning_rate": 9.629749105814664e-06, "loss": 0.5433, "step": 3603 }, { "epoch": 0.14962194925765054, "grad_norm": 2.65509033203125, "learning_rate": 9.629495166581909e-06, "loss": 0.5147, "step": 3604 }, { "epoch": 0.14966346478186188, "grad_norm": 2.515824794769287, "learning_rate": 9.629241143646549e-06, "loss": 0.4617, "step": 3605 }, { "epoch": 0.1497049803060732, "grad_norm": 2.3964951038360596, "learning_rate": 9.628987037013175e-06, "loss": 0.4488, "step": 3606 }, { "epoch": 0.14974649583028454, "grad_norm": 2.5849196910858154, "learning_rate": 9.628732846686381e-06, "loss": 0.6692, "step": 3607 }, { "epoch": 0.14978801135449588, "grad_norm": 2.3933322429656982, "learning_rate": 9.628478572670764e-06, "loss": 0.4335, "step": 3608 }, { "epoch": 0.1498295268787072, "grad_norm": 2.6573233604431152, "learning_rate": 9.628224214970919e-06, "loss": 0.5315, "step": 3609 }, { "epoch": 0.14987104240291854, "grad_norm": 2.1029531955718994, "learning_rate": 9.627969773591445e-06, "loss": 0.4486, "step": 3610 }, { "epoch": 0.14991255792712987, "grad_norm": 2.0324742794036865, "learning_rate": 9.627715248536945e-06, "loss": 0.5448, "step": 3611 }, { "epoch": 0.1499540734513412, "grad_norm": 2.1225030422210693, "learning_rate": 9.627460639812018e-06, "loss": 0.4459, "step": 3612 }, { "epoch": 0.14999558897555254, "grad_norm": 2.479097366333008, "learning_rate": 9.627205947421271e-06, "loss": 0.4164, "step": 3613 }, { "epoch": 0.15003710449976387, "grad_norm": 3.09135103225708, "learning_rate": 9.626951171369306e-06, "loss": 0.5994, "step": 3614 }, { "epoch": 0.1500786200239752, "grad_norm": 2.219470739364624, "learning_rate": 9.626696311660731e-06, "loss": 0.4371, "step": 3615 }, { "epoch": 0.15012013554818654, "grad_norm": 3.046790361404419, "learning_rate": 9.626441368300151e-06, "loss": 0.6177, "step": 3616 }, { "epoch": 0.15016165107239787, "grad_norm": 3.1201744079589844, "learning_rate": 9.626186341292178e-06, "loss": 0.5757, "step": 3617 }, { "epoch": 0.15020316659660923, "grad_norm": 2.5507266521453857, "learning_rate": 9.625931230641425e-06, "loss": 0.4914, "step": 3618 }, { "epoch": 0.15024468212082057, "grad_norm": 3.0295352935791016, "learning_rate": 9.6256760363525e-06, "loss": 0.5611, "step": 3619 }, { "epoch": 0.1502861976450319, "grad_norm": 2.4222114086151123, "learning_rate": 9.62542075843002e-06, "loss": 0.4927, "step": 3620 }, { "epoch": 0.15032771316924323, "grad_norm": 2.483624219894409, "learning_rate": 9.625165396878599e-06, "loss": 0.564, "step": 3621 }, { "epoch": 0.15036922869345457, "grad_norm": 2.422633409500122, "learning_rate": 9.624909951702855e-06, "loss": 0.5408, "step": 3622 }, { "epoch": 0.1504107442176659, "grad_norm": 2.2239248752593994, "learning_rate": 9.624654422907405e-06, "loss": 0.4609, "step": 3623 }, { "epoch": 0.15045225974187723, "grad_norm": 2.6067373752593994, "learning_rate": 9.62439881049687e-06, "loss": 0.4319, "step": 3624 }, { "epoch": 0.15049377526608856, "grad_norm": 2.91957688331604, "learning_rate": 9.624143114475872e-06, "loss": 0.5539, "step": 3625 }, { "epoch": 0.1505352907902999, "grad_norm": 3.231900930404663, "learning_rate": 9.623887334849033e-06, "loss": 0.6327, "step": 3626 }, { "epoch": 0.15057680631451123, "grad_norm": 3.302560567855835, "learning_rate": 9.62363147162098e-06, "loss": 0.5404, "step": 3627 }, { "epoch": 0.15061832183872256, "grad_norm": 2.742342710494995, "learning_rate": 9.623375524796337e-06, "loss": 0.3552, "step": 3628 }, { "epoch": 0.1506598373629339, "grad_norm": 2.394347667694092, "learning_rate": 9.62311949437973e-06, "loss": 0.4541, "step": 3629 }, { "epoch": 0.15070135288714523, "grad_norm": 2.7464802265167236, "learning_rate": 9.62286338037579e-06, "loss": 0.4182, "step": 3630 }, { "epoch": 0.15074286841135656, "grad_norm": 2.6726508140563965, "learning_rate": 9.62260718278915e-06, "loss": 0.4976, "step": 3631 }, { "epoch": 0.1507843839355679, "grad_norm": 3.167294502258301, "learning_rate": 9.62235090162444e-06, "loss": 0.6221, "step": 3632 }, { "epoch": 0.15082589945977923, "grad_norm": 2.4372167587280273, "learning_rate": 9.62209453688629e-06, "loss": 0.6786, "step": 3633 }, { "epoch": 0.1508674149839906, "grad_norm": 2.477304697036743, "learning_rate": 9.621838088579342e-06, "loss": 0.5854, "step": 3634 }, { "epoch": 0.15090893050820192, "grad_norm": 2.94187331199646, "learning_rate": 9.621581556708228e-06, "loss": 0.699, "step": 3635 }, { "epoch": 0.15095044603241325, "grad_norm": 2.7707462310791016, "learning_rate": 9.621324941277587e-06, "loss": 0.5908, "step": 3636 }, { "epoch": 0.1509919615566246, "grad_norm": 2.6301281452178955, "learning_rate": 9.62106824229206e-06, "loss": 0.6509, "step": 3637 }, { "epoch": 0.15103347708083592, "grad_norm": 2.391061544418335, "learning_rate": 9.620811459756285e-06, "loss": 0.4895, "step": 3638 }, { "epoch": 0.15107499260504725, "grad_norm": 4.000722885131836, "learning_rate": 9.62055459367491e-06, "loss": 0.5925, "step": 3639 }, { "epoch": 0.1511165081292586, "grad_norm": 2.633556842803955, "learning_rate": 9.620297644052572e-06, "loss": 0.4931, "step": 3640 }, { "epoch": 0.15115802365346992, "grad_norm": 2.4446492195129395, "learning_rate": 9.620040610893925e-06, "loss": 0.5264, "step": 3641 }, { "epoch": 0.15119953917768125, "grad_norm": 2.6280758380889893, "learning_rate": 9.61978349420361e-06, "loss": 0.5437, "step": 3642 }, { "epoch": 0.15124105470189259, "grad_norm": 2.8959243297576904, "learning_rate": 9.619526293986279e-06, "loss": 0.6165, "step": 3643 }, { "epoch": 0.15128257022610392, "grad_norm": 3.3438475131988525, "learning_rate": 9.619269010246581e-06, "loss": 0.5209, "step": 3644 }, { "epoch": 0.15132408575031525, "grad_norm": 2.4396369457244873, "learning_rate": 9.619011642989167e-06, "loss": 0.5509, "step": 3645 }, { "epoch": 0.15136560127452658, "grad_norm": 3.024242877960205, "learning_rate": 9.61875419221869e-06, "loss": 0.7146, "step": 3646 }, { "epoch": 0.15140711679873792, "grad_norm": 2.905975580215454, "learning_rate": 9.618496657939808e-06, "loss": 0.4904, "step": 3647 }, { "epoch": 0.15144863232294925, "grad_norm": 2.272245168685913, "learning_rate": 9.618239040157175e-06, "loss": 0.479, "step": 3648 }, { "epoch": 0.1514901478471606, "grad_norm": 2.491063117980957, "learning_rate": 9.617981338875449e-06, "loss": 0.5257, "step": 3649 }, { "epoch": 0.15153166337137194, "grad_norm": 2.428276538848877, "learning_rate": 9.61772355409929e-06, "loss": 0.4962, "step": 3650 }, { "epoch": 0.15157317889558328, "grad_norm": 2.647139549255371, "learning_rate": 9.617465685833357e-06, "loss": 0.5829, "step": 3651 }, { "epoch": 0.1516146944197946, "grad_norm": 2.763746738433838, "learning_rate": 9.617207734082314e-06, "loss": 0.5624, "step": 3652 }, { "epoch": 0.15165620994400594, "grad_norm": 2.4949593544006348, "learning_rate": 9.616949698850823e-06, "loss": 0.4675, "step": 3653 }, { "epoch": 0.15169772546821728, "grad_norm": 2.509216785430908, "learning_rate": 9.616691580143553e-06, "loss": 0.4836, "step": 3654 }, { "epoch": 0.1517392409924286, "grad_norm": 2.711134672164917, "learning_rate": 9.616433377965166e-06, "loss": 0.5997, "step": 3655 }, { "epoch": 0.15178075651663994, "grad_norm": 2.750560998916626, "learning_rate": 9.616175092320335e-06, "loss": 0.5599, "step": 3656 }, { "epoch": 0.15182227204085127, "grad_norm": 2.4401957988739014, "learning_rate": 9.615916723213728e-06, "loss": 0.563, "step": 3657 }, { "epoch": 0.1518637875650626, "grad_norm": 2.3811304569244385, "learning_rate": 9.615658270650015e-06, "loss": 0.7323, "step": 3658 }, { "epoch": 0.15190530308927394, "grad_norm": 2.403024673461914, "learning_rate": 9.61539973463387e-06, "loss": 0.5411, "step": 3659 }, { "epoch": 0.15194681861348527, "grad_norm": 2.6579575538635254, "learning_rate": 9.615141115169968e-06, "loss": 0.4044, "step": 3660 }, { "epoch": 0.1519883341376966, "grad_norm": 2.6235880851745605, "learning_rate": 9.614882412262984e-06, "loss": 0.5208, "step": 3661 }, { "epoch": 0.15202984966190794, "grad_norm": 2.74025559425354, "learning_rate": 9.614623625917596e-06, "loss": 0.5419, "step": 3662 }, { "epoch": 0.15207136518611927, "grad_norm": 2.57757830619812, "learning_rate": 9.614364756138484e-06, "loss": 0.4713, "step": 3663 }, { "epoch": 0.1521128807103306, "grad_norm": 2.6545584201812744, "learning_rate": 9.614105802930324e-06, "loss": 0.5705, "step": 3664 }, { "epoch": 0.15215439623454197, "grad_norm": 2.897711753845215, "learning_rate": 9.613846766297806e-06, "loss": 0.6579, "step": 3665 }, { "epoch": 0.1521959117587533, "grad_norm": 2.471210479736328, "learning_rate": 9.613587646245605e-06, "loss": 0.5952, "step": 3666 }, { "epoch": 0.15223742728296463, "grad_norm": 2.6710526943206787, "learning_rate": 9.613328442778411e-06, "loss": 0.5044, "step": 3667 }, { "epoch": 0.15227894280717597, "grad_norm": 2.874570846557617, "learning_rate": 9.613069155900906e-06, "loss": 0.5017, "step": 3668 }, { "epoch": 0.1523204583313873, "grad_norm": 2.6687653064727783, "learning_rate": 9.612809785617785e-06, "loss": 0.5055, "step": 3669 }, { "epoch": 0.15236197385559863, "grad_norm": 2.620853900909424, "learning_rate": 9.612550331933731e-06, "loss": 0.4968, "step": 3670 }, { "epoch": 0.15240348937980996, "grad_norm": 2.6224255561828613, "learning_rate": 9.61229079485344e-06, "loss": 0.4651, "step": 3671 }, { "epoch": 0.1524450049040213, "grad_norm": 2.3849620819091797, "learning_rate": 9.6120311743816e-06, "loss": 0.4287, "step": 3672 }, { "epoch": 0.15248652042823263, "grad_norm": 2.4316720962524414, "learning_rate": 9.611771470522908e-06, "loss": 0.5336, "step": 3673 }, { "epoch": 0.15252803595244396, "grad_norm": 2.4123482704162598, "learning_rate": 9.611511683282057e-06, "loss": 0.4815, "step": 3674 }, { "epoch": 0.1525695514766553, "grad_norm": 2.8942596912384033, "learning_rate": 9.611251812663748e-06, "loss": 0.6665, "step": 3675 }, { "epoch": 0.15261106700086663, "grad_norm": 2.865274429321289, "learning_rate": 9.610991858672676e-06, "loss": 0.5732, "step": 3676 }, { "epoch": 0.15265258252507796, "grad_norm": 2.878974676132202, "learning_rate": 9.610731821313541e-06, "loss": 0.5419, "step": 3677 }, { "epoch": 0.1526940980492893, "grad_norm": 2.523970603942871, "learning_rate": 9.610471700591047e-06, "loss": 0.5462, "step": 3678 }, { "epoch": 0.15273561357350063, "grad_norm": 2.863980531692505, "learning_rate": 9.610211496509895e-06, "loss": 0.4213, "step": 3679 }, { "epoch": 0.152777129097712, "grad_norm": 2.542177200317383, "learning_rate": 9.609951209074793e-06, "loss": 0.4784, "step": 3680 }, { "epoch": 0.15281864462192332, "grad_norm": 2.9792182445526123, "learning_rate": 9.609690838290442e-06, "loss": 0.5379, "step": 3681 }, { "epoch": 0.15286016014613465, "grad_norm": 2.28102707862854, "learning_rate": 9.609430384161551e-06, "loss": 0.501, "step": 3682 }, { "epoch": 0.152901675670346, "grad_norm": 2.5853700637817383, "learning_rate": 9.609169846692833e-06, "loss": 0.5153, "step": 3683 }, { "epoch": 0.15294319119455732, "grad_norm": 2.6239070892333984, "learning_rate": 9.608909225888993e-06, "loss": 0.4739, "step": 3684 }, { "epoch": 0.15298470671876865, "grad_norm": 3.1945888996124268, "learning_rate": 9.608648521754746e-06, "loss": 0.5057, "step": 3685 }, { "epoch": 0.15302622224298, "grad_norm": 2.3529739379882812, "learning_rate": 9.608387734294806e-06, "loss": 0.4722, "step": 3686 }, { "epoch": 0.15306773776719132, "grad_norm": 3.7144088745117188, "learning_rate": 9.608126863513889e-06, "loss": 0.5826, "step": 3687 }, { "epoch": 0.15310925329140265, "grad_norm": 3.1807479858398438, "learning_rate": 9.607865909416708e-06, "loss": 0.5153, "step": 3688 }, { "epoch": 0.15315076881561399, "grad_norm": 4.034715175628662, "learning_rate": 9.607604872007985e-06, "loss": 0.5376, "step": 3689 }, { "epoch": 0.15319228433982532, "grad_norm": 2.748600482940674, "learning_rate": 9.607343751292437e-06, "loss": 0.5919, "step": 3690 }, { "epoch": 0.15323379986403665, "grad_norm": 2.3478896617889404, "learning_rate": 9.607082547274784e-06, "loss": 0.5247, "step": 3691 }, { "epoch": 0.15327531538824798, "grad_norm": 2.361583709716797, "learning_rate": 9.606821259959753e-06, "loss": 0.6976, "step": 3692 }, { "epoch": 0.15331683091245932, "grad_norm": 2.7311906814575195, "learning_rate": 9.606559889352065e-06, "loss": 0.4583, "step": 3693 }, { "epoch": 0.15335834643667065, "grad_norm": 2.9733893871307373, "learning_rate": 9.606298435456448e-06, "loss": 0.6259, "step": 3694 }, { "epoch": 0.15339986196088198, "grad_norm": 2.517557144165039, "learning_rate": 9.606036898277627e-06, "loss": 0.4954, "step": 3695 }, { "epoch": 0.15344137748509334, "grad_norm": 3.089777708053589, "learning_rate": 9.605775277820329e-06, "loss": 0.4501, "step": 3696 }, { "epoch": 0.15348289300930468, "grad_norm": 2.959261417388916, "learning_rate": 9.60551357408929e-06, "loss": 0.7108, "step": 3697 }, { "epoch": 0.153524408533516, "grad_norm": 2.683973550796509, "learning_rate": 9.605251787089234e-06, "loss": 0.6785, "step": 3698 }, { "epoch": 0.15356592405772734, "grad_norm": 3.0025393962860107, "learning_rate": 9.6049899168249e-06, "loss": 0.3807, "step": 3699 }, { "epoch": 0.15360743958193868, "grad_norm": 2.629260301589966, "learning_rate": 9.604727963301023e-06, "loss": 0.4682, "step": 3700 }, { "epoch": 0.15364895510615, "grad_norm": 2.524367570877075, "learning_rate": 9.604465926522334e-06, "loss": 0.4622, "step": 3701 }, { "epoch": 0.15369047063036134, "grad_norm": 2.4544918537139893, "learning_rate": 9.604203806493578e-06, "loss": 0.5956, "step": 3702 }, { "epoch": 0.15373198615457268, "grad_norm": 2.6909260749816895, "learning_rate": 9.603941603219487e-06, "loss": 0.5, "step": 3703 }, { "epoch": 0.153773501678784, "grad_norm": 2.3503847122192383, "learning_rate": 9.603679316704806e-06, "loss": 0.5274, "step": 3704 }, { "epoch": 0.15381501720299534, "grad_norm": 2.831573724746704, "learning_rate": 9.603416946954275e-06, "loss": 0.6805, "step": 3705 }, { "epoch": 0.15385653272720667, "grad_norm": 2.698312759399414, "learning_rate": 9.60315449397264e-06, "loss": 0.5095, "step": 3706 }, { "epoch": 0.153898048251418, "grad_norm": 3.049985647201538, "learning_rate": 9.602891957764644e-06, "loss": 0.5712, "step": 3707 }, { "epoch": 0.15393956377562934, "grad_norm": 2.8208959102630615, "learning_rate": 9.602629338335035e-06, "loss": 0.4979, "step": 3708 }, { "epoch": 0.15398107929984067, "grad_norm": 2.6227738857269287, "learning_rate": 9.60236663568856e-06, "loss": 0.5571, "step": 3709 }, { "epoch": 0.154022594824052, "grad_norm": 2.687983989715576, "learning_rate": 9.602103849829971e-06, "loss": 0.5832, "step": 3710 }, { "epoch": 0.15406411034826337, "grad_norm": 2.356929063796997, "learning_rate": 9.601840980764016e-06, "loss": 0.6357, "step": 3711 }, { "epoch": 0.1541056258724747, "grad_norm": 2.386772632598877, "learning_rate": 9.601578028495453e-06, "loss": 0.6365, "step": 3712 }, { "epoch": 0.15414714139668603, "grad_norm": 3.1594889163970947, "learning_rate": 9.60131499302903e-06, "loss": 0.6994, "step": 3713 }, { "epoch": 0.15418865692089737, "grad_norm": 3.3170039653778076, "learning_rate": 9.601051874369507e-06, "loss": 0.4209, "step": 3714 }, { "epoch": 0.1542301724451087, "grad_norm": 2.6820995807647705, "learning_rate": 9.600788672521641e-06, "loss": 0.4631, "step": 3715 }, { "epoch": 0.15427168796932003, "grad_norm": 3.3696682453155518, "learning_rate": 9.600525387490188e-06, "loss": 0.6426, "step": 3716 }, { "epoch": 0.15431320349353136, "grad_norm": 2.520297050476074, "learning_rate": 9.60026201927991e-06, "loss": 0.4563, "step": 3717 }, { "epoch": 0.1543547190177427, "grad_norm": 2.774360179901123, "learning_rate": 9.59999856789557e-06, "loss": 0.5102, "step": 3718 }, { "epoch": 0.15439623454195403, "grad_norm": 2.7875475883483887, "learning_rate": 9.599735033341929e-06, "loss": 0.4468, "step": 3719 }, { "epoch": 0.15443775006616536, "grad_norm": 3.0101029872894287, "learning_rate": 9.599471415623754e-06, "loss": 0.562, "step": 3720 }, { "epoch": 0.1544792655903767, "grad_norm": 2.7025246620178223, "learning_rate": 9.599207714745808e-06, "loss": 0.5554, "step": 3721 }, { "epoch": 0.15452078111458803, "grad_norm": 2.857987642288208, "learning_rate": 9.598943930712862e-06, "loss": 0.4149, "step": 3722 }, { "epoch": 0.15456229663879936, "grad_norm": 3.0299055576324463, "learning_rate": 9.598680063529684e-06, "loss": 0.6023, "step": 3723 }, { "epoch": 0.1546038121630107, "grad_norm": 2.779658555984497, "learning_rate": 9.598416113201046e-06, "loss": 0.4528, "step": 3724 }, { "epoch": 0.15464532768722203, "grad_norm": 2.220757246017456, "learning_rate": 9.598152079731717e-06, "loss": 0.4895, "step": 3725 }, { "epoch": 0.1546868432114334, "grad_norm": 2.590442180633545, "learning_rate": 9.597887963126476e-06, "loss": 0.5521, "step": 3726 }, { "epoch": 0.15472835873564472, "grad_norm": 2.2868359088897705, "learning_rate": 9.597623763390094e-06, "loss": 0.4588, "step": 3727 }, { "epoch": 0.15476987425985606, "grad_norm": 2.621264934539795, "learning_rate": 9.597359480527348e-06, "loss": 0.5821, "step": 3728 }, { "epoch": 0.1548113897840674, "grad_norm": 2.5718724727630615, "learning_rate": 9.597095114543018e-06, "loss": 0.6543, "step": 3729 }, { "epoch": 0.15485290530827872, "grad_norm": 2.445737838745117, "learning_rate": 9.596830665441885e-06, "loss": 0.5887, "step": 3730 }, { "epoch": 0.15489442083249005, "grad_norm": 2.526911497116089, "learning_rate": 9.596566133228726e-06, "loss": 0.583, "step": 3731 }, { "epoch": 0.1549359363567014, "grad_norm": 2.5461325645446777, "learning_rate": 9.596301517908329e-06, "loss": 0.6679, "step": 3732 }, { "epoch": 0.15497745188091272, "grad_norm": 2.440088987350464, "learning_rate": 9.596036819485475e-06, "loss": 0.403, "step": 3733 }, { "epoch": 0.15501896740512405, "grad_norm": 2.8544600009918213, "learning_rate": 9.59577203796495e-06, "loss": 0.6306, "step": 3734 }, { "epoch": 0.15506048292933539, "grad_norm": 2.340078592300415, "learning_rate": 9.595507173351541e-06, "loss": 0.5356, "step": 3735 }, { "epoch": 0.15510199845354672, "grad_norm": 2.486889600753784, "learning_rate": 9.59524222565004e-06, "loss": 0.4235, "step": 3736 }, { "epoch": 0.15514351397775805, "grad_norm": 2.1390318870544434, "learning_rate": 9.594977194865235e-06, "loss": 0.4486, "step": 3737 }, { "epoch": 0.15518502950196938, "grad_norm": 3.325198173522949, "learning_rate": 9.594712081001916e-06, "loss": 0.4961, "step": 3738 }, { "epoch": 0.15522654502618072, "grad_norm": 2.506134271621704, "learning_rate": 9.594446884064879e-06, "loss": 0.4381, "step": 3739 }, { "epoch": 0.15526806055039205, "grad_norm": 2.829902172088623, "learning_rate": 9.594181604058919e-06, "loss": 0.5838, "step": 3740 }, { "epoch": 0.15530957607460338, "grad_norm": 2.3544747829437256, "learning_rate": 9.593916240988829e-06, "loss": 0.4816, "step": 3741 }, { "epoch": 0.15535109159881474, "grad_norm": 2.71328067779541, "learning_rate": 9.593650794859412e-06, "loss": 0.6788, "step": 3742 }, { "epoch": 0.15539260712302608, "grad_norm": 2.060661792755127, "learning_rate": 9.593385265675463e-06, "loss": 0.4995, "step": 3743 }, { "epoch": 0.1554341226472374, "grad_norm": 2.440178155899048, "learning_rate": 9.593119653441785e-06, "loss": 0.4376, "step": 3744 }, { "epoch": 0.15547563817144874, "grad_norm": 2.5731120109558105, "learning_rate": 9.59285395816318e-06, "loss": 0.5434, "step": 3745 }, { "epoch": 0.15551715369566008, "grad_norm": 2.5616095066070557, "learning_rate": 9.59258817984445e-06, "loss": 0.5404, "step": 3746 }, { "epoch": 0.1555586692198714, "grad_norm": 2.4548516273498535, "learning_rate": 9.592322318490404e-06, "loss": 0.4774, "step": 3747 }, { "epoch": 0.15560018474408274, "grad_norm": 2.471740484237671, "learning_rate": 9.592056374105846e-06, "loss": 0.5678, "step": 3748 }, { "epoch": 0.15564170026829408, "grad_norm": 2.6998422145843506, "learning_rate": 9.591790346695586e-06, "loss": 0.5304, "step": 3749 }, { "epoch": 0.1556832157925054, "grad_norm": 2.901247262954712, "learning_rate": 9.591524236264432e-06, "loss": 0.7445, "step": 3750 }, { "epoch": 0.15572473131671674, "grad_norm": 3.096705675125122, "learning_rate": 9.591258042817196e-06, "loss": 0.5784, "step": 3751 }, { "epoch": 0.15576624684092807, "grad_norm": 2.38613224029541, "learning_rate": 9.590991766358692e-06, "loss": 0.5621, "step": 3752 }, { "epoch": 0.1558077623651394, "grad_norm": 2.2348790168762207, "learning_rate": 9.590725406893735e-06, "loss": 0.4117, "step": 3753 }, { "epoch": 0.15584927788935074, "grad_norm": 2.6904659271240234, "learning_rate": 9.590458964427136e-06, "loss": 0.4591, "step": 3754 }, { "epoch": 0.15589079341356207, "grad_norm": 2.4428625106811523, "learning_rate": 9.590192438963718e-06, "loss": 0.5596, "step": 3755 }, { "epoch": 0.1559323089377734, "grad_norm": 2.896289587020874, "learning_rate": 9.589925830508298e-06, "loss": 0.5111, "step": 3756 }, { "epoch": 0.15597382446198477, "grad_norm": 3.029738664627075, "learning_rate": 9.589659139065695e-06, "loss": 0.5724, "step": 3757 }, { "epoch": 0.1560153399861961, "grad_norm": 2.033622980117798, "learning_rate": 9.589392364640733e-06, "loss": 0.4507, "step": 3758 }, { "epoch": 0.15605685551040743, "grad_norm": 2.127304792404175, "learning_rate": 9.589125507238234e-06, "loss": 0.582, "step": 3759 }, { "epoch": 0.15609837103461877, "grad_norm": 2.752645492553711, "learning_rate": 9.588858566863021e-06, "loss": 0.5883, "step": 3760 }, { "epoch": 0.1561398865588301, "grad_norm": 2.860393524169922, "learning_rate": 9.588591543519924e-06, "loss": 0.4539, "step": 3761 }, { "epoch": 0.15618140208304143, "grad_norm": 2.5629727840423584, "learning_rate": 9.588324437213772e-06, "loss": 0.52, "step": 3762 }, { "epoch": 0.15622291760725276, "grad_norm": 2.13258695602417, "learning_rate": 9.58805724794939e-06, "loss": 0.3422, "step": 3763 }, { "epoch": 0.1562644331314641, "grad_norm": 2.6253528594970703, "learning_rate": 9.587789975731609e-06, "loss": 0.525, "step": 3764 }, { "epoch": 0.15630594865567543, "grad_norm": 2.8431005477905273, "learning_rate": 9.587522620565263e-06, "loss": 0.6402, "step": 3765 }, { "epoch": 0.15634746417988676, "grad_norm": 2.196855068206787, "learning_rate": 9.587255182455187e-06, "loss": 0.4955, "step": 3766 }, { "epoch": 0.1563889797040981, "grad_norm": 2.4354896545410156, "learning_rate": 9.586987661406213e-06, "loss": 0.4419, "step": 3767 }, { "epoch": 0.15643049522830943, "grad_norm": 2.784252405166626, "learning_rate": 9.586720057423183e-06, "loss": 0.4765, "step": 3768 }, { "epoch": 0.15647201075252076, "grad_norm": 2.370663642883301, "learning_rate": 9.58645237051093e-06, "loss": 0.492, "step": 3769 }, { "epoch": 0.1565135262767321, "grad_norm": 2.814042568206787, "learning_rate": 9.586184600674298e-06, "loss": 0.5129, "step": 3770 }, { "epoch": 0.15655504180094343, "grad_norm": 3.1338307857513428, "learning_rate": 9.585916747918125e-06, "loss": 0.5356, "step": 3771 }, { "epoch": 0.15659655732515476, "grad_norm": 2.8414809703826904, "learning_rate": 9.585648812247255e-06, "loss": 0.4837, "step": 3772 }, { "epoch": 0.15663807284936612, "grad_norm": 2.566957950592041, "learning_rate": 9.585380793666534e-06, "loss": 0.4432, "step": 3773 }, { "epoch": 0.15667958837357746, "grad_norm": 2.586644172668457, "learning_rate": 9.585112692180807e-06, "loss": 0.504, "step": 3774 }, { "epoch": 0.1567211038977888, "grad_norm": 3.071666717529297, "learning_rate": 9.584844507794919e-06, "loss": 0.5795, "step": 3775 }, { "epoch": 0.15676261942200012, "grad_norm": 2.479966163635254, "learning_rate": 9.584576240513724e-06, "loss": 0.4321, "step": 3776 }, { "epoch": 0.15680413494621145, "grad_norm": 2.6733016967773438, "learning_rate": 9.584307890342065e-06, "loss": 0.535, "step": 3777 }, { "epoch": 0.1568456504704228, "grad_norm": 2.395503282546997, "learning_rate": 9.584039457284802e-06, "loss": 0.5126, "step": 3778 }, { "epoch": 0.15688716599463412, "grad_norm": 2.2668604850769043, "learning_rate": 9.583770941346782e-06, "loss": 0.4305, "step": 3779 }, { "epoch": 0.15692868151884545, "grad_norm": 2.5123484134674072, "learning_rate": 9.583502342532863e-06, "loss": 0.5758, "step": 3780 }, { "epoch": 0.15697019704305679, "grad_norm": 2.8488426208496094, "learning_rate": 9.5832336608479e-06, "loss": 0.586, "step": 3781 }, { "epoch": 0.15701171256726812, "grad_norm": 2.5432515144348145, "learning_rate": 9.582964896296752e-06, "loss": 0.5251, "step": 3782 }, { "epoch": 0.15705322809147945, "grad_norm": 2.3905141353607178, "learning_rate": 9.582696048884277e-06, "loss": 0.4541, "step": 3783 }, { "epoch": 0.15709474361569079, "grad_norm": 3.0728771686553955, "learning_rate": 9.582427118615337e-06, "loss": 0.5981, "step": 3784 }, { "epoch": 0.15713625913990212, "grad_norm": 2.8229830265045166, "learning_rate": 9.582158105494794e-06, "loss": 0.5631, "step": 3785 }, { "epoch": 0.15717777466411345, "grad_norm": 2.646562099456787, "learning_rate": 9.58188900952751e-06, "loss": 0.4753, "step": 3786 }, { "epoch": 0.15721929018832478, "grad_norm": 2.977102041244507, "learning_rate": 9.581619830718354e-06, "loss": 0.461, "step": 3787 }, { "epoch": 0.15726080571253614, "grad_norm": 2.6498639583587646, "learning_rate": 9.581350569072192e-06, "loss": 0.5755, "step": 3788 }, { "epoch": 0.15730232123674748, "grad_norm": 2.737575054168701, "learning_rate": 9.58108122459389e-06, "loss": 0.5643, "step": 3789 }, { "epoch": 0.1573438367609588, "grad_norm": 2.643763303756714, "learning_rate": 9.580811797288317e-06, "loss": 0.4789, "step": 3790 }, { "epoch": 0.15738535228517014, "grad_norm": 2.856658458709717, "learning_rate": 9.580542287160348e-06, "loss": 0.5196, "step": 3791 }, { "epoch": 0.15742686780938148, "grad_norm": 2.8260743618011475, "learning_rate": 9.580272694214855e-06, "loss": 0.6042, "step": 3792 }, { "epoch": 0.1574683833335928, "grad_norm": 2.5216782093048096, "learning_rate": 9.580003018456712e-06, "loss": 0.5759, "step": 3793 }, { "epoch": 0.15750989885780414, "grad_norm": 2.981790542602539, "learning_rate": 9.579733259890792e-06, "loss": 0.599, "step": 3794 }, { "epoch": 0.15755141438201548, "grad_norm": 2.2889363765716553, "learning_rate": 9.579463418521975e-06, "loss": 0.4844, "step": 3795 }, { "epoch": 0.1575929299062268, "grad_norm": 2.562615394592285, "learning_rate": 9.57919349435514e-06, "loss": 0.5261, "step": 3796 }, { "epoch": 0.15763444543043814, "grad_norm": 3.358858346939087, "learning_rate": 9.578923487395169e-06, "loss": 0.6324, "step": 3797 }, { "epoch": 0.15767596095464947, "grad_norm": 2.520583391189575, "learning_rate": 9.57865339764694e-06, "loss": 0.4078, "step": 3798 }, { "epoch": 0.1577174764788608, "grad_norm": 2.0926263332366943, "learning_rate": 9.578383225115336e-06, "loss": 0.4047, "step": 3799 }, { "epoch": 0.15775899200307214, "grad_norm": 2.625554084777832, "learning_rate": 9.578112969805245e-06, "loss": 0.5111, "step": 3800 }, { "epoch": 0.15780050752728347, "grad_norm": 2.6450259685516357, "learning_rate": 9.577842631721553e-06, "loss": 0.4711, "step": 3801 }, { "epoch": 0.1578420230514948, "grad_norm": 2.564521074295044, "learning_rate": 9.577572210869147e-06, "loss": 0.5374, "step": 3802 }, { "epoch": 0.15788353857570614, "grad_norm": 2.414891481399536, "learning_rate": 9.577301707252912e-06, "loss": 0.4419, "step": 3803 }, { "epoch": 0.1579250540999175, "grad_norm": 3.2129054069519043, "learning_rate": 9.577031120877747e-06, "loss": 0.5491, "step": 3804 }, { "epoch": 0.15796656962412883, "grad_norm": 3.080777883529663, "learning_rate": 9.57676045174854e-06, "loss": 0.6618, "step": 3805 }, { "epoch": 0.15800808514834017, "grad_norm": 2.3826401233673096, "learning_rate": 9.576489699870182e-06, "loss": 0.4804, "step": 3806 }, { "epoch": 0.1580496006725515, "grad_norm": 2.5723793506622314, "learning_rate": 9.576218865247573e-06, "loss": 0.5193, "step": 3807 }, { "epoch": 0.15809111619676283, "grad_norm": 2.282857894897461, "learning_rate": 9.575947947885607e-06, "loss": 0.5103, "step": 3808 }, { "epoch": 0.15813263172097416, "grad_norm": 2.5080668926239014, "learning_rate": 9.575676947789185e-06, "loss": 0.5813, "step": 3809 }, { "epoch": 0.1581741472451855, "grad_norm": 2.9771971702575684, "learning_rate": 9.575405864963203e-06, "loss": 0.5583, "step": 3810 }, { "epoch": 0.15821566276939683, "grad_norm": 2.5889687538146973, "learning_rate": 9.575134699412564e-06, "loss": 0.4972, "step": 3811 }, { "epoch": 0.15825717829360816, "grad_norm": 2.6562671661376953, "learning_rate": 9.574863451142173e-06, "loss": 0.6054, "step": 3812 }, { "epoch": 0.1582986938178195, "grad_norm": 2.3232204914093018, "learning_rate": 9.57459212015693e-06, "loss": 0.5325, "step": 3813 }, { "epoch": 0.15834020934203083, "grad_norm": 2.7405190467834473, "learning_rate": 9.574320706461747e-06, "loss": 0.5291, "step": 3814 }, { "epoch": 0.15838172486624216, "grad_norm": 2.486145257949829, "learning_rate": 9.574049210061522e-06, "loss": 0.5074, "step": 3815 }, { "epoch": 0.1584232403904535, "grad_norm": 2.486579656600952, "learning_rate": 9.573777630961172e-06, "loss": 0.491, "step": 3816 }, { "epoch": 0.15846475591466483, "grad_norm": 2.5774409770965576, "learning_rate": 9.573505969165602e-06, "loss": 0.5308, "step": 3817 }, { "epoch": 0.15850627143887616, "grad_norm": 2.4412364959716797, "learning_rate": 9.573234224679728e-06, "loss": 0.5096, "step": 3818 }, { "epoch": 0.15854778696308752, "grad_norm": 2.4162116050720215, "learning_rate": 9.57296239750846e-06, "loss": 0.427, "step": 3819 }, { "epoch": 0.15858930248729886, "grad_norm": 2.8232717514038086, "learning_rate": 9.572690487656714e-06, "loss": 0.6221, "step": 3820 }, { "epoch": 0.1586308180115102, "grad_norm": 2.861863613128662, "learning_rate": 9.572418495129405e-06, "loss": 0.4987, "step": 3821 }, { "epoch": 0.15867233353572152, "grad_norm": 2.4677648544311523, "learning_rate": 9.572146419931454e-06, "loss": 0.5671, "step": 3822 }, { "epoch": 0.15871384905993285, "grad_norm": 2.2475364208221436, "learning_rate": 9.571874262067777e-06, "loss": 0.492, "step": 3823 }, { "epoch": 0.1587553645841442, "grad_norm": 2.565896987915039, "learning_rate": 9.571602021543294e-06, "loss": 0.5994, "step": 3824 }, { "epoch": 0.15879688010835552, "grad_norm": 2.543360948562622, "learning_rate": 9.571329698362931e-06, "loss": 0.5929, "step": 3825 }, { "epoch": 0.15883839563256685, "grad_norm": 2.590268135070801, "learning_rate": 9.571057292531607e-06, "loss": 0.4978, "step": 3826 }, { "epoch": 0.1588799111567782, "grad_norm": 2.101166009902954, "learning_rate": 9.570784804054251e-06, "loss": 0.5194, "step": 3827 }, { "epoch": 0.15892142668098952, "grad_norm": 2.4899590015411377, "learning_rate": 9.570512232935789e-06, "loss": 0.3955, "step": 3828 }, { "epoch": 0.15896294220520085, "grad_norm": 2.3759765625, "learning_rate": 9.570239579181146e-06, "loss": 0.4637, "step": 3829 }, { "epoch": 0.15900445772941219, "grad_norm": 3.211034059524536, "learning_rate": 9.569966842795257e-06, "loss": 0.5714, "step": 3830 }, { "epoch": 0.15904597325362352, "grad_norm": 2.556060314178467, "learning_rate": 9.56969402378305e-06, "loss": 0.5522, "step": 3831 }, { "epoch": 0.15908748877783485, "grad_norm": 2.5116147994995117, "learning_rate": 9.569421122149455e-06, "loss": 0.5921, "step": 3832 }, { "epoch": 0.15912900430204618, "grad_norm": 3.320439100265503, "learning_rate": 9.569148137899413e-06, "loss": 0.6012, "step": 3833 }, { "epoch": 0.15917051982625754, "grad_norm": 2.2738265991210938, "learning_rate": 9.568875071037854e-06, "loss": 0.6119, "step": 3834 }, { "epoch": 0.15921203535046888, "grad_norm": 1.9263427257537842, "learning_rate": 9.568601921569715e-06, "loss": 0.5214, "step": 3835 }, { "epoch": 0.1592535508746802, "grad_norm": 2.679854154586792, "learning_rate": 9.56832868949994e-06, "loss": 0.4788, "step": 3836 }, { "epoch": 0.15929506639889154, "grad_norm": 2.390976905822754, "learning_rate": 9.568055374833463e-06, "loss": 0.5594, "step": 3837 }, { "epoch": 0.15933658192310288, "grad_norm": 2.8781442642211914, "learning_rate": 9.567781977575227e-06, "loss": 0.6766, "step": 3838 }, { "epoch": 0.1593780974473142, "grad_norm": 3.150813341140747, "learning_rate": 9.56750849773018e-06, "loss": 0.4544, "step": 3839 }, { "epoch": 0.15941961297152554, "grad_norm": 2.908214569091797, "learning_rate": 9.56723493530326e-06, "loss": 0.5584, "step": 3840 }, { "epoch": 0.15946112849573688, "grad_norm": 2.9180684089660645, "learning_rate": 9.566961290299418e-06, "loss": 0.5345, "step": 3841 }, { "epoch": 0.1595026440199482, "grad_norm": 2.3735251426696777, "learning_rate": 9.566687562723598e-06, "loss": 0.4467, "step": 3842 }, { "epoch": 0.15954415954415954, "grad_norm": 2.675692319869995, "learning_rate": 9.56641375258075e-06, "loss": 0.5858, "step": 3843 }, { "epoch": 0.15958567506837087, "grad_norm": 2.4168643951416016, "learning_rate": 9.566139859875827e-06, "loss": 0.5383, "step": 3844 }, { "epoch": 0.1596271905925822, "grad_norm": 2.3718490600585938, "learning_rate": 9.565865884613778e-06, "loss": 0.5625, "step": 3845 }, { "epoch": 0.15966870611679354, "grad_norm": 2.4469480514526367, "learning_rate": 9.565591826799559e-06, "loss": 0.5106, "step": 3846 }, { "epoch": 0.15971022164100487, "grad_norm": 2.6363015174865723, "learning_rate": 9.565317686438122e-06, "loss": 0.5501, "step": 3847 }, { "epoch": 0.1597517371652162, "grad_norm": 2.357933282852173, "learning_rate": 9.565043463534427e-06, "loss": 0.6048, "step": 3848 }, { "epoch": 0.15979325268942754, "grad_norm": 2.680687665939331, "learning_rate": 9.56476915809343e-06, "loss": 0.4974, "step": 3849 }, { "epoch": 0.1598347682136389, "grad_norm": 2.3627419471740723, "learning_rate": 9.564494770120089e-06, "loss": 0.3507, "step": 3850 }, { "epoch": 0.15987628373785023, "grad_norm": 2.5207467079162598, "learning_rate": 9.564220299619369e-06, "loss": 0.5059, "step": 3851 }, { "epoch": 0.15991779926206157, "grad_norm": 2.6653709411621094, "learning_rate": 9.56394574659623e-06, "loss": 0.5174, "step": 3852 }, { "epoch": 0.1599593147862729, "grad_norm": 2.818237543106079, "learning_rate": 9.563671111055637e-06, "loss": 0.5344, "step": 3853 }, { "epoch": 0.16000083031048423, "grad_norm": 3.2162463665008545, "learning_rate": 9.563396393002555e-06, "loss": 0.6983, "step": 3854 }, { "epoch": 0.16004234583469557, "grad_norm": 2.8600189685821533, "learning_rate": 9.563121592441949e-06, "loss": 0.4847, "step": 3855 }, { "epoch": 0.1600838613589069, "grad_norm": 2.597118377685547, "learning_rate": 9.56284670937879e-06, "loss": 0.5182, "step": 3856 }, { "epoch": 0.16012537688311823, "grad_norm": 3.1440484523773193, "learning_rate": 9.56257174381805e-06, "loss": 0.5717, "step": 3857 }, { "epoch": 0.16016689240732956, "grad_norm": 2.0525567531585693, "learning_rate": 9.562296695764695e-06, "loss": 0.5254, "step": 3858 }, { "epoch": 0.1602084079315409, "grad_norm": 2.557032823562622, "learning_rate": 9.562021565223702e-06, "loss": 0.5384, "step": 3859 }, { "epoch": 0.16024992345575223, "grad_norm": 2.6959447860717773, "learning_rate": 9.561746352200043e-06, "loss": 0.4904, "step": 3860 }, { "epoch": 0.16029143897996356, "grad_norm": 2.313628911972046, "learning_rate": 9.561471056698697e-06, "loss": 0.5196, "step": 3861 }, { "epoch": 0.1603329545041749, "grad_norm": 2.6304993629455566, "learning_rate": 9.561195678724638e-06, "loss": 0.546, "step": 3862 }, { "epoch": 0.16037447002838623, "grad_norm": 2.8088533878326416, "learning_rate": 9.560920218282847e-06, "loss": 0.6588, "step": 3863 }, { "epoch": 0.16041598555259756, "grad_norm": 2.511990785598755, "learning_rate": 9.560644675378303e-06, "loss": 0.4533, "step": 3864 }, { "epoch": 0.16045750107680892, "grad_norm": 2.7870497703552246, "learning_rate": 9.560369050015989e-06, "loss": 0.3721, "step": 3865 }, { "epoch": 0.16049901660102026, "grad_norm": 2.2597546577453613, "learning_rate": 9.560093342200888e-06, "loss": 0.3471, "step": 3866 }, { "epoch": 0.1605405321252316, "grad_norm": 3.3380110263824463, "learning_rate": 9.559817551937986e-06, "loss": 0.5975, "step": 3867 }, { "epoch": 0.16058204764944292, "grad_norm": 2.573538064956665, "learning_rate": 9.559541679232269e-06, "loss": 0.6132, "step": 3868 }, { "epoch": 0.16062356317365425, "grad_norm": 2.5831210613250732, "learning_rate": 9.559265724088723e-06, "loss": 0.574, "step": 3869 }, { "epoch": 0.1606650786978656, "grad_norm": 2.2299818992614746, "learning_rate": 9.55898968651234e-06, "loss": 0.6445, "step": 3870 }, { "epoch": 0.16070659422207692, "grad_norm": 2.3902835845947266, "learning_rate": 9.558713566508106e-06, "loss": 0.5612, "step": 3871 }, { "epoch": 0.16074810974628825, "grad_norm": 2.8891794681549072, "learning_rate": 9.55843736408102e-06, "loss": 0.4722, "step": 3872 }, { "epoch": 0.1607896252704996, "grad_norm": 2.2763965129852295, "learning_rate": 9.558161079236073e-06, "loss": 0.5432, "step": 3873 }, { "epoch": 0.16083114079471092, "grad_norm": 2.8955938816070557, "learning_rate": 9.557884711978257e-06, "loss": 0.6379, "step": 3874 }, { "epoch": 0.16087265631892225, "grad_norm": 2.6343886852264404, "learning_rate": 9.557608262312573e-06, "loss": 0.5613, "step": 3875 }, { "epoch": 0.16091417184313359, "grad_norm": 2.2168166637420654, "learning_rate": 9.557331730244018e-06, "loss": 0.4789, "step": 3876 }, { "epoch": 0.16095568736734492, "grad_norm": 2.8560233116149902, "learning_rate": 9.557055115777592e-06, "loss": 0.6223, "step": 3877 }, { "epoch": 0.16099720289155625, "grad_norm": 2.6631736755371094, "learning_rate": 9.556778418918296e-06, "loss": 0.6278, "step": 3878 }, { "epoch": 0.16103871841576758, "grad_norm": 2.3445730209350586, "learning_rate": 9.556501639671133e-06, "loss": 0.5851, "step": 3879 }, { "epoch": 0.16108023393997892, "grad_norm": 2.3085365295410156, "learning_rate": 9.556224778041106e-06, "loss": 0.4689, "step": 3880 }, { "epoch": 0.16112174946419028, "grad_norm": 3.0606353282928467, "learning_rate": 9.555947834033224e-06, "loss": 0.4506, "step": 3881 }, { "epoch": 0.1611632649884016, "grad_norm": 2.48706316947937, "learning_rate": 9.55567080765249e-06, "loss": 0.5104, "step": 3882 }, { "epoch": 0.16120478051261294, "grad_norm": 2.3055715560913086, "learning_rate": 9.555393698903914e-06, "loss": 0.4133, "step": 3883 }, { "epoch": 0.16124629603682428, "grad_norm": 2.702223777770996, "learning_rate": 9.555116507792508e-06, "loss": 0.6033, "step": 3884 }, { "epoch": 0.1612878115610356, "grad_norm": 2.3547561168670654, "learning_rate": 9.554839234323283e-06, "loss": 0.4396, "step": 3885 }, { "epoch": 0.16132932708524694, "grad_norm": 2.546149730682373, "learning_rate": 9.554561878501251e-06, "loss": 0.4383, "step": 3886 }, { "epoch": 0.16137084260945828, "grad_norm": 2.906970739364624, "learning_rate": 9.55428444033143e-06, "loss": 0.6918, "step": 3887 }, { "epoch": 0.1614123581336696, "grad_norm": 2.3742353916168213, "learning_rate": 9.55400691981883e-06, "loss": 0.5943, "step": 3888 }, { "epoch": 0.16145387365788094, "grad_norm": 2.28623104095459, "learning_rate": 9.553729316968474e-06, "loss": 0.5605, "step": 3889 }, { "epoch": 0.16149538918209227, "grad_norm": 2.6081647872924805, "learning_rate": 9.55345163178538e-06, "loss": 0.5569, "step": 3890 }, { "epoch": 0.1615369047063036, "grad_norm": 2.207791805267334, "learning_rate": 9.553173864274567e-06, "loss": 0.4558, "step": 3891 }, { "epoch": 0.16157842023051494, "grad_norm": 2.7803542613983154, "learning_rate": 9.552896014441058e-06, "loss": 0.5693, "step": 3892 }, { "epoch": 0.16161993575472627, "grad_norm": 2.3541207313537598, "learning_rate": 9.552618082289878e-06, "loss": 0.5063, "step": 3893 }, { "epoch": 0.1616614512789376, "grad_norm": 3.2028775215148926, "learning_rate": 9.55234006782605e-06, "loss": 0.529, "step": 3894 }, { "epoch": 0.16170296680314894, "grad_norm": 2.3095240592956543, "learning_rate": 9.552061971054601e-06, "loss": 0.5762, "step": 3895 }, { "epoch": 0.1617444823273603, "grad_norm": 2.685072422027588, "learning_rate": 9.55178379198056e-06, "loss": 0.5103, "step": 3896 }, { "epoch": 0.16178599785157163, "grad_norm": 3.013227939605713, "learning_rate": 9.551505530608957e-06, "loss": 0.4417, "step": 3897 }, { "epoch": 0.16182751337578297, "grad_norm": 2.5967860221862793, "learning_rate": 9.55122718694482e-06, "loss": 0.6237, "step": 3898 }, { "epoch": 0.1618690288999943, "grad_norm": 2.4371864795684814, "learning_rate": 9.550948760993187e-06, "loss": 0.4182, "step": 3899 }, { "epoch": 0.16191054442420563, "grad_norm": 2.731962203979492, "learning_rate": 9.550670252759086e-06, "loss": 0.5717, "step": 3900 }, { "epoch": 0.16195205994841697, "grad_norm": 2.6514580249786377, "learning_rate": 9.550391662247556e-06, "loss": 0.5628, "step": 3901 }, { "epoch": 0.1619935754726283, "grad_norm": 2.4503800868988037, "learning_rate": 9.550112989463633e-06, "loss": 0.591, "step": 3902 }, { "epoch": 0.16203509099683963, "grad_norm": 2.6553592681884766, "learning_rate": 9.549834234412356e-06, "loss": 0.4438, "step": 3903 }, { "epoch": 0.16207660652105096, "grad_norm": 2.223031997680664, "learning_rate": 9.549555397098764e-06, "loss": 0.4514, "step": 3904 }, { "epoch": 0.1621181220452623, "grad_norm": 2.3840866088867188, "learning_rate": 9.549276477527898e-06, "loss": 0.5882, "step": 3905 }, { "epoch": 0.16215963756947363, "grad_norm": 2.613232135772705, "learning_rate": 9.548997475704804e-06, "loss": 0.5383, "step": 3906 }, { "epoch": 0.16220115309368496, "grad_norm": 2.5516934394836426, "learning_rate": 9.548718391634524e-06, "loss": 0.4882, "step": 3907 }, { "epoch": 0.1622426686178963, "grad_norm": 2.706617832183838, "learning_rate": 9.548439225322104e-06, "loss": 0.4048, "step": 3908 }, { "epoch": 0.16228418414210763, "grad_norm": 2.6967551708221436, "learning_rate": 9.548159976772593e-06, "loss": 0.3798, "step": 3909 }, { "epoch": 0.16232569966631896, "grad_norm": 2.956726551055908, "learning_rate": 9.547880645991038e-06, "loss": 0.5469, "step": 3910 }, { "epoch": 0.1623672151905303, "grad_norm": 2.443180799484253, "learning_rate": 9.547601232982488e-06, "loss": 0.4142, "step": 3911 }, { "epoch": 0.16240873071474166, "grad_norm": 2.6885135173797607, "learning_rate": 9.547321737752e-06, "loss": 0.5281, "step": 3912 }, { "epoch": 0.162450246238953, "grad_norm": 2.7040677070617676, "learning_rate": 9.547042160304624e-06, "loss": 0.6374, "step": 3913 }, { "epoch": 0.16249176176316432, "grad_norm": 2.865875005722046, "learning_rate": 9.546762500645414e-06, "loss": 0.6663, "step": 3914 }, { "epoch": 0.16253327728737565, "grad_norm": 2.274768829345703, "learning_rate": 9.546482758779427e-06, "loss": 0.665, "step": 3915 }, { "epoch": 0.162574792811587, "grad_norm": 3.242323875427246, "learning_rate": 9.546202934711722e-06, "loss": 0.5291, "step": 3916 }, { "epoch": 0.16261630833579832, "grad_norm": 2.3266022205352783, "learning_rate": 9.545923028447358e-06, "loss": 0.5516, "step": 3917 }, { "epoch": 0.16265782386000965, "grad_norm": 2.9167444705963135, "learning_rate": 9.545643039991395e-06, "loss": 0.4735, "step": 3918 }, { "epoch": 0.162699339384221, "grad_norm": 2.5186774730682373, "learning_rate": 9.545362969348895e-06, "loss": 0.5485, "step": 3919 }, { "epoch": 0.16274085490843232, "grad_norm": 2.035893201828003, "learning_rate": 9.545082816524925e-06, "loss": 0.427, "step": 3920 }, { "epoch": 0.16278237043264365, "grad_norm": 2.8272979259490967, "learning_rate": 9.544802581524545e-06, "loss": 0.4802, "step": 3921 }, { "epoch": 0.16282388595685499, "grad_norm": 2.7796576023101807, "learning_rate": 9.544522264352825e-06, "loss": 0.6241, "step": 3922 }, { "epoch": 0.16286540148106632, "grad_norm": 2.2796642780303955, "learning_rate": 9.544241865014833e-06, "loss": 0.5603, "step": 3923 }, { "epoch": 0.16290691700527765, "grad_norm": 3.279198169708252, "learning_rate": 9.543961383515638e-06, "loss": 0.4271, "step": 3924 }, { "epoch": 0.16294843252948898, "grad_norm": 2.4782135486602783, "learning_rate": 9.54368081986031e-06, "loss": 0.511, "step": 3925 }, { "epoch": 0.16298994805370032, "grad_norm": 2.3750076293945312, "learning_rate": 9.543400174053925e-06, "loss": 0.4662, "step": 3926 }, { "epoch": 0.16303146357791168, "grad_norm": 2.354491710662842, "learning_rate": 9.543119446101556e-06, "loss": 0.4785, "step": 3927 }, { "epoch": 0.163072979102123, "grad_norm": 3.2105765342712402, "learning_rate": 9.542838636008277e-06, "loss": 0.5494, "step": 3928 }, { "epoch": 0.16311449462633434, "grad_norm": 2.559030294418335, "learning_rate": 9.542557743779165e-06, "loss": 0.5818, "step": 3929 }, { "epoch": 0.16315601015054568, "grad_norm": 2.2107040882110596, "learning_rate": 9.5422767694193e-06, "loss": 0.5791, "step": 3930 }, { "epoch": 0.163197525674757, "grad_norm": 2.5284814834594727, "learning_rate": 9.541995712933763e-06, "loss": 0.5519, "step": 3931 }, { "epoch": 0.16323904119896834, "grad_norm": 3.0231096744537354, "learning_rate": 9.541714574327634e-06, "loss": 0.5503, "step": 3932 }, { "epoch": 0.16328055672317968, "grad_norm": 2.8989014625549316, "learning_rate": 9.541433353605996e-06, "loss": 0.6475, "step": 3933 }, { "epoch": 0.163322072247391, "grad_norm": 2.2974579334259033, "learning_rate": 9.541152050773935e-06, "loss": 0.5068, "step": 3934 }, { "epoch": 0.16336358777160234, "grad_norm": 2.518404960632324, "learning_rate": 9.540870665836535e-06, "loss": 0.5013, "step": 3935 }, { "epoch": 0.16340510329581368, "grad_norm": 2.3851218223571777, "learning_rate": 9.540589198798885e-06, "loss": 0.5761, "step": 3936 }, { "epoch": 0.163446618820025, "grad_norm": 2.6531684398651123, "learning_rate": 9.540307649666071e-06, "loss": 0.4896, "step": 3937 }, { "epoch": 0.16348813434423634, "grad_norm": 2.732457399368286, "learning_rate": 9.54002601844319e-06, "loss": 0.5526, "step": 3938 }, { "epoch": 0.16352964986844767, "grad_norm": 2.9346282482147217, "learning_rate": 9.539744305135328e-06, "loss": 0.6314, "step": 3939 }, { "epoch": 0.163571165392659, "grad_norm": 2.6470186710357666, "learning_rate": 9.53946250974758e-06, "loss": 0.5282, "step": 3940 }, { "epoch": 0.16361268091687034, "grad_norm": 3.4442317485809326, "learning_rate": 9.539180632285044e-06, "loss": 0.6414, "step": 3941 }, { "epoch": 0.1636541964410817, "grad_norm": 3.245412588119507, "learning_rate": 9.53889867275281e-06, "loss": 0.4318, "step": 3942 }, { "epoch": 0.16369571196529303, "grad_norm": 2.4385604858398438, "learning_rate": 9.538616631155983e-06, "loss": 0.4975, "step": 3943 }, { "epoch": 0.16373722748950437, "grad_norm": 2.6746509075164795, "learning_rate": 9.538334507499658e-06, "loss": 0.4128, "step": 3944 }, { "epoch": 0.1637787430137157, "grad_norm": 2.806989908218384, "learning_rate": 9.538052301788937e-06, "loss": 0.3579, "step": 3945 }, { "epoch": 0.16382025853792703, "grad_norm": 2.455350160598755, "learning_rate": 9.537770014028922e-06, "loss": 0.5771, "step": 3946 }, { "epoch": 0.16386177406213837, "grad_norm": 2.699603796005249, "learning_rate": 9.537487644224717e-06, "loss": 0.6671, "step": 3947 }, { "epoch": 0.1639032895863497, "grad_norm": 2.2859885692596436, "learning_rate": 9.53720519238143e-06, "loss": 0.5335, "step": 3948 }, { "epoch": 0.16394480511056103, "grad_norm": 2.356308698654175, "learning_rate": 9.536922658504162e-06, "loss": 0.3745, "step": 3949 }, { "epoch": 0.16398632063477236, "grad_norm": 2.613511085510254, "learning_rate": 9.536640042598026e-06, "loss": 0.495, "step": 3950 }, { "epoch": 0.1640278361589837, "grad_norm": 3.1125612258911133, "learning_rate": 9.53635734466813e-06, "loss": 0.5624, "step": 3951 }, { "epoch": 0.16406935168319503, "grad_norm": 2.7440788745880127, "learning_rate": 9.536074564719586e-06, "loss": 0.6591, "step": 3952 }, { "epoch": 0.16411086720740636, "grad_norm": 3.1468114852905273, "learning_rate": 9.535791702757507e-06, "loss": 0.4868, "step": 3953 }, { "epoch": 0.1641523827316177, "grad_norm": 2.779245376586914, "learning_rate": 9.535508758787007e-06, "loss": 0.4551, "step": 3954 }, { "epoch": 0.16419389825582903, "grad_norm": 2.388017416000366, "learning_rate": 9.5352257328132e-06, "loss": 0.5697, "step": 3955 }, { "epoch": 0.16423541378004036, "grad_norm": 2.8864588737487793, "learning_rate": 9.534942624841207e-06, "loss": 0.7148, "step": 3956 }, { "epoch": 0.1642769293042517, "grad_norm": 2.7285685539245605, "learning_rate": 9.53465943487614e-06, "loss": 0.5211, "step": 3957 }, { "epoch": 0.16431844482846306, "grad_norm": 2.6079702377319336, "learning_rate": 9.534376162923127e-06, "loss": 0.6012, "step": 3958 }, { "epoch": 0.1643599603526744, "grad_norm": 2.3324906826019287, "learning_rate": 9.534092808987286e-06, "loss": 0.6005, "step": 3959 }, { "epoch": 0.16440147587688572, "grad_norm": 2.2802693843841553, "learning_rate": 9.533809373073737e-06, "loss": 0.6053, "step": 3960 }, { "epoch": 0.16444299140109706, "grad_norm": 2.647228479385376, "learning_rate": 9.53352585518761e-06, "loss": 0.5109, "step": 3961 }, { "epoch": 0.1644845069253084, "grad_norm": 2.3265957832336426, "learning_rate": 9.53324225533403e-06, "loss": 0.5741, "step": 3962 }, { "epoch": 0.16452602244951972, "grad_norm": 3.1047005653381348, "learning_rate": 9.532958573518121e-06, "loss": 0.463, "step": 3963 }, { "epoch": 0.16456753797373105, "grad_norm": 2.8606698513031006, "learning_rate": 9.532674809745015e-06, "loss": 0.4982, "step": 3964 }, { "epoch": 0.1646090534979424, "grad_norm": 2.68003511428833, "learning_rate": 9.532390964019844e-06, "loss": 0.5973, "step": 3965 }, { "epoch": 0.16465056902215372, "grad_norm": 2.453138589859009, "learning_rate": 9.532107036347734e-06, "loss": 0.6877, "step": 3966 }, { "epoch": 0.16469208454636505, "grad_norm": 3.030759811401367, "learning_rate": 9.531823026733826e-06, "loss": 0.634, "step": 3967 }, { "epoch": 0.16473360007057639, "grad_norm": 2.452768325805664, "learning_rate": 9.531538935183252e-06, "loss": 0.6409, "step": 3968 }, { "epoch": 0.16477511559478772, "grad_norm": 2.7571048736572266, "learning_rate": 9.531254761701144e-06, "loss": 0.6197, "step": 3969 }, { "epoch": 0.16481663111899905, "grad_norm": 2.3168632984161377, "learning_rate": 9.530970506292647e-06, "loss": 0.4334, "step": 3970 }, { "epoch": 0.16485814664321038, "grad_norm": 2.7648720741271973, "learning_rate": 9.530686168962897e-06, "loss": 0.6594, "step": 3971 }, { "epoch": 0.16489966216742172, "grad_norm": 2.6633243560791016, "learning_rate": 9.530401749717034e-06, "loss": 0.5512, "step": 3972 }, { "epoch": 0.16494117769163308, "grad_norm": 2.928577423095703, "learning_rate": 9.530117248560203e-06, "loss": 0.6348, "step": 3973 }, { "epoch": 0.1649826932158444, "grad_norm": 2.235985517501831, "learning_rate": 9.529832665497545e-06, "loss": 0.4424, "step": 3974 }, { "epoch": 0.16502420874005574, "grad_norm": 2.632634162902832, "learning_rate": 9.529548000534207e-06, "loss": 0.6344, "step": 3975 }, { "epoch": 0.16506572426426708, "grad_norm": 2.6699841022491455, "learning_rate": 9.529263253675337e-06, "loss": 0.6511, "step": 3976 }, { "epoch": 0.1651072397884784, "grad_norm": 2.52805495262146, "learning_rate": 9.52897842492608e-06, "loss": 0.5752, "step": 3977 }, { "epoch": 0.16514875531268974, "grad_norm": 3.0596532821655273, "learning_rate": 9.528693514291588e-06, "loss": 0.4759, "step": 3978 }, { "epoch": 0.16519027083690108, "grad_norm": 2.88326096534729, "learning_rate": 9.528408521777012e-06, "loss": 0.7222, "step": 3979 }, { "epoch": 0.1652317863611124, "grad_norm": 2.228635549545288, "learning_rate": 9.528123447387508e-06, "loss": 0.3625, "step": 3980 }, { "epoch": 0.16527330188532374, "grad_norm": 2.2279818058013916, "learning_rate": 9.527838291128222e-06, "loss": 0.4218, "step": 3981 }, { "epoch": 0.16531481740953508, "grad_norm": 2.620781660079956, "learning_rate": 9.527553053004316e-06, "loss": 0.5217, "step": 3982 }, { "epoch": 0.1653563329337464, "grad_norm": 2.950246572494507, "learning_rate": 9.527267733020948e-06, "loss": 0.6303, "step": 3983 }, { "epoch": 0.16539784845795774, "grad_norm": 2.53462290763855, "learning_rate": 9.526982331183273e-06, "loss": 0.5388, "step": 3984 }, { "epoch": 0.16543936398216907, "grad_norm": 3.1572930812835693, "learning_rate": 9.526696847496453e-06, "loss": 0.5024, "step": 3985 }, { "epoch": 0.1654808795063804, "grad_norm": 2.055373191833496, "learning_rate": 9.526411281965648e-06, "loss": 0.4443, "step": 3986 }, { "epoch": 0.16552239503059174, "grad_norm": 2.377617359161377, "learning_rate": 9.526125634596024e-06, "loss": 0.4927, "step": 3987 }, { "epoch": 0.16556391055480307, "grad_norm": 2.4521496295928955, "learning_rate": 9.525839905392744e-06, "loss": 0.4557, "step": 3988 }, { "epoch": 0.16560542607901443, "grad_norm": 2.5707767009735107, "learning_rate": 9.525554094360973e-06, "loss": 0.6211, "step": 3989 }, { "epoch": 0.16564694160322577, "grad_norm": 2.9051742553710938, "learning_rate": 9.52526820150588e-06, "loss": 0.6064, "step": 3990 }, { "epoch": 0.1656884571274371, "grad_norm": 2.899906873703003, "learning_rate": 9.524982226832633e-06, "loss": 0.5567, "step": 3991 }, { "epoch": 0.16572997265164843, "grad_norm": 2.834669589996338, "learning_rate": 9.524696170346404e-06, "loss": 0.6741, "step": 3992 }, { "epoch": 0.16577148817585977, "grad_norm": 2.5894625186920166, "learning_rate": 9.524410032052364e-06, "loss": 0.54, "step": 3993 }, { "epoch": 0.1658130037000711, "grad_norm": 2.3734958171844482, "learning_rate": 9.524123811955685e-06, "loss": 0.5098, "step": 3994 }, { "epoch": 0.16585451922428243, "grad_norm": 3.3882319927215576, "learning_rate": 9.523837510061545e-06, "loss": 0.4494, "step": 3995 }, { "epoch": 0.16589603474849376, "grad_norm": 2.6057324409484863, "learning_rate": 9.52355112637512e-06, "loss": 0.6232, "step": 3996 }, { "epoch": 0.1659375502727051, "grad_norm": 2.420919418334961, "learning_rate": 9.523264660901584e-06, "loss": 0.4523, "step": 3997 }, { "epoch": 0.16597906579691643, "grad_norm": 2.6554336547851562, "learning_rate": 9.52297811364612e-06, "loss": 0.4653, "step": 3998 }, { "epoch": 0.16602058132112776, "grad_norm": 3.1627211570739746, "learning_rate": 9.52269148461391e-06, "loss": 0.5183, "step": 3999 }, { "epoch": 0.1660620968453391, "grad_norm": 2.7094180583953857, "learning_rate": 9.522404773810132e-06, "loss": 0.5261, "step": 4000 }, { "epoch": 0.16610361236955043, "grad_norm": 2.122673749923706, "learning_rate": 9.522117981239973e-06, "loss": 0.4181, "step": 4001 }, { "epoch": 0.16614512789376176, "grad_norm": 2.6943891048431396, "learning_rate": 9.521831106908618e-06, "loss": 0.5083, "step": 4002 }, { "epoch": 0.1661866434179731, "grad_norm": 2.5344455242156982, "learning_rate": 9.521544150821254e-06, "loss": 0.6992, "step": 4003 }, { "epoch": 0.16622815894218446, "grad_norm": 2.875533103942871, "learning_rate": 9.521257112983067e-06, "loss": 0.668, "step": 4004 }, { "epoch": 0.1662696744663958, "grad_norm": 2.4725725650787354, "learning_rate": 9.520969993399249e-06, "loss": 0.4946, "step": 4005 }, { "epoch": 0.16631118999060712, "grad_norm": 2.7144930362701416, "learning_rate": 9.52068279207499e-06, "loss": 0.4984, "step": 4006 }, { "epoch": 0.16635270551481846, "grad_norm": 2.82127046585083, "learning_rate": 9.520395509015484e-06, "loss": 0.5866, "step": 4007 }, { "epoch": 0.1663942210390298, "grad_norm": 2.292877674102783, "learning_rate": 9.520108144225922e-06, "loss": 0.5566, "step": 4008 }, { "epoch": 0.16643573656324112, "grad_norm": 2.393057346343994, "learning_rate": 9.519820697711504e-06, "loss": 0.4623, "step": 4009 }, { "epoch": 0.16647725208745245, "grad_norm": 2.9679253101348877, "learning_rate": 9.519533169477425e-06, "loss": 0.6119, "step": 4010 }, { "epoch": 0.1665187676116638, "grad_norm": 2.930126905441284, "learning_rate": 9.519245559528882e-06, "loss": 0.5849, "step": 4011 }, { "epoch": 0.16656028313587512, "grad_norm": 2.9491825103759766, "learning_rate": 9.518957867871078e-06, "loss": 0.573, "step": 4012 }, { "epoch": 0.16660179866008645, "grad_norm": 2.3761990070343018, "learning_rate": 9.518670094509214e-06, "loss": 0.4125, "step": 4013 }, { "epoch": 0.16664331418429779, "grad_norm": 2.165003776550293, "learning_rate": 9.518382239448492e-06, "loss": 0.464, "step": 4014 }, { "epoch": 0.16668482970850912, "grad_norm": 2.9692535400390625, "learning_rate": 9.518094302694115e-06, "loss": 0.5395, "step": 4015 }, { "epoch": 0.16672634523272045, "grad_norm": 2.2822587490081787, "learning_rate": 9.517806284251293e-06, "loss": 0.5289, "step": 4016 }, { "epoch": 0.16676786075693179, "grad_norm": 2.4156322479248047, "learning_rate": 9.51751818412523e-06, "loss": 0.6102, "step": 4017 }, { "epoch": 0.16680937628114312, "grad_norm": 2.257844924926758, "learning_rate": 9.517230002321134e-06, "loss": 0.5777, "step": 4018 }, { "epoch": 0.16685089180535445, "grad_norm": 2.4963600635528564, "learning_rate": 9.516941738844222e-06, "loss": 0.528, "step": 4019 }, { "epoch": 0.1668924073295658, "grad_norm": 1.930771827697754, "learning_rate": 9.516653393699697e-06, "loss": 0.5448, "step": 4020 }, { "epoch": 0.16693392285377714, "grad_norm": 2.5935163497924805, "learning_rate": 9.516364966892779e-06, "loss": 0.4393, "step": 4021 }, { "epoch": 0.16697543837798848, "grad_norm": 2.6940202713012695, "learning_rate": 9.516076458428681e-06, "loss": 0.6381, "step": 4022 }, { "epoch": 0.1670169539021998, "grad_norm": 2.4398343563079834, "learning_rate": 9.51578786831262e-06, "loss": 0.6125, "step": 4023 }, { "epoch": 0.16705846942641114, "grad_norm": 3.0078163146972656, "learning_rate": 9.51549919654981e-06, "loss": 0.5182, "step": 4024 }, { "epoch": 0.16709998495062248, "grad_norm": 2.7088570594787598, "learning_rate": 9.515210443145475e-06, "loss": 0.461, "step": 4025 }, { "epoch": 0.1671415004748338, "grad_norm": 2.58762526512146, "learning_rate": 9.514921608104833e-06, "loss": 0.514, "step": 4026 }, { "epoch": 0.16718301599904514, "grad_norm": 2.6636195182800293, "learning_rate": 9.514632691433108e-06, "loss": 0.6258, "step": 4027 }, { "epoch": 0.16722453152325648, "grad_norm": 2.505770683288574, "learning_rate": 9.514343693135522e-06, "loss": 0.5554, "step": 4028 }, { "epoch": 0.1672660470474678, "grad_norm": 2.5961313247680664, "learning_rate": 9.5140546132173e-06, "loss": 0.453, "step": 4029 }, { "epoch": 0.16730756257167914, "grad_norm": 2.7642860412597656, "learning_rate": 9.51376545168367e-06, "loss": 0.5602, "step": 4030 }, { "epoch": 0.16734907809589047, "grad_norm": 2.8378453254699707, "learning_rate": 9.51347620853986e-06, "loss": 0.4834, "step": 4031 }, { "epoch": 0.1673905936201018, "grad_norm": 2.302762985229492, "learning_rate": 9.513186883791098e-06, "loss": 0.4634, "step": 4032 }, { "epoch": 0.16743210914431314, "grad_norm": 2.721045732498169, "learning_rate": 9.512897477442618e-06, "loss": 0.6559, "step": 4033 }, { "epoch": 0.16747362466852447, "grad_norm": 2.63694167137146, "learning_rate": 9.512607989499649e-06, "loss": 0.6183, "step": 4034 }, { "epoch": 0.16751514019273583, "grad_norm": 2.8071515560150146, "learning_rate": 9.512318419967427e-06, "loss": 0.5514, "step": 4035 }, { "epoch": 0.16755665571694717, "grad_norm": 3.6599791049957275, "learning_rate": 9.512028768851191e-06, "loss": 0.5748, "step": 4036 }, { "epoch": 0.1675981712411585, "grad_norm": 2.5984857082366943, "learning_rate": 9.511739036156171e-06, "loss": 0.614, "step": 4037 }, { "epoch": 0.16763968676536983, "grad_norm": 3.2374587059020996, "learning_rate": 9.51144922188761e-06, "loss": 0.5981, "step": 4038 }, { "epoch": 0.16768120228958117, "grad_norm": 2.6302998065948486, "learning_rate": 9.511159326050745e-06, "loss": 0.6244, "step": 4039 }, { "epoch": 0.1677227178137925, "grad_norm": 2.5626907348632812, "learning_rate": 9.510869348650822e-06, "loss": 0.5851, "step": 4040 }, { "epoch": 0.16776423333800383, "grad_norm": 2.8502306938171387, "learning_rate": 9.510579289693079e-06, "loss": 0.586, "step": 4041 }, { "epoch": 0.16780574886221516, "grad_norm": 2.3825314044952393, "learning_rate": 9.510289149182762e-06, "loss": 0.3448, "step": 4042 }, { "epoch": 0.1678472643864265, "grad_norm": 2.602011203765869, "learning_rate": 9.509998927125118e-06, "loss": 0.5531, "step": 4043 }, { "epoch": 0.16788877991063783, "grad_norm": 2.884425640106201, "learning_rate": 9.509708623525393e-06, "loss": 0.3929, "step": 4044 }, { "epoch": 0.16793029543484916, "grad_norm": 3.043365716934204, "learning_rate": 9.509418238388838e-06, "loss": 0.5793, "step": 4045 }, { "epoch": 0.1679718109590605, "grad_norm": 2.529536724090576, "learning_rate": 9.509127771720699e-06, "loss": 0.5702, "step": 4046 }, { "epoch": 0.16801332648327183, "grad_norm": 2.689342975616455, "learning_rate": 9.508837223526232e-06, "loss": 0.5531, "step": 4047 }, { "epoch": 0.16805484200748316, "grad_norm": 2.759028673171997, "learning_rate": 9.508546593810687e-06, "loss": 0.5263, "step": 4048 }, { "epoch": 0.1680963575316945, "grad_norm": 5.242265224456787, "learning_rate": 9.508255882579322e-06, "loss": 0.4191, "step": 4049 }, { "epoch": 0.16813787305590586, "grad_norm": 2.618475914001465, "learning_rate": 9.50796508983739e-06, "loss": 0.4409, "step": 4050 }, { "epoch": 0.1681793885801172, "grad_norm": 3.039761781692505, "learning_rate": 9.50767421559015e-06, "loss": 0.5995, "step": 4051 }, { "epoch": 0.16822090410432852, "grad_norm": 2.408879280090332, "learning_rate": 9.50738325984286e-06, "loss": 0.5645, "step": 4052 }, { "epoch": 0.16826241962853986, "grad_norm": 3.1468849182128906, "learning_rate": 9.507092222600783e-06, "loss": 0.5838, "step": 4053 }, { "epoch": 0.1683039351527512, "grad_norm": 1.9815161228179932, "learning_rate": 9.506801103869178e-06, "loss": 0.4505, "step": 4054 }, { "epoch": 0.16834545067696252, "grad_norm": 2.0599913597106934, "learning_rate": 9.506509903653311e-06, "loss": 0.4023, "step": 4055 }, { "epoch": 0.16838696620117385, "grad_norm": 2.429311990737915, "learning_rate": 9.506218621958448e-06, "loss": 0.4985, "step": 4056 }, { "epoch": 0.1684284817253852, "grad_norm": 2.722614049911499, "learning_rate": 9.50592725878985e-06, "loss": 0.3361, "step": 4057 }, { "epoch": 0.16846999724959652, "grad_norm": 2.4419710636138916, "learning_rate": 9.50563581415279e-06, "loss": 0.4835, "step": 4058 }, { "epoch": 0.16851151277380785, "grad_norm": 2.7971484661102295, "learning_rate": 9.505344288052536e-06, "loss": 0.4915, "step": 4059 }, { "epoch": 0.1685530282980192, "grad_norm": 2.5260636806488037, "learning_rate": 9.505052680494357e-06, "loss": 0.5153, "step": 4060 }, { "epoch": 0.16859454382223052, "grad_norm": 2.7642948627471924, "learning_rate": 9.504760991483528e-06, "loss": 0.6745, "step": 4061 }, { "epoch": 0.16863605934644185, "grad_norm": 2.297999620437622, "learning_rate": 9.504469221025323e-06, "loss": 0.526, "step": 4062 }, { "epoch": 0.16867757487065319, "grad_norm": 2.587913751602173, "learning_rate": 9.504177369125017e-06, "loss": 0.5865, "step": 4063 }, { "epoch": 0.16871909039486452, "grad_norm": 2.783046007156372, "learning_rate": 9.503885435787883e-06, "loss": 0.5239, "step": 4064 }, { "epoch": 0.16876060591907585, "grad_norm": 2.8637914657592773, "learning_rate": 9.503593421019204e-06, "loss": 0.4494, "step": 4065 }, { "epoch": 0.1688021214432872, "grad_norm": 2.6898255348205566, "learning_rate": 9.503301324824257e-06, "loss": 0.5118, "step": 4066 }, { "epoch": 0.16884363696749854, "grad_norm": 2.8273160457611084, "learning_rate": 9.503009147208324e-06, "loss": 0.6663, "step": 4067 }, { "epoch": 0.16888515249170988, "grad_norm": 2.263347864151001, "learning_rate": 9.502716888176688e-06, "loss": 0.4421, "step": 4068 }, { "epoch": 0.1689266680159212, "grad_norm": 2.4698538780212402, "learning_rate": 9.502424547734634e-06, "loss": 0.5268, "step": 4069 }, { "epoch": 0.16896818354013254, "grad_norm": 2.280418872833252, "learning_rate": 9.502132125887445e-06, "loss": 0.4756, "step": 4070 }, { "epoch": 0.16900969906434388, "grad_norm": 2.871722936630249, "learning_rate": 9.50183962264041e-06, "loss": 0.4452, "step": 4071 }, { "epoch": 0.1690512145885552, "grad_norm": 2.7814834117889404, "learning_rate": 9.501547037998817e-06, "loss": 0.6793, "step": 4072 }, { "epoch": 0.16909273011276654, "grad_norm": 3.3219501972198486, "learning_rate": 9.501254371967957e-06, "loss": 0.607, "step": 4073 }, { "epoch": 0.16913424563697788, "grad_norm": 2.585191488265991, "learning_rate": 9.50096162455312e-06, "loss": 0.5531, "step": 4074 }, { "epoch": 0.1691757611611892, "grad_norm": 2.2425036430358887, "learning_rate": 9.500668795759598e-06, "loss": 0.5501, "step": 4075 }, { "epoch": 0.16921727668540054, "grad_norm": 3.0039291381835938, "learning_rate": 9.500375885592687e-06, "loss": 0.526, "step": 4076 }, { "epoch": 0.16925879220961187, "grad_norm": 2.357724189758301, "learning_rate": 9.500082894057685e-06, "loss": 0.5501, "step": 4077 }, { "epoch": 0.1693003077338232, "grad_norm": 2.9802229404449463, "learning_rate": 9.499789821159885e-06, "loss": 0.3962, "step": 4078 }, { "epoch": 0.16934182325803454, "grad_norm": 2.6030383110046387, "learning_rate": 9.499496666904591e-06, "loss": 0.5745, "step": 4079 }, { "epoch": 0.16938333878224587, "grad_norm": 3.0719118118286133, "learning_rate": 9.499203431297097e-06, "loss": 0.585, "step": 4080 }, { "epoch": 0.16942485430645723, "grad_norm": 3.176447868347168, "learning_rate": 9.49891011434271e-06, "loss": 0.4487, "step": 4081 }, { "epoch": 0.16946636983066857, "grad_norm": 3.991551160812378, "learning_rate": 9.498616716046732e-06, "loss": 0.5377, "step": 4082 }, { "epoch": 0.1695078853548799, "grad_norm": 3.403604507446289, "learning_rate": 9.498323236414466e-06, "loss": 0.3509, "step": 4083 }, { "epoch": 0.16954940087909123, "grad_norm": 2.204739809036255, "learning_rate": 9.498029675451218e-06, "loss": 0.4422, "step": 4084 }, { "epoch": 0.16959091640330257, "grad_norm": 2.514251708984375, "learning_rate": 9.4977360331623e-06, "loss": 0.5355, "step": 4085 }, { "epoch": 0.1696324319275139, "grad_norm": 2.497889757156372, "learning_rate": 9.497442309553017e-06, "loss": 0.5807, "step": 4086 }, { "epoch": 0.16967394745172523, "grad_norm": 3.0415759086608887, "learning_rate": 9.49714850462868e-06, "loss": 0.631, "step": 4087 }, { "epoch": 0.16971546297593657, "grad_norm": 2.4385788440704346, "learning_rate": 9.496854618394602e-06, "loss": 0.376, "step": 4088 }, { "epoch": 0.1697569785001479, "grad_norm": 2.570868492126465, "learning_rate": 9.496560650856097e-06, "loss": 0.5644, "step": 4089 }, { "epoch": 0.16979849402435923, "grad_norm": 2.8042900562286377, "learning_rate": 9.496266602018479e-06, "loss": 0.5996, "step": 4090 }, { "epoch": 0.16984000954857056, "grad_norm": 2.8345863819122314, "learning_rate": 9.495972471887065e-06, "loss": 0.5648, "step": 4091 }, { "epoch": 0.1698815250727819, "grad_norm": 2.3330471515655518, "learning_rate": 9.495678260467172e-06, "loss": 0.4462, "step": 4092 }, { "epoch": 0.16992304059699323, "grad_norm": 2.6956114768981934, "learning_rate": 9.495383967764122e-06, "loss": 0.5638, "step": 4093 }, { "epoch": 0.16996455612120456, "grad_norm": 2.6255760192871094, "learning_rate": 9.495089593783233e-06, "loss": 0.4291, "step": 4094 }, { "epoch": 0.1700060716454159, "grad_norm": 2.714329957962036, "learning_rate": 9.494795138529828e-06, "loss": 0.436, "step": 4095 }, { "epoch": 0.17004758716962723, "grad_norm": 2.1222970485687256, "learning_rate": 9.494500602009232e-06, "loss": 0.6149, "step": 4096 }, { "epoch": 0.1700891026938386, "grad_norm": 2.7456674575805664, "learning_rate": 9.49420598422677e-06, "loss": 0.4561, "step": 4097 }, { "epoch": 0.17013061821804992, "grad_norm": 3.0003573894500732, "learning_rate": 9.493911285187768e-06, "loss": 0.5037, "step": 4098 }, { "epoch": 0.17017213374226126, "grad_norm": 2.259730100631714, "learning_rate": 9.493616504897555e-06, "loss": 0.68, "step": 4099 }, { "epoch": 0.1702136492664726, "grad_norm": 3.035508871078491, "learning_rate": 9.49332164336146e-06, "loss": 0.7109, "step": 4100 }, { "epoch": 0.17025516479068392, "grad_norm": 2.7633321285247803, "learning_rate": 9.493026700584815e-06, "loss": 0.6373, "step": 4101 }, { "epoch": 0.17029668031489525, "grad_norm": 2.4137957096099854, "learning_rate": 9.492731676572952e-06, "loss": 0.5209, "step": 4102 }, { "epoch": 0.1703381958391066, "grad_norm": 2.6455090045928955, "learning_rate": 9.492436571331205e-06, "loss": 0.4509, "step": 4103 }, { "epoch": 0.17037971136331792, "grad_norm": 2.5507447719573975, "learning_rate": 9.49214138486491e-06, "loss": 0.5516, "step": 4104 }, { "epoch": 0.17042122688752925, "grad_norm": 2.7579658031463623, "learning_rate": 9.491846117179404e-06, "loss": 0.61, "step": 4105 }, { "epoch": 0.1704627424117406, "grad_norm": 2.7127187252044678, "learning_rate": 9.491550768280027e-06, "loss": 0.4446, "step": 4106 }, { "epoch": 0.17050425793595192, "grad_norm": 2.349998712539673, "learning_rate": 9.491255338172116e-06, "loss": 0.6679, "step": 4107 }, { "epoch": 0.17054577346016325, "grad_norm": 2.3174946308135986, "learning_rate": 9.490959826861014e-06, "loss": 0.472, "step": 4108 }, { "epoch": 0.17058728898437459, "grad_norm": 2.4995954036712646, "learning_rate": 9.490664234352063e-06, "loss": 0.5126, "step": 4109 }, { "epoch": 0.17062880450858592, "grad_norm": 2.4475910663604736, "learning_rate": 9.49036856065061e-06, "loss": 0.5187, "step": 4110 }, { "epoch": 0.17067032003279725, "grad_norm": 2.7062714099884033, "learning_rate": 9.490072805761998e-06, "loss": 0.4801, "step": 4111 }, { "epoch": 0.1707118355570086, "grad_norm": 3.201817750930786, "learning_rate": 9.489776969691576e-06, "loss": 0.5799, "step": 4112 }, { "epoch": 0.17075335108121995, "grad_norm": 2.7068607807159424, "learning_rate": 9.489481052444693e-06, "loss": 0.549, "step": 4113 }, { "epoch": 0.17079486660543128, "grad_norm": 2.418468952178955, "learning_rate": 9.489185054026697e-06, "loss": 0.6089, "step": 4114 }, { "epoch": 0.1708363821296426, "grad_norm": 2.6096115112304688, "learning_rate": 9.488888974442942e-06, "loss": 0.489, "step": 4115 }, { "epoch": 0.17087789765385394, "grad_norm": 2.3289377689361572, "learning_rate": 9.488592813698781e-06, "loss": 0.7008, "step": 4116 }, { "epoch": 0.17091941317806528, "grad_norm": 2.3880460262298584, "learning_rate": 9.488296571799568e-06, "loss": 0.3753, "step": 4117 }, { "epoch": 0.1709609287022766, "grad_norm": 2.5734341144561768, "learning_rate": 9.488000248750659e-06, "loss": 0.4717, "step": 4118 }, { "epoch": 0.17100244422648794, "grad_norm": 2.7964913845062256, "learning_rate": 9.487703844557413e-06, "loss": 0.62, "step": 4119 }, { "epoch": 0.17104395975069928, "grad_norm": 2.991107702255249, "learning_rate": 9.487407359225186e-06, "loss": 0.6432, "step": 4120 }, { "epoch": 0.1710854752749106, "grad_norm": 2.3079121112823486, "learning_rate": 9.487110792759342e-06, "loss": 0.3415, "step": 4121 }, { "epoch": 0.17112699079912194, "grad_norm": 2.424466609954834, "learning_rate": 9.486814145165242e-06, "loss": 0.4984, "step": 4122 }, { "epoch": 0.17116850632333327, "grad_norm": 2.457780361175537, "learning_rate": 9.48651741644825e-06, "loss": 0.5662, "step": 4123 }, { "epoch": 0.1712100218475446, "grad_norm": 2.203312635421753, "learning_rate": 9.486220606613727e-06, "loss": 0.4597, "step": 4124 }, { "epoch": 0.17125153737175594, "grad_norm": 2.976743221282959, "learning_rate": 9.485923715667043e-06, "loss": 0.5155, "step": 4125 }, { "epoch": 0.17129305289596727, "grad_norm": 2.5904526710510254, "learning_rate": 9.485626743613566e-06, "loss": 0.3936, "step": 4126 }, { "epoch": 0.1713345684201786, "grad_norm": 2.036996364593506, "learning_rate": 9.485329690458664e-06, "loss": 0.337, "step": 4127 }, { "epoch": 0.17137608394438997, "grad_norm": 3.0785512924194336, "learning_rate": 9.48503255620771e-06, "loss": 0.6365, "step": 4128 }, { "epoch": 0.1714175994686013, "grad_norm": 2.507753849029541, "learning_rate": 9.484735340866072e-06, "loss": 0.5682, "step": 4129 }, { "epoch": 0.17145911499281263, "grad_norm": 2.2516930103302, "learning_rate": 9.484438044439129e-06, "loss": 0.6679, "step": 4130 }, { "epoch": 0.17150063051702397, "grad_norm": 2.122859001159668, "learning_rate": 9.48414066693225e-06, "loss": 0.5106, "step": 4131 }, { "epoch": 0.1715421460412353, "grad_norm": 2.7388851642608643, "learning_rate": 9.48384320835082e-06, "loss": 0.5971, "step": 4132 }, { "epoch": 0.17158366156544663, "grad_norm": 2.560696840286255, "learning_rate": 9.483545668700209e-06, "loss": 0.5204, "step": 4133 }, { "epoch": 0.17162517708965797, "grad_norm": 2.484997034072876, "learning_rate": 9.483248047985801e-06, "loss": 0.49, "step": 4134 }, { "epoch": 0.1716666926138693, "grad_norm": 2.6708261966705322, "learning_rate": 9.482950346212976e-06, "loss": 0.4671, "step": 4135 }, { "epoch": 0.17170820813808063, "grad_norm": 3.609426736831665, "learning_rate": 9.482652563387118e-06, "loss": 0.6615, "step": 4136 }, { "epoch": 0.17174972366229196, "grad_norm": 2.5786216259002686, "learning_rate": 9.482354699513608e-06, "loss": 0.521, "step": 4137 }, { "epoch": 0.1717912391865033, "grad_norm": 2.333005428314209, "learning_rate": 9.482056754597832e-06, "loss": 0.5197, "step": 4138 }, { "epoch": 0.17183275471071463, "grad_norm": 2.498256206512451, "learning_rate": 9.481758728645181e-06, "loss": 0.5632, "step": 4139 }, { "epoch": 0.17187427023492596, "grad_norm": 2.533796787261963, "learning_rate": 9.481460621661039e-06, "loss": 0.5287, "step": 4140 }, { "epoch": 0.1719157857591373, "grad_norm": 2.4843318462371826, "learning_rate": 9.481162433650797e-06, "loss": 0.5405, "step": 4141 }, { "epoch": 0.17195730128334863, "grad_norm": 2.8728787899017334, "learning_rate": 9.480864164619846e-06, "loss": 0.4733, "step": 4142 }, { "epoch": 0.17199881680756, "grad_norm": 2.593140125274658, "learning_rate": 9.48056581457358e-06, "loss": 0.6, "step": 4143 }, { "epoch": 0.17204033233177132, "grad_norm": 2.843571424484253, "learning_rate": 9.480267383517394e-06, "loss": 0.6324, "step": 4144 }, { "epoch": 0.17208184785598266, "grad_norm": 2.30019211769104, "learning_rate": 9.47996887145668e-06, "loss": 0.4749, "step": 4145 }, { "epoch": 0.172123363380194, "grad_norm": 2.656426191329956, "learning_rate": 9.47967027839684e-06, "loss": 0.5437, "step": 4146 }, { "epoch": 0.17216487890440532, "grad_norm": 2.4156405925750732, "learning_rate": 9.479371604343268e-06, "loss": 0.5363, "step": 4147 }, { "epoch": 0.17220639442861665, "grad_norm": 2.0718424320220947, "learning_rate": 9.479072849301368e-06, "loss": 0.516, "step": 4148 }, { "epoch": 0.172247909952828, "grad_norm": 2.587111473083496, "learning_rate": 9.478774013276537e-06, "loss": 0.5684, "step": 4149 }, { "epoch": 0.17228942547703932, "grad_norm": 2.463942289352417, "learning_rate": 9.478475096274184e-06, "loss": 0.5218, "step": 4150 }, { "epoch": 0.17233094100125065, "grad_norm": 3.7365572452545166, "learning_rate": 9.47817609829971e-06, "loss": 0.6187, "step": 4151 }, { "epoch": 0.172372456525462, "grad_norm": 3.01031494140625, "learning_rate": 9.47787701935852e-06, "loss": 0.5047, "step": 4152 }, { "epoch": 0.17241397204967332, "grad_norm": 2.489997148513794, "learning_rate": 9.477577859456025e-06, "loss": 0.4672, "step": 4153 }, { "epoch": 0.17245548757388465, "grad_norm": 2.5434837341308594, "learning_rate": 9.47727861859763e-06, "loss": 0.4789, "step": 4154 }, { "epoch": 0.17249700309809599, "grad_norm": 2.781207323074341, "learning_rate": 9.476979296788746e-06, "loss": 0.53, "step": 4155 }, { "epoch": 0.17253851862230732, "grad_norm": 2.9157750606536865, "learning_rate": 9.476679894034788e-06, "loss": 0.6335, "step": 4156 }, { "epoch": 0.17258003414651865, "grad_norm": 3.9396727085113525, "learning_rate": 9.476380410341166e-06, "loss": 0.5639, "step": 4157 }, { "epoch": 0.17262154967073, "grad_norm": 2.2733356952667236, "learning_rate": 9.476080845713296e-06, "loss": 0.5658, "step": 4158 }, { "epoch": 0.17266306519494135, "grad_norm": 2.75229549407959, "learning_rate": 9.475781200156596e-06, "loss": 0.4066, "step": 4159 }, { "epoch": 0.17270458071915268, "grad_norm": 2.540858268737793, "learning_rate": 9.47548147367648e-06, "loss": 0.6415, "step": 4160 }, { "epoch": 0.172746096243364, "grad_norm": 2.9317243099212646, "learning_rate": 9.47518166627837e-06, "loss": 0.524, "step": 4161 }, { "epoch": 0.17278761176757534, "grad_norm": 2.496924638748169, "learning_rate": 9.474881777967684e-06, "loss": 0.4346, "step": 4162 }, { "epoch": 0.17282912729178668, "grad_norm": 2.641873598098755, "learning_rate": 9.474581808749847e-06, "loss": 0.6345, "step": 4163 }, { "epoch": 0.172870642815998, "grad_norm": 2.6492698192596436, "learning_rate": 9.47428175863028e-06, "loss": 0.4923, "step": 4164 }, { "epoch": 0.17291215834020934, "grad_norm": 2.6969003677368164, "learning_rate": 9.47398162761441e-06, "loss": 0.5358, "step": 4165 }, { "epoch": 0.17295367386442068, "grad_norm": 2.495805025100708, "learning_rate": 9.473681415707663e-06, "loss": 0.4596, "step": 4166 }, { "epoch": 0.172995189388632, "grad_norm": 1.9828455448150635, "learning_rate": 9.473381122915466e-06, "loss": 0.4589, "step": 4167 }, { "epoch": 0.17303670491284334, "grad_norm": 2.651820421218872, "learning_rate": 9.47308074924325e-06, "loss": 0.648, "step": 4168 }, { "epoch": 0.17307822043705468, "grad_norm": 2.8246333599090576, "learning_rate": 9.472780294696444e-06, "loss": 0.4816, "step": 4169 }, { "epoch": 0.173119735961266, "grad_norm": 2.3441083431243896, "learning_rate": 9.47247975928048e-06, "loss": 0.4414, "step": 4170 }, { "epoch": 0.17316125148547734, "grad_norm": 2.283923625946045, "learning_rate": 9.472179143000794e-06, "loss": 0.6403, "step": 4171 }, { "epoch": 0.17320276700968867, "grad_norm": 2.435948371887207, "learning_rate": 9.47187844586282e-06, "loss": 0.6061, "step": 4172 }, { "epoch": 0.1732442825339, "grad_norm": 2.5787911415100098, "learning_rate": 9.471577667871995e-06, "loss": 0.4805, "step": 4173 }, { "epoch": 0.17328579805811137, "grad_norm": 2.6103591918945312, "learning_rate": 9.471276809033756e-06, "loss": 0.408, "step": 4174 }, { "epoch": 0.1733273135823227, "grad_norm": 2.850311279296875, "learning_rate": 9.470975869353544e-06, "loss": 0.4822, "step": 4175 }, { "epoch": 0.17336882910653403, "grad_norm": 3.0540988445281982, "learning_rate": 9.4706748488368e-06, "loss": 0.5544, "step": 4176 }, { "epoch": 0.17341034463074537, "grad_norm": 2.4644548892974854, "learning_rate": 9.470373747488966e-06, "loss": 0.5849, "step": 4177 }, { "epoch": 0.1734518601549567, "grad_norm": 2.59781551361084, "learning_rate": 9.470072565315487e-06, "loss": 0.4948, "step": 4178 }, { "epoch": 0.17349337567916803, "grad_norm": 2.432023763656616, "learning_rate": 9.469771302321806e-06, "loss": 0.5829, "step": 4179 }, { "epoch": 0.17353489120337937, "grad_norm": 3.02679705619812, "learning_rate": 9.469469958513372e-06, "loss": 0.5641, "step": 4180 }, { "epoch": 0.1735764067275907, "grad_norm": 2.4556121826171875, "learning_rate": 9.469168533895632e-06, "loss": 0.5035, "step": 4181 }, { "epoch": 0.17361792225180203, "grad_norm": 2.638538122177124, "learning_rate": 9.468867028474039e-06, "loss": 0.4869, "step": 4182 }, { "epoch": 0.17365943777601336, "grad_norm": 2.9678304195404053, "learning_rate": 9.46856544225404e-06, "loss": 0.6633, "step": 4183 }, { "epoch": 0.1737009533002247, "grad_norm": 2.538931131362915, "learning_rate": 9.468263775241092e-06, "loss": 0.7002, "step": 4184 }, { "epoch": 0.17374246882443603, "grad_norm": 2.6841354370117188, "learning_rate": 9.467962027440645e-06, "loss": 0.415, "step": 4185 }, { "epoch": 0.17378398434864736, "grad_norm": 2.610304355621338, "learning_rate": 9.467660198858157e-06, "loss": 0.5926, "step": 4186 }, { "epoch": 0.1738254998728587, "grad_norm": 2.5338363647460938, "learning_rate": 9.467358289499087e-06, "loss": 0.5238, "step": 4187 }, { "epoch": 0.17386701539707003, "grad_norm": 2.2702112197875977, "learning_rate": 9.467056299368888e-06, "loss": 0.4682, "step": 4188 }, { "epoch": 0.1739085309212814, "grad_norm": 2.749767780303955, "learning_rate": 9.466754228473027e-06, "loss": 0.4321, "step": 4189 }, { "epoch": 0.17395004644549272, "grad_norm": 2.9329864978790283, "learning_rate": 9.466452076816961e-06, "loss": 0.4519, "step": 4190 }, { "epoch": 0.17399156196970406, "grad_norm": 2.3684871196746826, "learning_rate": 9.466149844406152e-06, "loss": 0.3753, "step": 4191 }, { "epoch": 0.1740330774939154, "grad_norm": 2.306152582168579, "learning_rate": 9.465847531246068e-06, "loss": 0.5107, "step": 4192 }, { "epoch": 0.17407459301812672, "grad_norm": 3.1381664276123047, "learning_rate": 9.465545137342175e-06, "loss": 0.5876, "step": 4193 }, { "epoch": 0.17411610854233806, "grad_norm": 2.234626054763794, "learning_rate": 9.465242662699937e-06, "loss": 0.4925, "step": 4194 }, { "epoch": 0.1741576240665494, "grad_norm": 2.8020591735839844, "learning_rate": 9.464940107324825e-06, "loss": 0.5913, "step": 4195 }, { "epoch": 0.17419913959076072, "grad_norm": 2.6245527267456055, "learning_rate": 9.464637471222308e-06, "loss": 0.5329, "step": 4196 }, { "epoch": 0.17424065511497205, "grad_norm": 2.8922290802001953, "learning_rate": 9.464334754397861e-06, "loss": 0.5603, "step": 4197 }, { "epoch": 0.1742821706391834, "grad_norm": 2.751315116882324, "learning_rate": 9.464031956856953e-06, "loss": 0.6071, "step": 4198 }, { "epoch": 0.17432368616339472, "grad_norm": 2.949643850326538, "learning_rate": 9.463729078605062e-06, "loss": 0.4191, "step": 4199 }, { "epoch": 0.17436520168760605, "grad_norm": 3.0188939571380615, "learning_rate": 9.46342611964766e-06, "loss": 0.4178, "step": 4200 }, { "epoch": 0.17440671721181739, "grad_norm": 2.7358932495117188, "learning_rate": 9.463123079990231e-06, "loss": 0.6566, "step": 4201 }, { "epoch": 0.17444823273602872, "grad_norm": 2.193235397338867, "learning_rate": 9.462819959638248e-06, "loss": 0.522, "step": 4202 }, { "epoch": 0.17448974826024005, "grad_norm": 2.2276153564453125, "learning_rate": 9.462516758597193e-06, "loss": 0.6004, "step": 4203 }, { "epoch": 0.17453126378445138, "grad_norm": 2.2550294399261475, "learning_rate": 9.46221347687255e-06, "loss": 0.4765, "step": 4204 }, { "epoch": 0.17457277930866275, "grad_norm": 2.5711116790771484, "learning_rate": 9.461910114469802e-06, "loss": 0.4282, "step": 4205 }, { "epoch": 0.17461429483287408, "grad_norm": 2.5708329677581787, "learning_rate": 9.461606671394432e-06, "loss": 0.5949, "step": 4206 }, { "epoch": 0.1746558103570854, "grad_norm": 2.503492593765259, "learning_rate": 9.461303147651927e-06, "loss": 0.486, "step": 4207 }, { "epoch": 0.17469732588129674, "grad_norm": 2.490337610244751, "learning_rate": 9.460999543247776e-06, "loss": 0.4061, "step": 4208 }, { "epoch": 0.17473884140550808, "grad_norm": 2.3914902210235596, "learning_rate": 9.460695858187467e-06, "loss": 0.6212, "step": 4209 }, { "epoch": 0.1747803569297194, "grad_norm": 3.5716638565063477, "learning_rate": 9.46039209247649e-06, "loss": 0.5917, "step": 4210 }, { "epoch": 0.17482187245393074, "grad_norm": 2.4903390407562256, "learning_rate": 9.46008824612034e-06, "loss": 0.666, "step": 4211 }, { "epoch": 0.17486338797814208, "grad_norm": 2.4917237758636475, "learning_rate": 9.45978431912451e-06, "loss": 0.4374, "step": 4212 }, { "epoch": 0.1749049035023534, "grad_norm": 2.6266770362854004, "learning_rate": 9.459480311494493e-06, "loss": 0.4506, "step": 4213 }, { "epoch": 0.17494641902656474, "grad_norm": 2.4910173416137695, "learning_rate": 9.459176223235786e-06, "loss": 0.577, "step": 4214 }, { "epoch": 0.17498793455077608, "grad_norm": 2.6838583946228027, "learning_rate": 9.458872054353888e-06, "loss": 0.5129, "step": 4215 }, { "epoch": 0.1750294500749874, "grad_norm": 2.414444923400879, "learning_rate": 9.458567804854297e-06, "loss": 0.4158, "step": 4216 }, { "epoch": 0.17507096559919874, "grad_norm": 2.3175711631774902, "learning_rate": 9.458263474742517e-06, "loss": 0.4202, "step": 4217 }, { "epoch": 0.17511248112341007, "grad_norm": 2.5384039878845215, "learning_rate": 9.457959064024047e-06, "loss": 0.3643, "step": 4218 }, { "epoch": 0.1751539966476214, "grad_norm": 2.194589138031006, "learning_rate": 9.457654572704394e-06, "loss": 0.4668, "step": 4219 }, { "epoch": 0.17519551217183277, "grad_norm": 2.6216506958007812, "learning_rate": 9.457350000789059e-06, "loss": 0.5369, "step": 4220 }, { "epoch": 0.1752370276960441, "grad_norm": 2.9840164184570312, "learning_rate": 9.457045348283552e-06, "loss": 0.6593, "step": 4221 }, { "epoch": 0.17527854322025543, "grad_norm": 3.7609357833862305, "learning_rate": 9.45674061519338e-06, "loss": 0.6839, "step": 4222 }, { "epoch": 0.17532005874446677, "grad_norm": 2.564697504043579, "learning_rate": 9.456435801524055e-06, "loss": 0.4293, "step": 4223 }, { "epoch": 0.1753615742686781, "grad_norm": 2.768956422805786, "learning_rate": 9.456130907281084e-06, "loss": 0.6233, "step": 4224 }, { "epoch": 0.17540308979288943, "grad_norm": 2.305468797683716, "learning_rate": 9.455825932469984e-06, "loss": 0.4718, "step": 4225 }, { "epoch": 0.17544460531710077, "grad_norm": 2.773838758468628, "learning_rate": 9.455520877096265e-06, "loss": 0.6701, "step": 4226 }, { "epoch": 0.1754861208413121, "grad_norm": 2.6131937503814697, "learning_rate": 9.455215741165447e-06, "loss": 0.4117, "step": 4227 }, { "epoch": 0.17552763636552343, "grad_norm": 2.259230852127075, "learning_rate": 9.45491052468304e-06, "loss": 0.3707, "step": 4228 }, { "epoch": 0.17556915188973476, "grad_norm": 2.5926544666290283, "learning_rate": 9.45460522765457e-06, "loss": 0.4717, "step": 4229 }, { "epoch": 0.1756106674139461, "grad_norm": 2.750746965408325, "learning_rate": 9.454299850085553e-06, "loss": 0.5762, "step": 4230 }, { "epoch": 0.17565218293815743, "grad_norm": 3.0749564170837402, "learning_rate": 9.45399439198151e-06, "loss": 0.6447, "step": 4231 }, { "epoch": 0.17569369846236876, "grad_norm": 2.991426944732666, "learning_rate": 9.453688853347965e-06, "loss": 0.4802, "step": 4232 }, { "epoch": 0.1757352139865801, "grad_norm": 2.53886079788208, "learning_rate": 9.453383234190443e-06, "loss": 0.5405, "step": 4233 }, { "epoch": 0.17577672951079143, "grad_norm": 2.877239227294922, "learning_rate": 9.453077534514466e-06, "loss": 0.5155, "step": 4234 }, { "epoch": 0.17581824503500276, "grad_norm": 2.827651262283325, "learning_rate": 9.452771754325565e-06, "loss": 0.5574, "step": 4235 }, { "epoch": 0.17585976055921412, "grad_norm": 2.022304058074951, "learning_rate": 9.452465893629267e-06, "loss": 0.5005, "step": 4236 }, { "epoch": 0.17590127608342546, "grad_norm": 2.436539649963379, "learning_rate": 9.452159952431104e-06, "loss": 0.5528, "step": 4237 }, { "epoch": 0.1759427916076368, "grad_norm": 2.4562182426452637, "learning_rate": 9.451853930736602e-06, "loss": 0.5576, "step": 4238 }, { "epoch": 0.17598430713184812, "grad_norm": 2.4971773624420166, "learning_rate": 9.451547828551301e-06, "loss": 0.5379, "step": 4239 }, { "epoch": 0.17602582265605946, "grad_norm": 2.7017722129821777, "learning_rate": 9.451241645880733e-06, "loss": 0.6097, "step": 4240 }, { "epoch": 0.1760673381802708, "grad_norm": 2.583265542984009, "learning_rate": 9.450935382730431e-06, "loss": 0.629, "step": 4241 }, { "epoch": 0.17610885370448212, "grad_norm": 3.0617008209228516, "learning_rate": 9.450629039105934e-06, "loss": 0.681, "step": 4242 }, { "epoch": 0.17615036922869345, "grad_norm": 2.8572609424591064, "learning_rate": 9.450322615012783e-06, "loss": 0.4702, "step": 4243 }, { "epoch": 0.1761918847529048, "grad_norm": 2.6577491760253906, "learning_rate": 9.450016110456515e-06, "loss": 0.6539, "step": 4244 }, { "epoch": 0.17623340027711612, "grad_norm": 2.3787167072296143, "learning_rate": 9.449709525442672e-06, "loss": 0.4793, "step": 4245 }, { "epoch": 0.17627491580132745, "grad_norm": 2.4893479347229004, "learning_rate": 9.4494028599768e-06, "loss": 0.5697, "step": 4246 }, { "epoch": 0.17631643132553879, "grad_norm": 3.9577250480651855, "learning_rate": 9.449096114064441e-06, "loss": 0.7327, "step": 4247 }, { "epoch": 0.17635794684975012, "grad_norm": 2.3173792362213135, "learning_rate": 9.448789287711144e-06, "loss": 0.6694, "step": 4248 }, { "epoch": 0.17639946237396145, "grad_norm": 2.2147881984710693, "learning_rate": 9.448482380922453e-06, "loss": 0.6341, "step": 4249 }, { "epoch": 0.17644097789817278, "grad_norm": 2.2685546875, "learning_rate": 9.448175393703918e-06, "loss": 0.4251, "step": 4250 }, { "epoch": 0.17648249342238415, "grad_norm": 2.292666435241699, "learning_rate": 9.44786832606109e-06, "loss": 0.4728, "step": 4251 }, { "epoch": 0.17652400894659548, "grad_norm": 2.527402639389038, "learning_rate": 9.447561177999521e-06, "loss": 0.5354, "step": 4252 }, { "epoch": 0.1765655244708068, "grad_norm": 2.5703959465026855, "learning_rate": 9.447253949524762e-06, "loss": 0.6427, "step": 4253 }, { "epoch": 0.17660703999501814, "grad_norm": 2.894716739654541, "learning_rate": 9.446946640642372e-06, "loss": 0.6697, "step": 4254 }, { "epoch": 0.17664855551922948, "grad_norm": 2.487185001373291, "learning_rate": 9.446639251357905e-06, "loss": 0.6138, "step": 4255 }, { "epoch": 0.1766900710434408, "grad_norm": 3.240048885345459, "learning_rate": 9.446331781676918e-06, "loss": 0.4449, "step": 4256 }, { "epoch": 0.17673158656765214, "grad_norm": 2.4878392219543457, "learning_rate": 9.44602423160497e-06, "loss": 0.5305, "step": 4257 }, { "epoch": 0.17677310209186348, "grad_norm": 2.824220657348633, "learning_rate": 9.445716601147623e-06, "loss": 0.5727, "step": 4258 }, { "epoch": 0.1768146176160748, "grad_norm": 2.739546298980713, "learning_rate": 9.445408890310439e-06, "loss": 0.5275, "step": 4259 }, { "epoch": 0.17685613314028614, "grad_norm": 2.491410732269287, "learning_rate": 9.44510109909898e-06, "loss": 0.5171, "step": 4260 }, { "epoch": 0.17689764866449748, "grad_norm": 2.22918438911438, "learning_rate": 9.444793227518813e-06, "loss": 0.4263, "step": 4261 }, { "epoch": 0.1769391641887088, "grad_norm": 2.6807143688201904, "learning_rate": 9.444485275575503e-06, "loss": 0.6544, "step": 4262 }, { "epoch": 0.17698067971292014, "grad_norm": 2.3172404766082764, "learning_rate": 9.444177243274619e-06, "loss": 0.5591, "step": 4263 }, { "epoch": 0.17702219523713147, "grad_norm": 2.5479791164398193, "learning_rate": 9.443869130621728e-06, "loss": 0.5736, "step": 4264 }, { "epoch": 0.1770637107613428, "grad_norm": 2.4574389457702637, "learning_rate": 9.443560937622402e-06, "loss": 0.5813, "step": 4265 }, { "epoch": 0.17710522628555417, "grad_norm": 2.060192823410034, "learning_rate": 9.443252664282215e-06, "loss": 0.3949, "step": 4266 }, { "epoch": 0.1771467418097655, "grad_norm": 2.700953483581543, "learning_rate": 9.442944310606739e-06, "loss": 0.4553, "step": 4267 }, { "epoch": 0.17718825733397683, "grad_norm": 2.8209874629974365, "learning_rate": 9.442635876601549e-06, "loss": 0.3948, "step": 4268 }, { "epoch": 0.17722977285818817, "grad_norm": 2.5493340492248535, "learning_rate": 9.44232736227222e-06, "loss": 0.4736, "step": 4269 }, { "epoch": 0.1772712883823995, "grad_norm": 2.4164583683013916, "learning_rate": 9.442018767624333e-06, "loss": 0.4176, "step": 4270 }, { "epoch": 0.17731280390661083, "grad_norm": 2.4764318466186523, "learning_rate": 9.441710092663467e-06, "loss": 0.5112, "step": 4271 }, { "epoch": 0.17735431943082217, "grad_norm": 2.709117889404297, "learning_rate": 9.441401337395202e-06, "loss": 0.4169, "step": 4272 }, { "epoch": 0.1773958349550335, "grad_norm": 2.920430898666382, "learning_rate": 9.44109250182512e-06, "loss": 0.6106, "step": 4273 }, { "epoch": 0.17743735047924483, "grad_norm": 2.358613967895508, "learning_rate": 9.440783585958809e-06, "loss": 0.4929, "step": 4274 }, { "epoch": 0.17747886600345616, "grad_norm": 2.43341326713562, "learning_rate": 9.440474589801848e-06, "loss": 0.5222, "step": 4275 }, { "epoch": 0.1775203815276675, "grad_norm": 3.301029682159424, "learning_rate": 9.440165513359824e-06, "loss": 0.5796, "step": 4276 }, { "epoch": 0.17756189705187883, "grad_norm": 2.3130531311035156, "learning_rate": 9.439856356638332e-06, "loss": 0.4469, "step": 4277 }, { "epoch": 0.17760341257609016, "grad_norm": 2.5882132053375244, "learning_rate": 9.439547119642955e-06, "loss": 0.5279, "step": 4278 }, { "epoch": 0.1776449281003015, "grad_norm": 2.7445759773254395, "learning_rate": 9.439237802379287e-06, "loss": 0.5023, "step": 4279 }, { "epoch": 0.17768644362451283, "grad_norm": 2.5139310359954834, "learning_rate": 9.438928404852921e-06, "loss": 0.568, "step": 4280 }, { "epoch": 0.17772795914872416, "grad_norm": 2.7384042739868164, "learning_rate": 9.438618927069449e-06, "loss": 0.541, "step": 4281 }, { "epoch": 0.17776947467293552, "grad_norm": 2.481912851333618, "learning_rate": 9.438309369034466e-06, "loss": 0.4321, "step": 4282 }, { "epoch": 0.17781099019714686, "grad_norm": 3.55220103263855, "learning_rate": 9.437999730753573e-06, "loss": 0.6099, "step": 4283 }, { "epoch": 0.1778525057213582, "grad_norm": 2.8833909034729004, "learning_rate": 9.437690012232364e-06, "loss": 0.608, "step": 4284 }, { "epoch": 0.17789402124556952, "grad_norm": 2.4316697120666504, "learning_rate": 9.437380213476442e-06, "loss": 0.4724, "step": 4285 }, { "epoch": 0.17793553676978086, "grad_norm": 2.660831928253174, "learning_rate": 9.437070334491404e-06, "loss": 0.5626, "step": 4286 }, { "epoch": 0.1779770522939922, "grad_norm": 3.300677537918091, "learning_rate": 9.436760375282858e-06, "loss": 0.553, "step": 4287 }, { "epoch": 0.17801856781820352, "grad_norm": 2.8771023750305176, "learning_rate": 9.436450335856406e-06, "loss": 0.5258, "step": 4288 }, { "epoch": 0.17806008334241485, "grad_norm": 2.1434378623962402, "learning_rate": 9.436140216217652e-06, "loss": 0.4191, "step": 4289 }, { "epoch": 0.1781015988666262, "grad_norm": 2.4467601776123047, "learning_rate": 9.435830016372206e-06, "loss": 0.529, "step": 4290 }, { "epoch": 0.17814311439083752, "grad_norm": 5.367650032043457, "learning_rate": 9.435519736325672e-06, "loss": 0.5563, "step": 4291 }, { "epoch": 0.17818462991504885, "grad_norm": 2.4100170135498047, "learning_rate": 9.435209376083662e-06, "loss": 0.4268, "step": 4292 }, { "epoch": 0.1782261454392602, "grad_norm": 2.448761224746704, "learning_rate": 9.43489893565179e-06, "loss": 0.4741, "step": 4293 }, { "epoch": 0.17826766096347152, "grad_norm": 2.5916218757629395, "learning_rate": 9.434588415035668e-06, "loss": 0.4857, "step": 4294 }, { "epoch": 0.17830917648768285, "grad_norm": 2.3795223236083984, "learning_rate": 9.434277814240908e-06, "loss": 0.6387, "step": 4295 }, { "epoch": 0.17835069201189419, "grad_norm": 2.529405355453491, "learning_rate": 9.433967133273127e-06, "loss": 0.4765, "step": 4296 }, { "epoch": 0.17839220753610555, "grad_norm": 2.41707444190979, "learning_rate": 9.433656372137942e-06, "loss": 0.6481, "step": 4297 }, { "epoch": 0.17843372306031688, "grad_norm": 2.2686126232147217, "learning_rate": 9.433345530840974e-06, "loss": 0.3817, "step": 4298 }, { "epoch": 0.1784752385845282, "grad_norm": 2.4766860008239746, "learning_rate": 9.43303460938784e-06, "loss": 0.512, "step": 4299 }, { "epoch": 0.17851675410873954, "grad_norm": 2.5046839714050293, "learning_rate": 9.43272360778416e-06, "loss": 0.559, "step": 4300 }, { "epoch": 0.17855826963295088, "grad_norm": 2.58722186088562, "learning_rate": 9.432412526035562e-06, "loss": 0.5848, "step": 4301 }, { "epoch": 0.1785997851571622, "grad_norm": 2.175720453262329, "learning_rate": 9.432101364147668e-06, "loss": 0.3937, "step": 4302 }, { "epoch": 0.17864130068137354, "grad_norm": 2.991325616836548, "learning_rate": 9.431790122126102e-06, "loss": 0.6045, "step": 4303 }, { "epoch": 0.17868281620558488, "grad_norm": 2.459627151489258, "learning_rate": 9.431478799976495e-06, "loss": 0.5518, "step": 4304 }, { "epoch": 0.1787243317297962, "grad_norm": 2.136800527572632, "learning_rate": 9.431167397704473e-06, "loss": 0.4443, "step": 4305 }, { "epoch": 0.17876584725400754, "grad_norm": 2.467477321624756, "learning_rate": 9.430855915315668e-06, "loss": 0.5208, "step": 4306 }, { "epoch": 0.17880736277821888, "grad_norm": 2.7081680297851562, "learning_rate": 9.43054435281571e-06, "loss": 0.5523, "step": 4307 }, { "epoch": 0.1788488783024302, "grad_norm": 2.9892430305480957, "learning_rate": 9.430232710210234e-06, "loss": 0.5382, "step": 4308 }, { "epoch": 0.17889039382664154, "grad_norm": 2.712791919708252, "learning_rate": 9.429920987504873e-06, "loss": 0.426, "step": 4309 }, { "epoch": 0.17893190935085287, "grad_norm": 2.932554006576538, "learning_rate": 9.429609184705265e-06, "loss": 0.5824, "step": 4310 }, { "epoch": 0.1789734248750642, "grad_norm": 2.439389705657959, "learning_rate": 9.429297301817045e-06, "loss": 0.6732, "step": 4311 }, { "epoch": 0.17901494039927554, "grad_norm": 2.640662908554077, "learning_rate": 9.428985338845854e-06, "loss": 0.3869, "step": 4312 }, { "epoch": 0.1790564559234869, "grad_norm": 2.3940000534057617, "learning_rate": 9.42867329579733e-06, "loss": 0.5464, "step": 4313 }, { "epoch": 0.17909797144769823, "grad_norm": 2.7404093742370605, "learning_rate": 9.428361172677117e-06, "loss": 0.5443, "step": 4314 }, { "epoch": 0.17913948697190957, "grad_norm": 2.7660906314849854, "learning_rate": 9.428048969490857e-06, "loss": 0.6131, "step": 4315 }, { "epoch": 0.1791810024961209, "grad_norm": 2.652566432952881, "learning_rate": 9.427736686244198e-06, "loss": 0.4969, "step": 4316 }, { "epoch": 0.17922251802033223, "grad_norm": 2.1754167079925537, "learning_rate": 9.42742432294278e-06, "loss": 0.4421, "step": 4317 }, { "epoch": 0.17926403354454357, "grad_norm": 2.700711488723755, "learning_rate": 9.427111879592257e-06, "loss": 0.7237, "step": 4318 }, { "epoch": 0.1793055490687549, "grad_norm": 3.408564805984497, "learning_rate": 9.426799356198273e-06, "loss": 0.59, "step": 4319 }, { "epoch": 0.17934706459296623, "grad_norm": 2.340646266937256, "learning_rate": 9.426486752766481e-06, "loss": 0.4807, "step": 4320 }, { "epoch": 0.17938858011717757, "grad_norm": 2.480583906173706, "learning_rate": 9.426174069302533e-06, "loss": 0.5188, "step": 4321 }, { "epoch": 0.1794300956413889, "grad_norm": 2.6217832565307617, "learning_rate": 9.425861305812083e-06, "loss": 0.5362, "step": 4322 }, { "epoch": 0.17947161116560023, "grad_norm": 2.519005537033081, "learning_rate": 9.425548462300784e-06, "loss": 0.62, "step": 4323 }, { "epoch": 0.17951312668981156, "grad_norm": 2.461817502975464, "learning_rate": 9.425235538774295e-06, "loss": 0.5098, "step": 4324 }, { "epoch": 0.1795546422140229, "grad_norm": 2.7407000064849854, "learning_rate": 9.42492253523827e-06, "loss": 0.5914, "step": 4325 }, { "epoch": 0.17959615773823423, "grad_norm": 2.6588592529296875, "learning_rate": 9.42460945169837e-06, "loss": 0.4166, "step": 4326 }, { "epoch": 0.17963767326244556, "grad_norm": 2.456510305404663, "learning_rate": 9.424296288160258e-06, "loss": 0.4995, "step": 4327 }, { "epoch": 0.17967918878665692, "grad_norm": 2.945491075515747, "learning_rate": 9.423983044629592e-06, "loss": 0.533, "step": 4328 }, { "epoch": 0.17972070431086826, "grad_norm": 2.933021306991577, "learning_rate": 9.423669721112036e-06, "loss": 0.5424, "step": 4329 }, { "epoch": 0.1797622198350796, "grad_norm": 3.1507351398468018, "learning_rate": 9.423356317613259e-06, "loss": 0.6299, "step": 4330 }, { "epoch": 0.17980373535929092, "grad_norm": 2.8795430660247803, "learning_rate": 9.423042834138925e-06, "loss": 0.521, "step": 4331 }, { "epoch": 0.17984525088350226, "grad_norm": 2.648249626159668, "learning_rate": 9.4227292706947e-06, "loss": 0.5347, "step": 4332 }, { "epoch": 0.1798867664077136, "grad_norm": 2.629699468612671, "learning_rate": 9.422415627286255e-06, "loss": 0.5693, "step": 4333 }, { "epoch": 0.17992828193192492, "grad_norm": 2.796355962753296, "learning_rate": 9.422101903919262e-06, "loss": 0.5979, "step": 4334 }, { "epoch": 0.17996979745613625, "grad_norm": 2.9329726696014404, "learning_rate": 9.421788100599392e-06, "loss": 0.6157, "step": 4335 }, { "epoch": 0.1800113129803476, "grad_norm": 2.4655656814575195, "learning_rate": 9.421474217332318e-06, "loss": 0.6117, "step": 4336 }, { "epoch": 0.18005282850455892, "grad_norm": 2.531787872314453, "learning_rate": 9.421160254123716e-06, "loss": 0.5542, "step": 4337 }, { "epoch": 0.18009434402877025, "grad_norm": 2.9685723781585693, "learning_rate": 9.420846210979262e-06, "loss": 0.4858, "step": 4338 }, { "epoch": 0.1801358595529816, "grad_norm": 2.8841896057128906, "learning_rate": 9.420532087904635e-06, "loss": 0.5098, "step": 4339 }, { "epoch": 0.18017737507719292, "grad_norm": 2.566267251968384, "learning_rate": 9.420217884905511e-06, "loss": 0.5161, "step": 4340 }, { "epoch": 0.18021889060140425, "grad_norm": 2.6267549991607666, "learning_rate": 9.419903601987577e-06, "loss": 0.5918, "step": 4341 }, { "epoch": 0.18026040612561559, "grad_norm": 2.514328718185425, "learning_rate": 9.41958923915651e-06, "loss": 0.5416, "step": 4342 }, { "epoch": 0.18030192164982692, "grad_norm": 2.8854146003723145, "learning_rate": 9.419274796417996e-06, "loss": 0.6562, "step": 4343 }, { "epoch": 0.18034343717403828, "grad_norm": 2.348456621170044, "learning_rate": 9.418960273777721e-06, "loss": 0.6136, "step": 4344 }, { "epoch": 0.1803849526982496, "grad_norm": 2.7219009399414062, "learning_rate": 9.41864567124137e-06, "loss": 0.5664, "step": 4345 }, { "epoch": 0.18042646822246095, "grad_norm": 2.6299407482147217, "learning_rate": 9.418330988814633e-06, "loss": 0.422, "step": 4346 }, { "epoch": 0.18046798374667228, "grad_norm": 2.405143976211548, "learning_rate": 9.418016226503196e-06, "loss": 0.3838, "step": 4347 }, { "epoch": 0.1805094992708836, "grad_norm": 2.936891794204712, "learning_rate": 9.417701384312752e-06, "loss": 0.5406, "step": 4348 }, { "epoch": 0.18055101479509494, "grad_norm": 2.7090816497802734, "learning_rate": 9.417386462248996e-06, "loss": 0.5729, "step": 4349 }, { "epoch": 0.18059253031930628, "grad_norm": 2.6826841831207275, "learning_rate": 9.417071460317616e-06, "loss": 0.5586, "step": 4350 }, { "epoch": 0.1806340458435176, "grad_norm": 2.3114778995513916, "learning_rate": 9.416756378524314e-06, "loss": 0.4441, "step": 4351 }, { "epoch": 0.18067556136772894, "grad_norm": 3.0775742530822754, "learning_rate": 9.416441216874782e-06, "loss": 0.5491, "step": 4352 }, { "epoch": 0.18071707689194028, "grad_norm": 2.416973114013672, "learning_rate": 9.416125975374722e-06, "loss": 0.5011, "step": 4353 }, { "epoch": 0.1807585924161516, "grad_norm": 2.604342460632324, "learning_rate": 9.415810654029829e-06, "loss": 0.4827, "step": 4354 }, { "epoch": 0.18080010794036294, "grad_norm": 2.782111883163452, "learning_rate": 9.415495252845807e-06, "loss": 0.507, "step": 4355 }, { "epoch": 0.18084162346457427, "grad_norm": 3.2901124954223633, "learning_rate": 9.41517977182836e-06, "loss": 0.4849, "step": 4356 }, { "epoch": 0.1808831389887856, "grad_norm": 2.4146082401275635, "learning_rate": 9.414864210983188e-06, "loss": 0.6164, "step": 4357 }, { "epoch": 0.18092465451299694, "grad_norm": 2.271829605102539, "learning_rate": 9.414548570316e-06, "loss": 0.5377, "step": 4358 }, { "epoch": 0.1809661700372083, "grad_norm": 2.381669521331787, "learning_rate": 9.414232849832501e-06, "loss": 0.5217, "step": 4359 }, { "epoch": 0.18100768556141963, "grad_norm": 2.5879733562469482, "learning_rate": 9.4139170495384e-06, "loss": 0.5629, "step": 4360 }, { "epoch": 0.18104920108563097, "grad_norm": 2.621779203414917, "learning_rate": 9.413601169439405e-06, "loss": 0.5336, "step": 4361 }, { "epoch": 0.1810907166098423, "grad_norm": 2.309272289276123, "learning_rate": 9.41328520954123e-06, "loss": 0.6523, "step": 4362 }, { "epoch": 0.18113223213405363, "grad_norm": 2.7110722064971924, "learning_rate": 9.412969169849586e-06, "loss": 0.468, "step": 4363 }, { "epoch": 0.18117374765826497, "grad_norm": 2.063260316848755, "learning_rate": 9.412653050370188e-06, "loss": 0.5912, "step": 4364 }, { "epoch": 0.1812152631824763, "grad_norm": 2.6420180797576904, "learning_rate": 9.412336851108751e-06, "loss": 0.435, "step": 4365 }, { "epoch": 0.18125677870668763, "grad_norm": 2.493206024169922, "learning_rate": 9.412020572070991e-06, "loss": 0.5241, "step": 4366 }, { "epoch": 0.18129829423089897, "grad_norm": 2.394806385040283, "learning_rate": 9.41170421326263e-06, "loss": 0.4773, "step": 4367 }, { "epoch": 0.1813398097551103, "grad_norm": 2.3189032077789307, "learning_rate": 9.411387774689382e-06, "loss": 0.4724, "step": 4368 }, { "epoch": 0.18138132527932163, "grad_norm": 2.229254722595215, "learning_rate": 9.411071256356972e-06, "loss": 0.5271, "step": 4369 }, { "epoch": 0.18142284080353296, "grad_norm": 3.4323856830596924, "learning_rate": 9.410754658271124e-06, "loss": 0.699, "step": 4370 }, { "epoch": 0.1814643563277443, "grad_norm": 2.442852735519409, "learning_rate": 9.410437980437558e-06, "loss": 0.4579, "step": 4371 }, { "epoch": 0.18150587185195563, "grad_norm": 2.722261428833008, "learning_rate": 9.410121222862006e-06, "loss": 0.5368, "step": 4372 }, { "epoch": 0.18154738737616696, "grad_norm": 2.094945192337036, "learning_rate": 9.409804385550187e-06, "loss": 0.4259, "step": 4373 }, { "epoch": 0.18158890290037832, "grad_norm": 3.2964823246002197, "learning_rate": 9.409487468507836e-06, "loss": 0.5132, "step": 4374 }, { "epoch": 0.18163041842458966, "grad_norm": 2.4819061756134033, "learning_rate": 9.409170471740681e-06, "loss": 0.4703, "step": 4375 }, { "epoch": 0.181671933948801, "grad_norm": 2.2687535285949707, "learning_rate": 9.40885339525445e-06, "loss": 0.4694, "step": 4376 }, { "epoch": 0.18171344947301232, "grad_norm": 2.666712760925293, "learning_rate": 9.408536239054881e-06, "loss": 0.4966, "step": 4377 }, { "epoch": 0.18175496499722366, "grad_norm": 2.3508243560791016, "learning_rate": 9.408219003147708e-06, "loss": 0.433, "step": 4378 }, { "epoch": 0.181796480521435, "grad_norm": 2.495185375213623, "learning_rate": 9.40790168753866e-06, "loss": 0.587, "step": 4379 }, { "epoch": 0.18183799604564632, "grad_norm": 2.2034947872161865, "learning_rate": 9.407584292233484e-06, "loss": 0.3884, "step": 4380 }, { "epoch": 0.18187951156985765, "grad_norm": 2.267298936843872, "learning_rate": 9.40726681723791e-06, "loss": 0.5662, "step": 4381 }, { "epoch": 0.181921027094069, "grad_norm": 2.5781099796295166, "learning_rate": 9.406949262557685e-06, "loss": 0.4447, "step": 4382 }, { "epoch": 0.18196254261828032, "grad_norm": 2.2114298343658447, "learning_rate": 9.406631628198543e-06, "loss": 0.4845, "step": 4383 }, { "epoch": 0.18200405814249165, "grad_norm": 2.813520669937134, "learning_rate": 9.406313914166235e-06, "loss": 0.6397, "step": 4384 }, { "epoch": 0.182045573666703, "grad_norm": 3.0793845653533936, "learning_rate": 9.405996120466498e-06, "loss": 0.6117, "step": 4385 }, { "epoch": 0.18208708919091432, "grad_norm": 2.534031867980957, "learning_rate": 9.405678247105083e-06, "loss": 0.5205, "step": 4386 }, { "epoch": 0.18212860471512565, "grad_norm": 2.6671595573425293, "learning_rate": 9.405360294087736e-06, "loss": 0.4766, "step": 4387 }, { "epoch": 0.18217012023933699, "grad_norm": 2.4883010387420654, "learning_rate": 9.405042261420206e-06, "loss": 0.4953, "step": 4388 }, { "epoch": 0.18221163576354832, "grad_norm": 2.69244122505188, "learning_rate": 9.40472414910824e-06, "loss": 0.5112, "step": 4389 }, { "epoch": 0.18225315128775968, "grad_norm": 2.8758347034454346, "learning_rate": 9.404405957157593e-06, "loss": 0.5977, "step": 4390 }, { "epoch": 0.182294666811971, "grad_norm": 2.61395263671875, "learning_rate": 9.404087685574015e-06, "loss": 0.5502, "step": 4391 }, { "epoch": 0.18233618233618235, "grad_norm": 2.9216148853302, "learning_rate": 9.403769334363264e-06, "loss": 0.4851, "step": 4392 }, { "epoch": 0.18237769786039368, "grad_norm": 2.546370506286621, "learning_rate": 9.403450903531092e-06, "loss": 0.4836, "step": 4393 }, { "epoch": 0.182419213384605, "grad_norm": 2.3861875534057617, "learning_rate": 9.403132393083262e-06, "loss": 0.5755, "step": 4394 }, { "epoch": 0.18246072890881634, "grad_norm": 3.001607656478882, "learning_rate": 9.402813803025526e-06, "loss": 0.535, "step": 4395 }, { "epoch": 0.18250224443302768, "grad_norm": 2.541919231414795, "learning_rate": 9.40249513336365e-06, "loss": 0.5395, "step": 4396 }, { "epoch": 0.182543759957239, "grad_norm": 2.8568994998931885, "learning_rate": 9.40217638410339e-06, "loss": 0.5508, "step": 4397 }, { "epoch": 0.18258527548145034, "grad_norm": 2.301224946975708, "learning_rate": 9.401857555250515e-06, "loss": 0.4803, "step": 4398 }, { "epoch": 0.18262679100566168, "grad_norm": 2.70151424407959, "learning_rate": 9.401538646810784e-06, "loss": 0.3888, "step": 4399 }, { "epoch": 0.182668306529873, "grad_norm": 2.4186787605285645, "learning_rate": 9.401219658789969e-06, "loss": 0.3853, "step": 4400 }, { "epoch": 0.18270982205408434, "grad_norm": 2.77002215385437, "learning_rate": 9.400900591193832e-06, "loss": 0.5722, "step": 4401 }, { "epoch": 0.18275133757829568, "grad_norm": 2.7096030712127686, "learning_rate": 9.400581444028143e-06, "loss": 0.5969, "step": 4402 }, { "epoch": 0.182792853102507, "grad_norm": 2.4789509773254395, "learning_rate": 9.400262217298674e-06, "loss": 0.6464, "step": 4403 }, { "epoch": 0.18283436862671834, "grad_norm": 2.567045211791992, "learning_rate": 9.399942911011197e-06, "loss": 0.5307, "step": 4404 }, { "epoch": 0.1828758841509297, "grad_norm": 2.896214485168457, "learning_rate": 9.399623525171481e-06, "loss": 0.5542, "step": 4405 }, { "epoch": 0.18291739967514103, "grad_norm": 3.0753121376037598, "learning_rate": 9.399304059785306e-06, "loss": 0.5636, "step": 4406 }, { "epoch": 0.18295891519935237, "grad_norm": 1.9616382122039795, "learning_rate": 9.398984514858446e-06, "loss": 0.4843, "step": 4407 }, { "epoch": 0.1830004307235637, "grad_norm": 2.9519267082214355, "learning_rate": 9.398664890396675e-06, "loss": 0.6857, "step": 4408 }, { "epoch": 0.18304194624777503, "grad_norm": 2.4431374073028564, "learning_rate": 9.398345186405778e-06, "loss": 0.5889, "step": 4409 }, { "epoch": 0.18308346177198637, "grad_norm": 4.145503044128418, "learning_rate": 9.398025402891532e-06, "loss": 0.642, "step": 4410 }, { "epoch": 0.1831249772961977, "grad_norm": 2.1517953872680664, "learning_rate": 9.397705539859719e-06, "loss": 0.4162, "step": 4411 }, { "epoch": 0.18316649282040903, "grad_norm": 2.6296334266662598, "learning_rate": 9.397385597316121e-06, "loss": 0.6023, "step": 4412 }, { "epoch": 0.18320800834462037, "grad_norm": 3.454572916030884, "learning_rate": 9.397065575266524e-06, "loss": 0.4873, "step": 4413 }, { "epoch": 0.1832495238688317, "grad_norm": 2.1205928325653076, "learning_rate": 9.396745473716716e-06, "loss": 0.5959, "step": 4414 }, { "epoch": 0.18329103939304303, "grad_norm": 2.318739652633667, "learning_rate": 9.396425292672479e-06, "loss": 0.5291, "step": 4415 }, { "epoch": 0.18333255491725436, "grad_norm": 2.6216609477996826, "learning_rate": 9.396105032139608e-06, "loss": 0.5468, "step": 4416 }, { "epoch": 0.1833740704414657, "grad_norm": 2.4964210987091064, "learning_rate": 9.395784692123891e-06, "loss": 0.4301, "step": 4417 }, { "epoch": 0.18341558596567703, "grad_norm": 2.32943058013916, "learning_rate": 9.39546427263112e-06, "loss": 0.5537, "step": 4418 }, { "epoch": 0.18345710148988836, "grad_norm": 3.29105544090271, "learning_rate": 9.395143773667089e-06, "loss": 0.6382, "step": 4419 }, { "epoch": 0.1834986170140997, "grad_norm": 2.388831615447998, "learning_rate": 9.394823195237587e-06, "loss": 0.501, "step": 4420 }, { "epoch": 0.18354013253831106, "grad_norm": 2.318760395050049, "learning_rate": 9.39450253734842e-06, "loss": 0.4307, "step": 4421 }, { "epoch": 0.1835816480625224, "grad_norm": 2.3547279834747314, "learning_rate": 9.394181800005378e-06, "loss": 0.4795, "step": 4422 }, { "epoch": 0.18362316358673372, "grad_norm": 2.8440065383911133, "learning_rate": 9.393860983214263e-06, "loss": 0.5073, "step": 4423 }, { "epoch": 0.18366467911094506, "grad_norm": 2.640002727508545, "learning_rate": 9.393540086980875e-06, "loss": 0.5189, "step": 4424 }, { "epoch": 0.1837061946351564, "grad_norm": 3.303764820098877, "learning_rate": 9.393219111311016e-06, "loss": 0.5276, "step": 4425 }, { "epoch": 0.18374771015936772, "grad_norm": 2.550987720489502, "learning_rate": 9.39289805621049e-06, "loss": 0.4451, "step": 4426 }, { "epoch": 0.18378922568357905, "grad_norm": 2.5583512783050537, "learning_rate": 9.3925769216851e-06, "loss": 0.5559, "step": 4427 }, { "epoch": 0.1838307412077904, "grad_norm": 2.503889322280884, "learning_rate": 9.392255707740653e-06, "loss": 0.3817, "step": 4428 }, { "epoch": 0.18387225673200172, "grad_norm": 2.4459962844848633, "learning_rate": 9.391934414382957e-06, "loss": 0.6761, "step": 4429 }, { "epoch": 0.18391377225621305, "grad_norm": 2.287050247192383, "learning_rate": 9.391613041617823e-06, "loss": 0.4787, "step": 4430 }, { "epoch": 0.1839552877804244, "grad_norm": 2.7740681171417236, "learning_rate": 9.391291589451056e-06, "loss": 0.6152, "step": 4431 }, { "epoch": 0.18399680330463572, "grad_norm": 2.565485715866089, "learning_rate": 9.390970057888473e-06, "loss": 0.5571, "step": 4432 }, { "epoch": 0.18403831882884705, "grad_norm": 3.442650079727173, "learning_rate": 9.390648446935885e-06, "loss": 0.5282, "step": 4433 }, { "epoch": 0.18407983435305839, "grad_norm": 2.9271137714385986, "learning_rate": 9.390326756599109e-06, "loss": 0.5141, "step": 4434 }, { "epoch": 0.18412134987726972, "grad_norm": 2.753206253051758, "learning_rate": 9.390004986883958e-06, "loss": 0.5833, "step": 4435 }, { "epoch": 0.18416286540148108, "grad_norm": 2.8416855335235596, "learning_rate": 9.389683137796253e-06, "loss": 0.4632, "step": 4436 }, { "epoch": 0.1842043809256924, "grad_norm": 2.664428472518921, "learning_rate": 9.38936120934181e-06, "loss": 0.4924, "step": 4437 }, { "epoch": 0.18424589644990375, "grad_norm": 2.634916067123413, "learning_rate": 9.389039201526453e-06, "loss": 0.5738, "step": 4438 }, { "epoch": 0.18428741197411508, "grad_norm": 2.701626777648926, "learning_rate": 9.388717114356e-06, "loss": 0.6514, "step": 4439 }, { "epoch": 0.1843289274983264, "grad_norm": 2.724335193634033, "learning_rate": 9.388394947836278e-06, "loss": 0.5761, "step": 4440 }, { "epoch": 0.18437044302253774, "grad_norm": 2.7143986225128174, "learning_rate": 9.388072701973113e-06, "loss": 0.4563, "step": 4441 }, { "epoch": 0.18441195854674908, "grad_norm": 2.489041328430176, "learning_rate": 9.387750376772325e-06, "loss": 0.5335, "step": 4442 }, { "epoch": 0.1844534740709604, "grad_norm": 2.0654025077819824, "learning_rate": 9.387427972239746e-06, "loss": 0.4404, "step": 4443 }, { "epoch": 0.18449498959517174, "grad_norm": 2.4240615367889404, "learning_rate": 9.387105488381205e-06, "loss": 0.5298, "step": 4444 }, { "epoch": 0.18453650511938308, "grad_norm": 2.246776819229126, "learning_rate": 9.386782925202533e-06, "loss": 0.4955, "step": 4445 }, { "epoch": 0.1845780206435944, "grad_norm": 2.469913959503174, "learning_rate": 9.386460282709562e-06, "loss": 0.5094, "step": 4446 }, { "epoch": 0.18461953616780574, "grad_norm": 2.3603012561798096, "learning_rate": 9.386137560908122e-06, "loss": 0.5428, "step": 4447 }, { "epoch": 0.18466105169201708, "grad_norm": 2.9422519207000732, "learning_rate": 9.385814759804053e-06, "loss": 0.577, "step": 4448 }, { "epoch": 0.1847025672162284, "grad_norm": 2.219555139541626, "learning_rate": 9.38549187940319e-06, "loss": 0.4026, "step": 4449 }, { "epoch": 0.18474408274043974, "grad_norm": 3.155349016189575, "learning_rate": 9.385168919711368e-06, "loss": 0.6565, "step": 4450 }, { "epoch": 0.18478559826465107, "grad_norm": 2.35839581489563, "learning_rate": 9.384845880734428e-06, "loss": 0.5346, "step": 4451 }, { "epoch": 0.18482711378886243, "grad_norm": 3.49277400970459, "learning_rate": 9.38452276247821e-06, "loss": 0.5805, "step": 4452 }, { "epoch": 0.18486862931307377, "grad_norm": 2.5474958419799805, "learning_rate": 9.384199564948558e-06, "loss": 0.5191, "step": 4453 }, { "epoch": 0.1849101448372851, "grad_norm": 2.5235276222229004, "learning_rate": 9.383876288151314e-06, "loss": 0.5905, "step": 4454 }, { "epoch": 0.18495166036149643, "grad_norm": 2.961205244064331, "learning_rate": 9.383552932092323e-06, "loss": 0.6231, "step": 4455 }, { "epoch": 0.18499317588570777, "grad_norm": 3.0650315284729004, "learning_rate": 9.38322949677743e-06, "loss": 0.5166, "step": 4456 }, { "epoch": 0.1850346914099191, "grad_norm": 2.393554925918579, "learning_rate": 9.382905982212487e-06, "loss": 0.514, "step": 4457 }, { "epoch": 0.18507620693413043, "grad_norm": 2.4454116821289062, "learning_rate": 9.382582388403339e-06, "loss": 0.3515, "step": 4458 }, { "epoch": 0.18511772245834177, "grad_norm": 2.630387544631958, "learning_rate": 9.382258715355838e-06, "loss": 0.5219, "step": 4459 }, { "epoch": 0.1851592379825531, "grad_norm": 2.259204626083374, "learning_rate": 9.381934963075836e-06, "loss": 0.4966, "step": 4460 }, { "epoch": 0.18520075350676443, "grad_norm": 2.619605541229248, "learning_rate": 9.381611131569187e-06, "loss": 0.549, "step": 4461 }, { "epoch": 0.18524226903097576, "grad_norm": 2.1723814010620117, "learning_rate": 9.381287220841748e-06, "loss": 0.4655, "step": 4462 }, { "epoch": 0.1852837845551871, "grad_norm": 2.857656240463257, "learning_rate": 9.380963230899371e-06, "loss": 0.539, "step": 4463 }, { "epoch": 0.18532530007939843, "grad_norm": 2.426715612411499, "learning_rate": 9.380639161747917e-06, "loss": 0.5833, "step": 4464 }, { "epoch": 0.18536681560360976, "grad_norm": 2.365940809249878, "learning_rate": 9.380315013393244e-06, "loss": 0.5663, "step": 4465 }, { "epoch": 0.1854083311278211, "grad_norm": 2.3464741706848145, "learning_rate": 9.379990785841215e-06, "loss": 0.4862, "step": 4466 }, { "epoch": 0.18544984665203246, "grad_norm": 2.62214732170105, "learning_rate": 9.379666479097688e-06, "loss": 0.5956, "step": 4467 }, { "epoch": 0.1854913621762438, "grad_norm": 2.6733226776123047, "learning_rate": 9.379342093168528e-06, "loss": 0.4888, "step": 4468 }, { "epoch": 0.18553287770045512, "grad_norm": 2.2492921352386475, "learning_rate": 9.379017628059602e-06, "loss": 0.5071, "step": 4469 }, { "epoch": 0.18557439322466646, "grad_norm": 2.577357769012451, "learning_rate": 9.378693083776776e-06, "loss": 0.556, "step": 4470 }, { "epoch": 0.1856159087488778, "grad_norm": 2.669509172439575, "learning_rate": 9.378368460325916e-06, "loss": 0.508, "step": 4471 }, { "epoch": 0.18565742427308912, "grad_norm": 2.3488757610321045, "learning_rate": 9.378043757712891e-06, "loss": 0.5179, "step": 4472 }, { "epoch": 0.18569893979730046, "grad_norm": 2.7584996223449707, "learning_rate": 9.377718975943573e-06, "loss": 0.6161, "step": 4473 }, { "epoch": 0.1857404553215118, "grad_norm": 2.4579968452453613, "learning_rate": 9.377394115023836e-06, "loss": 0.5266, "step": 4474 }, { "epoch": 0.18578197084572312, "grad_norm": 3.102928876876831, "learning_rate": 9.37706917495955e-06, "loss": 0.5657, "step": 4475 }, { "epoch": 0.18582348636993445, "grad_norm": 2.440269947052002, "learning_rate": 9.376744155756592e-06, "loss": 0.5136, "step": 4476 }, { "epoch": 0.1858650018941458, "grad_norm": 2.526036024093628, "learning_rate": 9.37641905742084e-06, "loss": 0.5979, "step": 4477 }, { "epoch": 0.18590651741835712, "grad_norm": 2.339289426803589, "learning_rate": 9.376093879958167e-06, "loss": 0.5571, "step": 4478 }, { "epoch": 0.18594803294256845, "grad_norm": 2.808821678161621, "learning_rate": 9.375768623374458e-06, "loss": 0.6602, "step": 4479 }, { "epoch": 0.18598954846677979, "grad_norm": 2.7442328929901123, "learning_rate": 9.37544328767559e-06, "loss": 0.5818, "step": 4480 }, { "epoch": 0.18603106399099112, "grad_norm": 2.8050050735473633, "learning_rate": 9.375117872867448e-06, "loss": 0.5085, "step": 4481 }, { "epoch": 0.18607257951520248, "grad_norm": 2.942373037338257, "learning_rate": 9.37479237895591e-06, "loss": 0.6193, "step": 4482 }, { "epoch": 0.1861140950394138, "grad_norm": 2.5740456581115723, "learning_rate": 9.374466805946867e-06, "loss": 0.5053, "step": 4483 }, { "epoch": 0.18615561056362515, "grad_norm": 2.390184164047241, "learning_rate": 9.374141153846204e-06, "loss": 0.7284, "step": 4484 }, { "epoch": 0.18619712608783648, "grad_norm": 2.815153121948242, "learning_rate": 9.373815422659806e-06, "loss": 0.5047, "step": 4485 }, { "epoch": 0.1862386416120478, "grad_norm": 2.8135106563568115, "learning_rate": 9.373489612393566e-06, "loss": 0.6381, "step": 4486 }, { "epoch": 0.18628015713625914, "grad_norm": 3.1027567386627197, "learning_rate": 9.373163723053374e-06, "loss": 0.7033, "step": 4487 }, { "epoch": 0.18632167266047048, "grad_norm": 2.9876763820648193, "learning_rate": 9.372837754645121e-06, "loss": 0.7648, "step": 4488 }, { "epoch": 0.1863631881846818, "grad_norm": 2.554386615753174, "learning_rate": 9.372511707174701e-06, "loss": 0.597, "step": 4489 }, { "epoch": 0.18640470370889314, "grad_norm": 2.7441611289978027, "learning_rate": 9.372185580648008e-06, "loss": 0.5118, "step": 4490 }, { "epoch": 0.18644621923310448, "grad_norm": 2.8398373126983643, "learning_rate": 9.37185937507094e-06, "loss": 0.3999, "step": 4491 }, { "epoch": 0.1864877347573158, "grad_norm": 2.5478875637054443, "learning_rate": 9.371533090449394e-06, "loss": 0.5096, "step": 4492 }, { "epoch": 0.18652925028152714, "grad_norm": 2.678253650665283, "learning_rate": 9.37120672678927e-06, "loss": 0.4895, "step": 4493 }, { "epoch": 0.18657076580573848, "grad_norm": 2.6795105934143066, "learning_rate": 9.370880284096469e-06, "loss": 0.5033, "step": 4494 }, { "epoch": 0.1866122813299498, "grad_norm": 2.6363003253936768, "learning_rate": 9.370553762376893e-06, "loss": 0.4697, "step": 4495 }, { "epoch": 0.18665379685416114, "grad_norm": 2.610764980316162, "learning_rate": 9.370227161636446e-06, "loss": 0.3841, "step": 4496 }, { "epoch": 0.18669531237837247, "grad_norm": 2.337639570236206, "learning_rate": 9.36990048188103e-06, "loss": 0.5066, "step": 4497 }, { "epoch": 0.18673682790258384, "grad_norm": 2.7633414268493652, "learning_rate": 9.369573723116554e-06, "loss": 0.6092, "step": 4498 }, { "epoch": 0.18677834342679517, "grad_norm": 2.7181215286254883, "learning_rate": 9.369246885348926e-06, "loss": 0.4552, "step": 4499 }, { "epoch": 0.1868198589510065, "grad_norm": 3.0281057357788086, "learning_rate": 9.368919968584055e-06, "loss": 0.5753, "step": 4500 }, { "epoch": 0.18686137447521783, "grad_norm": 3.136042594909668, "learning_rate": 9.368592972827852e-06, "loss": 0.6972, "step": 4501 }, { "epoch": 0.18690288999942917, "grad_norm": 2.898810863494873, "learning_rate": 9.368265898086229e-06, "loss": 0.5744, "step": 4502 }, { "epoch": 0.1869444055236405, "grad_norm": 2.7617383003234863, "learning_rate": 9.3679387443651e-06, "loss": 0.5123, "step": 4503 }, { "epoch": 0.18698592104785183, "grad_norm": 2.4942564964294434, "learning_rate": 9.367611511670377e-06, "loss": 0.5671, "step": 4504 }, { "epoch": 0.18702743657206317, "grad_norm": 2.83347749710083, "learning_rate": 9.367284200007981e-06, "loss": 0.7758, "step": 4505 }, { "epoch": 0.1870689520962745, "grad_norm": 2.413118600845337, "learning_rate": 9.366956809383829e-06, "loss": 0.5306, "step": 4506 }, { "epoch": 0.18711046762048583, "grad_norm": 2.8240575790405273, "learning_rate": 9.366629339803837e-06, "loss": 0.6445, "step": 4507 }, { "epoch": 0.18715198314469716, "grad_norm": 2.813812017440796, "learning_rate": 9.366301791273929e-06, "loss": 0.7189, "step": 4508 }, { "epoch": 0.1871934986689085, "grad_norm": 3.2571558952331543, "learning_rate": 9.365974163800024e-06, "loss": 0.69, "step": 4509 }, { "epoch": 0.18723501419311983, "grad_norm": 3.0031492710113525, "learning_rate": 9.36564645738805e-06, "loss": 0.3717, "step": 4510 }, { "epoch": 0.18727652971733116, "grad_norm": 2.3741815090179443, "learning_rate": 9.36531867204393e-06, "loss": 0.3678, "step": 4511 }, { "epoch": 0.1873180452415425, "grad_norm": 2.883523941040039, "learning_rate": 9.36499080777359e-06, "loss": 0.5243, "step": 4512 }, { "epoch": 0.18735956076575386, "grad_norm": 2.406787872314453, "learning_rate": 9.364662864582958e-06, "loss": 0.4561, "step": 4513 }, { "epoch": 0.1874010762899652, "grad_norm": 2.0099637508392334, "learning_rate": 9.364334842477962e-06, "loss": 0.4151, "step": 4514 }, { "epoch": 0.18744259181417652, "grad_norm": 2.479633331298828, "learning_rate": 9.364006741464536e-06, "loss": 0.4894, "step": 4515 }, { "epoch": 0.18748410733838786, "grad_norm": 2.4278268814086914, "learning_rate": 9.363678561548608e-06, "loss": 0.3829, "step": 4516 }, { "epoch": 0.1875256228625992, "grad_norm": 3.727590799331665, "learning_rate": 9.363350302736117e-06, "loss": 0.554, "step": 4517 }, { "epoch": 0.18756713838681052, "grad_norm": 2.3440964221954346, "learning_rate": 9.363021965032993e-06, "loss": 0.5414, "step": 4518 }, { "epoch": 0.18760865391102186, "grad_norm": 2.8375418186187744, "learning_rate": 9.362693548445173e-06, "loss": 0.5489, "step": 4519 }, { "epoch": 0.1876501694352332, "grad_norm": 2.7595722675323486, "learning_rate": 9.362365052978599e-06, "loss": 0.5902, "step": 4520 }, { "epoch": 0.18769168495944452, "grad_norm": 2.667578935623169, "learning_rate": 9.362036478639206e-06, "loss": 0.5252, "step": 4521 }, { "epoch": 0.18773320048365585, "grad_norm": 2.318077325820923, "learning_rate": 9.361707825432936e-06, "loss": 0.4278, "step": 4522 }, { "epoch": 0.1877747160078672, "grad_norm": 2.870944023132324, "learning_rate": 9.36137909336573e-06, "loss": 0.5248, "step": 4523 }, { "epoch": 0.18781623153207852, "grad_norm": 2.7566943168640137, "learning_rate": 9.361050282443535e-06, "loss": 0.479, "step": 4524 }, { "epoch": 0.18785774705628985, "grad_norm": 2.5616631507873535, "learning_rate": 9.360721392672293e-06, "loss": 0.7288, "step": 4525 }, { "epoch": 0.1878992625805012, "grad_norm": 2.5776071548461914, "learning_rate": 9.360392424057952e-06, "loss": 0.5421, "step": 4526 }, { "epoch": 0.18794077810471252, "grad_norm": 2.7643373012542725, "learning_rate": 9.360063376606458e-06, "loss": 0.55, "step": 4527 }, { "epoch": 0.18798229362892385, "grad_norm": 2.4403481483459473, "learning_rate": 9.359734250323765e-06, "loss": 0.4221, "step": 4528 }, { "epoch": 0.1880238091531352, "grad_norm": 3.5650010108947754, "learning_rate": 9.359405045215816e-06, "loss": 0.6088, "step": 4529 }, { "epoch": 0.18806532467734655, "grad_norm": 2.7674520015716553, "learning_rate": 9.35907576128857e-06, "loss": 0.6271, "step": 4530 }, { "epoch": 0.18810684020155788, "grad_norm": 2.5839896202087402, "learning_rate": 9.358746398547975e-06, "loss": 0.5982, "step": 4531 }, { "epoch": 0.1881483557257692, "grad_norm": 2.5728814601898193, "learning_rate": 9.358416956999991e-06, "loss": 0.5363, "step": 4532 }, { "epoch": 0.18818987124998054, "grad_norm": 2.7378551959991455, "learning_rate": 9.358087436650572e-06, "loss": 0.6188, "step": 4533 }, { "epoch": 0.18823138677419188, "grad_norm": 2.683126211166382, "learning_rate": 9.357757837505674e-06, "loss": 0.6241, "step": 4534 }, { "epoch": 0.1882729022984032, "grad_norm": 2.4596002101898193, "learning_rate": 9.35742815957126e-06, "loss": 0.561, "step": 4535 }, { "epoch": 0.18831441782261454, "grad_norm": 2.9951820373535156, "learning_rate": 9.357098402853288e-06, "loss": 0.509, "step": 4536 }, { "epoch": 0.18835593334682588, "grad_norm": 2.595907688140869, "learning_rate": 9.356768567357724e-06, "loss": 0.503, "step": 4537 }, { "epoch": 0.1883974488710372, "grad_norm": 2.498990535736084, "learning_rate": 9.356438653090525e-06, "loss": 0.5424, "step": 4538 }, { "epoch": 0.18843896439524854, "grad_norm": 2.8982555866241455, "learning_rate": 9.356108660057662e-06, "loss": 0.6494, "step": 4539 }, { "epoch": 0.18848047991945988, "grad_norm": 2.7245523929595947, "learning_rate": 9.355778588265096e-06, "loss": 0.4827, "step": 4540 }, { "epoch": 0.1885219954436712, "grad_norm": 2.5373740196228027, "learning_rate": 9.355448437718799e-06, "loss": 0.5814, "step": 4541 }, { "epoch": 0.18856351096788254, "grad_norm": 2.520807981491089, "learning_rate": 9.355118208424741e-06, "loss": 0.5185, "step": 4542 }, { "epoch": 0.18860502649209387, "grad_norm": 2.4190478324890137, "learning_rate": 9.35478790038889e-06, "loss": 0.4909, "step": 4543 }, { "epoch": 0.18864654201630524, "grad_norm": 2.3307697772979736, "learning_rate": 9.354457513617215e-06, "loss": 0.5768, "step": 4544 }, { "epoch": 0.18868805754051657, "grad_norm": 2.732412576675415, "learning_rate": 9.354127048115696e-06, "loss": 0.5496, "step": 4545 }, { "epoch": 0.1887295730647279, "grad_norm": 2.659501791000366, "learning_rate": 9.353796503890304e-06, "loss": 0.4315, "step": 4546 }, { "epoch": 0.18877108858893923, "grad_norm": 2.419663429260254, "learning_rate": 9.353465880947017e-06, "loss": 0.451, "step": 4547 }, { "epoch": 0.18881260411315057, "grad_norm": 2.9420015811920166, "learning_rate": 9.35313517929181e-06, "loss": 0.5305, "step": 4548 }, { "epoch": 0.1888541196373619, "grad_norm": 2.450622081756592, "learning_rate": 9.352804398930667e-06, "loss": 0.4514, "step": 4549 }, { "epoch": 0.18889563516157323, "grad_norm": 2.274717092514038, "learning_rate": 9.352473539869565e-06, "loss": 0.6021, "step": 4550 }, { "epoch": 0.18893715068578457, "grad_norm": 2.857595682144165, "learning_rate": 9.352142602114487e-06, "loss": 0.7812, "step": 4551 }, { "epoch": 0.1889786662099959, "grad_norm": 2.492358922958374, "learning_rate": 9.351811585671415e-06, "loss": 0.5798, "step": 4552 }, { "epoch": 0.18902018173420723, "grad_norm": 2.7956910133361816, "learning_rate": 9.351480490546337e-06, "loss": 0.471, "step": 4553 }, { "epoch": 0.18906169725841857, "grad_norm": 2.542576313018799, "learning_rate": 9.351149316745236e-06, "loss": 0.5357, "step": 4554 }, { "epoch": 0.1891032127826299, "grad_norm": 2.2082221508026123, "learning_rate": 9.350818064274101e-06, "loss": 0.523, "step": 4555 }, { "epoch": 0.18914472830684123, "grad_norm": 2.793848991394043, "learning_rate": 9.350486733138923e-06, "loss": 0.4936, "step": 4556 }, { "epoch": 0.18918624383105256, "grad_norm": 2.9232382774353027, "learning_rate": 9.35015532334569e-06, "loss": 0.5995, "step": 4557 }, { "epoch": 0.1892277593552639, "grad_norm": 3.1552228927612305, "learning_rate": 9.349823834900396e-06, "loss": 0.6024, "step": 4558 }, { "epoch": 0.18926927487947523, "grad_norm": 2.0512096881866455, "learning_rate": 9.349492267809033e-06, "loss": 0.5901, "step": 4559 }, { "epoch": 0.1893107904036866, "grad_norm": 3.4566810131073, "learning_rate": 9.349160622077596e-06, "loss": 0.5495, "step": 4560 }, { "epoch": 0.18935230592789792, "grad_norm": 2.9961917400360107, "learning_rate": 9.34882889771208e-06, "loss": 0.5225, "step": 4561 }, { "epoch": 0.18939382145210926, "grad_norm": 2.411449909210205, "learning_rate": 9.348497094718486e-06, "loss": 0.4019, "step": 4562 }, { "epoch": 0.1894353369763206, "grad_norm": 2.4621200561523438, "learning_rate": 9.348165213102809e-06, "loss": 0.5028, "step": 4563 }, { "epoch": 0.18947685250053192, "grad_norm": 2.4651949405670166, "learning_rate": 9.347833252871052e-06, "loss": 0.4678, "step": 4564 }, { "epoch": 0.18951836802474326, "grad_norm": 2.8304240703582764, "learning_rate": 9.347501214029218e-06, "loss": 0.6931, "step": 4565 }, { "epoch": 0.1895598835489546, "grad_norm": 2.603297233581543, "learning_rate": 9.347169096583309e-06, "loss": 0.4299, "step": 4566 }, { "epoch": 0.18960139907316592, "grad_norm": 2.176635980606079, "learning_rate": 9.34683690053933e-06, "loss": 0.4149, "step": 4567 }, { "epoch": 0.18964291459737725, "grad_norm": 2.516707420349121, "learning_rate": 9.346504625903286e-06, "loss": 0.5818, "step": 4568 }, { "epoch": 0.1896844301215886, "grad_norm": 2.4144017696380615, "learning_rate": 9.346172272681184e-06, "loss": 0.3866, "step": 4569 }, { "epoch": 0.18972594564579992, "grad_norm": 2.0338134765625, "learning_rate": 9.345839840879036e-06, "loss": 0.3629, "step": 4570 }, { "epoch": 0.18976746117001125, "grad_norm": 2.6330149173736572, "learning_rate": 9.345507330502852e-06, "loss": 0.5902, "step": 4571 }, { "epoch": 0.1898089766942226, "grad_norm": 2.484931468963623, "learning_rate": 9.345174741558643e-06, "loss": 0.6297, "step": 4572 }, { "epoch": 0.18985049221843392, "grad_norm": 3.1864585876464844, "learning_rate": 9.34484207405242e-06, "loss": 0.6627, "step": 4573 }, { "epoch": 0.18989200774264525, "grad_norm": 2.293196678161621, "learning_rate": 9.344509327990202e-06, "loss": 0.4368, "step": 4574 }, { "epoch": 0.1899335232668566, "grad_norm": 2.9905588626861572, "learning_rate": 9.344176503378003e-06, "loss": 0.602, "step": 4575 }, { "epoch": 0.18997503879106795, "grad_norm": 2.338797092437744, "learning_rate": 9.34384360022184e-06, "loss": 0.5768, "step": 4576 }, { "epoch": 0.19001655431527928, "grad_norm": 2.4013376235961914, "learning_rate": 9.343510618527734e-06, "loss": 0.5675, "step": 4577 }, { "epoch": 0.1900580698394906, "grad_norm": 3.012587308883667, "learning_rate": 9.343177558301702e-06, "loss": 0.5379, "step": 4578 }, { "epoch": 0.19009958536370195, "grad_norm": 2.897571086883545, "learning_rate": 9.342844419549771e-06, "loss": 0.3835, "step": 4579 }, { "epoch": 0.19014110088791328, "grad_norm": 2.341876983642578, "learning_rate": 9.342511202277957e-06, "loss": 0.5919, "step": 4580 }, { "epoch": 0.1901826164121246, "grad_norm": 2.2174129486083984, "learning_rate": 9.34217790649229e-06, "loss": 0.5045, "step": 4581 }, { "epoch": 0.19022413193633594, "grad_norm": 2.433439016342163, "learning_rate": 9.341844532198798e-06, "loss": 0.4584, "step": 4582 }, { "epoch": 0.19026564746054728, "grad_norm": 2.6703484058380127, "learning_rate": 9.341511079403503e-06, "loss": 0.4976, "step": 4583 }, { "epoch": 0.1903071629847586, "grad_norm": 2.369934320449829, "learning_rate": 9.341177548112437e-06, "loss": 0.4004, "step": 4584 }, { "epoch": 0.19034867850896994, "grad_norm": 2.2616841793060303, "learning_rate": 9.34084393833163e-06, "loss": 0.6315, "step": 4585 }, { "epoch": 0.19039019403318128, "grad_norm": 2.176859140396118, "learning_rate": 9.340510250067112e-06, "loss": 0.4191, "step": 4586 }, { "epoch": 0.1904317095573926, "grad_norm": 2.7010295391082764, "learning_rate": 9.340176483324918e-06, "loss": 0.5089, "step": 4587 }, { "epoch": 0.19047322508160394, "grad_norm": 2.9573726654052734, "learning_rate": 9.339842638111083e-06, "loss": 0.4881, "step": 4588 }, { "epoch": 0.19051474060581527, "grad_norm": 2.7784366607666016, "learning_rate": 9.339508714431641e-06, "loss": 0.6457, "step": 4589 }, { "epoch": 0.19055625613002664, "grad_norm": 2.126801013946533, "learning_rate": 9.33917471229263e-06, "loss": 0.3548, "step": 4590 }, { "epoch": 0.19059777165423797, "grad_norm": 2.458286762237549, "learning_rate": 9.338840631700093e-06, "loss": 0.486, "step": 4591 }, { "epoch": 0.1906392871784493, "grad_norm": 2.694674491882324, "learning_rate": 9.338506472660065e-06, "loss": 0.543, "step": 4592 }, { "epoch": 0.19068080270266063, "grad_norm": 2.249995470046997, "learning_rate": 9.33817223517859e-06, "loss": 0.4917, "step": 4593 }, { "epoch": 0.19072231822687197, "grad_norm": 2.8378400802612305, "learning_rate": 9.337837919261708e-06, "loss": 0.4482, "step": 4594 }, { "epoch": 0.1907638337510833, "grad_norm": 2.286085367202759, "learning_rate": 9.337503524915469e-06, "loss": 0.5457, "step": 4595 }, { "epoch": 0.19080534927529463, "grad_norm": 2.1881330013275146, "learning_rate": 9.337169052145914e-06, "loss": 0.4378, "step": 4596 }, { "epoch": 0.19084686479950597, "grad_norm": 2.5883209705352783, "learning_rate": 9.336834500959093e-06, "loss": 0.5609, "step": 4597 }, { "epoch": 0.1908883803237173, "grad_norm": 2.2758779525756836, "learning_rate": 9.336499871361055e-06, "loss": 0.5347, "step": 4598 }, { "epoch": 0.19092989584792863, "grad_norm": 2.6259591579437256, "learning_rate": 9.336165163357849e-06, "loss": 0.5158, "step": 4599 }, { "epoch": 0.19097141137213997, "grad_norm": 2.531888008117676, "learning_rate": 9.335830376955528e-06, "loss": 0.6393, "step": 4600 }, { "epoch": 0.1910129268963513, "grad_norm": 2.8740837574005127, "learning_rate": 9.335495512160142e-06, "loss": 0.5153, "step": 4601 }, { "epoch": 0.19105444242056263, "grad_norm": 2.407496929168701, "learning_rate": 9.335160568977749e-06, "loss": 0.5469, "step": 4602 }, { "epoch": 0.19109595794477396, "grad_norm": 2.715884208679199, "learning_rate": 9.334825547414403e-06, "loss": 0.6423, "step": 4603 }, { "epoch": 0.1911374734689853, "grad_norm": 2.7214748859405518, "learning_rate": 9.334490447476162e-06, "loss": 0.5335, "step": 4604 }, { "epoch": 0.19117898899319663, "grad_norm": 2.4131152629852295, "learning_rate": 9.334155269169084e-06, "loss": 0.5269, "step": 4605 }, { "epoch": 0.191220504517408, "grad_norm": 3.18208646774292, "learning_rate": 9.33382001249923e-06, "loss": 0.5562, "step": 4606 }, { "epoch": 0.19126202004161932, "grad_norm": 2.327679395675659, "learning_rate": 9.33348467747266e-06, "loss": 0.4541, "step": 4607 }, { "epoch": 0.19130353556583066, "grad_norm": 2.4280574321746826, "learning_rate": 9.333149264095438e-06, "loss": 0.5408, "step": 4608 }, { "epoch": 0.191345051090042, "grad_norm": 3.0425162315368652, "learning_rate": 9.33281377237363e-06, "loss": 0.5351, "step": 4609 }, { "epoch": 0.19138656661425332, "grad_norm": 2.2247374057769775, "learning_rate": 9.332478202313298e-06, "loss": 0.4574, "step": 4610 }, { "epoch": 0.19142808213846466, "grad_norm": 2.3792502880096436, "learning_rate": 9.332142553920513e-06, "loss": 0.5556, "step": 4611 }, { "epoch": 0.191469597662676, "grad_norm": 2.8286166191101074, "learning_rate": 9.331806827201342e-06, "loss": 0.5541, "step": 4612 }, { "epoch": 0.19151111318688732, "grad_norm": 2.2574853897094727, "learning_rate": 9.331471022161852e-06, "loss": 0.433, "step": 4613 }, { "epoch": 0.19155262871109865, "grad_norm": 3.1878607273101807, "learning_rate": 9.331135138808122e-06, "loss": 0.5028, "step": 4614 }, { "epoch": 0.19159414423531, "grad_norm": 2.37416672706604, "learning_rate": 9.330799177146219e-06, "loss": 0.5523, "step": 4615 }, { "epoch": 0.19163565975952132, "grad_norm": 3.1840527057647705, "learning_rate": 9.330463137182217e-06, "loss": 0.3881, "step": 4616 }, { "epoch": 0.19167717528373265, "grad_norm": 2.517378807067871, "learning_rate": 9.330127018922195e-06, "loss": 0.5637, "step": 4617 }, { "epoch": 0.191718690807944, "grad_norm": 2.7649173736572266, "learning_rate": 9.329790822372226e-06, "loss": 0.6486, "step": 4618 }, { "epoch": 0.19176020633215532, "grad_norm": 2.6983094215393066, "learning_rate": 9.329454547538393e-06, "loss": 0.5309, "step": 4619 }, { "epoch": 0.19180172185636665, "grad_norm": 3.0692944526672363, "learning_rate": 9.329118194426773e-06, "loss": 0.6016, "step": 4620 }, { "epoch": 0.191843237380578, "grad_norm": 3.2052109241485596, "learning_rate": 9.328781763043448e-06, "loss": 0.5714, "step": 4621 }, { "epoch": 0.19188475290478935, "grad_norm": 2.521239995956421, "learning_rate": 9.328445253394502e-06, "loss": 0.597, "step": 4622 }, { "epoch": 0.19192626842900068, "grad_norm": 2.6221468448638916, "learning_rate": 9.328108665486019e-06, "loss": 0.5631, "step": 4623 }, { "epoch": 0.191967783953212, "grad_norm": 2.412311553955078, "learning_rate": 9.327771999324082e-06, "loss": 0.5978, "step": 4624 }, { "epoch": 0.19200929947742335, "grad_norm": 2.18772554397583, "learning_rate": 9.327435254914781e-06, "loss": 0.497, "step": 4625 }, { "epoch": 0.19205081500163468, "grad_norm": 2.4045283794403076, "learning_rate": 9.327098432264203e-06, "loss": 0.5013, "step": 4626 }, { "epoch": 0.192092330525846, "grad_norm": 2.214102029800415, "learning_rate": 9.326761531378439e-06, "loss": 0.5164, "step": 4627 }, { "epoch": 0.19213384605005734, "grad_norm": 2.73170804977417, "learning_rate": 9.326424552263577e-06, "loss": 0.6214, "step": 4628 }, { "epoch": 0.19217536157426868, "grad_norm": 2.4061741828918457, "learning_rate": 9.326087494925715e-06, "loss": 0.5426, "step": 4629 }, { "epoch": 0.19221687709848, "grad_norm": 2.436116933822632, "learning_rate": 9.325750359370943e-06, "loss": 0.6206, "step": 4630 }, { "epoch": 0.19225839262269134, "grad_norm": 2.4605629444122314, "learning_rate": 9.325413145605356e-06, "loss": 0.4591, "step": 4631 }, { "epoch": 0.19229990814690268, "grad_norm": 3.3594958782196045, "learning_rate": 9.325075853635054e-06, "loss": 0.4414, "step": 4632 }, { "epoch": 0.192341423671114, "grad_norm": 2.529203414916992, "learning_rate": 9.324738483466133e-06, "loss": 0.5744, "step": 4633 }, { "epoch": 0.19238293919532534, "grad_norm": 2.679771661758423, "learning_rate": 9.324401035104695e-06, "loss": 0.697, "step": 4634 }, { "epoch": 0.19242445471953668, "grad_norm": 1.9620842933654785, "learning_rate": 9.32406350855684e-06, "loss": 0.4315, "step": 4635 }, { "epoch": 0.192465970243748, "grad_norm": 2.687629222869873, "learning_rate": 9.32372590382867e-06, "loss": 0.3834, "step": 4636 }, { "epoch": 0.19250748576795937, "grad_norm": 2.8479228019714355, "learning_rate": 9.323388220926288e-06, "loss": 0.7333, "step": 4637 }, { "epoch": 0.1925490012921707, "grad_norm": 3.4093682765960693, "learning_rate": 9.323050459855802e-06, "loss": 0.5459, "step": 4638 }, { "epoch": 0.19259051681638203, "grad_norm": 2.9592666625976562, "learning_rate": 9.322712620623318e-06, "loss": 0.5295, "step": 4639 }, { "epoch": 0.19263203234059337, "grad_norm": 3.263627767562866, "learning_rate": 9.322374703234945e-06, "loss": 0.492, "step": 4640 }, { "epoch": 0.1926735478648047, "grad_norm": 2.3550665378570557, "learning_rate": 9.32203670769679e-06, "loss": 0.5458, "step": 4641 }, { "epoch": 0.19271506338901603, "grad_norm": 2.3707733154296875, "learning_rate": 9.321698634014966e-06, "loss": 0.4528, "step": 4642 }, { "epoch": 0.19275657891322737, "grad_norm": 2.4313223361968994, "learning_rate": 9.321360482195585e-06, "loss": 0.348, "step": 4643 }, { "epoch": 0.1927980944374387, "grad_norm": 2.9835593700408936, "learning_rate": 9.32102225224476e-06, "loss": 0.437, "step": 4644 }, { "epoch": 0.19283960996165003, "grad_norm": 2.6862223148345947, "learning_rate": 9.32068394416861e-06, "loss": 0.5806, "step": 4645 }, { "epoch": 0.19288112548586137, "grad_norm": 2.521101474761963, "learning_rate": 9.320345557973245e-06, "loss": 0.5214, "step": 4646 }, { "epoch": 0.1929226410100727, "grad_norm": 2.6501574516296387, "learning_rate": 9.32000709366479e-06, "loss": 0.6049, "step": 4647 }, { "epoch": 0.19296415653428403, "grad_norm": 2.6032192707061768, "learning_rate": 9.31966855124936e-06, "loss": 0.368, "step": 4648 }, { "epoch": 0.19300567205849536, "grad_norm": 2.4436581134796143, "learning_rate": 9.319329930733079e-06, "loss": 0.591, "step": 4649 }, { "epoch": 0.1930471875827067, "grad_norm": 2.461439609527588, "learning_rate": 9.318991232122065e-06, "loss": 0.444, "step": 4650 }, { "epoch": 0.19308870310691803, "grad_norm": 2.3419134616851807, "learning_rate": 9.318652455422448e-06, "loss": 0.5033, "step": 4651 }, { "epoch": 0.1931302186311294, "grad_norm": 2.950498104095459, "learning_rate": 9.318313600640347e-06, "loss": 0.6066, "step": 4652 }, { "epoch": 0.19317173415534072, "grad_norm": 3.4697265625, "learning_rate": 9.317974667781895e-06, "loss": 0.5459, "step": 4653 }, { "epoch": 0.19321324967955206, "grad_norm": 2.3298227787017822, "learning_rate": 9.317635656853215e-06, "loss": 0.5645, "step": 4654 }, { "epoch": 0.1932547652037634, "grad_norm": 3.060344696044922, "learning_rate": 9.317296567860436e-06, "loss": 0.5714, "step": 4655 }, { "epoch": 0.19329628072797472, "grad_norm": 3.068554639816284, "learning_rate": 9.316957400809692e-06, "loss": 0.6052, "step": 4656 }, { "epoch": 0.19333779625218606, "grad_norm": 2.2171881198883057, "learning_rate": 9.316618155707113e-06, "loss": 0.4014, "step": 4657 }, { "epoch": 0.1933793117763974, "grad_norm": 2.7203314304351807, "learning_rate": 9.316278832558835e-06, "loss": 0.6793, "step": 4658 }, { "epoch": 0.19342082730060872, "grad_norm": 2.6791393756866455, "learning_rate": 9.31593943137099e-06, "loss": 0.5262, "step": 4659 }, { "epoch": 0.19346234282482005, "grad_norm": 2.505074977874756, "learning_rate": 9.315599952149717e-06, "loss": 0.4411, "step": 4660 }, { "epoch": 0.1935038583490314, "grad_norm": 2.8658297061920166, "learning_rate": 9.315260394901154e-06, "loss": 0.5502, "step": 4661 }, { "epoch": 0.19354537387324272, "grad_norm": 2.2619426250457764, "learning_rate": 9.314920759631438e-06, "loss": 0.644, "step": 4662 }, { "epoch": 0.19358688939745405, "grad_norm": 2.6449477672576904, "learning_rate": 9.31458104634671e-06, "loss": 0.5452, "step": 4663 }, { "epoch": 0.1936284049216654, "grad_norm": 2.3915576934814453, "learning_rate": 9.314241255053117e-06, "loss": 0.5547, "step": 4664 }, { "epoch": 0.19366992044587672, "grad_norm": 2.34835147857666, "learning_rate": 9.313901385756794e-06, "loss": 0.4199, "step": 4665 }, { "epoch": 0.19371143597008805, "grad_norm": 2.662736177444458, "learning_rate": 9.313561438463895e-06, "loss": 0.5336, "step": 4666 }, { "epoch": 0.19375295149429939, "grad_norm": 2.8989410400390625, "learning_rate": 9.313221413180557e-06, "loss": 0.5894, "step": 4667 }, { "epoch": 0.19379446701851075, "grad_norm": 2.4315500259399414, "learning_rate": 9.312881309912936e-06, "loss": 0.5368, "step": 4668 }, { "epoch": 0.19383598254272208, "grad_norm": 2.627312183380127, "learning_rate": 9.312541128667176e-06, "loss": 0.5075, "step": 4669 }, { "epoch": 0.1938774980669334, "grad_norm": 3.0866613388061523, "learning_rate": 9.31220086944943e-06, "loss": 0.4946, "step": 4670 }, { "epoch": 0.19391901359114475, "grad_norm": 2.6437838077545166, "learning_rate": 9.31186053226585e-06, "loss": 0.4267, "step": 4671 }, { "epoch": 0.19396052911535608, "grad_norm": 3.2021660804748535, "learning_rate": 9.311520117122588e-06, "loss": 0.7036, "step": 4672 }, { "epoch": 0.1940020446395674, "grad_norm": 2.575796127319336, "learning_rate": 9.311179624025802e-06, "loss": 0.6647, "step": 4673 }, { "epoch": 0.19404356016377874, "grad_norm": 2.9562203884124756, "learning_rate": 9.310839052981642e-06, "loss": 0.5441, "step": 4674 }, { "epoch": 0.19408507568799008, "grad_norm": 3.0737369060516357, "learning_rate": 9.310498403996272e-06, "loss": 0.4089, "step": 4675 }, { "epoch": 0.1941265912122014, "grad_norm": 2.5158355236053467, "learning_rate": 9.310157677075847e-06, "loss": 0.3988, "step": 4676 }, { "epoch": 0.19416810673641274, "grad_norm": 2.482024908065796, "learning_rate": 9.309816872226528e-06, "loss": 0.5588, "step": 4677 }, { "epoch": 0.19420962226062408, "grad_norm": 2.4433071613311768, "learning_rate": 9.309475989454481e-06, "loss": 0.6196, "step": 4678 }, { "epoch": 0.1942511377848354, "grad_norm": 3.0682034492492676, "learning_rate": 9.309135028765865e-06, "loss": 0.566, "step": 4679 }, { "epoch": 0.19429265330904674, "grad_norm": 2.803598165512085, "learning_rate": 9.308793990166845e-06, "loss": 0.5506, "step": 4680 }, { "epoch": 0.19433416883325808, "grad_norm": 2.535522699356079, "learning_rate": 9.308452873663586e-06, "loss": 0.548, "step": 4681 }, { "epoch": 0.1943756843574694, "grad_norm": 2.6268959045410156, "learning_rate": 9.30811167926226e-06, "loss": 0.5045, "step": 4682 }, { "epoch": 0.19441719988168077, "grad_norm": 2.512406826019287, "learning_rate": 9.307770406969032e-06, "loss": 0.4321, "step": 4683 }, { "epoch": 0.1944587154058921, "grad_norm": 2.532144784927368, "learning_rate": 9.307429056790072e-06, "loss": 0.4276, "step": 4684 }, { "epoch": 0.19450023093010343, "grad_norm": 2.7377376556396484, "learning_rate": 9.307087628731554e-06, "loss": 0.4903, "step": 4685 }, { "epoch": 0.19454174645431477, "grad_norm": 2.3958582878112793, "learning_rate": 9.306746122799653e-06, "loss": 0.5826, "step": 4686 }, { "epoch": 0.1945832619785261, "grad_norm": 2.622358798980713, "learning_rate": 9.306404539000538e-06, "loss": 0.4534, "step": 4687 }, { "epoch": 0.19462477750273743, "grad_norm": 2.519235134124756, "learning_rate": 9.306062877340387e-06, "loss": 0.6578, "step": 4688 }, { "epoch": 0.19466629302694877, "grad_norm": 2.423612117767334, "learning_rate": 9.305721137825378e-06, "loss": 0.4775, "step": 4689 }, { "epoch": 0.1947078085511601, "grad_norm": 2.769212245941162, "learning_rate": 9.305379320461692e-06, "loss": 0.5251, "step": 4690 }, { "epoch": 0.19474932407537143, "grad_norm": 2.339111566543579, "learning_rate": 9.305037425255506e-06, "loss": 0.3127, "step": 4691 }, { "epoch": 0.19479083959958277, "grad_norm": 2.6871581077575684, "learning_rate": 9.304695452213003e-06, "loss": 0.4744, "step": 4692 }, { "epoch": 0.1948323551237941, "grad_norm": 2.4875617027282715, "learning_rate": 9.304353401340365e-06, "loss": 0.5792, "step": 4693 }, { "epoch": 0.19487387064800543, "grad_norm": 2.2316441535949707, "learning_rate": 9.304011272643776e-06, "loss": 0.4452, "step": 4694 }, { "epoch": 0.19491538617221676, "grad_norm": 2.823683738708496, "learning_rate": 9.303669066129423e-06, "loss": 0.5545, "step": 4695 }, { "epoch": 0.1949569016964281, "grad_norm": 2.341763496398926, "learning_rate": 9.303326781803493e-06, "loss": 0.5014, "step": 4696 }, { "epoch": 0.19499841722063943, "grad_norm": 2.6843268871307373, "learning_rate": 9.302984419672176e-06, "loss": 0.4609, "step": 4697 }, { "epoch": 0.1950399327448508, "grad_norm": 2.607419013977051, "learning_rate": 9.302641979741658e-06, "loss": 0.5404, "step": 4698 }, { "epoch": 0.19508144826906212, "grad_norm": 3.152611017227173, "learning_rate": 9.302299462018134e-06, "loss": 0.6269, "step": 4699 }, { "epoch": 0.19512296379327346, "grad_norm": 2.4709959030151367, "learning_rate": 9.301956866507795e-06, "loss": 0.3328, "step": 4700 }, { "epoch": 0.1951644793174848, "grad_norm": 2.868842124938965, "learning_rate": 9.301614193216837e-06, "loss": 0.7148, "step": 4701 }, { "epoch": 0.19520599484169612, "grad_norm": 2.8986005783081055, "learning_rate": 9.301271442151453e-06, "loss": 0.5907, "step": 4702 }, { "epoch": 0.19524751036590746, "grad_norm": 3.3489866256713867, "learning_rate": 9.300928613317844e-06, "loss": 0.5283, "step": 4703 }, { "epoch": 0.1952890258901188, "grad_norm": 2.1713709831237793, "learning_rate": 9.300585706722205e-06, "loss": 0.5368, "step": 4704 }, { "epoch": 0.19533054141433012, "grad_norm": 2.4477427005767822, "learning_rate": 9.300242722370735e-06, "loss": 0.6207, "step": 4705 }, { "epoch": 0.19537205693854146, "grad_norm": 2.5520598888397217, "learning_rate": 9.29989966026964e-06, "loss": 0.4179, "step": 4706 }, { "epoch": 0.1954135724627528, "grad_norm": 2.891862630844116, "learning_rate": 9.299556520425116e-06, "loss": 0.499, "step": 4707 }, { "epoch": 0.19545508798696412, "grad_norm": 2.807173490524292, "learning_rate": 9.299213302843375e-06, "loss": 0.6733, "step": 4708 }, { "epoch": 0.19549660351117545, "grad_norm": 2.409719467163086, "learning_rate": 9.298870007530615e-06, "loss": 0.5091, "step": 4709 }, { "epoch": 0.1955381190353868, "grad_norm": 2.2847859859466553, "learning_rate": 9.298526634493048e-06, "loss": 0.5389, "step": 4710 }, { "epoch": 0.19557963455959812, "grad_norm": 2.1501882076263428, "learning_rate": 9.298183183736881e-06, "loss": 0.5564, "step": 4711 }, { "epoch": 0.19562115008380945, "grad_norm": 2.6361167430877686, "learning_rate": 9.297839655268323e-06, "loss": 0.669, "step": 4712 }, { "epoch": 0.19566266560802079, "grad_norm": 2.739306688308716, "learning_rate": 9.297496049093583e-06, "loss": 0.5158, "step": 4713 }, { "epoch": 0.19570418113223215, "grad_norm": 2.69378662109375, "learning_rate": 9.297152365218877e-06, "loss": 0.4823, "step": 4714 }, { "epoch": 0.19574569665644348, "grad_norm": 2.537760019302368, "learning_rate": 9.296808603650421e-06, "loss": 0.4093, "step": 4715 }, { "epoch": 0.1957872121806548, "grad_norm": 2.1690733432769775, "learning_rate": 9.296464764394422e-06, "loss": 0.5622, "step": 4716 }, { "epoch": 0.19582872770486615, "grad_norm": 2.5563912391662598, "learning_rate": 9.296120847457104e-06, "loss": 0.6168, "step": 4717 }, { "epoch": 0.19587024322907748, "grad_norm": 2.691275119781494, "learning_rate": 9.295776852844682e-06, "loss": 0.6025, "step": 4718 }, { "epoch": 0.1959117587532888, "grad_norm": 3.371837615966797, "learning_rate": 9.295432780563378e-06, "loss": 0.5734, "step": 4719 }, { "epoch": 0.19595327427750014, "grad_norm": 2.4286210536956787, "learning_rate": 9.29508863061941e-06, "loss": 0.623, "step": 4720 }, { "epoch": 0.19599478980171148, "grad_norm": 2.7563107013702393, "learning_rate": 9.294744403019001e-06, "loss": 0.5374, "step": 4721 }, { "epoch": 0.1960363053259228, "grad_norm": 3.0700302124023438, "learning_rate": 9.294400097768377e-06, "loss": 0.7157, "step": 4722 }, { "epoch": 0.19607782085013414, "grad_norm": 2.359570026397705, "learning_rate": 9.294055714873759e-06, "loss": 0.5484, "step": 4723 }, { "epoch": 0.19611933637434548, "grad_norm": 2.323258399963379, "learning_rate": 9.293711254341377e-06, "loss": 0.4264, "step": 4724 }, { "epoch": 0.1961608518985568, "grad_norm": 2.6579487323760986, "learning_rate": 9.293366716177458e-06, "loss": 0.5133, "step": 4725 }, { "epoch": 0.19620236742276814, "grad_norm": 2.653320550918579, "learning_rate": 9.293022100388233e-06, "loss": 0.5007, "step": 4726 }, { "epoch": 0.19624388294697948, "grad_norm": 2.321040630340576, "learning_rate": 9.29267740697993e-06, "loss": 0.4666, "step": 4727 }, { "epoch": 0.1962853984711908, "grad_norm": 2.766596555709839, "learning_rate": 9.292332635958781e-06, "loss": 0.4855, "step": 4728 }, { "epoch": 0.19632691399540217, "grad_norm": 2.4430863857269287, "learning_rate": 9.291987787331022e-06, "loss": 0.4841, "step": 4729 }, { "epoch": 0.1963684295196135, "grad_norm": 2.484238624572754, "learning_rate": 9.291642861102887e-06, "loss": 0.5908, "step": 4730 }, { "epoch": 0.19640994504382484, "grad_norm": 2.5756890773773193, "learning_rate": 9.291297857280611e-06, "loss": 0.5838, "step": 4731 }, { "epoch": 0.19645146056803617, "grad_norm": 2.8563692569732666, "learning_rate": 9.290952775870433e-06, "loss": 0.5347, "step": 4732 }, { "epoch": 0.1964929760922475, "grad_norm": 2.648463487625122, "learning_rate": 9.290607616878593e-06, "loss": 0.564, "step": 4733 }, { "epoch": 0.19653449161645883, "grad_norm": 2.7498435974121094, "learning_rate": 9.29026238031133e-06, "loss": 0.5943, "step": 4734 }, { "epoch": 0.19657600714067017, "grad_norm": 2.922037124633789, "learning_rate": 9.289917066174887e-06, "loss": 0.6304, "step": 4735 }, { "epoch": 0.1966175226648815, "grad_norm": 3.215968370437622, "learning_rate": 9.289571674475506e-06, "loss": 0.5185, "step": 4736 }, { "epoch": 0.19665903818909283, "grad_norm": 2.459515333175659, "learning_rate": 9.289226205219432e-06, "loss": 0.5504, "step": 4737 }, { "epoch": 0.19670055371330417, "grad_norm": 2.7848501205444336, "learning_rate": 9.288880658412913e-06, "loss": 0.6305, "step": 4738 }, { "epoch": 0.1967420692375155, "grad_norm": 3.0295443534851074, "learning_rate": 9.288535034062197e-06, "loss": 0.605, "step": 4739 }, { "epoch": 0.19678358476172683, "grad_norm": 2.655395746231079, "learning_rate": 9.28818933217353e-06, "loss": 0.483, "step": 4740 }, { "epoch": 0.19682510028593816, "grad_norm": 3.234407663345337, "learning_rate": 9.287843552753165e-06, "loss": 0.4945, "step": 4741 }, { "epoch": 0.1968666158101495, "grad_norm": 2.661212921142578, "learning_rate": 9.28749769580735e-06, "loss": 0.4571, "step": 4742 }, { "epoch": 0.19690813133436083, "grad_norm": 2.3824820518493652, "learning_rate": 9.287151761342343e-06, "loss": 0.4598, "step": 4743 }, { "epoch": 0.19694964685857216, "grad_norm": 2.7189407348632812, "learning_rate": 9.286805749364396e-06, "loss": 0.4277, "step": 4744 }, { "epoch": 0.19699116238278352, "grad_norm": 2.3471875190734863, "learning_rate": 9.286459659879765e-06, "loss": 0.5106, "step": 4745 }, { "epoch": 0.19703267790699486, "grad_norm": 2.67657732963562, "learning_rate": 9.286113492894707e-06, "loss": 0.5411, "step": 4746 }, { "epoch": 0.1970741934312062, "grad_norm": 2.8083786964416504, "learning_rate": 9.285767248415485e-06, "loss": 0.737, "step": 4747 }, { "epoch": 0.19711570895541752, "grad_norm": 3.0269968509674072, "learning_rate": 9.285420926448355e-06, "loss": 0.5552, "step": 4748 }, { "epoch": 0.19715722447962886, "grad_norm": 2.8496742248535156, "learning_rate": 9.285074526999577e-06, "loss": 0.5292, "step": 4749 }, { "epoch": 0.1971987400038402, "grad_norm": 2.7593305110931396, "learning_rate": 9.284728050075418e-06, "loss": 0.5805, "step": 4750 }, { "epoch": 0.19724025552805152, "grad_norm": 2.6473333835601807, "learning_rate": 9.28438149568214e-06, "loss": 0.5464, "step": 4751 }, { "epoch": 0.19728177105226286, "grad_norm": 2.6031136512756348, "learning_rate": 9.28403486382601e-06, "loss": 0.5685, "step": 4752 }, { "epoch": 0.1973232865764742, "grad_norm": 2.5168893337249756, "learning_rate": 9.283688154513295e-06, "loss": 0.5046, "step": 4753 }, { "epoch": 0.19736480210068552, "grad_norm": 2.525874376296997, "learning_rate": 9.283341367750264e-06, "loss": 0.5592, "step": 4754 }, { "epoch": 0.19740631762489685, "grad_norm": 2.9518237113952637, "learning_rate": 9.282994503543185e-06, "loss": 0.6159, "step": 4755 }, { "epoch": 0.1974478331491082, "grad_norm": 3.170240640640259, "learning_rate": 9.282647561898333e-06, "loss": 0.4487, "step": 4756 }, { "epoch": 0.19748934867331952, "grad_norm": 2.3572275638580322, "learning_rate": 9.282300542821978e-06, "loss": 0.5618, "step": 4757 }, { "epoch": 0.19753086419753085, "grad_norm": 2.823969841003418, "learning_rate": 9.281953446320395e-06, "loss": 0.4914, "step": 4758 }, { "epoch": 0.1975723797217422, "grad_norm": 2.5689151287078857, "learning_rate": 9.281606272399859e-06, "loss": 0.5554, "step": 4759 }, { "epoch": 0.19761389524595355, "grad_norm": 3.06437349319458, "learning_rate": 9.281259021066646e-06, "loss": 0.725, "step": 4760 }, { "epoch": 0.19765541077016488, "grad_norm": 2.5424346923828125, "learning_rate": 9.28091169232704e-06, "loss": 0.6456, "step": 4761 }, { "epoch": 0.1976969262943762, "grad_norm": 2.408576011657715, "learning_rate": 9.280564286187312e-06, "loss": 0.5469, "step": 4762 }, { "epoch": 0.19773844181858755, "grad_norm": 2.378523588180542, "learning_rate": 9.280216802653752e-06, "loss": 0.5915, "step": 4763 }, { "epoch": 0.19777995734279888, "grad_norm": 2.599466323852539, "learning_rate": 9.279869241732635e-06, "loss": 0.502, "step": 4764 }, { "epoch": 0.1978214728670102, "grad_norm": 2.4039981365203857, "learning_rate": 9.27952160343025e-06, "loss": 0.5765, "step": 4765 }, { "epoch": 0.19786298839122154, "grad_norm": 2.5032477378845215, "learning_rate": 9.279173887752881e-06, "loss": 0.3714, "step": 4766 }, { "epoch": 0.19790450391543288, "grad_norm": 2.7338290214538574, "learning_rate": 9.278826094706816e-06, "loss": 0.5308, "step": 4767 }, { "epoch": 0.1979460194396442, "grad_norm": 3.0099220275878906, "learning_rate": 9.27847822429834e-06, "loss": 0.6219, "step": 4768 }, { "epoch": 0.19798753496385554, "grad_norm": 3.347050428390503, "learning_rate": 9.278130276533745e-06, "loss": 0.5357, "step": 4769 }, { "epoch": 0.19802905048806688, "grad_norm": 2.650486707687378, "learning_rate": 9.277782251419323e-06, "loss": 0.4563, "step": 4770 }, { "epoch": 0.1980705660122782, "grad_norm": 2.7236790657043457, "learning_rate": 9.277434148961364e-06, "loss": 0.6652, "step": 4771 }, { "epoch": 0.19811208153648954, "grad_norm": 2.509230375289917, "learning_rate": 9.277085969166162e-06, "loss": 0.5156, "step": 4772 }, { "epoch": 0.19815359706070088, "grad_norm": 2.2759923934936523, "learning_rate": 9.276737712040012e-06, "loss": 0.6075, "step": 4773 }, { "epoch": 0.1981951125849122, "grad_norm": 2.5633068084716797, "learning_rate": 9.276389377589214e-06, "loss": 0.5743, "step": 4774 }, { "epoch": 0.19823662810912354, "grad_norm": 2.6341679096221924, "learning_rate": 9.276040965820062e-06, "loss": 0.5559, "step": 4775 }, { "epoch": 0.1982781436333349, "grad_norm": 2.171337842941284, "learning_rate": 9.275692476738857e-06, "loss": 0.473, "step": 4776 }, { "epoch": 0.19831965915754624, "grad_norm": 2.563182830810547, "learning_rate": 9.2753439103519e-06, "loss": 0.4503, "step": 4777 }, { "epoch": 0.19836117468175757, "grad_norm": 2.327916145324707, "learning_rate": 9.274995266665492e-06, "loss": 0.4624, "step": 4778 }, { "epoch": 0.1984026902059689, "grad_norm": 2.488084554672241, "learning_rate": 9.274646545685936e-06, "loss": 0.4656, "step": 4779 }, { "epoch": 0.19844420573018023, "grad_norm": 2.51749587059021, "learning_rate": 9.274297747419542e-06, "loss": 0.4013, "step": 4780 }, { "epoch": 0.19848572125439157, "grad_norm": 2.5961225032806396, "learning_rate": 9.273948871872611e-06, "loss": 0.6706, "step": 4781 }, { "epoch": 0.1985272367786029, "grad_norm": 2.3548130989074707, "learning_rate": 9.273599919051452e-06, "loss": 0.5314, "step": 4782 }, { "epoch": 0.19856875230281423, "grad_norm": 2.431861162185669, "learning_rate": 9.273250888962375e-06, "loss": 0.4106, "step": 4783 }, { "epoch": 0.19861026782702557, "grad_norm": 2.564770460128784, "learning_rate": 9.27290178161169e-06, "loss": 0.6064, "step": 4784 }, { "epoch": 0.1986517833512369, "grad_norm": 2.4991846084594727, "learning_rate": 9.272552597005711e-06, "loss": 0.4944, "step": 4785 }, { "epoch": 0.19869329887544823, "grad_norm": 2.2209062576293945, "learning_rate": 9.272203335150748e-06, "loss": 0.5532, "step": 4786 }, { "epoch": 0.19873481439965957, "grad_norm": 2.509768009185791, "learning_rate": 9.271853996053117e-06, "loss": 0.6235, "step": 4787 }, { "epoch": 0.1987763299238709, "grad_norm": 3.131272315979004, "learning_rate": 9.271504579719134e-06, "loss": 0.5489, "step": 4788 }, { "epoch": 0.19881784544808223, "grad_norm": 2.4946069717407227, "learning_rate": 9.27115508615512e-06, "loss": 0.4409, "step": 4789 }, { "epoch": 0.19885936097229356, "grad_norm": 2.100651741027832, "learning_rate": 9.270805515367388e-06, "loss": 0.3583, "step": 4790 }, { "epoch": 0.19890087649650492, "grad_norm": 2.5845487117767334, "learning_rate": 9.270455867362262e-06, "loss": 0.5007, "step": 4791 }, { "epoch": 0.19894239202071626, "grad_norm": 2.7125189304351807, "learning_rate": 9.270106142146063e-06, "loss": 0.4966, "step": 4792 }, { "epoch": 0.1989839075449276, "grad_norm": 2.484151840209961, "learning_rate": 9.269756339725115e-06, "loss": 0.4644, "step": 4793 }, { "epoch": 0.19902542306913892, "grad_norm": 2.5178098678588867, "learning_rate": 9.269406460105742e-06, "loss": 0.6092, "step": 4794 }, { "epoch": 0.19906693859335026, "grad_norm": 2.6603140830993652, "learning_rate": 9.269056503294269e-06, "loss": 0.5156, "step": 4795 }, { "epoch": 0.1991084541175616, "grad_norm": 2.369779348373413, "learning_rate": 9.268706469297023e-06, "loss": 0.5703, "step": 4796 }, { "epoch": 0.19914996964177292, "grad_norm": 2.4483180046081543, "learning_rate": 9.268356358120333e-06, "loss": 0.6041, "step": 4797 }, { "epoch": 0.19919148516598426, "grad_norm": 2.7203829288482666, "learning_rate": 9.268006169770532e-06, "loss": 0.5681, "step": 4798 }, { "epoch": 0.1992330006901956, "grad_norm": 2.9568729400634766, "learning_rate": 9.26765590425395e-06, "loss": 0.6017, "step": 4799 }, { "epoch": 0.19927451621440692, "grad_norm": 2.742074489593506, "learning_rate": 9.267305561576915e-06, "loss": 0.5761, "step": 4800 }, { "epoch": 0.19931603173861825, "grad_norm": 3.0170106887817383, "learning_rate": 9.266955141745771e-06, "loss": 0.5106, "step": 4801 }, { "epoch": 0.1993575472628296, "grad_norm": 1.9644014835357666, "learning_rate": 9.266604644766844e-06, "loss": 0.5185, "step": 4802 }, { "epoch": 0.19939906278704092, "grad_norm": 2.596698045730591, "learning_rate": 9.266254070646477e-06, "loss": 0.5797, "step": 4803 }, { "epoch": 0.19944057831125225, "grad_norm": 2.625711679458618, "learning_rate": 9.265903419391005e-06, "loss": 0.7019, "step": 4804 }, { "epoch": 0.1994820938354636, "grad_norm": 3.1416678428649902, "learning_rate": 9.265552691006771e-06, "loss": 0.6156, "step": 4805 }, { "epoch": 0.19952360935967495, "grad_norm": 2.256802797317505, "learning_rate": 9.265201885500115e-06, "loss": 0.3949, "step": 4806 }, { "epoch": 0.19956512488388628, "grad_norm": 2.1365749835968018, "learning_rate": 9.26485100287738e-06, "loss": 0.4045, "step": 4807 }, { "epoch": 0.1996066404080976, "grad_norm": 2.9808316230773926, "learning_rate": 9.264500043144908e-06, "loss": 0.4808, "step": 4808 }, { "epoch": 0.19964815593230895, "grad_norm": 2.368006467819214, "learning_rate": 9.264149006309048e-06, "loss": 0.5332, "step": 4809 }, { "epoch": 0.19968967145652028, "grad_norm": 3.5906145572662354, "learning_rate": 9.263797892376143e-06, "loss": 0.4133, "step": 4810 }, { "epoch": 0.1997311869807316, "grad_norm": 2.2967560291290283, "learning_rate": 9.263446701352544e-06, "loss": 0.5856, "step": 4811 }, { "epoch": 0.19977270250494295, "grad_norm": 2.3475067615509033, "learning_rate": 9.2630954332446e-06, "loss": 0.5452, "step": 4812 }, { "epoch": 0.19981421802915428, "grad_norm": 2.405548572540283, "learning_rate": 9.262744088058662e-06, "loss": 0.5052, "step": 4813 }, { "epoch": 0.1998557335533656, "grad_norm": 2.530034065246582, "learning_rate": 9.262392665801083e-06, "loss": 0.5154, "step": 4814 }, { "epoch": 0.19989724907757694, "grad_norm": 2.6569759845733643, "learning_rate": 9.262041166478215e-06, "loss": 0.5667, "step": 4815 }, { "epoch": 0.19993876460178828, "grad_norm": 3.8440961837768555, "learning_rate": 9.261689590096412e-06, "loss": 0.5096, "step": 4816 }, { "epoch": 0.1999802801259996, "grad_norm": 2.822190284729004, "learning_rate": 9.261337936662035e-06, "loss": 0.7339, "step": 4817 }, { "epoch": 0.20002179565021094, "grad_norm": 2.266514301300049, "learning_rate": 9.260986206181441e-06, "loss": 0.5462, "step": 4818 }, { "epoch": 0.20006331117442228, "grad_norm": 2.3702237606048584, "learning_rate": 9.260634398660987e-06, "loss": 0.546, "step": 4819 }, { "epoch": 0.2001048266986336, "grad_norm": 2.283897638320923, "learning_rate": 9.260282514107034e-06, "loss": 0.5339, "step": 4820 }, { "epoch": 0.20014634222284494, "grad_norm": 2.3188517093658447, "learning_rate": 9.259930552525947e-06, "loss": 0.4379, "step": 4821 }, { "epoch": 0.2001878577470563, "grad_norm": 3.1861703395843506, "learning_rate": 9.259578513924088e-06, "loss": 0.5908, "step": 4822 }, { "epoch": 0.20022937327126764, "grad_norm": 2.6380090713500977, "learning_rate": 9.25922639830782e-06, "loss": 0.478, "step": 4823 }, { "epoch": 0.20027088879547897, "grad_norm": 2.3705132007598877, "learning_rate": 9.258874205683512e-06, "loss": 0.5314, "step": 4824 }, { "epoch": 0.2003124043196903, "grad_norm": 3.134192705154419, "learning_rate": 9.25852193605753e-06, "loss": 0.6903, "step": 4825 }, { "epoch": 0.20035391984390163, "grad_norm": 2.3712897300720215, "learning_rate": 9.258169589436246e-06, "loss": 0.5607, "step": 4826 }, { "epoch": 0.20039543536811297, "grad_norm": 2.5525214672088623, "learning_rate": 9.257817165826027e-06, "loss": 0.5866, "step": 4827 }, { "epoch": 0.2004369508923243, "grad_norm": 2.559276580810547, "learning_rate": 9.257464665233247e-06, "loss": 0.4508, "step": 4828 }, { "epoch": 0.20047846641653563, "grad_norm": 3.1170647144317627, "learning_rate": 9.257112087664277e-06, "loss": 0.4789, "step": 4829 }, { "epoch": 0.20051998194074697, "grad_norm": 2.5517053604125977, "learning_rate": 9.256759433125495e-06, "loss": 0.4986, "step": 4830 }, { "epoch": 0.2005614974649583, "grad_norm": 2.8878674507141113, "learning_rate": 9.256406701623275e-06, "loss": 0.6527, "step": 4831 }, { "epoch": 0.20060301298916963, "grad_norm": 2.3558855056762695, "learning_rate": 9.256053893163997e-06, "loss": 0.4786, "step": 4832 }, { "epoch": 0.20064452851338097, "grad_norm": 2.611625909805298, "learning_rate": 9.255701007754034e-06, "loss": 0.5449, "step": 4833 }, { "epoch": 0.2006860440375923, "grad_norm": 2.5512449741363525, "learning_rate": 9.255348045399774e-06, "loss": 0.4562, "step": 4834 }, { "epoch": 0.20072755956180363, "grad_norm": 2.510824680328369, "learning_rate": 9.254995006107592e-06, "loss": 0.5436, "step": 4835 }, { "epoch": 0.20076907508601496, "grad_norm": 2.7386157512664795, "learning_rate": 9.254641889883875e-06, "loss": 0.4755, "step": 4836 }, { "epoch": 0.20081059061022632, "grad_norm": 2.8008875846862793, "learning_rate": 9.254288696735007e-06, "loss": 0.4972, "step": 4837 }, { "epoch": 0.20085210613443766, "grad_norm": 2.849069833755493, "learning_rate": 9.253935426667372e-06, "loss": 0.4629, "step": 4838 }, { "epoch": 0.200893621658649, "grad_norm": 2.706712484359741, "learning_rate": 9.253582079687358e-06, "loss": 0.5012, "step": 4839 }, { "epoch": 0.20093513718286032, "grad_norm": 4.327095031738281, "learning_rate": 9.253228655801356e-06, "loss": 0.846, "step": 4840 }, { "epoch": 0.20097665270707166, "grad_norm": 3.0114753246307373, "learning_rate": 9.252875155015753e-06, "loss": 0.6059, "step": 4841 }, { "epoch": 0.201018168231283, "grad_norm": 2.0747177600860596, "learning_rate": 9.252521577336941e-06, "loss": 0.5216, "step": 4842 }, { "epoch": 0.20105968375549432, "grad_norm": 2.6193959712982178, "learning_rate": 9.252167922771314e-06, "loss": 0.4819, "step": 4843 }, { "epoch": 0.20110119927970566, "grad_norm": 2.5324065685272217, "learning_rate": 9.251814191325263e-06, "loss": 0.4997, "step": 4844 }, { "epoch": 0.201142714803917, "grad_norm": 2.470524549484253, "learning_rate": 9.251460383005188e-06, "loss": 0.4669, "step": 4845 }, { "epoch": 0.20118423032812832, "grad_norm": 2.6220219135284424, "learning_rate": 9.251106497817484e-06, "loss": 0.4563, "step": 4846 }, { "epoch": 0.20122574585233965, "grad_norm": 2.597538948059082, "learning_rate": 9.250752535768548e-06, "loss": 0.4603, "step": 4847 }, { "epoch": 0.201267261376551, "grad_norm": 2.769221305847168, "learning_rate": 9.250398496864782e-06, "loss": 0.4077, "step": 4848 }, { "epoch": 0.20130877690076232, "grad_norm": 2.4053802490234375, "learning_rate": 9.250044381112586e-06, "loss": 0.5355, "step": 4849 }, { "epoch": 0.20135029242497365, "grad_norm": 2.873339891433716, "learning_rate": 9.249690188518361e-06, "loss": 0.6087, "step": 4850 }, { "epoch": 0.201391807949185, "grad_norm": 2.8637256622314453, "learning_rate": 9.249335919088514e-06, "loss": 0.4991, "step": 4851 }, { "epoch": 0.20143332347339632, "grad_norm": 2.1332263946533203, "learning_rate": 9.24898157282945e-06, "loss": 0.5363, "step": 4852 }, { "epoch": 0.20147483899760768, "grad_norm": 2.5924205780029297, "learning_rate": 9.248627149747573e-06, "loss": 0.495, "step": 4853 }, { "epoch": 0.201516354521819, "grad_norm": 2.3621954917907715, "learning_rate": 9.248272649849292e-06, "loss": 0.6134, "step": 4854 }, { "epoch": 0.20155787004603035, "grad_norm": 2.194636821746826, "learning_rate": 9.247918073141018e-06, "loss": 0.5785, "step": 4855 }, { "epoch": 0.20159938557024168, "grad_norm": 2.6055049896240234, "learning_rate": 9.24756341962916e-06, "loss": 0.568, "step": 4856 }, { "epoch": 0.201640901094453, "grad_norm": 2.5605413913726807, "learning_rate": 9.247208689320133e-06, "loss": 0.6156, "step": 4857 }, { "epoch": 0.20168241661866435, "grad_norm": 2.4243218898773193, "learning_rate": 9.246853882220345e-06, "loss": 0.5589, "step": 4858 }, { "epoch": 0.20172393214287568, "grad_norm": 2.7870655059814453, "learning_rate": 9.246498998336218e-06, "loss": 0.4407, "step": 4859 }, { "epoch": 0.201765447667087, "grad_norm": 2.0495662689208984, "learning_rate": 9.246144037674165e-06, "loss": 0.4868, "step": 4860 }, { "epoch": 0.20180696319129834, "grad_norm": 3.3091912269592285, "learning_rate": 9.245789000240602e-06, "loss": 0.6332, "step": 4861 }, { "epoch": 0.20184847871550968, "grad_norm": 2.77130126953125, "learning_rate": 9.245433886041952e-06, "loss": 0.5323, "step": 4862 }, { "epoch": 0.201889994239721, "grad_norm": 2.408525228500366, "learning_rate": 9.245078695084632e-06, "loss": 0.4806, "step": 4863 }, { "epoch": 0.20193150976393234, "grad_norm": 2.357050657272339, "learning_rate": 9.244723427375067e-06, "loss": 0.4319, "step": 4864 }, { "epoch": 0.20197302528814368, "grad_norm": 2.92146897315979, "learning_rate": 9.244368082919679e-06, "loss": 0.4915, "step": 4865 }, { "epoch": 0.202014540812355, "grad_norm": 2.42806339263916, "learning_rate": 9.244012661724892e-06, "loss": 0.5412, "step": 4866 }, { "epoch": 0.20205605633656634, "grad_norm": 2.7388715744018555, "learning_rate": 9.243657163797132e-06, "loss": 0.4765, "step": 4867 }, { "epoch": 0.2020975718607777, "grad_norm": 2.379817485809326, "learning_rate": 9.24330158914283e-06, "loss": 0.4772, "step": 4868 }, { "epoch": 0.20213908738498904, "grad_norm": 2.5952000617980957, "learning_rate": 9.24294593776841e-06, "loss": 0.4733, "step": 4869 }, { "epoch": 0.20218060290920037, "grad_norm": 2.686556339263916, "learning_rate": 9.242590209680304e-06, "loss": 0.5249, "step": 4870 }, { "epoch": 0.2022221184334117, "grad_norm": 2.683997392654419, "learning_rate": 9.242234404884946e-06, "loss": 0.5866, "step": 4871 }, { "epoch": 0.20226363395762303, "grad_norm": 2.709791898727417, "learning_rate": 9.241878523388767e-06, "loss": 0.4828, "step": 4872 }, { "epoch": 0.20230514948183437, "grad_norm": 2.6120662689208984, "learning_rate": 9.2415225651982e-06, "loss": 0.6401, "step": 4873 }, { "epoch": 0.2023466650060457, "grad_norm": 3.0003654956817627, "learning_rate": 9.241166530319684e-06, "loss": 0.4763, "step": 4874 }, { "epoch": 0.20238818053025703, "grad_norm": 2.4969112873077393, "learning_rate": 9.240810418759656e-06, "loss": 0.5821, "step": 4875 }, { "epoch": 0.20242969605446837, "grad_norm": 2.954646110534668, "learning_rate": 9.24045423052455e-06, "loss": 0.6773, "step": 4876 }, { "epoch": 0.2024712115786797, "grad_norm": 2.4696242809295654, "learning_rate": 9.24009796562081e-06, "loss": 0.5494, "step": 4877 }, { "epoch": 0.20251272710289103, "grad_norm": 2.857163667678833, "learning_rate": 9.23974162405488e-06, "loss": 0.3881, "step": 4878 }, { "epoch": 0.20255424262710237, "grad_norm": 4.698413848876953, "learning_rate": 9.239385205833196e-06, "loss": 0.6328, "step": 4879 }, { "epoch": 0.2025957581513137, "grad_norm": 3.0105478763580322, "learning_rate": 9.239028710962206e-06, "loss": 0.5965, "step": 4880 }, { "epoch": 0.20263727367552503, "grad_norm": 2.9540348052978516, "learning_rate": 9.238672139448354e-06, "loss": 0.5335, "step": 4881 }, { "epoch": 0.20267878919973636, "grad_norm": 2.352323293685913, "learning_rate": 9.238315491298092e-06, "loss": 0.5837, "step": 4882 }, { "epoch": 0.2027203047239477, "grad_norm": 2.9521942138671875, "learning_rate": 9.23795876651786e-06, "loss": 0.5348, "step": 4883 }, { "epoch": 0.20276182024815906, "grad_norm": 2.7055318355560303, "learning_rate": 9.237601965114115e-06, "loss": 0.6506, "step": 4884 }, { "epoch": 0.2028033357723704, "grad_norm": 2.310608386993408, "learning_rate": 9.237245087093303e-06, "loss": 0.6858, "step": 4885 }, { "epoch": 0.20284485129658172, "grad_norm": 2.7305309772491455, "learning_rate": 9.23688813246188e-06, "loss": 0.4771, "step": 4886 }, { "epoch": 0.20288636682079306, "grad_norm": 2.5843582153320312, "learning_rate": 9.236531101226299e-06, "loss": 0.4974, "step": 4887 }, { "epoch": 0.2029278823450044, "grad_norm": 2.153508186340332, "learning_rate": 9.236173993393011e-06, "loss": 0.4569, "step": 4888 }, { "epoch": 0.20296939786921572, "grad_norm": 2.149272918701172, "learning_rate": 9.23581680896848e-06, "loss": 0.4531, "step": 4889 }, { "epoch": 0.20301091339342706, "grad_norm": 2.054656982421875, "learning_rate": 9.235459547959159e-06, "loss": 0.4274, "step": 4890 }, { "epoch": 0.2030524289176384, "grad_norm": 2.1502819061279297, "learning_rate": 9.235102210371508e-06, "loss": 0.5355, "step": 4891 }, { "epoch": 0.20309394444184972, "grad_norm": 2.5410995483398438, "learning_rate": 9.234744796211989e-06, "loss": 0.4005, "step": 4892 }, { "epoch": 0.20313545996606105, "grad_norm": 2.2507381439208984, "learning_rate": 9.234387305487063e-06, "loss": 0.4112, "step": 4893 }, { "epoch": 0.2031769754902724, "grad_norm": 2.727569103240967, "learning_rate": 9.234029738203194e-06, "loss": 0.5516, "step": 4894 }, { "epoch": 0.20321849101448372, "grad_norm": 2.677821159362793, "learning_rate": 9.233672094366847e-06, "loss": 0.5078, "step": 4895 }, { "epoch": 0.20326000653869505, "grad_norm": 2.827256202697754, "learning_rate": 9.233314373984489e-06, "loss": 0.5945, "step": 4896 }, { "epoch": 0.2033015220629064, "grad_norm": 2.4465925693511963, "learning_rate": 9.232956577062585e-06, "loss": 0.5467, "step": 4897 }, { "epoch": 0.20334303758711772, "grad_norm": 2.344931125640869, "learning_rate": 9.23259870360761e-06, "loss": 0.5235, "step": 4898 }, { "epoch": 0.20338455311132908, "grad_norm": 2.6767687797546387, "learning_rate": 9.232240753626027e-06, "loss": 0.5101, "step": 4899 }, { "epoch": 0.2034260686355404, "grad_norm": 2.6049180030822754, "learning_rate": 9.231882727124312e-06, "loss": 0.4724, "step": 4900 }, { "epoch": 0.20346758415975175, "grad_norm": 2.4360015392303467, "learning_rate": 9.231524624108938e-06, "loss": 0.38, "step": 4901 }, { "epoch": 0.20350909968396308, "grad_norm": 2.3644752502441406, "learning_rate": 9.231166444586379e-06, "loss": 0.3344, "step": 4902 }, { "epoch": 0.2035506152081744, "grad_norm": 2.4158475399017334, "learning_rate": 9.23080818856311e-06, "loss": 0.4997, "step": 4903 }, { "epoch": 0.20359213073238575, "grad_norm": 2.2511444091796875, "learning_rate": 9.230449856045611e-06, "loss": 0.4525, "step": 4904 }, { "epoch": 0.20363364625659708, "grad_norm": 2.419692277908325, "learning_rate": 9.23009144704036e-06, "loss": 0.4034, "step": 4905 }, { "epoch": 0.2036751617808084, "grad_norm": 2.138049840927124, "learning_rate": 9.229732961553835e-06, "loss": 0.5429, "step": 4906 }, { "epoch": 0.20371667730501974, "grad_norm": 2.627957582473755, "learning_rate": 9.229374399592518e-06, "loss": 0.5556, "step": 4907 }, { "epoch": 0.20375819282923108, "grad_norm": 2.7932472229003906, "learning_rate": 9.229015761162894e-06, "loss": 0.5686, "step": 4908 }, { "epoch": 0.2037997083534424, "grad_norm": 2.8101983070373535, "learning_rate": 9.228657046271446e-06, "loss": 0.4439, "step": 4909 }, { "epoch": 0.20384122387765374, "grad_norm": 2.1146905422210693, "learning_rate": 9.22829825492466e-06, "loss": 0.456, "step": 4910 }, { "epoch": 0.20388273940186508, "grad_norm": 2.4978489875793457, "learning_rate": 9.227939387129023e-06, "loss": 0.476, "step": 4911 }, { "epoch": 0.2039242549260764, "grad_norm": 2.1777255535125732, "learning_rate": 9.227580442891022e-06, "loss": 0.5401, "step": 4912 }, { "epoch": 0.20396577045028774, "grad_norm": 2.602299690246582, "learning_rate": 9.22722142221715e-06, "loss": 0.4871, "step": 4913 }, { "epoch": 0.2040072859744991, "grad_norm": 2.9453511238098145, "learning_rate": 9.226862325113894e-06, "loss": 0.5388, "step": 4914 }, { "epoch": 0.20404880149871044, "grad_norm": 2.5037841796875, "learning_rate": 9.226503151587751e-06, "loss": 0.4063, "step": 4915 }, { "epoch": 0.20409031702292177, "grad_norm": 2.392179489135742, "learning_rate": 9.22614390164521e-06, "loss": 0.4426, "step": 4916 }, { "epoch": 0.2041318325471331, "grad_norm": 2.67718768119812, "learning_rate": 9.225784575292772e-06, "loss": 0.4581, "step": 4917 }, { "epoch": 0.20417334807134443, "grad_norm": 2.5926694869995117, "learning_rate": 9.22542517253693e-06, "loss": 0.4991, "step": 4918 }, { "epoch": 0.20421486359555577, "grad_norm": 2.7267141342163086, "learning_rate": 9.225065693384183e-06, "loss": 0.6117, "step": 4919 }, { "epoch": 0.2042563791197671, "grad_norm": 3.338282823562622, "learning_rate": 9.22470613784103e-06, "loss": 0.4886, "step": 4920 }, { "epoch": 0.20429789464397843, "grad_norm": 2.624565839767456, "learning_rate": 9.224346505913972e-06, "loss": 0.5127, "step": 4921 }, { "epoch": 0.20433941016818977, "grad_norm": 2.677196741104126, "learning_rate": 9.223986797609513e-06, "loss": 0.6279, "step": 4922 }, { "epoch": 0.2043809256924011, "grad_norm": 2.7493510246276855, "learning_rate": 9.223627012934156e-06, "loss": 0.607, "step": 4923 }, { "epoch": 0.20442244121661243, "grad_norm": 2.507301092147827, "learning_rate": 9.223267151894403e-06, "loss": 0.6164, "step": 4924 }, { "epoch": 0.20446395674082377, "grad_norm": 2.2726564407348633, "learning_rate": 9.222907214496764e-06, "loss": 0.6192, "step": 4925 }, { "epoch": 0.2045054722650351, "grad_norm": 3.121131420135498, "learning_rate": 9.222547200747744e-06, "loss": 0.4373, "step": 4926 }, { "epoch": 0.20454698778924643, "grad_norm": 2.614995002746582, "learning_rate": 9.222187110653852e-06, "loss": 0.5862, "step": 4927 }, { "epoch": 0.20458850331345776, "grad_norm": 2.2965798377990723, "learning_rate": 9.221826944221604e-06, "loss": 0.495, "step": 4928 }, { "epoch": 0.2046300188376691, "grad_norm": 2.6884381771087646, "learning_rate": 9.221466701457506e-06, "loss": 0.6798, "step": 4929 }, { "epoch": 0.20467153436188046, "grad_norm": 3.0531136989593506, "learning_rate": 9.221106382368074e-06, "loss": 0.6089, "step": 4930 }, { "epoch": 0.2047130498860918, "grad_norm": 2.7659175395965576, "learning_rate": 9.220745986959822e-06, "loss": 0.5977, "step": 4931 }, { "epoch": 0.20475456541030312, "grad_norm": 3.6279959678649902, "learning_rate": 9.220385515239266e-06, "loss": 0.5411, "step": 4932 }, { "epoch": 0.20479608093451446, "grad_norm": 2.3706541061401367, "learning_rate": 9.220024967212921e-06, "loss": 0.5084, "step": 4933 }, { "epoch": 0.2048375964587258, "grad_norm": 2.8742358684539795, "learning_rate": 9.219664342887312e-06, "loss": 0.5489, "step": 4934 }, { "epoch": 0.20487911198293712, "grad_norm": 2.8311781883239746, "learning_rate": 9.219303642268953e-06, "loss": 0.5488, "step": 4935 }, { "epoch": 0.20492062750714846, "grad_norm": 2.7268102169036865, "learning_rate": 9.218942865364369e-06, "loss": 0.5374, "step": 4936 }, { "epoch": 0.2049621430313598, "grad_norm": 2.363635540008545, "learning_rate": 9.218582012180082e-06, "loss": 0.6286, "step": 4937 }, { "epoch": 0.20500365855557112, "grad_norm": 2.473203182220459, "learning_rate": 9.218221082722617e-06, "loss": 0.5575, "step": 4938 }, { "epoch": 0.20504517407978246, "grad_norm": 2.735992193222046, "learning_rate": 9.217860076998498e-06, "loss": 0.5695, "step": 4939 }, { "epoch": 0.2050866896039938, "grad_norm": 2.2433810234069824, "learning_rate": 9.21749899501425e-06, "loss": 0.6522, "step": 4940 }, { "epoch": 0.20512820512820512, "grad_norm": 2.3477072715759277, "learning_rate": 9.217137836776408e-06, "loss": 0.3396, "step": 4941 }, { "epoch": 0.20516972065241645, "grad_norm": 2.185086488723755, "learning_rate": 9.216776602291499e-06, "loss": 0.4818, "step": 4942 }, { "epoch": 0.2052112361766278, "grad_norm": 3.003493309020996, "learning_rate": 9.216415291566052e-06, "loss": 0.613, "step": 4943 }, { "epoch": 0.20525275170083912, "grad_norm": 2.855921745300293, "learning_rate": 9.216053904606603e-06, "loss": 0.6428, "step": 4944 }, { "epoch": 0.20529426722505048, "grad_norm": 2.4952738285064697, "learning_rate": 9.215692441419683e-06, "loss": 0.4971, "step": 4945 }, { "epoch": 0.2053357827492618, "grad_norm": 2.359396457672119, "learning_rate": 9.215330902011828e-06, "loss": 0.6056, "step": 4946 }, { "epoch": 0.20537729827347315, "grad_norm": 3.183016538619995, "learning_rate": 9.214969286389577e-06, "loss": 0.4903, "step": 4947 }, { "epoch": 0.20541881379768448, "grad_norm": 3.0860893726348877, "learning_rate": 9.214607594559464e-06, "loss": 0.5345, "step": 4948 }, { "epoch": 0.2054603293218958, "grad_norm": 2.2956795692443848, "learning_rate": 9.214245826528032e-06, "loss": 0.4301, "step": 4949 }, { "epoch": 0.20550184484610715, "grad_norm": 2.325359344482422, "learning_rate": 9.213883982301822e-06, "loss": 0.56, "step": 4950 }, { "epoch": 0.20554336037031848, "grad_norm": 2.5829169750213623, "learning_rate": 9.213522061887374e-06, "loss": 0.5573, "step": 4951 }, { "epoch": 0.2055848758945298, "grad_norm": 2.5277514457702637, "learning_rate": 9.213160065291233e-06, "loss": 0.5986, "step": 4952 }, { "epoch": 0.20562639141874114, "grad_norm": 2.553138256072998, "learning_rate": 9.212797992519942e-06, "loss": 0.4483, "step": 4953 }, { "epoch": 0.20566790694295248, "grad_norm": 2.6328773498535156, "learning_rate": 9.21243584358005e-06, "loss": 0.5664, "step": 4954 }, { "epoch": 0.2057094224671638, "grad_norm": 2.538151502609253, "learning_rate": 9.212073618478105e-06, "loss": 0.4474, "step": 4955 }, { "epoch": 0.20575093799137514, "grad_norm": 2.4769363403320312, "learning_rate": 9.211711317220654e-06, "loss": 0.4885, "step": 4956 }, { "epoch": 0.20579245351558648, "grad_norm": 2.3311305046081543, "learning_rate": 9.211348939814249e-06, "loss": 0.4915, "step": 4957 }, { "epoch": 0.2058339690397978, "grad_norm": 2.5123324394226074, "learning_rate": 9.21098648626544e-06, "loss": 0.5636, "step": 4958 }, { "epoch": 0.20587548456400914, "grad_norm": 2.389401912689209, "learning_rate": 9.210623956580785e-06, "loss": 0.4235, "step": 4959 }, { "epoch": 0.20591700008822048, "grad_norm": 2.478888750076294, "learning_rate": 9.210261350766832e-06, "loss": 0.5579, "step": 4960 }, { "epoch": 0.20595851561243184, "grad_norm": 2.023679733276367, "learning_rate": 9.20989866883014e-06, "loss": 0.4711, "step": 4961 }, { "epoch": 0.20600003113664317, "grad_norm": 2.6142969131469727, "learning_rate": 9.209535910777267e-06, "loss": 0.4903, "step": 4962 }, { "epoch": 0.2060415466608545, "grad_norm": 2.239284038543701, "learning_rate": 9.209173076614773e-06, "loss": 0.5587, "step": 4963 }, { "epoch": 0.20608306218506584, "grad_norm": 2.7263236045837402, "learning_rate": 9.208810166349215e-06, "loss": 0.4842, "step": 4964 }, { "epoch": 0.20612457770927717, "grad_norm": 2.0731582641601562, "learning_rate": 9.208447179987156e-06, "loss": 0.5153, "step": 4965 }, { "epoch": 0.2061660932334885, "grad_norm": 2.880525588989258, "learning_rate": 9.208084117535161e-06, "loss": 0.6843, "step": 4966 }, { "epoch": 0.20620760875769983, "grad_norm": 2.924760580062866, "learning_rate": 9.20772097899979e-06, "loss": 0.6358, "step": 4967 }, { "epoch": 0.20624912428191117, "grad_norm": 3.283212184906006, "learning_rate": 9.207357764387612e-06, "loss": 0.4814, "step": 4968 }, { "epoch": 0.2062906398061225, "grad_norm": 3.072687864303589, "learning_rate": 9.206994473705194e-06, "loss": 0.6516, "step": 4969 }, { "epoch": 0.20633215533033383, "grad_norm": 2.5205509662628174, "learning_rate": 9.206631106959101e-06, "loss": 0.4247, "step": 4970 }, { "epoch": 0.20637367085454517, "grad_norm": 2.675945997238159, "learning_rate": 9.206267664155906e-06, "loss": 0.4854, "step": 4971 }, { "epoch": 0.2064151863787565, "grad_norm": 2.6789321899414062, "learning_rate": 9.20590414530218e-06, "loss": 0.5669, "step": 4972 }, { "epoch": 0.20645670190296783, "grad_norm": 3.016169309616089, "learning_rate": 9.205540550404496e-06, "loss": 0.5539, "step": 4973 }, { "epoch": 0.20649821742717916, "grad_norm": 2.759328842163086, "learning_rate": 9.205176879469422e-06, "loss": 0.435, "step": 4974 }, { "epoch": 0.2065397329513905, "grad_norm": 2.596226453781128, "learning_rate": 9.204813132503543e-06, "loss": 0.4572, "step": 4975 }, { "epoch": 0.20658124847560186, "grad_norm": 2.725459098815918, "learning_rate": 9.204449309513429e-06, "loss": 0.5273, "step": 4976 }, { "epoch": 0.2066227639998132, "grad_norm": 2.6612229347229004, "learning_rate": 9.204085410505657e-06, "loss": 0.62, "step": 4977 }, { "epoch": 0.20666427952402452, "grad_norm": 2.741685152053833, "learning_rate": 9.203721435486811e-06, "loss": 0.4967, "step": 4978 }, { "epoch": 0.20670579504823586, "grad_norm": 2.414010524749756, "learning_rate": 9.20335738446347e-06, "loss": 0.5484, "step": 4979 }, { "epoch": 0.2067473105724472, "grad_norm": 2.169731378555298, "learning_rate": 9.202993257442216e-06, "loss": 0.4034, "step": 4980 }, { "epoch": 0.20678882609665852, "grad_norm": 2.3121323585510254, "learning_rate": 9.202629054429632e-06, "loss": 0.4546, "step": 4981 }, { "epoch": 0.20683034162086986, "grad_norm": 2.921074151992798, "learning_rate": 9.202264775432305e-06, "loss": 0.4776, "step": 4982 }, { "epoch": 0.2068718571450812, "grad_norm": 3.3853960037231445, "learning_rate": 9.201900420456817e-06, "loss": 0.5981, "step": 4983 }, { "epoch": 0.20691337266929252, "grad_norm": 2.779808759689331, "learning_rate": 9.20153598950976e-06, "loss": 0.4984, "step": 4984 }, { "epoch": 0.20695488819350386, "grad_norm": 2.87274432182312, "learning_rate": 9.201171482597721e-06, "loss": 0.5001, "step": 4985 }, { "epoch": 0.2069964037177152, "grad_norm": 2.826331377029419, "learning_rate": 9.20080689972729e-06, "loss": 0.5638, "step": 4986 }, { "epoch": 0.20703791924192652, "grad_norm": 2.673588514328003, "learning_rate": 9.200442240905061e-06, "loss": 0.5626, "step": 4987 }, { "epoch": 0.20707943476613785, "grad_norm": 2.4814517498016357, "learning_rate": 9.200077506137626e-06, "loss": 0.4884, "step": 4988 }, { "epoch": 0.2071209502903492, "grad_norm": 2.267505645751953, "learning_rate": 9.199712695431577e-06, "loss": 0.5065, "step": 4989 }, { "epoch": 0.20716246581456052, "grad_norm": 2.5491673946380615, "learning_rate": 9.199347808793513e-06, "loss": 0.6465, "step": 4990 }, { "epoch": 0.20720398133877185, "grad_norm": 3.0231263637542725, "learning_rate": 9.19898284623003e-06, "loss": 0.464, "step": 4991 }, { "epoch": 0.20724549686298321, "grad_norm": 3.1149210929870605, "learning_rate": 9.198617807747725e-06, "loss": 0.547, "step": 4992 }, { "epoch": 0.20728701238719455, "grad_norm": 2.8756966590881348, "learning_rate": 9.198252693353202e-06, "loss": 0.5175, "step": 4993 }, { "epoch": 0.20732852791140588, "grad_norm": 3.3221912384033203, "learning_rate": 9.19788750305306e-06, "loss": 0.4539, "step": 4994 }, { "epoch": 0.2073700434356172, "grad_norm": 2.2258894443511963, "learning_rate": 9.197522236853902e-06, "loss": 0.421, "step": 4995 }, { "epoch": 0.20741155895982855, "grad_norm": 2.2091097831726074, "learning_rate": 9.197156894762333e-06, "loss": 0.602, "step": 4996 }, { "epoch": 0.20745307448403988, "grad_norm": 2.483201265335083, "learning_rate": 9.196791476784956e-06, "loss": 0.5791, "step": 4997 }, { "epoch": 0.2074945900082512, "grad_norm": 2.7778639793395996, "learning_rate": 9.196425982928381e-06, "loss": 0.4752, "step": 4998 }, { "epoch": 0.20753610553246254, "grad_norm": 2.3558459281921387, "learning_rate": 9.196060413199213e-06, "loss": 0.4803, "step": 4999 }, { "epoch": 0.20757762105667388, "grad_norm": 2.5546233654022217, "learning_rate": 9.195694767604065e-06, "loss": 0.4521, "step": 5000 }, { "epoch": 0.2076191365808852, "grad_norm": 2.479217529296875, "learning_rate": 9.195329046149544e-06, "loss": 0.5179, "step": 5001 }, { "epoch": 0.20766065210509654, "grad_norm": 2.521404981613159, "learning_rate": 9.194963248842266e-06, "loss": 0.5324, "step": 5002 }, { "epoch": 0.20770216762930788, "grad_norm": 2.905566692352295, "learning_rate": 9.194597375688843e-06, "loss": 0.5216, "step": 5003 }, { "epoch": 0.2077436831535192, "grad_norm": 2.521730661392212, "learning_rate": 9.194231426695892e-06, "loss": 0.7193, "step": 5004 }, { "epoch": 0.20778519867773054, "grad_norm": 2.5459983348846436, "learning_rate": 9.193865401870026e-06, "loss": 0.5954, "step": 5005 }, { "epoch": 0.20782671420194188, "grad_norm": 2.817643165588379, "learning_rate": 9.193499301217865e-06, "loss": 0.4605, "step": 5006 }, { "epoch": 0.20786822972615324, "grad_norm": 2.6738533973693848, "learning_rate": 9.193133124746029e-06, "loss": 0.5663, "step": 5007 }, { "epoch": 0.20790974525036457, "grad_norm": 2.6196937561035156, "learning_rate": 9.192766872461137e-06, "loss": 0.5592, "step": 5008 }, { "epoch": 0.2079512607745759, "grad_norm": 2.8029491901397705, "learning_rate": 9.19240054436981e-06, "loss": 0.453, "step": 5009 }, { "epoch": 0.20799277629878724, "grad_norm": 2.3183577060699463, "learning_rate": 9.192034140478674e-06, "loss": 0.6202, "step": 5010 }, { "epoch": 0.20803429182299857, "grad_norm": 2.765990972518921, "learning_rate": 9.191667660794353e-06, "loss": 0.6641, "step": 5011 }, { "epoch": 0.2080758073472099, "grad_norm": 2.40740966796875, "learning_rate": 9.19130110532347e-06, "loss": 0.4885, "step": 5012 }, { "epoch": 0.20811732287142123, "grad_norm": 2.713670492172241, "learning_rate": 9.190934474072658e-06, "loss": 0.5394, "step": 5013 }, { "epoch": 0.20815883839563257, "grad_norm": 3.078014612197876, "learning_rate": 9.190567767048542e-06, "loss": 0.5279, "step": 5014 }, { "epoch": 0.2082003539198439, "grad_norm": 2.709425926208496, "learning_rate": 9.190200984257754e-06, "loss": 0.5857, "step": 5015 }, { "epoch": 0.20824186944405523, "grad_norm": 2.469555139541626, "learning_rate": 9.189834125706922e-06, "loss": 0.4464, "step": 5016 }, { "epoch": 0.20828338496826657, "grad_norm": 2.7957262992858887, "learning_rate": 9.189467191402684e-06, "loss": 0.6367, "step": 5017 }, { "epoch": 0.2083249004924779, "grad_norm": 2.5530083179473877, "learning_rate": 9.189100181351668e-06, "loss": 0.4719, "step": 5018 }, { "epoch": 0.20836641601668923, "grad_norm": 2.2452847957611084, "learning_rate": 9.188733095560517e-06, "loss": 0.5161, "step": 5019 }, { "epoch": 0.20840793154090057, "grad_norm": 2.3125998973846436, "learning_rate": 9.188365934035863e-06, "loss": 0.4614, "step": 5020 }, { "epoch": 0.2084494470651119, "grad_norm": 2.5865941047668457, "learning_rate": 9.187998696784344e-06, "loss": 0.5923, "step": 5021 }, { "epoch": 0.20849096258932326, "grad_norm": 2.9500887393951416, "learning_rate": 9.187631383812604e-06, "loss": 0.5331, "step": 5022 }, { "epoch": 0.2085324781135346, "grad_norm": 2.658893346786499, "learning_rate": 9.18726399512728e-06, "loss": 0.6589, "step": 5023 }, { "epoch": 0.20857399363774592, "grad_norm": 2.392033576965332, "learning_rate": 9.186896530735016e-06, "loss": 0.4259, "step": 5024 }, { "epoch": 0.20861550916195726, "grad_norm": 2.415225028991699, "learning_rate": 9.186528990642456e-06, "loss": 0.5734, "step": 5025 }, { "epoch": 0.2086570246861686, "grad_norm": 2.461219310760498, "learning_rate": 9.186161374856245e-06, "loss": 0.541, "step": 5026 }, { "epoch": 0.20869854021037992, "grad_norm": 2.407386064529419, "learning_rate": 9.185793683383031e-06, "loss": 0.6025, "step": 5027 }, { "epoch": 0.20874005573459126, "grad_norm": 2.497952938079834, "learning_rate": 9.185425916229461e-06, "loss": 0.6233, "step": 5028 }, { "epoch": 0.2087815712588026, "grad_norm": 2.578028678894043, "learning_rate": 9.185058073402182e-06, "loss": 0.6453, "step": 5029 }, { "epoch": 0.20882308678301392, "grad_norm": 2.6634836196899414, "learning_rate": 9.18469015490785e-06, "loss": 0.5922, "step": 5030 }, { "epoch": 0.20886460230722526, "grad_norm": 2.1850454807281494, "learning_rate": 9.18432216075311e-06, "loss": 0.5353, "step": 5031 }, { "epoch": 0.2089061178314366, "grad_norm": 2.440802812576294, "learning_rate": 9.183954090944623e-06, "loss": 0.5058, "step": 5032 }, { "epoch": 0.20894763335564792, "grad_norm": 2.7905733585357666, "learning_rate": 9.183585945489037e-06, "loss": 0.5474, "step": 5033 }, { "epoch": 0.20898914887985925, "grad_norm": 2.416632890701294, "learning_rate": 9.183217724393012e-06, "loss": 0.6643, "step": 5034 }, { "epoch": 0.2090306644040706, "grad_norm": 2.42151141166687, "learning_rate": 9.182849427663205e-06, "loss": 0.586, "step": 5035 }, { "epoch": 0.20907217992828192, "grad_norm": 2.9175326824188232, "learning_rate": 9.182481055306276e-06, "loss": 0.6163, "step": 5036 }, { "epoch": 0.20911369545249325, "grad_norm": 2.1120150089263916, "learning_rate": 9.182112607328882e-06, "loss": 0.5874, "step": 5037 }, { "epoch": 0.20915521097670461, "grad_norm": 2.906059741973877, "learning_rate": 9.181744083737688e-06, "loss": 0.4991, "step": 5038 }, { "epoch": 0.20919672650091595, "grad_norm": 2.925765037536621, "learning_rate": 9.181375484539356e-06, "loss": 0.4842, "step": 5039 }, { "epoch": 0.20923824202512728, "grad_norm": 2.5547499656677246, "learning_rate": 9.181006809740549e-06, "loss": 0.4777, "step": 5040 }, { "epoch": 0.2092797575493386, "grad_norm": 2.7024636268615723, "learning_rate": 9.180638059347933e-06, "loss": 0.6265, "step": 5041 }, { "epoch": 0.20932127307354995, "grad_norm": 2.308696985244751, "learning_rate": 9.180269233368178e-06, "loss": 0.4995, "step": 5042 }, { "epoch": 0.20936278859776128, "grad_norm": 2.426870107650757, "learning_rate": 9.179900331807949e-06, "loss": 0.5038, "step": 5043 }, { "epoch": 0.2094043041219726, "grad_norm": 2.0987696647644043, "learning_rate": 9.179531354673917e-06, "loss": 0.3779, "step": 5044 }, { "epoch": 0.20944581964618394, "grad_norm": 2.5722501277923584, "learning_rate": 9.179162301972754e-06, "loss": 0.5229, "step": 5045 }, { "epoch": 0.20948733517039528, "grad_norm": 2.63442325592041, "learning_rate": 9.178793173711133e-06, "loss": 0.405, "step": 5046 }, { "epoch": 0.2095288506946066, "grad_norm": 2.8423354625701904, "learning_rate": 9.178423969895726e-06, "loss": 0.5741, "step": 5047 }, { "epoch": 0.20957036621881794, "grad_norm": 2.2261955738067627, "learning_rate": 9.178054690533209e-06, "loss": 0.3886, "step": 5048 }, { "epoch": 0.20961188174302928, "grad_norm": 2.8537425994873047, "learning_rate": 9.177685335630259e-06, "loss": 0.4508, "step": 5049 }, { "epoch": 0.2096533972672406, "grad_norm": 2.3478198051452637, "learning_rate": 9.177315905193554e-06, "loss": 0.5594, "step": 5050 }, { "epoch": 0.20969491279145194, "grad_norm": 2.4089434146881104, "learning_rate": 9.176946399229773e-06, "loss": 0.6423, "step": 5051 }, { "epoch": 0.20973642831566328, "grad_norm": 2.6592917442321777, "learning_rate": 9.176576817745598e-06, "loss": 0.4928, "step": 5052 }, { "epoch": 0.20977794383987464, "grad_norm": 2.7937276363372803, "learning_rate": 9.17620716074771e-06, "loss": 0.6529, "step": 5053 }, { "epoch": 0.20981945936408597, "grad_norm": 2.80737566947937, "learning_rate": 9.175837428242793e-06, "loss": 0.654, "step": 5054 }, { "epoch": 0.2098609748882973, "grad_norm": 2.4289612770080566, "learning_rate": 9.175467620237533e-06, "loss": 0.4445, "step": 5055 }, { "epoch": 0.20990249041250864, "grad_norm": 2.701359510421753, "learning_rate": 9.175097736738612e-06, "loss": 0.5579, "step": 5056 }, { "epoch": 0.20994400593671997, "grad_norm": 3.051941394805908, "learning_rate": 9.174727777752722e-06, "loss": 0.4904, "step": 5057 }, { "epoch": 0.2099855214609313, "grad_norm": 2.6874654293060303, "learning_rate": 9.17435774328655e-06, "loss": 0.5687, "step": 5058 }, { "epoch": 0.21002703698514263, "grad_norm": 2.672316551208496, "learning_rate": 9.173987633346789e-06, "loss": 0.5856, "step": 5059 }, { "epoch": 0.21006855250935397, "grad_norm": 2.6824212074279785, "learning_rate": 9.173617447940126e-06, "loss": 0.5663, "step": 5060 }, { "epoch": 0.2101100680335653, "grad_norm": 2.590418577194214, "learning_rate": 9.173247187073258e-06, "loss": 0.4729, "step": 5061 }, { "epoch": 0.21015158355777663, "grad_norm": 2.6508631706237793, "learning_rate": 9.172876850752876e-06, "loss": 0.6679, "step": 5062 }, { "epoch": 0.21019309908198797, "grad_norm": 2.8711273670196533, "learning_rate": 9.17250643898568e-06, "loss": 0.6102, "step": 5063 }, { "epoch": 0.2102346146061993, "grad_norm": 2.640460252761841, "learning_rate": 9.172135951778365e-06, "loss": 0.5212, "step": 5064 }, { "epoch": 0.21027613013041063, "grad_norm": 2.363269329071045, "learning_rate": 9.171765389137627e-06, "loss": 0.5601, "step": 5065 }, { "epoch": 0.21031764565462197, "grad_norm": 2.550098419189453, "learning_rate": 9.171394751070173e-06, "loss": 0.58, "step": 5066 }, { "epoch": 0.2103591611788333, "grad_norm": 3.064378499984741, "learning_rate": 9.171024037582694e-06, "loss": 0.3663, "step": 5067 }, { "epoch": 0.21040067670304463, "grad_norm": 3.2432773113250732, "learning_rate": 9.170653248681902e-06, "loss": 0.4558, "step": 5068 }, { "epoch": 0.210442192227256, "grad_norm": 2.3979454040527344, "learning_rate": 9.170282384374497e-06, "loss": 0.5143, "step": 5069 }, { "epoch": 0.21048370775146732, "grad_norm": 2.6587586402893066, "learning_rate": 9.169911444667181e-06, "loss": 0.563, "step": 5070 }, { "epoch": 0.21052522327567866, "grad_norm": 2.6509339809417725, "learning_rate": 9.169540429566668e-06, "loss": 0.4859, "step": 5071 }, { "epoch": 0.21056673879989, "grad_norm": 2.7378687858581543, "learning_rate": 9.16916933907966e-06, "loss": 0.4283, "step": 5072 }, { "epoch": 0.21060825432410132, "grad_norm": 2.0793914794921875, "learning_rate": 9.16879817321287e-06, "loss": 0.4195, "step": 5073 }, { "epoch": 0.21064976984831266, "grad_norm": 2.3569419384002686, "learning_rate": 9.168426931973008e-06, "loss": 0.4599, "step": 5074 }, { "epoch": 0.210691285372524, "grad_norm": 2.3436689376831055, "learning_rate": 9.168055615366781e-06, "loss": 0.534, "step": 5075 }, { "epoch": 0.21073280089673532, "grad_norm": 2.322624921798706, "learning_rate": 9.167684223400913e-06, "loss": 0.4431, "step": 5076 }, { "epoch": 0.21077431642094666, "grad_norm": 2.776998519897461, "learning_rate": 9.16731275608211e-06, "loss": 0.4279, "step": 5077 }, { "epoch": 0.210815831945158, "grad_norm": 2.350177049636841, "learning_rate": 9.166941213417092e-06, "loss": 0.4045, "step": 5078 }, { "epoch": 0.21085734746936932, "grad_norm": 2.7206928730010986, "learning_rate": 9.166569595412576e-06, "loss": 0.5037, "step": 5079 }, { "epoch": 0.21089886299358065, "grad_norm": 2.78432297706604, "learning_rate": 9.16619790207528e-06, "loss": 0.5507, "step": 5080 }, { "epoch": 0.210940378517792, "grad_norm": 2.3488011360168457, "learning_rate": 9.165826133411925e-06, "loss": 0.5581, "step": 5081 }, { "epoch": 0.21098189404200332, "grad_norm": 2.398557186126709, "learning_rate": 9.165454289429233e-06, "loss": 0.5582, "step": 5082 }, { "epoch": 0.21102340956621465, "grad_norm": 2.4873576164245605, "learning_rate": 9.165082370133929e-06, "loss": 0.6662, "step": 5083 }, { "epoch": 0.21106492509042601, "grad_norm": 2.2400424480438232, "learning_rate": 9.164710375532733e-06, "loss": 0.5698, "step": 5084 }, { "epoch": 0.21110644061463735, "grad_norm": 2.463967800140381, "learning_rate": 9.164338305632373e-06, "loss": 0.4532, "step": 5085 }, { "epoch": 0.21114795613884868, "grad_norm": 2.457362174987793, "learning_rate": 9.163966160439576e-06, "loss": 0.526, "step": 5086 }, { "epoch": 0.21118947166306, "grad_norm": 2.8188493251800537, "learning_rate": 9.163593939961071e-06, "loss": 0.5517, "step": 5087 }, { "epoch": 0.21123098718727135, "grad_norm": 2.392397880554199, "learning_rate": 9.163221644203588e-06, "loss": 0.6477, "step": 5088 }, { "epoch": 0.21127250271148268, "grad_norm": 2.3534529209136963, "learning_rate": 9.162849273173857e-06, "loss": 0.5212, "step": 5089 }, { "epoch": 0.211314018235694, "grad_norm": 2.8446810245513916, "learning_rate": 9.162476826878612e-06, "loss": 0.5475, "step": 5090 }, { "epoch": 0.21135553375990535, "grad_norm": 3.171584129333496, "learning_rate": 9.162104305324587e-06, "loss": 0.5985, "step": 5091 }, { "epoch": 0.21139704928411668, "grad_norm": 2.5246944427490234, "learning_rate": 9.161731708518516e-06, "loss": 0.5109, "step": 5092 }, { "epoch": 0.211438564808328, "grad_norm": 2.0773470401763916, "learning_rate": 9.161359036467135e-06, "loss": 0.5836, "step": 5093 }, { "epoch": 0.21148008033253934, "grad_norm": 2.9600777626037598, "learning_rate": 9.160986289177183e-06, "loss": 0.5527, "step": 5094 }, { "epoch": 0.21152159585675068, "grad_norm": 3.0531742572784424, "learning_rate": 9.1606134666554e-06, "loss": 0.4229, "step": 5095 }, { "epoch": 0.211563111380962, "grad_norm": 2.2469847202301025, "learning_rate": 9.160240568908527e-06, "loss": 0.4392, "step": 5096 }, { "epoch": 0.21160462690517334, "grad_norm": 2.1449782848358154, "learning_rate": 9.159867595943305e-06, "loss": 0.4729, "step": 5097 }, { "epoch": 0.21164614242938468, "grad_norm": 2.467628240585327, "learning_rate": 9.159494547766478e-06, "loss": 0.6522, "step": 5098 }, { "epoch": 0.211687657953596, "grad_norm": 3.044342041015625, "learning_rate": 9.15912142438479e-06, "loss": 0.4457, "step": 5099 }, { "epoch": 0.21172917347780737, "grad_norm": 2.640054225921631, "learning_rate": 9.158748225804988e-06, "loss": 0.5154, "step": 5100 }, { "epoch": 0.2117706890020187, "grad_norm": 2.2105860710144043, "learning_rate": 9.158374952033819e-06, "loss": 0.531, "step": 5101 }, { "epoch": 0.21181220452623004, "grad_norm": 2.0859766006469727, "learning_rate": 9.158001603078035e-06, "loss": 0.5783, "step": 5102 }, { "epoch": 0.21185372005044137, "grad_norm": 2.146257162094116, "learning_rate": 9.15762817894438e-06, "loss": 0.5976, "step": 5103 }, { "epoch": 0.2118952355746527, "grad_norm": 2.3503494262695312, "learning_rate": 9.157254679639612e-06, "loss": 0.5114, "step": 5104 }, { "epoch": 0.21193675109886403, "grad_norm": 2.4833621978759766, "learning_rate": 9.15688110517048e-06, "loss": 0.6159, "step": 5105 }, { "epoch": 0.21197826662307537, "grad_norm": 2.5503768920898438, "learning_rate": 9.15650745554374e-06, "loss": 0.5644, "step": 5106 }, { "epoch": 0.2120197821472867, "grad_norm": 3.557864189147949, "learning_rate": 9.156133730766146e-06, "loss": 0.5674, "step": 5107 }, { "epoch": 0.21206129767149803, "grad_norm": 2.784696102142334, "learning_rate": 9.155759930844456e-06, "loss": 0.4608, "step": 5108 }, { "epoch": 0.21210281319570937, "grad_norm": 2.7353861331939697, "learning_rate": 9.15538605578543e-06, "loss": 0.3667, "step": 5109 }, { "epoch": 0.2121443287199207, "grad_norm": 2.7533721923828125, "learning_rate": 9.155012105595826e-06, "loss": 0.5005, "step": 5110 }, { "epoch": 0.21218584424413203, "grad_norm": 2.8360700607299805, "learning_rate": 9.154638080282405e-06, "loss": 0.4587, "step": 5111 }, { "epoch": 0.21222735976834337, "grad_norm": 2.8657798767089844, "learning_rate": 9.154263979851932e-06, "loss": 0.445, "step": 5112 }, { "epoch": 0.2122688752925547, "grad_norm": 2.661926031112671, "learning_rate": 9.153889804311167e-06, "loss": 0.5844, "step": 5113 }, { "epoch": 0.21231039081676603, "grad_norm": 2.1371543407440186, "learning_rate": 9.153515553666876e-06, "loss": 0.4991, "step": 5114 }, { "epoch": 0.2123519063409774, "grad_norm": 2.4354395866394043, "learning_rate": 9.153141227925828e-06, "loss": 0.4841, "step": 5115 }, { "epoch": 0.21239342186518873, "grad_norm": 2.5589442253112793, "learning_rate": 9.15276682709479e-06, "loss": 0.5498, "step": 5116 }, { "epoch": 0.21243493738940006, "grad_norm": 2.8040406703948975, "learning_rate": 9.15239235118053e-06, "loss": 0.5289, "step": 5117 }, { "epoch": 0.2124764529136114, "grad_norm": 2.333815097808838, "learning_rate": 9.15201780018982e-06, "loss": 0.5081, "step": 5118 }, { "epoch": 0.21251796843782272, "grad_norm": 2.6969053745269775, "learning_rate": 9.151643174129432e-06, "loss": 0.5609, "step": 5119 }, { "epoch": 0.21255948396203406, "grad_norm": 2.936091899871826, "learning_rate": 9.151268473006136e-06, "loss": 0.5713, "step": 5120 }, { "epoch": 0.2126009994862454, "grad_norm": 2.9378230571746826, "learning_rate": 9.150893696826713e-06, "loss": 0.6404, "step": 5121 }, { "epoch": 0.21264251501045672, "grad_norm": 2.372076988220215, "learning_rate": 9.150518845597934e-06, "loss": 0.4201, "step": 5122 }, { "epoch": 0.21268403053466806, "grad_norm": 2.4418957233428955, "learning_rate": 9.150143919326577e-06, "loss": 0.3978, "step": 5123 }, { "epoch": 0.2127255460588794, "grad_norm": 2.4606006145477295, "learning_rate": 9.149768918019423e-06, "loss": 0.5266, "step": 5124 }, { "epoch": 0.21276706158309072, "grad_norm": 2.368330240249634, "learning_rate": 9.14939384168325e-06, "loss": 0.5942, "step": 5125 }, { "epoch": 0.21280857710730205, "grad_norm": 2.260218620300293, "learning_rate": 9.149018690324841e-06, "loss": 0.5359, "step": 5126 }, { "epoch": 0.2128500926315134, "grad_norm": 2.410698652267456, "learning_rate": 9.148643463950979e-06, "loss": 0.5051, "step": 5127 }, { "epoch": 0.21289160815572472, "grad_norm": 2.0396156311035156, "learning_rate": 9.148268162568446e-06, "loss": 0.501, "step": 5128 }, { "epoch": 0.21293312367993605, "grad_norm": 2.2590951919555664, "learning_rate": 9.147892786184029e-06, "loss": 0.3978, "step": 5129 }, { "epoch": 0.21297463920414741, "grad_norm": 2.3148510456085205, "learning_rate": 9.147517334804516e-06, "loss": 0.5879, "step": 5130 }, { "epoch": 0.21301615472835875, "grad_norm": 2.2208521366119385, "learning_rate": 9.147141808436693e-06, "loss": 0.4568, "step": 5131 }, { "epoch": 0.21305767025257008, "grad_norm": 2.496070146560669, "learning_rate": 9.146766207087352e-06, "loss": 0.5695, "step": 5132 }, { "epoch": 0.2130991857767814, "grad_norm": 2.3318097591400146, "learning_rate": 9.146390530763281e-06, "loss": 0.438, "step": 5133 }, { "epoch": 0.21314070130099275, "grad_norm": 2.956692934036255, "learning_rate": 9.146014779471275e-06, "loss": 0.5759, "step": 5134 }, { "epoch": 0.21318221682520408, "grad_norm": 2.4180908203125, "learning_rate": 9.145638953218127e-06, "loss": 0.5517, "step": 5135 }, { "epoch": 0.2132237323494154, "grad_norm": 2.351102113723755, "learning_rate": 9.14526305201063e-06, "loss": 0.596, "step": 5136 }, { "epoch": 0.21326524787362675, "grad_norm": 3.514117479324341, "learning_rate": 9.144887075855586e-06, "loss": 0.6368, "step": 5137 }, { "epoch": 0.21330676339783808, "grad_norm": 2.6918230056762695, "learning_rate": 9.144511024759786e-06, "loss": 0.6306, "step": 5138 }, { "epoch": 0.2133482789220494, "grad_norm": 2.74125337600708, "learning_rate": 9.144134898730034e-06, "loss": 0.4998, "step": 5139 }, { "epoch": 0.21338979444626074, "grad_norm": 2.537473678588867, "learning_rate": 9.143758697773127e-06, "loss": 0.5213, "step": 5140 }, { "epoch": 0.21343130997047208, "grad_norm": 2.476637840270996, "learning_rate": 9.14338242189587e-06, "loss": 0.6129, "step": 5141 }, { "epoch": 0.2134728254946834, "grad_norm": 2.640446186065674, "learning_rate": 9.143006071105063e-06, "loss": 0.5508, "step": 5142 }, { "epoch": 0.21351434101889474, "grad_norm": 2.9363112449645996, "learning_rate": 9.142629645407512e-06, "loss": 0.5026, "step": 5143 }, { "epoch": 0.21355585654310608, "grad_norm": 2.4075541496276855, "learning_rate": 9.142253144810024e-06, "loss": 0.5091, "step": 5144 }, { "epoch": 0.2135973720673174, "grad_norm": 2.582882881164551, "learning_rate": 9.141876569319405e-06, "loss": 0.5333, "step": 5145 }, { "epoch": 0.21363888759152877, "grad_norm": 2.7612266540527344, "learning_rate": 9.141499918942463e-06, "loss": 0.5532, "step": 5146 }, { "epoch": 0.2136804031157401, "grad_norm": 2.6391916275024414, "learning_rate": 9.14112319368601e-06, "loss": 0.4662, "step": 5147 }, { "epoch": 0.21372191863995144, "grad_norm": 3.7835192680358887, "learning_rate": 9.140746393556853e-06, "loss": 0.5455, "step": 5148 }, { "epoch": 0.21376343416416277, "grad_norm": 2.157839775085449, "learning_rate": 9.140369518561812e-06, "loss": 0.6346, "step": 5149 }, { "epoch": 0.2138049496883741, "grad_norm": 2.4979379177093506, "learning_rate": 9.139992568707696e-06, "loss": 0.4109, "step": 5150 }, { "epoch": 0.21384646521258543, "grad_norm": 2.4727938175201416, "learning_rate": 9.139615544001319e-06, "loss": 0.5448, "step": 5151 }, { "epoch": 0.21388798073679677, "grad_norm": 3.03265643119812, "learning_rate": 9.139238444449502e-06, "loss": 0.5616, "step": 5152 }, { "epoch": 0.2139294962610081, "grad_norm": 2.476935386657715, "learning_rate": 9.138861270059059e-06, "loss": 0.5023, "step": 5153 }, { "epoch": 0.21397101178521943, "grad_norm": 2.3515021800994873, "learning_rate": 9.13848402083681e-06, "loss": 0.5151, "step": 5154 }, { "epoch": 0.21401252730943077, "grad_norm": 2.26031231880188, "learning_rate": 9.13810669678958e-06, "loss": 0.5149, "step": 5155 }, { "epoch": 0.2140540428336421, "grad_norm": 2.494636058807373, "learning_rate": 9.137729297924188e-06, "loss": 0.5945, "step": 5156 }, { "epoch": 0.21409555835785343, "grad_norm": 2.7936346530914307, "learning_rate": 9.137351824247455e-06, "loss": 0.4687, "step": 5157 }, { "epoch": 0.21413707388206477, "grad_norm": 2.767948627471924, "learning_rate": 9.13697427576621e-06, "loss": 0.403, "step": 5158 }, { "epoch": 0.2141785894062761, "grad_norm": 2.3488426208496094, "learning_rate": 9.13659665248728e-06, "loss": 0.6247, "step": 5159 }, { "epoch": 0.21422010493048743, "grad_norm": 2.486652135848999, "learning_rate": 9.136218954417487e-06, "loss": 0.5125, "step": 5160 }, { "epoch": 0.2142616204546988, "grad_norm": 2.8851399421691895, "learning_rate": 9.135841181563666e-06, "loss": 0.673, "step": 5161 }, { "epoch": 0.21430313597891013, "grad_norm": 2.3064639568328857, "learning_rate": 9.13546333393264e-06, "loss": 0.586, "step": 5162 }, { "epoch": 0.21434465150312146, "grad_norm": 3.4333012104034424, "learning_rate": 9.13508541153125e-06, "loss": 0.4457, "step": 5163 }, { "epoch": 0.2143861670273328, "grad_norm": 2.6325902938842773, "learning_rate": 9.13470741436632e-06, "loss": 0.5323, "step": 5164 }, { "epoch": 0.21442768255154412, "grad_norm": 2.460462808609009, "learning_rate": 9.13432934244469e-06, "loss": 0.512, "step": 5165 }, { "epoch": 0.21446919807575546, "grad_norm": 3.1754512786865234, "learning_rate": 9.133951195773194e-06, "loss": 0.5913, "step": 5166 }, { "epoch": 0.2145107135999668, "grad_norm": 2.634859800338745, "learning_rate": 9.133572974358669e-06, "loss": 0.4809, "step": 5167 }, { "epoch": 0.21455222912417812, "grad_norm": 1.9097263813018799, "learning_rate": 9.133194678207952e-06, "loss": 0.4876, "step": 5168 }, { "epoch": 0.21459374464838946, "grad_norm": 2.561041831970215, "learning_rate": 9.132816307327886e-06, "loss": 0.5526, "step": 5169 }, { "epoch": 0.2146352601726008, "grad_norm": 3.1193573474884033, "learning_rate": 9.132437861725307e-06, "loss": 0.4237, "step": 5170 }, { "epoch": 0.21467677569681212, "grad_norm": 2.1833550930023193, "learning_rate": 9.132059341407063e-06, "loss": 0.4524, "step": 5171 }, { "epoch": 0.21471829122102346, "grad_norm": 2.976743221282959, "learning_rate": 9.131680746379993e-06, "loss": 0.5228, "step": 5172 }, { "epoch": 0.2147598067452348, "grad_norm": 2.972141981124878, "learning_rate": 9.131302076650944e-06, "loss": 0.6706, "step": 5173 }, { "epoch": 0.21480132226944612, "grad_norm": 3.0227153301239014, "learning_rate": 9.130923332226765e-06, "loss": 0.5458, "step": 5174 }, { "epoch": 0.21484283779365745, "grad_norm": 2.5405468940734863, "learning_rate": 9.130544513114301e-06, "loss": 0.6156, "step": 5175 }, { "epoch": 0.2148843533178688, "grad_norm": 3.3411736488342285, "learning_rate": 9.130165619320401e-06, "loss": 0.693, "step": 5176 }, { "epoch": 0.21492586884208015, "grad_norm": 2.546656608581543, "learning_rate": 9.129786650851914e-06, "loss": 0.5447, "step": 5177 }, { "epoch": 0.21496738436629148, "grad_norm": 2.299288749694824, "learning_rate": 9.129407607715697e-06, "loss": 0.4778, "step": 5178 }, { "epoch": 0.2150088998905028, "grad_norm": 2.6399571895599365, "learning_rate": 9.129028489918602e-06, "loss": 0.6592, "step": 5179 }, { "epoch": 0.21505041541471415, "grad_norm": 2.6549906730651855, "learning_rate": 9.128649297467478e-06, "loss": 0.4864, "step": 5180 }, { "epoch": 0.21509193093892548, "grad_norm": 2.9251067638397217, "learning_rate": 9.128270030369185e-06, "loss": 0.5883, "step": 5181 }, { "epoch": 0.2151334464631368, "grad_norm": 2.6663360595703125, "learning_rate": 9.12789068863058e-06, "loss": 0.5414, "step": 5182 }, { "epoch": 0.21517496198734815, "grad_norm": 2.8071322441101074, "learning_rate": 9.127511272258524e-06, "loss": 0.5645, "step": 5183 }, { "epoch": 0.21521647751155948, "grad_norm": 2.983698844909668, "learning_rate": 9.127131781259872e-06, "loss": 0.4833, "step": 5184 }, { "epoch": 0.2152579930357708, "grad_norm": 2.396306276321411, "learning_rate": 9.12675221564149e-06, "loss": 0.4869, "step": 5185 }, { "epoch": 0.21529950855998214, "grad_norm": 2.9038846492767334, "learning_rate": 9.126372575410236e-06, "loss": 0.6402, "step": 5186 }, { "epoch": 0.21534102408419348, "grad_norm": 2.5504043102264404, "learning_rate": 9.125992860572979e-06, "loss": 0.6158, "step": 5187 }, { "epoch": 0.2153825396084048, "grad_norm": 2.4356160163879395, "learning_rate": 9.125613071136582e-06, "loss": 0.5927, "step": 5188 }, { "epoch": 0.21542405513261614, "grad_norm": 2.547090530395508, "learning_rate": 9.125233207107908e-06, "loss": 0.5128, "step": 5189 }, { "epoch": 0.21546557065682748, "grad_norm": 2.293012857437134, "learning_rate": 9.124853268493831e-06, "loss": 0.4586, "step": 5190 }, { "epoch": 0.2155070861810388, "grad_norm": 2.8802602291107178, "learning_rate": 9.124473255301219e-06, "loss": 0.5331, "step": 5191 }, { "epoch": 0.21554860170525017, "grad_norm": 2.560657024383545, "learning_rate": 9.124093167536941e-06, "loss": 0.4993, "step": 5192 }, { "epoch": 0.2155901172294615, "grad_norm": 2.896641254425049, "learning_rate": 9.123713005207868e-06, "loss": 0.6372, "step": 5193 }, { "epoch": 0.21563163275367284, "grad_norm": 2.299985885620117, "learning_rate": 9.123332768320876e-06, "loss": 0.6178, "step": 5194 }, { "epoch": 0.21567314827788417, "grad_norm": 2.496839761734009, "learning_rate": 9.12295245688284e-06, "loss": 0.5577, "step": 5195 }, { "epoch": 0.2157146638020955, "grad_norm": 2.689253807067871, "learning_rate": 9.122572070900635e-06, "loss": 0.6319, "step": 5196 }, { "epoch": 0.21575617932630684, "grad_norm": 2.3793370723724365, "learning_rate": 9.12219161038114e-06, "loss": 0.4328, "step": 5197 }, { "epoch": 0.21579769485051817, "grad_norm": 2.821735143661499, "learning_rate": 9.12181107533123e-06, "loss": 0.5407, "step": 5198 }, { "epoch": 0.2158392103747295, "grad_norm": 2.2517597675323486, "learning_rate": 9.121430465757789e-06, "loss": 0.5034, "step": 5199 }, { "epoch": 0.21588072589894083, "grad_norm": 2.09580659866333, "learning_rate": 9.121049781667696e-06, "loss": 0.4921, "step": 5200 }, { "epoch": 0.21592224142315217, "grad_norm": 2.250044107437134, "learning_rate": 9.120669023067837e-06, "loss": 0.6141, "step": 5201 }, { "epoch": 0.2159637569473635, "grad_norm": 2.746823310852051, "learning_rate": 9.120288189965094e-06, "loss": 0.6713, "step": 5202 }, { "epoch": 0.21600527247157483, "grad_norm": 2.961138963699341, "learning_rate": 9.119907282366352e-06, "loss": 0.5089, "step": 5203 }, { "epoch": 0.21604678799578617, "grad_norm": 2.6909689903259277, "learning_rate": 9.1195263002785e-06, "loss": 0.5393, "step": 5204 }, { "epoch": 0.2160883035199975, "grad_norm": 2.2531378269195557, "learning_rate": 9.119145243708425e-06, "loss": 0.5263, "step": 5205 }, { "epoch": 0.21612981904420883, "grad_norm": 2.5653250217437744, "learning_rate": 9.118764112663015e-06, "loss": 0.5558, "step": 5206 }, { "epoch": 0.21617133456842016, "grad_norm": 2.7662911415100098, "learning_rate": 9.118382907149164e-06, "loss": 0.5693, "step": 5207 }, { "epoch": 0.21621285009263153, "grad_norm": 3.4004504680633545, "learning_rate": 9.118001627173764e-06, "loss": 0.6198, "step": 5208 }, { "epoch": 0.21625436561684286, "grad_norm": 2.206066370010376, "learning_rate": 9.117620272743706e-06, "loss": 0.4243, "step": 5209 }, { "epoch": 0.2162958811410542, "grad_norm": 2.355300188064575, "learning_rate": 9.117238843865888e-06, "loss": 0.5438, "step": 5210 }, { "epoch": 0.21633739666526552, "grad_norm": 3.0902082920074463, "learning_rate": 9.116857340547203e-06, "loss": 0.5083, "step": 5211 }, { "epoch": 0.21637891218947686, "grad_norm": 2.6812572479248047, "learning_rate": 9.116475762794551e-06, "loss": 0.4596, "step": 5212 }, { "epoch": 0.2164204277136882, "grad_norm": 2.3131988048553467, "learning_rate": 9.116094110614833e-06, "loss": 0.5906, "step": 5213 }, { "epoch": 0.21646194323789952, "grad_norm": 2.45926833152771, "learning_rate": 9.115712384014946e-06, "loss": 0.4793, "step": 5214 }, { "epoch": 0.21650345876211086, "grad_norm": 2.843474864959717, "learning_rate": 9.115330583001793e-06, "loss": 0.6506, "step": 5215 }, { "epoch": 0.2165449742863222, "grad_norm": 2.8168551921844482, "learning_rate": 9.114948707582277e-06, "loss": 0.5829, "step": 5216 }, { "epoch": 0.21658648981053352, "grad_norm": 2.8000288009643555, "learning_rate": 9.1145667577633e-06, "loss": 0.4779, "step": 5217 }, { "epoch": 0.21662800533474486, "grad_norm": 2.155022621154785, "learning_rate": 9.114184733551772e-06, "loss": 0.6414, "step": 5218 }, { "epoch": 0.2166695208589562, "grad_norm": 2.891141176223755, "learning_rate": 9.113802634954598e-06, "loss": 0.5644, "step": 5219 }, { "epoch": 0.21671103638316752, "grad_norm": 2.364793062210083, "learning_rate": 9.113420461978688e-06, "loss": 0.5109, "step": 5220 }, { "epoch": 0.21675255190737885, "grad_norm": 2.4255387783050537, "learning_rate": 9.113038214630947e-06, "loss": 0.5575, "step": 5221 }, { "epoch": 0.2167940674315902, "grad_norm": 2.425664186477661, "learning_rate": 9.112655892918291e-06, "loss": 0.4734, "step": 5222 }, { "epoch": 0.21683558295580155, "grad_norm": 2.6166648864746094, "learning_rate": 9.112273496847633e-06, "loss": 0.5029, "step": 5223 }, { "epoch": 0.21687709848001288, "grad_norm": 2.345200777053833, "learning_rate": 9.111891026425883e-06, "loss": 0.4842, "step": 5224 }, { "epoch": 0.21691861400422421, "grad_norm": 2.609611749649048, "learning_rate": 9.111508481659959e-06, "loss": 0.6413, "step": 5225 }, { "epoch": 0.21696012952843555, "grad_norm": 2.313558578491211, "learning_rate": 9.111125862556776e-06, "loss": 0.5887, "step": 5226 }, { "epoch": 0.21700164505264688, "grad_norm": 2.86008358001709, "learning_rate": 9.110743169123254e-06, "loss": 0.5018, "step": 5227 }, { "epoch": 0.2170431605768582, "grad_norm": 2.5862555503845215, "learning_rate": 9.110360401366309e-06, "loss": 0.6208, "step": 5228 }, { "epoch": 0.21708467610106955, "grad_norm": 2.5366084575653076, "learning_rate": 9.109977559292863e-06, "loss": 0.6259, "step": 5229 }, { "epoch": 0.21712619162528088, "grad_norm": 2.448822498321533, "learning_rate": 9.109594642909839e-06, "loss": 0.5649, "step": 5230 }, { "epoch": 0.2171677071494922, "grad_norm": 2.316267490386963, "learning_rate": 9.109211652224159e-06, "loss": 0.4964, "step": 5231 }, { "epoch": 0.21720922267370354, "grad_norm": 2.3976051807403564, "learning_rate": 9.108828587242748e-06, "loss": 0.6216, "step": 5232 }, { "epoch": 0.21725073819791488, "grad_norm": 2.867063283920288, "learning_rate": 9.108445447972531e-06, "loss": 0.5564, "step": 5233 }, { "epoch": 0.2172922537221262, "grad_norm": 2.4597020149230957, "learning_rate": 9.108062234420438e-06, "loss": 0.6508, "step": 5234 }, { "epoch": 0.21733376924633754, "grad_norm": 2.4936368465423584, "learning_rate": 9.107678946593395e-06, "loss": 0.5505, "step": 5235 }, { "epoch": 0.21737528477054888, "grad_norm": 2.1417925357818604, "learning_rate": 9.10729558449833e-06, "loss": 0.4322, "step": 5236 }, { "epoch": 0.2174168002947602, "grad_norm": 2.520448684692383, "learning_rate": 9.10691214814218e-06, "loss": 0.6082, "step": 5237 }, { "epoch": 0.21745831581897157, "grad_norm": 2.5658206939697266, "learning_rate": 9.106528637531874e-06, "loss": 0.5056, "step": 5238 }, { "epoch": 0.2174998313431829, "grad_norm": 2.3189642429351807, "learning_rate": 9.106145052674347e-06, "loss": 0.5421, "step": 5239 }, { "epoch": 0.21754134686739424, "grad_norm": 1.9768052101135254, "learning_rate": 9.105761393576534e-06, "loss": 0.5099, "step": 5240 }, { "epoch": 0.21758286239160557, "grad_norm": 2.4593029022216797, "learning_rate": 9.10537766024537e-06, "loss": 0.5534, "step": 5241 }, { "epoch": 0.2176243779158169, "grad_norm": 2.455526113510132, "learning_rate": 9.104993852687796e-06, "loss": 0.4326, "step": 5242 }, { "epoch": 0.21766589344002824, "grad_norm": 3.4651126861572266, "learning_rate": 9.104609970910749e-06, "loss": 0.4656, "step": 5243 }, { "epoch": 0.21770740896423957, "grad_norm": 2.780815601348877, "learning_rate": 9.104226014921171e-06, "loss": 0.5042, "step": 5244 }, { "epoch": 0.2177489244884509, "grad_norm": 2.317502498626709, "learning_rate": 9.103841984726004e-06, "loss": 0.4256, "step": 5245 }, { "epoch": 0.21779044001266223, "grad_norm": 2.530035972595215, "learning_rate": 9.103457880332192e-06, "loss": 0.5282, "step": 5246 }, { "epoch": 0.21783195553687357, "grad_norm": 2.275160551071167, "learning_rate": 9.103073701746679e-06, "loss": 0.6002, "step": 5247 }, { "epoch": 0.2178734710610849, "grad_norm": 3.0055813789367676, "learning_rate": 9.102689448976409e-06, "loss": 0.6081, "step": 5248 }, { "epoch": 0.21791498658529623, "grad_norm": 2.6589388847351074, "learning_rate": 9.102305122028331e-06, "loss": 0.5707, "step": 5249 }, { "epoch": 0.21795650210950757, "grad_norm": 2.519105911254883, "learning_rate": 9.101920720909394e-06, "loss": 0.5655, "step": 5250 }, { "epoch": 0.2179980176337189, "grad_norm": 2.7291290760040283, "learning_rate": 9.10153624562655e-06, "loss": 0.556, "step": 5251 }, { "epoch": 0.21803953315793023, "grad_norm": 2.6098010540008545, "learning_rate": 9.101151696186748e-06, "loss": 0.461, "step": 5252 }, { "epoch": 0.21808104868214157, "grad_norm": 2.727667808532715, "learning_rate": 9.10076707259694e-06, "loss": 0.4744, "step": 5253 }, { "epoch": 0.21812256420635293, "grad_norm": 2.3374171257019043, "learning_rate": 9.100382374864081e-06, "loss": 0.6286, "step": 5254 }, { "epoch": 0.21816407973056426, "grad_norm": 2.7616424560546875, "learning_rate": 9.099997602995128e-06, "loss": 0.5764, "step": 5255 }, { "epoch": 0.2182055952547756, "grad_norm": 2.5794451236724854, "learning_rate": 9.099612756997038e-06, "loss": 0.555, "step": 5256 }, { "epoch": 0.21824711077898692, "grad_norm": 2.671025276184082, "learning_rate": 9.099227836876764e-06, "loss": 0.515, "step": 5257 }, { "epoch": 0.21828862630319826, "grad_norm": 2.4551963806152344, "learning_rate": 9.098842842641273e-06, "loss": 0.5765, "step": 5258 }, { "epoch": 0.2183301418274096, "grad_norm": 2.4205944538116455, "learning_rate": 9.09845777429752e-06, "loss": 0.6015, "step": 5259 }, { "epoch": 0.21837165735162092, "grad_norm": 2.358274221420288, "learning_rate": 9.098072631852469e-06, "loss": 0.5048, "step": 5260 }, { "epoch": 0.21841317287583226, "grad_norm": 2.523820400238037, "learning_rate": 9.097687415313084e-06, "loss": 0.4205, "step": 5261 }, { "epoch": 0.2184546884000436, "grad_norm": 2.712926149368286, "learning_rate": 9.097302124686329e-06, "loss": 0.4361, "step": 5262 }, { "epoch": 0.21849620392425492, "grad_norm": 2.622642755508423, "learning_rate": 9.096916759979171e-06, "loss": 0.492, "step": 5263 }, { "epoch": 0.21853771944846626, "grad_norm": 2.601999044418335, "learning_rate": 9.096531321198578e-06, "loss": 0.4695, "step": 5264 }, { "epoch": 0.2185792349726776, "grad_norm": 2.157454252243042, "learning_rate": 9.096145808351518e-06, "loss": 0.5007, "step": 5265 }, { "epoch": 0.21862075049688892, "grad_norm": 3.0172877311706543, "learning_rate": 9.09576022144496e-06, "loss": 0.6348, "step": 5266 }, { "epoch": 0.21866226602110025, "grad_norm": 2.606686592102051, "learning_rate": 9.095374560485877e-06, "loss": 0.5117, "step": 5267 }, { "epoch": 0.2187037815453116, "grad_norm": 2.298981189727783, "learning_rate": 9.094988825481242e-06, "loss": 0.3911, "step": 5268 }, { "epoch": 0.21874529706952295, "grad_norm": 3.197650194168091, "learning_rate": 9.094603016438029e-06, "loss": 0.6182, "step": 5269 }, { "epoch": 0.21878681259373428, "grad_norm": 2.228928565979004, "learning_rate": 9.094217133363211e-06, "loss": 0.4997, "step": 5270 }, { "epoch": 0.21882832811794561, "grad_norm": 2.9527547359466553, "learning_rate": 9.093831176263769e-06, "loss": 0.6039, "step": 5271 }, { "epoch": 0.21886984364215695, "grad_norm": 2.7138190269470215, "learning_rate": 9.09344514514668e-06, "loss": 0.5631, "step": 5272 }, { "epoch": 0.21891135916636828, "grad_norm": 2.637389659881592, "learning_rate": 9.093059040018921e-06, "loss": 0.5373, "step": 5273 }, { "epoch": 0.2189528746905796, "grad_norm": 2.653695583343506, "learning_rate": 9.092672860887476e-06, "loss": 0.6025, "step": 5274 }, { "epoch": 0.21899439021479095, "grad_norm": 2.293489694595337, "learning_rate": 9.092286607759326e-06, "loss": 0.5726, "step": 5275 }, { "epoch": 0.21903590573900228, "grad_norm": 2.424067497253418, "learning_rate": 9.091900280641455e-06, "loss": 0.5014, "step": 5276 }, { "epoch": 0.2190774212632136, "grad_norm": 2.577306032180786, "learning_rate": 9.091513879540845e-06, "loss": 0.5489, "step": 5277 }, { "epoch": 0.21911893678742494, "grad_norm": 2.721353769302368, "learning_rate": 9.091127404464487e-06, "loss": 0.4923, "step": 5278 }, { "epoch": 0.21916045231163628, "grad_norm": 2.6695902347564697, "learning_rate": 9.090740855419367e-06, "loss": 0.599, "step": 5279 }, { "epoch": 0.2192019678358476, "grad_norm": 2.768845558166504, "learning_rate": 9.090354232412472e-06, "loss": 0.5962, "step": 5280 }, { "epoch": 0.21924348336005894, "grad_norm": 2.681735038757324, "learning_rate": 9.089967535450794e-06, "loss": 0.4729, "step": 5281 }, { "epoch": 0.21928499888427028, "grad_norm": 2.836458444595337, "learning_rate": 9.089580764541324e-06, "loss": 0.4851, "step": 5282 }, { "epoch": 0.2193265144084816, "grad_norm": 2.675086736679077, "learning_rate": 9.089193919691056e-06, "loss": 0.455, "step": 5283 }, { "epoch": 0.21936802993269294, "grad_norm": 2.724562644958496, "learning_rate": 9.08880700090698e-06, "loss": 0.4834, "step": 5284 }, { "epoch": 0.2194095454569043, "grad_norm": 2.423021078109741, "learning_rate": 9.0884200081961e-06, "loss": 0.4246, "step": 5285 }, { "epoch": 0.21945106098111564, "grad_norm": 2.2178778648376465, "learning_rate": 9.088032941565403e-06, "loss": 0.4506, "step": 5286 }, { "epoch": 0.21949257650532697, "grad_norm": 2.7082645893096924, "learning_rate": 9.087645801021895e-06, "loss": 0.6004, "step": 5287 }, { "epoch": 0.2195340920295383, "grad_norm": 2.9799718856811523, "learning_rate": 9.087258586572571e-06, "loss": 0.5208, "step": 5288 }, { "epoch": 0.21957560755374964, "grad_norm": 2.5828497409820557, "learning_rate": 9.086871298224434e-06, "loss": 0.5251, "step": 5289 }, { "epoch": 0.21961712307796097, "grad_norm": 3.4194071292877197, "learning_rate": 9.086483935984487e-06, "loss": 0.456, "step": 5290 }, { "epoch": 0.2196586386021723, "grad_norm": 2.4210798740386963, "learning_rate": 9.086096499859733e-06, "loss": 0.4934, "step": 5291 }, { "epoch": 0.21970015412638363, "grad_norm": 2.6644845008850098, "learning_rate": 9.085708989857177e-06, "loss": 0.6316, "step": 5292 }, { "epoch": 0.21974166965059497, "grad_norm": 2.828603982925415, "learning_rate": 9.085321405983822e-06, "loss": 0.4284, "step": 5293 }, { "epoch": 0.2197831851748063, "grad_norm": 2.2609405517578125, "learning_rate": 9.08493374824668e-06, "loss": 0.3337, "step": 5294 }, { "epoch": 0.21982470069901763, "grad_norm": 2.2879538536071777, "learning_rate": 9.084546016652758e-06, "loss": 0.5083, "step": 5295 }, { "epoch": 0.21986621622322897, "grad_norm": 2.3650963306427, "learning_rate": 9.084158211209067e-06, "loss": 0.545, "step": 5296 }, { "epoch": 0.2199077317474403, "grad_norm": 2.5270891189575195, "learning_rate": 9.083770331922619e-06, "loss": 0.5452, "step": 5297 }, { "epoch": 0.21994924727165163, "grad_norm": 2.4707629680633545, "learning_rate": 9.083382378800424e-06, "loss": 0.5607, "step": 5298 }, { "epoch": 0.21999076279586297, "grad_norm": 2.5524072647094727, "learning_rate": 9.082994351849502e-06, "loss": 0.5909, "step": 5299 }, { "epoch": 0.22003227832007433, "grad_norm": 2.764472007751465, "learning_rate": 9.082606251076864e-06, "loss": 0.4809, "step": 5300 }, { "epoch": 0.22007379384428566, "grad_norm": 2.683091402053833, "learning_rate": 9.082218076489527e-06, "loss": 0.6226, "step": 5301 }, { "epoch": 0.220115309368497, "grad_norm": 2.762366771697998, "learning_rate": 9.081829828094511e-06, "loss": 0.4156, "step": 5302 }, { "epoch": 0.22015682489270832, "grad_norm": 2.349641799926758, "learning_rate": 9.081441505898837e-06, "loss": 0.4748, "step": 5303 }, { "epoch": 0.22019834041691966, "grad_norm": 2.6643755435943604, "learning_rate": 9.081053109909523e-06, "loss": 0.6637, "step": 5304 }, { "epoch": 0.220239855941131, "grad_norm": 2.7174465656280518, "learning_rate": 9.080664640133592e-06, "loss": 0.652, "step": 5305 }, { "epoch": 0.22028137146534232, "grad_norm": 2.2099316120147705, "learning_rate": 9.080276096578068e-06, "loss": 0.4836, "step": 5306 }, { "epoch": 0.22032288698955366, "grad_norm": 2.529557228088379, "learning_rate": 9.079887479249976e-06, "loss": 0.4869, "step": 5307 }, { "epoch": 0.220364402513765, "grad_norm": 2.5176467895507812, "learning_rate": 9.079498788156344e-06, "loss": 0.5623, "step": 5308 }, { "epoch": 0.22040591803797632, "grad_norm": 2.3159546852111816, "learning_rate": 9.079110023304195e-06, "loss": 0.4672, "step": 5309 }, { "epoch": 0.22044743356218766, "grad_norm": 2.4736781120300293, "learning_rate": 9.078721184700565e-06, "loss": 0.4832, "step": 5310 }, { "epoch": 0.220488949086399, "grad_norm": 3.6167664527893066, "learning_rate": 9.078332272352479e-06, "loss": 0.4689, "step": 5311 }, { "epoch": 0.22053046461061032, "grad_norm": 2.49702525138855, "learning_rate": 9.07794328626697e-06, "loss": 0.3877, "step": 5312 }, { "epoch": 0.22057198013482165, "grad_norm": 2.397815704345703, "learning_rate": 9.07755422645107e-06, "loss": 0.4798, "step": 5313 }, { "epoch": 0.220613495659033, "grad_norm": 2.782665967941284, "learning_rate": 9.077165092911814e-06, "loss": 0.4424, "step": 5314 }, { "epoch": 0.22065501118324432, "grad_norm": 3.477250814437866, "learning_rate": 9.07677588565624e-06, "loss": 0.5837, "step": 5315 }, { "epoch": 0.22069652670745568, "grad_norm": 2.226574182510376, "learning_rate": 9.076386604691381e-06, "loss": 0.4433, "step": 5316 }, { "epoch": 0.22073804223166701, "grad_norm": 2.1447336673736572, "learning_rate": 9.075997250024278e-06, "loss": 0.5026, "step": 5317 }, { "epoch": 0.22077955775587835, "grad_norm": 2.486107349395752, "learning_rate": 9.075607821661968e-06, "loss": 0.4493, "step": 5318 }, { "epoch": 0.22082107328008968, "grad_norm": 2.66298246383667, "learning_rate": 9.075218319611498e-06, "loss": 0.4918, "step": 5319 }, { "epoch": 0.220862588804301, "grad_norm": 2.7193663120269775, "learning_rate": 9.074828743879903e-06, "loss": 0.4993, "step": 5320 }, { "epoch": 0.22090410432851235, "grad_norm": 2.5842134952545166, "learning_rate": 9.07443909447423e-06, "loss": 0.4186, "step": 5321 }, { "epoch": 0.22094561985272368, "grad_norm": 2.772719621658325, "learning_rate": 9.074049371401525e-06, "loss": 0.519, "step": 5322 }, { "epoch": 0.220987135376935, "grad_norm": 2.463780164718628, "learning_rate": 9.073659574668833e-06, "loss": 0.5429, "step": 5323 }, { "epoch": 0.22102865090114635, "grad_norm": 2.850781202316284, "learning_rate": 9.073269704283201e-06, "loss": 0.7194, "step": 5324 }, { "epoch": 0.22107016642535768, "grad_norm": 2.848637342453003, "learning_rate": 9.07287976025168e-06, "loss": 0.546, "step": 5325 }, { "epoch": 0.221111681949569, "grad_norm": 2.793590545654297, "learning_rate": 9.072489742581318e-06, "loss": 0.4564, "step": 5326 }, { "epoch": 0.22115319747378034, "grad_norm": 2.4969286918640137, "learning_rate": 9.072099651279167e-06, "loss": 0.5253, "step": 5327 }, { "epoch": 0.22119471299799168, "grad_norm": 2.672985553741455, "learning_rate": 9.07170948635228e-06, "loss": 0.487, "step": 5328 }, { "epoch": 0.221236228522203, "grad_norm": 2.3545548915863037, "learning_rate": 9.071319247807714e-06, "loss": 0.5865, "step": 5329 }, { "epoch": 0.22127774404641434, "grad_norm": 2.350715398788452, "learning_rate": 9.070928935652522e-06, "loss": 0.5429, "step": 5330 }, { "epoch": 0.2213192595706257, "grad_norm": 2.434234142303467, "learning_rate": 9.070538549893762e-06, "loss": 0.375, "step": 5331 }, { "epoch": 0.22136077509483704, "grad_norm": 2.547376871109009, "learning_rate": 9.07014809053849e-06, "loss": 0.7245, "step": 5332 }, { "epoch": 0.22140229061904837, "grad_norm": 2.311295986175537, "learning_rate": 9.069757557593768e-06, "loss": 0.5561, "step": 5333 }, { "epoch": 0.2214438061432597, "grad_norm": 2.650742530822754, "learning_rate": 9.069366951066657e-06, "loss": 0.4242, "step": 5334 }, { "epoch": 0.22148532166747104, "grad_norm": 2.1912174224853516, "learning_rate": 9.068976270964219e-06, "loss": 0.4636, "step": 5335 }, { "epoch": 0.22152683719168237, "grad_norm": 3.034909725189209, "learning_rate": 9.068585517293516e-06, "loss": 0.6533, "step": 5336 }, { "epoch": 0.2215683527158937, "grad_norm": 2.728755474090576, "learning_rate": 9.068194690061614e-06, "loss": 0.4292, "step": 5337 }, { "epoch": 0.22160986824010503, "grad_norm": 2.9468812942504883, "learning_rate": 9.067803789275581e-06, "loss": 0.4295, "step": 5338 }, { "epoch": 0.22165138376431637, "grad_norm": 2.4717257022857666, "learning_rate": 9.067412814942482e-06, "loss": 0.4765, "step": 5339 }, { "epoch": 0.2216928992885277, "grad_norm": 2.611724615097046, "learning_rate": 9.067021767069387e-06, "loss": 0.6, "step": 5340 }, { "epoch": 0.22173441481273903, "grad_norm": 2.14572811126709, "learning_rate": 9.066630645663366e-06, "loss": 0.4429, "step": 5341 }, { "epoch": 0.22177593033695037, "grad_norm": 2.5921623706817627, "learning_rate": 9.066239450731491e-06, "loss": 0.6207, "step": 5342 }, { "epoch": 0.2218174458611617, "grad_norm": 2.302522659301758, "learning_rate": 9.065848182280835e-06, "loss": 0.595, "step": 5343 }, { "epoch": 0.22185896138537303, "grad_norm": 2.537796974182129, "learning_rate": 9.065456840318473e-06, "loss": 0.5248, "step": 5344 }, { "epoch": 0.22190047690958437, "grad_norm": 2.808063268661499, "learning_rate": 9.065065424851475e-06, "loss": 0.5328, "step": 5345 }, { "epoch": 0.22194199243379573, "grad_norm": 2.642932891845703, "learning_rate": 9.064673935886928e-06, "loss": 0.4052, "step": 5346 }, { "epoch": 0.22198350795800706, "grad_norm": 2.4490301609039307, "learning_rate": 9.064282373431901e-06, "loss": 0.5155, "step": 5347 }, { "epoch": 0.2220250234822184, "grad_norm": 2.976954936981201, "learning_rate": 9.06389073749348e-06, "loss": 0.627, "step": 5348 }, { "epoch": 0.22206653900642973, "grad_norm": 2.0764598846435547, "learning_rate": 9.063499028078742e-06, "loss": 0.3595, "step": 5349 }, { "epoch": 0.22210805453064106, "grad_norm": 2.8267242908477783, "learning_rate": 9.063107245194768e-06, "loss": 0.5061, "step": 5350 }, { "epoch": 0.2221495700548524, "grad_norm": 2.817601442337036, "learning_rate": 9.062715388848647e-06, "loss": 0.6016, "step": 5351 }, { "epoch": 0.22219108557906372, "grad_norm": 2.550950765609741, "learning_rate": 9.06232345904746e-06, "loss": 0.4956, "step": 5352 }, { "epoch": 0.22223260110327506, "grad_norm": 2.5274322032928467, "learning_rate": 9.061931455798294e-06, "loss": 0.5835, "step": 5353 }, { "epoch": 0.2222741166274864, "grad_norm": 3.0246169567108154, "learning_rate": 9.061539379108235e-06, "loss": 0.5206, "step": 5354 }, { "epoch": 0.22231563215169772, "grad_norm": 10.41424560546875, "learning_rate": 9.061147228984375e-06, "loss": 0.6797, "step": 5355 }, { "epoch": 0.22235714767590906, "grad_norm": 2.765113353729248, "learning_rate": 9.060755005433803e-06, "loss": 0.6065, "step": 5356 }, { "epoch": 0.2223986632001204, "grad_norm": 2.352743148803711, "learning_rate": 9.060362708463609e-06, "loss": 0.6117, "step": 5357 }, { "epoch": 0.22244017872433172, "grad_norm": 2.1726534366607666, "learning_rate": 9.059970338080886e-06, "loss": 0.4389, "step": 5358 }, { "epoch": 0.22248169424854305, "grad_norm": 2.4263434410095215, "learning_rate": 9.05957789429273e-06, "loss": 0.4385, "step": 5359 }, { "epoch": 0.2225232097727544, "grad_norm": 2.589167356491089, "learning_rate": 9.059185377106236e-06, "loss": 0.5306, "step": 5360 }, { "epoch": 0.22256472529696572, "grad_norm": 2.750469207763672, "learning_rate": 9.058792786528501e-06, "loss": 0.4643, "step": 5361 }, { "epoch": 0.22260624082117708, "grad_norm": 2.5995020866394043, "learning_rate": 9.058400122566622e-06, "loss": 0.5337, "step": 5362 }, { "epoch": 0.22264775634538841, "grad_norm": 3.1313936710357666, "learning_rate": 9.058007385227699e-06, "loss": 0.4898, "step": 5363 }, { "epoch": 0.22268927186959975, "grad_norm": 2.528249979019165, "learning_rate": 9.057614574518832e-06, "loss": 0.599, "step": 5364 }, { "epoch": 0.22273078739381108, "grad_norm": 2.329169511795044, "learning_rate": 9.057221690447124e-06, "loss": 0.5904, "step": 5365 }, { "epoch": 0.2227723029180224, "grad_norm": 2.7568705081939697, "learning_rate": 9.05682873301968e-06, "loss": 0.4602, "step": 5366 }, { "epoch": 0.22281381844223375, "grad_norm": 2.4446005821228027, "learning_rate": 9.056435702243601e-06, "loss": 0.428, "step": 5367 }, { "epoch": 0.22285533396644508, "grad_norm": 2.431732416152954, "learning_rate": 9.056042598125996e-06, "loss": 0.4303, "step": 5368 }, { "epoch": 0.2228968494906564, "grad_norm": 2.3618767261505127, "learning_rate": 9.055649420673973e-06, "loss": 0.4847, "step": 5369 }, { "epoch": 0.22293836501486775, "grad_norm": 2.054255723953247, "learning_rate": 9.055256169894639e-06, "loss": 0.4579, "step": 5370 }, { "epoch": 0.22297988053907908, "grad_norm": 2.4481542110443115, "learning_rate": 9.054862845795103e-06, "loss": 0.5885, "step": 5371 }, { "epoch": 0.2230213960632904, "grad_norm": 2.686518907546997, "learning_rate": 9.054469448382479e-06, "loss": 0.4421, "step": 5372 }, { "epoch": 0.22306291158750174, "grad_norm": 2.704922676086426, "learning_rate": 9.05407597766388e-06, "loss": 0.5825, "step": 5373 }, { "epoch": 0.22310442711171308, "grad_norm": 2.984665870666504, "learning_rate": 9.053682433646419e-06, "loss": 0.5867, "step": 5374 }, { "epoch": 0.2231459426359244, "grad_norm": 2.773817539215088, "learning_rate": 9.05328881633721e-06, "loss": 0.4457, "step": 5375 }, { "epoch": 0.22318745816013574, "grad_norm": 2.759740114212036, "learning_rate": 9.05289512574337e-06, "loss": 0.4535, "step": 5376 }, { "epoch": 0.2232289736843471, "grad_norm": 2.657278299331665, "learning_rate": 9.052501361872019e-06, "loss": 0.5179, "step": 5377 }, { "epoch": 0.22327048920855844, "grad_norm": 3.9424469470977783, "learning_rate": 9.052107524730275e-06, "loss": 0.5863, "step": 5378 }, { "epoch": 0.22331200473276977, "grad_norm": 2.6865170001983643, "learning_rate": 9.051713614325259e-06, "loss": 0.5927, "step": 5379 }, { "epoch": 0.2233535202569811, "grad_norm": 3.472860813140869, "learning_rate": 9.051319630664094e-06, "loss": 0.5031, "step": 5380 }, { "epoch": 0.22339503578119244, "grad_norm": 2.6725218296051025, "learning_rate": 9.050925573753901e-06, "loss": 0.5494, "step": 5381 }, { "epoch": 0.22343655130540377, "grad_norm": 2.720346212387085, "learning_rate": 9.050531443601807e-06, "loss": 0.6766, "step": 5382 }, { "epoch": 0.2234780668296151, "grad_norm": 2.5191874504089355, "learning_rate": 9.050137240214937e-06, "loss": 0.3884, "step": 5383 }, { "epoch": 0.22351958235382643, "grad_norm": 2.479715347290039, "learning_rate": 9.04974296360042e-06, "loss": 0.4929, "step": 5384 }, { "epoch": 0.22356109787803777, "grad_norm": 2.556554079055786, "learning_rate": 9.049348613765379e-06, "loss": 0.4725, "step": 5385 }, { "epoch": 0.2236026134022491, "grad_norm": 2.0037448406219482, "learning_rate": 9.04895419071695e-06, "loss": 0.5563, "step": 5386 }, { "epoch": 0.22364412892646043, "grad_norm": 2.5966928005218506, "learning_rate": 9.048559694462262e-06, "loss": 0.547, "step": 5387 }, { "epoch": 0.22368564445067177, "grad_norm": 2.5879907608032227, "learning_rate": 9.048165125008448e-06, "loss": 0.5738, "step": 5388 }, { "epoch": 0.2237271599748831, "grad_norm": 2.566655158996582, "learning_rate": 9.04777048236264e-06, "loss": 0.5412, "step": 5389 }, { "epoch": 0.22376867549909443, "grad_norm": 2.4741666316986084, "learning_rate": 9.047375766531979e-06, "loss": 0.5088, "step": 5390 }, { "epoch": 0.22381019102330577, "grad_norm": 2.628032684326172, "learning_rate": 9.046980977523594e-06, "loss": 0.5626, "step": 5391 }, { "epoch": 0.2238517065475171, "grad_norm": 2.155379295349121, "learning_rate": 9.046586115344628e-06, "loss": 0.548, "step": 5392 }, { "epoch": 0.22389322207172846, "grad_norm": 2.663362741470337, "learning_rate": 9.046191180002218e-06, "loss": 0.7084, "step": 5393 }, { "epoch": 0.2239347375959398, "grad_norm": 2.3869621753692627, "learning_rate": 9.045796171503506e-06, "loss": 0.6102, "step": 5394 }, { "epoch": 0.22397625312015113, "grad_norm": 2.3119637966156006, "learning_rate": 9.045401089855633e-06, "loss": 0.5757, "step": 5395 }, { "epoch": 0.22401776864436246, "grad_norm": 2.2893831729888916, "learning_rate": 9.04500593506574e-06, "loss": 0.5352, "step": 5396 }, { "epoch": 0.2240592841685738, "grad_norm": 2.6506898403167725, "learning_rate": 9.044610707140977e-06, "loss": 0.5387, "step": 5397 }, { "epoch": 0.22410079969278512, "grad_norm": 2.8461060523986816, "learning_rate": 9.044215406088486e-06, "loss": 0.6925, "step": 5398 }, { "epoch": 0.22414231521699646, "grad_norm": 2.3667638301849365, "learning_rate": 9.043820031915413e-06, "loss": 0.5813, "step": 5399 }, { "epoch": 0.2241838307412078, "grad_norm": 2.9138729572296143, "learning_rate": 9.043424584628911e-06, "loss": 0.5398, "step": 5400 }, { "epoch": 0.22422534626541912, "grad_norm": 2.981569290161133, "learning_rate": 9.043029064236125e-06, "loss": 0.5899, "step": 5401 }, { "epoch": 0.22426686178963046, "grad_norm": 3.6454217433929443, "learning_rate": 9.04263347074421e-06, "loss": 0.4235, "step": 5402 }, { "epoch": 0.2243083773138418, "grad_norm": 2.5841798782348633, "learning_rate": 9.042237804160313e-06, "loss": 0.5486, "step": 5403 }, { "epoch": 0.22434989283805312, "grad_norm": 2.7808289527893066, "learning_rate": 9.041842064491593e-06, "loss": 0.5147, "step": 5404 }, { "epoch": 0.22439140836226446, "grad_norm": 2.4742395877838135, "learning_rate": 9.041446251745206e-06, "loss": 0.3845, "step": 5405 }, { "epoch": 0.2244329238864758, "grad_norm": 2.4171411991119385, "learning_rate": 9.041050365928303e-06, "loss": 0.5907, "step": 5406 }, { "epoch": 0.22447443941068712, "grad_norm": 2.533630609512329, "learning_rate": 9.040654407048046e-06, "loss": 0.4424, "step": 5407 }, { "epoch": 0.22451595493489848, "grad_norm": 2.887320041656494, "learning_rate": 9.040258375111592e-06, "loss": 0.6511, "step": 5408 }, { "epoch": 0.22455747045910981, "grad_norm": 2.896256923675537, "learning_rate": 9.039862270126102e-06, "loss": 0.5012, "step": 5409 }, { "epoch": 0.22459898598332115, "grad_norm": 3.134650230407715, "learning_rate": 9.039466092098738e-06, "loss": 0.5929, "step": 5410 }, { "epoch": 0.22464050150753248, "grad_norm": 2.2638752460479736, "learning_rate": 9.039069841036664e-06, "loss": 0.4156, "step": 5411 }, { "epoch": 0.2246820170317438, "grad_norm": 2.332085132598877, "learning_rate": 9.038673516947042e-06, "loss": 0.4836, "step": 5412 }, { "epoch": 0.22472353255595515, "grad_norm": 2.3651793003082275, "learning_rate": 9.038277119837039e-06, "loss": 0.5171, "step": 5413 }, { "epoch": 0.22476504808016648, "grad_norm": 2.6350111961364746, "learning_rate": 9.03788064971382e-06, "loss": 0.4357, "step": 5414 }, { "epoch": 0.2248065636043778, "grad_norm": 2.597756862640381, "learning_rate": 9.037484106584557e-06, "loss": 0.5294, "step": 5415 }, { "epoch": 0.22484807912858915, "grad_norm": 2.858628273010254, "learning_rate": 9.037087490456418e-06, "loss": 0.4973, "step": 5416 }, { "epoch": 0.22488959465280048, "grad_norm": 2.7128422260284424, "learning_rate": 9.036690801336572e-06, "loss": 0.6091, "step": 5417 }, { "epoch": 0.2249311101770118, "grad_norm": 2.8401947021484375, "learning_rate": 9.036294039232195e-06, "loss": 0.7046, "step": 5418 }, { "epoch": 0.22497262570122314, "grad_norm": 2.350882053375244, "learning_rate": 9.035897204150457e-06, "loss": 0.4174, "step": 5419 }, { "epoch": 0.22501414122543448, "grad_norm": 2.7023301124572754, "learning_rate": 9.035500296098534e-06, "loss": 0.5155, "step": 5420 }, { "epoch": 0.2250556567496458, "grad_norm": 2.473611354827881, "learning_rate": 9.035103315083603e-06, "loss": 0.5806, "step": 5421 }, { "epoch": 0.22509717227385714, "grad_norm": 2.2554426193237305, "learning_rate": 9.03470626111284e-06, "loss": 0.3231, "step": 5422 }, { "epoch": 0.22513868779806848, "grad_norm": 2.660898447036743, "learning_rate": 9.034309134193425e-06, "loss": 0.6545, "step": 5423 }, { "epoch": 0.22518020332227984, "grad_norm": 2.7212729454040527, "learning_rate": 9.033911934332539e-06, "loss": 0.6042, "step": 5424 }, { "epoch": 0.22522171884649117, "grad_norm": 2.905985116958618, "learning_rate": 9.03351466153736e-06, "loss": 0.4889, "step": 5425 }, { "epoch": 0.2252632343707025, "grad_norm": 2.6217243671417236, "learning_rate": 9.033117315815077e-06, "loss": 0.6682, "step": 5426 }, { "epoch": 0.22530474989491384, "grad_norm": 3.007572650909424, "learning_rate": 9.03271989717287e-06, "loss": 0.6776, "step": 5427 }, { "epoch": 0.22534626541912517, "grad_norm": 2.2341039180755615, "learning_rate": 9.032322405617924e-06, "loss": 0.5905, "step": 5428 }, { "epoch": 0.2253877809433365, "grad_norm": 2.5338127613067627, "learning_rate": 9.031924841157425e-06, "loss": 0.6935, "step": 5429 }, { "epoch": 0.22542929646754784, "grad_norm": 2.8580715656280518, "learning_rate": 9.031527203798565e-06, "loss": 0.5165, "step": 5430 }, { "epoch": 0.22547081199175917, "grad_norm": 3.447645664215088, "learning_rate": 9.031129493548531e-06, "loss": 0.5154, "step": 5431 }, { "epoch": 0.2255123275159705, "grad_norm": 1.9546058177947998, "learning_rate": 9.030731710414513e-06, "loss": 0.3639, "step": 5432 }, { "epoch": 0.22555384304018183, "grad_norm": 2.6406428813934326, "learning_rate": 9.030333854403703e-06, "loss": 0.5485, "step": 5433 }, { "epoch": 0.22559535856439317, "grad_norm": 2.2709617614746094, "learning_rate": 9.029935925523298e-06, "loss": 0.5101, "step": 5434 }, { "epoch": 0.2256368740886045, "grad_norm": 2.3501219749450684, "learning_rate": 9.029537923780487e-06, "loss": 0.6018, "step": 5435 }, { "epoch": 0.22567838961281583, "grad_norm": 2.2705752849578857, "learning_rate": 9.029139849182471e-06, "loss": 0.4764, "step": 5436 }, { "epoch": 0.22571990513702717, "grad_norm": 2.044116258621216, "learning_rate": 9.028741701736444e-06, "loss": 0.5229, "step": 5437 }, { "epoch": 0.2257614206612385, "grad_norm": 2.567833423614502, "learning_rate": 9.028343481449606e-06, "loss": 0.5566, "step": 5438 }, { "epoch": 0.22580293618544986, "grad_norm": 2.385179281234741, "learning_rate": 9.027945188329157e-06, "loss": 0.4436, "step": 5439 }, { "epoch": 0.2258444517096612, "grad_norm": 2.51175856590271, "learning_rate": 9.027546822382298e-06, "loss": 0.6591, "step": 5440 }, { "epoch": 0.22588596723387253, "grad_norm": 2.423414468765259, "learning_rate": 9.027148383616233e-06, "loss": 0.5151, "step": 5441 }, { "epoch": 0.22592748275808386, "grad_norm": 2.733414649963379, "learning_rate": 9.026749872038161e-06, "loss": 0.4627, "step": 5442 }, { "epoch": 0.2259689982822952, "grad_norm": 2.5766313076019287, "learning_rate": 9.026351287655294e-06, "loss": 0.5048, "step": 5443 }, { "epoch": 0.22601051380650652, "grad_norm": 2.8000264167785645, "learning_rate": 9.025952630474834e-06, "loss": 0.441, "step": 5444 }, { "epoch": 0.22605202933071786, "grad_norm": 2.3633434772491455, "learning_rate": 9.02555390050399e-06, "loss": 0.4155, "step": 5445 }, { "epoch": 0.2260935448549292, "grad_norm": 2.691927671432495, "learning_rate": 9.025155097749972e-06, "loss": 0.4964, "step": 5446 }, { "epoch": 0.22613506037914052, "grad_norm": 2.3896546363830566, "learning_rate": 9.024756222219988e-06, "loss": 0.5898, "step": 5447 }, { "epoch": 0.22617657590335186, "grad_norm": 2.404576539993286, "learning_rate": 9.024357273921252e-06, "loss": 0.4568, "step": 5448 }, { "epoch": 0.2262180914275632, "grad_norm": 2.493891954421997, "learning_rate": 9.023958252860975e-06, "loss": 0.4581, "step": 5449 }, { "epoch": 0.22625960695177452, "grad_norm": 2.9130005836486816, "learning_rate": 9.023559159046376e-06, "loss": 0.4999, "step": 5450 }, { "epoch": 0.22630112247598586, "grad_norm": 2.744680643081665, "learning_rate": 9.023159992484667e-06, "loss": 0.5951, "step": 5451 }, { "epoch": 0.2263426380001972, "grad_norm": 2.470076084136963, "learning_rate": 9.022760753183065e-06, "loss": 0.5373, "step": 5452 }, { "epoch": 0.22638415352440852, "grad_norm": 2.5571491718292236, "learning_rate": 9.022361441148791e-06, "loss": 0.4767, "step": 5453 }, { "epoch": 0.22642566904861988, "grad_norm": 2.556976079940796, "learning_rate": 9.021962056389059e-06, "loss": 0.5383, "step": 5454 }, { "epoch": 0.22646718457283121, "grad_norm": 2.747816324234009, "learning_rate": 9.021562598911096e-06, "loss": 0.6105, "step": 5455 }, { "epoch": 0.22650870009704255, "grad_norm": 2.08141827583313, "learning_rate": 9.021163068722122e-06, "loss": 0.4039, "step": 5456 }, { "epoch": 0.22655021562125388, "grad_norm": 2.5124223232269287, "learning_rate": 9.020763465829361e-06, "loss": 0.5897, "step": 5457 }, { "epoch": 0.22659173114546521, "grad_norm": 2.9416754245758057, "learning_rate": 9.020363790240037e-06, "loss": 0.4568, "step": 5458 }, { "epoch": 0.22663324666967655, "grad_norm": 2.895925760269165, "learning_rate": 9.019964041961377e-06, "loss": 0.5024, "step": 5459 }, { "epoch": 0.22667476219388788, "grad_norm": 2.5385019779205322, "learning_rate": 9.01956422100061e-06, "loss": 0.4957, "step": 5460 }, { "epoch": 0.2267162777180992, "grad_norm": 2.5633859634399414, "learning_rate": 9.019164327364961e-06, "loss": 0.5094, "step": 5461 }, { "epoch": 0.22675779324231055, "grad_norm": 2.851754903793335, "learning_rate": 9.018764361061662e-06, "loss": 0.6115, "step": 5462 }, { "epoch": 0.22679930876652188, "grad_norm": 3.1068363189697266, "learning_rate": 9.018364322097947e-06, "loss": 0.5124, "step": 5463 }, { "epoch": 0.2268408242907332, "grad_norm": 3.8249189853668213, "learning_rate": 9.017964210481047e-06, "loss": 0.5505, "step": 5464 }, { "epoch": 0.22688233981494454, "grad_norm": 2.5078396797180176, "learning_rate": 9.017564026218195e-06, "loss": 0.6902, "step": 5465 }, { "epoch": 0.22692385533915588, "grad_norm": 2.4045298099517822, "learning_rate": 9.017163769316629e-06, "loss": 0.415, "step": 5466 }, { "epoch": 0.2269653708633672, "grad_norm": 2.8266942501068115, "learning_rate": 9.016763439783582e-06, "loss": 0.5855, "step": 5467 }, { "epoch": 0.22700688638757854, "grad_norm": 2.450491428375244, "learning_rate": 9.016363037626294e-06, "loss": 0.4489, "step": 5468 }, { "epoch": 0.22704840191178988, "grad_norm": 2.6037662029266357, "learning_rate": 9.015962562852007e-06, "loss": 0.5937, "step": 5469 }, { "epoch": 0.22708991743600124, "grad_norm": 2.271298885345459, "learning_rate": 9.015562015467958e-06, "loss": 0.4862, "step": 5470 }, { "epoch": 0.22713143296021257, "grad_norm": 2.6395068168640137, "learning_rate": 9.01516139548139e-06, "loss": 0.5628, "step": 5471 }, { "epoch": 0.2271729484844239, "grad_norm": 2.4987399578094482, "learning_rate": 9.014760702899547e-06, "loss": 0.5448, "step": 5472 }, { "epoch": 0.22721446400863524, "grad_norm": 2.2290053367614746, "learning_rate": 9.014359937729673e-06, "loss": 0.5265, "step": 5473 }, { "epoch": 0.22725597953284657, "grad_norm": 2.489748477935791, "learning_rate": 9.013959099979015e-06, "loss": 0.5835, "step": 5474 }, { "epoch": 0.2272974950570579, "grad_norm": 2.72045636177063, "learning_rate": 9.013558189654819e-06, "loss": 0.586, "step": 5475 }, { "epoch": 0.22733901058126924, "grad_norm": 2.527756929397583, "learning_rate": 9.013157206764333e-06, "loss": 0.4774, "step": 5476 }, { "epoch": 0.22738052610548057, "grad_norm": 3.5669119358062744, "learning_rate": 9.01275615131481e-06, "loss": 0.6281, "step": 5477 }, { "epoch": 0.2274220416296919, "grad_norm": 2.4406416416168213, "learning_rate": 9.0123550233135e-06, "loss": 0.4175, "step": 5478 }, { "epoch": 0.22746355715390323, "grad_norm": 2.2595486640930176, "learning_rate": 9.011953822767651e-06, "loss": 0.5685, "step": 5479 }, { "epoch": 0.22750507267811457, "grad_norm": 2.3339359760284424, "learning_rate": 9.011552549684524e-06, "loss": 0.5845, "step": 5480 }, { "epoch": 0.2275465882023259, "grad_norm": 2.231550931930542, "learning_rate": 9.011151204071369e-06, "loss": 0.461, "step": 5481 }, { "epoch": 0.22758810372653723, "grad_norm": 2.777709484100342, "learning_rate": 9.010749785935445e-06, "loss": 0.6168, "step": 5482 }, { "epoch": 0.22762961925074857, "grad_norm": 2.6890342235565186, "learning_rate": 9.010348295284009e-06, "loss": 0.4745, "step": 5483 }, { "epoch": 0.2276711347749599, "grad_norm": 2.724592685699463, "learning_rate": 9.009946732124318e-06, "loss": 0.694, "step": 5484 }, { "epoch": 0.22771265029917126, "grad_norm": 3.951160430908203, "learning_rate": 9.009545096463636e-06, "loss": 0.4519, "step": 5485 }, { "epoch": 0.2277541658233826, "grad_norm": 2.6512982845306396, "learning_rate": 9.009143388309223e-06, "loss": 0.4623, "step": 5486 }, { "epoch": 0.22779568134759393, "grad_norm": 2.4659032821655273, "learning_rate": 9.00874160766834e-06, "loss": 0.5337, "step": 5487 }, { "epoch": 0.22783719687180526, "grad_norm": 2.6375210285186768, "learning_rate": 9.008339754548255e-06, "loss": 0.574, "step": 5488 }, { "epoch": 0.2278787123960166, "grad_norm": 2.378819704055786, "learning_rate": 9.007937828956231e-06, "loss": 0.3872, "step": 5489 }, { "epoch": 0.22792022792022792, "grad_norm": 2.797074317932129, "learning_rate": 9.007535830899537e-06, "loss": 0.5685, "step": 5490 }, { "epoch": 0.22796174344443926, "grad_norm": 2.553729295730591, "learning_rate": 9.00713376038544e-06, "loss": 0.5849, "step": 5491 }, { "epoch": 0.2280032589686506, "grad_norm": 2.7068355083465576, "learning_rate": 9.006731617421209e-06, "loss": 0.4497, "step": 5492 }, { "epoch": 0.22804477449286192, "grad_norm": 2.3301784992218018, "learning_rate": 9.006329402014115e-06, "loss": 0.4341, "step": 5493 }, { "epoch": 0.22808629001707326, "grad_norm": 3.1787524223327637, "learning_rate": 9.00592711417143e-06, "loss": 0.5368, "step": 5494 }, { "epoch": 0.2281278055412846, "grad_norm": 2.3246936798095703, "learning_rate": 9.005524753900432e-06, "loss": 0.5574, "step": 5495 }, { "epoch": 0.22816932106549592, "grad_norm": 2.1742312908172607, "learning_rate": 9.005122321208388e-06, "loss": 0.5187, "step": 5496 }, { "epoch": 0.22821083658970726, "grad_norm": 2.6560144424438477, "learning_rate": 9.004719816102581e-06, "loss": 0.4792, "step": 5497 }, { "epoch": 0.2282523521139186, "grad_norm": 2.6042168140411377, "learning_rate": 9.004317238590283e-06, "loss": 0.5437, "step": 5498 }, { "epoch": 0.22829386763812992, "grad_norm": 2.606167793273926, "learning_rate": 9.003914588678776e-06, "loss": 0.4195, "step": 5499 }, { "epoch": 0.22833538316234125, "grad_norm": 3.0228769779205322, "learning_rate": 9.003511866375339e-06, "loss": 0.6365, "step": 5500 }, { "epoch": 0.22837689868655262, "grad_norm": 2.349439859390259, "learning_rate": 9.003109071687254e-06, "loss": 0.4802, "step": 5501 }, { "epoch": 0.22841841421076395, "grad_norm": 2.2266299724578857, "learning_rate": 9.002706204621802e-06, "loss": 0.4526, "step": 5502 }, { "epoch": 0.22845992973497528, "grad_norm": 2.391003131866455, "learning_rate": 9.00230326518627e-06, "loss": 0.5385, "step": 5503 }, { "epoch": 0.22850144525918661, "grad_norm": 2.1116783618927, "learning_rate": 9.00190025338794e-06, "loss": 0.5338, "step": 5504 }, { "epoch": 0.22854296078339795, "grad_norm": 2.3173816204071045, "learning_rate": 9.001497169234102e-06, "loss": 0.4557, "step": 5505 }, { "epoch": 0.22858447630760928, "grad_norm": 2.393054485321045, "learning_rate": 9.00109401273204e-06, "loss": 0.511, "step": 5506 }, { "epoch": 0.2286259918318206, "grad_norm": 2.487009048461914, "learning_rate": 9.000690783889043e-06, "loss": 0.4395, "step": 5507 }, { "epoch": 0.22866750735603195, "grad_norm": 3.7561707496643066, "learning_rate": 9.000287482712407e-06, "loss": 0.5617, "step": 5508 }, { "epoch": 0.22870902288024328, "grad_norm": 2.073148012161255, "learning_rate": 8.999884109209418e-06, "loss": 0.4643, "step": 5509 }, { "epoch": 0.2287505384044546, "grad_norm": 3.121408224105835, "learning_rate": 8.999480663387373e-06, "loss": 0.7373, "step": 5510 }, { "epoch": 0.22879205392866594, "grad_norm": 2.7372686862945557, "learning_rate": 8.999077145253564e-06, "loss": 0.5058, "step": 5511 }, { "epoch": 0.22883356945287728, "grad_norm": 2.271618366241455, "learning_rate": 8.998673554815288e-06, "loss": 0.5042, "step": 5512 }, { "epoch": 0.2288750849770886, "grad_norm": 2.3100650310516357, "learning_rate": 8.998269892079842e-06, "loss": 0.4276, "step": 5513 }, { "epoch": 0.22891660050129994, "grad_norm": 2.561248779296875, "learning_rate": 8.997866157054522e-06, "loss": 0.4577, "step": 5514 }, { "epoch": 0.22895811602551128, "grad_norm": 2.3646490573883057, "learning_rate": 8.99746234974663e-06, "loss": 0.5625, "step": 5515 }, { "epoch": 0.22899963154972264, "grad_norm": 2.780791759490967, "learning_rate": 8.997058470163468e-06, "loss": 0.563, "step": 5516 }, { "epoch": 0.22904114707393397, "grad_norm": 2.3029279708862305, "learning_rate": 8.996654518312338e-06, "loss": 0.5154, "step": 5517 }, { "epoch": 0.2290826625981453, "grad_norm": 3.383708953857422, "learning_rate": 8.99625049420054e-06, "loss": 0.7211, "step": 5518 }, { "epoch": 0.22912417812235664, "grad_norm": 2.4130241870880127, "learning_rate": 8.995846397835382e-06, "loss": 0.6479, "step": 5519 }, { "epoch": 0.22916569364656797, "grad_norm": 3.273008346557617, "learning_rate": 8.995442229224168e-06, "loss": 0.6547, "step": 5520 }, { "epoch": 0.2292072091707793, "grad_norm": 3.0222291946411133, "learning_rate": 8.995037988374209e-06, "loss": 0.5545, "step": 5521 }, { "epoch": 0.22924872469499064, "grad_norm": 2.574852228164673, "learning_rate": 8.99463367529281e-06, "loss": 0.6303, "step": 5522 }, { "epoch": 0.22929024021920197, "grad_norm": 2.5172998905181885, "learning_rate": 8.994229289987285e-06, "loss": 0.6415, "step": 5523 }, { "epoch": 0.2293317557434133, "grad_norm": 2.5007119178771973, "learning_rate": 8.993824832464942e-06, "loss": 0.5897, "step": 5524 }, { "epoch": 0.22937327126762463, "grad_norm": 2.3714919090270996, "learning_rate": 8.993420302733093e-06, "loss": 0.4264, "step": 5525 }, { "epoch": 0.22941478679183597, "grad_norm": 2.351649284362793, "learning_rate": 8.993015700799054e-06, "loss": 0.5701, "step": 5526 }, { "epoch": 0.2294563023160473, "grad_norm": 3.055419445037842, "learning_rate": 8.992611026670145e-06, "loss": 0.6428, "step": 5527 }, { "epoch": 0.22949781784025863, "grad_norm": 2.4314157962799072, "learning_rate": 8.992206280353672e-06, "loss": 0.4622, "step": 5528 }, { "epoch": 0.22953933336446997, "grad_norm": 3.2836151123046875, "learning_rate": 8.991801461856961e-06, "loss": 0.6386, "step": 5529 }, { "epoch": 0.2295808488886813, "grad_norm": 2.575610637664795, "learning_rate": 8.99139657118733e-06, "loss": 0.4612, "step": 5530 }, { "epoch": 0.22962236441289263, "grad_norm": 2.4159164428710938, "learning_rate": 8.990991608352098e-06, "loss": 0.6227, "step": 5531 }, { "epoch": 0.229663879937104, "grad_norm": 2.9634835720062256, "learning_rate": 8.990586573358587e-06, "loss": 0.4714, "step": 5532 }, { "epoch": 0.22970539546131533, "grad_norm": 3.2779388427734375, "learning_rate": 8.99018146621412e-06, "loss": 0.6765, "step": 5533 }, { "epoch": 0.22974691098552666, "grad_norm": 3.009279251098633, "learning_rate": 8.989776286926023e-06, "loss": 0.5153, "step": 5534 }, { "epoch": 0.229788426509738, "grad_norm": 2.995619535446167, "learning_rate": 8.989371035501622e-06, "loss": 0.5416, "step": 5535 }, { "epoch": 0.22982994203394932, "grad_norm": 2.3386387825012207, "learning_rate": 8.98896571194824e-06, "loss": 0.5469, "step": 5536 }, { "epoch": 0.22987145755816066, "grad_norm": 2.181227207183838, "learning_rate": 8.98856031627321e-06, "loss": 0.3793, "step": 5537 }, { "epoch": 0.229912973082372, "grad_norm": 2.8659138679504395, "learning_rate": 8.98815484848386e-06, "loss": 0.574, "step": 5538 }, { "epoch": 0.22995448860658332, "grad_norm": 2.19305682182312, "learning_rate": 8.987749308587519e-06, "loss": 0.5773, "step": 5539 }, { "epoch": 0.22999600413079466, "grad_norm": 2.4482853412628174, "learning_rate": 8.987343696591523e-06, "loss": 0.5479, "step": 5540 }, { "epoch": 0.230037519655006, "grad_norm": 2.955284833908081, "learning_rate": 8.986938012503203e-06, "loss": 0.4744, "step": 5541 }, { "epoch": 0.23007903517921732, "grad_norm": 2.83988094329834, "learning_rate": 8.986532256329893e-06, "loss": 0.584, "step": 5542 }, { "epoch": 0.23012055070342866, "grad_norm": 2.256549596786499, "learning_rate": 8.986126428078933e-06, "loss": 0.5237, "step": 5543 }, { "epoch": 0.23016206622764, "grad_norm": 2.83050274848938, "learning_rate": 8.985720527757658e-06, "loss": 0.4231, "step": 5544 }, { "epoch": 0.23020358175185132, "grad_norm": 2.7407748699188232, "learning_rate": 8.985314555373408e-06, "loss": 0.4829, "step": 5545 }, { "epoch": 0.23024509727606265, "grad_norm": 2.870023012161255, "learning_rate": 8.984908510933519e-06, "loss": 0.6044, "step": 5546 }, { "epoch": 0.23028661280027402, "grad_norm": 3.2650306224823, "learning_rate": 8.984502394445338e-06, "loss": 0.6238, "step": 5547 }, { "epoch": 0.23032812832448535, "grad_norm": 2.6940579414367676, "learning_rate": 8.984096205916205e-06, "loss": 0.4841, "step": 5548 }, { "epoch": 0.23036964384869668, "grad_norm": 2.497281551361084, "learning_rate": 8.983689945353464e-06, "loss": 0.5251, "step": 5549 }, { "epoch": 0.23041115937290801, "grad_norm": 2.52618408203125, "learning_rate": 8.98328361276446e-06, "loss": 0.4669, "step": 5550 }, { "epoch": 0.23045267489711935, "grad_norm": 2.922797679901123, "learning_rate": 8.982877208156541e-06, "loss": 0.5634, "step": 5551 }, { "epoch": 0.23049419042133068, "grad_norm": 3.180177927017212, "learning_rate": 8.982470731537054e-06, "loss": 0.5207, "step": 5552 }, { "epoch": 0.230535705945542, "grad_norm": 2.441173553466797, "learning_rate": 8.982064182913348e-06, "loss": 0.5757, "step": 5553 }, { "epoch": 0.23057722146975335, "grad_norm": 2.3483362197875977, "learning_rate": 8.981657562292774e-06, "loss": 0.4281, "step": 5554 }, { "epoch": 0.23061873699396468, "grad_norm": 2.042325735092163, "learning_rate": 8.981250869682683e-06, "loss": 0.5428, "step": 5555 }, { "epoch": 0.230660252518176, "grad_norm": 2.9677035808563232, "learning_rate": 8.98084410509043e-06, "loss": 0.4307, "step": 5556 }, { "epoch": 0.23070176804238735, "grad_norm": 2.178006410598755, "learning_rate": 8.980437268523365e-06, "loss": 0.4855, "step": 5557 }, { "epoch": 0.23074328356659868, "grad_norm": 2.8683032989501953, "learning_rate": 8.980030359988851e-06, "loss": 0.4504, "step": 5558 }, { "epoch": 0.23078479909081, "grad_norm": 2.720857858657837, "learning_rate": 8.979623379494238e-06, "loss": 0.4335, "step": 5559 }, { "epoch": 0.23082631461502134, "grad_norm": 2.817639112472534, "learning_rate": 8.97921632704689e-06, "loss": 0.5181, "step": 5560 }, { "epoch": 0.23086783013923268, "grad_norm": 2.4357590675354004, "learning_rate": 8.978809202654161e-06, "loss": 0.6362, "step": 5561 }, { "epoch": 0.23090934566344404, "grad_norm": 2.644023895263672, "learning_rate": 8.978402006323417e-06, "loss": 0.576, "step": 5562 }, { "epoch": 0.23095086118765537, "grad_norm": 2.831533432006836, "learning_rate": 8.977994738062018e-06, "loss": 0.54, "step": 5563 }, { "epoch": 0.2309923767118667, "grad_norm": 2.7571496963500977, "learning_rate": 8.977587397877325e-06, "loss": 0.6994, "step": 5564 }, { "epoch": 0.23103389223607804, "grad_norm": 2.6125717163085938, "learning_rate": 8.977179985776707e-06, "loss": 0.6919, "step": 5565 }, { "epoch": 0.23107540776028937, "grad_norm": 2.2829997539520264, "learning_rate": 8.976772501767529e-06, "loss": 0.5814, "step": 5566 }, { "epoch": 0.2311169232845007, "grad_norm": 2.2295455932617188, "learning_rate": 8.976364945857158e-06, "loss": 0.5559, "step": 5567 }, { "epoch": 0.23115843880871204, "grad_norm": 2.8444700241088867, "learning_rate": 8.975957318052961e-06, "loss": 0.574, "step": 5568 }, { "epoch": 0.23119995433292337, "grad_norm": 2.9605486392974854, "learning_rate": 8.975549618362312e-06, "loss": 0.6127, "step": 5569 }, { "epoch": 0.2312414698571347, "grad_norm": 2.602858781814575, "learning_rate": 8.975141846792578e-06, "loss": 0.5504, "step": 5570 }, { "epoch": 0.23128298538134603, "grad_norm": 2.334350824356079, "learning_rate": 8.974734003351135e-06, "loss": 0.4294, "step": 5571 }, { "epoch": 0.23132450090555737, "grad_norm": 2.733928680419922, "learning_rate": 8.974326088045356e-06, "loss": 0.5948, "step": 5572 }, { "epoch": 0.2313660164297687, "grad_norm": 2.586653232574463, "learning_rate": 8.973918100882615e-06, "loss": 0.5691, "step": 5573 }, { "epoch": 0.23140753195398003, "grad_norm": 2.474881887435913, "learning_rate": 8.973510041870287e-06, "loss": 0.499, "step": 5574 }, { "epoch": 0.23144904747819137, "grad_norm": 2.6750376224517822, "learning_rate": 8.973101911015756e-06, "loss": 0.5586, "step": 5575 }, { "epoch": 0.2314905630024027, "grad_norm": 2.992654323577881, "learning_rate": 8.972693708326397e-06, "loss": 0.6814, "step": 5576 }, { "epoch": 0.23153207852661403, "grad_norm": 2.53857684135437, "learning_rate": 8.972285433809589e-06, "loss": 0.5055, "step": 5577 }, { "epoch": 0.2315735940508254, "grad_norm": 2.346958637237549, "learning_rate": 8.971877087472716e-06, "loss": 0.5548, "step": 5578 }, { "epoch": 0.23161510957503673, "grad_norm": 3.267364263534546, "learning_rate": 8.97146866932316e-06, "loss": 0.5829, "step": 5579 }, { "epoch": 0.23165662509924806, "grad_norm": 2.9696648120880127, "learning_rate": 8.971060179368305e-06, "loss": 0.6106, "step": 5580 }, { "epoch": 0.2316981406234594, "grad_norm": 2.562286376953125, "learning_rate": 8.970651617615538e-06, "loss": 0.6104, "step": 5581 }, { "epoch": 0.23173965614767073, "grad_norm": 2.413257598876953, "learning_rate": 8.970242984072245e-06, "loss": 0.3461, "step": 5582 }, { "epoch": 0.23178117167188206, "grad_norm": 2.976684093475342, "learning_rate": 8.969834278745817e-06, "loss": 0.6208, "step": 5583 }, { "epoch": 0.2318226871960934, "grad_norm": 3.3545026779174805, "learning_rate": 8.969425501643638e-06, "loss": 0.6075, "step": 5584 }, { "epoch": 0.23186420272030472, "grad_norm": 2.346208333969116, "learning_rate": 8.969016652773104e-06, "loss": 0.5165, "step": 5585 }, { "epoch": 0.23190571824451606, "grad_norm": 2.7537879943847656, "learning_rate": 8.968607732141603e-06, "loss": 0.5518, "step": 5586 }, { "epoch": 0.2319472337687274, "grad_norm": 2.763233184814453, "learning_rate": 8.968198739756532e-06, "loss": 0.666, "step": 5587 }, { "epoch": 0.23198874929293872, "grad_norm": 1.8975926637649536, "learning_rate": 8.967789675625284e-06, "loss": 0.4244, "step": 5588 }, { "epoch": 0.23203026481715006, "grad_norm": 2.485433578491211, "learning_rate": 8.967380539755254e-06, "loss": 0.6211, "step": 5589 }, { "epoch": 0.2320717803413614, "grad_norm": 3.3355963230133057, "learning_rate": 8.96697133215384e-06, "loss": 0.4545, "step": 5590 }, { "epoch": 0.23211329586557272, "grad_norm": 2.540321111679077, "learning_rate": 8.966562052828443e-06, "loss": 0.4721, "step": 5591 }, { "epoch": 0.23215481138978405, "grad_norm": 3.177184581756592, "learning_rate": 8.966152701786457e-06, "loss": 0.6694, "step": 5592 }, { "epoch": 0.23219632691399542, "grad_norm": 2.7682013511657715, "learning_rate": 8.965743279035291e-06, "loss": 0.483, "step": 5593 }, { "epoch": 0.23223784243820675, "grad_norm": 2.2739217281341553, "learning_rate": 8.965333784582342e-06, "loss": 0.5285, "step": 5594 }, { "epoch": 0.23227935796241808, "grad_norm": 2.6245365142822266, "learning_rate": 8.964924218435013e-06, "loss": 0.4904, "step": 5595 }, { "epoch": 0.23232087348662941, "grad_norm": 2.4803414344787598, "learning_rate": 8.964514580600714e-06, "loss": 0.4946, "step": 5596 }, { "epoch": 0.23236238901084075, "grad_norm": 2.187429189682007, "learning_rate": 8.964104871086849e-06, "loss": 0.4954, "step": 5597 }, { "epoch": 0.23240390453505208, "grad_norm": 2.4711978435516357, "learning_rate": 8.963695089900824e-06, "loss": 0.5504, "step": 5598 }, { "epoch": 0.2324454200592634, "grad_norm": 2.3911726474761963, "learning_rate": 8.96328523705005e-06, "loss": 0.6291, "step": 5599 }, { "epoch": 0.23248693558347475, "grad_norm": 2.5651049613952637, "learning_rate": 8.962875312541937e-06, "loss": 0.6181, "step": 5600 }, { "epoch": 0.23252845110768608, "grad_norm": 2.234111785888672, "learning_rate": 8.962465316383894e-06, "loss": 0.6287, "step": 5601 }, { "epoch": 0.2325699666318974, "grad_norm": 3.4065465927124023, "learning_rate": 8.962055248583338e-06, "loss": 0.6447, "step": 5602 }, { "epoch": 0.23261148215610875, "grad_norm": 2.844651937484741, "learning_rate": 8.96164510914768e-06, "loss": 0.6816, "step": 5603 }, { "epoch": 0.23265299768032008, "grad_norm": 3.1451292037963867, "learning_rate": 8.961234898084337e-06, "loss": 0.5477, "step": 5604 }, { "epoch": 0.2326945132045314, "grad_norm": 2.663830518722534, "learning_rate": 8.960824615400725e-06, "loss": 0.4682, "step": 5605 }, { "epoch": 0.23273602872874274, "grad_norm": 2.5711662769317627, "learning_rate": 8.960414261104264e-06, "loss": 0.6244, "step": 5606 }, { "epoch": 0.23277754425295408, "grad_norm": 2.360379934310913, "learning_rate": 8.960003835202369e-06, "loss": 0.4759, "step": 5607 }, { "epoch": 0.2328190597771654, "grad_norm": 2.832451581954956, "learning_rate": 8.959593337702465e-06, "loss": 0.5182, "step": 5608 }, { "epoch": 0.23286057530137677, "grad_norm": 2.828479766845703, "learning_rate": 8.959182768611972e-06, "loss": 0.5728, "step": 5609 }, { "epoch": 0.2329020908255881, "grad_norm": 2.787120819091797, "learning_rate": 8.958772127938312e-06, "loss": 0.454, "step": 5610 }, { "epoch": 0.23294360634979944, "grad_norm": 2.0187525749206543, "learning_rate": 8.958361415688914e-06, "loss": 0.3834, "step": 5611 }, { "epoch": 0.23298512187401077, "grad_norm": 2.3560903072357178, "learning_rate": 8.957950631871198e-06, "loss": 0.6637, "step": 5612 }, { "epoch": 0.2330266373982221, "grad_norm": 2.8881826400756836, "learning_rate": 8.957539776492594e-06, "loss": 0.4675, "step": 5613 }, { "epoch": 0.23306815292243344, "grad_norm": 2.169349431991577, "learning_rate": 8.957128849560532e-06, "loss": 0.5202, "step": 5614 }, { "epoch": 0.23310966844664477, "grad_norm": 2.407637357711792, "learning_rate": 8.956717851082437e-06, "loss": 0.4875, "step": 5615 }, { "epoch": 0.2331511839708561, "grad_norm": 2.247436285018921, "learning_rate": 8.956306781065744e-06, "loss": 0.3979, "step": 5616 }, { "epoch": 0.23319269949506743, "grad_norm": 2.3407819271087646, "learning_rate": 8.955895639517886e-06, "loss": 0.3872, "step": 5617 }, { "epoch": 0.23323421501927877, "grad_norm": 3.8627142906188965, "learning_rate": 8.955484426446293e-06, "loss": 0.6575, "step": 5618 }, { "epoch": 0.2332757305434901, "grad_norm": 2.7641384601593018, "learning_rate": 8.955073141858401e-06, "loss": 0.5846, "step": 5619 }, { "epoch": 0.23331724606770143, "grad_norm": 2.6244068145751953, "learning_rate": 8.954661785761648e-06, "loss": 0.5473, "step": 5620 }, { "epoch": 0.23335876159191277, "grad_norm": 2.230828285217285, "learning_rate": 8.954250358163467e-06, "loss": 0.4827, "step": 5621 }, { "epoch": 0.2334002771161241, "grad_norm": 3.0885419845581055, "learning_rate": 8.953838859071303e-06, "loss": 0.5018, "step": 5622 }, { "epoch": 0.23344179264033543, "grad_norm": 2.8032400608062744, "learning_rate": 8.953427288492592e-06, "loss": 0.4857, "step": 5623 }, { "epoch": 0.2334833081645468, "grad_norm": 2.990828275680542, "learning_rate": 8.953015646434774e-06, "loss": 0.6264, "step": 5624 }, { "epoch": 0.23352482368875813, "grad_norm": 2.642392158508301, "learning_rate": 8.952603932905298e-06, "loss": 0.6499, "step": 5625 }, { "epoch": 0.23356633921296946, "grad_norm": 2.701209783554077, "learning_rate": 8.9521921479116e-06, "loss": 0.4723, "step": 5626 }, { "epoch": 0.2336078547371808, "grad_norm": 2.202587127685547, "learning_rate": 8.95178029146113e-06, "loss": 0.3635, "step": 5627 }, { "epoch": 0.23364937026139213, "grad_norm": 2.835296630859375, "learning_rate": 8.951368363561333e-06, "loss": 0.4716, "step": 5628 }, { "epoch": 0.23369088578560346, "grad_norm": 2.880483388900757, "learning_rate": 8.950956364219659e-06, "loss": 0.6065, "step": 5629 }, { "epoch": 0.2337324013098148, "grad_norm": 2.6845362186431885, "learning_rate": 8.950544293443552e-06, "loss": 0.4469, "step": 5630 }, { "epoch": 0.23377391683402612, "grad_norm": 2.4964072704315186, "learning_rate": 8.950132151240467e-06, "loss": 0.4333, "step": 5631 }, { "epoch": 0.23381543235823746, "grad_norm": 2.498181104660034, "learning_rate": 8.949719937617854e-06, "loss": 0.4463, "step": 5632 }, { "epoch": 0.2338569478824488, "grad_norm": 2.86098575592041, "learning_rate": 8.949307652583168e-06, "loss": 0.6063, "step": 5633 }, { "epoch": 0.23389846340666012, "grad_norm": 2.853184461593628, "learning_rate": 8.94889529614386e-06, "loss": 0.7682, "step": 5634 }, { "epoch": 0.23393997893087146, "grad_norm": 2.6964616775512695, "learning_rate": 8.948482868307386e-06, "loss": 0.3753, "step": 5635 }, { "epoch": 0.2339814944550828, "grad_norm": 2.50620174407959, "learning_rate": 8.948070369081203e-06, "loss": 0.5666, "step": 5636 }, { "epoch": 0.23402300997929412, "grad_norm": 2.4554502964019775, "learning_rate": 8.94765779847277e-06, "loss": 0.6504, "step": 5637 }, { "epoch": 0.23406452550350546, "grad_norm": 2.294895648956299, "learning_rate": 8.947245156489548e-06, "loss": 0.5067, "step": 5638 }, { "epoch": 0.2341060410277168, "grad_norm": 2.5930533409118652, "learning_rate": 8.946832443138994e-06, "loss": 0.4237, "step": 5639 }, { "epoch": 0.23414755655192815, "grad_norm": 2.3411049842834473, "learning_rate": 8.946419658428573e-06, "loss": 0.5868, "step": 5640 }, { "epoch": 0.23418907207613948, "grad_norm": 2.7590534687042236, "learning_rate": 8.946006802365747e-06, "loss": 0.5534, "step": 5641 }, { "epoch": 0.23423058760035081, "grad_norm": 2.870105504989624, "learning_rate": 8.94559387495798e-06, "loss": 0.4238, "step": 5642 }, { "epoch": 0.23427210312456215, "grad_norm": 3.2575364112854004, "learning_rate": 8.94518087621274e-06, "loss": 0.6743, "step": 5643 }, { "epoch": 0.23431361864877348, "grad_norm": 2.5339293479919434, "learning_rate": 8.944767806137493e-06, "loss": 0.4302, "step": 5644 }, { "epoch": 0.2343551341729848, "grad_norm": 2.622412919998169, "learning_rate": 8.944354664739705e-06, "loss": 0.653, "step": 5645 }, { "epoch": 0.23439664969719615, "grad_norm": 2.8045263290405273, "learning_rate": 8.943941452026848e-06, "loss": 0.5349, "step": 5646 }, { "epoch": 0.23443816522140748, "grad_norm": 2.4950456619262695, "learning_rate": 8.943528168006395e-06, "loss": 0.47, "step": 5647 }, { "epoch": 0.2344796807456188, "grad_norm": 3.3070902824401855, "learning_rate": 8.943114812685813e-06, "loss": 0.5994, "step": 5648 }, { "epoch": 0.23452119626983015, "grad_norm": 2.8147268295288086, "learning_rate": 8.94270138607258e-06, "loss": 0.5288, "step": 5649 }, { "epoch": 0.23456271179404148, "grad_norm": 2.581278085708618, "learning_rate": 8.94228788817417e-06, "loss": 0.5159, "step": 5650 }, { "epoch": 0.2346042273182528, "grad_norm": 2.4201247692108154, "learning_rate": 8.941874318998056e-06, "loss": 0.4396, "step": 5651 }, { "epoch": 0.23464574284246414, "grad_norm": 3.260941982269287, "learning_rate": 8.941460678551722e-06, "loss": 0.4466, "step": 5652 }, { "epoch": 0.23468725836667548, "grad_norm": 2.5284764766693115, "learning_rate": 8.941046966842641e-06, "loss": 0.538, "step": 5653 }, { "epoch": 0.2347287738908868, "grad_norm": 2.6391444206237793, "learning_rate": 8.940633183878295e-06, "loss": 0.4775, "step": 5654 }, { "epoch": 0.23477028941509817, "grad_norm": 3.6831252574920654, "learning_rate": 8.940219329666167e-06, "loss": 0.5171, "step": 5655 }, { "epoch": 0.2348118049393095, "grad_norm": 2.296541452407837, "learning_rate": 8.939805404213735e-06, "loss": 0.5353, "step": 5656 }, { "epoch": 0.23485332046352084, "grad_norm": 2.4664063453674316, "learning_rate": 8.939391407528488e-06, "loss": 0.4903, "step": 5657 }, { "epoch": 0.23489483598773217, "grad_norm": 2.2592520713806152, "learning_rate": 8.938977339617907e-06, "loss": 0.5996, "step": 5658 }, { "epoch": 0.2349363515119435, "grad_norm": 3.308016061782837, "learning_rate": 8.938563200489481e-06, "loss": 0.5172, "step": 5659 }, { "epoch": 0.23497786703615484, "grad_norm": 2.403038263320923, "learning_rate": 8.938148990150697e-06, "loss": 0.5392, "step": 5660 }, { "epoch": 0.23501938256036617, "grad_norm": 2.564509391784668, "learning_rate": 8.937734708609045e-06, "loss": 0.4795, "step": 5661 }, { "epoch": 0.2350608980845775, "grad_norm": 2.631941795349121, "learning_rate": 8.937320355872014e-06, "loss": 0.5136, "step": 5662 }, { "epoch": 0.23510241360878883, "grad_norm": 2.518171787261963, "learning_rate": 8.936905931947095e-06, "loss": 0.5879, "step": 5663 }, { "epoch": 0.23514392913300017, "grad_norm": 2.685840368270874, "learning_rate": 8.936491436841783e-06, "loss": 0.6267, "step": 5664 }, { "epoch": 0.2351854446572115, "grad_norm": 2.8710319995880127, "learning_rate": 8.936076870563572e-06, "loss": 0.5176, "step": 5665 }, { "epoch": 0.23522696018142283, "grad_norm": 2.7902965545654297, "learning_rate": 8.935662233119956e-06, "loss": 0.5664, "step": 5666 }, { "epoch": 0.23526847570563417, "grad_norm": 2.4040632247924805, "learning_rate": 8.935247524518433e-06, "loss": 0.553, "step": 5667 }, { "epoch": 0.2353099912298455, "grad_norm": 2.5523428916931152, "learning_rate": 8.9348327447665e-06, "loss": 0.5975, "step": 5668 }, { "epoch": 0.23535150675405683, "grad_norm": 2.4825291633605957, "learning_rate": 8.934417893871657e-06, "loss": 0.4997, "step": 5669 }, { "epoch": 0.2353930222782682, "grad_norm": 2.757807731628418, "learning_rate": 8.934002971841404e-06, "loss": 0.4314, "step": 5670 }, { "epoch": 0.23543453780247953, "grad_norm": 2.215446710586548, "learning_rate": 8.933587978683243e-06, "loss": 0.5074, "step": 5671 }, { "epoch": 0.23547605332669086, "grad_norm": 2.9494857788085938, "learning_rate": 8.933172914404677e-06, "loss": 0.4878, "step": 5672 }, { "epoch": 0.2355175688509022, "grad_norm": 2.770395278930664, "learning_rate": 8.932757779013214e-06, "loss": 0.4268, "step": 5673 }, { "epoch": 0.23555908437511353, "grad_norm": 2.7083740234375, "learning_rate": 8.932342572516355e-06, "loss": 0.4769, "step": 5674 }, { "epoch": 0.23560059989932486, "grad_norm": 2.4388115406036377, "learning_rate": 8.931927294921608e-06, "loss": 0.5492, "step": 5675 }, { "epoch": 0.2356421154235362, "grad_norm": 2.1710565090179443, "learning_rate": 8.931511946236483e-06, "loss": 0.4606, "step": 5676 }, { "epoch": 0.23568363094774752, "grad_norm": 2.2745165824890137, "learning_rate": 8.931096526468487e-06, "loss": 0.4762, "step": 5677 }, { "epoch": 0.23572514647195886, "grad_norm": 2.483210325241089, "learning_rate": 8.930681035625137e-06, "loss": 0.37, "step": 5678 }, { "epoch": 0.2357666619961702, "grad_norm": 2.4133102893829346, "learning_rate": 8.930265473713939e-06, "loss": 0.5217, "step": 5679 }, { "epoch": 0.23580817752038152, "grad_norm": 2.305510997772217, "learning_rate": 8.929849840742407e-06, "loss": 0.4656, "step": 5680 }, { "epoch": 0.23584969304459286, "grad_norm": 3.3743743896484375, "learning_rate": 8.929434136718058e-06, "loss": 0.3962, "step": 5681 }, { "epoch": 0.2358912085688042, "grad_norm": 2.50510835647583, "learning_rate": 8.929018361648408e-06, "loss": 0.5, "step": 5682 }, { "epoch": 0.23593272409301552, "grad_norm": 2.186927556991577, "learning_rate": 8.928602515540972e-06, "loss": 0.5012, "step": 5683 }, { "epoch": 0.23597423961722686, "grad_norm": 2.8686087131500244, "learning_rate": 8.928186598403272e-06, "loss": 0.6499, "step": 5684 }, { "epoch": 0.2360157551414382, "grad_norm": 2.346836566925049, "learning_rate": 8.927770610242826e-06, "loss": 0.5768, "step": 5685 }, { "epoch": 0.23605727066564955, "grad_norm": 2.6536099910736084, "learning_rate": 8.927354551067153e-06, "loss": 0.6145, "step": 5686 }, { "epoch": 0.23609878618986088, "grad_norm": 2.787374258041382, "learning_rate": 8.92693842088378e-06, "loss": 0.517, "step": 5687 }, { "epoch": 0.23614030171407221, "grad_norm": 3.137664318084717, "learning_rate": 8.926522219700226e-06, "loss": 0.4752, "step": 5688 }, { "epoch": 0.23618181723828355, "grad_norm": 2.112086057662964, "learning_rate": 8.92610594752402e-06, "loss": 0.5757, "step": 5689 }, { "epoch": 0.23622333276249488, "grad_norm": 2.740391492843628, "learning_rate": 8.925689604362688e-06, "loss": 0.5963, "step": 5690 }, { "epoch": 0.2362648482867062, "grad_norm": 2.701131582260132, "learning_rate": 8.925273190223756e-06, "loss": 0.5933, "step": 5691 }, { "epoch": 0.23630636381091755, "grad_norm": 2.5960428714752197, "learning_rate": 8.924856705114753e-06, "loss": 0.3822, "step": 5692 }, { "epoch": 0.23634787933512888, "grad_norm": 2.606701135635376, "learning_rate": 8.92444014904321e-06, "loss": 0.4081, "step": 5693 }, { "epoch": 0.2363893948593402, "grad_norm": 2.7279207706451416, "learning_rate": 8.924023522016655e-06, "loss": 0.5504, "step": 5694 }, { "epoch": 0.23643091038355155, "grad_norm": 2.5369362831115723, "learning_rate": 8.923606824042628e-06, "loss": 0.5185, "step": 5695 }, { "epoch": 0.23647242590776288, "grad_norm": 2.2972331047058105, "learning_rate": 8.923190055128655e-06, "loss": 0.4951, "step": 5696 }, { "epoch": 0.2365139414319742, "grad_norm": 2.226306676864624, "learning_rate": 8.922773215282277e-06, "loss": 0.5389, "step": 5697 }, { "epoch": 0.23655545695618554, "grad_norm": 2.1452226638793945, "learning_rate": 8.922356304511029e-06, "loss": 0.4215, "step": 5698 }, { "epoch": 0.23659697248039688, "grad_norm": 2.4633617401123047, "learning_rate": 8.921939322822449e-06, "loss": 0.5268, "step": 5699 }, { "epoch": 0.2366384880046082, "grad_norm": 2.525247097015381, "learning_rate": 8.921522270224072e-06, "loss": 0.5219, "step": 5700 }, { "epoch": 0.23668000352881957, "grad_norm": 2.7145488262176514, "learning_rate": 8.921105146723446e-06, "loss": 0.5591, "step": 5701 }, { "epoch": 0.2367215190530309, "grad_norm": 2.8526501655578613, "learning_rate": 8.920687952328106e-06, "loss": 0.4975, "step": 5702 }, { "epoch": 0.23676303457724224, "grad_norm": 2.6821889877319336, "learning_rate": 8.920270687045601e-06, "loss": 0.4599, "step": 5703 }, { "epoch": 0.23680455010145357, "grad_norm": 2.9368860721588135, "learning_rate": 8.919853350883468e-06, "loss": 0.6079, "step": 5704 }, { "epoch": 0.2368460656256649, "grad_norm": 3.161104679107666, "learning_rate": 8.919435943849259e-06, "loss": 0.6016, "step": 5705 }, { "epoch": 0.23688758114987624, "grad_norm": 2.497905731201172, "learning_rate": 8.919018465950517e-06, "loss": 0.5246, "step": 5706 }, { "epoch": 0.23692909667408757, "grad_norm": 2.0026772022247314, "learning_rate": 8.918600917194793e-06, "loss": 0.5259, "step": 5707 }, { "epoch": 0.2369706121982989, "grad_norm": 2.400176763534546, "learning_rate": 8.918183297589636e-06, "loss": 0.4267, "step": 5708 }, { "epoch": 0.23701212772251024, "grad_norm": 2.6257567405700684, "learning_rate": 8.917765607142594e-06, "loss": 0.5928, "step": 5709 }, { "epoch": 0.23705364324672157, "grad_norm": 2.8027515411376953, "learning_rate": 8.91734784586122e-06, "loss": 0.5647, "step": 5710 }, { "epoch": 0.2370951587709329, "grad_norm": 2.283876419067383, "learning_rate": 8.916930013753069e-06, "loss": 0.4922, "step": 5711 }, { "epoch": 0.23713667429514423, "grad_norm": 2.4928317070007324, "learning_rate": 8.916512110825694e-06, "loss": 0.5359, "step": 5712 }, { "epoch": 0.23717818981935557, "grad_norm": 2.638679265975952, "learning_rate": 8.916094137086651e-06, "loss": 0.455, "step": 5713 }, { "epoch": 0.2372197053435669, "grad_norm": 2.427806854248047, "learning_rate": 8.915676092543497e-06, "loss": 0.6284, "step": 5714 }, { "epoch": 0.23726122086777823, "grad_norm": 2.6702117919921875, "learning_rate": 8.915257977203791e-06, "loss": 0.4721, "step": 5715 }, { "epoch": 0.23730273639198957, "grad_norm": 2.4841742515563965, "learning_rate": 8.914839791075091e-06, "loss": 0.4731, "step": 5716 }, { "epoch": 0.23734425191620093, "grad_norm": 2.8940863609313965, "learning_rate": 8.914421534164961e-06, "loss": 0.5728, "step": 5717 }, { "epoch": 0.23738576744041226, "grad_norm": 2.024906873703003, "learning_rate": 8.914003206480962e-06, "loss": 0.5252, "step": 5718 }, { "epoch": 0.2374272829646236, "grad_norm": 2.5938148498535156, "learning_rate": 8.913584808030655e-06, "loss": 0.5249, "step": 5719 }, { "epoch": 0.23746879848883493, "grad_norm": 2.4570627212524414, "learning_rate": 8.913166338821609e-06, "loss": 0.5405, "step": 5720 }, { "epoch": 0.23751031401304626, "grad_norm": 2.836265802383423, "learning_rate": 8.912747798861386e-06, "loss": 0.6227, "step": 5721 }, { "epoch": 0.2375518295372576, "grad_norm": 3.2958524227142334, "learning_rate": 8.912329188157553e-06, "loss": 0.587, "step": 5722 }, { "epoch": 0.23759334506146892, "grad_norm": 3.0162930488586426, "learning_rate": 8.911910506717685e-06, "loss": 0.542, "step": 5723 }, { "epoch": 0.23763486058568026, "grad_norm": 2.759361505508423, "learning_rate": 8.911491754549345e-06, "loss": 0.5255, "step": 5724 }, { "epoch": 0.2376763761098916, "grad_norm": 2.4840168952941895, "learning_rate": 8.911072931660107e-06, "loss": 0.4329, "step": 5725 }, { "epoch": 0.23771789163410292, "grad_norm": 3.0915729999542236, "learning_rate": 8.910654038057543e-06, "loss": 0.6042, "step": 5726 }, { "epoch": 0.23775940715831426, "grad_norm": 2.8388826847076416, "learning_rate": 8.910235073749226e-06, "loss": 0.5136, "step": 5727 }, { "epoch": 0.2378009226825256, "grad_norm": 2.5283498764038086, "learning_rate": 8.909816038742734e-06, "loss": 0.5299, "step": 5728 }, { "epoch": 0.23784243820673692, "grad_norm": 2.9662394523620605, "learning_rate": 8.90939693304564e-06, "loss": 0.697, "step": 5729 }, { "epoch": 0.23788395373094826, "grad_norm": 2.6405720710754395, "learning_rate": 8.908977756665522e-06, "loss": 0.5711, "step": 5730 }, { "epoch": 0.2379254692551596, "grad_norm": 2.4254720211029053, "learning_rate": 8.90855850960996e-06, "loss": 0.4138, "step": 5731 }, { "epoch": 0.23796698477937095, "grad_norm": 2.204073190689087, "learning_rate": 8.908139191886534e-06, "loss": 0.4946, "step": 5732 }, { "epoch": 0.23800850030358228, "grad_norm": 1.9912058115005493, "learning_rate": 8.907719803502823e-06, "loss": 0.4514, "step": 5733 }, { "epoch": 0.23805001582779362, "grad_norm": 3.127838134765625, "learning_rate": 8.907300344466413e-06, "loss": 0.6995, "step": 5734 }, { "epoch": 0.23809153135200495, "grad_norm": 2.6140244007110596, "learning_rate": 8.906880814784886e-06, "loss": 0.4178, "step": 5735 }, { "epoch": 0.23813304687621628, "grad_norm": 2.853684663772583, "learning_rate": 8.906461214465829e-06, "loss": 0.5825, "step": 5736 }, { "epoch": 0.23817456240042761, "grad_norm": 2.512523889541626, "learning_rate": 8.906041543516829e-06, "loss": 0.6489, "step": 5737 }, { "epoch": 0.23821607792463895, "grad_norm": 2.476529359817505, "learning_rate": 8.905621801945467e-06, "loss": 0.4933, "step": 5738 }, { "epoch": 0.23825759344885028, "grad_norm": 3.114542007446289, "learning_rate": 8.90520198975934e-06, "loss": 0.4918, "step": 5739 }, { "epoch": 0.2382991089730616, "grad_norm": 2.9923784732818604, "learning_rate": 8.904782106966037e-06, "loss": 0.5461, "step": 5740 }, { "epoch": 0.23834062449727295, "grad_norm": 2.0015087127685547, "learning_rate": 8.904362153573145e-06, "loss": 0.5297, "step": 5741 }, { "epoch": 0.23838214002148428, "grad_norm": 2.592057704925537, "learning_rate": 8.903942129588261e-06, "loss": 0.5509, "step": 5742 }, { "epoch": 0.2384236555456956, "grad_norm": 2.69682240486145, "learning_rate": 8.903522035018979e-06, "loss": 0.4952, "step": 5743 }, { "epoch": 0.23846517106990694, "grad_norm": 2.604200601577759, "learning_rate": 8.903101869872891e-06, "loss": 0.49, "step": 5744 }, { "epoch": 0.23850668659411828, "grad_norm": 2.1410071849823, "learning_rate": 8.9026816341576e-06, "loss": 0.3716, "step": 5745 }, { "epoch": 0.2385482021183296, "grad_norm": 2.6244266033172607, "learning_rate": 8.902261327880699e-06, "loss": 0.5541, "step": 5746 }, { "epoch": 0.23858971764254094, "grad_norm": 2.2073328495025635, "learning_rate": 8.901840951049785e-06, "loss": 0.461, "step": 5747 }, { "epoch": 0.2386312331667523, "grad_norm": 3.1124799251556396, "learning_rate": 8.901420503672466e-06, "loss": 0.5124, "step": 5748 }, { "epoch": 0.23867274869096364, "grad_norm": 3.6747565269470215, "learning_rate": 8.900999985756338e-06, "loss": 0.6346, "step": 5749 }, { "epoch": 0.23871426421517497, "grad_norm": 2.8791744709014893, "learning_rate": 8.900579397309007e-06, "loss": 0.5047, "step": 5750 }, { "epoch": 0.2387557797393863, "grad_norm": 2.246891975402832, "learning_rate": 8.900158738338075e-06, "loss": 0.3713, "step": 5751 }, { "epoch": 0.23879729526359764, "grad_norm": 2.631517171859741, "learning_rate": 8.899738008851148e-06, "loss": 0.4176, "step": 5752 }, { "epoch": 0.23883881078780897, "grad_norm": 2.1592769622802734, "learning_rate": 8.899317208855837e-06, "loss": 0.4726, "step": 5753 }, { "epoch": 0.2388803263120203, "grad_norm": 2.4309890270233154, "learning_rate": 8.898896338359743e-06, "loss": 0.6427, "step": 5754 }, { "epoch": 0.23892184183623164, "grad_norm": 2.3602800369262695, "learning_rate": 8.898475397370484e-06, "loss": 0.5061, "step": 5755 }, { "epoch": 0.23896335736044297, "grad_norm": 2.2664031982421875, "learning_rate": 8.898054385895662e-06, "loss": 0.6617, "step": 5756 }, { "epoch": 0.2390048728846543, "grad_norm": 2.4364757537841797, "learning_rate": 8.897633303942895e-06, "loss": 0.5728, "step": 5757 }, { "epoch": 0.23904638840886563, "grad_norm": 2.9347763061523438, "learning_rate": 8.897212151519794e-06, "loss": 0.5617, "step": 5758 }, { "epoch": 0.23908790393307697, "grad_norm": 2.4480278491973877, "learning_rate": 8.896790928633974e-06, "loss": 0.5402, "step": 5759 }, { "epoch": 0.2391294194572883, "grad_norm": 2.27431321144104, "learning_rate": 8.896369635293052e-06, "loss": 0.3655, "step": 5760 }, { "epoch": 0.23917093498149963, "grad_norm": 2.059159517288208, "learning_rate": 8.895948271504641e-06, "loss": 0.543, "step": 5761 }, { "epoch": 0.23921245050571097, "grad_norm": 2.1739487648010254, "learning_rate": 8.895526837276365e-06, "loss": 0.3636, "step": 5762 }, { "epoch": 0.23925396602992233, "grad_norm": 2.9259226322174072, "learning_rate": 8.895105332615841e-06, "loss": 0.6554, "step": 5763 }, { "epoch": 0.23929548155413366, "grad_norm": 2.750518560409546, "learning_rate": 8.894683757530689e-06, "loss": 0.5801, "step": 5764 }, { "epoch": 0.239336997078345, "grad_norm": 2.7129173278808594, "learning_rate": 8.894262112028533e-06, "loss": 0.6017, "step": 5765 }, { "epoch": 0.23937851260255633, "grad_norm": 2.539876937866211, "learning_rate": 8.893840396116995e-06, "loss": 0.5102, "step": 5766 }, { "epoch": 0.23942002812676766, "grad_norm": 2.693693161010742, "learning_rate": 8.893418609803699e-06, "loss": 0.6879, "step": 5767 }, { "epoch": 0.239461543650979, "grad_norm": 2.592878818511963, "learning_rate": 8.892996753096275e-06, "loss": 0.5812, "step": 5768 }, { "epoch": 0.23950305917519032, "grad_norm": 2.981443166732788, "learning_rate": 8.892574826002345e-06, "loss": 0.5493, "step": 5769 }, { "epoch": 0.23954457469940166, "grad_norm": 2.7511837482452393, "learning_rate": 8.892152828529541e-06, "loss": 0.6039, "step": 5770 }, { "epoch": 0.239586090223613, "grad_norm": 2.4163320064544678, "learning_rate": 8.891730760685492e-06, "loss": 0.5895, "step": 5771 }, { "epoch": 0.23962760574782432, "grad_norm": 2.355827569961548, "learning_rate": 8.89130862247783e-06, "loss": 0.5629, "step": 5772 }, { "epoch": 0.23966912127203566, "grad_norm": 2.785108804702759, "learning_rate": 8.890886413914186e-06, "loss": 0.5584, "step": 5773 }, { "epoch": 0.239710636796247, "grad_norm": 2.2227439880371094, "learning_rate": 8.890464135002195e-06, "loss": 0.4359, "step": 5774 }, { "epoch": 0.23975215232045832, "grad_norm": 2.475557565689087, "learning_rate": 8.89004178574949e-06, "loss": 0.599, "step": 5775 }, { "epoch": 0.23979366784466966, "grad_norm": 3.1486153602600098, "learning_rate": 8.88961936616371e-06, "loss": 0.6202, "step": 5776 }, { "epoch": 0.239835183368881, "grad_norm": 2.616652727127075, "learning_rate": 8.889196876252487e-06, "loss": 0.4275, "step": 5777 }, { "epoch": 0.23987669889309235, "grad_norm": 2.4152493476867676, "learning_rate": 8.888774316023467e-06, "loss": 0.507, "step": 5778 }, { "epoch": 0.23991821441730368, "grad_norm": 1.8698866367340088, "learning_rate": 8.888351685484285e-06, "loss": 0.4659, "step": 5779 }, { "epoch": 0.23995972994151502, "grad_norm": 2.5477285385131836, "learning_rate": 8.887928984642584e-06, "loss": 0.5663, "step": 5780 }, { "epoch": 0.24000124546572635, "grad_norm": 2.703157901763916, "learning_rate": 8.887506213506005e-06, "loss": 0.4223, "step": 5781 }, { "epoch": 0.24004276098993768, "grad_norm": 1.8858505487442017, "learning_rate": 8.887083372082195e-06, "loss": 0.416, "step": 5782 }, { "epoch": 0.24008427651414901, "grad_norm": 2.8441414833068848, "learning_rate": 8.886660460378798e-06, "loss": 0.4926, "step": 5783 }, { "epoch": 0.24012579203836035, "grad_norm": 2.0648155212402344, "learning_rate": 8.886237478403457e-06, "loss": 0.4668, "step": 5784 }, { "epoch": 0.24016730756257168, "grad_norm": 2.76564621925354, "learning_rate": 8.885814426163823e-06, "loss": 0.5695, "step": 5785 }, { "epoch": 0.240208823086783, "grad_norm": 2.3547792434692383, "learning_rate": 8.885391303667546e-06, "loss": 0.419, "step": 5786 }, { "epoch": 0.24025033861099435, "grad_norm": 2.271768808364868, "learning_rate": 8.884968110922273e-06, "loss": 0.4321, "step": 5787 }, { "epoch": 0.24029185413520568, "grad_norm": 2.193422317504883, "learning_rate": 8.884544847935657e-06, "loss": 0.5063, "step": 5788 }, { "epoch": 0.240333369659417, "grad_norm": 2.6108360290527344, "learning_rate": 8.88412151471535e-06, "loss": 0.6297, "step": 5789 }, { "epoch": 0.24037488518362835, "grad_norm": 2.093777894973755, "learning_rate": 8.883698111269008e-06, "loss": 0.5211, "step": 5790 }, { "epoch": 0.24041640070783968, "grad_norm": 2.6949269771575928, "learning_rate": 8.883274637604282e-06, "loss": 0.6524, "step": 5791 }, { "epoch": 0.240457916232051, "grad_norm": 2.137122631072998, "learning_rate": 8.882851093728834e-06, "loss": 0.4966, "step": 5792 }, { "epoch": 0.24049943175626234, "grad_norm": 2.7885282039642334, "learning_rate": 8.882427479650318e-06, "loss": 0.4493, "step": 5793 }, { "epoch": 0.2405409472804737, "grad_norm": 2.2244834899902344, "learning_rate": 8.882003795376394e-06, "loss": 0.521, "step": 5794 }, { "epoch": 0.24058246280468504, "grad_norm": 2.7288944721221924, "learning_rate": 8.881580040914724e-06, "loss": 0.5169, "step": 5795 }, { "epoch": 0.24062397832889637, "grad_norm": 2.455630302429199, "learning_rate": 8.881156216272965e-06, "loss": 0.6451, "step": 5796 }, { "epoch": 0.2406654938531077, "grad_norm": 2.479966402053833, "learning_rate": 8.880732321458785e-06, "loss": 0.463, "step": 5797 }, { "epoch": 0.24070700937731904, "grad_norm": 2.3543434143066406, "learning_rate": 8.880308356479846e-06, "loss": 0.5438, "step": 5798 }, { "epoch": 0.24074852490153037, "grad_norm": 2.738375663757324, "learning_rate": 8.879884321343813e-06, "loss": 0.6254, "step": 5799 }, { "epoch": 0.2407900404257417, "grad_norm": 2.449551820755005, "learning_rate": 8.879460216058353e-06, "loss": 0.5061, "step": 5800 }, { "epoch": 0.24083155594995304, "grad_norm": 2.4734959602355957, "learning_rate": 8.879036040631135e-06, "loss": 0.6857, "step": 5801 }, { "epoch": 0.24087307147416437, "grad_norm": 2.286932945251465, "learning_rate": 8.878611795069827e-06, "loss": 0.634, "step": 5802 }, { "epoch": 0.2409145869983757, "grad_norm": 2.3844692707061768, "learning_rate": 8.878187479382098e-06, "loss": 0.5433, "step": 5803 }, { "epoch": 0.24095610252258703, "grad_norm": 2.366098403930664, "learning_rate": 8.877763093575625e-06, "loss": 0.6462, "step": 5804 }, { "epoch": 0.24099761804679837, "grad_norm": 2.7915213108062744, "learning_rate": 8.877338637658074e-06, "loss": 0.5605, "step": 5805 }, { "epoch": 0.2410391335710097, "grad_norm": 2.640423536300659, "learning_rate": 8.876914111637124e-06, "loss": 0.5331, "step": 5806 }, { "epoch": 0.24108064909522103, "grad_norm": 2.78619122505188, "learning_rate": 8.876489515520452e-06, "loss": 0.5939, "step": 5807 }, { "epoch": 0.24112216461943237, "grad_norm": 3.2396135330200195, "learning_rate": 8.87606484931573e-06, "loss": 0.4358, "step": 5808 }, { "epoch": 0.24116368014364373, "grad_norm": 2.4718313217163086, "learning_rate": 8.875640113030638e-06, "loss": 0.6123, "step": 5809 }, { "epoch": 0.24120519566785506, "grad_norm": 2.4440231323242188, "learning_rate": 8.875215306672856e-06, "loss": 0.5559, "step": 5810 }, { "epoch": 0.2412467111920664, "grad_norm": 2.65258526802063, "learning_rate": 8.874790430250065e-06, "loss": 0.5731, "step": 5811 }, { "epoch": 0.24128822671627773, "grad_norm": 2.1937694549560547, "learning_rate": 8.874365483769946e-06, "loss": 0.4635, "step": 5812 }, { "epoch": 0.24132974224048906, "grad_norm": 2.3458008766174316, "learning_rate": 8.873940467240181e-06, "loss": 0.3659, "step": 5813 }, { "epoch": 0.2413712577647004, "grad_norm": 2.6543335914611816, "learning_rate": 8.873515380668458e-06, "loss": 0.4665, "step": 5814 }, { "epoch": 0.24141277328891173, "grad_norm": 2.638719320297241, "learning_rate": 8.87309022406246e-06, "loss": 0.5524, "step": 5815 }, { "epoch": 0.24145428881312306, "grad_norm": 2.402827739715576, "learning_rate": 8.872664997429873e-06, "loss": 0.3513, "step": 5816 }, { "epoch": 0.2414958043373344, "grad_norm": 2.4358584880828857, "learning_rate": 8.872239700778387e-06, "loss": 0.3728, "step": 5817 }, { "epoch": 0.24153731986154572, "grad_norm": 2.3165297508239746, "learning_rate": 8.871814334115692e-06, "loss": 0.5703, "step": 5818 }, { "epoch": 0.24157883538575706, "grad_norm": 2.340339422225952, "learning_rate": 8.871388897449476e-06, "loss": 0.4663, "step": 5819 }, { "epoch": 0.2416203509099684, "grad_norm": 2.2963969707489014, "learning_rate": 8.870963390787435e-06, "loss": 0.5711, "step": 5820 }, { "epoch": 0.24166186643417972, "grad_norm": 2.6921539306640625, "learning_rate": 8.87053781413726e-06, "loss": 0.6925, "step": 5821 }, { "epoch": 0.24170338195839106, "grad_norm": 2.994445323944092, "learning_rate": 8.870112167506643e-06, "loss": 0.6249, "step": 5822 }, { "epoch": 0.2417448974826024, "grad_norm": 2.025535821914673, "learning_rate": 8.869686450903286e-06, "loss": 0.5283, "step": 5823 }, { "epoch": 0.24178641300681372, "grad_norm": 2.4432342052459717, "learning_rate": 8.86926066433488e-06, "loss": 0.563, "step": 5824 }, { "epoch": 0.24182792853102508, "grad_norm": 2.74284029006958, "learning_rate": 8.868834807809128e-06, "loss": 0.5463, "step": 5825 }, { "epoch": 0.24186944405523642, "grad_norm": 2.8887484073638916, "learning_rate": 8.868408881333726e-06, "loss": 0.4902, "step": 5826 }, { "epoch": 0.24191095957944775, "grad_norm": 2.1536076068878174, "learning_rate": 8.867982884916377e-06, "loss": 0.5998, "step": 5827 }, { "epoch": 0.24195247510365908, "grad_norm": 2.7143023014068604, "learning_rate": 8.867556818564784e-06, "loss": 0.5308, "step": 5828 }, { "epoch": 0.24199399062787041, "grad_norm": 2.7907521724700928, "learning_rate": 8.867130682286647e-06, "loss": 0.6199, "step": 5829 }, { "epoch": 0.24203550615208175, "grad_norm": 2.9061710834503174, "learning_rate": 8.866704476089674e-06, "loss": 0.5482, "step": 5830 }, { "epoch": 0.24207702167629308, "grad_norm": 2.7160799503326416, "learning_rate": 8.866278199981568e-06, "loss": 0.4936, "step": 5831 }, { "epoch": 0.2421185372005044, "grad_norm": 3.0933310985565186, "learning_rate": 8.865851853970039e-06, "loss": 0.6331, "step": 5832 }, { "epoch": 0.24216005272471575, "grad_norm": 2.068511486053467, "learning_rate": 8.865425438062794e-06, "loss": 0.3783, "step": 5833 }, { "epoch": 0.24220156824892708, "grad_norm": 2.704458236694336, "learning_rate": 8.864998952267543e-06, "loss": 0.393, "step": 5834 }, { "epoch": 0.2422430837731384, "grad_norm": 2.550150156021118, "learning_rate": 8.864572396591996e-06, "loss": 0.5215, "step": 5835 }, { "epoch": 0.24228459929734975, "grad_norm": 2.130779266357422, "learning_rate": 8.864145771043867e-06, "loss": 0.5144, "step": 5836 }, { "epoch": 0.24232611482156108, "grad_norm": 2.415343999862671, "learning_rate": 8.863719075630867e-06, "loss": 0.6261, "step": 5837 }, { "epoch": 0.2423676303457724, "grad_norm": 2.6143808364868164, "learning_rate": 8.863292310360716e-06, "loss": 0.5139, "step": 5838 }, { "epoch": 0.24240914586998374, "grad_norm": 2.1307930946350098, "learning_rate": 8.862865475241125e-06, "loss": 0.5481, "step": 5839 }, { "epoch": 0.2424506613941951, "grad_norm": 3.4500527381896973, "learning_rate": 8.862438570279811e-06, "loss": 0.4921, "step": 5840 }, { "epoch": 0.24249217691840644, "grad_norm": 2.707024097442627, "learning_rate": 8.862011595484497e-06, "loss": 0.5628, "step": 5841 }, { "epoch": 0.24253369244261777, "grad_norm": 3.3682892322540283, "learning_rate": 8.861584550862898e-06, "loss": 0.6115, "step": 5842 }, { "epoch": 0.2425752079668291, "grad_norm": 2.5426855087280273, "learning_rate": 8.861157436422738e-06, "loss": 0.4074, "step": 5843 }, { "epoch": 0.24261672349104044, "grad_norm": 2.386354684829712, "learning_rate": 8.860730252171739e-06, "loss": 0.5512, "step": 5844 }, { "epoch": 0.24265823901525177, "grad_norm": 2.3524653911590576, "learning_rate": 8.860302998117626e-06, "loss": 0.5149, "step": 5845 }, { "epoch": 0.2426997545394631, "grad_norm": 2.995194911956787, "learning_rate": 8.859875674268119e-06, "loss": 0.5337, "step": 5846 }, { "epoch": 0.24274127006367444, "grad_norm": 2.689556121826172, "learning_rate": 8.85944828063095e-06, "loss": 0.64, "step": 5847 }, { "epoch": 0.24278278558788577, "grad_norm": 2.444796323776245, "learning_rate": 8.859020817213841e-06, "loss": 0.482, "step": 5848 }, { "epoch": 0.2428243011120971, "grad_norm": 2.4970219135284424, "learning_rate": 8.858593284024524e-06, "loss": 0.5253, "step": 5849 }, { "epoch": 0.24286581663630843, "grad_norm": 2.5551936626434326, "learning_rate": 8.858165681070728e-06, "loss": 0.5042, "step": 5850 }, { "epoch": 0.24290733216051977, "grad_norm": 2.970752716064453, "learning_rate": 8.857738008360185e-06, "loss": 0.4398, "step": 5851 }, { "epoch": 0.2429488476847311, "grad_norm": 3.278163433074951, "learning_rate": 8.857310265900628e-06, "loss": 0.5037, "step": 5852 }, { "epoch": 0.24299036320894243, "grad_norm": 2.8336517810821533, "learning_rate": 8.856882453699789e-06, "loss": 0.5283, "step": 5853 }, { "epoch": 0.24303187873315377, "grad_norm": 2.931671619415283, "learning_rate": 8.856454571765403e-06, "loss": 0.4553, "step": 5854 }, { "epoch": 0.2430733942573651, "grad_norm": 2.141789197921753, "learning_rate": 8.856026620105207e-06, "loss": 0.519, "step": 5855 }, { "epoch": 0.24311490978157646, "grad_norm": 2.45863938331604, "learning_rate": 8.85559859872694e-06, "loss": 0.5425, "step": 5856 }, { "epoch": 0.2431564253057878, "grad_norm": 2.5481526851654053, "learning_rate": 8.855170507638335e-06, "loss": 0.5626, "step": 5857 }, { "epoch": 0.24319794082999913, "grad_norm": 2.2220733165740967, "learning_rate": 8.854742346847139e-06, "loss": 0.5309, "step": 5858 }, { "epoch": 0.24323945635421046, "grad_norm": 2.7608611583709717, "learning_rate": 8.854314116361091e-06, "loss": 0.5987, "step": 5859 }, { "epoch": 0.2432809718784218, "grad_norm": 2.4868698120117188, "learning_rate": 8.85388581618793e-06, "loss": 0.5595, "step": 5860 }, { "epoch": 0.24332248740263313, "grad_norm": 2.365417003631592, "learning_rate": 8.853457446335406e-06, "loss": 0.4834, "step": 5861 }, { "epoch": 0.24336400292684446, "grad_norm": 2.5014231204986572, "learning_rate": 8.853029006811259e-06, "loss": 0.5268, "step": 5862 }, { "epoch": 0.2434055184510558, "grad_norm": 2.5611047744750977, "learning_rate": 8.852600497623236e-06, "loss": 0.4366, "step": 5863 }, { "epoch": 0.24344703397526712, "grad_norm": 2.83237886428833, "learning_rate": 8.852171918779086e-06, "loss": 0.7086, "step": 5864 }, { "epoch": 0.24348854949947846, "grad_norm": 2.842413902282715, "learning_rate": 8.85174327028656e-06, "loss": 0.6046, "step": 5865 }, { "epoch": 0.2435300650236898, "grad_norm": 2.49338436126709, "learning_rate": 8.851314552153402e-06, "loss": 0.4968, "step": 5866 }, { "epoch": 0.24357158054790112, "grad_norm": 2.7420432567596436, "learning_rate": 8.850885764387368e-06, "loss": 0.5844, "step": 5867 }, { "epoch": 0.24361309607211246, "grad_norm": 2.6201326847076416, "learning_rate": 8.85045690699621e-06, "loss": 0.4974, "step": 5868 }, { "epoch": 0.2436546115963238, "grad_norm": 3.254218339920044, "learning_rate": 8.850027979987681e-06, "loss": 0.6289, "step": 5869 }, { "epoch": 0.24369612712053512, "grad_norm": 2.4030649662017822, "learning_rate": 8.849598983369536e-06, "loss": 0.4563, "step": 5870 }, { "epoch": 0.24373764264474648, "grad_norm": 2.588127851486206, "learning_rate": 8.849169917149532e-06, "loss": 0.521, "step": 5871 }, { "epoch": 0.24377915816895782, "grad_norm": 2.32375431060791, "learning_rate": 8.848740781335427e-06, "loss": 0.5219, "step": 5872 }, { "epoch": 0.24382067369316915, "grad_norm": 2.816812753677368, "learning_rate": 8.848311575934978e-06, "loss": 0.4946, "step": 5873 }, { "epoch": 0.24386218921738048, "grad_norm": 2.309398889541626, "learning_rate": 8.847882300955947e-06, "loss": 0.56, "step": 5874 }, { "epoch": 0.24390370474159181, "grad_norm": 2.4207539558410645, "learning_rate": 8.847452956406097e-06, "loss": 0.4103, "step": 5875 }, { "epoch": 0.24394522026580315, "grad_norm": 3.16804575920105, "learning_rate": 8.847023542293186e-06, "loss": 0.6481, "step": 5876 }, { "epoch": 0.24398673579001448, "grad_norm": 2.5202791690826416, "learning_rate": 8.846594058624983e-06, "loss": 0.5062, "step": 5877 }, { "epoch": 0.2440282513142258, "grad_norm": 3.110182285308838, "learning_rate": 8.846164505409249e-06, "loss": 0.4857, "step": 5878 }, { "epoch": 0.24406976683843715, "grad_norm": 2.65169095993042, "learning_rate": 8.845734882653751e-06, "loss": 0.5403, "step": 5879 }, { "epoch": 0.24411128236264848, "grad_norm": 2.367331027984619, "learning_rate": 8.84530519036626e-06, "loss": 0.376, "step": 5880 }, { "epoch": 0.2441527978868598, "grad_norm": 2.6697998046875, "learning_rate": 8.844875428554543e-06, "loss": 0.4981, "step": 5881 }, { "epoch": 0.24419431341107115, "grad_norm": 2.614837169647217, "learning_rate": 8.844445597226368e-06, "loss": 0.5234, "step": 5882 }, { "epoch": 0.24423582893528248, "grad_norm": 2.6690635681152344, "learning_rate": 8.84401569638951e-06, "loss": 0.709, "step": 5883 }, { "epoch": 0.2442773444594938, "grad_norm": 2.2521512508392334, "learning_rate": 8.843585726051738e-06, "loss": 0.3898, "step": 5884 }, { "epoch": 0.24431885998370514, "grad_norm": 2.399758815765381, "learning_rate": 8.84315568622083e-06, "loss": 0.4136, "step": 5885 }, { "epoch": 0.2443603755079165, "grad_norm": 2.16667103767395, "learning_rate": 8.842725576904561e-06, "loss": 0.5667, "step": 5886 }, { "epoch": 0.24440189103212784, "grad_norm": 2.8030245304107666, "learning_rate": 8.842295398110704e-06, "loss": 0.5592, "step": 5887 }, { "epoch": 0.24444340655633917, "grad_norm": 2.8773751258850098, "learning_rate": 8.84186514984704e-06, "loss": 0.5514, "step": 5888 }, { "epoch": 0.2444849220805505, "grad_norm": 2.4772279262542725, "learning_rate": 8.841434832121345e-06, "loss": 0.4341, "step": 5889 }, { "epoch": 0.24452643760476184, "grad_norm": 2.373995065689087, "learning_rate": 8.841004444941403e-06, "loss": 0.4884, "step": 5890 }, { "epoch": 0.24456795312897317, "grad_norm": 2.922344207763672, "learning_rate": 8.840573988314993e-06, "loss": 0.6377, "step": 5891 }, { "epoch": 0.2446094686531845, "grad_norm": 1.8892278671264648, "learning_rate": 8.840143462249897e-06, "loss": 0.4592, "step": 5892 }, { "epoch": 0.24465098417739584, "grad_norm": 2.6724164485931396, "learning_rate": 8.839712866753901e-06, "loss": 0.4493, "step": 5893 }, { "epoch": 0.24469249970160717, "grad_norm": 2.575512170791626, "learning_rate": 8.83928220183479e-06, "loss": 0.712, "step": 5894 }, { "epoch": 0.2447340152258185, "grad_norm": 2.24556040763855, "learning_rate": 8.83885146750035e-06, "loss": 0.4721, "step": 5895 }, { "epoch": 0.24477553075002983, "grad_norm": 2.7375290393829346, "learning_rate": 8.83842066375837e-06, "loss": 0.534, "step": 5896 }, { "epoch": 0.24481704627424117, "grad_norm": 3.0349855422973633, "learning_rate": 8.837989790616637e-06, "loss": 0.5361, "step": 5897 }, { "epoch": 0.2448585617984525, "grad_norm": 2.9259231090545654, "learning_rate": 8.837558848082943e-06, "loss": 0.5941, "step": 5898 }, { "epoch": 0.24490007732266383, "grad_norm": 2.262331962585449, "learning_rate": 8.837127836165078e-06, "loss": 0.5394, "step": 5899 }, { "epoch": 0.24494159284687517, "grad_norm": 2.5164730548858643, "learning_rate": 8.836696754870835e-06, "loss": 0.6282, "step": 5900 }, { "epoch": 0.2449831083710865, "grad_norm": 2.5726547241210938, "learning_rate": 8.83626560420801e-06, "loss": 0.449, "step": 5901 }, { "epoch": 0.24502462389529786, "grad_norm": 3.3080854415893555, "learning_rate": 8.835834384184398e-06, "loss": 0.5089, "step": 5902 }, { "epoch": 0.2450661394195092, "grad_norm": 2.519165515899658, "learning_rate": 8.835403094807793e-06, "loss": 0.5109, "step": 5903 }, { "epoch": 0.24510765494372053, "grad_norm": 2.3655855655670166, "learning_rate": 8.834971736085995e-06, "loss": 0.4255, "step": 5904 }, { "epoch": 0.24514917046793186, "grad_norm": 2.5551979541778564, "learning_rate": 8.834540308026805e-06, "loss": 0.555, "step": 5905 }, { "epoch": 0.2451906859921432, "grad_norm": 2.5090365409851074, "learning_rate": 8.834108810638018e-06, "loss": 0.5722, "step": 5906 }, { "epoch": 0.24523220151635453, "grad_norm": 2.56366229057312, "learning_rate": 8.833677243927439e-06, "loss": 0.5572, "step": 5907 }, { "epoch": 0.24527371704056586, "grad_norm": 2.1045284271240234, "learning_rate": 8.833245607902871e-06, "loss": 0.4393, "step": 5908 }, { "epoch": 0.2453152325647772, "grad_norm": 3.883498430252075, "learning_rate": 8.832813902572117e-06, "loss": 0.4127, "step": 5909 }, { "epoch": 0.24535674808898852, "grad_norm": 2.3128273487091064, "learning_rate": 8.832382127942984e-06, "loss": 0.51, "step": 5910 }, { "epoch": 0.24539826361319986, "grad_norm": 2.3137431144714355, "learning_rate": 8.831950284023279e-06, "loss": 0.4118, "step": 5911 }, { "epoch": 0.2454397791374112, "grad_norm": 2.6086273193359375, "learning_rate": 8.831518370820805e-06, "loss": 0.7061, "step": 5912 }, { "epoch": 0.24548129466162252, "grad_norm": 2.2782368659973145, "learning_rate": 8.831086388343376e-06, "loss": 0.6366, "step": 5913 }, { "epoch": 0.24552281018583386, "grad_norm": 2.449341297149658, "learning_rate": 8.8306543365988e-06, "loss": 0.4716, "step": 5914 }, { "epoch": 0.2455643257100452, "grad_norm": 2.4644925594329834, "learning_rate": 8.83022221559489e-06, "loss": 0.4175, "step": 5915 }, { "epoch": 0.24560584123425652, "grad_norm": 2.0131564140319824, "learning_rate": 8.829790025339459e-06, "loss": 0.4868, "step": 5916 }, { "epoch": 0.24564735675846788, "grad_norm": 2.8426501750946045, "learning_rate": 8.82935776584032e-06, "loss": 0.5295, "step": 5917 }, { "epoch": 0.24568887228267922, "grad_norm": 2.776263475418091, "learning_rate": 8.828925437105289e-06, "loss": 0.4956, "step": 5918 }, { "epoch": 0.24573038780689055, "grad_norm": 2.1490767002105713, "learning_rate": 8.828493039142183e-06, "loss": 0.5413, "step": 5919 }, { "epoch": 0.24577190333110188, "grad_norm": 2.7451822757720947, "learning_rate": 8.828060571958818e-06, "loss": 0.3845, "step": 5920 }, { "epoch": 0.24581341885531321, "grad_norm": 2.581392765045166, "learning_rate": 8.827628035563017e-06, "loss": 0.6853, "step": 5921 }, { "epoch": 0.24585493437952455, "grad_norm": 2.636089324951172, "learning_rate": 8.827195429962595e-06, "loss": 0.4453, "step": 5922 }, { "epoch": 0.24589644990373588, "grad_norm": 2.6875171661376953, "learning_rate": 8.826762755165378e-06, "loss": 0.3529, "step": 5923 }, { "epoch": 0.2459379654279472, "grad_norm": 2.4145967960357666, "learning_rate": 8.826330011179188e-06, "loss": 0.4158, "step": 5924 }, { "epoch": 0.24597948095215855, "grad_norm": 2.2250285148620605, "learning_rate": 8.825897198011847e-06, "loss": 0.379, "step": 5925 }, { "epoch": 0.24602099647636988, "grad_norm": 2.623218536376953, "learning_rate": 8.825464315671182e-06, "loss": 0.4676, "step": 5926 }, { "epoch": 0.2460625120005812, "grad_norm": 2.31335186958313, "learning_rate": 8.82503136416502e-06, "loss": 0.4467, "step": 5927 }, { "epoch": 0.24610402752479255, "grad_norm": 2.5637035369873047, "learning_rate": 8.82459834350119e-06, "loss": 0.5824, "step": 5928 }, { "epoch": 0.24614554304900388, "grad_norm": 2.518031120300293, "learning_rate": 8.824165253687517e-06, "loss": 0.5137, "step": 5929 }, { "epoch": 0.2461870585732152, "grad_norm": 2.064695358276367, "learning_rate": 8.823732094731835e-06, "loss": 0.4403, "step": 5930 }, { "epoch": 0.24622857409742654, "grad_norm": 2.734740734100342, "learning_rate": 8.823298866641974e-06, "loss": 0.5416, "step": 5931 }, { "epoch": 0.24627008962163788, "grad_norm": 2.4990973472595215, "learning_rate": 8.822865569425769e-06, "loss": 0.5392, "step": 5932 }, { "epoch": 0.24631160514584924, "grad_norm": 2.7685468196868896, "learning_rate": 8.822432203091051e-06, "loss": 0.6572, "step": 5933 }, { "epoch": 0.24635312067006057, "grad_norm": 2.8371596336364746, "learning_rate": 8.82199876764566e-06, "loss": 0.5928, "step": 5934 }, { "epoch": 0.2463946361942719, "grad_norm": 2.1227214336395264, "learning_rate": 8.821565263097425e-06, "loss": 0.4899, "step": 5935 }, { "epoch": 0.24643615171848324, "grad_norm": 2.6401102542877197, "learning_rate": 8.821131689454193e-06, "loss": 0.4551, "step": 5936 }, { "epoch": 0.24647766724269457, "grad_norm": 2.558070659637451, "learning_rate": 8.820698046723796e-06, "loss": 0.6897, "step": 5937 }, { "epoch": 0.2465191827669059, "grad_norm": 2.8228678703308105, "learning_rate": 8.820264334914077e-06, "loss": 0.533, "step": 5938 }, { "epoch": 0.24656069829111724, "grad_norm": 2.876620292663574, "learning_rate": 8.819830554032879e-06, "loss": 0.446, "step": 5939 }, { "epoch": 0.24660221381532857, "grad_norm": 2.391533374786377, "learning_rate": 8.819396704088043e-06, "loss": 0.4875, "step": 5940 }, { "epoch": 0.2466437293395399, "grad_norm": 3.148952007293701, "learning_rate": 8.818962785087414e-06, "loss": 0.5819, "step": 5941 }, { "epoch": 0.24668524486375124, "grad_norm": 2.616976261138916, "learning_rate": 8.818528797038837e-06, "loss": 0.5402, "step": 5942 }, { "epoch": 0.24672676038796257, "grad_norm": 2.3874993324279785, "learning_rate": 8.818094739950157e-06, "loss": 0.5475, "step": 5943 }, { "epoch": 0.2467682759121739, "grad_norm": 2.3361175060272217, "learning_rate": 8.817660613829226e-06, "loss": 0.6307, "step": 5944 }, { "epoch": 0.24680979143638523, "grad_norm": 2.504883050918579, "learning_rate": 8.81722641868389e-06, "loss": 0.4613, "step": 5945 }, { "epoch": 0.24685130696059657, "grad_norm": 2.8885934352874756, "learning_rate": 8.816792154522001e-06, "loss": 0.4032, "step": 5946 }, { "epoch": 0.2468928224848079, "grad_norm": 2.3471415042877197, "learning_rate": 8.81635782135141e-06, "loss": 0.5518, "step": 5947 }, { "epoch": 0.24693433800901926, "grad_norm": 3.0130693912506104, "learning_rate": 8.815923419179966e-06, "loss": 0.739, "step": 5948 }, { "epoch": 0.2469758535332306, "grad_norm": 2.92862606048584, "learning_rate": 8.81548894801553e-06, "loss": 0.4719, "step": 5949 }, { "epoch": 0.24701736905744193, "grad_norm": 2.3627405166625977, "learning_rate": 8.815054407865953e-06, "loss": 0.4558, "step": 5950 }, { "epoch": 0.24705888458165326, "grad_norm": 2.675844430923462, "learning_rate": 8.814619798739093e-06, "loss": 0.5167, "step": 5951 }, { "epoch": 0.2471004001058646, "grad_norm": 2.320197343826294, "learning_rate": 8.814185120642808e-06, "loss": 0.5558, "step": 5952 }, { "epoch": 0.24714191563007593, "grad_norm": 3.310668468475342, "learning_rate": 8.813750373584956e-06, "loss": 0.5387, "step": 5953 }, { "epoch": 0.24718343115428726, "grad_norm": 2.4384238719940186, "learning_rate": 8.813315557573397e-06, "loss": 0.504, "step": 5954 }, { "epoch": 0.2472249466784986, "grad_norm": 2.672220468521118, "learning_rate": 8.812880672615996e-06, "loss": 0.5212, "step": 5955 }, { "epoch": 0.24726646220270992, "grad_norm": 2.594043016433716, "learning_rate": 8.812445718720612e-06, "loss": 0.6811, "step": 5956 }, { "epoch": 0.24730797772692126, "grad_norm": 2.411292314529419, "learning_rate": 8.81201069589511e-06, "loss": 0.5662, "step": 5957 }, { "epoch": 0.2473494932511326, "grad_norm": 2.7319228649139404, "learning_rate": 8.811575604147356e-06, "loss": 0.5266, "step": 5958 }, { "epoch": 0.24739100877534392, "grad_norm": 2.456589698791504, "learning_rate": 8.811140443485218e-06, "loss": 0.6173, "step": 5959 }, { "epoch": 0.24743252429955526, "grad_norm": 2.5755739212036133, "learning_rate": 8.810705213916562e-06, "loss": 0.4433, "step": 5960 }, { "epoch": 0.2474740398237666, "grad_norm": 2.8922972679138184, "learning_rate": 8.810269915449255e-06, "loss": 0.4817, "step": 5961 }, { "epoch": 0.24751555534797792, "grad_norm": 2.3289592266082764, "learning_rate": 8.809834548091172e-06, "loss": 0.4742, "step": 5962 }, { "epoch": 0.24755707087218926, "grad_norm": 2.671496868133545, "learning_rate": 8.809399111850182e-06, "loss": 0.6602, "step": 5963 }, { "epoch": 0.24759858639640062, "grad_norm": 2.4403512477874756, "learning_rate": 8.808963606734158e-06, "loss": 0.4972, "step": 5964 }, { "epoch": 0.24764010192061195, "grad_norm": 2.6125588417053223, "learning_rate": 8.808528032750973e-06, "loss": 0.4713, "step": 5965 }, { "epoch": 0.24768161744482328, "grad_norm": 2.008911609649658, "learning_rate": 8.808092389908504e-06, "loss": 0.4759, "step": 5966 }, { "epoch": 0.24772313296903462, "grad_norm": 2.401474714279175, "learning_rate": 8.807656678214629e-06, "loss": 0.4116, "step": 5967 }, { "epoch": 0.24776464849324595, "grad_norm": 2.911604404449463, "learning_rate": 8.80722089767722e-06, "loss": 0.7745, "step": 5968 }, { "epoch": 0.24780616401745728, "grad_norm": 2.3379335403442383, "learning_rate": 8.806785048304162e-06, "loss": 0.5613, "step": 5969 }, { "epoch": 0.24784767954166861, "grad_norm": 2.6269352436065674, "learning_rate": 8.806349130103334e-06, "loss": 0.5149, "step": 5970 }, { "epoch": 0.24788919506587995, "grad_norm": 2.562790870666504, "learning_rate": 8.805913143082617e-06, "loss": 0.5434, "step": 5971 }, { "epoch": 0.24793071059009128, "grad_norm": 2.883652687072754, "learning_rate": 8.80547708724989e-06, "loss": 0.7633, "step": 5972 }, { "epoch": 0.2479722261143026, "grad_norm": 3.0093681812286377, "learning_rate": 8.805040962613043e-06, "loss": 0.5774, "step": 5973 }, { "epoch": 0.24801374163851395, "grad_norm": 2.683317184448242, "learning_rate": 8.804604769179958e-06, "loss": 0.5515, "step": 5974 }, { "epoch": 0.24805525716272528, "grad_norm": 2.8263354301452637, "learning_rate": 8.804168506958521e-06, "loss": 0.6507, "step": 5975 }, { "epoch": 0.2480967726869366, "grad_norm": 2.317444086074829, "learning_rate": 8.803732175956623e-06, "loss": 0.4614, "step": 5976 }, { "epoch": 0.24813828821114794, "grad_norm": 2.389342784881592, "learning_rate": 8.80329577618215e-06, "loss": 0.5227, "step": 5977 }, { "epoch": 0.24817980373535928, "grad_norm": 2.3227813243865967, "learning_rate": 8.802859307642993e-06, "loss": 0.5272, "step": 5978 }, { "epoch": 0.24822131925957064, "grad_norm": 3.0512144565582275, "learning_rate": 8.802422770347044e-06, "loss": 0.6379, "step": 5979 }, { "epoch": 0.24826283478378197, "grad_norm": 2.6267921924591064, "learning_rate": 8.801986164302196e-06, "loss": 0.3422, "step": 5980 }, { "epoch": 0.2483043503079933, "grad_norm": 2.5490903854370117, "learning_rate": 8.801549489516342e-06, "loss": 0.5951, "step": 5981 }, { "epoch": 0.24834586583220464, "grad_norm": 2.8355679512023926, "learning_rate": 8.801112745997376e-06, "loss": 0.5101, "step": 5982 }, { "epoch": 0.24838738135641597, "grad_norm": 2.910773277282715, "learning_rate": 8.800675933753195e-06, "loss": 0.5627, "step": 5983 }, { "epoch": 0.2484288968806273, "grad_norm": 2.7329678535461426, "learning_rate": 8.8002390527917e-06, "loss": 0.5555, "step": 5984 }, { "epoch": 0.24847041240483864, "grad_norm": 2.881730318069458, "learning_rate": 8.799802103120787e-06, "loss": 0.4801, "step": 5985 }, { "epoch": 0.24851192792904997, "grad_norm": 3.08597993850708, "learning_rate": 8.799365084748357e-06, "loss": 0.5756, "step": 5986 }, { "epoch": 0.2485534434532613, "grad_norm": 2.7034339904785156, "learning_rate": 8.79892799768231e-06, "loss": 0.5817, "step": 5987 }, { "epoch": 0.24859495897747264, "grad_norm": 2.709892511367798, "learning_rate": 8.798490841930553e-06, "loss": 0.5437, "step": 5988 }, { "epoch": 0.24863647450168397, "grad_norm": 2.160893678665161, "learning_rate": 8.798053617500984e-06, "loss": 0.4545, "step": 5989 }, { "epoch": 0.2486779900258953, "grad_norm": 2.5103461742401123, "learning_rate": 8.797616324401512e-06, "loss": 0.7016, "step": 5990 }, { "epoch": 0.24871950555010663, "grad_norm": 2.8076224327087402, "learning_rate": 8.79717896264004e-06, "loss": 0.6742, "step": 5991 }, { "epoch": 0.24876102107431797, "grad_norm": 2.222236394882202, "learning_rate": 8.796741532224481e-06, "loss": 0.4407, "step": 5992 }, { "epoch": 0.2488025365985293, "grad_norm": 3.0668346881866455, "learning_rate": 8.79630403316274e-06, "loss": 0.45, "step": 5993 }, { "epoch": 0.24884405212274066, "grad_norm": 2.465665817260742, "learning_rate": 8.795866465462726e-06, "loss": 0.4757, "step": 5994 }, { "epoch": 0.248885567646952, "grad_norm": 2.6376748085021973, "learning_rate": 8.795428829132355e-06, "loss": 0.4868, "step": 5995 }, { "epoch": 0.24892708317116333, "grad_norm": 2.391847610473633, "learning_rate": 8.794991124179535e-06, "loss": 0.5774, "step": 5996 }, { "epoch": 0.24896859869537466, "grad_norm": 2.505873441696167, "learning_rate": 8.79455335061218e-06, "loss": 0.5527, "step": 5997 }, { "epoch": 0.249010114219586, "grad_norm": 2.30679988861084, "learning_rate": 8.79411550843821e-06, "loss": 0.477, "step": 5998 }, { "epoch": 0.24905162974379733, "grad_norm": 2.52673602104187, "learning_rate": 8.793677597665535e-06, "loss": 0.5089, "step": 5999 }, { "epoch": 0.24909314526800866, "grad_norm": 2.7153165340423584, "learning_rate": 8.793239618302078e-06, "loss": 0.4637, "step": 6000 }, { "epoch": 0.24913466079222, "grad_norm": 2.5896682739257812, "learning_rate": 8.792801570355755e-06, "loss": 0.3662, "step": 6001 }, { "epoch": 0.24917617631643132, "grad_norm": 2.8461897373199463, "learning_rate": 8.792363453834486e-06, "loss": 0.544, "step": 6002 }, { "epoch": 0.24921769184064266, "grad_norm": 2.5791327953338623, "learning_rate": 8.791925268746193e-06, "loss": 0.5474, "step": 6003 }, { "epoch": 0.249259207364854, "grad_norm": 2.4697377681732178, "learning_rate": 8.791487015098798e-06, "loss": 0.5206, "step": 6004 }, { "epoch": 0.24930072288906532, "grad_norm": 2.9822998046875, "learning_rate": 8.791048692900224e-06, "loss": 0.6372, "step": 6005 }, { "epoch": 0.24934223841327666, "grad_norm": 2.400765895843506, "learning_rate": 8.790610302158398e-06, "loss": 0.5645, "step": 6006 }, { "epoch": 0.249383753937488, "grad_norm": 3.074052095413208, "learning_rate": 8.790171842881246e-06, "loss": 0.3692, "step": 6007 }, { "epoch": 0.24942526946169932, "grad_norm": 3.2064197063446045, "learning_rate": 8.789733315076693e-06, "loss": 0.495, "step": 6008 }, { "epoch": 0.24946678498591066, "grad_norm": 2.3843331336975098, "learning_rate": 8.78929471875267e-06, "loss": 0.4324, "step": 6009 }, { "epoch": 0.24950830051012202, "grad_norm": 2.9366345405578613, "learning_rate": 8.788856053917106e-06, "loss": 0.5821, "step": 6010 }, { "epoch": 0.24954981603433335, "grad_norm": 2.656144618988037, "learning_rate": 8.788417320577932e-06, "loss": 0.4913, "step": 6011 }, { "epoch": 0.24959133155854468, "grad_norm": 2.616137742996216, "learning_rate": 8.787978518743081e-06, "loss": 0.3969, "step": 6012 }, { "epoch": 0.24963284708275602, "grad_norm": 2.434948682785034, "learning_rate": 8.787539648420489e-06, "loss": 0.5318, "step": 6013 }, { "epoch": 0.24967436260696735, "grad_norm": 2.3803863525390625, "learning_rate": 8.787100709618087e-06, "loss": 0.4441, "step": 6014 }, { "epoch": 0.24971587813117868, "grad_norm": 2.617201328277588, "learning_rate": 8.786661702343811e-06, "loss": 0.5665, "step": 6015 }, { "epoch": 0.24975739365539001, "grad_norm": 3.0733447074890137, "learning_rate": 8.7862226266056e-06, "loss": 0.4547, "step": 6016 }, { "epoch": 0.24979890917960135, "grad_norm": 2.5456182956695557, "learning_rate": 8.785783482411394e-06, "loss": 0.4898, "step": 6017 }, { "epoch": 0.24984042470381268, "grad_norm": 2.4730422496795654, "learning_rate": 8.785344269769132e-06, "loss": 0.4836, "step": 6018 }, { "epoch": 0.249881940228024, "grad_norm": 2.5350019931793213, "learning_rate": 8.784904988686753e-06, "loss": 0.6033, "step": 6019 }, { "epoch": 0.24992345575223535, "grad_norm": 3.53944993019104, "learning_rate": 8.784465639172203e-06, "loss": 0.683, "step": 6020 }, { "epoch": 0.24996497127644668, "grad_norm": 2.790637493133545, "learning_rate": 8.784026221233422e-06, "loss": 0.5205, "step": 6021 }, { "epoch": 0.250006486800658, "grad_norm": 2.772156000137329, "learning_rate": 8.783586734878357e-06, "loss": 0.5452, "step": 6022 }, { "epoch": 0.25004800232486935, "grad_norm": 2.2536966800689697, "learning_rate": 8.783147180114952e-06, "loss": 0.5791, "step": 6023 }, { "epoch": 0.2500895178490807, "grad_norm": 2.6789791584014893, "learning_rate": 8.782707556951155e-06, "loss": 0.5873, "step": 6024 }, { "epoch": 0.250131033373292, "grad_norm": 2.570255756378174, "learning_rate": 8.782267865394918e-06, "loss": 0.6103, "step": 6025 }, { "epoch": 0.25017254889750334, "grad_norm": 2.512687921524048, "learning_rate": 8.781828105454185e-06, "loss": 0.5906, "step": 6026 }, { "epoch": 0.2502140644217147, "grad_norm": 2.4216480255126953, "learning_rate": 8.781388277136911e-06, "loss": 0.4242, "step": 6027 }, { "epoch": 0.250255579945926, "grad_norm": 2.5599818229675293, "learning_rate": 8.780948380451047e-06, "loss": 0.5502, "step": 6028 }, { "epoch": 0.25029709547013734, "grad_norm": 2.4257771968841553, "learning_rate": 8.780508415404548e-06, "loss": 0.5454, "step": 6029 }, { "epoch": 0.2503386109943487, "grad_norm": 2.17120623588562, "learning_rate": 8.780068382005364e-06, "loss": 0.5173, "step": 6030 }, { "epoch": 0.25038012651856, "grad_norm": 2.607278823852539, "learning_rate": 8.779628280261457e-06, "loss": 0.5494, "step": 6031 }, { "epoch": 0.25042164204277134, "grad_norm": 2.9297451972961426, "learning_rate": 8.779188110180778e-06, "loss": 0.5357, "step": 6032 }, { "epoch": 0.25046315756698273, "grad_norm": 2.7555410861968994, "learning_rate": 8.778747871771293e-06, "loss": 0.5051, "step": 6033 }, { "epoch": 0.25050467309119406, "grad_norm": 2.631852149963379, "learning_rate": 8.778307565040954e-06, "loss": 0.5233, "step": 6034 }, { "epoch": 0.2505461886154054, "grad_norm": 2.560610771179199, "learning_rate": 8.777867189997726e-06, "loss": 0.5093, "step": 6035 }, { "epoch": 0.25058770413961673, "grad_norm": 2.3836381435394287, "learning_rate": 8.777426746649571e-06, "loss": 0.6027, "step": 6036 }, { "epoch": 0.25062921966382806, "grad_norm": 2.5009236335754395, "learning_rate": 8.77698623500445e-06, "loss": 0.5393, "step": 6037 }, { "epoch": 0.2506707351880394, "grad_norm": 2.2596969604492188, "learning_rate": 8.776545655070332e-06, "loss": 0.5241, "step": 6038 }, { "epoch": 0.25071225071225073, "grad_norm": 2.9885191917419434, "learning_rate": 8.776105006855177e-06, "loss": 0.5491, "step": 6039 }, { "epoch": 0.25075376623646206, "grad_norm": 2.284405469894409, "learning_rate": 8.775664290366958e-06, "loss": 0.4081, "step": 6040 }, { "epoch": 0.2507952817606734, "grad_norm": 2.7201929092407227, "learning_rate": 8.77522350561364e-06, "loss": 0.4887, "step": 6041 }, { "epoch": 0.2508367972848847, "grad_norm": 2.119549512863159, "learning_rate": 8.77478265260319e-06, "loss": 0.5845, "step": 6042 }, { "epoch": 0.25087831280909606, "grad_norm": 2.129591226577759, "learning_rate": 8.774341731343584e-06, "loss": 0.4652, "step": 6043 }, { "epoch": 0.2509198283333074, "grad_norm": 2.8173158168792725, "learning_rate": 8.773900741842791e-06, "loss": 0.5248, "step": 6044 }, { "epoch": 0.2509613438575187, "grad_norm": 2.5839884281158447, "learning_rate": 8.773459684108785e-06, "loss": 0.6267, "step": 6045 }, { "epoch": 0.25100285938173006, "grad_norm": 2.3267765045166016, "learning_rate": 8.773018558149541e-06, "loss": 0.6066, "step": 6046 }, { "epoch": 0.2510443749059414, "grad_norm": 2.4529614448547363, "learning_rate": 8.772577363973032e-06, "loss": 0.3649, "step": 6047 }, { "epoch": 0.2510858904301527, "grad_norm": 2.9769113063812256, "learning_rate": 8.772136101587238e-06, "loss": 0.4746, "step": 6048 }, { "epoch": 0.25112740595436406, "grad_norm": 2.5808260440826416, "learning_rate": 8.771694771000137e-06, "loss": 0.4936, "step": 6049 }, { "epoch": 0.2511689214785754, "grad_norm": 2.605663776397705, "learning_rate": 8.771253372219706e-06, "loss": 0.6505, "step": 6050 }, { "epoch": 0.2512104370027867, "grad_norm": 2.4331090450286865, "learning_rate": 8.770811905253929e-06, "loss": 0.4356, "step": 6051 }, { "epoch": 0.25125195252699806, "grad_norm": 3.3152318000793457, "learning_rate": 8.770370370110783e-06, "loss": 0.4178, "step": 6052 }, { "epoch": 0.2512934680512094, "grad_norm": 3.1380844116210938, "learning_rate": 8.769928766798257e-06, "loss": 0.5491, "step": 6053 }, { "epoch": 0.2513349835754207, "grad_norm": 2.603670835494995, "learning_rate": 8.76948709532433e-06, "loss": 0.4807, "step": 6054 }, { "epoch": 0.25137649909963206, "grad_norm": 3.12290620803833, "learning_rate": 8.76904535569699e-06, "loss": 0.5143, "step": 6055 }, { "epoch": 0.2514180146238434, "grad_norm": 2.442692995071411, "learning_rate": 8.768603547924224e-06, "loss": 0.3847, "step": 6056 }, { "epoch": 0.2514595301480547, "grad_norm": 2.646686315536499, "learning_rate": 8.76816167201402e-06, "loss": 0.5403, "step": 6057 }, { "epoch": 0.25150104567226605, "grad_norm": 2.452786684036255, "learning_rate": 8.767719727974367e-06, "loss": 0.4143, "step": 6058 }, { "epoch": 0.2515425611964774, "grad_norm": 2.500349521636963, "learning_rate": 8.767277715813255e-06, "loss": 0.5465, "step": 6059 }, { "epoch": 0.2515840767206887, "grad_norm": 2.337512493133545, "learning_rate": 8.766835635538676e-06, "loss": 0.5999, "step": 6060 }, { "epoch": 0.25162559224490005, "grad_norm": 3.153451681137085, "learning_rate": 8.766393487158622e-06, "loss": 0.5358, "step": 6061 }, { "epoch": 0.2516671077691114, "grad_norm": 2.6534438133239746, "learning_rate": 8.76595127068109e-06, "loss": 0.535, "step": 6062 }, { "epoch": 0.2517086232933227, "grad_norm": 2.8910298347473145, "learning_rate": 8.765508986114073e-06, "loss": 0.5085, "step": 6063 }, { "epoch": 0.2517501388175341, "grad_norm": 2.671527147293091, "learning_rate": 8.765066633465567e-06, "loss": 0.5046, "step": 6064 }, { "epoch": 0.25179165434174544, "grad_norm": 2.5329105854034424, "learning_rate": 8.764624212743573e-06, "loss": 0.4101, "step": 6065 }, { "epoch": 0.2518331698659568, "grad_norm": 2.329359769821167, "learning_rate": 8.764181723956086e-06, "loss": 0.5887, "step": 6066 }, { "epoch": 0.2518746853901681, "grad_norm": 2.7954025268554688, "learning_rate": 8.763739167111109e-06, "loss": 0.564, "step": 6067 }, { "epoch": 0.25191620091437944, "grad_norm": 2.6092119216918945, "learning_rate": 8.763296542216646e-06, "loss": 0.5351, "step": 6068 }, { "epoch": 0.2519577164385908, "grad_norm": 3.139312505722046, "learning_rate": 8.762853849280692e-06, "loss": 0.5458, "step": 6069 }, { "epoch": 0.2519992319628021, "grad_norm": 3.4647226333618164, "learning_rate": 8.76241108831126e-06, "loss": 0.4544, "step": 6070 }, { "epoch": 0.25204074748701344, "grad_norm": 2.546590566635132, "learning_rate": 8.76196825931635e-06, "loss": 0.5353, "step": 6071 }, { "epoch": 0.25208226301122477, "grad_norm": 2.3506357669830322, "learning_rate": 8.761525362303968e-06, "loss": 0.5545, "step": 6072 }, { "epoch": 0.2521237785354361, "grad_norm": 2.387394905090332, "learning_rate": 8.761082397282124e-06, "loss": 0.5289, "step": 6073 }, { "epoch": 0.25216529405964744, "grad_norm": 2.2721071243286133, "learning_rate": 8.760639364258827e-06, "loss": 0.4148, "step": 6074 }, { "epoch": 0.25220680958385877, "grad_norm": 2.768541097640991, "learning_rate": 8.760196263242086e-06, "loss": 0.5206, "step": 6075 }, { "epoch": 0.2522483251080701, "grad_norm": 2.3195436000823975, "learning_rate": 8.759753094239912e-06, "loss": 0.4518, "step": 6076 }, { "epoch": 0.25228984063228144, "grad_norm": 2.220795154571533, "learning_rate": 8.759309857260318e-06, "loss": 0.4525, "step": 6077 }, { "epoch": 0.25233135615649277, "grad_norm": 2.6042728424072266, "learning_rate": 8.75886655231132e-06, "loss": 0.5156, "step": 6078 }, { "epoch": 0.2523728716807041, "grad_norm": 2.4964818954467773, "learning_rate": 8.75842317940093e-06, "loss": 0.5905, "step": 6079 }, { "epoch": 0.25241438720491544, "grad_norm": 2.9830970764160156, "learning_rate": 8.757979738537165e-06, "loss": 0.5652, "step": 6080 }, { "epoch": 0.25245590272912677, "grad_norm": 3.0663981437683105, "learning_rate": 8.757536229728045e-06, "loss": 0.6617, "step": 6081 }, { "epoch": 0.2524974182533381, "grad_norm": 2.351306438446045, "learning_rate": 8.757092652981586e-06, "loss": 0.3946, "step": 6082 }, { "epoch": 0.25253893377754943, "grad_norm": 3.080448865890503, "learning_rate": 8.75664900830581e-06, "loss": 0.5071, "step": 6083 }, { "epoch": 0.25258044930176077, "grad_norm": 2.5140998363494873, "learning_rate": 8.756205295708736e-06, "loss": 0.4161, "step": 6084 }, { "epoch": 0.2526219648259721, "grad_norm": 2.2672853469848633, "learning_rate": 8.755761515198388e-06, "loss": 0.3884, "step": 6085 }, { "epoch": 0.25266348035018343, "grad_norm": 2.1237685680389404, "learning_rate": 8.75531766678279e-06, "loss": 0.4551, "step": 6086 }, { "epoch": 0.25270499587439477, "grad_norm": 2.2294070720672607, "learning_rate": 8.754873750469964e-06, "loss": 0.5339, "step": 6087 }, { "epoch": 0.2527465113986061, "grad_norm": 2.836127281188965, "learning_rate": 8.75442976626794e-06, "loss": 0.4987, "step": 6088 }, { "epoch": 0.25278802692281743, "grad_norm": 2.6867547035217285, "learning_rate": 8.753985714184742e-06, "loss": 0.525, "step": 6089 }, { "epoch": 0.25282954244702877, "grad_norm": 3.2885701656341553, "learning_rate": 8.753541594228402e-06, "loss": 0.4933, "step": 6090 }, { "epoch": 0.2528710579712401, "grad_norm": 2.6824567317962646, "learning_rate": 8.753097406406948e-06, "loss": 0.4058, "step": 6091 }, { "epoch": 0.25291257349545143, "grad_norm": 3.0099384784698486, "learning_rate": 8.752653150728412e-06, "loss": 0.4198, "step": 6092 }, { "epoch": 0.25295408901966276, "grad_norm": 2.64376163482666, "learning_rate": 8.752208827200824e-06, "loss": 0.6518, "step": 6093 }, { "epoch": 0.2529956045438741, "grad_norm": 2.6075711250305176, "learning_rate": 8.751764435832219e-06, "loss": 0.5992, "step": 6094 }, { "epoch": 0.2530371200680855, "grad_norm": 2.3994665145874023, "learning_rate": 8.75131997663063e-06, "loss": 0.4879, "step": 6095 }, { "epoch": 0.2530786355922968, "grad_norm": 2.431636095046997, "learning_rate": 8.750875449604097e-06, "loss": 0.5252, "step": 6096 }, { "epoch": 0.25312015111650815, "grad_norm": 2.489075183868408, "learning_rate": 8.750430854760655e-06, "loss": 0.4686, "step": 6097 }, { "epoch": 0.2531616666407195, "grad_norm": 2.329878807067871, "learning_rate": 8.74998619210834e-06, "loss": 0.3503, "step": 6098 }, { "epoch": 0.2532031821649308, "grad_norm": 2.913264036178589, "learning_rate": 8.749541461655196e-06, "loss": 0.3864, "step": 6099 }, { "epoch": 0.25324469768914215, "grad_norm": 2.830030679702759, "learning_rate": 8.74909666340926e-06, "loss": 0.5091, "step": 6100 }, { "epoch": 0.2532862132133535, "grad_norm": 2.5483174324035645, "learning_rate": 8.748651797378577e-06, "loss": 0.4573, "step": 6101 }, { "epoch": 0.2533277287375648, "grad_norm": 2.186554193496704, "learning_rate": 8.748206863571188e-06, "loss": 0.3811, "step": 6102 }, { "epoch": 0.25336924426177615, "grad_norm": 2.8261280059814453, "learning_rate": 8.747761861995138e-06, "loss": 0.6135, "step": 6103 }, { "epoch": 0.2534107597859875, "grad_norm": 2.134382963180542, "learning_rate": 8.747316792658476e-06, "loss": 0.4671, "step": 6104 }, { "epoch": 0.2534522753101988, "grad_norm": 2.2374775409698486, "learning_rate": 8.746871655569245e-06, "loss": 0.5338, "step": 6105 }, { "epoch": 0.25349379083441015, "grad_norm": 2.4490461349487305, "learning_rate": 8.746426450735492e-06, "loss": 0.4961, "step": 6106 }, { "epoch": 0.2535353063586215, "grad_norm": 2.6269679069519043, "learning_rate": 8.745981178165271e-06, "loss": 0.4962, "step": 6107 }, { "epoch": 0.2535768218828328, "grad_norm": 2.5325543880462646, "learning_rate": 8.745535837866631e-06, "loss": 0.5615, "step": 6108 }, { "epoch": 0.25361833740704415, "grad_norm": 2.61061954498291, "learning_rate": 8.745090429847623e-06, "loss": 0.6487, "step": 6109 }, { "epoch": 0.2536598529312555, "grad_norm": 2.6959452629089355, "learning_rate": 8.744644954116302e-06, "loss": 0.4295, "step": 6110 }, { "epoch": 0.2537013684554668, "grad_norm": 2.4315695762634277, "learning_rate": 8.744199410680718e-06, "loss": 0.5179, "step": 6111 }, { "epoch": 0.25374288397967815, "grad_norm": 3.1465954780578613, "learning_rate": 8.743753799548931e-06, "loss": 0.4953, "step": 6112 }, { "epoch": 0.2537843995038895, "grad_norm": 2.5251376628875732, "learning_rate": 8.743308120728997e-06, "loss": 0.595, "step": 6113 }, { "epoch": 0.2538259150281008, "grad_norm": 2.7967066764831543, "learning_rate": 8.742862374228971e-06, "loss": 0.6409, "step": 6114 }, { "epoch": 0.25386743055231215, "grad_norm": 2.345761775970459, "learning_rate": 8.742416560056916e-06, "loss": 0.4466, "step": 6115 }, { "epoch": 0.2539089460765235, "grad_norm": 2.9506499767303467, "learning_rate": 8.741970678220892e-06, "loss": 0.4132, "step": 6116 }, { "epoch": 0.2539504616007348, "grad_norm": 3.0347962379455566, "learning_rate": 8.741524728728958e-06, "loss": 0.5979, "step": 6117 }, { "epoch": 0.25399197712494614, "grad_norm": 2.780982732772827, "learning_rate": 8.741078711589179e-06, "loss": 0.3517, "step": 6118 }, { "epoch": 0.2540334926491575, "grad_norm": 2.637558698654175, "learning_rate": 8.740632626809619e-06, "loss": 0.5709, "step": 6119 }, { "epoch": 0.2540750081733688, "grad_norm": 2.5891809463500977, "learning_rate": 8.740186474398342e-06, "loss": 0.5492, "step": 6120 }, { "epoch": 0.25411652369758014, "grad_norm": 2.6699259281158447, "learning_rate": 8.739740254363416e-06, "loss": 0.4697, "step": 6121 }, { "epoch": 0.2541580392217915, "grad_norm": 2.5157737731933594, "learning_rate": 8.739293966712908e-06, "loss": 0.5134, "step": 6122 }, { "epoch": 0.2541995547460028, "grad_norm": 2.5350453853607178, "learning_rate": 8.738847611454887e-06, "loss": 0.5041, "step": 6123 }, { "epoch": 0.25424107027021414, "grad_norm": 2.467172145843506, "learning_rate": 8.738401188597426e-06, "loss": 0.5056, "step": 6124 }, { "epoch": 0.2542825857944255, "grad_norm": 2.7869229316711426, "learning_rate": 8.737954698148591e-06, "loss": 0.606, "step": 6125 }, { "epoch": 0.25432410131863686, "grad_norm": 2.5161261558532715, "learning_rate": 8.73750814011646e-06, "loss": 0.4345, "step": 6126 }, { "epoch": 0.2543656168428482, "grad_norm": 2.430281400680542, "learning_rate": 8.737061514509101e-06, "loss": 0.5754, "step": 6127 }, { "epoch": 0.25440713236705953, "grad_norm": 2.654356002807617, "learning_rate": 8.736614821334596e-06, "loss": 0.5613, "step": 6128 }, { "epoch": 0.25444864789127086, "grad_norm": 2.182718276977539, "learning_rate": 8.736168060601017e-06, "loss": 0.536, "step": 6129 }, { "epoch": 0.2544901634154822, "grad_norm": 2.199392557144165, "learning_rate": 8.735721232316444e-06, "loss": 0.4784, "step": 6130 }, { "epoch": 0.25453167893969353, "grad_norm": 3.08396577835083, "learning_rate": 8.73527433648895e-06, "loss": 0.5155, "step": 6131 }, { "epoch": 0.25457319446390486, "grad_norm": 3.956906318664551, "learning_rate": 8.734827373126625e-06, "loss": 0.6527, "step": 6132 }, { "epoch": 0.2546147099881162, "grad_norm": 2.444489002227783, "learning_rate": 8.734380342237541e-06, "loss": 0.5869, "step": 6133 }, { "epoch": 0.2546562255123275, "grad_norm": 2.549943447113037, "learning_rate": 8.733933243829784e-06, "loss": 0.4362, "step": 6134 }, { "epoch": 0.25469774103653886, "grad_norm": 3.2833497524261475, "learning_rate": 8.73348607791144e-06, "loss": 0.5728, "step": 6135 }, { "epoch": 0.2547392565607502, "grad_norm": 2.0936098098754883, "learning_rate": 8.73303884449059e-06, "loss": 0.5514, "step": 6136 }, { "epoch": 0.2547807720849615, "grad_norm": 2.0887770652770996, "learning_rate": 8.732591543575322e-06, "loss": 0.5844, "step": 6137 }, { "epoch": 0.25482228760917286, "grad_norm": 2.680081367492676, "learning_rate": 8.732144175173723e-06, "loss": 0.4574, "step": 6138 }, { "epoch": 0.2548638031333842, "grad_norm": 2.7962520122528076, "learning_rate": 8.731696739293882e-06, "loss": 0.547, "step": 6139 }, { "epoch": 0.2549053186575955, "grad_norm": 2.5167150497436523, "learning_rate": 8.731249235943886e-06, "loss": 0.5148, "step": 6140 }, { "epoch": 0.25494683418180686, "grad_norm": 2.2881393432617188, "learning_rate": 8.730801665131831e-06, "loss": 0.4949, "step": 6141 }, { "epoch": 0.2549883497060182, "grad_norm": 2.2764878273010254, "learning_rate": 8.730354026865806e-06, "loss": 0.4474, "step": 6142 }, { "epoch": 0.2550298652302295, "grad_norm": 2.5400869846343994, "learning_rate": 8.729906321153905e-06, "loss": 0.4399, "step": 6143 }, { "epoch": 0.25507138075444086, "grad_norm": 2.9270882606506348, "learning_rate": 8.729458548004223e-06, "loss": 0.444, "step": 6144 }, { "epoch": 0.2551128962786522, "grad_norm": 2.2789902687072754, "learning_rate": 8.729010707424855e-06, "loss": 0.4929, "step": 6145 }, { "epoch": 0.2551544118028635, "grad_norm": 2.320904493331909, "learning_rate": 8.7285627994239e-06, "loss": 0.6829, "step": 6146 }, { "epoch": 0.25519592732707486, "grad_norm": 2.610520124435425, "learning_rate": 8.728114824009452e-06, "loss": 0.4986, "step": 6147 }, { "epoch": 0.2552374428512862, "grad_norm": 3.0544393062591553, "learning_rate": 8.727666781189615e-06, "loss": 0.4829, "step": 6148 }, { "epoch": 0.2552789583754975, "grad_norm": 2.7850749492645264, "learning_rate": 8.727218670972488e-06, "loss": 0.6194, "step": 6149 }, { "epoch": 0.25532047389970886, "grad_norm": 2.2681944370269775, "learning_rate": 8.726770493366175e-06, "loss": 0.4231, "step": 6150 }, { "epoch": 0.2553619894239202, "grad_norm": 2.412349224090576, "learning_rate": 8.726322248378775e-06, "loss": 0.4682, "step": 6151 }, { "epoch": 0.2554035049481315, "grad_norm": 2.31107497215271, "learning_rate": 8.725873936018393e-06, "loss": 0.5073, "step": 6152 }, { "epoch": 0.25544502047234285, "grad_norm": 2.4460978507995605, "learning_rate": 8.72542555629314e-06, "loss": 0.5652, "step": 6153 }, { "epoch": 0.2554865359965542, "grad_norm": 2.469722032546997, "learning_rate": 8.724977109211118e-06, "loss": 0.5769, "step": 6154 }, { "epoch": 0.2555280515207655, "grad_norm": 2.890490770339966, "learning_rate": 8.724528594780436e-06, "loss": 0.6551, "step": 6155 }, { "epoch": 0.2555695670449769, "grad_norm": 2.7637417316436768, "learning_rate": 8.724080013009203e-06, "loss": 0.5634, "step": 6156 }, { "epoch": 0.25561108256918824, "grad_norm": 2.520993947982788, "learning_rate": 8.723631363905531e-06, "loss": 0.466, "step": 6157 }, { "epoch": 0.2556525980933996, "grad_norm": 2.2525429725646973, "learning_rate": 8.72318264747753e-06, "loss": 0.6093, "step": 6158 }, { "epoch": 0.2556941136176109, "grad_norm": 2.4436841011047363, "learning_rate": 8.722733863733314e-06, "loss": 0.5566, "step": 6159 }, { "epoch": 0.25573562914182224, "grad_norm": 2.2309274673461914, "learning_rate": 8.722285012680995e-06, "loss": 0.3985, "step": 6160 }, { "epoch": 0.2557771446660336, "grad_norm": 2.385516405105591, "learning_rate": 8.721836094328693e-06, "loss": 0.4184, "step": 6161 }, { "epoch": 0.2558186601902449, "grad_norm": 2.137760877609253, "learning_rate": 8.72138710868452e-06, "loss": 0.5433, "step": 6162 }, { "epoch": 0.25586017571445624, "grad_norm": 2.5381577014923096, "learning_rate": 8.720938055756596e-06, "loss": 0.5877, "step": 6163 }, { "epoch": 0.2559016912386676, "grad_norm": 2.6998603343963623, "learning_rate": 8.720488935553038e-06, "loss": 0.6037, "step": 6164 }, { "epoch": 0.2559432067628789, "grad_norm": 3.168353319168091, "learning_rate": 8.72003974808197e-06, "loss": 0.6038, "step": 6165 }, { "epoch": 0.25598472228709024, "grad_norm": 2.5858066082000732, "learning_rate": 8.719590493351509e-06, "loss": 0.5578, "step": 6166 }, { "epoch": 0.25602623781130157, "grad_norm": 2.2197866439819336, "learning_rate": 8.719141171369781e-06, "loss": 0.4082, "step": 6167 }, { "epoch": 0.2560677533355129, "grad_norm": 2.9386284351348877, "learning_rate": 8.718691782144908e-06, "loss": 0.3719, "step": 6168 }, { "epoch": 0.25610926885972424, "grad_norm": 2.5472512245178223, "learning_rate": 8.718242325685017e-06, "loss": 0.5604, "step": 6169 }, { "epoch": 0.25615078438393557, "grad_norm": 2.442491054534912, "learning_rate": 8.717792801998231e-06, "loss": 0.6625, "step": 6170 }, { "epoch": 0.2561922999081469, "grad_norm": 3.3604419231414795, "learning_rate": 8.717343211092682e-06, "loss": 0.5392, "step": 6171 }, { "epoch": 0.25623381543235824, "grad_norm": 2.2707948684692383, "learning_rate": 8.716893552976493e-06, "loss": 0.4615, "step": 6172 }, { "epoch": 0.25627533095656957, "grad_norm": 2.7242631912231445, "learning_rate": 8.7164438276578e-06, "loss": 0.658, "step": 6173 }, { "epoch": 0.2563168464807809, "grad_norm": 2.4650306701660156, "learning_rate": 8.71599403514473e-06, "loss": 0.5693, "step": 6174 }, { "epoch": 0.25635836200499224, "grad_norm": 2.6697378158569336, "learning_rate": 8.715544175445418e-06, "loss": 0.553, "step": 6175 }, { "epoch": 0.25639987752920357, "grad_norm": 2.6340811252593994, "learning_rate": 8.715094248567998e-06, "loss": 0.4673, "step": 6176 }, { "epoch": 0.2564413930534149, "grad_norm": 2.603724241256714, "learning_rate": 8.714644254520599e-06, "loss": 0.4151, "step": 6177 }, { "epoch": 0.25648290857762623, "grad_norm": 2.3995449542999268, "learning_rate": 8.714194193311362e-06, "loss": 0.461, "step": 6178 }, { "epoch": 0.25652442410183757, "grad_norm": 2.146230697631836, "learning_rate": 8.713744064948426e-06, "loss": 0.4349, "step": 6179 }, { "epoch": 0.2565659396260489, "grad_norm": 2.4112367630004883, "learning_rate": 8.713293869439924e-06, "loss": 0.5118, "step": 6180 }, { "epoch": 0.25660745515026023, "grad_norm": 2.172804594039917, "learning_rate": 8.712843606794002e-06, "loss": 0.4846, "step": 6181 }, { "epoch": 0.25664897067447157, "grad_norm": 2.3504106998443604, "learning_rate": 8.712393277018794e-06, "loss": 0.4384, "step": 6182 }, { "epoch": 0.2566904861986829, "grad_norm": 2.456861734390259, "learning_rate": 8.711942880122449e-06, "loss": 0.4403, "step": 6183 }, { "epoch": 0.25673200172289423, "grad_norm": 2.3325488567352295, "learning_rate": 8.711492416113103e-06, "loss": 0.5153, "step": 6184 }, { "epoch": 0.25677351724710556, "grad_norm": 2.5267794132232666, "learning_rate": 8.711041884998906e-06, "loss": 0.6124, "step": 6185 }, { "epoch": 0.2568150327713169, "grad_norm": 2.4006104469299316, "learning_rate": 8.710591286788003e-06, "loss": 0.5586, "step": 6186 }, { "epoch": 0.2568565482955283, "grad_norm": 2.9931392669677734, "learning_rate": 8.710140621488539e-06, "loss": 0.6285, "step": 6187 }, { "epoch": 0.2568980638197396, "grad_norm": 3.3088784217834473, "learning_rate": 8.709689889108664e-06, "loss": 0.496, "step": 6188 }, { "epoch": 0.25693957934395095, "grad_norm": 2.5451414585113525, "learning_rate": 8.709239089656524e-06, "loss": 0.5434, "step": 6189 }, { "epoch": 0.2569810948681623, "grad_norm": 2.98370361328125, "learning_rate": 8.708788223140277e-06, "loss": 0.5825, "step": 6190 }, { "epoch": 0.2570226103923736, "grad_norm": 2.458237648010254, "learning_rate": 8.708337289568065e-06, "loss": 0.5648, "step": 6191 }, { "epoch": 0.25706412591658495, "grad_norm": 2.1255040168762207, "learning_rate": 8.707886288948048e-06, "loss": 0.5113, "step": 6192 }, { "epoch": 0.2571056414407963, "grad_norm": 2.3566370010375977, "learning_rate": 8.707435221288379e-06, "loss": 0.4563, "step": 6193 }, { "epoch": 0.2571471569650076, "grad_norm": 2.3292877674102783, "learning_rate": 8.706984086597212e-06, "loss": 0.5487, "step": 6194 }, { "epoch": 0.25718867248921895, "grad_norm": 1.9639694690704346, "learning_rate": 8.706532884882704e-06, "loss": 0.3904, "step": 6195 }, { "epoch": 0.2572301880134303, "grad_norm": 3.0136890411376953, "learning_rate": 8.706081616153012e-06, "loss": 0.5843, "step": 6196 }, { "epoch": 0.2572717035376416, "grad_norm": 1.9895182847976685, "learning_rate": 8.705630280416299e-06, "loss": 0.5286, "step": 6197 }, { "epoch": 0.25731321906185295, "grad_norm": 2.593061923980713, "learning_rate": 8.70517887768072e-06, "loss": 0.497, "step": 6198 }, { "epoch": 0.2573547345860643, "grad_norm": 2.196587562561035, "learning_rate": 8.704727407954439e-06, "loss": 0.4816, "step": 6199 }, { "epoch": 0.2573962501102756, "grad_norm": 2.0851891040802, "learning_rate": 8.70427587124562e-06, "loss": 0.46, "step": 6200 }, { "epoch": 0.25743776563448695, "grad_norm": 2.4157636165618896, "learning_rate": 8.703824267562424e-06, "loss": 0.4873, "step": 6201 }, { "epoch": 0.2574792811586983, "grad_norm": 3.078601598739624, "learning_rate": 8.70337259691302e-06, "loss": 0.4181, "step": 6202 }, { "epoch": 0.2575207966829096, "grad_norm": 2.284865617752075, "learning_rate": 8.702920859305571e-06, "loss": 0.5417, "step": 6203 }, { "epoch": 0.25756231220712095, "grad_norm": 2.666019916534424, "learning_rate": 8.702469054748244e-06, "loss": 0.5158, "step": 6204 }, { "epoch": 0.2576038277313323, "grad_norm": 2.549227237701416, "learning_rate": 8.702017183249212e-06, "loss": 0.4839, "step": 6205 }, { "epoch": 0.2576453432555436, "grad_norm": 3.543097496032715, "learning_rate": 8.70156524481664e-06, "loss": 0.73, "step": 6206 }, { "epoch": 0.25768685877975495, "grad_norm": 2.43125319480896, "learning_rate": 8.701113239458702e-06, "loss": 0.5068, "step": 6207 }, { "epoch": 0.2577283743039663, "grad_norm": 2.293837308883667, "learning_rate": 8.700661167183571e-06, "loss": 0.4999, "step": 6208 }, { "epoch": 0.2577698898281776, "grad_norm": 2.608149290084839, "learning_rate": 8.700209027999418e-06, "loss": 0.3953, "step": 6209 }, { "epoch": 0.25781140535238894, "grad_norm": 2.3171017169952393, "learning_rate": 8.69975682191442e-06, "loss": 0.5616, "step": 6210 }, { "epoch": 0.2578529208766003, "grad_norm": 2.7650561332702637, "learning_rate": 8.699304548936751e-06, "loss": 0.6329, "step": 6211 }, { "epoch": 0.2578944364008116, "grad_norm": 2.7633121013641357, "learning_rate": 8.69885220907459e-06, "loss": 0.3903, "step": 6212 }, { "epoch": 0.25793595192502294, "grad_norm": 2.247821569442749, "learning_rate": 8.698399802336117e-06, "loss": 0.5726, "step": 6213 }, { "epoch": 0.2579774674492343, "grad_norm": 2.6829488277435303, "learning_rate": 8.697947328729508e-06, "loss": 0.6242, "step": 6214 }, { "epoch": 0.2580189829734456, "grad_norm": 2.3260467052459717, "learning_rate": 8.697494788262946e-06, "loss": 0.4614, "step": 6215 }, { "epoch": 0.25806049849765694, "grad_norm": 3.279664993286133, "learning_rate": 8.697042180944613e-06, "loss": 0.4991, "step": 6216 }, { "epoch": 0.2581020140218683, "grad_norm": 2.718886137008667, "learning_rate": 8.69658950678269e-06, "loss": 0.5581, "step": 6217 }, { "epoch": 0.25814352954607966, "grad_norm": 2.656707763671875, "learning_rate": 8.696136765785364e-06, "loss": 0.5275, "step": 6218 }, { "epoch": 0.258185045070291, "grad_norm": 2.749995470046997, "learning_rate": 8.69568395796082e-06, "loss": 0.6182, "step": 6219 }, { "epoch": 0.25822656059450233, "grad_norm": 2.8850367069244385, "learning_rate": 8.695231083317246e-06, "loss": 0.5614, "step": 6220 }, { "epoch": 0.25826807611871366, "grad_norm": 2.3553526401519775, "learning_rate": 8.694778141862828e-06, "loss": 0.4857, "step": 6221 }, { "epoch": 0.258309591642925, "grad_norm": 2.4156925678253174, "learning_rate": 8.694325133605755e-06, "loss": 0.4494, "step": 6222 }, { "epoch": 0.25835110716713633, "grad_norm": 2.5285685062408447, "learning_rate": 8.69387205855422e-06, "loss": 0.4899, "step": 6223 }, { "epoch": 0.25839262269134766, "grad_norm": 2.5760602951049805, "learning_rate": 8.693418916716411e-06, "loss": 0.4721, "step": 6224 }, { "epoch": 0.258434138215559, "grad_norm": 3.5046095848083496, "learning_rate": 8.692965708100527e-06, "loss": 0.5109, "step": 6225 }, { "epoch": 0.25847565373977033, "grad_norm": 2.221564292907715, "learning_rate": 8.692512432714758e-06, "loss": 0.5133, "step": 6226 }, { "epoch": 0.25851716926398166, "grad_norm": 2.4271490573883057, "learning_rate": 8.6920590905673e-06, "loss": 0.5242, "step": 6227 }, { "epoch": 0.258558684788193, "grad_norm": 2.642791271209717, "learning_rate": 8.691605681666348e-06, "loss": 0.5729, "step": 6228 }, { "epoch": 0.2586002003124043, "grad_norm": 1.915185809135437, "learning_rate": 8.691152206020101e-06, "loss": 0.4129, "step": 6229 }, { "epoch": 0.25864171583661566, "grad_norm": 2.926053762435913, "learning_rate": 8.690698663636759e-06, "loss": 0.4315, "step": 6230 }, { "epoch": 0.258683231360827, "grad_norm": 2.3195385932922363, "learning_rate": 8.690245054524522e-06, "loss": 0.5917, "step": 6231 }, { "epoch": 0.2587247468850383, "grad_norm": 2.588555335998535, "learning_rate": 8.68979137869159e-06, "loss": 0.4208, "step": 6232 }, { "epoch": 0.25876626240924966, "grad_norm": 2.7405636310577393, "learning_rate": 8.689337636146166e-06, "loss": 0.5862, "step": 6233 }, { "epoch": 0.258807777933461, "grad_norm": 2.7656381130218506, "learning_rate": 8.688883826896458e-06, "loss": 0.5436, "step": 6234 }, { "epoch": 0.2588492934576723, "grad_norm": 2.8336403369903564, "learning_rate": 8.688429950950662e-06, "loss": 0.3738, "step": 6235 }, { "epoch": 0.25889080898188366, "grad_norm": 2.2438485622406006, "learning_rate": 8.687976008316991e-06, "loss": 0.3798, "step": 6236 }, { "epoch": 0.258932324506095, "grad_norm": 2.492370843887329, "learning_rate": 8.687521999003652e-06, "loss": 0.7798, "step": 6237 }, { "epoch": 0.2589738400303063, "grad_norm": 2.9802982807159424, "learning_rate": 8.68706792301885e-06, "loss": 0.5476, "step": 6238 }, { "epoch": 0.25901535555451766, "grad_norm": 2.754323720932007, "learning_rate": 8.6866137803708e-06, "loss": 0.4095, "step": 6239 }, { "epoch": 0.259056871078729, "grad_norm": 2.9543492794036865, "learning_rate": 8.68615957106771e-06, "loss": 0.5101, "step": 6240 }, { "epoch": 0.2590983866029403, "grad_norm": 2.671668529510498, "learning_rate": 8.68570529511779e-06, "loss": 0.4825, "step": 6241 }, { "epoch": 0.25913990212715166, "grad_norm": 2.238574981689453, "learning_rate": 8.685250952529259e-06, "loss": 0.51, "step": 6242 }, { "epoch": 0.259181417651363, "grad_norm": 2.4865667819976807, "learning_rate": 8.684796543310325e-06, "loss": 0.4443, "step": 6243 }, { "epoch": 0.2592229331755743, "grad_norm": 2.372274398803711, "learning_rate": 8.684342067469209e-06, "loss": 0.547, "step": 6244 }, { "epoch": 0.25926444869978565, "grad_norm": 2.6519932746887207, "learning_rate": 8.683887525014127e-06, "loss": 0.4363, "step": 6245 }, { "epoch": 0.259305964223997, "grad_norm": 2.5370423793792725, "learning_rate": 8.683432915953299e-06, "loss": 0.5353, "step": 6246 }, { "epoch": 0.2593474797482083, "grad_norm": 2.1028614044189453, "learning_rate": 8.682978240294939e-06, "loss": 0.6826, "step": 6247 }, { "epoch": 0.25938899527241965, "grad_norm": 2.329322338104248, "learning_rate": 8.68252349804727e-06, "loss": 0.5164, "step": 6248 }, { "epoch": 0.25943051079663104, "grad_norm": 3.0932774543762207, "learning_rate": 8.682068689218517e-06, "loss": 0.5234, "step": 6249 }, { "epoch": 0.2594720263208424, "grad_norm": 2.202852725982666, "learning_rate": 8.6816138138169e-06, "loss": 0.698, "step": 6250 }, { "epoch": 0.2595135418450537, "grad_norm": 2.3598506450653076, "learning_rate": 8.681158871850646e-06, "loss": 0.5467, "step": 6251 }, { "epoch": 0.25955505736926504, "grad_norm": 2.6271204948425293, "learning_rate": 8.680703863327975e-06, "loss": 0.5224, "step": 6252 }, { "epoch": 0.2595965728934764, "grad_norm": 2.5185155868530273, "learning_rate": 8.68024878825712e-06, "loss": 0.5949, "step": 6253 }, { "epoch": 0.2596380884176877, "grad_norm": 2.656461477279663, "learning_rate": 8.679793646646306e-06, "loss": 0.5809, "step": 6254 }, { "epoch": 0.25967960394189904, "grad_norm": 3.382244348526001, "learning_rate": 8.679338438503762e-06, "loss": 0.7107, "step": 6255 }, { "epoch": 0.2597211194661104, "grad_norm": 2.356672763824463, "learning_rate": 8.678883163837716e-06, "loss": 0.6272, "step": 6256 }, { "epoch": 0.2597626349903217, "grad_norm": 2.1063809394836426, "learning_rate": 8.678427822656405e-06, "loss": 0.4863, "step": 6257 }, { "epoch": 0.25980415051453304, "grad_norm": 2.5028858184814453, "learning_rate": 8.677972414968059e-06, "loss": 0.4664, "step": 6258 }, { "epoch": 0.25984566603874437, "grad_norm": 2.920494318008423, "learning_rate": 8.67751694078091e-06, "loss": 0.3864, "step": 6259 }, { "epoch": 0.2598871815629557, "grad_norm": 2.4889485836029053, "learning_rate": 8.677061400103196e-06, "loss": 0.4708, "step": 6260 }, { "epoch": 0.25992869708716704, "grad_norm": 3.048617362976074, "learning_rate": 8.67660579294315e-06, "loss": 0.4106, "step": 6261 }, { "epoch": 0.25997021261137837, "grad_norm": 3.075876235961914, "learning_rate": 8.676150119309012e-06, "loss": 0.5104, "step": 6262 }, { "epoch": 0.2600117281355897, "grad_norm": 3.1336286067962646, "learning_rate": 8.675694379209021e-06, "loss": 0.5587, "step": 6263 }, { "epoch": 0.26005324365980104, "grad_norm": 2.492205858230591, "learning_rate": 8.675238572651417e-06, "loss": 0.6409, "step": 6264 }, { "epoch": 0.26009475918401237, "grad_norm": 2.2575433254241943, "learning_rate": 8.67478269964444e-06, "loss": 0.5164, "step": 6265 }, { "epoch": 0.2601362747082237, "grad_norm": 2.5306313037872314, "learning_rate": 8.674326760196333e-06, "loss": 0.5722, "step": 6266 }, { "epoch": 0.26017779023243504, "grad_norm": 2.8481712341308594, "learning_rate": 8.673870754315336e-06, "loss": 0.5231, "step": 6267 }, { "epoch": 0.26021930575664637, "grad_norm": 2.4258322715759277, "learning_rate": 8.6734146820097e-06, "loss": 0.7057, "step": 6268 }, { "epoch": 0.2602608212808577, "grad_norm": 2.35839581489563, "learning_rate": 8.672958543287666e-06, "loss": 0.4765, "step": 6269 }, { "epoch": 0.26030233680506903, "grad_norm": 2.389660596847534, "learning_rate": 8.672502338157485e-06, "loss": 0.4878, "step": 6270 }, { "epoch": 0.26034385232928037, "grad_norm": 2.2907350063323975, "learning_rate": 8.672046066627402e-06, "loss": 0.503, "step": 6271 }, { "epoch": 0.2603853678534917, "grad_norm": 2.4436099529266357, "learning_rate": 8.671589728705667e-06, "loss": 0.4336, "step": 6272 }, { "epoch": 0.26042688337770303, "grad_norm": 1.9686071872711182, "learning_rate": 8.671133324400533e-06, "loss": 0.5373, "step": 6273 }, { "epoch": 0.26046839890191437, "grad_norm": 2.581324338912964, "learning_rate": 8.67067685372025e-06, "loss": 0.5435, "step": 6274 }, { "epoch": 0.2605099144261257, "grad_norm": 2.5625877380371094, "learning_rate": 8.670220316673071e-06, "loss": 0.5987, "step": 6275 }, { "epoch": 0.26055142995033703, "grad_norm": 2.34903883934021, "learning_rate": 8.66976371326725e-06, "loss": 0.4992, "step": 6276 }, { "epoch": 0.26059294547454837, "grad_norm": 2.442351818084717, "learning_rate": 8.669307043511043e-06, "loss": 0.501, "step": 6277 }, { "epoch": 0.2606344609987597, "grad_norm": 3.0216667652130127, "learning_rate": 8.668850307412708e-06, "loss": 0.6588, "step": 6278 }, { "epoch": 0.26067597652297103, "grad_norm": 2.380901336669922, "learning_rate": 8.668393504980503e-06, "loss": 0.5136, "step": 6279 }, { "epoch": 0.2607174920471824, "grad_norm": 2.1425135135650635, "learning_rate": 8.667936636222685e-06, "loss": 0.4462, "step": 6280 }, { "epoch": 0.26075900757139375, "grad_norm": 2.5460944175720215, "learning_rate": 8.667479701147515e-06, "loss": 0.5372, "step": 6281 }, { "epoch": 0.2608005230956051, "grad_norm": 2.152118444442749, "learning_rate": 8.667022699763256e-06, "loss": 0.4805, "step": 6282 }, { "epoch": 0.2608420386198164, "grad_norm": 2.4239444732666016, "learning_rate": 8.66656563207817e-06, "loss": 0.4986, "step": 6283 }, { "epoch": 0.26088355414402775, "grad_norm": 2.6998002529144287, "learning_rate": 8.666108498100518e-06, "loss": 0.4494, "step": 6284 }, { "epoch": 0.2609250696682391, "grad_norm": 2.5492875576019287, "learning_rate": 8.665651297838572e-06, "loss": 0.5075, "step": 6285 }, { "epoch": 0.2609665851924504, "grad_norm": 2.4359586238861084, "learning_rate": 8.66519403130059e-06, "loss": 0.4802, "step": 6286 }, { "epoch": 0.26100810071666175, "grad_norm": 2.6561641693115234, "learning_rate": 8.664736698494844e-06, "loss": 0.5728, "step": 6287 }, { "epoch": 0.2610496162408731, "grad_norm": 2.7225542068481445, "learning_rate": 8.664279299429603e-06, "loss": 0.5995, "step": 6288 }, { "epoch": 0.2610911317650844, "grad_norm": 3.245562791824341, "learning_rate": 8.663821834113136e-06, "loss": 0.509, "step": 6289 }, { "epoch": 0.26113264728929575, "grad_norm": 2.517822027206421, "learning_rate": 8.663364302553713e-06, "loss": 0.5736, "step": 6290 }, { "epoch": 0.2611741628135071, "grad_norm": 2.394096851348877, "learning_rate": 8.662906704759611e-06, "loss": 0.5213, "step": 6291 }, { "epoch": 0.2612156783377184, "grad_norm": 2.3977696895599365, "learning_rate": 8.662449040739098e-06, "loss": 0.5102, "step": 6292 }, { "epoch": 0.26125719386192975, "grad_norm": 2.2975032329559326, "learning_rate": 8.66199131050045e-06, "loss": 0.443, "step": 6293 }, { "epoch": 0.2612987093861411, "grad_norm": 2.8407859802246094, "learning_rate": 8.661533514051945e-06, "loss": 0.5485, "step": 6294 }, { "epoch": 0.2613402249103524, "grad_norm": 2.46732234954834, "learning_rate": 8.661075651401856e-06, "loss": 0.5353, "step": 6295 }, { "epoch": 0.26138174043456375, "grad_norm": 2.2081665992736816, "learning_rate": 8.660617722558467e-06, "loss": 0.481, "step": 6296 }, { "epoch": 0.2614232559587751, "grad_norm": 2.317106008529663, "learning_rate": 8.660159727530053e-06, "loss": 0.5374, "step": 6297 }, { "epoch": 0.2614647714829864, "grad_norm": 2.9343619346618652, "learning_rate": 8.659701666324897e-06, "loss": 0.4694, "step": 6298 }, { "epoch": 0.26150628700719775, "grad_norm": 3.0764687061309814, "learning_rate": 8.659243538951278e-06, "loss": 0.5203, "step": 6299 }, { "epoch": 0.2615478025314091, "grad_norm": 2.422616481781006, "learning_rate": 8.658785345417484e-06, "loss": 0.4596, "step": 6300 }, { "epoch": 0.2615893180556204, "grad_norm": 2.2935950756073, "learning_rate": 8.658327085731795e-06, "loss": 0.4993, "step": 6301 }, { "epoch": 0.26163083357983175, "grad_norm": 2.2160754203796387, "learning_rate": 8.657868759902498e-06, "loss": 0.5166, "step": 6302 }, { "epoch": 0.2616723491040431, "grad_norm": 2.394602060317993, "learning_rate": 8.65741036793788e-06, "loss": 0.479, "step": 6303 }, { "epoch": 0.2617138646282544, "grad_norm": 3.143716812133789, "learning_rate": 8.656951909846227e-06, "loss": 0.4801, "step": 6304 }, { "epoch": 0.26175538015246574, "grad_norm": 2.852038621902466, "learning_rate": 8.65649338563583e-06, "loss": 0.6671, "step": 6305 }, { "epoch": 0.2617968956766771, "grad_norm": 2.2754435539245605, "learning_rate": 8.656034795314978e-06, "loss": 0.4392, "step": 6306 }, { "epoch": 0.2618384112008884, "grad_norm": 2.175259590148926, "learning_rate": 8.655576138891965e-06, "loss": 0.5035, "step": 6307 }, { "epoch": 0.26187992672509974, "grad_norm": 2.522962808609009, "learning_rate": 8.65511741637508e-06, "loss": 0.5385, "step": 6308 }, { "epoch": 0.2619214422493111, "grad_norm": 2.9249343872070312, "learning_rate": 8.65465862777262e-06, "loss": 0.468, "step": 6309 }, { "epoch": 0.2619629577735224, "grad_norm": 2.8619656562805176, "learning_rate": 8.654199773092878e-06, "loss": 0.4837, "step": 6310 }, { "epoch": 0.2620044732977338, "grad_norm": 2.548213481903076, "learning_rate": 8.65374085234415e-06, "loss": 0.4756, "step": 6311 }, { "epoch": 0.26204598882194513, "grad_norm": 2.88626766204834, "learning_rate": 8.653281865534736e-06, "loss": 0.5438, "step": 6312 }, { "epoch": 0.26208750434615646, "grad_norm": 2.5666537284851074, "learning_rate": 8.652822812672932e-06, "loss": 0.5719, "step": 6313 }, { "epoch": 0.2621290198703678, "grad_norm": 2.934828281402588, "learning_rate": 8.652363693767039e-06, "loss": 0.4964, "step": 6314 }, { "epoch": 0.26217053539457913, "grad_norm": 2.7471578121185303, "learning_rate": 8.651904508825356e-06, "loss": 0.4098, "step": 6315 }, { "epoch": 0.26221205091879046, "grad_norm": 2.806661367416382, "learning_rate": 8.651445257856189e-06, "loss": 0.4618, "step": 6316 }, { "epoch": 0.2622535664430018, "grad_norm": 2.7987780570983887, "learning_rate": 8.650985940867838e-06, "loss": 0.5784, "step": 6317 }, { "epoch": 0.26229508196721313, "grad_norm": 2.714186906814575, "learning_rate": 8.650526557868609e-06, "loss": 0.4757, "step": 6318 }, { "epoch": 0.26233659749142446, "grad_norm": 3.1216673851013184, "learning_rate": 8.650067108866809e-06, "loss": 0.6167, "step": 6319 }, { "epoch": 0.2623781130156358, "grad_norm": 2.333644390106201, "learning_rate": 8.64960759387074e-06, "loss": 0.5478, "step": 6320 }, { "epoch": 0.2624196285398471, "grad_norm": 2.251347780227661, "learning_rate": 8.649148012888717e-06, "loss": 0.4234, "step": 6321 }, { "epoch": 0.26246114406405846, "grad_norm": 2.546271800994873, "learning_rate": 8.648688365929046e-06, "loss": 0.5919, "step": 6322 }, { "epoch": 0.2625026595882698, "grad_norm": 2.4189627170562744, "learning_rate": 8.648228653000038e-06, "loss": 0.5022, "step": 6323 }, { "epoch": 0.2625441751124811, "grad_norm": 2.060314893722534, "learning_rate": 8.647768874110003e-06, "loss": 0.4731, "step": 6324 }, { "epoch": 0.26258569063669246, "grad_norm": 2.4665513038635254, "learning_rate": 8.647309029267257e-06, "loss": 0.4079, "step": 6325 }, { "epoch": 0.2626272061609038, "grad_norm": 2.2908246517181396, "learning_rate": 8.646849118480111e-06, "loss": 0.4815, "step": 6326 }, { "epoch": 0.2626687216851151, "grad_norm": 2.56266450881958, "learning_rate": 8.646389141756882e-06, "loss": 0.6034, "step": 6327 }, { "epoch": 0.26271023720932646, "grad_norm": 2.319338321685791, "learning_rate": 8.645929099105886e-06, "loss": 0.4118, "step": 6328 }, { "epoch": 0.2627517527335378, "grad_norm": 2.2429895401000977, "learning_rate": 8.645468990535441e-06, "loss": 0.557, "step": 6329 }, { "epoch": 0.2627932682577491, "grad_norm": 2.6515913009643555, "learning_rate": 8.645008816053869e-06, "loss": 0.6754, "step": 6330 }, { "epoch": 0.26283478378196046, "grad_norm": 2.716280937194824, "learning_rate": 8.644548575669483e-06, "loss": 0.563, "step": 6331 }, { "epoch": 0.2628762993061718, "grad_norm": 2.459580183029175, "learning_rate": 8.644088269390609e-06, "loss": 0.5681, "step": 6332 }, { "epoch": 0.2629178148303831, "grad_norm": 2.5653140544891357, "learning_rate": 8.64362789722557e-06, "loss": 0.6166, "step": 6333 }, { "epoch": 0.26295933035459446, "grad_norm": 2.539383888244629, "learning_rate": 8.643167459182688e-06, "loss": 0.5376, "step": 6334 }, { "epoch": 0.2630008458788058, "grad_norm": 2.182007074356079, "learning_rate": 8.642706955270288e-06, "loss": 0.4349, "step": 6335 }, { "epoch": 0.2630423614030171, "grad_norm": 2.334408760070801, "learning_rate": 8.642246385496698e-06, "loss": 0.394, "step": 6336 }, { "epoch": 0.26308387692722845, "grad_norm": 2.8524234294891357, "learning_rate": 8.641785749870242e-06, "loss": 0.6351, "step": 6337 }, { "epoch": 0.2631253924514398, "grad_norm": 2.564990520477295, "learning_rate": 8.641325048399249e-06, "loss": 0.4729, "step": 6338 }, { "epoch": 0.2631669079756511, "grad_norm": 2.624851942062378, "learning_rate": 8.640864281092051e-06, "loss": 0.419, "step": 6339 }, { "epoch": 0.26320842349986245, "grad_norm": 2.0748302936553955, "learning_rate": 8.640403447956977e-06, "loss": 0.3641, "step": 6340 }, { "epoch": 0.2632499390240738, "grad_norm": 2.5973634719848633, "learning_rate": 8.639942549002358e-06, "loss": 0.5071, "step": 6341 }, { "epoch": 0.2632914545482852, "grad_norm": 3.0469348430633545, "learning_rate": 8.63948158423653e-06, "loss": 0.4348, "step": 6342 }, { "epoch": 0.2633329700724965, "grad_norm": 2.456571578979492, "learning_rate": 8.639020553667827e-06, "loss": 0.4967, "step": 6343 }, { "epoch": 0.26337448559670784, "grad_norm": 2.8396730422973633, "learning_rate": 8.638559457304582e-06, "loss": 0.6202, "step": 6344 }, { "epoch": 0.2634160011209192, "grad_norm": 3.5708305835723877, "learning_rate": 8.638098295155133e-06, "loss": 0.4301, "step": 6345 }, { "epoch": 0.2634575166451305, "grad_norm": 2.5608932971954346, "learning_rate": 8.63763706722782e-06, "loss": 0.5656, "step": 6346 }, { "epoch": 0.26349903216934184, "grad_norm": 2.676037311553955, "learning_rate": 8.63717577353098e-06, "loss": 0.609, "step": 6347 }, { "epoch": 0.2635405476935532, "grad_norm": 2.833794593811035, "learning_rate": 8.636714414072952e-06, "loss": 0.7004, "step": 6348 }, { "epoch": 0.2635820632177645, "grad_norm": 2.535531997680664, "learning_rate": 8.63625298886208e-06, "loss": 0.6481, "step": 6349 }, { "epoch": 0.26362357874197584, "grad_norm": 2.258073329925537, "learning_rate": 8.635791497906705e-06, "loss": 0.437, "step": 6350 }, { "epoch": 0.26366509426618717, "grad_norm": 2.4793896675109863, "learning_rate": 8.635329941215174e-06, "loss": 0.4485, "step": 6351 }, { "epoch": 0.2637066097903985, "grad_norm": 2.4692139625549316, "learning_rate": 8.634868318795829e-06, "loss": 0.4802, "step": 6352 }, { "epoch": 0.26374812531460984, "grad_norm": 2.786667823791504, "learning_rate": 8.634406630657015e-06, "loss": 0.6008, "step": 6353 }, { "epoch": 0.26378964083882117, "grad_norm": 3.111096143722534, "learning_rate": 8.633944876807083e-06, "loss": 0.5521, "step": 6354 }, { "epoch": 0.2638311563630325, "grad_norm": 2.1250407695770264, "learning_rate": 8.633483057254382e-06, "loss": 0.5133, "step": 6355 }, { "epoch": 0.26387267188724384, "grad_norm": 2.507128953933716, "learning_rate": 8.63302117200726e-06, "loss": 0.4413, "step": 6356 }, { "epoch": 0.26391418741145517, "grad_norm": 2.622368097305298, "learning_rate": 8.632559221074063e-06, "loss": 0.6256, "step": 6357 }, { "epoch": 0.2639557029356665, "grad_norm": 3.3630149364471436, "learning_rate": 8.632097204463152e-06, "loss": 0.5239, "step": 6358 }, { "epoch": 0.26399721845987784, "grad_norm": 2.973902940750122, "learning_rate": 8.631635122182878e-06, "loss": 0.6541, "step": 6359 }, { "epoch": 0.26403873398408917, "grad_norm": 2.376147747039795, "learning_rate": 8.631172974241592e-06, "loss": 0.5302, "step": 6360 }, { "epoch": 0.2640802495083005, "grad_norm": 2.3204851150512695, "learning_rate": 8.630710760647654e-06, "loss": 0.4631, "step": 6361 }, { "epoch": 0.26412176503251183, "grad_norm": 2.868149757385254, "learning_rate": 8.630248481409416e-06, "loss": 0.6483, "step": 6362 }, { "epoch": 0.26416328055672317, "grad_norm": 3.0424907207489014, "learning_rate": 8.629786136535241e-06, "loss": 0.5595, "step": 6363 }, { "epoch": 0.2642047960809345, "grad_norm": 2.723884344100952, "learning_rate": 8.629323726033488e-06, "loss": 0.565, "step": 6364 }, { "epoch": 0.26424631160514583, "grad_norm": 2.0424227714538574, "learning_rate": 8.628861249912512e-06, "loss": 0.5797, "step": 6365 }, { "epoch": 0.26428782712935717, "grad_norm": 2.422772169113159, "learning_rate": 8.62839870818068e-06, "loss": 0.6164, "step": 6366 }, { "epoch": 0.2643293426535685, "grad_norm": 2.3309361934661865, "learning_rate": 8.627936100846356e-06, "loss": 0.5362, "step": 6367 }, { "epoch": 0.26437085817777983, "grad_norm": 2.5919547080993652, "learning_rate": 8.6274734279179e-06, "loss": 0.4789, "step": 6368 }, { "epoch": 0.26441237370199117, "grad_norm": 2.197261333465576, "learning_rate": 8.627010689403678e-06, "loss": 0.5487, "step": 6369 }, { "epoch": 0.2644538892262025, "grad_norm": 2.9042820930480957, "learning_rate": 8.62654788531206e-06, "loss": 0.4936, "step": 6370 }, { "epoch": 0.26449540475041383, "grad_norm": 2.614150285720825, "learning_rate": 8.626085015651408e-06, "loss": 0.6193, "step": 6371 }, { "epoch": 0.2645369202746252, "grad_norm": 2.6929991245269775, "learning_rate": 8.625622080430094e-06, "loss": 0.6309, "step": 6372 }, { "epoch": 0.26457843579883655, "grad_norm": 2.2919857501983643, "learning_rate": 8.625159079656489e-06, "loss": 0.5326, "step": 6373 }, { "epoch": 0.2646199513230479, "grad_norm": 3.622084856033325, "learning_rate": 8.624696013338963e-06, "loss": 0.537, "step": 6374 }, { "epoch": 0.2646614668472592, "grad_norm": 2.574540376663208, "learning_rate": 8.624232881485887e-06, "loss": 0.6028, "step": 6375 }, { "epoch": 0.26470298237147055, "grad_norm": 2.5391695499420166, "learning_rate": 8.623769684105639e-06, "loss": 0.5037, "step": 6376 }, { "epoch": 0.2647444978956819, "grad_norm": 2.0858190059661865, "learning_rate": 8.623306421206588e-06, "loss": 0.5273, "step": 6377 }, { "epoch": 0.2647860134198932, "grad_norm": 1.9712409973144531, "learning_rate": 8.622843092797114e-06, "loss": 0.5933, "step": 6378 }, { "epoch": 0.26482752894410455, "grad_norm": 2.2266087532043457, "learning_rate": 8.62237969888559e-06, "loss": 0.4591, "step": 6379 }, { "epoch": 0.2648690444683159, "grad_norm": 2.596498489379883, "learning_rate": 8.621916239480399e-06, "loss": 0.4624, "step": 6380 }, { "epoch": 0.2649105599925272, "grad_norm": 2.467982292175293, "learning_rate": 8.621452714589918e-06, "loss": 0.4154, "step": 6381 }, { "epoch": 0.26495207551673855, "grad_norm": 2.2155518531799316, "learning_rate": 8.62098912422253e-06, "loss": 0.4473, "step": 6382 }, { "epoch": 0.2649935910409499, "grad_norm": 2.062378168106079, "learning_rate": 8.62052546838661e-06, "loss": 0.4958, "step": 6383 }, { "epoch": 0.2650351065651612, "grad_norm": 2.403748035430908, "learning_rate": 8.62006174709055e-06, "loss": 0.2849, "step": 6384 }, { "epoch": 0.26507662208937255, "grad_norm": 2.652134895324707, "learning_rate": 8.61959796034273e-06, "loss": 0.5324, "step": 6385 }, { "epoch": 0.2651181376135839, "grad_norm": 2.7129502296447754, "learning_rate": 8.619134108151535e-06, "loss": 0.5736, "step": 6386 }, { "epoch": 0.2651596531377952, "grad_norm": 2.9394776821136475, "learning_rate": 8.61867019052535e-06, "loss": 0.4199, "step": 6387 }, { "epoch": 0.26520116866200655, "grad_norm": 2.789979934692383, "learning_rate": 8.618206207472568e-06, "loss": 0.5125, "step": 6388 }, { "epoch": 0.2652426841862179, "grad_norm": 2.412569046020508, "learning_rate": 8.617742159001574e-06, "loss": 0.475, "step": 6389 }, { "epoch": 0.2652841997104292, "grad_norm": 2.4356234073638916, "learning_rate": 8.61727804512076e-06, "loss": 0.4954, "step": 6390 }, { "epoch": 0.26532571523464055, "grad_norm": 2.9521310329437256, "learning_rate": 8.616813865838514e-06, "loss": 0.661, "step": 6391 }, { "epoch": 0.2653672307588519, "grad_norm": 2.868453025817871, "learning_rate": 8.616349621163233e-06, "loss": 0.6166, "step": 6392 }, { "epoch": 0.2654087462830632, "grad_norm": 2.7337050437927246, "learning_rate": 8.615885311103306e-06, "loss": 0.6505, "step": 6393 }, { "epoch": 0.26545026180727455, "grad_norm": 2.7778635025024414, "learning_rate": 8.615420935667134e-06, "loss": 0.5705, "step": 6394 }, { "epoch": 0.2654917773314859, "grad_norm": 3.121311902999878, "learning_rate": 8.614956494863107e-06, "loss": 0.5867, "step": 6395 }, { "epoch": 0.2655332928556972, "grad_norm": 2.80145525932312, "learning_rate": 8.614491988699625e-06, "loss": 0.5377, "step": 6396 }, { "epoch": 0.26557480837990854, "grad_norm": 2.5941524505615234, "learning_rate": 8.614027417185085e-06, "loss": 0.5605, "step": 6397 }, { "epoch": 0.2656163239041199, "grad_norm": 2.6573965549468994, "learning_rate": 8.613562780327889e-06, "loss": 0.4485, "step": 6398 }, { "epoch": 0.2656578394283312, "grad_norm": 2.2814126014709473, "learning_rate": 8.613098078136436e-06, "loss": 0.5108, "step": 6399 }, { "epoch": 0.26569935495254254, "grad_norm": 2.405374765396118, "learning_rate": 8.61263331061913e-06, "loss": 0.5425, "step": 6400 }, { "epoch": 0.2657408704767539, "grad_norm": 2.7901740074157715, "learning_rate": 8.612168477784371e-06, "loss": 0.4104, "step": 6401 }, { "epoch": 0.2657823860009652, "grad_norm": 2.3455047607421875, "learning_rate": 8.611703579640564e-06, "loss": 0.4067, "step": 6402 }, { "epoch": 0.2658239015251766, "grad_norm": 2.6567790508270264, "learning_rate": 8.611238616196115e-06, "loss": 0.6253, "step": 6403 }, { "epoch": 0.26586541704938793, "grad_norm": 1.9751334190368652, "learning_rate": 8.610773587459433e-06, "loss": 0.5633, "step": 6404 }, { "epoch": 0.26590693257359926, "grad_norm": 2.499739170074463, "learning_rate": 8.610308493438921e-06, "loss": 0.519, "step": 6405 }, { "epoch": 0.2659484480978106, "grad_norm": 2.390298366546631, "learning_rate": 8.609843334142993e-06, "loss": 0.541, "step": 6406 }, { "epoch": 0.26598996362202193, "grad_norm": 2.560292959213257, "learning_rate": 8.609378109580059e-06, "loss": 0.5146, "step": 6407 }, { "epoch": 0.26603147914623326, "grad_norm": 2.4239721298217773, "learning_rate": 8.608912819758523e-06, "loss": 0.5098, "step": 6408 }, { "epoch": 0.2660729946704446, "grad_norm": 2.333413600921631, "learning_rate": 8.608447464686807e-06, "loss": 0.543, "step": 6409 }, { "epoch": 0.26611451019465593, "grad_norm": 2.5050766468048096, "learning_rate": 8.60798204437332e-06, "loss": 0.6824, "step": 6410 }, { "epoch": 0.26615602571886726, "grad_norm": 2.2888574600219727, "learning_rate": 8.607516558826477e-06, "loss": 0.5478, "step": 6411 }, { "epoch": 0.2661975412430786, "grad_norm": 2.871264934539795, "learning_rate": 8.607051008054696e-06, "loss": 0.5143, "step": 6412 }, { "epoch": 0.2662390567672899, "grad_norm": 2.4633359909057617, "learning_rate": 8.606585392066392e-06, "loss": 0.476, "step": 6413 }, { "epoch": 0.26628057229150126, "grad_norm": 3.2178735733032227, "learning_rate": 8.606119710869984e-06, "loss": 0.6303, "step": 6414 }, { "epoch": 0.2663220878157126, "grad_norm": 2.476985454559326, "learning_rate": 8.605653964473895e-06, "loss": 0.4999, "step": 6415 }, { "epoch": 0.2663636033399239, "grad_norm": 2.704440116882324, "learning_rate": 8.60518815288654e-06, "loss": 0.5129, "step": 6416 }, { "epoch": 0.26640511886413526, "grad_norm": 2.485417366027832, "learning_rate": 8.604722276116347e-06, "loss": 0.4673, "step": 6417 }, { "epoch": 0.2664466343883466, "grad_norm": 2.585966110229492, "learning_rate": 8.604256334171734e-06, "loss": 0.5197, "step": 6418 }, { "epoch": 0.2664881499125579, "grad_norm": 2.3353335857391357, "learning_rate": 8.603790327061129e-06, "loss": 0.5158, "step": 6419 }, { "epoch": 0.26652966543676926, "grad_norm": 2.329169750213623, "learning_rate": 8.603324254792955e-06, "loss": 0.4897, "step": 6420 }, { "epoch": 0.2665711809609806, "grad_norm": 2.821449041366577, "learning_rate": 8.60285811737564e-06, "loss": 0.531, "step": 6421 }, { "epoch": 0.2666126964851919, "grad_norm": 2.608492612838745, "learning_rate": 8.602391914817612e-06, "loss": 0.5177, "step": 6422 }, { "epoch": 0.26665421200940326, "grad_norm": 2.3263096809387207, "learning_rate": 8.6019256471273e-06, "loss": 0.4533, "step": 6423 }, { "epoch": 0.2666957275336146, "grad_norm": 2.3914852142333984, "learning_rate": 8.601459314313134e-06, "loss": 0.6004, "step": 6424 }, { "epoch": 0.2667372430578259, "grad_norm": 2.5025110244750977, "learning_rate": 8.600992916383545e-06, "loss": 0.5542, "step": 6425 }, { "epoch": 0.26677875858203726, "grad_norm": 2.3697237968444824, "learning_rate": 8.600526453346966e-06, "loss": 0.5793, "step": 6426 }, { "epoch": 0.2668202741062486, "grad_norm": 2.6535232067108154, "learning_rate": 8.600059925211832e-06, "loss": 0.5285, "step": 6427 }, { "epoch": 0.2668617896304599, "grad_norm": 3.068174362182617, "learning_rate": 8.599593331986577e-06, "loss": 0.6841, "step": 6428 }, { "epoch": 0.26690330515467126, "grad_norm": 2.2759101390838623, "learning_rate": 8.599126673679636e-06, "loss": 0.4846, "step": 6429 }, { "epoch": 0.2669448206788826, "grad_norm": 2.5022776126861572, "learning_rate": 8.598659950299449e-06, "loss": 0.4886, "step": 6430 }, { "epoch": 0.2669863362030939, "grad_norm": 2.546116828918457, "learning_rate": 8.598193161854451e-06, "loss": 0.445, "step": 6431 }, { "epoch": 0.26702785172730525, "grad_norm": 2.538731575012207, "learning_rate": 8.597726308353085e-06, "loss": 0.4837, "step": 6432 }, { "epoch": 0.2670693672515166, "grad_norm": 2.4774179458618164, "learning_rate": 8.597259389803791e-06, "loss": 0.5436, "step": 6433 }, { "epoch": 0.267110882775728, "grad_norm": 2.0864548683166504, "learning_rate": 8.59679240621501e-06, "loss": 0.4596, "step": 6434 }, { "epoch": 0.2671523982999393, "grad_norm": 2.5290205478668213, "learning_rate": 8.596325357595184e-06, "loss": 0.6367, "step": 6435 }, { "epoch": 0.26719391382415064, "grad_norm": 2.4312002658843994, "learning_rate": 8.59585824395276e-06, "loss": 0.559, "step": 6436 }, { "epoch": 0.267235429348362, "grad_norm": 2.693053960800171, "learning_rate": 8.595391065296182e-06, "loss": 0.7064, "step": 6437 }, { "epoch": 0.2672769448725733, "grad_norm": 2.679516553878784, "learning_rate": 8.594923821633896e-06, "loss": 0.4507, "step": 6438 }, { "epoch": 0.26731846039678464, "grad_norm": 3.1225032806396484, "learning_rate": 8.594456512974352e-06, "loss": 0.5729, "step": 6439 }, { "epoch": 0.267359975920996, "grad_norm": 2.454181432723999, "learning_rate": 8.593989139326e-06, "loss": 0.6529, "step": 6440 }, { "epoch": 0.2674014914452073, "grad_norm": 2.7975986003875732, "learning_rate": 8.593521700697286e-06, "loss": 0.5079, "step": 6441 }, { "epoch": 0.26744300696941864, "grad_norm": 2.8625710010528564, "learning_rate": 8.593054197096666e-06, "loss": 0.5635, "step": 6442 }, { "epoch": 0.26748452249363, "grad_norm": 2.545565128326416, "learning_rate": 8.592586628532589e-06, "loss": 0.5426, "step": 6443 }, { "epoch": 0.2675260380178413, "grad_norm": 3.2950384616851807, "learning_rate": 8.592118995013513e-06, "loss": 0.6025, "step": 6444 }, { "epoch": 0.26756755354205264, "grad_norm": 2.358154058456421, "learning_rate": 8.591651296547887e-06, "loss": 0.5279, "step": 6445 }, { "epoch": 0.26760906906626397, "grad_norm": 2.283038854598999, "learning_rate": 8.591183533144172e-06, "loss": 0.4494, "step": 6446 }, { "epoch": 0.2676505845904753, "grad_norm": 2.8585503101348877, "learning_rate": 8.590715704810823e-06, "loss": 0.4598, "step": 6447 }, { "epoch": 0.26769210011468664, "grad_norm": 2.333709716796875, "learning_rate": 8.590247811556298e-06, "loss": 0.4622, "step": 6448 }, { "epoch": 0.26773361563889797, "grad_norm": 2.505018711090088, "learning_rate": 8.58977985338906e-06, "loss": 0.5885, "step": 6449 }, { "epoch": 0.2677751311631093, "grad_norm": 2.2868502140045166, "learning_rate": 8.589311830317566e-06, "loss": 0.5011, "step": 6450 }, { "epoch": 0.26781664668732064, "grad_norm": 2.4996020793914795, "learning_rate": 8.588843742350279e-06, "loss": 0.4928, "step": 6451 }, { "epoch": 0.26785816221153197, "grad_norm": 2.038558006286621, "learning_rate": 8.588375589495665e-06, "loss": 0.5386, "step": 6452 }, { "epoch": 0.2678996777357433, "grad_norm": 2.3048338890075684, "learning_rate": 8.587907371762185e-06, "loss": 0.4067, "step": 6453 }, { "epoch": 0.26794119325995464, "grad_norm": 2.4196524620056152, "learning_rate": 8.587439089158305e-06, "loss": 0.5202, "step": 6454 }, { "epoch": 0.26798270878416597, "grad_norm": 2.3038480281829834, "learning_rate": 8.586970741692492e-06, "loss": 0.6409, "step": 6455 }, { "epoch": 0.2680242243083773, "grad_norm": 2.544400930404663, "learning_rate": 8.586502329373214e-06, "loss": 0.6082, "step": 6456 }, { "epoch": 0.26806573983258863, "grad_norm": 2.347322463989258, "learning_rate": 8.586033852208941e-06, "loss": 0.4839, "step": 6457 }, { "epoch": 0.26810725535679997, "grad_norm": 2.5614936351776123, "learning_rate": 8.585565310208143e-06, "loss": 0.5752, "step": 6458 }, { "epoch": 0.2681487708810113, "grad_norm": 2.1823227405548096, "learning_rate": 8.58509670337929e-06, "loss": 0.5745, "step": 6459 }, { "epoch": 0.26819028640522263, "grad_norm": 2.326329469680786, "learning_rate": 8.584628031730854e-06, "loss": 0.4883, "step": 6460 }, { "epoch": 0.26823180192943397, "grad_norm": 2.105656623840332, "learning_rate": 8.584159295271312e-06, "loss": 0.6425, "step": 6461 }, { "epoch": 0.2682733174536453, "grad_norm": 2.9386463165283203, "learning_rate": 8.583690494009135e-06, "loss": 0.5098, "step": 6462 }, { "epoch": 0.26831483297785663, "grad_norm": 2.3969664573669434, "learning_rate": 8.583221627952804e-06, "loss": 0.6111, "step": 6463 }, { "epoch": 0.26835634850206797, "grad_norm": 2.3711822032928467, "learning_rate": 8.58275269711079e-06, "loss": 0.5162, "step": 6464 }, { "epoch": 0.26839786402627935, "grad_norm": 2.341623067855835, "learning_rate": 8.582283701491576e-06, "loss": 0.5411, "step": 6465 }, { "epoch": 0.2684393795504907, "grad_norm": 2.9983112812042236, "learning_rate": 8.58181464110364e-06, "loss": 0.4441, "step": 6466 }, { "epoch": 0.268480895074702, "grad_norm": 1.9466522932052612, "learning_rate": 8.581345515955465e-06, "loss": 0.4302, "step": 6467 }, { "epoch": 0.26852241059891335, "grad_norm": 2.4564247131347656, "learning_rate": 8.580876326055527e-06, "loss": 0.4892, "step": 6468 }, { "epoch": 0.2685639261231247, "grad_norm": 2.8051328659057617, "learning_rate": 8.580407071412315e-06, "loss": 0.606, "step": 6469 }, { "epoch": 0.268605441647336, "grad_norm": 2.594133138656616, "learning_rate": 8.57993775203431e-06, "loss": 0.4466, "step": 6470 }, { "epoch": 0.26864695717154735, "grad_norm": 2.6113767623901367, "learning_rate": 8.57946836793e-06, "loss": 0.5794, "step": 6471 }, { "epoch": 0.2686884726957587, "grad_norm": 2.213778257369995, "learning_rate": 8.57899891910787e-06, "loss": 0.6063, "step": 6472 }, { "epoch": 0.26872998821997, "grad_norm": 2.346891403198242, "learning_rate": 8.578529405576406e-06, "loss": 0.4649, "step": 6473 }, { "epoch": 0.26877150374418135, "grad_norm": 2.0488686561584473, "learning_rate": 8.578059827344101e-06, "loss": 0.3715, "step": 6474 }, { "epoch": 0.2688130192683927, "grad_norm": 2.92500901222229, "learning_rate": 8.577590184419442e-06, "loss": 0.5063, "step": 6475 }, { "epoch": 0.268854534792604, "grad_norm": 3.417508363723755, "learning_rate": 8.577120476810922e-06, "loss": 0.6688, "step": 6476 }, { "epoch": 0.26889605031681535, "grad_norm": 2.2038838863372803, "learning_rate": 8.576650704527032e-06, "loss": 0.4095, "step": 6477 }, { "epoch": 0.2689375658410267, "grad_norm": 3.519160747528076, "learning_rate": 8.576180867576267e-06, "loss": 0.5684, "step": 6478 }, { "epoch": 0.268979081365238, "grad_norm": 2.322300672531128, "learning_rate": 8.57571096596712e-06, "loss": 0.501, "step": 6479 }, { "epoch": 0.26902059688944935, "grad_norm": 2.365939140319824, "learning_rate": 8.57524099970809e-06, "loss": 0.5464, "step": 6480 }, { "epoch": 0.2690621124136607, "grad_norm": 2.4468133449554443, "learning_rate": 8.574770968807672e-06, "loss": 0.5527, "step": 6481 }, { "epoch": 0.269103627937872, "grad_norm": 2.3363184928894043, "learning_rate": 8.574300873274362e-06, "loss": 0.5198, "step": 6482 }, { "epoch": 0.26914514346208335, "grad_norm": 2.490572452545166, "learning_rate": 8.573830713116663e-06, "loss": 0.4626, "step": 6483 }, { "epoch": 0.2691866589862947, "grad_norm": 2.19389009475708, "learning_rate": 8.573360488343077e-06, "loss": 0.3786, "step": 6484 }, { "epoch": 0.269228174510506, "grad_norm": 2.6417717933654785, "learning_rate": 8.572890198962103e-06, "loss": 0.5712, "step": 6485 }, { "epoch": 0.26926969003471735, "grad_norm": 2.9075214862823486, "learning_rate": 8.572419844982243e-06, "loss": 0.593, "step": 6486 }, { "epoch": 0.2693112055589287, "grad_norm": 2.470832347869873, "learning_rate": 8.571949426412002e-06, "loss": 0.4075, "step": 6487 }, { "epoch": 0.26935272108314, "grad_norm": 2.679417371749878, "learning_rate": 8.571478943259885e-06, "loss": 0.5801, "step": 6488 }, { "epoch": 0.26939423660735135, "grad_norm": 2.200899839401245, "learning_rate": 8.571008395534401e-06, "loss": 0.4735, "step": 6489 }, { "epoch": 0.2694357521315627, "grad_norm": 2.412083148956299, "learning_rate": 8.570537783244058e-06, "loss": 0.7041, "step": 6490 }, { "epoch": 0.269477267655774, "grad_norm": 2.7605271339416504, "learning_rate": 8.57006710639736e-06, "loss": 0.5391, "step": 6491 }, { "epoch": 0.26951878317998534, "grad_norm": 2.6536529064178467, "learning_rate": 8.569596365002818e-06, "loss": 0.5113, "step": 6492 }, { "epoch": 0.2695602987041967, "grad_norm": 2.393791675567627, "learning_rate": 8.56912555906895e-06, "loss": 0.4786, "step": 6493 }, { "epoch": 0.269601814228408, "grad_norm": 2.4388303756713867, "learning_rate": 8.56865468860426e-06, "loss": 0.5775, "step": 6494 }, { "epoch": 0.26964332975261934, "grad_norm": 2.6943132877349854, "learning_rate": 8.568183753617264e-06, "loss": 0.6396, "step": 6495 }, { "epoch": 0.26968484527683073, "grad_norm": 2.315237522125244, "learning_rate": 8.567712754116478e-06, "loss": 0.4533, "step": 6496 }, { "epoch": 0.26972636080104206, "grad_norm": 2.8067338466644287, "learning_rate": 8.567241690110418e-06, "loss": 0.7433, "step": 6497 }, { "epoch": 0.2697678763252534, "grad_norm": 2.579366683959961, "learning_rate": 8.566770561607598e-06, "loss": 0.5456, "step": 6498 }, { "epoch": 0.26980939184946473, "grad_norm": 2.2707326412200928, "learning_rate": 8.56629936861654e-06, "loss": 0.5439, "step": 6499 }, { "epoch": 0.26985090737367606, "grad_norm": 2.9450979232788086, "learning_rate": 8.565828111145761e-06, "loss": 0.6875, "step": 6500 }, { "epoch": 0.2698924228978874, "grad_norm": 2.250178337097168, "learning_rate": 8.565356789203781e-06, "loss": 0.4887, "step": 6501 }, { "epoch": 0.26993393842209873, "grad_norm": 2.1550683975219727, "learning_rate": 8.564885402799124e-06, "loss": 0.3837, "step": 6502 }, { "epoch": 0.26997545394631006, "grad_norm": 2.8146960735321045, "learning_rate": 8.564413951940312e-06, "loss": 0.471, "step": 6503 }, { "epoch": 0.2700169694705214, "grad_norm": 2.4958858489990234, "learning_rate": 8.563942436635866e-06, "loss": 0.598, "step": 6504 }, { "epoch": 0.27005848499473273, "grad_norm": 2.5934300422668457, "learning_rate": 8.563470856894316e-06, "loss": 0.6366, "step": 6505 }, { "epoch": 0.27010000051894406, "grad_norm": 2.52258038520813, "learning_rate": 8.562999212724183e-06, "loss": 0.6207, "step": 6506 }, { "epoch": 0.2701415160431554, "grad_norm": 2.251683473587036, "learning_rate": 8.562527504134e-06, "loss": 0.5528, "step": 6507 }, { "epoch": 0.2701830315673667, "grad_norm": 2.924605131149292, "learning_rate": 8.56205573113229e-06, "loss": 0.6101, "step": 6508 }, { "epoch": 0.27022454709157806, "grad_norm": 2.721194267272949, "learning_rate": 8.561583893727588e-06, "loss": 0.6489, "step": 6509 }, { "epoch": 0.2702660626157894, "grad_norm": 2.6628074645996094, "learning_rate": 8.561111991928424e-06, "loss": 0.5918, "step": 6510 }, { "epoch": 0.2703075781400007, "grad_norm": 3.2494025230407715, "learning_rate": 8.560640025743325e-06, "loss": 0.6142, "step": 6511 }, { "epoch": 0.27034909366421206, "grad_norm": 2.268150568008423, "learning_rate": 8.56016799518083e-06, "loss": 0.5479, "step": 6512 }, { "epoch": 0.2703906091884234, "grad_norm": 2.251187562942505, "learning_rate": 8.559695900249471e-06, "loss": 0.4361, "step": 6513 }, { "epoch": 0.2704321247126347, "grad_norm": 2.631586790084839, "learning_rate": 8.559223740957785e-06, "loss": 0.5245, "step": 6514 }, { "epoch": 0.27047364023684606, "grad_norm": 2.1810853481292725, "learning_rate": 8.558751517314308e-06, "loss": 0.438, "step": 6515 }, { "epoch": 0.2705151557610574, "grad_norm": 2.181488037109375, "learning_rate": 8.558279229327577e-06, "loss": 0.5181, "step": 6516 }, { "epoch": 0.2705566712852687, "grad_norm": 2.5644590854644775, "learning_rate": 8.557806877006132e-06, "loss": 0.5725, "step": 6517 }, { "epoch": 0.27059818680948006, "grad_norm": 2.811708450317383, "learning_rate": 8.557334460358514e-06, "loss": 0.5975, "step": 6518 }, { "epoch": 0.2706397023336914, "grad_norm": 2.7648367881774902, "learning_rate": 8.556861979393263e-06, "loss": 0.5465, "step": 6519 }, { "epoch": 0.2706812178579027, "grad_norm": 2.54006028175354, "learning_rate": 8.556389434118922e-06, "loss": 0.564, "step": 6520 }, { "epoch": 0.27072273338211406, "grad_norm": 2.7234246730804443, "learning_rate": 8.555916824544035e-06, "loss": 0.8083, "step": 6521 }, { "epoch": 0.2707642489063254, "grad_norm": 2.720290422439575, "learning_rate": 8.555444150677147e-06, "loss": 0.5866, "step": 6522 }, { "epoch": 0.2708057644305367, "grad_norm": 2.575605869293213, "learning_rate": 8.554971412526805e-06, "loss": 0.472, "step": 6523 }, { "epoch": 0.27084727995474805, "grad_norm": 3.0528554916381836, "learning_rate": 8.554498610101554e-06, "loss": 0.5248, "step": 6524 }, { "epoch": 0.2708887954789594, "grad_norm": 2.671025037765503, "learning_rate": 8.554025743409943e-06, "loss": 0.5569, "step": 6525 }, { "epoch": 0.2709303110031707, "grad_norm": 2.6664414405822754, "learning_rate": 8.553552812460524e-06, "loss": 0.5702, "step": 6526 }, { "epoch": 0.2709718265273821, "grad_norm": 2.8741614818573, "learning_rate": 8.553079817261845e-06, "loss": 0.6003, "step": 6527 }, { "epoch": 0.27101334205159344, "grad_norm": 2.4463679790496826, "learning_rate": 8.552606757822458e-06, "loss": 0.5713, "step": 6528 }, { "epoch": 0.2710548575758048, "grad_norm": 2.9687654972076416, "learning_rate": 8.552133634150917e-06, "loss": 0.544, "step": 6529 }, { "epoch": 0.2710963731000161, "grad_norm": 2.5134472846984863, "learning_rate": 8.551660446255777e-06, "loss": 0.482, "step": 6530 }, { "epoch": 0.27113788862422744, "grad_norm": 3.2026331424713135, "learning_rate": 8.551187194145591e-06, "loss": 0.4947, "step": 6531 }, { "epoch": 0.2711794041484388, "grad_norm": 2.391885757446289, "learning_rate": 8.550713877828919e-06, "loss": 0.3918, "step": 6532 }, { "epoch": 0.2712209196726501, "grad_norm": 2.8097445964813232, "learning_rate": 8.550240497314315e-06, "loss": 0.4178, "step": 6533 }, { "epoch": 0.27126243519686144, "grad_norm": 2.2028799057006836, "learning_rate": 8.54976705261034e-06, "loss": 0.5083, "step": 6534 }, { "epoch": 0.2713039507210728, "grad_norm": 2.404705047607422, "learning_rate": 8.549293543725554e-06, "loss": 0.4225, "step": 6535 }, { "epoch": 0.2713454662452841, "grad_norm": 2.8611767292022705, "learning_rate": 8.548819970668518e-06, "loss": 0.6229, "step": 6536 }, { "epoch": 0.27138698176949544, "grad_norm": 2.417879819869995, "learning_rate": 8.548346333447794e-06, "loss": 0.5501, "step": 6537 }, { "epoch": 0.27142849729370677, "grad_norm": 2.6143362522125244, "learning_rate": 8.547872632071945e-06, "loss": 0.5482, "step": 6538 }, { "epoch": 0.2714700128179181, "grad_norm": 2.3219411373138428, "learning_rate": 8.547398866549537e-06, "loss": 0.3818, "step": 6539 }, { "epoch": 0.27151152834212944, "grad_norm": 2.510739803314209, "learning_rate": 8.546925036889133e-06, "loss": 0.443, "step": 6540 }, { "epoch": 0.27155304386634077, "grad_norm": 3.0841355323791504, "learning_rate": 8.546451143099304e-06, "loss": 0.5313, "step": 6541 }, { "epoch": 0.2715945593905521, "grad_norm": 2.2569055557250977, "learning_rate": 8.545977185188615e-06, "loss": 0.5442, "step": 6542 }, { "epoch": 0.27163607491476344, "grad_norm": 3.3936586380004883, "learning_rate": 8.545503163165637e-06, "loss": 0.6603, "step": 6543 }, { "epoch": 0.27167759043897477, "grad_norm": 2.6910626888275146, "learning_rate": 8.54502907703894e-06, "loss": 0.4976, "step": 6544 }, { "epoch": 0.2717191059631861, "grad_norm": 2.7706305980682373, "learning_rate": 8.544554926817095e-06, "loss": 0.6424, "step": 6545 }, { "epoch": 0.27176062148739744, "grad_norm": 2.598285675048828, "learning_rate": 8.544080712508676e-06, "loss": 0.5699, "step": 6546 }, { "epoch": 0.27180213701160877, "grad_norm": 2.663900852203369, "learning_rate": 8.543606434122255e-06, "loss": 0.556, "step": 6547 }, { "epoch": 0.2718436525358201, "grad_norm": 2.5829732418060303, "learning_rate": 8.543132091666409e-06, "loss": 0.6296, "step": 6548 }, { "epoch": 0.27188516806003143, "grad_norm": 2.444507598876953, "learning_rate": 8.542657685149714e-06, "loss": 0.4418, "step": 6549 }, { "epoch": 0.27192668358424277, "grad_norm": 2.689964771270752, "learning_rate": 8.542183214580747e-06, "loss": 0.4666, "step": 6550 }, { "epoch": 0.2719681991084541, "grad_norm": 2.1915106773376465, "learning_rate": 8.541708679968086e-06, "loss": 0.5247, "step": 6551 }, { "epoch": 0.27200971463266543, "grad_norm": 2.254307985305786, "learning_rate": 8.541234081320311e-06, "loss": 0.4971, "step": 6552 }, { "epoch": 0.27205123015687677, "grad_norm": 2.5602262020111084, "learning_rate": 8.540759418646003e-06, "loss": 0.605, "step": 6553 }, { "epoch": 0.2720927456810881, "grad_norm": 2.7109835147857666, "learning_rate": 8.540284691953744e-06, "loss": 0.53, "step": 6554 }, { "epoch": 0.27213426120529943, "grad_norm": 2.6277456283569336, "learning_rate": 8.539809901252118e-06, "loss": 0.5394, "step": 6555 }, { "epoch": 0.27217577672951077, "grad_norm": 2.4412224292755127, "learning_rate": 8.539335046549707e-06, "loss": 0.5097, "step": 6556 }, { "epoch": 0.2722172922537221, "grad_norm": 2.773941993713379, "learning_rate": 8.538860127855101e-06, "loss": 0.4525, "step": 6557 }, { "epoch": 0.2722588077779335, "grad_norm": 2.6601827144622803, "learning_rate": 8.538385145176883e-06, "loss": 0.656, "step": 6558 }, { "epoch": 0.2723003233021448, "grad_norm": 2.348457098007202, "learning_rate": 8.537910098523641e-06, "loss": 0.5555, "step": 6559 }, { "epoch": 0.27234183882635615, "grad_norm": 2.4053282737731934, "learning_rate": 8.537434987903966e-06, "loss": 0.3992, "step": 6560 }, { "epoch": 0.2723833543505675, "grad_norm": 2.6658496856689453, "learning_rate": 8.536959813326446e-06, "loss": 0.5444, "step": 6561 }, { "epoch": 0.2724248698747788, "grad_norm": 3.0354902744293213, "learning_rate": 8.536484574799673e-06, "loss": 0.495, "step": 6562 }, { "epoch": 0.27246638539899015, "grad_norm": 2.1722512245178223, "learning_rate": 8.536009272332238e-06, "loss": 0.5125, "step": 6563 }, { "epoch": 0.2725079009232015, "grad_norm": 2.862898111343384, "learning_rate": 8.535533905932739e-06, "loss": 0.579, "step": 6564 }, { "epoch": 0.2725494164474128, "grad_norm": 2.5254015922546387, "learning_rate": 8.535058475609765e-06, "loss": 0.5072, "step": 6565 }, { "epoch": 0.27259093197162415, "grad_norm": 3.091289520263672, "learning_rate": 8.534582981371917e-06, "loss": 0.5904, "step": 6566 }, { "epoch": 0.2726324474958355, "grad_norm": 2.534980297088623, "learning_rate": 8.53410742322779e-06, "loss": 0.5822, "step": 6567 }, { "epoch": 0.2726739630200468, "grad_norm": 2.605973243713379, "learning_rate": 8.533631801185982e-06, "loss": 0.492, "step": 6568 }, { "epoch": 0.27271547854425815, "grad_norm": 2.366180419921875, "learning_rate": 8.533156115255092e-06, "loss": 0.6735, "step": 6569 }, { "epoch": 0.2727569940684695, "grad_norm": 2.6297807693481445, "learning_rate": 8.532680365443721e-06, "loss": 0.4911, "step": 6570 }, { "epoch": 0.2727985095926808, "grad_norm": 2.815314292907715, "learning_rate": 8.53220455176047e-06, "loss": 0.5207, "step": 6571 }, { "epoch": 0.27284002511689215, "grad_norm": 3.680255651473999, "learning_rate": 8.531728674213943e-06, "loss": 0.5954, "step": 6572 }, { "epoch": 0.2728815406411035, "grad_norm": 2.425478935241699, "learning_rate": 8.531252732812744e-06, "loss": 0.5589, "step": 6573 }, { "epoch": 0.2729230561653148, "grad_norm": 2.8506546020507812, "learning_rate": 8.53077672756548e-06, "loss": 0.4379, "step": 6574 }, { "epoch": 0.27296457168952615, "grad_norm": 2.3248021602630615, "learning_rate": 8.530300658480752e-06, "loss": 0.5406, "step": 6575 }, { "epoch": 0.2730060872137375, "grad_norm": 2.2984566688537598, "learning_rate": 8.529824525567172e-06, "loss": 0.5669, "step": 6576 }, { "epoch": 0.2730476027379488, "grad_norm": 2.3765640258789062, "learning_rate": 8.529348328833346e-06, "loss": 0.7005, "step": 6577 }, { "epoch": 0.27308911826216015, "grad_norm": 2.9628641605377197, "learning_rate": 8.528872068287885e-06, "loss": 0.3977, "step": 6578 }, { "epoch": 0.2731306337863715, "grad_norm": 2.9515960216522217, "learning_rate": 8.528395743939401e-06, "loss": 0.6273, "step": 6579 }, { "epoch": 0.2731721493105828, "grad_norm": 2.4950952529907227, "learning_rate": 8.527919355796505e-06, "loss": 0.4986, "step": 6580 }, { "epoch": 0.27321366483479415, "grad_norm": 2.1036181449890137, "learning_rate": 8.52744290386781e-06, "loss": 0.5356, "step": 6581 }, { "epoch": 0.2732551803590055, "grad_norm": 2.140871286392212, "learning_rate": 8.52696638816193e-06, "loss": 0.4535, "step": 6582 }, { "epoch": 0.2732966958832168, "grad_norm": 2.5291051864624023, "learning_rate": 8.526489808687481e-06, "loss": 0.5827, "step": 6583 }, { "epoch": 0.27333821140742814, "grad_norm": 2.0862274169921875, "learning_rate": 8.526013165453082e-06, "loss": 0.5072, "step": 6584 }, { "epoch": 0.2733797269316395, "grad_norm": 2.6639771461486816, "learning_rate": 8.525536458467347e-06, "loss": 0.4599, "step": 6585 }, { "epoch": 0.2734212424558508, "grad_norm": 2.717651605606079, "learning_rate": 8.525059687738895e-06, "loss": 0.6228, "step": 6586 }, { "epoch": 0.27346275798006214, "grad_norm": 2.9304137229919434, "learning_rate": 8.52458285327635e-06, "loss": 0.4383, "step": 6587 }, { "epoch": 0.27350427350427353, "grad_norm": 2.692845344543457, "learning_rate": 8.524105955088331e-06, "loss": 0.652, "step": 6588 }, { "epoch": 0.27354578902848486, "grad_norm": 3.410141944885254, "learning_rate": 8.523628993183458e-06, "loss": 0.4411, "step": 6589 }, { "epoch": 0.2735873045526962, "grad_norm": 2.3759121894836426, "learning_rate": 8.52315196757036e-06, "loss": 0.6304, "step": 6590 }, { "epoch": 0.27362882007690753, "grad_norm": 2.7496562004089355, "learning_rate": 8.522674878257658e-06, "loss": 0.6245, "step": 6591 }, { "epoch": 0.27367033560111886, "grad_norm": 2.3979527950286865, "learning_rate": 8.52219772525398e-06, "loss": 0.5204, "step": 6592 }, { "epoch": 0.2737118511253302, "grad_norm": 2.580631971359253, "learning_rate": 8.52172050856795e-06, "loss": 0.4609, "step": 6593 }, { "epoch": 0.27375336664954153, "grad_norm": 2.9156007766723633, "learning_rate": 8.521243228208201e-06, "loss": 0.5356, "step": 6594 }, { "epoch": 0.27379488217375286, "grad_norm": 2.467099189758301, "learning_rate": 8.520765884183356e-06, "loss": 0.5289, "step": 6595 }, { "epoch": 0.2738363976979642, "grad_norm": 2.407973527908325, "learning_rate": 8.520288476502051e-06, "loss": 0.553, "step": 6596 }, { "epoch": 0.27387791322217553, "grad_norm": 2.8386406898498535, "learning_rate": 8.519811005172916e-06, "loss": 0.591, "step": 6597 }, { "epoch": 0.27391942874638686, "grad_norm": 2.9077932834625244, "learning_rate": 8.519333470204583e-06, "loss": 0.6385, "step": 6598 }, { "epoch": 0.2739609442705982, "grad_norm": 2.4717442989349365, "learning_rate": 8.518855871605684e-06, "loss": 0.5231, "step": 6599 }, { "epoch": 0.2740024597948095, "grad_norm": 2.702440023422241, "learning_rate": 8.518378209384862e-06, "loss": 0.47, "step": 6600 }, { "epoch": 0.27404397531902086, "grad_norm": 2.217888355255127, "learning_rate": 8.517900483550742e-06, "loss": 0.5739, "step": 6601 }, { "epoch": 0.2740854908432322, "grad_norm": 2.20371675491333, "learning_rate": 8.517422694111971e-06, "loss": 0.574, "step": 6602 }, { "epoch": 0.2741270063674435, "grad_norm": 2.7504537105560303, "learning_rate": 8.516944841077183e-06, "loss": 0.4457, "step": 6603 }, { "epoch": 0.27416852189165486, "grad_norm": 2.6667017936706543, "learning_rate": 8.516466924455017e-06, "loss": 0.5193, "step": 6604 }, { "epoch": 0.2742100374158662, "grad_norm": 2.6187989711761475, "learning_rate": 8.515988944254118e-06, "loss": 0.5631, "step": 6605 }, { "epoch": 0.2742515529400775, "grad_norm": 2.3825902938842773, "learning_rate": 8.515510900483124e-06, "loss": 0.4707, "step": 6606 }, { "epoch": 0.27429306846428886, "grad_norm": 2.662428379058838, "learning_rate": 8.515032793150681e-06, "loss": 0.4138, "step": 6607 }, { "epoch": 0.2743345839885002, "grad_norm": 2.7303261756896973, "learning_rate": 8.51455462226543e-06, "loss": 0.5505, "step": 6608 }, { "epoch": 0.2743760995127115, "grad_norm": 2.327951192855835, "learning_rate": 8.514076387836022e-06, "loss": 0.5808, "step": 6609 }, { "epoch": 0.27441761503692286, "grad_norm": 2.5011956691741943, "learning_rate": 8.513598089871096e-06, "loss": 0.4666, "step": 6610 }, { "epoch": 0.2744591305611342, "grad_norm": 2.4443750381469727, "learning_rate": 8.513119728379305e-06, "loss": 0.5393, "step": 6611 }, { "epoch": 0.2745006460853455, "grad_norm": 2.6956498622894287, "learning_rate": 8.512641303369298e-06, "loss": 0.5434, "step": 6612 }, { "epoch": 0.27454216160955686, "grad_norm": 2.457767963409424, "learning_rate": 8.512162814849723e-06, "loss": 0.5796, "step": 6613 }, { "epoch": 0.2745836771337682, "grad_norm": 2.2409088611602783, "learning_rate": 8.511684262829231e-06, "loss": 0.5593, "step": 6614 }, { "epoch": 0.2746251926579795, "grad_norm": 2.304373264312744, "learning_rate": 8.511205647316476e-06, "loss": 0.4165, "step": 6615 }, { "epoch": 0.27466670818219086, "grad_norm": 2.4136617183685303, "learning_rate": 8.51072696832011e-06, "loss": 0.5636, "step": 6616 }, { "epoch": 0.2747082237064022, "grad_norm": 3.5957963466644287, "learning_rate": 8.51024822584879e-06, "loss": 0.5314, "step": 6617 }, { "epoch": 0.2747497392306135, "grad_norm": 2.4739949703216553, "learning_rate": 8.509769419911169e-06, "loss": 0.4955, "step": 6618 }, { "epoch": 0.2747912547548249, "grad_norm": 2.858266592025757, "learning_rate": 8.509290550515907e-06, "loss": 0.5357, "step": 6619 }, { "epoch": 0.27483277027903624, "grad_norm": 2.4603517055511475, "learning_rate": 8.508811617671659e-06, "loss": 0.6221, "step": 6620 }, { "epoch": 0.2748742858032476, "grad_norm": 2.628242254257202, "learning_rate": 8.508332621387087e-06, "loss": 0.5469, "step": 6621 }, { "epoch": 0.2749158013274589, "grad_norm": 2.380409002304077, "learning_rate": 8.507853561670849e-06, "loss": 0.5591, "step": 6622 }, { "epoch": 0.27495731685167024, "grad_norm": 3.080336570739746, "learning_rate": 8.507374438531606e-06, "loss": 0.6764, "step": 6623 }, { "epoch": 0.2749988323758816, "grad_norm": 2.5188286304473877, "learning_rate": 8.506895251978025e-06, "loss": 0.553, "step": 6624 }, { "epoch": 0.2750403479000929, "grad_norm": 2.8392961025238037, "learning_rate": 8.506416002018767e-06, "loss": 0.4568, "step": 6625 }, { "epoch": 0.27508186342430424, "grad_norm": 2.5357894897460938, "learning_rate": 8.505936688662495e-06, "loss": 0.5191, "step": 6626 }, { "epoch": 0.2751233789485156, "grad_norm": 2.4303524494171143, "learning_rate": 8.505457311917878e-06, "loss": 0.4726, "step": 6627 }, { "epoch": 0.2751648944727269, "grad_norm": 2.4039394855499268, "learning_rate": 8.504977871793583e-06, "loss": 0.4616, "step": 6628 }, { "epoch": 0.27520640999693824, "grad_norm": 1.9013077020645142, "learning_rate": 8.504498368298276e-06, "loss": 0.3617, "step": 6629 }, { "epoch": 0.2752479255211496, "grad_norm": 2.1075990200042725, "learning_rate": 8.50401880144063e-06, "loss": 0.4166, "step": 6630 }, { "epoch": 0.2752894410453609, "grad_norm": 2.3023805618286133, "learning_rate": 8.503539171229314e-06, "loss": 0.5383, "step": 6631 }, { "epoch": 0.27533095656957224, "grad_norm": 3.035827875137329, "learning_rate": 8.503059477672998e-06, "loss": 0.5762, "step": 6632 }, { "epoch": 0.27537247209378357, "grad_norm": 2.7811152935028076, "learning_rate": 8.50257972078036e-06, "loss": 0.6488, "step": 6633 }, { "epoch": 0.2754139876179949, "grad_norm": 3.590719223022461, "learning_rate": 8.50209990056007e-06, "loss": 0.5248, "step": 6634 }, { "epoch": 0.27545550314220624, "grad_norm": 2.7754459381103516, "learning_rate": 8.501620017020803e-06, "loss": 0.6674, "step": 6635 }, { "epoch": 0.27549701866641757, "grad_norm": 2.6705987453460693, "learning_rate": 8.501140070171238e-06, "loss": 0.615, "step": 6636 }, { "epoch": 0.2755385341906289, "grad_norm": 2.827585458755493, "learning_rate": 8.500660060020052e-06, "loss": 0.678, "step": 6637 }, { "epoch": 0.27558004971484024, "grad_norm": 2.636695146560669, "learning_rate": 8.500179986575923e-06, "loss": 0.5776, "step": 6638 }, { "epoch": 0.27562156523905157, "grad_norm": 2.831925392150879, "learning_rate": 8.499699849847531e-06, "loss": 0.5532, "step": 6639 }, { "epoch": 0.2756630807632629, "grad_norm": 2.657003879547119, "learning_rate": 8.499219649843555e-06, "loss": 0.4937, "step": 6640 }, { "epoch": 0.27570459628747424, "grad_norm": 2.9020180702209473, "learning_rate": 8.498739386572681e-06, "loss": 0.4545, "step": 6641 }, { "epoch": 0.27574611181168557, "grad_norm": 2.382899761199951, "learning_rate": 8.49825906004359e-06, "loss": 0.4815, "step": 6642 }, { "epoch": 0.2757876273358969, "grad_norm": 2.1962177753448486, "learning_rate": 8.497778670264967e-06, "loss": 0.4937, "step": 6643 }, { "epoch": 0.27582914286010823, "grad_norm": 2.556443452835083, "learning_rate": 8.497298217245498e-06, "loss": 0.6385, "step": 6644 }, { "epoch": 0.27587065838431957, "grad_norm": 2.016378164291382, "learning_rate": 8.496817700993869e-06, "loss": 0.5037, "step": 6645 }, { "epoch": 0.2759121739085309, "grad_norm": 2.3527472019195557, "learning_rate": 8.496337121518767e-06, "loss": 0.5034, "step": 6646 }, { "epoch": 0.27595368943274223, "grad_norm": 2.238457679748535, "learning_rate": 8.495856478828883e-06, "loss": 0.4762, "step": 6647 }, { "epoch": 0.27599520495695357, "grad_norm": 3.23512864112854, "learning_rate": 8.495375772932906e-06, "loss": 0.5144, "step": 6648 }, { "epoch": 0.2760367204811649, "grad_norm": 2.660825729370117, "learning_rate": 8.494895003839528e-06, "loss": 0.5099, "step": 6649 }, { "epoch": 0.2760782360053763, "grad_norm": 2.546341896057129, "learning_rate": 8.49441417155744e-06, "loss": 0.5662, "step": 6650 }, { "epoch": 0.2761197515295876, "grad_norm": 3.1149039268493652, "learning_rate": 8.493933276095338e-06, "loss": 0.4813, "step": 6651 }, { "epoch": 0.27616126705379895, "grad_norm": 3.026960611343384, "learning_rate": 8.493452317461914e-06, "loss": 0.4485, "step": 6652 }, { "epoch": 0.2762027825780103, "grad_norm": 2.2559568881988525, "learning_rate": 8.492971295665865e-06, "loss": 0.5434, "step": 6653 }, { "epoch": 0.2762442981022216, "grad_norm": 2.2924556732177734, "learning_rate": 8.49249021071589e-06, "loss": 0.4712, "step": 6654 }, { "epoch": 0.27628581362643295, "grad_norm": 2.442798614501953, "learning_rate": 8.492009062620682e-06, "loss": 0.6244, "step": 6655 }, { "epoch": 0.2763273291506443, "grad_norm": 2.231611728668213, "learning_rate": 8.491527851388948e-06, "loss": 0.465, "step": 6656 }, { "epoch": 0.2763688446748556, "grad_norm": 2.357304096221924, "learning_rate": 8.49104657702938e-06, "loss": 0.503, "step": 6657 }, { "epoch": 0.27641036019906695, "grad_norm": 2.2731571197509766, "learning_rate": 8.490565239550686e-06, "loss": 0.64, "step": 6658 }, { "epoch": 0.2764518757232783, "grad_norm": 2.564115047454834, "learning_rate": 8.490083838961567e-06, "loss": 0.5177, "step": 6659 }, { "epoch": 0.2764933912474896, "grad_norm": 2.7832577228546143, "learning_rate": 8.489602375270725e-06, "loss": 0.6428, "step": 6660 }, { "epoch": 0.27653490677170095, "grad_norm": 2.395115375518799, "learning_rate": 8.489120848486865e-06, "loss": 0.5311, "step": 6661 }, { "epoch": 0.2765764222959123, "grad_norm": 2.5470285415649414, "learning_rate": 8.488639258618697e-06, "loss": 0.3983, "step": 6662 }, { "epoch": 0.2766179378201236, "grad_norm": 2.718393564224243, "learning_rate": 8.488157605674924e-06, "loss": 0.5281, "step": 6663 }, { "epoch": 0.27665945334433495, "grad_norm": 2.4444282054901123, "learning_rate": 8.487675889664258e-06, "loss": 0.4488, "step": 6664 }, { "epoch": 0.2767009688685463, "grad_norm": 2.7634246349334717, "learning_rate": 8.487194110595406e-06, "loss": 0.4163, "step": 6665 }, { "epoch": 0.2767424843927576, "grad_norm": 2.4324278831481934, "learning_rate": 8.486712268477077e-06, "loss": 0.5201, "step": 6666 }, { "epoch": 0.27678399991696895, "grad_norm": 2.9285383224487305, "learning_rate": 8.486230363317987e-06, "loss": 0.4353, "step": 6667 }, { "epoch": 0.2768255154411803, "grad_norm": 2.5691494941711426, "learning_rate": 8.485748395126847e-06, "loss": 0.5767, "step": 6668 }, { "epoch": 0.2768670309653916, "grad_norm": 2.3814821243286133, "learning_rate": 8.485266363912371e-06, "loss": 0.6526, "step": 6669 }, { "epoch": 0.27690854648960295, "grad_norm": 2.226924419403076, "learning_rate": 8.484784269683274e-06, "loss": 0.4774, "step": 6670 }, { "epoch": 0.2769500620138143, "grad_norm": 3.175104856491089, "learning_rate": 8.484302112448276e-06, "loss": 0.445, "step": 6671 }, { "epoch": 0.2769915775380256, "grad_norm": 2.426471710205078, "learning_rate": 8.483819892216087e-06, "loss": 0.4424, "step": 6672 }, { "epoch": 0.27703309306223695, "grad_norm": 2.5231223106384277, "learning_rate": 8.483337608995434e-06, "loss": 0.5187, "step": 6673 }, { "epoch": 0.2770746085864483, "grad_norm": 2.677614450454712, "learning_rate": 8.482855262795031e-06, "loss": 0.5974, "step": 6674 }, { "epoch": 0.2771161241106596, "grad_norm": 2.915647029876709, "learning_rate": 8.482372853623601e-06, "loss": 0.4996, "step": 6675 }, { "epoch": 0.27715763963487094, "grad_norm": 2.3883557319641113, "learning_rate": 8.481890381489869e-06, "loss": 0.4639, "step": 6676 }, { "epoch": 0.2771991551590823, "grad_norm": 2.6043102741241455, "learning_rate": 8.481407846402552e-06, "loss": 0.514, "step": 6677 }, { "epoch": 0.2772406706832936, "grad_norm": 2.3205320835113525, "learning_rate": 8.48092524837038e-06, "loss": 0.4933, "step": 6678 }, { "epoch": 0.27728218620750494, "grad_norm": 2.863630533218384, "learning_rate": 8.480442587402073e-06, "loss": 0.6199, "step": 6679 }, { "epoch": 0.2773237017317163, "grad_norm": 3.2926087379455566, "learning_rate": 8.479959863506362e-06, "loss": 0.5124, "step": 6680 }, { "epoch": 0.27736521725592767, "grad_norm": 2.8517379760742188, "learning_rate": 8.479477076691975e-06, "loss": 0.5872, "step": 6681 }, { "epoch": 0.277406732780139, "grad_norm": 2.0628163814544678, "learning_rate": 8.478994226967638e-06, "loss": 0.4198, "step": 6682 }, { "epoch": 0.27744824830435033, "grad_norm": 1.9080601930618286, "learning_rate": 8.478511314342084e-06, "loss": 0.4861, "step": 6683 }, { "epoch": 0.27748976382856166, "grad_norm": 2.641026258468628, "learning_rate": 8.478028338824042e-06, "loss": 0.5519, "step": 6684 }, { "epoch": 0.277531279352773, "grad_norm": 2.724780559539795, "learning_rate": 8.477545300422247e-06, "loss": 0.527, "step": 6685 }, { "epoch": 0.27757279487698433, "grad_norm": 2.531919240951538, "learning_rate": 8.477062199145428e-06, "loss": 0.4993, "step": 6686 }, { "epoch": 0.27761431040119566, "grad_norm": 3.228710651397705, "learning_rate": 8.476579035002324e-06, "loss": 0.4905, "step": 6687 }, { "epoch": 0.277655825925407, "grad_norm": 2.5678398609161377, "learning_rate": 8.476095808001667e-06, "loss": 0.5867, "step": 6688 }, { "epoch": 0.27769734144961833, "grad_norm": 2.6122841835021973, "learning_rate": 8.475612518152199e-06, "loss": 0.5292, "step": 6689 }, { "epoch": 0.27773885697382966, "grad_norm": 2.3857626914978027, "learning_rate": 8.475129165462652e-06, "loss": 0.5325, "step": 6690 }, { "epoch": 0.277780372498041, "grad_norm": 2.44771146774292, "learning_rate": 8.47464574994177e-06, "loss": 0.4738, "step": 6691 }, { "epoch": 0.27782188802225233, "grad_norm": 2.363333225250244, "learning_rate": 8.47416227159829e-06, "loss": 0.4675, "step": 6692 }, { "epoch": 0.27786340354646366, "grad_norm": 2.3239293098449707, "learning_rate": 8.473678730440956e-06, "loss": 0.6, "step": 6693 }, { "epoch": 0.277904919070675, "grad_norm": 2.2778635025024414, "learning_rate": 8.47319512647851e-06, "loss": 0.3461, "step": 6694 }, { "epoch": 0.2779464345948863, "grad_norm": 2.4765307903289795, "learning_rate": 8.472711459719693e-06, "loss": 0.4228, "step": 6695 }, { "epoch": 0.27798795011909766, "grad_norm": 2.38283109664917, "learning_rate": 8.472227730173252e-06, "loss": 0.5286, "step": 6696 }, { "epoch": 0.278029465643309, "grad_norm": 2.502331018447876, "learning_rate": 8.471743937847934e-06, "loss": 0.5309, "step": 6697 }, { "epoch": 0.2780709811675203, "grad_norm": 2.341596841812134, "learning_rate": 8.471260082752483e-06, "loss": 0.4699, "step": 6698 }, { "epoch": 0.27811249669173166, "grad_norm": 2.1103811264038086, "learning_rate": 8.47077616489565e-06, "loss": 0.4896, "step": 6699 }, { "epoch": 0.278154012215943, "grad_norm": 2.152194023132324, "learning_rate": 8.470292184286184e-06, "loss": 0.5287, "step": 6700 }, { "epoch": 0.2781955277401543, "grad_norm": 2.796328544616699, "learning_rate": 8.469808140932837e-06, "loss": 0.5839, "step": 6701 }, { "epoch": 0.27823704326436566, "grad_norm": 3.0553812980651855, "learning_rate": 8.469324034844355e-06, "loss": 0.423, "step": 6702 }, { "epoch": 0.278278558788577, "grad_norm": 2.3517396450042725, "learning_rate": 8.468839866029497e-06, "loss": 0.4404, "step": 6703 }, { "epoch": 0.2783200743127883, "grad_norm": 2.735888719558716, "learning_rate": 8.468355634497014e-06, "loss": 0.5195, "step": 6704 }, { "epoch": 0.27836158983699966, "grad_norm": 1.9755375385284424, "learning_rate": 8.46787134025566e-06, "loss": 0.4385, "step": 6705 }, { "epoch": 0.278403105361211, "grad_norm": 2.2101283073425293, "learning_rate": 8.467386983314194e-06, "loss": 0.4992, "step": 6706 }, { "epoch": 0.2784446208854223, "grad_norm": 2.5348904132843018, "learning_rate": 8.466902563681372e-06, "loss": 0.4943, "step": 6707 }, { "epoch": 0.27848613640963366, "grad_norm": 2.5450968742370605, "learning_rate": 8.466418081365953e-06, "loss": 0.5211, "step": 6708 }, { "epoch": 0.278527651933845, "grad_norm": 2.6790931224823, "learning_rate": 8.465933536376694e-06, "loss": 0.5155, "step": 6709 }, { "epoch": 0.2785691674580563, "grad_norm": 2.799393892288208, "learning_rate": 8.46544892872236e-06, "loss": 0.5711, "step": 6710 }, { "epoch": 0.27861068298226765, "grad_norm": 2.452725887298584, "learning_rate": 8.464964258411708e-06, "loss": 0.3886, "step": 6711 }, { "epoch": 0.27865219850647904, "grad_norm": 2.9013335704803467, "learning_rate": 8.464479525453503e-06, "loss": 0.5428, "step": 6712 }, { "epoch": 0.2786937140306904, "grad_norm": 3.284074068069458, "learning_rate": 8.463994729856513e-06, "loss": 0.6118, "step": 6713 }, { "epoch": 0.2787352295549017, "grad_norm": 2.589405059814453, "learning_rate": 8.463509871629499e-06, "loss": 0.4731, "step": 6714 }, { "epoch": 0.27877674507911304, "grad_norm": 2.764418363571167, "learning_rate": 8.463024950781226e-06, "loss": 0.5672, "step": 6715 }, { "epoch": 0.2788182606033244, "grad_norm": 2.5163352489471436, "learning_rate": 8.462539967320466e-06, "loss": 0.541, "step": 6716 }, { "epoch": 0.2788597761275357, "grad_norm": 2.430405378341675, "learning_rate": 8.462054921255984e-06, "loss": 0.5241, "step": 6717 }, { "epoch": 0.27890129165174704, "grad_norm": 2.37131667137146, "learning_rate": 8.461569812596552e-06, "loss": 0.4926, "step": 6718 }, { "epoch": 0.2789428071759584, "grad_norm": 2.9475302696228027, "learning_rate": 8.46108464135094e-06, "loss": 0.4016, "step": 6719 }, { "epoch": 0.2789843227001697, "grad_norm": 2.8672828674316406, "learning_rate": 8.46059940752792e-06, "loss": 0.4359, "step": 6720 }, { "epoch": 0.27902583822438104, "grad_norm": 2.3616514205932617, "learning_rate": 8.460114111136264e-06, "loss": 0.5624, "step": 6721 }, { "epoch": 0.2790673537485924, "grad_norm": 2.585084915161133, "learning_rate": 8.45962875218475e-06, "loss": 0.4745, "step": 6722 }, { "epoch": 0.2791088692728037, "grad_norm": 2.608870029449463, "learning_rate": 8.45914333068215e-06, "loss": 0.5423, "step": 6723 }, { "epoch": 0.27915038479701504, "grad_norm": 2.4271602630615234, "learning_rate": 8.45865784663724e-06, "loss": 0.434, "step": 6724 }, { "epoch": 0.27919190032122637, "grad_norm": 2.245910167694092, "learning_rate": 8.4581723000588e-06, "loss": 0.4409, "step": 6725 }, { "epoch": 0.2792334158454377, "grad_norm": 2.8827619552612305, "learning_rate": 8.45768669095561e-06, "loss": 0.6653, "step": 6726 }, { "epoch": 0.27927493136964904, "grad_norm": 2.7050836086273193, "learning_rate": 8.457201019336445e-06, "loss": 0.461, "step": 6727 }, { "epoch": 0.27931644689386037, "grad_norm": 2.6606078147888184, "learning_rate": 8.45671528521009e-06, "loss": 0.394, "step": 6728 }, { "epoch": 0.2793579624180717, "grad_norm": 2.9023587703704834, "learning_rate": 8.456229488585328e-06, "loss": 0.4917, "step": 6729 }, { "epoch": 0.27939947794228304, "grad_norm": 2.7314562797546387, "learning_rate": 8.455743629470941e-06, "loss": 0.6324, "step": 6730 }, { "epoch": 0.27944099346649437, "grad_norm": 2.330864429473877, "learning_rate": 8.455257707875711e-06, "loss": 0.5095, "step": 6731 }, { "epoch": 0.2794825089907057, "grad_norm": 2.5356130599975586, "learning_rate": 8.454771723808425e-06, "loss": 0.6291, "step": 6732 }, { "epoch": 0.27952402451491704, "grad_norm": 3.2162554264068604, "learning_rate": 8.454285677277872e-06, "loss": 0.5618, "step": 6733 }, { "epoch": 0.27956554003912837, "grad_norm": 2.5804007053375244, "learning_rate": 8.45379956829284e-06, "loss": 0.5157, "step": 6734 }, { "epoch": 0.2796070555633397, "grad_norm": 2.4897372722625732, "learning_rate": 8.453313396862113e-06, "loss": 0.5231, "step": 6735 }, { "epoch": 0.27964857108755103, "grad_norm": 2.259166955947876, "learning_rate": 8.452827162994486e-06, "loss": 0.4522, "step": 6736 }, { "epoch": 0.27969008661176237, "grad_norm": 2.3421471118927, "learning_rate": 8.45234086669875e-06, "loss": 0.5047, "step": 6737 }, { "epoch": 0.2797316021359737, "grad_norm": 3.0666041374206543, "learning_rate": 8.451854507983694e-06, "loss": 0.4917, "step": 6738 }, { "epoch": 0.27977311766018503, "grad_norm": 2.1621742248535156, "learning_rate": 8.451368086858114e-06, "loss": 0.5462, "step": 6739 }, { "epoch": 0.27981463318439637, "grad_norm": 3.125997304916382, "learning_rate": 8.450881603330805e-06, "loss": 0.5197, "step": 6740 }, { "epoch": 0.2798561487086077, "grad_norm": 2.7066690921783447, "learning_rate": 8.450395057410561e-06, "loss": 0.6174, "step": 6741 }, { "epoch": 0.27989766423281903, "grad_norm": 2.8247199058532715, "learning_rate": 8.44990844910618e-06, "loss": 0.3877, "step": 6742 }, { "epoch": 0.2799391797570304, "grad_norm": 2.4564309120178223, "learning_rate": 8.449421778426462e-06, "loss": 0.5064, "step": 6743 }, { "epoch": 0.27998069528124175, "grad_norm": 2.749534845352173, "learning_rate": 8.4489350453802e-06, "loss": 0.5163, "step": 6744 }, { "epoch": 0.2800222108054531, "grad_norm": 2.104811429977417, "learning_rate": 8.4484482499762e-06, "loss": 0.4353, "step": 6745 }, { "epoch": 0.2800637263296644, "grad_norm": 2.30800199508667, "learning_rate": 8.447961392223263e-06, "loss": 0.4281, "step": 6746 }, { "epoch": 0.28010524185387575, "grad_norm": 2.9624733924865723, "learning_rate": 8.447474472130189e-06, "loss": 0.5447, "step": 6747 }, { "epoch": 0.2801467573780871, "grad_norm": 2.3625688552856445, "learning_rate": 8.446987489705783e-06, "loss": 0.5958, "step": 6748 }, { "epoch": 0.2801882729022984, "grad_norm": 2.5557236671447754, "learning_rate": 8.446500444958851e-06, "loss": 0.475, "step": 6749 }, { "epoch": 0.28022978842650975, "grad_norm": 3.358442783355713, "learning_rate": 8.446013337898196e-06, "loss": 0.6298, "step": 6750 }, { "epoch": 0.2802713039507211, "grad_norm": 2.4584312438964844, "learning_rate": 8.445526168532628e-06, "loss": 0.4546, "step": 6751 }, { "epoch": 0.2803128194749324, "grad_norm": 3.7305471897125244, "learning_rate": 8.445038936870953e-06, "loss": 0.5359, "step": 6752 }, { "epoch": 0.28035433499914375, "grad_norm": 2.3903613090515137, "learning_rate": 8.44455164292198e-06, "loss": 0.402, "step": 6753 }, { "epoch": 0.2803958505233551, "grad_norm": 2.888334035873413, "learning_rate": 8.44406428669452e-06, "loss": 0.4827, "step": 6754 }, { "epoch": 0.2804373660475664, "grad_norm": 2.125455141067505, "learning_rate": 8.443576868197388e-06, "loss": 0.4563, "step": 6755 }, { "epoch": 0.28047888157177775, "grad_norm": 2.2196342945098877, "learning_rate": 8.443089387439391e-06, "loss": 0.5317, "step": 6756 }, { "epoch": 0.2805203970959891, "grad_norm": 2.8430094718933105, "learning_rate": 8.442601844429346e-06, "loss": 0.5777, "step": 6757 }, { "epoch": 0.2805619126202004, "grad_norm": 3.066349506378174, "learning_rate": 8.44211423917607e-06, "loss": 0.5738, "step": 6758 }, { "epoch": 0.28060342814441175, "grad_norm": 2.577449321746826, "learning_rate": 8.441626571688372e-06, "loss": 0.5898, "step": 6759 }, { "epoch": 0.2806449436686231, "grad_norm": 2.8649563789367676, "learning_rate": 8.441138841975077e-06, "loss": 0.5088, "step": 6760 }, { "epoch": 0.2806864591928344, "grad_norm": 2.354391098022461, "learning_rate": 8.440651050044998e-06, "loss": 0.3364, "step": 6761 }, { "epoch": 0.28072797471704575, "grad_norm": 2.283691167831421, "learning_rate": 8.440163195906959e-06, "loss": 0.4462, "step": 6762 }, { "epoch": 0.2807694902412571, "grad_norm": 2.1756808757781982, "learning_rate": 8.439675279569775e-06, "loss": 0.6312, "step": 6763 }, { "epoch": 0.2808110057654684, "grad_norm": 2.7027573585510254, "learning_rate": 8.43918730104227e-06, "loss": 0.4686, "step": 6764 }, { "epoch": 0.28085252128967975, "grad_norm": 2.539128065109253, "learning_rate": 8.438699260333269e-06, "loss": 0.6146, "step": 6765 }, { "epoch": 0.2808940368138911, "grad_norm": 2.8336923122406006, "learning_rate": 8.438211157451595e-06, "loss": 0.4292, "step": 6766 }, { "epoch": 0.2809355523381024, "grad_norm": 2.4554529190063477, "learning_rate": 8.43772299240607e-06, "loss": 0.4825, "step": 6767 }, { "epoch": 0.28097706786231375, "grad_norm": 2.1108481884002686, "learning_rate": 8.437234765205525e-06, "loss": 0.3622, "step": 6768 }, { "epoch": 0.2810185833865251, "grad_norm": 2.5981802940368652, "learning_rate": 8.436746475858784e-06, "loss": 0.4978, "step": 6769 }, { "epoch": 0.2810600989107364, "grad_norm": 2.8245413303375244, "learning_rate": 8.436258124374675e-06, "loss": 0.4846, "step": 6770 }, { "epoch": 0.28110161443494774, "grad_norm": 1.8544187545776367, "learning_rate": 8.43576971076203e-06, "loss": 0.4048, "step": 6771 }, { "epoch": 0.2811431299591591, "grad_norm": 2.83101487159729, "learning_rate": 8.435281235029678e-06, "loss": 0.6089, "step": 6772 }, { "epoch": 0.2811846454833704, "grad_norm": 2.550092935562134, "learning_rate": 8.43479269718645e-06, "loss": 0.6181, "step": 6773 }, { "epoch": 0.2812261610075818, "grad_norm": 2.7997424602508545, "learning_rate": 8.43430409724118e-06, "loss": 0.5364, "step": 6774 }, { "epoch": 0.28126767653179313, "grad_norm": 2.44563364982605, "learning_rate": 8.433815435202704e-06, "loss": 0.5941, "step": 6775 }, { "epoch": 0.28130919205600446, "grad_norm": 2.9035465717315674, "learning_rate": 8.433326711079853e-06, "loss": 0.6889, "step": 6776 }, { "epoch": 0.2813507075802158, "grad_norm": 2.2749717235565186, "learning_rate": 8.432837924881468e-06, "loss": 0.5874, "step": 6777 }, { "epoch": 0.28139222310442713, "grad_norm": 2.382368803024292, "learning_rate": 8.432349076616381e-06, "loss": 0.5575, "step": 6778 }, { "epoch": 0.28143373862863846, "grad_norm": 2.6857290267944336, "learning_rate": 8.431860166293435e-06, "loss": 0.463, "step": 6779 }, { "epoch": 0.2814752541528498, "grad_norm": 2.646681785583496, "learning_rate": 8.431371193921467e-06, "loss": 0.451, "step": 6780 }, { "epoch": 0.28151676967706113, "grad_norm": 2.506826162338257, "learning_rate": 8.430882159509321e-06, "loss": 0.6442, "step": 6781 }, { "epoch": 0.28155828520127246, "grad_norm": 2.553232431411743, "learning_rate": 8.430393063065835e-06, "loss": 0.5606, "step": 6782 }, { "epoch": 0.2815998007254838, "grad_norm": 2.183318614959717, "learning_rate": 8.429903904599853e-06, "loss": 0.6241, "step": 6783 }, { "epoch": 0.28164131624969513, "grad_norm": 2.08615779876709, "learning_rate": 8.42941468412022e-06, "loss": 0.514, "step": 6784 }, { "epoch": 0.28168283177390646, "grad_norm": 2.5329675674438477, "learning_rate": 8.428925401635783e-06, "loss": 0.5258, "step": 6785 }, { "epoch": 0.2817243472981178, "grad_norm": 3.2795217037200928, "learning_rate": 8.428436057155385e-06, "loss": 0.5134, "step": 6786 }, { "epoch": 0.2817658628223291, "grad_norm": 2.4281482696533203, "learning_rate": 8.427946650687874e-06, "loss": 0.5267, "step": 6787 }, { "epoch": 0.28180737834654046, "grad_norm": 2.7982115745544434, "learning_rate": 8.427457182242102e-06, "loss": 0.7484, "step": 6788 }, { "epoch": 0.2818488938707518, "grad_norm": 2.159675359725952, "learning_rate": 8.426967651826914e-06, "loss": 0.4362, "step": 6789 }, { "epoch": 0.2818904093949631, "grad_norm": 3.2302448749542236, "learning_rate": 8.426478059451163e-06, "loss": 0.5977, "step": 6790 }, { "epoch": 0.28193192491917446, "grad_norm": 2.5836400985717773, "learning_rate": 8.425988405123705e-06, "loss": 0.6685, "step": 6791 }, { "epoch": 0.2819734404433858, "grad_norm": 2.136763572692871, "learning_rate": 8.425498688853384e-06, "loss": 0.4548, "step": 6792 }, { "epoch": 0.2820149559675971, "grad_norm": 2.8527698516845703, "learning_rate": 8.425008910649062e-06, "loss": 0.6097, "step": 6793 }, { "epoch": 0.28205647149180846, "grad_norm": 2.542951822280884, "learning_rate": 8.424519070519592e-06, "loss": 0.5377, "step": 6794 }, { "epoch": 0.2820979870160198, "grad_norm": 2.834432363510132, "learning_rate": 8.424029168473829e-06, "loss": 0.5554, "step": 6795 }, { "epoch": 0.2821395025402311, "grad_norm": 2.2251408100128174, "learning_rate": 8.423539204520632e-06, "loss": 0.5226, "step": 6796 }, { "epoch": 0.28218101806444246, "grad_norm": 3.064178705215454, "learning_rate": 8.423049178668859e-06, "loss": 0.6802, "step": 6797 }, { "epoch": 0.2822225335886538, "grad_norm": 2.7069761753082275, "learning_rate": 8.422559090927372e-06, "loss": 0.5874, "step": 6798 }, { "epoch": 0.2822640491128651, "grad_norm": 2.536968231201172, "learning_rate": 8.422068941305029e-06, "loss": 0.548, "step": 6799 }, { "epoch": 0.28230556463707646, "grad_norm": 2.478189706802368, "learning_rate": 8.421578729810693e-06, "loss": 0.5457, "step": 6800 }, { "epoch": 0.2823470801612878, "grad_norm": 2.169403553009033, "learning_rate": 8.421088456453226e-06, "loss": 0.4171, "step": 6801 }, { "epoch": 0.2823885956854991, "grad_norm": 2.268270254135132, "learning_rate": 8.420598121241496e-06, "loss": 0.485, "step": 6802 }, { "epoch": 0.28243011120971045, "grad_norm": 2.551689863204956, "learning_rate": 8.420107724184366e-06, "loss": 0.5259, "step": 6803 }, { "epoch": 0.28247162673392184, "grad_norm": 2.9731829166412354, "learning_rate": 8.4196172652907e-06, "loss": 0.5346, "step": 6804 }, { "epoch": 0.2825131422581332, "grad_norm": 2.445146322250366, "learning_rate": 8.41912674456937e-06, "loss": 0.4482, "step": 6805 }, { "epoch": 0.2825546577823445, "grad_norm": 2.4590506553649902, "learning_rate": 8.418636162029244e-06, "loss": 0.3531, "step": 6806 }, { "epoch": 0.28259617330655584, "grad_norm": 2.509946584701538, "learning_rate": 8.418145517679188e-06, "loss": 0.5805, "step": 6807 }, { "epoch": 0.2826376888307672, "grad_norm": 2.578134298324585, "learning_rate": 8.417654811528079e-06, "loss": 0.6946, "step": 6808 }, { "epoch": 0.2826792043549785, "grad_norm": 3.085535764694214, "learning_rate": 8.417164043584783e-06, "loss": 0.5401, "step": 6809 }, { "epoch": 0.28272071987918984, "grad_norm": 2.563042163848877, "learning_rate": 8.416673213858179e-06, "loss": 0.5789, "step": 6810 }, { "epoch": 0.2827622354034012, "grad_norm": 2.431816339492798, "learning_rate": 8.416182322357136e-06, "loss": 0.4697, "step": 6811 }, { "epoch": 0.2828037509276125, "grad_norm": 2.5629546642303467, "learning_rate": 8.415691369090533e-06, "loss": 0.5929, "step": 6812 }, { "epoch": 0.28284526645182384, "grad_norm": 2.8873000144958496, "learning_rate": 8.415200354067245e-06, "loss": 0.5468, "step": 6813 }, { "epoch": 0.2828867819760352, "grad_norm": 2.449732780456543, "learning_rate": 8.414709277296153e-06, "loss": 0.4579, "step": 6814 }, { "epoch": 0.2829282975002465, "grad_norm": 2.2165138721466064, "learning_rate": 8.41421813878613e-06, "loss": 0.5401, "step": 6815 }, { "epoch": 0.28296981302445784, "grad_norm": 2.135593891143799, "learning_rate": 8.413726938546061e-06, "loss": 0.4742, "step": 6816 }, { "epoch": 0.28301132854866917, "grad_norm": 2.1752524375915527, "learning_rate": 8.413235676584824e-06, "loss": 0.5477, "step": 6817 }, { "epoch": 0.2830528440728805, "grad_norm": 2.0825209617614746, "learning_rate": 8.4127443529113e-06, "loss": 0.5268, "step": 6818 }, { "epoch": 0.28309435959709184, "grad_norm": 2.1527516841888428, "learning_rate": 8.412252967534378e-06, "loss": 0.5266, "step": 6819 }, { "epoch": 0.28313587512130317, "grad_norm": 2.1703813076019287, "learning_rate": 8.411761520462935e-06, "loss": 0.5797, "step": 6820 }, { "epoch": 0.2831773906455145, "grad_norm": 2.2336418628692627, "learning_rate": 8.411270011705865e-06, "loss": 0.503, "step": 6821 }, { "epoch": 0.28321890616972584, "grad_norm": 2.9831695556640625, "learning_rate": 8.410778441272047e-06, "loss": 0.5322, "step": 6822 }, { "epoch": 0.28326042169393717, "grad_norm": 2.502641439437866, "learning_rate": 8.410286809170371e-06, "loss": 0.587, "step": 6823 }, { "epoch": 0.2833019372181485, "grad_norm": 2.2609477043151855, "learning_rate": 8.40979511540973e-06, "loss": 0.4844, "step": 6824 }, { "epoch": 0.28334345274235984, "grad_norm": 2.9955854415893555, "learning_rate": 8.409303359999007e-06, "loss": 0.5306, "step": 6825 }, { "epoch": 0.28338496826657117, "grad_norm": 2.722451686859131, "learning_rate": 8.408811542947098e-06, "loss": 0.5338, "step": 6826 }, { "epoch": 0.2834264837907825, "grad_norm": 3.0067527294158936, "learning_rate": 8.408319664262894e-06, "loss": 0.623, "step": 6827 }, { "epoch": 0.28346799931499383, "grad_norm": 2.5870730876922607, "learning_rate": 8.407827723955287e-06, "loss": 0.4984, "step": 6828 }, { "epoch": 0.28350951483920517, "grad_norm": 2.125084400177002, "learning_rate": 8.407335722033174e-06, "loss": 0.5999, "step": 6829 }, { "epoch": 0.2835510303634165, "grad_norm": 2.8471946716308594, "learning_rate": 8.406843658505448e-06, "loss": 0.5317, "step": 6830 }, { "epoch": 0.28359254588762783, "grad_norm": 2.634833812713623, "learning_rate": 8.406351533381008e-06, "loss": 0.4196, "step": 6831 }, { "epoch": 0.28363406141183917, "grad_norm": 2.824617862701416, "learning_rate": 8.405859346668749e-06, "loss": 0.5647, "step": 6832 }, { "epoch": 0.2836755769360505, "grad_norm": 2.5793938636779785, "learning_rate": 8.40536709837757e-06, "loss": 0.5092, "step": 6833 }, { "epoch": 0.28371709246026183, "grad_norm": 2.922543525695801, "learning_rate": 8.404874788516374e-06, "loss": 0.3627, "step": 6834 }, { "epoch": 0.2837586079844732, "grad_norm": 2.2709033489227295, "learning_rate": 8.404382417094061e-06, "loss": 0.3261, "step": 6835 }, { "epoch": 0.28380012350868455, "grad_norm": 2.458684206008911, "learning_rate": 8.403889984119534e-06, "loss": 0.3911, "step": 6836 }, { "epoch": 0.2838416390328959, "grad_norm": 3.1677889823913574, "learning_rate": 8.403397489601693e-06, "loss": 0.5715, "step": 6837 }, { "epoch": 0.2838831545571072, "grad_norm": 2.368807792663574, "learning_rate": 8.402904933549444e-06, "loss": 0.5377, "step": 6838 }, { "epoch": 0.28392467008131855, "grad_norm": 2.568340539932251, "learning_rate": 8.402412315971693e-06, "loss": 0.4647, "step": 6839 }, { "epoch": 0.2839661856055299, "grad_norm": 2.400423526763916, "learning_rate": 8.401919636877348e-06, "loss": 0.5365, "step": 6840 }, { "epoch": 0.2840077011297412, "grad_norm": 2.297706365585327, "learning_rate": 8.401426896275316e-06, "loss": 0.4499, "step": 6841 }, { "epoch": 0.28404921665395255, "grad_norm": 2.0923516750335693, "learning_rate": 8.400934094174503e-06, "loss": 0.4813, "step": 6842 }, { "epoch": 0.2840907321781639, "grad_norm": 2.5502126216888428, "learning_rate": 8.400441230583822e-06, "loss": 0.5268, "step": 6843 }, { "epoch": 0.2841322477023752, "grad_norm": 2.614718437194824, "learning_rate": 8.399948305512184e-06, "loss": 0.4627, "step": 6844 }, { "epoch": 0.28417376322658655, "grad_norm": 2.784458875656128, "learning_rate": 8.3994553189685e-06, "loss": 0.4473, "step": 6845 }, { "epoch": 0.2842152787507979, "grad_norm": 2.2582297325134277, "learning_rate": 8.398962270961684e-06, "loss": 0.5687, "step": 6846 }, { "epoch": 0.2842567942750092, "grad_norm": 2.6488375663757324, "learning_rate": 8.398469161500651e-06, "loss": 0.502, "step": 6847 }, { "epoch": 0.28429830979922055, "grad_norm": 2.28044056892395, "learning_rate": 8.397975990594317e-06, "loss": 0.4544, "step": 6848 }, { "epoch": 0.2843398253234319, "grad_norm": 2.45479679107666, "learning_rate": 8.397482758251597e-06, "loss": 0.5923, "step": 6849 }, { "epoch": 0.2843813408476432, "grad_norm": 2.432886838912964, "learning_rate": 8.396989464481407e-06, "loss": 0.5206, "step": 6850 }, { "epoch": 0.28442285637185455, "grad_norm": 2.9484355449676514, "learning_rate": 8.39649610929267e-06, "loss": 0.6199, "step": 6851 }, { "epoch": 0.2844643718960659, "grad_norm": 2.5931711196899414, "learning_rate": 8.396002692694306e-06, "loss": 0.5167, "step": 6852 }, { "epoch": 0.2845058874202772, "grad_norm": 2.350720167160034, "learning_rate": 8.395509214695235e-06, "loss": 0.5579, "step": 6853 }, { "epoch": 0.28454740294448855, "grad_norm": 3.240804433822632, "learning_rate": 8.395015675304376e-06, "loss": 0.5887, "step": 6854 }, { "epoch": 0.2845889184686999, "grad_norm": 2.1558949947357178, "learning_rate": 8.394522074530655e-06, "loss": 0.4833, "step": 6855 }, { "epoch": 0.2846304339929112, "grad_norm": 2.491562604904175, "learning_rate": 8.394028412382998e-06, "loss": 0.4529, "step": 6856 }, { "epoch": 0.28467194951712255, "grad_norm": 3.0448570251464844, "learning_rate": 8.393534688870329e-06, "loss": 0.5206, "step": 6857 }, { "epoch": 0.2847134650413339, "grad_norm": 2.448657512664795, "learning_rate": 8.393040904001575e-06, "loss": 0.5014, "step": 6858 }, { "epoch": 0.2847549805655452, "grad_norm": 2.3134446144104004, "learning_rate": 8.392547057785662e-06, "loss": 0.4105, "step": 6859 }, { "epoch": 0.28479649608975655, "grad_norm": 2.494335651397705, "learning_rate": 8.39205315023152e-06, "loss": 0.3945, "step": 6860 }, { "epoch": 0.2848380116139679, "grad_norm": 2.2210237979888916, "learning_rate": 8.391559181348081e-06, "loss": 0.4595, "step": 6861 }, { "epoch": 0.2848795271381792, "grad_norm": 2.845454692840576, "learning_rate": 8.391065151144274e-06, "loss": 0.539, "step": 6862 }, { "epoch": 0.28492104266239054, "grad_norm": 1.9630498886108398, "learning_rate": 8.390571059629032e-06, "loss": 0.5002, "step": 6863 }, { "epoch": 0.2849625581866019, "grad_norm": 2.0812926292419434, "learning_rate": 8.390076906811287e-06, "loss": 0.5478, "step": 6864 }, { "epoch": 0.2850040737108132, "grad_norm": 2.9797353744506836, "learning_rate": 8.389582692699976e-06, "loss": 0.4814, "step": 6865 }, { "epoch": 0.2850455892350246, "grad_norm": 2.8852896690368652, "learning_rate": 8.38908841730403e-06, "loss": 0.5984, "step": 6866 }, { "epoch": 0.28508710475923593, "grad_norm": 2.2991530895233154, "learning_rate": 8.38859408063239e-06, "loss": 0.5971, "step": 6867 }, { "epoch": 0.28512862028344726, "grad_norm": 2.845222234725952, "learning_rate": 8.388099682693994e-06, "loss": 0.5169, "step": 6868 }, { "epoch": 0.2851701358076586, "grad_norm": 2.614502429962158, "learning_rate": 8.387605223497777e-06, "loss": 0.4525, "step": 6869 }, { "epoch": 0.28521165133186993, "grad_norm": 1.8138008117675781, "learning_rate": 8.387110703052683e-06, "loss": 0.4552, "step": 6870 }, { "epoch": 0.28525316685608126, "grad_norm": 2.317621946334839, "learning_rate": 8.38661612136765e-06, "loss": 0.3968, "step": 6871 }, { "epoch": 0.2852946823802926, "grad_norm": 2.9728827476501465, "learning_rate": 8.386121478451622e-06, "loss": 0.4209, "step": 6872 }, { "epoch": 0.28533619790450393, "grad_norm": 2.309401035308838, "learning_rate": 8.385626774313542e-06, "loss": 0.5849, "step": 6873 }, { "epoch": 0.28537771342871526, "grad_norm": 2.7604901790618896, "learning_rate": 8.385132008962354e-06, "loss": 0.5749, "step": 6874 }, { "epoch": 0.2854192289529266, "grad_norm": 2.409907579421997, "learning_rate": 8.384637182407002e-06, "loss": 0.5564, "step": 6875 }, { "epoch": 0.28546074447713793, "grad_norm": 2.5499253273010254, "learning_rate": 8.384142294656435e-06, "loss": 0.4622, "step": 6876 }, { "epoch": 0.28550226000134926, "grad_norm": 2.6540181636810303, "learning_rate": 8.383647345719602e-06, "loss": 0.5615, "step": 6877 }, { "epoch": 0.2855437755255606, "grad_norm": 2.855736017227173, "learning_rate": 8.383152335605446e-06, "loss": 0.5067, "step": 6878 }, { "epoch": 0.2855852910497719, "grad_norm": 2.5540270805358887, "learning_rate": 8.382657264322924e-06, "loss": 0.5491, "step": 6879 }, { "epoch": 0.28562680657398326, "grad_norm": 2.693699359893799, "learning_rate": 8.38216213188098e-06, "loss": 0.4995, "step": 6880 }, { "epoch": 0.2856683220981946, "grad_norm": 2.9226267337799072, "learning_rate": 8.381666938288572e-06, "loss": 0.6068, "step": 6881 }, { "epoch": 0.2857098376224059, "grad_norm": 2.089679002761841, "learning_rate": 8.381171683554653e-06, "loss": 0.5604, "step": 6882 }, { "epoch": 0.28575135314661726, "grad_norm": 2.369943380355835, "learning_rate": 8.380676367688171e-06, "loss": 0.5781, "step": 6883 }, { "epoch": 0.2857928686708286, "grad_norm": 2.3790855407714844, "learning_rate": 8.38018099069809e-06, "loss": 0.5982, "step": 6884 }, { "epoch": 0.2858343841950399, "grad_norm": 2.713578224182129, "learning_rate": 8.379685552593359e-06, "loss": 0.5177, "step": 6885 }, { "epoch": 0.28587589971925126, "grad_norm": 3.303767442703247, "learning_rate": 8.379190053382942e-06, "loss": 0.3578, "step": 6886 }, { "epoch": 0.2859174152434626, "grad_norm": 2.1071391105651855, "learning_rate": 8.378694493075794e-06, "loss": 0.4888, "step": 6887 }, { "epoch": 0.2859589307676739, "grad_norm": 2.4822256565093994, "learning_rate": 8.378198871680874e-06, "loss": 0.577, "step": 6888 }, { "epoch": 0.28600044629188526, "grad_norm": 2.8243234157562256, "learning_rate": 8.377703189207148e-06, "loss": 0.5332, "step": 6889 }, { "epoch": 0.2860419618160966, "grad_norm": 2.6205568313598633, "learning_rate": 8.37720744566357e-06, "loss": 0.4871, "step": 6890 }, { "epoch": 0.2860834773403079, "grad_norm": 2.423319101333618, "learning_rate": 8.376711641059113e-06, "loss": 0.5499, "step": 6891 }, { "epoch": 0.28612499286451926, "grad_norm": 2.208913803100586, "learning_rate": 8.376215775402734e-06, "loss": 0.5542, "step": 6892 }, { "epoch": 0.2861665083887306, "grad_norm": 2.3012161254882812, "learning_rate": 8.3757198487034e-06, "loss": 0.4122, "step": 6893 }, { "epoch": 0.2862080239129419, "grad_norm": 2.4540090560913086, "learning_rate": 8.375223860970078e-06, "loss": 0.4796, "step": 6894 }, { "epoch": 0.28624953943715326, "grad_norm": 2.417855978012085, "learning_rate": 8.374727812211738e-06, "loss": 0.4479, "step": 6895 }, { "epoch": 0.2862910549613646, "grad_norm": 2.999586582183838, "learning_rate": 8.374231702437345e-06, "loss": 0.5127, "step": 6896 }, { "epoch": 0.286332570485576, "grad_norm": 2.045952081680298, "learning_rate": 8.37373553165587e-06, "loss": 0.5235, "step": 6897 }, { "epoch": 0.2863740860097873, "grad_norm": 2.4956936836242676, "learning_rate": 8.373239299876285e-06, "loss": 0.4853, "step": 6898 }, { "epoch": 0.28641560153399864, "grad_norm": 2.8940863609313965, "learning_rate": 8.37274300710756e-06, "loss": 0.3693, "step": 6899 }, { "epoch": 0.28645711705821, "grad_norm": 2.4013314247131348, "learning_rate": 8.372246653358669e-06, "loss": 0.539, "step": 6900 }, { "epoch": 0.2864986325824213, "grad_norm": 2.318671941757202, "learning_rate": 8.371750238638588e-06, "loss": 0.4723, "step": 6901 }, { "epoch": 0.28654014810663264, "grad_norm": 2.325857400894165, "learning_rate": 8.371253762956291e-06, "loss": 0.5502, "step": 6902 }, { "epoch": 0.286581663630844, "grad_norm": 2.706420421600342, "learning_rate": 8.370757226320754e-06, "loss": 0.3595, "step": 6903 }, { "epoch": 0.2866231791550553, "grad_norm": 2.4019603729248047, "learning_rate": 8.370260628740953e-06, "loss": 0.5796, "step": 6904 }, { "epoch": 0.28666469467926664, "grad_norm": 2.300140857696533, "learning_rate": 8.369763970225871e-06, "loss": 0.6207, "step": 6905 }, { "epoch": 0.286706210203478, "grad_norm": 2.0622663497924805, "learning_rate": 8.369267250784483e-06, "loss": 0.4764, "step": 6906 }, { "epoch": 0.2867477257276893, "grad_norm": 2.6949243545532227, "learning_rate": 8.368770470425773e-06, "loss": 0.6345, "step": 6907 }, { "epoch": 0.28678924125190064, "grad_norm": 2.49664306640625, "learning_rate": 8.368273629158723e-06, "loss": 0.5466, "step": 6908 }, { "epoch": 0.286830756776112, "grad_norm": 3.1106975078582764, "learning_rate": 8.367776726992315e-06, "loss": 0.5882, "step": 6909 }, { "epoch": 0.2868722723003233, "grad_norm": 2.4890379905700684, "learning_rate": 8.36727976393553e-06, "loss": 0.5078, "step": 6910 }, { "epoch": 0.28691378782453464, "grad_norm": 2.527245283126831, "learning_rate": 8.36678273999736e-06, "loss": 0.5557, "step": 6911 }, { "epoch": 0.28695530334874597, "grad_norm": 2.647012710571289, "learning_rate": 8.366285655186785e-06, "loss": 0.4985, "step": 6912 }, { "epoch": 0.2869968188729573, "grad_norm": 2.1604154109954834, "learning_rate": 8.365788509512796e-06, "loss": 0.5505, "step": 6913 }, { "epoch": 0.28703833439716864, "grad_norm": 2.5166499614715576, "learning_rate": 8.36529130298438e-06, "loss": 0.5321, "step": 6914 }, { "epoch": 0.28707984992137997, "grad_norm": 2.5852625370025635, "learning_rate": 8.364794035610527e-06, "loss": 0.5536, "step": 6915 }, { "epoch": 0.2871213654455913, "grad_norm": 2.3948893547058105, "learning_rate": 8.36429670740023e-06, "loss": 0.4396, "step": 6916 }, { "epoch": 0.28716288096980264, "grad_norm": 2.292381525039673, "learning_rate": 8.363799318362476e-06, "loss": 0.4974, "step": 6917 }, { "epoch": 0.28720439649401397, "grad_norm": 2.4247426986694336, "learning_rate": 8.363301868506264e-06, "loss": 0.5605, "step": 6918 }, { "epoch": 0.2872459120182253, "grad_norm": 3.4759063720703125, "learning_rate": 8.362804357840581e-06, "loss": 0.5026, "step": 6919 }, { "epoch": 0.28728742754243664, "grad_norm": 2.6401164531707764, "learning_rate": 8.36230678637443e-06, "loss": 0.5374, "step": 6920 }, { "epoch": 0.28732894306664797, "grad_norm": 2.283092737197876, "learning_rate": 8.361809154116801e-06, "loss": 0.5948, "step": 6921 }, { "epoch": 0.2873704585908593, "grad_norm": 2.5771737098693848, "learning_rate": 8.361311461076694e-06, "loss": 0.4539, "step": 6922 }, { "epoch": 0.28741197411507063, "grad_norm": 2.1717689037323, "learning_rate": 8.360813707263107e-06, "loss": 0.4292, "step": 6923 }, { "epoch": 0.28745348963928197, "grad_norm": 2.695765972137451, "learning_rate": 8.36031589268504e-06, "loss": 0.5818, "step": 6924 }, { "epoch": 0.2874950051634933, "grad_norm": 2.439167022705078, "learning_rate": 8.35981801735149e-06, "loss": 0.4979, "step": 6925 }, { "epoch": 0.28753652068770463, "grad_norm": 2.2114672660827637, "learning_rate": 8.359320081271464e-06, "loss": 0.4831, "step": 6926 }, { "epoch": 0.28757803621191597, "grad_norm": 2.1829559803009033, "learning_rate": 8.358822084453964e-06, "loss": 0.4199, "step": 6927 }, { "epoch": 0.28761955173612735, "grad_norm": 2.3597629070281982, "learning_rate": 8.358324026907992e-06, "loss": 0.6433, "step": 6928 }, { "epoch": 0.2876610672603387, "grad_norm": 2.535813331604004, "learning_rate": 8.357825908642554e-06, "loss": 0.4841, "step": 6929 }, { "epoch": 0.28770258278455, "grad_norm": 2.3578104972839355, "learning_rate": 8.357327729666654e-06, "loss": 0.4271, "step": 6930 }, { "epoch": 0.28774409830876135, "grad_norm": 2.2316267490386963, "learning_rate": 8.356829489989302e-06, "loss": 0.4814, "step": 6931 }, { "epoch": 0.2877856138329727, "grad_norm": 2.7974369525909424, "learning_rate": 8.356331189619504e-06, "loss": 0.5928, "step": 6932 }, { "epoch": 0.287827129357184, "grad_norm": 2.367652654647827, "learning_rate": 8.355832828566273e-06, "loss": 0.6254, "step": 6933 }, { "epoch": 0.28786864488139535, "grad_norm": 2.38704514503479, "learning_rate": 8.355334406838616e-06, "loss": 0.514, "step": 6934 }, { "epoch": 0.2879101604056067, "grad_norm": 2.9453866481781006, "learning_rate": 8.354835924445546e-06, "loss": 0.539, "step": 6935 }, { "epoch": 0.287951675929818, "grad_norm": 2.413073778152466, "learning_rate": 8.354337381396075e-06, "loss": 0.3972, "step": 6936 }, { "epoch": 0.28799319145402935, "grad_norm": 3.1116251945495605, "learning_rate": 8.353838777699218e-06, "loss": 0.4482, "step": 6937 }, { "epoch": 0.2880347069782407, "grad_norm": 2.4331119060516357, "learning_rate": 8.353340113363987e-06, "loss": 0.5072, "step": 6938 }, { "epoch": 0.288076222502452, "grad_norm": 2.497464179992676, "learning_rate": 8.352841388399404e-06, "loss": 0.4533, "step": 6939 }, { "epoch": 0.28811773802666335, "grad_norm": 2.512296438217163, "learning_rate": 8.352342602814481e-06, "loss": 0.507, "step": 6940 }, { "epoch": 0.2881592535508747, "grad_norm": 2.3237950801849365, "learning_rate": 8.351843756618236e-06, "loss": 0.5621, "step": 6941 }, { "epoch": 0.288200769075086, "grad_norm": 2.6239497661590576, "learning_rate": 8.351344849819692e-06, "loss": 0.4998, "step": 6942 }, { "epoch": 0.28824228459929735, "grad_norm": 2.036128282546997, "learning_rate": 8.350845882427865e-06, "loss": 0.4816, "step": 6943 }, { "epoch": 0.2882838001235087, "grad_norm": 2.7067112922668457, "learning_rate": 8.350346854451778e-06, "loss": 0.7194, "step": 6944 }, { "epoch": 0.28832531564772, "grad_norm": 2.6467409133911133, "learning_rate": 8.349847765900457e-06, "loss": 0.6555, "step": 6945 }, { "epoch": 0.28836683117193135, "grad_norm": 2.1170918941497803, "learning_rate": 8.349348616782922e-06, "loss": 0.4481, "step": 6946 }, { "epoch": 0.2884083466961427, "grad_norm": 2.484750986099243, "learning_rate": 8.348849407108196e-06, "loss": 0.6644, "step": 6947 }, { "epoch": 0.288449862220354, "grad_norm": 2.0201199054718018, "learning_rate": 8.348350136885311e-06, "loss": 0.473, "step": 6948 }, { "epoch": 0.28849137774456535, "grad_norm": 2.9052462577819824, "learning_rate": 8.347850806123289e-06, "loss": 0.4932, "step": 6949 }, { "epoch": 0.2885328932687767, "grad_norm": 2.474576234817505, "learning_rate": 8.34735141483116e-06, "loss": 0.6454, "step": 6950 }, { "epoch": 0.288574408792988, "grad_norm": 3.2215209007263184, "learning_rate": 8.346851963017952e-06, "loss": 0.4816, "step": 6951 }, { "epoch": 0.28861592431719935, "grad_norm": 2.4771223068237305, "learning_rate": 8.346352450692695e-06, "loss": 0.4879, "step": 6952 }, { "epoch": 0.2886574398414107, "grad_norm": 3.3827874660491943, "learning_rate": 8.345852877864423e-06, "loss": 0.5961, "step": 6953 }, { "epoch": 0.288698955365622, "grad_norm": 2.4819698333740234, "learning_rate": 8.345353244542165e-06, "loss": 0.338, "step": 6954 }, { "epoch": 0.28874047088983334, "grad_norm": 2.412461757659912, "learning_rate": 8.344853550734958e-06, "loss": 0.6458, "step": 6955 }, { "epoch": 0.2887819864140447, "grad_norm": 2.577860116958618, "learning_rate": 8.344353796451832e-06, "loss": 0.6515, "step": 6956 }, { "epoch": 0.288823501938256, "grad_norm": 2.5137863159179688, "learning_rate": 8.343853981701827e-06, "loss": 0.6018, "step": 6957 }, { "epoch": 0.28886501746246734, "grad_norm": 2.7307653427124023, "learning_rate": 8.343354106493978e-06, "loss": 0.4775, "step": 6958 }, { "epoch": 0.28890653298667873, "grad_norm": 2.9146296977996826, "learning_rate": 8.342854170837325e-06, "loss": 0.5148, "step": 6959 }, { "epoch": 0.28894804851089007, "grad_norm": 2.327772855758667, "learning_rate": 8.342354174740904e-06, "loss": 0.4983, "step": 6960 }, { "epoch": 0.2889895640351014, "grad_norm": 2.84704852104187, "learning_rate": 8.341854118213754e-06, "loss": 0.5775, "step": 6961 }, { "epoch": 0.28903107955931273, "grad_norm": 2.4110093116760254, "learning_rate": 8.34135400126492e-06, "loss": 0.5347, "step": 6962 }, { "epoch": 0.28907259508352406, "grad_norm": 2.5397427082061768, "learning_rate": 8.340853823903444e-06, "loss": 0.4014, "step": 6963 }, { "epoch": 0.2891141106077354, "grad_norm": 2.662126064300537, "learning_rate": 8.340353586138363e-06, "loss": 0.6074, "step": 6964 }, { "epoch": 0.28915562613194673, "grad_norm": 2.745164155960083, "learning_rate": 8.33985328797873e-06, "loss": 0.5551, "step": 6965 }, { "epoch": 0.28919714165615806, "grad_norm": 2.6137051582336426, "learning_rate": 8.339352929433587e-06, "loss": 0.5373, "step": 6966 }, { "epoch": 0.2892386571803694, "grad_norm": 2.2480013370513916, "learning_rate": 8.338852510511981e-06, "loss": 0.5345, "step": 6967 }, { "epoch": 0.28928017270458073, "grad_norm": 2.050924062728882, "learning_rate": 8.338352031222958e-06, "loss": 0.4779, "step": 6968 }, { "epoch": 0.28932168822879206, "grad_norm": 2.2275636196136475, "learning_rate": 8.337851491575569e-06, "loss": 0.495, "step": 6969 }, { "epoch": 0.2893632037530034, "grad_norm": 3.0040977001190186, "learning_rate": 8.337350891578861e-06, "loss": 0.5506, "step": 6970 }, { "epoch": 0.28940471927721473, "grad_norm": 2.5109970569610596, "learning_rate": 8.336850231241888e-06, "loss": 0.4646, "step": 6971 }, { "epoch": 0.28944623480142606, "grad_norm": 2.2879292964935303, "learning_rate": 8.336349510573703e-06, "loss": 0.6459, "step": 6972 }, { "epoch": 0.2894877503256374, "grad_norm": 3.1483497619628906, "learning_rate": 8.335848729583356e-06, "loss": 0.4628, "step": 6973 }, { "epoch": 0.2895292658498487, "grad_norm": 2.4452476501464844, "learning_rate": 8.335347888279901e-06, "loss": 0.5317, "step": 6974 }, { "epoch": 0.28957078137406006, "grad_norm": 2.6898293495178223, "learning_rate": 8.334846986672396e-06, "loss": 0.656, "step": 6975 }, { "epoch": 0.2896122968982714, "grad_norm": 2.521280527114868, "learning_rate": 8.334346024769896e-06, "loss": 0.7278, "step": 6976 }, { "epoch": 0.2896538124224827, "grad_norm": 2.5760183334350586, "learning_rate": 8.33384500258146e-06, "loss": 0.5133, "step": 6977 }, { "epoch": 0.28969532794669406, "grad_norm": 2.90773344039917, "learning_rate": 8.333343920116144e-06, "loss": 0.6128, "step": 6978 }, { "epoch": 0.2897368434709054, "grad_norm": 2.8143653869628906, "learning_rate": 8.33284277738301e-06, "loss": 0.5512, "step": 6979 }, { "epoch": 0.2897783589951167, "grad_norm": 2.0201079845428467, "learning_rate": 8.332341574391116e-06, "loss": 0.5319, "step": 6980 }, { "epoch": 0.28981987451932806, "grad_norm": 2.7056727409362793, "learning_rate": 8.331840311149528e-06, "loss": 0.4329, "step": 6981 }, { "epoch": 0.2898613900435394, "grad_norm": 2.122466802597046, "learning_rate": 8.331338987667306e-06, "loss": 0.4907, "step": 6982 }, { "epoch": 0.2899029055677507, "grad_norm": 2.4728498458862305, "learning_rate": 8.330837603953515e-06, "loss": 0.6051, "step": 6983 }, { "epoch": 0.28994442109196206, "grad_norm": 2.7282512187957764, "learning_rate": 8.33033616001722e-06, "loss": 0.6653, "step": 6984 }, { "epoch": 0.2899859366161734, "grad_norm": 2.402722120285034, "learning_rate": 8.329834655867488e-06, "loss": 0.5605, "step": 6985 }, { "epoch": 0.2900274521403847, "grad_norm": 2.4274375438690186, "learning_rate": 8.329333091513386e-06, "loss": 0.5355, "step": 6986 }, { "epoch": 0.29006896766459606, "grad_norm": 2.3857853412628174, "learning_rate": 8.32883146696398e-06, "loss": 0.4047, "step": 6987 }, { "epoch": 0.2901104831888074, "grad_norm": 2.8888702392578125, "learning_rate": 8.328329782228342e-06, "loss": 0.4555, "step": 6988 }, { "epoch": 0.2901519987130187, "grad_norm": 1.9589024782180786, "learning_rate": 8.327828037315544e-06, "loss": 0.54, "step": 6989 }, { "epoch": 0.2901935142372301, "grad_norm": 2.5673344135284424, "learning_rate": 8.327326232234655e-06, "loss": 0.5351, "step": 6990 }, { "epoch": 0.29023502976144144, "grad_norm": 2.1396281719207764, "learning_rate": 8.326824366994748e-06, "loss": 0.5173, "step": 6991 }, { "epoch": 0.2902765452856528, "grad_norm": 2.9352030754089355, "learning_rate": 8.326322441604898e-06, "loss": 0.4659, "step": 6992 }, { "epoch": 0.2903180608098641, "grad_norm": 2.5398306846618652, "learning_rate": 8.325820456074181e-06, "loss": 0.4836, "step": 6993 }, { "epoch": 0.29035957633407544, "grad_norm": 2.6438770294189453, "learning_rate": 8.325318410411668e-06, "loss": 0.4465, "step": 6994 }, { "epoch": 0.2904010918582868, "grad_norm": 2.4501640796661377, "learning_rate": 8.324816304626444e-06, "loss": 0.5308, "step": 6995 }, { "epoch": 0.2904426073824981, "grad_norm": 2.501056671142578, "learning_rate": 8.32431413872758e-06, "loss": 0.5211, "step": 6996 }, { "epoch": 0.29048412290670944, "grad_norm": 2.59618878364563, "learning_rate": 8.323811912724159e-06, "loss": 0.3425, "step": 6997 }, { "epoch": 0.2905256384309208, "grad_norm": 2.3266539573669434, "learning_rate": 8.32330962662526e-06, "loss": 0.5865, "step": 6998 }, { "epoch": 0.2905671539551321, "grad_norm": 2.4052858352661133, "learning_rate": 8.322807280439966e-06, "loss": 0.5181, "step": 6999 }, { "epoch": 0.29060866947934344, "grad_norm": 2.5694427490234375, "learning_rate": 8.322304874177359e-06, "loss": 0.6024, "step": 7000 }, { "epoch": 0.2906501850035548, "grad_norm": 2.8772902488708496, "learning_rate": 8.321802407846523e-06, "loss": 0.5302, "step": 7001 }, { "epoch": 0.2906917005277661, "grad_norm": 2.93522572517395, "learning_rate": 8.321299881456539e-06, "loss": 0.6923, "step": 7002 }, { "epoch": 0.29073321605197744, "grad_norm": 2.6635823249816895, "learning_rate": 8.320797295016497e-06, "loss": 0.5024, "step": 7003 }, { "epoch": 0.29077473157618877, "grad_norm": 2.5819575786590576, "learning_rate": 8.320294648535485e-06, "loss": 0.4607, "step": 7004 }, { "epoch": 0.2908162471004001, "grad_norm": 2.5895562171936035, "learning_rate": 8.319791942022586e-06, "loss": 0.504, "step": 7005 }, { "epoch": 0.29085776262461144, "grad_norm": 2.8682661056518555, "learning_rate": 8.319289175486892e-06, "loss": 0.5394, "step": 7006 }, { "epoch": 0.29089927814882277, "grad_norm": 2.7800302505493164, "learning_rate": 8.318786348937493e-06, "loss": 0.6053, "step": 7007 }, { "epoch": 0.2909407936730341, "grad_norm": 2.261772871017456, "learning_rate": 8.318283462383481e-06, "loss": 0.4724, "step": 7008 }, { "epoch": 0.29098230919724544, "grad_norm": 2.3337557315826416, "learning_rate": 8.317780515833948e-06, "loss": 0.4525, "step": 7009 }, { "epoch": 0.29102382472145677, "grad_norm": 2.8108036518096924, "learning_rate": 8.317277509297987e-06, "loss": 0.6105, "step": 7010 }, { "epoch": 0.2910653402456681, "grad_norm": 2.750114679336548, "learning_rate": 8.316774442784691e-06, "loss": 0.6186, "step": 7011 }, { "epoch": 0.29110685576987944, "grad_norm": 2.5419254302978516, "learning_rate": 8.316271316303159e-06, "loss": 0.5117, "step": 7012 }, { "epoch": 0.29114837129409077, "grad_norm": 3.2851932048797607, "learning_rate": 8.315768129862485e-06, "loss": 0.5715, "step": 7013 }, { "epoch": 0.2911898868183021, "grad_norm": 2.2413742542266846, "learning_rate": 8.315264883471767e-06, "loss": 0.4812, "step": 7014 }, { "epoch": 0.29123140234251343, "grad_norm": 2.5349531173706055, "learning_rate": 8.314761577140105e-06, "loss": 0.5204, "step": 7015 }, { "epoch": 0.29127291786672477, "grad_norm": 2.0635697841644287, "learning_rate": 8.314258210876599e-06, "loss": 0.484, "step": 7016 }, { "epoch": 0.2913144333909361, "grad_norm": 2.2360689640045166, "learning_rate": 8.313754784690347e-06, "loss": 0.4916, "step": 7017 }, { "epoch": 0.29135594891514743, "grad_norm": 2.378048896789551, "learning_rate": 8.313251298590454e-06, "loss": 0.5806, "step": 7018 }, { "epoch": 0.29139746443935877, "grad_norm": 2.7240536212921143, "learning_rate": 8.312747752586024e-06, "loss": 0.4512, "step": 7019 }, { "epoch": 0.29143897996357016, "grad_norm": 3.141158103942871, "learning_rate": 8.312244146686159e-06, "loss": 0.555, "step": 7020 }, { "epoch": 0.2914804954877815, "grad_norm": 2.140002727508545, "learning_rate": 8.311740480899966e-06, "loss": 0.522, "step": 7021 }, { "epoch": 0.2915220110119928, "grad_norm": 2.072483539581299, "learning_rate": 8.311236755236549e-06, "loss": 0.424, "step": 7022 }, { "epoch": 0.29156352653620415, "grad_norm": 2.6221446990966797, "learning_rate": 8.310732969705018e-06, "loss": 0.6034, "step": 7023 }, { "epoch": 0.2916050420604155, "grad_norm": 2.373899221420288, "learning_rate": 8.31022912431448e-06, "loss": 0.5645, "step": 7024 }, { "epoch": 0.2916465575846268, "grad_norm": 2.488999843597412, "learning_rate": 8.309725219074045e-06, "loss": 0.5865, "step": 7025 }, { "epoch": 0.29168807310883815, "grad_norm": 2.6456456184387207, "learning_rate": 8.309221253992825e-06, "loss": 0.4363, "step": 7026 }, { "epoch": 0.2917295886330495, "grad_norm": 3.1613271236419678, "learning_rate": 8.30871722907993e-06, "loss": 0.6335, "step": 7027 }, { "epoch": 0.2917711041572608, "grad_norm": 2.3195226192474365, "learning_rate": 8.308213144344475e-06, "loss": 0.4089, "step": 7028 }, { "epoch": 0.29181261968147215, "grad_norm": 2.894896984100342, "learning_rate": 8.307708999795573e-06, "loss": 0.5567, "step": 7029 }, { "epoch": 0.2918541352056835, "grad_norm": 2.2975473403930664, "learning_rate": 8.307204795442339e-06, "loss": 0.4144, "step": 7030 }, { "epoch": 0.2918956507298948, "grad_norm": 2.323941230773926, "learning_rate": 8.306700531293887e-06, "loss": 0.4768, "step": 7031 }, { "epoch": 0.29193716625410615, "grad_norm": 2.814940929412842, "learning_rate": 8.306196207359338e-06, "loss": 0.6555, "step": 7032 }, { "epoch": 0.2919786817783175, "grad_norm": 2.458866596221924, "learning_rate": 8.305691823647809e-06, "loss": 0.5436, "step": 7033 }, { "epoch": 0.2920201973025288, "grad_norm": 2.5982863903045654, "learning_rate": 8.305187380168416e-06, "loss": 0.5969, "step": 7034 }, { "epoch": 0.29206171282674015, "grad_norm": 2.5729689598083496, "learning_rate": 8.304682876930286e-06, "loss": 0.5793, "step": 7035 }, { "epoch": 0.2921032283509515, "grad_norm": 2.8236069679260254, "learning_rate": 8.304178313942536e-06, "loss": 0.522, "step": 7036 }, { "epoch": 0.2921447438751628, "grad_norm": 2.4220824241638184, "learning_rate": 8.303673691214289e-06, "loss": 0.4586, "step": 7037 }, { "epoch": 0.29218625939937415, "grad_norm": 2.240959644317627, "learning_rate": 8.30316900875467e-06, "loss": 0.5252, "step": 7038 }, { "epoch": 0.2922277749235855, "grad_norm": 2.642136812210083, "learning_rate": 8.302664266572804e-06, "loss": 0.5549, "step": 7039 }, { "epoch": 0.2922692904477968, "grad_norm": 2.3441343307495117, "learning_rate": 8.302159464677815e-06, "loss": 0.5414, "step": 7040 }, { "epoch": 0.29231080597200815, "grad_norm": 2.6061036586761475, "learning_rate": 8.301654603078832e-06, "loss": 0.4396, "step": 7041 }, { "epoch": 0.2923523214962195, "grad_norm": 2.4689290523529053, "learning_rate": 8.301149681784981e-06, "loss": 0.3807, "step": 7042 }, { "epoch": 0.2923938370204308, "grad_norm": 2.733978748321533, "learning_rate": 8.300644700805394e-06, "loss": 0.6923, "step": 7043 }, { "epoch": 0.29243535254464215, "grad_norm": 2.352343797683716, "learning_rate": 8.300139660149197e-06, "loss": 0.4762, "step": 7044 }, { "epoch": 0.2924768680688535, "grad_norm": 2.420921564102173, "learning_rate": 8.299634559825524e-06, "loss": 0.5236, "step": 7045 }, { "epoch": 0.2925183835930648, "grad_norm": 2.6395249366760254, "learning_rate": 8.299129399843508e-06, "loss": 0.569, "step": 7046 }, { "epoch": 0.29255989911727615, "grad_norm": 1.9516973495483398, "learning_rate": 8.298624180212283e-06, "loss": 0.4697, "step": 7047 }, { "epoch": 0.2926014146414875, "grad_norm": 2.1313652992248535, "learning_rate": 8.29811890094098e-06, "loss": 0.3964, "step": 7048 }, { "epoch": 0.2926429301656988, "grad_norm": 2.588358163833618, "learning_rate": 8.297613562038738e-06, "loss": 0.5612, "step": 7049 }, { "epoch": 0.29268444568991014, "grad_norm": 2.487086772918701, "learning_rate": 8.297108163514692e-06, "loss": 0.4494, "step": 7050 }, { "epoch": 0.29272596121412153, "grad_norm": 2.347301959991455, "learning_rate": 8.29660270537798e-06, "loss": 0.5304, "step": 7051 }, { "epoch": 0.29276747673833287, "grad_norm": 3.4477715492248535, "learning_rate": 8.29609718763774e-06, "loss": 0.6496, "step": 7052 }, { "epoch": 0.2928089922625442, "grad_norm": 2.2910337448120117, "learning_rate": 8.295591610303113e-06, "loss": 0.5236, "step": 7053 }, { "epoch": 0.29285050778675553, "grad_norm": 2.4442710876464844, "learning_rate": 8.295085973383241e-06, "loss": 0.554, "step": 7054 }, { "epoch": 0.29289202331096686, "grad_norm": 2.4209787845611572, "learning_rate": 8.294580276887265e-06, "loss": 0.5416, "step": 7055 }, { "epoch": 0.2929335388351782, "grad_norm": 2.2959251403808594, "learning_rate": 8.294074520824326e-06, "loss": 0.5169, "step": 7056 }, { "epoch": 0.29297505435938953, "grad_norm": 2.5636963844299316, "learning_rate": 8.293568705203572e-06, "loss": 0.4932, "step": 7057 }, { "epoch": 0.29301656988360086, "grad_norm": 2.961998224258423, "learning_rate": 8.293062830034146e-06, "loss": 0.5855, "step": 7058 }, { "epoch": 0.2930580854078122, "grad_norm": 2.0845730304718018, "learning_rate": 8.292556895325195e-06, "loss": 0.4031, "step": 7059 }, { "epoch": 0.29309960093202353, "grad_norm": 2.783513069152832, "learning_rate": 8.292050901085865e-06, "loss": 0.5227, "step": 7060 }, { "epoch": 0.29314111645623486, "grad_norm": 2.397315263748169, "learning_rate": 8.291544847325306e-06, "loss": 0.5435, "step": 7061 }, { "epoch": 0.2931826319804462, "grad_norm": 2.398953437805176, "learning_rate": 8.291038734052669e-06, "loss": 0.5893, "step": 7062 }, { "epoch": 0.29322414750465753, "grad_norm": 2.621011972427368, "learning_rate": 8.290532561277103e-06, "loss": 0.6207, "step": 7063 }, { "epoch": 0.29326566302886886, "grad_norm": 2.5890135765075684, "learning_rate": 8.29002632900776e-06, "loss": 0.5271, "step": 7064 }, { "epoch": 0.2933071785530802, "grad_norm": 3.0446295738220215, "learning_rate": 8.28952003725379e-06, "loss": 0.6333, "step": 7065 }, { "epoch": 0.2933486940772915, "grad_norm": 2.1941187381744385, "learning_rate": 8.289013686024353e-06, "loss": 0.4867, "step": 7066 }, { "epoch": 0.29339020960150286, "grad_norm": 2.41583251953125, "learning_rate": 8.2885072753286e-06, "loss": 0.5344, "step": 7067 }, { "epoch": 0.2934317251257142, "grad_norm": 2.451496124267578, "learning_rate": 8.288000805175685e-06, "loss": 0.5578, "step": 7068 }, { "epoch": 0.2934732406499255, "grad_norm": 3.5773746967315674, "learning_rate": 8.28749427557477e-06, "loss": 0.3832, "step": 7069 }, { "epoch": 0.29351475617413686, "grad_norm": 2.3026041984558105, "learning_rate": 8.286987686535011e-06, "loss": 0.436, "step": 7070 }, { "epoch": 0.2935562716983482, "grad_norm": 2.6368274688720703, "learning_rate": 8.286481038065567e-06, "loss": 0.4683, "step": 7071 }, { "epoch": 0.2935977872225595, "grad_norm": 2.2059710025787354, "learning_rate": 8.285974330175596e-06, "loss": 0.4454, "step": 7072 }, { "epoch": 0.29363930274677086, "grad_norm": 3.0031776428222656, "learning_rate": 8.285467562874265e-06, "loss": 0.5146, "step": 7073 }, { "epoch": 0.2936808182709822, "grad_norm": 2.1104507446289062, "learning_rate": 8.284960736170732e-06, "loss": 0.5268, "step": 7074 }, { "epoch": 0.2937223337951935, "grad_norm": 2.327524185180664, "learning_rate": 8.284453850074163e-06, "loss": 0.4328, "step": 7075 }, { "epoch": 0.29376384931940486, "grad_norm": 2.251319408416748, "learning_rate": 8.283946904593721e-06, "loss": 0.4828, "step": 7076 }, { "epoch": 0.2938053648436162, "grad_norm": 2.538097858428955, "learning_rate": 8.283439899738574e-06, "loss": 0.434, "step": 7077 }, { "epoch": 0.2938468803678275, "grad_norm": 2.117032527923584, "learning_rate": 8.282932835517886e-06, "loss": 0.4166, "step": 7078 }, { "epoch": 0.29388839589203886, "grad_norm": 2.6865053176879883, "learning_rate": 8.282425711940825e-06, "loss": 0.5815, "step": 7079 }, { "epoch": 0.2939299114162502, "grad_norm": 3.22998046875, "learning_rate": 8.281918529016564e-06, "loss": 0.4185, "step": 7080 }, { "epoch": 0.2939714269404615, "grad_norm": 2.3870460987091064, "learning_rate": 8.281411286754266e-06, "loss": 0.3675, "step": 7081 }, { "epoch": 0.2940129424646729, "grad_norm": 2.428373336791992, "learning_rate": 8.280903985163108e-06, "loss": 0.5197, "step": 7082 }, { "epoch": 0.29405445798888424, "grad_norm": 2.4506289958953857, "learning_rate": 8.280396624252262e-06, "loss": 0.5657, "step": 7083 }, { "epoch": 0.2940959735130956, "grad_norm": 2.3415064811706543, "learning_rate": 8.2798892040309e-06, "loss": 0.488, "step": 7084 }, { "epoch": 0.2941374890373069, "grad_norm": 2.3987131118774414, "learning_rate": 8.279381724508194e-06, "loss": 0.5356, "step": 7085 }, { "epoch": 0.29417900456151824, "grad_norm": 2.1166021823883057, "learning_rate": 8.27887418569332e-06, "loss": 0.4044, "step": 7086 }, { "epoch": 0.2942205200857296, "grad_norm": 2.4548022747039795, "learning_rate": 8.278366587595457e-06, "loss": 0.4429, "step": 7087 }, { "epoch": 0.2942620356099409, "grad_norm": 2.4714603424072266, "learning_rate": 8.27785893022378e-06, "loss": 0.6776, "step": 7088 }, { "epoch": 0.29430355113415224, "grad_norm": 2.5464162826538086, "learning_rate": 8.27735121358747e-06, "loss": 0.4868, "step": 7089 }, { "epoch": 0.2943450666583636, "grad_norm": 2.3056893348693848, "learning_rate": 8.276843437695705e-06, "loss": 0.4618, "step": 7090 }, { "epoch": 0.2943865821825749, "grad_norm": 2.461751699447632, "learning_rate": 8.276335602557668e-06, "loss": 0.5964, "step": 7091 }, { "epoch": 0.29442809770678624, "grad_norm": 2.394756555557251, "learning_rate": 8.275827708182536e-06, "loss": 0.5216, "step": 7092 }, { "epoch": 0.2944696132309976, "grad_norm": 2.8169052600860596, "learning_rate": 8.275319754579496e-06, "loss": 0.4845, "step": 7093 }, { "epoch": 0.2945111287552089, "grad_norm": 2.6428215503692627, "learning_rate": 8.27481174175773e-06, "loss": 0.5649, "step": 7094 }, { "epoch": 0.29455264427942024, "grad_norm": 1.950069546699524, "learning_rate": 8.274303669726427e-06, "loss": 0.4348, "step": 7095 }, { "epoch": 0.2945941598036316, "grad_norm": 2.7213470935821533, "learning_rate": 8.273795538494767e-06, "loss": 0.4461, "step": 7096 }, { "epoch": 0.2946356753278429, "grad_norm": 2.2204489707946777, "learning_rate": 8.27328734807194e-06, "loss": 0.4548, "step": 7097 }, { "epoch": 0.29467719085205424, "grad_norm": 2.4411375522613525, "learning_rate": 8.272779098467137e-06, "loss": 0.5589, "step": 7098 }, { "epoch": 0.29471870637626557, "grad_norm": 2.250279426574707, "learning_rate": 8.27227078968954e-06, "loss": 0.4985, "step": 7099 }, { "epoch": 0.2947602219004769, "grad_norm": 2.2057254314422607, "learning_rate": 8.271762421748349e-06, "loss": 0.5133, "step": 7100 }, { "epoch": 0.29480173742468824, "grad_norm": 2.316554307937622, "learning_rate": 8.271253994652746e-06, "loss": 0.4786, "step": 7101 }, { "epoch": 0.29484325294889957, "grad_norm": 2.77876353263855, "learning_rate": 8.27074550841193e-06, "loss": 0.5629, "step": 7102 }, { "epoch": 0.2948847684731109, "grad_norm": 2.4085402488708496, "learning_rate": 8.270236963035091e-06, "loss": 0.505, "step": 7103 }, { "epoch": 0.29492628399732224, "grad_norm": 2.7131760120391846, "learning_rate": 8.269728358531428e-06, "loss": 0.6113, "step": 7104 }, { "epoch": 0.29496779952153357, "grad_norm": 2.188077688217163, "learning_rate": 8.269219694910132e-06, "loss": 0.5775, "step": 7105 }, { "epoch": 0.2950093150457449, "grad_norm": 3.412623643875122, "learning_rate": 8.268710972180403e-06, "loss": 0.6456, "step": 7106 }, { "epoch": 0.29505083056995624, "grad_norm": 2.780330181121826, "learning_rate": 8.268202190351436e-06, "loss": 0.5005, "step": 7107 }, { "epoch": 0.29509234609416757, "grad_norm": 2.7638912200927734, "learning_rate": 8.267693349432432e-06, "loss": 0.577, "step": 7108 }, { "epoch": 0.2951338616183789, "grad_norm": 2.1853690147399902, "learning_rate": 8.26718444943259e-06, "loss": 0.5341, "step": 7109 }, { "epoch": 0.29517537714259023, "grad_norm": 2.311631679534912, "learning_rate": 8.266675490361112e-06, "loss": 0.4035, "step": 7110 }, { "epoch": 0.29521689266680157, "grad_norm": 2.199733018875122, "learning_rate": 8.266166472227198e-06, "loss": 0.3769, "step": 7111 }, { "epoch": 0.2952584081910129, "grad_norm": 2.601274013519287, "learning_rate": 8.265657395040055e-06, "loss": 0.4828, "step": 7112 }, { "epoch": 0.2952999237152243, "grad_norm": 2.632697343826294, "learning_rate": 8.265148258808884e-06, "loss": 0.6413, "step": 7113 }, { "epoch": 0.2953414392394356, "grad_norm": 2.224712610244751, "learning_rate": 8.26463906354289e-06, "loss": 0.4382, "step": 7114 }, { "epoch": 0.29538295476364695, "grad_norm": 2.3533518314361572, "learning_rate": 8.264129809251282e-06, "loss": 0.5605, "step": 7115 }, { "epoch": 0.2954244702878583, "grad_norm": 1.985255241394043, "learning_rate": 8.263620495943267e-06, "loss": 0.4345, "step": 7116 }, { "epoch": 0.2954659858120696, "grad_norm": 2.8229711055755615, "learning_rate": 8.263111123628052e-06, "loss": 0.4824, "step": 7117 }, { "epoch": 0.29550750133628095, "grad_norm": 2.2660093307495117, "learning_rate": 8.262601692314848e-06, "loss": 0.4525, "step": 7118 }, { "epoch": 0.2955490168604923, "grad_norm": 2.5797574520111084, "learning_rate": 8.262092202012865e-06, "loss": 0.4666, "step": 7119 }, { "epoch": 0.2955905323847036, "grad_norm": 2.8494653701782227, "learning_rate": 8.261582652731312e-06, "loss": 0.6338, "step": 7120 }, { "epoch": 0.29563204790891495, "grad_norm": 2.846363067626953, "learning_rate": 8.261073044479408e-06, "loss": 0.6823, "step": 7121 }, { "epoch": 0.2956735634331263, "grad_norm": 2.7706711292266846, "learning_rate": 8.260563377266363e-06, "loss": 0.567, "step": 7122 }, { "epoch": 0.2957150789573376, "grad_norm": 2.983074903488159, "learning_rate": 8.260053651101391e-06, "loss": 0.4756, "step": 7123 }, { "epoch": 0.29575659448154895, "grad_norm": 2.544046640396118, "learning_rate": 8.25954386599371e-06, "loss": 0.6137, "step": 7124 }, { "epoch": 0.2957981100057603, "grad_norm": 3.1873972415924072, "learning_rate": 8.259034021952537e-06, "loss": 0.5622, "step": 7125 }, { "epoch": 0.2958396255299716, "grad_norm": 2.3136000633239746, "learning_rate": 8.258524118987088e-06, "loss": 0.4885, "step": 7126 }, { "epoch": 0.29588114105418295, "grad_norm": 2.2511985301971436, "learning_rate": 8.258014157106585e-06, "loss": 0.5335, "step": 7127 }, { "epoch": 0.2959226565783943, "grad_norm": 2.506361484527588, "learning_rate": 8.257504136320247e-06, "loss": 0.5593, "step": 7128 }, { "epoch": 0.2959641721026056, "grad_norm": 2.57293438911438, "learning_rate": 8.256994056637293e-06, "loss": 0.4661, "step": 7129 }, { "epoch": 0.29600568762681695, "grad_norm": 2.3727328777313232, "learning_rate": 8.256483918066951e-06, "loss": 0.4688, "step": 7130 }, { "epoch": 0.2960472031510283, "grad_norm": 2.7484443187713623, "learning_rate": 8.255973720618438e-06, "loss": 0.4933, "step": 7131 }, { "epoch": 0.2960887186752396, "grad_norm": 3.088775634765625, "learning_rate": 8.255463464300984e-06, "loss": 0.542, "step": 7132 }, { "epoch": 0.29613023419945095, "grad_norm": 2.3237411975860596, "learning_rate": 8.254953149123812e-06, "loss": 0.4745, "step": 7133 }, { "epoch": 0.2961717497236623, "grad_norm": 1.9912837743759155, "learning_rate": 8.254442775096148e-06, "loss": 0.4381, "step": 7134 }, { "epoch": 0.2962132652478736, "grad_norm": 2.8574609756469727, "learning_rate": 8.253932342227221e-06, "loss": 0.4218, "step": 7135 }, { "epoch": 0.29625478077208495, "grad_norm": 2.2462685108184814, "learning_rate": 8.25342185052626e-06, "loss": 0.4173, "step": 7136 }, { "epoch": 0.2962962962962963, "grad_norm": 2.470506429672241, "learning_rate": 8.252911300002494e-06, "loss": 0.6207, "step": 7137 }, { "epoch": 0.2963378118205076, "grad_norm": 2.7451961040496826, "learning_rate": 8.252400690665154e-06, "loss": 0.5856, "step": 7138 }, { "epoch": 0.29637932734471895, "grad_norm": 2.2587289810180664, "learning_rate": 8.25189002252347e-06, "loss": 0.5884, "step": 7139 }, { "epoch": 0.2964208428689303, "grad_norm": 2.4883170127868652, "learning_rate": 8.251379295586678e-06, "loss": 0.4451, "step": 7140 }, { "epoch": 0.2964623583931416, "grad_norm": 2.3180363178253174, "learning_rate": 8.250868509864013e-06, "loss": 0.5656, "step": 7141 }, { "epoch": 0.29650387391735294, "grad_norm": 3.0286073684692383, "learning_rate": 8.250357665364708e-06, "loss": 0.4895, "step": 7142 }, { "epoch": 0.2965453894415643, "grad_norm": 1.8732178211212158, "learning_rate": 8.249846762098e-06, "loss": 0.3953, "step": 7143 }, { "epoch": 0.29658690496577567, "grad_norm": 2.1057260036468506, "learning_rate": 8.249335800073124e-06, "loss": 0.5509, "step": 7144 }, { "epoch": 0.296628420489987, "grad_norm": 2.304729700088501, "learning_rate": 8.248824779299321e-06, "loss": 0.5166, "step": 7145 }, { "epoch": 0.29666993601419833, "grad_norm": 2.9717142581939697, "learning_rate": 8.24831369978583e-06, "loss": 0.6179, "step": 7146 }, { "epoch": 0.29671145153840967, "grad_norm": 2.08742094039917, "learning_rate": 8.24780256154189e-06, "loss": 0.415, "step": 7147 }, { "epoch": 0.296752967062621, "grad_norm": 2.8010001182556152, "learning_rate": 8.247291364576743e-06, "loss": 0.5016, "step": 7148 }, { "epoch": 0.29679448258683233, "grad_norm": 2.8042900562286377, "learning_rate": 8.246780108899635e-06, "loss": 0.5031, "step": 7149 }, { "epoch": 0.29683599811104366, "grad_norm": 2.604055166244507, "learning_rate": 8.246268794519805e-06, "loss": 0.5197, "step": 7150 }, { "epoch": 0.296877513635255, "grad_norm": 2.5590920448303223, "learning_rate": 8.2457574214465e-06, "loss": 0.5401, "step": 7151 }, { "epoch": 0.29691902915946633, "grad_norm": 2.129335403442383, "learning_rate": 8.245245989688964e-06, "loss": 0.4494, "step": 7152 }, { "epoch": 0.29696054468367766, "grad_norm": 2.3854825496673584, "learning_rate": 8.244734499256446e-06, "loss": 0.4987, "step": 7153 }, { "epoch": 0.297002060207889, "grad_norm": 2.4603617191314697, "learning_rate": 8.244222950158194e-06, "loss": 0.4126, "step": 7154 }, { "epoch": 0.29704357573210033, "grad_norm": 2.589444875717163, "learning_rate": 8.243711342403454e-06, "loss": 0.4487, "step": 7155 }, { "epoch": 0.29708509125631166, "grad_norm": 3.3375837802886963, "learning_rate": 8.243199676001479e-06, "loss": 0.5519, "step": 7156 }, { "epoch": 0.297126606780523, "grad_norm": 2.2177281379699707, "learning_rate": 8.24268795096152e-06, "loss": 0.5521, "step": 7157 }, { "epoch": 0.29716812230473433, "grad_norm": 2.512324094772339, "learning_rate": 8.242176167292827e-06, "loss": 0.5984, "step": 7158 }, { "epoch": 0.29720963782894566, "grad_norm": 2.2439353466033936, "learning_rate": 8.241664325004654e-06, "loss": 0.4669, "step": 7159 }, { "epoch": 0.297251153353157, "grad_norm": 2.7491819858551025, "learning_rate": 8.241152424106256e-06, "loss": 0.4825, "step": 7160 }, { "epoch": 0.2972926688773683, "grad_norm": 2.9130470752716064, "learning_rate": 8.240640464606889e-06, "loss": 0.4527, "step": 7161 }, { "epoch": 0.29733418440157966, "grad_norm": 2.1385273933410645, "learning_rate": 8.240128446515808e-06, "loss": 0.4441, "step": 7162 }, { "epoch": 0.297375699925791, "grad_norm": 2.382641553878784, "learning_rate": 8.239616369842271e-06, "loss": 0.592, "step": 7163 }, { "epoch": 0.2974172154500023, "grad_norm": 2.4626927375793457, "learning_rate": 8.239104234595536e-06, "loss": 0.5634, "step": 7164 }, { "epoch": 0.29745873097421366, "grad_norm": 2.594712018966675, "learning_rate": 8.238592040784863e-06, "loss": 0.7099, "step": 7165 }, { "epoch": 0.297500246498425, "grad_norm": 2.4384942054748535, "learning_rate": 8.23807978841951e-06, "loss": 0.4436, "step": 7166 }, { "epoch": 0.2975417620226363, "grad_norm": 2.447751522064209, "learning_rate": 8.237567477508744e-06, "loss": 0.485, "step": 7167 }, { "epoch": 0.29758327754684766, "grad_norm": 2.163198232650757, "learning_rate": 8.237055108061824e-06, "loss": 0.4871, "step": 7168 }, { "epoch": 0.297624793071059, "grad_norm": 2.5914738178253174, "learning_rate": 8.236542680088015e-06, "loss": 0.5939, "step": 7169 }, { "epoch": 0.2976663085952703, "grad_norm": 2.8137335777282715, "learning_rate": 8.23603019359658e-06, "loss": 0.527, "step": 7170 }, { "epoch": 0.29770782411948166, "grad_norm": 3.2071545124053955, "learning_rate": 8.235517648596788e-06, "loss": 0.4641, "step": 7171 }, { "epoch": 0.297749339643693, "grad_norm": 2.9505839347839355, "learning_rate": 8.235005045097903e-06, "loss": 0.484, "step": 7172 }, { "epoch": 0.2977908551679043, "grad_norm": 3.1357038021087646, "learning_rate": 8.234492383109196e-06, "loss": 0.6464, "step": 7173 }, { "epoch": 0.29783237069211566, "grad_norm": 2.056373119354248, "learning_rate": 8.233979662639933e-06, "loss": 0.4, "step": 7174 }, { "epoch": 0.29787388621632704, "grad_norm": 2.235403299331665, "learning_rate": 8.233466883699388e-06, "loss": 0.5146, "step": 7175 }, { "epoch": 0.2979154017405384, "grad_norm": 2.2364790439605713, "learning_rate": 8.232954046296827e-06, "loss": 0.5795, "step": 7176 }, { "epoch": 0.2979569172647497, "grad_norm": 2.3434884548187256, "learning_rate": 8.232441150441527e-06, "loss": 0.5968, "step": 7177 }, { "epoch": 0.29799843278896104, "grad_norm": 2.483980178833008, "learning_rate": 8.231928196142757e-06, "loss": 0.5999, "step": 7178 }, { "epoch": 0.2980399483131724, "grad_norm": 2.1908397674560547, "learning_rate": 8.231415183409794e-06, "loss": 0.5553, "step": 7179 }, { "epoch": 0.2980814638373837, "grad_norm": 2.7992241382598877, "learning_rate": 8.230902112251915e-06, "loss": 0.563, "step": 7180 }, { "epoch": 0.29812297936159504, "grad_norm": 2.3993144035339355, "learning_rate": 8.230388982678393e-06, "loss": 0.5693, "step": 7181 }, { "epoch": 0.2981644948858064, "grad_norm": 2.1564698219299316, "learning_rate": 8.229875794698507e-06, "loss": 0.495, "step": 7182 }, { "epoch": 0.2982060104100177, "grad_norm": 2.203547477722168, "learning_rate": 8.229362548321536e-06, "loss": 0.5383, "step": 7183 }, { "epoch": 0.29824752593422904, "grad_norm": 2.4780170917510986, "learning_rate": 8.22884924355676e-06, "loss": 0.6303, "step": 7184 }, { "epoch": 0.2982890414584404, "grad_norm": 2.582543134689331, "learning_rate": 8.228335880413458e-06, "loss": 0.5232, "step": 7185 }, { "epoch": 0.2983305569826517, "grad_norm": 2.6443710327148438, "learning_rate": 8.227822458900915e-06, "loss": 0.5854, "step": 7186 }, { "epoch": 0.29837207250686304, "grad_norm": 2.570998430252075, "learning_rate": 8.22730897902841e-06, "loss": 0.4874, "step": 7187 }, { "epoch": 0.2984135880310744, "grad_norm": 3.0601441860198975, "learning_rate": 8.22679544080523e-06, "loss": 0.5836, "step": 7188 }, { "epoch": 0.2984551035552857, "grad_norm": 2.220517635345459, "learning_rate": 8.226281844240655e-06, "loss": 0.3943, "step": 7189 }, { "epoch": 0.29849661907949704, "grad_norm": 2.8463354110717773, "learning_rate": 8.225768189343977e-06, "loss": 0.5856, "step": 7190 }, { "epoch": 0.29853813460370837, "grad_norm": 2.725620746612549, "learning_rate": 8.225254476124479e-06, "loss": 0.564, "step": 7191 }, { "epoch": 0.2985796501279197, "grad_norm": 2.2832040786743164, "learning_rate": 8.22474070459145e-06, "loss": 0.6326, "step": 7192 }, { "epoch": 0.29862116565213104, "grad_norm": 2.22135853767395, "learning_rate": 8.224226874754182e-06, "loss": 0.5429, "step": 7193 }, { "epoch": 0.29866268117634237, "grad_norm": 2.635552406311035, "learning_rate": 8.22371298662196e-06, "loss": 0.5306, "step": 7194 }, { "epoch": 0.2987041967005537, "grad_norm": 2.6897056102752686, "learning_rate": 8.22319904020408e-06, "loss": 0.6581, "step": 7195 }, { "epoch": 0.29874571222476504, "grad_norm": 2.0013692378997803, "learning_rate": 8.222685035509833e-06, "loss": 0.4331, "step": 7196 }, { "epoch": 0.29878722774897637, "grad_norm": 2.18953800201416, "learning_rate": 8.222170972548509e-06, "loss": 0.4382, "step": 7197 }, { "epoch": 0.2988287432731877, "grad_norm": 2.2429347038269043, "learning_rate": 8.221656851329406e-06, "loss": 0.4417, "step": 7198 }, { "epoch": 0.29887025879739904, "grad_norm": 2.4812917709350586, "learning_rate": 8.221142671861819e-06, "loss": 0.4471, "step": 7199 }, { "epoch": 0.29891177432161037, "grad_norm": 2.91998028755188, "learning_rate": 8.220628434155045e-06, "loss": 0.5604, "step": 7200 }, { "epoch": 0.2989532898458217, "grad_norm": 2.2946934700012207, "learning_rate": 8.22011413821838e-06, "loss": 0.4827, "step": 7201 }, { "epoch": 0.29899480537003303, "grad_norm": 2.5381264686584473, "learning_rate": 8.219599784061124e-06, "loss": 0.5303, "step": 7202 }, { "epoch": 0.29903632089424437, "grad_norm": 3.1356794834136963, "learning_rate": 8.219085371692573e-06, "loss": 0.59, "step": 7203 }, { "epoch": 0.2990778364184557, "grad_norm": 2.544955015182495, "learning_rate": 8.218570901122033e-06, "loss": 0.5032, "step": 7204 }, { "epoch": 0.2991193519426671, "grad_norm": 2.3387341499328613, "learning_rate": 8.218056372358802e-06, "loss": 0.4512, "step": 7205 }, { "epoch": 0.2991608674668784, "grad_norm": 2.156705141067505, "learning_rate": 8.217541785412186e-06, "loss": 0.5468, "step": 7206 }, { "epoch": 0.29920238299108975, "grad_norm": 2.3188178539276123, "learning_rate": 8.217027140291485e-06, "loss": 0.4612, "step": 7207 }, { "epoch": 0.2992438985153011, "grad_norm": 2.606732130050659, "learning_rate": 8.216512437006008e-06, "loss": 0.5417, "step": 7208 }, { "epoch": 0.2992854140395124, "grad_norm": 2.3409616947174072, "learning_rate": 8.21599767556506e-06, "loss": 0.5414, "step": 7209 }, { "epoch": 0.29932692956372375, "grad_norm": 2.4519646167755127, "learning_rate": 8.215482855977944e-06, "loss": 0.3892, "step": 7210 }, { "epoch": 0.2993684450879351, "grad_norm": 2.0322070121765137, "learning_rate": 8.214967978253973e-06, "loss": 0.3953, "step": 7211 }, { "epoch": 0.2994099606121464, "grad_norm": 2.401379108428955, "learning_rate": 8.214453042402455e-06, "loss": 0.6258, "step": 7212 }, { "epoch": 0.29945147613635775, "grad_norm": 2.2736246585845947, "learning_rate": 8.213938048432697e-06, "loss": 0.58, "step": 7213 }, { "epoch": 0.2994929916605691, "grad_norm": 2.4236867427825928, "learning_rate": 8.213422996354014e-06, "loss": 0.5625, "step": 7214 }, { "epoch": 0.2995345071847804, "grad_norm": 2.899364948272705, "learning_rate": 8.212907886175718e-06, "loss": 0.4578, "step": 7215 }, { "epoch": 0.29957602270899175, "grad_norm": 2.8056528568267822, "learning_rate": 8.21239271790712e-06, "loss": 0.394, "step": 7216 }, { "epoch": 0.2996175382332031, "grad_norm": 2.7867469787597656, "learning_rate": 8.211877491557536e-06, "loss": 0.6606, "step": 7217 }, { "epoch": 0.2996590537574144, "grad_norm": 2.6575684547424316, "learning_rate": 8.211362207136281e-06, "loss": 0.4662, "step": 7218 }, { "epoch": 0.29970056928162575, "grad_norm": 2.538940906524658, "learning_rate": 8.210846864652673e-06, "loss": 0.5393, "step": 7219 }, { "epoch": 0.2997420848058371, "grad_norm": 3.845402479171753, "learning_rate": 8.210331464116026e-06, "loss": 0.4629, "step": 7220 }, { "epoch": 0.2997836003300484, "grad_norm": 2.422987461090088, "learning_rate": 8.209816005535665e-06, "loss": 0.4753, "step": 7221 }, { "epoch": 0.29982511585425975, "grad_norm": 2.9710237979888916, "learning_rate": 8.209300488920901e-06, "loss": 0.4913, "step": 7222 }, { "epoch": 0.2998666313784711, "grad_norm": 2.767442226409912, "learning_rate": 8.208784914281061e-06, "loss": 0.6764, "step": 7223 }, { "epoch": 0.2999081469026824, "grad_norm": 2.2946503162384033, "learning_rate": 8.208269281625466e-06, "loss": 0.5304, "step": 7224 }, { "epoch": 0.29994966242689375, "grad_norm": 2.165656566619873, "learning_rate": 8.207753590963435e-06, "loss": 0.5361, "step": 7225 }, { "epoch": 0.2999911779511051, "grad_norm": 2.4132704734802246, "learning_rate": 8.207237842304297e-06, "loss": 0.496, "step": 7226 }, { "epoch": 0.3000326934753164, "grad_norm": 2.4692764282226562, "learning_rate": 8.206722035657372e-06, "loss": 0.4494, "step": 7227 }, { "epoch": 0.30007420899952775, "grad_norm": 2.1759700775146484, "learning_rate": 8.206206171031989e-06, "loss": 0.3544, "step": 7228 }, { "epoch": 0.3001157245237391, "grad_norm": 2.6154978275299072, "learning_rate": 8.205690248437475e-06, "loss": 0.6519, "step": 7229 }, { "epoch": 0.3001572400479504, "grad_norm": 2.5722436904907227, "learning_rate": 8.205174267883158e-06, "loss": 0.6028, "step": 7230 }, { "epoch": 0.30019875557216175, "grad_norm": 2.5319533348083496, "learning_rate": 8.204658229378367e-06, "loss": 0.5598, "step": 7231 }, { "epoch": 0.3002402710963731, "grad_norm": 2.8541417121887207, "learning_rate": 8.204142132932429e-06, "loss": 0.5131, "step": 7232 }, { "epoch": 0.3002817866205844, "grad_norm": 2.8022847175598145, "learning_rate": 8.203625978554678e-06, "loss": 0.5897, "step": 7233 }, { "epoch": 0.30032330214479575, "grad_norm": 2.922985792160034, "learning_rate": 8.203109766254447e-06, "loss": 0.4992, "step": 7234 }, { "epoch": 0.3003648176690071, "grad_norm": 2.6499061584472656, "learning_rate": 8.202593496041067e-06, "loss": 0.4424, "step": 7235 }, { "epoch": 0.30040633319321847, "grad_norm": 2.1962575912475586, "learning_rate": 8.202077167923875e-06, "loss": 0.5831, "step": 7236 }, { "epoch": 0.3004478487174298, "grad_norm": 2.606001377105713, "learning_rate": 8.201560781912204e-06, "loss": 0.4693, "step": 7237 }, { "epoch": 0.30048936424164113, "grad_norm": 2.34090256690979, "learning_rate": 8.201044338015391e-06, "loss": 0.5195, "step": 7238 }, { "epoch": 0.30053087976585247, "grad_norm": 2.701509952545166, "learning_rate": 8.200527836242775e-06, "loss": 0.6563, "step": 7239 }, { "epoch": 0.3005723952900638, "grad_norm": 3.0642776489257812, "learning_rate": 8.200011276603694e-06, "loss": 0.5856, "step": 7240 }, { "epoch": 0.30061391081427513, "grad_norm": 2.6348791122436523, "learning_rate": 8.199494659107484e-06, "loss": 0.5584, "step": 7241 }, { "epoch": 0.30065542633848646, "grad_norm": 2.6192219257354736, "learning_rate": 8.19897798376349e-06, "loss": 0.4536, "step": 7242 }, { "epoch": 0.3006969418626978, "grad_norm": 2.1145036220550537, "learning_rate": 8.198461250581051e-06, "loss": 0.6358, "step": 7243 }, { "epoch": 0.30073845738690913, "grad_norm": 2.2990031242370605, "learning_rate": 8.197944459569511e-06, "loss": 0.3029, "step": 7244 }, { "epoch": 0.30077997291112046, "grad_norm": 2.8058998584747314, "learning_rate": 8.197427610738215e-06, "loss": 0.5288, "step": 7245 }, { "epoch": 0.3008214884353318, "grad_norm": 2.370574951171875, "learning_rate": 8.196910704096503e-06, "loss": 0.5508, "step": 7246 }, { "epoch": 0.30086300395954313, "grad_norm": 2.595810651779175, "learning_rate": 8.196393739653728e-06, "loss": 0.5533, "step": 7247 }, { "epoch": 0.30090451948375446, "grad_norm": 2.211275815963745, "learning_rate": 8.19587671741923e-06, "loss": 0.5416, "step": 7248 }, { "epoch": 0.3009460350079658, "grad_norm": 2.6925649642944336, "learning_rate": 8.19535963740236e-06, "loss": 0.6213, "step": 7249 }, { "epoch": 0.30098755053217713, "grad_norm": 2.5717809200286865, "learning_rate": 8.194842499612468e-06, "loss": 0.5778, "step": 7250 }, { "epoch": 0.30102906605638846, "grad_norm": 2.661630153656006, "learning_rate": 8.194325304058903e-06, "loss": 0.6163, "step": 7251 }, { "epoch": 0.3010705815805998, "grad_norm": 1.9058905839920044, "learning_rate": 8.193808050751015e-06, "loss": 0.4001, "step": 7252 }, { "epoch": 0.3011120971048111, "grad_norm": 2.583137035369873, "learning_rate": 8.193290739698156e-06, "loss": 0.6383, "step": 7253 }, { "epoch": 0.30115361262902246, "grad_norm": 2.365734815597534, "learning_rate": 8.192773370909682e-06, "loss": 0.5964, "step": 7254 }, { "epoch": 0.3011951281532338, "grad_norm": 2.4242801666259766, "learning_rate": 8.192255944394944e-06, "loss": 0.7325, "step": 7255 }, { "epoch": 0.3012366436774451, "grad_norm": 2.6696557998657227, "learning_rate": 8.191738460163297e-06, "loss": 0.5132, "step": 7256 }, { "epoch": 0.30127815920165646, "grad_norm": 2.368983745574951, "learning_rate": 8.191220918224102e-06, "loss": 0.4628, "step": 7257 }, { "epoch": 0.3013196747258678, "grad_norm": 2.932892084121704, "learning_rate": 8.19070331858671e-06, "loss": 0.612, "step": 7258 }, { "epoch": 0.3013611902500791, "grad_norm": 2.9222002029418945, "learning_rate": 8.190185661260484e-06, "loss": 0.3983, "step": 7259 }, { "epoch": 0.30140270577429046, "grad_norm": 2.364767551422119, "learning_rate": 8.189667946254782e-06, "loss": 0.4642, "step": 7260 }, { "epoch": 0.3014442212985018, "grad_norm": 2.3477139472961426, "learning_rate": 8.189150173578963e-06, "loss": 0.5152, "step": 7261 }, { "epoch": 0.3014857368227131, "grad_norm": 2.126661777496338, "learning_rate": 8.18863234324239e-06, "loss": 0.4456, "step": 7262 }, { "epoch": 0.30152725234692446, "grad_norm": 2.4756410121917725, "learning_rate": 8.188114455254426e-06, "loss": 0.5321, "step": 7263 }, { "epoch": 0.3015687678711358, "grad_norm": 2.8737194538116455, "learning_rate": 8.187596509624432e-06, "loss": 0.5486, "step": 7264 }, { "epoch": 0.3016102833953471, "grad_norm": 2.516850471496582, "learning_rate": 8.187078506361776e-06, "loss": 0.4656, "step": 7265 }, { "epoch": 0.30165179891955846, "grad_norm": 2.531858205795288, "learning_rate": 8.18656044547582e-06, "loss": 0.5433, "step": 7266 }, { "epoch": 0.30169331444376984, "grad_norm": 2.2132277488708496, "learning_rate": 8.186042326975933e-06, "loss": 0.4557, "step": 7267 }, { "epoch": 0.3017348299679812, "grad_norm": 2.7070391178131104, "learning_rate": 8.185524150871486e-06, "loss": 0.4697, "step": 7268 }, { "epoch": 0.3017763454921925, "grad_norm": 2.355390787124634, "learning_rate": 8.18500591717184e-06, "loss": 0.5892, "step": 7269 }, { "epoch": 0.30181786101640384, "grad_norm": 2.09613037109375, "learning_rate": 8.184487625886372e-06, "loss": 0.5733, "step": 7270 }, { "epoch": 0.3018593765406152, "grad_norm": 2.4052252769470215, "learning_rate": 8.183969277024448e-06, "loss": 0.5704, "step": 7271 }, { "epoch": 0.3019008920648265, "grad_norm": 2.497694492340088, "learning_rate": 8.183450870595443e-06, "loss": 0.4991, "step": 7272 }, { "epoch": 0.30194240758903784, "grad_norm": 2.4085235595703125, "learning_rate": 8.182932406608727e-06, "loss": 0.4959, "step": 7273 }, { "epoch": 0.3019839231132492, "grad_norm": 2.6492433547973633, "learning_rate": 8.182413885073677e-06, "loss": 0.5258, "step": 7274 }, { "epoch": 0.3020254386374605, "grad_norm": 2.469977378845215, "learning_rate": 8.181895305999665e-06, "loss": 0.5743, "step": 7275 }, { "epoch": 0.30206695416167184, "grad_norm": 2.384317398071289, "learning_rate": 8.181376669396071e-06, "loss": 0.528, "step": 7276 }, { "epoch": 0.3021084696858832, "grad_norm": 2.2670345306396484, "learning_rate": 8.180857975272269e-06, "loss": 0.4813, "step": 7277 }, { "epoch": 0.3021499852100945, "grad_norm": 2.2859771251678467, "learning_rate": 8.180339223637637e-06, "loss": 0.3509, "step": 7278 }, { "epoch": 0.30219150073430584, "grad_norm": 2.791771173477173, "learning_rate": 8.179820414501555e-06, "loss": 0.4571, "step": 7279 }, { "epoch": 0.3022330162585172, "grad_norm": 2.560434579849243, "learning_rate": 8.179301547873407e-06, "loss": 0.5812, "step": 7280 }, { "epoch": 0.3022745317827285, "grad_norm": 2.2038748264312744, "learning_rate": 8.178782623762567e-06, "loss": 0.4946, "step": 7281 }, { "epoch": 0.30231604730693984, "grad_norm": 2.516704559326172, "learning_rate": 8.178263642178422e-06, "loss": 0.5404, "step": 7282 }, { "epoch": 0.30235756283115117, "grad_norm": 2.563932180404663, "learning_rate": 8.177744603130354e-06, "loss": 0.6109, "step": 7283 }, { "epoch": 0.3023990783553625, "grad_norm": 2.5569374561309814, "learning_rate": 8.177225506627748e-06, "loss": 0.4798, "step": 7284 }, { "epoch": 0.30244059387957384, "grad_norm": 2.592794895172119, "learning_rate": 8.17670635267999e-06, "loss": 0.4617, "step": 7285 }, { "epoch": 0.30248210940378517, "grad_norm": 1.9771323204040527, "learning_rate": 8.176187141296464e-06, "loss": 0.4361, "step": 7286 }, { "epoch": 0.3025236249279965, "grad_norm": 2.4155383110046387, "learning_rate": 8.175667872486558e-06, "loss": 0.4966, "step": 7287 }, { "epoch": 0.30256514045220784, "grad_norm": 2.6407570838928223, "learning_rate": 8.175148546259664e-06, "loss": 0.4701, "step": 7288 }, { "epoch": 0.30260665597641917, "grad_norm": 2.755004405975342, "learning_rate": 8.174629162625168e-06, "loss": 0.5294, "step": 7289 }, { "epoch": 0.3026481715006305, "grad_norm": 2.320380449295044, "learning_rate": 8.174109721592463e-06, "loss": 0.5045, "step": 7290 }, { "epoch": 0.30268968702484184, "grad_norm": 2.493131637573242, "learning_rate": 8.173590223170937e-06, "loss": 0.5848, "step": 7291 }, { "epoch": 0.30273120254905317, "grad_norm": 2.413270950317383, "learning_rate": 8.173070667369986e-06, "loss": 0.5846, "step": 7292 }, { "epoch": 0.3027727180732645, "grad_norm": 2.747293710708618, "learning_rate": 8.172551054199002e-06, "loss": 0.5685, "step": 7293 }, { "epoch": 0.30281423359747583, "grad_norm": 2.014575481414795, "learning_rate": 8.172031383667382e-06, "loss": 0.5371, "step": 7294 }, { "epoch": 0.30285574912168717, "grad_norm": 2.2202441692352295, "learning_rate": 8.17151165578452e-06, "loss": 0.4645, "step": 7295 }, { "epoch": 0.3028972646458985, "grad_norm": 2.569775104522705, "learning_rate": 8.170991870559813e-06, "loss": 0.5562, "step": 7296 }, { "epoch": 0.30293878017010983, "grad_norm": 2.9634318351745605, "learning_rate": 8.17047202800266e-06, "loss": 0.5179, "step": 7297 }, { "epoch": 0.3029802956943212, "grad_norm": 2.6504251956939697, "learning_rate": 8.169952128122458e-06, "loss": 0.5821, "step": 7298 }, { "epoch": 0.30302181121853256, "grad_norm": 2.635948657989502, "learning_rate": 8.169432170928607e-06, "loss": 0.7209, "step": 7299 }, { "epoch": 0.3030633267427439, "grad_norm": 3.2172672748565674, "learning_rate": 8.168912156430509e-06, "loss": 0.5846, "step": 7300 }, { "epoch": 0.3031048422669552, "grad_norm": 2.5243191719055176, "learning_rate": 8.168392084637566e-06, "loss": 0.4074, "step": 7301 }, { "epoch": 0.30314635779116655, "grad_norm": 2.54526424407959, "learning_rate": 8.167871955559182e-06, "loss": 0.5236, "step": 7302 }, { "epoch": 0.3031878733153779, "grad_norm": 2.4477999210357666, "learning_rate": 8.167351769204758e-06, "loss": 0.4777, "step": 7303 }, { "epoch": 0.3032293888395892, "grad_norm": 2.469888925552368, "learning_rate": 8.166831525583702e-06, "loss": 0.5929, "step": 7304 }, { "epoch": 0.30327090436380055, "grad_norm": 2.194908380508423, "learning_rate": 8.166311224705422e-06, "loss": 0.5371, "step": 7305 }, { "epoch": 0.3033124198880119, "grad_norm": 2.3557097911834717, "learning_rate": 8.16579086657932e-06, "loss": 0.443, "step": 7306 }, { "epoch": 0.3033539354122232, "grad_norm": 2.057230234146118, "learning_rate": 8.165270451214806e-06, "loss": 0.491, "step": 7307 }, { "epoch": 0.30339545093643455, "grad_norm": 2.4630446434020996, "learning_rate": 8.16474997862129e-06, "loss": 0.4957, "step": 7308 }, { "epoch": 0.3034369664606459, "grad_norm": 2.4582715034484863, "learning_rate": 8.164229448808183e-06, "loss": 0.5845, "step": 7309 }, { "epoch": 0.3034784819848572, "grad_norm": 2.6034417152404785, "learning_rate": 8.163708861784896e-06, "loss": 0.653, "step": 7310 }, { "epoch": 0.30351999750906855, "grad_norm": 2.0621867179870605, "learning_rate": 8.16318821756084e-06, "loss": 0.3522, "step": 7311 }, { "epoch": 0.3035615130332799, "grad_norm": 2.396026134490967, "learning_rate": 8.162667516145429e-06, "loss": 0.5491, "step": 7312 }, { "epoch": 0.3036030285574912, "grad_norm": 3.148097515106201, "learning_rate": 8.162146757548077e-06, "loss": 0.6629, "step": 7313 }, { "epoch": 0.30364454408170255, "grad_norm": 2.4799997806549072, "learning_rate": 8.161625941778202e-06, "loss": 0.5369, "step": 7314 }, { "epoch": 0.3036860596059139, "grad_norm": 2.4983620643615723, "learning_rate": 8.16110506884522e-06, "loss": 0.5346, "step": 7315 }, { "epoch": 0.3037275751301252, "grad_norm": 2.5064873695373535, "learning_rate": 8.160584138758544e-06, "loss": 0.3869, "step": 7316 }, { "epoch": 0.30376909065433655, "grad_norm": 2.674924850463867, "learning_rate": 8.160063151527596e-06, "loss": 0.5209, "step": 7317 }, { "epoch": 0.3038106061785479, "grad_norm": 3.1834754943847656, "learning_rate": 8.159542107161798e-06, "loss": 0.5303, "step": 7318 }, { "epoch": 0.3038521217027592, "grad_norm": 2.4979875087738037, "learning_rate": 8.159021005670565e-06, "loss": 0.5791, "step": 7319 }, { "epoch": 0.30389363722697055, "grad_norm": 2.7424118518829346, "learning_rate": 8.158499847063324e-06, "loss": 0.6389, "step": 7320 }, { "epoch": 0.3039351527511819, "grad_norm": 2.2975621223449707, "learning_rate": 8.157978631349494e-06, "loss": 0.5295, "step": 7321 }, { "epoch": 0.3039766682753932, "grad_norm": 2.0046825408935547, "learning_rate": 8.157457358538501e-06, "loss": 0.5379, "step": 7322 }, { "epoch": 0.30401818379960455, "grad_norm": 2.8617687225341797, "learning_rate": 8.156936028639768e-06, "loss": 0.6304, "step": 7323 }, { "epoch": 0.3040596993238159, "grad_norm": 2.5825321674346924, "learning_rate": 8.156414641662725e-06, "loss": 0.575, "step": 7324 }, { "epoch": 0.3041012148480272, "grad_norm": 2.126354455947876, "learning_rate": 8.155893197616792e-06, "loss": 0.359, "step": 7325 }, { "epoch": 0.30414273037223855, "grad_norm": 2.725151538848877, "learning_rate": 8.1553716965114e-06, "loss": 0.5242, "step": 7326 }, { "epoch": 0.3041842458964499, "grad_norm": 2.3124167919158936, "learning_rate": 8.154850138355978e-06, "loss": 0.4109, "step": 7327 }, { "epoch": 0.3042257614206612, "grad_norm": 2.4779574871063232, "learning_rate": 8.154328523159958e-06, "loss": 0.4766, "step": 7328 }, { "epoch": 0.3042672769448726, "grad_norm": 2.490257501602173, "learning_rate": 8.153806850932771e-06, "loss": 0.5604, "step": 7329 }, { "epoch": 0.30430879246908393, "grad_norm": 2.3106465339660645, "learning_rate": 8.153285121683843e-06, "loss": 0.5284, "step": 7330 }, { "epoch": 0.30435030799329527, "grad_norm": 2.700164318084717, "learning_rate": 8.152763335422612e-06, "loss": 0.5609, "step": 7331 }, { "epoch": 0.3043918235175066, "grad_norm": 2.1907010078430176, "learning_rate": 8.152241492158513e-06, "loss": 0.5419, "step": 7332 }, { "epoch": 0.30443333904171793, "grad_norm": 2.9867453575134277, "learning_rate": 8.15171959190098e-06, "loss": 0.5169, "step": 7333 }, { "epoch": 0.30447485456592926, "grad_norm": 2.5709681510925293, "learning_rate": 8.151197634659445e-06, "loss": 0.4662, "step": 7334 }, { "epoch": 0.3045163700901406, "grad_norm": 2.37455153465271, "learning_rate": 8.150675620443351e-06, "loss": 0.4087, "step": 7335 }, { "epoch": 0.30455788561435193, "grad_norm": 2.8153653144836426, "learning_rate": 8.150153549262132e-06, "loss": 0.4435, "step": 7336 }, { "epoch": 0.30459940113856326, "grad_norm": 2.0430307388305664, "learning_rate": 8.14963142112523e-06, "loss": 0.3892, "step": 7337 }, { "epoch": 0.3046409166627746, "grad_norm": 2.8920209407806396, "learning_rate": 8.149109236042083e-06, "loss": 0.5724, "step": 7338 }, { "epoch": 0.30468243218698593, "grad_norm": 2.420797824859619, "learning_rate": 8.148586994022136e-06, "loss": 0.4963, "step": 7339 }, { "epoch": 0.30472394771119726, "grad_norm": 2.186985731124878, "learning_rate": 8.148064695074826e-06, "loss": 0.5796, "step": 7340 }, { "epoch": 0.3047654632354086, "grad_norm": 2.0729095935821533, "learning_rate": 8.1475423392096e-06, "loss": 0.4009, "step": 7341 }, { "epoch": 0.30480697875961993, "grad_norm": 1.9859380722045898, "learning_rate": 8.1470199264359e-06, "loss": 0.3364, "step": 7342 }, { "epoch": 0.30484849428383126, "grad_norm": 2.656853437423706, "learning_rate": 8.146497456763173e-06, "loss": 0.4397, "step": 7343 }, { "epoch": 0.3048900098080426, "grad_norm": 2.0956954956054688, "learning_rate": 8.145974930200866e-06, "loss": 0.4489, "step": 7344 }, { "epoch": 0.3049315253322539, "grad_norm": 2.699098587036133, "learning_rate": 8.145452346758426e-06, "loss": 0.5441, "step": 7345 }, { "epoch": 0.30497304085646526, "grad_norm": 2.583707571029663, "learning_rate": 8.1449297064453e-06, "loss": 0.6439, "step": 7346 }, { "epoch": 0.3050145563806766, "grad_norm": 2.3750758171081543, "learning_rate": 8.144407009270939e-06, "loss": 0.5708, "step": 7347 }, { "epoch": 0.3050560719048879, "grad_norm": 2.1528172492980957, "learning_rate": 8.143884255244791e-06, "loss": 0.4283, "step": 7348 }, { "epoch": 0.30509758742909926, "grad_norm": 2.474860906600952, "learning_rate": 8.143361444376312e-06, "loss": 0.5998, "step": 7349 }, { "epoch": 0.3051391029533106, "grad_norm": 2.81121826171875, "learning_rate": 8.142838576674952e-06, "loss": 0.4724, "step": 7350 }, { "epoch": 0.3051806184775219, "grad_norm": 3.2102298736572266, "learning_rate": 8.142315652150164e-06, "loss": 0.5244, "step": 7351 }, { "epoch": 0.30522213400173326, "grad_norm": 2.4712471961975098, "learning_rate": 8.141792670811402e-06, "loss": 0.4452, "step": 7352 }, { "epoch": 0.3052636495259446, "grad_norm": 2.366868257522583, "learning_rate": 8.141269632668124e-06, "loss": 0.4643, "step": 7353 }, { "epoch": 0.3053051650501559, "grad_norm": 2.4114692211151123, "learning_rate": 8.140746537729785e-06, "loss": 0.608, "step": 7354 }, { "epoch": 0.30534668057436726, "grad_norm": 3.0966365337371826, "learning_rate": 8.140223386005845e-06, "loss": 0.5034, "step": 7355 }, { "epoch": 0.3053881960985786, "grad_norm": 2.2708468437194824, "learning_rate": 8.13970017750576e-06, "loss": 0.4676, "step": 7356 }, { "epoch": 0.3054297116227899, "grad_norm": 2.514301061630249, "learning_rate": 8.139176912238991e-06, "loss": 0.5524, "step": 7357 }, { "epoch": 0.30547122714700126, "grad_norm": 2.2752339839935303, "learning_rate": 8.138653590214997e-06, "loss": 0.4649, "step": 7358 }, { "epoch": 0.3055127426712126, "grad_norm": 2.2021939754486084, "learning_rate": 8.138130211443245e-06, "loss": 0.4965, "step": 7359 }, { "epoch": 0.305554258195424, "grad_norm": 2.1129262447357178, "learning_rate": 8.137606775933191e-06, "loss": 0.4828, "step": 7360 }, { "epoch": 0.3055957737196353, "grad_norm": 2.063161611557007, "learning_rate": 8.137083283694304e-06, "loss": 0.4508, "step": 7361 }, { "epoch": 0.30563728924384664, "grad_norm": 2.5443551540374756, "learning_rate": 8.136559734736047e-06, "loss": 0.5693, "step": 7362 }, { "epoch": 0.305678804768058, "grad_norm": 2.296268939971924, "learning_rate": 8.136036129067885e-06, "loss": 0.5136, "step": 7363 }, { "epoch": 0.3057203202922693, "grad_norm": 2.016975164413452, "learning_rate": 8.135512466699285e-06, "loss": 0.5449, "step": 7364 }, { "epoch": 0.30576183581648064, "grad_norm": 3.314577341079712, "learning_rate": 8.134988747639719e-06, "loss": 0.4553, "step": 7365 }, { "epoch": 0.305803351340692, "grad_norm": 2.111294746398926, "learning_rate": 8.13446497189865e-06, "loss": 0.3875, "step": 7366 }, { "epoch": 0.3058448668649033, "grad_norm": 2.764996290206909, "learning_rate": 8.133941139485551e-06, "loss": 0.4971, "step": 7367 }, { "epoch": 0.30588638238911464, "grad_norm": 2.176886796951294, "learning_rate": 8.133417250409894e-06, "loss": 0.6025, "step": 7368 }, { "epoch": 0.305927897913326, "grad_norm": 2.811523914337158, "learning_rate": 8.13289330468115e-06, "loss": 0.5896, "step": 7369 }, { "epoch": 0.3059694134375373, "grad_norm": 2.6175198554992676, "learning_rate": 8.132369302308792e-06, "loss": 0.4716, "step": 7370 }, { "epoch": 0.30601092896174864, "grad_norm": 2.289031505584717, "learning_rate": 8.131845243302293e-06, "loss": 0.4436, "step": 7371 }, { "epoch": 0.30605244448596, "grad_norm": 2.626417636871338, "learning_rate": 8.13132112767113e-06, "loss": 0.6038, "step": 7372 }, { "epoch": 0.3060939600101713, "grad_norm": 2.1615195274353027, "learning_rate": 8.13079695542478e-06, "loss": 0.5413, "step": 7373 }, { "epoch": 0.30613547553438264, "grad_norm": 1.7620095014572144, "learning_rate": 8.130272726572717e-06, "loss": 0.4299, "step": 7374 }, { "epoch": 0.306176991058594, "grad_norm": 2.3458409309387207, "learning_rate": 8.12974844112442e-06, "loss": 0.6018, "step": 7375 }, { "epoch": 0.3062185065828053, "grad_norm": 2.7155532836914062, "learning_rate": 8.12922409908937e-06, "loss": 0.4844, "step": 7376 }, { "epoch": 0.30626002210701664, "grad_norm": 2.391921043395996, "learning_rate": 8.128699700477048e-06, "loss": 0.5946, "step": 7377 }, { "epoch": 0.30630153763122797, "grad_norm": 3.084174394607544, "learning_rate": 8.128175245296933e-06, "loss": 0.4534, "step": 7378 }, { "epoch": 0.3063430531554393, "grad_norm": 3.3721840381622314, "learning_rate": 8.127650733558505e-06, "loss": 0.5504, "step": 7379 }, { "epoch": 0.30638456867965064, "grad_norm": 2.926908016204834, "learning_rate": 8.127126165271253e-06, "loss": 0.474, "step": 7380 }, { "epoch": 0.30642608420386197, "grad_norm": 2.3045432567596436, "learning_rate": 8.126601540444658e-06, "loss": 0.3841, "step": 7381 }, { "epoch": 0.3064675997280733, "grad_norm": 2.4810476303100586, "learning_rate": 8.126076859088206e-06, "loss": 0.5432, "step": 7382 }, { "epoch": 0.30650911525228464, "grad_norm": 2.7414321899414062, "learning_rate": 8.125552121211385e-06, "loss": 0.4, "step": 7383 }, { "epoch": 0.30655063077649597, "grad_norm": 2.307544708251953, "learning_rate": 8.125027326823678e-06, "loss": 0.4997, "step": 7384 }, { "epoch": 0.3065921463007073, "grad_norm": 2.5602657794952393, "learning_rate": 8.124502475934578e-06, "loss": 0.5047, "step": 7385 }, { "epoch": 0.30663366182491864, "grad_norm": 2.818047046661377, "learning_rate": 8.123977568553572e-06, "loss": 0.6243, "step": 7386 }, { "epoch": 0.30667517734912997, "grad_norm": 2.3583197593688965, "learning_rate": 8.12345260469015e-06, "loss": 0.5266, "step": 7387 }, { "epoch": 0.3067166928733413, "grad_norm": 2.4847829341888428, "learning_rate": 8.122927584353806e-06, "loss": 0.5578, "step": 7388 }, { "epoch": 0.30675820839755263, "grad_norm": 2.257498264312744, "learning_rate": 8.12240250755403e-06, "loss": 0.4381, "step": 7389 }, { "epoch": 0.30679972392176397, "grad_norm": 2.3126769065856934, "learning_rate": 8.121877374300318e-06, "loss": 0.4733, "step": 7390 }, { "epoch": 0.30684123944597536, "grad_norm": 2.273191452026367, "learning_rate": 8.121352184602163e-06, "loss": 0.4547, "step": 7391 }, { "epoch": 0.3068827549701867, "grad_norm": 3.0604424476623535, "learning_rate": 8.12082693846906e-06, "loss": 0.5019, "step": 7392 }, { "epoch": 0.306924270494398, "grad_norm": 2.962322950363159, "learning_rate": 8.120301635910507e-06, "loss": 0.4832, "step": 7393 }, { "epoch": 0.30696578601860935, "grad_norm": 2.147839069366455, "learning_rate": 8.119776276936002e-06, "loss": 0.4958, "step": 7394 }, { "epoch": 0.3070073015428207, "grad_norm": 3.0503525733947754, "learning_rate": 8.119250861555041e-06, "loss": 0.5174, "step": 7395 }, { "epoch": 0.307048817067032, "grad_norm": 2.2926137447357178, "learning_rate": 8.118725389777126e-06, "loss": 0.523, "step": 7396 }, { "epoch": 0.30709033259124335, "grad_norm": 2.528409242630005, "learning_rate": 8.118199861611757e-06, "loss": 0.4517, "step": 7397 }, { "epoch": 0.3071318481154547, "grad_norm": 2.1886610984802246, "learning_rate": 8.117674277068437e-06, "loss": 0.4082, "step": 7398 }, { "epoch": 0.307173363639666, "grad_norm": 2.6292965412139893, "learning_rate": 8.117148636156667e-06, "loss": 0.5481, "step": 7399 }, { "epoch": 0.30721487916387735, "grad_norm": 2.2529749870300293, "learning_rate": 8.11662293888595e-06, "loss": 0.4656, "step": 7400 }, { "epoch": 0.3072563946880887, "grad_norm": 2.4273223876953125, "learning_rate": 8.116097185265793e-06, "loss": 0.5803, "step": 7401 }, { "epoch": 0.3072979102123, "grad_norm": 2.5734174251556396, "learning_rate": 8.1155713753057e-06, "loss": 0.3507, "step": 7402 }, { "epoch": 0.30733942573651135, "grad_norm": 2.695221424102783, "learning_rate": 8.115045509015179e-06, "loss": 0.5821, "step": 7403 }, { "epoch": 0.3073809412607227, "grad_norm": 2.3515570163726807, "learning_rate": 8.114519586403737e-06, "loss": 0.478, "step": 7404 }, { "epoch": 0.307422456784934, "grad_norm": 2.7898266315460205, "learning_rate": 8.113993607480883e-06, "loss": 0.5903, "step": 7405 }, { "epoch": 0.30746397230914535, "grad_norm": 2.7614669799804688, "learning_rate": 8.113467572256128e-06, "loss": 0.6539, "step": 7406 }, { "epoch": 0.3075054878333567, "grad_norm": 2.515040874481201, "learning_rate": 8.11294148073898e-06, "loss": 0.4599, "step": 7407 }, { "epoch": 0.307547003357568, "grad_norm": 2.533730983734131, "learning_rate": 8.112415332938954e-06, "loss": 0.5294, "step": 7408 }, { "epoch": 0.30758851888177935, "grad_norm": 2.5077505111694336, "learning_rate": 8.111889128865562e-06, "loss": 0.5287, "step": 7409 }, { "epoch": 0.3076300344059907, "grad_norm": 2.6156117916107178, "learning_rate": 8.111362868528317e-06, "loss": 0.5497, "step": 7410 }, { "epoch": 0.307671549930202, "grad_norm": 2.0421299934387207, "learning_rate": 8.110836551936737e-06, "loss": 0.3588, "step": 7411 }, { "epoch": 0.30771306545441335, "grad_norm": 2.4790492057800293, "learning_rate": 8.110310179100331e-06, "loss": 0.447, "step": 7412 }, { "epoch": 0.3077545809786247, "grad_norm": 2.4633705615997314, "learning_rate": 8.109783750028623e-06, "loss": 0.4485, "step": 7413 }, { "epoch": 0.307796096502836, "grad_norm": 3.190757989883423, "learning_rate": 8.10925726473113e-06, "loss": 0.5214, "step": 7414 }, { "epoch": 0.30783761202704735, "grad_norm": 2.6981542110443115, "learning_rate": 8.108730723217367e-06, "loss": 0.6098, "step": 7415 }, { "epoch": 0.3078791275512587, "grad_norm": 2.0225870609283447, "learning_rate": 8.108204125496856e-06, "loss": 0.4052, "step": 7416 }, { "epoch": 0.30792064307547, "grad_norm": 3.641774892807007, "learning_rate": 8.107677471579121e-06, "loss": 0.6144, "step": 7417 }, { "epoch": 0.30796215859968135, "grad_norm": 2.617931604385376, "learning_rate": 8.10715076147368e-06, "loss": 0.4622, "step": 7418 }, { "epoch": 0.3080036741238927, "grad_norm": 3.1223549842834473, "learning_rate": 8.106623995190058e-06, "loss": 0.4035, "step": 7419 }, { "epoch": 0.308045189648104, "grad_norm": 2.6992251873016357, "learning_rate": 8.106097172737779e-06, "loss": 0.536, "step": 7420 }, { "epoch": 0.3080867051723154, "grad_norm": 2.659620523452759, "learning_rate": 8.105570294126369e-06, "loss": 0.4297, "step": 7421 }, { "epoch": 0.30812822069652673, "grad_norm": 2.339244842529297, "learning_rate": 8.10504335936535e-06, "loss": 0.5303, "step": 7422 }, { "epoch": 0.30816973622073807, "grad_norm": 2.669238328933716, "learning_rate": 8.104516368464254e-06, "loss": 0.5572, "step": 7423 }, { "epoch": 0.3082112517449494, "grad_norm": 2.5106184482574463, "learning_rate": 8.103989321432607e-06, "loss": 0.4628, "step": 7424 }, { "epoch": 0.30825276726916073, "grad_norm": 2.4600188732147217, "learning_rate": 8.103462218279939e-06, "loss": 0.4977, "step": 7425 }, { "epoch": 0.30829428279337207, "grad_norm": 2.7140355110168457, "learning_rate": 8.102935059015778e-06, "loss": 0.4927, "step": 7426 }, { "epoch": 0.3083357983175834, "grad_norm": 2.177706003189087, "learning_rate": 8.102407843649658e-06, "loss": 0.4416, "step": 7427 }, { "epoch": 0.30837731384179473, "grad_norm": 1.8372211456298828, "learning_rate": 8.10188057219111e-06, "loss": 0.4936, "step": 7428 }, { "epoch": 0.30841882936600606, "grad_norm": 2.204709529876709, "learning_rate": 8.101353244649666e-06, "loss": 0.5085, "step": 7429 }, { "epoch": 0.3084603448902174, "grad_norm": 1.9783254861831665, "learning_rate": 8.100825861034861e-06, "loss": 0.6219, "step": 7430 }, { "epoch": 0.30850186041442873, "grad_norm": 2.376094102859497, "learning_rate": 8.100298421356234e-06, "loss": 0.5748, "step": 7431 }, { "epoch": 0.30854337593864006, "grad_norm": 2.0976977348327637, "learning_rate": 8.099770925623317e-06, "loss": 0.384, "step": 7432 }, { "epoch": 0.3085848914628514, "grad_norm": 2.298671007156372, "learning_rate": 8.099243373845646e-06, "loss": 0.5234, "step": 7433 }, { "epoch": 0.30862640698706273, "grad_norm": 2.329113245010376, "learning_rate": 8.098715766032762e-06, "loss": 0.4817, "step": 7434 }, { "epoch": 0.30866792251127406, "grad_norm": 2.498950242996216, "learning_rate": 8.098188102194204e-06, "loss": 0.5267, "step": 7435 }, { "epoch": 0.3087094380354854, "grad_norm": 2.4774670600891113, "learning_rate": 8.097660382339513e-06, "loss": 0.4371, "step": 7436 }, { "epoch": 0.30875095355969673, "grad_norm": 2.8725132942199707, "learning_rate": 8.09713260647823e-06, "loss": 0.6091, "step": 7437 }, { "epoch": 0.30879246908390806, "grad_norm": 2.1617300510406494, "learning_rate": 8.096604774619895e-06, "loss": 0.4167, "step": 7438 }, { "epoch": 0.3088339846081194, "grad_norm": 2.7865121364593506, "learning_rate": 8.096076886774054e-06, "loss": 0.5792, "step": 7439 }, { "epoch": 0.3088755001323307, "grad_norm": 2.4719431400299072, "learning_rate": 8.09554894295025e-06, "loss": 0.7623, "step": 7440 }, { "epoch": 0.30891701565654206, "grad_norm": 2.3509955406188965, "learning_rate": 8.09502094315803e-06, "loss": 0.4252, "step": 7441 }, { "epoch": 0.3089585311807534, "grad_norm": 2.719359874725342, "learning_rate": 8.094492887406939e-06, "loss": 0.4885, "step": 7442 }, { "epoch": 0.3090000467049647, "grad_norm": 2.8920605182647705, "learning_rate": 8.093964775706524e-06, "loss": 0.4617, "step": 7443 }, { "epoch": 0.30904156222917606, "grad_norm": 2.566244602203369, "learning_rate": 8.093436608066335e-06, "loss": 0.5065, "step": 7444 }, { "epoch": 0.3090830777533874, "grad_norm": 2.7594950199127197, "learning_rate": 8.092908384495919e-06, "loss": 0.5008, "step": 7445 }, { "epoch": 0.3091245932775987, "grad_norm": 2.126986265182495, "learning_rate": 8.092380105004829e-06, "loss": 0.5025, "step": 7446 }, { "epoch": 0.30916610880181006, "grad_norm": 2.6305298805236816, "learning_rate": 8.091851769602615e-06, "loss": 0.5074, "step": 7447 }, { "epoch": 0.3092076243260214, "grad_norm": 2.5660738945007324, "learning_rate": 8.09132337829883e-06, "loss": 0.4865, "step": 7448 }, { "epoch": 0.3092491398502327, "grad_norm": 2.387592077255249, "learning_rate": 8.090794931103026e-06, "loss": 0.442, "step": 7449 }, { "epoch": 0.30929065537444406, "grad_norm": 2.379455327987671, "learning_rate": 8.09026642802476e-06, "loss": 0.5139, "step": 7450 }, { "epoch": 0.3093321708986554, "grad_norm": 2.94874906539917, "learning_rate": 8.089737869073588e-06, "loss": 0.4694, "step": 7451 }, { "epoch": 0.3093736864228668, "grad_norm": 2.6881470680236816, "learning_rate": 8.089209254259062e-06, "loss": 0.6259, "step": 7452 }, { "epoch": 0.3094152019470781, "grad_norm": 2.408212423324585, "learning_rate": 8.088680583590743e-06, "loss": 0.5279, "step": 7453 }, { "epoch": 0.30945671747128944, "grad_norm": 2.4911739826202393, "learning_rate": 8.08815185707819e-06, "loss": 0.6129, "step": 7454 }, { "epoch": 0.3094982329955008, "grad_norm": 2.227689743041992, "learning_rate": 8.08762307473096e-06, "loss": 0.3755, "step": 7455 }, { "epoch": 0.3095397485197121, "grad_norm": 2.303586959838867, "learning_rate": 8.087094236558616e-06, "loss": 0.5679, "step": 7456 }, { "epoch": 0.30958126404392344, "grad_norm": 2.3885271549224854, "learning_rate": 8.086565342570718e-06, "loss": 0.4645, "step": 7457 }, { "epoch": 0.3096227795681348, "grad_norm": 2.44730544090271, "learning_rate": 8.086036392776828e-06, "loss": 0.4558, "step": 7458 }, { "epoch": 0.3096642950923461, "grad_norm": 2.644191026687622, "learning_rate": 8.08550738718651e-06, "loss": 0.6396, "step": 7459 }, { "epoch": 0.30970581061655744, "grad_norm": 2.5130505561828613, "learning_rate": 8.084978325809332e-06, "loss": 0.5668, "step": 7460 }, { "epoch": 0.3097473261407688, "grad_norm": 2.4205949306488037, "learning_rate": 8.084449208654855e-06, "loss": 0.5818, "step": 7461 }, { "epoch": 0.3097888416649801, "grad_norm": 1.9307645559310913, "learning_rate": 8.083920035732646e-06, "loss": 0.5284, "step": 7462 }, { "epoch": 0.30983035718919144, "grad_norm": 2.336850881576538, "learning_rate": 8.083390807052276e-06, "loss": 0.4456, "step": 7463 }, { "epoch": 0.3098718727134028, "grad_norm": 2.611955165863037, "learning_rate": 8.082861522623311e-06, "loss": 0.5287, "step": 7464 }, { "epoch": 0.3099133882376141, "grad_norm": 2.416302442550659, "learning_rate": 8.082332182455321e-06, "loss": 0.7213, "step": 7465 }, { "epoch": 0.30995490376182544, "grad_norm": 2.3294425010681152, "learning_rate": 8.081802786557875e-06, "loss": 0.6243, "step": 7466 }, { "epoch": 0.3099964192860368, "grad_norm": 2.199841260910034, "learning_rate": 8.081273334940549e-06, "loss": 0.4596, "step": 7467 }, { "epoch": 0.3100379348102481, "grad_norm": 2.960815191268921, "learning_rate": 8.08074382761291e-06, "loss": 0.634, "step": 7468 }, { "epoch": 0.31007945033445944, "grad_norm": 2.574671506881714, "learning_rate": 8.080214264584539e-06, "loss": 0.5964, "step": 7469 }, { "epoch": 0.31012096585867077, "grad_norm": 2.9648561477661133, "learning_rate": 8.079684645865002e-06, "loss": 0.5963, "step": 7470 }, { "epoch": 0.3101624813828821, "grad_norm": 2.4412779808044434, "learning_rate": 8.079154971463883e-06, "loss": 0.4141, "step": 7471 }, { "epoch": 0.31020399690709344, "grad_norm": 2.6852529048919678, "learning_rate": 8.07862524139075e-06, "loss": 0.6839, "step": 7472 }, { "epoch": 0.31024551243130477, "grad_norm": 2.486281156539917, "learning_rate": 8.078095455655188e-06, "loss": 0.5535, "step": 7473 }, { "epoch": 0.3102870279555161, "grad_norm": 2.8220245838165283, "learning_rate": 8.077565614266773e-06, "loss": 0.6695, "step": 7474 }, { "epoch": 0.31032854347972744, "grad_norm": 2.342588424682617, "learning_rate": 8.077035717235084e-06, "loss": 0.6004, "step": 7475 }, { "epoch": 0.31037005900393877, "grad_norm": 1.9626437425613403, "learning_rate": 8.076505764569702e-06, "loss": 0.4936, "step": 7476 }, { "epoch": 0.3104115745281501, "grad_norm": 2.866124153137207, "learning_rate": 8.07597575628021e-06, "loss": 0.4884, "step": 7477 }, { "epoch": 0.31045309005236144, "grad_norm": 3.1000795364379883, "learning_rate": 8.075445692376188e-06, "loss": 0.555, "step": 7478 }, { "epoch": 0.31049460557657277, "grad_norm": 2.927114963531494, "learning_rate": 8.074915572867224e-06, "loss": 0.5999, "step": 7479 }, { "epoch": 0.3105361211007841, "grad_norm": 2.31903338432312, "learning_rate": 8.074385397762898e-06, "loss": 0.4962, "step": 7480 }, { "epoch": 0.31057763662499543, "grad_norm": 2.986776828765869, "learning_rate": 8.0738551670728e-06, "loss": 0.3751, "step": 7481 }, { "epoch": 0.31061915214920677, "grad_norm": 2.35994815826416, "learning_rate": 8.073324880806512e-06, "loss": 0.5836, "step": 7482 }, { "epoch": 0.31066066767341816, "grad_norm": 2.2374000549316406, "learning_rate": 8.072794538973627e-06, "loss": 0.4604, "step": 7483 }, { "epoch": 0.3107021831976295, "grad_norm": 2.904132127761841, "learning_rate": 8.07226414158373e-06, "loss": 0.6081, "step": 7484 }, { "epoch": 0.3107436987218408, "grad_norm": 2.2324976921081543, "learning_rate": 8.07173368864641e-06, "loss": 0.4592, "step": 7485 }, { "epoch": 0.31078521424605215, "grad_norm": 2.411386013031006, "learning_rate": 8.071203180171262e-06, "loss": 0.4075, "step": 7486 }, { "epoch": 0.3108267297702635, "grad_norm": 2.425882577896118, "learning_rate": 8.070672616167874e-06, "loss": 0.4752, "step": 7487 }, { "epoch": 0.3108682452944748, "grad_norm": 3.178654432296753, "learning_rate": 8.07014199664584e-06, "loss": 0.4868, "step": 7488 }, { "epoch": 0.31090976081868615, "grad_norm": 2.539243221282959, "learning_rate": 8.069611321614754e-06, "loss": 0.5085, "step": 7489 }, { "epoch": 0.3109512763428975, "grad_norm": 1.9915926456451416, "learning_rate": 8.069080591084211e-06, "loss": 0.4085, "step": 7490 }, { "epoch": 0.3109927918671088, "grad_norm": 2.7920892238616943, "learning_rate": 8.068549805063806e-06, "loss": 0.4603, "step": 7491 }, { "epoch": 0.31103430739132015, "grad_norm": 3.0571036338806152, "learning_rate": 8.068018963563136e-06, "loss": 0.6255, "step": 7492 }, { "epoch": 0.3110758229155315, "grad_norm": 2.328207492828369, "learning_rate": 8.0674880665918e-06, "loss": 0.5469, "step": 7493 }, { "epoch": 0.3111173384397428, "grad_norm": 2.34208345413208, "learning_rate": 8.066957114159394e-06, "loss": 0.5825, "step": 7494 }, { "epoch": 0.31115885396395415, "grad_norm": 2.0985584259033203, "learning_rate": 8.06642610627552e-06, "loss": 0.4359, "step": 7495 }, { "epoch": 0.3112003694881655, "grad_norm": 2.309849739074707, "learning_rate": 8.065895042949778e-06, "loss": 0.6437, "step": 7496 }, { "epoch": 0.3112418850123768, "grad_norm": 2.572869062423706, "learning_rate": 8.065363924191772e-06, "loss": 0.4861, "step": 7497 }, { "epoch": 0.31128340053658815, "grad_norm": 2.563788890838623, "learning_rate": 8.064832750011099e-06, "loss": 0.5341, "step": 7498 }, { "epoch": 0.3113249160607995, "grad_norm": 2.537473678588867, "learning_rate": 8.064301520417367e-06, "loss": 0.4774, "step": 7499 }, { "epoch": 0.3113664315850108, "grad_norm": 2.712125062942505, "learning_rate": 8.063770235420184e-06, "loss": 0.6266, "step": 7500 }, { "epoch": 0.31140794710922215, "grad_norm": 2.7803094387054443, "learning_rate": 8.06323889502915e-06, "loss": 0.5608, "step": 7501 }, { "epoch": 0.3114494626334335, "grad_norm": 2.2741923332214355, "learning_rate": 8.062707499253873e-06, "loss": 0.536, "step": 7502 }, { "epoch": 0.3114909781576448, "grad_norm": 2.3338873386383057, "learning_rate": 8.06217604810396e-06, "loss": 0.412, "step": 7503 }, { "epoch": 0.31153249368185615, "grad_norm": 2.5573904514312744, "learning_rate": 8.061644541589024e-06, "loss": 0.4998, "step": 7504 }, { "epoch": 0.3115740092060675, "grad_norm": 2.299567937850952, "learning_rate": 8.061112979718672e-06, "loss": 0.5496, "step": 7505 }, { "epoch": 0.3116155247302788, "grad_norm": 2.865422010421753, "learning_rate": 8.060581362502515e-06, "loss": 0.5421, "step": 7506 }, { "epoch": 0.31165704025449015, "grad_norm": 2.497746467590332, "learning_rate": 8.060049689950165e-06, "loss": 0.4977, "step": 7507 }, { "epoch": 0.3116985557787015, "grad_norm": 2.478317975997925, "learning_rate": 8.059517962071234e-06, "loss": 0.5209, "step": 7508 }, { "epoch": 0.3117400713029128, "grad_norm": 2.429987668991089, "learning_rate": 8.058986178875337e-06, "loss": 0.5658, "step": 7509 }, { "epoch": 0.31178158682712415, "grad_norm": 2.7425742149353027, "learning_rate": 8.058454340372089e-06, "loss": 0.5895, "step": 7510 }, { "epoch": 0.3118231023513355, "grad_norm": 3.223440170288086, "learning_rate": 8.057922446571103e-06, "loss": 0.4964, "step": 7511 }, { "epoch": 0.3118646178755468, "grad_norm": 2.310673475265503, "learning_rate": 8.057390497481999e-06, "loss": 0.5314, "step": 7512 }, { "epoch": 0.31190613339975815, "grad_norm": 2.5853593349456787, "learning_rate": 8.056858493114392e-06, "loss": 0.483, "step": 7513 }, { "epoch": 0.31194764892396953, "grad_norm": 2.22965145111084, "learning_rate": 8.056326433477904e-06, "loss": 0.4772, "step": 7514 }, { "epoch": 0.31198916444818087, "grad_norm": 2.6190567016601562, "learning_rate": 8.055794318582151e-06, "loss": 0.5389, "step": 7515 }, { "epoch": 0.3120306799723922, "grad_norm": 3.562559127807617, "learning_rate": 8.055262148436757e-06, "loss": 0.6063, "step": 7516 }, { "epoch": 0.31207219549660353, "grad_norm": 2.589399814605713, "learning_rate": 8.054729923051346e-06, "loss": 0.5636, "step": 7517 }, { "epoch": 0.31211371102081487, "grad_norm": 2.629246950149536, "learning_rate": 8.054197642435532e-06, "loss": 0.5397, "step": 7518 }, { "epoch": 0.3121552265450262, "grad_norm": 2.409693956375122, "learning_rate": 8.053665306598949e-06, "loss": 0.4867, "step": 7519 }, { "epoch": 0.31219674206923753, "grad_norm": 2.4982995986938477, "learning_rate": 8.053132915551214e-06, "loss": 0.6153, "step": 7520 }, { "epoch": 0.31223825759344886, "grad_norm": 2.3386993408203125, "learning_rate": 8.052600469301958e-06, "loss": 0.6111, "step": 7521 }, { "epoch": 0.3122797731176602, "grad_norm": 2.539994955062866, "learning_rate": 8.052067967860805e-06, "loss": 0.4702, "step": 7522 }, { "epoch": 0.31232128864187153, "grad_norm": 2.6431002616882324, "learning_rate": 8.051535411237384e-06, "loss": 0.5642, "step": 7523 }, { "epoch": 0.31236280416608286, "grad_norm": 2.2130374908447266, "learning_rate": 8.05100279944132e-06, "loss": 0.5339, "step": 7524 }, { "epoch": 0.3124043196902942, "grad_norm": 2.306342363357544, "learning_rate": 8.050470132482251e-06, "loss": 0.7553, "step": 7525 }, { "epoch": 0.31244583521450553, "grad_norm": 3.341667413711548, "learning_rate": 8.049937410369799e-06, "loss": 0.6388, "step": 7526 }, { "epoch": 0.31248735073871686, "grad_norm": 2.443549633026123, "learning_rate": 8.0494046331136e-06, "loss": 0.5886, "step": 7527 }, { "epoch": 0.3125288662629282, "grad_norm": 2.31980562210083, "learning_rate": 8.048871800723289e-06, "loss": 0.4419, "step": 7528 }, { "epoch": 0.31257038178713953, "grad_norm": 2.2837414741516113, "learning_rate": 8.048338913208493e-06, "loss": 0.6346, "step": 7529 }, { "epoch": 0.31261189731135086, "grad_norm": 1.9930745363235474, "learning_rate": 8.047805970578855e-06, "loss": 0.4838, "step": 7530 }, { "epoch": 0.3126534128355622, "grad_norm": 2.535818338394165, "learning_rate": 8.047272972844003e-06, "loss": 0.5965, "step": 7531 }, { "epoch": 0.3126949283597735, "grad_norm": 2.5859453678131104, "learning_rate": 8.046739920013579e-06, "loss": 0.5993, "step": 7532 }, { "epoch": 0.31273644388398486, "grad_norm": 2.286487579345703, "learning_rate": 8.04620681209722e-06, "loss": 0.5088, "step": 7533 }, { "epoch": 0.3127779594081962, "grad_norm": 2.546731948852539, "learning_rate": 8.045673649104562e-06, "loss": 0.4156, "step": 7534 }, { "epoch": 0.3128194749324075, "grad_norm": 2.6700098514556885, "learning_rate": 8.045140431045248e-06, "loss": 0.7144, "step": 7535 }, { "epoch": 0.31286099045661886, "grad_norm": 2.81058669090271, "learning_rate": 8.044607157928917e-06, "loss": 0.5209, "step": 7536 }, { "epoch": 0.3129025059808302, "grad_norm": 3.154935836791992, "learning_rate": 8.04407382976521e-06, "loss": 0.5963, "step": 7537 }, { "epoch": 0.3129440215050415, "grad_norm": 2.8312740325927734, "learning_rate": 8.04354044656377e-06, "loss": 0.3407, "step": 7538 }, { "epoch": 0.31298553702925286, "grad_norm": 2.417600154876709, "learning_rate": 8.043007008334243e-06, "loss": 0.4858, "step": 7539 }, { "epoch": 0.3130270525534642, "grad_norm": 2.812948703765869, "learning_rate": 8.042473515086274e-06, "loss": 0.5213, "step": 7540 }, { "epoch": 0.3130685680776755, "grad_norm": 2.5495707988739014, "learning_rate": 8.041939966829503e-06, "loss": 0.5164, "step": 7541 }, { "epoch": 0.31311008360188686, "grad_norm": 2.3204879760742188, "learning_rate": 8.041406363573582e-06, "loss": 0.5546, "step": 7542 }, { "epoch": 0.3131515991260982, "grad_norm": 2.3189899921417236, "learning_rate": 8.04087270532816e-06, "loss": 0.4948, "step": 7543 }, { "epoch": 0.3131931146503095, "grad_norm": 2.699831247329712, "learning_rate": 8.040338992102881e-06, "loss": 0.5273, "step": 7544 }, { "epoch": 0.3132346301745209, "grad_norm": 2.5865039825439453, "learning_rate": 8.039805223907396e-06, "loss": 0.5616, "step": 7545 }, { "epoch": 0.31327614569873224, "grad_norm": 2.404681444168091, "learning_rate": 8.039271400751355e-06, "loss": 0.5268, "step": 7546 }, { "epoch": 0.3133176612229436, "grad_norm": 2.387002944946289, "learning_rate": 8.038737522644415e-06, "loss": 0.5333, "step": 7547 }, { "epoch": 0.3133591767471549, "grad_norm": 2.7226321697235107, "learning_rate": 8.038203589596223e-06, "loss": 0.5088, "step": 7548 }, { "epoch": 0.31340069227136624, "grad_norm": 2.2746450901031494, "learning_rate": 8.037669601616434e-06, "loss": 0.5122, "step": 7549 }, { "epoch": 0.3134422077955776, "grad_norm": 2.428370952606201, "learning_rate": 8.037135558714704e-06, "loss": 0.5148, "step": 7550 }, { "epoch": 0.3134837233197889, "grad_norm": 2.573610544204712, "learning_rate": 8.036601460900687e-06, "loss": 0.5182, "step": 7551 }, { "epoch": 0.31352523884400024, "grad_norm": 2.1631250381469727, "learning_rate": 8.03606730818404e-06, "loss": 0.5517, "step": 7552 }, { "epoch": 0.3135667543682116, "grad_norm": 2.6638259887695312, "learning_rate": 8.035533100574422e-06, "loss": 0.57, "step": 7553 }, { "epoch": 0.3136082698924229, "grad_norm": 2.1577625274658203, "learning_rate": 8.03499883808149e-06, "loss": 0.4777, "step": 7554 }, { "epoch": 0.31364978541663424, "grad_norm": 2.32124662399292, "learning_rate": 8.034464520714903e-06, "loss": 0.5582, "step": 7555 }, { "epoch": 0.3136913009408456, "grad_norm": 2.993940591812134, "learning_rate": 8.033930148484325e-06, "loss": 0.4343, "step": 7556 }, { "epoch": 0.3137328164650569, "grad_norm": 2.510113000869751, "learning_rate": 8.033395721399413e-06, "loss": 0.4744, "step": 7557 }, { "epoch": 0.31377433198926824, "grad_norm": 2.1135284900665283, "learning_rate": 8.032861239469833e-06, "loss": 0.4439, "step": 7558 }, { "epoch": 0.3138158475134796, "grad_norm": 3.166245460510254, "learning_rate": 8.032326702705249e-06, "loss": 0.4738, "step": 7559 }, { "epoch": 0.3138573630376909, "grad_norm": 3.087965726852417, "learning_rate": 8.031792111115323e-06, "loss": 0.7066, "step": 7560 }, { "epoch": 0.31389887856190224, "grad_norm": 2.2101693153381348, "learning_rate": 8.031257464709723e-06, "loss": 0.5254, "step": 7561 }, { "epoch": 0.31394039408611357, "grad_norm": 2.6055710315704346, "learning_rate": 8.030722763498114e-06, "loss": 0.4608, "step": 7562 }, { "epoch": 0.3139819096103249, "grad_norm": 3.4384939670562744, "learning_rate": 8.030188007490164e-06, "loss": 0.5515, "step": 7563 }, { "epoch": 0.31402342513453624, "grad_norm": 2.3693745136260986, "learning_rate": 8.02965319669554e-06, "loss": 0.5497, "step": 7564 }, { "epoch": 0.31406494065874757, "grad_norm": 2.205599308013916, "learning_rate": 8.029118331123914e-06, "loss": 0.513, "step": 7565 }, { "epoch": 0.3141064561829589, "grad_norm": 2.229063034057617, "learning_rate": 8.028583410784953e-06, "loss": 0.426, "step": 7566 }, { "epoch": 0.31414797170717024, "grad_norm": 2.85418438911438, "learning_rate": 8.028048435688333e-06, "loss": 0.5364, "step": 7567 }, { "epoch": 0.31418948723138157, "grad_norm": 2.858067035675049, "learning_rate": 8.027513405843725e-06, "loss": 0.4981, "step": 7568 }, { "epoch": 0.3142310027555929, "grad_norm": 3.3425159454345703, "learning_rate": 8.026978321260804e-06, "loss": 0.4953, "step": 7569 }, { "epoch": 0.31427251827980424, "grad_norm": 2.1638717651367188, "learning_rate": 8.026443181949239e-06, "loss": 0.5401, "step": 7570 }, { "epoch": 0.31431403380401557, "grad_norm": 2.8872616291046143, "learning_rate": 8.025907987918709e-06, "loss": 0.4688, "step": 7571 }, { "epoch": 0.3143555493282269, "grad_norm": 2.444077730178833, "learning_rate": 8.025372739178891e-06, "loss": 0.5189, "step": 7572 }, { "epoch": 0.31439706485243823, "grad_norm": 2.3080782890319824, "learning_rate": 8.024837435739463e-06, "loss": 0.4816, "step": 7573 }, { "epoch": 0.31443858037664957, "grad_norm": 2.422769546508789, "learning_rate": 8.024302077610103e-06, "loss": 0.5827, "step": 7574 }, { "epoch": 0.3144800959008609, "grad_norm": 2.339747905731201, "learning_rate": 8.023766664800488e-06, "loss": 0.6677, "step": 7575 }, { "epoch": 0.3145216114250723, "grad_norm": 2.7355246543884277, "learning_rate": 8.0232311973203e-06, "loss": 0.5234, "step": 7576 }, { "epoch": 0.3145631269492836, "grad_norm": 2.7133781909942627, "learning_rate": 8.022695675179222e-06, "loss": 0.59, "step": 7577 }, { "epoch": 0.31460464247349496, "grad_norm": 2.952763557434082, "learning_rate": 8.022160098386932e-06, "loss": 0.5111, "step": 7578 }, { "epoch": 0.3146461579977063, "grad_norm": 2.2735812664031982, "learning_rate": 8.021624466953121e-06, "loss": 0.5848, "step": 7579 }, { "epoch": 0.3146876735219176, "grad_norm": 2.612506628036499, "learning_rate": 8.021088780887464e-06, "loss": 0.4437, "step": 7580 }, { "epoch": 0.31472918904612895, "grad_norm": 2.396954298019409, "learning_rate": 8.020553040199654e-06, "loss": 0.3942, "step": 7581 }, { "epoch": 0.3147707045703403, "grad_norm": 2.8558595180511475, "learning_rate": 8.020017244899374e-06, "loss": 0.5806, "step": 7582 }, { "epoch": 0.3148122200945516, "grad_norm": 2.449622392654419, "learning_rate": 8.019481394996312e-06, "loss": 0.4401, "step": 7583 }, { "epoch": 0.31485373561876295, "grad_norm": 2.7271831035614014, "learning_rate": 8.018945490500154e-06, "loss": 0.6021, "step": 7584 }, { "epoch": 0.3148952511429743, "grad_norm": 2.2790775299072266, "learning_rate": 8.018409531420594e-06, "loss": 0.4891, "step": 7585 }, { "epoch": 0.3149367666671856, "grad_norm": 2.4378764629364014, "learning_rate": 8.017873517767318e-06, "loss": 0.5311, "step": 7586 }, { "epoch": 0.31497828219139695, "grad_norm": 2.0490832328796387, "learning_rate": 8.01733744955002e-06, "loss": 0.4047, "step": 7587 }, { "epoch": 0.3150197977156083, "grad_norm": 2.631890058517456, "learning_rate": 8.016801326778391e-06, "loss": 0.5116, "step": 7588 }, { "epoch": 0.3150613132398196, "grad_norm": 2.6873626708984375, "learning_rate": 8.016265149462124e-06, "loss": 0.51, "step": 7589 }, { "epoch": 0.31510282876403095, "grad_norm": 2.6294121742248535, "learning_rate": 8.015728917610914e-06, "loss": 0.5439, "step": 7590 }, { "epoch": 0.3151443442882423, "grad_norm": 2.2493064403533936, "learning_rate": 8.015192631234457e-06, "loss": 0.7616, "step": 7591 }, { "epoch": 0.3151858598124536, "grad_norm": 2.4628942012786865, "learning_rate": 8.014656290342446e-06, "loss": 0.6614, "step": 7592 }, { "epoch": 0.31522737533666495, "grad_norm": 2.423204183578491, "learning_rate": 8.01411989494458e-06, "loss": 0.5574, "step": 7593 }, { "epoch": 0.3152688908608763, "grad_norm": 2.3399999141693115, "learning_rate": 8.013583445050561e-06, "loss": 0.5886, "step": 7594 }, { "epoch": 0.3153104063850876, "grad_norm": 2.526801109313965, "learning_rate": 8.013046940670082e-06, "loss": 0.4115, "step": 7595 }, { "epoch": 0.31535192190929895, "grad_norm": 2.0350000858306885, "learning_rate": 8.012510381812846e-06, "loss": 0.4648, "step": 7596 }, { "epoch": 0.3153934374335103, "grad_norm": 2.5299649238586426, "learning_rate": 8.011973768488554e-06, "loss": 0.5575, "step": 7597 }, { "epoch": 0.3154349529577216, "grad_norm": 2.830409049987793, "learning_rate": 8.011437100706909e-06, "loss": 0.5244, "step": 7598 }, { "epoch": 0.31547646848193295, "grad_norm": 2.2410430908203125, "learning_rate": 8.010900378477612e-06, "loss": 0.4336, "step": 7599 }, { "epoch": 0.3155179840061443, "grad_norm": 2.683340311050415, "learning_rate": 8.01036360181037e-06, "loss": 0.4366, "step": 7600 }, { "epoch": 0.3155594995303556, "grad_norm": 2.2130484580993652, "learning_rate": 8.009826770714886e-06, "loss": 0.566, "step": 7601 }, { "epoch": 0.31560101505456695, "grad_norm": 2.0720338821411133, "learning_rate": 8.009289885200866e-06, "loss": 0.6055, "step": 7602 }, { "epoch": 0.3156425305787783, "grad_norm": 2.793516159057617, "learning_rate": 8.008752945278018e-06, "loss": 0.5127, "step": 7603 }, { "epoch": 0.3156840461029896, "grad_norm": 2.358867883682251, "learning_rate": 8.008215950956048e-06, "loss": 0.4466, "step": 7604 }, { "epoch": 0.31572556162720095, "grad_norm": 2.6396148204803467, "learning_rate": 8.007678902244668e-06, "loss": 0.5482, "step": 7605 }, { "epoch": 0.3157670771514123, "grad_norm": 2.0250344276428223, "learning_rate": 8.007141799153585e-06, "loss": 0.5747, "step": 7606 }, { "epoch": 0.31580859267562367, "grad_norm": 2.6965830326080322, "learning_rate": 8.006604641692513e-06, "loss": 0.4516, "step": 7607 }, { "epoch": 0.315850108199835, "grad_norm": 2.4302377700805664, "learning_rate": 8.006067429871163e-06, "loss": 0.5117, "step": 7608 }, { "epoch": 0.31589162372404633, "grad_norm": 2.631303548812866, "learning_rate": 8.005530163699246e-06, "loss": 0.5318, "step": 7609 }, { "epoch": 0.31593313924825767, "grad_norm": 1.9748914241790771, "learning_rate": 8.00499284318648e-06, "loss": 0.3812, "step": 7610 }, { "epoch": 0.315974654772469, "grad_norm": 3.267753839492798, "learning_rate": 8.004455468342573e-06, "loss": 0.7315, "step": 7611 }, { "epoch": 0.31601617029668033, "grad_norm": 2.3235573768615723, "learning_rate": 8.003918039177248e-06, "loss": 0.4888, "step": 7612 }, { "epoch": 0.31605768582089167, "grad_norm": 2.3658382892608643, "learning_rate": 8.003380555700218e-06, "loss": 0.6104, "step": 7613 }, { "epoch": 0.316099201345103, "grad_norm": 2.1891934871673584, "learning_rate": 8.002843017921203e-06, "loss": 0.6399, "step": 7614 }, { "epoch": 0.31614071686931433, "grad_norm": 2.5069196224212646, "learning_rate": 8.002305425849919e-06, "loss": 0.5617, "step": 7615 }, { "epoch": 0.31618223239352566, "grad_norm": 2.4934372901916504, "learning_rate": 8.00176777949609e-06, "loss": 0.6349, "step": 7616 }, { "epoch": 0.316223747917737, "grad_norm": 2.178558588027954, "learning_rate": 8.00123007886943e-06, "loss": 0.4811, "step": 7617 }, { "epoch": 0.31626526344194833, "grad_norm": 2.5623810291290283, "learning_rate": 8.00069232397967e-06, "loss": 0.4654, "step": 7618 }, { "epoch": 0.31630677896615966, "grad_norm": 2.1834187507629395, "learning_rate": 8.000154514836526e-06, "loss": 0.4667, "step": 7619 }, { "epoch": 0.316348294490371, "grad_norm": 2.7918295860290527, "learning_rate": 7.999616651449722e-06, "loss": 0.5801, "step": 7620 }, { "epoch": 0.31638981001458233, "grad_norm": 2.209357261657715, "learning_rate": 7.999078733828987e-06, "loss": 0.4392, "step": 7621 }, { "epoch": 0.31643132553879366, "grad_norm": 2.7489941120147705, "learning_rate": 7.998540761984042e-06, "loss": 0.4864, "step": 7622 }, { "epoch": 0.316472841063005, "grad_norm": 2.6271755695343018, "learning_rate": 7.998002735924618e-06, "loss": 0.553, "step": 7623 }, { "epoch": 0.31651435658721633, "grad_norm": 2.1700081825256348, "learning_rate": 7.997464655660438e-06, "loss": 0.5309, "step": 7624 }, { "epoch": 0.31655587211142766, "grad_norm": 2.720350980758667, "learning_rate": 7.996926521201235e-06, "loss": 0.5441, "step": 7625 }, { "epoch": 0.316597387635639, "grad_norm": 2.4770724773406982, "learning_rate": 7.996388332556735e-06, "loss": 0.4783, "step": 7626 }, { "epoch": 0.3166389031598503, "grad_norm": 2.3899247646331787, "learning_rate": 7.995850089736672e-06, "loss": 0.4552, "step": 7627 }, { "epoch": 0.31668041868406166, "grad_norm": 2.4119632244110107, "learning_rate": 7.995311792750774e-06, "loss": 0.4638, "step": 7628 }, { "epoch": 0.316721934208273, "grad_norm": 3.146313190460205, "learning_rate": 7.994773441608777e-06, "loss": 0.5632, "step": 7629 }, { "epoch": 0.3167634497324843, "grad_norm": 3.285639524459839, "learning_rate": 7.994235036320413e-06, "loss": 0.4617, "step": 7630 }, { "epoch": 0.31680496525669566, "grad_norm": 2.9414329528808594, "learning_rate": 7.993696576895416e-06, "loss": 0.5801, "step": 7631 }, { "epoch": 0.316846480780907, "grad_norm": 2.598362684249878, "learning_rate": 7.993158063343523e-06, "loss": 0.509, "step": 7632 }, { "epoch": 0.3168879963051183, "grad_norm": 2.564622163772583, "learning_rate": 7.992619495674467e-06, "loss": 0.6323, "step": 7633 }, { "epoch": 0.31692951182932966, "grad_norm": 3.105182409286499, "learning_rate": 7.99208087389799e-06, "loss": 0.5932, "step": 7634 }, { "epoch": 0.316971027353541, "grad_norm": 2.393531084060669, "learning_rate": 7.991542198023827e-06, "loss": 0.4294, "step": 7635 }, { "epoch": 0.3170125428777523, "grad_norm": 2.3945324420928955, "learning_rate": 7.99100346806172e-06, "loss": 0.6026, "step": 7636 }, { "epoch": 0.3170540584019637, "grad_norm": 2.3641488552093506, "learning_rate": 7.990464684021408e-06, "loss": 0.3738, "step": 7637 }, { "epoch": 0.31709557392617505, "grad_norm": 2.914879322052002, "learning_rate": 7.989925845912632e-06, "loss": 0.6052, "step": 7638 }, { "epoch": 0.3171370894503864, "grad_norm": 2.8177740573883057, "learning_rate": 7.989386953745135e-06, "loss": 0.6225, "step": 7639 }, { "epoch": 0.3171786049745977, "grad_norm": 2.9282708168029785, "learning_rate": 7.988848007528661e-06, "loss": 0.5328, "step": 7640 }, { "epoch": 0.31722012049880904, "grad_norm": 2.2590091228485107, "learning_rate": 7.988309007272953e-06, "loss": 0.5623, "step": 7641 }, { "epoch": 0.3172616360230204, "grad_norm": 2.1787173748016357, "learning_rate": 7.987769952987757e-06, "loss": 0.5877, "step": 7642 }, { "epoch": 0.3173031515472317, "grad_norm": 1.9212874174118042, "learning_rate": 7.987230844682818e-06, "loss": 0.4826, "step": 7643 }, { "epoch": 0.31734466707144304, "grad_norm": 2.613948345184326, "learning_rate": 7.986691682367887e-06, "loss": 0.4729, "step": 7644 }, { "epoch": 0.3173861825956544, "grad_norm": 3.0479066371917725, "learning_rate": 7.986152466052707e-06, "loss": 0.5527, "step": 7645 }, { "epoch": 0.3174276981198657, "grad_norm": 2.372347354888916, "learning_rate": 7.985613195747031e-06, "loss": 0.4323, "step": 7646 }, { "epoch": 0.31746921364407704, "grad_norm": 2.712831735610962, "learning_rate": 7.985073871460607e-06, "loss": 0.6038, "step": 7647 }, { "epoch": 0.3175107291682884, "grad_norm": 2.6930747032165527, "learning_rate": 7.984534493203186e-06, "loss": 0.6032, "step": 7648 }, { "epoch": 0.3175522446924997, "grad_norm": 2.7958829402923584, "learning_rate": 7.983995060984524e-06, "loss": 0.6085, "step": 7649 }, { "epoch": 0.31759376021671104, "grad_norm": 2.4554176330566406, "learning_rate": 7.983455574814369e-06, "loss": 0.4634, "step": 7650 }, { "epoch": 0.3176352757409224, "grad_norm": 2.6966629028320312, "learning_rate": 7.982916034702479e-06, "loss": 0.5192, "step": 7651 }, { "epoch": 0.3176767912651337, "grad_norm": 2.440441846847534, "learning_rate": 7.982376440658605e-06, "loss": 0.4941, "step": 7652 }, { "epoch": 0.31771830678934504, "grad_norm": 2.212934732437134, "learning_rate": 7.981836792692508e-06, "loss": 0.3651, "step": 7653 }, { "epoch": 0.3177598223135564, "grad_norm": 2.4324755668640137, "learning_rate": 7.98129709081394e-06, "loss": 0.4553, "step": 7654 }, { "epoch": 0.3178013378377677, "grad_norm": 2.174931526184082, "learning_rate": 7.980757335032663e-06, "loss": 0.6519, "step": 7655 }, { "epoch": 0.31784285336197904, "grad_norm": 2.9001662731170654, "learning_rate": 7.980217525358433e-06, "loss": 0.669, "step": 7656 }, { "epoch": 0.31788436888619037, "grad_norm": 2.3299336433410645, "learning_rate": 7.979677661801014e-06, "loss": 0.5228, "step": 7657 }, { "epoch": 0.3179258844104017, "grad_norm": 2.755546808242798, "learning_rate": 7.97913774437016e-06, "loss": 0.5011, "step": 7658 }, { "epoch": 0.31796739993461304, "grad_norm": 2.612861156463623, "learning_rate": 7.97859777307564e-06, "loss": 0.5032, "step": 7659 }, { "epoch": 0.31800891545882437, "grad_norm": 2.5240225791931152, "learning_rate": 7.978057747927212e-06, "loss": 0.5442, "step": 7660 }, { "epoch": 0.3180504309830357, "grad_norm": 2.949084520339966, "learning_rate": 7.977517668934642e-06, "loss": 0.4818, "step": 7661 }, { "epoch": 0.31809194650724704, "grad_norm": 2.7511889934539795, "learning_rate": 7.976977536107693e-06, "loss": 0.4131, "step": 7662 }, { "epoch": 0.31813346203145837, "grad_norm": 2.79848313331604, "learning_rate": 7.976437349456134e-06, "loss": 0.5329, "step": 7663 }, { "epoch": 0.3181749775556697, "grad_norm": 2.882683753967285, "learning_rate": 7.97589710898973e-06, "loss": 0.6879, "step": 7664 }, { "epoch": 0.31821649307988104, "grad_norm": 2.6786231994628906, "learning_rate": 7.975356814718247e-06, "loss": 0.4368, "step": 7665 }, { "epoch": 0.31825800860409237, "grad_norm": 2.692136526107788, "learning_rate": 7.974816466651456e-06, "loss": 0.55, "step": 7666 }, { "epoch": 0.3182995241283037, "grad_norm": 3.2412800788879395, "learning_rate": 7.974276064799125e-06, "loss": 0.4485, "step": 7667 }, { "epoch": 0.3183410396525151, "grad_norm": 2.797619581222534, "learning_rate": 7.973735609171027e-06, "loss": 0.5093, "step": 7668 }, { "epoch": 0.3183825551767264, "grad_norm": 2.7028467655181885, "learning_rate": 7.973195099776932e-06, "loss": 0.5864, "step": 7669 }, { "epoch": 0.31842407070093776, "grad_norm": 2.254455804824829, "learning_rate": 7.972654536626612e-06, "loss": 0.4602, "step": 7670 }, { "epoch": 0.3184655862251491, "grad_norm": 2.2970011234283447, "learning_rate": 7.97211391972984e-06, "loss": 0.6128, "step": 7671 }, { "epoch": 0.3185071017493604, "grad_norm": 2.8983044624328613, "learning_rate": 7.971573249096394e-06, "loss": 0.6628, "step": 7672 }, { "epoch": 0.31854861727357175, "grad_norm": 2.547605037689209, "learning_rate": 7.971032524736045e-06, "loss": 0.4948, "step": 7673 }, { "epoch": 0.3185901327977831, "grad_norm": 2.5411744117736816, "learning_rate": 7.970491746658574e-06, "loss": 0.5128, "step": 7674 }, { "epoch": 0.3186316483219944, "grad_norm": 2.0640413761138916, "learning_rate": 7.969950914873754e-06, "loss": 0.4314, "step": 7675 }, { "epoch": 0.31867316384620575, "grad_norm": 2.2241132259368896, "learning_rate": 7.969410029391365e-06, "loss": 0.6459, "step": 7676 }, { "epoch": 0.3187146793704171, "grad_norm": 2.4187159538269043, "learning_rate": 7.968869090221189e-06, "loss": 0.5918, "step": 7677 }, { "epoch": 0.3187561948946284, "grad_norm": 2.62540340423584, "learning_rate": 7.968328097373001e-06, "loss": 0.6319, "step": 7678 }, { "epoch": 0.31879771041883975, "grad_norm": 2.7314157485961914, "learning_rate": 7.967787050856588e-06, "loss": 0.4725, "step": 7679 }, { "epoch": 0.3188392259430511, "grad_norm": 2.803419351577759, "learning_rate": 7.967245950681728e-06, "loss": 0.458, "step": 7680 }, { "epoch": 0.3188807414672624, "grad_norm": 2.5567567348480225, "learning_rate": 7.966704796858209e-06, "loss": 0.5762, "step": 7681 }, { "epoch": 0.31892225699147375, "grad_norm": 2.6661863327026367, "learning_rate": 7.96616358939581e-06, "loss": 0.5239, "step": 7682 }, { "epoch": 0.3189637725156851, "grad_norm": 2.5651445388793945, "learning_rate": 7.96562232830432e-06, "loss": 0.4847, "step": 7683 }, { "epoch": 0.3190052880398964, "grad_norm": 3.1291069984436035, "learning_rate": 7.965081013593521e-06, "loss": 0.6419, "step": 7684 }, { "epoch": 0.31904680356410775, "grad_norm": 2.6664249897003174, "learning_rate": 7.964539645273204e-06, "loss": 0.527, "step": 7685 }, { "epoch": 0.3190883190883191, "grad_norm": 2.274674654006958, "learning_rate": 7.963998223353154e-06, "loss": 0.5356, "step": 7686 }, { "epoch": 0.3191298346125304, "grad_norm": 2.3308990001678467, "learning_rate": 7.963456747843164e-06, "loss": 0.4722, "step": 7687 }, { "epoch": 0.31917135013674175, "grad_norm": 2.5290489196777344, "learning_rate": 7.96291521875302e-06, "loss": 0.4684, "step": 7688 }, { "epoch": 0.3192128656609531, "grad_norm": 2.5369515419006348, "learning_rate": 7.962373636092517e-06, "loss": 0.4875, "step": 7689 }, { "epoch": 0.3192543811851644, "grad_norm": 2.221259832382202, "learning_rate": 7.961831999871444e-06, "loss": 0.4964, "step": 7690 }, { "epoch": 0.31929589670937575, "grad_norm": 2.7749862670898438, "learning_rate": 7.961290310099594e-06, "loss": 0.5476, "step": 7691 }, { "epoch": 0.3193374122335871, "grad_norm": 2.5511856079101562, "learning_rate": 7.960748566786762e-06, "loss": 0.6106, "step": 7692 }, { "epoch": 0.3193789277577984, "grad_norm": 2.3553566932678223, "learning_rate": 7.960206769942743e-06, "loss": 0.4194, "step": 7693 }, { "epoch": 0.31942044328200975, "grad_norm": 1.9401766061782837, "learning_rate": 7.959664919577332e-06, "loss": 0.3596, "step": 7694 }, { "epoch": 0.3194619588062211, "grad_norm": 2.5000691413879395, "learning_rate": 7.959123015700328e-06, "loss": 0.5232, "step": 7695 }, { "epoch": 0.3195034743304324, "grad_norm": 3.7983529567718506, "learning_rate": 7.958581058321524e-06, "loss": 0.6026, "step": 7696 }, { "epoch": 0.31954498985464375, "grad_norm": 2.185826063156128, "learning_rate": 7.958039047450724e-06, "loss": 0.5007, "step": 7697 }, { "epoch": 0.3195865053788551, "grad_norm": 2.1184749603271484, "learning_rate": 7.957496983097723e-06, "loss": 0.589, "step": 7698 }, { "epoch": 0.31962802090306647, "grad_norm": 2.4205942153930664, "learning_rate": 7.956954865272326e-06, "loss": 0.5116, "step": 7699 }, { "epoch": 0.3196695364272778, "grad_norm": 2.7383995056152344, "learning_rate": 7.956412693984333e-06, "loss": 0.6027, "step": 7700 }, { "epoch": 0.31971105195148913, "grad_norm": 2.6350908279418945, "learning_rate": 7.955870469243545e-06, "loss": 0.5643, "step": 7701 }, { "epoch": 0.31975256747570047, "grad_norm": 2.2581684589385986, "learning_rate": 7.955328191059767e-06, "loss": 0.5585, "step": 7702 }, { "epoch": 0.3197940829999118, "grad_norm": 2.3310811519622803, "learning_rate": 7.954785859442804e-06, "loss": 0.4919, "step": 7703 }, { "epoch": 0.31983559852412313, "grad_norm": 2.511469841003418, "learning_rate": 7.954243474402461e-06, "loss": 0.4441, "step": 7704 }, { "epoch": 0.31987711404833447, "grad_norm": 2.624424457550049, "learning_rate": 7.953701035948545e-06, "loss": 0.511, "step": 7705 }, { "epoch": 0.3199186295725458, "grad_norm": 2.0865671634674072, "learning_rate": 7.953158544090862e-06, "loss": 0.5142, "step": 7706 }, { "epoch": 0.31996014509675713, "grad_norm": 3.032775640487671, "learning_rate": 7.952615998839222e-06, "loss": 0.5716, "step": 7707 }, { "epoch": 0.32000166062096846, "grad_norm": 2.512437582015991, "learning_rate": 7.952073400203432e-06, "loss": 0.4866, "step": 7708 }, { "epoch": 0.3200431761451798, "grad_norm": 2.424323320388794, "learning_rate": 7.951530748193307e-06, "loss": 0.5571, "step": 7709 }, { "epoch": 0.32008469166939113, "grad_norm": 3.2179925441741943, "learning_rate": 7.950988042818653e-06, "loss": 0.5951, "step": 7710 }, { "epoch": 0.32012620719360246, "grad_norm": 2.6923086643218994, "learning_rate": 7.950445284089286e-06, "loss": 0.5621, "step": 7711 }, { "epoch": 0.3201677227178138, "grad_norm": 2.7061707973480225, "learning_rate": 7.949902472015017e-06, "loss": 0.509, "step": 7712 }, { "epoch": 0.32020923824202513, "grad_norm": 2.5025150775909424, "learning_rate": 7.949359606605662e-06, "loss": 0.6362, "step": 7713 }, { "epoch": 0.32025075376623646, "grad_norm": 2.769908905029297, "learning_rate": 7.948816687871035e-06, "loss": 0.4938, "step": 7714 }, { "epoch": 0.3202922692904478, "grad_norm": 2.168299913406372, "learning_rate": 7.948273715820951e-06, "loss": 0.5081, "step": 7715 }, { "epoch": 0.32033378481465913, "grad_norm": 2.6138699054718018, "learning_rate": 7.94773069046523e-06, "loss": 0.5856, "step": 7716 }, { "epoch": 0.32037530033887046, "grad_norm": 2.333648920059204, "learning_rate": 7.947187611813687e-06, "loss": 0.4801, "step": 7717 }, { "epoch": 0.3204168158630818, "grad_norm": 2.3504552841186523, "learning_rate": 7.946644479876145e-06, "loss": 0.4301, "step": 7718 }, { "epoch": 0.3204583313872931, "grad_norm": 2.031719446182251, "learning_rate": 7.946101294662418e-06, "loss": 0.484, "step": 7719 }, { "epoch": 0.32049984691150446, "grad_norm": 2.406337022781372, "learning_rate": 7.945558056182332e-06, "loss": 0.6152, "step": 7720 }, { "epoch": 0.3205413624357158, "grad_norm": 2.4835081100463867, "learning_rate": 7.94501476444571e-06, "loss": 0.5329, "step": 7721 }, { "epoch": 0.3205828779599271, "grad_norm": 2.5188791751861572, "learning_rate": 7.94447141946237e-06, "loss": 0.57, "step": 7722 }, { "epoch": 0.32062439348413846, "grad_norm": 2.276360273361206, "learning_rate": 7.943928021242136e-06, "loss": 0.5798, "step": 7723 }, { "epoch": 0.3206659090083498, "grad_norm": 2.840757369995117, "learning_rate": 7.943384569794837e-06, "loss": 0.5755, "step": 7724 }, { "epoch": 0.3207074245325611, "grad_norm": 2.748072385787964, "learning_rate": 7.942841065130296e-06, "loss": 0.6013, "step": 7725 }, { "epoch": 0.32074894005677246, "grad_norm": 2.6003305912017822, "learning_rate": 7.942297507258342e-06, "loss": 0.5264, "step": 7726 }, { "epoch": 0.3207904555809838, "grad_norm": 2.230422019958496, "learning_rate": 7.941753896188799e-06, "loss": 0.5872, "step": 7727 }, { "epoch": 0.3208319711051951, "grad_norm": 3.2369964122772217, "learning_rate": 7.941210231931497e-06, "loss": 0.6681, "step": 7728 }, { "epoch": 0.32087348662940646, "grad_norm": 2.3794875144958496, "learning_rate": 7.94066651449627e-06, "loss": 0.596, "step": 7729 }, { "epoch": 0.32091500215361785, "grad_norm": 2.464832067489624, "learning_rate": 7.940122743892943e-06, "loss": 0.5076, "step": 7730 }, { "epoch": 0.3209565176778292, "grad_norm": 2.370732069015503, "learning_rate": 7.939578920131348e-06, "loss": 0.6241, "step": 7731 }, { "epoch": 0.3209980332020405, "grad_norm": 2.3338472843170166, "learning_rate": 7.939035043221319e-06, "loss": 0.498, "step": 7732 }, { "epoch": 0.32103954872625184, "grad_norm": 2.872650623321533, "learning_rate": 7.938491113172691e-06, "loss": 0.5331, "step": 7733 }, { "epoch": 0.3210810642504632, "grad_norm": 2.1641347408294678, "learning_rate": 7.937947129995294e-06, "loss": 0.4607, "step": 7734 }, { "epoch": 0.3211225797746745, "grad_norm": 1.9899717569351196, "learning_rate": 7.937403093698969e-06, "loss": 0.4442, "step": 7735 }, { "epoch": 0.32116409529888584, "grad_norm": 2.191375970840454, "learning_rate": 7.936859004293547e-06, "loss": 0.4649, "step": 7736 }, { "epoch": 0.3212056108230972, "grad_norm": 3.0170726776123047, "learning_rate": 7.93631486178887e-06, "loss": 0.5773, "step": 7737 }, { "epoch": 0.3212471263473085, "grad_norm": 2.6295928955078125, "learning_rate": 7.935770666194771e-06, "loss": 0.5439, "step": 7738 }, { "epoch": 0.32128864187151984, "grad_norm": 3.2493388652801514, "learning_rate": 7.935226417521096e-06, "loss": 0.4367, "step": 7739 }, { "epoch": 0.3213301573957312, "grad_norm": 2.3859026432037354, "learning_rate": 7.934682115777679e-06, "loss": 0.6423, "step": 7740 }, { "epoch": 0.3213716729199425, "grad_norm": 2.015533208847046, "learning_rate": 7.934137760974363e-06, "loss": 0.4113, "step": 7741 }, { "epoch": 0.32141318844415384, "grad_norm": 2.850459337234497, "learning_rate": 7.933593353120992e-06, "loss": 0.4484, "step": 7742 }, { "epoch": 0.3214547039683652, "grad_norm": 2.273392915725708, "learning_rate": 7.933048892227406e-06, "loss": 0.4962, "step": 7743 }, { "epoch": 0.3214962194925765, "grad_norm": 2.1805832386016846, "learning_rate": 7.932504378303452e-06, "loss": 0.469, "step": 7744 }, { "epoch": 0.32153773501678784, "grad_norm": 2.7337965965270996, "learning_rate": 7.931959811358973e-06, "loss": 0.6526, "step": 7745 }, { "epoch": 0.3215792505409992, "grad_norm": 2.4844045639038086, "learning_rate": 7.931415191403816e-06, "loss": 0.5018, "step": 7746 }, { "epoch": 0.3216207660652105, "grad_norm": 2.315859317779541, "learning_rate": 7.930870518447827e-06, "loss": 0.4309, "step": 7747 }, { "epoch": 0.32166228158942184, "grad_norm": 2.835688591003418, "learning_rate": 7.930325792500855e-06, "loss": 0.5845, "step": 7748 }, { "epoch": 0.32170379711363317, "grad_norm": 2.216763734817505, "learning_rate": 7.929781013572747e-06, "loss": 0.5621, "step": 7749 }, { "epoch": 0.3217453126378445, "grad_norm": 2.672624111175537, "learning_rate": 7.929236181673355e-06, "loss": 0.5453, "step": 7750 }, { "epoch": 0.32178682816205584, "grad_norm": 2.5989749431610107, "learning_rate": 7.928691296812528e-06, "loss": 0.5072, "step": 7751 }, { "epoch": 0.32182834368626717, "grad_norm": 2.252150297164917, "learning_rate": 7.928146359000117e-06, "loss": 0.5233, "step": 7752 }, { "epoch": 0.3218698592104785, "grad_norm": 2.6883387565612793, "learning_rate": 7.927601368245978e-06, "loss": 0.6544, "step": 7753 }, { "epoch": 0.32191137473468984, "grad_norm": 2.123755931854248, "learning_rate": 7.927056324559961e-06, "loss": 0.4411, "step": 7754 }, { "epoch": 0.32195289025890117, "grad_norm": 2.14847469329834, "learning_rate": 7.926511227951922e-06, "loss": 0.4819, "step": 7755 }, { "epoch": 0.3219944057831125, "grad_norm": 2.445307493209839, "learning_rate": 7.925966078431717e-06, "loss": 0.5335, "step": 7756 }, { "epoch": 0.32203592130732384, "grad_norm": 2.023322582244873, "learning_rate": 7.925420876009202e-06, "loss": 0.5589, "step": 7757 }, { "epoch": 0.32207743683153517, "grad_norm": 2.4254517555236816, "learning_rate": 7.924875620694233e-06, "loss": 0.5641, "step": 7758 }, { "epoch": 0.3221189523557465, "grad_norm": 2.5989627838134766, "learning_rate": 7.924330312496672e-06, "loss": 0.5799, "step": 7759 }, { "epoch": 0.32216046787995783, "grad_norm": 3.100054979324341, "learning_rate": 7.923784951426375e-06, "loss": 0.5874, "step": 7760 }, { "epoch": 0.3222019834041692, "grad_norm": 2.972703456878662, "learning_rate": 7.923239537493204e-06, "loss": 0.5974, "step": 7761 }, { "epoch": 0.32224349892838056, "grad_norm": 2.7819674015045166, "learning_rate": 7.922694070707017e-06, "loss": 0.5941, "step": 7762 }, { "epoch": 0.3222850144525919, "grad_norm": 2.354576826095581, "learning_rate": 7.922148551077682e-06, "loss": 0.51, "step": 7763 }, { "epoch": 0.3223265299768032, "grad_norm": 2.86383056640625, "learning_rate": 7.921602978615058e-06, "loss": 0.4645, "step": 7764 }, { "epoch": 0.32236804550101456, "grad_norm": 2.2627665996551514, "learning_rate": 7.92105735332901e-06, "loss": 0.4937, "step": 7765 }, { "epoch": 0.3224095610252259, "grad_norm": 2.6867313385009766, "learning_rate": 7.9205116752294e-06, "loss": 0.4951, "step": 7766 }, { "epoch": 0.3224510765494372, "grad_norm": 2.5887935161590576, "learning_rate": 7.9199659443261e-06, "loss": 0.5344, "step": 7767 }, { "epoch": 0.32249259207364855, "grad_norm": 2.490973949432373, "learning_rate": 7.919420160628976e-06, "loss": 0.5571, "step": 7768 }, { "epoch": 0.3225341075978599, "grad_norm": 2.855268716812134, "learning_rate": 7.918874324147891e-06, "loss": 0.6018, "step": 7769 }, { "epoch": 0.3225756231220712, "grad_norm": 2.3432350158691406, "learning_rate": 7.918328434892719e-06, "loss": 0.4273, "step": 7770 }, { "epoch": 0.32261713864628255, "grad_norm": 2.211681604385376, "learning_rate": 7.917782492873327e-06, "loss": 0.5332, "step": 7771 }, { "epoch": 0.3226586541704939, "grad_norm": 2.955061912536621, "learning_rate": 7.917236498099586e-06, "loss": 0.4583, "step": 7772 }, { "epoch": 0.3227001696947052, "grad_norm": 2.2867579460144043, "learning_rate": 7.916690450581368e-06, "loss": 0.496, "step": 7773 }, { "epoch": 0.32274168521891655, "grad_norm": 2.5985801219940186, "learning_rate": 7.916144350328547e-06, "loss": 0.4018, "step": 7774 }, { "epoch": 0.3227832007431279, "grad_norm": 2.120884656906128, "learning_rate": 7.915598197350995e-06, "loss": 0.3347, "step": 7775 }, { "epoch": 0.3228247162673392, "grad_norm": 2.2629857063293457, "learning_rate": 7.915051991658587e-06, "loss": 0.5107, "step": 7776 }, { "epoch": 0.32286623179155055, "grad_norm": 2.5209224224090576, "learning_rate": 7.914505733261198e-06, "loss": 0.5557, "step": 7777 }, { "epoch": 0.3229077473157619, "grad_norm": 2.495950698852539, "learning_rate": 7.913959422168707e-06, "loss": 0.3392, "step": 7778 }, { "epoch": 0.3229492628399732, "grad_norm": 2.967038154602051, "learning_rate": 7.913413058390989e-06, "loss": 0.6384, "step": 7779 }, { "epoch": 0.32299077836418455, "grad_norm": 3.1293859481811523, "learning_rate": 7.912866641937924e-06, "loss": 0.4672, "step": 7780 }, { "epoch": 0.3230322938883959, "grad_norm": 2.1894569396972656, "learning_rate": 7.91232017281939e-06, "loss": 0.4504, "step": 7781 }, { "epoch": 0.3230738094126072, "grad_norm": 2.275200843811035, "learning_rate": 7.911773651045267e-06, "loss": 0.6108, "step": 7782 }, { "epoch": 0.32311532493681855, "grad_norm": 2.463151216506958, "learning_rate": 7.911227076625438e-06, "loss": 0.5464, "step": 7783 }, { "epoch": 0.3231568404610299, "grad_norm": 2.744816780090332, "learning_rate": 7.910680449569785e-06, "loss": 0.4967, "step": 7784 }, { "epoch": 0.3231983559852412, "grad_norm": 2.1239731311798096, "learning_rate": 7.91013376988819e-06, "loss": 0.4557, "step": 7785 }, { "epoch": 0.32323987150945255, "grad_norm": 2.4724271297454834, "learning_rate": 7.909587037590535e-06, "loss": 0.4842, "step": 7786 }, { "epoch": 0.3232813870336639, "grad_norm": 2.69697904586792, "learning_rate": 7.909040252686712e-06, "loss": 0.4966, "step": 7787 }, { "epoch": 0.3233229025578752, "grad_norm": 2.8686530590057373, "learning_rate": 7.9084934151866e-06, "loss": 0.4539, "step": 7788 }, { "epoch": 0.32336441808208655, "grad_norm": 2.4481468200683594, "learning_rate": 7.90794652510009e-06, "loss": 0.5672, "step": 7789 }, { "epoch": 0.3234059336062979, "grad_norm": 2.4084770679473877, "learning_rate": 7.907399582437067e-06, "loss": 0.5216, "step": 7790 }, { "epoch": 0.3234474491305092, "grad_norm": 2.3204658031463623, "learning_rate": 7.906852587207425e-06, "loss": 0.4627, "step": 7791 }, { "epoch": 0.3234889646547206, "grad_norm": 2.9649038314819336, "learning_rate": 7.906305539421048e-06, "loss": 0.5373, "step": 7792 }, { "epoch": 0.32353048017893193, "grad_norm": 2.378239870071411, "learning_rate": 7.905758439087829e-06, "loss": 0.4335, "step": 7793 }, { "epoch": 0.32357199570314327, "grad_norm": 2.1931192874908447, "learning_rate": 7.90521128621766e-06, "loss": 0.539, "step": 7794 }, { "epoch": 0.3236135112273546, "grad_norm": 2.367899179458618, "learning_rate": 7.904664080820434e-06, "loss": 0.5576, "step": 7795 }, { "epoch": 0.32365502675156593, "grad_norm": 2.695119619369507, "learning_rate": 7.904116822906044e-06, "loss": 0.6531, "step": 7796 }, { "epoch": 0.32369654227577727, "grad_norm": 2.4664392471313477, "learning_rate": 7.903569512484383e-06, "loss": 0.5303, "step": 7797 }, { "epoch": 0.3237380577999886, "grad_norm": 2.348906993865967, "learning_rate": 7.903022149565351e-06, "loss": 0.5892, "step": 7798 }, { "epoch": 0.32377957332419993, "grad_norm": 2.4260456562042236, "learning_rate": 7.902474734158841e-06, "loss": 0.4415, "step": 7799 }, { "epoch": 0.32382108884841126, "grad_norm": 2.819490432739258, "learning_rate": 7.901927266274751e-06, "loss": 0.5693, "step": 7800 }, { "epoch": 0.3238626043726226, "grad_norm": 2.3646011352539062, "learning_rate": 7.90137974592298e-06, "loss": 0.4732, "step": 7801 }, { "epoch": 0.32390411989683393, "grad_norm": 2.528407096862793, "learning_rate": 7.900832173113426e-06, "loss": 0.5639, "step": 7802 }, { "epoch": 0.32394563542104526, "grad_norm": 2.3761446475982666, "learning_rate": 7.900284547855992e-06, "loss": 0.4877, "step": 7803 }, { "epoch": 0.3239871509452566, "grad_norm": 2.7791664600372314, "learning_rate": 7.899736870160575e-06, "loss": 0.723, "step": 7804 }, { "epoch": 0.32402866646946793, "grad_norm": 2.4871208667755127, "learning_rate": 7.899189140037083e-06, "loss": 0.5031, "step": 7805 }, { "epoch": 0.32407018199367926, "grad_norm": 2.071136236190796, "learning_rate": 7.898641357495412e-06, "loss": 0.4894, "step": 7806 }, { "epoch": 0.3241116975178906, "grad_norm": 2.310194969177246, "learning_rate": 7.898093522545471e-06, "loss": 0.4963, "step": 7807 }, { "epoch": 0.32415321304210193, "grad_norm": 2.3396406173706055, "learning_rate": 7.897545635197165e-06, "loss": 0.5262, "step": 7808 }, { "epoch": 0.32419472856631326, "grad_norm": 2.78188419342041, "learning_rate": 7.896997695460399e-06, "loss": 0.557, "step": 7809 }, { "epoch": 0.3242362440905246, "grad_norm": 2.415205955505371, "learning_rate": 7.896449703345077e-06, "loss": 0.3716, "step": 7810 }, { "epoch": 0.3242777596147359, "grad_norm": 2.7161059379577637, "learning_rate": 7.895901658861111e-06, "loss": 0.5661, "step": 7811 }, { "epoch": 0.32431927513894726, "grad_norm": 2.1262052059173584, "learning_rate": 7.895353562018409e-06, "loss": 0.4137, "step": 7812 }, { "epoch": 0.3243607906631586, "grad_norm": 2.3824236392974854, "learning_rate": 7.89480541282688e-06, "loss": 0.6094, "step": 7813 }, { "epoch": 0.3244023061873699, "grad_norm": 2.3998844623565674, "learning_rate": 7.894257211296433e-06, "loss": 0.4232, "step": 7814 }, { "epoch": 0.32444382171158126, "grad_norm": 2.8821730613708496, "learning_rate": 7.893708957436982e-06, "loss": 0.4855, "step": 7815 }, { "epoch": 0.3244853372357926, "grad_norm": 2.034736156463623, "learning_rate": 7.89316065125844e-06, "loss": 0.5498, "step": 7816 }, { "epoch": 0.3245268527600039, "grad_norm": 2.835111141204834, "learning_rate": 7.89261229277072e-06, "loss": 0.4802, "step": 7817 }, { "epoch": 0.32456836828421526, "grad_norm": 2.511841297149658, "learning_rate": 7.892063881983736e-06, "loss": 0.4426, "step": 7818 }, { "epoch": 0.3246098838084266, "grad_norm": 2.197188138961792, "learning_rate": 7.891515418907401e-06, "loss": 0.6055, "step": 7819 }, { "epoch": 0.3246513993326379, "grad_norm": 2.4008755683898926, "learning_rate": 7.890966903551636e-06, "loss": 0.4199, "step": 7820 }, { "epoch": 0.32469291485684926, "grad_norm": 2.476151704788208, "learning_rate": 7.890418335926356e-06, "loss": 0.4614, "step": 7821 }, { "epoch": 0.3247344303810606, "grad_norm": 2.1216676235198975, "learning_rate": 7.889869716041478e-06, "loss": 0.3942, "step": 7822 }, { "epoch": 0.324775945905272, "grad_norm": 2.4702582359313965, "learning_rate": 7.889321043906923e-06, "loss": 0.5445, "step": 7823 }, { "epoch": 0.3248174614294833, "grad_norm": 2.381084680557251, "learning_rate": 7.888772319532612e-06, "loss": 0.5339, "step": 7824 }, { "epoch": 0.32485897695369464, "grad_norm": 3.0088720321655273, "learning_rate": 7.888223542928464e-06, "loss": 0.4755, "step": 7825 }, { "epoch": 0.324900492477906, "grad_norm": 2.6820144653320312, "learning_rate": 7.887674714104402e-06, "loss": 0.363, "step": 7826 }, { "epoch": 0.3249420080021173, "grad_norm": 2.066276788711548, "learning_rate": 7.887125833070349e-06, "loss": 0.5001, "step": 7827 }, { "epoch": 0.32498352352632864, "grad_norm": 1.8401669263839722, "learning_rate": 7.886576899836228e-06, "loss": 0.4356, "step": 7828 }, { "epoch": 0.32502503905054, "grad_norm": 2.7586090564727783, "learning_rate": 7.886027914411964e-06, "loss": 0.4312, "step": 7829 }, { "epoch": 0.3250665545747513, "grad_norm": 2.3771514892578125, "learning_rate": 7.885478876807486e-06, "loss": 0.5572, "step": 7830 }, { "epoch": 0.32510807009896264, "grad_norm": 2.994128942489624, "learning_rate": 7.884929787032719e-06, "loss": 0.4895, "step": 7831 }, { "epoch": 0.325149585623174, "grad_norm": 2.716323137283325, "learning_rate": 7.884380645097586e-06, "loss": 0.4435, "step": 7832 }, { "epoch": 0.3251911011473853, "grad_norm": 2.6774089336395264, "learning_rate": 7.88383145101202e-06, "loss": 0.455, "step": 7833 }, { "epoch": 0.32523261667159664, "grad_norm": 2.3201096057891846, "learning_rate": 7.883282204785953e-06, "loss": 0.4958, "step": 7834 }, { "epoch": 0.325274132195808, "grad_norm": 2.3418426513671875, "learning_rate": 7.88273290642931e-06, "loss": 0.56, "step": 7835 }, { "epoch": 0.3253156477200193, "grad_norm": 2.5778262615203857, "learning_rate": 7.882183555952027e-06, "loss": 0.5157, "step": 7836 }, { "epoch": 0.32535716324423064, "grad_norm": 3.144554853439331, "learning_rate": 7.881634153364033e-06, "loss": 0.4941, "step": 7837 }, { "epoch": 0.325398678768442, "grad_norm": 2.3931479454040527, "learning_rate": 7.881084698675265e-06, "loss": 0.5037, "step": 7838 }, { "epoch": 0.3254401942926533, "grad_norm": 2.34885573387146, "learning_rate": 7.880535191895656e-06, "loss": 0.5567, "step": 7839 }, { "epoch": 0.32548170981686464, "grad_norm": 2.6878840923309326, "learning_rate": 7.879985633035136e-06, "loss": 0.5274, "step": 7840 }, { "epoch": 0.325523225341076, "grad_norm": 2.511871099472046, "learning_rate": 7.87943602210365e-06, "loss": 0.4125, "step": 7841 }, { "epoch": 0.3255647408652873, "grad_norm": 3.164057731628418, "learning_rate": 7.878886359111128e-06, "loss": 0.6172, "step": 7842 }, { "epoch": 0.32560625638949864, "grad_norm": 2.2472891807556152, "learning_rate": 7.878336644067513e-06, "loss": 0.4638, "step": 7843 }, { "epoch": 0.32564777191370997, "grad_norm": 2.9763383865356445, "learning_rate": 7.87778687698274e-06, "loss": 0.5602, "step": 7844 }, { "epoch": 0.3256892874379213, "grad_norm": 2.6664650440216064, "learning_rate": 7.87723705786675e-06, "loss": 0.6225, "step": 7845 }, { "epoch": 0.32573080296213264, "grad_norm": 2.6375439167022705, "learning_rate": 7.876687186729487e-06, "loss": 0.5501, "step": 7846 }, { "epoch": 0.32577231848634397, "grad_norm": 2.2707056999206543, "learning_rate": 7.87613726358089e-06, "loss": 0.5947, "step": 7847 }, { "epoch": 0.3258138340105553, "grad_norm": 2.5605618953704834, "learning_rate": 7.875587288430901e-06, "loss": 0.5929, "step": 7848 }, { "epoch": 0.32585534953476664, "grad_norm": 3.070779800415039, "learning_rate": 7.875037261289467e-06, "loss": 0.465, "step": 7849 }, { "epoch": 0.32589686505897797, "grad_norm": 2.1907620429992676, "learning_rate": 7.87448718216653e-06, "loss": 0.599, "step": 7850 }, { "epoch": 0.3259383805831893, "grad_norm": 2.3837625980377197, "learning_rate": 7.873937051072037e-06, "loss": 0.4577, "step": 7851 }, { "epoch": 0.32597989610740064, "grad_norm": 2.506002187728882, "learning_rate": 7.87338686801593e-06, "loss": 0.584, "step": 7852 }, { "epoch": 0.326021411631612, "grad_norm": 2.2911107540130615, "learning_rate": 7.872836633008162e-06, "loss": 0.515, "step": 7853 }, { "epoch": 0.32606292715582336, "grad_norm": 2.542586088180542, "learning_rate": 7.872286346058681e-06, "loss": 0.5368, "step": 7854 }, { "epoch": 0.3261044426800347, "grad_norm": 2.468031883239746, "learning_rate": 7.871736007177435e-06, "loss": 0.4837, "step": 7855 }, { "epoch": 0.326145958204246, "grad_norm": 2.507915735244751, "learning_rate": 7.871185616374375e-06, "loss": 0.5618, "step": 7856 }, { "epoch": 0.32618747372845736, "grad_norm": 4.127782821655273, "learning_rate": 7.870635173659448e-06, "loss": 0.6559, "step": 7857 }, { "epoch": 0.3262289892526687, "grad_norm": 2.721222400665283, "learning_rate": 7.870084679042613e-06, "loss": 0.7393, "step": 7858 }, { "epoch": 0.32627050477688, "grad_norm": 2.7987916469573975, "learning_rate": 7.869534132533818e-06, "loss": 0.6148, "step": 7859 }, { "epoch": 0.32631202030109135, "grad_norm": 2.495821952819824, "learning_rate": 7.868983534143019e-06, "loss": 0.6202, "step": 7860 }, { "epoch": 0.3263535358253027, "grad_norm": 2.4877164363861084, "learning_rate": 7.868432883880173e-06, "loss": 0.453, "step": 7861 }, { "epoch": 0.326395051349514, "grad_norm": 2.5818307399749756, "learning_rate": 7.86788218175523e-06, "loss": 0.5222, "step": 7862 }, { "epoch": 0.32643656687372535, "grad_norm": 2.3766870498657227, "learning_rate": 7.867331427778154e-06, "loss": 0.4782, "step": 7863 }, { "epoch": 0.3264780823979367, "grad_norm": 2.0372753143310547, "learning_rate": 7.866780621958898e-06, "loss": 0.4997, "step": 7864 }, { "epoch": 0.326519597922148, "grad_norm": 2.300391912460327, "learning_rate": 7.866229764307422e-06, "loss": 0.5681, "step": 7865 }, { "epoch": 0.32656111344635935, "grad_norm": 1.8309760093688965, "learning_rate": 7.865678854833685e-06, "loss": 0.4726, "step": 7866 }, { "epoch": 0.3266026289705707, "grad_norm": 2.421081066131592, "learning_rate": 7.865127893547649e-06, "loss": 0.519, "step": 7867 }, { "epoch": 0.326644144494782, "grad_norm": 2.669468402862549, "learning_rate": 7.864576880459273e-06, "loss": 0.4141, "step": 7868 }, { "epoch": 0.32668566001899335, "grad_norm": 2.4767415523529053, "learning_rate": 7.864025815578524e-06, "loss": 0.5475, "step": 7869 }, { "epoch": 0.3267271755432047, "grad_norm": 2.854947566986084, "learning_rate": 7.863474698915363e-06, "loss": 0.6246, "step": 7870 }, { "epoch": 0.326768691067416, "grad_norm": 2.839324474334717, "learning_rate": 7.862923530479752e-06, "loss": 0.5744, "step": 7871 }, { "epoch": 0.32681020659162735, "grad_norm": 2.415024757385254, "learning_rate": 7.862372310281658e-06, "loss": 0.5778, "step": 7872 }, { "epoch": 0.3268517221158387, "grad_norm": 2.670980215072632, "learning_rate": 7.86182103833105e-06, "loss": 0.5451, "step": 7873 }, { "epoch": 0.32689323764005, "grad_norm": 2.305663585662842, "learning_rate": 7.861269714637892e-06, "loss": 0.5435, "step": 7874 }, { "epoch": 0.32693475316426135, "grad_norm": 2.1953535079956055, "learning_rate": 7.860718339212152e-06, "loss": 0.5267, "step": 7875 }, { "epoch": 0.3269762686884727, "grad_norm": 2.5091731548309326, "learning_rate": 7.8601669120638e-06, "loss": 0.4979, "step": 7876 }, { "epoch": 0.327017784212684, "grad_norm": 2.2964906692504883, "learning_rate": 7.859615433202808e-06, "loss": 0.5116, "step": 7877 }, { "epoch": 0.32705929973689535, "grad_norm": 2.6543495655059814, "learning_rate": 7.859063902639142e-06, "loss": 0.4705, "step": 7878 }, { "epoch": 0.3271008152611067, "grad_norm": 2.2569515705108643, "learning_rate": 7.858512320382778e-06, "loss": 0.5431, "step": 7879 }, { "epoch": 0.327142330785318, "grad_norm": 3.0441689491271973, "learning_rate": 7.857960686443687e-06, "loss": 0.5636, "step": 7880 }, { "epoch": 0.32718384630952935, "grad_norm": 2.6849279403686523, "learning_rate": 7.857409000831842e-06, "loss": 0.5497, "step": 7881 }, { "epoch": 0.3272253618337407, "grad_norm": 3.7236692905426025, "learning_rate": 7.85685726355722e-06, "loss": 0.4605, "step": 7882 }, { "epoch": 0.327266877357952, "grad_norm": 2.4071450233459473, "learning_rate": 7.856305474629796e-06, "loss": 0.4029, "step": 7883 }, { "epoch": 0.3273083928821634, "grad_norm": 2.89294695854187, "learning_rate": 7.855753634059543e-06, "loss": 0.5792, "step": 7884 }, { "epoch": 0.32734990840637473, "grad_norm": 2.7810025215148926, "learning_rate": 7.855201741856443e-06, "loss": 0.4967, "step": 7885 }, { "epoch": 0.32739142393058607, "grad_norm": 2.7276148796081543, "learning_rate": 7.854649798030472e-06, "loss": 0.4199, "step": 7886 }, { "epoch": 0.3274329394547974, "grad_norm": 2.0420970916748047, "learning_rate": 7.85409780259161e-06, "loss": 0.6103, "step": 7887 }, { "epoch": 0.32747445497900873, "grad_norm": 2.748194694519043, "learning_rate": 7.853545755549837e-06, "loss": 0.5398, "step": 7888 }, { "epoch": 0.32751597050322007, "grad_norm": 2.49729323387146, "learning_rate": 7.852993656915135e-06, "loss": 0.4382, "step": 7889 }, { "epoch": 0.3275574860274314, "grad_norm": 2.699727773666382, "learning_rate": 7.852441506697484e-06, "loss": 0.5657, "step": 7890 }, { "epoch": 0.32759900155164273, "grad_norm": 2.556025505065918, "learning_rate": 7.85188930490687e-06, "loss": 0.3975, "step": 7891 }, { "epoch": 0.32764051707585407, "grad_norm": 2.677922010421753, "learning_rate": 7.851337051553275e-06, "loss": 0.443, "step": 7892 }, { "epoch": 0.3276820326000654, "grad_norm": 2.1624178886413574, "learning_rate": 7.850784746646684e-06, "loss": 0.3681, "step": 7893 }, { "epoch": 0.32772354812427673, "grad_norm": 2.489429473876953, "learning_rate": 7.850232390197081e-06, "loss": 0.4125, "step": 7894 }, { "epoch": 0.32776506364848806, "grad_norm": 2.109790086746216, "learning_rate": 7.849679982214459e-06, "loss": 0.4779, "step": 7895 }, { "epoch": 0.3278065791726994, "grad_norm": 2.4428188800811768, "learning_rate": 7.849127522708798e-06, "loss": 0.5964, "step": 7896 }, { "epoch": 0.32784809469691073, "grad_norm": 2.252842903137207, "learning_rate": 7.848575011690093e-06, "loss": 0.5235, "step": 7897 }, { "epoch": 0.32788961022112206, "grad_norm": 2.635716199874878, "learning_rate": 7.848022449168328e-06, "loss": 0.4792, "step": 7898 }, { "epoch": 0.3279311257453334, "grad_norm": 2.5635688304901123, "learning_rate": 7.847469835153499e-06, "loss": 0.3699, "step": 7899 }, { "epoch": 0.32797264126954473, "grad_norm": 2.5933663845062256, "learning_rate": 7.846917169655593e-06, "loss": 0.4922, "step": 7900 }, { "epoch": 0.32801415679375606, "grad_norm": 2.025770664215088, "learning_rate": 7.846364452684604e-06, "loss": 0.4298, "step": 7901 }, { "epoch": 0.3280556723179674, "grad_norm": 2.60286283493042, "learning_rate": 7.845811684250527e-06, "loss": 0.5357, "step": 7902 }, { "epoch": 0.32809718784217873, "grad_norm": 2.586005926132202, "learning_rate": 7.84525886436335e-06, "loss": 0.6784, "step": 7903 }, { "epoch": 0.32813870336639006, "grad_norm": 2.536555767059326, "learning_rate": 7.844705993033077e-06, "loss": 0.4976, "step": 7904 }, { "epoch": 0.3281802188906014, "grad_norm": 2.660489559173584, "learning_rate": 7.844153070269697e-06, "loss": 0.4837, "step": 7905 }, { "epoch": 0.3282217344148127, "grad_norm": 2.1445305347442627, "learning_rate": 7.843600096083212e-06, "loss": 0.5043, "step": 7906 }, { "epoch": 0.32826324993902406, "grad_norm": 2.843120813369751, "learning_rate": 7.843047070483615e-06, "loss": 0.4339, "step": 7907 }, { "epoch": 0.3283047654632354, "grad_norm": 2.137319326400757, "learning_rate": 7.842493993480911e-06, "loss": 0.4989, "step": 7908 }, { "epoch": 0.3283462809874467, "grad_norm": 3.854020833969116, "learning_rate": 7.841940865085094e-06, "loss": 0.6455, "step": 7909 }, { "epoch": 0.32838779651165806, "grad_norm": 2.258793592453003, "learning_rate": 7.841387685306169e-06, "loss": 0.4889, "step": 7910 }, { "epoch": 0.3284293120358694, "grad_norm": 2.1723203659057617, "learning_rate": 7.840834454154134e-06, "loss": 0.4397, "step": 7911 }, { "epoch": 0.3284708275600807, "grad_norm": 2.582209348678589, "learning_rate": 7.840281171638992e-06, "loss": 0.4829, "step": 7912 }, { "epoch": 0.32851234308429206, "grad_norm": 2.2695255279541016, "learning_rate": 7.83972783777075e-06, "loss": 0.4675, "step": 7913 }, { "epoch": 0.3285538586085034, "grad_norm": 2.478433132171631, "learning_rate": 7.839174452559409e-06, "loss": 0.5099, "step": 7914 }, { "epoch": 0.3285953741327148, "grad_norm": 2.137089252471924, "learning_rate": 7.838621016014976e-06, "loss": 0.5492, "step": 7915 }, { "epoch": 0.3286368896569261, "grad_norm": 2.7111635208129883, "learning_rate": 7.838067528147457e-06, "loss": 0.6675, "step": 7916 }, { "epoch": 0.32867840518113745, "grad_norm": 3.1425669193267822, "learning_rate": 7.83751398896686e-06, "loss": 0.4831, "step": 7917 }, { "epoch": 0.3287199207053488, "grad_norm": 3.356795310974121, "learning_rate": 7.83696039848319e-06, "loss": 0.5887, "step": 7918 }, { "epoch": 0.3287614362295601, "grad_norm": 2.188014268875122, "learning_rate": 7.83640675670646e-06, "loss": 0.4929, "step": 7919 }, { "epoch": 0.32880295175377144, "grad_norm": 2.6391563415527344, "learning_rate": 7.835853063646676e-06, "loss": 0.6475, "step": 7920 }, { "epoch": 0.3288444672779828, "grad_norm": 2.41916823387146, "learning_rate": 7.835299319313854e-06, "loss": 0.5782, "step": 7921 }, { "epoch": 0.3288859828021941, "grad_norm": 5.48432731628418, "learning_rate": 7.834745523718003e-06, "loss": 0.4788, "step": 7922 }, { "epoch": 0.32892749832640544, "grad_norm": 2.531964063644409, "learning_rate": 7.834191676869135e-06, "loss": 0.4337, "step": 7923 }, { "epoch": 0.3289690138506168, "grad_norm": 2.34865403175354, "learning_rate": 7.833637778777264e-06, "loss": 0.4763, "step": 7924 }, { "epoch": 0.3290105293748281, "grad_norm": 2.2330873012542725, "learning_rate": 7.833083829452405e-06, "loss": 0.5065, "step": 7925 }, { "epoch": 0.32905204489903944, "grad_norm": 2.388838052749634, "learning_rate": 7.832529828904575e-06, "loss": 0.5178, "step": 7926 }, { "epoch": 0.3290935604232508, "grad_norm": 2.8085646629333496, "learning_rate": 7.83197577714379e-06, "loss": 0.5307, "step": 7927 }, { "epoch": 0.3291350759474621, "grad_norm": 2.718130111694336, "learning_rate": 7.831421674180064e-06, "loss": 0.4653, "step": 7928 }, { "epoch": 0.32917659147167344, "grad_norm": 2.400926351547241, "learning_rate": 7.83086752002342e-06, "loss": 0.484, "step": 7929 }, { "epoch": 0.3292181069958848, "grad_norm": 2.297767162322998, "learning_rate": 7.830313314683876e-06, "loss": 0.5813, "step": 7930 }, { "epoch": 0.3292596225200961, "grad_norm": 2.45680832862854, "learning_rate": 7.829759058171448e-06, "loss": 0.4935, "step": 7931 }, { "epoch": 0.32930113804430744, "grad_norm": 2.3804707527160645, "learning_rate": 7.829204750496164e-06, "loss": 0.4786, "step": 7932 }, { "epoch": 0.3293426535685188, "grad_norm": 2.403201103210449, "learning_rate": 7.82865039166804e-06, "loss": 0.5095, "step": 7933 }, { "epoch": 0.3293841690927301, "grad_norm": 2.425816535949707, "learning_rate": 7.828095981697103e-06, "loss": 0.5072, "step": 7934 }, { "epoch": 0.32942568461694144, "grad_norm": 3.2844693660736084, "learning_rate": 7.827541520593376e-06, "loss": 0.5065, "step": 7935 }, { "epoch": 0.32946720014115277, "grad_norm": 2.866718292236328, "learning_rate": 7.826987008366884e-06, "loss": 0.4671, "step": 7936 }, { "epoch": 0.3295087156653641, "grad_norm": 2.5708858966827393, "learning_rate": 7.82643244502765e-06, "loss": 0.469, "step": 7937 }, { "epoch": 0.32955023118957544, "grad_norm": 2.543353319168091, "learning_rate": 7.825877830585705e-06, "loss": 0.5268, "step": 7938 }, { "epoch": 0.32959174671378677, "grad_norm": 2.159954786300659, "learning_rate": 7.825323165051072e-06, "loss": 0.4424, "step": 7939 }, { "epoch": 0.3296332622379981, "grad_norm": 2.8263814449310303, "learning_rate": 7.824768448433783e-06, "loss": 0.6456, "step": 7940 }, { "epoch": 0.32967477776220944, "grad_norm": 2.2158572673797607, "learning_rate": 7.824213680743867e-06, "loss": 0.4422, "step": 7941 }, { "epoch": 0.32971629328642077, "grad_norm": 2.3986921310424805, "learning_rate": 7.823658861991352e-06, "loss": 0.5501, "step": 7942 }, { "epoch": 0.3297578088106321, "grad_norm": 2.5536606311798096, "learning_rate": 7.823103992186272e-06, "loss": 0.4723, "step": 7943 }, { "epoch": 0.32979932433484344, "grad_norm": 2.339789628982544, "learning_rate": 7.822549071338657e-06, "loss": 0.5851, "step": 7944 }, { "epoch": 0.32984083985905477, "grad_norm": 2.3897969722747803, "learning_rate": 7.821994099458544e-06, "loss": 0.4978, "step": 7945 }, { "epoch": 0.32988235538326616, "grad_norm": 2.8911659717559814, "learning_rate": 7.82143907655596e-06, "loss": 0.598, "step": 7946 }, { "epoch": 0.3299238709074775, "grad_norm": 2.348742961883545, "learning_rate": 7.820884002640947e-06, "loss": 0.5368, "step": 7947 }, { "epoch": 0.3299653864316888, "grad_norm": 3.2554285526275635, "learning_rate": 7.820328877723538e-06, "loss": 0.5049, "step": 7948 }, { "epoch": 0.33000690195590016, "grad_norm": 2.344045639038086, "learning_rate": 7.81977370181377e-06, "loss": 0.4065, "step": 7949 }, { "epoch": 0.3300484174801115, "grad_norm": 2.8751325607299805, "learning_rate": 7.81921847492168e-06, "loss": 0.533, "step": 7950 }, { "epoch": 0.3300899330043228, "grad_norm": 2.8458878993988037, "learning_rate": 7.81866319705731e-06, "loss": 0.4104, "step": 7951 }, { "epoch": 0.33013144852853415, "grad_norm": 2.35906720161438, "learning_rate": 7.818107868230694e-06, "loss": 0.5798, "step": 7952 }, { "epoch": 0.3301729640527455, "grad_norm": 2.3570849895477295, "learning_rate": 7.817552488451876e-06, "loss": 0.5232, "step": 7953 }, { "epoch": 0.3302144795769568, "grad_norm": 2.593681573867798, "learning_rate": 7.816997057730898e-06, "loss": 0.5166, "step": 7954 }, { "epoch": 0.33025599510116815, "grad_norm": 2.7332546710968018, "learning_rate": 7.8164415760778e-06, "loss": 0.6607, "step": 7955 }, { "epoch": 0.3302975106253795, "grad_norm": 2.530414342880249, "learning_rate": 7.815886043502625e-06, "loss": 0.434, "step": 7956 }, { "epoch": 0.3303390261495908, "grad_norm": 2.3946800231933594, "learning_rate": 7.815330460015423e-06, "loss": 0.5025, "step": 7957 }, { "epoch": 0.33038054167380215, "grad_norm": 2.1749706268310547, "learning_rate": 7.814774825626232e-06, "loss": 0.3029, "step": 7958 }, { "epoch": 0.3304220571980135, "grad_norm": 2.6237823963165283, "learning_rate": 7.8142191403451e-06, "loss": 0.5293, "step": 7959 }, { "epoch": 0.3304635727222248, "grad_norm": 2.3409533500671387, "learning_rate": 7.813663404182077e-06, "loss": 0.5574, "step": 7960 }, { "epoch": 0.33050508824643615, "grad_norm": 2.719651460647583, "learning_rate": 7.813107617147208e-06, "loss": 0.5669, "step": 7961 }, { "epoch": 0.3305466037706475, "grad_norm": 2.3221275806427, "learning_rate": 7.812551779250543e-06, "loss": 0.5654, "step": 7962 }, { "epoch": 0.3305881192948588, "grad_norm": 2.502800703048706, "learning_rate": 7.811995890502128e-06, "loss": 0.4242, "step": 7963 }, { "epoch": 0.33062963481907015, "grad_norm": 2.2475204467773438, "learning_rate": 7.81143995091202e-06, "loss": 0.441, "step": 7964 }, { "epoch": 0.3306711503432815, "grad_norm": 2.2912023067474365, "learning_rate": 7.810883960490268e-06, "loss": 0.4289, "step": 7965 }, { "epoch": 0.3307126658674928, "grad_norm": 2.6408891677856445, "learning_rate": 7.810327919246923e-06, "loss": 0.541, "step": 7966 }, { "epoch": 0.33075418139170415, "grad_norm": 3.0350773334503174, "learning_rate": 7.809771827192037e-06, "loss": 0.5354, "step": 7967 }, { "epoch": 0.3307956969159155, "grad_norm": 2.655367374420166, "learning_rate": 7.809215684335668e-06, "loss": 0.7989, "step": 7968 }, { "epoch": 0.3308372124401268, "grad_norm": 2.646679639816284, "learning_rate": 7.808659490687872e-06, "loss": 0.594, "step": 7969 }, { "epoch": 0.33087872796433815, "grad_norm": 2.525952100753784, "learning_rate": 7.808103246258697e-06, "loss": 0.5079, "step": 7970 }, { "epoch": 0.3309202434885495, "grad_norm": 2.427560329437256, "learning_rate": 7.80754695105821e-06, "loss": 0.4778, "step": 7971 }, { "epoch": 0.3309617590127608, "grad_norm": 2.8094029426574707, "learning_rate": 7.806990605096466e-06, "loss": 0.5849, "step": 7972 }, { "epoch": 0.33100327453697215, "grad_norm": 2.687964916229248, "learning_rate": 7.806434208383522e-06, "loss": 0.619, "step": 7973 }, { "epoch": 0.3310447900611835, "grad_norm": 3.5173046588897705, "learning_rate": 7.805877760929437e-06, "loss": 0.6978, "step": 7974 }, { "epoch": 0.3310863055853948, "grad_norm": 2.8457767963409424, "learning_rate": 7.805321262744274e-06, "loss": 0.5027, "step": 7975 }, { "epoch": 0.33112782110960615, "grad_norm": 2.3437445163726807, "learning_rate": 7.804764713838094e-06, "loss": 0.4703, "step": 7976 }, { "epoch": 0.33116933663381753, "grad_norm": 2.2945594787597656, "learning_rate": 7.80420811422096e-06, "loss": 0.6553, "step": 7977 }, { "epoch": 0.33121085215802887, "grad_norm": 2.668923854827881, "learning_rate": 7.803651463902936e-06, "loss": 0.3566, "step": 7978 }, { "epoch": 0.3312523676822402, "grad_norm": 2.9194746017456055, "learning_rate": 7.803094762894083e-06, "loss": 0.5603, "step": 7979 }, { "epoch": 0.33129388320645153, "grad_norm": 2.2008368968963623, "learning_rate": 7.80253801120447e-06, "loss": 0.3949, "step": 7980 }, { "epoch": 0.33133539873066287, "grad_norm": 2.81506085395813, "learning_rate": 7.801981208844163e-06, "loss": 0.624, "step": 7981 }, { "epoch": 0.3313769142548742, "grad_norm": 2.819249153137207, "learning_rate": 7.801424355823227e-06, "loss": 0.6981, "step": 7982 }, { "epoch": 0.33141842977908553, "grad_norm": 2.5659961700439453, "learning_rate": 7.80086745215173e-06, "loss": 0.5721, "step": 7983 }, { "epoch": 0.33145994530329687, "grad_norm": 2.314363718032837, "learning_rate": 7.800310497839745e-06, "loss": 0.5074, "step": 7984 }, { "epoch": 0.3315014608275082, "grad_norm": 2.432771921157837, "learning_rate": 7.799753492897338e-06, "loss": 0.4314, "step": 7985 }, { "epoch": 0.33154297635171953, "grad_norm": 2.5973827838897705, "learning_rate": 7.79919643733458e-06, "loss": 0.6112, "step": 7986 }, { "epoch": 0.33158449187593086, "grad_norm": 2.407073497772217, "learning_rate": 7.798639331161544e-06, "loss": 0.3982, "step": 7987 }, { "epoch": 0.3316260074001422, "grad_norm": 2.6166324615478516, "learning_rate": 7.798082174388302e-06, "loss": 0.4118, "step": 7988 }, { "epoch": 0.33166752292435353, "grad_norm": 2.9932925701141357, "learning_rate": 7.797524967024927e-06, "loss": 0.6482, "step": 7989 }, { "epoch": 0.33170903844856486, "grad_norm": 1.8482720851898193, "learning_rate": 7.796967709081497e-06, "loss": 0.5522, "step": 7990 }, { "epoch": 0.3317505539727762, "grad_norm": 2.544755220413208, "learning_rate": 7.796410400568082e-06, "loss": 0.4983, "step": 7991 }, { "epoch": 0.33179206949698753, "grad_norm": 2.827077865600586, "learning_rate": 7.795853041494763e-06, "loss": 0.7406, "step": 7992 }, { "epoch": 0.33183358502119886, "grad_norm": 3.156773328781128, "learning_rate": 7.795295631871613e-06, "loss": 0.5142, "step": 7993 }, { "epoch": 0.3318751005454102, "grad_norm": 2.1817626953125, "learning_rate": 7.794738171708714e-06, "loss": 0.528, "step": 7994 }, { "epoch": 0.33191661606962153, "grad_norm": 2.71689772605896, "learning_rate": 7.794180661016143e-06, "loss": 0.5315, "step": 7995 }, { "epoch": 0.33195813159383286, "grad_norm": 2.474653720855713, "learning_rate": 7.793623099803979e-06, "loss": 0.474, "step": 7996 }, { "epoch": 0.3319996471180442, "grad_norm": 2.678011894226074, "learning_rate": 7.793065488082305e-06, "loss": 0.5448, "step": 7997 }, { "epoch": 0.3320411626422555, "grad_norm": 2.941481113433838, "learning_rate": 7.792507825861202e-06, "loss": 0.6823, "step": 7998 }, { "epoch": 0.33208267816646686, "grad_norm": 2.6984126567840576, "learning_rate": 7.791950113150753e-06, "loss": 0.4652, "step": 7999 }, { "epoch": 0.3321241936906782, "grad_norm": 2.6102445125579834, "learning_rate": 7.79139234996104e-06, "loss": 0.6226, "step": 8000 }, { "epoch": 0.3321657092148895, "grad_norm": 1.9982562065124512, "learning_rate": 7.790834536302151e-06, "loss": 0.4428, "step": 8001 }, { "epoch": 0.33220722473910086, "grad_norm": 2.4957613945007324, "learning_rate": 7.790276672184168e-06, "loss": 0.555, "step": 8002 }, { "epoch": 0.3322487402633122, "grad_norm": 2.880368232727051, "learning_rate": 7.789718757617178e-06, "loss": 0.5208, "step": 8003 }, { "epoch": 0.3322902557875235, "grad_norm": 2.8238017559051514, "learning_rate": 7.789160792611267e-06, "loss": 0.5583, "step": 8004 }, { "epoch": 0.33233177131173486, "grad_norm": 2.1760575771331787, "learning_rate": 7.788602777176528e-06, "loss": 0.3812, "step": 8005 }, { "epoch": 0.3323732868359462, "grad_norm": 2.565483331680298, "learning_rate": 7.788044711323046e-06, "loss": 0.535, "step": 8006 }, { "epoch": 0.3324148023601575, "grad_norm": 2.48708438873291, "learning_rate": 7.787486595060913e-06, "loss": 0.466, "step": 8007 }, { "epoch": 0.3324563178843689, "grad_norm": 2.4438111782073975, "learning_rate": 7.786928428400218e-06, "loss": 0.5344, "step": 8008 }, { "epoch": 0.33249783340858025, "grad_norm": 3.032719612121582, "learning_rate": 7.786370211351053e-06, "loss": 0.5783, "step": 8009 }, { "epoch": 0.3325393489327916, "grad_norm": 3.2037432193756104, "learning_rate": 7.785811943923512e-06, "loss": 0.583, "step": 8010 }, { "epoch": 0.3325808644570029, "grad_norm": 2.212472677230835, "learning_rate": 7.78525362612769e-06, "loss": 0.5787, "step": 8011 }, { "epoch": 0.33262237998121424, "grad_norm": 2.4906880855560303, "learning_rate": 7.784695257973677e-06, "loss": 0.5595, "step": 8012 }, { "epoch": 0.3326638955054256, "grad_norm": 2.9262921810150146, "learning_rate": 7.784136839471573e-06, "loss": 0.543, "step": 8013 }, { "epoch": 0.3327054110296369, "grad_norm": 2.4372565746307373, "learning_rate": 7.783578370631471e-06, "loss": 0.6411, "step": 8014 }, { "epoch": 0.33274692655384824, "grad_norm": 1.984963059425354, "learning_rate": 7.783019851463468e-06, "loss": 0.4889, "step": 8015 }, { "epoch": 0.3327884420780596, "grad_norm": 2.650311231613159, "learning_rate": 7.782461281977668e-06, "loss": 0.4138, "step": 8016 }, { "epoch": 0.3328299576022709, "grad_norm": 2.672691583633423, "learning_rate": 7.781902662184164e-06, "loss": 0.4321, "step": 8017 }, { "epoch": 0.33287147312648224, "grad_norm": 2.6646387577056885, "learning_rate": 7.781343992093057e-06, "loss": 0.5259, "step": 8018 }, { "epoch": 0.3329129886506936, "grad_norm": 2.156471014022827, "learning_rate": 7.780785271714452e-06, "loss": 0.569, "step": 8019 }, { "epoch": 0.3329545041749049, "grad_norm": 2.1917388439178467, "learning_rate": 7.780226501058445e-06, "loss": 0.4614, "step": 8020 }, { "epoch": 0.33299601969911624, "grad_norm": 2.527787685394287, "learning_rate": 7.779667680135143e-06, "loss": 0.499, "step": 8021 }, { "epoch": 0.3330375352233276, "grad_norm": 2.6570065021514893, "learning_rate": 7.779108808954647e-06, "loss": 0.4835, "step": 8022 }, { "epoch": 0.3330790507475389, "grad_norm": 2.660451889038086, "learning_rate": 7.778549887527064e-06, "loss": 0.5792, "step": 8023 }, { "epoch": 0.33312056627175024, "grad_norm": 1.8398091793060303, "learning_rate": 7.777990915862497e-06, "loss": 0.4151, "step": 8024 }, { "epoch": 0.3331620817959616, "grad_norm": 3.317884683609009, "learning_rate": 7.777431893971055e-06, "loss": 0.5237, "step": 8025 }, { "epoch": 0.3332035973201729, "grad_norm": 2.5053365230560303, "learning_rate": 7.776872821862842e-06, "loss": 0.4157, "step": 8026 }, { "epoch": 0.33324511284438424, "grad_norm": 2.3423800468444824, "learning_rate": 7.776313699547971e-06, "loss": 0.3949, "step": 8027 }, { "epoch": 0.33328662836859557, "grad_norm": 2.988665819168091, "learning_rate": 7.775754527036544e-06, "loss": 0.6375, "step": 8028 }, { "epoch": 0.3333281438928069, "grad_norm": 2.6169261932373047, "learning_rate": 7.775195304338678e-06, "loss": 0.6175, "step": 8029 }, { "epoch": 0.33336965941701824, "grad_norm": 2.3349170684814453, "learning_rate": 7.774636031464479e-06, "loss": 0.4133, "step": 8030 }, { "epoch": 0.33341117494122957, "grad_norm": 2.428729772567749, "learning_rate": 7.774076708424062e-06, "loss": 0.6448, "step": 8031 }, { "epoch": 0.3334526904654409, "grad_norm": 2.3719370365142822, "learning_rate": 7.773517335227539e-06, "loss": 0.4941, "step": 8032 }, { "epoch": 0.33349420598965224, "grad_norm": 2.1632683277130127, "learning_rate": 7.772957911885023e-06, "loss": 0.4109, "step": 8033 }, { "epoch": 0.33353572151386357, "grad_norm": 2.328155755996704, "learning_rate": 7.772398438406628e-06, "loss": 0.6648, "step": 8034 }, { "epoch": 0.3335772370380749, "grad_norm": 2.5283563137054443, "learning_rate": 7.771838914802469e-06, "loss": 0.5485, "step": 8035 }, { "epoch": 0.33361875256228624, "grad_norm": 3.2109203338623047, "learning_rate": 7.771279341082667e-06, "loss": 0.4557, "step": 8036 }, { "epoch": 0.33366026808649757, "grad_norm": 2.3031044006347656, "learning_rate": 7.770719717257334e-06, "loss": 0.5469, "step": 8037 }, { "epoch": 0.3337017836107089, "grad_norm": 2.657236337661743, "learning_rate": 7.770160043336589e-06, "loss": 0.5965, "step": 8038 }, { "epoch": 0.3337432991349203, "grad_norm": 2.477215051651001, "learning_rate": 7.769600319330553e-06, "loss": 0.6067, "step": 8039 }, { "epoch": 0.3337848146591316, "grad_norm": 2.4363112449645996, "learning_rate": 7.769040545249344e-06, "loss": 0.4466, "step": 8040 }, { "epoch": 0.33382633018334296, "grad_norm": 2.1400868892669678, "learning_rate": 7.768480721103084e-06, "loss": 0.4623, "step": 8041 }, { "epoch": 0.3338678457075543, "grad_norm": 2.3187975883483887, "learning_rate": 7.767920846901895e-06, "loss": 0.6259, "step": 8042 }, { "epoch": 0.3339093612317656, "grad_norm": 2.2119650840759277, "learning_rate": 7.767360922655898e-06, "loss": 0.5218, "step": 8043 }, { "epoch": 0.33395087675597696, "grad_norm": 1.9750748872756958, "learning_rate": 7.76680094837522e-06, "loss": 0.4094, "step": 8044 }, { "epoch": 0.3339923922801883, "grad_norm": 2.609315872192383, "learning_rate": 7.766240924069984e-06, "loss": 0.5585, "step": 8045 }, { "epoch": 0.3340339078043996, "grad_norm": 2.742889404296875, "learning_rate": 7.765680849750311e-06, "loss": 0.4841, "step": 8046 }, { "epoch": 0.33407542332861095, "grad_norm": 2.117255687713623, "learning_rate": 7.765120725426334e-06, "loss": 0.5029, "step": 8047 }, { "epoch": 0.3341169388528223, "grad_norm": 2.2746877670288086, "learning_rate": 7.764560551108176e-06, "loss": 0.4464, "step": 8048 }, { "epoch": 0.3341584543770336, "grad_norm": 2.7225899696350098, "learning_rate": 7.764000326805967e-06, "loss": 0.4471, "step": 8049 }, { "epoch": 0.33419996990124495, "grad_norm": 2.6206555366516113, "learning_rate": 7.763440052529836e-06, "loss": 0.5495, "step": 8050 }, { "epoch": 0.3342414854254563, "grad_norm": 3.3166987895965576, "learning_rate": 7.762879728289911e-06, "loss": 0.5802, "step": 8051 }, { "epoch": 0.3342830009496676, "grad_norm": 2.2625584602355957, "learning_rate": 7.762319354096325e-06, "loss": 0.5573, "step": 8052 }, { "epoch": 0.33432451647387895, "grad_norm": 3.1435444355010986, "learning_rate": 7.76175892995921e-06, "loss": 0.6292, "step": 8053 }, { "epoch": 0.3343660319980903, "grad_norm": 2.86982798576355, "learning_rate": 7.761198455888696e-06, "loss": 0.4869, "step": 8054 }, { "epoch": 0.3344075475223016, "grad_norm": 3.616807222366333, "learning_rate": 7.76063793189492e-06, "loss": 0.6044, "step": 8055 }, { "epoch": 0.33444906304651295, "grad_norm": 2.7654244899749756, "learning_rate": 7.760077357988012e-06, "loss": 0.5414, "step": 8056 }, { "epoch": 0.3344905785707243, "grad_norm": 2.1701900959014893, "learning_rate": 7.759516734178109e-06, "loss": 0.5202, "step": 8057 }, { "epoch": 0.3345320940949356, "grad_norm": 2.5743372440338135, "learning_rate": 7.758956060475352e-06, "loss": 0.4365, "step": 8058 }, { "epoch": 0.33457360961914695, "grad_norm": 2.293503999710083, "learning_rate": 7.758395336889872e-06, "loss": 0.6124, "step": 8059 }, { "epoch": 0.3346151251433583, "grad_norm": 2.8272252082824707, "learning_rate": 7.757834563431807e-06, "loss": 0.5285, "step": 8060 }, { "epoch": 0.3346566406675696, "grad_norm": 2.2946321964263916, "learning_rate": 7.757273740111301e-06, "loss": 0.4368, "step": 8061 }, { "epoch": 0.33469815619178095, "grad_norm": 2.410597801208496, "learning_rate": 7.75671286693849e-06, "loss": 0.5435, "step": 8062 }, { "epoch": 0.3347396717159923, "grad_norm": 2.354783296585083, "learning_rate": 7.756151943923518e-06, "loss": 0.4991, "step": 8063 }, { "epoch": 0.3347811872402036, "grad_norm": 2.418402910232544, "learning_rate": 7.755590971076521e-06, "loss": 0.5281, "step": 8064 }, { "epoch": 0.33482270276441495, "grad_norm": 2.4755241870880127, "learning_rate": 7.755029948407648e-06, "loss": 0.4663, "step": 8065 }, { "epoch": 0.3348642182886263, "grad_norm": 2.7090187072753906, "learning_rate": 7.754468875927038e-06, "loss": 0.4087, "step": 8066 }, { "epoch": 0.3349057338128376, "grad_norm": 2.560401678085327, "learning_rate": 7.753907753644835e-06, "loss": 0.5083, "step": 8067 }, { "epoch": 0.33494724933704895, "grad_norm": 2.465085983276367, "learning_rate": 7.75334658157119e-06, "loss": 0.5243, "step": 8068 }, { "epoch": 0.33498876486126034, "grad_norm": 2.1681978702545166, "learning_rate": 7.752785359716243e-06, "loss": 0.4793, "step": 8069 }, { "epoch": 0.33503028038547167, "grad_norm": 2.0782470703125, "learning_rate": 7.752224088090143e-06, "loss": 0.5915, "step": 8070 }, { "epoch": 0.335071795909683, "grad_norm": 2.391181230545044, "learning_rate": 7.751662766703038e-06, "loss": 0.4627, "step": 8071 }, { "epoch": 0.33511331143389433, "grad_norm": 2.5742433071136475, "learning_rate": 7.75110139556508e-06, "loss": 0.6314, "step": 8072 }, { "epoch": 0.33515482695810567, "grad_norm": 2.2740061283111572, "learning_rate": 7.750539974686413e-06, "loss": 0.6151, "step": 8073 }, { "epoch": 0.335196342482317, "grad_norm": 2.8735663890838623, "learning_rate": 7.749978504077191e-06, "loss": 0.6669, "step": 8074 }, { "epoch": 0.33523785800652833, "grad_norm": 2.434720516204834, "learning_rate": 7.749416983747564e-06, "loss": 0.5132, "step": 8075 }, { "epoch": 0.33527937353073967, "grad_norm": 2.4952900409698486, "learning_rate": 7.748855413707688e-06, "loss": 0.5162, "step": 8076 }, { "epoch": 0.335320889054951, "grad_norm": 2.14133882522583, "learning_rate": 7.748293793967711e-06, "loss": 0.4911, "step": 8077 }, { "epoch": 0.33536240457916233, "grad_norm": 3.5154366493225098, "learning_rate": 7.747732124537792e-06, "loss": 0.4549, "step": 8078 }, { "epoch": 0.33540392010337367, "grad_norm": 2.775933265686035, "learning_rate": 7.747170405428084e-06, "loss": 0.5378, "step": 8079 }, { "epoch": 0.335445435627585, "grad_norm": 2.7035574913024902, "learning_rate": 7.746608636648742e-06, "loss": 0.6016, "step": 8080 }, { "epoch": 0.33548695115179633, "grad_norm": 2.6371421813964844, "learning_rate": 7.746046818209926e-06, "loss": 0.5187, "step": 8081 }, { "epoch": 0.33552846667600766, "grad_norm": 2.4568028450012207, "learning_rate": 7.74548495012179e-06, "loss": 0.5058, "step": 8082 }, { "epoch": 0.335569982200219, "grad_norm": 2.169851541519165, "learning_rate": 7.744923032394496e-06, "loss": 0.5897, "step": 8083 }, { "epoch": 0.33561149772443033, "grad_norm": 2.737508773803711, "learning_rate": 7.744361065038203e-06, "loss": 0.6729, "step": 8084 }, { "epoch": 0.33565301324864166, "grad_norm": 2.2810351848602295, "learning_rate": 7.74379904806307e-06, "loss": 0.4389, "step": 8085 }, { "epoch": 0.335694528772853, "grad_norm": 2.277256965637207, "learning_rate": 7.743236981479259e-06, "loss": 0.401, "step": 8086 }, { "epoch": 0.33573604429706433, "grad_norm": 3.119457960128784, "learning_rate": 7.742674865296934e-06, "loss": 0.4151, "step": 8087 }, { "epoch": 0.33577755982127566, "grad_norm": 2.520841598510742, "learning_rate": 7.742112699526257e-06, "loss": 0.437, "step": 8088 }, { "epoch": 0.335819075345487, "grad_norm": 2.442636728286743, "learning_rate": 7.741550484177389e-06, "loss": 0.5083, "step": 8089 }, { "epoch": 0.33586059086969833, "grad_norm": 3.0824320316314697, "learning_rate": 7.7409882192605e-06, "loss": 0.5524, "step": 8090 }, { "epoch": 0.33590210639390966, "grad_norm": 2.5709948539733887, "learning_rate": 7.740425904785756e-06, "loss": 0.5631, "step": 8091 }, { "epoch": 0.335943621918121, "grad_norm": 1.9291304349899292, "learning_rate": 7.739863540763319e-06, "loss": 0.3824, "step": 8092 }, { "epoch": 0.3359851374423323, "grad_norm": 2.5754194259643555, "learning_rate": 7.739301127203361e-06, "loss": 0.3974, "step": 8093 }, { "epoch": 0.33602665296654366, "grad_norm": 2.3665595054626465, "learning_rate": 7.738738664116048e-06, "loss": 0.4331, "step": 8094 }, { "epoch": 0.336068168490755, "grad_norm": 2.433076858520508, "learning_rate": 7.738176151511552e-06, "loss": 0.5357, "step": 8095 }, { "epoch": 0.3361096840149663, "grad_norm": 2.180588483810425, "learning_rate": 7.73761358940004e-06, "loss": 0.4353, "step": 8096 }, { "epoch": 0.33615119953917766, "grad_norm": 2.9949421882629395, "learning_rate": 7.737050977791687e-06, "loss": 0.6006, "step": 8097 }, { "epoch": 0.336192715063389, "grad_norm": 2.737502336502075, "learning_rate": 7.736488316696663e-06, "loss": 0.5592, "step": 8098 }, { "epoch": 0.3362342305876003, "grad_norm": 2.371565103530884, "learning_rate": 7.73592560612514e-06, "loss": 0.5067, "step": 8099 }, { "epoch": 0.3362757461118117, "grad_norm": 2.6261723041534424, "learning_rate": 7.735362846087296e-06, "loss": 0.6695, "step": 8100 }, { "epoch": 0.33631726163602305, "grad_norm": 2.770663261413574, "learning_rate": 7.734800036593301e-06, "loss": 0.6287, "step": 8101 }, { "epoch": 0.3363587771602344, "grad_norm": 2.6048290729522705, "learning_rate": 7.734237177653334e-06, "loss": 0.5249, "step": 8102 }, { "epoch": 0.3364002926844457, "grad_norm": 2.622218608856201, "learning_rate": 7.733674269277572e-06, "loss": 0.5043, "step": 8103 }, { "epoch": 0.33644180820865704, "grad_norm": 2.7761805057525635, "learning_rate": 7.733111311476189e-06, "loss": 0.6172, "step": 8104 }, { "epoch": 0.3364833237328684, "grad_norm": 2.3540890216827393, "learning_rate": 7.732548304259367e-06, "loss": 0.5203, "step": 8105 }, { "epoch": 0.3365248392570797, "grad_norm": 2.39886474609375, "learning_rate": 7.731985247637284e-06, "loss": 0.5749, "step": 8106 }, { "epoch": 0.33656635478129104, "grad_norm": 2.5372154712677, "learning_rate": 7.73142214162012e-06, "loss": 0.5546, "step": 8107 }, { "epoch": 0.3366078703055024, "grad_norm": 2.310319423675537, "learning_rate": 7.730858986218056e-06, "loss": 0.5284, "step": 8108 }, { "epoch": 0.3366493858297137, "grad_norm": 2.3780763149261475, "learning_rate": 7.730295781441275e-06, "loss": 0.585, "step": 8109 }, { "epoch": 0.33669090135392504, "grad_norm": 2.3329756259918213, "learning_rate": 7.72973252729996e-06, "loss": 0.5253, "step": 8110 }, { "epoch": 0.3367324168781364, "grad_norm": 3.021047353744507, "learning_rate": 7.729169223804295e-06, "loss": 0.5375, "step": 8111 }, { "epoch": 0.3367739324023477, "grad_norm": 2.5010695457458496, "learning_rate": 7.72860587096446e-06, "loss": 0.5454, "step": 8112 }, { "epoch": 0.33681544792655904, "grad_norm": 2.637899160385132, "learning_rate": 7.728042468790647e-06, "loss": 0.494, "step": 8113 }, { "epoch": 0.3368569634507704, "grad_norm": 2.4190709590911865, "learning_rate": 7.727479017293038e-06, "loss": 0.3791, "step": 8114 }, { "epoch": 0.3368984789749817, "grad_norm": 2.0013997554779053, "learning_rate": 7.726915516481824e-06, "loss": 0.4365, "step": 8115 }, { "epoch": 0.33693999449919304, "grad_norm": 1.9418588876724243, "learning_rate": 7.726351966367191e-06, "loss": 0.5574, "step": 8116 }, { "epoch": 0.3369815100234044, "grad_norm": 2.833310842514038, "learning_rate": 7.725788366959328e-06, "loss": 0.5458, "step": 8117 }, { "epoch": 0.3370230255476157, "grad_norm": 2.4053122997283936, "learning_rate": 7.725224718268428e-06, "loss": 0.5103, "step": 8118 }, { "epoch": 0.33706454107182704, "grad_norm": 2.419321298599243, "learning_rate": 7.724661020304678e-06, "loss": 0.4724, "step": 8119 }, { "epoch": 0.3371060565960384, "grad_norm": 2.245349407196045, "learning_rate": 7.72409727307827e-06, "loss": 0.4626, "step": 8120 }, { "epoch": 0.3371475721202497, "grad_norm": 2.5128960609436035, "learning_rate": 7.7235334765994e-06, "loss": 0.4567, "step": 8121 }, { "epoch": 0.33718908764446104, "grad_norm": 2.322514533996582, "learning_rate": 7.722969630878261e-06, "loss": 0.544, "step": 8122 }, { "epoch": 0.33723060316867237, "grad_norm": 3.5832157135009766, "learning_rate": 7.722405735925043e-06, "loss": 0.5117, "step": 8123 }, { "epoch": 0.3372721186928837, "grad_norm": 2.608630895614624, "learning_rate": 7.721841791749946e-06, "loss": 0.5841, "step": 8124 }, { "epoch": 0.33731363421709504, "grad_norm": 2.326667547225952, "learning_rate": 7.721277798363165e-06, "loss": 0.4606, "step": 8125 }, { "epoch": 0.33735514974130637, "grad_norm": 2.4232771396636963, "learning_rate": 7.720713755774898e-06, "loss": 0.554, "step": 8126 }, { "epoch": 0.3373966652655177, "grad_norm": 2.656221389770508, "learning_rate": 7.72014966399534e-06, "loss": 0.4159, "step": 8127 }, { "epoch": 0.33743818078972904, "grad_norm": 2.522660970687866, "learning_rate": 7.719585523034693e-06, "loss": 0.4087, "step": 8128 }, { "epoch": 0.33747969631394037, "grad_norm": 2.6942343711853027, "learning_rate": 7.719021332903157e-06, "loss": 0.6472, "step": 8129 }, { "epoch": 0.3375212118381517, "grad_norm": 2.103086471557617, "learning_rate": 7.71845709361093e-06, "loss": 0.4422, "step": 8130 }, { "epoch": 0.3375627273623631, "grad_norm": 2.434528112411499, "learning_rate": 7.717892805168216e-06, "loss": 0.5899, "step": 8131 }, { "epoch": 0.3376042428865744, "grad_norm": 2.400005340576172, "learning_rate": 7.717328467585217e-06, "loss": 0.6274, "step": 8132 }, { "epoch": 0.33764575841078576, "grad_norm": 3.014533042907715, "learning_rate": 7.716764080872136e-06, "loss": 0.4262, "step": 8133 }, { "epoch": 0.3376872739349971, "grad_norm": 2.4809343814849854, "learning_rate": 7.716199645039177e-06, "loss": 0.478, "step": 8134 }, { "epoch": 0.3377287894592084, "grad_norm": 2.1311097145080566, "learning_rate": 7.715635160096546e-06, "loss": 0.4782, "step": 8135 }, { "epoch": 0.33777030498341976, "grad_norm": 3.0878546237945557, "learning_rate": 7.715070626054446e-06, "loss": 0.4591, "step": 8136 }, { "epoch": 0.3378118205076311, "grad_norm": 2.467280864715576, "learning_rate": 7.714506042923092e-06, "loss": 0.4553, "step": 8137 }, { "epoch": 0.3378533360318424, "grad_norm": 2.347881555557251, "learning_rate": 7.713941410712681e-06, "loss": 0.5144, "step": 8138 }, { "epoch": 0.33789485155605375, "grad_norm": 2.254998207092285, "learning_rate": 7.71337672943343e-06, "loss": 0.5825, "step": 8139 }, { "epoch": 0.3379363670802651, "grad_norm": 2.4319405555725098, "learning_rate": 7.712811999095545e-06, "loss": 0.4627, "step": 8140 }, { "epoch": 0.3379778826044764, "grad_norm": 2.3711390495300293, "learning_rate": 7.712247219709236e-06, "loss": 0.4713, "step": 8141 }, { "epoch": 0.33801939812868775, "grad_norm": 2.516509532928467, "learning_rate": 7.711682391284715e-06, "loss": 0.6929, "step": 8142 }, { "epoch": 0.3380609136528991, "grad_norm": 2.6312873363494873, "learning_rate": 7.711117513832196e-06, "loss": 0.6496, "step": 8143 }, { "epoch": 0.3381024291771104, "grad_norm": 3.34108567237854, "learning_rate": 7.710552587361893e-06, "loss": 0.4731, "step": 8144 }, { "epoch": 0.33814394470132175, "grad_norm": 2.880910634994507, "learning_rate": 7.709987611884016e-06, "loss": 0.6138, "step": 8145 }, { "epoch": 0.3381854602255331, "grad_norm": 2.4251763820648193, "learning_rate": 7.709422587408782e-06, "loss": 0.4939, "step": 8146 }, { "epoch": 0.3382269757497444, "grad_norm": 2.3215649127960205, "learning_rate": 7.708857513946406e-06, "loss": 0.654, "step": 8147 }, { "epoch": 0.33826849127395575, "grad_norm": 2.107459306716919, "learning_rate": 7.708292391507105e-06, "loss": 0.5608, "step": 8148 }, { "epoch": 0.3383100067981671, "grad_norm": 2.2203595638275146, "learning_rate": 7.707727220101098e-06, "loss": 0.5269, "step": 8149 }, { "epoch": 0.3383515223223784, "grad_norm": 2.2225723266601562, "learning_rate": 7.707161999738603e-06, "loss": 0.4732, "step": 8150 }, { "epoch": 0.33839303784658975, "grad_norm": 2.534241199493408, "learning_rate": 7.706596730429837e-06, "loss": 0.5583, "step": 8151 }, { "epoch": 0.3384345533708011, "grad_norm": 2.9182968139648438, "learning_rate": 7.706031412185025e-06, "loss": 0.453, "step": 8152 }, { "epoch": 0.3384760688950124, "grad_norm": 2.971571922302246, "learning_rate": 7.705466045014381e-06, "loss": 0.4395, "step": 8153 }, { "epoch": 0.33851758441922375, "grad_norm": 3.2730324268341064, "learning_rate": 7.704900628928133e-06, "loss": 0.5628, "step": 8154 }, { "epoch": 0.3385590999434351, "grad_norm": 2.4101507663726807, "learning_rate": 7.704335163936502e-06, "loss": 0.4735, "step": 8155 }, { "epoch": 0.3386006154676464, "grad_norm": 2.3361191749572754, "learning_rate": 7.703769650049712e-06, "loss": 0.4304, "step": 8156 }, { "epoch": 0.33864213099185775, "grad_norm": 2.426957845687866, "learning_rate": 7.703204087277989e-06, "loss": 0.6522, "step": 8157 }, { "epoch": 0.3386836465160691, "grad_norm": 2.7117860317230225, "learning_rate": 7.702638475631555e-06, "loss": 0.5349, "step": 8158 }, { "epoch": 0.3387251620402804, "grad_norm": 2.36248779296875, "learning_rate": 7.702072815120637e-06, "loss": 0.5184, "step": 8159 }, { "epoch": 0.33876667756449175, "grad_norm": 2.3481576442718506, "learning_rate": 7.701507105755467e-06, "loss": 0.5702, "step": 8160 }, { "epoch": 0.3388081930887031, "grad_norm": 2.9256341457366943, "learning_rate": 7.70094134754627e-06, "loss": 0.5925, "step": 8161 }, { "epoch": 0.33884970861291447, "grad_norm": 2.8021774291992188, "learning_rate": 7.70037554050327e-06, "loss": 0.501, "step": 8162 }, { "epoch": 0.3388912241371258, "grad_norm": 2.907773017883301, "learning_rate": 7.699809684636707e-06, "loss": 0.4775, "step": 8163 }, { "epoch": 0.33893273966133713, "grad_norm": 2.4973621368408203, "learning_rate": 7.699243779956805e-06, "loss": 0.4761, "step": 8164 }, { "epoch": 0.33897425518554847, "grad_norm": 2.4598116874694824, "learning_rate": 7.698677826473798e-06, "loss": 0.4464, "step": 8165 }, { "epoch": 0.3390157707097598, "grad_norm": 2.6280782222747803, "learning_rate": 7.698111824197917e-06, "loss": 0.4469, "step": 8166 }, { "epoch": 0.33905728623397113, "grad_norm": 2.1669037342071533, "learning_rate": 7.697545773139397e-06, "loss": 0.4806, "step": 8167 }, { "epoch": 0.33909880175818247, "grad_norm": 2.301875591278076, "learning_rate": 7.69697967330847e-06, "loss": 0.5285, "step": 8168 }, { "epoch": 0.3391403172823938, "grad_norm": 2.216766834259033, "learning_rate": 7.696413524715376e-06, "loss": 0.3891, "step": 8169 }, { "epoch": 0.33918183280660513, "grad_norm": 2.1350440979003906, "learning_rate": 7.695847327370347e-06, "loss": 0.41, "step": 8170 }, { "epoch": 0.33922334833081647, "grad_norm": 2.8559250831604004, "learning_rate": 7.695281081283621e-06, "loss": 0.6677, "step": 8171 }, { "epoch": 0.3392648638550278, "grad_norm": 2.485827922821045, "learning_rate": 7.694714786465439e-06, "loss": 0.6192, "step": 8172 }, { "epoch": 0.33930637937923913, "grad_norm": 2.3932111263275146, "learning_rate": 7.694148442926033e-06, "loss": 0.5242, "step": 8173 }, { "epoch": 0.33934789490345046, "grad_norm": 2.429939031600952, "learning_rate": 7.693582050675648e-06, "loss": 0.4505, "step": 8174 }, { "epoch": 0.3393894104276618, "grad_norm": 2.3826329708099365, "learning_rate": 7.693015609724524e-06, "loss": 0.4139, "step": 8175 }, { "epoch": 0.33943092595187313, "grad_norm": 2.2512447834014893, "learning_rate": 7.6924491200829e-06, "loss": 0.4488, "step": 8176 }, { "epoch": 0.33947244147608446, "grad_norm": 2.3409228324890137, "learning_rate": 7.691882581761021e-06, "loss": 0.6364, "step": 8177 }, { "epoch": 0.3395139570002958, "grad_norm": 2.1456174850463867, "learning_rate": 7.69131599476913e-06, "loss": 0.4513, "step": 8178 }, { "epoch": 0.33955547252450713, "grad_norm": 2.5089595317840576, "learning_rate": 7.690749359117468e-06, "loss": 0.691, "step": 8179 }, { "epoch": 0.33959698804871846, "grad_norm": 2.5592055320739746, "learning_rate": 7.690182674816284e-06, "loss": 0.5091, "step": 8180 }, { "epoch": 0.3396385035729298, "grad_norm": 2.4894163608551025, "learning_rate": 7.68961594187582e-06, "loss": 0.5716, "step": 8181 }, { "epoch": 0.33968001909714113, "grad_norm": 2.492455244064331, "learning_rate": 7.689049160306326e-06, "loss": 0.4834, "step": 8182 }, { "epoch": 0.33972153462135246, "grad_norm": 2.575493335723877, "learning_rate": 7.688482330118047e-06, "loss": 0.5517, "step": 8183 }, { "epoch": 0.3397630501455638, "grad_norm": 2.6872291564941406, "learning_rate": 7.687915451321234e-06, "loss": 0.5226, "step": 8184 }, { "epoch": 0.3398045656697751, "grad_norm": 2.723613977432251, "learning_rate": 7.687348523926134e-06, "loss": 0.5041, "step": 8185 }, { "epoch": 0.33984608119398646, "grad_norm": 2.8811442852020264, "learning_rate": 7.686781547942998e-06, "loss": 0.545, "step": 8186 }, { "epoch": 0.3398875967181978, "grad_norm": 2.4036099910736084, "learning_rate": 7.686214523382078e-06, "loss": 0.5463, "step": 8187 }, { "epoch": 0.3399291122424091, "grad_norm": 2.6843624114990234, "learning_rate": 7.685647450253624e-06, "loss": 0.564, "step": 8188 }, { "epoch": 0.33997062776662046, "grad_norm": 2.284777879714966, "learning_rate": 7.68508032856789e-06, "loss": 0.5256, "step": 8189 }, { "epoch": 0.3400121432908318, "grad_norm": 2.0326576232910156, "learning_rate": 7.68451315833513e-06, "loss": 0.5854, "step": 8190 }, { "epoch": 0.3400536588150431, "grad_norm": 2.6047604084014893, "learning_rate": 7.683945939565599e-06, "loss": 0.4582, "step": 8191 }, { "epoch": 0.34009517433925446, "grad_norm": 2.4457812309265137, "learning_rate": 7.68337867226955e-06, "loss": 0.5934, "step": 8192 }, { "epoch": 0.34013668986346585, "grad_norm": 2.849869966506958, "learning_rate": 7.682811356457245e-06, "loss": 0.6193, "step": 8193 }, { "epoch": 0.3401782053876772, "grad_norm": 2.3815252780914307, "learning_rate": 7.682243992138934e-06, "loss": 0.515, "step": 8194 }, { "epoch": 0.3402197209118885, "grad_norm": 2.9163239002227783, "learning_rate": 7.68167657932488e-06, "loss": 0.5783, "step": 8195 }, { "epoch": 0.34026123643609985, "grad_norm": 2.4533395767211914, "learning_rate": 7.68110911802534e-06, "loss": 0.507, "step": 8196 }, { "epoch": 0.3403027519603112, "grad_norm": 2.4811713695526123, "learning_rate": 7.680541608250574e-06, "loss": 0.5766, "step": 8197 }, { "epoch": 0.3403442674845225, "grad_norm": 2.5136561393737793, "learning_rate": 7.679974050010843e-06, "loss": 0.4568, "step": 8198 }, { "epoch": 0.34038578300873384, "grad_norm": 2.1756279468536377, "learning_rate": 7.67940644331641e-06, "loss": 0.3825, "step": 8199 }, { "epoch": 0.3404272985329452, "grad_norm": 2.0486505031585693, "learning_rate": 7.678838788177534e-06, "loss": 0.4496, "step": 8200 }, { "epoch": 0.3404688140571565, "grad_norm": 2.7073707580566406, "learning_rate": 7.678271084604483e-06, "loss": 0.5118, "step": 8201 }, { "epoch": 0.34051032958136784, "grad_norm": 2.499610662460327, "learning_rate": 7.677703332607518e-06, "loss": 0.5645, "step": 8202 }, { "epoch": 0.3405518451055792, "grad_norm": 2.5082268714904785, "learning_rate": 7.677135532196905e-06, "loss": 0.6848, "step": 8203 }, { "epoch": 0.3405933606297905, "grad_norm": 2.5637283325195312, "learning_rate": 7.676567683382909e-06, "loss": 0.5791, "step": 8204 }, { "epoch": 0.34063487615400184, "grad_norm": 2.2283480167388916, "learning_rate": 7.675999786175798e-06, "loss": 0.6128, "step": 8205 }, { "epoch": 0.3406763916782132, "grad_norm": 2.465461015701294, "learning_rate": 7.675431840585842e-06, "loss": 0.6018, "step": 8206 }, { "epoch": 0.3407179072024245, "grad_norm": 2.8140761852264404, "learning_rate": 7.674863846623304e-06, "loss": 0.4238, "step": 8207 }, { "epoch": 0.34075942272663584, "grad_norm": 2.6187102794647217, "learning_rate": 7.674295804298458e-06, "loss": 0.6338, "step": 8208 }, { "epoch": 0.3408009382508472, "grad_norm": 3.1461119651794434, "learning_rate": 7.673727713621573e-06, "loss": 0.519, "step": 8209 }, { "epoch": 0.3408424537750585, "grad_norm": 2.5199127197265625, "learning_rate": 7.67315957460292e-06, "loss": 0.5665, "step": 8210 }, { "epoch": 0.34088396929926984, "grad_norm": 2.2595393657684326, "learning_rate": 7.672591387252773e-06, "loss": 0.4604, "step": 8211 }, { "epoch": 0.3409254848234812, "grad_norm": 2.9842231273651123, "learning_rate": 7.672023151581401e-06, "loss": 0.5056, "step": 8212 }, { "epoch": 0.3409670003476925, "grad_norm": 2.553067922592163, "learning_rate": 7.671454867599082e-06, "loss": 0.4728, "step": 8213 }, { "epoch": 0.34100851587190384, "grad_norm": 3.0160744190216064, "learning_rate": 7.670886535316086e-06, "loss": 0.562, "step": 8214 }, { "epoch": 0.34105003139611517, "grad_norm": 2.072042465209961, "learning_rate": 7.670318154742695e-06, "loss": 0.4881, "step": 8215 }, { "epoch": 0.3410915469203265, "grad_norm": 2.6652560234069824, "learning_rate": 7.669749725889182e-06, "loss": 0.4347, "step": 8216 }, { "epoch": 0.34113306244453784, "grad_norm": 2.380056858062744, "learning_rate": 7.669181248765822e-06, "loss": 0.4568, "step": 8217 }, { "epoch": 0.34117457796874917, "grad_norm": 2.1570701599121094, "learning_rate": 7.668612723382897e-06, "loss": 0.5492, "step": 8218 }, { "epoch": 0.3412160934929605, "grad_norm": 2.521841049194336, "learning_rate": 7.668044149750686e-06, "loss": 0.4749, "step": 8219 }, { "epoch": 0.34125760901717184, "grad_norm": 2.919328212738037, "learning_rate": 7.667475527879466e-06, "loss": 0.4691, "step": 8220 }, { "epoch": 0.34129912454138317, "grad_norm": 2.313978433609009, "learning_rate": 7.666906857779521e-06, "loss": 0.4925, "step": 8221 }, { "epoch": 0.3413406400655945, "grad_norm": 2.2817745208740234, "learning_rate": 7.66633813946113e-06, "loss": 0.6096, "step": 8222 }, { "epoch": 0.34138215558980584, "grad_norm": 3.3683454990386963, "learning_rate": 7.665769372934577e-06, "loss": 0.5366, "step": 8223 }, { "epoch": 0.3414236711140172, "grad_norm": 2.7219362258911133, "learning_rate": 7.665200558210148e-06, "loss": 0.5183, "step": 8224 }, { "epoch": 0.34146518663822856, "grad_norm": 2.3602969646453857, "learning_rate": 7.664631695298122e-06, "loss": 0.6625, "step": 8225 }, { "epoch": 0.3415067021624399, "grad_norm": 2.376880168914795, "learning_rate": 7.664062784208787e-06, "loss": 0.5192, "step": 8226 }, { "epoch": 0.3415482176866512, "grad_norm": 3.0320727825164795, "learning_rate": 7.66349382495243e-06, "loss": 0.5977, "step": 8227 }, { "epoch": 0.34158973321086256, "grad_norm": 2.433176040649414, "learning_rate": 7.662924817539336e-06, "loss": 0.5598, "step": 8228 }, { "epoch": 0.3416312487350739, "grad_norm": 2.786884069442749, "learning_rate": 7.662355761979794e-06, "loss": 0.4702, "step": 8229 }, { "epoch": 0.3416727642592852, "grad_norm": 2.2804505825042725, "learning_rate": 7.661786658284095e-06, "loss": 0.6068, "step": 8230 }, { "epoch": 0.34171427978349656, "grad_norm": 2.3127424716949463, "learning_rate": 7.661217506462521e-06, "loss": 0.4126, "step": 8231 }, { "epoch": 0.3417557953077079, "grad_norm": 2.2177791595458984, "learning_rate": 7.660648306525371e-06, "loss": 0.5472, "step": 8232 }, { "epoch": 0.3417973108319192, "grad_norm": 2.404306411743164, "learning_rate": 7.660079058482931e-06, "loss": 0.5015, "step": 8233 }, { "epoch": 0.34183882635613055, "grad_norm": 2.6609489917755127, "learning_rate": 7.659509762345497e-06, "loss": 0.371, "step": 8234 }, { "epoch": 0.3418803418803419, "grad_norm": 2.758307456970215, "learning_rate": 7.658940418123359e-06, "loss": 0.6142, "step": 8235 }, { "epoch": 0.3419218574045532, "grad_norm": 2.839285135269165, "learning_rate": 7.658371025826812e-06, "loss": 0.3751, "step": 8236 }, { "epoch": 0.34196337292876455, "grad_norm": 2.7376608848571777, "learning_rate": 7.657801585466148e-06, "loss": 0.523, "step": 8237 }, { "epoch": 0.3420048884529759, "grad_norm": 2.5145411491394043, "learning_rate": 7.657232097051669e-06, "loss": 0.5608, "step": 8238 }, { "epoch": 0.3420464039771872, "grad_norm": 2.7097480297088623, "learning_rate": 7.656662560593666e-06, "loss": 0.5469, "step": 8239 }, { "epoch": 0.34208791950139855, "grad_norm": 2.768249988555908, "learning_rate": 7.656092976102438e-06, "loss": 0.5219, "step": 8240 }, { "epoch": 0.3421294350256099, "grad_norm": 2.360104560852051, "learning_rate": 7.655523343588284e-06, "loss": 0.3894, "step": 8241 }, { "epoch": 0.3421709505498212, "grad_norm": 2.3852710723876953, "learning_rate": 7.654953663061503e-06, "loss": 0.4463, "step": 8242 }, { "epoch": 0.34221246607403255, "grad_norm": 3.8917365074157715, "learning_rate": 7.654383934532395e-06, "loss": 0.7354, "step": 8243 }, { "epoch": 0.3422539815982439, "grad_norm": 2.203098773956299, "learning_rate": 7.653814158011258e-06, "loss": 0.5269, "step": 8244 }, { "epoch": 0.3422954971224552, "grad_norm": 2.5727696418762207, "learning_rate": 7.653244333508398e-06, "loss": 0.5599, "step": 8245 }, { "epoch": 0.34233701264666655, "grad_norm": 3.354316234588623, "learning_rate": 7.652674461034113e-06, "loss": 0.5091, "step": 8246 }, { "epoch": 0.3423785281708779, "grad_norm": 2.4384331703186035, "learning_rate": 7.652104540598712e-06, "loss": 0.4647, "step": 8247 }, { "epoch": 0.3424200436950892, "grad_norm": 2.1965792179107666, "learning_rate": 7.651534572212499e-06, "loss": 0.5626, "step": 8248 }, { "epoch": 0.34246155921930055, "grad_norm": 2.337211847305298, "learning_rate": 7.650964555885772e-06, "loss": 0.4718, "step": 8249 }, { "epoch": 0.3425030747435119, "grad_norm": 2.4479663372039795, "learning_rate": 7.650394491628843e-06, "loss": 0.548, "step": 8250 }, { "epoch": 0.3425445902677232, "grad_norm": 2.223954916000366, "learning_rate": 7.649824379452017e-06, "loss": 0.4111, "step": 8251 }, { "epoch": 0.34258610579193455, "grad_norm": 2.4604976177215576, "learning_rate": 7.649254219365605e-06, "loss": 0.6335, "step": 8252 }, { "epoch": 0.3426276213161459, "grad_norm": 2.4006588459014893, "learning_rate": 7.64868401137991e-06, "loss": 0.6049, "step": 8253 }, { "epoch": 0.3426691368403572, "grad_norm": 2.1876211166381836, "learning_rate": 7.648113755505246e-06, "loss": 0.4287, "step": 8254 }, { "epoch": 0.3427106523645686, "grad_norm": 2.6677799224853516, "learning_rate": 7.647543451751922e-06, "loss": 0.6751, "step": 8255 }, { "epoch": 0.34275216788877994, "grad_norm": 2.3138813972473145, "learning_rate": 7.646973100130251e-06, "loss": 0.5628, "step": 8256 }, { "epoch": 0.34279368341299127, "grad_norm": 2.6883561611175537, "learning_rate": 7.64640270065054e-06, "loss": 0.4766, "step": 8257 }, { "epoch": 0.3428351989372026, "grad_norm": 2.763394832611084, "learning_rate": 7.645832253323109e-06, "loss": 0.5107, "step": 8258 }, { "epoch": 0.34287671446141393, "grad_norm": 2.1709513664245605, "learning_rate": 7.645261758158264e-06, "loss": 0.4218, "step": 8259 }, { "epoch": 0.34291822998562527, "grad_norm": 2.6149117946624756, "learning_rate": 7.644691215166328e-06, "loss": 0.5451, "step": 8260 }, { "epoch": 0.3429597455098366, "grad_norm": 2.582488536834717, "learning_rate": 7.644120624357611e-06, "loss": 0.4247, "step": 8261 }, { "epoch": 0.34300126103404793, "grad_norm": 2.685837745666504, "learning_rate": 7.64354998574243e-06, "loss": 0.5618, "step": 8262 }, { "epoch": 0.34304277655825927, "grad_norm": 2.5810887813568115, "learning_rate": 7.642979299331105e-06, "loss": 0.5787, "step": 8263 }, { "epoch": 0.3430842920824706, "grad_norm": 2.4741342067718506, "learning_rate": 7.642408565133952e-06, "loss": 0.4799, "step": 8264 }, { "epoch": 0.34312580760668193, "grad_norm": 2.4177334308624268, "learning_rate": 7.64183778316129e-06, "loss": 0.6499, "step": 8265 }, { "epoch": 0.34316732313089326, "grad_norm": 3.2165145874023438, "learning_rate": 7.641266953423441e-06, "loss": 0.4858, "step": 8266 }, { "epoch": 0.3432088386551046, "grad_norm": 2.165818691253662, "learning_rate": 7.640696075930722e-06, "loss": 0.5118, "step": 8267 }, { "epoch": 0.34325035417931593, "grad_norm": 3.6597094535827637, "learning_rate": 7.640125150693458e-06, "loss": 0.5301, "step": 8268 }, { "epoch": 0.34329186970352726, "grad_norm": 2.477398633956909, "learning_rate": 7.639554177721971e-06, "loss": 0.3597, "step": 8269 }, { "epoch": 0.3433333852277386, "grad_norm": 2.529254198074341, "learning_rate": 7.63898315702658e-06, "loss": 0.4615, "step": 8270 }, { "epoch": 0.34337490075194993, "grad_norm": 2.4494118690490723, "learning_rate": 7.638412088617618e-06, "loss": 0.5267, "step": 8271 }, { "epoch": 0.34341641627616126, "grad_norm": 2.3146183490753174, "learning_rate": 7.6378409725054e-06, "loss": 0.6199, "step": 8272 }, { "epoch": 0.3434579318003726, "grad_norm": 2.806684732437134, "learning_rate": 7.63726980870026e-06, "loss": 0.5001, "step": 8273 }, { "epoch": 0.34349944732458393, "grad_norm": 2.389240026473999, "learning_rate": 7.636698597212523e-06, "loss": 0.5463, "step": 8274 }, { "epoch": 0.34354096284879526, "grad_norm": 2.451887369155884, "learning_rate": 7.636127338052513e-06, "loss": 0.561, "step": 8275 }, { "epoch": 0.3435824783730066, "grad_norm": 2.0747697353363037, "learning_rate": 7.635556031230562e-06, "loss": 0.6185, "step": 8276 }, { "epoch": 0.3436239938972179, "grad_norm": 2.324580669403076, "learning_rate": 7.634984676756997e-06, "loss": 0.527, "step": 8277 }, { "epoch": 0.34366550942142926, "grad_norm": 2.6389665603637695, "learning_rate": 7.63441327464215e-06, "loss": 0.5369, "step": 8278 }, { "epoch": 0.3437070249456406, "grad_norm": 3.6427853107452393, "learning_rate": 7.633841824896351e-06, "loss": 0.4475, "step": 8279 }, { "epoch": 0.3437485404698519, "grad_norm": 3.036748170852661, "learning_rate": 7.633270327529936e-06, "loss": 0.5897, "step": 8280 }, { "epoch": 0.34379005599406326, "grad_norm": 2.7041542530059814, "learning_rate": 7.632698782553232e-06, "loss": 0.5367, "step": 8281 }, { "epoch": 0.3438315715182746, "grad_norm": 2.373812198638916, "learning_rate": 7.632127189976574e-06, "loss": 0.5044, "step": 8282 }, { "epoch": 0.3438730870424859, "grad_norm": 2.479952335357666, "learning_rate": 7.6315555498103e-06, "loss": 0.5733, "step": 8283 }, { "epoch": 0.34391460256669726, "grad_norm": 2.122206211090088, "learning_rate": 7.630983862064742e-06, "loss": 0.4773, "step": 8284 }, { "epoch": 0.34395611809090865, "grad_norm": 2.3830435276031494, "learning_rate": 7.630412126750237e-06, "loss": 0.419, "step": 8285 }, { "epoch": 0.34399763361512, "grad_norm": 2.784605026245117, "learning_rate": 7.629840343877123e-06, "loss": 0.6651, "step": 8286 }, { "epoch": 0.3440391491393313, "grad_norm": 5.454967975616455, "learning_rate": 7.629268513455739e-06, "loss": 0.499, "step": 8287 }, { "epoch": 0.34408066466354265, "grad_norm": 2.7446184158325195, "learning_rate": 7.628696635496422e-06, "loss": 0.4946, "step": 8288 }, { "epoch": 0.344122180187754, "grad_norm": 2.6358954906463623, "learning_rate": 7.62812471000951e-06, "loss": 0.4326, "step": 8289 }, { "epoch": 0.3441636957119653, "grad_norm": 2.397789478302002, "learning_rate": 7.627552737005348e-06, "loss": 0.5655, "step": 8290 }, { "epoch": 0.34420521123617664, "grad_norm": 2.6489531993865967, "learning_rate": 7.626980716494274e-06, "loss": 0.6501, "step": 8291 }, { "epoch": 0.344246726760388, "grad_norm": 2.4047629833221436, "learning_rate": 7.626408648486632e-06, "loss": 0.5028, "step": 8292 }, { "epoch": 0.3442882422845993, "grad_norm": 2.312814474105835, "learning_rate": 7.625836532992764e-06, "loss": 0.5294, "step": 8293 }, { "epoch": 0.34432975780881064, "grad_norm": 2.7650537490844727, "learning_rate": 7.625264370023015e-06, "loss": 0.4551, "step": 8294 }, { "epoch": 0.344371273333022, "grad_norm": 2.7121775150299072, "learning_rate": 7.62469215958773e-06, "loss": 0.6266, "step": 8295 }, { "epoch": 0.3444127888572333, "grad_norm": 3.249945640563965, "learning_rate": 7.6241199016972535e-06, "loss": 0.422, "step": 8296 }, { "epoch": 0.34445430438144464, "grad_norm": 2.4510104656219482, "learning_rate": 7.623547596361933e-06, "loss": 0.4467, "step": 8297 }, { "epoch": 0.344495819905656, "grad_norm": 2.3481523990631104, "learning_rate": 7.6229752435921165e-06, "loss": 0.5282, "step": 8298 }, { "epoch": 0.3445373354298673, "grad_norm": 2.8396553993225098, "learning_rate": 7.6224028433981515e-06, "loss": 0.4011, "step": 8299 }, { "epoch": 0.34457885095407864, "grad_norm": 2.244245767593384, "learning_rate": 7.621830395790386e-06, "loss": 0.483, "step": 8300 }, { "epoch": 0.34462036647829, "grad_norm": 2.1456007957458496, "learning_rate": 7.621257900779173e-06, "loss": 0.4908, "step": 8301 }, { "epoch": 0.3446618820025013, "grad_norm": 2.5959558486938477, "learning_rate": 7.62068535837486e-06, "loss": 0.5342, "step": 8302 }, { "epoch": 0.34470339752671264, "grad_norm": 2.7076921463012695, "learning_rate": 7.620112768587799e-06, "loss": 0.4104, "step": 8303 }, { "epoch": 0.344744913050924, "grad_norm": 2.4912619590759277, "learning_rate": 7.619540131428348e-06, "loss": 0.5607, "step": 8304 }, { "epoch": 0.3447864285751353, "grad_norm": 3.1083743572235107, "learning_rate": 7.618967446906853e-06, "loss": 0.5823, "step": 8305 }, { "epoch": 0.34482794409934664, "grad_norm": 2.260319948196411, "learning_rate": 7.618394715033675e-06, "loss": 0.5147, "step": 8306 }, { "epoch": 0.344869459623558, "grad_norm": 2.6796905994415283, "learning_rate": 7.617821935819163e-06, "loss": 0.5207, "step": 8307 }, { "epoch": 0.3449109751477693, "grad_norm": 2.234097957611084, "learning_rate": 7.617249109273677e-06, "loss": 0.4009, "step": 8308 }, { "epoch": 0.34495249067198064, "grad_norm": 2.7166998386383057, "learning_rate": 7.616676235407572e-06, "loss": 0.6295, "step": 8309 }, { "epoch": 0.34499400619619197, "grad_norm": 2.759024143218994, "learning_rate": 7.616103314231207e-06, "loss": 0.4198, "step": 8310 }, { "epoch": 0.3450355217204033, "grad_norm": 2.787398338317871, "learning_rate": 7.61553034575494e-06, "loss": 0.5861, "step": 8311 }, { "epoch": 0.34507703724461464, "grad_norm": 2.5492782592773438, "learning_rate": 7.61495732998913e-06, "loss": 0.6095, "step": 8312 }, { "epoch": 0.34511855276882597, "grad_norm": 2.5910773277282715, "learning_rate": 7.614384266944139e-06, "loss": 0.5675, "step": 8313 }, { "epoch": 0.3451600682930373, "grad_norm": 1.8444143533706665, "learning_rate": 7.6138111566303264e-06, "loss": 0.3863, "step": 8314 }, { "epoch": 0.34520158381724864, "grad_norm": 3.350965738296509, "learning_rate": 7.6132379990580545e-06, "loss": 0.6562, "step": 8315 }, { "epoch": 0.34524309934146, "grad_norm": 2.2588493824005127, "learning_rate": 7.612664794237685e-06, "loss": 0.5323, "step": 8316 }, { "epoch": 0.34528461486567136, "grad_norm": 2.4456090927124023, "learning_rate": 7.612091542179585e-06, "loss": 0.5407, "step": 8317 }, { "epoch": 0.3453261303898827, "grad_norm": 2.721848726272583, "learning_rate": 7.611518242894115e-06, "loss": 0.564, "step": 8318 }, { "epoch": 0.345367645914094, "grad_norm": 2.622255325317383, "learning_rate": 7.610944896391644e-06, "loss": 0.5378, "step": 8319 }, { "epoch": 0.34540916143830536, "grad_norm": 2.567996025085449, "learning_rate": 7.610371502682537e-06, "loss": 0.4558, "step": 8320 }, { "epoch": 0.3454506769625167, "grad_norm": 2.3052988052368164, "learning_rate": 7.60979806177716e-06, "loss": 0.5383, "step": 8321 }, { "epoch": 0.345492192486728, "grad_norm": 2.8636200428009033, "learning_rate": 7.609224573685881e-06, "loss": 0.4847, "step": 8322 }, { "epoch": 0.34553370801093936, "grad_norm": 2.3066909313201904, "learning_rate": 7.608651038419071e-06, "loss": 0.6176, "step": 8323 }, { "epoch": 0.3455752235351507, "grad_norm": 2.8186864852905273, "learning_rate": 7.608077455987099e-06, "loss": 0.5765, "step": 8324 }, { "epoch": 0.345616739059362, "grad_norm": 2.9192283153533936, "learning_rate": 7.607503826400333e-06, "loss": 0.5528, "step": 8325 }, { "epoch": 0.34565825458357335, "grad_norm": 3.3433022499084473, "learning_rate": 7.6069301496691475e-06, "loss": 0.472, "step": 8326 }, { "epoch": 0.3456997701077847, "grad_norm": 2.77390456199646, "learning_rate": 7.606356425803913e-06, "loss": 0.5784, "step": 8327 }, { "epoch": 0.345741285631996, "grad_norm": 2.5443227291107178, "learning_rate": 7.605782654815004e-06, "loss": 0.4983, "step": 8328 }, { "epoch": 0.34578280115620735, "grad_norm": 2.0729193687438965, "learning_rate": 7.605208836712792e-06, "loss": 0.3852, "step": 8329 }, { "epoch": 0.3458243166804187, "grad_norm": 2.243292808532715, "learning_rate": 7.604634971507654e-06, "loss": 0.5879, "step": 8330 }, { "epoch": 0.34586583220463, "grad_norm": 2.7263760566711426, "learning_rate": 7.604061059209965e-06, "loss": 0.4177, "step": 8331 }, { "epoch": 0.34590734772884135, "grad_norm": 2.4784305095672607, "learning_rate": 7.6034870998301025e-06, "loss": 0.4871, "step": 8332 }, { "epoch": 0.3459488632530527, "grad_norm": 2.684152603149414, "learning_rate": 7.602913093378441e-06, "loss": 0.4459, "step": 8333 }, { "epoch": 0.345990378777264, "grad_norm": 4.496871471405029, "learning_rate": 7.602339039865362e-06, "loss": 0.5431, "step": 8334 }, { "epoch": 0.34603189430147535, "grad_norm": 2.178272247314453, "learning_rate": 7.601764939301242e-06, "loss": 0.4532, "step": 8335 }, { "epoch": 0.3460734098256867, "grad_norm": 3.139399290084839, "learning_rate": 7.601190791696462e-06, "loss": 0.5471, "step": 8336 }, { "epoch": 0.346114925349898, "grad_norm": 2.556770086288452, "learning_rate": 7.6006165970614045e-06, "loss": 0.4605, "step": 8337 }, { "epoch": 0.34615644087410935, "grad_norm": 2.2966740131378174, "learning_rate": 7.6000423554064484e-06, "loss": 0.444, "step": 8338 }, { "epoch": 0.3461979563983207, "grad_norm": 2.289322853088379, "learning_rate": 7.599468066741976e-06, "loss": 0.5616, "step": 8339 }, { "epoch": 0.346239471922532, "grad_norm": 2.2489333152770996, "learning_rate": 7.598893731078374e-06, "loss": 0.4961, "step": 8340 }, { "epoch": 0.34628098744674335, "grad_norm": 2.033203601837158, "learning_rate": 7.598319348426022e-06, "loss": 0.5128, "step": 8341 }, { "epoch": 0.3463225029709547, "grad_norm": 2.5019350051879883, "learning_rate": 7.597744918795307e-06, "loss": 0.5529, "step": 8342 }, { "epoch": 0.346364018495166, "grad_norm": 2.6407649517059326, "learning_rate": 7.5971704421966175e-06, "loss": 0.5525, "step": 8343 }, { "epoch": 0.34640553401937735, "grad_norm": 1.8701286315917969, "learning_rate": 7.596595918640335e-06, "loss": 0.4727, "step": 8344 }, { "epoch": 0.3464470495435887, "grad_norm": 2.6025140285491943, "learning_rate": 7.596021348136853e-06, "loss": 0.4435, "step": 8345 }, { "epoch": 0.3464885650678, "grad_norm": 2.12654447555542, "learning_rate": 7.595446730696554e-06, "loss": 0.4626, "step": 8346 }, { "epoch": 0.3465300805920114, "grad_norm": 2.793283224105835, "learning_rate": 7.594872066329831e-06, "loss": 0.4759, "step": 8347 }, { "epoch": 0.34657159611622274, "grad_norm": 2.991379976272583, "learning_rate": 7.594297355047073e-06, "loss": 0.4857, "step": 8348 }, { "epoch": 0.34661311164043407, "grad_norm": 2.4536123275756836, "learning_rate": 7.593722596858671e-06, "loss": 0.5287, "step": 8349 }, { "epoch": 0.3466546271646454, "grad_norm": 3.3881309032440186, "learning_rate": 7.593147791775019e-06, "loss": 0.5741, "step": 8350 }, { "epoch": 0.34669614268885673, "grad_norm": 2.819523572921753, "learning_rate": 7.592572939806505e-06, "loss": 0.6374, "step": 8351 }, { "epoch": 0.34673765821306807, "grad_norm": 2.645601987838745, "learning_rate": 7.591998040963525e-06, "loss": 0.4876, "step": 8352 }, { "epoch": 0.3467791737372794, "grad_norm": 2.368762493133545, "learning_rate": 7.591423095256475e-06, "loss": 0.4304, "step": 8353 }, { "epoch": 0.34682068926149073, "grad_norm": 2.285090684890747, "learning_rate": 7.590848102695747e-06, "loss": 0.5477, "step": 8354 }, { "epoch": 0.34686220478570207, "grad_norm": 2.5590929985046387, "learning_rate": 7.5902730632917395e-06, "loss": 0.4957, "step": 8355 }, { "epoch": 0.3469037203099134, "grad_norm": 2.314516544342041, "learning_rate": 7.589697977054847e-06, "loss": 0.4318, "step": 8356 }, { "epoch": 0.34694523583412473, "grad_norm": 2.5126452445983887, "learning_rate": 7.5891228439954686e-06, "loss": 0.5157, "step": 8357 }, { "epoch": 0.34698675135833607, "grad_norm": 2.948119878768921, "learning_rate": 7.588547664124005e-06, "loss": 0.5188, "step": 8358 }, { "epoch": 0.3470282668825474, "grad_norm": 2.51371169090271, "learning_rate": 7.587972437450853e-06, "loss": 0.626, "step": 8359 }, { "epoch": 0.34706978240675873, "grad_norm": 2.3514466285705566, "learning_rate": 7.587397163986412e-06, "loss": 0.6071, "step": 8360 }, { "epoch": 0.34711129793097006, "grad_norm": 2.888709306716919, "learning_rate": 7.586821843741086e-06, "loss": 0.5131, "step": 8361 }, { "epoch": 0.3471528134551814, "grad_norm": 2.4416208267211914, "learning_rate": 7.5862464767252744e-06, "loss": 0.5425, "step": 8362 }, { "epoch": 0.34719432897939273, "grad_norm": 2.891040086746216, "learning_rate": 7.585671062949382e-06, "loss": 0.5324, "step": 8363 }, { "epoch": 0.34723584450360406, "grad_norm": 2.9327845573425293, "learning_rate": 7.585095602423811e-06, "loss": 0.5233, "step": 8364 }, { "epoch": 0.3472773600278154, "grad_norm": 2.6013317108154297, "learning_rate": 7.584520095158966e-06, "loss": 0.4597, "step": 8365 }, { "epoch": 0.34731887555202673, "grad_norm": 2.9285318851470947, "learning_rate": 7.583944541165254e-06, "loss": 0.4244, "step": 8366 }, { "epoch": 0.34736039107623806, "grad_norm": 2.079883575439453, "learning_rate": 7.583368940453078e-06, "loss": 0.3692, "step": 8367 }, { "epoch": 0.3474019066004494, "grad_norm": 1.8158191442489624, "learning_rate": 7.5827932930328485e-06, "loss": 0.4368, "step": 8368 }, { "epoch": 0.34744342212466073, "grad_norm": 2.3356897830963135, "learning_rate": 7.5822175989149715e-06, "loss": 0.4311, "step": 8369 }, { "epoch": 0.34748493764887206, "grad_norm": 2.548931121826172, "learning_rate": 7.581641858109855e-06, "loss": 0.6248, "step": 8370 }, { "epoch": 0.3475264531730834, "grad_norm": 2.552832841873169, "learning_rate": 7.5810660706279115e-06, "loss": 0.5892, "step": 8371 }, { "epoch": 0.3475679686972947, "grad_norm": 2.435609817504883, "learning_rate": 7.580490236479547e-06, "loss": 0.5609, "step": 8372 }, { "epoch": 0.34760948422150606, "grad_norm": 2.30208158493042, "learning_rate": 7.579914355675177e-06, "loss": 0.4973, "step": 8373 }, { "epoch": 0.3476509997457174, "grad_norm": 2.8409154415130615, "learning_rate": 7.579338428225211e-06, "loss": 0.5435, "step": 8374 }, { "epoch": 0.3476925152699287, "grad_norm": 2.1686758995056152, "learning_rate": 7.578762454140064e-06, "loss": 0.5222, "step": 8375 }, { "epoch": 0.34773403079414006, "grad_norm": 2.565023899078369, "learning_rate": 7.578186433430148e-06, "loss": 0.6503, "step": 8376 }, { "epoch": 0.3477755463183514, "grad_norm": 2.5032403469085693, "learning_rate": 7.577610366105879e-06, "loss": 0.3921, "step": 8377 }, { "epoch": 0.3478170618425628, "grad_norm": 3.051703691482544, "learning_rate": 7.577034252177671e-06, "loss": 0.5075, "step": 8378 }, { "epoch": 0.3478585773667741, "grad_norm": 2.488956928253174, "learning_rate": 7.5764580916559405e-06, "loss": 0.5338, "step": 8379 }, { "epoch": 0.34790009289098545, "grad_norm": 2.09333872795105, "learning_rate": 7.575881884551104e-06, "loss": 0.6195, "step": 8380 }, { "epoch": 0.3479416084151968, "grad_norm": 2.6161186695098877, "learning_rate": 7.575305630873582e-06, "loss": 0.3822, "step": 8381 }, { "epoch": 0.3479831239394081, "grad_norm": 3.0357556343078613, "learning_rate": 7.574729330633792e-06, "loss": 0.4186, "step": 8382 }, { "epoch": 0.34802463946361945, "grad_norm": 2.8555641174316406, "learning_rate": 7.574152983842152e-06, "loss": 0.5539, "step": 8383 }, { "epoch": 0.3480661549878308, "grad_norm": 2.180387496948242, "learning_rate": 7.573576590509086e-06, "loss": 0.6271, "step": 8384 }, { "epoch": 0.3481076705120421, "grad_norm": 2.6043989658355713, "learning_rate": 7.573000150645012e-06, "loss": 0.6209, "step": 8385 }, { "epoch": 0.34814918603625344, "grad_norm": 2.0974032878875732, "learning_rate": 7.572423664260355e-06, "loss": 0.51, "step": 8386 }, { "epoch": 0.3481907015604648, "grad_norm": 2.405820846557617, "learning_rate": 7.571847131365536e-06, "loss": 0.5724, "step": 8387 }, { "epoch": 0.3482322170846761, "grad_norm": 2.582127332687378, "learning_rate": 7.5712705519709795e-06, "loss": 0.5489, "step": 8388 }, { "epoch": 0.34827373260888744, "grad_norm": 2.3827691078186035, "learning_rate": 7.570693926087112e-06, "loss": 0.5653, "step": 8389 }, { "epoch": 0.3483152481330988, "grad_norm": 1.876692533493042, "learning_rate": 7.570117253724356e-06, "loss": 0.3201, "step": 8390 }, { "epoch": 0.3483567636573101, "grad_norm": 2.209625720977783, "learning_rate": 7.569540534893139e-06, "loss": 0.6263, "step": 8391 }, { "epoch": 0.34839827918152144, "grad_norm": 2.2543792724609375, "learning_rate": 7.568963769603889e-06, "loss": 0.7091, "step": 8392 }, { "epoch": 0.3484397947057328, "grad_norm": 2.5891125202178955, "learning_rate": 7.568386957867033e-06, "loss": 0.533, "step": 8393 }, { "epoch": 0.3484813102299441, "grad_norm": 2.5616583824157715, "learning_rate": 7.567810099693e-06, "loss": 0.6141, "step": 8394 }, { "epoch": 0.34852282575415544, "grad_norm": 3.0374345779418945, "learning_rate": 7.56723319509222e-06, "loss": 0.6427, "step": 8395 }, { "epoch": 0.3485643412783668, "grad_norm": 2.3908658027648926, "learning_rate": 7.566656244075126e-06, "loss": 0.6945, "step": 8396 }, { "epoch": 0.3486058568025781, "grad_norm": 2.5909512042999268, "learning_rate": 7.566079246652147e-06, "loss": 0.4856, "step": 8397 }, { "epoch": 0.34864737232678944, "grad_norm": 2.6205875873565674, "learning_rate": 7.565502202833714e-06, "loss": 0.6082, "step": 8398 }, { "epoch": 0.3486888878510008, "grad_norm": 2.1810739040374756, "learning_rate": 7.5649251126302635e-06, "loss": 0.4649, "step": 8399 }, { "epoch": 0.3487304033752121, "grad_norm": 2.4191863536834717, "learning_rate": 7.5643479760522265e-06, "loss": 0.5548, "step": 8400 }, { "epoch": 0.34877191889942344, "grad_norm": 2.3159165382385254, "learning_rate": 7.563770793110039e-06, "loss": 0.6223, "step": 8401 }, { "epoch": 0.34881343442363477, "grad_norm": 2.069492816925049, "learning_rate": 7.563193563814137e-06, "loss": 0.5974, "step": 8402 }, { "epoch": 0.3488549499478461, "grad_norm": 2.408249616622925, "learning_rate": 7.562616288174957e-06, "loss": 0.4521, "step": 8403 }, { "epoch": 0.34889646547205744, "grad_norm": 2.5856661796569824, "learning_rate": 7.5620389662029346e-06, "loss": 0.4946, "step": 8404 }, { "epoch": 0.34893798099626877, "grad_norm": 3.106327533721924, "learning_rate": 7.5614615979085105e-06, "loss": 0.6438, "step": 8405 }, { "epoch": 0.3489794965204801, "grad_norm": 2.3852219581604004, "learning_rate": 7.560884183302122e-06, "loss": 0.5911, "step": 8406 }, { "epoch": 0.34902101204469144, "grad_norm": 2.4443531036376953, "learning_rate": 7.560306722394208e-06, "loss": 0.5289, "step": 8407 }, { "epoch": 0.34906252756890277, "grad_norm": 3.32191801071167, "learning_rate": 7.559729215195213e-06, "loss": 0.5514, "step": 8408 }, { "epoch": 0.34910404309311416, "grad_norm": 2.911681652069092, "learning_rate": 7.559151661715574e-06, "loss": 0.6142, "step": 8409 }, { "epoch": 0.3491455586173255, "grad_norm": 2.583688259124756, "learning_rate": 7.558574061965736e-06, "loss": 0.4472, "step": 8410 }, { "epoch": 0.3491870741415368, "grad_norm": 2.556936740875244, "learning_rate": 7.5579964159561415e-06, "loss": 0.6222, "step": 8411 }, { "epoch": 0.34922858966574816, "grad_norm": 2.7095696926116943, "learning_rate": 7.5574187236972344e-06, "loss": 0.4549, "step": 8412 }, { "epoch": 0.3492701051899595, "grad_norm": 2.209472417831421, "learning_rate": 7.556840985199459e-06, "loss": 0.512, "step": 8413 }, { "epoch": 0.3493116207141708, "grad_norm": 2.2715325355529785, "learning_rate": 7.556263200473263e-06, "loss": 0.5583, "step": 8414 }, { "epoch": 0.34935313623838216, "grad_norm": 2.434573173522949, "learning_rate": 7.555685369529091e-06, "loss": 0.4372, "step": 8415 }, { "epoch": 0.3493946517625935, "grad_norm": 2.2722890377044678, "learning_rate": 7.555107492377392e-06, "loss": 0.5433, "step": 8416 }, { "epoch": 0.3494361672868048, "grad_norm": 2.32552170753479, "learning_rate": 7.5545295690286105e-06, "loss": 0.4379, "step": 8417 }, { "epoch": 0.34947768281101615, "grad_norm": 2.25530743598938, "learning_rate": 7.5539515994932e-06, "loss": 0.5867, "step": 8418 }, { "epoch": 0.3495191983352275, "grad_norm": 3.0047385692596436, "learning_rate": 7.553373583781607e-06, "loss": 0.6035, "step": 8419 }, { "epoch": 0.3495607138594388, "grad_norm": 2.2992382049560547, "learning_rate": 7.552795521904286e-06, "loss": 0.5628, "step": 8420 }, { "epoch": 0.34960222938365015, "grad_norm": 2.4783742427825928, "learning_rate": 7.552217413871683e-06, "loss": 0.6095, "step": 8421 }, { "epoch": 0.3496437449078615, "grad_norm": 2.2851622104644775, "learning_rate": 7.551639259694255e-06, "loss": 0.5213, "step": 8422 }, { "epoch": 0.3496852604320728, "grad_norm": 2.200855016708374, "learning_rate": 7.551061059382455e-06, "loss": 0.5404, "step": 8423 }, { "epoch": 0.34972677595628415, "grad_norm": 2.328629732131958, "learning_rate": 7.550482812946734e-06, "loss": 0.5628, "step": 8424 }, { "epoch": 0.3497682914804955, "grad_norm": 2.2579140663146973, "learning_rate": 7.5499045203975505e-06, "loss": 0.4215, "step": 8425 }, { "epoch": 0.3498098070047068, "grad_norm": 2.2618744373321533, "learning_rate": 7.549326181745356e-06, "loss": 0.4611, "step": 8426 }, { "epoch": 0.34985132252891815, "grad_norm": 2.831294536590576, "learning_rate": 7.548747797000611e-06, "loss": 0.4171, "step": 8427 }, { "epoch": 0.3498928380531295, "grad_norm": 2.511530876159668, "learning_rate": 7.548169366173772e-06, "loss": 0.5895, "step": 8428 }, { "epoch": 0.3499343535773408, "grad_norm": 2.4463934898376465, "learning_rate": 7.547590889275295e-06, "loss": 0.5432, "step": 8429 }, { "epoch": 0.34997586910155215, "grad_norm": 3.3125579357147217, "learning_rate": 7.54701236631564e-06, "loss": 0.7739, "step": 8430 }, { "epoch": 0.3500173846257635, "grad_norm": 2.335998296737671, "learning_rate": 7.546433797305268e-06, "loss": 0.5278, "step": 8431 }, { "epoch": 0.3500589001499748, "grad_norm": 2.6911702156066895, "learning_rate": 7.545855182254638e-06, "loss": 0.4656, "step": 8432 }, { "epoch": 0.35010041567418615, "grad_norm": 2.1259877681732178, "learning_rate": 7.545276521174214e-06, "loss": 0.5297, "step": 8433 }, { "epoch": 0.3501419311983975, "grad_norm": 2.6795380115509033, "learning_rate": 7.544697814074456e-06, "loss": 0.5388, "step": 8434 }, { "epoch": 0.3501834467226088, "grad_norm": 1.8566625118255615, "learning_rate": 7.544119060965829e-06, "loss": 0.5767, "step": 8435 }, { "epoch": 0.35022496224682015, "grad_norm": 2.401379108428955, "learning_rate": 7.543540261858795e-06, "loss": 0.4606, "step": 8436 }, { "epoch": 0.3502664777710315, "grad_norm": 2.4390573501586914, "learning_rate": 7.542961416763819e-06, "loss": 0.54, "step": 8437 }, { "epoch": 0.3503079932952428, "grad_norm": 2.6196329593658447, "learning_rate": 7.5423825256913695e-06, "loss": 0.5704, "step": 8438 }, { "epoch": 0.35034950881945415, "grad_norm": 2.2026712894439697, "learning_rate": 7.541803588651911e-06, "loss": 0.4953, "step": 8439 }, { "epoch": 0.35039102434366554, "grad_norm": 2.2044076919555664, "learning_rate": 7.54122460565591e-06, "loss": 0.4753, "step": 8440 }, { "epoch": 0.35043253986787687, "grad_norm": 2.784193277359009, "learning_rate": 7.540645576713837e-06, "loss": 0.4112, "step": 8441 }, { "epoch": 0.3504740553920882, "grad_norm": 2.260280132293701, "learning_rate": 7.540066501836158e-06, "loss": 0.5396, "step": 8442 }, { "epoch": 0.35051557091629953, "grad_norm": 2.466240882873535, "learning_rate": 7.539487381033345e-06, "loss": 0.5887, "step": 8443 }, { "epoch": 0.35055708644051087, "grad_norm": 2.6388862133026123, "learning_rate": 7.53890821431587e-06, "loss": 0.4343, "step": 8444 }, { "epoch": 0.3505986019647222, "grad_norm": 2.503082036972046, "learning_rate": 7.5383290016942e-06, "loss": 0.5798, "step": 8445 }, { "epoch": 0.35064011748893353, "grad_norm": 2.371877431869507, "learning_rate": 7.5377497431788124e-06, "loss": 0.3828, "step": 8446 }, { "epoch": 0.35068163301314487, "grad_norm": 2.7223494052886963, "learning_rate": 7.537170438780177e-06, "loss": 0.4298, "step": 8447 }, { "epoch": 0.3507231485373562, "grad_norm": 2.843207359313965, "learning_rate": 7.536591088508769e-06, "loss": 0.588, "step": 8448 }, { "epoch": 0.35076466406156753, "grad_norm": 2.8676681518554688, "learning_rate": 7.536011692375061e-06, "loss": 0.5248, "step": 8449 }, { "epoch": 0.35080617958577887, "grad_norm": 2.1408603191375732, "learning_rate": 7.535432250389532e-06, "loss": 0.475, "step": 8450 }, { "epoch": 0.3508476951099902, "grad_norm": 3.0656912326812744, "learning_rate": 7.534852762562659e-06, "loss": 0.5636, "step": 8451 }, { "epoch": 0.35088921063420153, "grad_norm": 2.1749584674835205, "learning_rate": 7.534273228904916e-06, "loss": 0.4514, "step": 8452 }, { "epoch": 0.35093072615841286, "grad_norm": 3.0134875774383545, "learning_rate": 7.533693649426781e-06, "loss": 0.4951, "step": 8453 }, { "epoch": 0.3509722416826242, "grad_norm": 2.5061278343200684, "learning_rate": 7.533114024138737e-06, "loss": 0.4481, "step": 8454 }, { "epoch": 0.35101375720683553, "grad_norm": 2.4586198329925537, "learning_rate": 7.532534353051261e-06, "loss": 0.4874, "step": 8455 }, { "epoch": 0.35105527273104686, "grad_norm": 2.2269601821899414, "learning_rate": 7.531954636174832e-06, "loss": 0.4831, "step": 8456 }, { "epoch": 0.3510967882552582, "grad_norm": 2.7044174671173096, "learning_rate": 7.531374873519935e-06, "loss": 0.5192, "step": 8457 }, { "epoch": 0.35113830377946953, "grad_norm": 2.6273789405822754, "learning_rate": 7.53079506509705e-06, "loss": 0.5788, "step": 8458 }, { "epoch": 0.35117981930368086, "grad_norm": 2.131608247756958, "learning_rate": 7.530215210916662e-06, "loss": 0.5167, "step": 8459 }, { "epoch": 0.3512213348278922, "grad_norm": 2.733372449874878, "learning_rate": 7.529635310989252e-06, "loss": 0.4599, "step": 8460 }, { "epoch": 0.35126285035210353, "grad_norm": 2.65568208694458, "learning_rate": 7.5290553653253075e-06, "loss": 0.5432, "step": 8461 }, { "epoch": 0.35130436587631486, "grad_norm": 2.395775556564331, "learning_rate": 7.528475373935312e-06, "loss": 0.5226, "step": 8462 }, { "epoch": 0.3513458814005262, "grad_norm": 2.3929622173309326, "learning_rate": 7.527895336829754e-06, "loss": 0.5261, "step": 8463 }, { "epoch": 0.3513873969247375, "grad_norm": 2.9175755977630615, "learning_rate": 7.527315254019119e-06, "loss": 0.5003, "step": 8464 }, { "epoch": 0.35142891244894886, "grad_norm": 2.663388729095459, "learning_rate": 7.526735125513898e-06, "loss": 0.409, "step": 8465 }, { "epoch": 0.3514704279731602, "grad_norm": 2.2328710556030273, "learning_rate": 7.5261549513245735e-06, "loss": 0.5967, "step": 8466 }, { "epoch": 0.3515119434973715, "grad_norm": 2.2354736328125, "learning_rate": 7.525574731461643e-06, "loss": 0.5122, "step": 8467 }, { "epoch": 0.35155345902158286, "grad_norm": 2.6112656593322754, "learning_rate": 7.5249944659355925e-06, "loss": 0.5255, "step": 8468 }, { "epoch": 0.3515949745457942, "grad_norm": 3.1340744495391846, "learning_rate": 7.5244141547569135e-06, "loss": 0.6085, "step": 8469 }, { "epoch": 0.3516364900700055, "grad_norm": 2.413606882095337, "learning_rate": 7.5238337979361e-06, "loss": 0.434, "step": 8470 }, { "epoch": 0.3516780055942169, "grad_norm": 2.9196271896362305, "learning_rate": 7.523253395483643e-06, "loss": 0.5979, "step": 8471 }, { "epoch": 0.35171952111842825, "grad_norm": 3.00018048286438, "learning_rate": 7.522672947410037e-06, "loss": 0.6299, "step": 8472 }, { "epoch": 0.3517610366426396, "grad_norm": 2.6745474338531494, "learning_rate": 7.522092453725778e-06, "loss": 0.4815, "step": 8473 }, { "epoch": 0.3518025521668509, "grad_norm": 2.1984219551086426, "learning_rate": 7.52151191444136e-06, "loss": 0.4893, "step": 8474 }, { "epoch": 0.35184406769106225, "grad_norm": 2.464104175567627, "learning_rate": 7.520931329567279e-06, "loss": 0.565, "step": 8475 }, { "epoch": 0.3518855832152736, "grad_norm": 2.2831060886383057, "learning_rate": 7.520350699114034e-06, "loss": 0.5901, "step": 8476 }, { "epoch": 0.3519270987394849, "grad_norm": 2.335298776626587, "learning_rate": 7.519770023092121e-06, "loss": 0.4645, "step": 8477 }, { "epoch": 0.35196861426369624, "grad_norm": 2.2497575283050537, "learning_rate": 7.519189301512042e-06, "loss": 0.457, "step": 8478 }, { "epoch": 0.3520101297879076, "grad_norm": 3.0036182403564453, "learning_rate": 7.518608534384292e-06, "loss": 0.6418, "step": 8479 }, { "epoch": 0.3520516453121189, "grad_norm": 2.595585823059082, "learning_rate": 7.518027721719376e-06, "loss": 0.4924, "step": 8480 }, { "epoch": 0.35209316083633024, "grad_norm": 2.7931535243988037, "learning_rate": 7.517446863527791e-06, "loss": 0.5063, "step": 8481 }, { "epoch": 0.3521346763605416, "grad_norm": 2.7771103382110596, "learning_rate": 7.516865959820041e-06, "loss": 0.6188, "step": 8482 }, { "epoch": 0.3521761918847529, "grad_norm": 2.5605528354644775, "learning_rate": 7.51628501060663e-06, "loss": 0.4768, "step": 8483 }, { "epoch": 0.35221770740896424, "grad_norm": 2.548882246017456, "learning_rate": 7.51570401589806e-06, "loss": 0.5754, "step": 8484 }, { "epoch": 0.3522592229331756, "grad_norm": 2.653794050216675, "learning_rate": 7.515122975704838e-06, "loss": 0.3516, "step": 8485 }, { "epoch": 0.3523007384573869, "grad_norm": 3.190793991088867, "learning_rate": 7.514541890037466e-06, "loss": 0.4725, "step": 8486 }, { "epoch": 0.35234225398159824, "grad_norm": 2.141592264175415, "learning_rate": 7.513960758906452e-06, "loss": 0.5246, "step": 8487 }, { "epoch": 0.3523837695058096, "grad_norm": 2.4712231159210205, "learning_rate": 7.513379582322302e-06, "loss": 0.3844, "step": 8488 }, { "epoch": 0.3524252850300209, "grad_norm": 2.768016815185547, "learning_rate": 7.512798360295526e-06, "loss": 0.57, "step": 8489 }, { "epoch": 0.35246680055423224, "grad_norm": 2.943431854248047, "learning_rate": 7.512217092836631e-06, "loss": 0.4527, "step": 8490 }, { "epoch": 0.3525083160784436, "grad_norm": 2.7954952716827393, "learning_rate": 7.511635779956128e-06, "loss": 0.4116, "step": 8491 }, { "epoch": 0.3525498316026549, "grad_norm": 2.179456949234009, "learning_rate": 7.511054421664525e-06, "loss": 0.4775, "step": 8492 }, { "epoch": 0.35259134712686624, "grad_norm": 2.6301188468933105, "learning_rate": 7.510473017972335e-06, "loss": 0.6127, "step": 8493 }, { "epoch": 0.35263286265107757, "grad_norm": 2.667328357696533, "learning_rate": 7.509891568890069e-06, "loss": 0.615, "step": 8494 }, { "epoch": 0.3526743781752889, "grad_norm": 2.290024757385254, "learning_rate": 7.50931007442824e-06, "loss": 0.411, "step": 8495 }, { "epoch": 0.35271589369950024, "grad_norm": 2.747237205505371, "learning_rate": 7.508728534597362e-06, "loss": 0.5043, "step": 8496 }, { "epoch": 0.35275740922371157, "grad_norm": 2.852860450744629, "learning_rate": 7.508146949407947e-06, "loss": 0.5968, "step": 8497 }, { "epoch": 0.3527989247479229, "grad_norm": 2.917877435684204, "learning_rate": 7.507565318870514e-06, "loss": 0.5391, "step": 8498 }, { "epoch": 0.35284044027213424, "grad_norm": 2.5097334384918213, "learning_rate": 7.506983642995576e-06, "loss": 0.615, "step": 8499 }, { "epoch": 0.35288195579634557, "grad_norm": 2.3721413612365723, "learning_rate": 7.506401921793653e-06, "loss": 0.4586, "step": 8500 }, { "epoch": 0.35292347132055696, "grad_norm": 2.647559642791748, "learning_rate": 7.50582015527526e-06, "loss": 0.5282, "step": 8501 }, { "epoch": 0.3529649868447683, "grad_norm": 2.1979994773864746, "learning_rate": 7.505238343450916e-06, "loss": 0.4544, "step": 8502 }, { "epoch": 0.3530065023689796, "grad_norm": 2.6560022830963135, "learning_rate": 7.504656486331141e-06, "loss": 0.3735, "step": 8503 }, { "epoch": 0.35304801789319096, "grad_norm": 2.564445734024048, "learning_rate": 7.504074583926455e-06, "loss": 0.5787, "step": 8504 }, { "epoch": 0.3530895334174023, "grad_norm": 2.663306713104248, "learning_rate": 7.503492636247378e-06, "loss": 0.7516, "step": 8505 }, { "epoch": 0.3531310489416136, "grad_norm": 2.330048084259033, "learning_rate": 7.502910643304433e-06, "loss": 0.5558, "step": 8506 }, { "epoch": 0.35317256446582496, "grad_norm": 2.897538661956787, "learning_rate": 7.502328605108142e-06, "loss": 0.4973, "step": 8507 }, { "epoch": 0.3532140799900363, "grad_norm": 2.026984691619873, "learning_rate": 7.501746521669029e-06, "loss": 0.4335, "step": 8508 }, { "epoch": 0.3532555955142476, "grad_norm": 2.2653465270996094, "learning_rate": 7.5011643929976175e-06, "loss": 0.4885, "step": 8509 }, { "epoch": 0.35329711103845896, "grad_norm": 2.250422954559326, "learning_rate": 7.500582219104433e-06, "loss": 0.5379, "step": 8510 }, { "epoch": 0.3533386265626703, "grad_norm": 2.6413395404815674, "learning_rate": 7.500000000000001e-06, "loss": 0.4832, "step": 8511 }, { "epoch": 0.3533801420868816, "grad_norm": 2.380457639694214, "learning_rate": 7.4994177356948484e-06, "loss": 0.4663, "step": 8512 }, { "epoch": 0.35342165761109295, "grad_norm": 2.460188627243042, "learning_rate": 7.498835426199503e-06, "loss": 0.4884, "step": 8513 }, { "epoch": 0.3534631731353043, "grad_norm": 1.9883135557174683, "learning_rate": 7.4982530715244925e-06, "loss": 0.5338, "step": 8514 }, { "epoch": 0.3535046886595156, "grad_norm": 2.166888475418091, "learning_rate": 7.497670671680347e-06, "loss": 0.4911, "step": 8515 }, { "epoch": 0.35354620418372695, "grad_norm": 2.7079734802246094, "learning_rate": 7.497088226677594e-06, "loss": 0.4603, "step": 8516 }, { "epoch": 0.3535877197079383, "grad_norm": 2.6909375190734863, "learning_rate": 7.496505736526769e-06, "loss": 0.4571, "step": 8517 }, { "epoch": 0.3536292352321496, "grad_norm": 2.3738317489624023, "learning_rate": 7.495923201238399e-06, "loss": 0.6167, "step": 8518 }, { "epoch": 0.35367075075636095, "grad_norm": 2.721402406692505, "learning_rate": 7.495340620823018e-06, "loss": 0.4429, "step": 8519 }, { "epoch": 0.3537122662805723, "grad_norm": 2.390679359436035, "learning_rate": 7.494757995291161e-06, "loss": 0.5598, "step": 8520 }, { "epoch": 0.3537537818047836, "grad_norm": 2.7303898334503174, "learning_rate": 7.494175324653359e-06, "loss": 0.4353, "step": 8521 }, { "epoch": 0.35379529732899495, "grad_norm": 2.527013063430786, "learning_rate": 7.493592608920148e-06, "loss": 0.4825, "step": 8522 }, { "epoch": 0.3538368128532063, "grad_norm": 2.5272939205169678, "learning_rate": 7.493009848102064e-06, "loss": 0.4944, "step": 8523 }, { "epoch": 0.3538783283774176, "grad_norm": 2.2093236446380615, "learning_rate": 7.492427042209645e-06, "loss": 0.4888, "step": 8524 }, { "epoch": 0.35391984390162895, "grad_norm": 2.666550636291504, "learning_rate": 7.491844191253424e-06, "loss": 0.5688, "step": 8525 }, { "epoch": 0.3539613594258403, "grad_norm": 2.713555335998535, "learning_rate": 7.4912612952439434e-06, "loss": 0.4538, "step": 8526 }, { "epoch": 0.3540028749500516, "grad_norm": 2.7054953575134277, "learning_rate": 7.49067835419174e-06, "loss": 0.5931, "step": 8527 }, { "epoch": 0.35404439047426295, "grad_norm": 2.952937126159668, "learning_rate": 7.490095368107355e-06, "loss": 0.4744, "step": 8528 }, { "epoch": 0.3540859059984743, "grad_norm": 2.8347091674804688, "learning_rate": 7.489512337001327e-06, "loss": 0.5548, "step": 8529 }, { "epoch": 0.3541274215226856, "grad_norm": 2.927938222885132, "learning_rate": 7.488929260884199e-06, "loss": 0.4813, "step": 8530 }, { "epoch": 0.35416893704689695, "grad_norm": 2.173574209213257, "learning_rate": 7.488346139766513e-06, "loss": 0.5566, "step": 8531 }, { "epoch": 0.35421045257110834, "grad_norm": 2.5943734645843506, "learning_rate": 7.4877629736588095e-06, "loss": 0.6128, "step": 8532 }, { "epoch": 0.35425196809531967, "grad_norm": 2.4130029678344727, "learning_rate": 7.487179762571637e-06, "loss": 0.5129, "step": 8533 }, { "epoch": 0.354293483619531, "grad_norm": 2.079829216003418, "learning_rate": 7.486596506515536e-06, "loss": 0.4413, "step": 8534 }, { "epoch": 0.35433499914374234, "grad_norm": 2.741117000579834, "learning_rate": 7.486013205501053e-06, "loss": 0.4674, "step": 8535 }, { "epoch": 0.35437651466795367, "grad_norm": 2.4420785903930664, "learning_rate": 7.485429859538735e-06, "loss": 0.4979, "step": 8536 }, { "epoch": 0.354418030192165, "grad_norm": 2.3995985984802246, "learning_rate": 7.48484646863913e-06, "loss": 0.5065, "step": 8537 }, { "epoch": 0.35445954571637633, "grad_norm": 2.3398780822753906, "learning_rate": 7.484263032812783e-06, "loss": 0.5538, "step": 8538 }, { "epoch": 0.35450106124058767, "grad_norm": 2.2236626148223877, "learning_rate": 7.483679552070246e-06, "loss": 0.5409, "step": 8539 }, { "epoch": 0.354542576764799, "grad_norm": 2.273411273956299, "learning_rate": 7.483096026422066e-06, "loss": 0.4653, "step": 8540 }, { "epoch": 0.35458409228901033, "grad_norm": 2.37099027633667, "learning_rate": 7.482512455878794e-06, "loss": 0.4182, "step": 8541 }, { "epoch": 0.35462560781322167, "grad_norm": 2.293421745300293, "learning_rate": 7.4819288404509806e-06, "loss": 0.5225, "step": 8542 }, { "epoch": 0.354667123337433, "grad_norm": 2.533504009246826, "learning_rate": 7.48134518014918e-06, "loss": 0.5386, "step": 8543 }, { "epoch": 0.35470863886164433, "grad_norm": 2.2975902557373047, "learning_rate": 7.480761474983943e-06, "loss": 0.5697, "step": 8544 }, { "epoch": 0.35475015438585566, "grad_norm": 3.0379726886749268, "learning_rate": 7.480177724965823e-06, "loss": 0.5499, "step": 8545 }, { "epoch": 0.354791669910067, "grad_norm": 2.065340280532837, "learning_rate": 7.4795939301053744e-06, "loss": 0.4404, "step": 8546 }, { "epoch": 0.35483318543427833, "grad_norm": 2.5363831520080566, "learning_rate": 7.479010090413152e-06, "loss": 0.4214, "step": 8547 }, { "epoch": 0.35487470095848966, "grad_norm": 2.85927152633667, "learning_rate": 7.478426205899716e-06, "loss": 0.5211, "step": 8548 }, { "epoch": 0.354916216482701, "grad_norm": 1.8839045763015747, "learning_rate": 7.477842276575617e-06, "loss": 0.3778, "step": 8549 }, { "epoch": 0.35495773200691233, "grad_norm": 2.4862124919891357, "learning_rate": 7.477258302451417e-06, "loss": 0.463, "step": 8550 }, { "epoch": 0.35499924753112366, "grad_norm": 2.3447999954223633, "learning_rate": 7.47667428353767e-06, "loss": 0.5914, "step": 8551 }, { "epoch": 0.355040763055335, "grad_norm": 2.8568615913391113, "learning_rate": 7.476090219844941e-06, "loss": 0.5402, "step": 8552 }, { "epoch": 0.35508227857954633, "grad_norm": 2.763328790664673, "learning_rate": 7.475506111383787e-06, "loss": 0.4586, "step": 8553 }, { "epoch": 0.35512379410375766, "grad_norm": 2.5351977348327637, "learning_rate": 7.474921958164768e-06, "loss": 0.5736, "step": 8554 }, { "epoch": 0.355165309627969, "grad_norm": 2.4261739253997803, "learning_rate": 7.474337760198447e-06, "loss": 0.6005, "step": 8555 }, { "epoch": 0.3552068251521803, "grad_norm": 2.21224308013916, "learning_rate": 7.473753517495386e-06, "loss": 0.468, "step": 8556 }, { "epoch": 0.35524834067639166, "grad_norm": 2.3124897480010986, "learning_rate": 7.473169230066149e-06, "loss": 0.5482, "step": 8557 }, { "epoch": 0.355289856200603, "grad_norm": 2.681199550628662, "learning_rate": 7.472584897921299e-06, "loss": 0.4841, "step": 8558 }, { "epoch": 0.3553313717248143, "grad_norm": 2.42134428024292, "learning_rate": 7.472000521071401e-06, "loss": 0.5642, "step": 8559 }, { "epoch": 0.35537288724902566, "grad_norm": 2.859346628189087, "learning_rate": 7.471416099527022e-06, "loss": 0.5468, "step": 8560 }, { "epoch": 0.355414402773237, "grad_norm": 2.3599958419799805, "learning_rate": 7.470831633298728e-06, "loss": 0.6315, "step": 8561 }, { "epoch": 0.3554559182974483, "grad_norm": 2.446300983428955, "learning_rate": 7.470247122397083e-06, "loss": 0.3861, "step": 8562 }, { "epoch": 0.3554974338216597, "grad_norm": 2.3164432048797607, "learning_rate": 7.46966256683266e-06, "loss": 0.4606, "step": 8563 }, { "epoch": 0.35553894934587105, "grad_norm": 2.697810173034668, "learning_rate": 7.469077966616026e-06, "loss": 0.5518, "step": 8564 }, { "epoch": 0.3555804648700824, "grad_norm": 2.224933385848999, "learning_rate": 7.46849332175775e-06, "loss": 0.5622, "step": 8565 }, { "epoch": 0.3556219803942937, "grad_norm": 2.7744596004486084, "learning_rate": 7.4679086322684045e-06, "loss": 0.4515, "step": 8566 }, { "epoch": 0.35566349591850505, "grad_norm": 2.0035359859466553, "learning_rate": 7.467323898158559e-06, "loss": 0.4676, "step": 8567 }, { "epoch": 0.3557050114427164, "grad_norm": 2.6391584873199463, "learning_rate": 7.466739119438786e-06, "loss": 0.6303, "step": 8568 }, { "epoch": 0.3557465269669277, "grad_norm": 2.274841785430908, "learning_rate": 7.466154296119659e-06, "loss": 0.5084, "step": 8569 }, { "epoch": 0.35578804249113904, "grad_norm": 2.7597908973693848, "learning_rate": 7.465569428211752e-06, "loss": 0.4476, "step": 8570 }, { "epoch": 0.3558295580153504, "grad_norm": 2.502985715866089, "learning_rate": 7.464984515725638e-06, "loss": 0.4301, "step": 8571 }, { "epoch": 0.3558710735395617, "grad_norm": 2.2360970973968506, "learning_rate": 7.464399558671894e-06, "loss": 0.4532, "step": 8572 }, { "epoch": 0.35591258906377304, "grad_norm": 3.457658290863037, "learning_rate": 7.463814557061095e-06, "loss": 0.5115, "step": 8573 }, { "epoch": 0.3559541045879844, "grad_norm": 2.309143304824829, "learning_rate": 7.46322951090382e-06, "loss": 0.5996, "step": 8574 }, { "epoch": 0.3559956201121957, "grad_norm": 2.283339500427246, "learning_rate": 7.462644420210645e-06, "loss": 0.4268, "step": 8575 }, { "epoch": 0.35603713563640704, "grad_norm": 2.340350866317749, "learning_rate": 7.462059284992149e-06, "loss": 0.6147, "step": 8576 }, { "epoch": 0.3560786511606184, "grad_norm": 2.226270914077759, "learning_rate": 7.461474105258911e-06, "loss": 0.6663, "step": 8577 }, { "epoch": 0.3561201666848297, "grad_norm": 2.002897024154663, "learning_rate": 7.460888881021513e-06, "loss": 0.5744, "step": 8578 }, { "epoch": 0.35616168220904104, "grad_norm": 2.7041053771972656, "learning_rate": 7.460303612290533e-06, "loss": 0.4663, "step": 8579 }, { "epoch": 0.3562031977332524, "grad_norm": 2.8068976402282715, "learning_rate": 7.459718299076555e-06, "loss": 0.4723, "step": 8580 }, { "epoch": 0.3562447132574637, "grad_norm": 3.0261104106903076, "learning_rate": 7.459132941390161e-06, "loss": 0.4417, "step": 8581 }, { "epoch": 0.35628622878167504, "grad_norm": 2.228612184524536, "learning_rate": 7.458547539241936e-06, "loss": 0.5715, "step": 8582 }, { "epoch": 0.3563277443058864, "grad_norm": 2.5031070709228516, "learning_rate": 7.457962092642462e-06, "loss": 0.5504, "step": 8583 }, { "epoch": 0.3563692598300977, "grad_norm": 2.5538082122802734, "learning_rate": 7.457376601602324e-06, "loss": 0.5324, "step": 8584 }, { "epoch": 0.35641077535430904, "grad_norm": 2.5128819942474365, "learning_rate": 7.456791066132111e-06, "loss": 0.5337, "step": 8585 }, { "epoch": 0.3564522908785204, "grad_norm": 2.3106443881988525, "learning_rate": 7.456205486242405e-06, "loss": 0.5595, "step": 8586 }, { "epoch": 0.3564938064027317, "grad_norm": 2.8836631774902344, "learning_rate": 7.455619861943798e-06, "loss": 0.5784, "step": 8587 }, { "epoch": 0.35653532192694304, "grad_norm": 2.6488282680511475, "learning_rate": 7.455034193246874e-06, "loss": 0.4687, "step": 8588 }, { "epoch": 0.35657683745115437, "grad_norm": 2.352821111679077, "learning_rate": 7.454448480162226e-06, "loss": 0.3882, "step": 8589 }, { "epoch": 0.3566183529753657, "grad_norm": 2.5700395107269287, "learning_rate": 7.45386272270044e-06, "loss": 0.5591, "step": 8590 }, { "epoch": 0.35665986849957704, "grad_norm": 2.5654594898223877, "learning_rate": 7.4532769208721115e-06, "loss": 0.5164, "step": 8591 }, { "epoch": 0.35670138402378837, "grad_norm": 2.707099676132202, "learning_rate": 7.452691074687827e-06, "loss": 0.4617, "step": 8592 }, { "epoch": 0.3567428995479997, "grad_norm": 2.578850030899048, "learning_rate": 7.452105184158181e-06, "loss": 0.416, "step": 8593 }, { "epoch": 0.3567844150722111, "grad_norm": 2.5375442504882812, "learning_rate": 7.451519249293766e-06, "loss": 0.5879, "step": 8594 }, { "epoch": 0.3568259305964224, "grad_norm": 2.270214557647705, "learning_rate": 7.4509332701051785e-06, "loss": 0.5041, "step": 8595 }, { "epoch": 0.35686744612063376, "grad_norm": 2.2948849201202393, "learning_rate": 7.4503472466030105e-06, "loss": 0.3486, "step": 8596 }, { "epoch": 0.3569089616448451, "grad_norm": 2.290189743041992, "learning_rate": 7.4497611787978566e-06, "loss": 0.404, "step": 8597 }, { "epoch": 0.3569504771690564, "grad_norm": 2.883991241455078, "learning_rate": 7.4491750667003156e-06, "loss": 0.5008, "step": 8598 }, { "epoch": 0.35699199269326776, "grad_norm": 2.9832375049591064, "learning_rate": 7.448588910320983e-06, "loss": 0.5524, "step": 8599 }, { "epoch": 0.3570335082174791, "grad_norm": 2.520333766937256, "learning_rate": 7.448002709670458e-06, "loss": 0.3972, "step": 8600 }, { "epoch": 0.3570750237416904, "grad_norm": 2.2726986408233643, "learning_rate": 7.4474164647593376e-06, "loss": 0.4441, "step": 8601 }, { "epoch": 0.35711653926590176, "grad_norm": 2.2582340240478516, "learning_rate": 7.4468301755982216e-06, "loss": 0.4933, "step": 8602 }, { "epoch": 0.3571580547901131, "grad_norm": 2.7949085235595703, "learning_rate": 7.4462438421977114e-06, "loss": 0.7484, "step": 8603 }, { "epoch": 0.3571995703143244, "grad_norm": 3.040024518966675, "learning_rate": 7.445657464568408e-06, "loss": 0.638, "step": 8604 }, { "epoch": 0.35724108583853575, "grad_norm": 2.553068161010742, "learning_rate": 7.445071042720911e-06, "loss": 0.4578, "step": 8605 }, { "epoch": 0.3572826013627471, "grad_norm": 2.569486618041992, "learning_rate": 7.444484576665827e-06, "loss": 0.543, "step": 8606 }, { "epoch": 0.3573241168869584, "grad_norm": 2.3975863456726074, "learning_rate": 7.443898066413755e-06, "loss": 0.4463, "step": 8607 }, { "epoch": 0.35736563241116975, "grad_norm": 2.532585859298706, "learning_rate": 7.443311511975304e-06, "loss": 0.5831, "step": 8608 }, { "epoch": 0.3574071479353811, "grad_norm": 2.383714199066162, "learning_rate": 7.442724913361076e-06, "loss": 0.624, "step": 8609 }, { "epoch": 0.3574486634595924, "grad_norm": 2.836651563644409, "learning_rate": 7.442138270581676e-06, "loss": 0.5925, "step": 8610 }, { "epoch": 0.35749017898380375, "grad_norm": 2.3384604454040527, "learning_rate": 7.441551583647714e-06, "loss": 0.5315, "step": 8611 }, { "epoch": 0.3575316945080151, "grad_norm": 2.124117136001587, "learning_rate": 7.440964852569794e-06, "loss": 0.3871, "step": 8612 }, { "epoch": 0.3575732100322264, "grad_norm": 2.3201382160186768, "learning_rate": 7.440378077358527e-06, "loss": 0.7055, "step": 8613 }, { "epoch": 0.35761472555643775, "grad_norm": 2.8627824783325195, "learning_rate": 7.4397912580245205e-06, "loss": 0.5464, "step": 8614 }, { "epoch": 0.3576562410806491, "grad_norm": 2.4191319942474365, "learning_rate": 7.439204394578386e-06, "loss": 0.5508, "step": 8615 }, { "epoch": 0.3576977566048604, "grad_norm": 2.323124647140503, "learning_rate": 7.43861748703073e-06, "loss": 0.4677, "step": 8616 }, { "epoch": 0.35773927212907175, "grad_norm": 2.5105772018432617, "learning_rate": 7.438030535392169e-06, "loss": 0.4042, "step": 8617 }, { "epoch": 0.3577807876532831, "grad_norm": 3.569546937942505, "learning_rate": 7.437443539673312e-06, "loss": 0.5698, "step": 8618 }, { "epoch": 0.3578223031774944, "grad_norm": 2.6990933418273926, "learning_rate": 7.436856499884776e-06, "loss": 0.7419, "step": 8619 }, { "epoch": 0.35786381870170575, "grad_norm": 2.459439277648926, "learning_rate": 7.43626941603717e-06, "loss": 0.5833, "step": 8620 }, { "epoch": 0.3579053342259171, "grad_norm": 2.2417819499969482, "learning_rate": 7.435682288141111e-06, "loss": 0.4654, "step": 8621 }, { "epoch": 0.3579468497501284, "grad_norm": 2.6341779232025146, "learning_rate": 7.435095116207215e-06, "loss": 0.6389, "step": 8622 }, { "epoch": 0.35798836527433975, "grad_norm": 2.7585809230804443, "learning_rate": 7.434507900246096e-06, "loss": 0.5777, "step": 8623 }, { "epoch": 0.3580298807985511, "grad_norm": 2.7566940784454346, "learning_rate": 7.4339206402683735e-06, "loss": 0.5141, "step": 8624 }, { "epoch": 0.35807139632276247, "grad_norm": 2.5638632774353027, "learning_rate": 7.433333336284665e-06, "loss": 0.673, "step": 8625 }, { "epoch": 0.3581129118469738, "grad_norm": 2.216919422149658, "learning_rate": 7.432745988305588e-06, "loss": 0.4876, "step": 8626 }, { "epoch": 0.35815442737118514, "grad_norm": 2.697641611099243, "learning_rate": 7.432158596341761e-06, "loss": 0.452, "step": 8627 }, { "epoch": 0.35819594289539647, "grad_norm": 2.528843641281128, "learning_rate": 7.4315711604038066e-06, "loss": 0.4251, "step": 8628 }, { "epoch": 0.3582374584196078, "grad_norm": 2.7306509017944336, "learning_rate": 7.430983680502344e-06, "loss": 0.477, "step": 8629 }, { "epoch": 0.35827897394381913, "grad_norm": 2.366241455078125, "learning_rate": 7.430396156647996e-06, "loss": 0.5311, "step": 8630 }, { "epoch": 0.35832048946803047, "grad_norm": 3.0263609886169434, "learning_rate": 7.429808588851386e-06, "loss": 0.5019, "step": 8631 }, { "epoch": 0.3583620049922418, "grad_norm": 2.736954689025879, "learning_rate": 7.429220977123135e-06, "loss": 0.5665, "step": 8632 }, { "epoch": 0.35840352051645313, "grad_norm": 2.973842144012451, "learning_rate": 7.42863332147387e-06, "loss": 0.4874, "step": 8633 }, { "epoch": 0.35844503604066447, "grad_norm": 2.5005791187286377, "learning_rate": 7.428045621914213e-06, "loss": 0.5625, "step": 8634 }, { "epoch": 0.3584865515648758, "grad_norm": 2.4730238914489746, "learning_rate": 7.4274578784547935e-06, "loss": 0.5442, "step": 8635 }, { "epoch": 0.35852806708908713, "grad_norm": 2.4486682415008545, "learning_rate": 7.426870091106233e-06, "loss": 0.5838, "step": 8636 }, { "epoch": 0.35856958261329847, "grad_norm": 2.124255895614624, "learning_rate": 7.426282259879164e-06, "loss": 0.4399, "step": 8637 }, { "epoch": 0.3586110981375098, "grad_norm": 2.2573704719543457, "learning_rate": 7.425694384784211e-06, "loss": 0.4339, "step": 8638 }, { "epoch": 0.35865261366172113, "grad_norm": 2.283520221710205, "learning_rate": 7.425106465832006e-06, "loss": 0.4955, "step": 8639 }, { "epoch": 0.35869412918593246, "grad_norm": 2.1700241565704346, "learning_rate": 7.4245185030331754e-06, "loss": 0.4092, "step": 8640 }, { "epoch": 0.3587356447101438, "grad_norm": 2.254631996154785, "learning_rate": 7.423930496398353e-06, "loss": 0.5669, "step": 8641 }, { "epoch": 0.35877716023435513, "grad_norm": 2.3165063858032227, "learning_rate": 7.423342445938166e-06, "loss": 0.4457, "step": 8642 }, { "epoch": 0.35881867575856646, "grad_norm": 2.6823840141296387, "learning_rate": 7.422754351663252e-06, "loss": 0.6244, "step": 8643 }, { "epoch": 0.3588601912827778, "grad_norm": 2.452694892883301, "learning_rate": 7.4221662135842384e-06, "loss": 0.5629, "step": 8644 }, { "epoch": 0.35890170680698913, "grad_norm": 2.4004862308502197, "learning_rate": 7.421578031711763e-06, "loss": 0.5757, "step": 8645 }, { "epoch": 0.35894322233120046, "grad_norm": 2.1086575984954834, "learning_rate": 7.420989806056459e-06, "loss": 0.3659, "step": 8646 }, { "epoch": 0.3589847378554118, "grad_norm": 2.930220603942871, "learning_rate": 7.4204015366289616e-06, "loss": 0.5787, "step": 8647 }, { "epoch": 0.35902625337962313, "grad_norm": 2.8628313541412354, "learning_rate": 7.4198132234399064e-06, "loss": 0.6391, "step": 8648 }, { "epoch": 0.35906776890383446, "grad_norm": 2.5660107135772705, "learning_rate": 7.419224866499933e-06, "loss": 0.5781, "step": 8649 }, { "epoch": 0.3591092844280458, "grad_norm": 2.3909800052642822, "learning_rate": 7.418636465819673e-06, "loss": 0.5271, "step": 8650 }, { "epoch": 0.3591507999522571, "grad_norm": 2.461458444595337, "learning_rate": 7.4180480214097695e-06, "loss": 0.5661, "step": 8651 }, { "epoch": 0.35919231547646846, "grad_norm": 2.45040225982666, "learning_rate": 7.417459533280862e-06, "loss": 0.6726, "step": 8652 }, { "epoch": 0.3592338310006798, "grad_norm": 2.528907060623169, "learning_rate": 7.416871001443587e-06, "loss": 0.4414, "step": 8653 }, { "epoch": 0.3592753465248911, "grad_norm": 2.1191465854644775, "learning_rate": 7.41628242590859e-06, "loss": 0.4324, "step": 8654 }, { "epoch": 0.35931686204910246, "grad_norm": 2.813427209854126, "learning_rate": 7.415693806686509e-06, "loss": 0.4343, "step": 8655 }, { "epoch": 0.35935837757331385, "grad_norm": 2.306760549545288, "learning_rate": 7.415105143787988e-06, "loss": 0.5711, "step": 8656 }, { "epoch": 0.3593998930975252, "grad_norm": 2.5898489952087402, "learning_rate": 7.414516437223671e-06, "loss": 0.5618, "step": 8657 }, { "epoch": 0.3594414086217365, "grad_norm": 2.3820786476135254, "learning_rate": 7.413927687004201e-06, "loss": 0.4062, "step": 8658 }, { "epoch": 0.35948292414594785, "grad_norm": 2.386244297027588, "learning_rate": 7.413338893140221e-06, "loss": 0.5628, "step": 8659 }, { "epoch": 0.3595244396701592, "grad_norm": 2.0347983837127686, "learning_rate": 7.412750055642379e-06, "loss": 0.4244, "step": 8660 }, { "epoch": 0.3595659551943705, "grad_norm": 2.530445098876953, "learning_rate": 7.412161174521321e-06, "loss": 0.5459, "step": 8661 }, { "epoch": 0.35960747071858185, "grad_norm": 2.2693498134613037, "learning_rate": 7.411572249787693e-06, "loss": 0.5614, "step": 8662 }, { "epoch": 0.3596489862427932, "grad_norm": 2.282100200653076, "learning_rate": 7.410983281452144e-06, "loss": 0.4818, "step": 8663 }, { "epoch": 0.3596905017670045, "grad_norm": 2.1962878704071045, "learning_rate": 7.410394269525322e-06, "loss": 0.4499, "step": 8664 }, { "epoch": 0.35973201729121584, "grad_norm": 2.235860824584961, "learning_rate": 7.409805214017877e-06, "loss": 0.5342, "step": 8665 }, { "epoch": 0.3597735328154272, "grad_norm": 2.729933977127075, "learning_rate": 7.409216114940458e-06, "loss": 0.4636, "step": 8666 }, { "epoch": 0.3598150483396385, "grad_norm": 2.3710689544677734, "learning_rate": 7.4086269723037194e-06, "loss": 0.3827, "step": 8667 }, { "epoch": 0.35985656386384984, "grad_norm": 1.8799097537994385, "learning_rate": 7.40803778611831e-06, "loss": 0.3457, "step": 8668 }, { "epoch": 0.3598980793880612, "grad_norm": 2.6065597534179688, "learning_rate": 7.4074485563948825e-06, "loss": 0.574, "step": 8669 }, { "epoch": 0.3599395949122725, "grad_norm": 2.0156021118164062, "learning_rate": 7.406859283144092e-06, "loss": 0.4078, "step": 8670 }, { "epoch": 0.35998111043648384, "grad_norm": 2.821279764175415, "learning_rate": 7.406269966376593e-06, "loss": 0.4879, "step": 8671 }, { "epoch": 0.3600226259606952, "grad_norm": 2.5553836822509766, "learning_rate": 7.4056806061030384e-06, "loss": 0.5368, "step": 8672 }, { "epoch": 0.3600641414849065, "grad_norm": 2.414271831512451, "learning_rate": 7.405091202334085e-06, "loss": 0.485, "step": 8673 }, { "epoch": 0.36010565700911784, "grad_norm": 2.0559611320495605, "learning_rate": 7.404501755080389e-06, "loss": 0.449, "step": 8674 }, { "epoch": 0.3601471725333292, "grad_norm": 2.406083822250366, "learning_rate": 7.40391226435261e-06, "loss": 0.4764, "step": 8675 }, { "epoch": 0.3601886880575405, "grad_norm": 2.416769504547119, "learning_rate": 7.403322730161402e-06, "loss": 0.4165, "step": 8676 }, { "epoch": 0.36023020358175184, "grad_norm": 2.3671212196350098, "learning_rate": 7.402733152517427e-06, "loss": 0.5648, "step": 8677 }, { "epoch": 0.3602717191059632, "grad_norm": 2.6548960208892822, "learning_rate": 7.402143531431345e-06, "loss": 0.3934, "step": 8678 }, { "epoch": 0.3603132346301745, "grad_norm": 2.267571449279785, "learning_rate": 7.4015538669138144e-06, "loss": 0.5218, "step": 8679 }, { "epoch": 0.36035475015438584, "grad_norm": 2.4654381275177, "learning_rate": 7.400964158975499e-06, "loss": 0.65, "step": 8680 }, { "epoch": 0.36039626567859717, "grad_norm": 2.6480259895324707, "learning_rate": 7.400374407627058e-06, "loss": 0.5202, "step": 8681 }, { "epoch": 0.3604377812028085, "grad_norm": 2.36513614654541, "learning_rate": 7.399784612879157e-06, "loss": 0.4654, "step": 8682 }, { "epoch": 0.36047929672701984, "grad_norm": 2.633631944656372, "learning_rate": 7.3991947747424575e-06, "loss": 0.6227, "step": 8683 }, { "epoch": 0.36052081225123117, "grad_norm": 2.2971298694610596, "learning_rate": 7.398604893227627e-06, "loss": 0.4282, "step": 8684 }, { "epoch": 0.3605623277754425, "grad_norm": 2.095701217651367, "learning_rate": 7.398014968345327e-06, "loss": 0.3926, "step": 8685 }, { "epoch": 0.36060384329965384, "grad_norm": 2.6716504096984863, "learning_rate": 7.397425000106225e-06, "loss": 0.5701, "step": 8686 }, { "epoch": 0.3606453588238652, "grad_norm": 2.520540237426758, "learning_rate": 7.396834988520989e-06, "loss": 0.5423, "step": 8687 }, { "epoch": 0.36068687434807656, "grad_norm": 2.4942843914031982, "learning_rate": 7.396244933600285e-06, "loss": 0.552, "step": 8688 }, { "epoch": 0.3607283898722879, "grad_norm": 2.8077003955841064, "learning_rate": 7.395654835354782e-06, "loss": 0.6217, "step": 8689 }, { "epoch": 0.3607699053964992, "grad_norm": 2.6913390159606934, "learning_rate": 7.395064693795149e-06, "loss": 0.608, "step": 8690 }, { "epoch": 0.36081142092071056, "grad_norm": 2.643538475036621, "learning_rate": 7.394474508932057e-06, "loss": 0.5485, "step": 8691 }, { "epoch": 0.3608529364449219, "grad_norm": 2.4983372688293457, "learning_rate": 7.393884280776174e-06, "loss": 0.447, "step": 8692 }, { "epoch": 0.3608944519691332, "grad_norm": 2.2070114612579346, "learning_rate": 7.393294009338174e-06, "loss": 0.6401, "step": 8693 }, { "epoch": 0.36093596749334456, "grad_norm": 2.505882501602173, "learning_rate": 7.392703694628728e-06, "loss": 0.5854, "step": 8694 }, { "epoch": 0.3609774830175559, "grad_norm": 2.738727331161499, "learning_rate": 7.3921133366585105e-06, "loss": 0.4859, "step": 8695 }, { "epoch": 0.3610189985417672, "grad_norm": 2.310077667236328, "learning_rate": 7.391522935438193e-06, "loss": 0.4922, "step": 8696 }, { "epoch": 0.36106051406597856, "grad_norm": 2.417121648788452, "learning_rate": 7.390932490978453e-06, "loss": 0.4903, "step": 8697 }, { "epoch": 0.3611020295901899, "grad_norm": 2.4163546562194824, "learning_rate": 7.390342003289965e-06, "loss": 0.6555, "step": 8698 }, { "epoch": 0.3611435451144012, "grad_norm": 2.088958740234375, "learning_rate": 7.3897514723834045e-06, "loss": 0.4756, "step": 8699 }, { "epoch": 0.36118506063861255, "grad_norm": 2.410181760787964, "learning_rate": 7.389160898269446e-06, "loss": 0.5724, "step": 8700 }, { "epoch": 0.3612265761628239, "grad_norm": 2.3630731105804443, "learning_rate": 7.388570280958772e-06, "loss": 0.5492, "step": 8701 }, { "epoch": 0.3612680916870352, "grad_norm": 2.3537826538085938, "learning_rate": 7.387979620462058e-06, "loss": 0.5613, "step": 8702 }, { "epoch": 0.36130960721124655, "grad_norm": 2.4595553874969482, "learning_rate": 7.387388916789983e-06, "loss": 0.4306, "step": 8703 }, { "epoch": 0.3613511227354579, "grad_norm": 2.3992111682891846, "learning_rate": 7.386798169953231e-06, "loss": 0.6102, "step": 8704 }, { "epoch": 0.3613926382596692, "grad_norm": 2.1664650440216064, "learning_rate": 7.386207379962478e-06, "loss": 0.4635, "step": 8705 }, { "epoch": 0.36143415378388055, "grad_norm": 2.815856456756592, "learning_rate": 7.385616546828409e-06, "loss": 0.6868, "step": 8706 }, { "epoch": 0.3614756693080919, "grad_norm": 2.6182515621185303, "learning_rate": 7.385025670561705e-06, "loss": 0.5818, "step": 8707 }, { "epoch": 0.3615171848323032, "grad_norm": 2.4756503105163574, "learning_rate": 7.38443475117305e-06, "loss": 0.5955, "step": 8708 }, { "epoch": 0.36155870035651455, "grad_norm": 2.538588285446167, "learning_rate": 7.3838437886731264e-06, "loss": 0.4596, "step": 8709 }, { "epoch": 0.3616002158807259, "grad_norm": 2.5235395431518555, "learning_rate": 7.38325278307262e-06, "loss": 0.4238, "step": 8710 }, { "epoch": 0.3616417314049372, "grad_norm": 2.5752358436584473, "learning_rate": 7.382661734382217e-06, "loss": 0.6216, "step": 8711 }, { "epoch": 0.36168324692914855, "grad_norm": 3.0350918769836426, "learning_rate": 7.382070642612603e-06, "loss": 0.4484, "step": 8712 }, { "epoch": 0.3617247624533599, "grad_norm": 2.4480504989624023, "learning_rate": 7.381479507774465e-06, "loss": 0.5466, "step": 8713 }, { "epoch": 0.3617662779775712, "grad_norm": 2.676504611968994, "learning_rate": 7.380888329878492e-06, "loss": 0.506, "step": 8714 }, { "epoch": 0.36180779350178255, "grad_norm": 2.1426308155059814, "learning_rate": 7.3802971089353696e-06, "loss": 0.5294, "step": 8715 }, { "epoch": 0.3618493090259939, "grad_norm": 2.6790173053741455, "learning_rate": 7.37970584495579e-06, "loss": 0.5644, "step": 8716 }, { "epoch": 0.36189082455020527, "grad_norm": 2.126054286956787, "learning_rate": 7.379114537950444e-06, "loss": 0.522, "step": 8717 }, { "epoch": 0.3619323400744166, "grad_norm": 2.2632007598876953, "learning_rate": 7.37852318793002e-06, "loss": 0.4811, "step": 8718 }, { "epoch": 0.36197385559862794, "grad_norm": 2.2937839031219482, "learning_rate": 7.377931794905213e-06, "loss": 0.5113, "step": 8719 }, { "epoch": 0.36201537112283927, "grad_norm": 2.5018346309661865, "learning_rate": 7.377340358886712e-06, "loss": 0.5962, "step": 8720 }, { "epoch": 0.3620568866470506, "grad_norm": 2.4230642318725586, "learning_rate": 7.376748879885214e-06, "loss": 0.5602, "step": 8721 }, { "epoch": 0.36209840217126193, "grad_norm": 2.8663489818573, "learning_rate": 7.376157357911409e-06, "loss": 0.4806, "step": 8722 }, { "epoch": 0.36213991769547327, "grad_norm": 2.5833635330200195, "learning_rate": 7.375565792975996e-06, "loss": 0.4735, "step": 8723 }, { "epoch": 0.3621814332196846, "grad_norm": 3.0325794219970703, "learning_rate": 7.374974185089667e-06, "loss": 0.477, "step": 8724 }, { "epoch": 0.36222294874389593, "grad_norm": 2.529188871383667, "learning_rate": 7.374382534263123e-06, "loss": 0.6471, "step": 8725 }, { "epoch": 0.36226446426810727, "grad_norm": 2.2694153785705566, "learning_rate": 7.373790840507055e-06, "loss": 0.5717, "step": 8726 }, { "epoch": 0.3623059797923186, "grad_norm": 2.6381218433380127, "learning_rate": 7.373199103832167e-06, "loss": 0.5249, "step": 8727 }, { "epoch": 0.36234749531652993, "grad_norm": 2.0359699726104736, "learning_rate": 7.372607324249153e-06, "loss": 0.4809, "step": 8728 }, { "epoch": 0.36238901084074127, "grad_norm": 2.671062707901001, "learning_rate": 7.372015501768716e-06, "loss": 0.5162, "step": 8729 }, { "epoch": 0.3624305263649526, "grad_norm": 2.367011308670044, "learning_rate": 7.371423636401555e-06, "loss": 0.3622, "step": 8730 }, { "epoch": 0.36247204188916393, "grad_norm": 2.554701805114746, "learning_rate": 7.370831728158371e-06, "loss": 0.4802, "step": 8731 }, { "epoch": 0.36251355741337526, "grad_norm": 2.9470651149749756, "learning_rate": 7.370239777049866e-06, "loss": 0.5861, "step": 8732 }, { "epoch": 0.3625550729375866, "grad_norm": 2.231698751449585, "learning_rate": 7.369647783086742e-06, "loss": 0.4266, "step": 8733 }, { "epoch": 0.36259658846179793, "grad_norm": 2.698467254638672, "learning_rate": 7.369055746279704e-06, "loss": 0.4382, "step": 8734 }, { "epoch": 0.36263810398600926, "grad_norm": 2.874417543411255, "learning_rate": 7.368463666639455e-06, "loss": 0.5266, "step": 8735 }, { "epoch": 0.3626796195102206, "grad_norm": 2.7160325050354004, "learning_rate": 7.3678715441767e-06, "loss": 0.6678, "step": 8736 }, { "epoch": 0.36272113503443193, "grad_norm": 3.4617416858673096, "learning_rate": 7.367279378902146e-06, "loss": 0.3685, "step": 8737 }, { "epoch": 0.36276265055864326, "grad_norm": 2.695378541946411, "learning_rate": 7.366687170826498e-06, "loss": 0.4526, "step": 8738 }, { "epoch": 0.3628041660828546, "grad_norm": 2.2522165775299072, "learning_rate": 7.366094919960463e-06, "loss": 0.5441, "step": 8739 }, { "epoch": 0.36284568160706593, "grad_norm": 2.300443172454834, "learning_rate": 7.365502626314751e-06, "loss": 0.4366, "step": 8740 }, { "epoch": 0.36288719713127726, "grad_norm": 2.4082515239715576, "learning_rate": 7.364910289900069e-06, "loss": 0.5068, "step": 8741 }, { "epoch": 0.3629287126554886, "grad_norm": 2.6169915199279785, "learning_rate": 7.364317910727128e-06, "loss": 0.5992, "step": 8742 }, { "epoch": 0.3629702281796999, "grad_norm": 2.6951165199279785, "learning_rate": 7.363725488806637e-06, "loss": 0.5534, "step": 8743 }, { "epoch": 0.36301174370391126, "grad_norm": 3.079096555709839, "learning_rate": 7.363133024149307e-06, "loss": 0.5018, "step": 8744 }, { "epoch": 0.3630532592281226, "grad_norm": 2.4531097412109375, "learning_rate": 7.362540516765852e-06, "loss": 0.5706, "step": 8745 }, { "epoch": 0.3630947747523339, "grad_norm": 2.345832109451294, "learning_rate": 7.361947966666982e-06, "loss": 0.5495, "step": 8746 }, { "epoch": 0.36313629027654526, "grad_norm": 2.4045939445495605, "learning_rate": 7.361355373863415e-06, "loss": 0.4098, "step": 8747 }, { "epoch": 0.36317780580075665, "grad_norm": 2.752382278442383, "learning_rate": 7.360762738365859e-06, "loss": 0.6182, "step": 8748 }, { "epoch": 0.363219321324968, "grad_norm": 2.577249765396118, "learning_rate": 7.360170060185035e-06, "loss": 0.4779, "step": 8749 }, { "epoch": 0.3632608368491793, "grad_norm": 2.460738182067871, "learning_rate": 7.359577339331655e-06, "loss": 0.4586, "step": 8750 }, { "epoch": 0.36330235237339065, "grad_norm": 3.0411295890808105, "learning_rate": 7.358984575816437e-06, "loss": 0.5879, "step": 8751 }, { "epoch": 0.363343867897602, "grad_norm": 2.289757490158081, "learning_rate": 7.358391769650096e-06, "loss": 0.5902, "step": 8752 }, { "epoch": 0.3633853834218133, "grad_norm": 2.866206169128418, "learning_rate": 7.357798920843354e-06, "loss": 0.6987, "step": 8753 }, { "epoch": 0.36342689894602465, "grad_norm": 2.240554094314575, "learning_rate": 7.357206029406927e-06, "loss": 0.4526, "step": 8754 }, { "epoch": 0.363468414470236, "grad_norm": 2.408329486846924, "learning_rate": 7.356613095351535e-06, "loss": 0.5572, "step": 8755 }, { "epoch": 0.3635099299944473, "grad_norm": 2.7276570796966553, "learning_rate": 7.3560201186879e-06, "loss": 0.6455, "step": 8756 }, { "epoch": 0.36355144551865864, "grad_norm": 2.6508681774139404, "learning_rate": 7.355427099426742e-06, "loss": 0.5937, "step": 8757 }, { "epoch": 0.36359296104287, "grad_norm": 2.93052077293396, "learning_rate": 7.354834037578783e-06, "loss": 0.5949, "step": 8758 }, { "epoch": 0.3636344765670813, "grad_norm": 2.5175795555114746, "learning_rate": 7.354240933154746e-06, "loss": 0.6088, "step": 8759 }, { "epoch": 0.36367599209129264, "grad_norm": 2.4460999965667725, "learning_rate": 7.353647786165354e-06, "loss": 0.5275, "step": 8760 }, { "epoch": 0.363717507615504, "grad_norm": 2.79774808883667, "learning_rate": 7.353054596621332e-06, "loss": 0.4818, "step": 8761 }, { "epoch": 0.3637590231397153, "grad_norm": 2.5370914936065674, "learning_rate": 7.352461364533405e-06, "loss": 0.4666, "step": 8762 }, { "epoch": 0.36380053866392664, "grad_norm": 3.397732734680176, "learning_rate": 7.351868089912298e-06, "loss": 0.5903, "step": 8763 }, { "epoch": 0.363842054188138, "grad_norm": 2.48154616355896, "learning_rate": 7.35127477276874e-06, "loss": 0.5469, "step": 8764 }, { "epoch": 0.3638835697123493, "grad_norm": 2.1464455127716064, "learning_rate": 7.350681413113454e-06, "loss": 0.4405, "step": 8765 }, { "epoch": 0.36392508523656064, "grad_norm": 3.0062997341156006, "learning_rate": 7.350088010957171e-06, "loss": 0.5663, "step": 8766 }, { "epoch": 0.363966600760772, "grad_norm": 2.507903814315796, "learning_rate": 7.3494945663106185e-06, "loss": 0.4589, "step": 8767 }, { "epoch": 0.3640081162849833, "grad_norm": 2.327810287475586, "learning_rate": 7.34890107918453e-06, "loss": 0.4344, "step": 8768 }, { "epoch": 0.36404963180919464, "grad_norm": 3.187948226928711, "learning_rate": 7.3483075495896296e-06, "loss": 0.6581, "step": 8769 }, { "epoch": 0.364091147333406, "grad_norm": 3.32806658744812, "learning_rate": 7.347713977536652e-06, "loss": 0.5443, "step": 8770 }, { "epoch": 0.3641326628576173, "grad_norm": 3.0684030055999756, "learning_rate": 7.347120363036331e-06, "loss": 0.6041, "step": 8771 }, { "epoch": 0.36417417838182864, "grad_norm": 2.531273603439331, "learning_rate": 7.346526706099396e-06, "loss": 0.3852, "step": 8772 }, { "epoch": 0.36421569390604, "grad_norm": 2.668245553970337, "learning_rate": 7.345933006736583e-06, "loss": 0.6653, "step": 8773 }, { "epoch": 0.3642572094302513, "grad_norm": 2.0426735877990723, "learning_rate": 7.345339264958624e-06, "loss": 0.4148, "step": 8774 }, { "epoch": 0.36429872495446264, "grad_norm": 2.605269193649292, "learning_rate": 7.3447454807762565e-06, "loss": 0.4623, "step": 8775 }, { "epoch": 0.36434024047867397, "grad_norm": 2.6500842571258545, "learning_rate": 7.344151654200213e-06, "loss": 0.4905, "step": 8776 }, { "epoch": 0.3643817560028853, "grad_norm": 2.159973621368408, "learning_rate": 7.343557785241234e-06, "loss": 0.4451, "step": 8777 }, { "epoch": 0.36442327152709664, "grad_norm": 2.3804545402526855, "learning_rate": 7.342963873910054e-06, "loss": 0.5095, "step": 8778 }, { "epoch": 0.364464787051308, "grad_norm": 2.457991361618042, "learning_rate": 7.342369920217412e-06, "loss": 0.5151, "step": 8779 }, { "epoch": 0.36450630257551936, "grad_norm": 2.19848370552063, "learning_rate": 7.341775924174045e-06, "loss": 0.6155, "step": 8780 }, { "epoch": 0.3645478180997307, "grad_norm": 2.443281650543213, "learning_rate": 7.3411818857906955e-06, "loss": 0.5307, "step": 8781 }, { "epoch": 0.364589333623942, "grad_norm": 2.2333571910858154, "learning_rate": 7.340587805078103e-06, "loss": 0.6502, "step": 8782 }, { "epoch": 0.36463084914815336, "grad_norm": 2.3197360038757324, "learning_rate": 7.339993682047007e-06, "loss": 0.4197, "step": 8783 }, { "epoch": 0.3646723646723647, "grad_norm": 2.638735771179199, "learning_rate": 7.3393995167081525e-06, "loss": 0.5221, "step": 8784 }, { "epoch": 0.364713880196576, "grad_norm": 2.5094316005706787, "learning_rate": 7.338805309072279e-06, "loss": 0.6771, "step": 8785 }, { "epoch": 0.36475539572078736, "grad_norm": 2.3343253135681152, "learning_rate": 7.338211059150133e-06, "loss": 0.7353, "step": 8786 }, { "epoch": 0.3647969112449987, "grad_norm": 2.7456564903259277, "learning_rate": 7.337616766952455e-06, "loss": 0.4376, "step": 8787 }, { "epoch": 0.36483842676921, "grad_norm": 2.4399940967559814, "learning_rate": 7.3370224324899944e-06, "loss": 0.5639, "step": 8788 }, { "epoch": 0.36487994229342136, "grad_norm": 2.5275542736053467, "learning_rate": 7.336428055773493e-06, "loss": 0.6077, "step": 8789 }, { "epoch": 0.3649214578176327, "grad_norm": 2.461333990097046, "learning_rate": 7.3358336368137e-06, "loss": 0.5043, "step": 8790 }, { "epoch": 0.364962973341844, "grad_norm": 2.788147449493408, "learning_rate": 7.335239175621361e-06, "loss": 0.5375, "step": 8791 }, { "epoch": 0.36500448886605535, "grad_norm": 2.7457590103149414, "learning_rate": 7.334644672207225e-06, "loss": 0.4137, "step": 8792 }, { "epoch": 0.3650460043902667, "grad_norm": 2.696380138397217, "learning_rate": 7.33405012658204e-06, "loss": 0.5615, "step": 8793 }, { "epoch": 0.365087519914478, "grad_norm": 2.9394748210906982, "learning_rate": 7.333455538756557e-06, "loss": 0.6047, "step": 8794 }, { "epoch": 0.36512903543868935, "grad_norm": 2.4491922855377197, "learning_rate": 7.332860908741524e-06, "loss": 0.5066, "step": 8795 }, { "epoch": 0.3651705509629007, "grad_norm": 2.781630277633667, "learning_rate": 7.332266236547694e-06, "loss": 0.5493, "step": 8796 }, { "epoch": 0.365212066487112, "grad_norm": 2.602370500564575, "learning_rate": 7.331671522185817e-06, "loss": 0.4715, "step": 8797 }, { "epoch": 0.36525358201132335, "grad_norm": 2.5696117877960205, "learning_rate": 7.3310767656666474e-06, "loss": 0.4757, "step": 8798 }, { "epoch": 0.3652950975355347, "grad_norm": 2.342768430709839, "learning_rate": 7.330481967000938e-06, "loss": 0.5508, "step": 8799 }, { "epoch": 0.365336613059746, "grad_norm": 2.3682749271392822, "learning_rate": 7.329887126199443e-06, "loss": 0.4144, "step": 8800 }, { "epoch": 0.36537812858395735, "grad_norm": 2.4096479415893555, "learning_rate": 7.3292922432729185e-06, "loss": 0.5769, "step": 8801 }, { "epoch": 0.3654196441081687, "grad_norm": 2.4687931537628174, "learning_rate": 7.328697318232117e-06, "loss": 0.5857, "step": 8802 }, { "epoch": 0.36546115963238, "grad_norm": 2.3851559162139893, "learning_rate": 7.328102351087799e-06, "loss": 0.4976, "step": 8803 }, { "epoch": 0.36550267515659135, "grad_norm": 1.9932383298873901, "learning_rate": 7.327507341850717e-06, "loss": 0.5376, "step": 8804 }, { "epoch": 0.3655441906808027, "grad_norm": 2.4890785217285156, "learning_rate": 7.326912290531634e-06, "loss": 0.6389, "step": 8805 }, { "epoch": 0.365585706205014, "grad_norm": 2.5131468772888184, "learning_rate": 7.326317197141304e-06, "loss": 0.5332, "step": 8806 }, { "epoch": 0.36562722172922535, "grad_norm": 2.493673801422119, "learning_rate": 7.325722061690489e-06, "loss": 0.5649, "step": 8807 }, { "epoch": 0.3656687372534367, "grad_norm": 2.8517680168151855, "learning_rate": 7.325126884189948e-06, "loss": 0.4363, "step": 8808 }, { "epoch": 0.365710252777648, "grad_norm": 1.9220608472824097, "learning_rate": 7.324531664650445e-06, "loss": 0.5706, "step": 8809 }, { "epoch": 0.3657517683018594, "grad_norm": 2.6837356090545654, "learning_rate": 7.323936403082737e-06, "loss": 0.5178, "step": 8810 }, { "epoch": 0.36579328382607074, "grad_norm": 2.6669833660125732, "learning_rate": 7.3233410994975895e-06, "loss": 0.524, "step": 8811 }, { "epoch": 0.36583479935028207, "grad_norm": 2.1923465728759766, "learning_rate": 7.322745753905767e-06, "loss": 0.5533, "step": 8812 }, { "epoch": 0.3658763148744934, "grad_norm": 2.2918622493743896, "learning_rate": 7.32215036631803e-06, "loss": 0.4766, "step": 8813 }, { "epoch": 0.36591783039870474, "grad_norm": 2.355863571166992, "learning_rate": 7.321554936745145e-06, "loss": 0.6853, "step": 8814 }, { "epoch": 0.36595934592291607, "grad_norm": 2.078788995742798, "learning_rate": 7.32095946519788e-06, "loss": 0.4153, "step": 8815 }, { "epoch": 0.3660008614471274, "grad_norm": 2.3594446182250977, "learning_rate": 7.3203639516869975e-06, "loss": 0.4661, "step": 8816 }, { "epoch": 0.36604237697133873, "grad_norm": 2.395305871963501, "learning_rate": 7.319768396223266e-06, "loss": 0.5774, "step": 8817 }, { "epoch": 0.36608389249555007, "grad_norm": 2.6076951026916504, "learning_rate": 7.319172798817454e-06, "loss": 0.5251, "step": 8818 }, { "epoch": 0.3661254080197614, "grad_norm": 2.1950204372406006, "learning_rate": 7.3185771594803265e-06, "loss": 0.3548, "step": 8819 }, { "epoch": 0.36616692354397273, "grad_norm": 2.500803232192993, "learning_rate": 7.317981478222658e-06, "loss": 0.4715, "step": 8820 }, { "epoch": 0.36620843906818407, "grad_norm": 2.892017364501953, "learning_rate": 7.317385755055215e-06, "loss": 0.61, "step": 8821 }, { "epoch": 0.3662499545923954, "grad_norm": 2.1795382499694824, "learning_rate": 7.31678998998877e-06, "loss": 0.6106, "step": 8822 }, { "epoch": 0.36629147011660673, "grad_norm": 3.1193344593048096, "learning_rate": 7.316194183034096e-06, "loss": 0.5897, "step": 8823 }, { "epoch": 0.36633298564081807, "grad_norm": 2.289132833480835, "learning_rate": 7.315598334201961e-06, "loss": 0.5201, "step": 8824 }, { "epoch": 0.3663745011650294, "grad_norm": 2.7060625553131104, "learning_rate": 7.315002443503142e-06, "loss": 0.6171, "step": 8825 }, { "epoch": 0.36641601668924073, "grad_norm": 2.3465425968170166, "learning_rate": 7.314406510948412e-06, "loss": 0.5795, "step": 8826 }, { "epoch": 0.36645753221345206, "grad_norm": 2.591088056564331, "learning_rate": 7.313810536548543e-06, "loss": 0.7441, "step": 8827 }, { "epoch": 0.3664990477376634, "grad_norm": 2.7366278171539307, "learning_rate": 7.313214520314316e-06, "loss": 0.5885, "step": 8828 }, { "epoch": 0.36654056326187473, "grad_norm": 2.446974515914917, "learning_rate": 7.312618462256502e-06, "loss": 0.4443, "step": 8829 }, { "epoch": 0.36658207878608606, "grad_norm": 2.2902674674987793, "learning_rate": 7.312022362385879e-06, "loss": 0.4912, "step": 8830 }, { "epoch": 0.3666235943102974, "grad_norm": 1.963570475578308, "learning_rate": 7.311426220713226e-06, "loss": 0.3933, "step": 8831 }, { "epoch": 0.36666510983450873, "grad_norm": 2.964974880218506, "learning_rate": 7.31083003724932e-06, "loss": 0.6777, "step": 8832 }, { "epoch": 0.36670662535872006, "grad_norm": 2.7863376140594482, "learning_rate": 7.310233812004942e-06, "loss": 0.5678, "step": 8833 }, { "epoch": 0.3667481408829314, "grad_norm": 2.458967924118042, "learning_rate": 7.309637544990869e-06, "loss": 0.5784, "step": 8834 }, { "epoch": 0.36678965640714273, "grad_norm": 3.017298460006714, "learning_rate": 7.309041236217886e-06, "loss": 0.7824, "step": 8835 }, { "epoch": 0.36683117193135406, "grad_norm": 2.2896265983581543, "learning_rate": 7.3084448856967704e-06, "loss": 0.4345, "step": 8836 }, { "epoch": 0.3668726874555654, "grad_norm": 2.770745277404785, "learning_rate": 7.307848493438305e-06, "loss": 0.4836, "step": 8837 }, { "epoch": 0.3669142029797767, "grad_norm": 2.330984115600586, "learning_rate": 7.3072520594532754e-06, "loss": 0.473, "step": 8838 }, { "epoch": 0.36695571850398806, "grad_norm": 2.9562602043151855, "learning_rate": 7.306655583752464e-06, "loss": 0.4798, "step": 8839 }, { "epoch": 0.3669972340281994, "grad_norm": 1.9489785432815552, "learning_rate": 7.306059066346652e-06, "loss": 0.489, "step": 8840 }, { "epoch": 0.3670387495524108, "grad_norm": 2.911885976791382, "learning_rate": 7.30546250724663e-06, "loss": 0.465, "step": 8841 }, { "epoch": 0.3670802650766221, "grad_norm": 2.3686017990112305, "learning_rate": 7.304865906463181e-06, "loss": 0.4873, "step": 8842 }, { "epoch": 0.36712178060083345, "grad_norm": 2.3251214027404785, "learning_rate": 7.304269264007091e-06, "loss": 0.4496, "step": 8843 }, { "epoch": 0.3671632961250448, "grad_norm": 2.714709758758545, "learning_rate": 7.30367257988915e-06, "loss": 0.5075, "step": 8844 }, { "epoch": 0.3672048116492561, "grad_norm": 2.3017959594726562, "learning_rate": 7.303075854120143e-06, "loss": 0.4899, "step": 8845 }, { "epoch": 0.36724632717346745, "grad_norm": 2.160381317138672, "learning_rate": 7.302479086710862e-06, "loss": 0.4371, "step": 8846 }, { "epoch": 0.3672878426976788, "grad_norm": 2.837799310684204, "learning_rate": 7.301882277672095e-06, "loss": 0.5344, "step": 8847 }, { "epoch": 0.3673293582218901, "grad_norm": 2.098306179046631, "learning_rate": 7.301285427014633e-06, "loss": 0.3889, "step": 8848 }, { "epoch": 0.36737087374610145, "grad_norm": 2.6607208251953125, "learning_rate": 7.300688534749266e-06, "loss": 0.5564, "step": 8849 }, { "epoch": 0.3674123892703128, "grad_norm": 2.7054922580718994, "learning_rate": 7.300091600886788e-06, "loss": 0.3451, "step": 8850 }, { "epoch": 0.3674539047945241, "grad_norm": 2.144272804260254, "learning_rate": 7.299494625437992e-06, "loss": 0.374, "step": 8851 }, { "epoch": 0.36749542031873544, "grad_norm": 2.098215341567993, "learning_rate": 7.29889760841367e-06, "loss": 0.4114, "step": 8852 }, { "epoch": 0.3675369358429468, "grad_norm": 2.4827020168304443, "learning_rate": 7.2983005498246165e-06, "loss": 0.424, "step": 8853 }, { "epoch": 0.3675784513671581, "grad_norm": 2.9711081981658936, "learning_rate": 7.297703449681626e-06, "loss": 0.623, "step": 8854 }, { "epoch": 0.36761996689136944, "grad_norm": 2.561830759048462, "learning_rate": 7.297106307995496e-06, "loss": 0.3591, "step": 8855 }, { "epoch": 0.3676614824155808, "grad_norm": 2.809337854385376, "learning_rate": 7.296509124777021e-06, "loss": 0.6736, "step": 8856 }, { "epoch": 0.3677029979397921, "grad_norm": 3.2512564659118652, "learning_rate": 7.2959119000369995e-06, "loss": 0.629, "step": 8857 }, { "epoch": 0.36774451346400344, "grad_norm": 2.413282871246338, "learning_rate": 7.295314633786228e-06, "loss": 0.5247, "step": 8858 }, { "epoch": 0.3677860289882148, "grad_norm": 2.083742141723633, "learning_rate": 7.294717326035508e-06, "loss": 0.3542, "step": 8859 }, { "epoch": 0.3678275445124261, "grad_norm": 2.7388789653778076, "learning_rate": 7.294119976795635e-06, "loss": 0.5055, "step": 8860 }, { "epoch": 0.36786906003663744, "grad_norm": 2.454930305480957, "learning_rate": 7.293522586077414e-06, "loss": 0.4925, "step": 8861 }, { "epoch": 0.3679105755608488, "grad_norm": 2.12432861328125, "learning_rate": 7.292925153891642e-06, "loss": 0.5327, "step": 8862 }, { "epoch": 0.3679520910850601, "grad_norm": 2.949430465698242, "learning_rate": 7.292327680249123e-06, "loss": 0.4995, "step": 8863 }, { "epoch": 0.36799360660927144, "grad_norm": 2.2264404296875, "learning_rate": 7.2917301651606585e-06, "loss": 0.4198, "step": 8864 }, { "epoch": 0.3680351221334828, "grad_norm": 2.905937433242798, "learning_rate": 7.291132608637053e-06, "loss": 0.5459, "step": 8865 }, { "epoch": 0.3680766376576941, "grad_norm": 2.1918888092041016, "learning_rate": 7.290535010689109e-06, "loss": 0.4888, "step": 8866 }, { "epoch": 0.36811815318190544, "grad_norm": 2.121196746826172, "learning_rate": 7.289937371327631e-06, "loss": 0.5716, "step": 8867 }, { "epoch": 0.36815966870611677, "grad_norm": 2.4423704147338867, "learning_rate": 7.289339690563426e-06, "loss": 0.4781, "step": 8868 }, { "epoch": 0.3682011842303281, "grad_norm": 2.8954544067382812, "learning_rate": 7.2887419684073e-06, "loss": 0.5771, "step": 8869 }, { "epoch": 0.36824269975453944, "grad_norm": 2.403897523880005, "learning_rate": 7.288144204870059e-06, "loss": 0.5676, "step": 8870 }, { "epoch": 0.36828421527875077, "grad_norm": 2.6573047637939453, "learning_rate": 7.287546399962511e-06, "loss": 0.4243, "step": 8871 }, { "epoch": 0.36832573080296216, "grad_norm": 2.822634696960449, "learning_rate": 7.286948553695466e-06, "loss": 0.4015, "step": 8872 }, { "epoch": 0.3683672463271735, "grad_norm": 2.2628893852233887, "learning_rate": 7.28635066607973e-06, "loss": 0.5567, "step": 8873 }, { "epoch": 0.3684087618513848, "grad_norm": 3.2848424911499023, "learning_rate": 7.285752737126117e-06, "loss": 0.577, "step": 8874 }, { "epoch": 0.36845027737559616, "grad_norm": 2.1022276878356934, "learning_rate": 7.2851547668454335e-06, "loss": 0.616, "step": 8875 }, { "epoch": 0.3684917928998075, "grad_norm": 3.447307825088501, "learning_rate": 7.284556755248495e-06, "loss": 0.4669, "step": 8876 }, { "epoch": 0.3685333084240188, "grad_norm": 2.1045100688934326, "learning_rate": 7.283958702346111e-06, "loss": 0.4751, "step": 8877 }, { "epoch": 0.36857482394823016, "grad_norm": 2.3455915451049805, "learning_rate": 7.283360608149096e-06, "loss": 0.42, "step": 8878 }, { "epoch": 0.3686163394724415, "grad_norm": 2.244450807571411, "learning_rate": 7.2827624726682635e-06, "loss": 0.4701, "step": 8879 }, { "epoch": 0.3686578549966528, "grad_norm": 2.778153419494629, "learning_rate": 7.2821642959144255e-06, "loss": 0.6638, "step": 8880 }, { "epoch": 0.36869937052086416, "grad_norm": 2.198955535888672, "learning_rate": 7.281566077898401e-06, "loss": 0.3961, "step": 8881 }, { "epoch": 0.3687408860450755, "grad_norm": 2.569552183151245, "learning_rate": 7.2809678186310025e-06, "loss": 0.5653, "step": 8882 }, { "epoch": 0.3687824015692868, "grad_norm": 2.643770456314087, "learning_rate": 7.2803695181230506e-06, "loss": 0.6418, "step": 8883 }, { "epoch": 0.36882391709349815, "grad_norm": 2.2540018558502197, "learning_rate": 7.279771176385358e-06, "loss": 0.6512, "step": 8884 }, { "epoch": 0.3688654326177095, "grad_norm": 2.3945655822753906, "learning_rate": 7.279172793428748e-06, "loss": 0.6082, "step": 8885 }, { "epoch": 0.3689069481419208, "grad_norm": 2.4740006923675537, "learning_rate": 7.278574369264035e-06, "loss": 0.4887, "step": 8886 }, { "epoch": 0.36894846366613215, "grad_norm": 2.2247588634490967, "learning_rate": 7.277975903902043e-06, "loss": 0.5759, "step": 8887 }, { "epoch": 0.3689899791903435, "grad_norm": 2.256763458251953, "learning_rate": 7.277377397353588e-06, "loss": 0.6403, "step": 8888 }, { "epoch": 0.3690314947145548, "grad_norm": 2.5736329555511475, "learning_rate": 7.276778849629493e-06, "loss": 0.4313, "step": 8889 }, { "epoch": 0.36907301023876615, "grad_norm": 1.871288537979126, "learning_rate": 7.276180260740582e-06, "loss": 0.4053, "step": 8890 }, { "epoch": 0.3691145257629775, "grad_norm": 2.5281355381011963, "learning_rate": 7.275581630697675e-06, "loss": 0.521, "step": 8891 }, { "epoch": 0.3691560412871888, "grad_norm": 3.0754566192626953, "learning_rate": 7.274982959511596e-06, "loss": 0.5749, "step": 8892 }, { "epoch": 0.36919755681140015, "grad_norm": 2.1923575401306152, "learning_rate": 7.2743842471931696e-06, "loss": 0.5863, "step": 8893 }, { "epoch": 0.3692390723356115, "grad_norm": 2.2442500591278076, "learning_rate": 7.27378549375322e-06, "loss": 0.3662, "step": 8894 }, { "epoch": 0.3692805878598228, "grad_norm": 2.217538833618164, "learning_rate": 7.273186699202572e-06, "loss": 0.5186, "step": 8895 }, { "epoch": 0.36932210338403415, "grad_norm": 2.83453631401062, "learning_rate": 7.2725878635520564e-06, "loss": 0.5763, "step": 8896 }, { "epoch": 0.3693636189082455, "grad_norm": 2.1194283962249756, "learning_rate": 7.271988986812495e-06, "loss": 0.5806, "step": 8897 }, { "epoch": 0.3694051344324568, "grad_norm": 2.2525854110717773, "learning_rate": 7.271390068994719e-06, "loss": 0.4586, "step": 8898 }, { "epoch": 0.36944664995666815, "grad_norm": 2.6368703842163086, "learning_rate": 7.2707911101095545e-06, "loss": 0.5176, "step": 8899 }, { "epoch": 0.3694881654808795, "grad_norm": 2.19118332862854, "learning_rate": 7.270192110167834e-06, "loss": 0.4983, "step": 8900 }, { "epoch": 0.3695296810050908, "grad_norm": 2.3404600620269775, "learning_rate": 7.269593069180384e-06, "loss": 0.4957, "step": 8901 }, { "epoch": 0.36957119652930215, "grad_norm": 2.368623971939087, "learning_rate": 7.268993987158038e-06, "loss": 0.5607, "step": 8902 }, { "epoch": 0.36961271205351354, "grad_norm": 2.422976493835449, "learning_rate": 7.268394864111627e-06, "loss": 0.464, "step": 8903 }, { "epoch": 0.36965422757772487, "grad_norm": 2.1745522022247314, "learning_rate": 7.267795700051983e-06, "loss": 0.4541, "step": 8904 }, { "epoch": 0.3696957431019362, "grad_norm": 2.427903175354004, "learning_rate": 7.267196494989939e-06, "loss": 0.5198, "step": 8905 }, { "epoch": 0.36973725862614754, "grad_norm": 2.3039989471435547, "learning_rate": 7.266597248936329e-06, "loss": 0.5612, "step": 8906 }, { "epoch": 0.36977877415035887, "grad_norm": 2.640375852584839, "learning_rate": 7.265997961901987e-06, "loss": 0.5483, "step": 8907 }, { "epoch": 0.3698202896745702, "grad_norm": 2.4350736141204834, "learning_rate": 7.2653986338977486e-06, "loss": 0.5786, "step": 8908 }, { "epoch": 0.36986180519878153, "grad_norm": 2.7639453411102295, "learning_rate": 7.26479926493445e-06, "loss": 0.5484, "step": 8909 }, { "epoch": 0.36990332072299287, "grad_norm": 2.4880471229553223, "learning_rate": 7.264199855022929e-06, "loss": 0.6168, "step": 8910 }, { "epoch": 0.3699448362472042, "grad_norm": 2.7314565181732178, "learning_rate": 7.263600404174021e-06, "loss": 0.5553, "step": 8911 }, { "epoch": 0.36998635177141553, "grad_norm": 2.454352617263794, "learning_rate": 7.263000912398564e-06, "loss": 0.5771, "step": 8912 }, { "epoch": 0.37002786729562687, "grad_norm": 2.554090976715088, "learning_rate": 7.262401379707401e-06, "loss": 0.5449, "step": 8913 }, { "epoch": 0.3700693828198382, "grad_norm": 2.353170394897461, "learning_rate": 7.261801806111367e-06, "loss": 0.3983, "step": 8914 }, { "epoch": 0.37011089834404953, "grad_norm": 2.6254775524139404, "learning_rate": 7.261202191621306e-06, "loss": 0.5077, "step": 8915 }, { "epoch": 0.37015241386826087, "grad_norm": 2.354456901550293, "learning_rate": 7.260602536248056e-06, "loss": 0.6139, "step": 8916 }, { "epoch": 0.3701939293924722, "grad_norm": 2.714919328689575, "learning_rate": 7.2600028400024616e-06, "loss": 0.4888, "step": 8917 }, { "epoch": 0.37023544491668353, "grad_norm": 2.466395139694214, "learning_rate": 7.259403102895364e-06, "loss": 0.5814, "step": 8918 }, { "epoch": 0.37027696044089486, "grad_norm": 2.0965654850006104, "learning_rate": 7.258803324937607e-06, "loss": 0.5591, "step": 8919 }, { "epoch": 0.3703184759651062, "grad_norm": 2.459714651107788, "learning_rate": 7.258203506140036e-06, "loss": 0.4353, "step": 8920 }, { "epoch": 0.37035999148931753, "grad_norm": 2.346191644668579, "learning_rate": 7.257603646513492e-06, "loss": 0.5065, "step": 8921 }, { "epoch": 0.37040150701352886, "grad_norm": 2.582271099090576, "learning_rate": 7.257003746068826e-06, "loss": 0.5542, "step": 8922 }, { "epoch": 0.3704430225377402, "grad_norm": 2.3670785427093506, "learning_rate": 7.256403804816881e-06, "loss": 0.4475, "step": 8923 }, { "epoch": 0.37048453806195153, "grad_norm": 2.3612115383148193, "learning_rate": 7.255803822768504e-06, "loss": 0.4258, "step": 8924 }, { "epoch": 0.37052605358616286, "grad_norm": 2.3758363723754883, "learning_rate": 7.2552037999345445e-06, "loss": 0.5757, "step": 8925 }, { "epoch": 0.3705675691103742, "grad_norm": 2.267613649368286, "learning_rate": 7.25460373632585e-06, "loss": 0.5949, "step": 8926 }, { "epoch": 0.37060908463458553, "grad_norm": 2.637937545776367, "learning_rate": 7.25400363195327e-06, "loss": 0.4467, "step": 8927 }, { "epoch": 0.37065060015879686, "grad_norm": 2.1520233154296875, "learning_rate": 7.2534034868276546e-06, "loss": 0.5599, "step": 8928 }, { "epoch": 0.3706921156830082, "grad_norm": 2.725710868835449, "learning_rate": 7.252803300959855e-06, "loss": 0.4191, "step": 8929 }, { "epoch": 0.3707336312072195, "grad_norm": 2.2054049968719482, "learning_rate": 7.252203074360723e-06, "loss": 0.4432, "step": 8930 }, { "epoch": 0.37077514673143086, "grad_norm": 2.792029857635498, "learning_rate": 7.251602807041111e-06, "loss": 0.5017, "step": 8931 }, { "epoch": 0.3708166622556422, "grad_norm": 2.9845471382141113, "learning_rate": 7.251002499011869e-06, "loss": 0.6584, "step": 8932 }, { "epoch": 0.3708581777798536, "grad_norm": 2.4429945945739746, "learning_rate": 7.250402150283854e-06, "loss": 0.5691, "step": 8933 }, { "epoch": 0.3708996933040649, "grad_norm": 2.2782654762268066, "learning_rate": 7.249801760867918e-06, "loss": 0.4082, "step": 8934 }, { "epoch": 0.37094120882827625, "grad_norm": 2.491185426712036, "learning_rate": 7.2492013307749195e-06, "loss": 0.5325, "step": 8935 }, { "epoch": 0.3709827243524876, "grad_norm": 2.33307147026062, "learning_rate": 7.248600860015712e-06, "loss": 0.6684, "step": 8936 }, { "epoch": 0.3710242398766989, "grad_norm": 2.368178367614746, "learning_rate": 7.248000348601154e-06, "loss": 0.4736, "step": 8937 }, { "epoch": 0.37106575540091025, "grad_norm": 2.6075656414031982, "learning_rate": 7.2473997965421e-06, "loss": 0.498, "step": 8938 }, { "epoch": 0.3711072709251216, "grad_norm": 2.380403995513916, "learning_rate": 7.246799203849411e-06, "loss": 0.5449, "step": 8939 }, { "epoch": 0.3711487864493329, "grad_norm": 2.4186179637908936, "learning_rate": 7.246198570533944e-06, "loss": 0.4196, "step": 8940 }, { "epoch": 0.37119030197354425, "grad_norm": 2.2943081855773926, "learning_rate": 7.24559789660656e-06, "loss": 0.5216, "step": 8941 }, { "epoch": 0.3712318174977556, "grad_norm": 3.216364860534668, "learning_rate": 7.244997182078119e-06, "loss": 0.5455, "step": 8942 }, { "epoch": 0.3712733330219669, "grad_norm": 2.025731086730957, "learning_rate": 7.244396426959482e-06, "loss": 0.3874, "step": 8943 }, { "epoch": 0.37131484854617824, "grad_norm": 2.564450740814209, "learning_rate": 7.2437956312615095e-06, "loss": 0.5131, "step": 8944 }, { "epoch": 0.3713563640703896, "grad_norm": 2.4387807846069336, "learning_rate": 7.243194794995066e-06, "loss": 0.4523, "step": 8945 }, { "epoch": 0.3713978795946009, "grad_norm": 2.6329762935638428, "learning_rate": 7.242593918171014e-06, "loss": 0.5765, "step": 8946 }, { "epoch": 0.37143939511881224, "grad_norm": 2.4108991622924805, "learning_rate": 7.241993000800218e-06, "loss": 0.5012, "step": 8947 }, { "epoch": 0.3714809106430236, "grad_norm": 3.050107002258301, "learning_rate": 7.241392042893542e-06, "loss": 0.4469, "step": 8948 }, { "epoch": 0.3715224261672349, "grad_norm": 2.699772357940674, "learning_rate": 7.240791044461853e-06, "loss": 0.6381, "step": 8949 }, { "epoch": 0.37156394169144624, "grad_norm": 2.5841236114501953, "learning_rate": 7.240190005516015e-06, "loss": 0.5447, "step": 8950 }, { "epoch": 0.3716054572156576, "grad_norm": 2.7454657554626465, "learning_rate": 7.2395889260668935e-06, "loss": 0.5612, "step": 8951 }, { "epoch": 0.3716469727398689, "grad_norm": 2.2467422485351562, "learning_rate": 7.238987806125363e-06, "loss": 0.4542, "step": 8952 }, { "epoch": 0.37168848826408024, "grad_norm": 2.2600948810577393, "learning_rate": 7.238386645702284e-06, "loss": 0.5624, "step": 8953 }, { "epoch": 0.3717300037882916, "grad_norm": 2.6661605834960938, "learning_rate": 7.237785444808532e-06, "loss": 0.5365, "step": 8954 }, { "epoch": 0.3717715193125029, "grad_norm": 2.9497904777526855, "learning_rate": 7.237184203454972e-06, "loss": 0.5388, "step": 8955 }, { "epoch": 0.37181303483671424, "grad_norm": 2.396357297897339, "learning_rate": 7.2365829216524785e-06, "loss": 0.547, "step": 8956 }, { "epoch": 0.3718545503609256, "grad_norm": 2.628023862838745, "learning_rate": 7.235981599411921e-06, "loss": 0.524, "step": 8957 }, { "epoch": 0.3718960658851369, "grad_norm": 2.800846576690674, "learning_rate": 7.235380236744172e-06, "loss": 0.5343, "step": 8958 }, { "epoch": 0.37193758140934824, "grad_norm": 2.4246621131896973, "learning_rate": 7.234778833660103e-06, "loss": 0.5995, "step": 8959 }, { "epoch": 0.37197909693355957, "grad_norm": 2.2828307151794434, "learning_rate": 7.234177390170588e-06, "loss": 0.4862, "step": 8960 }, { "epoch": 0.3720206124577709, "grad_norm": 2.214452028274536, "learning_rate": 7.233575906286503e-06, "loss": 0.4165, "step": 8961 }, { "epoch": 0.37206212798198224, "grad_norm": 2.81257700920105, "learning_rate": 7.232974382018721e-06, "loss": 0.4739, "step": 8962 }, { "epoch": 0.37210364350619357, "grad_norm": 2.8386576175689697, "learning_rate": 7.232372817378119e-06, "loss": 0.5503, "step": 8963 }, { "epoch": 0.37214515903040496, "grad_norm": 2.1831109523773193, "learning_rate": 7.231771212375574e-06, "loss": 0.5612, "step": 8964 }, { "epoch": 0.3721866745546163, "grad_norm": 2.244832992553711, "learning_rate": 7.231169567021961e-06, "loss": 0.4572, "step": 8965 }, { "epoch": 0.3722281900788276, "grad_norm": 2.699486255645752, "learning_rate": 7.230567881328159e-06, "loss": 0.5541, "step": 8966 }, { "epoch": 0.37226970560303896, "grad_norm": 2.3110835552215576, "learning_rate": 7.2299661553050474e-06, "loss": 0.4581, "step": 8967 }, { "epoch": 0.3723112211272503, "grad_norm": 2.4234070777893066, "learning_rate": 7.229364388963503e-06, "loss": 0.5855, "step": 8968 }, { "epoch": 0.3723527366514616, "grad_norm": 2.1214027404785156, "learning_rate": 7.22876258231441e-06, "loss": 0.4057, "step": 8969 }, { "epoch": 0.37239425217567296, "grad_norm": 2.225693464279175, "learning_rate": 7.2281607353686474e-06, "loss": 0.5431, "step": 8970 }, { "epoch": 0.3724357676998843, "grad_norm": 2.258098602294922, "learning_rate": 7.227558848137096e-06, "loss": 0.5744, "step": 8971 }, { "epoch": 0.3724772832240956, "grad_norm": 2.326082706451416, "learning_rate": 7.226956920630639e-06, "loss": 0.6111, "step": 8972 }, { "epoch": 0.37251879874830696, "grad_norm": 2.39196515083313, "learning_rate": 7.226354952860157e-06, "loss": 0.4929, "step": 8973 }, { "epoch": 0.3725603142725183, "grad_norm": 2.197099208831787, "learning_rate": 7.225752944836538e-06, "loss": 0.5394, "step": 8974 }, { "epoch": 0.3726018297967296, "grad_norm": 2.475217819213867, "learning_rate": 7.225150896570663e-06, "loss": 0.4248, "step": 8975 }, { "epoch": 0.37264334532094096, "grad_norm": 2.3515148162841797, "learning_rate": 7.224548808073419e-06, "loss": 0.4719, "step": 8976 }, { "epoch": 0.3726848608451523, "grad_norm": 4.889882564544678, "learning_rate": 7.223946679355691e-06, "loss": 0.5981, "step": 8977 }, { "epoch": 0.3727263763693636, "grad_norm": 2.2173240184783936, "learning_rate": 7.223344510428366e-06, "loss": 0.3961, "step": 8978 }, { "epoch": 0.37276789189357495, "grad_norm": 2.8246142864227295, "learning_rate": 7.222742301302331e-06, "loss": 0.44, "step": 8979 }, { "epoch": 0.3728094074177863, "grad_norm": 2.4806652069091797, "learning_rate": 7.222140051988475e-06, "loss": 0.5272, "step": 8980 }, { "epoch": 0.3728509229419976, "grad_norm": 2.4774646759033203, "learning_rate": 7.221537762497687e-06, "loss": 0.5643, "step": 8981 }, { "epoch": 0.37289243846620895, "grad_norm": 2.4316928386688232, "learning_rate": 7.220935432840855e-06, "loss": 0.4526, "step": 8982 }, { "epoch": 0.3729339539904203, "grad_norm": 2.561461925506592, "learning_rate": 7.2203330630288714e-06, "loss": 0.5761, "step": 8983 }, { "epoch": 0.3729754695146316, "grad_norm": 2.2438433170318604, "learning_rate": 7.219730653072625e-06, "loss": 0.5313, "step": 8984 }, { "epoch": 0.37301698503884295, "grad_norm": 2.5390431880950928, "learning_rate": 7.21912820298301e-06, "loss": 0.5434, "step": 8985 }, { "epoch": 0.3730585005630543, "grad_norm": 2.2942564487457275, "learning_rate": 7.218525712770915e-06, "loss": 0.5707, "step": 8986 }, { "epoch": 0.3731000160872656, "grad_norm": 2.8552582263946533, "learning_rate": 7.217923182447237e-06, "loss": 0.586, "step": 8987 }, { "epoch": 0.37314153161147695, "grad_norm": 2.6855015754699707, "learning_rate": 7.217320612022869e-06, "loss": 0.6604, "step": 8988 }, { "epoch": 0.3731830471356883, "grad_norm": 2.4820237159729004, "learning_rate": 7.2167180015087045e-06, "loss": 0.4779, "step": 8989 }, { "epoch": 0.3732245626598996, "grad_norm": 2.656275510787964, "learning_rate": 7.21611535091564e-06, "loss": 0.5112, "step": 8990 }, { "epoch": 0.37326607818411095, "grad_norm": 2.1795759201049805, "learning_rate": 7.215512660254571e-06, "loss": 0.4883, "step": 8991 }, { "epoch": 0.3733075937083223, "grad_norm": 2.522758960723877, "learning_rate": 7.214909929536395e-06, "loss": 0.4868, "step": 8992 }, { "epoch": 0.3733491092325336, "grad_norm": 2.168048143386841, "learning_rate": 7.214307158772009e-06, "loss": 0.5974, "step": 8993 }, { "epoch": 0.37339062475674495, "grad_norm": 2.805579900741577, "learning_rate": 7.21370434797231e-06, "loss": 0.3791, "step": 8994 }, { "epoch": 0.37343214028095634, "grad_norm": 2.4526188373565674, "learning_rate": 7.2131014971482005e-06, "loss": 0.5163, "step": 8995 }, { "epoch": 0.37347365580516767, "grad_norm": 2.4678637981414795, "learning_rate": 7.2124986063105776e-06, "loss": 0.6126, "step": 8996 }, { "epoch": 0.373515171329379, "grad_norm": 2.2815592288970947, "learning_rate": 7.2118956754703405e-06, "loss": 0.4827, "step": 8997 }, { "epoch": 0.37355668685359034, "grad_norm": 2.38289213180542, "learning_rate": 7.211292704638394e-06, "loss": 0.5686, "step": 8998 }, { "epoch": 0.37359820237780167, "grad_norm": 2.5077438354492188, "learning_rate": 7.210689693825637e-06, "loss": 0.651, "step": 8999 }, { "epoch": 0.373639717902013, "grad_norm": 2.9209156036376953, "learning_rate": 7.210086643042973e-06, "loss": 0.5744, "step": 9000 }, { "epoch": 0.37368123342622434, "grad_norm": 2.5807881355285645, "learning_rate": 7.2094835523013055e-06, "loss": 0.5695, "step": 9001 }, { "epoch": 0.37372274895043567, "grad_norm": 2.8154296875, "learning_rate": 7.20888042161154e-06, "loss": 0.5108, "step": 9002 }, { "epoch": 0.373764264474647, "grad_norm": 2.813037395477295, "learning_rate": 7.208277250984577e-06, "loss": 0.5028, "step": 9003 }, { "epoch": 0.37380577999885833, "grad_norm": 2.6206560134887695, "learning_rate": 7.207674040431327e-06, "loss": 0.4552, "step": 9004 }, { "epoch": 0.37384729552306967, "grad_norm": 2.6152796745300293, "learning_rate": 7.207070789962693e-06, "loss": 0.5513, "step": 9005 }, { "epoch": 0.373888811047281, "grad_norm": 3.0092718601226807, "learning_rate": 7.206467499589584e-06, "loss": 0.5359, "step": 9006 }, { "epoch": 0.37393032657149233, "grad_norm": 2.734679698944092, "learning_rate": 7.205864169322905e-06, "loss": 0.5985, "step": 9007 }, { "epoch": 0.37397184209570367, "grad_norm": 2.7353241443634033, "learning_rate": 7.205260799173568e-06, "loss": 0.508, "step": 9008 }, { "epoch": 0.374013357619915, "grad_norm": 2.223362684249878, "learning_rate": 7.20465738915248e-06, "loss": 0.4839, "step": 9009 }, { "epoch": 0.37405487314412633, "grad_norm": 2.958364725112915, "learning_rate": 7.20405393927055e-06, "loss": 0.4733, "step": 9010 }, { "epoch": 0.37409638866833766, "grad_norm": 2.4356627464294434, "learning_rate": 7.203450449538689e-06, "loss": 0.4276, "step": 9011 }, { "epoch": 0.374137904192549, "grad_norm": 2.1557533740997314, "learning_rate": 7.20284691996781e-06, "loss": 0.5, "step": 9012 }, { "epoch": 0.37417941971676033, "grad_norm": 2.0517568588256836, "learning_rate": 7.202243350568823e-06, "loss": 0.364, "step": 9013 }, { "epoch": 0.37422093524097166, "grad_norm": 2.2388219833374023, "learning_rate": 7.201639741352641e-06, "loss": 0.4098, "step": 9014 }, { "epoch": 0.374262450765183, "grad_norm": 2.33204984664917, "learning_rate": 7.20103609233018e-06, "loss": 0.4574, "step": 9015 }, { "epoch": 0.37430396628939433, "grad_norm": 2.7513132095336914, "learning_rate": 7.2004324035123495e-06, "loss": 0.5629, "step": 9016 }, { "epoch": 0.37434548181360566, "grad_norm": 2.8467440605163574, "learning_rate": 7.199828674910068e-06, "loss": 0.5478, "step": 9017 }, { "epoch": 0.374386997337817, "grad_norm": 2.4188880920410156, "learning_rate": 7.199224906534249e-06, "loss": 0.4767, "step": 9018 }, { "epoch": 0.37442851286202833, "grad_norm": 2.7722532749176025, "learning_rate": 7.198621098395812e-06, "loss": 0.5381, "step": 9019 }, { "epoch": 0.37447002838623966, "grad_norm": 3.371272087097168, "learning_rate": 7.198017250505671e-06, "loss": 0.5826, "step": 9020 }, { "epoch": 0.374511543910451, "grad_norm": 2.173759698867798, "learning_rate": 7.1974133628747435e-06, "loss": 0.4781, "step": 9021 }, { "epoch": 0.3745530594346623, "grad_norm": 3.1722521781921387, "learning_rate": 7.19680943551395e-06, "loss": 0.3878, "step": 9022 }, { "epoch": 0.37459457495887366, "grad_norm": 2.2773187160491943, "learning_rate": 7.196205468434208e-06, "loss": 0.6622, "step": 9023 }, { "epoch": 0.374636090483085, "grad_norm": 2.9029293060302734, "learning_rate": 7.195601461646439e-06, "loss": 0.5325, "step": 9024 }, { "epoch": 0.3746776060072963, "grad_norm": 2.1461637020111084, "learning_rate": 7.194997415161562e-06, "loss": 0.5248, "step": 9025 }, { "epoch": 0.3747191215315077, "grad_norm": 2.5427496433258057, "learning_rate": 7.1943933289905e-06, "loss": 0.6248, "step": 9026 }, { "epoch": 0.37476063705571905, "grad_norm": 2.5766866207122803, "learning_rate": 7.193789203144172e-06, "loss": 0.5543, "step": 9027 }, { "epoch": 0.3748021525799304, "grad_norm": 3.305410861968994, "learning_rate": 7.193185037633505e-06, "loss": 0.6674, "step": 9028 }, { "epoch": 0.3748436681041417, "grad_norm": 2.6028146743774414, "learning_rate": 7.19258083246942e-06, "loss": 0.5977, "step": 9029 }, { "epoch": 0.37488518362835305, "grad_norm": 2.4133055210113525, "learning_rate": 7.191976587662841e-06, "loss": 0.5391, "step": 9030 }, { "epoch": 0.3749266991525644, "grad_norm": 2.059854030609131, "learning_rate": 7.1913723032246944e-06, "loss": 0.4303, "step": 9031 }, { "epoch": 0.3749682146767757, "grad_norm": 2.6492908000946045, "learning_rate": 7.190767979165905e-06, "loss": 0.5006, "step": 9032 }, { "epoch": 0.37500973020098705, "grad_norm": 2.699755907058716, "learning_rate": 7.190163615497399e-06, "loss": 0.4591, "step": 9033 }, { "epoch": 0.3750512457251984, "grad_norm": 2.6479249000549316, "learning_rate": 7.189559212230102e-06, "loss": 0.5602, "step": 9034 }, { "epoch": 0.3750927612494097, "grad_norm": 2.5992836952209473, "learning_rate": 7.188954769374945e-06, "loss": 0.4653, "step": 9035 }, { "epoch": 0.37513427677362104, "grad_norm": 2.2890024185180664, "learning_rate": 7.1883502869428535e-06, "loss": 0.5138, "step": 9036 }, { "epoch": 0.3751757922978324, "grad_norm": 2.344265937805176, "learning_rate": 7.187745764944759e-06, "loss": 0.5373, "step": 9037 }, { "epoch": 0.3752173078220437, "grad_norm": 2.88096284866333, "learning_rate": 7.18714120339159e-06, "loss": 0.5705, "step": 9038 }, { "epoch": 0.37525882334625504, "grad_norm": 2.284034013748169, "learning_rate": 7.186536602294278e-06, "loss": 0.5536, "step": 9039 }, { "epoch": 0.3753003388704664, "grad_norm": 2.9375879764556885, "learning_rate": 7.185931961663753e-06, "loss": 0.5301, "step": 9040 }, { "epoch": 0.3753418543946777, "grad_norm": 3.1235437393188477, "learning_rate": 7.1853272815109496e-06, "loss": 0.7474, "step": 9041 }, { "epoch": 0.37538336991888904, "grad_norm": 2.1332318782806396, "learning_rate": 7.1847225618467975e-06, "loss": 0.4719, "step": 9042 }, { "epoch": 0.3754248854431004, "grad_norm": 2.574159860610962, "learning_rate": 7.184117802682234e-06, "loss": 0.6224, "step": 9043 }, { "epoch": 0.3754664009673117, "grad_norm": 2.280468225479126, "learning_rate": 7.1835130040281885e-06, "loss": 0.6165, "step": 9044 }, { "epoch": 0.37550791649152304, "grad_norm": 2.755239486694336, "learning_rate": 7.182908165895601e-06, "loss": 0.5307, "step": 9045 }, { "epoch": 0.3755494320157344, "grad_norm": 2.3775174617767334, "learning_rate": 7.1823032882954035e-06, "loss": 0.5605, "step": 9046 }, { "epoch": 0.3755909475399457, "grad_norm": 2.5697946548461914, "learning_rate": 7.181698371238533e-06, "loss": 0.5545, "step": 9047 }, { "epoch": 0.37563246306415704, "grad_norm": 2.3948001861572266, "learning_rate": 7.181093414735929e-06, "loss": 0.4962, "step": 9048 }, { "epoch": 0.3756739785883684, "grad_norm": 2.356992483139038, "learning_rate": 7.180488418798526e-06, "loss": 0.4478, "step": 9049 }, { "epoch": 0.3757154941125797, "grad_norm": 3.0497100353240967, "learning_rate": 7.179883383437264e-06, "loss": 0.4938, "step": 9050 }, { "epoch": 0.37575700963679104, "grad_norm": 2.3630785942077637, "learning_rate": 7.179278308663082e-06, "loss": 0.4625, "step": 9051 }, { "epoch": 0.3757985251610024, "grad_norm": 2.5398974418640137, "learning_rate": 7.178673194486921e-06, "loss": 0.4627, "step": 9052 }, { "epoch": 0.3758400406852137, "grad_norm": 2.3352513313293457, "learning_rate": 7.178068040919718e-06, "loss": 0.5488, "step": 9053 }, { "epoch": 0.37588155620942504, "grad_norm": 2.601433038711548, "learning_rate": 7.177462847972419e-06, "loss": 0.3994, "step": 9054 }, { "epoch": 0.37592307173363637, "grad_norm": 2.6574018001556396, "learning_rate": 7.1768576156559634e-06, "loss": 0.5014, "step": 9055 }, { "epoch": 0.3759645872578477, "grad_norm": 2.2311887741088867, "learning_rate": 7.176252343981296e-06, "loss": 0.4445, "step": 9056 }, { "epoch": 0.3760061027820591, "grad_norm": 2.496912956237793, "learning_rate": 7.175647032959358e-06, "loss": 0.7267, "step": 9057 }, { "epoch": 0.3760476183062704, "grad_norm": 2.820842981338501, "learning_rate": 7.1750416826010945e-06, "loss": 0.5379, "step": 9058 }, { "epoch": 0.37608913383048176, "grad_norm": 2.126676559448242, "learning_rate": 7.174436292917451e-06, "loss": 0.516, "step": 9059 }, { "epoch": 0.3761306493546931, "grad_norm": 1.974894404411316, "learning_rate": 7.173830863919372e-06, "loss": 0.5217, "step": 9060 }, { "epoch": 0.3761721648789044, "grad_norm": 2.614565134048462, "learning_rate": 7.173225395617806e-06, "loss": 0.4725, "step": 9061 }, { "epoch": 0.37621368040311576, "grad_norm": 2.247227430343628, "learning_rate": 7.172619888023697e-06, "loss": 0.6068, "step": 9062 }, { "epoch": 0.3762551959273271, "grad_norm": 2.330869436264038, "learning_rate": 7.172014341147995e-06, "loss": 0.5908, "step": 9063 }, { "epoch": 0.3762967114515384, "grad_norm": 2.3230576515197754, "learning_rate": 7.171408755001645e-06, "loss": 0.5242, "step": 9064 }, { "epoch": 0.37633822697574976, "grad_norm": 2.0637874603271484, "learning_rate": 7.170803129595603e-06, "loss": 0.5305, "step": 9065 }, { "epoch": 0.3763797424999611, "grad_norm": 2.8073692321777344, "learning_rate": 7.170197464940811e-06, "loss": 0.4811, "step": 9066 }, { "epoch": 0.3764212580241724, "grad_norm": 2.3968207836151123, "learning_rate": 7.169591761048226e-06, "loss": 0.548, "step": 9067 }, { "epoch": 0.37646277354838376, "grad_norm": 2.6886022090911865, "learning_rate": 7.168986017928794e-06, "loss": 0.417, "step": 9068 }, { "epoch": 0.3765042890725951, "grad_norm": 2.217613458633423, "learning_rate": 7.168380235593473e-06, "loss": 0.4173, "step": 9069 }, { "epoch": 0.3765458045968064, "grad_norm": 2.3347158432006836, "learning_rate": 7.167774414053209e-06, "loss": 0.3872, "step": 9070 }, { "epoch": 0.37658732012101775, "grad_norm": 2.29421329498291, "learning_rate": 7.167168553318961e-06, "loss": 0.4516, "step": 9071 }, { "epoch": 0.3766288356452291, "grad_norm": 2.811870574951172, "learning_rate": 7.166562653401681e-06, "loss": 0.6436, "step": 9072 }, { "epoch": 0.3766703511694404, "grad_norm": 2.2727105617523193, "learning_rate": 7.165956714312323e-06, "loss": 0.4114, "step": 9073 }, { "epoch": 0.37671186669365175, "grad_norm": 2.23427677154541, "learning_rate": 7.165350736061843e-06, "loss": 0.3793, "step": 9074 }, { "epoch": 0.3767533822178631, "grad_norm": 2.412611246109009, "learning_rate": 7.164744718661198e-06, "loss": 0.4235, "step": 9075 }, { "epoch": 0.3767948977420744, "grad_norm": 2.3700716495513916, "learning_rate": 7.164138662121344e-06, "loss": 0.6186, "step": 9076 }, { "epoch": 0.37683641326628575, "grad_norm": 2.6004981994628906, "learning_rate": 7.1635325664532396e-06, "loss": 0.5609, "step": 9077 }, { "epoch": 0.3768779287904971, "grad_norm": 1.8560211658477783, "learning_rate": 7.162926431667843e-06, "loss": 0.4969, "step": 9078 }, { "epoch": 0.3769194443147084, "grad_norm": 2.1585259437561035, "learning_rate": 7.162320257776112e-06, "loss": 0.5382, "step": 9079 }, { "epoch": 0.37696095983891975, "grad_norm": 2.4325740337371826, "learning_rate": 7.161714044789009e-06, "loss": 0.533, "step": 9080 }, { "epoch": 0.3770024753631311, "grad_norm": 2.3041539192199707, "learning_rate": 7.161107792717492e-06, "loss": 0.5187, "step": 9081 }, { "epoch": 0.3770439908873424, "grad_norm": 2.6614608764648438, "learning_rate": 7.160501501572523e-06, "loss": 0.5079, "step": 9082 }, { "epoch": 0.37708550641155375, "grad_norm": 2.7359182834625244, "learning_rate": 7.1598951713650655e-06, "loss": 0.4945, "step": 9083 }, { "epoch": 0.3771270219357651, "grad_norm": 2.832836389541626, "learning_rate": 7.15928880210608e-06, "loss": 0.496, "step": 9084 }, { "epoch": 0.3771685374599764, "grad_norm": 2.439756393432617, "learning_rate": 7.158682393806531e-06, "loss": 0.4698, "step": 9085 }, { "epoch": 0.37721005298418775, "grad_norm": 2.2694849967956543, "learning_rate": 7.158075946477382e-06, "loss": 0.6476, "step": 9086 }, { "epoch": 0.3772515685083991, "grad_norm": 2.5296976566314697, "learning_rate": 7.157469460129597e-06, "loss": 0.5679, "step": 9087 }, { "epoch": 0.37729308403261047, "grad_norm": 2.1357123851776123, "learning_rate": 7.156862934774144e-06, "loss": 0.4147, "step": 9088 }, { "epoch": 0.3773345995568218, "grad_norm": 2.5917952060699463, "learning_rate": 7.156256370421985e-06, "loss": 0.5689, "step": 9089 }, { "epoch": 0.37737611508103314, "grad_norm": 2.7369022369384766, "learning_rate": 7.155649767084092e-06, "loss": 0.6002, "step": 9090 }, { "epoch": 0.37741763060524447, "grad_norm": 2.181741237640381, "learning_rate": 7.155043124771427e-06, "loss": 0.5293, "step": 9091 }, { "epoch": 0.3774591461294558, "grad_norm": 2.1796586513519287, "learning_rate": 7.1544364434949635e-06, "loss": 0.4384, "step": 9092 }, { "epoch": 0.37750066165366714, "grad_norm": 2.013031244277954, "learning_rate": 7.153829723265666e-06, "loss": 0.4726, "step": 9093 }, { "epoch": 0.37754217717787847, "grad_norm": 2.308133602142334, "learning_rate": 7.153222964094508e-06, "loss": 0.4694, "step": 9094 }, { "epoch": 0.3775836927020898, "grad_norm": 2.2983176708221436, "learning_rate": 7.152616165992458e-06, "loss": 0.5187, "step": 9095 }, { "epoch": 0.37762520822630113, "grad_norm": 2.2725117206573486, "learning_rate": 7.1520093289704866e-06, "loss": 0.4956, "step": 9096 }, { "epoch": 0.37766672375051247, "grad_norm": 2.4410436153411865, "learning_rate": 7.151402453039568e-06, "loss": 0.4808, "step": 9097 }, { "epoch": 0.3777082392747238, "grad_norm": 2.1599180698394775, "learning_rate": 7.1507955382106706e-06, "loss": 0.3522, "step": 9098 }, { "epoch": 0.37774975479893513, "grad_norm": 2.6313490867614746, "learning_rate": 7.150188584494773e-06, "loss": 0.4124, "step": 9099 }, { "epoch": 0.37779127032314647, "grad_norm": 2.433464288711548, "learning_rate": 7.149581591902843e-06, "loss": 0.4437, "step": 9100 }, { "epoch": 0.3778327858473578, "grad_norm": 2.459968328475952, "learning_rate": 7.148974560445859e-06, "loss": 0.4077, "step": 9101 }, { "epoch": 0.37787430137156913, "grad_norm": 2.3463985919952393, "learning_rate": 7.148367490134796e-06, "loss": 0.6766, "step": 9102 }, { "epoch": 0.37791581689578047, "grad_norm": 2.425550699234009, "learning_rate": 7.14776038098063e-06, "loss": 0.6348, "step": 9103 }, { "epoch": 0.3779573324199918, "grad_norm": 3.1239964962005615, "learning_rate": 7.147153232994336e-06, "loss": 0.5204, "step": 9104 }, { "epoch": 0.37799884794420313, "grad_norm": 2.4691741466522217, "learning_rate": 7.146546046186893e-06, "loss": 0.434, "step": 9105 }, { "epoch": 0.37804036346841446, "grad_norm": 2.1650359630584717, "learning_rate": 7.145938820569279e-06, "loss": 0.6007, "step": 9106 }, { "epoch": 0.3780818789926258, "grad_norm": 3.1222269535064697, "learning_rate": 7.145331556152472e-06, "loss": 0.61, "step": 9107 }, { "epoch": 0.37812339451683713, "grad_norm": 2.7019433975219727, "learning_rate": 7.144724252947454e-06, "loss": 0.4768, "step": 9108 }, { "epoch": 0.37816491004104846, "grad_norm": 2.8082118034362793, "learning_rate": 7.1441169109652005e-06, "loss": 0.513, "step": 9109 }, { "epoch": 0.3782064255652598, "grad_norm": 2.529536485671997, "learning_rate": 7.143509530216698e-06, "loss": 0.5495, "step": 9110 }, { "epoch": 0.37824794108947113, "grad_norm": 2.859884262084961, "learning_rate": 7.142902110712925e-06, "loss": 0.5901, "step": 9111 }, { "epoch": 0.37828945661368246, "grad_norm": 3.1659514904022217, "learning_rate": 7.142294652464864e-06, "loss": 0.6088, "step": 9112 }, { "epoch": 0.3783309721378938, "grad_norm": 2.3404481410980225, "learning_rate": 7.141687155483496e-06, "loss": 0.4859, "step": 9113 }, { "epoch": 0.37837248766210513, "grad_norm": 3.013546943664551, "learning_rate": 7.141079619779809e-06, "loss": 0.4998, "step": 9114 }, { "epoch": 0.37841400318631646, "grad_norm": 2.5701987743377686, "learning_rate": 7.1404720453647855e-06, "loss": 0.5032, "step": 9115 }, { "epoch": 0.3784555187105278, "grad_norm": 2.439847946166992, "learning_rate": 7.13986443224941e-06, "loss": 0.5958, "step": 9116 }, { "epoch": 0.3784970342347391, "grad_norm": 3.013176679611206, "learning_rate": 7.139256780444668e-06, "loss": 0.4773, "step": 9117 }, { "epoch": 0.37853854975895046, "grad_norm": 3.1407206058502197, "learning_rate": 7.138649089961547e-06, "loss": 0.5672, "step": 9118 }, { "epoch": 0.37858006528316185, "grad_norm": 2.5812904834747314, "learning_rate": 7.138041360811035e-06, "loss": 0.3859, "step": 9119 }, { "epoch": 0.3786215808073732, "grad_norm": 2.605273723602295, "learning_rate": 7.137433593004118e-06, "loss": 0.5378, "step": 9120 }, { "epoch": 0.3786630963315845, "grad_norm": 2.1202640533447266, "learning_rate": 7.136825786551786e-06, "loss": 0.4238, "step": 9121 }, { "epoch": 0.37870461185579585, "grad_norm": 2.5857222080230713, "learning_rate": 7.136217941465028e-06, "loss": 0.5959, "step": 9122 }, { "epoch": 0.3787461273800072, "grad_norm": 2.3787941932678223, "learning_rate": 7.1356100577548346e-06, "loss": 0.4868, "step": 9123 }, { "epoch": 0.3787876429042185, "grad_norm": 2.4769701957702637, "learning_rate": 7.135002135432195e-06, "loss": 0.5539, "step": 9124 }, { "epoch": 0.37882915842842985, "grad_norm": 3.2936952114105225, "learning_rate": 7.134394174508102e-06, "loss": 0.5828, "step": 9125 }, { "epoch": 0.3788706739526412, "grad_norm": 2.6463165283203125, "learning_rate": 7.133786174993547e-06, "loss": 0.4777, "step": 9126 }, { "epoch": 0.3789121894768525, "grad_norm": 2.9048497676849365, "learning_rate": 7.133178136899522e-06, "loss": 0.5238, "step": 9127 }, { "epoch": 0.37895370500106385, "grad_norm": 3.073976516723633, "learning_rate": 7.132570060237022e-06, "loss": 0.6459, "step": 9128 }, { "epoch": 0.3789952205252752, "grad_norm": 2.5868217945098877, "learning_rate": 7.131961945017041e-06, "loss": 0.5915, "step": 9129 }, { "epoch": 0.3790367360494865, "grad_norm": 2.544008255004883, "learning_rate": 7.1313537912505725e-06, "loss": 0.5645, "step": 9130 }, { "epoch": 0.37907825157369784, "grad_norm": 2.4894485473632812, "learning_rate": 7.130745598948613e-06, "loss": 0.5115, "step": 9131 }, { "epoch": 0.3791197670979092, "grad_norm": 2.3454208374023438, "learning_rate": 7.13013736812216e-06, "loss": 0.5651, "step": 9132 }, { "epoch": 0.3791612826221205, "grad_norm": 2.6328392028808594, "learning_rate": 7.129529098782208e-06, "loss": 0.643, "step": 9133 }, { "epoch": 0.37920279814633184, "grad_norm": 2.2596724033355713, "learning_rate": 7.128920790939758e-06, "loss": 0.5287, "step": 9134 }, { "epoch": 0.3792443136705432, "grad_norm": 2.573594093322754, "learning_rate": 7.1283124446058046e-06, "loss": 0.608, "step": 9135 }, { "epoch": 0.3792858291947545, "grad_norm": 2.6272151470184326, "learning_rate": 7.1277040597913515e-06, "loss": 0.6631, "step": 9136 }, { "epoch": 0.37932734471896584, "grad_norm": 2.287212371826172, "learning_rate": 7.127095636507394e-06, "loss": 0.5491, "step": 9137 }, { "epoch": 0.3793688602431772, "grad_norm": 2.5652413368225098, "learning_rate": 7.126487174764936e-06, "loss": 0.4494, "step": 9138 }, { "epoch": 0.3794103757673885, "grad_norm": 2.22796368598938, "learning_rate": 7.125878674574973e-06, "loss": 0.4625, "step": 9139 }, { "epoch": 0.37945189129159984, "grad_norm": 2.8403427600860596, "learning_rate": 7.125270135948514e-06, "loss": 0.5205, "step": 9140 }, { "epoch": 0.3794934068158112, "grad_norm": 2.18557071685791, "learning_rate": 7.1246615588965575e-06, "loss": 0.5865, "step": 9141 }, { "epoch": 0.3795349223400225, "grad_norm": 2.7703323364257812, "learning_rate": 7.1240529434301074e-06, "loss": 0.5528, "step": 9142 }, { "epoch": 0.37957643786423384, "grad_norm": 2.312706708908081, "learning_rate": 7.123444289560169e-06, "loss": 0.5343, "step": 9143 }, { "epoch": 0.3796179533884452, "grad_norm": 2.444601058959961, "learning_rate": 7.122835597297744e-06, "loss": 0.7181, "step": 9144 }, { "epoch": 0.3796594689126565, "grad_norm": 2.5102322101593018, "learning_rate": 7.122226866653841e-06, "loss": 0.4951, "step": 9145 }, { "epoch": 0.37970098443686784, "grad_norm": 2.9863100051879883, "learning_rate": 7.121618097639462e-06, "loss": 0.7227, "step": 9146 }, { "epoch": 0.37974249996107917, "grad_norm": 2.2664477825164795, "learning_rate": 7.121009290265619e-06, "loss": 0.4903, "step": 9147 }, { "epoch": 0.3797840154852905, "grad_norm": 2.3556694984436035, "learning_rate": 7.1204004445433165e-06, "loss": 0.5007, "step": 9148 }, { "epoch": 0.3798255310095019, "grad_norm": 2.311879873275757, "learning_rate": 7.1197915604835625e-06, "loss": 0.5574, "step": 9149 }, { "epoch": 0.3798670465337132, "grad_norm": 2.579085111618042, "learning_rate": 7.119182638097366e-06, "loss": 0.6076, "step": 9150 }, { "epoch": 0.37990856205792456, "grad_norm": 2.342550039291382, "learning_rate": 7.118573677395737e-06, "loss": 0.6588, "step": 9151 }, { "epoch": 0.3799500775821359, "grad_norm": 2.727189302444458, "learning_rate": 7.117964678389684e-06, "loss": 0.3949, "step": 9152 }, { "epoch": 0.3799915931063472, "grad_norm": 2.713740348815918, "learning_rate": 7.11735564109022e-06, "loss": 0.616, "step": 9153 }, { "epoch": 0.38003310863055856, "grad_norm": 2.441375494003296, "learning_rate": 7.1167465655083555e-06, "loss": 0.5075, "step": 9154 }, { "epoch": 0.3800746241547699, "grad_norm": 2.412977457046509, "learning_rate": 7.116137451655103e-06, "loss": 0.2908, "step": 9155 }, { "epoch": 0.3801161396789812, "grad_norm": 2.2443807125091553, "learning_rate": 7.115528299541475e-06, "loss": 0.405, "step": 9156 }, { "epoch": 0.38015765520319256, "grad_norm": 2.545916795730591, "learning_rate": 7.114919109178486e-06, "loss": 0.6157, "step": 9157 }, { "epoch": 0.3801991707274039, "grad_norm": 2.321065902709961, "learning_rate": 7.1143098805771505e-06, "loss": 0.421, "step": 9158 }, { "epoch": 0.3802406862516152, "grad_norm": 2.801943302154541, "learning_rate": 7.113700613748482e-06, "loss": 0.5425, "step": 9159 }, { "epoch": 0.38028220177582656, "grad_norm": 2.4716031551361084, "learning_rate": 7.113091308703498e-06, "loss": 0.4785, "step": 9160 }, { "epoch": 0.3803237173000379, "grad_norm": 2.683742046356201, "learning_rate": 7.112481965453213e-06, "loss": 0.636, "step": 9161 }, { "epoch": 0.3803652328242492, "grad_norm": 2.8898239135742188, "learning_rate": 7.111872584008647e-06, "loss": 0.5111, "step": 9162 }, { "epoch": 0.38040674834846055, "grad_norm": 2.464078664779663, "learning_rate": 7.111263164380815e-06, "loss": 0.4975, "step": 9163 }, { "epoch": 0.3804482638726719, "grad_norm": 2.76511549949646, "learning_rate": 7.110653706580737e-06, "loss": 0.4782, "step": 9164 }, { "epoch": 0.3804897793968832, "grad_norm": 2.864173650741577, "learning_rate": 7.11004421061943e-06, "loss": 0.5598, "step": 9165 }, { "epoch": 0.38053129492109455, "grad_norm": 2.6118667125701904, "learning_rate": 7.109434676507917e-06, "loss": 0.5077, "step": 9166 }, { "epoch": 0.3805728104453059, "grad_norm": 2.203962802886963, "learning_rate": 7.108825104257215e-06, "loss": 0.4111, "step": 9167 }, { "epoch": 0.3806143259695172, "grad_norm": 2.944533109664917, "learning_rate": 7.10821549387835e-06, "loss": 0.4962, "step": 9168 }, { "epoch": 0.38065584149372855, "grad_norm": 2.6985695362091064, "learning_rate": 7.107605845382339e-06, "loss": 0.6547, "step": 9169 }, { "epoch": 0.3806973570179399, "grad_norm": 2.461754322052002, "learning_rate": 7.1069961587802085e-06, "loss": 0.4888, "step": 9170 }, { "epoch": 0.3807388725421512, "grad_norm": 2.3746252059936523, "learning_rate": 7.106386434082979e-06, "loss": 0.3131, "step": 9171 }, { "epoch": 0.38078038806636255, "grad_norm": 2.385481357574463, "learning_rate": 7.105776671301676e-06, "loss": 0.5768, "step": 9172 }, { "epoch": 0.3808219035905739, "grad_norm": 2.503671646118164, "learning_rate": 7.1051668704473245e-06, "loss": 0.5769, "step": 9173 }, { "epoch": 0.3808634191147852, "grad_norm": 2.7185535430908203, "learning_rate": 7.104557031530948e-06, "loss": 0.3645, "step": 9174 }, { "epoch": 0.38090493463899655, "grad_norm": 2.145364284515381, "learning_rate": 7.1039471545635755e-06, "loss": 0.3939, "step": 9175 }, { "epoch": 0.3809464501632079, "grad_norm": 2.3687925338745117, "learning_rate": 7.103337239556231e-06, "loss": 0.586, "step": 9176 }, { "epoch": 0.3809879656874192, "grad_norm": 2.158450126647949, "learning_rate": 7.1027272865199426e-06, "loss": 0.542, "step": 9177 }, { "epoch": 0.38102948121163055, "grad_norm": 2.4351372718811035, "learning_rate": 7.102117295465739e-06, "loss": 0.5011, "step": 9178 }, { "epoch": 0.3810709967358419, "grad_norm": 2.686664342880249, "learning_rate": 7.101507266404649e-06, "loss": 0.5027, "step": 9179 }, { "epoch": 0.38111251226005327, "grad_norm": 3.11083984375, "learning_rate": 7.100897199347702e-06, "loss": 0.5218, "step": 9180 }, { "epoch": 0.3811540277842646, "grad_norm": 2.3742496967315674, "learning_rate": 7.1002870943059275e-06, "loss": 0.6452, "step": 9181 }, { "epoch": 0.38119554330847594, "grad_norm": 2.197443962097168, "learning_rate": 7.099676951290358e-06, "loss": 0.4965, "step": 9182 }, { "epoch": 0.38123705883268727, "grad_norm": 2.013395309448242, "learning_rate": 7.099066770312023e-06, "loss": 0.3973, "step": 9183 }, { "epoch": 0.3812785743568986, "grad_norm": 2.8255152702331543, "learning_rate": 7.098456551381956e-06, "loss": 0.5281, "step": 9184 }, { "epoch": 0.38132008988110994, "grad_norm": 2.405306100845337, "learning_rate": 7.097846294511189e-06, "loss": 0.5692, "step": 9185 }, { "epoch": 0.38136160540532127, "grad_norm": 2.8492178916931152, "learning_rate": 7.0972359997107575e-06, "loss": 0.4728, "step": 9186 }, { "epoch": 0.3814031209295326, "grad_norm": 2.911625385284424, "learning_rate": 7.096625666991695e-06, "loss": 0.5365, "step": 9187 }, { "epoch": 0.38144463645374393, "grad_norm": 2.231931209564209, "learning_rate": 7.0960152963650355e-06, "loss": 0.4612, "step": 9188 }, { "epoch": 0.38148615197795527, "grad_norm": 2.4729278087615967, "learning_rate": 7.095404887841815e-06, "loss": 0.446, "step": 9189 }, { "epoch": 0.3815276675021666, "grad_norm": 2.8112900257110596, "learning_rate": 7.094794441433071e-06, "loss": 0.4924, "step": 9190 }, { "epoch": 0.38156918302637793, "grad_norm": 2.6425106525421143, "learning_rate": 7.094183957149838e-06, "loss": 0.4318, "step": 9191 }, { "epoch": 0.38161069855058927, "grad_norm": 2.58671498298645, "learning_rate": 7.093573435003158e-06, "loss": 0.5179, "step": 9192 }, { "epoch": 0.3816522140748006, "grad_norm": 2.5824227333068848, "learning_rate": 7.092962875004065e-06, "loss": 0.5005, "step": 9193 }, { "epoch": 0.38169372959901193, "grad_norm": 2.1716718673706055, "learning_rate": 7.0923522771636015e-06, "loss": 0.4522, "step": 9194 }, { "epoch": 0.38173524512322327, "grad_norm": 2.2010560035705566, "learning_rate": 7.091741641492803e-06, "loss": 0.5184, "step": 9195 }, { "epoch": 0.3817767606474346, "grad_norm": 2.5475687980651855, "learning_rate": 7.091130968002715e-06, "loss": 0.5107, "step": 9196 }, { "epoch": 0.38181827617164593, "grad_norm": 2.3162577152252197, "learning_rate": 7.0905202567043756e-06, "loss": 0.5017, "step": 9197 }, { "epoch": 0.38185979169585726, "grad_norm": 2.9099035263061523, "learning_rate": 7.0899095076088265e-06, "loss": 0.4609, "step": 9198 }, { "epoch": 0.3819013072200686, "grad_norm": 2.6013739109039307, "learning_rate": 7.089298720727114e-06, "loss": 0.4789, "step": 9199 }, { "epoch": 0.38194282274427993, "grad_norm": 2.540637731552124, "learning_rate": 7.088687896070276e-06, "loss": 0.4502, "step": 9200 }, { "epoch": 0.38198433826849126, "grad_norm": 2.320098638534546, "learning_rate": 7.088077033649359e-06, "loss": 0.4865, "step": 9201 }, { "epoch": 0.3820258537927026, "grad_norm": 2.5973470211029053, "learning_rate": 7.087466133475408e-06, "loss": 0.3494, "step": 9202 }, { "epoch": 0.38206736931691393, "grad_norm": 2.307645320892334, "learning_rate": 7.086855195559468e-06, "loss": 0.4605, "step": 9203 }, { "epoch": 0.38210888484112526, "grad_norm": 2.8277573585510254, "learning_rate": 7.0862442199125836e-06, "loss": 0.4757, "step": 9204 }, { "epoch": 0.3821504003653366, "grad_norm": 2.8851280212402344, "learning_rate": 7.085633206545803e-06, "loss": 0.6236, "step": 9205 }, { "epoch": 0.38219191588954793, "grad_norm": 2.4314653873443604, "learning_rate": 7.085022155470173e-06, "loss": 0.5447, "step": 9206 }, { "epoch": 0.38223343141375926, "grad_norm": 2.167177200317383, "learning_rate": 7.084411066696742e-06, "loss": 0.5324, "step": 9207 }, { "epoch": 0.3822749469379706, "grad_norm": 2.3624398708343506, "learning_rate": 7.083799940236555e-06, "loss": 0.5435, "step": 9208 }, { "epoch": 0.3823164624621819, "grad_norm": 2.3876729011535645, "learning_rate": 7.0831887761006675e-06, "loss": 0.457, "step": 9209 }, { "epoch": 0.38235797798639326, "grad_norm": 2.2189109325408936, "learning_rate": 7.082577574300126e-06, "loss": 0.6077, "step": 9210 }, { "epoch": 0.38239949351060465, "grad_norm": 2.7048826217651367, "learning_rate": 7.08196633484598e-06, "loss": 0.5096, "step": 9211 }, { "epoch": 0.382441009034816, "grad_norm": 1.9370149374008179, "learning_rate": 7.081355057749285e-06, "loss": 0.5542, "step": 9212 }, { "epoch": 0.3824825245590273, "grad_norm": 3.0582547187805176, "learning_rate": 7.08074374302109e-06, "loss": 0.8022, "step": 9213 }, { "epoch": 0.38252404008323865, "grad_norm": 2.301386594772339, "learning_rate": 7.080132390672447e-06, "loss": 0.5069, "step": 9214 }, { "epoch": 0.38256555560745, "grad_norm": 2.601200580596924, "learning_rate": 7.0795210007144135e-06, "loss": 0.4769, "step": 9215 }, { "epoch": 0.3826070711316613, "grad_norm": 2.3213448524475098, "learning_rate": 7.078909573158039e-06, "loss": 0.5064, "step": 9216 }, { "epoch": 0.38264858665587265, "grad_norm": 2.8607821464538574, "learning_rate": 7.078298108014381e-06, "loss": 0.521, "step": 9217 }, { "epoch": 0.382690102180084, "grad_norm": 2.7809693813323975, "learning_rate": 7.0776866052944935e-06, "loss": 0.5983, "step": 9218 }, { "epoch": 0.3827316177042953, "grad_norm": 2.663058042526245, "learning_rate": 7.0770750650094335e-06, "loss": 0.5201, "step": 9219 }, { "epoch": 0.38277313322850665, "grad_norm": 2.2635791301727295, "learning_rate": 7.076463487170257e-06, "loss": 0.6228, "step": 9220 }, { "epoch": 0.382814648752718, "grad_norm": 2.8300726413726807, "learning_rate": 7.075851871788022e-06, "loss": 0.5366, "step": 9221 }, { "epoch": 0.3828561642769293, "grad_norm": 2.884626865386963, "learning_rate": 7.075240218873789e-06, "loss": 0.4426, "step": 9222 }, { "epoch": 0.38289767980114064, "grad_norm": 2.4273393154144287, "learning_rate": 7.074628528438613e-06, "loss": 0.6265, "step": 9223 }, { "epoch": 0.382939195325352, "grad_norm": 2.1502082347869873, "learning_rate": 7.074016800493555e-06, "loss": 0.5208, "step": 9224 }, { "epoch": 0.3829807108495633, "grad_norm": 2.724836826324463, "learning_rate": 7.0734050350496764e-06, "loss": 0.5438, "step": 9225 }, { "epoch": 0.38302222637377464, "grad_norm": 2.600214719772339, "learning_rate": 7.072793232118037e-06, "loss": 0.4556, "step": 9226 }, { "epoch": 0.383063741897986, "grad_norm": 2.5183939933776855, "learning_rate": 7.072181391709698e-06, "loss": 0.5023, "step": 9227 }, { "epoch": 0.3831052574221973, "grad_norm": 2.907575845718384, "learning_rate": 7.071569513835722e-06, "loss": 0.6054, "step": 9228 }, { "epoch": 0.38314677294640864, "grad_norm": 2.1647632122039795, "learning_rate": 7.070957598507173e-06, "loss": 0.399, "step": 9229 }, { "epoch": 0.38318828847062, "grad_norm": 2.317483901977539, "learning_rate": 7.070345645735113e-06, "loss": 0.5836, "step": 9230 }, { "epoch": 0.3832298039948313, "grad_norm": 2.516209840774536, "learning_rate": 7.069733655530609e-06, "loss": 0.5684, "step": 9231 }, { "epoch": 0.38327131951904264, "grad_norm": 2.32796573638916, "learning_rate": 7.0691216279047205e-06, "loss": 0.4272, "step": 9232 }, { "epoch": 0.383312835043254, "grad_norm": 2.881281614303589, "learning_rate": 7.068509562868519e-06, "loss": 0.4834, "step": 9233 }, { "epoch": 0.3833543505674653, "grad_norm": 2.9933407306671143, "learning_rate": 7.067897460433066e-06, "loss": 0.4343, "step": 9234 }, { "epoch": 0.38339586609167664, "grad_norm": 2.6931750774383545, "learning_rate": 7.067285320609434e-06, "loss": 0.4453, "step": 9235 }, { "epoch": 0.383437381615888, "grad_norm": 2.1327641010284424, "learning_rate": 7.066673143408686e-06, "loss": 0.5181, "step": 9236 }, { "epoch": 0.3834788971400993, "grad_norm": 2.752866268157959, "learning_rate": 7.066060928841891e-06, "loss": 0.5461, "step": 9237 }, { "epoch": 0.38352041266431064, "grad_norm": 2.5138256549835205, "learning_rate": 7.06544867692012e-06, "loss": 0.3903, "step": 9238 }, { "epoch": 0.38356192818852197, "grad_norm": 3.9217028617858887, "learning_rate": 7.064836387654443e-06, "loss": 0.5345, "step": 9239 }, { "epoch": 0.3836034437127333, "grad_norm": 2.9839797019958496, "learning_rate": 7.064224061055926e-06, "loss": 0.5157, "step": 9240 }, { "epoch": 0.38364495923694464, "grad_norm": 2.0917422771453857, "learning_rate": 7.063611697135645e-06, "loss": 0.4225, "step": 9241 }, { "epoch": 0.383686474761156, "grad_norm": 2.9987220764160156, "learning_rate": 7.062999295904669e-06, "loss": 0.6906, "step": 9242 }, { "epoch": 0.38372799028536736, "grad_norm": 2.1814162731170654, "learning_rate": 7.062386857374071e-06, "loss": 0.4444, "step": 9243 }, { "epoch": 0.3837695058095787, "grad_norm": 2.257685661315918, "learning_rate": 7.061774381554925e-06, "loss": 0.4641, "step": 9244 }, { "epoch": 0.38381102133379, "grad_norm": 2.3856513500213623, "learning_rate": 7.061161868458303e-06, "loss": 0.4644, "step": 9245 }, { "epoch": 0.38385253685800136, "grad_norm": 1.8686909675598145, "learning_rate": 7.060549318095281e-06, "loss": 0.4268, "step": 9246 }, { "epoch": 0.3838940523822127, "grad_norm": 3.139563798904419, "learning_rate": 7.059936730476933e-06, "loss": 0.6713, "step": 9247 }, { "epoch": 0.383935567906424, "grad_norm": 2.5323123931884766, "learning_rate": 7.059324105614336e-06, "loss": 0.5145, "step": 9248 }, { "epoch": 0.38397708343063536, "grad_norm": 2.088606595993042, "learning_rate": 7.0587114435185646e-06, "loss": 0.4428, "step": 9249 }, { "epoch": 0.3840185989548467, "grad_norm": 2.6634531021118164, "learning_rate": 7.058098744200697e-06, "loss": 0.6549, "step": 9250 }, { "epoch": 0.384060114479058, "grad_norm": 2.3545796871185303, "learning_rate": 7.05748600767181e-06, "loss": 0.3805, "step": 9251 }, { "epoch": 0.38410163000326936, "grad_norm": 2.2680821418762207, "learning_rate": 7.056873233942985e-06, "loss": 0.5084, "step": 9252 }, { "epoch": 0.3841431455274807, "grad_norm": 2.25701904296875, "learning_rate": 7.056260423025298e-06, "loss": 0.4327, "step": 9253 }, { "epoch": 0.384184661051692, "grad_norm": 1.8245712518692017, "learning_rate": 7.05564757492983e-06, "loss": 0.3971, "step": 9254 }, { "epoch": 0.38422617657590336, "grad_norm": 2.35036301612854, "learning_rate": 7.055034689667661e-06, "loss": 0.4604, "step": 9255 }, { "epoch": 0.3842676921001147, "grad_norm": 2.653029203414917, "learning_rate": 7.054421767249873e-06, "loss": 0.542, "step": 9256 }, { "epoch": 0.384309207624326, "grad_norm": 2.3331804275512695, "learning_rate": 7.053808807687548e-06, "loss": 0.5511, "step": 9257 }, { "epoch": 0.38435072314853735, "grad_norm": 2.3160741329193115, "learning_rate": 7.053195810991767e-06, "loss": 0.5146, "step": 9258 }, { "epoch": 0.3843922386727487, "grad_norm": 2.912696123123169, "learning_rate": 7.0525827771736135e-06, "loss": 0.5551, "step": 9259 }, { "epoch": 0.38443375419696, "grad_norm": 2.2333035469055176, "learning_rate": 7.051969706244172e-06, "loss": 0.5812, "step": 9260 }, { "epoch": 0.38447526972117135, "grad_norm": 2.2776143550872803, "learning_rate": 7.0513565982145275e-06, "loss": 0.5044, "step": 9261 }, { "epoch": 0.3845167852453827, "grad_norm": 2.2490158081054688, "learning_rate": 7.050743453095763e-06, "loss": 0.5524, "step": 9262 }, { "epoch": 0.384558300769594, "grad_norm": 2.0179176330566406, "learning_rate": 7.0501302708989675e-06, "loss": 0.6499, "step": 9263 }, { "epoch": 0.38459981629380535, "grad_norm": 2.086686372756958, "learning_rate": 7.049517051635224e-06, "loss": 0.5292, "step": 9264 }, { "epoch": 0.3846413318180167, "grad_norm": 2.3808200359344482, "learning_rate": 7.048903795315622e-06, "loss": 0.3405, "step": 9265 }, { "epoch": 0.384682847342228, "grad_norm": 2.4830167293548584, "learning_rate": 7.04829050195125e-06, "loss": 0.5172, "step": 9266 }, { "epoch": 0.38472436286643935, "grad_norm": 2.415731430053711, "learning_rate": 7.0476771715531944e-06, "loss": 0.4996, "step": 9267 }, { "epoch": 0.3847658783906507, "grad_norm": 2.664306163787842, "learning_rate": 7.047063804132546e-06, "loss": 0.5867, "step": 9268 }, { "epoch": 0.384807393914862, "grad_norm": 2.359196901321411, "learning_rate": 7.046450399700393e-06, "loss": 0.3205, "step": 9269 }, { "epoch": 0.38484890943907335, "grad_norm": 2.2144861221313477, "learning_rate": 7.0458369582678276e-06, "loss": 0.5352, "step": 9270 }, { "epoch": 0.3848904249632847, "grad_norm": 2.2323977947235107, "learning_rate": 7.0452234798459395e-06, "loss": 0.4965, "step": 9271 }, { "epoch": 0.384931940487496, "grad_norm": 2.148348569869995, "learning_rate": 7.044609964445822e-06, "loss": 0.481, "step": 9272 }, { "epoch": 0.3849734560117074, "grad_norm": 2.4304866790771484, "learning_rate": 7.0439964120785665e-06, "loss": 0.4309, "step": 9273 }, { "epoch": 0.38501497153591874, "grad_norm": 2.597397565841675, "learning_rate": 7.043382822755268e-06, "loss": 0.5146, "step": 9274 }, { "epoch": 0.38505648706013007, "grad_norm": 3.045077085494995, "learning_rate": 7.042769196487018e-06, "loss": 0.6925, "step": 9275 }, { "epoch": 0.3850980025843414, "grad_norm": 2.5410892963409424, "learning_rate": 7.0421555332849136e-06, "loss": 0.5142, "step": 9276 }, { "epoch": 0.38513951810855274, "grad_norm": 2.7969775199890137, "learning_rate": 7.041541833160046e-06, "loss": 0.6319, "step": 9277 }, { "epoch": 0.38518103363276407, "grad_norm": 2.3504364490509033, "learning_rate": 7.040928096123516e-06, "loss": 0.5272, "step": 9278 }, { "epoch": 0.3852225491569754, "grad_norm": 2.3136096000671387, "learning_rate": 7.040314322186417e-06, "loss": 0.4753, "step": 9279 }, { "epoch": 0.38526406468118674, "grad_norm": 2.462811231613159, "learning_rate": 7.039700511359846e-06, "loss": 0.5524, "step": 9280 }, { "epoch": 0.38530558020539807, "grad_norm": 3.050126552581787, "learning_rate": 7.0390866636549035e-06, "loss": 0.5712, "step": 9281 }, { "epoch": 0.3853470957296094, "grad_norm": 2.4814770221710205, "learning_rate": 7.038472779082685e-06, "loss": 0.6187, "step": 9282 }, { "epoch": 0.38538861125382073, "grad_norm": 2.540342330932617, "learning_rate": 7.037858857654292e-06, "loss": 0.4581, "step": 9283 }, { "epoch": 0.38543012677803207, "grad_norm": 2.5962116718292236, "learning_rate": 7.0372448993808214e-06, "loss": 0.6236, "step": 9284 }, { "epoch": 0.3854716423022434, "grad_norm": 2.3605520725250244, "learning_rate": 7.036630904273379e-06, "loss": 0.5678, "step": 9285 }, { "epoch": 0.38551315782645473, "grad_norm": 2.1552393436431885, "learning_rate": 7.036016872343061e-06, "loss": 0.5308, "step": 9286 }, { "epoch": 0.38555467335066607, "grad_norm": 3.112888813018799, "learning_rate": 7.035402803600971e-06, "loss": 0.5409, "step": 9287 }, { "epoch": 0.3855961888748774, "grad_norm": 2.207352876663208, "learning_rate": 7.034788698058213e-06, "loss": 0.4275, "step": 9288 }, { "epoch": 0.38563770439908873, "grad_norm": 2.2076146602630615, "learning_rate": 7.034174555725888e-06, "loss": 0.4743, "step": 9289 }, { "epoch": 0.38567921992330007, "grad_norm": 2.7076754570007324, "learning_rate": 7.0335603766151e-06, "loss": 0.4309, "step": 9290 }, { "epoch": 0.3857207354475114, "grad_norm": 2.135849714279175, "learning_rate": 7.032946160736956e-06, "loss": 0.5302, "step": 9291 }, { "epoch": 0.38576225097172273, "grad_norm": 2.608015298843384, "learning_rate": 7.0323319081025584e-06, "loss": 0.5734, "step": 9292 }, { "epoch": 0.38580376649593406, "grad_norm": 2.581221103668213, "learning_rate": 7.031717618723013e-06, "loss": 0.5365, "step": 9293 }, { "epoch": 0.3858452820201454, "grad_norm": 2.50752329826355, "learning_rate": 7.031103292609429e-06, "loss": 0.5201, "step": 9294 }, { "epoch": 0.38588679754435673, "grad_norm": 2.4064621925354004, "learning_rate": 7.0304889297729115e-06, "loss": 0.4825, "step": 9295 }, { "epoch": 0.38592831306856806, "grad_norm": 2.3635404109954834, "learning_rate": 7.0298745302245696e-06, "loss": 0.5904, "step": 9296 }, { "epoch": 0.3859698285927794, "grad_norm": 3.1123790740966797, "learning_rate": 7.02926009397551e-06, "loss": 0.5344, "step": 9297 }, { "epoch": 0.38601134411699073, "grad_norm": 2.5917186737060547, "learning_rate": 7.0286456210368435e-06, "loss": 0.6513, "step": 9298 }, { "epoch": 0.38605285964120206, "grad_norm": 2.687919855117798, "learning_rate": 7.02803111141968e-06, "loss": 0.452, "step": 9299 }, { "epoch": 0.3860943751654134, "grad_norm": 2.267745018005371, "learning_rate": 7.0274165651351285e-06, "loss": 0.4533, "step": 9300 }, { "epoch": 0.38613589068962473, "grad_norm": 2.8081724643707275, "learning_rate": 7.026801982194301e-06, "loss": 0.5517, "step": 9301 }, { "epoch": 0.38617740621383606, "grad_norm": 2.926140308380127, "learning_rate": 7.026187362608311e-06, "loss": 0.6442, "step": 9302 }, { "epoch": 0.3862189217380474, "grad_norm": 2.7114498615264893, "learning_rate": 7.025572706388268e-06, "loss": 0.5133, "step": 9303 }, { "epoch": 0.3862604372622588, "grad_norm": 2.3813743591308594, "learning_rate": 7.024958013545287e-06, "loss": 0.5319, "step": 9304 }, { "epoch": 0.3863019527864701, "grad_norm": 2.4634201526641846, "learning_rate": 7.024343284090482e-06, "loss": 0.4419, "step": 9305 }, { "epoch": 0.38634346831068145, "grad_norm": 2.066337823867798, "learning_rate": 7.023728518034967e-06, "loss": 0.4161, "step": 9306 }, { "epoch": 0.3863849838348928, "grad_norm": 2.7009479999542236, "learning_rate": 7.023113715389857e-06, "loss": 0.6459, "step": 9307 }, { "epoch": 0.3864264993591041, "grad_norm": 2.467243194580078, "learning_rate": 7.022498876166267e-06, "loss": 0.5774, "step": 9308 }, { "epoch": 0.38646801488331545, "grad_norm": 2.052541971206665, "learning_rate": 7.021884000375315e-06, "loss": 0.4844, "step": 9309 }, { "epoch": 0.3865095304075268, "grad_norm": 3.0839226245880127, "learning_rate": 7.021269088028118e-06, "loss": 0.639, "step": 9310 }, { "epoch": 0.3865510459317381, "grad_norm": 2.5141379833221436, "learning_rate": 7.020654139135792e-06, "loss": 0.629, "step": 9311 }, { "epoch": 0.38659256145594945, "grad_norm": 2.204554557800293, "learning_rate": 7.020039153709457e-06, "loss": 0.6163, "step": 9312 }, { "epoch": 0.3866340769801608, "grad_norm": 2.7407479286193848, "learning_rate": 7.019424131760233e-06, "loss": 0.5216, "step": 9313 }, { "epoch": 0.3866755925043721, "grad_norm": 2.259765148162842, "learning_rate": 7.018809073299238e-06, "loss": 0.4558, "step": 9314 }, { "epoch": 0.38671710802858345, "grad_norm": 2.5118465423583984, "learning_rate": 7.0181939783375926e-06, "loss": 0.4036, "step": 9315 }, { "epoch": 0.3867586235527948, "grad_norm": 2.5032684803009033, "learning_rate": 7.017578846886419e-06, "loss": 0.5811, "step": 9316 }, { "epoch": 0.3868001390770061, "grad_norm": 2.2591309547424316, "learning_rate": 7.0169636789568386e-06, "loss": 0.4456, "step": 9317 }, { "epoch": 0.38684165460121744, "grad_norm": 2.699512004852295, "learning_rate": 7.016348474559973e-06, "loss": 0.5295, "step": 9318 }, { "epoch": 0.3868831701254288, "grad_norm": 2.6378555297851562, "learning_rate": 7.015733233706946e-06, "loss": 0.6046, "step": 9319 }, { "epoch": 0.3869246856496401, "grad_norm": 2.8561155796051025, "learning_rate": 7.015117956408881e-06, "loss": 0.4409, "step": 9320 }, { "epoch": 0.38696620117385144, "grad_norm": 2.3723371028900146, "learning_rate": 7.0145026426769025e-06, "loss": 0.6805, "step": 9321 }, { "epoch": 0.3870077166980628, "grad_norm": 2.3223683834075928, "learning_rate": 7.013887292522136e-06, "loss": 0.4621, "step": 9322 }, { "epoch": 0.3870492322222741, "grad_norm": 2.9136478900909424, "learning_rate": 7.013271905955705e-06, "loss": 0.6978, "step": 9323 }, { "epoch": 0.38709074774648544, "grad_norm": 2.7468292713165283, "learning_rate": 7.01265648298874e-06, "loss": 0.4846, "step": 9324 }, { "epoch": 0.3871322632706968, "grad_norm": 2.72280216217041, "learning_rate": 7.0120410236323645e-06, "loss": 0.5587, "step": 9325 }, { "epoch": 0.3871737787949081, "grad_norm": 2.3747646808624268, "learning_rate": 7.011425527897709e-06, "loss": 0.5218, "step": 9326 }, { "epoch": 0.38721529431911944, "grad_norm": 2.536167621612549, "learning_rate": 7.010809995795897e-06, "loss": 0.4684, "step": 9327 }, { "epoch": 0.3872568098433308, "grad_norm": 2.4411730766296387, "learning_rate": 7.010194427338064e-06, "loss": 0.6495, "step": 9328 }, { "epoch": 0.3872983253675421, "grad_norm": 2.337357521057129, "learning_rate": 7.0095788225353335e-06, "loss": 0.6208, "step": 9329 }, { "epoch": 0.38733984089175344, "grad_norm": 2.750321388244629, "learning_rate": 7.00896318139884e-06, "loss": 0.4964, "step": 9330 }, { "epoch": 0.3873813564159648, "grad_norm": 3.261516809463501, "learning_rate": 7.008347503939714e-06, "loss": 0.381, "step": 9331 }, { "epoch": 0.3874228719401761, "grad_norm": 2.3540608882904053, "learning_rate": 7.007731790169086e-06, "loss": 0.5612, "step": 9332 }, { "epoch": 0.38746438746438744, "grad_norm": 2.1603403091430664, "learning_rate": 7.007116040098087e-06, "loss": 0.5013, "step": 9333 }, { "epoch": 0.38750590298859877, "grad_norm": 3.3103959560394287, "learning_rate": 7.006500253737853e-06, "loss": 0.4911, "step": 9334 }, { "epoch": 0.38754741851281016, "grad_norm": 2.1316041946411133, "learning_rate": 7.0058844310995154e-06, "loss": 0.4435, "step": 9335 }, { "epoch": 0.3875889340370215, "grad_norm": 2.311152696609497, "learning_rate": 7.005268572194208e-06, "loss": 0.4596, "step": 9336 }, { "epoch": 0.3876304495612328, "grad_norm": 2.4743711948394775, "learning_rate": 7.004652677033069e-06, "loss": 0.3897, "step": 9337 }, { "epoch": 0.38767196508544416, "grad_norm": 2.1715569496154785, "learning_rate": 7.00403674562723e-06, "loss": 0.4801, "step": 9338 }, { "epoch": 0.3877134806096555, "grad_norm": 2.4019112586975098, "learning_rate": 7.00342077798783e-06, "loss": 0.5643, "step": 9339 }, { "epoch": 0.3877549961338668, "grad_norm": 2.325188636779785, "learning_rate": 7.002804774126005e-06, "loss": 0.5227, "step": 9340 }, { "epoch": 0.38779651165807816, "grad_norm": 1.8763164281845093, "learning_rate": 7.002188734052893e-06, "loss": 0.4195, "step": 9341 }, { "epoch": 0.3878380271822895, "grad_norm": 2.2743947505950928, "learning_rate": 7.0015726577796305e-06, "loss": 0.4896, "step": 9342 }, { "epoch": 0.3878795427065008, "grad_norm": 3.1107559204101562, "learning_rate": 7.000956545317358e-06, "loss": 0.5586, "step": 9343 }, { "epoch": 0.38792105823071216, "grad_norm": 2.512084722518921, "learning_rate": 7.000340396677214e-06, "loss": 0.5323, "step": 9344 }, { "epoch": 0.3879625737549235, "grad_norm": 2.173018217086792, "learning_rate": 6.999724211870339e-06, "loss": 0.5195, "step": 9345 }, { "epoch": 0.3880040892791348, "grad_norm": 2.654690742492676, "learning_rate": 6.999107990907875e-06, "loss": 0.5216, "step": 9346 }, { "epoch": 0.38804560480334616, "grad_norm": 3.413255453109741, "learning_rate": 6.998491733800961e-06, "loss": 0.685, "step": 9347 }, { "epoch": 0.3880871203275575, "grad_norm": 2.5251622200012207, "learning_rate": 6.997875440560741e-06, "loss": 0.5786, "step": 9348 }, { "epoch": 0.3881286358517688, "grad_norm": 2.4596405029296875, "learning_rate": 6.997259111198357e-06, "loss": 0.3282, "step": 9349 }, { "epoch": 0.38817015137598015, "grad_norm": 2.5538573265075684, "learning_rate": 6.996642745724953e-06, "loss": 0.5093, "step": 9350 }, { "epoch": 0.3882116669001915, "grad_norm": 2.6042747497558594, "learning_rate": 6.996026344151672e-06, "loss": 0.4811, "step": 9351 }, { "epoch": 0.3882531824244028, "grad_norm": 2.2886483669281006, "learning_rate": 6.99540990648966e-06, "loss": 0.5307, "step": 9352 }, { "epoch": 0.38829469794861415, "grad_norm": 2.292450428009033, "learning_rate": 6.99479343275006e-06, "loss": 0.473, "step": 9353 }, { "epoch": 0.3883362134728255, "grad_norm": 2.206774950027466, "learning_rate": 6.994176922944021e-06, "loss": 0.6681, "step": 9354 }, { "epoch": 0.3883777289970368, "grad_norm": 2.034423351287842, "learning_rate": 6.993560377082689e-06, "loss": 0.5612, "step": 9355 }, { "epoch": 0.38841924452124815, "grad_norm": 2.3934316635131836, "learning_rate": 6.9929437951772105e-06, "loss": 0.4734, "step": 9356 }, { "epoch": 0.3884607600454595, "grad_norm": 2.2379255294799805, "learning_rate": 6.992327177238734e-06, "loss": 0.7364, "step": 9357 }, { "epoch": 0.3885022755696708, "grad_norm": 2.0429394245147705, "learning_rate": 6.9917105232784075e-06, "loss": 0.4814, "step": 9358 }, { "epoch": 0.38854379109388215, "grad_norm": 2.790820598602295, "learning_rate": 6.9910938333073805e-06, "loss": 0.489, "step": 9359 }, { "epoch": 0.3885853066180935, "grad_norm": 2.222943067550659, "learning_rate": 6.9904771073368035e-06, "loss": 0.5446, "step": 9360 }, { "epoch": 0.3886268221423048, "grad_norm": 2.9950363636016846, "learning_rate": 6.989860345377827e-06, "loss": 0.6129, "step": 9361 }, { "epoch": 0.38866833766651615, "grad_norm": 2.0570666790008545, "learning_rate": 6.989243547441601e-06, "loss": 0.492, "step": 9362 }, { "epoch": 0.3887098531907275, "grad_norm": 2.481978416442871, "learning_rate": 6.98862671353928e-06, "loss": 0.5933, "step": 9363 }, { "epoch": 0.3887513687149388, "grad_norm": 3.2809038162231445, "learning_rate": 6.988009843682012e-06, "loss": 0.6484, "step": 9364 }, { "epoch": 0.3887928842391502, "grad_norm": 1.9913889169692993, "learning_rate": 6.987392937880954e-06, "loss": 0.3939, "step": 9365 }, { "epoch": 0.38883439976336154, "grad_norm": 2.3539252281188965, "learning_rate": 6.9867759961472596e-06, "loss": 0.4006, "step": 9366 }, { "epoch": 0.38887591528757287, "grad_norm": 2.319288969039917, "learning_rate": 6.9861590184920824e-06, "loss": 0.6009, "step": 9367 }, { "epoch": 0.3889174308117842, "grad_norm": 2.4447193145751953, "learning_rate": 6.985542004926578e-06, "loss": 0.4952, "step": 9368 }, { "epoch": 0.38895894633599554, "grad_norm": 2.348484992980957, "learning_rate": 6.984924955461901e-06, "loss": 0.4829, "step": 9369 }, { "epoch": 0.38900046186020687, "grad_norm": 2.570101261138916, "learning_rate": 6.984307870109211e-06, "loss": 0.388, "step": 9370 }, { "epoch": 0.3890419773844182, "grad_norm": 2.6022446155548096, "learning_rate": 6.98369074887966e-06, "loss": 0.625, "step": 9371 }, { "epoch": 0.38908349290862954, "grad_norm": 2.2417783737182617, "learning_rate": 6.98307359178441e-06, "loss": 0.4318, "step": 9372 }, { "epoch": 0.38912500843284087, "grad_norm": 2.3467602729797363, "learning_rate": 6.982456398834616e-06, "loss": 0.5623, "step": 9373 }, { "epoch": 0.3891665239570522, "grad_norm": 2.4610207080841064, "learning_rate": 6.981839170041441e-06, "loss": 0.577, "step": 9374 }, { "epoch": 0.38920803948126353, "grad_norm": 2.1681525707244873, "learning_rate": 6.981221905416041e-06, "loss": 0.4671, "step": 9375 }, { "epoch": 0.38924955500547487, "grad_norm": 3.735163688659668, "learning_rate": 6.98060460496958e-06, "loss": 0.7, "step": 9376 }, { "epoch": 0.3892910705296862, "grad_norm": 2.016099214553833, "learning_rate": 6.979987268713215e-06, "loss": 0.4451, "step": 9377 }, { "epoch": 0.38933258605389753, "grad_norm": 2.475573778152466, "learning_rate": 6.97936989665811e-06, "loss": 0.4666, "step": 9378 }, { "epoch": 0.38937410157810887, "grad_norm": 2.1744256019592285, "learning_rate": 6.978752488815426e-06, "loss": 0.5391, "step": 9379 }, { "epoch": 0.3894156171023202, "grad_norm": 2.492039203643799, "learning_rate": 6.978135045196328e-06, "loss": 0.4339, "step": 9380 }, { "epoch": 0.38945713262653153, "grad_norm": 2.4181911945343018, "learning_rate": 6.977517565811977e-06, "loss": 0.5505, "step": 9381 }, { "epoch": 0.38949864815074287, "grad_norm": 2.136439085006714, "learning_rate": 6.97690005067354e-06, "loss": 0.5782, "step": 9382 }, { "epoch": 0.3895401636749542, "grad_norm": 2.3820433616638184, "learning_rate": 6.97628249979218e-06, "loss": 0.3956, "step": 9383 }, { "epoch": 0.38958167919916553, "grad_norm": 2.888449192047119, "learning_rate": 6.975664913179063e-06, "loss": 0.4532, "step": 9384 }, { "epoch": 0.38962319472337686, "grad_norm": 2.342947244644165, "learning_rate": 6.975047290845354e-06, "loss": 0.5623, "step": 9385 }, { "epoch": 0.3896647102475882, "grad_norm": 3.1248908042907715, "learning_rate": 6.97442963280222e-06, "loss": 0.4924, "step": 9386 }, { "epoch": 0.38970622577179953, "grad_norm": 2.480799436569214, "learning_rate": 6.973811939060831e-06, "loss": 0.4232, "step": 9387 }, { "epoch": 0.38974774129601086, "grad_norm": 2.7628190517425537, "learning_rate": 6.97319420963235e-06, "loss": 0.6048, "step": 9388 }, { "epoch": 0.3897892568202222, "grad_norm": 2.592839002609253, "learning_rate": 6.972576444527952e-06, "loss": 0.5118, "step": 9389 }, { "epoch": 0.38983077234443353, "grad_norm": 2.144073247909546, "learning_rate": 6.971958643758803e-06, "loss": 0.4491, "step": 9390 }, { "epoch": 0.38987228786864486, "grad_norm": 2.6509265899658203, "learning_rate": 6.971340807336072e-06, "loss": 0.5027, "step": 9391 }, { "epoch": 0.3899138033928562, "grad_norm": 2.5356571674346924, "learning_rate": 6.970722935270932e-06, "loss": 0.5385, "step": 9392 }, { "epoch": 0.38995531891706753, "grad_norm": 2.6954405307769775, "learning_rate": 6.970105027574554e-06, "loss": 0.5661, "step": 9393 }, { "epoch": 0.38999683444127886, "grad_norm": 2.252046823501587, "learning_rate": 6.969487084258109e-06, "loss": 0.57, "step": 9394 }, { "epoch": 0.3900383499654902, "grad_norm": 2.3333537578582764, "learning_rate": 6.9688691053327674e-06, "loss": 0.4296, "step": 9395 }, { "epoch": 0.3900798654897016, "grad_norm": 2.4269399642944336, "learning_rate": 6.968251090809708e-06, "loss": 0.4729, "step": 9396 }, { "epoch": 0.3901213810139129, "grad_norm": 3.074835777282715, "learning_rate": 6.9676330407001e-06, "loss": 0.5395, "step": 9397 }, { "epoch": 0.39016289653812425, "grad_norm": 2.364896297454834, "learning_rate": 6.96701495501512e-06, "loss": 0.5595, "step": 9398 }, { "epoch": 0.3902044120623356, "grad_norm": 1.8485745191574097, "learning_rate": 6.966396833765941e-06, "loss": 0.404, "step": 9399 }, { "epoch": 0.3902459275865469, "grad_norm": 2.746462345123291, "learning_rate": 6.9657786769637435e-06, "loss": 0.576, "step": 9400 }, { "epoch": 0.39028744311075825, "grad_norm": 2.407181739807129, "learning_rate": 6.965160484619699e-06, "loss": 0.4413, "step": 9401 }, { "epoch": 0.3903289586349696, "grad_norm": 2.6402242183685303, "learning_rate": 6.964542256744986e-06, "loss": 0.4961, "step": 9402 }, { "epoch": 0.3903704741591809, "grad_norm": 2.237600564956665, "learning_rate": 6.963923993350784e-06, "loss": 0.5382, "step": 9403 }, { "epoch": 0.39041198968339225, "grad_norm": 2.1503915786743164, "learning_rate": 6.963305694448271e-06, "loss": 0.5808, "step": 9404 }, { "epoch": 0.3904535052076036, "grad_norm": 2.1821699142456055, "learning_rate": 6.962687360048622e-06, "loss": 0.5053, "step": 9405 }, { "epoch": 0.3904950207318149, "grad_norm": 2.6070523262023926, "learning_rate": 6.962068990163023e-06, "loss": 0.7075, "step": 9406 }, { "epoch": 0.39053653625602625, "grad_norm": 2.3231189250946045, "learning_rate": 6.961450584802649e-06, "loss": 0.4078, "step": 9407 }, { "epoch": 0.3905780517802376, "grad_norm": 2.843703508377075, "learning_rate": 6.9608321439786845e-06, "loss": 0.5105, "step": 9408 }, { "epoch": 0.3906195673044489, "grad_norm": 3.2206623554229736, "learning_rate": 6.960213667702309e-06, "loss": 0.5216, "step": 9409 }, { "epoch": 0.39066108282866024, "grad_norm": 2.4688267707824707, "learning_rate": 6.959595155984706e-06, "loss": 0.4708, "step": 9410 }, { "epoch": 0.3907025983528716, "grad_norm": 2.946200132369995, "learning_rate": 6.958976608837057e-06, "loss": 0.4999, "step": 9411 }, { "epoch": 0.3907441138770829, "grad_norm": 2.5788660049438477, "learning_rate": 6.958358026270547e-06, "loss": 0.4294, "step": 9412 }, { "epoch": 0.39078562940129424, "grad_norm": 2.2017085552215576, "learning_rate": 6.9577394082963575e-06, "loss": 0.5335, "step": 9413 }, { "epoch": 0.3908271449255056, "grad_norm": 3.4656903743743896, "learning_rate": 6.957120754925677e-06, "loss": 0.5121, "step": 9414 }, { "epoch": 0.3908686604497169, "grad_norm": 3.106818675994873, "learning_rate": 6.956502066169689e-06, "loss": 0.5524, "step": 9415 }, { "epoch": 0.39091017597392824, "grad_norm": 2.537693977355957, "learning_rate": 6.955883342039579e-06, "loss": 0.5702, "step": 9416 }, { "epoch": 0.3909516914981396, "grad_norm": 2.466862201690674, "learning_rate": 6.955264582546536e-06, "loss": 0.4537, "step": 9417 }, { "epoch": 0.3909932070223509, "grad_norm": 2.363678216934204, "learning_rate": 6.9546457877017436e-06, "loss": 0.5472, "step": 9418 }, { "epoch": 0.39103472254656224, "grad_norm": 2.4241995811462402, "learning_rate": 6.954026957516393e-06, "loss": 0.5504, "step": 9419 }, { "epoch": 0.3910762380707736, "grad_norm": 2.562849998474121, "learning_rate": 6.953408092001673e-06, "loss": 0.6523, "step": 9420 }, { "epoch": 0.3911177535949849, "grad_norm": 2.2994577884674072, "learning_rate": 6.95278919116877e-06, "loss": 0.5107, "step": 9421 }, { "epoch": 0.39115926911919624, "grad_norm": 2.250910520553589, "learning_rate": 6.9521702550288765e-06, "loss": 0.5455, "step": 9422 }, { "epoch": 0.3912007846434076, "grad_norm": 2.2663426399230957, "learning_rate": 6.951551283593183e-06, "loss": 0.484, "step": 9423 }, { "epoch": 0.3912423001676189, "grad_norm": 2.500901937484741, "learning_rate": 6.950932276872877e-06, "loss": 0.505, "step": 9424 }, { "epoch": 0.39128381569183024, "grad_norm": 2.609745502471924, "learning_rate": 6.950313234879155e-06, "loss": 0.5371, "step": 9425 }, { "epoch": 0.39132533121604157, "grad_norm": 2.1429152488708496, "learning_rate": 6.949694157623207e-06, "loss": 0.4827, "step": 9426 }, { "epoch": 0.39136684674025296, "grad_norm": 2.3731689453125, "learning_rate": 6.9490750451162266e-06, "loss": 0.5208, "step": 9427 }, { "epoch": 0.3914083622644643, "grad_norm": 2.059258222579956, "learning_rate": 6.948455897369408e-06, "loss": 0.4528, "step": 9428 }, { "epoch": 0.3914498777886756, "grad_norm": 1.9303823709487915, "learning_rate": 6.9478367143939455e-06, "loss": 0.425, "step": 9429 }, { "epoch": 0.39149139331288696, "grad_norm": 1.963945746421814, "learning_rate": 6.947217496201033e-06, "loss": 0.5512, "step": 9430 }, { "epoch": 0.3915329088370983, "grad_norm": 2.4364230632781982, "learning_rate": 6.946598242801867e-06, "loss": 0.6014, "step": 9431 }, { "epoch": 0.3915744243613096, "grad_norm": 2.172180414199829, "learning_rate": 6.945978954207644e-06, "loss": 0.5983, "step": 9432 }, { "epoch": 0.39161593988552096, "grad_norm": 2.771557331085205, "learning_rate": 6.945359630429561e-06, "loss": 0.5218, "step": 9433 }, { "epoch": 0.3916574554097323, "grad_norm": 2.1945931911468506, "learning_rate": 6.944740271478816e-06, "loss": 0.5291, "step": 9434 }, { "epoch": 0.3916989709339436, "grad_norm": 2.772444248199463, "learning_rate": 6.944120877366605e-06, "loss": 0.5103, "step": 9435 }, { "epoch": 0.39174048645815496, "grad_norm": 2.244288206100464, "learning_rate": 6.943501448104128e-06, "loss": 0.5338, "step": 9436 }, { "epoch": 0.3917820019823663, "grad_norm": 2.35762619972229, "learning_rate": 6.942881983702584e-06, "loss": 0.6789, "step": 9437 }, { "epoch": 0.3918235175065776, "grad_norm": 2.4618821144104004, "learning_rate": 6.9422624841731745e-06, "loss": 0.3929, "step": 9438 }, { "epoch": 0.39186503303078896, "grad_norm": 2.7580223083496094, "learning_rate": 6.9416429495271e-06, "loss": 0.4808, "step": 9439 }, { "epoch": 0.3919065485550003, "grad_norm": 2.2445640563964844, "learning_rate": 6.94102337977556e-06, "loss": 0.3775, "step": 9440 }, { "epoch": 0.3919480640792116, "grad_norm": 2.671048164367676, "learning_rate": 6.94040377492976e-06, "loss": 0.5393, "step": 9441 }, { "epoch": 0.39198957960342296, "grad_norm": 1.963026523590088, "learning_rate": 6.9397841350008985e-06, "loss": 0.4704, "step": 9442 }, { "epoch": 0.3920310951276343, "grad_norm": 2.339315176010132, "learning_rate": 6.939164460000182e-06, "loss": 0.4787, "step": 9443 }, { "epoch": 0.3920726106518456, "grad_norm": 2.2548558712005615, "learning_rate": 6.938544749938812e-06, "loss": 0.4686, "step": 9444 }, { "epoch": 0.39211412617605695, "grad_norm": 3.0382349491119385, "learning_rate": 6.937925004827996e-06, "loss": 0.5417, "step": 9445 }, { "epoch": 0.3921556417002683, "grad_norm": 2.2196993827819824, "learning_rate": 6.937305224678937e-06, "loss": 0.5037, "step": 9446 }, { "epoch": 0.3921971572244796, "grad_norm": 2.162623882293701, "learning_rate": 6.936685409502839e-06, "loss": 0.547, "step": 9447 }, { "epoch": 0.39223867274869095, "grad_norm": 4.4950175285339355, "learning_rate": 6.9360655593109116e-06, "loss": 0.6091, "step": 9448 }, { "epoch": 0.3922801882729023, "grad_norm": 2.013134002685547, "learning_rate": 6.935445674114363e-06, "loss": 0.3958, "step": 9449 }, { "epoch": 0.3923217037971136, "grad_norm": 2.261902332305908, "learning_rate": 6.934825753924396e-06, "loss": 0.453, "step": 9450 }, { "epoch": 0.39236321932132495, "grad_norm": 2.298689365386963, "learning_rate": 6.9342057987522225e-06, "loss": 0.4772, "step": 9451 }, { "epoch": 0.3924047348455363, "grad_norm": 2.706204652786255, "learning_rate": 6.93358580860905e-06, "loss": 0.4748, "step": 9452 }, { "epoch": 0.3924462503697476, "grad_norm": 2.467116594314575, "learning_rate": 6.932965783506089e-06, "loss": 0.4765, "step": 9453 }, { "epoch": 0.39248776589395895, "grad_norm": 2.4288370609283447, "learning_rate": 6.93234572345455e-06, "loss": 0.4564, "step": 9454 }, { "epoch": 0.3925292814181703, "grad_norm": 2.51332688331604, "learning_rate": 6.931725628465643e-06, "loss": 0.5444, "step": 9455 }, { "epoch": 0.3925707969423816, "grad_norm": 2.75368332862854, "learning_rate": 6.931105498550581e-06, "loss": 0.6854, "step": 9456 }, { "epoch": 0.39261231246659295, "grad_norm": 2.7261531352996826, "learning_rate": 6.9304853337205735e-06, "loss": 0.6119, "step": 9457 }, { "epoch": 0.39265382799080434, "grad_norm": 2.4471752643585205, "learning_rate": 6.9298651339868366e-06, "loss": 0.5015, "step": 9458 }, { "epoch": 0.39269534351501567, "grad_norm": 2.7315971851348877, "learning_rate": 6.929244899360581e-06, "loss": 0.6581, "step": 9459 }, { "epoch": 0.392736859039227, "grad_norm": 2.443295955657959, "learning_rate": 6.928624629853021e-06, "loss": 0.5706, "step": 9460 }, { "epoch": 0.39277837456343834, "grad_norm": 2.241917371749878, "learning_rate": 6.928004325475372e-06, "loss": 0.4713, "step": 9461 }, { "epoch": 0.39281989008764967, "grad_norm": 2.374528169631958, "learning_rate": 6.927383986238849e-06, "loss": 0.5236, "step": 9462 }, { "epoch": 0.392861405611861, "grad_norm": 2.0647263526916504, "learning_rate": 6.926763612154667e-06, "loss": 0.4233, "step": 9463 }, { "epoch": 0.39290292113607234, "grad_norm": 2.411924123764038, "learning_rate": 6.926143203234046e-06, "loss": 0.5705, "step": 9464 }, { "epoch": 0.39294443666028367, "grad_norm": 2.4005584716796875, "learning_rate": 6.925522759488199e-06, "loss": 0.5256, "step": 9465 }, { "epoch": 0.392985952184495, "grad_norm": 2.255049228668213, "learning_rate": 6.924902280928345e-06, "loss": 0.5549, "step": 9466 }, { "epoch": 0.39302746770870634, "grad_norm": 1.8483985662460327, "learning_rate": 6.924281767565704e-06, "loss": 0.3679, "step": 9467 }, { "epoch": 0.39306898323291767, "grad_norm": 2.5641322135925293, "learning_rate": 6.923661219411494e-06, "loss": 0.6623, "step": 9468 }, { "epoch": 0.393110498757129, "grad_norm": 2.4475419521331787, "learning_rate": 6.923040636476935e-06, "loss": 0.4883, "step": 9469 }, { "epoch": 0.39315201428134033, "grad_norm": 2.5992438793182373, "learning_rate": 6.922420018773246e-06, "loss": 0.6044, "step": 9470 }, { "epoch": 0.39319352980555167, "grad_norm": 3.055706262588501, "learning_rate": 6.92179936631165e-06, "loss": 0.4444, "step": 9471 }, { "epoch": 0.393235045329763, "grad_norm": 2.6807355880737305, "learning_rate": 6.921178679103366e-06, "loss": 0.7119, "step": 9472 }, { "epoch": 0.39327656085397433, "grad_norm": 2.2432525157928467, "learning_rate": 6.92055795715962e-06, "loss": 0.5076, "step": 9473 }, { "epoch": 0.39331807637818567, "grad_norm": 2.6285080909729004, "learning_rate": 6.919937200491631e-06, "loss": 0.5355, "step": 9474 }, { "epoch": 0.393359591902397, "grad_norm": 2.2782275676727295, "learning_rate": 6.919316409110625e-06, "loss": 0.5779, "step": 9475 }, { "epoch": 0.39340110742660833, "grad_norm": 2.2511749267578125, "learning_rate": 6.9186955830278234e-06, "loss": 0.5161, "step": 9476 }, { "epoch": 0.39344262295081966, "grad_norm": 2.138472080230713, "learning_rate": 6.918074722254454e-06, "loss": 0.4479, "step": 9477 }, { "epoch": 0.393484138475031, "grad_norm": 2.727379083633423, "learning_rate": 6.917453826801741e-06, "loss": 0.5045, "step": 9478 }, { "epoch": 0.39352565399924233, "grad_norm": 2.346944808959961, "learning_rate": 6.916832896680908e-06, "loss": 0.5816, "step": 9479 }, { "epoch": 0.39356716952345366, "grad_norm": 2.2628774642944336, "learning_rate": 6.916211931903186e-06, "loss": 0.5015, "step": 9480 }, { "epoch": 0.393608685047665, "grad_norm": 2.3965985774993896, "learning_rate": 6.9155909324797985e-06, "loss": 0.3855, "step": 9481 }, { "epoch": 0.39365020057187633, "grad_norm": 2.483288049697876, "learning_rate": 6.914969898421977e-06, "loss": 0.5229, "step": 9482 }, { "epoch": 0.39369171609608766, "grad_norm": 2.691504716873169, "learning_rate": 6.9143488297409455e-06, "loss": 0.5231, "step": 9483 }, { "epoch": 0.393733231620299, "grad_norm": 2.2030179500579834, "learning_rate": 6.913727726447936e-06, "loss": 0.5361, "step": 9484 }, { "epoch": 0.39377474714451033, "grad_norm": 2.268033266067505, "learning_rate": 6.9131065885541795e-06, "loss": 0.4515, "step": 9485 }, { "epoch": 0.39381626266872166, "grad_norm": 2.544426679611206, "learning_rate": 6.912485416070903e-06, "loss": 0.5627, "step": 9486 }, { "epoch": 0.393857778192933, "grad_norm": 2.801407814025879, "learning_rate": 6.911864209009338e-06, "loss": 0.4059, "step": 9487 }, { "epoch": 0.3938992937171443, "grad_norm": 2.8497071266174316, "learning_rate": 6.911242967380718e-06, "loss": 0.4896, "step": 9488 }, { "epoch": 0.3939408092413557, "grad_norm": 2.537832498550415, "learning_rate": 6.910621691196274e-06, "loss": 0.5084, "step": 9489 }, { "epoch": 0.39398232476556705, "grad_norm": 2.075809955596924, "learning_rate": 6.91000038046724e-06, "loss": 0.4476, "step": 9490 }, { "epoch": 0.3940238402897784, "grad_norm": 2.2910726070404053, "learning_rate": 6.909379035204847e-06, "loss": 0.4057, "step": 9491 }, { "epoch": 0.3940653558139897, "grad_norm": 2.6276235580444336, "learning_rate": 6.908757655420331e-06, "loss": 0.6779, "step": 9492 }, { "epoch": 0.39410687133820105, "grad_norm": 2.1308770179748535, "learning_rate": 6.908136241124928e-06, "loss": 0.4453, "step": 9493 }, { "epoch": 0.3941483868624124, "grad_norm": 2.5688469409942627, "learning_rate": 6.90751479232987e-06, "loss": 0.4057, "step": 9494 }, { "epoch": 0.3941899023866237, "grad_norm": 2.4445464611053467, "learning_rate": 6.9068933090463955e-06, "loss": 0.4372, "step": 9495 }, { "epoch": 0.39423141791083505, "grad_norm": 3.3276426792144775, "learning_rate": 6.90627179128574e-06, "loss": 0.4998, "step": 9496 }, { "epoch": 0.3942729334350464, "grad_norm": 3.0741801261901855, "learning_rate": 6.905650239059142e-06, "loss": 0.591, "step": 9497 }, { "epoch": 0.3943144489592577, "grad_norm": 2.756662607192993, "learning_rate": 6.905028652377838e-06, "loss": 0.4222, "step": 9498 }, { "epoch": 0.39435596448346905, "grad_norm": 2.7567269802093506, "learning_rate": 6.9044070312530665e-06, "loss": 0.4943, "step": 9499 }, { "epoch": 0.3943974800076804, "grad_norm": 2.309267997741699, "learning_rate": 6.903785375696067e-06, "loss": 0.5631, "step": 9500 }, { "epoch": 0.3944389955318917, "grad_norm": 2.1415016651153564, "learning_rate": 6.9031636857180795e-06, "loss": 0.4814, "step": 9501 }, { "epoch": 0.39448051105610304, "grad_norm": 2.607351064682007, "learning_rate": 6.902541961330343e-06, "loss": 0.455, "step": 9502 }, { "epoch": 0.3945220265803144, "grad_norm": 2.5162417888641357, "learning_rate": 6.901920202544101e-06, "loss": 0.634, "step": 9503 }, { "epoch": 0.3945635421045257, "grad_norm": 2.648191452026367, "learning_rate": 6.901298409370593e-06, "loss": 0.5947, "step": 9504 }, { "epoch": 0.39460505762873704, "grad_norm": 2.07051944732666, "learning_rate": 6.900676581821061e-06, "loss": 0.4632, "step": 9505 }, { "epoch": 0.3946465731529484, "grad_norm": 2.2403182983398438, "learning_rate": 6.9000547199067504e-06, "loss": 0.3117, "step": 9506 }, { "epoch": 0.3946880886771597, "grad_norm": 2.3970937728881836, "learning_rate": 6.8994328236389006e-06, "loss": 0.5487, "step": 9507 }, { "epoch": 0.39472960420137104, "grad_norm": 2.6224727630615234, "learning_rate": 6.898810893028759e-06, "loss": 0.5756, "step": 9508 }, { "epoch": 0.3947711197255824, "grad_norm": 2.929807186126709, "learning_rate": 6.898188928087568e-06, "loss": 0.5459, "step": 9509 }, { "epoch": 0.3948126352497937, "grad_norm": 3.120710611343384, "learning_rate": 6.8975669288265755e-06, "loss": 0.5487, "step": 9510 }, { "epoch": 0.39485415077400504, "grad_norm": 2.3602793216705322, "learning_rate": 6.896944895257026e-06, "loss": 0.5974, "step": 9511 }, { "epoch": 0.3948956662982164, "grad_norm": 3.0935373306274414, "learning_rate": 6.8963228273901665e-06, "loss": 0.6191, "step": 9512 }, { "epoch": 0.3949371818224277, "grad_norm": 2.6366889476776123, "learning_rate": 6.895700725237242e-06, "loss": 0.5539, "step": 9513 }, { "epoch": 0.39497869734663904, "grad_norm": 2.3372812271118164, "learning_rate": 6.895078588809503e-06, "loss": 0.432, "step": 9514 }, { "epoch": 0.3950202128708504, "grad_norm": 2.0086333751678467, "learning_rate": 6.894456418118196e-06, "loss": 0.4901, "step": 9515 }, { "epoch": 0.3950617283950617, "grad_norm": 2.6705479621887207, "learning_rate": 6.893834213174573e-06, "loss": 0.5505, "step": 9516 }, { "epoch": 0.39510324391927304, "grad_norm": 2.0874133110046387, "learning_rate": 6.89321197398988e-06, "loss": 0.3568, "step": 9517 }, { "epoch": 0.3951447594434844, "grad_norm": 2.1417930126190186, "learning_rate": 6.892589700575368e-06, "loss": 0.5045, "step": 9518 }, { "epoch": 0.3951862749676957, "grad_norm": 2.3948581218719482, "learning_rate": 6.891967392942289e-06, "loss": 0.5651, "step": 9519 }, { "epoch": 0.3952277904919071, "grad_norm": 2.5487825870513916, "learning_rate": 6.891345051101895e-06, "loss": 0.4321, "step": 9520 }, { "epoch": 0.3952693060161184, "grad_norm": 2.3508307933807373, "learning_rate": 6.890722675065438e-06, "loss": 0.4722, "step": 9521 }, { "epoch": 0.39531082154032976, "grad_norm": 2.983579158782959, "learning_rate": 6.890100264844169e-06, "loss": 0.5637, "step": 9522 }, { "epoch": 0.3953523370645411, "grad_norm": 2.5859975814819336, "learning_rate": 6.889477820449343e-06, "loss": 0.4634, "step": 9523 }, { "epoch": 0.3953938525887524, "grad_norm": 2.535219430923462, "learning_rate": 6.888855341892214e-06, "loss": 0.5451, "step": 9524 }, { "epoch": 0.39543536811296376, "grad_norm": 2.7466747760772705, "learning_rate": 6.888232829184035e-06, "loss": 0.5381, "step": 9525 }, { "epoch": 0.3954768836371751, "grad_norm": 2.324240207672119, "learning_rate": 6.887610282336061e-06, "loss": 0.5343, "step": 9526 }, { "epoch": 0.3955183991613864, "grad_norm": 2.363391637802124, "learning_rate": 6.886987701359552e-06, "loss": 0.5324, "step": 9527 }, { "epoch": 0.39555991468559776, "grad_norm": 2.908578395843506, "learning_rate": 6.8863650862657596e-06, "loss": 0.5677, "step": 9528 }, { "epoch": 0.3956014302098091, "grad_norm": 2.389268159866333, "learning_rate": 6.885742437065943e-06, "loss": 0.4128, "step": 9529 }, { "epoch": 0.3956429457340204, "grad_norm": 2.6224029064178467, "learning_rate": 6.8851197537713585e-06, "loss": 0.5022, "step": 9530 }, { "epoch": 0.39568446125823176, "grad_norm": 1.9749212265014648, "learning_rate": 6.884497036393267e-06, "loss": 0.4019, "step": 9531 }, { "epoch": 0.3957259767824431, "grad_norm": 2.211949348449707, "learning_rate": 6.883874284942925e-06, "loss": 0.486, "step": 9532 }, { "epoch": 0.3957674923066544, "grad_norm": 2.0829877853393555, "learning_rate": 6.883251499431594e-06, "loss": 0.2993, "step": 9533 }, { "epoch": 0.39580900783086576, "grad_norm": 2.4362123012542725, "learning_rate": 6.8826286798705325e-06, "loss": 0.5609, "step": 9534 }, { "epoch": 0.3958505233550771, "grad_norm": 2.3691866397857666, "learning_rate": 6.882005826271001e-06, "loss": 0.4106, "step": 9535 }, { "epoch": 0.3958920388792884, "grad_norm": 2.505054473876953, "learning_rate": 6.881382938644263e-06, "loss": 0.4487, "step": 9536 }, { "epoch": 0.39593355440349975, "grad_norm": 2.3392269611358643, "learning_rate": 6.88076001700158e-06, "loss": 0.5183, "step": 9537 }, { "epoch": 0.3959750699277111, "grad_norm": 2.454525947570801, "learning_rate": 6.880137061354213e-06, "loss": 0.5055, "step": 9538 }, { "epoch": 0.3960165854519224, "grad_norm": 2.498783588409424, "learning_rate": 6.879514071713424e-06, "loss": 0.6341, "step": 9539 }, { "epoch": 0.39605810097613375, "grad_norm": 2.611647367477417, "learning_rate": 6.878891048090481e-06, "loss": 0.5797, "step": 9540 }, { "epoch": 0.3960996165003451, "grad_norm": 2.6829257011413574, "learning_rate": 6.878267990496645e-06, "loss": 0.5585, "step": 9541 }, { "epoch": 0.3961411320245564, "grad_norm": 2.8342933654785156, "learning_rate": 6.877644898943183e-06, "loss": 0.6182, "step": 9542 }, { "epoch": 0.39618264754876775, "grad_norm": 2.3843178749084473, "learning_rate": 6.8770217734413606e-06, "loss": 0.449, "step": 9543 }, { "epoch": 0.3962241630729791, "grad_norm": 2.3380913734436035, "learning_rate": 6.876398614002443e-06, "loss": 0.4908, "step": 9544 }, { "epoch": 0.3962656785971904, "grad_norm": 2.2723443508148193, "learning_rate": 6.875775420637697e-06, "loss": 0.55, "step": 9545 }, { "epoch": 0.39630719412140175, "grad_norm": 2.523648500442505, "learning_rate": 6.875152193358391e-06, "loss": 0.5341, "step": 9546 }, { "epoch": 0.3963487096456131, "grad_norm": 2.9344723224639893, "learning_rate": 6.8745289321757936e-06, "loss": 0.6106, "step": 9547 }, { "epoch": 0.3963902251698244, "grad_norm": 2.079477548599243, "learning_rate": 6.8739056371011725e-06, "loss": 0.4711, "step": 9548 }, { "epoch": 0.39643174069403575, "grad_norm": 3.704927444458008, "learning_rate": 6.873282308145797e-06, "loss": 0.7415, "step": 9549 }, { "epoch": 0.3964732562182471, "grad_norm": 2.2612152099609375, "learning_rate": 6.872658945320938e-06, "loss": 0.3354, "step": 9550 }, { "epoch": 0.39651477174245847, "grad_norm": 2.3824803829193115, "learning_rate": 6.872035548637867e-06, "loss": 0.5186, "step": 9551 }, { "epoch": 0.3965562872666698, "grad_norm": 2.3358237743377686, "learning_rate": 6.871412118107851e-06, "loss": 0.5134, "step": 9552 }, { "epoch": 0.39659780279088114, "grad_norm": 2.425034761428833, "learning_rate": 6.870788653742165e-06, "loss": 0.5519, "step": 9553 }, { "epoch": 0.39663931831509247, "grad_norm": 2.6584696769714355, "learning_rate": 6.870165155552082e-06, "loss": 0.5548, "step": 9554 }, { "epoch": 0.3966808338393038, "grad_norm": 2.7701711654663086, "learning_rate": 6.869541623548873e-06, "loss": 0.5186, "step": 9555 }, { "epoch": 0.39672234936351514, "grad_norm": 2.876344680786133, "learning_rate": 6.868918057743812e-06, "loss": 0.6211, "step": 9556 }, { "epoch": 0.39676386488772647, "grad_norm": 2.4424374103546143, "learning_rate": 6.868294458148176e-06, "loss": 0.5296, "step": 9557 }, { "epoch": 0.3968053804119378, "grad_norm": 2.1439902782440186, "learning_rate": 6.867670824773236e-06, "loss": 0.6036, "step": 9558 }, { "epoch": 0.39684689593614914, "grad_norm": 2.3856606483459473, "learning_rate": 6.867047157630269e-06, "loss": 0.5869, "step": 9559 }, { "epoch": 0.39688841146036047, "grad_norm": 2.182368516921997, "learning_rate": 6.866423456730552e-06, "loss": 0.4917, "step": 9560 }, { "epoch": 0.3969299269845718, "grad_norm": 2.453327178955078, "learning_rate": 6.8657997220853615e-06, "loss": 0.5846, "step": 9561 }, { "epoch": 0.39697144250878313, "grad_norm": 2.1865034103393555, "learning_rate": 6.865175953705973e-06, "loss": 0.5735, "step": 9562 }, { "epoch": 0.39701295803299447, "grad_norm": 2.3433337211608887, "learning_rate": 6.864552151603666e-06, "loss": 0.4867, "step": 9563 }, { "epoch": 0.3970544735572058, "grad_norm": 2.858173131942749, "learning_rate": 6.863928315789719e-06, "loss": 0.5511, "step": 9564 }, { "epoch": 0.39709598908141713, "grad_norm": 2.4684507846832275, "learning_rate": 6.863304446275411e-06, "loss": 0.4727, "step": 9565 }, { "epoch": 0.39713750460562847, "grad_norm": 2.6060502529144287, "learning_rate": 6.862680543072022e-06, "loss": 0.5965, "step": 9566 }, { "epoch": 0.3971790201298398, "grad_norm": 2.453385829925537, "learning_rate": 6.86205660619083e-06, "loss": 0.5344, "step": 9567 }, { "epoch": 0.39722053565405113, "grad_norm": 2.108764886856079, "learning_rate": 6.861432635643119e-06, "loss": 0.5376, "step": 9568 }, { "epoch": 0.39726205117826247, "grad_norm": 2.239841938018799, "learning_rate": 6.86080863144017e-06, "loss": 0.4377, "step": 9569 }, { "epoch": 0.3973035667024738, "grad_norm": 2.5871620178222656, "learning_rate": 6.860184593593264e-06, "loss": 0.4711, "step": 9570 }, { "epoch": 0.39734508222668513, "grad_norm": 2.4768714904785156, "learning_rate": 6.859560522113684e-06, "loss": 0.5678, "step": 9571 }, { "epoch": 0.39738659775089646, "grad_norm": 2.5643115043640137, "learning_rate": 6.858936417012714e-06, "loss": 0.5295, "step": 9572 }, { "epoch": 0.3974281132751078, "grad_norm": 2.955547571182251, "learning_rate": 6.858312278301638e-06, "loss": 0.4588, "step": 9573 }, { "epoch": 0.39746962879931913, "grad_norm": 2.6620922088623047, "learning_rate": 6.857688105991741e-06, "loss": 0.6309, "step": 9574 }, { "epoch": 0.39751114432353046, "grad_norm": 2.882366895675659, "learning_rate": 6.857063900094306e-06, "loss": 0.7052, "step": 9575 }, { "epoch": 0.3975526598477418, "grad_norm": 2.2626843452453613, "learning_rate": 6.8564396606206215e-06, "loss": 0.4972, "step": 9576 }, { "epoch": 0.39759417537195313, "grad_norm": 2.4643261432647705, "learning_rate": 6.855815387581974e-06, "loss": 0.4177, "step": 9577 }, { "epoch": 0.39763569089616446, "grad_norm": 2.2179689407348633, "learning_rate": 6.855191080989647e-06, "loss": 0.4299, "step": 9578 }, { "epoch": 0.3976772064203758, "grad_norm": 2.8293616771698, "learning_rate": 6.854566740854932e-06, "loss": 0.5368, "step": 9579 }, { "epoch": 0.39771872194458713, "grad_norm": 2.525128126144409, "learning_rate": 6.853942367189115e-06, "loss": 0.4358, "step": 9580 }, { "epoch": 0.3977602374687985, "grad_norm": 2.4099533557891846, "learning_rate": 6.853317960003487e-06, "loss": 0.381, "step": 9581 }, { "epoch": 0.39780175299300985, "grad_norm": 3.1864750385284424, "learning_rate": 6.852693519309335e-06, "loss": 0.4981, "step": 9582 }, { "epoch": 0.3978432685172212, "grad_norm": 2.4446604251861572, "learning_rate": 6.8520690451179505e-06, "loss": 0.6366, "step": 9583 }, { "epoch": 0.3978847840414325, "grad_norm": 2.485454559326172, "learning_rate": 6.851444537440623e-06, "loss": 0.4859, "step": 9584 }, { "epoch": 0.39792629956564385, "grad_norm": 2.0775184631347656, "learning_rate": 6.850819996288645e-06, "loss": 0.5237, "step": 9585 }, { "epoch": 0.3979678150898552, "grad_norm": 2.9652485847473145, "learning_rate": 6.850195421673308e-06, "loss": 0.5031, "step": 9586 }, { "epoch": 0.3980093306140665, "grad_norm": 2.4396347999572754, "learning_rate": 6.849570813605906e-06, "loss": 0.4816, "step": 9587 }, { "epoch": 0.39805084613827785, "grad_norm": 2.297891139984131, "learning_rate": 6.84894617209773e-06, "loss": 0.6468, "step": 9588 }, { "epoch": 0.3980923616624892, "grad_norm": 2.5160341262817383, "learning_rate": 6.848321497160073e-06, "loss": 0.5095, "step": 9589 }, { "epoch": 0.3981338771867005, "grad_norm": 2.932217836380005, "learning_rate": 6.847696788804231e-06, "loss": 0.6485, "step": 9590 }, { "epoch": 0.39817539271091185, "grad_norm": 2.3254079818725586, "learning_rate": 6.847072047041499e-06, "loss": 0.44, "step": 9591 }, { "epoch": 0.3982169082351232, "grad_norm": 2.2660751342773438, "learning_rate": 6.846447271883172e-06, "loss": 0.5111, "step": 9592 }, { "epoch": 0.3982584237593345, "grad_norm": 2.5064525604248047, "learning_rate": 6.845822463340546e-06, "loss": 0.6563, "step": 9593 }, { "epoch": 0.39829993928354585, "grad_norm": 2.557393789291382, "learning_rate": 6.845197621424917e-06, "loss": 0.5283, "step": 9594 }, { "epoch": 0.3983414548077572, "grad_norm": 2.7189643383026123, "learning_rate": 6.844572746147584e-06, "loss": 0.5005, "step": 9595 }, { "epoch": 0.3983829703319685, "grad_norm": 2.388504981994629, "learning_rate": 6.843947837519844e-06, "loss": 0.4717, "step": 9596 }, { "epoch": 0.39842448585617984, "grad_norm": 2.466902732849121, "learning_rate": 6.843322895552995e-06, "loss": 0.5458, "step": 9597 }, { "epoch": 0.3984660013803912, "grad_norm": 2.89453125, "learning_rate": 6.842697920258338e-06, "loss": 0.6267, "step": 9598 }, { "epoch": 0.3985075169046025, "grad_norm": 2.45326828956604, "learning_rate": 6.842072911647169e-06, "loss": 0.5149, "step": 9599 }, { "epoch": 0.39854903242881384, "grad_norm": 2.5845298767089844, "learning_rate": 6.841447869730794e-06, "loss": 0.69, "step": 9600 }, { "epoch": 0.3985905479530252, "grad_norm": 2.0656676292419434, "learning_rate": 6.8408227945205095e-06, "loss": 0.3769, "step": 9601 }, { "epoch": 0.3986320634772365, "grad_norm": 2.533600091934204, "learning_rate": 6.840197686027618e-06, "loss": 0.6182, "step": 9602 }, { "epoch": 0.39867357900144784, "grad_norm": 2.7905077934265137, "learning_rate": 6.839572544263422e-06, "loss": 0.4195, "step": 9603 }, { "epoch": 0.3987150945256592, "grad_norm": 2.8160688877105713, "learning_rate": 6.838947369239224e-06, "loss": 0.556, "step": 9604 }, { "epoch": 0.3987566100498705, "grad_norm": 2.405109167098999, "learning_rate": 6.838322160966328e-06, "loss": 0.4697, "step": 9605 }, { "epoch": 0.39879812557408184, "grad_norm": 2.120548725128174, "learning_rate": 6.837696919456038e-06, "loss": 0.3924, "step": 9606 }, { "epoch": 0.3988396410982932, "grad_norm": 2.074252128601074, "learning_rate": 6.837071644719657e-06, "loss": 0.4133, "step": 9607 }, { "epoch": 0.3988811566225045, "grad_norm": 2.530216693878174, "learning_rate": 6.836446336768491e-06, "loss": 0.5555, "step": 9608 }, { "epoch": 0.39892267214671584, "grad_norm": 2.2015419006347656, "learning_rate": 6.835820995613846e-06, "loss": 0.5241, "step": 9609 }, { "epoch": 0.3989641876709272, "grad_norm": 2.9008562564849854, "learning_rate": 6.835195621267029e-06, "loss": 0.5026, "step": 9610 }, { "epoch": 0.3990057031951385, "grad_norm": 2.211496114730835, "learning_rate": 6.834570213739346e-06, "loss": 0.5036, "step": 9611 }, { "epoch": 0.3990472187193499, "grad_norm": 2.486409902572632, "learning_rate": 6.833944773042104e-06, "loss": 0.3856, "step": 9612 }, { "epoch": 0.3990887342435612, "grad_norm": 2.832418918609619, "learning_rate": 6.8333192991866125e-06, "loss": 0.5394, "step": 9613 }, { "epoch": 0.39913024976777256, "grad_norm": 2.3098697662353516, "learning_rate": 6.832693792184181e-06, "loss": 0.6498, "step": 9614 }, { "epoch": 0.3991717652919839, "grad_norm": 2.7256875038146973, "learning_rate": 6.832068252046116e-06, "loss": 0.4327, "step": 9615 }, { "epoch": 0.3992132808161952, "grad_norm": 2.4865148067474365, "learning_rate": 6.831442678783729e-06, "loss": 0.5662, "step": 9616 }, { "epoch": 0.39925479634040656, "grad_norm": 2.0987184047698975, "learning_rate": 6.830817072408329e-06, "loss": 0.4887, "step": 9617 }, { "epoch": 0.3992963118646179, "grad_norm": 2.418596029281616, "learning_rate": 6.830191432931231e-06, "loss": 0.4697, "step": 9618 }, { "epoch": 0.3993378273888292, "grad_norm": 2.918854236602783, "learning_rate": 6.829565760363743e-06, "loss": 0.4605, "step": 9619 }, { "epoch": 0.39937934291304056, "grad_norm": 2.0511748790740967, "learning_rate": 6.82894005471718e-06, "loss": 0.4725, "step": 9620 }, { "epoch": 0.3994208584372519, "grad_norm": 2.1466658115386963, "learning_rate": 6.828314316002852e-06, "loss": 0.4086, "step": 9621 }, { "epoch": 0.3994623739614632, "grad_norm": 2.2802209854125977, "learning_rate": 6.827688544232076e-06, "loss": 0.405, "step": 9622 }, { "epoch": 0.39950388948567456, "grad_norm": 2.5460970401763916, "learning_rate": 6.827062739416162e-06, "loss": 0.5596, "step": 9623 }, { "epoch": 0.3995454050098859, "grad_norm": 2.1461751461029053, "learning_rate": 6.82643690156643e-06, "loss": 0.4945, "step": 9624 }, { "epoch": 0.3995869205340972, "grad_norm": 2.135716676712036, "learning_rate": 6.82581103069419e-06, "loss": 0.595, "step": 9625 }, { "epoch": 0.39962843605830856, "grad_norm": 2.351848602294922, "learning_rate": 6.825185126810761e-06, "loss": 0.4623, "step": 9626 }, { "epoch": 0.3996699515825199, "grad_norm": 2.918213129043579, "learning_rate": 6.8245591899274595e-06, "loss": 0.6452, "step": 9627 }, { "epoch": 0.3997114671067312, "grad_norm": 2.771763801574707, "learning_rate": 6.823933220055602e-06, "loss": 0.6125, "step": 9628 }, { "epoch": 0.39975298263094255, "grad_norm": 2.651299476623535, "learning_rate": 6.823307217206505e-06, "loss": 0.5821, "step": 9629 }, { "epoch": 0.3997944981551539, "grad_norm": 2.408693313598633, "learning_rate": 6.822681181391489e-06, "loss": 0.5742, "step": 9630 }, { "epoch": 0.3998360136793652, "grad_norm": 2.402003765106201, "learning_rate": 6.822055112621871e-06, "loss": 0.4036, "step": 9631 }, { "epoch": 0.39987752920357655, "grad_norm": 2.3334264755249023, "learning_rate": 6.821429010908972e-06, "loss": 0.5858, "step": 9632 }, { "epoch": 0.3999190447277879, "grad_norm": 2.0657172203063965, "learning_rate": 6.820802876264112e-06, "loss": 0.4527, "step": 9633 }, { "epoch": 0.3999605602519992, "grad_norm": 2.7746853828430176, "learning_rate": 6.82017670869861e-06, "loss": 0.5528, "step": 9634 }, { "epoch": 0.40000207577621055, "grad_norm": 2.467815399169922, "learning_rate": 6.819550508223789e-06, "loss": 0.48, "step": 9635 }, { "epoch": 0.4000435913004219, "grad_norm": 3.1077845096588135, "learning_rate": 6.818924274850971e-06, "loss": 0.4806, "step": 9636 }, { "epoch": 0.4000851068246332, "grad_norm": 2.417792320251465, "learning_rate": 6.818298008591477e-06, "loss": 0.5205, "step": 9637 }, { "epoch": 0.40012662234884455, "grad_norm": 3.0180881023406982, "learning_rate": 6.81767170945663e-06, "loss": 0.5778, "step": 9638 }, { "epoch": 0.4001681378730559, "grad_norm": 2.310849189758301, "learning_rate": 6.817045377457757e-06, "loss": 0.5885, "step": 9639 }, { "epoch": 0.4002096533972672, "grad_norm": 2.518219232559204, "learning_rate": 6.816419012606179e-06, "loss": 0.61, "step": 9640 }, { "epoch": 0.40025116892147855, "grad_norm": 2.394594430923462, "learning_rate": 6.815792614913221e-06, "loss": 0.6356, "step": 9641 }, { "epoch": 0.4002926844456899, "grad_norm": 2.5711605548858643, "learning_rate": 6.815166184390209e-06, "loss": 0.4838, "step": 9642 }, { "epoch": 0.40033419996990127, "grad_norm": 2.7143568992614746, "learning_rate": 6.81453972104847e-06, "loss": 0.5334, "step": 9643 }, { "epoch": 0.4003757154941126, "grad_norm": 2.2424635887145996, "learning_rate": 6.81391322489933e-06, "loss": 0.3887, "step": 9644 }, { "epoch": 0.40041723101832394, "grad_norm": 2.5458478927612305, "learning_rate": 6.813286695954114e-06, "loss": 0.4068, "step": 9645 }, { "epoch": 0.40045874654253527, "grad_norm": 2.5264275074005127, "learning_rate": 6.812660134224155e-06, "loss": 0.7019, "step": 9646 }, { "epoch": 0.4005002620667466, "grad_norm": 2.8072690963745117, "learning_rate": 6.812033539720776e-06, "loss": 0.5832, "step": 9647 }, { "epoch": 0.40054177759095794, "grad_norm": 2.345245122909546, "learning_rate": 6.811406912455309e-06, "loss": 0.5544, "step": 9648 }, { "epoch": 0.40058329311516927, "grad_norm": 2.1583385467529297, "learning_rate": 6.810780252439083e-06, "loss": 0.5104, "step": 9649 }, { "epoch": 0.4006248086393806, "grad_norm": 2.1401193141937256, "learning_rate": 6.810153559683428e-06, "loss": 0.461, "step": 9650 }, { "epoch": 0.40066632416359194, "grad_norm": 2.298145294189453, "learning_rate": 6.809526834199675e-06, "loss": 0.4467, "step": 9651 }, { "epoch": 0.40070783968780327, "grad_norm": 2.3724968433380127, "learning_rate": 6.8089000759991555e-06, "loss": 0.4266, "step": 9652 }, { "epoch": 0.4007493552120146, "grad_norm": 2.281390428543091, "learning_rate": 6.808273285093201e-06, "loss": 0.3883, "step": 9653 }, { "epoch": 0.40079087073622593, "grad_norm": 2.618286371231079, "learning_rate": 6.807646461493145e-06, "loss": 0.4362, "step": 9654 }, { "epoch": 0.40083238626043727, "grad_norm": 3.0283780097961426, "learning_rate": 6.807019605210319e-06, "loss": 0.3782, "step": 9655 }, { "epoch": 0.4008739017846486, "grad_norm": 2.4371347427368164, "learning_rate": 6.806392716256057e-06, "loss": 0.4438, "step": 9656 }, { "epoch": 0.40091541730885993, "grad_norm": 2.4705097675323486, "learning_rate": 6.805765794641696e-06, "loss": 0.6633, "step": 9657 }, { "epoch": 0.40095693283307127, "grad_norm": 2.4434690475463867, "learning_rate": 6.805138840378567e-06, "loss": 0.38, "step": 9658 }, { "epoch": 0.4009984483572826, "grad_norm": 2.242445707321167, "learning_rate": 6.80451185347801e-06, "loss": 0.4871, "step": 9659 }, { "epoch": 0.40103996388149393, "grad_norm": 2.1894168853759766, "learning_rate": 6.803884833951355e-06, "loss": 0.5712, "step": 9660 }, { "epoch": 0.40108147940570527, "grad_norm": 2.282061815261841, "learning_rate": 6.803257781809945e-06, "loss": 0.5412, "step": 9661 }, { "epoch": 0.4011229949299166, "grad_norm": 2.3269410133361816, "learning_rate": 6.802630697065114e-06, "loss": 0.4671, "step": 9662 }, { "epoch": 0.40116451045412793, "grad_norm": 2.413403272628784, "learning_rate": 6.8020035797282e-06, "loss": 0.4386, "step": 9663 }, { "epoch": 0.40120602597833926, "grad_norm": 2.6270241737365723, "learning_rate": 6.8013764298105425e-06, "loss": 0.4497, "step": 9664 }, { "epoch": 0.4012475415025506, "grad_norm": 2.931623935699463, "learning_rate": 6.800749247323479e-06, "loss": 0.4465, "step": 9665 }, { "epoch": 0.40128905702676193, "grad_norm": 3.2946887016296387, "learning_rate": 6.800122032278351e-06, "loss": 0.4759, "step": 9666 }, { "epoch": 0.40133057255097326, "grad_norm": 2.6604645252227783, "learning_rate": 6.799494784686498e-06, "loss": 0.7208, "step": 9667 }, { "epoch": 0.4013720880751846, "grad_norm": 2.8174617290496826, "learning_rate": 6.79886750455926e-06, "loss": 0.4487, "step": 9668 }, { "epoch": 0.40141360359939593, "grad_norm": 2.7662224769592285, "learning_rate": 6.798240191907979e-06, "loss": 0.4729, "step": 9669 }, { "epoch": 0.40145511912360726, "grad_norm": 2.8243160247802734, "learning_rate": 6.7976128467439975e-06, "loss": 0.5907, "step": 9670 }, { "epoch": 0.4014966346478186, "grad_norm": 2.499844551086426, "learning_rate": 6.796985469078656e-06, "loss": 0.3824, "step": 9671 }, { "epoch": 0.40153815017202993, "grad_norm": 2.0099172592163086, "learning_rate": 6.7963580589233014e-06, "loss": 0.4909, "step": 9672 }, { "epoch": 0.40157966569624126, "grad_norm": 2.743579626083374, "learning_rate": 6.795730616289274e-06, "loss": 0.5447, "step": 9673 }, { "epoch": 0.40162118122045265, "grad_norm": 2.0713202953338623, "learning_rate": 6.79510314118792e-06, "loss": 0.4577, "step": 9674 }, { "epoch": 0.401662696744664, "grad_norm": 3.158703327178955, "learning_rate": 6.794475633630584e-06, "loss": 0.6222, "step": 9675 }, { "epoch": 0.4017042122688753, "grad_norm": 2.461500406265259, "learning_rate": 6.793848093628611e-06, "loss": 0.4776, "step": 9676 }, { "epoch": 0.40174572779308665, "grad_norm": 2.4277329444885254, "learning_rate": 6.793220521193347e-06, "loss": 0.646, "step": 9677 }, { "epoch": 0.401787243317298, "grad_norm": 1.8484807014465332, "learning_rate": 6.79259291633614e-06, "loss": 0.4671, "step": 9678 }, { "epoch": 0.4018287588415093, "grad_norm": 2.432816743850708, "learning_rate": 6.791965279068336e-06, "loss": 0.6363, "step": 9679 }, { "epoch": 0.40187027436572065, "grad_norm": 3.3024377822875977, "learning_rate": 6.791337609401283e-06, "loss": 0.7326, "step": 9680 }, { "epoch": 0.401911789889932, "grad_norm": 2.357762575149536, "learning_rate": 6.79070990734633e-06, "loss": 0.484, "step": 9681 }, { "epoch": 0.4019533054141433, "grad_norm": 2.383167028427124, "learning_rate": 6.790082172914826e-06, "loss": 0.5527, "step": 9682 }, { "epoch": 0.40199482093835465, "grad_norm": 2.4763424396514893, "learning_rate": 6.78945440611812e-06, "loss": 0.5679, "step": 9683 }, { "epoch": 0.402036336462566, "grad_norm": 2.2559828758239746, "learning_rate": 6.788826606967563e-06, "loss": 0.4895, "step": 9684 }, { "epoch": 0.4020778519867773, "grad_norm": 2.2479450702667236, "learning_rate": 6.788198775474504e-06, "loss": 0.488, "step": 9685 }, { "epoch": 0.40211936751098865, "grad_norm": 2.667715072631836, "learning_rate": 6.787570911650296e-06, "loss": 0.6434, "step": 9686 }, { "epoch": 0.4021608830352, "grad_norm": 2.3019824028015137, "learning_rate": 6.786943015506292e-06, "loss": 0.6414, "step": 9687 }, { "epoch": 0.4022023985594113, "grad_norm": 2.402233839035034, "learning_rate": 6.786315087053843e-06, "loss": 0.5225, "step": 9688 }, { "epoch": 0.40224391408362264, "grad_norm": 2.69838285446167, "learning_rate": 6.785687126304301e-06, "loss": 0.5374, "step": 9689 }, { "epoch": 0.402285429607834, "grad_norm": 2.661057710647583, "learning_rate": 6.785059133269022e-06, "loss": 0.5456, "step": 9690 }, { "epoch": 0.4023269451320453, "grad_norm": 2.9188742637634277, "learning_rate": 6.78443110795936e-06, "loss": 0.5814, "step": 9691 }, { "epoch": 0.40236846065625664, "grad_norm": 1.9135923385620117, "learning_rate": 6.783803050386669e-06, "loss": 0.4018, "step": 9692 }, { "epoch": 0.402409976180468, "grad_norm": 2.7249999046325684, "learning_rate": 6.783174960562304e-06, "loss": 0.5812, "step": 9693 }, { "epoch": 0.4024514917046793, "grad_norm": 2.5693774223327637, "learning_rate": 6.782546838497622e-06, "loss": 0.4712, "step": 9694 }, { "epoch": 0.40249300722889064, "grad_norm": 2.586291551589966, "learning_rate": 6.781918684203979e-06, "loss": 0.5365, "step": 9695 }, { "epoch": 0.402534522753102, "grad_norm": 2.485112190246582, "learning_rate": 6.781290497692734e-06, "loss": 0.4659, "step": 9696 }, { "epoch": 0.4025760382773133, "grad_norm": 2.8843109607696533, "learning_rate": 6.780662278975241e-06, "loss": 0.5277, "step": 9697 }, { "epoch": 0.40261755380152464, "grad_norm": 2.8514108657836914, "learning_rate": 6.780034028062863e-06, "loss": 0.567, "step": 9698 }, { "epoch": 0.402659069325736, "grad_norm": 2.308992862701416, "learning_rate": 6.7794057449669545e-06, "loss": 0.4486, "step": 9699 }, { "epoch": 0.4027005848499473, "grad_norm": 2.534900188446045, "learning_rate": 6.77877742969888e-06, "loss": 0.4229, "step": 9700 }, { "epoch": 0.40274210037415864, "grad_norm": 1.91641104221344, "learning_rate": 6.778149082269995e-06, "loss": 0.3384, "step": 9701 }, { "epoch": 0.40278361589837, "grad_norm": 2.198425769805908, "learning_rate": 6.777520702691662e-06, "loss": 0.4556, "step": 9702 }, { "epoch": 0.4028251314225813, "grad_norm": 2.3728749752044678, "learning_rate": 6.776892290975243e-06, "loss": 0.5431, "step": 9703 }, { "epoch": 0.40286664694679264, "grad_norm": 3.0025501251220703, "learning_rate": 6.7762638471320995e-06, "loss": 0.4804, "step": 9704 }, { "epoch": 0.402908162471004, "grad_norm": 2.216154098510742, "learning_rate": 6.775635371173595e-06, "loss": 0.5138, "step": 9705 }, { "epoch": 0.40294967799521536, "grad_norm": 2.234513521194458, "learning_rate": 6.775006863111088e-06, "loss": 0.4443, "step": 9706 }, { "epoch": 0.4029911935194267, "grad_norm": 2.40496563911438, "learning_rate": 6.774378322955947e-06, "loss": 0.5425, "step": 9707 }, { "epoch": 0.403032709043638, "grad_norm": 2.3474678993225098, "learning_rate": 6.773749750719534e-06, "loss": 0.5029, "step": 9708 }, { "epoch": 0.40307422456784936, "grad_norm": 2.355172634124756, "learning_rate": 6.7731211464132155e-06, "loss": 0.4728, "step": 9709 }, { "epoch": 0.4031157400920607, "grad_norm": 2.3489742279052734, "learning_rate": 6.772492510048353e-06, "loss": 0.5052, "step": 9710 }, { "epoch": 0.403157255616272, "grad_norm": 2.0972719192504883, "learning_rate": 6.771863841636317e-06, "loss": 0.4481, "step": 9711 }, { "epoch": 0.40319877114048336, "grad_norm": 2.523280620574951, "learning_rate": 6.7712351411884716e-06, "loss": 0.464, "step": 9712 }, { "epoch": 0.4032402866646947, "grad_norm": 2.0359342098236084, "learning_rate": 6.7706064087161845e-06, "loss": 0.5511, "step": 9713 }, { "epoch": 0.403281802188906, "grad_norm": 2.3887436389923096, "learning_rate": 6.769977644230823e-06, "loss": 0.5676, "step": 9714 }, { "epoch": 0.40332331771311736, "grad_norm": 2.4931507110595703, "learning_rate": 6.769348847743756e-06, "loss": 0.5048, "step": 9715 }, { "epoch": 0.4033648332373287, "grad_norm": 2.144859552383423, "learning_rate": 6.76872001926635e-06, "loss": 0.4103, "step": 9716 }, { "epoch": 0.40340634876154, "grad_norm": 2.6707613468170166, "learning_rate": 6.7680911588099794e-06, "loss": 0.4345, "step": 9717 }, { "epoch": 0.40344786428575136, "grad_norm": 2.1575613021850586, "learning_rate": 6.767462266386009e-06, "loss": 0.4477, "step": 9718 }, { "epoch": 0.4034893798099627, "grad_norm": 2.436965227127075, "learning_rate": 6.766833342005812e-06, "loss": 0.5867, "step": 9719 }, { "epoch": 0.403530895334174, "grad_norm": 2.413254976272583, "learning_rate": 6.766204385680759e-06, "loss": 0.5485, "step": 9720 }, { "epoch": 0.40357241085838536, "grad_norm": 2.556466817855835, "learning_rate": 6.76557539742222e-06, "loss": 0.5533, "step": 9721 }, { "epoch": 0.4036139263825967, "grad_norm": 2.1662073135375977, "learning_rate": 6.7649463772415715e-06, "loss": 0.562, "step": 9722 }, { "epoch": 0.403655441906808, "grad_norm": 1.9810400009155273, "learning_rate": 6.764317325150183e-06, "loss": 0.3788, "step": 9723 }, { "epoch": 0.40369695743101935, "grad_norm": 2.727456569671631, "learning_rate": 6.763688241159427e-06, "loss": 0.5456, "step": 9724 }, { "epoch": 0.4037384729552307, "grad_norm": 2.2515170574188232, "learning_rate": 6.763059125280681e-06, "loss": 0.5688, "step": 9725 }, { "epoch": 0.403779988479442, "grad_norm": 2.7691802978515625, "learning_rate": 6.762429977525318e-06, "loss": 0.5893, "step": 9726 }, { "epoch": 0.40382150400365335, "grad_norm": 2.2412006855010986, "learning_rate": 6.761800797904712e-06, "loss": 0.5281, "step": 9727 }, { "epoch": 0.4038630195278647, "grad_norm": 2.4347331523895264, "learning_rate": 6.76117158643024e-06, "loss": 0.5916, "step": 9728 }, { "epoch": 0.403904535052076, "grad_norm": 2.33379864692688, "learning_rate": 6.760542343113278e-06, "loss": 0.548, "step": 9729 }, { "epoch": 0.40394605057628735, "grad_norm": 2.4606080055236816, "learning_rate": 6.759913067965203e-06, "loss": 0.5724, "step": 9730 }, { "epoch": 0.4039875661004987, "grad_norm": 2.8559272289276123, "learning_rate": 6.7592837609973925e-06, "loss": 0.5495, "step": 9731 }, { "epoch": 0.40402908162471, "grad_norm": 2.136962413787842, "learning_rate": 6.758654422221225e-06, "loss": 0.5188, "step": 9732 }, { "epoch": 0.40407059714892135, "grad_norm": 2.0877811908721924, "learning_rate": 6.758025051648078e-06, "loss": 0.4527, "step": 9733 }, { "epoch": 0.4041121126731327, "grad_norm": 2.682224750518799, "learning_rate": 6.7573956492893326e-06, "loss": 0.6016, "step": 9734 }, { "epoch": 0.404153628197344, "grad_norm": 2.684135913848877, "learning_rate": 6.756766215156366e-06, "loss": 0.446, "step": 9735 }, { "epoch": 0.4041951437215554, "grad_norm": 2.424657106399536, "learning_rate": 6.75613674926056e-06, "loss": 0.6214, "step": 9736 }, { "epoch": 0.40423665924576674, "grad_norm": 2.1086602210998535, "learning_rate": 6.755507251613296e-06, "loss": 0.5079, "step": 9737 }, { "epoch": 0.40427817476997807, "grad_norm": 2.5279605388641357, "learning_rate": 6.754877722225954e-06, "loss": 0.5571, "step": 9738 }, { "epoch": 0.4043196902941894, "grad_norm": 2.6862380504608154, "learning_rate": 6.754248161109919e-06, "loss": 0.5978, "step": 9739 }, { "epoch": 0.40436120581840074, "grad_norm": 2.130922317504883, "learning_rate": 6.7536185682765686e-06, "loss": 0.5596, "step": 9740 }, { "epoch": 0.40440272134261207, "grad_norm": 2.5066797733306885, "learning_rate": 6.752988943737291e-06, "loss": 0.6255, "step": 9741 }, { "epoch": 0.4044442368668234, "grad_norm": 2.4290993213653564, "learning_rate": 6.752359287503467e-06, "loss": 0.4966, "step": 9742 }, { "epoch": 0.40448575239103474, "grad_norm": 2.6246492862701416, "learning_rate": 6.751729599586484e-06, "loss": 0.5004, "step": 9743 }, { "epoch": 0.40452726791524607, "grad_norm": 2.586034059524536, "learning_rate": 6.751099879997723e-06, "loss": 0.5434, "step": 9744 }, { "epoch": 0.4045687834394574, "grad_norm": 2.382686138153076, "learning_rate": 6.750470128748573e-06, "loss": 0.5771, "step": 9745 }, { "epoch": 0.40461029896366874, "grad_norm": 2.525892734527588, "learning_rate": 6.749840345850418e-06, "loss": 0.4921, "step": 9746 }, { "epoch": 0.40465181448788007, "grad_norm": 2.8160595893859863, "learning_rate": 6.749210531314644e-06, "loss": 0.5115, "step": 9747 }, { "epoch": 0.4046933300120914, "grad_norm": 2.528444290161133, "learning_rate": 6.748580685152641e-06, "loss": 0.6386, "step": 9748 }, { "epoch": 0.40473484553630273, "grad_norm": 2.114978313446045, "learning_rate": 6.7479508073757925e-06, "loss": 0.4862, "step": 9749 }, { "epoch": 0.40477636106051407, "grad_norm": 2.4808709621429443, "learning_rate": 6.747320897995493e-06, "loss": 0.5154, "step": 9750 }, { "epoch": 0.4048178765847254, "grad_norm": 2.6208770275115967, "learning_rate": 6.746690957023125e-06, "loss": 0.4619, "step": 9751 }, { "epoch": 0.40485939210893673, "grad_norm": 2.4475479125976562, "learning_rate": 6.746060984470084e-06, "loss": 0.5481, "step": 9752 }, { "epoch": 0.40490090763314807, "grad_norm": 2.151231288909912, "learning_rate": 6.7454309803477544e-06, "loss": 0.5915, "step": 9753 }, { "epoch": 0.4049424231573594, "grad_norm": 3.071721315383911, "learning_rate": 6.744800944667531e-06, "loss": 0.4928, "step": 9754 }, { "epoch": 0.40498393868157073, "grad_norm": 3.7319884300231934, "learning_rate": 6.744170877440804e-06, "loss": 0.5846, "step": 9755 }, { "epoch": 0.40502545420578207, "grad_norm": 2.1106772422790527, "learning_rate": 6.743540778678964e-06, "loss": 0.4913, "step": 9756 }, { "epoch": 0.4050669697299934, "grad_norm": 2.4395744800567627, "learning_rate": 6.742910648393406e-06, "loss": 0.5251, "step": 9757 }, { "epoch": 0.40510848525420473, "grad_norm": 2.0606613159179688, "learning_rate": 6.74228048659552e-06, "loss": 0.4869, "step": 9758 }, { "epoch": 0.40515000077841606, "grad_norm": 2.735442638397217, "learning_rate": 6.7416502932967e-06, "loss": 0.5321, "step": 9759 }, { "epoch": 0.4051915163026274, "grad_norm": 2.3555684089660645, "learning_rate": 6.741020068508339e-06, "loss": 0.4223, "step": 9760 }, { "epoch": 0.40523303182683873, "grad_norm": 2.1819658279418945, "learning_rate": 6.740389812241836e-06, "loss": 0.4818, "step": 9761 }, { "epoch": 0.40527454735105006, "grad_norm": 2.5081489086151123, "learning_rate": 6.739759524508583e-06, "loss": 0.625, "step": 9762 }, { "epoch": 0.4053160628752614, "grad_norm": 2.3979926109313965, "learning_rate": 6.739129205319976e-06, "loss": 0.5181, "step": 9763 }, { "epoch": 0.40535757839947273, "grad_norm": 2.5715014934539795, "learning_rate": 6.738498854687412e-06, "loss": 0.6583, "step": 9764 }, { "epoch": 0.40539909392368406, "grad_norm": 2.743128776550293, "learning_rate": 6.7378684726222875e-06, "loss": 0.5191, "step": 9765 }, { "epoch": 0.4054406094478954, "grad_norm": 2.908026933670044, "learning_rate": 6.737238059136e-06, "loss": 0.5572, "step": 9766 }, { "epoch": 0.4054821249721068, "grad_norm": 2.1347579956054688, "learning_rate": 6.736607614239947e-06, "loss": 0.4792, "step": 9767 }, { "epoch": 0.4055236404963181, "grad_norm": 2.8221988677978516, "learning_rate": 6.735977137945529e-06, "loss": 0.4516, "step": 9768 }, { "epoch": 0.40556515602052945, "grad_norm": 2.5475881099700928, "learning_rate": 6.7353466302641435e-06, "loss": 0.5184, "step": 9769 }, { "epoch": 0.4056066715447408, "grad_norm": 2.4018304347991943, "learning_rate": 6.734716091207192e-06, "loss": 0.4307, "step": 9770 }, { "epoch": 0.4056481870689521, "grad_norm": 2.369762897491455, "learning_rate": 6.7340855207860724e-06, "loss": 0.6514, "step": 9771 }, { "epoch": 0.40568970259316345, "grad_norm": 2.423645257949829, "learning_rate": 6.733454919012186e-06, "loss": 0.4634, "step": 9772 }, { "epoch": 0.4057312181173748, "grad_norm": 2.859889268875122, "learning_rate": 6.732824285896935e-06, "loss": 0.4599, "step": 9773 }, { "epoch": 0.4057727336415861, "grad_norm": 2.384158134460449, "learning_rate": 6.732193621451724e-06, "loss": 0.5426, "step": 9774 }, { "epoch": 0.40581424916579745, "grad_norm": 2.1833181381225586, "learning_rate": 6.73156292568795e-06, "loss": 0.4082, "step": 9775 }, { "epoch": 0.4058557646900088, "grad_norm": 2.166672945022583, "learning_rate": 6.730932198617021e-06, "loss": 0.4443, "step": 9776 }, { "epoch": 0.4058972802142201, "grad_norm": 2.438575029373169, "learning_rate": 6.730301440250337e-06, "loss": 0.5538, "step": 9777 }, { "epoch": 0.40593879573843145, "grad_norm": 2.374037504196167, "learning_rate": 6.729670650599307e-06, "loss": 0.6526, "step": 9778 }, { "epoch": 0.4059803112626428, "grad_norm": 2.892230749130249, "learning_rate": 6.7290398296753316e-06, "loss": 0.4978, "step": 9779 }, { "epoch": 0.4060218267868541, "grad_norm": 2.490089178085327, "learning_rate": 6.728408977489818e-06, "loss": 0.5802, "step": 9780 }, { "epoch": 0.40606334231106544, "grad_norm": 2.4710917472839355, "learning_rate": 6.727778094054172e-06, "loss": 0.4079, "step": 9781 }, { "epoch": 0.4061048578352768, "grad_norm": 2.1302499771118164, "learning_rate": 6.727147179379799e-06, "loss": 0.5144, "step": 9782 }, { "epoch": 0.4061463733594881, "grad_norm": 2.5852372646331787, "learning_rate": 6.726516233478108e-06, "loss": 0.4821, "step": 9783 }, { "epoch": 0.40618788888369944, "grad_norm": 2.794818878173828, "learning_rate": 6.725885256360507e-06, "loss": 0.6159, "step": 9784 }, { "epoch": 0.4062294044079108, "grad_norm": 2.2916343212127686, "learning_rate": 6.725254248038402e-06, "loss": 0.372, "step": 9785 }, { "epoch": 0.4062709199321221, "grad_norm": 13.024474143981934, "learning_rate": 6.724623208523202e-06, "loss": 0.7134, "step": 9786 }, { "epoch": 0.40631243545633344, "grad_norm": 2.378894329071045, "learning_rate": 6.723992137826319e-06, "loss": 0.6094, "step": 9787 }, { "epoch": 0.4063539509805448, "grad_norm": 2.564059019088745, "learning_rate": 6.72336103595916e-06, "loss": 0.4063, "step": 9788 }, { "epoch": 0.4063954665047561, "grad_norm": 2.0357038974761963, "learning_rate": 6.7227299029331375e-06, "loss": 0.4811, "step": 9789 }, { "epoch": 0.40643698202896744, "grad_norm": 2.3584377765655518, "learning_rate": 6.7220987387596605e-06, "loss": 0.56, "step": 9790 }, { "epoch": 0.4064784975531788, "grad_norm": 2.117502212524414, "learning_rate": 6.721467543450144e-06, "loss": 0.4612, "step": 9791 }, { "epoch": 0.4065200130773901, "grad_norm": 2.679394006729126, "learning_rate": 6.720836317015997e-06, "loss": 0.6491, "step": 9792 }, { "epoch": 0.40656152860160144, "grad_norm": 2.4939422607421875, "learning_rate": 6.720205059468633e-06, "loss": 0.4746, "step": 9793 }, { "epoch": 0.4066030441258128, "grad_norm": 2.359121322631836, "learning_rate": 6.719573770819467e-06, "loss": 0.4985, "step": 9794 }, { "epoch": 0.4066445596500241, "grad_norm": 2.47381591796875, "learning_rate": 6.718942451079911e-06, "loss": 0.4862, "step": 9795 }, { "epoch": 0.40668607517423544, "grad_norm": 2.5015904903411865, "learning_rate": 6.718311100261379e-06, "loss": 0.4169, "step": 9796 }, { "epoch": 0.40672759069844683, "grad_norm": 2.201931953430176, "learning_rate": 6.7176797183752885e-06, "loss": 0.6114, "step": 9797 }, { "epoch": 0.40676910622265816, "grad_norm": 2.9878530502319336, "learning_rate": 6.717048305433053e-06, "loss": 0.6791, "step": 9798 }, { "epoch": 0.4068106217468695, "grad_norm": 2.672304153442383, "learning_rate": 6.716416861446088e-06, "loss": 0.6437, "step": 9799 }, { "epoch": 0.4068521372710808, "grad_norm": 2.5775513648986816, "learning_rate": 6.715785386425813e-06, "loss": 0.4976, "step": 9800 }, { "epoch": 0.40689365279529216, "grad_norm": 2.4976413249969482, "learning_rate": 6.7151538803836425e-06, "loss": 0.4705, "step": 9801 }, { "epoch": 0.4069351683195035, "grad_norm": 2.4303083419799805, "learning_rate": 6.714522343330997e-06, "loss": 0.5371, "step": 9802 }, { "epoch": 0.4069766838437148, "grad_norm": 2.5075602531433105, "learning_rate": 6.7138907752792925e-06, "loss": 0.5128, "step": 9803 }, { "epoch": 0.40701819936792616, "grad_norm": 2.423771858215332, "learning_rate": 6.713259176239949e-06, "loss": 0.509, "step": 9804 }, { "epoch": 0.4070597148921375, "grad_norm": 2.7932825088500977, "learning_rate": 6.7126275462243855e-06, "loss": 0.4037, "step": 9805 }, { "epoch": 0.4071012304163488, "grad_norm": 2.342477798461914, "learning_rate": 6.711995885244022e-06, "loss": 0.4726, "step": 9806 }, { "epoch": 0.40714274594056016, "grad_norm": 2.394540786743164, "learning_rate": 6.7113641933102816e-06, "loss": 0.5235, "step": 9807 }, { "epoch": 0.4071842614647715, "grad_norm": 2.5705723762512207, "learning_rate": 6.7107324704345815e-06, "loss": 0.3905, "step": 9808 }, { "epoch": 0.4072257769889828, "grad_norm": 3.0492103099823, "learning_rate": 6.710100716628345e-06, "loss": 0.6433, "step": 9809 }, { "epoch": 0.40726729251319416, "grad_norm": 2.634199619293213, "learning_rate": 6.709468931902995e-06, "loss": 0.6835, "step": 9810 }, { "epoch": 0.4073088080374055, "grad_norm": 2.689906358718872, "learning_rate": 6.708837116269954e-06, "loss": 0.3899, "step": 9811 }, { "epoch": 0.4073503235616168, "grad_norm": 2.283694267272949, "learning_rate": 6.708205269740644e-06, "loss": 0.4347, "step": 9812 }, { "epoch": 0.40739183908582816, "grad_norm": 2.7915215492248535, "learning_rate": 6.707573392326493e-06, "loss": 0.3769, "step": 9813 }, { "epoch": 0.4074333546100395, "grad_norm": 2.6304891109466553, "learning_rate": 6.706941484038921e-06, "loss": 0.5171, "step": 9814 }, { "epoch": 0.4074748701342508, "grad_norm": 2.6991045475006104, "learning_rate": 6.706309544889355e-06, "loss": 0.4068, "step": 9815 }, { "epoch": 0.40751638565846215, "grad_norm": 2.2167696952819824, "learning_rate": 6.705677574889222e-06, "loss": 0.4938, "step": 9816 }, { "epoch": 0.4075579011826735, "grad_norm": 2.5587756633758545, "learning_rate": 6.705045574049947e-06, "loss": 0.3655, "step": 9817 }, { "epoch": 0.4075994167068848, "grad_norm": 2.1427178382873535, "learning_rate": 6.7044135423829545e-06, "loss": 0.4346, "step": 9818 }, { "epoch": 0.40764093223109615, "grad_norm": 2.873340129852295, "learning_rate": 6.7037814798996755e-06, "loss": 0.512, "step": 9819 }, { "epoch": 0.4076824477553075, "grad_norm": 2.5359206199645996, "learning_rate": 6.703149386611536e-06, "loss": 0.4783, "step": 9820 }, { "epoch": 0.4077239632795188, "grad_norm": 2.344444751739502, "learning_rate": 6.702517262529965e-06, "loss": 0.4961, "step": 9821 }, { "epoch": 0.40776547880373015, "grad_norm": 3.7184300422668457, "learning_rate": 6.70188510766639e-06, "loss": 0.4247, "step": 9822 }, { "epoch": 0.4078069943279415, "grad_norm": 2.7896816730499268, "learning_rate": 6.701252922032243e-06, "loss": 0.628, "step": 9823 }, { "epoch": 0.4078485098521528, "grad_norm": 2.402387857437134, "learning_rate": 6.7006207056389516e-06, "loss": 0.523, "step": 9824 }, { "epoch": 0.40789002537636415, "grad_norm": 2.3776767253875732, "learning_rate": 6.699988458497949e-06, "loss": 0.5559, "step": 9825 }, { "epoch": 0.4079315409005755, "grad_norm": 2.6183178424835205, "learning_rate": 6.699356180620664e-06, "loss": 0.4929, "step": 9826 }, { "epoch": 0.4079730564247868, "grad_norm": 2.0604264736175537, "learning_rate": 6.698723872018529e-06, "loss": 0.5474, "step": 9827 }, { "epoch": 0.4080145719489982, "grad_norm": 2.476313352584839, "learning_rate": 6.6980915327029785e-06, "loss": 0.5035, "step": 9828 }, { "epoch": 0.40805608747320954, "grad_norm": 2.538661479949951, "learning_rate": 6.6974591626854425e-06, "loss": 0.6, "step": 9829 }, { "epoch": 0.40809760299742087, "grad_norm": 2.4297611713409424, "learning_rate": 6.696826761977355e-06, "loss": 0.4079, "step": 9830 }, { "epoch": 0.4081391185216322, "grad_norm": 2.564112663269043, "learning_rate": 6.6961943305901515e-06, "loss": 0.4888, "step": 9831 }, { "epoch": 0.40818063404584354, "grad_norm": 2.354707717895508, "learning_rate": 6.6955618685352664e-06, "loss": 0.4315, "step": 9832 }, { "epoch": 0.40822214957005487, "grad_norm": 2.3785250186920166, "learning_rate": 6.694929375824133e-06, "loss": 0.4759, "step": 9833 }, { "epoch": 0.4082636650942662, "grad_norm": 2.171975612640381, "learning_rate": 6.694296852468189e-06, "loss": 0.6016, "step": 9834 }, { "epoch": 0.40830518061847754, "grad_norm": 2.0384418964385986, "learning_rate": 6.693664298478868e-06, "loss": 0.3204, "step": 9835 }, { "epoch": 0.40834669614268887, "grad_norm": 2.7744312286376953, "learning_rate": 6.693031713867609e-06, "loss": 0.4847, "step": 9836 }, { "epoch": 0.4083882116669002, "grad_norm": 2.2544240951538086, "learning_rate": 6.692399098645848e-06, "loss": 0.4423, "step": 9837 }, { "epoch": 0.40842972719111154, "grad_norm": 2.813234806060791, "learning_rate": 6.691766452825024e-06, "loss": 0.4567, "step": 9838 }, { "epoch": 0.40847124271532287, "grad_norm": 2.3750243186950684, "learning_rate": 6.691133776416575e-06, "loss": 0.668, "step": 9839 }, { "epoch": 0.4085127582395342, "grad_norm": 2.4410388469696045, "learning_rate": 6.6905010694319395e-06, "loss": 0.5306, "step": 9840 }, { "epoch": 0.40855427376374553, "grad_norm": 2.489150047302246, "learning_rate": 6.689868331882559e-06, "loss": 0.5752, "step": 9841 }, { "epoch": 0.40859578928795687, "grad_norm": 2.2877016067504883, "learning_rate": 6.689235563779869e-06, "loss": 0.4631, "step": 9842 }, { "epoch": 0.4086373048121682, "grad_norm": 2.0003161430358887, "learning_rate": 6.688602765135316e-06, "loss": 0.4356, "step": 9843 }, { "epoch": 0.40867882033637953, "grad_norm": 2.2403881549835205, "learning_rate": 6.687969935960336e-06, "loss": 0.5911, "step": 9844 }, { "epoch": 0.40872033586059087, "grad_norm": 2.576833724975586, "learning_rate": 6.687337076266375e-06, "loss": 0.5339, "step": 9845 }, { "epoch": 0.4087618513848022, "grad_norm": 2.1252474784851074, "learning_rate": 6.686704186064873e-06, "loss": 0.3424, "step": 9846 }, { "epoch": 0.40880336690901353, "grad_norm": 2.6463699340820312, "learning_rate": 6.686071265367273e-06, "loss": 0.461, "step": 9847 }, { "epoch": 0.40884488243322487, "grad_norm": 3.1803319454193115, "learning_rate": 6.685438314185018e-06, "loss": 0.5792, "step": 9848 }, { "epoch": 0.4088863979574362, "grad_norm": 2.5515313148498535, "learning_rate": 6.6848053325295525e-06, "loss": 0.5131, "step": 9849 }, { "epoch": 0.40892791348164753, "grad_norm": 2.3563599586486816, "learning_rate": 6.684172320412321e-06, "loss": 0.4991, "step": 9850 }, { "epoch": 0.40896942900585886, "grad_norm": 3.2179110050201416, "learning_rate": 6.6835392778447685e-06, "loss": 0.5335, "step": 9851 }, { "epoch": 0.4090109445300702, "grad_norm": 2.2941205501556396, "learning_rate": 6.682906204838341e-06, "loss": 0.4872, "step": 9852 }, { "epoch": 0.40905246005428153, "grad_norm": 2.4875245094299316, "learning_rate": 6.682273101404483e-06, "loss": 0.5181, "step": 9853 }, { "epoch": 0.40909397557849286, "grad_norm": 2.8476712703704834, "learning_rate": 6.681639967554644e-06, "loss": 0.496, "step": 9854 }, { "epoch": 0.4091354911027042, "grad_norm": 2.3476202487945557, "learning_rate": 6.681006803300268e-06, "loss": 0.5687, "step": 9855 }, { "epoch": 0.40917700662691553, "grad_norm": 2.5737321376800537, "learning_rate": 6.680373608652807e-06, "loss": 0.5447, "step": 9856 }, { "epoch": 0.40921852215112686, "grad_norm": 2.7995831966400146, "learning_rate": 6.679740383623705e-06, "loss": 0.6688, "step": 9857 }, { "epoch": 0.4092600376753382, "grad_norm": 2.7674193382263184, "learning_rate": 6.6791071282244135e-06, "loss": 0.5006, "step": 9858 }, { "epoch": 0.4093015531995496, "grad_norm": 2.102464437484741, "learning_rate": 6.678473842466381e-06, "loss": 0.4902, "step": 9859 }, { "epoch": 0.4093430687237609, "grad_norm": 2.8328380584716797, "learning_rate": 6.677840526361058e-06, "loss": 0.6336, "step": 9860 }, { "epoch": 0.40938458424797225, "grad_norm": 2.6427254676818848, "learning_rate": 6.677207179919893e-06, "loss": 0.3884, "step": 9861 }, { "epoch": 0.4094260997721836, "grad_norm": 2.325329542160034, "learning_rate": 6.67657380315434e-06, "loss": 0.5247, "step": 9862 }, { "epoch": 0.4094676152963949, "grad_norm": 2.744210720062256, "learning_rate": 6.67594039607585e-06, "loss": 0.5023, "step": 9863 }, { "epoch": 0.40950913082060625, "grad_norm": 1.8934143781661987, "learning_rate": 6.675306958695874e-06, "loss": 0.4648, "step": 9864 }, { "epoch": 0.4095506463448176, "grad_norm": 2.5657925605773926, "learning_rate": 6.6746734910258645e-06, "loss": 0.5439, "step": 9865 }, { "epoch": 0.4095921618690289, "grad_norm": 2.5482890605926514, "learning_rate": 6.674039993077276e-06, "loss": 0.4915, "step": 9866 }, { "epoch": 0.40963367739324025, "grad_norm": 2.268342971801758, "learning_rate": 6.673406464861563e-06, "loss": 0.4872, "step": 9867 }, { "epoch": 0.4096751929174516, "grad_norm": 2.684119462966919, "learning_rate": 6.672772906390177e-06, "loss": 0.4738, "step": 9868 }, { "epoch": 0.4097167084416629, "grad_norm": 2.557338237762451, "learning_rate": 6.6721393176745764e-06, "loss": 0.5109, "step": 9869 }, { "epoch": 0.40975822396587425, "grad_norm": 3.3826606273651123, "learning_rate": 6.6715056987262136e-06, "loss": 0.6377, "step": 9870 }, { "epoch": 0.4097997394900856, "grad_norm": 2.2636404037475586, "learning_rate": 6.670872049556547e-06, "loss": 0.3539, "step": 9871 }, { "epoch": 0.4098412550142969, "grad_norm": 2.064951181411743, "learning_rate": 6.670238370177033e-06, "loss": 0.4826, "step": 9872 }, { "epoch": 0.40988277053850825, "grad_norm": 2.358487844467163, "learning_rate": 6.669604660599127e-06, "loss": 0.4189, "step": 9873 }, { "epoch": 0.4099242860627196, "grad_norm": 2.470569372177124, "learning_rate": 6.6689709208342855e-06, "loss": 0.3461, "step": 9874 }, { "epoch": 0.4099658015869309, "grad_norm": 2.846961736679077, "learning_rate": 6.668337150893971e-06, "loss": 0.647, "step": 9875 }, { "epoch": 0.41000731711114224, "grad_norm": 2.681046724319458, "learning_rate": 6.667703350789639e-06, "loss": 0.3701, "step": 9876 }, { "epoch": 0.4100488326353536, "grad_norm": 2.944988965988159, "learning_rate": 6.66706952053275e-06, "loss": 0.4629, "step": 9877 }, { "epoch": 0.4100903481595649, "grad_norm": 2.77038836479187, "learning_rate": 6.666435660134763e-06, "loss": 0.4335, "step": 9878 }, { "epoch": 0.41013186368377624, "grad_norm": 2.606955051422119, "learning_rate": 6.665801769607139e-06, "loss": 0.3895, "step": 9879 }, { "epoch": 0.4101733792079876, "grad_norm": 2.7885823249816895, "learning_rate": 6.6651678489613405e-06, "loss": 0.5803, "step": 9880 }, { "epoch": 0.4102148947321989, "grad_norm": 2.555248498916626, "learning_rate": 6.664533898208826e-06, "loss": 0.5039, "step": 9881 }, { "epoch": 0.41025641025641024, "grad_norm": 2.7315800189971924, "learning_rate": 6.66389991736106e-06, "loss": 0.5724, "step": 9882 }, { "epoch": 0.4102979257806216, "grad_norm": 2.279162645339966, "learning_rate": 6.663265906429502e-06, "loss": 0.4748, "step": 9883 }, { "epoch": 0.4103394413048329, "grad_norm": 2.3403029441833496, "learning_rate": 6.662631865425618e-06, "loss": 0.5859, "step": 9884 }, { "epoch": 0.41038095682904424, "grad_norm": 2.599184513092041, "learning_rate": 6.661997794360872e-06, "loss": 0.5685, "step": 9885 }, { "epoch": 0.4104224723532556, "grad_norm": 2.346675157546997, "learning_rate": 6.661363693246725e-06, "loss": 0.4837, "step": 9886 }, { "epoch": 0.4104639878774669, "grad_norm": 2.673593759536743, "learning_rate": 6.660729562094644e-06, "loss": 0.4963, "step": 9887 }, { "epoch": 0.41050550340167824, "grad_norm": 3.6073954105377197, "learning_rate": 6.660095400916094e-06, "loss": 0.8414, "step": 9888 }, { "epoch": 0.4105470189258896, "grad_norm": 2.4416425228118896, "learning_rate": 6.659461209722541e-06, "loss": 0.5172, "step": 9889 }, { "epoch": 0.41058853445010096, "grad_norm": 2.2929022312164307, "learning_rate": 6.658826988525451e-06, "loss": 0.5763, "step": 9890 }, { "epoch": 0.4106300499743123, "grad_norm": 3.561358690261841, "learning_rate": 6.65819273733629e-06, "loss": 0.6172, "step": 9891 }, { "epoch": 0.4106715654985236, "grad_norm": 2.3477790355682373, "learning_rate": 6.657558456166526e-06, "loss": 0.488, "step": 9892 }, { "epoch": 0.41071308102273496, "grad_norm": 2.814464569091797, "learning_rate": 6.656924145027629e-06, "loss": 0.6187, "step": 9893 }, { "epoch": 0.4107545965469463, "grad_norm": 2.9773435592651367, "learning_rate": 6.656289803931064e-06, "loss": 0.464, "step": 9894 }, { "epoch": 0.4107961120711576, "grad_norm": 2.2221808433532715, "learning_rate": 6.6556554328883035e-06, "loss": 0.4513, "step": 9895 }, { "epoch": 0.41083762759536896, "grad_norm": 2.8524210453033447, "learning_rate": 6.655021031910816e-06, "loss": 0.511, "step": 9896 }, { "epoch": 0.4108791431195803, "grad_norm": 3.529203414916992, "learning_rate": 6.65438660101007e-06, "loss": 0.6341, "step": 9897 }, { "epoch": 0.4109206586437916, "grad_norm": 2.4800920486450195, "learning_rate": 6.653752140197538e-06, "loss": 0.5136, "step": 9898 }, { "epoch": 0.41096217416800296, "grad_norm": 2.6396901607513428, "learning_rate": 6.653117649484691e-06, "loss": 0.5979, "step": 9899 }, { "epoch": 0.4110036896922143, "grad_norm": 2.079219341278076, "learning_rate": 6.652483128883e-06, "loss": 0.5271, "step": 9900 }, { "epoch": 0.4110452052164256, "grad_norm": 2.6221842765808105, "learning_rate": 6.651848578403937e-06, "loss": 0.5904, "step": 9901 }, { "epoch": 0.41108672074063696, "grad_norm": 2.437251567840576, "learning_rate": 6.6512139980589765e-06, "loss": 0.377, "step": 9902 }, { "epoch": 0.4111282362648483, "grad_norm": 2.273176670074463, "learning_rate": 6.65057938785959e-06, "loss": 0.3491, "step": 9903 }, { "epoch": 0.4111697517890596, "grad_norm": 2.0004844665527344, "learning_rate": 6.649944747817253e-06, "loss": 0.4257, "step": 9904 }, { "epoch": 0.41121126731327096, "grad_norm": 3.094127893447876, "learning_rate": 6.64931007794344e-06, "loss": 0.4688, "step": 9905 }, { "epoch": 0.4112527828374823, "grad_norm": 1.9583489894866943, "learning_rate": 6.648675378249624e-06, "loss": 0.3752, "step": 9906 }, { "epoch": 0.4112942983616936, "grad_norm": 2.826140880584717, "learning_rate": 6.648040648747283e-06, "loss": 0.5897, "step": 9907 }, { "epoch": 0.41133581388590496, "grad_norm": 2.325146198272705, "learning_rate": 6.647405889447891e-06, "loss": 0.5118, "step": 9908 }, { "epoch": 0.4113773294101163, "grad_norm": 2.6681995391845703, "learning_rate": 6.646771100362928e-06, "loss": 0.546, "step": 9909 }, { "epoch": 0.4114188449343276, "grad_norm": 2.331786632537842, "learning_rate": 6.6461362815038664e-06, "loss": 0.4347, "step": 9910 }, { "epoch": 0.41146036045853895, "grad_norm": 2.9536519050598145, "learning_rate": 6.645501432882188e-06, "loss": 0.5355, "step": 9911 }, { "epoch": 0.4115018759827503, "grad_norm": 2.845973014831543, "learning_rate": 6.644866554509369e-06, "loss": 0.496, "step": 9912 }, { "epoch": 0.4115433915069616, "grad_norm": 2.116694211959839, "learning_rate": 6.644231646396888e-06, "loss": 0.5461, "step": 9913 }, { "epoch": 0.41158490703117295, "grad_norm": 3.6763856410980225, "learning_rate": 6.643596708556226e-06, "loss": 0.5662, "step": 9914 }, { "epoch": 0.4116264225553843, "grad_norm": 2.363490581512451, "learning_rate": 6.64296174099886e-06, "loss": 0.4695, "step": 9915 }, { "epoch": 0.4116679380795956, "grad_norm": 1.8591277599334717, "learning_rate": 6.642326743736274e-06, "loss": 0.5232, "step": 9916 }, { "epoch": 0.41170945360380695, "grad_norm": 2.890666961669922, "learning_rate": 6.641691716779946e-06, "loss": 0.6671, "step": 9917 }, { "epoch": 0.4117509691280183, "grad_norm": 2.524334669113159, "learning_rate": 6.641056660141359e-06, "loss": 0.5418, "step": 9918 }, { "epoch": 0.4117924846522296, "grad_norm": 2.3398048877716064, "learning_rate": 6.640421573831994e-06, "loss": 0.4859, "step": 9919 }, { "epoch": 0.41183400017644095, "grad_norm": 2.303206443786621, "learning_rate": 6.639786457863334e-06, "loss": 0.5626, "step": 9920 }, { "epoch": 0.41187551570065234, "grad_norm": 2.317753791809082, "learning_rate": 6.639151312246863e-06, "loss": 0.5238, "step": 9921 }, { "epoch": 0.4119170312248637, "grad_norm": 2.6224992275238037, "learning_rate": 6.6385161369940645e-06, "loss": 0.6216, "step": 9922 }, { "epoch": 0.411958546749075, "grad_norm": 3.050626277923584, "learning_rate": 6.637880932116421e-06, "loss": 0.5508, "step": 9923 }, { "epoch": 0.41200006227328634, "grad_norm": 2.7514522075653076, "learning_rate": 6.6372456976254184e-06, "loss": 0.5375, "step": 9924 }, { "epoch": 0.41204157779749767, "grad_norm": 2.5086166858673096, "learning_rate": 6.6366104335325425e-06, "loss": 0.4712, "step": 9925 }, { "epoch": 0.412083093321709, "grad_norm": 2.404703140258789, "learning_rate": 6.635975139849277e-06, "loss": 0.6653, "step": 9926 }, { "epoch": 0.41212460884592034, "grad_norm": 2.866212844848633, "learning_rate": 6.635339816587109e-06, "loss": 0.5331, "step": 9927 }, { "epoch": 0.41216612437013167, "grad_norm": 3.064413070678711, "learning_rate": 6.634704463757526e-06, "loss": 0.5888, "step": 9928 }, { "epoch": 0.412207639894343, "grad_norm": 2.4237468242645264, "learning_rate": 6.6340690813720166e-06, "loss": 0.4816, "step": 9929 }, { "epoch": 0.41224915541855434, "grad_norm": 2.1882567405700684, "learning_rate": 6.633433669442066e-06, "loss": 0.4407, "step": 9930 }, { "epoch": 0.41229067094276567, "grad_norm": 2.3517916202545166, "learning_rate": 6.632798227979165e-06, "loss": 0.603, "step": 9931 }, { "epoch": 0.412332186466977, "grad_norm": 2.4485762119293213, "learning_rate": 6.6321627569947985e-06, "loss": 0.598, "step": 9932 }, { "epoch": 0.41237370199118834, "grad_norm": 2.2056756019592285, "learning_rate": 6.631527256500461e-06, "loss": 0.663, "step": 9933 }, { "epoch": 0.41241521751539967, "grad_norm": 2.233863353729248, "learning_rate": 6.630891726507641e-06, "loss": 0.5995, "step": 9934 }, { "epoch": 0.412456733039611, "grad_norm": 2.9701879024505615, "learning_rate": 6.630256167027829e-06, "loss": 0.5153, "step": 9935 }, { "epoch": 0.41249824856382233, "grad_norm": 2.5267586708068848, "learning_rate": 6.6296205780725136e-06, "loss": 0.4899, "step": 9936 }, { "epoch": 0.41253976408803367, "grad_norm": 2.446715831756592, "learning_rate": 6.628984959653188e-06, "loss": 0.5999, "step": 9937 }, { "epoch": 0.412581279612245, "grad_norm": 2.1912662982940674, "learning_rate": 6.628349311781348e-06, "loss": 0.5165, "step": 9938 }, { "epoch": 0.41262279513645633, "grad_norm": 2.506410598754883, "learning_rate": 6.62771363446848e-06, "loss": 0.3476, "step": 9939 }, { "epoch": 0.41266431066066767, "grad_norm": 2.9414429664611816, "learning_rate": 6.627077927726082e-06, "loss": 0.6646, "step": 9940 }, { "epoch": 0.412705826184879, "grad_norm": 2.375211000442505, "learning_rate": 6.626442191565645e-06, "loss": 0.6312, "step": 9941 }, { "epoch": 0.41274734170909033, "grad_norm": 1.9617153406143188, "learning_rate": 6.625806425998666e-06, "loss": 0.4822, "step": 9942 }, { "epoch": 0.41278885723330166, "grad_norm": 2.275362730026245, "learning_rate": 6.625170631036637e-06, "loss": 0.5508, "step": 9943 }, { "epoch": 0.412830372757513, "grad_norm": 2.787733554840088, "learning_rate": 6.624534806691055e-06, "loss": 0.4689, "step": 9944 }, { "epoch": 0.41287188828172433, "grad_norm": 2.5629541873931885, "learning_rate": 6.623898952973414e-06, "loss": 0.6511, "step": 9945 }, { "epoch": 0.41291340380593566, "grad_norm": 2.576664686203003, "learning_rate": 6.623263069895214e-06, "loss": 0.6726, "step": 9946 }, { "epoch": 0.412954919330147, "grad_norm": 2.384160041809082, "learning_rate": 6.622627157467949e-06, "loss": 0.4567, "step": 9947 }, { "epoch": 0.41299643485435833, "grad_norm": 3.0796077251434326, "learning_rate": 6.621991215703117e-06, "loss": 0.5522, "step": 9948 }, { "epoch": 0.41303795037856966, "grad_norm": 2.290668487548828, "learning_rate": 6.6213552446122165e-06, "loss": 0.4298, "step": 9949 }, { "epoch": 0.413079465902781, "grad_norm": 2.3667736053466797, "learning_rate": 6.620719244206745e-06, "loss": 0.5156, "step": 9950 }, { "epoch": 0.41312098142699233, "grad_norm": 2.136589527130127, "learning_rate": 6.620083214498205e-06, "loss": 0.538, "step": 9951 }, { "epoch": 0.4131624969512037, "grad_norm": 2.968721628189087, "learning_rate": 6.619447155498091e-06, "loss": 0.7507, "step": 9952 }, { "epoch": 0.41320401247541505, "grad_norm": 2.458686590194702, "learning_rate": 6.618811067217907e-06, "loss": 0.5068, "step": 9953 }, { "epoch": 0.4132455279996264, "grad_norm": 2.6838088035583496, "learning_rate": 6.618174949669152e-06, "loss": 0.5096, "step": 9954 }, { "epoch": 0.4132870435238377, "grad_norm": 3.2331438064575195, "learning_rate": 6.617538802863328e-06, "loss": 0.4339, "step": 9955 }, { "epoch": 0.41332855904804905, "grad_norm": 2.482027053833008, "learning_rate": 6.616902626811934e-06, "loss": 0.4915, "step": 9956 }, { "epoch": 0.4133700745722604, "grad_norm": 2.7373461723327637, "learning_rate": 6.616266421526477e-06, "loss": 0.5125, "step": 9957 }, { "epoch": 0.4134115900964717, "grad_norm": 2.384796380996704, "learning_rate": 6.615630187018456e-06, "loss": 0.4892, "step": 9958 }, { "epoch": 0.41345310562068305, "grad_norm": 2.1655685901641846, "learning_rate": 6.614993923299376e-06, "loss": 0.5099, "step": 9959 }, { "epoch": 0.4134946211448944, "grad_norm": 2.6623167991638184, "learning_rate": 6.61435763038074e-06, "loss": 0.5248, "step": 9960 }, { "epoch": 0.4135361366691057, "grad_norm": 2.3982455730438232, "learning_rate": 6.613721308274053e-06, "loss": 0.5654, "step": 9961 }, { "epoch": 0.41357765219331705, "grad_norm": 2.8059005737304688, "learning_rate": 6.6130849569908205e-06, "loss": 0.6077, "step": 9962 }, { "epoch": 0.4136191677175284, "grad_norm": 2.6431517601013184, "learning_rate": 6.612448576542545e-06, "loss": 0.6256, "step": 9963 }, { "epoch": 0.4136606832417397, "grad_norm": 2.6023495197296143, "learning_rate": 6.611812166940736e-06, "loss": 0.5042, "step": 9964 }, { "epoch": 0.41370219876595105, "grad_norm": 2.467400312423706, "learning_rate": 6.611175728196898e-06, "loss": 0.5445, "step": 9965 }, { "epoch": 0.4137437142901624, "grad_norm": 2.9009106159210205, "learning_rate": 6.6105392603225385e-06, "loss": 0.4563, "step": 9966 }, { "epoch": 0.4137852298143737, "grad_norm": 2.2911980152130127, "learning_rate": 6.609902763329165e-06, "loss": 0.4346, "step": 9967 }, { "epoch": 0.41382674533858504, "grad_norm": 2.375298261642456, "learning_rate": 6.609266237228285e-06, "loss": 0.5004, "step": 9968 }, { "epoch": 0.4138682608627964, "grad_norm": 2.050201416015625, "learning_rate": 6.608629682031408e-06, "loss": 0.4944, "step": 9969 }, { "epoch": 0.4139097763870077, "grad_norm": 2.385054588317871, "learning_rate": 6.6079930977500425e-06, "loss": 0.5509, "step": 9970 }, { "epoch": 0.41395129191121904, "grad_norm": 2.548362970352173, "learning_rate": 6.607356484395698e-06, "loss": 0.5011, "step": 9971 }, { "epoch": 0.4139928074354304, "grad_norm": 2.2784087657928467, "learning_rate": 6.606719841979886e-06, "loss": 0.4387, "step": 9972 }, { "epoch": 0.4140343229596417, "grad_norm": 2.6567957401275635, "learning_rate": 6.606083170514115e-06, "loss": 0.6159, "step": 9973 }, { "epoch": 0.41407583848385304, "grad_norm": 2.807194232940674, "learning_rate": 6.6054464700099e-06, "loss": 0.5751, "step": 9974 }, { "epoch": 0.4141173540080644, "grad_norm": 2.9388105869293213, "learning_rate": 6.604809740478748e-06, "loss": 0.6376, "step": 9975 }, { "epoch": 0.4141588695322757, "grad_norm": 2.3340492248535156, "learning_rate": 6.604172981932172e-06, "loss": 0.4491, "step": 9976 }, { "epoch": 0.41420038505648704, "grad_norm": 2.7006123065948486, "learning_rate": 6.603536194381688e-06, "loss": 0.4685, "step": 9977 }, { "epoch": 0.4142419005806984, "grad_norm": 2.51581072807312, "learning_rate": 6.602899377838807e-06, "loss": 0.5769, "step": 9978 }, { "epoch": 0.4142834161049097, "grad_norm": 2.1700592041015625, "learning_rate": 6.602262532315043e-06, "loss": 0.4852, "step": 9979 }, { "epoch": 0.41432493162912104, "grad_norm": 2.29692006111145, "learning_rate": 6.601625657821911e-06, "loss": 0.5531, "step": 9980 }, { "epoch": 0.4143664471533324, "grad_norm": 2.0538532733917236, "learning_rate": 6.600988754370925e-06, "loss": 0.5351, "step": 9981 }, { "epoch": 0.4144079626775437, "grad_norm": 2.719773054122925, "learning_rate": 6.600351821973601e-06, "loss": 0.3932, "step": 9982 }, { "epoch": 0.4144494782017551, "grad_norm": 2.757153272628784, "learning_rate": 6.599714860641455e-06, "loss": 0.614, "step": 9983 }, { "epoch": 0.41449099372596643, "grad_norm": 2.359562635421753, "learning_rate": 6.5990778703860035e-06, "loss": 0.5419, "step": 9984 }, { "epoch": 0.41453250925017776, "grad_norm": 3.3495635986328125, "learning_rate": 6.598440851218763e-06, "loss": 0.5016, "step": 9985 }, { "epoch": 0.4145740247743891, "grad_norm": 2.8900644779205322, "learning_rate": 6.5978038031512505e-06, "loss": 0.5693, "step": 9986 }, { "epoch": 0.4146155402986004, "grad_norm": 2.556194305419922, "learning_rate": 6.597166726194986e-06, "loss": 0.595, "step": 9987 }, { "epoch": 0.41465705582281176, "grad_norm": 2.24800443649292, "learning_rate": 6.596529620361486e-06, "loss": 0.5319, "step": 9988 }, { "epoch": 0.4146985713470231, "grad_norm": 2.4774038791656494, "learning_rate": 6.59589248566227e-06, "loss": 0.5215, "step": 9989 }, { "epoch": 0.4147400868712344, "grad_norm": 2.093317985534668, "learning_rate": 6.595255322108858e-06, "loss": 0.5447, "step": 9990 }, { "epoch": 0.41478160239544576, "grad_norm": 2.8199520111083984, "learning_rate": 6.59461812971277e-06, "loss": 0.4909, "step": 9991 }, { "epoch": 0.4148231179196571, "grad_norm": 2.121452808380127, "learning_rate": 6.5939809084855266e-06, "loss": 0.5612, "step": 9992 }, { "epoch": 0.4148646334438684, "grad_norm": 2.494293451309204, "learning_rate": 6.593343658438649e-06, "loss": 0.6032, "step": 9993 }, { "epoch": 0.41490614896807976, "grad_norm": 2.479785919189453, "learning_rate": 6.59270637958366e-06, "loss": 0.4197, "step": 9994 }, { "epoch": 0.4149476644922911, "grad_norm": 2.6665878295898438, "learning_rate": 6.592069071932078e-06, "loss": 0.6077, "step": 9995 }, { "epoch": 0.4149891800165024, "grad_norm": 2.273587703704834, "learning_rate": 6.59143173549543e-06, "loss": 0.4317, "step": 9996 }, { "epoch": 0.41503069554071376, "grad_norm": 2.2891714572906494, "learning_rate": 6.590794370285238e-06, "loss": 0.4809, "step": 9997 }, { "epoch": 0.4150722110649251, "grad_norm": 2.046599864959717, "learning_rate": 6.590156976313024e-06, "loss": 0.4154, "step": 9998 }, { "epoch": 0.4151137265891364, "grad_norm": 2.237975835800171, "learning_rate": 6.589519553590314e-06, "loss": 0.5333, "step": 9999 }, { "epoch": 0.41515524211334776, "grad_norm": 2.3896865844726562, "learning_rate": 6.588882102128633e-06, "loss": 0.5453, "step": 10000 }, { "epoch": 0.4151967576375591, "grad_norm": 2.422917604446411, "learning_rate": 6.588244621939506e-06, "loss": 0.4406, "step": 10001 }, { "epoch": 0.4152382731617704, "grad_norm": 1.8898905515670776, "learning_rate": 6.587607113034456e-06, "loss": 0.4652, "step": 10002 }, { "epoch": 0.41527978868598175, "grad_norm": 3.110975503921509, "learning_rate": 6.586969575425014e-06, "loss": 0.705, "step": 10003 }, { "epoch": 0.4153213042101931, "grad_norm": 3.1764488220214844, "learning_rate": 6.586332009122703e-06, "loss": 0.5967, "step": 10004 }, { "epoch": 0.4153628197344044, "grad_norm": 2.154155969619751, "learning_rate": 6.585694414139053e-06, "loss": 0.508, "step": 10005 }, { "epoch": 0.41540433525861575, "grad_norm": 2.3849987983703613, "learning_rate": 6.585056790485591e-06, "loss": 0.4981, "step": 10006 }, { "epoch": 0.4154458507828271, "grad_norm": 2.0174782276153564, "learning_rate": 6.584419138173846e-06, "loss": 0.4092, "step": 10007 }, { "epoch": 0.4154873663070384, "grad_norm": 2.6455392837524414, "learning_rate": 6.583781457215345e-06, "loss": 0.5325, "step": 10008 }, { "epoch": 0.41552888183124975, "grad_norm": 2.491668939590454, "learning_rate": 6.58314374762162e-06, "loss": 0.4266, "step": 10009 }, { "epoch": 0.4155703973554611, "grad_norm": 2.3550617694854736, "learning_rate": 6.582506009404198e-06, "loss": 0.3967, "step": 10010 }, { "epoch": 0.4156119128796724, "grad_norm": 2.744938611984253, "learning_rate": 6.581868242574613e-06, "loss": 0.6002, "step": 10011 }, { "epoch": 0.41565342840388375, "grad_norm": 2.2153303623199463, "learning_rate": 6.5812304471443935e-06, "loss": 0.5721, "step": 10012 }, { "epoch": 0.41569494392809514, "grad_norm": 2.431649684906006, "learning_rate": 6.580592623125071e-06, "loss": 0.4987, "step": 10013 }, { "epoch": 0.4157364594523065, "grad_norm": 2.3313090801239014, "learning_rate": 6.579954770528181e-06, "loss": 0.5296, "step": 10014 }, { "epoch": 0.4157779749765178, "grad_norm": 3.2860095500946045, "learning_rate": 6.57931688936525e-06, "loss": 0.5103, "step": 10015 }, { "epoch": 0.41581949050072914, "grad_norm": 2.2539401054382324, "learning_rate": 6.578678979647817e-06, "loss": 0.4193, "step": 10016 }, { "epoch": 0.41586100602494047, "grad_norm": 2.632312297821045, "learning_rate": 6.5780410413874105e-06, "loss": 0.592, "step": 10017 }, { "epoch": 0.4159025215491518, "grad_norm": 2.521963596343994, "learning_rate": 6.57740307459557e-06, "loss": 0.4809, "step": 10018 }, { "epoch": 0.41594403707336314, "grad_norm": 2.274022340774536, "learning_rate": 6.576765079283824e-06, "loss": 0.4859, "step": 10019 }, { "epoch": 0.41598555259757447, "grad_norm": 2.111328125, "learning_rate": 6.576127055463713e-06, "loss": 0.4963, "step": 10020 }, { "epoch": 0.4160270681217858, "grad_norm": 2.1283674240112305, "learning_rate": 6.575489003146771e-06, "loss": 0.4754, "step": 10021 }, { "epoch": 0.41606858364599714, "grad_norm": 2.1025309562683105, "learning_rate": 6.574850922344533e-06, "loss": 0.4929, "step": 10022 }, { "epoch": 0.41611009917020847, "grad_norm": 2.2673544883728027, "learning_rate": 6.5742128130685354e-06, "loss": 0.4435, "step": 10023 }, { "epoch": 0.4161516146944198, "grad_norm": 2.8605613708496094, "learning_rate": 6.573574675330319e-06, "loss": 0.5554, "step": 10024 }, { "epoch": 0.41619313021863114, "grad_norm": 2.2102487087249756, "learning_rate": 6.572936509141415e-06, "loss": 0.4847, "step": 10025 }, { "epoch": 0.41623464574284247, "grad_norm": 2.688857078552246, "learning_rate": 6.572298314513369e-06, "loss": 0.512, "step": 10026 }, { "epoch": 0.4162761612670538, "grad_norm": 2.4568042755126953, "learning_rate": 6.571660091457716e-06, "loss": 0.6554, "step": 10027 }, { "epoch": 0.41631767679126513, "grad_norm": 2.649050712585449, "learning_rate": 6.571021839985993e-06, "loss": 0.5091, "step": 10028 }, { "epoch": 0.41635919231547647, "grad_norm": 2.94791841506958, "learning_rate": 6.570383560109745e-06, "loss": 0.4568, "step": 10029 }, { "epoch": 0.4164007078396878, "grad_norm": 1.9166370630264282, "learning_rate": 6.569745251840508e-06, "loss": 0.5476, "step": 10030 }, { "epoch": 0.41644222336389913, "grad_norm": 2.61446475982666, "learning_rate": 6.569106915189825e-06, "loss": 0.6031, "step": 10031 }, { "epoch": 0.41648373888811047, "grad_norm": 2.8569366931915283, "learning_rate": 6.568468550169237e-06, "loss": 0.4142, "step": 10032 }, { "epoch": 0.4165252544123218, "grad_norm": 2.2317564487457275, "learning_rate": 6.567830156790286e-06, "loss": 0.4441, "step": 10033 }, { "epoch": 0.41656676993653313, "grad_norm": 1.5576797723770142, "learning_rate": 6.567191735064512e-06, "loss": 0.4451, "step": 10034 }, { "epoch": 0.41660828546074447, "grad_norm": 3.932570457458496, "learning_rate": 6.566553285003461e-06, "loss": 0.392, "step": 10035 }, { "epoch": 0.4166498009849558, "grad_norm": 2.265618324279785, "learning_rate": 6.565914806618674e-06, "loss": 0.557, "step": 10036 }, { "epoch": 0.41669131650916713, "grad_norm": 1.9558937549591064, "learning_rate": 6.565276299921698e-06, "loss": 0.5259, "step": 10037 }, { "epoch": 0.41673283203337846, "grad_norm": 3.002659320831299, "learning_rate": 6.564637764924073e-06, "loss": 0.5535, "step": 10038 }, { "epoch": 0.4167743475575898, "grad_norm": 2.5813705921173096, "learning_rate": 6.5639992016373475e-06, "loss": 0.47, "step": 10039 }, { "epoch": 0.41681586308180113, "grad_norm": 2.263091802597046, "learning_rate": 6.5633606100730665e-06, "loss": 0.5444, "step": 10040 }, { "epoch": 0.41685737860601246, "grad_norm": 2.069565534591675, "learning_rate": 6.562721990242774e-06, "loss": 0.4976, "step": 10041 }, { "epoch": 0.4168988941302238, "grad_norm": 2.8050127029418945, "learning_rate": 6.5620833421580176e-06, "loss": 0.5634, "step": 10042 }, { "epoch": 0.41694040965443513, "grad_norm": 2.921548366546631, "learning_rate": 6.561444665830344e-06, "loss": 0.6869, "step": 10043 }, { "epoch": 0.4169819251786465, "grad_norm": 2.6624538898468018, "learning_rate": 6.560805961271301e-06, "loss": 0.5325, "step": 10044 }, { "epoch": 0.41702344070285785, "grad_norm": 2.447355270385742, "learning_rate": 6.560167228492436e-06, "loss": 0.5979, "step": 10045 }, { "epoch": 0.4170649562270692, "grad_norm": 2.6044797897338867, "learning_rate": 6.559528467505298e-06, "loss": 0.4035, "step": 10046 }, { "epoch": 0.4171064717512805, "grad_norm": 1.8924829959869385, "learning_rate": 6.558889678321436e-06, "loss": 0.3545, "step": 10047 }, { "epoch": 0.41714798727549185, "grad_norm": 3.411034345626831, "learning_rate": 6.5582508609524e-06, "loss": 0.6862, "step": 10048 }, { "epoch": 0.4171895027997032, "grad_norm": 2.213844060897827, "learning_rate": 6.5576120154097375e-06, "loss": 0.4675, "step": 10049 }, { "epoch": 0.4172310183239145, "grad_norm": 2.3474204540252686, "learning_rate": 6.556973141705002e-06, "loss": 0.5638, "step": 10050 }, { "epoch": 0.41727253384812585, "grad_norm": 2.519045114517212, "learning_rate": 6.556334239849743e-06, "loss": 0.5183, "step": 10051 }, { "epoch": 0.4173140493723372, "grad_norm": 2.309800863265991, "learning_rate": 6.555695309855512e-06, "loss": 0.5633, "step": 10052 }, { "epoch": 0.4173555648965485, "grad_norm": 2.1431033611297607, "learning_rate": 6.5550563517338624e-06, "loss": 0.5702, "step": 10053 }, { "epoch": 0.41739708042075985, "grad_norm": 2.6207284927368164, "learning_rate": 6.554417365496345e-06, "loss": 0.4879, "step": 10054 }, { "epoch": 0.4174385959449712, "grad_norm": 2.395500421524048, "learning_rate": 6.553778351154515e-06, "loss": 0.5681, "step": 10055 }, { "epoch": 0.4174801114691825, "grad_norm": 2.702941417694092, "learning_rate": 6.553139308719923e-06, "loss": 0.555, "step": 10056 }, { "epoch": 0.41752162699339385, "grad_norm": 2.6083242893218994, "learning_rate": 6.5525002382041245e-06, "loss": 0.4064, "step": 10057 }, { "epoch": 0.4175631425176052, "grad_norm": 3.1926770210266113, "learning_rate": 6.551861139618675e-06, "loss": 0.5465, "step": 10058 }, { "epoch": 0.4176046580418165, "grad_norm": 2.06009840965271, "learning_rate": 6.551222012975129e-06, "loss": 0.5717, "step": 10059 }, { "epoch": 0.41764617356602785, "grad_norm": 2.3553144931793213, "learning_rate": 6.550582858285041e-06, "loss": 0.481, "step": 10060 }, { "epoch": 0.4176876890902392, "grad_norm": 1.8945083618164062, "learning_rate": 6.549943675559969e-06, "loss": 0.3955, "step": 10061 }, { "epoch": 0.4177292046144505, "grad_norm": 2.68935227394104, "learning_rate": 6.549304464811467e-06, "loss": 0.5484, "step": 10062 }, { "epoch": 0.41777072013866184, "grad_norm": 2.8481059074401855, "learning_rate": 6.548665226051095e-06, "loss": 0.594, "step": 10063 }, { "epoch": 0.4178122356628732, "grad_norm": 2.2843644618988037, "learning_rate": 6.548025959290408e-06, "loss": 0.5396, "step": 10064 }, { "epoch": 0.4178537511870845, "grad_norm": 2.38972544670105, "learning_rate": 6.547386664540968e-06, "loss": 0.5447, "step": 10065 }, { "epoch": 0.41789526671129584, "grad_norm": 2.3444807529449463, "learning_rate": 6.54674734181433e-06, "loss": 0.4326, "step": 10066 }, { "epoch": 0.4179367822355072, "grad_norm": 2.5285537242889404, "learning_rate": 6.546107991122053e-06, "loss": 0.3883, "step": 10067 }, { "epoch": 0.4179782977597185, "grad_norm": 2.444483757019043, "learning_rate": 6.545468612475699e-06, "loss": 0.4752, "step": 10068 }, { "epoch": 0.41801981328392984, "grad_norm": 1.9876325130462646, "learning_rate": 6.544829205886827e-06, "loss": 0.5571, "step": 10069 }, { "epoch": 0.4180613288081412, "grad_norm": 3.188669443130493, "learning_rate": 6.544189771366998e-06, "loss": 0.4762, "step": 10070 }, { "epoch": 0.4181028443323525, "grad_norm": 2.4877395629882812, "learning_rate": 6.5435503089277705e-06, "loss": 0.5979, "step": 10071 }, { "epoch": 0.41814435985656384, "grad_norm": 2.0709357261657715, "learning_rate": 6.542910818580712e-06, "loss": 0.5874, "step": 10072 }, { "epoch": 0.4181858753807752, "grad_norm": 2.8019065856933594, "learning_rate": 6.542271300337378e-06, "loss": 0.5343, "step": 10073 }, { "epoch": 0.4182273909049865, "grad_norm": 2.2636501789093018, "learning_rate": 6.5416317542093365e-06, "loss": 0.4958, "step": 10074 }, { "epoch": 0.4182689064291979, "grad_norm": 2.5673351287841797, "learning_rate": 6.540992180208148e-06, "loss": 0.6285, "step": 10075 }, { "epoch": 0.41831042195340923, "grad_norm": 2.6608803272247314, "learning_rate": 6.540352578345377e-06, "loss": 0.5066, "step": 10076 }, { "epoch": 0.41835193747762056, "grad_norm": 2.747480869293213, "learning_rate": 6.5397129486325864e-06, "loss": 0.4949, "step": 10077 }, { "epoch": 0.4183934530018319, "grad_norm": 2.432058811187744, "learning_rate": 6.5390732910813435e-06, "loss": 0.4705, "step": 10078 }, { "epoch": 0.4184349685260432, "grad_norm": 1.9884967803955078, "learning_rate": 6.538433605703212e-06, "loss": 0.3115, "step": 10079 }, { "epoch": 0.41847648405025456, "grad_norm": 2.3828365802764893, "learning_rate": 6.537793892509755e-06, "loss": 0.5284, "step": 10080 }, { "epoch": 0.4185179995744659, "grad_norm": 1.997473120689392, "learning_rate": 6.537154151512544e-06, "loss": 0.578, "step": 10081 }, { "epoch": 0.4185595150986772, "grad_norm": 2.209496021270752, "learning_rate": 6.536514382723141e-06, "loss": 0.4569, "step": 10082 }, { "epoch": 0.41860103062288856, "grad_norm": 2.870319366455078, "learning_rate": 6.535874586153115e-06, "loss": 0.4581, "step": 10083 }, { "epoch": 0.4186425461470999, "grad_norm": 2.293367862701416, "learning_rate": 6.535234761814033e-06, "loss": 0.513, "step": 10084 }, { "epoch": 0.4186840616713112, "grad_norm": 2.284755229949951, "learning_rate": 6.534594909717467e-06, "loss": 0.5518, "step": 10085 }, { "epoch": 0.41872557719552256, "grad_norm": 2.315194845199585, "learning_rate": 6.533955029874981e-06, "loss": 0.6047, "step": 10086 }, { "epoch": 0.4187670927197339, "grad_norm": 3.0282087326049805, "learning_rate": 6.5333151222981465e-06, "loss": 0.4846, "step": 10087 }, { "epoch": 0.4188086082439452, "grad_norm": 2.420266628265381, "learning_rate": 6.532675186998531e-06, "loss": 0.5955, "step": 10088 }, { "epoch": 0.41885012376815656, "grad_norm": 2.6556813716888428, "learning_rate": 6.532035223987708e-06, "loss": 0.4916, "step": 10089 }, { "epoch": 0.4188916392923679, "grad_norm": 2.168184757232666, "learning_rate": 6.5313952332772455e-06, "loss": 0.5053, "step": 10090 }, { "epoch": 0.4189331548165792, "grad_norm": 2.4563684463500977, "learning_rate": 6.530755214878717e-06, "loss": 0.4908, "step": 10091 }, { "epoch": 0.41897467034079056, "grad_norm": 2.9896907806396484, "learning_rate": 6.530115168803692e-06, "loss": 0.5406, "step": 10092 }, { "epoch": 0.4190161858650019, "grad_norm": 2.743705987930298, "learning_rate": 6.529475095063744e-06, "loss": 0.387, "step": 10093 }, { "epoch": 0.4190577013892132, "grad_norm": 2.9309606552124023, "learning_rate": 6.528834993670446e-06, "loss": 0.3951, "step": 10094 }, { "epoch": 0.41909921691342455, "grad_norm": 2.5344796180725098, "learning_rate": 6.52819486463537e-06, "loss": 0.4403, "step": 10095 }, { "epoch": 0.4191407324376359, "grad_norm": 2.8969550132751465, "learning_rate": 6.527554707970091e-06, "loss": 0.4931, "step": 10096 }, { "epoch": 0.4191822479618472, "grad_norm": 2.6013388633728027, "learning_rate": 6.526914523686182e-06, "loss": 0.6003, "step": 10097 }, { "epoch": 0.41922376348605855, "grad_norm": 2.3983216285705566, "learning_rate": 6.5262743117952184e-06, "loss": 0.439, "step": 10098 }, { "epoch": 0.4192652790102699, "grad_norm": 2.6501526832580566, "learning_rate": 6.5256340723087755e-06, "loss": 0.5942, "step": 10099 }, { "epoch": 0.4193067945344812, "grad_norm": 2.250007152557373, "learning_rate": 6.524993805238428e-06, "loss": 0.5246, "step": 10100 }, { "epoch": 0.41934831005869255, "grad_norm": 2.384902000427246, "learning_rate": 6.524353510595754e-06, "loss": 0.5139, "step": 10101 }, { "epoch": 0.4193898255829039, "grad_norm": 2.5682120323181152, "learning_rate": 6.523713188392329e-06, "loss": 0.4556, "step": 10102 }, { "epoch": 0.4194313411071152, "grad_norm": 2.123311758041382, "learning_rate": 6.52307283863973e-06, "loss": 0.4364, "step": 10103 }, { "epoch": 0.41947285663132655, "grad_norm": 2.6806702613830566, "learning_rate": 6.522432461349536e-06, "loss": 0.4832, "step": 10104 }, { "epoch": 0.4195143721555379, "grad_norm": 3.0349302291870117, "learning_rate": 6.521792056533324e-06, "loss": 0.7074, "step": 10105 }, { "epoch": 0.4195558876797493, "grad_norm": 2.24906063079834, "learning_rate": 6.521151624202673e-06, "loss": 0.4436, "step": 10106 }, { "epoch": 0.4195974032039606, "grad_norm": 2.2226169109344482, "learning_rate": 6.520511164369161e-06, "loss": 0.4533, "step": 10107 }, { "epoch": 0.41963891872817194, "grad_norm": 2.407829761505127, "learning_rate": 6.519870677044369e-06, "loss": 0.519, "step": 10108 }, { "epoch": 0.41968043425238327, "grad_norm": 2.142367124557495, "learning_rate": 6.519230162239878e-06, "loss": 0.438, "step": 10109 }, { "epoch": 0.4197219497765946, "grad_norm": 2.1131467819213867, "learning_rate": 6.518589619967267e-06, "loss": 0.4842, "step": 10110 }, { "epoch": 0.41976346530080594, "grad_norm": 2.5435163974761963, "learning_rate": 6.517949050238119e-06, "loss": 0.5576, "step": 10111 }, { "epoch": 0.41980498082501727, "grad_norm": 3.1071994304656982, "learning_rate": 6.517308453064014e-06, "loss": 0.6762, "step": 10112 }, { "epoch": 0.4198464963492286, "grad_norm": 2.5298821926116943, "learning_rate": 6.516667828456535e-06, "loss": 0.5381, "step": 10113 }, { "epoch": 0.41988801187343994, "grad_norm": 2.41892671585083, "learning_rate": 6.516027176427264e-06, "loss": 0.5589, "step": 10114 }, { "epoch": 0.41992952739765127, "grad_norm": 2.2874739170074463, "learning_rate": 6.515386496987786e-06, "loss": 0.4931, "step": 10115 }, { "epoch": 0.4199710429218626, "grad_norm": 2.393880605697632, "learning_rate": 6.514745790149683e-06, "loss": 0.6605, "step": 10116 }, { "epoch": 0.42001255844607394, "grad_norm": 2.985208511352539, "learning_rate": 6.514105055924539e-06, "loss": 0.5489, "step": 10117 }, { "epoch": 0.42005407397028527, "grad_norm": 2.3059182167053223, "learning_rate": 6.513464294323939e-06, "loss": 0.3379, "step": 10118 }, { "epoch": 0.4200955894944966, "grad_norm": 2.1034598350524902, "learning_rate": 6.512823505359469e-06, "loss": 0.449, "step": 10119 }, { "epoch": 0.42013710501870793, "grad_norm": 2.429293155670166, "learning_rate": 6.512182689042713e-06, "loss": 0.5059, "step": 10120 }, { "epoch": 0.42017862054291927, "grad_norm": 2.989030122756958, "learning_rate": 6.511541845385259e-06, "loss": 0.6444, "step": 10121 }, { "epoch": 0.4202201360671306, "grad_norm": 2.5146353244781494, "learning_rate": 6.510900974398693e-06, "loss": 0.5411, "step": 10122 }, { "epoch": 0.42026165159134193, "grad_norm": 2.403231620788574, "learning_rate": 6.5102600760946e-06, "loss": 0.5331, "step": 10123 }, { "epoch": 0.42030316711555327, "grad_norm": 2.473905086517334, "learning_rate": 6.509619150484572e-06, "loss": 0.5499, "step": 10124 }, { "epoch": 0.4203446826397646, "grad_norm": 2.1347451210021973, "learning_rate": 6.5089781975801924e-06, "loss": 0.4747, "step": 10125 }, { "epoch": 0.42038619816397593, "grad_norm": 2.661726474761963, "learning_rate": 6.508337217393054e-06, "loss": 0.7587, "step": 10126 }, { "epoch": 0.42042771368818727, "grad_norm": 2.4480414390563965, "learning_rate": 6.507696209934742e-06, "loss": 0.4836, "step": 10127 }, { "epoch": 0.4204692292123986, "grad_norm": 2.4310452938079834, "learning_rate": 6.50705517521685e-06, "loss": 0.5251, "step": 10128 }, { "epoch": 0.42051074473660993, "grad_norm": 2.376692771911621, "learning_rate": 6.506414113250965e-06, "loss": 0.4297, "step": 10129 }, { "epoch": 0.42055226026082126, "grad_norm": 2.8626275062561035, "learning_rate": 6.505773024048678e-06, "loss": 0.6735, "step": 10130 }, { "epoch": 0.4205937757850326, "grad_norm": 2.4801976680755615, "learning_rate": 6.505131907621582e-06, "loss": 0.5747, "step": 10131 }, { "epoch": 0.42063529130924393, "grad_norm": 2.8020262718200684, "learning_rate": 6.504490763981267e-06, "loss": 0.5102, "step": 10132 }, { "epoch": 0.42067680683345526, "grad_norm": 2.3152668476104736, "learning_rate": 6.503849593139325e-06, "loss": 0.4083, "step": 10133 }, { "epoch": 0.4207183223576666, "grad_norm": 2.531785011291504, "learning_rate": 6.503208395107348e-06, "loss": 0.5356, "step": 10134 }, { "epoch": 0.42075983788187793, "grad_norm": 3.023463726043701, "learning_rate": 6.502567169896932e-06, "loss": 0.6026, "step": 10135 }, { "epoch": 0.42080135340608926, "grad_norm": 2.3764936923980713, "learning_rate": 6.501925917519667e-06, "loss": 0.6646, "step": 10136 }, { "epoch": 0.42084286893030065, "grad_norm": 2.279909372329712, "learning_rate": 6.501284637987148e-06, "loss": 0.5462, "step": 10137 }, { "epoch": 0.420884384454512, "grad_norm": 2.181678533554077, "learning_rate": 6.500643331310972e-06, "loss": 0.4295, "step": 10138 }, { "epoch": 0.4209258999787233, "grad_norm": 3.0285823345184326, "learning_rate": 6.500001997502731e-06, "loss": 0.6946, "step": 10139 }, { "epoch": 0.42096741550293465, "grad_norm": 2.8416690826416016, "learning_rate": 6.4993606365740214e-06, "loss": 0.6008, "step": 10140 }, { "epoch": 0.421008931027146, "grad_norm": 1.9853019714355469, "learning_rate": 6.498719248536439e-06, "loss": 0.3676, "step": 10141 }, { "epoch": 0.4210504465513573, "grad_norm": 2.243126630783081, "learning_rate": 6.498077833401582e-06, "loss": 0.6349, "step": 10142 }, { "epoch": 0.42109196207556865, "grad_norm": 2.7243306636810303, "learning_rate": 6.497436391181044e-06, "loss": 0.5865, "step": 10143 }, { "epoch": 0.42113347759978, "grad_norm": 2.4016568660736084, "learning_rate": 6.496794921886427e-06, "loss": 0.6588, "step": 10144 }, { "epoch": 0.4211749931239913, "grad_norm": 2.909834384918213, "learning_rate": 6.496153425529325e-06, "loss": 0.5439, "step": 10145 }, { "epoch": 0.42121650864820265, "grad_norm": 2.3179595470428467, "learning_rate": 6.495511902121337e-06, "loss": 0.5384, "step": 10146 }, { "epoch": 0.421258024172414, "grad_norm": 2.386357307434082, "learning_rate": 6.494870351674063e-06, "loss": 0.4253, "step": 10147 }, { "epoch": 0.4212995396966253, "grad_norm": 2.1728100776672363, "learning_rate": 6.494228774199103e-06, "loss": 0.6108, "step": 10148 }, { "epoch": 0.42134105522083665, "grad_norm": 2.7407846450805664, "learning_rate": 6.4935871697080555e-06, "loss": 0.4898, "step": 10149 }, { "epoch": 0.421382570745048, "grad_norm": 2.3000192642211914, "learning_rate": 6.492945538212521e-06, "loss": 0.525, "step": 10150 }, { "epoch": 0.4214240862692593, "grad_norm": 2.247243881225586, "learning_rate": 6.492303879724102e-06, "loss": 0.4559, "step": 10151 }, { "epoch": 0.42146560179347065, "grad_norm": 2.6108651161193848, "learning_rate": 6.491662194254398e-06, "loss": 0.5374, "step": 10152 }, { "epoch": 0.421507117317682, "grad_norm": 2.52237868309021, "learning_rate": 6.491020481815011e-06, "loss": 0.5683, "step": 10153 }, { "epoch": 0.4215486328418933, "grad_norm": 2.26971697807312, "learning_rate": 6.490378742417546e-06, "loss": 0.6007, "step": 10154 }, { "epoch": 0.42159014836610464, "grad_norm": 2.3263325691223145, "learning_rate": 6.489736976073603e-06, "loss": 0.5089, "step": 10155 }, { "epoch": 0.421631663890316, "grad_norm": 2.448272943496704, "learning_rate": 6.4890951827947845e-06, "loss": 0.604, "step": 10156 }, { "epoch": 0.4216731794145273, "grad_norm": 2.4293181896209717, "learning_rate": 6.488453362592698e-06, "loss": 0.4695, "step": 10157 }, { "epoch": 0.42171469493873864, "grad_norm": 2.3159515857696533, "learning_rate": 6.487811515478946e-06, "loss": 0.5426, "step": 10158 }, { "epoch": 0.42175621046295, "grad_norm": 2.1949410438537598, "learning_rate": 6.48716964146513e-06, "loss": 0.4638, "step": 10159 }, { "epoch": 0.4217977259871613, "grad_norm": 2.799215793609619, "learning_rate": 6.48652774056286e-06, "loss": 0.5773, "step": 10160 }, { "epoch": 0.42183924151137264, "grad_norm": 2.2342355251312256, "learning_rate": 6.48588581278374e-06, "loss": 0.5037, "step": 10161 }, { "epoch": 0.421880757035584, "grad_norm": 2.9031405448913574, "learning_rate": 6.4852438581393766e-06, "loss": 0.6559, "step": 10162 }, { "epoch": 0.4219222725597953, "grad_norm": 2.4200241565704346, "learning_rate": 6.484601876641375e-06, "loss": 0.4569, "step": 10163 }, { "epoch": 0.42196378808400664, "grad_norm": 2.588754177093506, "learning_rate": 6.483959868301344e-06, "loss": 0.4082, "step": 10164 }, { "epoch": 0.422005303608218, "grad_norm": 2.74763822555542, "learning_rate": 6.483317833130891e-06, "loss": 0.4759, "step": 10165 }, { "epoch": 0.4220468191324293, "grad_norm": 2.162703514099121, "learning_rate": 6.482675771141624e-06, "loss": 0.5448, "step": 10166 }, { "epoch": 0.42208833465664064, "grad_norm": 2.235154867172241, "learning_rate": 6.482033682345152e-06, "loss": 0.5, "step": 10167 }, { "epoch": 0.42212985018085203, "grad_norm": 2.4597768783569336, "learning_rate": 6.481391566753085e-06, "loss": 0.5505, "step": 10168 }, { "epoch": 0.42217136570506336, "grad_norm": 2.6883256435394287, "learning_rate": 6.48074942437703e-06, "loss": 0.6506, "step": 10169 }, { "epoch": 0.4222128812292747, "grad_norm": 2.716015577316284, "learning_rate": 6.480107255228598e-06, "loss": 0.5226, "step": 10170 }, { "epoch": 0.422254396753486, "grad_norm": 2.7577064037323, "learning_rate": 6.479465059319402e-06, "loss": 0.5229, "step": 10171 }, { "epoch": 0.42229591227769736, "grad_norm": 2.6656599044799805, "learning_rate": 6.47882283666105e-06, "loss": 0.5067, "step": 10172 }, { "epoch": 0.4223374278019087, "grad_norm": 2.2168562412261963, "learning_rate": 6.4781805872651536e-06, "loss": 0.4185, "step": 10173 }, { "epoch": 0.42237894332612, "grad_norm": 2.0668087005615234, "learning_rate": 6.477538311143329e-06, "loss": 0.4228, "step": 10174 }, { "epoch": 0.42242045885033136, "grad_norm": 3.987804889678955, "learning_rate": 6.476896008307183e-06, "loss": 0.6005, "step": 10175 }, { "epoch": 0.4224619743745427, "grad_norm": 2.2682814598083496, "learning_rate": 6.4762536787683335e-06, "loss": 0.4711, "step": 10176 }, { "epoch": 0.422503489898754, "grad_norm": 2.4928503036499023, "learning_rate": 6.4756113225383924e-06, "loss": 0.4968, "step": 10177 }, { "epoch": 0.42254500542296536, "grad_norm": 2.627526044845581, "learning_rate": 6.474968939628972e-06, "loss": 0.5241, "step": 10178 }, { "epoch": 0.4225865209471767, "grad_norm": 2.85002064704895, "learning_rate": 6.474326530051688e-06, "loss": 0.64, "step": 10179 }, { "epoch": 0.422628036471388, "grad_norm": 2.6875381469726562, "learning_rate": 6.473684093818157e-06, "loss": 0.5152, "step": 10180 }, { "epoch": 0.42266955199559936, "grad_norm": 2.131178379058838, "learning_rate": 6.4730416309399916e-06, "loss": 0.5358, "step": 10181 }, { "epoch": 0.4227110675198107, "grad_norm": 2.5168333053588867, "learning_rate": 6.472399141428809e-06, "loss": 0.5066, "step": 10182 }, { "epoch": 0.422752583044022, "grad_norm": 2.2000176906585693, "learning_rate": 6.471756625296225e-06, "loss": 0.5627, "step": 10183 }, { "epoch": 0.42279409856823336, "grad_norm": 2.359804630279541, "learning_rate": 6.4711140825538586e-06, "loss": 0.5652, "step": 10184 }, { "epoch": 0.4228356140924447, "grad_norm": 2.737689256668091, "learning_rate": 6.470471513213323e-06, "loss": 0.5311, "step": 10185 }, { "epoch": 0.422877129616656, "grad_norm": 2.1517486572265625, "learning_rate": 6.469828917286241e-06, "loss": 0.4133, "step": 10186 }, { "epoch": 0.42291864514086736, "grad_norm": 2.0820319652557373, "learning_rate": 6.4691862947842274e-06, "loss": 0.5021, "step": 10187 }, { "epoch": 0.4229601606650787, "grad_norm": 2.9504549503326416, "learning_rate": 6.468543645718901e-06, "loss": 0.5495, "step": 10188 }, { "epoch": 0.42300167618929, "grad_norm": 2.561410903930664, "learning_rate": 6.467900970101885e-06, "loss": 0.5464, "step": 10189 }, { "epoch": 0.42304319171350135, "grad_norm": 2.4079723358154297, "learning_rate": 6.4672582679447935e-06, "loss": 0.5278, "step": 10190 }, { "epoch": 0.4230847072377127, "grad_norm": 3.2237486839294434, "learning_rate": 6.466615539259252e-06, "loss": 0.3655, "step": 10191 }, { "epoch": 0.423126222761924, "grad_norm": 2.4881200790405273, "learning_rate": 6.465972784056877e-06, "loss": 0.4164, "step": 10192 }, { "epoch": 0.42316773828613535, "grad_norm": 2.886707067489624, "learning_rate": 6.465330002349293e-06, "loss": 0.5989, "step": 10193 }, { "epoch": 0.4232092538103467, "grad_norm": 2.152783155441284, "learning_rate": 6.464687194148121e-06, "loss": 0.4076, "step": 10194 }, { "epoch": 0.423250769334558, "grad_norm": 2.2918314933776855, "learning_rate": 6.46404435946498e-06, "loss": 0.5225, "step": 10195 }, { "epoch": 0.42329228485876935, "grad_norm": 2.0339739322662354, "learning_rate": 6.463401498311497e-06, "loss": 0.5235, "step": 10196 }, { "epoch": 0.4233338003829807, "grad_norm": 3.1025185585021973, "learning_rate": 6.4627586106992915e-06, "loss": 0.6249, "step": 10197 }, { "epoch": 0.423375315907192, "grad_norm": 2.218515396118164, "learning_rate": 6.462115696639989e-06, "loss": 0.3842, "step": 10198 }, { "epoch": 0.4234168314314034, "grad_norm": 2.179539442062378, "learning_rate": 6.461472756145214e-06, "loss": 0.4211, "step": 10199 }, { "epoch": 0.42345834695561474, "grad_norm": 2.449693441390991, "learning_rate": 6.460829789226589e-06, "loss": 0.5427, "step": 10200 }, { "epoch": 0.4234998624798261, "grad_norm": 2.477942705154419, "learning_rate": 6.460186795895741e-06, "loss": 0.5263, "step": 10201 }, { "epoch": 0.4235413780040374, "grad_norm": 2.5811374187469482, "learning_rate": 6.459543776164296e-06, "loss": 0.3868, "step": 10202 }, { "epoch": 0.42358289352824874, "grad_norm": 2.587423086166382, "learning_rate": 6.458900730043876e-06, "loss": 0.5408, "step": 10203 }, { "epoch": 0.42362440905246007, "grad_norm": 2.215371608734131, "learning_rate": 6.458257657546113e-06, "loss": 0.3992, "step": 10204 }, { "epoch": 0.4236659245766714, "grad_norm": 2.3413851261138916, "learning_rate": 6.45761455868263e-06, "loss": 0.6069, "step": 10205 }, { "epoch": 0.42370744010088274, "grad_norm": 2.295135259628296, "learning_rate": 6.456971433465057e-06, "loss": 0.5644, "step": 10206 }, { "epoch": 0.42374895562509407, "grad_norm": 2.2448298931121826, "learning_rate": 6.45632828190502e-06, "loss": 0.5248, "step": 10207 }, { "epoch": 0.4237904711493054, "grad_norm": 2.5019071102142334, "learning_rate": 6.455685104014149e-06, "loss": 0.5211, "step": 10208 }, { "epoch": 0.42383198667351674, "grad_norm": 2.2523913383483887, "learning_rate": 6.4550418998040686e-06, "loss": 0.5188, "step": 10209 }, { "epoch": 0.42387350219772807, "grad_norm": 2.4738988876342773, "learning_rate": 6.454398669286414e-06, "loss": 0.5037, "step": 10210 }, { "epoch": 0.4239150177219394, "grad_norm": 2.683394193649292, "learning_rate": 6.453755412472811e-06, "loss": 0.7008, "step": 10211 }, { "epoch": 0.42395653324615074, "grad_norm": 2.473721504211426, "learning_rate": 6.453112129374891e-06, "loss": 0.422, "step": 10212 }, { "epoch": 0.42399804877036207, "grad_norm": 2.276062488555908, "learning_rate": 6.452468820004285e-06, "loss": 0.4645, "step": 10213 }, { "epoch": 0.4240395642945734, "grad_norm": 2.4200313091278076, "learning_rate": 6.451825484372624e-06, "loss": 0.4406, "step": 10214 }, { "epoch": 0.42408107981878473, "grad_norm": 2.628789186477661, "learning_rate": 6.45118212249154e-06, "loss": 0.5312, "step": 10215 }, { "epoch": 0.42412259534299607, "grad_norm": 3.249878406524658, "learning_rate": 6.4505387343726644e-06, "loss": 0.6375, "step": 10216 }, { "epoch": 0.4241641108672074, "grad_norm": 2.509216070175171, "learning_rate": 6.449895320027632e-06, "loss": 0.6415, "step": 10217 }, { "epoch": 0.42420562639141873, "grad_norm": 2.841038227081299, "learning_rate": 6.449251879468073e-06, "loss": 0.5157, "step": 10218 }, { "epoch": 0.42424714191563007, "grad_norm": 2.5244967937469482, "learning_rate": 6.4486084127056225e-06, "loss": 0.6123, "step": 10219 }, { "epoch": 0.4242886574398414, "grad_norm": 2.443807601928711, "learning_rate": 6.447964919751915e-06, "loss": 0.3995, "step": 10220 }, { "epoch": 0.42433017296405273, "grad_norm": 2.322141408920288, "learning_rate": 6.4473214006185846e-06, "loss": 0.4598, "step": 10221 }, { "epoch": 0.42437168848826406, "grad_norm": 2.0240814685821533, "learning_rate": 6.446677855317265e-06, "loss": 0.3608, "step": 10222 }, { "epoch": 0.4244132040124754, "grad_norm": 2.260254144668579, "learning_rate": 6.446034283859593e-06, "loss": 0.4581, "step": 10223 }, { "epoch": 0.42445471953668673, "grad_norm": 2.6170690059661865, "learning_rate": 6.4453906862572046e-06, "loss": 0.4804, "step": 10224 }, { "epoch": 0.42449623506089806, "grad_norm": 2.6340060234069824, "learning_rate": 6.444747062521737e-06, "loss": 0.4874, "step": 10225 }, { "epoch": 0.4245377505851094, "grad_norm": 2.3992602825164795, "learning_rate": 6.444103412664824e-06, "loss": 0.6022, "step": 10226 }, { "epoch": 0.42457926610932073, "grad_norm": 2.494765043258667, "learning_rate": 6.443459736698106e-06, "loss": 0.515, "step": 10227 }, { "epoch": 0.42462078163353206, "grad_norm": 2.689303398132324, "learning_rate": 6.442816034633219e-06, "loss": 0.5668, "step": 10228 }, { "epoch": 0.42466229715774345, "grad_norm": 2.3744914531707764, "learning_rate": 6.442172306481803e-06, "loss": 0.5144, "step": 10229 }, { "epoch": 0.4247038126819548, "grad_norm": 2.597943067550659, "learning_rate": 6.441528552255497e-06, "loss": 0.6294, "step": 10230 }, { "epoch": 0.4247453282061661, "grad_norm": 2.2217226028442383, "learning_rate": 6.440884771965938e-06, "loss": 0.5131, "step": 10231 }, { "epoch": 0.42478684373037745, "grad_norm": 2.5725257396698, "learning_rate": 6.440240965624769e-06, "loss": 0.4973, "step": 10232 }, { "epoch": 0.4248283592545888, "grad_norm": 2.406898021697998, "learning_rate": 6.439597133243627e-06, "loss": 0.411, "step": 10233 }, { "epoch": 0.4248698747788001, "grad_norm": 2.25481915473938, "learning_rate": 6.438953274834153e-06, "loss": 0.4506, "step": 10234 }, { "epoch": 0.42491139030301145, "grad_norm": 2.186889410018921, "learning_rate": 6.43830939040799e-06, "loss": 0.4371, "step": 10235 }, { "epoch": 0.4249529058272228, "grad_norm": 2.69571590423584, "learning_rate": 6.437665479976778e-06, "loss": 0.4971, "step": 10236 }, { "epoch": 0.4249944213514341, "grad_norm": 2.77016544342041, "learning_rate": 6.43702154355216e-06, "loss": 0.5772, "step": 10237 }, { "epoch": 0.42503593687564545, "grad_norm": 2.4334633350372314, "learning_rate": 6.436377581145778e-06, "loss": 0.5508, "step": 10238 }, { "epoch": 0.4250774523998568, "grad_norm": 2.582547903060913, "learning_rate": 6.435733592769277e-06, "loss": 0.4078, "step": 10239 }, { "epoch": 0.4251189679240681, "grad_norm": 3.0979015827178955, "learning_rate": 6.435089578434296e-06, "loss": 0.4823, "step": 10240 }, { "epoch": 0.42516048344827945, "grad_norm": 2.6056129932403564, "learning_rate": 6.434445538152484e-06, "loss": 0.4466, "step": 10241 }, { "epoch": 0.4252019989724908, "grad_norm": 2.3976259231567383, "learning_rate": 6.433801471935481e-06, "loss": 0.5364, "step": 10242 }, { "epoch": 0.4252435144967021, "grad_norm": 2.633349657058716, "learning_rate": 6.433157379794937e-06, "loss": 0.6059, "step": 10243 }, { "epoch": 0.42528503002091345, "grad_norm": 2.478996515274048, "learning_rate": 6.4325132617424925e-06, "loss": 0.6389, "step": 10244 }, { "epoch": 0.4253265455451248, "grad_norm": 2.2705202102661133, "learning_rate": 6.431869117789797e-06, "loss": 0.5644, "step": 10245 }, { "epoch": 0.4253680610693361, "grad_norm": 2.685784101486206, "learning_rate": 6.431224947948494e-06, "loss": 0.5202, "step": 10246 }, { "epoch": 0.42540957659354744, "grad_norm": 2.314669370651245, "learning_rate": 6.430580752230232e-06, "loss": 0.5119, "step": 10247 }, { "epoch": 0.4254510921177588, "grad_norm": 2.3169238567352295, "learning_rate": 6.4299365306466565e-06, "loss": 0.479, "step": 10248 }, { "epoch": 0.4254926076419701, "grad_norm": 2.6207611560821533, "learning_rate": 6.429292283209418e-06, "loss": 0.619, "step": 10249 }, { "epoch": 0.42553412316618144, "grad_norm": 2.388640880584717, "learning_rate": 6.428648009930162e-06, "loss": 0.4574, "step": 10250 }, { "epoch": 0.4255756386903928, "grad_norm": 2.5712313652038574, "learning_rate": 6.428003710820539e-06, "loss": 0.7211, "step": 10251 }, { "epoch": 0.4256171542146041, "grad_norm": 2.3093247413635254, "learning_rate": 6.427359385892196e-06, "loss": 0.6213, "step": 10252 }, { "epoch": 0.42565866973881544, "grad_norm": 2.549215078353882, "learning_rate": 6.426715035156785e-06, "loss": 0.5594, "step": 10253 }, { "epoch": 0.4257001852630268, "grad_norm": 2.7372701168060303, "learning_rate": 6.426070658625953e-06, "loss": 0.62, "step": 10254 }, { "epoch": 0.4257417007872381, "grad_norm": 2.165632486343384, "learning_rate": 6.425426256311353e-06, "loss": 0.6821, "step": 10255 }, { "epoch": 0.42578321631144944, "grad_norm": 2.2999267578125, "learning_rate": 6.424781828224638e-06, "loss": 0.5309, "step": 10256 }, { "epoch": 0.4258247318356608, "grad_norm": 2.949902057647705, "learning_rate": 6.424137374377455e-06, "loss": 0.5807, "step": 10257 }, { "epoch": 0.4258662473598721, "grad_norm": 2.4313738346099854, "learning_rate": 6.423492894781458e-06, "loss": 0.3908, "step": 10258 }, { "epoch": 0.42590776288408344, "grad_norm": 2.8882975578308105, "learning_rate": 6.4228483894483e-06, "loss": 0.5328, "step": 10259 }, { "epoch": 0.42594927840829483, "grad_norm": 2.791581630706787, "learning_rate": 6.422203858389633e-06, "loss": 0.5684, "step": 10260 }, { "epoch": 0.42599079393250616, "grad_norm": 2.004783868789673, "learning_rate": 6.4215593016171085e-06, "loss": 0.5536, "step": 10261 }, { "epoch": 0.4260323094567175, "grad_norm": 2.7080907821655273, "learning_rate": 6.420914719142384e-06, "loss": 0.5584, "step": 10262 }, { "epoch": 0.42607382498092883, "grad_norm": 2.404684066772461, "learning_rate": 6.4202701109771105e-06, "loss": 0.4482, "step": 10263 }, { "epoch": 0.42611534050514016, "grad_norm": 2.5001583099365234, "learning_rate": 6.419625477132945e-06, "loss": 0.4392, "step": 10264 }, { "epoch": 0.4261568560293515, "grad_norm": 2.3535706996917725, "learning_rate": 6.418980817621541e-06, "loss": 0.4899, "step": 10265 }, { "epoch": 0.4261983715535628, "grad_norm": 2.6037447452545166, "learning_rate": 6.4183361324545545e-06, "loss": 0.572, "step": 10266 }, { "epoch": 0.42623988707777416, "grad_norm": 2.263068675994873, "learning_rate": 6.417691421643642e-06, "loss": 0.463, "step": 10267 }, { "epoch": 0.4262814026019855, "grad_norm": 2.6608126163482666, "learning_rate": 6.41704668520046e-06, "loss": 0.6318, "step": 10268 }, { "epoch": 0.4263229181261968, "grad_norm": 2.2270915508270264, "learning_rate": 6.416401923136666e-06, "loss": 0.4779, "step": 10269 }, { "epoch": 0.42636443365040816, "grad_norm": 2.639000177383423, "learning_rate": 6.415757135463917e-06, "loss": 0.5248, "step": 10270 }, { "epoch": 0.4264059491746195, "grad_norm": 2.5188486576080322, "learning_rate": 6.415112322193871e-06, "loss": 0.5994, "step": 10271 }, { "epoch": 0.4264474646988308, "grad_norm": 2.3599815368652344, "learning_rate": 6.414467483338187e-06, "loss": 0.5282, "step": 10272 }, { "epoch": 0.42648898022304216, "grad_norm": 2.6318483352661133, "learning_rate": 6.4138226189085225e-06, "loss": 0.5449, "step": 10273 }, { "epoch": 0.4265304957472535, "grad_norm": 3.0336320400238037, "learning_rate": 6.413177728916537e-06, "loss": 0.5602, "step": 10274 }, { "epoch": 0.4265720112714648, "grad_norm": 2.119086265563965, "learning_rate": 6.412532813373892e-06, "loss": 0.4999, "step": 10275 }, { "epoch": 0.42661352679567616, "grad_norm": 2.5704526901245117, "learning_rate": 6.4118878722922454e-06, "loss": 0.5237, "step": 10276 }, { "epoch": 0.4266550423198875, "grad_norm": 2.9951884746551514, "learning_rate": 6.411242905683259e-06, "loss": 0.5503, "step": 10277 }, { "epoch": 0.4266965578440988, "grad_norm": 2.2743496894836426, "learning_rate": 6.410597913558594e-06, "loss": 0.6056, "step": 10278 }, { "epoch": 0.42673807336831016, "grad_norm": 2.5330240726470947, "learning_rate": 6.409952895929914e-06, "loss": 0.3837, "step": 10279 }, { "epoch": 0.4267795888925215, "grad_norm": 2.0832204818725586, "learning_rate": 6.409307852808877e-06, "loss": 0.4345, "step": 10280 }, { "epoch": 0.4268211044167328, "grad_norm": 2.1967062950134277, "learning_rate": 6.408662784207149e-06, "loss": 0.5685, "step": 10281 }, { "epoch": 0.42686261994094415, "grad_norm": 2.515347957611084, "learning_rate": 6.4080176901363925e-06, "loss": 0.685, "step": 10282 }, { "epoch": 0.4269041354651555, "grad_norm": 2.676229238510132, "learning_rate": 6.407372570608269e-06, "loss": 0.4859, "step": 10283 }, { "epoch": 0.4269456509893668, "grad_norm": 2.328319549560547, "learning_rate": 6.406727425634444e-06, "loss": 0.5902, "step": 10284 }, { "epoch": 0.42698716651357815, "grad_norm": 2.385202169418335, "learning_rate": 6.406082255226582e-06, "loss": 0.5545, "step": 10285 }, { "epoch": 0.4270286820377895, "grad_norm": 2.6138463020324707, "learning_rate": 6.405437059396349e-06, "loss": 0.5379, "step": 10286 }, { "epoch": 0.4270701975620008, "grad_norm": 2.369156837463379, "learning_rate": 6.404791838155406e-06, "loss": 0.5006, "step": 10287 }, { "epoch": 0.42711171308621215, "grad_norm": 2.4214141368865967, "learning_rate": 6.404146591515423e-06, "loss": 0.4676, "step": 10288 }, { "epoch": 0.4271532286104235, "grad_norm": 2.238239288330078, "learning_rate": 6.4035013194880655e-06, "loss": 0.5597, "step": 10289 }, { "epoch": 0.4271947441346348, "grad_norm": 2.401428461074829, "learning_rate": 6.402856022084999e-06, "loss": 0.3612, "step": 10290 }, { "epoch": 0.4272362596588462, "grad_norm": 2.3959238529205322, "learning_rate": 6.402210699317891e-06, "loss": 0.4216, "step": 10291 }, { "epoch": 0.42727777518305754, "grad_norm": 2.8970484733581543, "learning_rate": 6.40156535119841e-06, "loss": 0.4759, "step": 10292 }, { "epoch": 0.4273192907072689, "grad_norm": 2.080000162124634, "learning_rate": 6.400919977738222e-06, "loss": 0.5226, "step": 10293 }, { "epoch": 0.4273608062314802, "grad_norm": 2.5866193771362305, "learning_rate": 6.400274578948998e-06, "loss": 0.6254, "step": 10294 }, { "epoch": 0.42740232175569154, "grad_norm": 2.881375312805176, "learning_rate": 6.3996291548424075e-06, "loss": 0.5365, "step": 10295 }, { "epoch": 0.42744383727990287, "grad_norm": 3.0757858753204346, "learning_rate": 6.398983705430117e-06, "loss": 0.3913, "step": 10296 }, { "epoch": 0.4274853528041142, "grad_norm": 2.519434690475464, "learning_rate": 6.398338230723798e-06, "loss": 0.5209, "step": 10297 }, { "epoch": 0.42752686832832554, "grad_norm": 2.4750022888183594, "learning_rate": 6.397692730735121e-06, "loss": 0.6796, "step": 10298 }, { "epoch": 0.42756838385253687, "grad_norm": 3.8484716415405273, "learning_rate": 6.397047205475757e-06, "loss": 0.5923, "step": 10299 }, { "epoch": 0.4276098993767482, "grad_norm": 2.0819411277770996, "learning_rate": 6.396401654957376e-06, "loss": 0.4745, "step": 10300 }, { "epoch": 0.42765141490095954, "grad_norm": 2.3249735832214355, "learning_rate": 6.395756079191652e-06, "loss": 0.5111, "step": 10301 }, { "epoch": 0.42769293042517087, "grad_norm": 2.693162679672241, "learning_rate": 6.395110478190254e-06, "loss": 0.4639, "step": 10302 }, { "epoch": 0.4277344459493822, "grad_norm": 2.6649796962738037, "learning_rate": 6.394464851964857e-06, "loss": 0.5133, "step": 10303 }, { "epoch": 0.42777596147359354, "grad_norm": 2.1387619972229004, "learning_rate": 6.393819200527134e-06, "loss": 0.2667, "step": 10304 }, { "epoch": 0.42781747699780487, "grad_norm": 3.4726502895355225, "learning_rate": 6.393173523888757e-06, "loss": 0.4745, "step": 10305 }, { "epoch": 0.4278589925220162, "grad_norm": 2.3585634231567383, "learning_rate": 6.392527822061402e-06, "loss": 0.4206, "step": 10306 }, { "epoch": 0.42790050804622753, "grad_norm": 2.3664281368255615, "learning_rate": 6.391882095056742e-06, "loss": 0.4045, "step": 10307 }, { "epoch": 0.42794202357043887, "grad_norm": 1.756731629371643, "learning_rate": 6.391236342886453e-06, "loss": 0.3994, "step": 10308 }, { "epoch": 0.4279835390946502, "grad_norm": 2.255114793777466, "learning_rate": 6.39059056556221e-06, "loss": 0.4534, "step": 10309 }, { "epoch": 0.42802505461886153, "grad_norm": 2.294196844100952, "learning_rate": 6.389944763095689e-06, "loss": 0.4709, "step": 10310 }, { "epoch": 0.42806657014307287, "grad_norm": 2.6129496097564697, "learning_rate": 6.389298935498565e-06, "loss": 0.6469, "step": 10311 }, { "epoch": 0.4281080856672842, "grad_norm": 2.935544729232788, "learning_rate": 6.388653082782518e-06, "loss": 0.578, "step": 10312 }, { "epoch": 0.42814960119149553, "grad_norm": 2.395793914794922, "learning_rate": 6.3880072049592216e-06, "loss": 0.6371, "step": 10313 }, { "epoch": 0.42819111671570687, "grad_norm": 2.8126792907714844, "learning_rate": 6.387361302040355e-06, "loss": 0.4581, "step": 10314 }, { "epoch": 0.4282326322399182, "grad_norm": 2.450716495513916, "learning_rate": 6.386715374037595e-06, "loss": 0.4421, "step": 10315 }, { "epoch": 0.42827414776412953, "grad_norm": 2.8703689575195312, "learning_rate": 6.386069420962623e-06, "loss": 0.5561, "step": 10316 }, { "epoch": 0.42831566328834086, "grad_norm": 3.269266366958618, "learning_rate": 6.385423442827116e-06, "loss": 0.566, "step": 10317 }, { "epoch": 0.4283571788125522, "grad_norm": 2.5705015659332275, "learning_rate": 6.3847774396427534e-06, "loss": 0.5474, "step": 10318 }, { "epoch": 0.42839869433676353, "grad_norm": 2.2443487644195557, "learning_rate": 6.384131411421215e-06, "loss": 0.4524, "step": 10319 }, { "epoch": 0.42844020986097486, "grad_norm": 2.33542799949646, "learning_rate": 6.383485358174182e-06, "loss": 0.5414, "step": 10320 }, { "epoch": 0.4284817253851862, "grad_norm": 2.400495767593384, "learning_rate": 6.382839279913335e-06, "loss": 0.6505, "step": 10321 }, { "epoch": 0.4285232409093976, "grad_norm": 2.4287703037261963, "learning_rate": 6.382193176650357e-06, "loss": 0.4619, "step": 10322 }, { "epoch": 0.4285647564336089, "grad_norm": 2.335808515548706, "learning_rate": 6.381547048396927e-06, "loss": 0.5058, "step": 10323 }, { "epoch": 0.42860627195782025, "grad_norm": 2.9318888187408447, "learning_rate": 6.380900895164728e-06, "loss": 0.7198, "step": 10324 }, { "epoch": 0.4286477874820316, "grad_norm": 2.188323497772217, "learning_rate": 6.380254716965442e-06, "loss": 0.5775, "step": 10325 }, { "epoch": 0.4286893030062429, "grad_norm": 2.2351720333099365, "learning_rate": 6.379608513810753e-06, "loss": 0.4567, "step": 10326 }, { "epoch": 0.42873081853045425, "grad_norm": 2.5469415187835693, "learning_rate": 6.378962285712346e-06, "loss": 0.5288, "step": 10327 }, { "epoch": 0.4287723340546656, "grad_norm": 1.811938762664795, "learning_rate": 6.3783160326819015e-06, "loss": 0.4025, "step": 10328 }, { "epoch": 0.4288138495788769, "grad_norm": 2.519444704055786, "learning_rate": 6.377669754731107e-06, "loss": 0.5611, "step": 10329 }, { "epoch": 0.42885536510308825, "grad_norm": 2.583494186401367, "learning_rate": 6.377023451871646e-06, "loss": 0.4945, "step": 10330 }, { "epoch": 0.4288968806272996, "grad_norm": 2.4364068508148193, "learning_rate": 6.376377124115204e-06, "loss": 0.7341, "step": 10331 }, { "epoch": 0.4289383961515109, "grad_norm": 2.8832151889801025, "learning_rate": 6.3757307714734666e-06, "loss": 0.5684, "step": 10332 }, { "epoch": 0.42897991167572225, "grad_norm": 2.476707696914673, "learning_rate": 6.375084393958121e-06, "loss": 0.3692, "step": 10333 }, { "epoch": 0.4290214271999336, "grad_norm": 2.8199784755706787, "learning_rate": 6.374437991580852e-06, "loss": 0.5488, "step": 10334 }, { "epoch": 0.4290629427241449, "grad_norm": 2.173534393310547, "learning_rate": 6.3737915643533484e-06, "loss": 0.5489, "step": 10335 }, { "epoch": 0.42910445824835625, "grad_norm": 2.7101399898529053, "learning_rate": 6.3731451122872985e-06, "loss": 0.504, "step": 10336 }, { "epoch": 0.4291459737725676, "grad_norm": 2.5521581172943115, "learning_rate": 6.3724986353943885e-06, "loss": 0.4998, "step": 10337 }, { "epoch": 0.4291874892967789, "grad_norm": 2.3586244583129883, "learning_rate": 6.371852133686308e-06, "loss": 0.5604, "step": 10338 }, { "epoch": 0.42922900482099025, "grad_norm": 2.5605413913726807, "learning_rate": 6.371205607174744e-06, "loss": 0.5742, "step": 10339 }, { "epoch": 0.4292705203452016, "grad_norm": 2.646120548248291, "learning_rate": 6.370559055871389e-06, "loss": 0.4849, "step": 10340 }, { "epoch": 0.4293120358694129, "grad_norm": 2.588965654373169, "learning_rate": 6.369912479787929e-06, "loss": 0.5295, "step": 10341 }, { "epoch": 0.42935355139362424, "grad_norm": 2.257014036178589, "learning_rate": 6.36926587893606e-06, "loss": 0.5193, "step": 10342 }, { "epoch": 0.4293950669178356, "grad_norm": 2.913409948348999, "learning_rate": 6.368619253327466e-06, "loss": 0.4568, "step": 10343 }, { "epoch": 0.4294365824420469, "grad_norm": 2.9180350303649902, "learning_rate": 6.367972602973844e-06, "loss": 0.5878, "step": 10344 }, { "epoch": 0.42947809796625824, "grad_norm": 2.2850043773651123, "learning_rate": 6.367325927886881e-06, "loss": 0.5105, "step": 10345 }, { "epoch": 0.4295196134904696, "grad_norm": 2.335583448410034, "learning_rate": 6.366679228078273e-06, "loss": 0.3411, "step": 10346 }, { "epoch": 0.4295611290146809, "grad_norm": 2.8398923873901367, "learning_rate": 6.36603250355971e-06, "loss": 0.5342, "step": 10347 }, { "epoch": 0.42960264453889224, "grad_norm": 2.5325405597686768, "learning_rate": 6.365385754342885e-06, "loss": 0.6396, "step": 10348 }, { "epoch": 0.4296441600631036, "grad_norm": 2.3570854663848877, "learning_rate": 6.364738980439492e-06, "loss": 0.4568, "step": 10349 }, { "epoch": 0.4296856755873149, "grad_norm": 2.6596295833587646, "learning_rate": 6.364092181861224e-06, "loss": 0.467, "step": 10350 }, { "epoch": 0.42972719111152624, "grad_norm": 2.832261085510254, "learning_rate": 6.363445358619779e-06, "loss": 0.49, "step": 10351 }, { "epoch": 0.4297687066357376, "grad_norm": 2.679626703262329, "learning_rate": 6.362798510726847e-06, "loss": 0.6273, "step": 10352 }, { "epoch": 0.42981022215994896, "grad_norm": 2.371506929397583, "learning_rate": 6.362151638194125e-06, "loss": 0.5615, "step": 10353 }, { "epoch": 0.4298517376841603, "grad_norm": 2.481085777282715, "learning_rate": 6.361504741033309e-06, "loss": 0.6361, "step": 10354 }, { "epoch": 0.42989325320837163, "grad_norm": 2.2485878467559814, "learning_rate": 6.360857819256094e-06, "loss": 0.6055, "step": 10355 }, { "epoch": 0.42993476873258296, "grad_norm": 2.1938817501068115, "learning_rate": 6.360210872874179e-06, "loss": 0.5173, "step": 10356 }, { "epoch": 0.4299762842567943, "grad_norm": 2.120178699493408, "learning_rate": 6.35956390189926e-06, "loss": 0.4103, "step": 10357 }, { "epoch": 0.4300177997810056, "grad_norm": 2.240233898162842, "learning_rate": 6.358916906343031e-06, "loss": 0.4666, "step": 10358 }, { "epoch": 0.43005931530521696, "grad_norm": 3.0123233795166016, "learning_rate": 6.3582698862171945e-06, "loss": 0.5811, "step": 10359 }, { "epoch": 0.4301008308294283, "grad_norm": 2.0355799198150635, "learning_rate": 6.357622841533446e-06, "loss": 0.5157, "step": 10360 }, { "epoch": 0.4301423463536396, "grad_norm": 2.097177028656006, "learning_rate": 6.356975772303486e-06, "loss": 0.4544, "step": 10361 }, { "epoch": 0.43018386187785096, "grad_norm": 2.205867052078247, "learning_rate": 6.356328678539013e-06, "loss": 0.5906, "step": 10362 }, { "epoch": 0.4302253774020623, "grad_norm": 2.382962465286255, "learning_rate": 6.355681560251726e-06, "loss": 0.4599, "step": 10363 }, { "epoch": 0.4302668929262736, "grad_norm": 2.3191025257110596, "learning_rate": 6.355034417453326e-06, "loss": 0.5019, "step": 10364 }, { "epoch": 0.43030840845048496, "grad_norm": 2.347151756286621, "learning_rate": 6.354387250155513e-06, "loss": 0.5791, "step": 10365 }, { "epoch": 0.4303499239746963, "grad_norm": 2.403660774230957, "learning_rate": 6.353740058369989e-06, "loss": 0.5569, "step": 10366 }, { "epoch": 0.4303914394989076, "grad_norm": 2.9227752685546875, "learning_rate": 6.353092842108453e-06, "loss": 0.5927, "step": 10367 }, { "epoch": 0.43043295502311896, "grad_norm": 2.17733097076416, "learning_rate": 6.35244560138261e-06, "loss": 0.5245, "step": 10368 }, { "epoch": 0.4304744705473303, "grad_norm": 2.01456618309021, "learning_rate": 6.351798336204159e-06, "loss": 0.4345, "step": 10369 }, { "epoch": 0.4305159860715416, "grad_norm": 1.935149908065796, "learning_rate": 6.351151046584806e-06, "loss": 0.5752, "step": 10370 }, { "epoch": 0.43055750159575296, "grad_norm": 2.7432844638824463, "learning_rate": 6.3505037325362515e-06, "loss": 0.4411, "step": 10371 }, { "epoch": 0.4305990171199643, "grad_norm": 2.3581809997558594, "learning_rate": 6.349856394070202e-06, "loss": 0.5493, "step": 10372 }, { "epoch": 0.4306405326441756, "grad_norm": 2.1681525707244873, "learning_rate": 6.349209031198358e-06, "loss": 0.3821, "step": 10373 }, { "epoch": 0.43068204816838696, "grad_norm": 2.4001011848449707, "learning_rate": 6.348561643932426e-06, "loss": 0.4873, "step": 10374 }, { "epoch": 0.4307235636925983, "grad_norm": 2.3181049823760986, "learning_rate": 6.3479142322841116e-06, "loss": 0.5508, "step": 10375 }, { "epoch": 0.4307650792168096, "grad_norm": 2.4662983417510986, "learning_rate": 6.347266796265119e-06, "loss": 0.6107, "step": 10376 }, { "epoch": 0.43080659474102095, "grad_norm": 2.404205322265625, "learning_rate": 6.346619335887154e-06, "loss": 0.5579, "step": 10377 }, { "epoch": 0.4308481102652323, "grad_norm": 2.2907164096832275, "learning_rate": 6.345971851161924e-06, "loss": 0.4646, "step": 10378 }, { "epoch": 0.4308896257894436, "grad_norm": 2.495422840118408, "learning_rate": 6.3453243421011345e-06, "loss": 0.5015, "step": 10379 }, { "epoch": 0.43093114131365495, "grad_norm": 2.6218819618225098, "learning_rate": 6.344676808716493e-06, "loss": 0.5577, "step": 10380 }, { "epoch": 0.4309726568378663, "grad_norm": 2.4720118045806885, "learning_rate": 6.344029251019708e-06, "loss": 0.5426, "step": 10381 }, { "epoch": 0.4310141723620776, "grad_norm": 2.3864991664886475, "learning_rate": 6.343381669022486e-06, "loss": 0.5583, "step": 10382 }, { "epoch": 0.43105568788628895, "grad_norm": 2.4626688957214355, "learning_rate": 6.342734062736536e-06, "loss": 0.4501, "step": 10383 }, { "epoch": 0.43109720341050034, "grad_norm": 2.2771852016448975, "learning_rate": 6.342086432173568e-06, "loss": 0.4913, "step": 10384 }, { "epoch": 0.4311387189347117, "grad_norm": 2.3699629306793213, "learning_rate": 6.34143877734529e-06, "loss": 0.5518, "step": 10385 }, { "epoch": 0.431180234458923, "grad_norm": 2.7688422203063965, "learning_rate": 6.340791098263411e-06, "loss": 0.497, "step": 10386 }, { "epoch": 0.43122174998313434, "grad_norm": 2.650695323944092, "learning_rate": 6.340143394939645e-06, "loss": 0.6189, "step": 10387 }, { "epoch": 0.43126326550734567, "grad_norm": 2.5965447425842285, "learning_rate": 6.3394956673857e-06, "loss": 0.4434, "step": 10388 }, { "epoch": 0.431304781031557, "grad_norm": 2.8682286739349365, "learning_rate": 6.338847915613285e-06, "loss": 0.632, "step": 10389 }, { "epoch": 0.43134629655576834, "grad_norm": 2.6129565238952637, "learning_rate": 6.3382001396341155e-06, "loss": 0.5264, "step": 10390 }, { "epoch": 0.43138781207997967, "grad_norm": 2.185526132583618, "learning_rate": 6.3375523394599e-06, "loss": 0.4268, "step": 10391 }, { "epoch": 0.431429327604191, "grad_norm": 2.376971483230591, "learning_rate": 6.336904515102355e-06, "loss": 0.5125, "step": 10392 }, { "epoch": 0.43147084312840234, "grad_norm": 2.3090670108795166, "learning_rate": 6.33625666657319e-06, "loss": 0.4248, "step": 10393 }, { "epoch": 0.43151235865261367, "grad_norm": 2.3081307411193848, "learning_rate": 6.335608793884119e-06, "loss": 0.5191, "step": 10394 }, { "epoch": 0.431553874176825, "grad_norm": 2.8750476837158203, "learning_rate": 6.334960897046856e-06, "loss": 0.5904, "step": 10395 }, { "epoch": 0.43159538970103634, "grad_norm": 2.556788206100464, "learning_rate": 6.334312976073116e-06, "loss": 0.6297, "step": 10396 }, { "epoch": 0.43163690522524767, "grad_norm": 2.7284512519836426, "learning_rate": 6.333665030974612e-06, "loss": 0.4336, "step": 10397 }, { "epoch": 0.431678420749459, "grad_norm": 2.6311049461364746, "learning_rate": 6.333017061763061e-06, "loss": 0.539, "step": 10398 }, { "epoch": 0.43171993627367033, "grad_norm": 2.536558151245117, "learning_rate": 6.332369068450175e-06, "loss": 0.4434, "step": 10399 }, { "epoch": 0.43176145179788167, "grad_norm": 2.2149078845977783, "learning_rate": 6.3317210510476745e-06, "loss": 0.4439, "step": 10400 }, { "epoch": 0.431802967322093, "grad_norm": 3.0053188800811768, "learning_rate": 6.331073009567273e-06, "loss": 0.6689, "step": 10401 }, { "epoch": 0.43184448284630433, "grad_norm": 2.09018874168396, "learning_rate": 6.330424944020687e-06, "loss": 0.478, "step": 10402 }, { "epoch": 0.43188599837051567, "grad_norm": 2.687924385070801, "learning_rate": 6.329776854419636e-06, "loss": 0.6579, "step": 10403 }, { "epoch": 0.431927513894727, "grad_norm": 2.5956451892852783, "learning_rate": 6.329128740775834e-06, "loss": 0.6026, "step": 10404 }, { "epoch": 0.43196902941893833, "grad_norm": 1.953826665878296, "learning_rate": 6.328480603101004e-06, "loss": 0.4832, "step": 10405 }, { "epoch": 0.43201054494314967, "grad_norm": 2.6214911937713623, "learning_rate": 6.32783244140686e-06, "loss": 0.4746, "step": 10406 }, { "epoch": 0.432052060467361, "grad_norm": 2.9159393310546875, "learning_rate": 6.327184255705123e-06, "loss": 0.5436, "step": 10407 }, { "epoch": 0.43209357599157233, "grad_norm": 2.3898444175720215, "learning_rate": 6.326536046007512e-06, "loss": 0.4841, "step": 10408 }, { "epoch": 0.43213509151578366, "grad_norm": 2.4183390140533447, "learning_rate": 6.325887812325747e-06, "loss": 0.5488, "step": 10409 }, { "epoch": 0.432176607039995, "grad_norm": 2.6928415298461914, "learning_rate": 6.325239554671547e-06, "loss": 0.4145, "step": 10410 }, { "epoch": 0.43221812256420633, "grad_norm": 2.534125804901123, "learning_rate": 6.324591273056637e-06, "loss": 0.6077, "step": 10411 }, { "epoch": 0.43225963808841766, "grad_norm": 2.3326456546783447, "learning_rate": 6.323942967492732e-06, "loss": 0.5645, "step": 10412 }, { "epoch": 0.432301153612629, "grad_norm": 2.724519968032837, "learning_rate": 6.323294637991558e-06, "loss": 0.665, "step": 10413 }, { "epoch": 0.43234266913684033, "grad_norm": 2.2532780170440674, "learning_rate": 6.322646284564836e-06, "loss": 0.4121, "step": 10414 }, { "epoch": 0.4323841846610517, "grad_norm": 2.520576238632202, "learning_rate": 6.321997907224287e-06, "loss": 0.6258, "step": 10415 }, { "epoch": 0.43242570018526305, "grad_norm": 2.402367115020752, "learning_rate": 6.321349505981635e-06, "loss": 0.4575, "step": 10416 }, { "epoch": 0.4324672157094744, "grad_norm": 2.066373825073242, "learning_rate": 6.320701080848603e-06, "loss": 0.5823, "step": 10417 }, { "epoch": 0.4325087312336857, "grad_norm": 3.060637950897217, "learning_rate": 6.320052631836915e-06, "loss": 0.5837, "step": 10418 }, { "epoch": 0.43255024675789705, "grad_norm": 2.8747010231018066, "learning_rate": 6.319404158958294e-06, "loss": 0.6088, "step": 10419 }, { "epoch": 0.4325917622821084, "grad_norm": 1.9266040325164795, "learning_rate": 6.318755662224467e-06, "loss": 0.3507, "step": 10420 }, { "epoch": 0.4326332778063197, "grad_norm": 2.3817124366760254, "learning_rate": 6.318107141647156e-06, "loss": 0.4173, "step": 10421 }, { "epoch": 0.43267479333053105, "grad_norm": 2.6560802459716797, "learning_rate": 6.31745859723809e-06, "loss": 0.5818, "step": 10422 }, { "epoch": 0.4327163088547424, "grad_norm": 2.5876593589782715, "learning_rate": 6.3168100290089905e-06, "loss": 0.4525, "step": 10423 }, { "epoch": 0.4327578243789537, "grad_norm": 2.41367769241333, "learning_rate": 6.316161436971588e-06, "loss": 0.5441, "step": 10424 }, { "epoch": 0.43279933990316505, "grad_norm": 2.4420032501220703, "learning_rate": 6.315512821137606e-06, "loss": 0.3862, "step": 10425 }, { "epoch": 0.4328408554273764, "grad_norm": 2.3537960052490234, "learning_rate": 6.314864181518775e-06, "loss": 0.5081, "step": 10426 }, { "epoch": 0.4328823709515877, "grad_norm": 2.446161985397339, "learning_rate": 6.31421551812682e-06, "loss": 0.4719, "step": 10427 }, { "epoch": 0.43292388647579905, "grad_norm": 3.037165641784668, "learning_rate": 6.313566830973468e-06, "loss": 0.6356, "step": 10428 }, { "epoch": 0.4329654020000104, "grad_norm": 2.4534175395965576, "learning_rate": 6.31291812007045e-06, "loss": 0.5993, "step": 10429 }, { "epoch": 0.4330069175242217, "grad_norm": 2.7654459476470947, "learning_rate": 6.312269385429494e-06, "loss": 0.6278, "step": 10430 }, { "epoch": 0.43304843304843305, "grad_norm": 2.6649911403656006, "learning_rate": 6.311620627062329e-06, "loss": 0.5051, "step": 10431 }, { "epoch": 0.4330899485726444, "grad_norm": 2.6236352920532227, "learning_rate": 6.310971844980685e-06, "loss": 0.4074, "step": 10432 }, { "epoch": 0.4331314640968557, "grad_norm": 2.572937250137329, "learning_rate": 6.310323039196293e-06, "loss": 0.5039, "step": 10433 }, { "epoch": 0.43317297962106704, "grad_norm": 2.903219699859619, "learning_rate": 6.309674209720882e-06, "loss": 0.6541, "step": 10434 }, { "epoch": 0.4332144951452784, "grad_norm": 2.791252851486206, "learning_rate": 6.309025356566184e-06, "loss": 0.6703, "step": 10435 }, { "epoch": 0.4332560106694897, "grad_norm": 2.2137999534606934, "learning_rate": 6.30837647974393e-06, "loss": 0.5165, "step": 10436 }, { "epoch": 0.43329752619370104, "grad_norm": 2.4330179691314697, "learning_rate": 6.307727579265852e-06, "loss": 0.5051, "step": 10437 }, { "epoch": 0.4333390417179124, "grad_norm": 6.1338677406311035, "learning_rate": 6.307078655143683e-06, "loss": 0.5889, "step": 10438 }, { "epoch": 0.4333805572421237, "grad_norm": 3.49853253364563, "learning_rate": 6.3064297073891555e-06, "loss": 0.4019, "step": 10439 }, { "epoch": 0.43342207276633504, "grad_norm": 2.9718639850616455, "learning_rate": 6.3057807360140026e-06, "loss": 0.502, "step": 10440 }, { "epoch": 0.4334635882905464, "grad_norm": 2.5510094165802, "learning_rate": 6.305131741029956e-06, "loss": 0.5119, "step": 10441 }, { "epoch": 0.4335051038147577, "grad_norm": 2.6072051525115967, "learning_rate": 6.304482722448752e-06, "loss": 0.5295, "step": 10442 }, { "epoch": 0.43354661933896904, "grad_norm": 2.54524564743042, "learning_rate": 6.303833680282125e-06, "loss": 0.68, "step": 10443 }, { "epoch": 0.4335881348631804, "grad_norm": 2.8487229347229004, "learning_rate": 6.303184614541808e-06, "loss": 0.5341, "step": 10444 }, { "epoch": 0.43362965038739176, "grad_norm": 2.3804261684417725, "learning_rate": 6.3025355252395395e-06, "loss": 0.6008, "step": 10445 }, { "epoch": 0.4336711659116031, "grad_norm": 2.2359299659729004, "learning_rate": 6.3018864123870526e-06, "loss": 0.4339, "step": 10446 }, { "epoch": 0.43371268143581443, "grad_norm": 2.59057879447937, "learning_rate": 6.3012372759960825e-06, "loss": 0.6237, "step": 10447 }, { "epoch": 0.43375419696002576, "grad_norm": 2.6171820163726807, "learning_rate": 6.300588116078369e-06, "loss": 0.5446, "step": 10448 }, { "epoch": 0.4337957124842371, "grad_norm": 2.2588143348693848, "learning_rate": 6.2999389326456464e-06, "loss": 0.4135, "step": 10449 }, { "epoch": 0.43383722800844843, "grad_norm": 2.3182532787323, "learning_rate": 6.299289725709654e-06, "loss": 0.4349, "step": 10450 }, { "epoch": 0.43387874353265976, "grad_norm": 2.383557081222534, "learning_rate": 6.298640495282129e-06, "loss": 0.579, "step": 10451 }, { "epoch": 0.4339202590568711, "grad_norm": 2.9096405506134033, "learning_rate": 6.29799124137481e-06, "loss": 0.4123, "step": 10452 }, { "epoch": 0.4339617745810824, "grad_norm": 2.349416732788086, "learning_rate": 6.297341963999435e-06, "loss": 0.4593, "step": 10453 }, { "epoch": 0.43400329010529376, "grad_norm": 2.1301000118255615, "learning_rate": 6.296692663167742e-06, "loss": 0.4527, "step": 10454 }, { "epoch": 0.4340448056295051, "grad_norm": 2.0959489345550537, "learning_rate": 6.296043338891473e-06, "loss": 0.4963, "step": 10455 }, { "epoch": 0.4340863211537164, "grad_norm": 2.2250919342041016, "learning_rate": 6.295393991182366e-06, "loss": 0.3403, "step": 10456 }, { "epoch": 0.43412783667792776, "grad_norm": 2.5401065349578857, "learning_rate": 6.294744620052164e-06, "loss": 0.5819, "step": 10457 }, { "epoch": 0.4341693522021391, "grad_norm": 2.423266887664795, "learning_rate": 6.294095225512604e-06, "loss": 0.4242, "step": 10458 }, { "epoch": 0.4342108677263504, "grad_norm": 2.677915096282959, "learning_rate": 6.293445807575431e-06, "loss": 0.596, "step": 10459 }, { "epoch": 0.43425238325056176, "grad_norm": 2.6113436222076416, "learning_rate": 6.2927963662523835e-06, "loss": 0.4976, "step": 10460 }, { "epoch": 0.4342938987747731, "grad_norm": 2.5179123878479004, "learning_rate": 6.292146901555207e-06, "loss": 0.5848, "step": 10461 }, { "epoch": 0.4343354142989844, "grad_norm": 2.4533045291900635, "learning_rate": 6.291497413495639e-06, "loss": 0.4969, "step": 10462 }, { "epoch": 0.43437692982319576, "grad_norm": 2.5672192573547363, "learning_rate": 6.290847902085429e-06, "loss": 0.6081, "step": 10463 }, { "epoch": 0.4344184453474071, "grad_norm": 2.5682549476623535, "learning_rate": 6.290198367336315e-06, "loss": 0.6552, "step": 10464 }, { "epoch": 0.4344599608716184, "grad_norm": 2.5497124195098877, "learning_rate": 6.289548809260043e-06, "loss": 0.5465, "step": 10465 }, { "epoch": 0.43450147639582976, "grad_norm": 2.3333394527435303, "learning_rate": 6.288899227868358e-06, "loss": 0.4943, "step": 10466 }, { "epoch": 0.4345429919200411, "grad_norm": 2.2026214599609375, "learning_rate": 6.288249623173002e-06, "loss": 0.4384, "step": 10467 }, { "epoch": 0.4345845074442524, "grad_norm": 2.7797720432281494, "learning_rate": 6.287599995185721e-06, "loss": 0.6311, "step": 10468 }, { "epoch": 0.43462602296846375, "grad_norm": 2.5376663208007812, "learning_rate": 6.2869503439182626e-06, "loss": 0.4035, "step": 10469 }, { "epoch": 0.4346675384926751, "grad_norm": 2.533356189727783, "learning_rate": 6.28630066938237e-06, "loss": 0.4847, "step": 10470 }, { "epoch": 0.4347090540168864, "grad_norm": 2.4336719512939453, "learning_rate": 6.285650971589791e-06, "loss": 0.5603, "step": 10471 }, { "epoch": 0.43475056954109775, "grad_norm": 2.7351698875427246, "learning_rate": 6.2850012505522715e-06, "loss": 0.4168, "step": 10472 }, { "epoch": 0.4347920850653091, "grad_norm": 2.020275115966797, "learning_rate": 6.284351506281558e-06, "loss": 0.4349, "step": 10473 }, { "epoch": 0.4348336005895204, "grad_norm": 2.4048147201538086, "learning_rate": 6.283701738789401e-06, "loss": 0.5239, "step": 10474 }, { "epoch": 0.43487511611373175, "grad_norm": 2.7079639434814453, "learning_rate": 6.283051948087545e-06, "loss": 0.5643, "step": 10475 }, { "epoch": 0.43491663163794314, "grad_norm": 2.381666898727417, "learning_rate": 6.282402134187742e-06, "loss": 0.5852, "step": 10476 }, { "epoch": 0.4349581471621545, "grad_norm": 2.8593015670776367, "learning_rate": 6.281752297101736e-06, "loss": 0.5929, "step": 10477 }, { "epoch": 0.4349996626863658, "grad_norm": 2.1494300365448, "learning_rate": 6.281102436841282e-06, "loss": 0.534, "step": 10478 }, { "epoch": 0.43504117821057714, "grad_norm": 2.5097577571868896, "learning_rate": 6.280452553418126e-06, "loss": 0.3308, "step": 10479 }, { "epoch": 0.4350826937347885, "grad_norm": 2.271145820617676, "learning_rate": 6.2798026468440176e-06, "loss": 0.4892, "step": 10480 }, { "epoch": 0.4351242092589998, "grad_norm": 2.328028440475464, "learning_rate": 6.2791527171307085e-06, "loss": 0.5321, "step": 10481 }, { "epoch": 0.43516572478321114, "grad_norm": 2.2351200580596924, "learning_rate": 6.278502764289949e-06, "loss": 0.3784, "step": 10482 }, { "epoch": 0.43520724030742247, "grad_norm": 2.6359071731567383, "learning_rate": 6.277852788333493e-06, "loss": 0.534, "step": 10483 }, { "epoch": 0.4352487558316338, "grad_norm": 2.501007080078125, "learning_rate": 6.277202789273089e-06, "loss": 0.4033, "step": 10484 }, { "epoch": 0.43529027135584514, "grad_norm": 2.9952282905578613, "learning_rate": 6.276552767120491e-06, "loss": 0.5008, "step": 10485 }, { "epoch": 0.43533178688005647, "grad_norm": 2.5952165126800537, "learning_rate": 6.27590272188745e-06, "loss": 0.4317, "step": 10486 }, { "epoch": 0.4353733024042678, "grad_norm": 2.052780866622925, "learning_rate": 6.2752526535857216e-06, "loss": 0.5449, "step": 10487 }, { "epoch": 0.43541481792847914, "grad_norm": 2.392401933670044, "learning_rate": 6.2746025622270554e-06, "loss": 0.5009, "step": 10488 }, { "epoch": 0.43545633345269047, "grad_norm": 2.062340021133423, "learning_rate": 6.27395244782321e-06, "loss": 0.4249, "step": 10489 }, { "epoch": 0.4354978489769018, "grad_norm": 2.1096057891845703, "learning_rate": 6.273302310385936e-06, "loss": 0.5739, "step": 10490 }, { "epoch": 0.43553936450111314, "grad_norm": 2.0491039752960205, "learning_rate": 6.272652149926989e-06, "loss": 0.4441, "step": 10491 }, { "epoch": 0.43558088002532447, "grad_norm": 2.821955919265747, "learning_rate": 6.272001966458125e-06, "loss": 0.5356, "step": 10492 }, { "epoch": 0.4356223955495358, "grad_norm": 2.2799694538116455, "learning_rate": 6.271351759991099e-06, "loss": 0.4817, "step": 10493 }, { "epoch": 0.43566391107374713, "grad_norm": 2.6761324405670166, "learning_rate": 6.270701530537666e-06, "loss": 0.5724, "step": 10494 }, { "epoch": 0.43570542659795847, "grad_norm": 2.0851588249206543, "learning_rate": 6.270051278109582e-06, "loss": 0.4905, "step": 10495 }, { "epoch": 0.4357469421221698, "grad_norm": 3.2403881549835205, "learning_rate": 6.269401002718607e-06, "loss": 0.5818, "step": 10496 }, { "epoch": 0.43578845764638113, "grad_norm": 2.072389602661133, "learning_rate": 6.268750704376494e-06, "loss": 0.492, "step": 10497 }, { "epoch": 0.43582997317059247, "grad_norm": 2.882280111312866, "learning_rate": 6.268100383095004e-06, "loss": 0.6552, "step": 10498 }, { "epoch": 0.4358714886948038, "grad_norm": 1.95417058467865, "learning_rate": 6.267450038885893e-06, "loss": 0.4834, "step": 10499 }, { "epoch": 0.43591300421901513, "grad_norm": 1.9624733924865723, "learning_rate": 6.26679967176092e-06, "loss": 0.4492, "step": 10500 }, { "epoch": 0.43595451974322647, "grad_norm": 2.4756863117218018, "learning_rate": 6.266149281731844e-06, "loss": 0.4622, "step": 10501 }, { "epoch": 0.4359960352674378, "grad_norm": 2.949406385421753, "learning_rate": 6.265498868810424e-06, "loss": 0.6179, "step": 10502 }, { "epoch": 0.43603755079164913, "grad_norm": 2.097890853881836, "learning_rate": 6.26484843300842e-06, "loss": 0.4137, "step": 10503 }, { "epoch": 0.43607906631586046, "grad_norm": 2.55336594581604, "learning_rate": 6.2641979743375915e-06, "loss": 0.7145, "step": 10504 }, { "epoch": 0.4361205818400718, "grad_norm": 2.448366165161133, "learning_rate": 6.263547492809699e-06, "loss": 0.4689, "step": 10505 }, { "epoch": 0.43616209736428313, "grad_norm": 3.1140756607055664, "learning_rate": 6.262896988436504e-06, "loss": 0.5971, "step": 10506 }, { "epoch": 0.4362036128884945, "grad_norm": 3.2689108848571777, "learning_rate": 6.262246461229768e-06, "loss": 0.5081, "step": 10507 }, { "epoch": 0.43624512841270585, "grad_norm": 2.666480302810669, "learning_rate": 6.26159591120125e-06, "loss": 0.4609, "step": 10508 }, { "epoch": 0.4362866439369172, "grad_norm": 2.0879573822021484, "learning_rate": 6.260945338362716e-06, "loss": 0.5355, "step": 10509 }, { "epoch": 0.4363281594611285, "grad_norm": 2.4278271198272705, "learning_rate": 6.2602947427259265e-06, "loss": 0.6023, "step": 10510 }, { "epoch": 0.43636967498533985, "grad_norm": 2.2507128715515137, "learning_rate": 6.2596441243026454e-06, "loss": 0.4727, "step": 10511 }, { "epoch": 0.4364111905095512, "grad_norm": 2.264314889907837, "learning_rate": 6.258993483104634e-06, "loss": 0.5255, "step": 10512 }, { "epoch": 0.4364527060337625, "grad_norm": 2.2466366291046143, "learning_rate": 6.258342819143659e-06, "loss": 0.4568, "step": 10513 }, { "epoch": 0.43649422155797385, "grad_norm": 2.7165186405181885, "learning_rate": 6.257692132431482e-06, "loss": 0.4973, "step": 10514 }, { "epoch": 0.4365357370821852, "grad_norm": 2.258253812789917, "learning_rate": 6.257041422979871e-06, "loss": 0.4966, "step": 10515 }, { "epoch": 0.4365772526063965, "grad_norm": 2.751431941986084, "learning_rate": 6.2563906908005866e-06, "loss": 0.6143, "step": 10516 }, { "epoch": 0.43661876813060785, "grad_norm": 2.5238163471221924, "learning_rate": 6.255739935905396e-06, "loss": 0.5338, "step": 10517 }, { "epoch": 0.4366602836548192, "grad_norm": 2.3140933513641357, "learning_rate": 6.255089158306067e-06, "loss": 0.4739, "step": 10518 }, { "epoch": 0.4367017991790305, "grad_norm": 2.486917495727539, "learning_rate": 6.254438358014364e-06, "loss": 0.5444, "step": 10519 }, { "epoch": 0.43674331470324185, "grad_norm": 2.6042349338531494, "learning_rate": 6.253787535042053e-06, "loss": 0.4957, "step": 10520 }, { "epoch": 0.4367848302274532, "grad_norm": 2.2580838203430176, "learning_rate": 6.253136689400902e-06, "loss": 0.5155, "step": 10521 }, { "epoch": 0.4368263457516645, "grad_norm": 2.9772796630859375, "learning_rate": 6.25248582110268e-06, "loss": 0.5096, "step": 10522 }, { "epoch": 0.43686786127587585, "grad_norm": 2.9319143295288086, "learning_rate": 6.251834930159151e-06, "loss": 0.5423, "step": 10523 }, { "epoch": 0.4369093768000872, "grad_norm": 2.465834856033325, "learning_rate": 6.251184016582088e-06, "loss": 0.4378, "step": 10524 }, { "epoch": 0.4369508923242985, "grad_norm": 2.7549593448638916, "learning_rate": 6.2505330803832556e-06, "loss": 0.5359, "step": 10525 }, { "epoch": 0.43699240784850985, "grad_norm": 2.0240988731384277, "learning_rate": 6.2498821215744255e-06, "loss": 0.473, "step": 10526 }, { "epoch": 0.4370339233727212, "grad_norm": 2.3455138206481934, "learning_rate": 6.249231140167365e-06, "loss": 0.5598, "step": 10527 }, { "epoch": 0.4370754388969325, "grad_norm": 2.2455575466156006, "learning_rate": 6.248580136173847e-06, "loss": 0.4652, "step": 10528 }, { "epoch": 0.43711695442114384, "grad_norm": 2.3777599334716797, "learning_rate": 6.247929109605642e-06, "loss": 0.6144, "step": 10529 }, { "epoch": 0.4371584699453552, "grad_norm": 2.943178653717041, "learning_rate": 6.247278060474516e-06, "loss": 0.5436, "step": 10530 }, { "epoch": 0.4371999854695665, "grad_norm": 1.9800633192062378, "learning_rate": 6.246626988792244e-06, "loss": 0.3352, "step": 10531 }, { "epoch": 0.43724150099377784, "grad_norm": 2.804344654083252, "learning_rate": 6.245975894570597e-06, "loss": 0.454, "step": 10532 }, { "epoch": 0.4372830165179892, "grad_norm": 2.824502944946289, "learning_rate": 6.245324777821346e-06, "loss": 0.446, "step": 10533 }, { "epoch": 0.4373245320422005, "grad_norm": 2.7169339656829834, "learning_rate": 6.244673638556264e-06, "loss": 0.461, "step": 10534 }, { "epoch": 0.43736604756641184, "grad_norm": 2.5320303440093994, "learning_rate": 6.244022476787125e-06, "loss": 0.5109, "step": 10535 }, { "epoch": 0.4374075630906232, "grad_norm": 2.3181874752044678, "learning_rate": 6.243371292525701e-06, "loss": 0.5504, "step": 10536 }, { "epoch": 0.4374490786148345, "grad_norm": 2.566842555999756, "learning_rate": 6.242720085783764e-06, "loss": 0.5004, "step": 10537 }, { "epoch": 0.4374905941390459, "grad_norm": 2.036783218383789, "learning_rate": 6.242068856573091e-06, "loss": 0.5831, "step": 10538 }, { "epoch": 0.43753210966325723, "grad_norm": 2.6046786308288574, "learning_rate": 6.241417604905455e-06, "loss": 0.31, "step": 10539 }, { "epoch": 0.43757362518746856, "grad_norm": 2.58689284324646, "learning_rate": 6.240766330792631e-06, "loss": 0.5786, "step": 10540 }, { "epoch": 0.4376151407116799, "grad_norm": 2.9731876850128174, "learning_rate": 6.240115034246393e-06, "loss": 0.6043, "step": 10541 }, { "epoch": 0.43765665623589123, "grad_norm": 2.0361685752868652, "learning_rate": 6.23946371527852e-06, "loss": 0.4869, "step": 10542 }, { "epoch": 0.43769817176010256, "grad_norm": 2.369386672973633, "learning_rate": 6.238812373900784e-06, "loss": 0.476, "step": 10543 }, { "epoch": 0.4377396872843139, "grad_norm": 2.413362503051758, "learning_rate": 6.238161010124963e-06, "loss": 0.5245, "step": 10544 }, { "epoch": 0.4377812028085252, "grad_norm": 2.2622551918029785, "learning_rate": 6.237509623962837e-06, "loss": 0.4279, "step": 10545 }, { "epoch": 0.43782271833273656, "grad_norm": 2.5671563148498535, "learning_rate": 6.236858215426176e-06, "loss": 0.494, "step": 10546 }, { "epoch": 0.4378642338569479, "grad_norm": 2.2798001766204834, "learning_rate": 6.236206784526765e-06, "loss": 0.6159, "step": 10547 }, { "epoch": 0.4379057493811592, "grad_norm": 2.042067527770996, "learning_rate": 6.2355553312763775e-06, "loss": 0.3264, "step": 10548 }, { "epoch": 0.43794726490537056, "grad_norm": 2.343398094177246, "learning_rate": 6.234903855686793e-06, "loss": 0.6705, "step": 10549 }, { "epoch": 0.4379887804295819, "grad_norm": 1.965242624282837, "learning_rate": 6.2342523577697925e-06, "loss": 0.5209, "step": 10550 }, { "epoch": 0.4380302959537932, "grad_norm": 2.4030873775482178, "learning_rate": 6.233600837537153e-06, "loss": 0.4895, "step": 10551 }, { "epoch": 0.43807181147800456, "grad_norm": 2.479790449142456, "learning_rate": 6.232949295000655e-06, "loss": 0.6638, "step": 10552 }, { "epoch": 0.4381133270022159, "grad_norm": 2.3751823902130127, "learning_rate": 6.232297730172077e-06, "loss": 0.6339, "step": 10553 }, { "epoch": 0.4381548425264272, "grad_norm": 2.0885169506073, "learning_rate": 6.231646143063202e-06, "loss": 0.4827, "step": 10554 }, { "epoch": 0.43819635805063856, "grad_norm": 2.7983336448669434, "learning_rate": 6.23099453368581e-06, "loss": 0.6485, "step": 10555 }, { "epoch": 0.4382378735748499, "grad_norm": 2.03294038772583, "learning_rate": 6.230342902051683e-06, "loss": 0.4322, "step": 10556 }, { "epoch": 0.4382793890990612, "grad_norm": 2.5489189624786377, "learning_rate": 6.229691248172599e-06, "loss": 0.5639, "step": 10557 }, { "epoch": 0.43832090462327256, "grad_norm": 2.1254630088806152, "learning_rate": 6.229039572060344e-06, "loss": 0.4036, "step": 10558 }, { "epoch": 0.4383624201474839, "grad_norm": 2.5341999530792236, "learning_rate": 6.2283878737267e-06, "loss": 0.5419, "step": 10559 }, { "epoch": 0.4384039356716952, "grad_norm": 2.1798572540283203, "learning_rate": 6.227736153183449e-06, "loss": 0.4948, "step": 10560 }, { "epoch": 0.43844545119590655, "grad_norm": 2.4642953872680664, "learning_rate": 6.227084410442374e-06, "loss": 0.4557, "step": 10561 }, { "epoch": 0.4384869667201179, "grad_norm": 2.6479156017303467, "learning_rate": 6.226432645515259e-06, "loss": 0.6542, "step": 10562 }, { "epoch": 0.4385284822443292, "grad_norm": 2.368443012237549, "learning_rate": 6.225780858413889e-06, "loss": 0.6224, "step": 10563 }, { "epoch": 0.43856999776854055, "grad_norm": 2.739969253540039, "learning_rate": 6.225129049150046e-06, "loss": 0.4025, "step": 10564 }, { "epoch": 0.4386115132927519, "grad_norm": 2.381561517715454, "learning_rate": 6.224477217735519e-06, "loss": 0.6672, "step": 10565 }, { "epoch": 0.4386530288169632, "grad_norm": 2.4219930171966553, "learning_rate": 6.223825364182089e-06, "loss": 0.4332, "step": 10566 }, { "epoch": 0.43869454434117455, "grad_norm": 2.2302286624908447, "learning_rate": 6.223173488501546e-06, "loss": 0.5825, "step": 10567 }, { "epoch": 0.4387360598653859, "grad_norm": 1.860585331916809, "learning_rate": 6.222521590705672e-06, "loss": 0.3547, "step": 10568 }, { "epoch": 0.4387775753895973, "grad_norm": 2.9333412647247314, "learning_rate": 6.221869670806257e-06, "loss": 0.4019, "step": 10569 }, { "epoch": 0.4388190909138086, "grad_norm": 1.9943902492523193, "learning_rate": 6.221217728815084e-06, "loss": 0.4107, "step": 10570 }, { "epoch": 0.43886060643801994, "grad_norm": 2.3543455600738525, "learning_rate": 6.220565764743944e-06, "loss": 0.4971, "step": 10571 }, { "epoch": 0.4389021219622313, "grad_norm": 2.4241089820861816, "learning_rate": 6.219913778604622e-06, "loss": 0.7033, "step": 10572 }, { "epoch": 0.4389436374864426, "grad_norm": 2.188692569732666, "learning_rate": 6.2192617704089085e-06, "loss": 0.6119, "step": 10573 }, { "epoch": 0.43898515301065394, "grad_norm": 2.312195301055908, "learning_rate": 6.218609740168589e-06, "loss": 0.5641, "step": 10574 }, { "epoch": 0.43902666853486527, "grad_norm": 2.3445117473602295, "learning_rate": 6.217957687895455e-06, "loss": 0.5786, "step": 10575 }, { "epoch": 0.4390681840590766, "grad_norm": 2.5103812217712402, "learning_rate": 6.217305613601296e-06, "loss": 0.6102, "step": 10576 }, { "epoch": 0.43910969958328794, "grad_norm": 2.419649124145508, "learning_rate": 6.216653517297899e-06, "loss": 0.4082, "step": 10577 }, { "epoch": 0.43915121510749927, "grad_norm": 2.5840673446655273, "learning_rate": 6.2160013989970565e-06, "loss": 0.5308, "step": 10578 }, { "epoch": 0.4391927306317106, "grad_norm": 2.6805624961853027, "learning_rate": 6.215349258710558e-06, "loss": 0.4739, "step": 10579 }, { "epoch": 0.43923424615592194, "grad_norm": 2.088958501815796, "learning_rate": 6.214697096450195e-06, "loss": 0.4866, "step": 10580 }, { "epoch": 0.43927576168013327, "grad_norm": 2.2825636863708496, "learning_rate": 6.214044912227759e-06, "loss": 0.5482, "step": 10581 }, { "epoch": 0.4393172772043446, "grad_norm": 2.3421711921691895, "learning_rate": 6.21339270605504e-06, "loss": 0.4821, "step": 10582 }, { "epoch": 0.43935879272855594, "grad_norm": 3.0679104328155518, "learning_rate": 6.212740477943832e-06, "loss": 0.5495, "step": 10583 }, { "epoch": 0.43940030825276727, "grad_norm": 2.118428945541382, "learning_rate": 6.212088227905925e-06, "loss": 0.6156, "step": 10584 }, { "epoch": 0.4394418237769786, "grad_norm": 2.432293653488159, "learning_rate": 6.211435955953116e-06, "loss": 0.5143, "step": 10585 }, { "epoch": 0.43948333930118993, "grad_norm": 2.263651132583618, "learning_rate": 6.210783662097194e-06, "loss": 0.6093, "step": 10586 }, { "epoch": 0.43952485482540127, "grad_norm": 2.123262643814087, "learning_rate": 6.210131346349953e-06, "loss": 0.4955, "step": 10587 }, { "epoch": 0.4395663703496126, "grad_norm": 2.1572787761688232, "learning_rate": 6.2094790087231905e-06, "loss": 0.3753, "step": 10588 }, { "epoch": 0.43960788587382393, "grad_norm": 2.3189101219177246, "learning_rate": 6.208826649228698e-06, "loss": 0.6323, "step": 10589 }, { "epoch": 0.43964940139803527, "grad_norm": 2.647174119949341, "learning_rate": 6.208174267878272e-06, "loss": 0.5295, "step": 10590 }, { "epoch": 0.4396909169222466, "grad_norm": 2.332174301147461, "learning_rate": 6.207521864683708e-06, "loss": 0.4884, "step": 10591 }, { "epoch": 0.43973243244645793, "grad_norm": 2.882646322250366, "learning_rate": 6.206869439656798e-06, "loss": 0.5296, "step": 10592 }, { "epoch": 0.43977394797066927, "grad_norm": 2.9527924060821533, "learning_rate": 6.206216992809343e-06, "loss": 0.5553, "step": 10593 }, { "epoch": 0.4398154634948806, "grad_norm": 2.6132700443267822, "learning_rate": 6.205564524153137e-06, "loss": 0.4917, "step": 10594 }, { "epoch": 0.43985697901909193, "grad_norm": 2.1242315769195557, "learning_rate": 6.204912033699977e-06, "loss": 0.5402, "step": 10595 }, { "epoch": 0.43989849454330326, "grad_norm": 2.6072659492492676, "learning_rate": 6.204259521461658e-06, "loss": 0.6262, "step": 10596 }, { "epoch": 0.4399400100675146, "grad_norm": 2.0757992267608643, "learning_rate": 6.203606987449981e-06, "loss": 0.3713, "step": 10597 }, { "epoch": 0.43998152559172593, "grad_norm": 2.9629125595092773, "learning_rate": 6.202954431676743e-06, "loss": 0.3904, "step": 10598 }, { "epoch": 0.44002304111593726, "grad_norm": 2.368121862411499, "learning_rate": 6.2023018541537425e-06, "loss": 0.5123, "step": 10599 }, { "epoch": 0.44006455664014865, "grad_norm": 2.6039011478424072, "learning_rate": 6.201649254892776e-06, "loss": 0.4811, "step": 10600 }, { "epoch": 0.44010607216436, "grad_norm": 2.6115100383758545, "learning_rate": 6.200996633905646e-06, "loss": 0.566, "step": 10601 }, { "epoch": 0.4401475876885713, "grad_norm": 2.130507230758667, "learning_rate": 6.200343991204151e-06, "loss": 0.5441, "step": 10602 }, { "epoch": 0.44018910321278265, "grad_norm": 2.374124050140381, "learning_rate": 6.199691326800091e-06, "loss": 0.5058, "step": 10603 }, { "epoch": 0.440230618736994, "grad_norm": 2.3876893520355225, "learning_rate": 6.199038640705265e-06, "loss": 0.5293, "step": 10604 }, { "epoch": 0.4402721342612053, "grad_norm": 3.407501220703125, "learning_rate": 6.1983859329314745e-06, "loss": 0.4946, "step": 10605 }, { "epoch": 0.44031364978541665, "grad_norm": 2.023874521255493, "learning_rate": 6.197733203490521e-06, "loss": 0.3297, "step": 10606 }, { "epoch": 0.440355165309628, "grad_norm": 2.7334041595458984, "learning_rate": 6.197080452394208e-06, "loss": 0.4508, "step": 10607 }, { "epoch": 0.4403966808338393, "grad_norm": 2.1675753593444824, "learning_rate": 6.1964276796543344e-06, "loss": 0.5063, "step": 10608 }, { "epoch": 0.44043819635805065, "grad_norm": 2.3745129108428955, "learning_rate": 6.195774885282703e-06, "loss": 0.5807, "step": 10609 }, { "epoch": 0.440479711882262, "grad_norm": 2.6318581104278564, "learning_rate": 6.1951220692911164e-06, "loss": 0.6785, "step": 10610 }, { "epoch": 0.4405212274064733, "grad_norm": 2.4645256996154785, "learning_rate": 6.194469231691379e-06, "loss": 0.4964, "step": 10611 }, { "epoch": 0.44056274293068465, "grad_norm": 2.0582733154296875, "learning_rate": 6.193816372495294e-06, "loss": 0.4598, "step": 10612 }, { "epoch": 0.440604258454896, "grad_norm": 2.600902557373047, "learning_rate": 6.193163491714663e-06, "loss": 0.6118, "step": 10613 }, { "epoch": 0.4406457739791073, "grad_norm": 2.2953269481658936, "learning_rate": 6.192510589361293e-06, "loss": 0.4246, "step": 10614 }, { "epoch": 0.44068728950331865, "grad_norm": 2.2378571033477783, "learning_rate": 6.1918576654469895e-06, "loss": 0.6236, "step": 10615 }, { "epoch": 0.44072880502753, "grad_norm": 2.350454092025757, "learning_rate": 6.191204719983554e-06, "loss": 0.4669, "step": 10616 }, { "epoch": 0.4407703205517413, "grad_norm": 2.5341386795043945, "learning_rate": 6.190551752982795e-06, "loss": 0.5339, "step": 10617 }, { "epoch": 0.44081183607595265, "grad_norm": 3.0261073112487793, "learning_rate": 6.189898764456517e-06, "loss": 0.5894, "step": 10618 }, { "epoch": 0.440853351600164, "grad_norm": 1.9533863067626953, "learning_rate": 6.1892457544165265e-06, "loss": 0.5464, "step": 10619 }, { "epoch": 0.4408948671243753, "grad_norm": 2.034316301345825, "learning_rate": 6.18859272287463e-06, "loss": 0.339, "step": 10620 }, { "epoch": 0.44093638264858664, "grad_norm": 2.3304028511047363, "learning_rate": 6.187939669842634e-06, "loss": 0.6867, "step": 10621 }, { "epoch": 0.440977898172798, "grad_norm": 2.373297691345215, "learning_rate": 6.187286595332347e-06, "loss": 0.5451, "step": 10622 }, { "epoch": 0.4410194136970093, "grad_norm": 2.3298840522766113, "learning_rate": 6.186633499355576e-06, "loss": 0.4381, "step": 10623 }, { "epoch": 0.44106092922122064, "grad_norm": 2.0372347831726074, "learning_rate": 6.185980381924128e-06, "loss": 0.5651, "step": 10624 }, { "epoch": 0.441102444745432, "grad_norm": 2.5172812938690186, "learning_rate": 6.1853272430498146e-06, "loss": 0.54, "step": 10625 }, { "epoch": 0.4411439602696433, "grad_norm": 2.991220235824585, "learning_rate": 6.18467408274444e-06, "loss": 0.5287, "step": 10626 }, { "epoch": 0.44118547579385464, "grad_norm": 2.5887515544891357, "learning_rate": 6.1840209010198205e-06, "loss": 0.6143, "step": 10627 }, { "epoch": 0.441226991318066, "grad_norm": 2.6007800102233887, "learning_rate": 6.183367697887759e-06, "loss": 0.539, "step": 10628 }, { "epoch": 0.4412685068422773, "grad_norm": 2.486051082611084, "learning_rate": 6.18271447336007e-06, "loss": 0.5583, "step": 10629 }, { "epoch": 0.44131002236648864, "grad_norm": 2.3368453979492188, "learning_rate": 6.182061227448561e-06, "loss": 0.4273, "step": 10630 }, { "epoch": 0.44135153789070003, "grad_norm": 2.6447901725769043, "learning_rate": 6.181407960165046e-06, "loss": 0.5499, "step": 10631 }, { "epoch": 0.44139305341491136, "grad_norm": 2.643308162689209, "learning_rate": 6.180754671521334e-06, "loss": 0.7613, "step": 10632 }, { "epoch": 0.4414345689391227, "grad_norm": 2.063351631164551, "learning_rate": 6.180101361529237e-06, "loss": 0.4642, "step": 10633 }, { "epoch": 0.44147608446333403, "grad_norm": 2.1313068866729736, "learning_rate": 6.179448030200568e-06, "loss": 0.4856, "step": 10634 }, { "epoch": 0.44151759998754536, "grad_norm": 2.5741028785705566, "learning_rate": 6.178794677547138e-06, "loss": 0.5384, "step": 10635 }, { "epoch": 0.4415591155117567, "grad_norm": 2.5744762420654297, "learning_rate": 6.17814130358076e-06, "loss": 0.4193, "step": 10636 }, { "epoch": 0.441600631035968, "grad_norm": 1.9536230564117432, "learning_rate": 6.177487908313247e-06, "loss": 0.5178, "step": 10637 }, { "epoch": 0.44164214656017936, "grad_norm": 2.3476507663726807, "learning_rate": 6.1768344917564146e-06, "loss": 0.6094, "step": 10638 }, { "epoch": 0.4416836620843907, "grad_norm": 2.4783742427825928, "learning_rate": 6.176181053922074e-06, "loss": 0.6895, "step": 10639 }, { "epoch": 0.441725177608602, "grad_norm": 2.187067985534668, "learning_rate": 6.1755275948220424e-06, "loss": 0.5536, "step": 10640 }, { "epoch": 0.44176669313281336, "grad_norm": 2.4326725006103516, "learning_rate": 6.174874114468132e-06, "loss": 0.5562, "step": 10641 }, { "epoch": 0.4418082086570247, "grad_norm": 2.0668869018554688, "learning_rate": 6.174220612872159e-06, "loss": 0.4672, "step": 10642 }, { "epoch": 0.441849724181236, "grad_norm": 2.4282493591308594, "learning_rate": 6.173567090045941e-06, "loss": 0.6677, "step": 10643 }, { "epoch": 0.44189123970544736, "grad_norm": 2.328657627105713, "learning_rate": 6.1729135460012905e-06, "loss": 0.4622, "step": 10644 }, { "epoch": 0.4419327552296587, "grad_norm": 2.6080267429351807, "learning_rate": 6.172259980750024e-06, "loss": 0.507, "step": 10645 }, { "epoch": 0.44197427075387, "grad_norm": 2.4243409633636475, "learning_rate": 6.171606394303961e-06, "loss": 0.4804, "step": 10646 }, { "epoch": 0.44201578627808136, "grad_norm": 2.7459332942962646, "learning_rate": 6.170952786674915e-06, "loss": 0.5389, "step": 10647 }, { "epoch": 0.4420573018022927, "grad_norm": 1.8182802200317383, "learning_rate": 6.170299157874707e-06, "loss": 0.3545, "step": 10648 }, { "epoch": 0.442098817326504, "grad_norm": 2.256057024002075, "learning_rate": 6.169645507915151e-06, "loss": 0.551, "step": 10649 }, { "epoch": 0.44214033285071536, "grad_norm": 2.23405122756958, "learning_rate": 6.168991836808067e-06, "loss": 0.5779, "step": 10650 }, { "epoch": 0.4421818483749267, "grad_norm": 2.408750295639038, "learning_rate": 6.168338144565274e-06, "loss": 0.5524, "step": 10651 }, { "epoch": 0.442223363899138, "grad_norm": 2.570809841156006, "learning_rate": 6.167684431198591e-06, "loss": 0.5278, "step": 10652 }, { "epoch": 0.44226487942334936, "grad_norm": 2.8140437602996826, "learning_rate": 6.167030696719838e-06, "loss": 0.5666, "step": 10653 }, { "epoch": 0.4423063949475607, "grad_norm": 2.4073379039764404, "learning_rate": 6.166376941140831e-06, "loss": 0.5159, "step": 10654 }, { "epoch": 0.442347910471772, "grad_norm": 2.168517827987671, "learning_rate": 6.165723164473394e-06, "loss": 0.5468, "step": 10655 }, { "epoch": 0.44238942599598335, "grad_norm": 2.6296489238739014, "learning_rate": 6.165069366729347e-06, "loss": 0.5761, "step": 10656 }, { "epoch": 0.4424309415201947, "grad_norm": 3.1428775787353516, "learning_rate": 6.16441554792051e-06, "loss": 0.511, "step": 10657 }, { "epoch": 0.442472457044406, "grad_norm": 2.383817195892334, "learning_rate": 6.163761708058703e-06, "loss": 0.5746, "step": 10658 }, { "epoch": 0.44251397256861735, "grad_norm": 2.078089475631714, "learning_rate": 6.16310784715575e-06, "loss": 0.4977, "step": 10659 }, { "epoch": 0.4425554880928287, "grad_norm": 2.427035331726074, "learning_rate": 6.162453965223472e-06, "loss": 0.5073, "step": 10660 }, { "epoch": 0.4425970036170401, "grad_norm": 2.630211353302002, "learning_rate": 6.16180006227369e-06, "loss": 0.5537, "step": 10661 }, { "epoch": 0.4426385191412514, "grad_norm": 2.714827537536621, "learning_rate": 6.16114613831823e-06, "loss": 0.6112, "step": 10662 }, { "epoch": 0.44268003466546274, "grad_norm": 2.3215720653533936, "learning_rate": 6.160492193368911e-06, "loss": 0.4795, "step": 10663 }, { "epoch": 0.4427215501896741, "grad_norm": 2.5537679195404053, "learning_rate": 6.15983822743756e-06, "loss": 0.5825, "step": 10664 }, { "epoch": 0.4427630657138854, "grad_norm": 2.2489383220672607, "learning_rate": 6.159184240535999e-06, "loss": 0.5031, "step": 10665 }, { "epoch": 0.44280458123809674, "grad_norm": 2.6219170093536377, "learning_rate": 6.158530232676053e-06, "loss": 0.5171, "step": 10666 }, { "epoch": 0.4428460967623081, "grad_norm": 2.5453598499298096, "learning_rate": 6.157876203869546e-06, "loss": 0.5169, "step": 10667 }, { "epoch": 0.4428876122865194, "grad_norm": 2.4663519859313965, "learning_rate": 6.157222154128302e-06, "loss": 0.5543, "step": 10668 }, { "epoch": 0.44292912781073074, "grad_norm": 2.457894802093506, "learning_rate": 6.156568083464152e-06, "loss": 0.5191, "step": 10669 }, { "epoch": 0.44297064333494207, "grad_norm": 2.407493829727173, "learning_rate": 6.155913991888916e-06, "loss": 0.5671, "step": 10670 }, { "epoch": 0.4430121588591534, "grad_norm": 3.3327999114990234, "learning_rate": 6.15525987941442e-06, "loss": 0.6247, "step": 10671 }, { "epoch": 0.44305367438336474, "grad_norm": 3.654284715652466, "learning_rate": 6.154605746052495e-06, "loss": 0.7094, "step": 10672 }, { "epoch": 0.44309518990757607, "grad_norm": 2.261766195297241, "learning_rate": 6.1539515918149636e-06, "loss": 0.5598, "step": 10673 }, { "epoch": 0.4431367054317874, "grad_norm": 2.190812110900879, "learning_rate": 6.153297416713655e-06, "loss": 0.5188, "step": 10674 }, { "epoch": 0.44317822095599874, "grad_norm": 2.342646360397339, "learning_rate": 6.152643220760397e-06, "loss": 0.5274, "step": 10675 }, { "epoch": 0.44321973648021007, "grad_norm": 2.2089762687683105, "learning_rate": 6.1519890039670174e-06, "loss": 0.4388, "step": 10676 }, { "epoch": 0.4432612520044214, "grad_norm": 2.787677526473999, "learning_rate": 6.151334766345345e-06, "loss": 0.5688, "step": 10677 }, { "epoch": 0.44330276752863274, "grad_norm": 2.8039207458496094, "learning_rate": 6.150680507907207e-06, "loss": 0.7031, "step": 10678 }, { "epoch": 0.44334428305284407, "grad_norm": 2.4363508224487305, "learning_rate": 6.150026228664435e-06, "loss": 0.5304, "step": 10679 }, { "epoch": 0.4433857985770554, "grad_norm": 2.086228609085083, "learning_rate": 6.149371928628856e-06, "loss": 0.405, "step": 10680 }, { "epoch": 0.44342731410126673, "grad_norm": 2.2116730213165283, "learning_rate": 6.148717607812301e-06, "loss": 0.4208, "step": 10681 }, { "epoch": 0.44346882962547807, "grad_norm": 2.342689037322998, "learning_rate": 6.1480632662266025e-06, "loss": 0.5359, "step": 10682 }, { "epoch": 0.4435103451496894, "grad_norm": 2.4705185890197754, "learning_rate": 6.147408903883588e-06, "loss": 0.4276, "step": 10683 }, { "epoch": 0.44355186067390073, "grad_norm": 2.423309326171875, "learning_rate": 6.146754520795091e-06, "loss": 0.5627, "step": 10684 }, { "epoch": 0.44359337619811207, "grad_norm": 1.8438749313354492, "learning_rate": 6.1461001169729405e-06, "loss": 0.3698, "step": 10685 }, { "epoch": 0.4436348917223234, "grad_norm": 2.546206474304199, "learning_rate": 6.14544569242897e-06, "loss": 0.4305, "step": 10686 }, { "epoch": 0.44367640724653473, "grad_norm": 2.7786624431610107, "learning_rate": 6.144791247175011e-06, "loss": 0.6322, "step": 10687 }, { "epoch": 0.44371792277074606, "grad_norm": 2.118149757385254, "learning_rate": 6.144136781222897e-06, "loss": 0.5754, "step": 10688 }, { "epoch": 0.4437594382949574, "grad_norm": 2.2565369606018066, "learning_rate": 6.143482294584459e-06, "loss": 0.4584, "step": 10689 }, { "epoch": 0.44380095381916873, "grad_norm": 2.010676860809326, "learning_rate": 6.1428277872715325e-06, "loss": 0.427, "step": 10690 }, { "epoch": 0.44384246934338006, "grad_norm": 2.3571414947509766, "learning_rate": 6.142173259295949e-06, "loss": 0.463, "step": 10691 }, { "epoch": 0.44388398486759145, "grad_norm": 2.0439019203186035, "learning_rate": 6.141518710669545e-06, "loss": 0.5907, "step": 10692 }, { "epoch": 0.4439255003918028, "grad_norm": 2.2059309482574463, "learning_rate": 6.140864141404153e-06, "loss": 0.4254, "step": 10693 }, { "epoch": 0.4439670159160141, "grad_norm": 2.790754556655884, "learning_rate": 6.140209551511609e-06, "loss": 0.4322, "step": 10694 }, { "epoch": 0.44400853144022545, "grad_norm": 2.1388301849365234, "learning_rate": 6.139554941003747e-06, "loss": 0.6239, "step": 10695 }, { "epoch": 0.4440500469644368, "grad_norm": 2.1249208450317383, "learning_rate": 6.138900309892404e-06, "loss": 0.4611, "step": 10696 }, { "epoch": 0.4440915624886481, "grad_norm": 2.9462287425994873, "learning_rate": 6.1382456581894145e-06, "loss": 0.6109, "step": 10697 }, { "epoch": 0.44413307801285945, "grad_norm": 2.324876308441162, "learning_rate": 6.137590985906615e-06, "loss": 0.4519, "step": 10698 }, { "epoch": 0.4441745935370708, "grad_norm": 2.570636034011841, "learning_rate": 6.136936293055844e-06, "loss": 0.5365, "step": 10699 }, { "epoch": 0.4442161090612821, "grad_norm": 2.3427329063415527, "learning_rate": 6.136281579648936e-06, "loss": 0.553, "step": 10700 }, { "epoch": 0.44425762458549345, "grad_norm": 2.6249537467956543, "learning_rate": 6.135626845697731e-06, "loss": 0.6078, "step": 10701 }, { "epoch": 0.4442991401097048, "grad_norm": 2.461820125579834, "learning_rate": 6.134972091214063e-06, "loss": 0.5372, "step": 10702 }, { "epoch": 0.4443406556339161, "grad_norm": 2.208127975463867, "learning_rate": 6.134317316209775e-06, "loss": 0.5108, "step": 10703 }, { "epoch": 0.44438217115812745, "grad_norm": 2.483746290206909, "learning_rate": 6.1336625206967005e-06, "loss": 0.5587, "step": 10704 }, { "epoch": 0.4444236866823388, "grad_norm": 2.211498975753784, "learning_rate": 6.133007704686682e-06, "loss": 0.544, "step": 10705 }, { "epoch": 0.4444652022065501, "grad_norm": 2.076284170150757, "learning_rate": 6.132352868191558e-06, "loss": 0.4854, "step": 10706 }, { "epoch": 0.44450671773076145, "grad_norm": 2.0054664611816406, "learning_rate": 6.1316980112231675e-06, "loss": 0.5782, "step": 10707 }, { "epoch": 0.4445482332549728, "grad_norm": 2.5048272609710693, "learning_rate": 6.13104313379335e-06, "loss": 0.4954, "step": 10708 }, { "epoch": 0.4445897487791841, "grad_norm": 2.8843748569488525, "learning_rate": 6.130388235913948e-06, "loss": 0.4961, "step": 10709 }, { "epoch": 0.44463126430339545, "grad_norm": 2.5022480487823486, "learning_rate": 6.1297333175967995e-06, "loss": 0.5209, "step": 10710 }, { "epoch": 0.4446727798276068, "grad_norm": 2.7040328979492188, "learning_rate": 6.129078378853748e-06, "loss": 0.503, "step": 10711 }, { "epoch": 0.4447142953518181, "grad_norm": 2.5908091068267822, "learning_rate": 6.128423419696634e-06, "loss": 0.6059, "step": 10712 }, { "epoch": 0.44475581087602944, "grad_norm": 2.114743232727051, "learning_rate": 6.127768440137298e-06, "loss": 0.5603, "step": 10713 }, { "epoch": 0.4447973264002408, "grad_norm": 2.673154354095459, "learning_rate": 6.127113440187585e-06, "loss": 0.4321, "step": 10714 }, { "epoch": 0.4448388419244521, "grad_norm": 2.581197500228882, "learning_rate": 6.126458419859336e-06, "loss": 0.5595, "step": 10715 }, { "epoch": 0.44488035744866344, "grad_norm": 2.3774328231811523, "learning_rate": 6.125803379164393e-06, "loss": 0.4357, "step": 10716 }, { "epoch": 0.4449218729728748, "grad_norm": 2.2130777835845947, "learning_rate": 6.125148318114601e-06, "loss": 0.4777, "step": 10717 }, { "epoch": 0.4449633884970861, "grad_norm": 2.3046467304229736, "learning_rate": 6.124493236721804e-06, "loss": 0.5489, "step": 10718 }, { "epoch": 0.44500490402129744, "grad_norm": 2.6092820167541504, "learning_rate": 6.123838134997844e-06, "loss": 0.5106, "step": 10719 }, { "epoch": 0.4450464195455088, "grad_norm": 2.3907182216644287, "learning_rate": 6.123183012954568e-06, "loss": 0.7058, "step": 10720 }, { "epoch": 0.4450879350697201, "grad_norm": 2.424112319946289, "learning_rate": 6.1225278706038175e-06, "loss": 0.5447, "step": 10721 }, { "epoch": 0.44512945059393144, "grad_norm": 3.071136474609375, "learning_rate": 6.121872707957441e-06, "loss": 0.5547, "step": 10722 }, { "epoch": 0.44517096611814283, "grad_norm": 2.3901076316833496, "learning_rate": 6.121217525027282e-06, "loss": 0.5873, "step": 10723 }, { "epoch": 0.44521248164235416, "grad_norm": 2.4996278285980225, "learning_rate": 6.1205623218251874e-06, "loss": 0.575, "step": 10724 }, { "epoch": 0.4452539971665655, "grad_norm": 2.4309539794921875, "learning_rate": 6.1199070983630025e-06, "loss": 0.5439, "step": 10725 }, { "epoch": 0.44529551269077683, "grad_norm": 2.570908546447754, "learning_rate": 6.1192518546525746e-06, "loss": 0.6165, "step": 10726 }, { "epoch": 0.44533702821498816, "grad_norm": 2.793694257736206, "learning_rate": 6.118596590705751e-06, "loss": 0.4439, "step": 10727 }, { "epoch": 0.4453785437391995, "grad_norm": 2.6403138637542725, "learning_rate": 6.117941306534377e-06, "loss": 0.5835, "step": 10728 }, { "epoch": 0.44542005926341083, "grad_norm": 2.4000837802886963, "learning_rate": 6.117286002150305e-06, "loss": 0.5042, "step": 10729 }, { "epoch": 0.44546157478762216, "grad_norm": 2.488123655319214, "learning_rate": 6.116630677565377e-06, "loss": 0.5545, "step": 10730 }, { "epoch": 0.4455030903118335, "grad_norm": 2.3492825031280518, "learning_rate": 6.115975332791446e-06, "loss": 0.4699, "step": 10731 }, { "epoch": 0.4455446058360448, "grad_norm": 2.6156654357910156, "learning_rate": 6.115319967840358e-06, "loss": 0.5154, "step": 10732 }, { "epoch": 0.44558612136025616, "grad_norm": 3.04939341545105, "learning_rate": 6.1146645827239656e-06, "loss": 0.5862, "step": 10733 }, { "epoch": 0.4456276368844675, "grad_norm": 2.5539042949676514, "learning_rate": 6.114009177454115e-06, "loss": 0.5372, "step": 10734 }, { "epoch": 0.4456691524086788, "grad_norm": 2.221224546432495, "learning_rate": 6.113353752042658e-06, "loss": 0.4551, "step": 10735 }, { "epoch": 0.44571066793289016, "grad_norm": 1.9613721370697021, "learning_rate": 6.112698306501445e-06, "loss": 0.4402, "step": 10736 }, { "epoch": 0.4457521834571015, "grad_norm": 2.1974756717681885, "learning_rate": 6.112042840842323e-06, "loss": 0.5074, "step": 10737 }, { "epoch": 0.4457936989813128, "grad_norm": 2.3801279067993164, "learning_rate": 6.111387355077148e-06, "loss": 0.5084, "step": 10738 }, { "epoch": 0.44583521450552416, "grad_norm": 2.5065364837646484, "learning_rate": 6.110731849217769e-06, "loss": 0.4647, "step": 10739 }, { "epoch": 0.4458767300297355, "grad_norm": 2.452084541320801, "learning_rate": 6.110076323276039e-06, "loss": 0.3996, "step": 10740 }, { "epoch": 0.4459182455539468, "grad_norm": 2.7433598041534424, "learning_rate": 6.109420777263807e-06, "loss": 0.6865, "step": 10741 }, { "epoch": 0.44595976107815816, "grad_norm": 2.3827035427093506, "learning_rate": 6.108765211192929e-06, "loss": 0.5651, "step": 10742 }, { "epoch": 0.4460012766023695, "grad_norm": 2.58152437210083, "learning_rate": 6.108109625075256e-06, "loss": 0.5813, "step": 10743 }, { "epoch": 0.4460427921265808, "grad_norm": 2.2084147930145264, "learning_rate": 6.107454018922642e-06, "loss": 0.4388, "step": 10744 }, { "epoch": 0.44608430765079216, "grad_norm": 2.5901076793670654, "learning_rate": 6.106798392746939e-06, "loss": 0.5674, "step": 10745 }, { "epoch": 0.4461258231750035, "grad_norm": 2.5349807739257812, "learning_rate": 6.106142746560003e-06, "loss": 0.6261, "step": 10746 }, { "epoch": 0.4461673386992148, "grad_norm": 2.240278720855713, "learning_rate": 6.105487080373686e-06, "loss": 0.376, "step": 10747 }, { "epoch": 0.44620885422342615, "grad_norm": 2.4938852787017822, "learning_rate": 6.104831394199846e-06, "loss": 0.4884, "step": 10748 }, { "epoch": 0.4462503697476375, "grad_norm": 2.15655779838562, "learning_rate": 6.104175688050336e-06, "loss": 0.4513, "step": 10749 }, { "epoch": 0.4462918852718488, "grad_norm": 2.7385857105255127, "learning_rate": 6.10351996193701e-06, "loss": 0.407, "step": 10750 }, { "epoch": 0.44633340079606015, "grad_norm": 2.5514373779296875, "learning_rate": 6.102864215871726e-06, "loss": 0.5362, "step": 10751 }, { "epoch": 0.4463749163202715, "grad_norm": 2.738654375076294, "learning_rate": 6.102208449866338e-06, "loss": 0.5274, "step": 10752 }, { "epoch": 0.4464164318444828, "grad_norm": 2.531369209289551, "learning_rate": 6.101552663932704e-06, "loss": 0.5328, "step": 10753 }, { "epoch": 0.4464579473686942, "grad_norm": 2.6468639373779297, "learning_rate": 6.10089685808268e-06, "loss": 0.5887, "step": 10754 }, { "epoch": 0.44649946289290554, "grad_norm": 2.561354637145996, "learning_rate": 6.100241032328125e-06, "loss": 0.5768, "step": 10755 }, { "epoch": 0.4465409784171169, "grad_norm": 2.8128745555877686, "learning_rate": 6.099585186680893e-06, "loss": 0.4353, "step": 10756 }, { "epoch": 0.4465824939413282, "grad_norm": 2.2528648376464844, "learning_rate": 6.098929321152845e-06, "loss": 0.5797, "step": 10757 }, { "epoch": 0.44662400946553954, "grad_norm": 2.3982906341552734, "learning_rate": 6.098273435755837e-06, "loss": 0.6489, "step": 10758 }, { "epoch": 0.4466655249897509, "grad_norm": 2.5299997329711914, "learning_rate": 6.09761753050173e-06, "loss": 0.474, "step": 10759 }, { "epoch": 0.4467070405139622, "grad_norm": 2.6340301036834717, "learning_rate": 6.09696160540238e-06, "loss": 0.5325, "step": 10760 }, { "epoch": 0.44674855603817354, "grad_norm": 2.4069976806640625, "learning_rate": 6.09630566046965e-06, "loss": 0.6714, "step": 10761 }, { "epoch": 0.44679007156238487, "grad_norm": 2.5424766540527344, "learning_rate": 6.095649695715397e-06, "loss": 0.5531, "step": 10762 }, { "epoch": 0.4468315870865962, "grad_norm": 2.0822901725769043, "learning_rate": 6.094993711151482e-06, "loss": 0.5191, "step": 10763 }, { "epoch": 0.44687310261080754, "grad_norm": 2.3852641582489014, "learning_rate": 6.0943377067897655e-06, "loss": 0.5189, "step": 10764 }, { "epoch": 0.44691461813501887, "grad_norm": 3.123708486557007, "learning_rate": 6.093681682642106e-06, "loss": 0.6169, "step": 10765 }, { "epoch": 0.4469561336592302, "grad_norm": 2.481266975402832, "learning_rate": 6.093025638720369e-06, "loss": 0.6413, "step": 10766 }, { "epoch": 0.44699764918344154, "grad_norm": 2.5381529331207275, "learning_rate": 6.092369575036411e-06, "loss": 0.4374, "step": 10767 }, { "epoch": 0.44703916470765287, "grad_norm": 2.4172353744506836, "learning_rate": 6.0917134916020985e-06, "loss": 0.5275, "step": 10768 }, { "epoch": 0.4470806802318642, "grad_norm": 2.3753373622894287, "learning_rate": 6.091057388429289e-06, "loss": 0.5709, "step": 10769 }, { "epoch": 0.44712219575607554, "grad_norm": 2.813114643096924, "learning_rate": 6.090401265529849e-06, "loss": 0.3539, "step": 10770 }, { "epoch": 0.44716371128028687, "grad_norm": 2.1116299629211426, "learning_rate": 6.089745122915638e-06, "loss": 0.3905, "step": 10771 }, { "epoch": 0.4472052268044982, "grad_norm": 2.502527952194214, "learning_rate": 6.089088960598524e-06, "loss": 0.534, "step": 10772 }, { "epoch": 0.44724674232870953, "grad_norm": 2.512974977493286, "learning_rate": 6.088432778590365e-06, "loss": 0.5528, "step": 10773 }, { "epoch": 0.44728825785292087, "grad_norm": 2.259803533554077, "learning_rate": 6.08777657690303e-06, "loss": 0.4736, "step": 10774 }, { "epoch": 0.4473297733771322, "grad_norm": 2.574204683303833, "learning_rate": 6.087120355548379e-06, "loss": 0.5062, "step": 10775 }, { "epoch": 0.44737128890134353, "grad_norm": 1.9998623132705688, "learning_rate": 6.086464114538279e-06, "loss": 0.5641, "step": 10776 }, { "epoch": 0.44741280442555487, "grad_norm": 2.3765511512756348, "learning_rate": 6.085807853884595e-06, "loss": 0.5213, "step": 10777 }, { "epoch": 0.4474543199497662, "grad_norm": 2.2899527549743652, "learning_rate": 6.085151573599192e-06, "loss": 0.6202, "step": 10778 }, { "epoch": 0.44749583547397753, "grad_norm": 2.109055995941162, "learning_rate": 6.084495273693937e-06, "loss": 0.536, "step": 10779 }, { "epoch": 0.44753735099818887, "grad_norm": 1.9523258209228516, "learning_rate": 6.083838954180692e-06, "loss": 0.4604, "step": 10780 }, { "epoch": 0.4475788665224002, "grad_norm": 2.3682985305786133, "learning_rate": 6.083182615071329e-06, "loss": 0.6835, "step": 10781 }, { "epoch": 0.44762038204661153, "grad_norm": 2.5151689052581787, "learning_rate": 6.08252625637771e-06, "loss": 0.5307, "step": 10782 }, { "epoch": 0.44766189757082286, "grad_norm": 2.622344732284546, "learning_rate": 6.081869878111706e-06, "loss": 0.4635, "step": 10783 }, { "epoch": 0.4477034130950342, "grad_norm": 2.3876922130584717, "learning_rate": 6.081213480285182e-06, "loss": 0.5603, "step": 10784 }, { "epoch": 0.4477449286192456, "grad_norm": 2.3994596004486084, "learning_rate": 6.0805570629100075e-06, "loss": 0.4887, "step": 10785 }, { "epoch": 0.4477864441434569, "grad_norm": 3.243082284927368, "learning_rate": 6.079900625998049e-06, "loss": 0.4207, "step": 10786 }, { "epoch": 0.44782795966766825, "grad_norm": 2.4081172943115234, "learning_rate": 6.079244169561176e-06, "loss": 0.5949, "step": 10787 }, { "epoch": 0.4478694751918796, "grad_norm": 2.563673496246338, "learning_rate": 6.078587693611258e-06, "loss": 0.4618, "step": 10788 }, { "epoch": 0.4479109907160909, "grad_norm": 2.131462812423706, "learning_rate": 6.077931198160162e-06, "loss": 0.5674, "step": 10789 }, { "epoch": 0.44795250624030225, "grad_norm": 2.9163589477539062, "learning_rate": 6.07727468321976e-06, "loss": 0.5466, "step": 10790 }, { "epoch": 0.4479940217645136, "grad_norm": 2.612433671951294, "learning_rate": 6.076618148801921e-06, "loss": 0.4521, "step": 10791 }, { "epoch": 0.4480355372887249, "grad_norm": 2.4092965126037598, "learning_rate": 6.075961594918517e-06, "loss": 0.5073, "step": 10792 }, { "epoch": 0.44807705281293625, "grad_norm": 2.4936001300811768, "learning_rate": 6.075305021581416e-06, "loss": 0.6426, "step": 10793 }, { "epoch": 0.4481185683371476, "grad_norm": 2.5208091735839844, "learning_rate": 6.074648428802491e-06, "loss": 0.514, "step": 10794 }, { "epoch": 0.4481600838613589, "grad_norm": 2.2449028491973877, "learning_rate": 6.0739918165936114e-06, "loss": 0.5251, "step": 10795 }, { "epoch": 0.44820159938557025, "grad_norm": 2.011662006378174, "learning_rate": 6.073335184966652e-06, "loss": 0.5144, "step": 10796 }, { "epoch": 0.4482431149097816, "grad_norm": 2.3752098083496094, "learning_rate": 6.072678533933481e-06, "loss": 0.5351, "step": 10797 }, { "epoch": 0.4482846304339929, "grad_norm": 2.510619640350342, "learning_rate": 6.072021863505975e-06, "loss": 0.5448, "step": 10798 }, { "epoch": 0.44832614595820425, "grad_norm": 3.385712146759033, "learning_rate": 6.071365173696003e-06, "loss": 0.6207, "step": 10799 }, { "epoch": 0.4483676614824156, "grad_norm": 2.3859429359436035, "learning_rate": 6.070708464515441e-06, "loss": 0.5613, "step": 10800 }, { "epoch": 0.4484091770066269, "grad_norm": 3.02091383934021, "learning_rate": 6.070051735976161e-06, "loss": 0.6355, "step": 10801 }, { "epoch": 0.44845069253083825, "grad_norm": 2.0763604640960693, "learning_rate": 6.069394988090037e-06, "loss": 0.3716, "step": 10802 }, { "epoch": 0.4484922080550496, "grad_norm": 2.549964427947998, "learning_rate": 6.068738220868944e-06, "loss": 0.6209, "step": 10803 }, { "epoch": 0.4485337235792609, "grad_norm": 2.445042610168457, "learning_rate": 6.0680814343247556e-06, "loss": 0.5856, "step": 10804 }, { "epoch": 0.44857523910347225, "grad_norm": 3.103180408477783, "learning_rate": 6.067424628469347e-06, "loss": 0.674, "step": 10805 }, { "epoch": 0.4486167546276836, "grad_norm": 2.3970558643341064, "learning_rate": 6.0667678033145926e-06, "loss": 0.5916, "step": 10806 }, { "epoch": 0.4486582701518949, "grad_norm": 1.8417826890945435, "learning_rate": 6.06611095887237e-06, "loss": 0.3198, "step": 10807 }, { "epoch": 0.44869978567610624, "grad_norm": 2.993452548980713, "learning_rate": 6.065454095154553e-06, "loss": 0.5002, "step": 10808 }, { "epoch": 0.4487413012003176, "grad_norm": 2.7465412616729736, "learning_rate": 6.06479721217302e-06, "loss": 0.4856, "step": 10809 }, { "epoch": 0.4487828167245289, "grad_norm": 2.9496548175811768, "learning_rate": 6.0641403099396455e-06, "loss": 0.7206, "step": 10810 }, { "epoch": 0.44882433224874024, "grad_norm": 2.756892681121826, "learning_rate": 6.063483388466308e-06, "loss": 0.6502, "step": 10811 }, { "epoch": 0.4488658477729516, "grad_norm": 2.650359869003296, "learning_rate": 6.062826447764883e-06, "loss": 0.6407, "step": 10812 }, { "epoch": 0.4489073632971629, "grad_norm": 2.419956684112549, "learning_rate": 6.062169487847252e-06, "loss": 0.5878, "step": 10813 }, { "epoch": 0.44894887882137424, "grad_norm": 2.9701857566833496, "learning_rate": 6.061512508725289e-06, "loss": 0.4808, "step": 10814 }, { "epoch": 0.4489903943455856, "grad_norm": 2.1234700679779053, "learning_rate": 6.060855510410873e-06, "loss": 0.4462, "step": 10815 }, { "epoch": 0.44903190986979696, "grad_norm": 3.945063829421997, "learning_rate": 6.060198492915887e-06, "loss": 0.5947, "step": 10816 }, { "epoch": 0.4490734253940083, "grad_norm": 2.656083583831787, "learning_rate": 6.059541456252203e-06, "loss": 0.699, "step": 10817 }, { "epoch": 0.44911494091821963, "grad_norm": 2.692579984664917, "learning_rate": 6.058884400431706e-06, "loss": 0.5397, "step": 10818 }, { "epoch": 0.44915645644243096, "grad_norm": 2.2904038429260254, "learning_rate": 6.058227325466273e-06, "loss": 0.4861, "step": 10819 }, { "epoch": 0.4491979719666423, "grad_norm": 2.725794792175293, "learning_rate": 6.0575702313677855e-06, "loss": 0.7024, "step": 10820 }, { "epoch": 0.44923948749085363, "grad_norm": 2.572913885116577, "learning_rate": 6.056913118148122e-06, "loss": 0.6315, "step": 10821 }, { "epoch": 0.44928100301506496, "grad_norm": 2.942185640335083, "learning_rate": 6.056255985819166e-06, "loss": 0.5222, "step": 10822 }, { "epoch": 0.4493225185392763, "grad_norm": 2.1456117630004883, "learning_rate": 6.055598834392796e-06, "loss": 0.5644, "step": 10823 }, { "epoch": 0.4493640340634876, "grad_norm": 2.152796983718872, "learning_rate": 6.054941663880897e-06, "loss": 0.4179, "step": 10824 }, { "epoch": 0.44940554958769896, "grad_norm": 2.4531781673431396, "learning_rate": 6.054284474295347e-06, "loss": 0.6138, "step": 10825 }, { "epoch": 0.4494470651119103, "grad_norm": 2.7660882472991943, "learning_rate": 6.053627265648031e-06, "loss": 0.5553, "step": 10826 }, { "epoch": 0.4494885806361216, "grad_norm": 2.8364360332489014, "learning_rate": 6.052970037950831e-06, "loss": 0.5918, "step": 10827 }, { "epoch": 0.44953009616033296, "grad_norm": 2.6591598987579346, "learning_rate": 6.052312791215627e-06, "loss": 0.6573, "step": 10828 }, { "epoch": 0.4495716116845443, "grad_norm": 3.117687225341797, "learning_rate": 6.051655525454306e-06, "loss": 0.6786, "step": 10829 }, { "epoch": 0.4496131272087556, "grad_norm": 2.1937997341156006, "learning_rate": 6.050998240678748e-06, "loss": 0.4505, "step": 10830 }, { "epoch": 0.44965464273296696, "grad_norm": 2.8531527519226074, "learning_rate": 6.0503409369008405e-06, "loss": 0.5889, "step": 10831 }, { "epoch": 0.4496961582571783, "grad_norm": 2.4688172340393066, "learning_rate": 6.049683614132466e-06, "loss": 0.5591, "step": 10832 }, { "epoch": 0.4497376737813896, "grad_norm": 2.597583293914795, "learning_rate": 6.04902627238551e-06, "loss": 0.5349, "step": 10833 }, { "epoch": 0.44977918930560096, "grad_norm": 2.741056442260742, "learning_rate": 6.048368911671855e-06, "loss": 0.5648, "step": 10834 }, { "epoch": 0.4498207048298123, "grad_norm": 2.7215118408203125, "learning_rate": 6.047711532003389e-06, "loss": 0.5979, "step": 10835 }, { "epoch": 0.4498622203540236, "grad_norm": 2.3275411128997803, "learning_rate": 6.0470541333919955e-06, "loss": 0.3893, "step": 10836 }, { "epoch": 0.44990373587823496, "grad_norm": 2.2906136512756348, "learning_rate": 6.046396715849562e-06, "loss": 0.4951, "step": 10837 }, { "epoch": 0.4499452514024463, "grad_norm": 2.018554449081421, "learning_rate": 6.045739279387974e-06, "loss": 0.4994, "step": 10838 }, { "epoch": 0.4499867669266576, "grad_norm": 2.4837844371795654, "learning_rate": 6.045081824019119e-06, "loss": 0.4206, "step": 10839 }, { "epoch": 0.45002828245086895, "grad_norm": 2.5663094520568848, "learning_rate": 6.044424349754884e-06, "loss": 0.5182, "step": 10840 }, { "epoch": 0.4500697979750803, "grad_norm": 2.502807378768921, "learning_rate": 6.043766856607155e-06, "loss": 0.5482, "step": 10841 }, { "epoch": 0.4501113134992916, "grad_norm": 2.4822189807891846, "learning_rate": 6.0431093445878195e-06, "loss": 0.5371, "step": 10842 }, { "epoch": 0.45015282902350295, "grad_norm": 2.711214303970337, "learning_rate": 6.042451813708767e-06, "loss": 0.4373, "step": 10843 }, { "epoch": 0.4501943445477143, "grad_norm": 2.811251163482666, "learning_rate": 6.041794263981886e-06, "loss": 0.5661, "step": 10844 }, { "epoch": 0.4502358600719256, "grad_norm": 2.5223631858825684, "learning_rate": 6.041136695419063e-06, "loss": 0.5605, "step": 10845 }, { "epoch": 0.45027737559613695, "grad_norm": 2.6229240894317627, "learning_rate": 6.04047910803219e-06, "loss": 0.5426, "step": 10846 }, { "epoch": 0.45031889112034834, "grad_norm": 2.5352602005004883, "learning_rate": 6.039821501833153e-06, "loss": 0.5273, "step": 10847 }, { "epoch": 0.4503604066445597, "grad_norm": 2.498896360397339, "learning_rate": 6.039163876833846e-06, "loss": 0.3872, "step": 10848 }, { "epoch": 0.450401922168771, "grad_norm": 2.48789381980896, "learning_rate": 6.038506233046154e-06, "loss": 0.6948, "step": 10849 }, { "epoch": 0.45044343769298234, "grad_norm": 2.1783788204193115, "learning_rate": 6.037848570481973e-06, "loss": 0.4475, "step": 10850 }, { "epoch": 0.4504849532171937, "grad_norm": 2.526139736175537, "learning_rate": 6.037190889153188e-06, "loss": 0.4599, "step": 10851 }, { "epoch": 0.450526468741405, "grad_norm": 2.6768457889556885, "learning_rate": 6.0365331890716934e-06, "loss": 0.4822, "step": 10852 }, { "epoch": 0.45056798426561634, "grad_norm": 2.499854564666748, "learning_rate": 6.03587547024938e-06, "loss": 0.4639, "step": 10853 }, { "epoch": 0.45060949978982767, "grad_norm": 2.592221260070801, "learning_rate": 6.035217732698141e-06, "loss": 0.6362, "step": 10854 }, { "epoch": 0.450651015314039, "grad_norm": 2.1914865970611572, "learning_rate": 6.034559976429865e-06, "loss": 0.5103, "step": 10855 }, { "epoch": 0.45069253083825034, "grad_norm": 2.6665256023406982, "learning_rate": 6.033902201456447e-06, "loss": 0.5006, "step": 10856 }, { "epoch": 0.45073404636246167, "grad_norm": 2.908923387527466, "learning_rate": 6.03324440778978e-06, "loss": 0.6036, "step": 10857 }, { "epoch": 0.450775561886673, "grad_norm": 2.6993629932403564, "learning_rate": 6.032586595441755e-06, "loss": 0.4796, "step": 10858 }, { "epoch": 0.45081707741088434, "grad_norm": 2.781822443008423, "learning_rate": 6.031928764424268e-06, "loss": 0.4363, "step": 10859 }, { "epoch": 0.45085859293509567, "grad_norm": 2.952021360397339, "learning_rate": 6.031270914749211e-06, "loss": 0.5477, "step": 10860 }, { "epoch": 0.450900108459307, "grad_norm": 2.8422341346740723, "learning_rate": 6.030613046428479e-06, "loss": 0.6862, "step": 10861 }, { "epoch": 0.45094162398351834, "grad_norm": 2.215207576751709, "learning_rate": 6.0299551594739644e-06, "loss": 0.4679, "step": 10862 }, { "epoch": 0.45098313950772967, "grad_norm": 2.352471113204956, "learning_rate": 6.029297253897566e-06, "loss": 0.5473, "step": 10863 }, { "epoch": 0.451024655031941, "grad_norm": 2.5415775775909424, "learning_rate": 6.028639329711176e-06, "loss": 0.4567, "step": 10864 }, { "epoch": 0.45106617055615233, "grad_norm": 2.3783111572265625, "learning_rate": 6.027981386926689e-06, "loss": 0.5387, "step": 10865 }, { "epoch": 0.45110768608036367, "grad_norm": 2.6052515506744385, "learning_rate": 6.027323425556004e-06, "loss": 0.5816, "step": 10866 }, { "epoch": 0.451149201604575, "grad_norm": 2.2879512310028076, "learning_rate": 6.026665445611015e-06, "loss": 0.6528, "step": 10867 }, { "epoch": 0.45119071712878633, "grad_norm": 3.0914180278778076, "learning_rate": 6.0260074471036175e-06, "loss": 0.6802, "step": 10868 }, { "epoch": 0.45123223265299767, "grad_norm": 2.2603211402893066, "learning_rate": 6.025349430045711e-06, "loss": 0.451, "step": 10869 }, { "epoch": 0.451273748177209, "grad_norm": 2.1876842975616455, "learning_rate": 6.02469139444919e-06, "loss": 0.6518, "step": 10870 }, { "epoch": 0.45131526370142033, "grad_norm": 2.676257371902466, "learning_rate": 6.024033340325954e-06, "loss": 0.5294, "step": 10871 }, { "epoch": 0.45135677922563167, "grad_norm": 2.323957681655884, "learning_rate": 6.0233752676879e-06, "loss": 0.4387, "step": 10872 }, { "epoch": 0.451398294749843, "grad_norm": 2.22623348236084, "learning_rate": 6.022717176546926e-06, "loss": 0.4624, "step": 10873 }, { "epoch": 0.45143981027405433, "grad_norm": 3.17043399810791, "learning_rate": 6.022059066914932e-06, "loss": 0.4635, "step": 10874 }, { "epoch": 0.45148132579826566, "grad_norm": 2.0252256393432617, "learning_rate": 6.021400938803813e-06, "loss": 0.441, "step": 10875 }, { "epoch": 0.451522841322477, "grad_norm": 2.3152613639831543, "learning_rate": 6.0207427922254726e-06, "loss": 0.5583, "step": 10876 }, { "epoch": 0.4515643568466884, "grad_norm": 2.4276480674743652, "learning_rate": 6.020084627191808e-06, "loss": 0.6049, "step": 10877 }, { "epoch": 0.4516058723708997, "grad_norm": 2.538191318511963, "learning_rate": 6.019426443714719e-06, "loss": 0.4631, "step": 10878 }, { "epoch": 0.45164738789511105, "grad_norm": 3.04726505279541, "learning_rate": 6.018768241806107e-06, "loss": 0.5152, "step": 10879 }, { "epoch": 0.4516889034193224, "grad_norm": 2.3392069339752197, "learning_rate": 6.01811002147787e-06, "loss": 0.4434, "step": 10880 }, { "epoch": 0.4517304189435337, "grad_norm": 2.6854591369628906, "learning_rate": 6.017451782741911e-06, "loss": 0.5613, "step": 10881 }, { "epoch": 0.45177193446774505, "grad_norm": 2.3403735160827637, "learning_rate": 6.0167935256101295e-06, "loss": 0.3577, "step": 10882 }, { "epoch": 0.4518134499919564, "grad_norm": 2.4697818756103516, "learning_rate": 6.016135250094429e-06, "loss": 0.6184, "step": 10883 }, { "epoch": 0.4518549655161677, "grad_norm": 2.5106139183044434, "learning_rate": 6.015476956206711e-06, "loss": 0.5549, "step": 10884 }, { "epoch": 0.45189648104037905, "grad_norm": 2.381010055541992, "learning_rate": 6.014818643958876e-06, "loss": 0.6155, "step": 10885 }, { "epoch": 0.4519379965645904, "grad_norm": 2.172161340713501, "learning_rate": 6.014160313362827e-06, "loss": 0.4708, "step": 10886 }, { "epoch": 0.4519795120888017, "grad_norm": 2.36913800239563, "learning_rate": 6.013501964430468e-06, "loss": 0.4713, "step": 10887 }, { "epoch": 0.45202102761301305, "grad_norm": 2.255091905593872, "learning_rate": 6.012843597173701e-06, "loss": 0.5148, "step": 10888 }, { "epoch": 0.4520625431372244, "grad_norm": 2.5529723167419434, "learning_rate": 6.012185211604431e-06, "loss": 0.5351, "step": 10889 }, { "epoch": 0.4521040586614357, "grad_norm": 2.6057517528533936, "learning_rate": 6.01152680773456e-06, "loss": 0.4787, "step": 10890 }, { "epoch": 0.45214557418564705, "grad_norm": 2.3019039630889893, "learning_rate": 6.010868385575993e-06, "loss": 0.5226, "step": 10891 }, { "epoch": 0.4521870897098584, "grad_norm": 2.5246715545654297, "learning_rate": 6.0102099451406325e-06, "loss": 0.4681, "step": 10892 }, { "epoch": 0.4522286052340697, "grad_norm": 2.1558659076690674, "learning_rate": 6.009551486440387e-06, "loss": 0.3749, "step": 10893 }, { "epoch": 0.45227012075828105, "grad_norm": 2.351034164428711, "learning_rate": 6.008893009487158e-06, "loss": 0.5093, "step": 10894 }, { "epoch": 0.4523116362824924, "grad_norm": 2.4767777919769287, "learning_rate": 6.008234514292853e-06, "loss": 0.6948, "step": 10895 }, { "epoch": 0.4523531518067037, "grad_norm": 2.9308314323425293, "learning_rate": 6.007576000869379e-06, "loss": 0.5114, "step": 10896 }, { "epoch": 0.45239466733091505, "grad_norm": 2.871821165084839, "learning_rate": 6.006917469228638e-06, "loss": 0.5271, "step": 10897 }, { "epoch": 0.4524361828551264, "grad_norm": 2.1677072048187256, "learning_rate": 6.006258919382541e-06, "loss": 0.4727, "step": 10898 }, { "epoch": 0.4524776983793377, "grad_norm": 2.7130556106567383, "learning_rate": 6.0056003513429914e-06, "loss": 0.4822, "step": 10899 }, { "epoch": 0.45251921390354904, "grad_norm": 2.6657707691192627, "learning_rate": 6.004941765121899e-06, "loss": 0.6063, "step": 10900 }, { "epoch": 0.4525607294277604, "grad_norm": 2.870511531829834, "learning_rate": 6.004283160731168e-06, "loss": 0.6236, "step": 10901 }, { "epoch": 0.4526022449519717, "grad_norm": 3.0829174518585205, "learning_rate": 6.0036245381827095e-06, "loss": 0.6409, "step": 10902 }, { "epoch": 0.45264376047618304, "grad_norm": 2.6032910346984863, "learning_rate": 6.00296589748843e-06, "loss": 0.6991, "step": 10903 }, { "epoch": 0.4526852760003944, "grad_norm": 2.68965744972229, "learning_rate": 6.002307238660238e-06, "loss": 0.4683, "step": 10904 }, { "epoch": 0.4527267915246057, "grad_norm": 2.233961820602417, "learning_rate": 6.00164856171004e-06, "loss": 0.4593, "step": 10905 }, { "epoch": 0.45276830704881704, "grad_norm": 2.35288405418396, "learning_rate": 6.0009898666497515e-06, "loss": 0.6269, "step": 10906 }, { "epoch": 0.4528098225730284, "grad_norm": 2.130063056945801, "learning_rate": 6.000331153491273e-06, "loss": 0.5526, "step": 10907 }, { "epoch": 0.45285133809723976, "grad_norm": 2.5678012371063232, "learning_rate": 5.999672422246521e-06, "loss": 0.569, "step": 10908 }, { "epoch": 0.4528928536214511, "grad_norm": 2.7465858459472656, "learning_rate": 5.9990136729274055e-06, "loss": 0.545, "step": 10909 }, { "epoch": 0.45293436914566243, "grad_norm": 2.512157678604126, "learning_rate": 5.998354905545831e-06, "loss": 0.4969, "step": 10910 }, { "epoch": 0.45297588466987376, "grad_norm": 2.095417022705078, "learning_rate": 5.9976961201137155e-06, "loss": 0.5644, "step": 10911 }, { "epoch": 0.4530174001940851, "grad_norm": 2.504709243774414, "learning_rate": 5.997037316642964e-06, "loss": 0.4943, "step": 10912 }, { "epoch": 0.45305891571829643, "grad_norm": 2.471674919128418, "learning_rate": 5.996378495145492e-06, "loss": 0.5912, "step": 10913 }, { "epoch": 0.45310043124250776, "grad_norm": 2.624284029006958, "learning_rate": 5.995719655633209e-06, "loss": 0.6069, "step": 10914 }, { "epoch": 0.4531419467667191, "grad_norm": 2.31217360496521, "learning_rate": 5.995060798118027e-06, "loss": 0.5062, "step": 10915 }, { "epoch": 0.45318346229093043, "grad_norm": 2.6370863914489746, "learning_rate": 5.994401922611861e-06, "loss": 0.6909, "step": 10916 }, { "epoch": 0.45322497781514176, "grad_norm": 1.91560959815979, "learning_rate": 5.9937430291266204e-06, "loss": 0.4934, "step": 10917 }, { "epoch": 0.4532664933393531, "grad_norm": 1.9115724563598633, "learning_rate": 5.99308411767422e-06, "loss": 0.4994, "step": 10918 }, { "epoch": 0.4533080088635644, "grad_norm": 2.244797706604004, "learning_rate": 5.992425188266572e-06, "loss": 0.3844, "step": 10919 }, { "epoch": 0.45334952438777576, "grad_norm": 2.319014310836792, "learning_rate": 5.9917662409155896e-06, "loss": 0.4405, "step": 10920 }, { "epoch": 0.4533910399119871, "grad_norm": 2.3629281520843506, "learning_rate": 5.991107275633189e-06, "loss": 0.4264, "step": 10921 }, { "epoch": 0.4534325554361984, "grad_norm": 2.531916856765747, "learning_rate": 5.990448292431283e-06, "loss": 0.578, "step": 10922 }, { "epoch": 0.45347407096040976, "grad_norm": 2.4686224460601807, "learning_rate": 5.9897892913217845e-06, "loss": 0.5732, "step": 10923 }, { "epoch": 0.4535155864846211, "grad_norm": 2.793243169784546, "learning_rate": 5.989130272316612e-06, "loss": 0.5316, "step": 10924 }, { "epoch": 0.4535571020088324, "grad_norm": 2.739769697189331, "learning_rate": 5.98847123542768e-06, "loss": 0.4379, "step": 10925 }, { "epoch": 0.45359861753304376, "grad_norm": 2.422020435333252, "learning_rate": 5.987812180666902e-06, "loss": 0.4864, "step": 10926 }, { "epoch": 0.4536401330572551, "grad_norm": 2.2433924674987793, "learning_rate": 5.987153108046194e-06, "loss": 0.4144, "step": 10927 }, { "epoch": 0.4536816485814664, "grad_norm": 2.2084171772003174, "learning_rate": 5.986494017577476e-06, "loss": 0.4881, "step": 10928 }, { "epoch": 0.45372316410567776, "grad_norm": 2.475253105163574, "learning_rate": 5.985834909272661e-06, "loss": 0.5157, "step": 10929 }, { "epoch": 0.4537646796298891, "grad_norm": 2.5880606174468994, "learning_rate": 5.985175783143666e-06, "loss": 0.4974, "step": 10930 }, { "epoch": 0.4538061951541004, "grad_norm": 2.43279767036438, "learning_rate": 5.984516639202408e-06, "loss": 0.643, "step": 10931 }, { "epoch": 0.45384771067831176, "grad_norm": 2.4969980716705322, "learning_rate": 5.983857477460807e-06, "loss": 0.542, "step": 10932 }, { "epoch": 0.4538892262025231, "grad_norm": 2.331707715988159, "learning_rate": 5.983198297930778e-06, "loss": 0.4674, "step": 10933 }, { "epoch": 0.4539307417267344, "grad_norm": 2.3793387413024902, "learning_rate": 5.982539100624242e-06, "loss": 0.6284, "step": 10934 }, { "epoch": 0.45397225725094575, "grad_norm": 2.5185961723327637, "learning_rate": 5.981879885553113e-06, "loss": 0.54, "step": 10935 }, { "epoch": 0.4540137727751571, "grad_norm": 2.416541337966919, "learning_rate": 5.981220652729315e-06, "loss": 0.552, "step": 10936 }, { "epoch": 0.4540552882993684, "grad_norm": 2.399120330810547, "learning_rate": 5.980561402164764e-06, "loss": 0.4189, "step": 10937 }, { "epoch": 0.45409680382357975, "grad_norm": 3.0145349502563477, "learning_rate": 5.97990213387138e-06, "loss": 0.5563, "step": 10938 }, { "epoch": 0.45413831934779114, "grad_norm": 3.2435338497161865, "learning_rate": 5.979242847861084e-06, "loss": 0.4847, "step": 10939 }, { "epoch": 0.4541798348720025, "grad_norm": 2.365542411804199, "learning_rate": 5.978583544145794e-06, "loss": 0.4807, "step": 10940 }, { "epoch": 0.4542213503962138, "grad_norm": 2.4045886993408203, "learning_rate": 5.977924222737432e-06, "loss": 0.4801, "step": 10941 }, { "epoch": 0.45426286592042514, "grad_norm": 2.3313629627227783, "learning_rate": 5.977264883647919e-06, "loss": 0.4506, "step": 10942 }, { "epoch": 0.4543043814446365, "grad_norm": 2.428941011428833, "learning_rate": 5.976605526889175e-06, "loss": 0.3641, "step": 10943 }, { "epoch": 0.4543458969688478, "grad_norm": 2.9752612113952637, "learning_rate": 5.97594615247312e-06, "loss": 0.4451, "step": 10944 }, { "epoch": 0.45438741249305914, "grad_norm": 2.205368757247925, "learning_rate": 5.97528676041168e-06, "loss": 0.4777, "step": 10945 }, { "epoch": 0.4544289280172705, "grad_norm": 2.679108142852783, "learning_rate": 5.974627350716771e-06, "loss": 0.5926, "step": 10946 }, { "epoch": 0.4544704435414818, "grad_norm": 2.2428171634674072, "learning_rate": 5.973967923400321e-06, "loss": 0.6102, "step": 10947 }, { "epoch": 0.45451195906569314, "grad_norm": 2.216809034347534, "learning_rate": 5.973308478474249e-06, "loss": 0.4901, "step": 10948 }, { "epoch": 0.45455347458990447, "grad_norm": 2.3378689289093018, "learning_rate": 5.972649015950478e-06, "loss": 0.4973, "step": 10949 }, { "epoch": 0.4545949901141158, "grad_norm": 2.495849847793579, "learning_rate": 5.971989535840934e-06, "loss": 0.4604, "step": 10950 }, { "epoch": 0.45463650563832714, "grad_norm": 2.292708158493042, "learning_rate": 5.971330038157539e-06, "loss": 0.5, "step": 10951 }, { "epoch": 0.45467802116253847, "grad_norm": 1.9960962533950806, "learning_rate": 5.970670522912217e-06, "loss": 0.4771, "step": 10952 }, { "epoch": 0.4547195366867498, "grad_norm": 2.7841053009033203, "learning_rate": 5.970010990116892e-06, "loss": 0.4574, "step": 10953 }, { "epoch": 0.45476105221096114, "grad_norm": 2.683816432952881, "learning_rate": 5.96935143978349e-06, "loss": 0.4031, "step": 10954 }, { "epoch": 0.45480256773517247, "grad_norm": 2.918609380722046, "learning_rate": 5.9686918719239325e-06, "loss": 0.467, "step": 10955 }, { "epoch": 0.4548440832593838, "grad_norm": 2.1126716136932373, "learning_rate": 5.968032286550149e-06, "loss": 0.4108, "step": 10956 }, { "epoch": 0.45488559878359514, "grad_norm": 2.2774062156677246, "learning_rate": 5.9673726836740594e-06, "loss": 0.4889, "step": 10957 }, { "epoch": 0.45492711430780647, "grad_norm": 2.284886360168457, "learning_rate": 5.966713063307596e-06, "loss": 0.6873, "step": 10958 }, { "epoch": 0.4549686298320178, "grad_norm": 2.415459156036377, "learning_rate": 5.96605342546268e-06, "loss": 0.5054, "step": 10959 }, { "epoch": 0.45501014535622913, "grad_norm": 2.7509167194366455, "learning_rate": 5.965393770151241e-06, "loss": 0.515, "step": 10960 }, { "epoch": 0.45505166088044047, "grad_norm": 2.362788438796997, "learning_rate": 5.964734097385203e-06, "loss": 0.4441, "step": 10961 }, { "epoch": 0.4550931764046518, "grad_norm": 2.536198616027832, "learning_rate": 5.964074407176495e-06, "loss": 0.564, "step": 10962 }, { "epoch": 0.45513469192886313, "grad_norm": 2.166956901550293, "learning_rate": 5.963414699537043e-06, "loss": 0.6108, "step": 10963 }, { "epoch": 0.45517620745307447, "grad_norm": 2.3246514797210693, "learning_rate": 5.962754974478776e-06, "loss": 0.6015, "step": 10964 }, { "epoch": 0.4552177229772858, "grad_norm": 2.1177353858947754, "learning_rate": 5.9620952320136225e-06, "loss": 0.523, "step": 10965 }, { "epoch": 0.45525923850149713, "grad_norm": 2.5107157230377197, "learning_rate": 5.96143547215351e-06, "loss": 0.5235, "step": 10966 }, { "epoch": 0.45530075402570847, "grad_norm": 2.0383410453796387, "learning_rate": 5.9607756949103655e-06, "loss": 0.406, "step": 10967 }, { "epoch": 0.4553422695499198, "grad_norm": 2.228212833404541, "learning_rate": 5.9601159002961214e-06, "loss": 0.5598, "step": 10968 }, { "epoch": 0.45538378507413113, "grad_norm": 2.6187121868133545, "learning_rate": 5.9594560883227045e-06, "loss": 0.6527, "step": 10969 }, { "epoch": 0.4554253005983425, "grad_norm": 2.2301528453826904, "learning_rate": 5.958796259002044e-06, "loss": 0.5079, "step": 10970 }, { "epoch": 0.45546681612255385, "grad_norm": 3.579885721206665, "learning_rate": 5.9581364123460715e-06, "loss": 0.5878, "step": 10971 }, { "epoch": 0.4555083316467652, "grad_norm": 2.7605185508728027, "learning_rate": 5.957476548366716e-06, "loss": 0.4957, "step": 10972 }, { "epoch": 0.4555498471709765, "grad_norm": 2.322235345840454, "learning_rate": 5.95681666707591e-06, "loss": 0.5436, "step": 10973 }, { "epoch": 0.45559136269518785, "grad_norm": 2.4959349632263184, "learning_rate": 5.956156768485582e-06, "loss": 0.565, "step": 10974 }, { "epoch": 0.4556328782193992, "grad_norm": 2.273541212081909, "learning_rate": 5.9554968526076614e-06, "loss": 0.5662, "step": 10975 }, { "epoch": 0.4556743937436105, "grad_norm": 3.1551198959350586, "learning_rate": 5.954836919454086e-06, "loss": 0.4997, "step": 10976 }, { "epoch": 0.45571590926782185, "grad_norm": 2.0718507766723633, "learning_rate": 5.954176969036783e-06, "loss": 0.3712, "step": 10977 }, { "epoch": 0.4557574247920332, "grad_norm": 2.1191797256469727, "learning_rate": 5.953517001367684e-06, "loss": 0.4261, "step": 10978 }, { "epoch": 0.4557989403162445, "grad_norm": 2.25115704536438, "learning_rate": 5.952857016458724e-06, "loss": 0.4221, "step": 10979 }, { "epoch": 0.45584045584045585, "grad_norm": 2.204918146133423, "learning_rate": 5.952197014321834e-06, "loss": 0.4426, "step": 10980 }, { "epoch": 0.4558819713646672, "grad_norm": 2.302328586578369, "learning_rate": 5.951536994968948e-06, "loss": 0.5476, "step": 10981 }, { "epoch": 0.4559234868888785, "grad_norm": 2.525832176208496, "learning_rate": 5.950876958412e-06, "loss": 0.735, "step": 10982 }, { "epoch": 0.45596500241308985, "grad_norm": 2.4364264011383057, "learning_rate": 5.95021690466292e-06, "loss": 0.5038, "step": 10983 }, { "epoch": 0.4560065179373012, "grad_norm": 2.4571731090545654, "learning_rate": 5.949556833733646e-06, "loss": 0.5223, "step": 10984 }, { "epoch": 0.4560480334615125, "grad_norm": 2.5290205478668213, "learning_rate": 5.94889674563611e-06, "loss": 0.4242, "step": 10985 }, { "epoch": 0.45608954898572385, "grad_norm": 2.4109508991241455, "learning_rate": 5.948236640382249e-06, "loss": 0.5877, "step": 10986 }, { "epoch": 0.4561310645099352, "grad_norm": 2.9101595878601074, "learning_rate": 5.947576517983994e-06, "loss": 0.6212, "step": 10987 }, { "epoch": 0.4561725800341465, "grad_norm": 2.0813047885894775, "learning_rate": 5.946916378453283e-06, "loss": 0.3465, "step": 10988 }, { "epoch": 0.45621409555835785, "grad_norm": 2.717146396636963, "learning_rate": 5.946256221802052e-06, "loss": 0.5788, "step": 10989 }, { "epoch": 0.4562556110825692, "grad_norm": 2.3135790824890137, "learning_rate": 5.9455960480422334e-06, "loss": 0.5543, "step": 10990 }, { "epoch": 0.4562971266067805, "grad_norm": 2.228741407394409, "learning_rate": 5.944935857185768e-06, "loss": 0.5288, "step": 10991 }, { "epoch": 0.45633864213099185, "grad_norm": 2.695441484451294, "learning_rate": 5.944275649244589e-06, "loss": 0.449, "step": 10992 }, { "epoch": 0.4563801576552032, "grad_norm": 2.311556100845337, "learning_rate": 5.943615424230635e-06, "loss": 0.5751, "step": 10993 }, { "epoch": 0.4564216731794145, "grad_norm": 2.2131893634796143, "learning_rate": 5.942955182155843e-06, "loss": 0.4599, "step": 10994 }, { "epoch": 0.45646318870362584, "grad_norm": 2.338468074798584, "learning_rate": 5.9422949230321485e-06, "loss": 0.5024, "step": 10995 }, { "epoch": 0.4565047042278372, "grad_norm": 2.503061056137085, "learning_rate": 5.941634646871489e-06, "loss": 0.4664, "step": 10996 }, { "epoch": 0.4565462197520485, "grad_norm": 2.282670497894287, "learning_rate": 5.9409743536858044e-06, "loss": 0.4752, "step": 10997 }, { "epoch": 0.45658773527625984, "grad_norm": 2.224015474319458, "learning_rate": 5.940314043487032e-06, "loss": 0.5246, "step": 10998 }, { "epoch": 0.4566292508004712, "grad_norm": 2.5672247409820557, "learning_rate": 5.9396537162871114e-06, "loss": 0.4647, "step": 10999 }, { "epoch": 0.4566707663246825, "grad_norm": 2.196115255355835, "learning_rate": 5.93899337209798e-06, "loss": 0.488, "step": 11000 }, { "epoch": 0.4567122818488939, "grad_norm": 2.4579498767852783, "learning_rate": 5.938333010931578e-06, "loss": 0.5608, "step": 11001 }, { "epoch": 0.45675379737310523, "grad_norm": 2.4664576053619385, "learning_rate": 5.9376726327998445e-06, "loss": 0.7246, "step": 11002 }, { "epoch": 0.45679531289731656, "grad_norm": 2.0426559448242188, "learning_rate": 5.93701223771472e-06, "loss": 0.55, "step": 11003 }, { "epoch": 0.4568368284215279, "grad_norm": 2.5277037620544434, "learning_rate": 5.9363518256881445e-06, "loss": 0.6577, "step": 11004 }, { "epoch": 0.45687834394573923, "grad_norm": 2.552891492843628, "learning_rate": 5.935691396732059e-06, "loss": 0.496, "step": 11005 }, { "epoch": 0.45691985946995056, "grad_norm": 2.3265340328216553, "learning_rate": 5.935030950858401e-06, "loss": 0.4829, "step": 11006 }, { "epoch": 0.4569613749941619, "grad_norm": 2.5850319862365723, "learning_rate": 5.934370488079115e-06, "loss": 0.6076, "step": 11007 }, { "epoch": 0.45700289051837323, "grad_norm": 2.442233085632324, "learning_rate": 5.933710008406142e-06, "loss": 0.4088, "step": 11008 }, { "epoch": 0.45704440604258456, "grad_norm": 2.6368892192840576, "learning_rate": 5.933049511851421e-06, "loss": 0.4549, "step": 11009 }, { "epoch": 0.4570859215667959, "grad_norm": 2.0559723377227783, "learning_rate": 5.932388998426898e-06, "loss": 0.6321, "step": 11010 }, { "epoch": 0.4571274370910072, "grad_norm": 2.0640900135040283, "learning_rate": 5.931728468144512e-06, "loss": 0.4283, "step": 11011 }, { "epoch": 0.45716895261521856, "grad_norm": 2.607923984527588, "learning_rate": 5.931067921016207e-06, "loss": 0.5703, "step": 11012 }, { "epoch": 0.4572104681394299, "grad_norm": 2.895231246948242, "learning_rate": 5.930407357053924e-06, "loss": 0.487, "step": 11013 }, { "epoch": 0.4572519836636412, "grad_norm": 2.2545504570007324, "learning_rate": 5.929746776269612e-06, "loss": 0.5043, "step": 11014 }, { "epoch": 0.45729349918785256, "grad_norm": 2.3831915855407715, "learning_rate": 5.9290861786752066e-06, "loss": 0.4763, "step": 11015 }, { "epoch": 0.4573350147120639, "grad_norm": 2.5542125701904297, "learning_rate": 5.928425564282655e-06, "loss": 0.6099, "step": 11016 }, { "epoch": 0.4573765302362752, "grad_norm": 2.8135509490966797, "learning_rate": 5.927764933103903e-06, "loss": 0.5474, "step": 11017 }, { "epoch": 0.45741804576048656, "grad_norm": 2.8735768795013428, "learning_rate": 5.927104285150895e-06, "loss": 0.3905, "step": 11018 }, { "epoch": 0.4574595612846979, "grad_norm": 2.3690662384033203, "learning_rate": 5.926443620435572e-06, "loss": 0.5003, "step": 11019 }, { "epoch": 0.4575010768089092, "grad_norm": 2.135373115539551, "learning_rate": 5.925782938969883e-06, "loss": 0.4698, "step": 11020 }, { "epoch": 0.45754259233312056, "grad_norm": 2.5640742778778076, "learning_rate": 5.925122240765771e-06, "loss": 0.4726, "step": 11021 }, { "epoch": 0.4575841078573319, "grad_norm": 2.6234817504882812, "learning_rate": 5.924461525835181e-06, "loss": 0.5533, "step": 11022 }, { "epoch": 0.4576256233815432, "grad_norm": 2.3067336082458496, "learning_rate": 5.923800794190062e-06, "loss": 0.3994, "step": 11023 }, { "epoch": 0.45766713890575456, "grad_norm": 2.5454790592193604, "learning_rate": 5.923140045842357e-06, "loss": 0.5755, "step": 11024 }, { "epoch": 0.4577086544299659, "grad_norm": 2.3882017135620117, "learning_rate": 5.922479280804015e-06, "loss": 0.4837, "step": 11025 }, { "epoch": 0.4577501699541772, "grad_norm": 2.2915971279144287, "learning_rate": 5.921818499086981e-06, "loss": 0.4374, "step": 11026 }, { "epoch": 0.45779168547838855, "grad_norm": 2.4099502563476562, "learning_rate": 5.921157700703203e-06, "loss": 0.4175, "step": 11027 }, { "epoch": 0.4578332010025999, "grad_norm": 2.0458240509033203, "learning_rate": 5.9204968856646286e-06, "loss": 0.48, "step": 11028 }, { "epoch": 0.4578747165268112, "grad_norm": 2.055943012237549, "learning_rate": 5.9198360539832035e-06, "loss": 0.4447, "step": 11029 }, { "epoch": 0.45791623205102255, "grad_norm": 2.4208152294158936, "learning_rate": 5.9191752056708795e-06, "loss": 0.5643, "step": 11030 }, { "epoch": 0.4579577475752339, "grad_norm": 2.671311855316162, "learning_rate": 5.918514340739603e-06, "loss": 0.4112, "step": 11031 }, { "epoch": 0.4579992630994453, "grad_norm": 2.1673479080200195, "learning_rate": 5.917853459201321e-06, "loss": 0.4516, "step": 11032 }, { "epoch": 0.4580407786236566, "grad_norm": 2.375044822692871, "learning_rate": 5.917192561067985e-06, "loss": 0.6413, "step": 11033 }, { "epoch": 0.45808229414786794, "grad_norm": 2.1812429428100586, "learning_rate": 5.916531646351543e-06, "loss": 0.5482, "step": 11034 }, { "epoch": 0.4581238096720793, "grad_norm": 2.429112195968628, "learning_rate": 5.9158707150639435e-06, "loss": 0.6299, "step": 11035 }, { "epoch": 0.4581653251962906, "grad_norm": 2.368638753890991, "learning_rate": 5.915209767217138e-06, "loss": 0.4328, "step": 11036 }, { "epoch": 0.45820684072050194, "grad_norm": 2.764314651489258, "learning_rate": 5.914548802823077e-06, "loss": 0.46, "step": 11037 }, { "epoch": 0.4582483562447133, "grad_norm": 2.0924580097198486, "learning_rate": 5.91388782189371e-06, "loss": 0.5407, "step": 11038 }, { "epoch": 0.4582898717689246, "grad_norm": 2.3662683963775635, "learning_rate": 5.913226824440986e-06, "loss": 0.5604, "step": 11039 }, { "epoch": 0.45833138729313594, "grad_norm": 2.4476308822631836, "learning_rate": 5.91256581047686e-06, "loss": 0.5812, "step": 11040 }, { "epoch": 0.45837290281734727, "grad_norm": 2.8656578063964844, "learning_rate": 5.911904780013279e-06, "loss": 0.4877, "step": 11041 }, { "epoch": 0.4584144183415586, "grad_norm": 2.504551410675049, "learning_rate": 5.911243733062197e-06, "loss": 0.3909, "step": 11042 }, { "epoch": 0.45845593386576994, "grad_norm": 2.284498453140259, "learning_rate": 5.910582669635567e-06, "loss": 0.4079, "step": 11043 }, { "epoch": 0.45849744938998127, "grad_norm": 2.2614846229553223, "learning_rate": 5.909921589745341e-06, "loss": 0.6133, "step": 11044 }, { "epoch": 0.4585389649141926, "grad_norm": 2.926754951477051, "learning_rate": 5.909260493403468e-06, "loss": 0.503, "step": 11045 }, { "epoch": 0.45858048043840394, "grad_norm": 3.297717571258545, "learning_rate": 5.908599380621905e-06, "loss": 0.4645, "step": 11046 }, { "epoch": 0.45862199596261527, "grad_norm": 3.127742290496826, "learning_rate": 5.907938251412603e-06, "loss": 0.4406, "step": 11047 }, { "epoch": 0.4586635114868266, "grad_norm": 2.550220012664795, "learning_rate": 5.907277105787513e-06, "loss": 0.5285, "step": 11048 }, { "epoch": 0.45870502701103794, "grad_norm": 2.2028629779815674, "learning_rate": 5.9066159437585945e-06, "loss": 0.5162, "step": 11049 }, { "epoch": 0.45874654253524927, "grad_norm": 2.6403207778930664, "learning_rate": 5.905954765337797e-06, "loss": 0.452, "step": 11050 }, { "epoch": 0.4587880580594606, "grad_norm": 2.5905075073242188, "learning_rate": 5.9052935705370775e-06, "loss": 0.4843, "step": 11051 }, { "epoch": 0.45882957358367193, "grad_norm": 2.284137487411499, "learning_rate": 5.904632359368388e-06, "loss": 0.3885, "step": 11052 }, { "epoch": 0.45887108910788327, "grad_norm": 2.2905759811401367, "learning_rate": 5.903971131843686e-06, "loss": 0.404, "step": 11053 }, { "epoch": 0.4589126046320946, "grad_norm": 2.6404154300689697, "learning_rate": 5.903309887974923e-06, "loss": 0.622, "step": 11054 }, { "epoch": 0.45895412015630593, "grad_norm": 2.560833215713501, "learning_rate": 5.902648627774059e-06, "loss": 0.5809, "step": 11055 }, { "epoch": 0.45899563568051727, "grad_norm": 2.860994577407837, "learning_rate": 5.901987351253047e-06, "loss": 0.4471, "step": 11056 }, { "epoch": 0.4590371512047286, "grad_norm": 2.1931660175323486, "learning_rate": 5.901326058423843e-06, "loss": 0.5677, "step": 11057 }, { "epoch": 0.45907866672893993, "grad_norm": 2.6788127422332764, "learning_rate": 5.900664749298406e-06, "loss": 0.5753, "step": 11058 }, { "epoch": 0.45912018225315127, "grad_norm": 2.4565889835357666, "learning_rate": 5.900003423888688e-06, "loss": 0.6229, "step": 11059 }, { "epoch": 0.4591616977773626, "grad_norm": 2.427168130874634, "learning_rate": 5.899342082206652e-06, "loss": 0.6195, "step": 11060 }, { "epoch": 0.45920321330157393, "grad_norm": 3.102886438369751, "learning_rate": 5.898680724264249e-06, "loss": 0.5081, "step": 11061 }, { "epoch": 0.45924472882578526, "grad_norm": 2.5228002071380615, "learning_rate": 5.8980193500734405e-06, "loss": 0.5272, "step": 11062 }, { "epoch": 0.45928624434999665, "grad_norm": 2.4680697917938232, "learning_rate": 5.897357959646184e-06, "loss": 0.469, "step": 11063 }, { "epoch": 0.459327759874208, "grad_norm": 2.291982412338257, "learning_rate": 5.896696552994436e-06, "loss": 0.5928, "step": 11064 }, { "epoch": 0.4593692753984193, "grad_norm": 2.0429294109344482, "learning_rate": 5.896035130130155e-06, "loss": 0.4452, "step": 11065 }, { "epoch": 0.45941079092263065, "grad_norm": 2.8338496685028076, "learning_rate": 5.895373691065304e-06, "loss": 0.568, "step": 11066 }, { "epoch": 0.459452306446842, "grad_norm": 1.9962025880813599, "learning_rate": 5.894712235811835e-06, "loss": 0.4996, "step": 11067 }, { "epoch": 0.4594938219710533, "grad_norm": 2.259089231491089, "learning_rate": 5.894050764381713e-06, "loss": 0.4577, "step": 11068 }, { "epoch": 0.45953533749526465, "grad_norm": 2.0035619735717773, "learning_rate": 5.893389276786894e-06, "loss": 0.5112, "step": 11069 }, { "epoch": 0.459576853019476, "grad_norm": 2.5250277519226074, "learning_rate": 5.892727773039341e-06, "loss": 0.6465, "step": 11070 }, { "epoch": 0.4596183685436873, "grad_norm": 2.603421211242676, "learning_rate": 5.892066253151012e-06, "loss": 0.7724, "step": 11071 }, { "epoch": 0.45965988406789865, "grad_norm": 3.079608917236328, "learning_rate": 5.891404717133867e-06, "loss": 0.588, "step": 11072 }, { "epoch": 0.45970139959211, "grad_norm": 2.886962413787842, "learning_rate": 5.8907431649998695e-06, "loss": 0.5557, "step": 11073 }, { "epoch": 0.4597429151163213, "grad_norm": 2.179027795791626, "learning_rate": 5.890081596760977e-06, "loss": 0.6042, "step": 11074 }, { "epoch": 0.45978443064053265, "grad_norm": 2.0827791690826416, "learning_rate": 5.889420012429154e-06, "loss": 0.5025, "step": 11075 }, { "epoch": 0.459825946164744, "grad_norm": 2.6942970752716064, "learning_rate": 5.88875841201636e-06, "loss": 0.5493, "step": 11076 }, { "epoch": 0.4598674616889553, "grad_norm": 2.4465787410736084, "learning_rate": 5.888096795534558e-06, "loss": 0.6478, "step": 11077 }, { "epoch": 0.45990897721316665, "grad_norm": 2.2218406200408936, "learning_rate": 5.887435162995709e-06, "loss": 0.4061, "step": 11078 }, { "epoch": 0.459950492737378, "grad_norm": 2.596463918685913, "learning_rate": 5.886773514411779e-06, "loss": 0.4056, "step": 11079 }, { "epoch": 0.4599920082615893, "grad_norm": 2.670581340789795, "learning_rate": 5.886111849794726e-06, "loss": 0.4186, "step": 11080 }, { "epoch": 0.46003352378580065, "grad_norm": 2.7801992893218994, "learning_rate": 5.885450169156517e-06, "loss": 0.4505, "step": 11081 }, { "epoch": 0.460075039310012, "grad_norm": 2.280120849609375, "learning_rate": 5.884788472509112e-06, "loss": 0.6425, "step": 11082 }, { "epoch": 0.4601165548342233, "grad_norm": 2.7057278156280518, "learning_rate": 5.884126759864478e-06, "loss": 0.4515, "step": 11083 }, { "epoch": 0.46015807035843465, "grad_norm": 2.3264851570129395, "learning_rate": 5.883465031234577e-06, "loss": 0.4674, "step": 11084 }, { "epoch": 0.460199585882646, "grad_norm": 2.6950554847717285, "learning_rate": 5.8828032866313725e-06, "loss": 0.4181, "step": 11085 }, { "epoch": 0.4602411014068573, "grad_norm": 2.1073381900787354, "learning_rate": 5.882141526066831e-06, "loss": 0.4279, "step": 11086 }, { "epoch": 0.46028261693106864, "grad_norm": 2.7560784816741943, "learning_rate": 5.8814797495529165e-06, "loss": 0.5536, "step": 11087 }, { "epoch": 0.46032413245528, "grad_norm": 2.357405185699463, "learning_rate": 5.8808179571015935e-06, "loss": 0.5606, "step": 11088 }, { "epoch": 0.4603656479794913, "grad_norm": 3.0790276527404785, "learning_rate": 5.880156148724828e-06, "loss": 0.5029, "step": 11089 }, { "epoch": 0.46040716350370264, "grad_norm": 2.5085811614990234, "learning_rate": 5.879494324434587e-06, "loss": 0.6221, "step": 11090 }, { "epoch": 0.460448679027914, "grad_norm": 2.9002296924591064, "learning_rate": 5.878832484242833e-06, "loss": 0.6282, "step": 11091 }, { "epoch": 0.4604901945521253, "grad_norm": 2.8800625801086426, "learning_rate": 5.878170628161536e-06, "loss": 0.566, "step": 11092 }, { "epoch": 0.4605317100763367, "grad_norm": 2.5515756607055664, "learning_rate": 5.87750875620266e-06, "loss": 0.5411, "step": 11093 }, { "epoch": 0.46057322560054803, "grad_norm": 2.484056234359741, "learning_rate": 5.8768468683781735e-06, "loss": 0.5043, "step": 11094 }, { "epoch": 0.46061474112475936, "grad_norm": 2.177227020263672, "learning_rate": 5.876184964700041e-06, "loss": 0.4473, "step": 11095 }, { "epoch": 0.4606562566489707, "grad_norm": 2.2678897380828857, "learning_rate": 5.875523045180235e-06, "loss": 0.5204, "step": 11096 }, { "epoch": 0.46069777217318203, "grad_norm": 2.290341377258301, "learning_rate": 5.874861109830719e-06, "loss": 0.5294, "step": 11097 }, { "epoch": 0.46073928769739336, "grad_norm": 3.0431041717529297, "learning_rate": 5.87419915866346e-06, "loss": 0.7454, "step": 11098 }, { "epoch": 0.4607808032216047, "grad_norm": 2.398094415664673, "learning_rate": 5.873537191690431e-06, "loss": 0.4537, "step": 11099 }, { "epoch": 0.46082231874581603, "grad_norm": 2.2887015342712402, "learning_rate": 5.872875208923594e-06, "loss": 0.5591, "step": 11100 }, { "epoch": 0.46086383427002736, "grad_norm": 2.3074588775634766, "learning_rate": 5.872213210374925e-06, "loss": 0.5627, "step": 11101 }, { "epoch": 0.4609053497942387, "grad_norm": 2.534550666809082, "learning_rate": 5.871551196056389e-06, "loss": 0.4776, "step": 11102 }, { "epoch": 0.46094686531845, "grad_norm": 2.4619312286376953, "learning_rate": 5.870889165979956e-06, "loss": 0.5017, "step": 11103 }, { "epoch": 0.46098838084266136, "grad_norm": 2.551179885864258, "learning_rate": 5.870227120157594e-06, "loss": 0.6646, "step": 11104 }, { "epoch": 0.4610298963668727, "grad_norm": 3.102470636367798, "learning_rate": 5.869565058601278e-06, "loss": 0.5973, "step": 11105 }, { "epoch": 0.461071411891084, "grad_norm": 2.158402681350708, "learning_rate": 5.868902981322972e-06, "loss": 0.537, "step": 11106 }, { "epoch": 0.46111292741529536, "grad_norm": 2.2546799182891846, "learning_rate": 5.8682408883346535e-06, "loss": 0.6732, "step": 11107 }, { "epoch": 0.4611544429395067, "grad_norm": 2.5313172340393066, "learning_rate": 5.8675787796482866e-06, "loss": 0.4121, "step": 11108 }, { "epoch": 0.461195958463718, "grad_norm": 2.3787682056427, "learning_rate": 5.866916655275846e-06, "loss": 0.452, "step": 11109 }, { "epoch": 0.46123747398792936, "grad_norm": 2.1393826007843018, "learning_rate": 5.866254515229302e-06, "loss": 0.4688, "step": 11110 }, { "epoch": 0.4612789895121407, "grad_norm": 2.598660469055176, "learning_rate": 5.865592359520626e-06, "loss": 0.7091, "step": 11111 }, { "epoch": 0.461320505036352, "grad_norm": 2.386892080307007, "learning_rate": 5.864930188161794e-06, "loss": 0.5783, "step": 11112 }, { "epoch": 0.46136202056056336, "grad_norm": 2.601240396499634, "learning_rate": 5.864268001164771e-06, "loss": 0.5736, "step": 11113 }, { "epoch": 0.4614035360847747, "grad_norm": 2.419440269470215, "learning_rate": 5.863605798541536e-06, "loss": 0.5671, "step": 11114 }, { "epoch": 0.461445051608986, "grad_norm": 2.0413076877593994, "learning_rate": 5.862943580304058e-06, "loss": 0.45, "step": 11115 }, { "epoch": 0.46148656713319736, "grad_norm": 2.395294427871704, "learning_rate": 5.862281346464312e-06, "loss": 0.6441, "step": 11116 }, { "epoch": 0.4615280826574087, "grad_norm": 2.2511894702911377, "learning_rate": 5.86161909703427e-06, "loss": 0.5191, "step": 11117 }, { "epoch": 0.46156959818162, "grad_norm": 2.45410418510437, "learning_rate": 5.860956832025907e-06, "loss": 0.5664, "step": 11118 }, { "epoch": 0.46161111370583136, "grad_norm": 2.495836019515991, "learning_rate": 5.860294551451196e-06, "loss": 0.4293, "step": 11119 }, { "epoch": 0.4616526292300427, "grad_norm": 2.318160057067871, "learning_rate": 5.859632255322112e-06, "loss": 0.5252, "step": 11120 }, { "epoch": 0.461694144754254, "grad_norm": 2.520951747894287, "learning_rate": 5.858969943650627e-06, "loss": 0.5094, "step": 11121 }, { "epoch": 0.46173566027846535, "grad_norm": 1.9910074472427368, "learning_rate": 5.858307616448721e-06, "loss": 0.4242, "step": 11122 }, { "epoch": 0.4617771758026767, "grad_norm": 2.4983644485473633, "learning_rate": 5.857645273728364e-06, "loss": 0.4891, "step": 11123 }, { "epoch": 0.4618186913268881, "grad_norm": 2.387232542037964, "learning_rate": 5.856982915501533e-06, "loss": 0.5241, "step": 11124 }, { "epoch": 0.4618602068510994, "grad_norm": 2.188570261001587, "learning_rate": 5.856320541780206e-06, "loss": 0.5331, "step": 11125 }, { "epoch": 0.46190172237531074, "grad_norm": 2.5755112171173096, "learning_rate": 5.855658152576355e-06, "loss": 0.5836, "step": 11126 }, { "epoch": 0.4619432378995221, "grad_norm": 3.676198720932007, "learning_rate": 5.854995747901958e-06, "loss": 0.4617, "step": 11127 }, { "epoch": 0.4619847534237334, "grad_norm": 2.44808292388916, "learning_rate": 5.854333327768991e-06, "loss": 0.6058, "step": 11128 }, { "epoch": 0.46202626894794474, "grad_norm": 3.0281219482421875, "learning_rate": 5.853670892189432e-06, "loss": 0.5607, "step": 11129 }, { "epoch": 0.4620677844721561, "grad_norm": 2.406611919403076, "learning_rate": 5.853008441175257e-06, "loss": 0.5618, "step": 11130 }, { "epoch": 0.4621092999963674, "grad_norm": 3.0532824993133545, "learning_rate": 5.852345974738445e-06, "loss": 0.438, "step": 11131 }, { "epoch": 0.46215081552057874, "grad_norm": 2.4358975887298584, "learning_rate": 5.8516834928909695e-06, "loss": 0.4491, "step": 11132 }, { "epoch": 0.4621923310447901, "grad_norm": 2.4225287437438965, "learning_rate": 5.851020995644813e-06, "loss": 0.6858, "step": 11133 }, { "epoch": 0.4622338465690014, "grad_norm": 1.982372760772705, "learning_rate": 5.85035848301195e-06, "loss": 0.4599, "step": 11134 }, { "epoch": 0.46227536209321274, "grad_norm": 2.380293369293213, "learning_rate": 5.849695955004362e-06, "loss": 0.4836, "step": 11135 }, { "epoch": 0.46231687761742407, "grad_norm": 2.1960418224334717, "learning_rate": 5.849033411634027e-06, "loss": 0.5428, "step": 11136 }, { "epoch": 0.4623583931416354, "grad_norm": 2.688920021057129, "learning_rate": 5.848370852912921e-06, "loss": 0.568, "step": 11137 }, { "epoch": 0.46239990866584674, "grad_norm": 2.5087926387786865, "learning_rate": 5.847708278853026e-06, "loss": 0.5377, "step": 11138 }, { "epoch": 0.46244142419005807, "grad_norm": 2.648177146911621, "learning_rate": 5.8470456894663206e-06, "loss": 0.5191, "step": 11139 }, { "epoch": 0.4624829397142694, "grad_norm": 2.421393871307373, "learning_rate": 5.8463830847647866e-06, "loss": 0.5864, "step": 11140 }, { "epoch": 0.46252445523848074, "grad_norm": 2.002822160720825, "learning_rate": 5.8457204647604004e-06, "loss": 0.35, "step": 11141 }, { "epoch": 0.46256597076269207, "grad_norm": 2.416942596435547, "learning_rate": 5.845057829465146e-06, "loss": 0.4952, "step": 11142 }, { "epoch": 0.4626074862869034, "grad_norm": 2.242706298828125, "learning_rate": 5.844395178891001e-06, "loss": 0.5179, "step": 11143 }, { "epoch": 0.46264900181111474, "grad_norm": 2.156860113143921, "learning_rate": 5.84373251304995e-06, "loss": 0.5423, "step": 11144 }, { "epoch": 0.46269051733532607, "grad_norm": 2.972034215927124, "learning_rate": 5.84306983195397e-06, "loss": 0.5738, "step": 11145 }, { "epoch": 0.4627320328595374, "grad_norm": 2.462684392929077, "learning_rate": 5.842407135615046e-06, "loss": 0.6871, "step": 11146 }, { "epoch": 0.46277354838374873, "grad_norm": 2.109480381011963, "learning_rate": 5.841744424045157e-06, "loss": 0.3952, "step": 11147 }, { "epoch": 0.46281506390796007, "grad_norm": 2.3581416606903076, "learning_rate": 5.841081697256287e-06, "loss": 0.485, "step": 11148 }, { "epoch": 0.4628565794321714, "grad_norm": 2.07084584236145, "learning_rate": 5.840418955260417e-06, "loss": 0.4332, "step": 11149 }, { "epoch": 0.46289809495638273, "grad_norm": 2.5434861183166504, "learning_rate": 5.83975619806953e-06, "loss": 0.4849, "step": 11150 }, { "epoch": 0.46293961048059407, "grad_norm": 2.5979177951812744, "learning_rate": 5.839093425695609e-06, "loss": 0.5218, "step": 11151 }, { "epoch": 0.4629811260048054, "grad_norm": 2.3122777938842773, "learning_rate": 5.838430638150636e-06, "loss": 0.5573, "step": 11152 }, { "epoch": 0.46302264152901673, "grad_norm": 2.5311903953552246, "learning_rate": 5.837767835446597e-06, "loss": 0.4938, "step": 11153 }, { "epoch": 0.46306415705322806, "grad_norm": 2.6498379707336426, "learning_rate": 5.837105017595474e-06, "loss": 0.6116, "step": 11154 }, { "epoch": 0.46310567257743945, "grad_norm": 2.331529378890991, "learning_rate": 5.83644218460925e-06, "loss": 0.6508, "step": 11155 }, { "epoch": 0.4631471881016508, "grad_norm": 2.174424886703491, "learning_rate": 5.83577933649991e-06, "loss": 0.5311, "step": 11156 }, { "epoch": 0.4631887036258621, "grad_norm": 2.5505850315093994, "learning_rate": 5.835116473279441e-06, "loss": 0.5509, "step": 11157 }, { "epoch": 0.46323021915007345, "grad_norm": 1.9980579614639282, "learning_rate": 5.834453594959823e-06, "loss": 0.6077, "step": 11158 }, { "epoch": 0.4632717346742848, "grad_norm": 2.360630989074707, "learning_rate": 5.833790701553045e-06, "loss": 0.5683, "step": 11159 }, { "epoch": 0.4633132501984961, "grad_norm": 2.2657790184020996, "learning_rate": 5.833127793071089e-06, "loss": 0.3995, "step": 11160 }, { "epoch": 0.46335476572270745, "grad_norm": 2.610614061355591, "learning_rate": 5.832464869525945e-06, "loss": 0.4694, "step": 11161 }, { "epoch": 0.4633962812469188, "grad_norm": 2.578075408935547, "learning_rate": 5.831801930929595e-06, "loss": 0.5214, "step": 11162 }, { "epoch": 0.4634377967711301, "grad_norm": 2.407729148864746, "learning_rate": 5.831138977294025e-06, "loss": 0.4492, "step": 11163 }, { "epoch": 0.46347931229534145, "grad_norm": 2.580395221710205, "learning_rate": 5.830476008631224e-06, "loss": 0.6476, "step": 11164 }, { "epoch": 0.4635208278195528, "grad_norm": 2.590806245803833, "learning_rate": 5.829813024953177e-06, "loss": 0.6192, "step": 11165 }, { "epoch": 0.4635623433437641, "grad_norm": 2.4154446125030518, "learning_rate": 5.829150026271871e-06, "loss": 0.3741, "step": 11166 }, { "epoch": 0.46360385886797545, "grad_norm": 2.6358585357666016, "learning_rate": 5.8284870125992945e-06, "loss": 0.4571, "step": 11167 }, { "epoch": 0.4636453743921868, "grad_norm": 2.5153696537017822, "learning_rate": 5.827823983947434e-06, "loss": 0.6076, "step": 11168 }, { "epoch": 0.4636868899163981, "grad_norm": 1.8224472999572754, "learning_rate": 5.827160940328276e-06, "loss": 0.4614, "step": 11169 }, { "epoch": 0.46372840544060945, "grad_norm": 2.9242489337921143, "learning_rate": 5.826497881753811e-06, "loss": 0.5011, "step": 11170 }, { "epoch": 0.4637699209648208, "grad_norm": 2.354229688644409, "learning_rate": 5.825834808236025e-06, "loss": 0.5292, "step": 11171 }, { "epoch": 0.4638114364890321, "grad_norm": 2.059638261795044, "learning_rate": 5.82517171978691e-06, "loss": 0.4223, "step": 11172 }, { "epoch": 0.46385295201324345, "grad_norm": 2.130911111831665, "learning_rate": 5.824508616418449e-06, "loss": 0.4312, "step": 11173 }, { "epoch": 0.4638944675374548, "grad_norm": 2.2765729427337646, "learning_rate": 5.8238454981426375e-06, "loss": 0.5512, "step": 11174 }, { "epoch": 0.4639359830616661, "grad_norm": 2.459726095199585, "learning_rate": 5.823182364971462e-06, "loss": 0.4852, "step": 11175 }, { "epoch": 0.46397749858587745, "grad_norm": 2.751309394836426, "learning_rate": 5.822519216916909e-06, "loss": 0.5694, "step": 11176 }, { "epoch": 0.4640190141100888, "grad_norm": 2.155918598175049, "learning_rate": 5.821856053990974e-06, "loss": 0.3957, "step": 11177 }, { "epoch": 0.4640605296343001, "grad_norm": 2.941706418991089, "learning_rate": 5.821192876205644e-06, "loss": 0.6371, "step": 11178 }, { "epoch": 0.46410204515851144, "grad_norm": 2.627427577972412, "learning_rate": 5.82052968357291e-06, "loss": 0.564, "step": 11179 }, { "epoch": 0.4641435606827228, "grad_norm": 2.364452838897705, "learning_rate": 5.819866476104763e-06, "loss": 0.5412, "step": 11180 }, { "epoch": 0.4641850762069341, "grad_norm": 2.224332094192505, "learning_rate": 5.819203253813194e-06, "loss": 0.4737, "step": 11181 }, { "epoch": 0.46422659173114544, "grad_norm": 1.8406368494033813, "learning_rate": 5.818540016710193e-06, "loss": 0.3784, "step": 11182 }, { "epoch": 0.4642681072553568, "grad_norm": 2.58587908744812, "learning_rate": 5.817876764807754e-06, "loss": 0.5906, "step": 11183 }, { "epoch": 0.4643096227795681, "grad_norm": 2.61114764213562, "learning_rate": 5.817213498117866e-06, "loss": 0.4878, "step": 11184 }, { "epoch": 0.46435113830377944, "grad_norm": 2.4663116931915283, "learning_rate": 5.816550216652523e-06, "loss": 0.5559, "step": 11185 }, { "epoch": 0.46439265382799083, "grad_norm": 2.767678737640381, "learning_rate": 5.815886920423717e-06, "loss": 0.5998, "step": 11186 }, { "epoch": 0.46443416935220216, "grad_norm": 2.105102062225342, "learning_rate": 5.8152236094434415e-06, "loss": 0.4084, "step": 11187 }, { "epoch": 0.4644756848764135, "grad_norm": 2.230473518371582, "learning_rate": 5.814560283723687e-06, "loss": 0.3911, "step": 11188 }, { "epoch": 0.46451720040062483, "grad_norm": 2.6734132766723633, "learning_rate": 5.813896943276447e-06, "loss": 0.4479, "step": 11189 }, { "epoch": 0.46455871592483616, "grad_norm": 2.7653000354766846, "learning_rate": 5.813233588113716e-06, "loss": 0.5015, "step": 11190 }, { "epoch": 0.4646002314490475, "grad_norm": 2.4987237453460693, "learning_rate": 5.812570218247487e-06, "loss": 0.4646, "step": 11191 }, { "epoch": 0.46464174697325883, "grad_norm": 2.520968198776245, "learning_rate": 5.811906833689756e-06, "loss": 0.4175, "step": 11192 }, { "epoch": 0.46468326249747016, "grad_norm": 2.2050118446350098, "learning_rate": 5.811243434452514e-06, "loss": 0.5311, "step": 11193 }, { "epoch": 0.4647247780216815, "grad_norm": 2.3237011432647705, "learning_rate": 5.8105800205477566e-06, "loss": 0.4851, "step": 11194 }, { "epoch": 0.46476629354589283, "grad_norm": 1.9723560810089111, "learning_rate": 5.809916591987479e-06, "loss": 0.5195, "step": 11195 }, { "epoch": 0.46480780907010416, "grad_norm": 2.969866991043091, "learning_rate": 5.809253148783677e-06, "loss": 0.5208, "step": 11196 }, { "epoch": 0.4648493245943155, "grad_norm": 2.3001160621643066, "learning_rate": 5.808589690948344e-06, "loss": 0.5247, "step": 11197 }, { "epoch": 0.4648908401185268, "grad_norm": 2.3395187854766846, "learning_rate": 5.807926218493477e-06, "loss": 0.4624, "step": 11198 }, { "epoch": 0.46493235564273816, "grad_norm": 2.180206775665283, "learning_rate": 5.807262731431069e-06, "loss": 0.532, "step": 11199 }, { "epoch": 0.4649738711669495, "grad_norm": 2.1589789390563965, "learning_rate": 5.806599229773121e-06, "loss": 0.4086, "step": 11200 }, { "epoch": 0.4650153866911608, "grad_norm": 2.028017282485962, "learning_rate": 5.805935713531625e-06, "loss": 0.5339, "step": 11201 }, { "epoch": 0.46505690221537216, "grad_norm": 2.5944511890411377, "learning_rate": 5.80527218271858e-06, "loss": 0.5009, "step": 11202 }, { "epoch": 0.4650984177395835, "grad_norm": 2.1979551315307617, "learning_rate": 5.804608637345981e-06, "loss": 0.6159, "step": 11203 }, { "epoch": 0.4651399332637948, "grad_norm": 2.2626736164093018, "learning_rate": 5.803945077425826e-06, "loss": 0.4713, "step": 11204 }, { "epoch": 0.46518144878800616, "grad_norm": 2.786001682281494, "learning_rate": 5.803281502970112e-06, "loss": 0.5173, "step": 11205 }, { "epoch": 0.4652229643122175, "grad_norm": 2.7135891914367676, "learning_rate": 5.8026179139908365e-06, "loss": 0.5432, "step": 11206 }, { "epoch": 0.4652644798364288, "grad_norm": 2.1891584396362305, "learning_rate": 5.801954310499999e-06, "loss": 0.5125, "step": 11207 }, { "epoch": 0.46530599536064016, "grad_norm": 2.730679512023926, "learning_rate": 5.801290692509595e-06, "loss": 0.5799, "step": 11208 }, { "epoch": 0.4653475108848515, "grad_norm": 2.5229997634887695, "learning_rate": 5.800627060031625e-06, "loss": 0.4808, "step": 11209 }, { "epoch": 0.4653890264090628, "grad_norm": 2.564549684524536, "learning_rate": 5.799963413078087e-06, "loss": 0.6454, "step": 11210 }, { "epoch": 0.46543054193327416, "grad_norm": 2.660076379776001, "learning_rate": 5.799299751660981e-06, "loss": 0.4896, "step": 11211 }, { "epoch": 0.4654720574574855, "grad_norm": 2.396376848220825, "learning_rate": 5.798636075792304e-06, "loss": 0.5134, "step": 11212 }, { "epoch": 0.4655135729816968, "grad_norm": 2.6602272987365723, "learning_rate": 5.797972385484057e-06, "loss": 0.3988, "step": 11213 }, { "epoch": 0.46555508850590815, "grad_norm": 1.8768733739852905, "learning_rate": 5.797308680748239e-06, "loss": 0.5578, "step": 11214 }, { "epoch": 0.4655966040301195, "grad_norm": 2.357975482940674, "learning_rate": 5.79664496159685e-06, "loss": 0.4441, "step": 11215 }, { "epoch": 0.4656381195543308, "grad_norm": 2.3240554332733154, "learning_rate": 5.79598122804189e-06, "loss": 0.446, "step": 11216 }, { "epoch": 0.4656796350785422, "grad_norm": 2.1400675773620605, "learning_rate": 5.795317480095361e-06, "loss": 0.484, "step": 11217 }, { "epoch": 0.46572115060275354, "grad_norm": 2.7607979774475098, "learning_rate": 5.794653717769263e-06, "loss": 0.6012, "step": 11218 }, { "epoch": 0.4657626661269649, "grad_norm": 2.200911045074463, "learning_rate": 5.793989941075595e-06, "loss": 0.3909, "step": 11219 }, { "epoch": 0.4658041816511762, "grad_norm": 2.740879535675049, "learning_rate": 5.793326150026362e-06, "loss": 0.4911, "step": 11220 }, { "epoch": 0.46584569717538754, "grad_norm": 2.6605639457702637, "learning_rate": 5.792662344633561e-06, "loss": 0.4597, "step": 11221 }, { "epoch": 0.4658872126995989, "grad_norm": 2.4476044178009033, "learning_rate": 5.791998524909199e-06, "loss": 0.5458, "step": 11222 }, { "epoch": 0.4659287282238102, "grad_norm": 1.970864176750183, "learning_rate": 5.791334690865275e-06, "loss": 0.3449, "step": 11223 }, { "epoch": 0.46597024374802154, "grad_norm": 2.409693956375122, "learning_rate": 5.79067084251379e-06, "loss": 0.6659, "step": 11224 }, { "epoch": 0.4660117592722329, "grad_norm": 2.6010308265686035, "learning_rate": 5.79000697986675e-06, "loss": 0.6662, "step": 11225 }, { "epoch": 0.4660532747964442, "grad_norm": 3.245805263519287, "learning_rate": 5.789343102936156e-06, "loss": 0.6185, "step": 11226 }, { "epoch": 0.46609479032065554, "grad_norm": 2.5223400592803955, "learning_rate": 5.788679211734011e-06, "loss": 0.503, "step": 11227 }, { "epoch": 0.46613630584486687, "grad_norm": 2.644155979156494, "learning_rate": 5.788015306272319e-06, "loss": 0.4683, "step": 11228 }, { "epoch": 0.4661778213690782, "grad_norm": 2.2109479904174805, "learning_rate": 5.7873513865630816e-06, "loss": 0.5698, "step": 11229 }, { "epoch": 0.46621933689328954, "grad_norm": 2.09440541267395, "learning_rate": 5.786687452618303e-06, "loss": 0.6332, "step": 11230 }, { "epoch": 0.46626085241750087, "grad_norm": 2.765761613845825, "learning_rate": 5.786023504449991e-06, "loss": 0.5032, "step": 11231 }, { "epoch": 0.4663023679417122, "grad_norm": 2.494697332382202, "learning_rate": 5.785359542070147e-06, "loss": 0.5803, "step": 11232 }, { "epoch": 0.46634388346592354, "grad_norm": 1.853898286819458, "learning_rate": 5.784695565490777e-06, "loss": 0.4327, "step": 11233 }, { "epoch": 0.46638539899013487, "grad_norm": 2.25455379486084, "learning_rate": 5.784031574723883e-06, "loss": 0.4497, "step": 11234 }, { "epoch": 0.4664269145143462, "grad_norm": 2.8017306327819824, "learning_rate": 5.783367569781474e-06, "loss": 0.6687, "step": 11235 }, { "epoch": 0.46646843003855754, "grad_norm": 2.409729480743408, "learning_rate": 5.782703550675552e-06, "loss": 0.5789, "step": 11236 }, { "epoch": 0.46650994556276887, "grad_norm": 2.5783798694610596, "learning_rate": 5.782039517418125e-06, "loss": 0.6541, "step": 11237 }, { "epoch": 0.4665514610869802, "grad_norm": 2.962383508682251, "learning_rate": 5.781375470021199e-06, "loss": 0.6024, "step": 11238 }, { "epoch": 0.46659297661119153, "grad_norm": 2.841557025909424, "learning_rate": 5.7807114084967775e-06, "loss": 0.62, "step": 11239 }, { "epoch": 0.46663449213540287, "grad_norm": 2.4321093559265137, "learning_rate": 5.78004733285687e-06, "loss": 0.556, "step": 11240 }, { "epoch": 0.4666760076596142, "grad_norm": 2.646892786026001, "learning_rate": 5.7793832431134825e-06, "loss": 0.4601, "step": 11241 }, { "epoch": 0.46671752318382553, "grad_norm": 2.0123987197875977, "learning_rate": 5.778719139278619e-06, "loss": 0.3503, "step": 11242 }, { "epoch": 0.46675903870803687, "grad_norm": 2.460667848587036, "learning_rate": 5.77805502136429e-06, "loss": 0.5037, "step": 11243 }, { "epoch": 0.4668005542322482, "grad_norm": 2.415971517562866, "learning_rate": 5.7773908893825025e-06, "loss": 0.48, "step": 11244 }, { "epoch": 0.46684206975645953, "grad_norm": 2.3665337562561035, "learning_rate": 5.7767267433452636e-06, "loss": 0.7098, "step": 11245 }, { "epoch": 0.46688358528067087, "grad_norm": 2.5292370319366455, "learning_rate": 5.776062583264581e-06, "loss": 0.6874, "step": 11246 }, { "epoch": 0.4669251008048822, "grad_norm": 2.7178292274475098, "learning_rate": 5.775398409152463e-06, "loss": 0.5484, "step": 11247 }, { "epoch": 0.4669666163290936, "grad_norm": 2.7159385681152344, "learning_rate": 5.774734221020919e-06, "loss": 0.5053, "step": 11248 }, { "epoch": 0.4670081318533049, "grad_norm": 2.6054604053497314, "learning_rate": 5.774070018881955e-06, "loss": 0.431, "step": 11249 }, { "epoch": 0.46704964737751625, "grad_norm": 2.1773288249969482, "learning_rate": 5.773405802747585e-06, "loss": 0.436, "step": 11250 }, { "epoch": 0.4670911629017276, "grad_norm": 2.374016046524048, "learning_rate": 5.772741572629814e-06, "loss": 0.5229, "step": 11251 }, { "epoch": 0.4671326784259389, "grad_norm": 2.3251307010650635, "learning_rate": 5.772077328540652e-06, "loss": 0.5054, "step": 11252 }, { "epoch": 0.46717419395015025, "grad_norm": 2.346177816390991, "learning_rate": 5.77141307049211e-06, "loss": 0.5703, "step": 11253 }, { "epoch": 0.4672157094743616, "grad_norm": 2.948150157928467, "learning_rate": 5.770748798496198e-06, "loss": 0.589, "step": 11254 }, { "epoch": 0.4672572249985729, "grad_norm": 2.1930248737335205, "learning_rate": 5.770084512564923e-06, "loss": 0.5557, "step": 11255 }, { "epoch": 0.46729874052278425, "grad_norm": 2.356144905090332, "learning_rate": 5.769420212710299e-06, "loss": 0.5608, "step": 11256 }, { "epoch": 0.4673402560469956, "grad_norm": 2.385308265686035, "learning_rate": 5.768755898944337e-06, "loss": 0.519, "step": 11257 }, { "epoch": 0.4673817715712069, "grad_norm": 2.2399699687957764, "learning_rate": 5.768091571279046e-06, "loss": 0.5899, "step": 11258 }, { "epoch": 0.46742328709541825, "grad_norm": 2.467514991760254, "learning_rate": 5.767427229726438e-06, "loss": 0.5451, "step": 11259 }, { "epoch": 0.4674648026196296, "grad_norm": 2.339204788208008, "learning_rate": 5.766762874298523e-06, "loss": 0.5474, "step": 11260 }, { "epoch": 0.4675063181438409, "grad_norm": 1.994442105293274, "learning_rate": 5.766098505007318e-06, "loss": 0.4951, "step": 11261 }, { "epoch": 0.46754783366805225, "grad_norm": 2.838294744491577, "learning_rate": 5.765434121864828e-06, "loss": 0.5414, "step": 11262 }, { "epoch": 0.4675893491922636, "grad_norm": 2.7671959400177, "learning_rate": 5.76476972488307e-06, "loss": 0.4537, "step": 11263 }, { "epoch": 0.4676308647164749, "grad_norm": 2.572277307510376, "learning_rate": 5.764105314074055e-06, "loss": 0.5056, "step": 11264 }, { "epoch": 0.46767238024068625, "grad_norm": 1.9890294075012207, "learning_rate": 5.763440889449796e-06, "loss": 0.5155, "step": 11265 }, { "epoch": 0.4677138957648976, "grad_norm": 2.430014133453369, "learning_rate": 5.7627764510223036e-06, "loss": 0.4587, "step": 11266 }, { "epoch": 0.4677554112891089, "grad_norm": 1.9635950326919556, "learning_rate": 5.762111998803595e-06, "loss": 0.4607, "step": 11267 }, { "epoch": 0.46779692681332025, "grad_norm": 3.064962387084961, "learning_rate": 5.7614475328056804e-06, "loss": 0.5974, "step": 11268 }, { "epoch": 0.4678384423375316, "grad_norm": 2.442462205886841, "learning_rate": 5.7607830530405765e-06, "loss": 0.5508, "step": 11269 }, { "epoch": 0.4678799578617429, "grad_norm": 2.6587045192718506, "learning_rate": 5.7601185595202944e-06, "loss": 0.49, "step": 11270 }, { "epoch": 0.46792147338595425, "grad_norm": 2.276974678039551, "learning_rate": 5.7594540522568495e-06, "loss": 0.5247, "step": 11271 }, { "epoch": 0.4679629889101656, "grad_norm": 2.0453577041625977, "learning_rate": 5.7587895312622576e-06, "loss": 0.496, "step": 11272 }, { "epoch": 0.4680045044343769, "grad_norm": 2.2152774333953857, "learning_rate": 5.7581249965485305e-06, "loss": 0.5135, "step": 11273 }, { "epoch": 0.46804601995858824, "grad_norm": 2.500833034515381, "learning_rate": 5.757460448127688e-06, "loss": 0.57, "step": 11274 }, { "epoch": 0.4680875354827996, "grad_norm": 2.181638240814209, "learning_rate": 5.756795886011739e-06, "loss": 0.4882, "step": 11275 }, { "epoch": 0.4681290510070109, "grad_norm": 2.8045125007629395, "learning_rate": 5.7561313102127045e-06, "loss": 0.5317, "step": 11276 }, { "epoch": 0.46817056653122224, "grad_norm": 2.2141947746276855, "learning_rate": 5.755466720742599e-06, "loss": 0.3706, "step": 11277 }, { "epoch": 0.4682120820554336, "grad_norm": 2.3547544479370117, "learning_rate": 5.754802117613435e-06, "loss": 0.4711, "step": 11278 }, { "epoch": 0.46825359757964496, "grad_norm": 2.744218349456787, "learning_rate": 5.754137500837232e-06, "loss": 0.5503, "step": 11279 }, { "epoch": 0.4682951131038563, "grad_norm": 2.145962715148926, "learning_rate": 5.753472870426006e-06, "loss": 0.4169, "step": 11280 }, { "epoch": 0.46833662862806763, "grad_norm": 2.6135478019714355, "learning_rate": 5.7528082263917716e-06, "loss": 0.565, "step": 11281 }, { "epoch": 0.46837814415227896, "grad_norm": 2.1956162452697754, "learning_rate": 5.752143568746549e-06, "loss": 0.5749, "step": 11282 }, { "epoch": 0.4684196596764903, "grad_norm": 2.484757661819458, "learning_rate": 5.751478897502353e-06, "loss": 0.5649, "step": 11283 }, { "epoch": 0.46846117520070163, "grad_norm": 2.0960686206817627, "learning_rate": 5.750814212671202e-06, "loss": 0.4736, "step": 11284 }, { "epoch": 0.46850269072491296, "grad_norm": 2.1293222904205322, "learning_rate": 5.750149514265115e-06, "loss": 0.3471, "step": 11285 }, { "epoch": 0.4685442062491243, "grad_norm": 2.3268699645996094, "learning_rate": 5.749484802296107e-06, "loss": 0.5113, "step": 11286 }, { "epoch": 0.46858572177333563, "grad_norm": 2.947075128555298, "learning_rate": 5.7488200767761984e-06, "loss": 0.5591, "step": 11287 }, { "epoch": 0.46862723729754696, "grad_norm": 2.5355734825134277, "learning_rate": 5.748155337717406e-06, "loss": 0.6227, "step": 11288 }, { "epoch": 0.4686687528217583, "grad_norm": 2.311790943145752, "learning_rate": 5.7474905851317505e-06, "loss": 0.6202, "step": 11289 }, { "epoch": 0.4687102683459696, "grad_norm": 2.3463351726531982, "learning_rate": 5.746825819031251e-06, "loss": 0.4892, "step": 11290 }, { "epoch": 0.46875178387018096, "grad_norm": 2.031233072280884, "learning_rate": 5.746161039427924e-06, "loss": 0.4838, "step": 11291 }, { "epoch": 0.4687932993943923, "grad_norm": 2.380153179168701, "learning_rate": 5.745496246333791e-06, "loss": 0.4595, "step": 11292 }, { "epoch": 0.4688348149186036, "grad_norm": 2.9408223628997803, "learning_rate": 5.744831439760871e-06, "loss": 0.6535, "step": 11293 }, { "epoch": 0.46887633044281496, "grad_norm": 2.65518856048584, "learning_rate": 5.744166619721182e-06, "loss": 0.6065, "step": 11294 }, { "epoch": 0.4689178459670263, "grad_norm": 2.2471461296081543, "learning_rate": 5.743501786226749e-06, "loss": 0.5536, "step": 11295 }, { "epoch": 0.4689593614912376, "grad_norm": 2.687873125076294, "learning_rate": 5.742836939289587e-06, "loss": 0.5417, "step": 11296 }, { "epoch": 0.46900087701544896, "grad_norm": 2.4017999172210693, "learning_rate": 5.742172078921718e-06, "loss": 0.5425, "step": 11297 }, { "epoch": 0.4690423925396603, "grad_norm": 2.3578484058380127, "learning_rate": 5.741507205135168e-06, "loss": 0.5033, "step": 11298 }, { "epoch": 0.4690839080638716, "grad_norm": 2.149357557296753, "learning_rate": 5.74084231794195e-06, "loss": 0.5502, "step": 11299 }, { "epoch": 0.46912542358808296, "grad_norm": 2.3172545433044434, "learning_rate": 5.740177417354091e-06, "loss": 0.5913, "step": 11300 }, { "epoch": 0.4691669391122943, "grad_norm": 1.8031392097473145, "learning_rate": 5.739512503383611e-06, "loss": 0.3984, "step": 11301 }, { "epoch": 0.4692084546365056, "grad_norm": 1.9581812620162964, "learning_rate": 5.738847576042532e-06, "loss": 0.4958, "step": 11302 }, { "epoch": 0.46924997016071696, "grad_norm": 1.7539972066879272, "learning_rate": 5.738182635342876e-06, "loss": 0.4236, "step": 11303 }, { "epoch": 0.4692914856849283, "grad_norm": 2.5372161865234375, "learning_rate": 5.737517681296666e-06, "loss": 0.5504, "step": 11304 }, { "epoch": 0.4693330012091396, "grad_norm": 2.1238396167755127, "learning_rate": 5.736852713915921e-06, "loss": 0.3061, "step": 11305 }, { "epoch": 0.46937451673335095, "grad_norm": 2.444338798522949, "learning_rate": 5.736187733212669e-06, "loss": 0.5, "step": 11306 }, { "epoch": 0.4694160322575623, "grad_norm": 2.662371873855591, "learning_rate": 5.73552273919893e-06, "loss": 0.642, "step": 11307 }, { "epoch": 0.4694575477817736, "grad_norm": 2.339308023452759, "learning_rate": 5.734857731886728e-06, "loss": 0.4503, "step": 11308 }, { "epoch": 0.469499063305985, "grad_norm": 2.3728573322296143, "learning_rate": 5.734192711288085e-06, "loss": 0.6294, "step": 11309 }, { "epoch": 0.46954057883019634, "grad_norm": 2.284207344055176, "learning_rate": 5.733527677415027e-06, "loss": 0.3993, "step": 11310 }, { "epoch": 0.4695820943544077, "grad_norm": 2.3246772289276123, "learning_rate": 5.732862630279578e-06, "loss": 0.3751, "step": 11311 }, { "epoch": 0.469623609878619, "grad_norm": 2.1660633087158203, "learning_rate": 5.73219756989376e-06, "loss": 0.4351, "step": 11312 }, { "epoch": 0.46966512540283034, "grad_norm": 2.591775417327881, "learning_rate": 5.7315324962696e-06, "loss": 0.4915, "step": 11313 }, { "epoch": 0.4697066409270417, "grad_norm": 2.072736978530884, "learning_rate": 5.730867409419121e-06, "loss": 0.525, "step": 11314 }, { "epoch": 0.469748156451253, "grad_norm": 2.3945810794830322, "learning_rate": 5.730202309354349e-06, "loss": 0.5239, "step": 11315 }, { "epoch": 0.46978967197546434, "grad_norm": 2.449716329574585, "learning_rate": 5.729537196087309e-06, "loss": 0.5195, "step": 11316 }, { "epoch": 0.4698311874996757, "grad_norm": 2.2560253143310547, "learning_rate": 5.728872069630027e-06, "loss": 0.4543, "step": 11317 }, { "epoch": 0.469872703023887, "grad_norm": 2.181866407394409, "learning_rate": 5.728206929994526e-06, "loss": 0.4653, "step": 11318 }, { "epoch": 0.46991421854809834, "grad_norm": 3.0394747257232666, "learning_rate": 5.727541777192835e-06, "loss": 0.5749, "step": 11319 }, { "epoch": 0.46995573407230967, "grad_norm": 2.00142240524292, "learning_rate": 5.726876611236978e-06, "loss": 0.5235, "step": 11320 }, { "epoch": 0.469997249596521, "grad_norm": 2.050757646560669, "learning_rate": 5.726211432138983e-06, "loss": 0.4048, "step": 11321 }, { "epoch": 0.47003876512073234, "grad_norm": 2.632974624633789, "learning_rate": 5.725546239910875e-06, "loss": 0.4669, "step": 11322 }, { "epoch": 0.47008028064494367, "grad_norm": 2.017533302307129, "learning_rate": 5.724881034564682e-06, "loss": 0.2487, "step": 11323 }, { "epoch": 0.470121796169155, "grad_norm": 2.2935898303985596, "learning_rate": 5.7242158161124326e-06, "loss": 0.4979, "step": 11324 }, { "epoch": 0.47016331169336634, "grad_norm": 2.281567096710205, "learning_rate": 5.723550584566151e-06, "loss": 0.5363, "step": 11325 }, { "epoch": 0.47020482721757767, "grad_norm": 2.371119737625122, "learning_rate": 5.722885339937867e-06, "loss": 0.4238, "step": 11326 }, { "epoch": 0.470246342741789, "grad_norm": 2.3900299072265625, "learning_rate": 5.722220082239608e-06, "loss": 0.4595, "step": 11327 }, { "epoch": 0.47028785826600034, "grad_norm": 2.213879346847534, "learning_rate": 5.721554811483401e-06, "loss": 0.3964, "step": 11328 }, { "epoch": 0.47032937379021167, "grad_norm": 2.450943946838379, "learning_rate": 5.720889527681276e-06, "loss": 0.4761, "step": 11329 }, { "epoch": 0.470370889314423, "grad_norm": 2.365025520324707, "learning_rate": 5.720224230845261e-06, "loss": 0.5762, "step": 11330 }, { "epoch": 0.47041240483863433, "grad_norm": 2.028461456298828, "learning_rate": 5.719558920987382e-06, "loss": 0.4557, "step": 11331 }, { "epoch": 0.47045392036284567, "grad_norm": 2.329726219177246, "learning_rate": 5.7188935981196725e-06, "loss": 0.5549, "step": 11332 }, { "epoch": 0.470495435887057, "grad_norm": 2.794485569000244, "learning_rate": 5.718228262254157e-06, "loss": 0.5223, "step": 11333 }, { "epoch": 0.47053695141126833, "grad_norm": 2.520129442214966, "learning_rate": 5.71756291340287e-06, "loss": 0.5027, "step": 11334 }, { "epoch": 0.47057846693547967, "grad_norm": 2.341280937194824, "learning_rate": 5.716897551577838e-06, "loss": 0.5784, "step": 11335 }, { "epoch": 0.470619982459691, "grad_norm": 2.5718607902526855, "learning_rate": 5.71623217679109e-06, "loss": 0.5579, "step": 11336 }, { "epoch": 0.47066149798390233, "grad_norm": 3.1409969329833984, "learning_rate": 5.71556678905466e-06, "loss": 0.5476, "step": 11337 }, { "epoch": 0.47070301350811367, "grad_norm": 2.5684399604797363, "learning_rate": 5.714901388380575e-06, "loss": 0.5701, "step": 11338 }, { "epoch": 0.470744529032325, "grad_norm": 2.1483867168426514, "learning_rate": 5.7142359747808675e-06, "loss": 0.3859, "step": 11339 }, { "epoch": 0.4707860445565364, "grad_norm": 2.3876287937164307, "learning_rate": 5.713570548267568e-06, "loss": 0.5407, "step": 11340 }, { "epoch": 0.4708275600807477, "grad_norm": 2.9645049571990967, "learning_rate": 5.712905108852707e-06, "loss": 0.6413, "step": 11341 }, { "epoch": 0.47086907560495905, "grad_norm": 2.0766232013702393, "learning_rate": 5.712239656548317e-06, "loss": 0.4774, "step": 11342 }, { "epoch": 0.4709105911291704, "grad_norm": 2.2621943950653076, "learning_rate": 5.711574191366427e-06, "loss": 0.5402, "step": 11343 }, { "epoch": 0.4709521066533817, "grad_norm": 2.7837185859680176, "learning_rate": 5.710908713319071e-06, "loss": 0.5524, "step": 11344 }, { "epoch": 0.47099362217759305, "grad_norm": 2.3762028217315674, "learning_rate": 5.710243222418281e-06, "loss": 0.4524, "step": 11345 }, { "epoch": 0.4710351377018044, "grad_norm": 2.3632519245147705, "learning_rate": 5.709577718676088e-06, "loss": 0.6334, "step": 11346 }, { "epoch": 0.4710766532260157, "grad_norm": 3.339714765548706, "learning_rate": 5.708912202104526e-06, "loss": 0.5375, "step": 11347 }, { "epoch": 0.47111816875022705, "grad_norm": 2.148983955383301, "learning_rate": 5.708246672715627e-06, "loss": 0.5132, "step": 11348 }, { "epoch": 0.4711596842744384, "grad_norm": 2.484548568725586, "learning_rate": 5.707581130521424e-06, "loss": 0.5562, "step": 11349 }, { "epoch": 0.4712011997986497, "grad_norm": 2.9047529697418213, "learning_rate": 5.70691557553395e-06, "loss": 0.5222, "step": 11350 }, { "epoch": 0.47124271532286105, "grad_norm": 1.9956837892532349, "learning_rate": 5.706250007765239e-06, "loss": 0.4676, "step": 11351 }, { "epoch": 0.4712842308470724, "grad_norm": 2.1912283897399902, "learning_rate": 5.705584427227325e-06, "loss": 0.5642, "step": 11352 }, { "epoch": 0.4713257463712837, "grad_norm": 2.737274646759033, "learning_rate": 5.704918833932238e-06, "loss": 0.6343, "step": 11353 }, { "epoch": 0.47136726189549505, "grad_norm": 2.404981851577759, "learning_rate": 5.7042532278920185e-06, "loss": 0.5002, "step": 11354 }, { "epoch": 0.4714087774197064, "grad_norm": 3.08111310005188, "learning_rate": 5.7035876091186974e-06, "loss": 0.4453, "step": 11355 }, { "epoch": 0.4714502929439177, "grad_norm": 2.289095401763916, "learning_rate": 5.7029219776243095e-06, "loss": 0.6507, "step": 11356 }, { "epoch": 0.47149180846812905, "grad_norm": 2.2362353801727295, "learning_rate": 5.702256333420888e-06, "loss": 0.4886, "step": 11357 }, { "epoch": 0.4715333239923404, "grad_norm": 2.1638026237487793, "learning_rate": 5.701590676520471e-06, "loss": 0.4206, "step": 11358 }, { "epoch": 0.4715748395165517, "grad_norm": 2.1115851402282715, "learning_rate": 5.7009250069350915e-06, "loss": 0.3542, "step": 11359 }, { "epoch": 0.47161635504076305, "grad_norm": 2.240619421005249, "learning_rate": 5.700259324676787e-06, "loss": 0.5411, "step": 11360 }, { "epoch": 0.4716578705649744, "grad_norm": 2.340941905975342, "learning_rate": 5.699593629757591e-06, "loss": 0.4525, "step": 11361 }, { "epoch": 0.4716993860891857, "grad_norm": 2.334996461868286, "learning_rate": 5.6989279221895415e-06, "loss": 0.4481, "step": 11362 }, { "epoch": 0.47174090161339705, "grad_norm": 3.213648557662964, "learning_rate": 5.698262201984672e-06, "loss": 0.6793, "step": 11363 }, { "epoch": 0.4717824171376084, "grad_norm": 2.477015972137451, "learning_rate": 5.6975964691550215e-06, "loss": 0.6465, "step": 11364 }, { "epoch": 0.4718239326618197, "grad_norm": 2.4987759590148926, "learning_rate": 5.696930723712626e-06, "loss": 0.5002, "step": 11365 }, { "epoch": 0.47186544818603104, "grad_norm": 2.4308207035064697, "learning_rate": 5.696264965669523e-06, "loss": 0.5018, "step": 11366 }, { "epoch": 0.4719069637102424, "grad_norm": 2.2954678535461426, "learning_rate": 5.695599195037748e-06, "loss": 0.426, "step": 11367 }, { "epoch": 0.4719484792344537, "grad_norm": 2.385877847671509, "learning_rate": 5.694933411829339e-06, "loss": 0.6173, "step": 11368 }, { "epoch": 0.47198999475866504, "grad_norm": 2.28737473487854, "learning_rate": 5.694267616056334e-06, "loss": 0.4646, "step": 11369 }, { "epoch": 0.4720315102828764, "grad_norm": 2.423370599746704, "learning_rate": 5.6936018077307695e-06, "loss": 0.61, "step": 11370 }, { "epoch": 0.47207302580708776, "grad_norm": 3.143911600112915, "learning_rate": 5.692935986864685e-06, "loss": 0.5692, "step": 11371 }, { "epoch": 0.4721145413312991, "grad_norm": 3.0918943881988525, "learning_rate": 5.692270153470118e-06, "loss": 0.6372, "step": 11372 }, { "epoch": 0.47215605685551043, "grad_norm": 2.528188705444336, "learning_rate": 5.6916043075591065e-06, "loss": 0.5824, "step": 11373 }, { "epoch": 0.47219757237972176, "grad_norm": 2.4238779544830322, "learning_rate": 5.690938449143691e-06, "loss": 0.5198, "step": 11374 }, { "epoch": 0.4722390879039331, "grad_norm": 2.6613354682922363, "learning_rate": 5.690272578235908e-06, "loss": 0.597, "step": 11375 }, { "epoch": 0.47228060342814443, "grad_norm": 2.308727264404297, "learning_rate": 5.689606694847798e-06, "loss": 0.6241, "step": 11376 }, { "epoch": 0.47232211895235576, "grad_norm": 2.5806658267974854, "learning_rate": 5.6889407989914e-06, "loss": 0.5467, "step": 11377 }, { "epoch": 0.4723636344765671, "grad_norm": 2.9972198009490967, "learning_rate": 5.688274890678756e-06, "loss": 0.6461, "step": 11378 }, { "epoch": 0.47240515000077843, "grad_norm": 2.172003746032715, "learning_rate": 5.6876089699219016e-06, "loss": 0.4318, "step": 11379 }, { "epoch": 0.47244666552498976, "grad_norm": 2.0858664512634277, "learning_rate": 5.686943036732878e-06, "loss": 0.5266, "step": 11380 }, { "epoch": 0.4724881810492011, "grad_norm": 2.2394232749938965, "learning_rate": 5.686277091123729e-06, "loss": 0.5144, "step": 11381 }, { "epoch": 0.4725296965734124, "grad_norm": 3.104402542114258, "learning_rate": 5.685611133106491e-06, "loss": 0.3712, "step": 11382 }, { "epoch": 0.47257121209762376, "grad_norm": 2.682882308959961, "learning_rate": 5.684945162693205e-06, "loss": 0.54, "step": 11383 }, { "epoch": 0.4726127276218351, "grad_norm": 2.2826554775238037, "learning_rate": 5.684279179895915e-06, "loss": 0.7013, "step": 11384 }, { "epoch": 0.4726542431460464, "grad_norm": 1.9755877256393433, "learning_rate": 5.683613184726658e-06, "loss": 0.5452, "step": 11385 }, { "epoch": 0.47269575867025776, "grad_norm": 2.104410171508789, "learning_rate": 5.6829471771974796e-06, "loss": 0.4823, "step": 11386 }, { "epoch": 0.4727372741944691, "grad_norm": 3.228484630584717, "learning_rate": 5.682281157320418e-06, "loss": 0.5536, "step": 11387 }, { "epoch": 0.4727787897186804, "grad_norm": 2.745270252227783, "learning_rate": 5.681615125107518e-06, "loss": 0.67, "step": 11388 }, { "epoch": 0.47282030524289176, "grad_norm": 2.537006139755249, "learning_rate": 5.6809490805708175e-06, "loss": 0.5072, "step": 11389 }, { "epoch": 0.4728618207671031, "grad_norm": 2.6572837829589844, "learning_rate": 5.680283023722363e-06, "loss": 0.5014, "step": 11390 }, { "epoch": 0.4729033362913144, "grad_norm": 2.535916328430176, "learning_rate": 5.679616954574195e-06, "loss": 0.628, "step": 11391 }, { "epoch": 0.47294485181552576, "grad_norm": 2.104055166244507, "learning_rate": 5.678950873138358e-06, "loss": 0.4121, "step": 11392 }, { "epoch": 0.4729863673397371, "grad_norm": 2.075838088989258, "learning_rate": 5.678284779426891e-06, "loss": 0.4064, "step": 11393 }, { "epoch": 0.4730278828639484, "grad_norm": 2.455048084259033, "learning_rate": 5.677618673451842e-06, "loss": 0.5179, "step": 11394 }, { "epoch": 0.47306939838815976, "grad_norm": 2.2941038608551025, "learning_rate": 5.6769525552252514e-06, "loss": 0.5465, "step": 11395 }, { "epoch": 0.4731109139123711, "grad_norm": 2.6654186248779297, "learning_rate": 5.6762864247591635e-06, "loss": 0.4778, "step": 11396 }, { "epoch": 0.4731524294365824, "grad_norm": 2.4295365810394287, "learning_rate": 5.675620282065621e-06, "loss": 0.6285, "step": 11397 }, { "epoch": 0.47319394496079376, "grad_norm": 2.1666996479034424, "learning_rate": 5.67495412715667e-06, "loss": 0.4542, "step": 11398 }, { "epoch": 0.4732354604850051, "grad_norm": 2.791975736618042, "learning_rate": 5.6742879600443544e-06, "loss": 0.502, "step": 11399 }, { "epoch": 0.4732769760092164, "grad_norm": 2.6593716144561768, "learning_rate": 5.673621780740717e-06, "loss": 0.5965, "step": 11400 }, { "epoch": 0.47331849153342775, "grad_norm": 2.979036331176758, "learning_rate": 5.672955589257805e-06, "loss": 0.7013, "step": 11401 }, { "epoch": 0.47336000705763914, "grad_norm": 2.625769853591919, "learning_rate": 5.6722893856076596e-06, "loss": 0.5824, "step": 11402 }, { "epoch": 0.4734015225818505, "grad_norm": 2.8890368938446045, "learning_rate": 5.67162316980233e-06, "loss": 0.6287, "step": 11403 }, { "epoch": 0.4734430381060618, "grad_norm": 2.5301995277404785, "learning_rate": 5.67095694185386e-06, "loss": 0.4451, "step": 11404 }, { "epoch": 0.47348455363027314, "grad_norm": 2.4022579193115234, "learning_rate": 5.670290701774295e-06, "loss": 0.5668, "step": 11405 }, { "epoch": 0.4735260691544845, "grad_norm": 2.2865607738494873, "learning_rate": 5.66962444957568e-06, "loss": 0.4341, "step": 11406 }, { "epoch": 0.4735675846786958, "grad_norm": 2.501642942428589, "learning_rate": 5.6689581852700634e-06, "loss": 0.5692, "step": 11407 }, { "epoch": 0.47360910020290714, "grad_norm": 2.0956621170043945, "learning_rate": 5.66829190886949e-06, "loss": 0.4847, "step": 11408 }, { "epoch": 0.4736506157271185, "grad_norm": 2.554948568344116, "learning_rate": 5.667625620386005e-06, "loss": 0.5309, "step": 11409 }, { "epoch": 0.4736921312513298, "grad_norm": 2.398014783859253, "learning_rate": 5.666959319831657e-06, "loss": 0.5419, "step": 11410 }, { "epoch": 0.47373364677554114, "grad_norm": 2.588747262954712, "learning_rate": 5.666293007218492e-06, "loss": 0.5211, "step": 11411 }, { "epoch": 0.4737751622997525, "grad_norm": 2.618645191192627, "learning_rate": 5.6656266825585585e-06, "loss": 0.601, "step": 11412 }, { "epoch": 0.4738166778239638, "grad_norm": 2.976327419281006, "learning_rate": 5.6649603458639e-06, "loss": 0.5202, "step": 11413 }, { "epoch": 0.47385819334817514, "grad_norm": 2.331040620803833, "learning_rate": 5.664293997146569e-06, "loss": 0.5201, "step": 11414 }, { "epoch": 0.47389970887238647, "grad_norm": 2.691850185394287, "learning_rate": 5.663627636418611e-06, "loss": 0.452, "step": 11415 }, { "epoch": 0.4739412243965978, "grad_norm": 2.252518892288208, "learning_rate": 5.662961263692073e-06, "loss": 0.4925, "step": 11416 }, { "epoch": 0.47398273992080914, "grad_norm": 3.7592825889587402, "learning_rate": 5.662294878979004e-06, "loss": 0.4954, "step": 11417 }, { "epoch": 0.47402425544502047, "grad_norm": 2.3041818141937256, "learning_rate": 5.661628482291455e-06, "loss": 0.6097, "step": 11418 }, { "epoch": 0.4740657709692318, "grad_norm": 1.9404938220977783, "learning_rate": 5.660962073641472e-06, "loss": 0.5155, "step": 11419 }, { "epoch": 0.47410728649344314, "grad_norm": 2.3984265327453613, "learning_rate": 5.660295653041102e-06, "loss": 0.473, "step": 11420 }, { "epoch": 0.47414880201765447, "grad_norm": 2.8265867233276367, "learning_rate": 5.659629220502399e-06, "loss": 0.5622, "step": 11421 }, { "epoch": 0.4741903175418658, "grad_norm": 2.488611936569214, "learning_rate": 5.658962776037407e-06, "loss": 0.638, "step": 11422 }, { "epoch": 0.47423183306607714, "grad_norm": 2.514373779296875, "learning_rate": 5.658296319658179e-06, "loss": 0.5132, "step": 11423 }, { "epoch": 0.47427334859028847, "grad_norm": 2.566715717315674, "learning_rate": 5.6576298513767635e-06, "loss": 0.3599, "step": 11424 }, { "epoch": 0.4743148641144998, "grad_norm": 2.688400983810425, "learning_rate": 5.656963371205211e-06, "loss": 0.4637, "step": 11425 }, { "epoch": 0.47435637963871113, "grad_norm": 2.2161154747009277, "learning_rate": 5.656296879155572e-06, "loss": 0.4946, "step": 11426 }, { "epoch": 0.47439789516292247, "grad_norm": 1.9442552328109741, "learning_rate": 5.655630375239895e-06, "loss": 0.3954, "step": 11427 }, { "epoch": 0.4744394106871338, "grad_norm": 1.9777767658233643, "learning_rate": 5.654963859470232e-06, "loss": 0.4179, "step": 11428 }, { "epoch": 0.47448092621134513, "grad_norm": 3.0468149185180664, "learning_rate": 5.654297331858635e-06, "loss": 0.5244, "step": 11429 }, { "epoch": 0.47452244173555647, "grad_norm": 2.259803295135498, "learning_rate": 5.653630792417153e-06, "loss": 0.489, "step": 11430 }, { "epoch": 0.4745639572597678, "grad_norm": 1.941776156425476, "learning_rate": 5.652964241157838e-06, "loss": 0.4812, "step": 11431 }, { "epoch": 0.47460547278397913, "grad_norm": 2.350799322128296, "learning_rate": 5.652297678092742e-06, "loss": 0.5209, "step": 11432 }, { "epoch": 0.4746469883081905, "grad_norm": 4.44063663482666, "learning_rate": 5.651631103233914e-06, "loss": 0.661, "step": 11433 }, { "epoch": 0.47468850383240185, "grad_norm": 2.2689666748046875, "learning_rate": 5.650964516593409e-06, "loss": 0.6039, "step": 11434 }, { "epoch": 0.4747300193566132, "grad_norm": 2.4983623027801514, "learning_rate": 5.650297918183277e-06, "loss": 0.5255, "step": 11435 }, { "epoch": 0.4747715348808245, "grad_norm": 2.808793067932129, "learning_rate": 5.649631308015573e-06, "loss": 0.6108, "step": 11436 }, { "epoch": 0.47481305040503585, "grad_norm": 2.3062171936035156, "learning_rate": 5.648964686102346e-06, "loss": 0.5374, "step": 11437 }, { "epoch": 0.4748545659292472, "grad_norm": 3.032346725463867, "learning_rate": 5.648298052455652e-06, "loss": 0.5284, "step": 11438 }, { "epoch": 0.4748960814534585, "grad_norm": 2.4894533157348633, "learning_rate": 5.64763140708754e-06, "loss": 0.4923, "step": 11439 }, { "epoch": 0.47493759697766985, "grad_norm": 2.70101261138916, "learning_rate": 5.6469647500100675e-06, "loss": 0.4822, "step": 11440 }, { "epoch": 0.4749791125018812, "grad_norm": 2.475524663925171, "learning_rate": 5.646298081235285e-06, "loss": 0.5631, "step": 11441 }, { "epoch": 0.4750206280260925, "grad_norm": 2.374570846557617, "learning_rate": 5.645631400775248e-06, "loss": 0.5689, "step": 11442 }, { "epoch": 0.47506214355030385, "grad_norm": 2.634401798248291, "learning_rate": 5.644964708642008e-06, "loss": 0.5056, "step": 11443 }, { "epoch": 0.4751036590745152, "grad_norm": 2.4816384315490723, "learning_rate": 5.644298004847621e-06, "loss": 0.4425, "step": 11444 }, { "epoch": 0.4751451745987265, "grad_norm": 2.2405734062194824, "learning_rate": 5.64363128940414e-06, "loss": 0.5664, "step": 11445 }, { "epoch": 0.47518669012293785, "grad_norm": 2.3219265937805176, "learning_rate": 5.64296456232362e-06, "loss": 0.4183, "step": 11446 }, { "epoch": 0.4752282056471492, "grad_norm": 2.5720040798187256, "learning_rate": 5.642297823618115e-06, "loss": 0.6576, "step": 11447 }, { "epoch": 0.4752697211713605, "grad_norm": 2.645583391189575, "learning_rate": 5.64163107329968e-06, "loss": 0.5884, "step": 11448 }, { "epoch": 0.47531123669557185, "grad_norm": 3.0702056884765625, "learning_rate": 5.640964311380372e-06, "loss": 0.5559, "step": 11449 }, { "epoch": 0.4753527522197832, "grad_norm": 2.093839168548584, "learning_rate": 5.640297537872241e-06, "loss": 0.5156, "step": 11450 }, { "epoch": 0.4753942677439945, "grad_norm": 2.5256783962249756, "learning_rate": 5.639630752787349e-06, "loss": 0.5074, "step": 11451 }, { "epoch": 0.47543578326820585, "grad_norm": 2.2530200481414795, "learning_rate": 5.638963956137748e-06, "loss": 0.459, "step": 11452 }, { "epoch": 0.4754772987924172, "grad_norm": 2.4872827529907227, "learning_rate": 5.638297147935494e-06, "loss": 0.5358, "step": 11453 }, { "epoch": 0.4755188143166285, "grad_norm": 2.2055623531341553, "learning_rate": 5.637630328192642e-06, "loss": 0.5051, "step": 11454 }, { "epoch": 0.47556032984083985, "grad_norm": 2.433833360671997, "learning_rate": 5.636963496921253e-06, "loss": 0.4063, "step": 11455 }, { "epoch": 0.4756018453650512, "grad_norm": 2.2308263778686523, "learning_rate": 5.6362966541333785e-06, "loss": 0.6672, "step": 11456 }, { "epoch": 0.4756433608892625, "grad_norm": 2.738713502883911, "learning_rate": 5.635629799841078e-06, "loss": 0.5199, "step": 11457 }, { "epoch": 0.47568487641347384, "grad_norm": 2.953970193862915, "learning_rate": 5.634962934056408e-06, "loss": 0.4802, "step": 11458 }, { "epoch": 0.4757263919376852, "grad_norm": 2.2626514434814453, "learning_rate": 5.634296056791424e-06, "loss": 0.558, "step": 11459 }, { "epoch": 0.4757679074618965, "grad_norm": 2.3679308891296387, "learning_rate": 5.633629168058185e-06, "loss": 0.5598, "step": 11460 }, { "epoch": 0.47580942298610784, "grad_norm": 2.4257266521453857, "learning_rate": 5.632962267868747e-06, "loss": 0.6359, "step": 11461 }, { "epoch": 0.4758509385103192, "grad_norm": 2.2094743251800537, "learning_rate": 5.63229535623517e-06, "loss": 0.5514, "step": 11462 }, { "epoch": 0.4758924540345305, "grad_norm": 2.0874059200286865, "learning_rate": 5.63162843316951e-06, "loss": 0.4091, "step": 11463 }, { "epoch": 0.4759339695587419, "grad_norm": 2.0556883811950684, "learning_rate": 5.6309614986838265e-06, "loss": 0.3906, "step": 11464 }, { "epoch": 0.47597548508295323, "grad_norm": 2.3900763988494873, "learning_rate": 5.6302945527901765e-06, "loss": 0.3956, "step": 11465 }, { "epoch": 0.47601700060716456, "grad_norm": 2.161971092224121, "learning_rate": 5.62962759550062e-06, "loss": 0.5939, "step": 11466 }, { "epoch": 0.4760585161313759, "grad_norm": 2.6034507751464844, "learning_rate": 5.628960626827213e-06, "loss": 0.5285, "step": 11467 }, { "epoch": 0.47610003165558723, "grad_norm": 2.412625551223755, "learning_rate": 5.628293646782019e-06, "loss": 0.6101, "step": 11468 }, { "epoch": 0.47614154717979856, "grad_norm": 2.5699429512023926, "learning_rate": 5.627626655377094e-06, "loss": 0.653, "step": 11469 }, { "epoch": 0.4761830627040099, "grad_norm": 2.2722299098968506, "learning_rate": 5.626959652624498e-06, "loss": 0.3864, "step": 11470 }, { "epoch": 0.47622457822822123, "grad_norm": 2.2139475345611572, "learning_rate": 5.6262926385362926e-06, "loss": 0.6238, "step": 11471 }, { "epoch": 0.47626609375243256, "grad_norm": 2.4526560306549072, "learning_rate": 5.625625613124534e-06, "loss": 0.4255, "step": 11472 }, { "epoch": 0.4763076092766439, "grad_norm": 2.518481969833374, "learning_rate": 5.6249585764012836e-06, "loss": 0.4665, "step": 11473 }, { "epoch": 0.47634912480085523, "grad_norm": 2.6732747554779053, "learning_rate": 5.624291528378603e-06, "loss": 0.4891, "step": 11474 }, { "epoch": 0.47639064032506656, "grad_norm": 2.345027208328247, "learning_rate": 5.623624469068552e-06, "loss": 0.4075, "step": 11475 }, { "epoch": 0.4764321558492779, "grad_norm": 2.3918604850769043, "learning_rate": 5.622957398483188e-06, "loss": 0.4195, "step": 11476 }, { "epoch": 0.4764736713734892, "grad_norm": 2.3148984909057617, "learning_rate": 5.622290316634578e-06, "loss": 0.4268, "step": 11477 }, { "epoch": 0.47651518689770056, "grad_norm": 2.773953437805176, "learning_rate": 5.621623223534778e-06, "loss": 0.6104, "step": 11478 }, { "epoch": 0.4765567024219119, "grad_norm": 2.547173261642456, "learning_rate": 5.620956119195852e-06, "loss": 0.5422, "step": 11479 }, { "epoch": 0.4765982179461232, "grad_norm": 3.1761889457702637, "learning_rate": 5.62028900362986e-06, "loss": 0.4505, "step": 11480 }, { "epoch": 0.47663973347033456, "grad_norm": 2.381415367126465, "learning_rate": 5.619621876848864e-06, "loss": 0.5571, "step": 11481 }, { "epoch": 0.4766812489945459, "grad_norm": 2.1972458362579346, "learning_rate": 5.618954738864925e-06, "loss": 0.4793, "step": 11482 }, { "epoch": 0.4767227645187572, "grad_norm": 3.484178304672241, "learning_rate": 5.618287589690106e-06, "loss": 0.5526, "step": 11483 }, { "epoch": 0.47676428004296856, "grad_norm": 2.3310394287109375, "learning_rate": 5.617620429336471e-06, "loss": 0.5101, "step": 11484 }, { "epoch": 0.4768057955671799, "grad_norm": 3.019943952560425, "learning_rate": 5.616953257816079e-06, "loss": 0.4798, "step": 11485 }, { "epoch": 0.4768473110913912, "grad_norm": 2.2672135829925537, "learning_rate": 5.616286075140996e-06, "loss": 0.5748, "step": 11486 }, { "epoch": 0.47688882661560256, "grad_norm": 2.1452033519744873, "learning_rate": 5.6156188813232806e-06, "loss": 0.4947, "step": 11487 }, { "epoch": 0.4769303421398139, "grad_norm": 2.6064224243164062, "learning_rate": 5.614951676375e-06, "loss": 0.4798, "step": 11488 }, { "epoch": 0.4769718576640252, "grad_norm": 3.1785850524902344, "learning_rate": 5.614284460308215e-06, "loss": 0.822, "step": 11489 }, { "epoch": 0.47701337318823656, "grad_norm": 2.5178921222686768, "learning_rate": 5.61361723313499e-06, "loss": 0.4861, "step": 11490 }, { "epoch": 0.4770548887124479, "grad_norm": 1.7583346366882324, "learning_rate": 5.61294999486739e-06, "loss": 0.383, "step": 11491 }, { "epoch": 0.4770964042366592, "grad_norm": 2.455554246902466, "learning_rate": 5.612282745517475e-06, "loss": 0.5745, "step": 11492 }, { "epoch": 0.47713791976087055, "grad_norm": 2.2428436279296875, "learning_rate": 5.611615485097314e-06, "loss": 0.5604, "step": 11493 }, { "epoch": 0.4771794352850819, "grad_norm": 2.8618319034576416, "learning_rate": 5.610948213618968e-06, "loss": 0.5078, "step": 11494 }, { "epoch": 0.4772209508092933, "grad_norm": 2.7234067916870117, "learning_rate": 5.610280931094501e-06, "loss": 0.7384, "step": 11495 }, { "epoch": 0.4772624663335046, "grad_norm": 2.7084991931915283, "learning_rate": 5.60961363753598e-06, "loss": 0.5043, "step": 11496 }, { "epoch": 0.47730398185771594, "grad_norm": 2.282611131668091, "learning_rate": 5.608946332955468e-06, "loss": 0.4908, "step": 11497 }, { "epoch": 0.4773454973819273, "grad_norm": 3.2915737628936768, "learning_rate": 5.608279017365031e-06, "loss": 0.5484, "step": 11498 }, { "epoch": 0.4773870129061386, "grad_norm": 2.4334022998809814, "learning_rate": 5.607611690776735e-06, "loss": 0.6116, "step": 11499 }, { "epoch": 0.47742852843034994, "grad_norm": 2.392686605453491, "learning_rate": 5.606944353202642e-06, "loss": 0.3869, "step": 11500 }, { "epoch": 0.4774700439545613, "grad_norm": 2.4339001178741455, "learning_rate": 5.606277004654823e-06, "loss": 0.5575, "step": 11501 }, { "epoch": 0.4775115594787726, "grad_norm": 2.261139392852783, "learning_rate": 5.605609645145339e-06, "loss": 0.4752, "step": 11502 }, { "epoch": 0.47755307500298394, "grad_norm": 2.753959894180298, "learning_rate": 5.60494227468626e-06, "loss": 0.4629, "step": 11503 }, { "epoch": 0.4775945905271953, "grad_norm": 2.899134397506714, "learning_rate": 5.604274893289648e-06, "loss": 0.5554, "step": 11504 }, { "epoch": 0.4776361060514066, "grad_norm": 2.4627175331115723, "learning_rate": 5.603607500967574e-06, "loss": 0.527, "step": 11505 }, { "epoch": 0.47767762157561794, "grad_norm": 2.0662271976470947, "learning_rate": 5.6029400977321e-06, "loss": 0.4712, "step": 11506 }, { "epoch": 0.47771913709982927, "grad_norm": 2.246903657913208, "learning_rate": 5.602272683595298e-06, "loss": 0.5176, "step": 11507 }, { "epoch": 0.4777606526240406, "grad_norm": 2.342404842376709, "learning_rate": 5.601605258569231e-06, "loss": 0.4504, "step": 11508 }, { "epoch": 0.47780216814825194, "grad_norm": 2.3252246379852295, "learning_rate": 5.600937822665967e-06, "loss": 0.6007, "step": 11509 }, { "epoch": 0.47784368367246327, "grad_norm": 3.020568609237671, "learning_rate": 5.600270375897575e-06, "loss": 0.5523, "step": 11510 }, { "epoch": 0.4778851991966746, "grad_norm": 2.6212852001190186, "learning_rate": 5.59960291827612e-06, "loss": 0.4716, "step": 11511 }, { "epoch": 0.47792671472088594, "grad_norm": 2.457193374633789, "learning_rate": 5.598935449813674e-06, "loss": 0.518, "step": 11512 }, { "epoch": 0.47796823024509727, "grad_norm": 2.4314920902252197, "learning_rate": 5.5982679705223e-06, "loss": 0.6232, "step": 11513 }, { "epoch": 0.4780097457693086, "grad_norm": 2.2861809730529785, "learning_rate": 5.597600480414069e-06, "loss": 0.3418, "step": 11514 }, { "epoch": 0.47805126129351994, "grad_norm": 2.7081198692321777, "learning_rate": 5.59693297950105e-06, "loss": 0.6302, "step": 11515 }, { "epoch": 0.47809277681773127, "grad_norm": 2.3031911849975586, "learning_rate": 5.59626546779531e-06, "loss": 0.4481, "step": 11516 }, { "epoch": 0.4781342923419426, "grad_norm": 2.781501054763794, "learning_rate": 5.595597945308918e-06, "loss": 0.4221, "step": 11517 }, { "epoch": 0.47817580786615393, "grad_norm": 2.7066760063171387, "learning_rate": 5.594930412053945e-06, "loss": 0.5859, "step": 11518 }, { "epoch": 0.47821732339036527, "grad_norm": 2.8460562229156494, "learning_rate": 5.594262868042457e-06, "loss": 0.4358, "step": 11519 }, { "epoch": 0.4782588389145766, "grad_norm": 2.3664186000823975, "learning_rate": 5.593595313286526e-06, "loss": 0.5638, "step": 11520 }, { "epoch": 0.47830035443878793, "grad_norm": 2.4149670600891113, "learning_rate": 5.59292774779822e-06, "loss": 0.5007, "step": 11521 }, { "epoch": 0.47834186996299927, "grad_norm": 2.8159055709838867, "learning_rate": 5.592260171589611e-06, "loss": 0.4835, "step": 11522 }, { "epoch": 0.4783833854872106, "grad_norm": 2.5208046436309814, "learning_rate": 5.591592584672767e-06, "loss": 0.4979, "step": 11523 }, { "epoch": 0.47842490101142193, "grad_norm": 3.194795846939087, "learning_rate": 5.590924987059757e-06, "loss": 0.4824, "step": 11524 }, { "epoch": 0.4784664165356333, "grad_norm": 2.7692315578460693, "learning_rate": 5.590257378762655e-06, "loss": 0.5629, "step": 11525 }, { "epoch": 0.47850793205984465, "grad_norm": 2.0540812015533447, "learning_rate": 5.589589759793528e-06, "loss": 0.4351, "step": 11526 }, { "epoch": 0.478549447584056, "grad_norm": 2.1191301345825195, "learning_rate": 5.5889221301644485e-06, "loss": 0.4759, "step": 11527 }, { "epoch": 0.4785909631082673, "grad_norm": 2.282029151916504, "learning_rate": 5.588254489887487e-06, "loss": 0.4702, "step": 11528 }, { "epoch": 0.47863247863247865, "grad_norm": 2.0643725395202637, "learning_rate": 5.587586838974714e-06, "loss": 0.4461, "step": 11529 }, { "epoch": 0.47867399415669, "grad_norm": 2.298204183578491, "learning_rate": 5.586919177438203e-06, "loss": 0.4854, "step": 11530 }, { "epoch": 0.4787155096809013, "grad_norm": 2.7511703968048096, "learning_rate": 5.586251505290023e-06, "loss": 0.3955, "step": 11531 }, { "epoch": 0.47875702520511265, "grad_norm": 2.7651801109313965, "learning_rate": 5.585583822542248e-06, "loss": 0.5051, "step": 11532 }, { "epoch": 0.478798540729324, "grad_norm": 2.143061399459839, "learning_rate": 5.584916129206949e-06, "loss": 0.4732, "step": 11533 }, { "epoch": 0.4788400562535353, "grad_norm": 2.2503490447998047, "learning_rate": 5.584248425296196e-06, "loss": 0.5346, "step": 11534 }, { "epoch": 0.47888157177774665, "grad_norm": 2.410454034805298, "learning_rate": 5.583580710822064e-06, "loss": 0.4018, "step": 11535 }, { "epoch": 0.478923087301958, "grad_norm": 2.012256145477295, "learning_rate": 5.5829129857966255e-06, "loss": 0.5196, "step": 11536 }, { "epoch": 0.4789646028261693, "grad_norm": 2.5297391414642334, "learning_rate": 5.58224525023195e-06, "loss": 0.6565, "step": 11537 }, { "epoch": 0.47900611835038065, "grad_norm": 2.5995306968688965, "learning_rate": 5.581577504140114e-06, "loss": 0.6186, "step": 11538 }, { "epoch": 0.479047633874592, "grad_norm": 3.18719482421875, "learning_rate": 5.580909747533189e-06, "loss": 0.5587, "step": 11539 }, { "epoch": 0.4790891493988033, "grad_norm": 2.8465733528137207, "learning_rate": 5.580241980423249e-06, "loss": 0.6263, "step": 11540 }, { "epoch": 0.47913066492301465, "grad_norm": 2.512420415878296, "learning_rate": 5.579574202822366e-06, "loss": 0.535, "step": 11541 }, { "epoch": 0.479172180447226, "grad_norm": 2.9468419551849365, "learning_rate": 5.578906414742614e-06, "loss": 0.5565, "step": 11542 }, { "epoch": 0.4792136959714373, "grad_norm": 2.5187768936157227, "learning_rate": 5.578238616196067e-06, "loss": 0.3924, "step": 11543 }, { "epoch": 0.47925521149564865, "grad_norm": 2.8748347759246826, "learning_rate": 5.5775708071948e-06, "loss": 0.6095, "step": 11544 }, { "epoch": 0.47929672701986, "grad_norm": 2.805661678314209, "learning_rate": 5.576902987750885e-06, "loss": 0.5009, "step": 11545 }, { "epoch": 0.4793382425440713, "grad_norm": 2.3595902919769287, "learning_rate": 5.576235157876399e-06, "loss": 0.5655, "step": 11546 }, { "epoch": 0.47937975806828265, "grad_norm": 2.0835907459259033, "learning_rate": 5.575567317583415e-06, "loss": 0.6348, "step": 11547 }, { "epoch": 0.479421273592494, "grad_norm": 2.859078884124756, "learning_rate": 5.574899466884009e-06, "loss": 0.5224, "step": 11548 }, { "epoch": 0.4794627891167053, "grad_norm": 2.314987897872925, "learning_rate": 5.574231605790253e-06, "loss": 0.5463, "step": 11549 }, { "epoch": 0.47950430464091665, "grad_norm": 2.2119572162628174, "learning_rate": 5.573563734314225e-06, "loss": 0.5631, "step": 11550 }, { "epoch": 0.479545820165128, "grad_norm": 2.4272210597991943, "learning_rate": 5.5728958524679995e-06, "loss": 0.4332, "step": 11551 }, { "epoch": 0.4795873356893393, "grad_norm": 2.4520163536071777, "learning_rate": 5.572227960263651e-06, "loss": 0.4927, "step": 11552 }, { "epoch": 0.47962885121355064, "grad_norm": 2.4379351139068604, "learning_rate": 5.571560057713258e-06, "loss": 0.4949, "step": 11553 }, { "epoch": 0.479670366737762, "grad_norm": 2.6370906829833984, "learning_rate": 5.570892144828892e-06, "loss": 0.5187, "step": 11554 }, { "epoch": 0.4797118822619733, "grad_norm": 2.089531183242798, "learning_rate": 5.570224221622633e-06, "loss": 0.412, "step": 11555 }, { "epoch": 0.4797533977861847, "grad_norm": 2.0724737644195557, "learning_rate": 5.569556288106555e-06, "loss": 0.3929, "step": 11556 }, { "epoch": 0.47979491331039603, "grad_norm": 2.538628339767456, "learning_rate": 5.568888344292736e-06, "loss": 0.4612, "step": 11557 }, { "epoch": 0.47983642883460736, "grad_norm": 2.5984432697296143, "learning_rate": 5.5682203901932505e-06, "loss": 0.7873, "step": 11558 }, { "epoch": 0.4798779443588187, "grad_norm": 2.668452024459839, "learning_rate": 5.567552425820177e-06, "loss": 0.4972, "step": 11559 }, { "epoch": 0.47991945988303003, "grad_norm": 2.8919782638549805, "learning_rate": 5.566884451185592e-06, "loss": 0.6039, "step": 11560 }, { "epoch": 0.47996097540724136, "grad_norm": 2.4724230766296387, "learning_rate": 5.566216466301574e-06, "loss": 0.5082, "step": 11561 }, { "epoch": 0.4800024909314527, "grad_norm": 2.115931272506714, "learning_rate": 5.565548471180199e-06, "loss": 0.5035, "step": 11562 }, { "epoch": 0.48004400645566403, "grad_norm": 2.836721658706665, "learning_rate": 5.564880465833543e-06, "loss": 0.4908, "step": 11563 }, { "epoch": 0.48008552197987536, "grad_norm": 2.5174150466918945, "learning_rate": 5.564212450273686e-06, "loss": 0.4922, "step": 11564 }, { "epoch": 0.4801270375040867, "grad_norm": 2.1834373474121094, "learning_rate": 5.563544424512706e-06, "loss": 0.4901, "step": 11565 }, { "epoch": 0.48016855302829803, "grad_norm": 2.5810353755950928, "learning_rate": 5.562876388562679e-06, "loss": 0.4053, "step": 11566 }, { "epoch": 0.48021006855250936, "grad_norm": 2.151150941848755, "learning_rate": 5.562208342435686e-06, "loss": 0.5741, "step": 11567 }, { "epoch": 0.4802515840767207, "grad_norm": 2.3664093017578125, "learning_rate": 5.561540286143804e-06, "loss": 0.4779, "step": 11568 }, { "epoch": 0.480293099600932, "grad_norm": 2.748795747756958, "learning_rate": 5.56087221969911e-06, "loss": 0.537, "step": 11569 }, { "epoch": 0.48033461512514336, "grad_norm": 2.139164686203003, "learning_rate": 5.560204143113686e-06, "loss": 0.5477, "step": 11570 }, { "epoch": 0.4803761306493547, "grad_norm": 2.679147720336914, "learning_rate": 5.559536056399609e-06, "loss": 0.5324, "step": 11571 }, { "epoch": 0.480417646173566, "grad_norm": 2.7046194076538086, "learning_rate": 5.558867959568961e-06, "loss": 0.4832, "step": 11572 }, { "epoch": 0.48045916169777736, "grad_norm": 2.5210659503936768, "learning_rate": 5.558199852633817e-06, "loss": 0.5873, "step": 11573 }, { "epoch": 0.4805006772219887, "grad_norm": 2.4483566284179688, "learning_rate": 5.55753173560626e-06, "loss": 0.5301, "step": 11574 }, { "epoch": 0.4805421927462, "grad_norm": 2.6088364124298096, "learning_rate": 5.556863608498368e-06, "loss": 0.5865, "step": 11575 }, { "epoch": 0.48058370827041136, "grad_norm": 1.8344002962112427, "learning_rate": 5.556195471322222e-06, "loss": 0.5185, "step": 11576 }, { "epoch": 0.4806252237946227, "grad_norm": 2.319997549057007, "learning_rate": 5.5555273240899e-06, "loss": 0.3874, "step": 11577 }, { "epoch": 0.480666739318834, "grad_norm": 2.084409475326538, "learning_rate": 5.554859166813484e-06, "loss": 0.4978, "step": 11578 }, { "epoch": 0.48070825484304536, "grad_norm": 2.4118082523345947, "learning_rate": 5.5541909995050554e-06, "loss": 0.5297, "step": 11579 }, { "epoch": 0.4807497703672567, "grad_norm": 1.9158416986465454, "learning_rate": 5.553522822176694e-06, "loss": 0.4557, "step": 11580 }, { "epoch": 0.480791285891468, "grad_norm": 2.4489612579345703, "learning_rate": 5.55285463484048e-06, "loss": 0.5784, "step": 11581 }, { "epoch": 0.48083280141567936, "grad_norm": 2.4892578125, "learning_rate": 5.552186437508494e-06, "loss": 0.511, "step": 11582 }, { "epoch": 0.4808743169398907, "grad_norm": 2.9072704315185547, "learning_rate": 5.55151823019282e-06, "loss": 0.5334, "step": 11583 }, { "epoch": 0.480915832464102, "grad_norm": 2.6634299755096436, "learning_rate": 5.550850012905535e-06, "loss": 0.4138, "step": 11584 }, { "epoch": 0.48095734798831336, "grad_norm": 2.021164894104004, "learning_rate": 5.550181785658725e-06, "loss": 0.5367, "step": 11585 }, { "epoch": 0.4809988635125247, "grad_norm": 2.493201732635498, "learning_rate": 5.549513548464469e-06, "loss": 0.7191, "step": 11586 }, { "epoch": 0.4810403790367361, "grad_norm": 2.0836880207061768, "learning_rate": 5.54884530133485e-06, "loss": 0.421, "step": 11587 }, { "epoch": 0.4810818945609474, "grad_norm": 2.2017292976379395, "learning_rate": 5.548177044281949e-06, "loss": 0.52, "step": 11588 }, { "epoch": 0.48112341008515874, "grad_norm": 2.7932872772216797, "learning_rate": 5.547508777317851e-06, "loss": 0.5, "step": 11589 }, { "epoch": 0.4811649256093701, "grad_norm": 1.987372875213623, "learning_rate": 5.546840500454633e-06, "loss": 0.5177, "step": 11590 }, { "epoch": 0.4812064411335814, "grad_norm": 2.5032527446746826, "learning_rate": 5.5461722137043835e-06, "loss": 0.6118, "step": 11591 }, { "epoch": 0.48124795665779274, "grad_norm": 2.744516134262085, "learning_rate": 5.5455039170791826e-06, "loss": 0.6136, "step": 11592 }, { "epoch": 0.4812894721820041, "grad_norm": 2.557554006576538, "learning_rate": 5.544835610591113e-06, "loss": 0.5196, "step": 11593 }, { "epoch": 0.4813309877062154, "grad_norm": 2.4661872386932373, "learning_rate": 5.5441672942522595e-06, "loss": 0.5665, "step": 11594 }, { "epoch": 0.48137250323042674, "grad_norm": 2.3746705055236816, "learning_rate": 5.543498968074704e-06, "loss": 0.5919, "step": 11595 }, { "epoch": 0.4814140187546381, "grad_norm": 2.413776159286499, "learning_rate": 5.542830632070531e-06, "loss": 0.5929, "step": 11596 }, { "epoch": 0.4814555342788494, "grad_norm": 2.3528239727020264, "learning_rate": 5.542162286251824e-06, "loss": 0.6233, "step": 11597 }, { "epoch": 0.48149704980306074, "grad_norm": 2.760035991668701, "learning_rate": 5.541493930630665e-06, "loss": 0.3186, "step": 11598 }, { "epoch": 0.4815385653272721, "grad_norm": 2.2703959941864014, "learning_rate": 5.540825565219141e-06, "loss": 0.7326, "step": 11599 }, { "epoch": 0.4815800808514834, "grad_norm": 2.4737398624420166, "learning_rate": 5.5401571900293334e-06, "loss": 0.5308, "step": 11600 }, { "epoch": 0.48162159637569474, "grad_norm": 2.809415102005005, "learning_rate": 5.539488805073331e-06, "loss": 0.4718, "step": 11601 }, { "epoch": 0.48166311189990607, "grad_norm": 1.9987915754318237, "learning_rate": 5.538820410363214e-06, "loss": 0.4929, "step": 11602 }, { "epoch": 0.4817046274241174, "grad_norm": 2.6179308891296387, "learning_rate": 5.538152005911067e-06, "loss": 0.496, "step": 11603 }, { "epoch": 0.48174614294832874, "grad_norm": 3.0842819213867188, "learning_rate": 5.537483591728978e-06, "loss": 0.6631, "step": 11604 }, { "epoch": 0.48178765847254007, "grad_norm": 2.79524827003479, "learning_rate": 5.536815167829031e-06, "loss": 0.5317, "step": 11605 }, { "epoch": 0.4818291739967514, "grad_norm": 1.8658534288406372, "learning_rate": 5.53614673422331e-06, "loss": 0.5332, "step": 11606 }, { "epoch": 0.48187068952096274, "grad_norm": 3.1283180713653564, "learning_rate": 5.5354782909239025e-06, "loss": 0.6078, "step": 11607 }, { "epoch": 0.48191220504517407, "grad_norm": 2.513603687286377, "learning_rate": 5.5348098379428924e-06, "loss": 0.4375, "step": 11608 }, { "epoch": 0.4819537205693854, "grad_norm": 2.9172964096069336, "learning_rate": 5.534141375292368e-06, "loss": 0.5895, "step": 11609 }, { "epoch": 0.48199523609359674, "grad_norm": 2.777146577835083, "learning_rate": 5.533472902984411e-06, "loss": 0.4996, "step": 11610 }, { "epoch": 0.48203675161780807, "grad_norm": 2.2182552814483643, "learning_rate": 5.532804421031113e-06, "loss": 0.476, "step": 11611 }, { "epoch": 0.4820782671420194, "grad_norm": 2.414323568344116, "learning_rate": 5.532135929444556e-06, "loss": 0.5627, "step": 11612 }, { "epoch": 0.48211978266623073, "grad_norm": 2.7207424640655518, "learning_rate": 5.531467428236827e-06, "loss": 0.4741, "step": 11613 }, { "epoch": 0.48216129819044207, "grad_norm": 2.500683069229126, "learning_rate": 5.530798917420016e-06, "loss": 0.5728, "step": 11614 }, { "epoch": 0.4822028137146534, "grad_norm": 2.5834100246429443, "learning_rate": 5.530130397006207e-06, "loss": 0.5401, "step": 11615 }, { "epoch": 0.48224432923886473, "grad_norm": 2.7273123264312744, "learning_rate": 5.529461867007486e-06, "loss": 0.4373, "step": 11616 }, { "epoch": 0.48228584476307607, "grad_norm": 2.160722494125366, "learning_rate": 5.528793327435942e-06, "loss": 0.5467, "step": 11617 }, { "epoch": 0.48232736028728745, "grad_norm": 2.422377347946167, "learning_rate": 5.528124778303664e-06, "loss": 0.509, "step": 11618 }, { "epoch": 0.4823688758114988, "grad_norm": 2.4365670680999756, "learning_rate": 5.527456219622736e-06, "loss": 0.6686, "step": 11619 }, { "epoch": 0.4824103913357101, "grad_norm": 2.6738784313201904, "learning_rate": 5.526787651405249e-06, "loss": 0.6507, "step": 11620 }, { "epoch": 0.48245190685992145, "grad_norm": 2.4924476146698, "learning_rate": 5.526119073663287e-06, "loss": 0.557, "step": 11621 }, { "epoch": 0.4824934223841328, "grad_norm": 2.836117744445801, "learning_rate": 5.525450486408944e-06, "loss": 0.5096, "step": 11622 }, { "epoch": 0.4825349379083441, "grad_norm": 2.4691054821014404, "learning_rate": 5.524781889654302e-06, "loss": 0.4365, "step": 11623 }, { "epoch": 0.48257645343255545, "grad_norm": 2.174105167388916, "learning_rate": 5.524113283411454e-06, "loss": 0.5786, "step": 11624 }, { "epoch": 0.4826179689567668, "grad_norm": 2.4772167205810547, "learning_rate": 5.5234446676924866e-06, "loss": 0.4872, "step": 11625 }, { "epoch": 0.4826594844809781, "grad_norm": 2.2230727672576904, "learning_rate": 5.522776042509489e-06, "loss": 0.451, "step": 11626 }, { "epoch": 0.48270100000518945, "grad_norm": 3.382664680480957, "learning_rate": 5.5221074078745474e-06, "loss": 0.4888, "step": 11627 }, { "epoch": 0.4827425155294008, "grad_norm": 2.358964204788208, "learning_rate": 5.521438763799756e-06, "loss": 0.3779, "step": 11628 }, { "epoch": 0.4827840310536121, "grad_norm": 2.5561389923095703, "learning_rate": 5.5207701102972e-06, "loss": 0.4164, "step": 11629 }, { "epoch": 0.48282554657782345, "grad_norm": 1.780078649520874, "learning_rate": 5.520101447378969e-06, "loss": 0.4967, "step": 11630 }, { "epoch": 0.4828670621020348, "grad_norm": 2.7927730083465576, "learning_rate": 5.519432775057158e-06, "loss": 0.5889, "step": 11631 }, { "epoch": 0.4829085776262461, "grad_norm": 2.434946060180664, "learning_rate": 5.518764093343849e-06, "loss": 0.5833, "step": 11632 }, { "epoch": 0.48295009315045745, "grad_norm": 2.3731117248535156, "learning_rate": 5.518095402251137e-06, "loss": 0.5062, "step": 11633 }, { "epoch": 0.4829916086746688, "grad_norm": 2.296602249145508, "learning_rate": 5.517426701791111e-06, "loss": 0.4079, "step": 11634 }, { "epoch": 0.4830331241988801, "grad_norm": 2.5135350227355957, "learning_rate": 5.516757991975861e-06, "loss": 0.5422, "step": 11635 }, { "epoch": 0.48307463972309145, "grad_norm": 2.767244815826416, "learning_rate": 5.516089272817476e-06, "loss": 0.3797, "step": 11636 }, { "epoch": 0.4831161552473028, "grad_norm": 2.4204392433166504, "learning_rate": 5.5154205443280505e-06, "loss": 0.4975, "step": 11637 }, { "epoch": 0.4831576707715141, "grad_norm": 2.3897972106933594, "learning_rate": 5.514751806519673e-06, "loss": 0.5372, "step": 11638 }, { "epoch": 0.48319918629572545, "grad_norm": 2.606268882751465, "learning_rate": 5.5140830594044344e-06, "loss": 0.6256, "step": 11639 }, { "epoch": 0.4832407018199368, "grad_norm": 2.67702054977417, "learning_rate": 5.513414302994424e-06, "loss": 0.5043, "step": 11640 }, { "epoch": 0.4832822173441481, "grad_norm": 2.6790249347686768, "learning_rate": 5.512745537301737e-06, "loss": 0.4777, "step": 11641 }, { "epoch": 0.48332373286835945, "grad_norm": 2.618802547454834, "learning_rate": 5.512076762338462e-06, "loss": 0.6273, "step": 11642 }, { "epoch": 0.4833652483925708, "grad_norm": 2.592641592025757, "learning_rate": 5.511407978116692e-06, "loss": 0.5892, "step": 11643 }, { "epoch": 0.4834067639167821, "grad_norm": 2.1478333473205566, "learning_rate": 5.510739184648517e-06, "loss": 0.4746, "step": 11644 }, { "epoch": 0.48344827944099344, "grad_norm": 2.367431402206421, "learning_rate": 5.510070381946031e-06, "loss": 0.5276, "step": 11645 }, { "epoch": 0.4834897949652048, "grad_norm": 2.3286020755767822, "learning_rate": 5.5094015700213254e-06, "loss": 0.5924, "step": 11646 }, { "epoch": 0.4835313104894161, "grad_norm": 2.4919326305389404, "learning_rate": 5.508732748886493e-06, "loss": 0.549, "step": 11647 }, { "epoch": 0.48357282601362744, "grad_norm": 2.220127582550049, "learning_rate": 5.508063918553626e-06, "loss": 0.4205, "step": 11648 }, { "epoch": 0.48361434153783883, "grad_norm": 2.499778985977173, "learning_rate": 5.507395079034816e-06, "loss": 0.5284, "step": 11649 }, { "epoch": 0.48365585706205017, "grad_norm": 2.2009994983673096, "learning_rate": 5.506726230342159e-06, "loss": 0.4458, "step": 11650 }, { "epoch": 0.4836973725862615, "grad_norm": 2.284376859664917, "learning_rate": 5.506057372487744e-06, "loss": 0.3831, "step": 11651 }, { "epoch": 0.48373888811047283, "grad_norm": 2.2100343704223633, "learning_rate": 5.505388505483667e-06, "loss": 0.5757, "step": 11652 }, { "epoch": 0.48378040363468416, "grad_norm": 2.3012852668762207, "learning_rate": 5.504719629342018e-06, "loss": 0.4249, "step": 11653 }, { "epoch": 0.4838219191588955, "grad_norm": 2.3329405784606934, "learning_rate": 5.504050744074895e-06, "loss": 0.5618, "step": 11654 }, { "epoch": 0.48386343468310683, "grad_norm": 2.593093156814575, "learning_rate": 5.503381849694387e-06, "loss": 0.556, "step": 11655 }, { "epoch": 0.48390495020731816, "grad_norm": 1.8544405698776245, "learning_rate": 5.502712946212592e-06, "loss": 0.4523, "step": 11656 }, { "epoch": 0.4839464657315295, "grad_norm": 2.517080783843994, "learning_rate": 5.5020440336416e-06, "loss": 0.5926, "step": 11657 }, { "epoch": 0.48398798125574083, "grad_norm": 2.3408517837524414, "learning_rate": 5.501375111993507e-06, "loss": 0.5539, "step": 11658 }, { "epoch": 0.48402949677995216, "grad_norm": 2.2570550441741943, "learning_rate": 5.500706181280409e-06, "loss": 0.4972, "step": 11659 }, { "epoch": 0.4840710123041635, "grad_norm": 3.150631904602051, "learning_rate": 5.500037241514399e-06, "loss": 0.4771, "step": 11660 }, { "epoch": 0.48411252782837483, "grad_norm": 2.4825565814971924, "learning_rate": 5.499368292707572e-06, "loss": 0.6824, "step": 11661 }, { "epoch": 0.48415404335258616, "grad_norm": 2.4292311668395996, "learning_rate": 5.498699334872021e-06, "loss": 0.5262, "step": 11662 }, { "epoch": 0.4841955588767975, "grad_norm": 2.238152265548706, "learning_rate": 5.498030368019843e-06, "loss": 0.623, "step": 11663 }, { "epoch": 0.4842370744010088, "grad_norm": 2.3951122760772705, "learning_rate": 5.497361392163133e-06, "loss": 0.6533, "step": 11664 }, { "epoch": 0.48427858992522016, "grad_norm": 2.345226526260376, "learning_rate": 5.496692407313985e-06, "loss": 0.35, "step": 11665 }, { "epoch": 0.4843201054494315, "grad_norm": 2.2630999088287354, "learning_rate": 5.496023413484495e-06, "loss": 0.4802, "step": 11666 }, { "epoch": 0.4843616209736428, "grad_norm": 2.2073609828948975, "learning_rate": 5.4953544106867594e-06, "loss": 0.419, "step": 11667 }, { "epoch": 0.48440313649785416, "grad_norm": 2.705406665802002, "learning_rate": 5.494685398932872e-06, "loss": 0.4878, "step": 11668 }, { "epoch": 0.4844446520220655, "grad_norm": 2.3151071071624756, "learning_rate": 5.494016378234931e-06, "loss": 0.6133, "step": 11669 }, { "epoch": 0.4844861675462768, "grad_norm": 3.0798377990722656, "learning_rate": 5.49334734860503e-06, "loss": 0.5319, "step": 11670 }, { "epoch": 0.48452768307048816, "grad_norm": 2.9006359577178955, "learning_rate": 5.4926783100552675e-06, "loss": 0.4616, "step": 11671 }, { "epoch": 0.4845691985946995, "grad_norm": 2.005192279815674, "learning_rate": 5.49200926259774e-06, "loss": 0.4953, "step": 11672 }, { "epoch": 0.4846107141189108, "grad_norm": 2.49275803565979, "learning_rate": 5.491340206244542e-06, "loss": 0.5387, "step": 11673 }, { "epoch": 0.48465222964312216, "grad_norm": 2.7664742469787598, "learning_rate": 5.490671141007773e-06, "loss": 0.5936, "step": 11674 }, { "epoch": 0.4846937451673335, "grad_norm": 2.5494754314422607, "learning_rate": 5.490002066899527e-06, "loss": 0.3426, "step": 11675 }, { "epoch": 0.4847352606915448, "grad_norm": 3.579988718032837, "learning_rate": 5.489332983931903e-06, "loss": 0.4734, "step": 11676 }, { "epoch": 0.48477677621575616, "grad_norm": 2.5857887268066406, "learning_rate": 5.4886638921169985e-06, "loss": 0.6068, "step": 11677 }, { "epoch": 0.4848182917399675, "grad_norm": 2.544001340866089, "learning_rate": 5.487994791466909e-06, "loss": 0.5478, "step": 11678 }, { "epoch": 0.4848598072641788, "grad_norm": 1.867415428161621, "learning_rate": 5.4873256819937325e-06, "loss": 0.3339, "step": 11679 }, { "epoch": 0.4849013227883902, "grad_norm": 3.154097080230713, "learning_rate": 5.486656563709569e-06, "loss": 0.6872, "step": 11680 }, { "epoch": 0.48494283831260154, "grad_norm": 2.213057518005371, "learning_rate": 5.485987436626512e-06, "loss": 0.6352, "step": 11681 }, { "epoch": 0.4849843538368129, "grad_norm": 2.3765389919281006, "learning_rate": 5.485318300756664e-06, "loss": 0.6008, "step": 11682 }, { "epoch": 0.4850258693610242, "grad_norm": 2.4112486839294434, "learning_rate": 5.484649156112121e-06, "loss": 0.4322, "step": 11683 }, { "epoch": 0.48506738488523554, "grad_norm": 2.6594512462615967, "learning_rate": 5.483980002704979e-06, "loss": 0.4719, "step": 11684 }, { "epoch": 0.4851089004094469, "grad_norm": 3.394493579864502, "learning_rate": 5.4833108405473425e-06, "loss": 0.4861, "step": 11685 }, { "epoch": 0.4851504159336582, "grad_norm": 2.654916524887085, "learning_rate": 5.482641669651305e-06, "loss": 0.6455, "step": 11686 }, { "epoch": 0.48519193145786954, "grad_norm": 2.7288153171539307, "learning_rate": 5.481972490028969e-06, "loss": 0.629, "step": 11687 }, { "epoch": 0.4852334469820809, "grad_norm": 2.187342643737793, "learning_rate": 5.481303301692428e-06, "loss": 0.5004, "step": 11688 }, { "epoch": 0.4852749625062922, "grad_norm": 2.527115821838379, "learning_rate": 5.480634104653787e-06, "loss": 0.5531, "step": 11689 }, { "epoch": 0.48531647803050354, "grad_norm": 2.7207374572753906, "learning_rate": 5.479964898925143e-06, "loss": 0.642, "step": 11690 }, { "epoch": 0.4853579935547149, "grad_norm": 2.29272198677063, "learning_rate": 5.479295684518595e-06, "loss": 0.5486, "step": 11691 }, { "epoch": 0.4853995090789262, "grad_norm": 2.3965625762939453, "learning_rate": 5.478626461446241e-06, "loss": 0.4276, "step": 11692 }, { "epoch": 0.48544102460313754, "grad_norm": 2.4192962646484375, "learning_rate": 5.477957229720185e-06, "loss": 0.6106, "step": 11693 }, { "epoch": 0.48548254012734887, "grad_norm": 2.277179718017578, "learning_rate": 5.477287989352524e-06, "loss": 0.7316, "step": 11694 }, { "epoch": 0.4855240556515602, "grad_norm": 2.4027631282806396, "learning_rate": 5.476618740355358e-06, "loss": 0.628, "step": 11695 }, { "epoch": 0.48556557117577154, "grad_norm": 2.1705880165100098, "learning_rate": 5.475949482740788e-06, "loss": 0.4614, "step": 11696 }, { "epoch": 0.48560708669998287, "grad_norm": 2.689845561981201, "learning_rate": 5.475280216520913e-06, "loss": 0.5208, "step": 11697 }, { "epoch": 0.4856486022241942, "grad_norm": 2.2550501823425293, "learning_rate": 5.4746109417078365e-06, "loss": 0.5265, "step": 11698 }, { "epoch": 0.48569011774840554, "grad_norm": 2.3722681999206543, "learning_rate": 5.473941658313656e-06, "loss": 0.6385, "step": 11699 }, { "epoch": 0.48573163327261687, "grad_norm": 2.5018417835235596, "learning_rate": 5.473272366350475e-06, "loss": 0.5117, "step": 11700 }, { "epoch": 0.4857731487968282, "grad_norm": 2.8744215965270996, "learning_rate": 5.472603065830393e-06, "loss": 0.4904, "step": 11701 }, { "epoch": 0.48581466432103954, "grad_norm": 3.0706794261932373, "learning_rate": 5.47193375676551e-06, "loss": 0.5424, "step": 11702 }, { "epoch": 0.48585617984525087, "grad_norm": 2.1248044967651367, "learning_rate": 5.471264439167932e-06, "loss": 0.4466, "step": 11703 }, { "epoch": 0.4858976953694622, "grad_norm": 2.826171636581421, "learning_rate": 5.470595113049755e-06, "loss": 0.6012, "step": 11704 }, { "epoch": 0.48593921089367353, "grad_norm": 3.2499992847442627, "learning_rate": 5.469925778423082e-06, "loss": 0.3865, "step": 11705 }, { "epoch": 0.48598072641788487, "grad_norm": 2.637629508972168, "learning_rate": 5.469256435300016e-06, "loss": 0.4969, "step": 11706 }, { "epoch": 0.4860222419420962, "grad_norm": 2.0932791233062744, "learning_rate": 5.468587083692658e-06, "loss": 0.429, "step": 11707 }, { "epoch": 0.48606375746630753, "grad_norm": 2.1794357299804688, "learning_rate": 5.467917723613111e-06, "loss": 0.4782, "step": 11708 }, { "epoch": 0.48610527299051887, "grad_norm": 2.746446371078491, "learning_rate": 5.467248355073476e-06, "loss": 0.5298, "step": 11709 }, { "epoch": 0.4861467885147302, "grad_norm": 2.6297340393066406, "learning_rate": 5.466578978085856e-06, "loss": 0.5602, "step": 11710 }, { "epoch": 0.4861883040389416, "grad_norm": 2.847628355026245, "learning_rate": 5.465909592662355e-06, "loss": 0.5595, "step": 11711 }, { "epoch": 0.4862298195631529, "grad_norm": 2.9030983448028564, "learning_rate": 5.465240198815073e-06, "loss": 0.5533, "step": 11712 }, { "epoch": 0.48627133508736425, "grad_norm": 2.434631824493408, "learning_rate": 5.464570796556115e-06, "loss": 0.5872, "step": 11713 }, { "epoch": 0.4863128506115756, "grad_norm": 2.505873918533325, "learning_rate": 5.463901385897582e-06, "loss": 0.6381, "step": 11714 }, { "epoch": 0.4863543661357869, "grad_norm": 2.553938865661621, "learning_rate": 5.463231966851579e-06, "loss": 0.5756, "step": 11715 }, { "epoch": 0.48639588165999825, "grad_norm": 2.348013401031494, "learning_rate": 5.4625625394302095e-06, "loss": 0.4543, "step": 11716 }, { "epoch": 0.4864373971842096, "grad_norm": 2.5945310592651367, "learning_rate": 5.461893103645575e-06, "loss": 0.4979, "step": 11717 }, { "epoch": 0.4864789127084209, "grad_norm": 1.668135404586792, "learning_rate": 5.461223659509779e-06, "loss": 0.3225, "step": 11718 }, { "epoch": 0.48652042823263225, "grad_norm": 2.9881386756896973, "learning_rate": 5.4605542070349285e-06, "loss": 0.5912, "step": 11719 }, { "epoch": 0.4865619437568436, "grad_norm": 2.1807029247283936, "learning_rate": 5.4598847462331225e-06, "loss": 0.4442, "step": 11720 }, { "epoch": 0.4866034592810549, "grad_norm": 2.210020065307617, "learning_rate": 5.45921527711647e-06, "loss": 0.5148, "step": 11721 }, { "epoch": 0.48664497480526625, "grad_norm": 2.2548389434814453, "learning_rate": 5.458545799697072e-06, "loss": 0.6549, "step": 11722 }, { "epoch": 0.4866864903294776, "grad_norm": 2.232774019241333, "learning_rate": 5.457876313987034e-06, "loss": 0.5211, "step": 11723 }, { "epoch": 0.4867280058536889, "grad_norm": 2.1653544902801514, "learning_rate": 5.45720681999846e-06, "loss": 0.5865, "step": 11724 }, { "epoch": 0.48676952137790025, "grad_norm": 2.3409578800201416, "learning_rate": 5.4565373177434534e-06, "loss": 0.5164, "step": 11725 }, { "epoch": 0.4868110369021116, "grad_norm": 3.3601815700531006, "learning_rate": 5.455867807234123e-06, "loss": 0.4477, "step": 11726 }, { "epoch": 0.4868525524263229, "grad_norm": 3.188101053237915, "learning_rate": 5.4551982884825695e-06, "loss": 0.6126, "step": 11727 }, { "epoch": 0.48689406795053425, "grad_norm": 2.111746311187744, "learning_rate": 5.454528761500901e-06, "loss": 0.5146, "step": 11728 }, { "epoch": 0.4869355834747456, "grad_norm": 2.4734599590301514, "learning_rate": 5.453859226301222e-06, "loss": 0.6358, "step": 11729 }, { "epoch": 0.4869770989989569, "grad_norm": 2.4713943004608154, "learning_rate": 5.453189682895638e-06, "loss": 0.3877, "step": 11730 }, { "epoch": 0.48701861452316825, "grad_norm": 2.19340181350708, "learning_rate": 5.452520131296251e-06, "loss": 0.3589, "step": 11731 }, { "epoch": 0.4870601300473796, "grad_norm": 2.7264695167541504, "learning_rate": 5.451850571515171e-06, "loss": 0.7086, "step": 11732 }, { "epoch": 0.4871016455715909, "grad_norm": 2.1520445346832275, "learning_rate": 5.451181003564502e-06, "loss": 0.5004, "step": 11733 }, { "epoch": 0.48714316109580225, "grad_norm": 2.30743670463562, "learning_rate": 5.450511427456352e-06, "loss": 0.5976, "step": 11734 }, { "epoch": 0.4871846766200136, "grad_norm": 2.7339749336242676, "learning_rate": 5.449841843202823e-06, "loss": 0.5636, "step": 11735 }, { "epoch": 0.4872261921442249, "grad_norm": 2.593120813369751, "learning_rate": 5.449172250816026e-06, "loss": 0.5336, "step": 11736 }, { "epoch": 0.48726770766843625, "grad_norm": 2.5094380378723145, "learning_rate": 5.448502650308064e-06, "loss": 0.4984, "step": 11737 }, { "epoch": 0.4873092231926476, "grad_norm": 2.611840009689331, "learning_rate": 5.4478330416910455e-06, "loss": 0.5128, "step": 11738 }, { "epoch": 0.4873507387168589, "grad_norm": 2.2381155490875244, "learning_rate": 5.447163424977076e-06, "loss": 0.6511, "step": 11739 }, { "epoch": 0.48739225424107024, "grad_norm": 2.7905893325805664, "learning_rate": 5.446493800178264e-06, "loss": 0.5101, "step": 11740 }, { "epoch": 0.48743376976528163, "grad_norm": 2.941633462905884, "learning_rate": 5.445824167306714e-06, "loss": 0.5918, "step": 11741 }, { "epoch": 0.48747528528949297, "grad_norm": 2.1373159885406494, "learning_rate": 5.4451545263745355e-06, "loss": 0.5013, "step": 11742 }, { "epoch": 0.4875168008137043, "grad_norm": 2.620534896850586, "learning_rate": 5.444484877393835e-06, "loss": 0.3982, "step": 11743 }, { "epoch": 0.48755831633791563, "grad_norm": 1.6969974040985107, "learning_rate": 5.443815220376718e-06, "loss": 0.368, "step": 11744 }, { "epoch": 0.48759983186212696, "grad_norm": 2.799126148223877, "learning_rate": 5.443145555335296e-06, "loss": 0.5474, "step": 11745 }, { "epoch": 0.4876413473863383, "grad_norm": 2.6951904296875, "learning_rate": 5.4424758822816736e-06, "loss": 0.4813, "step": 11746 }, { "epoch": 0.48768286291054963, "grad_norm": 2.327052116394043, "learning_rate": 5.44180620122796e-06, "loss": 0.4614, "step": 11747 }, { "epoch": 0.48772437843476096, "grad_norm": 2.241133213043213, "learning_rate": 5.441136512186262e-06, "loss": 0.483, "step": 11748 }, { "epoch": 0.4877658939589723, "grad_norm": 2.1959636211395264, "learning_rate": 5.44046681516869e-06, "loss": 0.4113, "step": 11749 }, { "epoch": 0.48780740948318363, "grad_norm": 2.531975507736206, "learning_rate": 5.43979711018735e-06, "loss": 0.554, "step": 11750 }, { "epoch": 0.48784892500739496, "grad_norm": 2.098436117172241, "learning_rate": 5.4391273972543525e-06, "loss": 0.4438, "step": 11751 }, { "epoch": 0.4878904405316063, "grad_norm": 2.432250738143921, "learning_rate": 5.438457676381804e-06, "loss": 0.5373, "step": 11752 }, { "epoch": 0.48793195605581763, "grad_norm": 2.0575106143951416, "learning_rate": 5.437787947581816e-06, "loss": 0.4438, "step": 11753 }, { "epoch": 0.48797347158002896, "grad_norm": 2.540268659591675, "learning_rate": 5.437118210866494e-06, "loss": 0.4746, "step": 11754 }, { "epoch": 0.4880149871042403, "grad_norm": 2.737445831298828, "learning_rate": 5.43644846624795e-06, "loss": 0.5845, "step": 11755 }, { "epoch": 0.4880565026284516, "grad_norm": 2.664992094039917, "learning_rate": 5.435778713738292e-06, "loss": 0.3111, "step": 11756 }, { "epoch": 0.48809801815266296, "grad_norm": 2.0623128414154053, "learning_rate": 5.4351089533496286e-06, "loss": 0.4612, "step": 11757 }, { "epoch": 0.4881395336768743, "grad_norm": 2.0109734535217285, "learning_rate": 5.43443918509407e-06, "loss": 0.4309, "step": 11758 }, { "epoch": 0.4881810492010856, "grad_norm": 2.627840757369995, "learning_rate": 5.433769408983726e-06, "loss": 0.3771, "step": 11759 }, { "epoch": 0.48822256472529696, "grad_norm": 2.47216534614563, "learning_rate": 5.433099625030705e-06, "loss": 0.5679, "step": 11760 }, { "epoch": 0.4882640802495083, "grad_norm": 2.14589524269104, "learning_rate": 5.4324298332471185e-06, "loss": 0.4177, "step": 11761 }, { "epoch": 0.4883055957737196, "grad_norm": 2.2749814987182617, "learning_rate": 5.431760033645077e-06, "loss": 0.4101, "step": 11762 }, { "epoch": 0.48834711129793096, "grad_norm": 2.3047194480895996, "learning_rate": 5.431090226236688e-06, "loss": 0.4181, "step": 11763 }, { "epoch": 0.4883886268221423, "grad_norm": 3.0345511436462402, "learning_rate": 5.4304204110340644e-06, "loss": 0.4934, "step": 11764 }, { "epoch": 0.4884301423463536, "grad_norm": 2.459686756134033, "learning_rate": 5.429750588049315e-06, "loss": 0.5516, "step": 11765 }, { "epoch": 0.48847165787056496, "grad_norm": 4.249824523925781, "learning_rate": 5.429080757294552e-06, "loss": 0.5606, "step": 11766 }, { "epoch": 0.4885131733947763, "grad_norm": 2.4343018531799316, "learning_rate": 5.428410918781885e-06, "loss": 0.4577, "step": 11767 }, { "epoch": 0.4885546889189876, "grad_norm": 2.3656680583953857, "learning_rate": 5.427741072523426e-06, "loss": 0.5242, "step": 11768 }, { "epoch": 0.48859620444319896, "grad_norm": 2.5042495727539062, "learning_rate": 5.427071218531283e-06, "loss": 0.4522, "step": 11769 }, { "epoch": 0.4886377199674103, "grad_norm": 2.438710927963257, "learning_rate": 5.426401356817571e-06, "loss": 0.4731, "step": 11770 }, { "epoch": 0.4886792354916216, "grad_norm": 2.4131195545196533, "learning_rate": 5.425731487394399e-06, "loss": 0.6069, "step": 11771 }, { "epoch": 0.488720751015833, "grad_norm": 2.573089361190796, "learning_rate": 5.425061610273878e-06, "loss": 0.4024, "step": 11772 }, { "epoch": 0.48876226654004434, "grad_norm": 2.0487794876098633, "learning_rate": 5.424391725468123e-06, "loss": 0.5521, "step": 11773 }, { "epoch": 0.4888037820642557, "grad_norm": 2.2731211185455322, "learning_rate": 5.423721832989242e-06, "loss": 0.4917, "step": 11774 }, { "epoch": 0.488845297588467, "grad_norm": 2.2378642559051514, "learning_rate": 5.423051932849348e-06, "loss": 0.4488, "step": 11775 }, { "epoch": 0.48888681311267834, "grad_norm": 2.527230978012085, "learning_rate": 5.422382025060552e-06, "loss": 0.5497, "step": 11776 }, { "epoch": 0.4889283286368897, "grad_norm": 2.2943174839019775, "learning_rate": 5.421712109634968e-06, "loss": 0.5458, "step": 11777 }, { "epoch": 0.488969844161101, "grad_norm": 2.889566421508789, "learning_rate": 5.421042186584708e-06, "loss": 0.5016, "step": 11778 }, { "epoch": 0.48901135968531234, "grad_norm": 2.494210720062256, "learning_rate": 5.420372255921884e-06, "loss": 0.4839, "step": 11779 }, { "epoch": 0.4890528752095237, "grad_norm": 2.49489426612854, "learning_rate": 5.419702317658609e-06, "loss": 0.4885, "step": 11780 }, { "epoch": 0.489094390733735, "grad_norm": 2.1083855628967285, "learning_rate": 5.419032371806994e-06, "loss": 0.576, "step": 11781 }, { "epoch": 0.48913590625794634, "grad_norm": 2.292234182357788, "learning_rate": 5.418362418379154e-06, "loss": 0.4995, "step": 11782 }, { "epoch": 0.4891774217821577, "grad_norm": 2.232492685317993, "learning_rate": 5.417692457387199e-06, "loss": 0.4877, "step": 11783 }, { "epoch": 0.489218937306369, "grad_norm": 2.2064921855926514, "learning_rate": 5.417022488843245e-06, "loss": 0.6143, "step": 11784 }, { "epoch": 0.48926045283058034, "grad_norm": 2.5828073024749756, "learning_rate": 5.416352512759404e-06, "loss": 0.4331, "step": 11785 }, { "epoch": 0.48930196835479167, "grad_norm": 2.5322446823120117, "learning_rate": 5.415682529147791e-06, "loss": 0.6023, "step": 11786 }, { "epoch": 0.489343483879003, "grad_norm": 2.6790621280670166, "learning_rate": 5.415012538020516e-06, "loss": 0.5264, "step": 11787 }, { "epoch": 0.48938499940321434, "grad_norm": 2.31465220451355, "learning_rate": 5.414342539389696e-06, "loss": 0.644, "step": 11788 }, { "epoch": 0.48942651492742567, "grad_norm": 2.221203565597534, "learning_rate": 5.413672533267444e-06, "loss": 0.5125, "step": 11789 }, { "epoch": 0.489468030451637, "grad_norm": 3.0901994705200195, "learning_rate": 5.413002519665872e-06, "loss": 0.4356, "step": 11790 }, { "epoch": 0.48950954597584834, "grad_norm": 3.223310708999634, "learning_rate": 5.4123324985970945e-06, "loss": 0.4618, "step": 11791 }, { "epoch": 0.48955106150005967, "grad_norm": 2.4392919540405273, "learning_rate": 5.411662470073229e-06, "loss": 0.4682, "step": 11792 }, { "epoch": 0.489592577024271, "grad_norm": 1.9827154874801636, "learning_rate": 5.410992434106387e-06, "loss": 0.4479, "step": 11793 }, { "epoch": 0.48963409254848234, "grad_norm": 3.0343523025512695, "learning_rate": 5.410322390708682e-06, "loss": 0.5673, "step": 11794 }, { "epoch": 0.48967560807269367, "grad_norm": 2.1214513778686523, "learning_rate": 5.409652339892231e-06, "loss": 0.5006, "step": 11795 }, { "epoch": 0.489717123596905, "grad_norm": 2.1202754974365234, "learning_rate": 5.4089822816691465e-06, "loss": 0.5595, "step": 11796 }, { "epoch": 0.48975863912111633, "grad_norm": 2.2465553283691406, "learning_rate": 5.408312216051545e-06, "loss": 0.6177, "step": 11797 }, { "epoch": 0.48980015464532767, "grad_norm": 2.534099578857422, "learning_rate": 5.407642143051542e-06, "loss": 0.7113, "step": 11798 }, { "epoch": 0.489841670169539, "grad_norm": 2.160722255706787, "learning_rate": 5.4069720626812505e-06, "loss": 0.483, "step": 11799 }, { "epoch": 0.48988318569375033, "grad_norm": 2.2307939529418945, "learning_rate": 5.406301974952787e-06, "loss": 0.5129, "step": 11800 }, { "epoch": 0.48992470121796167, "grad_norm": 1.948508381843567, "learning_rate": 5.405631879878267e-06, "loss": 0.4497, "step": 11801 }, { "epoch": 0.489966216742173, "grad_norm": 2.063127279281616, "learning_rate": 5.404961777469804e-06, "loss": 0.3534, "step": 11802 }, { "epoch": 0.4900077322663844, "grad_norm": 2.1170132160186768, "learning_rate": 5.404291667739518e-06, "loss": 0.4375, "step": 11803 }, { "epoch": 0.4900492477905957, "grad_norm": 2.2982282638549805, "learning_rate": 5.4036215506995195e-06, "loss": 0.6151, "step": 11804 }, { "epoch": 0.49009076331480705, "grad_norm": 2.385472297668457, "learning_rate": 5.402951426361929e-06, "loss": 0.5151, "step": 11805 }, { "epoch": 0.4901322788390184, "grad_norm": 2.3355319499969482, "learning_rate": 5.40228129473886e-06, "loss": 0.5243, "step": 11806 }, { "epoch": 0.4901737943632297, "grad_norm": 2.148550033569336, "learning_rate": 5.4016111558424285e-06, "loss": 0.4679, "step": 11807 }, { "epoch": 0.49021530988744105, "grad_norm": 2.5939724445343018, "learning_rate": 5.4009410096847535e-06, "loss": 0.5688, "step": 11808 }, { "epoch": 0.4902568254116524, "grad_norm": 2.560054302215576, "learning_rate": 5.400270856277947e-06, "loss": 0.5534, "step": 11809 }, { "epoch": 0.4902983409358637, "grad_norm": 2.3577115535736084, "learning_rate": 5.39960069563413e-06, "loss": 0.3398, "step": 11810 }, { "epoch": 0.49033985646007505, "grad_norm": 2.476022481918335, "learning_rate": 5.398930527765416e-06, "loss": 0.4594, "step": 11811 }, { "epoch": 0.4903813719842864, "grad_norm": 3.125513792037964, "learning_rate": 5.398260352683924e-06, "loss": 0.6548, "step": 11812 }, { "epoch": 0.4904228875084977, "grad_norm": 1.9209126234054565, "learning_rate": 5.397590170401768e-06, "loss": 0.3989, "step": 11813 }, { "epoch": 0.49046440303270905, "grad_norm": 2.52980375289917, "learning_rate": 5.3969199809310705e-06, "loss": 0.6105, "step": 11814 }, { "epoch": 0.4905059185569204, "grad_norm": 2.0813651084899902, "learning_rate": 5.396249784283943e-06, "loss": 0.4601, "step": 11815 }, { "epoch": 0.4905474340811317, "grad_norm": 2.7302656173706055, "learning_rate": 5.395579580472506e-06, "loss": 0.4921, "step": 11816 }, { "epoch": 0.49058894960534305, "grad_norm": 2.2710747718811035, "learning_rate": 5.394909369508875e-06, "loss": 0.5854, "step": 11817 }, { "epoch": 0.4906304651295544, "grad_norm": 2.415008068084717, "learning_rate": 5.394239151405171e-06, "loss": 0.4214, "step": 11818 }, { "epoch": 0.4906719806537657, "grad_norm": 2.3711369037628174, "learning_rate": 5.393568926173508e-06, "loss": 0.5099, "step": 11819 }, { "epoch": 0.49071349617797705, "grad_norm": 2.1480724811553955, "learning_rate": 5.392898693826005e-06, "loss": 0.4249, "step": 11820 }, { "epoch": 0.4907550117021884, "grad_norm": 2.528825521469116, "learning_rate": 5.3922284543747815e-06, "loss": 0.6254, "step": 11821 }, { "epoch": 0.4907965272263997, "grad_norm": 2.7675259113311768, "learning_rate": 5.391558207831953e-06, "loss": 0.6352, "step": 11822 }, { "epoch": 0.49083804275061105, "grad_norm": 1.9178459644317627, "learning_rate": 5.39088795420964e-06, "loss": 0.4274, "step": 11823 }, { "epoch": 0.4908795582748224, "grad_norm": 2.159658193588257, "learning_rate": 5.39021769351996e-06, "loss": 0.515, "step": 11824 }, { "epoch": 0.4909210737990337, "grad_norm": 2.770944833755493, "learning_rate": 5.389547425775034e-06, "loss": 0.5517, "step": 11825 }, { "epoch": 0.49096258932324505, "grad_norm": 2.5063862800598145, "learning_rate": 5.388877150986974e-06, "loss": 0.5195, "step": 11826 }, { "epoch": 0.4910041048474564, "grad_norm": 2.6476950645446777, "learning_rate": 5.388206869167907e-06, "loss": 0.5552, "step": 11827 }, { "epoch": 0.4910456203716677, "grad_norm": 2.2062487602233887, "learning_rate": 5.387536580329945e-06, "loss": 0.5268, "step": 11828 }, { "epoch": 0.49108713589587905, "grad_norm": 2.409367084503174, "learning_rate": 5.386866284485212e-06, "loss": 0.5829, "step": 11829 }, { "epoch": 0.4911286514200904, "grad_norm": 2.785644769668579, "learning_rate": 5.386195981645823e-06, "loss": 0.5461, "step": 11830 }, { "epoch": 0.4911701669443017, "grad_norm": 2.4769980907440186, "learning_rate": 5.385525671823901e-06, "loss": 0.4328, "step": 11831 }, { "epoch": 0.49121168246851304, "grad_norm": 2.584120273590088, "learning_rate": 5.3848553550315645e-06, "loss": 0.5532, "step": 11832 }, { "epoch": 0.4912531979927244, "grad_norm": 1.9798402786254883, "learning_rate": 5.38418503128093e-06, "loss": 0.4563, "step": 11833 }, { "epoch": 0.49129471351693577, "grad_norm": 3.361248016357422, "learning_rate": 5.3835147005841204e-06, "loss": 0.499, "step": 11834 }, { "epoch": 0.4913362290411471, "grad_norm": 2.3140082359313965, "learning_rate": 5.3828443629532536e-06, "loss": 0.6183, "step": 11835 }, { "epoch": 0.49137774456535843, "grad_norm": 2.669264554977417, "learning_rate": 5.382174018400451e-06, "loss": 0.6227, "step": 11836 }, { "epoch": 0.49141926008956976, "grad_norm": 2.3293185234069824, "learning_rate": 5.381503666937832e-06, "loss": 0.5169, "step": 11837 }, { "epoch": 0.4914607756137811, "grad_norm": 2.1235549449920654, "learning_rate": 5.3808333085775176e-06, "loss": 0.5545, "step": 11838 }, { "epoch": 0.49150229113799243, "grad_norm": 2.4226417541503906, "learning_rate": 5.380162943331626e-06, "loss": 0.414, "step": 11839 }, { "epoch": 0.49154380666220376, "grad_norm": 2.7375428676605225, "learning_rate": 5.37949257121228e-06, "loss": 0.4127, "step": 11840 }, { "epoch": 0.4915853221864151, "grad_norm": 4.274534225463867, "learning_rate": 5.3788221922315975e-06, "loss": 0.5481, "step": 11841 }, { "epoch": 0.49162683771062643, "grad_norm": 2.476170539855957, "learning_rate": 5.3781518064017015e-06, "loss": 0.4865, "step": 11842 }, { "epoch": 0.49166835323483776, "grad_norm": 2.4728193283081055, "learning_rate": 5.377481413734711e-06, "loss": 0.4957, "step": 11843 }, { "epoch": 0.4917098687590491, "grad_norm": 2.7101376056671143, "learning_rate": 5.376811014242749e-06, "loss": 0.6559, "step": 11844 }, { "epoch": 0.49175138428326043, "grad_norm": 2.7632877826690674, "learning_rate": 5.376140607937935e-06, "loss": 0.5212, "step": 11845 }, { "epoch": 0.49179289980747176, "grad_norm": 2.819478750228882, "learning_rate": 5.375470194832389e-06, "loss": 0.5278, "step": 11846 }, { "epoch": 0.4918344153316831, "grad_norm": 2.0808053016662598, "learning_rate": 5.374799774938236e-06, "loss": 0.5574, "step": 11847 }, { "epoch": 0.4918759308558944, "grad_norm": 2.5347771644592285, "learning_rate": 5.374129348267594e-06, "loss": 0.5027, "step": 11848 }, { "epoch": 0.49191744638010576, "grad_norm": 2.3571972846984863, "learning_rate": 5.373458914832585e-06, "loss": 0.6104, "step": 11849 }, { "epoch": 0.4919589619043171, "grad_norm": 2.1438663005828857, "learning_rate": 5.372788474645331e-06, "loss": 0.5548, "step": 11850 }, { "epoch": 0.4920004774285284, "grad_norm": 2.7656214237213135, "learning_rate": 5.372118027717955e-06, "loss": 0.4822, "step": 11851 }, { "epoch": 0.49204199295273976, "grad_norm": 2.2369396686553955, "learning_rate": 5.371447574062577e-06, "loss": 0.4522, "step": 11852 }, { "epoch": 0.4920835084769511, "grad_norm": 2.818988800048828, "learning_rate": 5.37077711369132e-06, "loss": 0.4295, "step": 11853 }, { "epoch": 0.4921250240011624, "grad_norm": 2.344034194946289, "learning_rate": 5.370106646616306e-06, "loss": 0.5572, "step": 11854 }, { "epoch": 0.49216653952537376, "grad_norm": 2.436199903488159, "learning_rate": 5.369436172849657e-06, "loss": 0.5269, "step": 11855 }, { "epoch": 0.4922080550495851, "grad_norm": 2.6033718585968018, "learning_rate": 5.368765692403495e-06, "loss": 0.6038, "step": 11856 }, { "epoch": 0.4922495705737964, "grad_norm": 2.4301366806030273, "learning_rate": 5.368095205289943e-06, "loss": 0.5042, "step": 11857 }, { "epoch": 0.49229108609800776, "grad_norm": 2.0509376525878906, "learning_rate": 5.367424711521125e-06, "loss": 0.4089, "step": 11858 }, { "epoch": 0.4923326016222191, "grad_norm": 1.9410812854766846, "learning_rate": 5.366754211109162e-06, "loss": 0.532, "step": 11859 }, { "epoch": 0.4923741171464304, "grad_norm": 2.669567823410034, "learning_rate": 5.366083704066177e-06, "loss": 0.4347, "step": 11860 }, { "epoch": 0.49241563267064176, "grad_norm": 2.542809009552002, "learning_rate": 5.3654131904042915e-06, "loss": 0.4183, "step": 11861 }, { "epoch": 0.4924571481948531, "grad_norm": 2.855311393737793, "learning_rate": 5.364742670135632e-06, "loss": 0.5616, "step": 11862 }, { "epoch": 0.4924986637190644, "grad_norm": 2.3603951930999756, "learning_rate": 5.36407214327232e-06, "loss": 0.5811, "step": 11863 }, { "epoch": 0.49254017924327576, "grad_norm": 2.5221362113952637, "learning_rate": 5.3634016098264776e-06, "loss": 0.4899, "step": 11864 }, { "epoch": 0.49258169476748714, "grad_norm": 2.2483248710632324, "learning_rate": 5.36273106981023e-06, "loss": 0.509, "step": 11865 }, { "epoch": 0.4926232102916985, "grad_norm": 2.1424484252929688, "learning_rate": 5.3620605232357e-06, "loss": 0.4882, "step": 11866 }, { "epoch": 0.4926647258159098, "grad_norm": 2.3397762775421143, "learning_rate": 5.361389970115011e-06, "loss": 0.6925, "step": 11867 }, { "epoch": 0.49270624134012114, "grad_norm": 2.1677794456481934, "learning_rate": 5.360719410460288e-06, "loss": 0.5082, "step": 11868 }, { "epoch": 0.4927477568643325, "grad_norm": 3.009796142578125, "learning_rate": 5.360048844283653e-06, "loss": 0.4882, "step": 11869 }, { "epoch": 0.4927892723885438, "grad_norm": 1.8080953359603882, "learning_rate": 5.359378271597232e-06, "loss": 0.4163, "step": 11870 }, { "epoch": 0.49283078791275514, "grad_norm": 2.361557960510254, "learning_rate": 5.358707692413148e-06, "loss": 0.487, "step": 11871 }, { "epoch": 0.4928723034369665, "grad_norm": 2.099663019180298, "learning_rate": 5.3580371067435254e-06, "loss": 0.5789, "step": 11872 }, { "epoch": 0.4929138189611778, "grad_norm": 2.2595572471618652, "learning_rate": 5.35736651460049e-06, "loss": 0.5005, "step": 11873 }, { "epoch": 0.49295533448538914, "grad_norm": 2.699171304702759, "learning_rate": 5.356695915996162e-06, "loss": 0.4893, "step": 11874 }, { "epoch": 0.4929968500096005, "grad_norm": 2.410665273666382, "learning_rate": 5.356025310942672e-06, "loss": 0.5891, "step": 11875 }, { "epoch": 0.4930383655338118, "grad_norm": 2.661375045776367, "learning_rate": 5.35535469945214e-06, "loss": 0.443, "step": 11876 }, { "epoch": 0.49307988105802314, "grad_norm": 1.84292471408844, "learning_rate": 5.354684081536693e-06, "loss": 0.3034, "step": 11877 }, { "epoch": 0.4931213965822345, "grad_norm": 2.150733470916748, "learning_rate": 5.3540134572084556e-06, "loss": 0.4209, "step": 11878 }, { "epoch": 0.4931629121064458, "grad_norm": 2.000272750854492, "learning_rate": 5.353342826479553e-06, "loss": 0.4821, "step": 11879 }, { "epoch": 0.49320442763065714, "grad_norm": 2.770240545272827, "learning_rate": 5.35267218936211e-06, "loss": 0.7233, "step": 11880 }, { "epoch": 0.49324594315486847, "grad_norm": 2.2107553482055664, "learning_rate": 5.3520015458682526e-06, "loss": 0.5302, "step": 11881 }, { "epoch": 0.4932874586790798, "grad_norm": 2.473686695098877, "learning_rate": 5.351330896010105e-06, "loss": 0.5946, "step": 11882 }, { "epoch": 0.49332897420329114, "grad_norm": 2.252681016921997, "learning_rate": 5.350660239799795e-06, "loss": 0.4259, "step": 11883 }, { "epoch": 0.49337048972750247, "grad_norm": 2.6411101818084717, "learning_rate": 5.349989577249447e-06, "loss": 0.6024, "step": 11884 }, { "epoch": 0.4934120052517138, "grad_norm": 2.6341464519500732, "learning_rate": 5.349318908371185e-06, "loss": 0.5267, "step": 11885 }, { "epoch": 0.49345352077592514, "grad_norm": 2.5733985900878906, "learning_rate": 5.348648233177137e-06, "loss": 0.4651, "step": 11886 }, { "epoch": 0.49349503630013647, "grad_norm": 2.144131660461426, "learning_rate": 5.347977551679429e-06, "loss": 0.5484, "step": 11887 }, { "epoch": 0.4935365518243478, "grad_norm": 2.368964433670044, "learning_rate": 5.347306863890186e-06, "loss": 0.4604, "step": 11888 }, { "epoch": 0.49357806734855914, "grad_norm": 2.275545358657837, "learning_rate": 5.346636169821534e-06, "loss": 0.5229, "step": 11889 }, { "epoch": 0.49361958287277047, "grad_norm": 2.521784782409668, "learning_rate": 5.345965469485601e-06, "loss": 0.4224, "step": 11890 }, { "epoch": 0.4936610983969818, "grad_norm": 2.741597890853882, "learning_rate": 5.345294762894511e-06, "loss": 0.5533, "step": 11891 }, { "epoch": 0.49370261392119313, "grad_norm": 2.8463666439056396, "learning_rate": 5.344624050060395e-06, "loss": 0.6235, "step": 11892 }, { "epoch": 0.49374412944540447, "grad_norm": 1.9184870719909668, "learning_rate": 5.343953330995375e-06, "loss": 0.4581, "step": 11893 }, { "epoch": 0.4937856449696158, "grad_norm": 2.3560590744018555, "learning_rate": 5.34328260571158e-06, "loss": 0.6592, "step": 11894 }, { "epoch": 0.49382716049382713, "grad_norm": 2.0012400150299072, "learning_rate": 5.342611874221137e-06, "loss": 0.5576, "step": 11895 }, { "epoch": 0.4938686760180385, "grad_norm": 2.8613626956939697, "learning_rate": 5.3419411365361725e-06, "loss": 0.4681, "step": 11896 }, { "epoch": 0.49391019154224985, "grad_norm": 2.1746411323547363, "learning_rate": 5.341270392668815e-06, "loss": 0.4829, "step": 11897 }, { "epoch": 0.4939517070664612, "grad_norm": 2.3154726028442383, "learning_rate": 5.340599642631188e-06, "loss": 0.5231, "step": 11898 }, { "epoch": 0.4939932225906725, "grad_norm": 3.6867034435272217, "learning_rate": 5.339928886435423e-06, "loss": 0.6129, "step": 11899 }, { "epoch": 0.49403473811488385, "grad_norm": 2.8514175415039062, "learning_rate": 5.339258124093644e-06, "loss": 0.5244, "step": 11900 }, { "epoch": 0.4940762536390952, "grad_norm": 2.712689161300659, "learning_rate": 5.338587355617981e-06, "loss": 0.5278, "step": 11901 }, { "epoch": 0.4941177691633065, "grad_norm": 2.273559331893921, "learning_rate": 5.337916581020561e-06, "loss": 0.465, "step": 11902 }, { "epoch": 0.49415928468751785, "grad_norm": 2.213320016860962, "learning_rate": 5.337245800313513e-06, "loss": 0.4856, "step": 11903 }, { "epoch": 0.4942008002117292, "grad_norm": 2.282691717147827, "learning_rate": 5.336575013508962e-06, "loss": 0.5714, "step": 11904 }, { "epoch": 0.4942423157359405, "grad_norm": 2.1334376335144043, "learning_rate": 5.335904220619039e-06, "loss": 0.552, "step": 11905 }, { "epoch": 0.49428383126015185, "grad_norm": 2.077820062637329, "learning_rate": 5.33523342165587e-06, "loss": 0.3941, "step": 11906 }, { "epoch": 0.4943253467843632, "grad_norm": 2.8458125591278076, "learning_rate": 5.3345626166315855e-06, "loss": 0.5052, "step": 11907 }, { "epoch": 0.4943668623085745, "grad_norm": 2.6889028549194336, "learning_rate": 5.333891805558311e-06, "loss": 0.6925, "step": 11908 }, { "epoch": 0.49440837783278585, "grad_norm": 2.645179033279419, "learning_rate": 5.333220988448178e-06, "loss": 0.4371, "step": 11909 }, { "epoch": 0.4944498933569972, "grad_norm": 2.2326955795288086, "learning_rate": 5.332550165313312e-06, "loss": 0.5035, "step": 11910 }, { "epoch": 0.4944914088812085, "grad_norm": 2.205749988555908, "learning_rate": 5.3318793361658424e-06, "loss": 0.4152, "step": 11911 }, { "epoch": 0.49453292440541985, "grad_norm": 2.4377880096435547, "learning_rate": 5.331208501017901e-06, "loss": 0.3834, "step": 11912 }, { "epoch": 0.4945744399296312, "grad_norm": 2.6215152740478516, "learning_rate": 5.330537659881613e-06, "loss": 0.6234, "step": 11913 }, { "epoch": 0.4946159554538425, "grad_norm": 1.9394404888153076, "learning_rate": 5.32986681276911e-06, "loss": 0.4612, "step": 11914 }, { "epoch": 0.49465747097805385, "grad_norm": 2.2943575382232666, "learning_rate": 5.329195959692518e-06, "loss": 0.6119, "step": 11915 }, { "epoch": 0.4946989865022652, "grad_norm": 2.0806446075439453, "learning_rate": 5.32852510066397e-06, "loss": 0.5531, "step": 11916 }, { "epoch": 0.4947405020264765, "grad_norm": 2.307811737060547, "learning_rate": 5.327854235695592e-06, "loss": 0.5094, "step": 11917 }, { "epoch": 0.49478201755068785, "grad_norm": 2.4479618072509766, "learning_rate": 5.327183364799517e-06, "loss": 0.4921, "step": 11918 }, { "epoch": 0.4948235330748992, "grad_norm": 2.202303886413574, "learning_rate": 5.326512487987871e-06, "loss": 0.417, "step": 11919 }, { "epoch": 0.4948650485991105, "grad_norm": 2.140054702758789, "learning_rate": 5.325841605272786e-06, "loss": 0.4951, "step": 11920 }, { "epoch": 0.49490656412332185, "grad_norm": 2.406471014022827, "learning_rate": 5.325170716666389e-06, "loss": 0.5773, "step": 11921 }, { "epoch": 0.4949480796475332, "grad_norm": 2.284144639968872, "learning_rate": 5.324499822180813e-06, "loss": 0.5416, "step": 11922 }, { "epoch": 0.4949895951717445, "grad_norm": 2.1201794147491455, "learning_rate": 5.323828921828187e-06, "loss": 0.4989, "step": 11923 }, { "epoch": 0.49503111069595584, "grad_norm": 2.295785903930664, "learning_rate": 5.32315801562064e-06, "loss": 0.5515, "step": 11924 }, { "epoch": 0.4950726262201672, "grad_norm": 2.4048633575439453, "learning_rate": 5.322487103570304e-06, "loss": 0.4328, "step": 11925 }, { "epoch": 0.4951141417443785, "grad_norm": 2.4425487518310547, "learning_rate": 5.321816185689307e-06, "loss": 0.5891, "step": 11926 }, { "epoch": 0.4951556572685899, "grad_norm": 2.5707502365112305, "learning_rate": 5.32114526198978e-06, "loss": 0.489, "step": 11927 }, { "epoch": 0.49519717279280123, "grad_norm": 1.8163790702819824, "learning_rate": 5.320474332483856e-06, "loss": 0.419, "step": 11928 }, { "epoch": 0.49523868831701257, "grad_norm": 2.1291351318359375, "learning_rate": 5.319803397183663e-06, "loss": 0.5514, "step": 11929 }, { "epoch": 0.4952802038412239, "grad_norm": 2.3097784519195557, "learning_rate": 5.319132456101333e-06, "loss": 0.4988, "step": 11930 }, { "epoch": 0.49532171936543523, "grad_norm": 2.340679168701172, "learning_rate": 5.3184615092489955e-06, "loss": 0.3846, "step": 11931 }, { "epoch": 0.49536323488964656, "grad_norm": 2.581778049468994, "learning_rate": 5.317790556638781e-06, "loss": 0.5596, "step": 11932 }, { "epoch": 0.4954047504138579, "grad_norm": 2.659787654876709, "learning_rate": 5.317119598282823e-06, "loss": 0.4837, "step": 11933 }, { "epoch": 0.49544626593806923, "grad_norm": 2.602915048599243, "learning_rate": 5.316448634193251e-06, "loss": 0.5114, "step": 11934 }, { "epoch": 0.49548778146228056, "grad_norm": 2.116363286972046, "learning_rate": 5.315777664382197e-06, "loss": 0.4206, "step": 11935 }, { "epoch": 0.4955292969864919, "grad_norm": 2.0325348377227783, "learning_rate": 5.315106688861792e-06, "loss": 0.562, "step": 11936 }, { "epoch": 0.49557081251070323, "grad_norm": 2.1067066192626953, "learning_rate": 5.314435707644166e-06, "loss": 0.577, "step": 11937 }, { "epoch": 0.49561232803491456, "grad_norm": 2.2340004444122314, "learning_rate": 5.313764720741453e-06, "loss": 0.3499, "step": 11938 }, { "epoch": 0.4956538435591259, "grad_norm": 2.335052967071533, "learning_rate": 5.313093728165781e-06, "loss": 0.4581, "step": 11939 }, { "epoch": 0.49569535908333723, "grad_norm": 2.312310218811035, "learning_rate": 5.3124227299292865e-06, "loss": 0.3943, "step": 11940 }, { "epoch": 0.49573687460754856, "grad_norm": 2.085954427719116, "learning_rate": 5.3117517260440975e-06, "loss": 0.4836, "step": 11941 }, { "epoch": 0.4957783901317599, "grad_norm": 2.7495148181915283, "learning_rate": 5.31108071652235e-06, "loss": 0.4532, "step": 11942 }, { "epoch": 0.4958199056559712, "grad_norm": 2.6311540603637695, "learning_rate": 5.31040970137617e-06, "loss": 0.6018, "step": 11943 }, { "epoch": 0.49586142118018256, "grad_norm": 2.5031392574310303, "learning_rate": 5.309738680617696e-06, "loss": 0.5352, "step": 11944 }, { "epoch": 0.4959029367043939, "grad_norm": 2.696664571762085, "learning_rate": 5.309067654259056e-06, "loss": 0.5553, "step": 11945 }, { "epoch": 0.4959444522286052, "grad_norm": 2.1951005458831787, "learning_rate": 5.308396622312384e-06, "loss": 0.5798, "step": 11946 }, { "epoch": 0.49598596775281656, "grad_norm": 2.1823220252990723, "learning_rate": 5.307725584789814e-06, "loss": 0.5564, "step": 11947 }, { "epoch": 0.4960274832770279, "grad_norm": 2.0486512184143066, "learning_rate": 5.307054541703473e-06, "loss": 0.4719, "step": 11948 }, { "epoch": 0.4960689988012392, "grad_norm": 2.4112980365753174, "learning_rate": 5.3063834930655e-06, "loss": 0.4759, "step": 11949 }, { "epoch": 0.49611051432545056, "grad_norm": 2.147404193878174, "learning_rate": 5.305712438888025e-06, "loss": 0.5753, "step": 11950 }, { "epoch": 0.4961520298496619, "grad_norm": 2.529421329498291, "learning_rate": 5.30504137918318e-06, "loss": 0.529, "step": 11951 }, { "epoch": 0.4961935453738732, "grad_norm": 3.038046360015869, "learning_rate": 5.304370313963098e-06, "loss": 0.5259, "step": 11952 }, { "epoch": 0.49623506089808456, "grad_norm": 2.670642137527466, "learning_rate": 5.303699243239916e-06, "loss": 0.6891, "step": 11953 }, { "epoch": 0.4962765764222959, "grad_norm": 2.784633159637451, "learning_rate": 5.303028167025762e-06, "loss": 0.5639, "step": 11954 }, { "epoch": 0.4963180919465072, "grad_norm": 2.228579044342041, "learning_rate": 5.3023570853327725e-06, "loss": 0.4648, "step": 11955 }, { "epoch": 0.49635960747071856, "grad_norm": 2.4252939224243164, "learning_rate": 5.301685998173078e-06, "loss": 0.4403, "step": 11956 }, { "epoch": 0.49640112299492994, "grad_norm": 2.7689735889434814, "learning_rate": 5.3010149055588155e-06, "loss": 0.4411, "step": 11957 }, { "epoch": 0.4964426385191413, "grad_norm": 2.224804639816284, "learning_rate": 5.300343807502116e-06, "loss": 0.4066, "step": 11958 }, { "epoch": 0.4964841540433526, "grad_norm": 2.500075101852417, "learning_rate": 5.299672704015114e-06, "loss": 0.6019, "step": 11959 }, { "epoch": 0.49652566956756394, "grad_norm": 3.9845027923583984, "learning_rate": 5.2990015951099436e-06, "loss": 0.5902, "step": 11960 }, { "epoch": 0.4965671850917753, "grad_norm": 2.5375897884368896, "learning_rate": 5.298330480798738e-06, "loss": 0.4268, "step": 11961 }, { "epoch": 0.4966087006159866, "grad_norm": 2.745666265487671, "learning_rate": 5.297659361093631e-06, "loss": 0.5192, "step": 11962 }, { "epoch": 0.49665021614019794, "grad_norm": 2.291998863220215, "learning_rate": 5.296988236006758e-06, "loss": 0.4511, "step": 11963 }, { "epoch": 0.4966917316644093, "grad_norm": 2.4447500705718994, "learning_rate": 5.2963171055502505e-06, "loss": 0.5269, "step": 11964 }, { "epoch": 0.4967332471886206, "grad_norm": 2.4154694080352783, "learning_rate": 5.2956459697362435e-06, "loss": 0.465, "step": 11965 }, { "epoch": 0.49677476271283194, "grad_norm": 2.4621336460113525, "learning_rate": 5.294974828576874e-06, "loss": 0.5942, "step": 11966 }, { "epoch": 0.4968162782370433, "grad_norm": 2.9251747131347656, "learning_rate": 5.294303682084274e-06, "loss": 0.4567, "step": 11967 }, { "epoch": 0.4968577937612546, "grad_norm": 2.142972230911255, "learning_rate": 5.293632530270579e-06, "loss": 0.5038, "step": 11968 }, { "epoch": 0.49689930928546594, "grad_norm": 2.260312557220459, "learning_rate": 5.292961373147922e-06, "loss": 0.4661, "step": 11969 }, { "epoch": 0.4969408248096773, "grad_norm": 2.941516637802124, "learning_rate": 5.2922902107284415e-06, "loss": 0.6002, "step": 11970 }, { "epoch": 0.4969823403338886, "grad_norm": 2.1772561073303223, "learning_rate": 5.2916190430242675e-06, "loss": 0.3839, "step": 11971 }, { "epoch": 0.49702385585809994, "grad_norm": 2.070239543914795, "learning_rate": 5.290947870047538e-06, "loss": 0.52, "step": 11972 }, { "epoch": 0.49706537138231127, "grad_norm": 2.207921028137207, "learning_rate": 5.290276691810388e-06, "loss": 0.4972, "step": 11973 }, { "epoch": 0.4971068869065226, "grad_norm": 2.162794589996338, "learning_rate": 5.28960550832495e-06, "loss": 0.581, "step": 11974 }, { "epoch": 0.49714840243073394, "grad_norm": 2.058804988861084, "learning_rate": 5.288934319603362e-06, "loss": 0.4953, "step": 11975 }, { "epoch": 0.49718991795494527, "grad_norm": 2.608322858810425, "learning_rate": 5.288263125657757e-06, "loss": 0.4725, "step": 11976 }, { "epoch": 0.4972314334791566, "grad_norm": 2.1905975341796875, "learning_rate": 5.287591926500273e-06, "loss": 0.3628, "step": 11977 }, { "epoch": 0.49727294900336794, "grad_norm": 2.286257743835449, "learning_rate": 5.286920722143043e-06, "loss": 0.5496, "step": 11978 }, { "epoch": 0.49731446452757927, "grad_norm": 2.486767053604126, "learning_rate": 5.286249512598205e-06, "loss": 0.4534, "step": 11979 }, { "epoch": 0.4973559800517906, "grad_norm": 2.508470296859741, "learning_rate": 5.285578297877891e-06, "loss": 0.5286, "step": 11980 }, { "epoch": 0.49739749557600194, "grad_norm": 2.6024224758148193, "learning_rate": 5.2849070779942415e-06, "loss": 0.564, "step": 11981 }, { "epoch": 0.49743901110021327, "grad_norm": 2.538116693496704, "learning_rate": 5.284235852959389e-06, "loss": 0.4959, "step": 11982 }, { "epoch": 0.4974805266244246, "grad_norm": 1.8030189275741577, "learning_rate": 5.28356462278547e-06, "loss": 0.3334, "step": 11983 }, { "epoch": 0.49752204214863593, "grad_norm": 2.7815468311309814, "learning_rate": 5.282893387484621e-06, "loss": 0.5454, "step": 11984 }, { "epoch": 0.49756355767284727, "grad_norm": 2.1034374237060547, "learning_rate": 5.282222147068978e-06, "loss": 0.4622, "step": 11985 }, { "epoch": 0.4976050731970586, "grad_norm": 2.2472140789031982, "learning_rate": 5.281550901550678e-06, "loss": 0.413, "step": 11986 }, { "epoch": 0.49764658872126993, "grad_norm": 2.9786276817321777, "learning_rate": 5.280879650941857e-06, "loss": 0.4791, "step": 11987 }, { "epoch": 0.4976881042454813, "grad_norm": 2.579545259475708, "learning_rate": 5.280208395254649e-06, "loss": 0.5533, "step": 11988 }, { "epoch": 0.49772961976969265, "grad_norm": 2.086411476135254, "learning_rate": 5.279537134501195e-06, "loss": 0.3333, "step": 11989 }, { "epoch": 0.497771135293904, "grad_norm": 2.556666851043701, "learning_rate": 5.278865868693627e-06, "loss": 0.3966, "step": 11990 }, { "epoch": 0.4978126508181153, "grad_norm": 2.4542644023895264, "learning_rate": 5.278194597844083e-06, "loss": 0.4708, "step": 11991 }, { "epoch": 0.49785416634232665, "grad_norm": 2.224458694458008, "learning_rate": 5.277523321964701e-06, "loss": 0.5319, "step": 11992 }, { "epoch": 0.497895681866538, "grad_norm": 2.329146146774292, "learning_rate": 5.276852041067618e-06, "loss": 0.5044, "step": 11993 }, { "epoch": 0.4979371973907493, "grad_norm": 2.7966201305389404, "learning_rate": 5.276180755164972e-06, "loss": 0.567, "step": 11994 }, { "epoch": 0.49797871291496065, "grad_norm": 2.5483362674713135, "learning_rate": 5.275509464268895e-06, "loss": 0.4633, "step": 11995 }, { "epoch": 0.498020228439172, "grad_norm": 2.6594512462615967, "learning_rate": 5.274838168391529e-06, "loss": 0.5637, "step": 11996 }, { "epoch": 0.4980617439633833, "grad_norm": 2.2958173751831055, "learning_rate": 5.274166867545009e-06, "loss": 0.5775, "step": 11997 }, { "epoch": 0.49810325948759465, "grad_norm": 2.472181797027588, "learning_rate": 5.273495561741475e-06, "loss": 0.5726, "step": 11998 }, { "epoch": 0.498144775011806, "grad_norm": 2.764392375946045, "learning_rate": 5.272824250993062e-06, "loss": 0.5954, "step": 11999 }, { "epoch": 0.4981862905360173, "grad_norm": 2.4484047889709473, "learning_rate": 5.272152935311907e-06, "loss": 0.4154, "step": 12000 }, { "epoch": 0.49822780606022865, "grad_norm": 2.3624789714813232, "learning_rate": 5.271481614710149e-06, "loss": 0.3991, "step": 12001 }, { "epoch": 0.49826932158444, "grad_norm": 2.313682794570923, "learning_rate": 5.270810289199924e-06, "loss": 0.5444, "step": 12002 }, { "epoch": 0.4983108371086513, "grad_norm": 3.0143916606903076, "learning_rate": 5.2701389587933715e-06, "loss": 0.5846, "step": 12003 }, { "epoch": 0.49835235263286265, "grad_norm": 2.779844284057617, "learning_rate": 5.2694676235026294e-06, "loss": 0.542, "step": 12004 }, { "epoch": 0.498393868157074, "grad_norm": 2.6798899173736572, "learning_rate": 5.268796283339834e-06, "loss": 0.5732, "step": 12005 }, { "epoch": 0.4984353836812853, "grad_norm": 2.1842103004455566, "learning_rate": 5.268124938317125e-06, "loss": 0.4863, "step": 12006 }, { "epoch": 0.49847689920549665, "grad_norm": 2.353229522705078, "learning_rate": 5.26745358844664e-06, "loss": 0.5094, "step": 12007 }, { "epoch": 0.498518414729708, "grad_norm": 2.2090063095092773, "learning_rate": 5.266782233740516e-06, "loss": 0.4851, "step": 12008 }, { "epoch": 0.4985599302539193, "grad_norm": 2.248774528503418, "learning_rate": 5.266110874210893e-06, "loss": 0.4442, "step": 12009 }, { "epoch": 0.49860144577813065, "grad_norm": 2.4079883098602295, "learning_rate": 5.265439509869909e-06, "loss": 0.4991, "step": 12010 }, { "epoch": 0.498642961302342, "grad_norm": 1.9825615882873535, "learning_rate": 5.264768140729703e-06, "loss": 0.3998, "step": 12011 }, { "epoch": 0.4986844768265533, "grad_norm": 2.173617124557495, "learning_rate": 5.264096766802411e-06, "loss": 0.6825, "step": 12012 }, { "epoch": 0.49872599235076465, "grad_norm": 2.7347209453582764, "learning_rate": 5.263425388100175e-06, "loss": 0.397, "step": 12013 }, { "epoch": 0.498767507874976, "grad_norm": 2.191009521484375, "learning_rate": 5.26275400463513e-06, "loss": 0.622, "step": 12014 }, { "epoch": 0.4988090233991873, "grad_norm": 2.349566698074341, "learning_rate": 5.262082616419418e-06, "loss": 0.4896, "step": 12015 }, { "epoch": 0.49885053892339865, "grad_norm": 2.041135787963867, "learning_rate": 5.261411223465177e-06, "loss": 0.3967, "step": 12016 }, { "epoch": 0.49889205444761, "grad_norm": 3.138542652130127, "learning_rate": 5.260739825784546e-06, "loss": 0.529, "step": 12017 }, { "epoch": 0.4989335699718213, "grad_norm": 2.2856855392456055, "learning_rate": 5.2600684233896624e-06, "loss": 0.468, "step": 12018 }, { "epoch": 0.4989750854960327, "grad_norm": 2.5238635540008545, "learning_rate": 5.259397016292667e-06, "loss": 0.4615, "step": 12019 }, { "epoch": 0.49901660102024403, "grad_norm": 3.0916831493377686, "learning_rate": 5.2587256045057e-06, "loss": 0.5989, "step": 12020 }, { "epoch": 0.49905811654445537, "grad_norm": 2.3342325687408447, "learning_rate": 5.258054188040897e-06, "loss": 0.5312, "step": 12021 }, { "epoch": 0.4990996320686667, "grad_norm": 2.0642528533935547, "learning_rate": 5.257382766910403e-06, "loss": 0.5475, "step": 12022 }, { "epoch": 0.49914114759287803, "grad_norm": 2.1172831058502197, "learning_rate": 5.256711341126352e-06, "loss": 0.4449, "step": 12023 }, { "epoch": 0.49918266311708936, "grad_norm": 2.3797478675842285, "learning_rate": 5.256039910700888e-06, "loss": 0.4062, "step": 12024 }, { "epoch": 0.4992241786413007, "grad_norm": 2.1054186820983887, "learning_rate": 5.255368475646148e-06, "loss": 0.5505, "step": 12025 }, { "epoch": 0.49926569416551203, "grad_norm": 1.9140180349349976, "learning_rate": 5.254697035974272e-06, "loss": 0.4175, "step": 12026 }, { "epoch": 0.49930720968972336, "grad_norm": 2.3319027423858643, "learning_rate": 5.2540255916974005e-06, "loss": 0.5236, "step": 12027 }, { "epoch": 0.4993487252139347, "grad_norm": 2.6251795291900635, "learning_rate": 5.253354142827673e-06, "loss": 0.413, "step": 12028 }, { "epoch": 0.49939024073814603, "grad_norm": 2.97257399559021, "learning_rate": 5.2526826893772286e-06, "loss": 0.4641, "step": 12029 }, { "epoch": 0.49943175626235736, "grad_norm": 2.2378687858581543, "learning_rate": 5.25201123135821e-06, "loss": 0.5209, "step": 12030 }, { "epoch": 0.4994732717865687, "grad_norm": 2.3946750164031982, "learning_rate": 5.251339768782754e-06, "loss": 0.4767, "step": 12031 }, { "epoch": 0.49951478731078003, "grad_norm": 2.2472126483917236, "learning_rate": 5.250668301663002e-06, "loss": 0.499, "step": 12032 }, { "epoch": 0.49955630283499136, "grad_norm": 2.351405143737793, "learning_rate": 5.249996830011097e-06, "loss": 0.5802, "step": 12033 }, { "epoch": 0.4995978183592027, "grad_norm": 2.245511770248413, "learning_rate": 5.2493253538391765e-06, "loss": 0.5277, "step": 12034 }, { "epoch": 0.499639333883414, "grad_norm": 2.54144024848938, "learning_rate": 5.248653873159382e-06, "loss": 0.4585, "step": 12035 }, { "epoch": 0.49968084940762536, "grad_norm": 2.349316120147705, "learning_rate": 5.247982387983852e-06, "loss": 0.4177, "step": 12036 }, { "epoch": 0.4997223649318367, "grad_norm": 2.8052291870117188, "learning_rate": 5.247310898324731e-06, "loss": 0.5205, "step": 12037 }, { "epoch": 0.499763880456048, "grad_norm": 2.6291697025299072, "learning_rate": 5.246639404194158e-06, "loss": 0.5492, "step": 12038 }, { "epoch": 0.49980539598025936, "grad_norm": 2.546440362930298, "learning_rate": 5.2459679056042735e-06, "loss": 0.3875, "step": 12039 }, { "epoch": 0.4998469115044707, "grad_norm": 2.2912964820861816, "learning_rate": 5.245296402567216e-06, "loss": 0.566, "step": 12040 }, { "epoch": 0.499888427028682, "grad_norm": 2.305546522140503, "learning_rate": 5.244624895095132e-06, "loss": 0.3572, "step": 12041 }, { "epoch": 0.49992994255289336, "grad_norm": 2.853532552719116, "learning_rate": 5.2439533832001565e-06, "loss": 0.4927, "step": 12042 }, { "epoch": 0.4999714580771047, "grad_norm": 2.401963472366333, "learning_rate": 5.243281866894435e-06, "loss": 0.4252, "step": 12043 }, { "epoch": 0.500012973601316, "grad_norm": 2.369805335998535, "learning_rate": 5.242610346190107e-06, "loss": 0.5416, "step": 12044 }, { "epoch": 0.5000544891255274, "grad_norm": 2.634974479675293, "learning_rate": 5.241938821099313e-06, "loss": 0.5176, "step": 12045 }, { "epoch": 0.5000960046497387, "grad_norm": 2.398369312286377, "learning_rate": 5.241267291634197e-06, "loss": 0.4464, "step": 12046 }, { "epoch": 0.5001375201739501, "grad_norm": 2.6383039951324463, "learning_rate": 5.240595757806896e-06, "loss": 0.5762, "step": 12047 }, { "epoch": 0.5001790356981614, "grad_norm": 2.956937074661255, "learning_rate": 5.239924219629558e-06, "loss": 0.5301, "step": 12048 }, { "epoch": 0.5002205512223727, "grad_norm": 2.299374580383301, "learning_rate": 5.239252677114319e-06, "loss": 0.5047, "step": 12049 }, { "epoch": 0.500262066746584, "grad_norm": 2.4145898818969727, "learning_rate": 5.238581130273323e-06, "loss": 0.5367, "step": 12050 }, { "epoch": 0.5003035822707954, "grad_norm": 2.917590856552124, "learning_rate": 5.237909579118713e-06, "loss": 0.6407, "step": 12051 }, { "epoch": 0.5003450977950067, "grad_norm": 2.799906015396118, "learning_rate": 5.237238023662627e-06, "loss": 0.4963, "step": 12052 }, { "epoch": 0.5003866133192181, "grad_norm": 2.698150396347046, "learning_rate": 5.23656646391721e-06, "loss": 0.3736, "step": 12053 }, { "epoch": 0.5004281288434294, "grad_norm": 2.2360620498657227, "learning_rate": 5.2358948998946035e-06, "loss": 0.44, "step": 12054 }, { "epoch": 0.5004696443676407, "grad_norm": 2.723381996154785, "learning_rate": 5.235223331606948e-06, "loss": 0.4584, "step": 12055 }, { "epoch": 0.500511159891852, "grad_norm": 2.230272054672241, "learning_rate": 5.234551759066389e-06, "loss": 0.459, "step": 12056 }, { "epoch": 0.5005526754160634, "grad_norm": 2.1394355297088623, "learning_rate": 5.233880182285064e-06, "loss": 0.4686, "step": 12057 }, { "epoch": 0.5005941909402747, "grad_norm": 2.691470146179199, "learning_rate": 5.233208601275118e-06, "loss": 0.5984, "step": 12058 }, { "epoch": 0.5006357064644861, "grad_norm": 2.1124393939971924, "learning_rate": 5.232537016048696e-06, "loss": 0.4689, "step": 12059 }, { "epoch": 0.5006772219886974, "grad_norm": 2.095194101333618, "learning_rate": 5.231865426617935e-06, "loss": 0.3834, "step": 12060 }, { "epoch": 0.5007187375129087, "grad_norm": 2.1635472774505615, "learning_rate": 5.231193832994982e-06, "loss": 0.4277, "step": 12061 }, { "epoch": 0.50076025303712, "grad_norm": 2.2360546588897705, "learning_rate": 5.230522235191976e-06, "loss": 0.5483, "step": 12062 }, { "epoch": 0.5008017685613314, "grad_norm": 2.2506957054138184, "learning_rate": 5.229850633221063e-06, "loss": 0.4037, "step": 12063 }, { "epoch": 0.5008432840855427, "grad_norm": 2.1805782318115234, "learning_rate": 5.229179027094384e-06, "loss": 0.577, "step": 12064 }, { "epoch": 0.5008847996097541, "grad_norm": 2.268449306488037, "learning_rate": 5.228507416824083e-06, "loss": 0.3273, "step": 12065 }, { "epoch": 0.5009263151339655, "grad_norm": 2.380201816558838, "learning_rate": 5.227835802422301e-06, "loss": 0.6574, "step": 12066 }, { "epoch": 0.5009678306581767, "grad_norm": 2.225304365158081, "learning_rate": 5.227164183901184e-06, "loss": 0.4259, "step": 12067 }, { "epoch": 0.5010093461823881, "grad_norm": 2.110340118408203, "learning_rate": 5.22649256127287e-06, "loss": 0.5035, "step": 12068 }, { "epoch": 0.5010508617065994, "grad_norm": 2.436957836151123, "learning_rate": 5.225820934549507e-06, "loss": 0.4086, "step": 12069 }, { "epoch": 0.5010923772308108, "grad_norm": 3.0553159713745117, "learning_rate": 5.225149303743236e-06, "loss": 0.5302, "step": 12070 }, { "epoch": 0.5011338927550221, "grad_norm": 2.9716269969940186, "learning_rate": 5.224477668866199e-06, "loss": 0.4034, "step": 12071 }, { "epoch": 0.5011754082792335, "grad_norm": 2.2053327560424805, "learning_rate": 5.223806029930542e-06, "loss": 0.4636, "step": 12072 }, { "epoch": 0.5012169238034447, "grad_norm": 2.3071417808532715, "learning_rate": 5.223134386948408e-06, "loss": 0.6192, "step": 12073 }, { "epoch": 0.5012584393276561, "grad_norm": 2.153411626815796, "learning_rate": 5.222462739931941e-06, "loss": 0.4634, "step": 12074 }, { "epoch": 0.5012999548518674, "grad_norm": 2.5275321006774902, "learning_rate": 5.221791088893282e-06, "loss": 0.602, "step": 12075 }, { "epoch": 0.5013414703760788, "grad_norm": 2.4690792560577393, "learning_rate": 5.221119433844578e-06, "loss": 0.7264, "step": 12076 }, { "epoch": 0.5013829859002901, "grad_norm": 2.32791805267334, "learning_rate": 5.22044777479797e-06, "loss": 0.4559, "step": 12077 }, { "epoch": 0.5014245014245015, "grad_norm": 2.6794261932373047, "learning_rate": 5.219776111765603e-06, "loss": 0.6719, "step": 12078 }, { "epoch": 0.5014660169487127, "grad_norm": 2.617774248123169, "learning_rate": 5.219104444759619e-06, "loss": 0.5835, "step": 12079 }, { "epoch": 0.5015075324729241, "grad_norm": 2.3757543563842773, "learning_rate": 5.218432773792166e-06, "loss": 0.5616, "step": 12080 }, { "epoch": 0.5015490479971354, "grad_norm": 2.369338274002075, "learning_rate": 5.217761098875383e-06, "loss": 0.4768, "step": 12081 }, { "epoch": 0.5015905635213468, "grad_norm": 2.3857421875, "learning_rate": 5.217089420021417e-06, "loss": 0.5482, "step": 12082 }, { "epoch": 0.5016320790455581, "grad_norm": 2.7293686866760254, "learning_rate": 5.216417737242413e-06, "loss": 0.4298, "step": 12083 }, { "epoch": 0.5016735945697695, "grad_norm": 2.308748960494995, "learning_rate": 5.215746050550513e-06, "loss": 0.4958, "step": 12084 }, { "epoch": 0.5017151100939807, "grad_norm": 2.562091112136841, "learning_rate": 5.215074359957862e-06, "loss": 0.5415, "step": 12085 }, { "epoch": 0.5017566256181921, "grad_norm": 2.1052017211914062, "learning_rate": 5.2144026654766045e-06, "loss": 0.4623, "step": 12086 }, { "epoch": 0.5017981411424034, "grad_norm": 2.1102962493896484, "learning_rate": 5.213730967118886e-06, "loss": 0.5459, "step": 12087 }, { "epoch": 0.5018396566666148, "grad_norm": 2.3306825160980225, "learning_rate": 5.213059264896849e-06, "loss": 0.4766, "step": 12088 }, { "epoch": 0.5018811721908261, "grad_norm": 2.7493414878845215, "learning_rate": 5.21238755882264e-06, "loss": 0.4548, "step": 12089 }, { "epoch": 0.5019226877150375, "grad_norm": 2.2867839336395264, "learning_rate": 5.211715848908403e-06, "loss": 0.3472, "step": 12090 }, { "epoch": 0.5019642032392487, "grad_norm": 2.673941135406494, "learning_rate": 5.211044135166282e-06, "loss": 0.5417, "step": 12091 }, { "epoch": 0.5020057187634601, "grad_norm": 2.4019572734832764, "learning_rate": 5.2103724176084216e-06, "loss": 0.4103, "step": 12092 }, { "epoch": 0.5020472342876714, "grad_norm": 2.6567039489746094, "learning_rate": 5.209700696246968e-06, "loss": 0.63, "step": 12093 }, { "epoch": 0.5020887498118828, "grad_norm": 2.3990790843963623, "learning_rate": 5.2090289710940645e-06, "loss": 0.5162, "step": 12094 }, { "epoch": 0.5021302653360941, "grad_norm": 2.231360912322998, "learning_rate": 5.2083572421618575e-06, "loss": 0.5064, "step": 12095 }, { "epoch": 0.5021717808603055, "grad_norm": 3.0221056938171387, "learning_rate": 5.207685509462491e-06, "loss": 0.5241, "step": 12096 }, { "epoch": 0.5022132963845168, "grad_norm": 2.1150779724121094, "learning_rate": 5.207013773008111e-06, "loss": 0.5097, "step": 12097 }, { "epoch": 0.5022548119087281, "grad_norm": 2.7636685371398926, "learning_rate": 5.2063420328108626e-06, "loss": 0.6244, "step": 12098 }, { "epoch": 0.5022963274329395, "grad_norm": 2.5281717777252197, "learning_rate": 5.205670288882889e-06, "loss": 0.4954, "step": 12099 }, { "epoch": 0.5023378429571508, "grad_norm": 2.5738906860351562, "learning_rate": 5.20499854123634e-06, "loss": 0.5984, "step": 12100 }, { "epoch": 0.5023793584813622, "grad_norm": 2.542081594467163, "learning_rate": 5.204326789883357e-06, "loss": 0.51, "step": 12101 }, { "epoch": 0.5024208740055734, "grad_norm": 2.223639726638794, "learning_rate": 5.203655034836087e-06, "loss": 0.4642, "step": 12102 }, { "epoch": 0.5024623895297848, "grad_norm": 2.588737726211548, "learning_rate": 5.202983276106676e-06, "loss": 0.5921, "step": 12103 }, { "epoch": 0.5025039050539961, "grad_norm": 2.1734840869903564, "learning_rate": 5.202311513707267e-06, "loss": 0.5934, "step": 12104 }, { "epoch": 0.5025454205782075, "grad_norm": 2.529029130935669, "learning_rate": 5.2016397476500094e-06, "loss": 0.4882, "step": 12105 }, { "epoch": 0.5025869361024188, "grad_norm": 2.32220196723938, "learning_rate": 5.200967977947046e-06, "loss": 0.4266, "step": 12106 }, { "epoch": 0.5026284516266302, "grad_norm": 2.1195082664489746, "learning_rate": 5.200296204610523e-06, "loss": 0.4816, "step": 12107 }, { "epoch": 0.5026699671508414, "grad_norm": 3.3197133541107178, "learning_rate": 5.199624427652589e-06, "loss": 0.5747, "step": 12108 }, { "epoch": 0.5027114826750528, "grad_norm": 2.5999600887298584, "learning_rate": 5.1989526470853856e-06, "loss": 0.5652, "step": 12109 }, { "epoch": 0.5027529981992641, "grad_norm": 2.223501205444336, "learning_rate": 5.198280862921062e-06, "loss": 0.6504, "step": 12110 }, { "epoch": 0.5027945137234755, "grad_norm": 2.63777756690979, "learning_rate": 5.197609075171763e-06, "loss": 0.5709, "step": 12111 }, { "epoch": 0.5028360292476868, "grad_norm": 2.5693447589874268, "learning_rate": 5.196937283849634e-06, "loss": 0.3944, "step": 12112 }, { "epoch": 0.5028775447718982, "grad_norm": 2.1327104568481445, "learning_rate": 5.1962654889668244e-06, "loss": 0.445, "step": 12113 }, { "epoch": 0.5029190602961094, "grad_norm": 2.788029432296753, "learning_rate": 5.195593690535478e-06, "loss": 0.4743, "step": 12114 }, { "epoch": 0.5029605758203208, "grad_norm": 2.4867711067199707, "learning_rate": 5.194921888567739e-06, "loss": 0.6348, "step": 12115 }, { "epoch": 0.5030020913445321, "grad_norm": 2.201002836227417, "learning_rate": 5.1942500830757585e-06, "loss": 0.5265, "step": 12116 }, { "epoch": 0.5030436068687435, "grad_norm": 2.3755667209625244, "learning_rate": 5.19357827407168e-06, "loss": 0.464, "step": 12117 }, { "epoch": 0.5030851223929548, "grad_norm": 2.7156009674072266, "learning_rate": 5.192906461567648e-06, "loss": 0.6001, "step": 12118 }, { "epoch": 0.5031266379171662, "grad_norm": 2.822932243347168, "learning_rate": 5.192234645575814e-06, "loss": 0.5433, "step": 12119 }, { "epoch": 0.5031681534413774, "grad_norm": 2.5561957359313965, "learning_rate": 5.191562826108322e-06, "loss": 0.6582, "step": 12120 }, { "epoch": 0.5032096689655888, "grad_norm": 2.264664888381958, "learning_rate": 5.190891003177319e-06, "loss": 0.3385, "step": 12121 }, { "epoch": 0.5032511844898001, "grad_norm": 2.3868489265441895, "learning_rate": 5.1902191767949506e-06, "loss": 0.597, "step": 12122 }, { "epoch": 0.5032927000140115, "grad_norm": 2.5558204650878906, "learning_rate": 5.189547346973366e-06, "loss": 0.4986, "step": 12123 }, { "epoch": 0.5033342155382228, "grad_norm": 2.531172752380371, "learning_rate": 5.1888755137247095e-06, "loss": 0.4225, "step": 12124 }, { "epoch": 0.5033757310624342, "grad_norm": 2.688720941543579, "learning_rate": 5.188203677061129e-06, "loss": 0.6031, "step": 12125 }, { "epoch": 0.5034172465866454, "grad_norm": 2.581294298171997, "learning_rate": 5.187531836994774e-06, "loss": 0.5017, "step": 12126 }, { "epoch": 0.5034587621108568, "grad_norm": 2.4011147022247314, "learning_rate": 5.1868599935377874e-06, "loss": 0.4756, "step": 12127 }, { "epoch": 0.5035002776350682, "grad_norm": 2.2166359424591064, "learning_rate": 5.186188146702318e-06, "loss": 0.579, "step": 12128 }, { "epoch": 0.5035417931592795, "grad_norm": 2.610785484313965, "learning_rate": 5.185516296500515e-06, "loss": 0.6893, "step": 12129 }, { "epoch": 0.5035833086834909, "grad_norm": 2.4614436626434326, "learning_rate": 5.184844442944523e-06, "loss": 0.4072, "step": 12130 }, { "epoch": 0.5036248242077022, "grad_norm": 2.1979074478149414, "learning_rate": 5.18417258604649e-06, "loss": 0.3834, "step": 12131 }, { "epoch": 0.5036663397319135, "grad_norm": 2.2923550605773926, "learning_rate": 5.183500725818565e-06, "loss": 0.4846, "step": 12132 }, { "epoch": 0.5037078552561248, "grad_norm": 1.996890902519226, "learning_rate": 5.182828862272891e-06, "loss": 0.4063, "step": 12133 }, { "epoch": 0.5037493707803362, "grad_norm": 2.7121565341949463, "learning_rate": 5.182156995421621e-06, "loss": 0.5758, "step": 12134 }, { "epoch": 0.5037908863045475, "grad_norm": 2.3058483600616455, "learning_rate": 5.181485125276898e-06, "loss": 0.397, "step": 12135 }, { "epoch": 0.5038324018287589, "grad_norm": 2.1916356086730957, "learning_rate": 5.180813251850874e-06, "loss": 0.3986, "step": 12136 }, { "epoch": 0.5038739173529702, "grad_norm": 2.279456377029419, "learning_rate": 5.180141375155692e-06, "loss": 0.5812, "step": 12137 }, { "epoch": 0.5039154328771815, "grad_norm": 2.3515658378601074, "learning_rate": 5.1794694952035015e-06, "loss": 0.5981, "step": 12138 }, { "epoch": 0.5039569484013928, "grad_norm": 1.8991578817367554, "learning_rate": 5.178797612006453e-06, "loss": 0.411, "step": 12139 }, { "epoch": 0.5039984639256042, "grad_norm": 2.598095655441284, "learning_rate": 5.178125725576693e-06, "loss": 0.5575, "step": 12140 }, { "epoch": 0.5040399794498155, "grad_norm": 2.4119646549224854, "learning_rate": 5.177453835926366e-06, "loss": 0.6425, "step": 12141 }, { "epoch": 0.5040814949740269, "grad_norm": 2.3518059253692627, "learning_rate": 5.176781943067623e-06, "loss": 0.5331, "step": 12142 }, { "epoch": 0.5041230104982382, "grad_norm": 2.3498356342315674, "learning_rate": 5.176110047012613e-06, "loss": 0.6601, "step": 12143 }, { "epoch": 0.5041645260224495, "grad_norm": 2.317474842071533, "learning_rate": 5.175438147773482e-06, "loss": 0.4962, "step": 12144 }, { "epoch": 0.5042060415466608, "grad_norm": 2.8508455753326416, "learning_rate": 5.174766245362378e-06, "loss": 0.5515, "step": 12145 }, { "epoch": 0.5042475570708722, "grad_norm": 2.718322277069092, "learning_rate": 5.17409433979145e-06, "loss": 0.4224, "step": 12146 }, { "epoch": 0.5042890725950835, "grad_norm": 2.170112133026123, "learning_rate": 5.173422431072847e-06, "loss": 0.4956, "step": 12147 }, { "epoch": 0.5043305881192949, "grad_norm": 2.260136127471924, "learning_rate": 5.172750519218715e-06, "loss": 0.4974, "step": 12148 }, { "epoch": 0.5043721036435062, "grad_norm": 2.0003254413604736, "learning_rate": 5.172078604241205e-06, "loss": 0.3271, "step": 12149 }, { "epoch": 0.5044136191677175, "grad_norm": 2.822993040084839, "learning_rate": 5.1714066861524634e-06, "loss": 0.4655, "step": 12150 }, { "epoch": 0.5044551346919288, "grad_norm": 2.2784407138824463, "learning_rate": 5.170734764964641e-06, "loss": 0.4754, "step": 12151 }, { "epoch": 0.5044966502161402, "grad_norm": 2.5968782901763916, "learning_rate": 5.1700628406898835e-06, "loss": 0.6163, "step": 12152 }, { "epoch": 0.5045381657403515, "grad_norm": 2.6661317348480225, "learning_rate": 5.169390913340342e-06, "loss": 0.5952, "step": 12153 }, { "epoch": 0.5045796812645629, "grad_norm": 2.510911703109741, "learning_rate": 5.168718982928163e-06, "loss": 0.5798, "step": 12154 }, { "epoch": 0.5046211967887742, "grad_norm": 2.5490481853485107, "learning_rate": 5.1680470494654965e-06, "loss": 0.6332, "step": 12155 }, { "epoch": 0.5046627123129855, "grad_norm": 2.709815263748169, "learning_rate": 5.167375112964491e-06, "loss": 0.5979, "step": 12156 }, { "epoch": 0.5047042278371968, "grad_norm": 2.310556173324585, "learning_rate": 5.166703173437294e-06, "loss": 0.6753, "step": 12157 }, { "epoch": 0.5047457433614082, "grad_norm": 2.2872684001922607, "learning_rate": 5.166031230896057e-06, "loss": 0.569, "step": 12158 }, { "epoch": 0.5047872588856196, "grad_norm": 2.3589138984680176, "learning_rate": 5.165359285352927e-06, "loss": 0.5363, "step": 12159 }, { "epoch": 0.5048287744098309, "grad_norm": 2.3348755836486816, "learning_rate": 5.164687336820054e-06, "loss": 0.567, "step": 12160 }, { "epoch": 0.5048702899340423, "grad_norm": 2.136474370956421, "learning_rate": 5.164015385309586e-06, "loss": 0.6025, "step": 12161 }, { "epoch": 0.5049118054582535, "grad_norm": 2.3145744800567627, "learning_rate": 5.163343430833673e-06, "loss": 0.5268, "step": 12162 }, { "epoch": 0.5049533209824649, "grad_norm": 2.463387966156006, "learning_rate": 5.1626714734044615e-06, "loss": 0.5175, "step": 12163 }, { "epoch": 0.5049948365066762, "grad_norm": 2.824629545211792, "learning_rate": 5.161999513034105e-06, "loss": 0.6302, "step": 12164 }, { "epoch": 0.5050363520308876, "grad_norm": 2.5201220512390137, "learning_rate": 5.16132754973475e-06, "loss": 0.4212, "step": 12165 }, { "epoch": 0.5050778675550989, "grad_norm": 3.01904296875, "learning_rate": 5.160655583518547e-06, "loss": 0.6105, "step": 12166 }, { "epoch": 0.5051193830793103, "grad_norm": 2.47312068939209, "learning_rate": 5.1599836143976445e-06, "loss": 0.6095, "step": 12167 }, { "epoch": 0.5051608986035215, "grad_norm": 3.3330299854278564, "learning_rate": 5.159311642384192e-06, "loss": 0.4548, "step": 12168 }, { "epoch": 0.5052024141277329, "grad_norm": 2.4521517753601074, "learning_rate": 5.15863966749034e-06, "loss": 0.5382, "step": 12169 }, { "epoch": 0.5052439296519442, "grad_norm": 2.2361128330230713, "learning_rate": 5.157967689728235e-06, "loss": 0.5272, "step": 12170 }, { "epoch": 0.5052854451761556, "grad_norm": 2.44181752204895, "learning_rate": 5.157295709110031e-06, "loss": 0.5144, "step": 12171 }, { "epoch": 0.5053269607003669, "grad_norm": 2.2648022174835205, "learning_rate": 5.156623725647872e-06, "loss": 0.4943, "step": 12172 }, { "epoch": 0.5053684762245783, "grad_norm": 2.873595952987671, "learning_rate": 5.155951739353914e-06, "loss": 0.5343, "step": 12173 }, { "epoch": 0.5054099917487895, "grad_norm": 2.2922329902648926, "learning_rate": 5.155279750240302e-06, "loss": 0.4952, "step": 12174 }, { "epoch": 0.5054515072730009, "grad_norm": 2.514173984527588, "learning_rate": 5.154607758319189e-06, "loss": 0.5196, "step": 12175 }, { "epoch": 0.5054930227972122, "grad_norm": 2.1695642471313477, "learning_rate": 5.153935763602721e-06, "loss": 0.3831, "step": 12176 }, { "epoch": 0.5055345383214236, "grad_norm": 2.8686721324920654, "learning_rate": 5.153263766103051e-06, "loss": 0.5593, "step": 12177 }, { "epoch": 0.5055760538456349, "grad_norm": 2.392073154449463, "learning_rate": 5.152591765832328e-06, "loss": 0.4494, "step": 12178 }, { "epoch": 0.5056175693698463, "grad_norm": 2.2836241722106934, "learning_rate": 5.151919762802703e-06, "loss": 0.6145, "step": 12179 }, { "epoch": 0.5056590848940575, "grad_norm": 2.5166513919830322, "learning_rate": 5.151247757026324e-06, "loss": 0.412, "step": 12180 }, { "epoch": 0.5057006004182689, "grad_norm": 2.7255213260650635, "learning_rate": 5.150575748515341e-06, "loss": 0.6063, "step": 12181 }, { "epoch": 0.5057421159424802, "grad_norm": 2.5050344467163086, "learning_rate": 5.149903737281906e-06, "loss": 0.5815, "step": 12182 }, { "epoch": 0.5057836314666916, "grad_norm": 2.159660816192627, "learning_rate": 5.149231723338168e-06, "loss": 0.471, "step": 12183 }, { "epoch": 0.5058251469909029, "grad_norm": 2.704207181930542, "learning_rate": 5.1485597066962775e-06, "loss": 0.464, "step": 12184 }, { "epoch": 0.5058666625151143, "grad_norm": 2.361013174057007, "learning_rate": 5.147887687368385e-06, "loss": 0.5082, "step": 12185 }, { "epoch": 0.5059081780393255, "grad_norm": 2.314976215362549, "learning_rate": 5.147215665366641e-06, "loss": 0.4154, "step": 12186 }, { "epoch": 0.5059496935635369, "grad_norm": 2.5956082344055176, "learning_rate": 5.146543640703193e-06, "loss": 0.4794, "step": 12187 }, { "epoch": 0.5059912090877482, "grad_norm": 2.349081516265869, "learning_rate": 5.1458716133901955e-06, "loss": 0.5743, "step": 12188 }, { "epoch": 0.5060327246119596, "grad_norm": 1.93596613407135, "learning_rate": 5.1451995834397975e-06, "loss": 0.4878, "step": 12189 }, { "epoch": 0.506074240136171, "grad_norm": 2.2777416706085205, "learning_rate": 5.144527550864148e-06, "loss": 0.5859, "step": 12190 }, { "epoch": 0.5061157556603822, "grad_norm": 2.687055826187134, "learning_rate": 5.143855515675399e-06, "loss": 0.4568, "step": 12191 }, { "epoch": 0.5061572711845936, "grad_norm": 2.098345994949341, "learning_rate": 5.143183477885701e-06, "loss": 0.4842, "step": 12192 }, { "epoch": 0.5061987867088049, "grad_norm": 2.436384677886963, "learning_rate": 5.142511437507206e-06, "loss": 0.406, "step": 12193 }, { "epoch": 0.5062403022330163, "grad_norm": 2.802755355834961, "learning_rate": 5.1418393945520605e-06, "loss": 0.5566, "step": 12194 }, { "epoch": 0.5062818177572276, "grad_norm": 2.745260238647461, "learning_rate": 5.14116734903242e-06, "loss": 0.4503, "step": 12195 }, { "epoch": 0.506323333281439, "grad_norm": 2.6428158283233643, "learning_rate": 5.140495300960432e-06, "loss": 0.4907, "step": 12196 }, { "epoch": 0.5063648488056502, "grad_norm": 2.495185375213623, "learning_rate": 5.139823250348249e-06, "loss": 0.5651, "step": 12197 }, { "epoch": 0.5064063643298616, "grad_norm": 3.3688786029815674, "learning_rate": 5.13915119720802e-06, "loss": 0.4652, "step": 12198 }, { "epoch": 0.5064478798540729, "grad_norm": 2.119962453842163, "learning_rate": 5.138479141551898e-06, "loss": 0.5662, "step": 12199 }, { "epoch": 0.5064893953782843, "grad_norm": 2.3823797702789307, "learning_rate": 5.137807083392034e-06, "loss": 0.4895, "step": 12200 }, { "epoch": 0.5065309109024956, "grad_norm": 2.3957104682922363, "learning_rate": 5.137135022740578e-06, "loss": 0.4047, "step": 12201 }, { "epoch": 0.506572426426707, "grad_norm": 2.5016636848449707, "learning_rate": 5.136462959609681e-06, "loss": 0.4164, "step": 12202 }, { "epoch": 0.5066139419509182, "grad_norm": 2.2785561084747314, "learning_rate": 5.1357908940114945e-06, "loss": 0.4948, "step": 12203 }, { "epoch": 0.5066554574751296, "grad_norm": 2.7226901054382324, "learning_rate": 5.135118825958169e-06, "loss": 0.5334, "step": 12204 }, { "epoch": 0.5066969729993409, "grad_norm": 2.479163885116577, "learning_rate": 5.134446755461857e-06, "loss": 0.4259, "step": 12205 }, { "epoch": 0.5067384885235523, "grad_norm": 2.6339688301086426, "learning_rate": 5.133774682534709e-06, "loss": 0.5952, "step": 12206 }, { "epoch": 0.5067800040477636, "grad_norm": 2.9009969234466553, "learning_rate": 5.133102607188875e-06, "loss": 0.5311, "step": 12207 }, { "epoch": 0.506821519571975, "grad_norm": 2.6170809268951416, "learning_rate": 5.1324305294365086e-06, "loss": 0.6602, "step": 12208 }, { "epoch": 0.5068630350961862, "grad_norm": 2.790785312652588, "learning_rate": 5.131758449289759e-06, "loss": 0.5727, "step": 12209 }, { "epoch": 0.5069045506203976, "grad_norm": 2.217656373977661, "learning_rate": 5.131086366760779e-06, "loss": 0.5932, "step": 12210 }, { "epoch": 0.5069460661446089, "grad_norm": 2.5042951107025146, "learning_rate": 5.13041428186172e-06, "loss": 0.4831, "step": 12211 }, { "epoch": 0.5069875816688203, "grad_norm": 2.0304672718048096, "learning_rate": 5.129742194604733e-06, "loss": 0.5123, "step": 12212 }, { "epoch": 0.5070290971930316, "grad_norm": 2.3604214191436768, "learning_rate": 5.129070105001969e-06, "loss": 0.5753, "step": 12213 }, { "epoch": 0.507070612717243, "grad_norm": 2.3003292083740234, "learning_rate": 5.128398013065582e-06, "loss": 0.5683, "step": 12214 }, { "epoch": 0.5071121282414542, "grad_norm": 2.116083860397339, "learning_rate": 5.12772591880772e-06, "loss": 0.5774, "step": 12215 }, { "epoch": 0.5071536437656656, "grad_norm": 3.075552463531494, "learning_rate": 5.127053822240539e-06, "loss": 0.5426, "step": 12216 }, { "epoch": 0.5071951592898769, "grad_norm": 2.9894070625305176, "learning_rate": 5.126381723376186e-06, "loss": 0.584, "step": 12217 }, { "epoch": 0.5072366748140883, "grad_norm": 2.5928971767425537, "learning_rate": 5.125709622226816e-06, "loss": 0.529, "step": 12218 }, { "epoch": 0.5072781903382996, "grad_norm": 2.2447988986968994, "learning_rate": 5.1250375188045805e-06, "loss": 0.4772, "step": 12219 }, { "epoch": 0.507319705862511, "grad_norm": 3.4862051010131836, "learning_rate": 5.124365413121629e-06, "loss": 0.5042, "step": 12220 }, { "epoch": 0.5073612213867223, "grad_norm": 2.2715811729431152, "learning_rate": 5.123693305190116e-06, "loss": 0.5651, "step": 12221 }, { "epoch": 0.5074027369109336, "grad_norm": 2.3317062854766846, "learning_rate": 5.123021195022192e-06, "loss": 0.6873, "step": 12222 }, { "epoch": 0.507444252435145, "grad_norm": 2.6582579612731934, "learning_rate": 5.1223490826300104e-06, "loss": 0.5764, "step": 12223 }, { "epoch": 0.5074857679593563, "grad_norm": 2.1197192668914795, "learning_rate": 5.121676968025721e-06, "loss": 0.3576, "step": 12224 }, { "epoch": 0.5075272834835677, "grad_norm": 2.313161849975586, "learning_rate": 5.121004851221477e-06, "loss": 0.5885, "step": 12225 }, { "epoch": 0.507568799007779, "grad_norm": 2.1995227336883545, "learning_rate": 5.12033273222943e-06, "loss": 0.4094, "step": 12226 }, { "epoch": 0.5076103145319903, "grad_norm": 2.4129738807678223, "learning_rate": 5.119660611061734e-06, "loss": 0.6152, "step": 12227 }, { "epoch": 0.5076518300562016, "grad_norm": 2.5701589584350586, "learning_rate": 5.118988487730537e-06, "loss": 0.5591, "step": 12228 }, { "epoch": 0.507693345580413, "grad_norm": 2.8136188983917236, "learning_rate": 5.118316362247997e-06, "loss": 0.5567, "step": 12229 }, { "epoch": 0.5077348611046243, "grad_norm": 2.230684757232666, "learning_rate": 5.1176442346262615e-06, "loss": 0.488, "step": 12230 }, { "epoch": 0.5077763766288357, "grad_norm": 2.330824136734009, "learning_rate": 5.1169721048774845e-06, "loss": 0.4138, "step": 12231 }, { "epoch": 0.507817892153047, "grad_norm": 2.01804518699646, "learning_rate": 5.116299973013819e-06, "loss": 0.4948, "step": 12232 }, { "epoch": 0.5078594076772583, "grad_norm": 2.6733601093292236, "learning_rate": 5.115627839047415e-06, "loss": 0.3866, "step": 12233 }, { "epoch": 0.5079009232014696, "grad_norm": 2.2437357902526855, "learning_rate": 5.114955702990427e-06, "loss": 0.5448, "step": 12234 }, { "epoch": 0.507942438725681, "grad_norm": 1.9505236148834229, "learning_rate": 5.114283564855006e-06, "loss": 0.4751, "step": 12235 }, { "epoch": 0.5079839542498923, "grad_norm": 2.846734046936035, "learning_rate": 5.113611424653307e-06, "loss": 0.601, "step": 12236 }, { "epoch": 0.5080254697741037, "grad_norm": 2.2612533569335938, "learning_rate": 5.1129392823974784e-06, "loss": 0.4224, "step": 12237 }, { "epoch": 0.508066985298315, "grad_norm": 2.137277126312256, "learning_rate": 5.1122671380996765e-06, "loss": 0.4843, "step": 12238 }, { "epoch": 0.5081085008225263, "grad_norm": 2.1590771675109863, "learning_rate": 5.111594991772052e-06, "loss": 0.489, "step": 12239 }, { "epoch": 0.5081500163467376, "grad_norm": 2.1884257793426514, "learning_rate": 5.1109228434267585e-06, "loss": 0.5656, "step": 12240 }, { "epoch": 0.508191531870949, "grad_norm": 2.204902410507202, "learning_rate": 5.110250693075947e-06, "loss": 0.427, "step": 12241 }, { "epoch": 0.5082330473951603, "grad_norm": 2.146744966506958, "learning_rate": 5.109578540731772e-06, "loss": 0.3977, "step": 12242 }, { "epoch": 0.5082745629193717, "grad_norm": 2.693610668182373, "learning_rate": 5.108906386406385e-06, "loss": 0.5906, "step": 12243 }, { "epoch": 0.508316078443583, "grad_norm": 2.234358072280884, "learning_rate": 5.10823423011194e-06, "loss": 0.5656, "step": 12244 }, { "epoch": 0.5083575939677943, "grad_norm": 2.3059771060943604, "learning_rate": 5.107562071860589e-06, "loss": 0.4447, "step": 12245 }, { "epoch": 0.5083991094920056, "grad_norm": 2.3407981395721436, "learning_rate": 5.1068899116644835e-06, "loss": 0.455, "step": 12246 }, { "epoch": 0.508440625016217, "grad_norm": 2.3469436168670654, "learning_rate": 5.106217749535778e-06, "loss": 0.5747, "step": 12247 }, { "epoch": 0.5084821405404283, "grad_norm": 2.3307230472564697, "learning_rate": 5.105545585486624e-06, "loss": 0.5426, "step": 12248 }, { "epoch": 0.5085236560646397, "grad_norm": 2.374865770339966, "learning_rate": 5.104873419529177e-06, "loss": 0.4931, "step": 12249 }, { "epoch": 0.508565171588851, "grad_norm": 2.255204916000366, "learning_rate": 5.104201251675588e-06, "loss": 0.5695, "step": 12250 }, { "epoch": 0.5086066871130623, "grad_norm": 2.061511278152466, "learning_rate": 5.1035290819380105e-06, "loss": 0.5372, "step": 12251 }, { "epoch": 0.5086482026372737, "grad_norm": 2.2474303245544434, "learning_rate": 5.102856910328596e-06, "loss": 0.4579, "step": 12252 }, { "epoch": 0.508689718161485, "grad_norm": 2.5606014728546143, "learning_rate": 5.1021847368595e-06, "loss": 0.5484, "step": 12253 }, { "epoch": 0.5087312336856964, "grad_norm": 3.2782671451568604, "learning_rate": 5.101512561542874e-06, "loss": 0.5124, "step": 12254 }, { "epoch": 0.5087727492099077, "grad_norm": 2.815976619720459, "learning_rate": 5.100840384390873e-06, "loss": 0.7279, "step": 12255 }, { "epoch": 0.5088142647341191, "grad_norm": 2.9967668056488037, "learning_rate": 5.100168205415646e-06, "loss": 0.5122, "step": 12256 }, { "epoch": 0.5088557802583303, "grad_norm": 2.3508291244506836, "learning_rate": 5.0994960246293514e-06, "loss": 0.5724, "step": 12257 }, { "epoch": 0.5088972957825417, "grad_norm": 2.5632405281066895, "learning_rate": 5.098823842044141e-06, "loss": 0.5306, "step": 12258 }, { "epoch": 0.508938811306753, "grad_norm": 2.399209976196289, "learning_rate": 5.098151657672164e-06, "loss": 0.4679, "step": 12259 }, { "epoch": 0.5089803268309644, "grad_norm": 2.316682815551758, "learning_rate": 5.0974794715255785e-06, "loss": 0.5357, "step": 12260 }, { "epoch": 0.5090218423551757, "grad_norm": 2.485089063644409, "learning_rate": 5.096807283616535e-06, "loss": 0.5664, "step": 12261 }, { "epoch": 0.5090633578793871, "grad_norm": 2.5020172595977783, "learning_rate": 5.09613509395719e-06, "loss": 0.5163, "step": 12262 }, { "epoch": 0.5091048734035983, "grad_norm": 2.6551871299743652, "learning_rate": 5.095462902559692e-06, "loss": 0.5743, "step": 12263 }, { "epoch": 0.5091463889278097, "grad_norm": 2.577005386352539, "learning_rate": 5.094790709436199e-06, "loss": 0.442, "step": 12264 }, { "epoch": 0.509187904452021, "grad_norm": 2.681398391723633, "learning_rate": 5.094118514598862e-06, "loss": 0.4774, "step": 12265 }, { "epoch": 0.5092294199762324, "grad_norm": 2.0851759910583496, "learning_rate": 5.093446318059836e-06, "loss": 0.514, "step": 12266 }, { "epoch": 0.5092709355004437, "grad_norm": 2.107342481613159, "learning_rate": 5.092774119831272e-06, "loss": 0.3793, "step": 12267 }, { "epoch": 0.509312451024655, "grad_norm": 2.901411771774292, "learning_rate": 5.0921019199253265e-06, "loss": 0.5896, "step": 12268 }, { "epoch": 0.5093539665488663, "grad_norm": 2.4409565925598145, "learning_rate": 5.091429718354152e-06, "loss": 0.6111, "step": 12269 }, { "epoch": 0.5093954820730777, "grad_norm": 2.6729321479797363, "learning_rate": 5.090757515129901e-06, "loss": 0.5841, "step": 12270 }, { "epoch": 0.509436997597289, "grad_norm": 2.1318886280059814, "learning_rate": 5.090085310264728e-06, "loss": 0.4784, "step": 12271 }, { "epoch": 0.5094785131215004, "grad_norm": 2.3118302822113037, "learning_rate": 5.089413103770787e-06, "loss": 0.4672, "step": 12272 }, { "epoch": 0.5095200286457117, "grad_norm": 2.035412311553955, "learning_rate": 5.0887408956602316e-06, "loss": 0.5069, "step": 12273 }, { "epoch": 0.509561544169923, "grad_norm": 2.124131202697754, "learning_rate": 5.0880686859452145e-06, "loss": 0.4648, "step": 12274 }, { "epoch": 0.5096030596941343, "grad_norm": 2.732339382171631, "learning_rate": 5.0873964746378915e-06, "loss": 0.6291, "step": 12275 }, { "epoch": 0.5096445752183457, "grad_norm": 2.2396061420440674, "learning_rate": 5.086724261750413e-06, "loss": 0.4433, "step": 12276 }, { "epoch": 0.509686090742557, "grad_norm": 2.081166982650757, "learning_rate": 5.086052047294936e-06, "loss": 0.4531, "step": 12277 }, { "epoch": 0.5097276062667684, "grad_norm": 2.29291033744812, "learning_rate": 5.085379831283612e-06, "loss": 0.5655, "step": 12278 }, { "epoch": 0.5097691217909797, "grad_norm": 2.6488263607025146, "learning_rate": 5.084707613728598e-06, "loss": 0.5617, "step": 12279 }, { "epoch": 0.509810637315191, "grad_norm": 2.4126241207122803, "learning_rate": 5.0840353946420436e-06, "loss": 0.5782, "step": 12280 }, { "epoch": 0.5098521528394024, "grad_norm": 2.598557233810425, "learning_rate": 5.083363174036108e-06, "loss": 0.5418, "step": 12281 }, { "epoch": 0.5098936683636137, "grad_norm": 2.0886759757995605, "learning_rate": 5.082690951922939e-06, "loss": 0.4567, "step": 12282 }, { "epoch": 0.5099351838878251, "grad_norm": 2.1857707500457764, "learning_rate": 5.082018728314697e-06, "loss": 0.4579, "step": 12283 }, { "epoch": 0.5099766994120364, "grad_norm": 2.8266499042510986, "learning_rate": 5.081346503223531e-06, "loss": 0.4658, "step": 12284 }, { "epoch": 0.5100182149362478, "grad_norm": 2.609884023666382, "learning_rate": 5.080674276661597e-06, "loss": 0.5257, "step": 12285 }, { "epoch": 0.510059730460459, "grad_norm": 2.667332649230957, "learning_rate": 5.080002048641049e-06, "loss": 0.5737, "step": 12286 }, { "epoch": 0.5101012459846704, "grad_norm": 2.0071346759796143, "learning_rate": 5.07932981917404e-06, "loss": 0.4808, "step": 12287 }, { "epoch": 0.5101427615088817, "grad_norm": 2.0371158123016357, "learning_rate": 5.078657588272726e-06, "loss": 0.5329, "step": 12288 }, { "epoch": 0.5101842770330931, "grad_norm": 2.547308921813965, "learning_rate": 5.0779853559492595e-06, "loss": 0.4367, "step": 12289 }, { "epoch": 0.5102257925573044, "grad_norm": 2.6985561847686768, "learning_rate": 5.077313122215796e-06, "loss": 0.3827, "step": 12290 }, { "epoch": 0.5102673080815158, "grad_norm": 2.5759458541870117, "learning_rate": 5.076640887084488e-06, "loss": 0.6572, "step": 12291 }, { "epoch": 0.510308823605727, "grad_norm": 2.601149320602417, "learning_rate": 5.075968650567492e-06, "loss": 0.474, "step": 12292 }, { "epoch": 0.5103503391299384, "grad_norm": 2.471254825592041, "learning_rate": 5.0752964126769585e-06, "loss": 0.5269, "step": 12293 }, { "epoch": 0.5103918546541497, "grad_norm": 2.380232334136963, "learning_rate": 5.074624173425047e-06, "loss": 0.3941, "step": 12294 }, { "epoch": 0.5104333701783611, "grad_norm": 2.84731125831604, "learning_rate": 5.073951932823907e-06, "loss": 0.6379, "step": 12295 }, { "epoch": 0.5104748857025724, "grad_norm": 2.326875925064087, "learning_rate": 5.073279690885697e-06, "loss": 0.498, "step": 12296 }, { "epoch": 0.5105164012267838, "grad_norm": 2.267066240310669, "learning_rate": 5.0726074476225675e-06, "loss": 0.5337, "step": 12297 }, { "epoch": 0.510557916750995, "grad_norm": 2.713376998901367, "learning_rate": 5.071935203046674e-06, "loss": 0.6376, "step": 12298 }, { "epoch": 0.5105994322752064, "grad_norm": 2.0482258796691895, "learning_rate": 5.0712629571701726e-06, "loss": 0.4368, "step": 12299 }, { "epoch": 0.5106409477994177, "grad_norm": 2.205703020095825, "learning_rate": 5.070590710005214e-06, "loss": 0.4712, "step": 12300 }, { "epoch": 0.5106824633236291, "grad_norm": 3.3700928688049316, "learning_rate": 5.069918461563957e-06, "loss": 0.4651, "step": 12301 }, { "epoch": 0.5107239788478404, "grad_norm": 2.6632742881774902, "learning_rate": 5.069246211858553e-06, "loss": 0.484, "step": 12302 }, { "epoch": 0.5107654943720518, "grad_norm": 1.946660041809082, "learning_rate": 5.068573960901159e-06, "loss": 0.3588, "step": 12303 }, { "epoch": 0.510807009896263, "grad_norm": 2.1715030670166016, "learning_rate": 5.067901708703926e-06, "loss": 0.3892, "step": 12304 }, { "epoch": 0.5108485254204744, "grad_norm": 2.5766077041625977, "learning_rate": 5.067229455279013e-06, "loss": 0.5228, "step": 12305 }, { "epoch": 0.5108900409446857, "grad_norm": 2.771909475326538, "learning_rate": 5.06655720063857e-06, "loss": 0.5435, "step": 12306 }, { "epoch": 0.5109315564688971, "grad_norm": 2.424128293991089, "learning_rate": 5.065884944794755e-06, "loss": 0.4793, "step": 12307 }, { "epoch": 0.5109730719931084, "grad_norm": 2.2363977432250977, "learning_rate": 5.065212687759721e-06, "loss": 0.5365, "step": 12308 }, { "epoch": 0.5110145875173198, "grad_norm": 2.9906845092773438, "learning_rate": 5.064540429545621e-06, "loss": 0.4261, "step": 12309 }, { "epoch": 0.511056103041531, "grad_norm": 2.3820712566375732, "learning_rate": 5.063868170164614e-06, "loss": 0.298, "step": 12310 }, { "epoch": 0.5110976185657424, "grad_norm": 2.0717082023620605, "learning_rate": 5.063195909628851e-06, "loss": 0.4549, "step": 12311 }, { "epoch": 0.5111391340899538, "grad_norm": 2.263216257095337, "learning_rate": 5.062523647950487e-06, "loss": 0.4316, "step": 12312 }, { "epoch": 0.5111806496141651, "grad_norm": 2.1911935806274414, "learning_rate": 5.061851385141677e-06, "loss": 0.6075, "step": 12313 }, { "epoch": 0.5112221651383765, "grad_norm": 2.4351181983947754, "learning_rate": 5.061179121214579e-06, "loss": 0.5744, "step": 12314 }, { "epoch": 0.5112636806625878, "grad_norm": 2.411194324493408, "learning_rate": 5.060506856181342e-06, "loss": 0.5945, "step": 12315 }, { "epoch": 0.5113051961867991, "grad_norm": 2.4946560859680176, "learning_rate": 5.059834590054124e-06, "loss": 0.5445, "step": 12316 }, { "epoch": 0.5113467117110104, "grad_norm": 2.2747559547424316, "learning_rate": 5.059162322845081e-06, "loss": 0.5128, "step": 12317 }, { "epoch": 0.5113882272352218, "grad_norm": 2.2174389362335205, "learning_rate": 5.058490054566365e-06, "loss": 0.369, "step": 12318 }, { "epoch": 0.5114297427594331, "grad_norm": 2.5063393115997314, "learning_rate": 5.057817785230131e-06, "loss": 0.4864, "step": 12319 }, { "epoch": 0.5114712582836445, "grad_norm": 2.350147247314453, "learning_rate": 5.057145514848537e-06, "loss": 0.5391, "step": 12320 }, { "epoch": 0.5115127738078558, "grad_norm": 2.6177632808685303, "learning_rate": 5.056473243433735e-06, "loss": 0.5856, "step": 12321 }, { "epoch": 0.5115542893320671, "grad_norm": 3.711949586868286, "learning_rate": 5.055800970997879e-06, "loss": 0.5708, "step": 12322 }, { "epoch": 0.5115958048562784, "grad_norm": 2.2189366817474365, "learning_rate": 5.055128697553128e-06, "loss": 0.4836, "step": 12323 }, { "epoch": 0.5116373203804898, "grad_norm": 2.5646770000457764, "learning_rate": 5.054456423111632e-06, "loss": 0.5925, "step": 12324 }, { "epoch": 0.5116788359047011, "grad_norm": 2.028989553451538, "learning_rate": 5.053784147685549e-06, "loss": 0.6047, "step": 12325 }, { "epoch": 0.5117203514289125, "grad_norm": 2.0773355960845947, "learning_rate": 5.053111871287032e-06, "loss": 0.4891, "step": 12326 }, { "epoch": 0.5117618669531238, "grad_norm": 1.93868088722229, "learning_rate": 5.052439593928239e-06, "loss": 0.4235, "step": 12327 }, { "epoch": 0.5118033824773351, "grad_norm": 2.2046613693237305, "learning_rate": 5.051767315621321e-06, "loss": 0.4278, "step": 12328 }, { "epoch": 0.5118448980015464, "grad_norm": 2.708266258239746, "learning_rate": 5.0510950363784365e-06, "loss": 0.5082, "step": 12329 }, { "epoch": 0.5118864135257578, "grad_norm": 2.482987403869629, "learning_rate": 5.050422756211738e-06, "loss": 0.4147, "step": 12330 }, { "epoch": 0.5119279290499691, "grad_norm": 2.3633716106414795, "learning_rate": 5.049750475133383e-06, "loss": 0.5007, "step": 12331 }, { "epoch": 0.5119694445741805, "grad_norm": 2.8101842403411865, "learning_rate": 5.049078193155523e-06, "loss": 0.6154, "step": 12332 }, { "epoch": 0.5120109600983918, "grad_norm": 1.9599016904830933, "learning_rate": 5.0484059102903174e-06, "loss": 0.5085, "step": 12333 }, { "epoch": 0.5120524756226031, "grad_norm": 2.617492914199829, "learning_rate": 5.047733626549918e-06, "loss": 0.5163, "step": 12334 }, { "epoch": 0.5120939911468144, "grad_norm": 2.504453420639038, "learning_rate": 5.0470613419464795e-06, "loss": 0.4525, "step": 12335 }, { "epoch": 0.5121355066710258, "grad_norm": 2.5265681743621826, "learning_rate": 5.046389056492161e-06, "loss": 0.6654, "step": 12336 }, { "epoch": 0.5121770221952371, "grad_norm": 2.69769287109375, "learning_rate": 5.045716770199114e-06, "loss": 0.4457, "step": 12337 }, { "epoch": 0.5122185377194485, "grad_norm": 2.0297582149505615, "learning_rate": 5.045044483079493e-06, "loss": 0.374, "step": 12338 }, { "epoch": 0.5122600532436598, "grad_norm": 3.3917295932769775, "learning_rate": 5.044372195145455e-06, "loss": 0.4802, "step": 12339 }, { "epoch": 0.5123015687678711, "grad_norm": 2.556816339492798, "learning_rate": 5.0436999064091575e-06, "loss": 0.5733, "step": 12340 }, { "epoch": 0.5123430842920824, "grad_norm": 2.3431828022003174, "learning_rate": 5.043027616882751e-06, "loss": 0.4655, "step": 12341 }, { "epoch": 0.5123845998162938, "grad_norm": 2.374189615249634, "learning_rate": 5.042355326578393e-06, "loss": 0.6045, "step": 12342 }, { "epoch": 0.5124261153405052, "grad_norm": 2.6142640113830566, "learning_rate": 5.041683035508238e-06, "loss": 0.5462, "step": 12343 }, { "epoch": 0.5124676308647165, "grad_norm": 2.8959596157073975, "learning_rate": 5.0410107436844435e-06, "loss": 0.6076, "step": 12344 }, { "epoch": 0.5125091463889279, "grad_norm": 2.642181634902954, "learning_rate": 5.040338451119161e-06, "loss": 0.5343, "step": 12345 }, { "epoch": 0.5125506619131391, "grad_norm": 2.151604175567627, "learning_rate": 5.039666157824549e-06, "loss": 0.6502, "step": 12346 }, { "epoch": 0.5125921774373505, "grad_norm": 2.5346858501434326, "learning_rate": 5.038993863812761e-06, "loss": 0.592, "step": 12347 }, { "epoch": 0.5126336929615618, "grad_norm": 2.3112165927886963, "learning_rate": 5.038321569095954e-06, "loss": 0.539, "step": 12348 }, { "epoch": 0.5126752084857732, "grad_norm": 2.4829087257385254, "learning_rate": 5.037649273686281e-06, "loss": 0.4524, "step": 12349 }, { "epoch": 0.5127167240099845, "grad_norm": 2.445765256881714, "learning_rate": 5.036976977595898e-06, "loss": 0.6129, "step": 12350 }, { "epoch": 0.5127582395341959, "grad_norm": 2.3584463596343994, "learning_rate": 5.036304680836959e-06, "loss": 0.5594, "step": 12351 }, { "epoch": 0.5127997550584071, "grad_norm": 2.235651969909668, "learning_rate": 5.0356323834216225e-06, "loss": 0.558, "step": 12352 }, { "epoch": 0.5128412705826185, "grad_norm": 2.1369049549102783, "learning_rate": 5.034960085362041e-06, "loss": 0.3531, "step": 12353 }, { "epoch": 0.5128827861068298, "grad_norm": 2.161426067352295, "learning_rate": 5.034287786670373e-06, "loss": 0.3284, "step": 12354 }, { "epoch": 0.5129243016310412, "grad_norm": 2.129993438720703, "learning_rate": 5.033615487358771e-06, "loss": 0.6375, "step": 12355 }, { "epoch": 0.5129658171552525, "grad_norm": 3.2157809734344482, "learning_rate": 5.032943187439392e-06, "loss": 0.5395, "step": 12356 }, { "epoch": 0.5130073326794639, "grad_norm": 2.6792426109313965, "learning_rate": 5.03227088692439e-06, "loss": 0.6033, "step": 12357 }, { "epoch": 0.5130488482036751, "grad_norm": 2.3949944972991943, "learning_rate": 5.031598585825921e-06, "loss": 0.6679, "step": 12358 }, { "epoch": 0.5130903637278865, "grad_norm": 1.9186437129974365, "learning_rate": 5.030926284156142e-06, "loss": 0.5008, "step": 12359 }, { "epoch": 0.5131318792520978, "grad_norm": 2.0599961280822754, "learning_rate": 5.030253981927205e-06, "loss": 0.5203, "step": 12360 }, { "epoch": 0.5131733947763092, "grad_norm": 2.448625087738037, "learning_rate": 5.0295816791512685e-06, "loss": 0.5378, "step": 12361 }, { "epoch": 0.5132149103005205, "grad_norm": 2.1825644969940186, "learning_rate": 5.028909375840485e-06, "loss": 0.5257, "step": 12362 }, { "epoch": 0.5132564258247319, "grad_norm": 2.590841293334961, "learning_rate": 5.028237072007014e-06, "loss": 0.5534, "step": 12363 }, { "epoch": 0.5132979413489431, "grad_norm": 2.9027130603790283, "learning_rate": 5.027564767663007e-06, "loss": 0.5682, "step": 12364 }, { "epoch": 0.5133394568731545, "grad_norm": 2.468517541885376, "learning_rate": 5.026892462820621e-06, "loss": 0.6097, "step": 12365 }, { "epoch": 0.5133809723973658, "grad_norm": 2.6226301193237305, "learning_rate": 5.026220157492012e-06, "loss": 0.563, "step": 12366 }, { "epoch": 0.5134224879215772, "grad_norm": 2.5199944972991943, "learning_rate": 5.0255478516893345e-06, "loss": 0.509, "step": 12367 }, { "epoch": 0.5134640034457885, "grad_norm": 2.1383056640625, "learning_rate": 5.024875545424744e-06, "loss": 0.4989, "step": 12368 }, { "epoch": 0.5135055189699999, "grad_norm": 2.135085344314575, "learning_rate": 5.0242032387103974e-06, "loss": 0.5514, "step": 12369 }, { "epoch": 0.5135470344942111, "grad_norm": 2.625755548477173, "learning_rate": 5.0235309315584504e-06, "loss": 0.5049, "step": 12370 }, { "epoch": 0.5135885500184225, "grad_norm": 3.4497487545013428, "learning_rate": 5.022858623981055e-06, "loss": 0.5516, "step": 12371 }, { "epoch": 0.5136300655426338, "grad_norm": 2.3250317573547363, "learning_rate": 5.022186315990371e-06, "loss": 0.5011, "step": 12372 }, { "epoch": 0.5136715810668452, "grad_norm": 2.59967041015625, "learning_rate": 5.021514007598551e-06, "loss": 0.585, "step": 12373 }, { "epoch": 0.5137130965910566, "grad_norm": 2.737259864807129, "learning_rate": 5.020841698817753e-06, "loss": 0.5423, "step": 12374 }, { "epoch": 0.5137546121152679, "grad_norm": 2.094456195831299, "learning_rate": 5.020169389660129e-06, "loss": 0.5939, "step": 12375 }, { "epoch": 0.5137961276394792, "grad_norm": 1.9893609285354614, "learning_rate": 5.019497080137838e-06, "loss": 0.4143, "step": 12376 }, { "epoch": 0.5138376431636905, "grad_norm": 2.3197615146636963, "learning_rate": 5.018824770263033e-06, "loss": 0.4104, "step": 12377 }, { "epoch": 0.5138791586879019, "grad_norm": 1.8291915655136108, "learning_rate": 5.018152460047872e-06, "loss": 0.44, "step": 12378 }, { "epoch": 0.5139206742121132, "grad_norm": 2.536360502243042, "learning_rate": 5.017480149504509e-06, "loss": 0.6249, "step": 12379 }, { "epoch": 0.5139621897363246, "grad_norm": 3.02473783493042, "learning_rate": 5.016807838645099e-06, "loss": 0.5102, "step": 12380 }, { "epoch": 0.5140037052605358, "grad_norm": 2.553004503250122, "learning_rate": 5.016135527481799e-06, "loss": 0.5933, "step": 12381 }, { "epoch": 0.5140452207847472, "grad_norm": 2.736823797225952, "learning_rate": 5.015463216026764e-06, "loss": 0.508, "step": 12382 }, { "epoch": 0.5140867363089585, "grad_norm": 3.001873254776001, "learning_rate": 5.014790904292149e-06, "loss": 0.6341, "step": 12383 }, { "epoch": 0.5141282518331699, "grad_norm": 2.188110828399658, "learning_rate": 5.014118592290112e-06, "loss": 0.4325, "step": 12384 }, { "epoch": 0.5141697673573812, "grad_norm": 2.397369146347046, "learning_rate": 5.013446280032805e-06, "loss": 0.5723, "step": 12385 }, { "epoch": 0.5142112828815926, "grad_norm": 2.48443341255188, "learning_rate": 5.012773967532386e-06, "loss": 0.4492, "step": 12386 }, { "epoch": 0.5142527984058038, "grad_norm": 2.6024863719940186, "learning_rate": 5.01210165480101e-06, "loss": 0.4739, "step": 12387 }, { "epoch": 0.5142943139300152, "grad_norm": 2.047578811645508, "learning_rate": 5.011429341850833e-06, "loss": 0.4281, "step": 12388 }, { "epoch": 0.5143358294542265, "grad_norm": 2.6546437740325928, "learning_rate": 5.01075702869401e-06, "loss": 0.5698, "step": 12389 }, { "epoch": 0.5143773449784379, "grad_norm": 2.5330448150634766, "learning_rate": 5.010084715342695e-06, "loss": 0.4205, "step": 12390 }, { "epoch": 0.5144188605026492, "grad_norm": 2.9659931659698486, "learning_rate": 5.009412401809048e-06, "loss": 0.6077, "step": 12391 }, { "epoch": 0.5144603760268606, "grad_norm": 2.2837045192718506, "learning_rate": 5.008740088105221e-06, "loss": 0.557, "step": 12392 }, { "epoch": 0.5145018915510718, "grad_norm": 2.449406385421753, "learning_rate": 5.00806777424337e-06, "loss": 0.4801, "step": 12393 }, { "epoch": 0.5145434070752832, "grad_norm": 2.1484882831573486, "learning_rate": 5.007395460235654e-06, "loss": 0.4485, "step": 12394 }, { "epoch": 0.5145849225994945, "grad_norm": 2.4481067657470703, "learning_rate": 5.006723146094222e-06, "loss": 0.4357, "step": 12395 }, { "epoch": 0.5146264381237059, "grad_norm": 2.706537961959839, "learning_rate": 5.006050831831236e-06, "loss": 0.5841, "step": 12396 }, { "epoch": 0.5146679536479172, "grad_norm": 1.9774640798568726, "learning_rate": 5.005378517458849e-06, "loss": 0.4155, "step": 12397 }, { "epoch": 0.5147094691721286, "grad_norm": 2.616328716278076, "learning_rate": 5.004706202989217e-06, "loss": 0.4883, "step": 12398 }, { "epoch": 0.5147509846963398, "grad_norm": 2.668455123901367, "learning_rate": 5.004033888434495e-06, "loss": 0.6328, "step": 12399 }, { "epoch": 0.5147925002205512, "grad_norm": 2.250753164291382, "learning_rate": 5.003361573806841e-06, "loss": 0.5339, "step": 12400 }, { "epoch": 0.5148340157447625, "grad_norm": 2.408078670501709, "learning_rate": 5.002689259118406e-06, "loss": 0.561, "step": 12401 }, { "epoch": 0.5148755312689739, "grad_norm": 1.8486753702163696, "learning_rate": 5.0020169443813505e-06, "loss": 0.4125, "step": 12402 }, { "epoch": 0.5149170467931852, "grad_norm": 2.4794626235961914, "learning_rate": 5.0013446296078265e-06, "loss": 0.5384, "step": 12403 }, { "epoch": 0.5149585623173966, "grad_norm": 2.451568126678467, "learning_rate": 5.000672314809991e-06, "loss": 0.4295, "step": 12404 }, { "epoch": 0.515000077841608, "grad_norm": 2.5655856132507324, "learning_rate": 5e-06, "loss": 0.4891, "step": 12405 }, { "epoch": 0.5150415933658192, "grad_norm": 2.2503793239593506, "learning_rate": 4.99932768519001e-06, "loss": 0.5851, "step": 12406 }, { "epoch": 0.5150831088900306, "grad_norm": 2.345543146133423, "learning_rate": 4.998655370392174e-06, "loss": 0.4933, "step": 12407 }, { "epoch": 0.5151246244142419, "grad_norm": 3.2986552715301514, "learning_rate": 4.997983055618653e-06, "loss": 0.6893, "step": 12408 }, { "epoch": 0.5151661399384533, "grad_norm": 3.0796685218811035, "learning_rate": 4.997310740881596e-06, "loss": 0.3892, "step": 12409 }, { "epoch": 0.5152076554626646, "grad_norm": 2.226111650466919, "learning_rate": 4.996638426193161e-06, "loss": 0.5262, "step": 12410 }, { "epoch": 0.515249170986876, "grad_norm": 2.6972131729125977, "learning_rate": 4.995966111565506e-06, "loss": 0.6897, "step": 12411 }, { "epoch": 0.5152906865110872, "grad_norm": 2.383788585662842, "learning_rate": 4.995293797010785e-06, "loss": 0.452, "step": 12412 }, { "epoch": 0.5153322020352986, "grad_norm": 2.3493053913116455, "learning_rate": 4.994621482541153e-06, "loss": 0.5462, "step": 12413 }, { "epoch": 0.5153737175595099, "grad_norm": 2.163971185684204, "learning_rate": 4.993949168168766e-06, "loss": 0.5629, "step": 12414 }, { "epoch": 0.5154152330837213, "grad_norm": 2.167999744415283, "learning_rate": 4.993276853905778e-06, "loss": 0.463, "step": 12415 }, { "epoch": 0.5154567486079326, "grad_norm": 2.3198091983795166, "learning_rate": 4.99260453976435e-06, "loss": 0.5727, "step": 12416 }, { "epoch": 0.515498264132144, "grad_norm": 2.3995249271392822, "learning_rate": 4.991932225756631e-06, "loss": 0.5833, "step": 12417 }, { "epoch": 0.5155397796563552, "grad_norm": 2.4469802379608154, "learning_rate": 4.99125991189478e-06, "loss": 0.5298, "step": 12418 }, { "epoch": 0.5155812951805666, "grad_norm": 2.6115574836730957, "learning_rate": 4.990587598190953e-06, "loss": 0.5334, "step": 12419 }, { "epoch": 0.5156228107047779, "grad_norm": 2.1765966415405273, "learning_rate": 4.989915284657304e-06, "loss": 0.5346, "step": 12420 }, { "epoch": 0.5156643262289893, "grad_norm": 2.134437322616577, "learning_rate": 4.989242971305993e-06, "loss": 0.5483, "step": 12421 }, { "epoch": 0.5157058417532006, "grad_norm": 2.7415430545806885, "learning_rate": 4.9885706581491685e-06, "loss": 0.4911, "step": 12422 }, { "epoch": 0.5157473572774119, "grad_norm": 2.141106128692627, "learning_rate": 4.9878983451989904e-06, "loss": 0.3876, "step": 12423 }, { "epoch": 0.5157888728016232, "grad_norm": 2.419630527496338, "learning_rate": 4.9872260324676146e-06, "loss": 0.4862, "step": 12424 }, { "epoch": 0.5158303883258346, "grad_norm": 2.4630041122436523, "learning_rate": 4.986553719967197e-06, "loss": 0.4336, "step": 12425 }, { "epoch": 0.5158719038500459, "grad_norm": 2.483842134475708, "learning_rate": 4.985881407709891e-06, "loss": 0.4977, "step": 12426 }, { "epoch": 0.5159134193742573, "grad_norm": 2.321744203567505, "learning_rate": 4.985209095707852e-06, "loss": 0.5261, "step": 12427 }, { "epoch": 0.5159549348984686, "grad_norm": 2.426734447479248, "learning_rate": 4.984536783973237e-06, "loss": 0.4069, "step": 12428 }, { "epoch": 0.5159964504226799, "grad_norm": 2.634274959564209, "learning_rate": 4.9838644725182035e-06, "loss": 0.4269, "step": 12429 }, { "epoch": 0.5160379659468912, "grad_norm": 2.08351993560791, "learning_rate": 4.983192161354902e-06, "loss": 0.3993, "step": 12430 }, { "epoch": 0.5160794814711026, "grad_norm": 2.2662858963012695, "learning_rate": 4.982519850495493e-06, "loss": 0.5065, "step": 12431 }, { "epoch": 0.5161209969953139, "grad_norm": 2.580781936645508, "learning_rate": 4.981847539952129e-06, "loss": 0.5223, "step": 12432 }, { "epoch": 0.5161625125195253, "grad_norm": 1.9702208042144775, "learning_rate": 4.981175229736967e-06, "loss": 0.4447, "step": 12433 }, { "epoch": 0.5162040280437366, "grad_norm": 2.8484535217285156, "learning_rate": 4.9805029198621645e-06, "loss": 0.4293, "step": 12434 }, { "epoch": 0.5162455435679479, "grad_norm": 2.0203487873077393, "learning_rate": 4.979830610339872e-06, "loss": 0.3409, "step": 12435 }, { "epoch": 0.5162870590921593, "grad_norm": 1.9738750457763672, "learning_rate": 4.979158301182248e-06, "loss": 0.3855, "step": 12436 }, { "epoch": 0.5163285746163706, "grad_norm": 2.303555965423584, "learning_rate": 4.9784859924014494e-06, "loss": 0.5553, "step": 12437 }, { "epoch": 0.516370090140582, "grad_norm": 2.944948434829712, "learning_rate": 4.97781368400963e-06, "loss": 0.5427, "step": 12438 }, { "epoch": 0.5164116056647933, "grad_norm": 2.805955410003662, "learning_rate": 4.977141376018946e-06, "loss": 0.6328, "step": 12439 }, { "epoch": 0.5164531211890047, "grad_norm": 2.026672124862671, "learning_rate": 4.976469068441551e-06, "loss": 0.5681, "step": 12440 }, { "epoch": 0.5164946367132159, "grad_norm": 2.534736394882202, "learning_rate": 4.9757967612896025e-06, "loss": 0.4919, "step": 12441 }, { "epoch": 0.5165361522374273, "grad_norm": 2.236867904663086, "learning_rate": 4.975124454575257e-06, "loss": 0.6687, "step": 12442 }, { "epoch": 0.5165776677616386, "grad_norm": 2.1684153079986572, "learning_rate": 4.974452148310667e-06, "loss": 0.4214, "step": 12443 }, { "epoch": 0.51661918328585, "grad_norm": 2.0821914672851562, "learning_rate": 4.9737798425079894e-06, "loss": 0.4211, "step": 12444 }, { "epoch": 0.5166606988100613, "grad_norm": 2.2454686164855957, "learning_rate": 4.97310753717938e-06, "loss": 0.4637, "step": 12445 }, { "epoch": 0.5167022143342727, "grad_norm": 2.5782225131988525, "learning_rate": 4.972435232336994e-06, "loss": 0.4823, "step": 12446 }, { "epoch": 0.5167437298584839, "grad_norm": 2.155083179473877, "learning_rate": 4.971762927992989e-06, "loss": 0.4772, "step": 12447 }, { "epoch": 0.5167852453826953, "grad_norm": 2.627906084060669, "learning_rate": 4.971090624159516e-06, "loss": 0.4095, "step": 12448 }, { "epoch": 0.5168267609069066, "grad_norm": 2.321810483932495, "learning_rate": 4.970418320848732e-06, "loss": 0.5692, "step": 12449 }, { "epoch": 0.516868276431118, "grad_norm": 3.2916643619537354, "learning_rate": 4.9697460180727964e-06, "loss": 0.6382, "step": 12450 }, { "epoch": 0.5169097919553293, "grad_norm": 3.0961811542510986, "learning_rate": 4.96907371584386e-06, "loss": 0.6436, "step": 12451 }, { "epoch": 0.5169513074795407, "grad_norm": 2.9611167907714844, "learning_rate": 4.9684014141740805e-06, "loss": 0.4989, "step": 12452 }, { "epoch": 0.5169928230037519, "grad_norm": 2.2874863147735596, "learning_rate": 4.967729113075611e-06, "loss": 0.4167, "step": 12453 }, { "epoch": 0.5170343385279633, "grad_norm": 2.6100523471832275, "learning_rate": 4.967056812560609e-06, "loss": 0.4798, "step": 12454 }, { "epoch": 0.5170758540521746, "grad_norm": 1.9006447792053223, "learning_rate": 4.966384512641231e-06, "loss": 0.4761, "step": 12455 }, { "epoch": 0.517117369576386, "grad_norm": 2.280496835708618, "learning_rate": 4.965712213329629e-06, "loss": 0.4654, "step": 12456 }, { "epoch": 0.5171588851005973, "grad_norm": 2.8868064880371094, "learning_rate": 4.9650399146379594e-06, "loss": 0.5554, "step": 12457 }, { "epoch": 0.5172004006248087, "grad_norm": 2.389927864074707, "learning_rate": 4.964367616578378e-06, "loss": 0.5255, "step": 12458 }, { "epoch": 0.5172419161490199, "grad_norm": 1.702864170074463, "learning_rate": 4.963695319163041e-06, "loss": 0.506, "step": 12459 }, { "epoch": 0.5172834316732313, "grad_norm": 2.3022544384002686, "learning_rate": 4.9630230224041055e-06, "loss": 0.503, "step": 12460 }, { "epoch": 0.5173249471974426, "grad_norm": 2.2029733657836914, "learning_rate": 4.962350726313722e-06, "loss": 0.4902, "step": 12461 }, { "epoch": 0.517366462721654, "grad_norm": 2.4102492332458496, "learning_rate": 4.961678430904048e-06, "loss": 0.7027, "step": 12462 }, { "epoch": 0.5174079782458653, "grad_norm": 2.1404571533203125, "learning_rate": 4.96100613618724e-06, "loss": 0.5413, "step": 12463 }, { "epoch": 0.5174494937700767, "grad_norm": 2.4813232421875, "learning_rate": 4.960333842175453e-06, "loss": 0.5191, "step": 12464 }, { "epoch": 0.5174910092942879, "grad_norm": 2.595398426055908, "learning_rate": 4.9596615488808406e-06, "loss": 0.4937, "step": 12465 }, { "epoch": 0.5175325248184993, "grad_norm": 2.4252898693084717, "learning_rate": 4.958989256315558e-06, "loss": 0.5732, "step": 12466 }, { "epoch": 0.5175740403427107, "grad_norm": 2.879880428314209, "learning_rate": 4.9583169644917625e-06, "loss": 0.5754, "step": 12467 }, { "epoch": 0.517615555866922, "grad_norm": 2.4022274017333984, "learning_rate": 4.957644673421609e-06, "loss": 0.5656, "step": 12468 }, { "epoch": 0.5176570713911334, "grad_norm": 2.731942892074585, "learning_rate": 4.956972383117251e-06, "loss": 0.393, "step": 12469 }, { "epoch": 0.5176985869153446, "grad_norm": 2.639345169067383, "learning_rate": 4.956300093590844e-06, "loss": 0.7051, "step": 12470 }, { "epoch": 0.517740102439556, "grad_norm": 3.1528825759887695, "learning_rate": 4.9556278048545445e-06, "loss": 0.4239, "step": 12471 }, { "epoch": 0.5177816179637673, "grad_norm": 2.3846352100372314, "learning_rate": 4.954955516920507e-06, "loss": 0.5726, "step": 12472 }, { "epoch": 0.5178231334879787, "grad_norm": 2.161134958267212, "learning_rate": 4.954283229800889e-06, "loss": 0.4937, "step": 12473 }, { "epoch": 0.51786464901219, "grad_norm": 2.546475887298584, "learning_rate": 4.953610943507841e-06, "loss": 0.6021, "step": 12474 }, { "epoch": 0.5179061645364014, "grad_norm": 3.017087459564209, "learning_rate": 4.952938658053521e-06, "loss": 0.5044, "step": 12475 }, { "epoch": 0.5179476800606126, "grad_norm": 3.064086675643921, "learning_rate": 4.9522663734500835e-06, "loss": 0.5465, "step": 12476 }, { "epoch": 0.517989195584824, "grad_norm": 2.3633365631103516, "learning_rate": 4.951594089709685e-06, "loss": 0.605, "step": 12477 }, { "epoch": 0.5180307111090353, "grad_norm": 2.595212936401367, "learning_rate": 4.950921806844478e-06, "loss": 0.6307, "step": 12478 }, { "epoch": 0.5180722266332467, "grad_norm": 2.1000618934631348, "learning_rate": 4.950249524866619e-06, "loss": 0.4295, "step": 12479 }, { "epoch": 0.518113742157458, "grad_norm": 2.0094962120056152, "learning_rate": 4.949577243788262e-06, "loss": 0.3561, "step": 12480 }, { "epoch": 0.5181552576816694, "grad_norm": 2.814847230911255, "learning_rate": 4.948904963621566e-06, "loss": 0.4495, "step": 12481 }, { "epoch": 0.5181967732058806, "grad_norm": 2.4386491775512695, "learning_rate": 4.94823268437868e-06, "loss": 0.6215, "step": 12482 }, { "epoch": 0.518238288730092, "grad_norm": 2.216768503189087, "learning_rate": 4.947560406071763e-06, "loss": 0.4286, "step": 12483 }, { "epoch": 0.5182798042543033, "grad_norm": 2.7175450325012207, "learning_rate": 4.946888128712968e-06, "loss": 0.5598, "step": 12484 }, { "epoch": 0.5183213197785147, "grad_norm": 2.983457565307617, "learning_rate": 4.946215852314454e-06, "loss": 0.5828, "step": 12485 }, { "epoch": 0.518362835302726, "grad_norm": 2.5632591247558594, "learning_rate": 4.94554357688837e-06, "loss": 0.5823, "step": 12486 }, { "epoch": 0.5184043508269374, "grad_norm": 3.008821487426758, "learning_rate": 4.944871302446874e-06, "loss": 0.595, "step": 12487 }, { "epoch": 0.5184458663511486, "grad_norm": 2.282970666885376, "learning_rate": 4.944199029002122e-06, "loss": 0.499, "step": 12488 }, { "epoch": 0.51848738187536, "grad_norm": 2.6811211109161377, "learning_rate": 4.943526756566266e-06, "loss": 0.3863, "step": 12489 }, { "epoch": 0.5185288973995713, "grad_norm": 2.5193581581115723, "learning_rate": 4.942854485151464e-06, "loss": 0.5155, "step": 12490 }, { "epoch": 0.5185704129237827, "grad_norm": 2.3092265129089355, "learning_rate": 4.94218221476987e-06, "loss": 0.5443, "step": 12491 }, { "epoch": 0.518611928447994, "grad_norm": 2.1265408992767334, "learning_rate": 4.941509945433636e-06, "loss": 0.5313, "step": 12492 }, { "epoch": 0.5186534439722054, "grad_norm": 2.370910167694092, "learning_rate": 4.94083767715492e-06, "loss": 0.497, "step": 12493 }, { "epoch": 0.5186949594964166, "grad_norm": 2.251255750656128, "learning_rate": 4.940165409945877e-06, "loss": 0.4863, "step": 12494 }, { "epoch": 0.518736475020628, "grad_norm": 2.2081589698791504, "learning_rate": 4.939493143818659e-06, "loss": 0.487, "step": 12495 }, { "epoch": 0.5187779905448393, "grad_norm": 2.1074483394622803, "learning_rate": 4.938820878785423e-06, "loss": 0.5175, "step": 12496 }, { "epoch": 0.5188195060690507, "grad_norm": 2.6022844314575195, "learning_rate": 4.938148614858323e-06, "loss": 0.6883, "step": 12497 }, { "epoch": 0.5188610215932621, "grad_norm": 2.6685469150543213, "learning_rate": 4.937476352049515e-06, "loss": 0.2876, "step": 12498 }, { "epoch": 0.5189025371174734, "grad_norm": 2.2586445808410645, "learning_rate": 4.936804090371151e-06, "loss": 0.4372, "step": 12499 }, { "epoch": 0.5189440526416847, "grad_norm": 3.1395394802093506, "learning_rate": 4.936131829835388e-06, "loss": 0.6493, "step": 12500 }, { "epoch": 0.518985568165896, "grad_norm": 2.6030917167663574, "learning_rate": 4.93545957045438e-06, "loss": 0.4355, "step": 12501 }, { "epoch": 0.5190270836901074, "grad_norm": 2.0150487422943115, "learning_rate": 4.934787312240281e-06, "loss": 0.4139, "step": 12502 }, { "epoch": 0.5190685992143187, "grad_norm": 2.804037094116211, "learning_rate": 4.934115055205247e-06, "loss": 0.4906, "step": 12503 }, { "epoch": 0.5191101147385301, "grad_norm": 2.883758068084717, "learning_rate": 4.933442799361432e-06, "loss": 0.5963, "step": 12504 }, { "epoch": 0.5191516302627414, "grad_norm": 2.777674913406372, "learning_rate": 4.932770544720989e-06, "loss": 0.5548, "step": 12505 }, { "epoch": 0.5191931457869527, "grad_norm": 2.423151731491089, "learning_rate": 4.932098291296074e-06, "loss": 0.5777, "step": 12506 }, { "epoch": 0.519234661311164, "grad_norm": 3.0850741863250732, "learning_rate": 4.931426039098844e-06, "loss": 0.6016, "step": 12507 }, { "epoch": 0.5192761768353754, "grad_norm": 2.19273042678833, "learning_rate": 4.9307537881414485e-06, "loss": 0.5929, "step": 12508 }, { "epoch": 0.5193176923595867, "grad_norm": 2.4826228618621826, "learning_rate": 4.930081538436044e-06, "loss": 0.401, "step": 12509 }, { "epoch": 0.5193592078837981, "grad_norm": 2.8057820796966553, "learning_rate": 4.929409289994787e-06, "loss": 0.5251, "step": 12510 }, { "epoch": 0.5194007234080094, "grad_norm": 2.558907985687256, "learning_rate": 4.928737042829831e-06, "loss": 0.4343, "step": 12511 }, { "epoch": 0.5194422389322207, "grad_norm": 2.2058346271514893, "learning_rate": 4.928064796953328e-06, "loss": 0.4389, "step": 12512 }, { "epoch": 0.519483754456432, "grad_norm": 2.509580135345459, "learning_rate": 4.927392552377434e-06, "loss": 0.5243, "step": 12513 }, { "epoch": 0.5195252699806434, "grad_norm": 2.8282082080841064, "learning_rate": 4.926720309114306e-06, "loss": 0.5594, "step": 12514 }, { "epoch": 0.5195667855048547, "grad_norm": 2.825155735015869, "learning_rate": 4.926048067176093e-06, "loss": 0.5111, "step": 12515 }, { "epoch": 0.5196083010290661, "grad_norm": 3.4953064918518066, "learning_rate": 4.925375826574955e-06, "loss": 0.6241, "step": 12516 }, { "epoch": 0.5196498165532774, "grad_norm": 2.1607508659362793, "learning_rate": 4.924703587323042e-06, "loss": 0.4234, "step": 12517 }, { "epoch": 0.5196913320774887, "grad_norm": 2.0591020584106445, "learning_rate": 4.92403134943251e-06, "loss": 0.4558, "step": 12518 }, { "epoch": 0.5197328476017, "grad_norm": 1.6926772594451904, "learning_rate": 4.923359112915512e-06, "loss": 0.3902, "step": 12519 }, { "epoch": 0.5197743631259114, "grad_norm": 2.2055773735046387, "learning_rate": 4.922686877784206e-06, "loss": 0.5718, "step": 12520 }, { "epoch": 0.5198158786501227, "grad_norm": 2.4122109413146973, "learning_rate": 4.922014644050742e-06, "loss": 0.4831, "step": 12521 }, { "epoch": 0.5198573941743341, "grad_norm": 2.571240186691284, "learning_rate": 4.921342411727275e-06, "loss": 0.6117, "step": 12522 }, { "epoch": 0.5198989096985454, "grad_norm": 2.2773447036743164, "learning_rate": 4.9206701808259605e-06, "loss": 0.4855, "step": 12523 }, { "epoch": 0.5199404252227567, "grad_norm": 2.968759059906006, "learning_rate": 4.919997951358953e-06, "loss": 0.5948, "step": 12524 }, { "epoch": 0.519981940746968, "grad_norm": 2.1857008934020996, "learning_rate": 4.919325723338405e-06, "loss": 0.4681, "step": 12525 }, { "epoch": 0.5200234562711794, "grad_norm": 2.6747682094573975, "learning_rate": 4.91865349677647e-06, "loss": 0.5601, "step": 12526 }, { "epoch": 0.5200649717953907, "grad_norm": 2.5868494510650635, "learning_rate": 4.917981271685304e-06, "loss": 0.4395, "step": 12527 }, { "epoch": 0.5201064873196021, "grad_norm": 2.5753915309906006, "learning_rate": 4.917309048077061e-06, "loss": 0.4802, "step": 12528 }, { "epoch": 0.5201480028438135, "grad_norm": 2.246565341949463, "learning_rate": 4.9166368259638936e-06, "loss": 0.5681, "step": 12529 }, { "epoch": 0.5201895183680247, "grad_norm": 2.0936033725738525, "learning_rate": 4.915964605357957e-06, "loss": 0.5064, "step": 12530 }, { "epoch": 0.5202310338922361, "grad_norm": 2.159961462020874, "learning_rate": 4.915292386271403e-06, "loss": 0.5248, "step": 12531 }, { "epoch": 0.5202725494164474, "grad_norm": 2.0310187339782715, "learning_rate": 4.914620168716388e-06, "loss": 0.4378, "step": 12532 }, { "epoch": 0.5203140649406588, "grad_norm": 2.4568731784820557, "learning_rate": 4.913947952705067e-06, "loss": 0.5126, "step": 12533 }, { "epoch": 0.5203555804648701, "grad_norm": 2.502087116241455, "learning_rate": 4.913275738249589e-06, "loss": 0.6188, "step": 12534 }, { "epoch": 0.5203970959890815, "grad_norm": 2.832298994064331, "learning_rate": 4.912603525362111e-06, "loss": 0.5632, "step": 12535 }, { "epoch": 0.5204386115132927, "grad_norm": 2.547950267791748, "learning_rate": 4.911931314054786e-06, "loss": 0.474, "step": 12536 }, { "epoch": 0.5204801270375041, "grad_norm": 2.4338266849517822, "learning_rate": 4.911259104339771e-06, "loss": 0.4964, "step": 12537 }, { "epoch": 0.5205216425617154, "grad_norm": 2.3182449340820312, "learning_rate": 4.910586896229215e-06, "loss": 0.5903, "step": 12538 }, { "epoch": 0.5205631580859268, "grad_norm": 2.6466732025146484, "learning_rate": 4.9099146897352725e-06, "loss": 0.597, "step": 12539 }, { "epoch": 0.5206046736101381, "grad_norm": 2.240262031555176, "learning_rate": 4.9092424848701e-06, "loss": 0.3502, "step": 12540 }, { "epoch": 0.5206461891343495, "grad_norm": 2.638964891433716, "learning_rate": 4.908570281645849e-06, "loss": 0.4876, "step": 12541 }, { "epoch": 0.5206877046585607, "grad_norm": 2.812840223312378, "learning_rate": 4.907898080074674e-06, "loss": 0.5017, "step": 12542 }, { "epoch": 0.5207292201827721, "grad_norm": 2.308095932006836, "learning_rate": 4.90722588016873e-06, "loss": 0.4841, "step": 12543 }, { "epoch": 0.5207707357069834, "grad_norm": 2.1149587631225586, "learning_rate": 4.906553681940166e-06, "loss": 0.5518, "step": 12544 }, { "epoch": 0.5208122512311948, "grad_norm": 2.395515203475952, "learning_rate": 4.905881485401138e-06, "loss": 0.5402, "step": 12545 }, { "epoch": 0.5208537667554061, "grad_norm": 2.1468746662139893, "learning_rate": 4.9052092905638034e-06, "loss": 0.4683, "step": 12546 }, { "epoch": 0.5208952822796175, "grad_norm": 2.688926935195923, "learning_rate": 4.904537097440309e-06, "loss": 0.5958, "step": 12547 }, { "epoch": 0.5209367978038287, "grad_norm": 2.2346272468566895, "learning_rate": 4.9038649060428126e-06, "loss": 0.5448, "step": 12548 }, { "epoch": 0.5209783133280401, "grad_norm": 2.900588035583496, "learning_rate": 4.9031927163834655e-06, "loss": 0.5689, "step": 12549 }, { "epoch": 0.5210198288522514, "grad_norm": 2.6760199069976807, "learning_rate": 4.902520528474424e-06, "loss": 0.5508, "step": 12550 }, { "epoch": 0.5210613443764628, "grad_norm": 2.406142234802246, "learning_rate": 4.901848342327837e-06, "loss": 0.4844, "step": 12551 }, { "epoch": 0.5211028599006741, "grad_norm": 2.2443854808807373, "learning_rate": 4.901176157955862e-06, "loss": 0.5431, "step": 12552 }, { "epoch": 0.5211443754248855, "grad_norm": 2.1206703186035156, "learning_rate": 4.900503975370649e-06, "loss": 0.5462, "step": 12553 }, { "epoch": 0.5211858909490967, "grad_norm": 2.475069761276245, "learning_rate": 4.899831794584354e-06, "loss": 0.5487, "step": 12554 }, { "epoch": 0.5212274064733081, "grad_norm": 2.332348108291626, "learning_rate": 4.89915961560913e-06, "loss": 0.5005, "step": 12555 }, { "epoch": 0.5212689219975194, "grad_norm": 2.583423614501953, "learning_rate": 4.898487438457128e-06, "loss": 0.5038, "step": 12556 }, { "epoch": 0.5213104375217308, "grad_norm": 2.4988667964935303, "learning_rate": 4.8978152631405015e-06, "loss": 0.5665, "step": 12557 }, { "epoch": 0.5213519530459421, "grad_norm": 2.627227306365967, "learning_rate": 4.897143089671405e-06, "loss": 0.6094, "step": 12558 }, { "epoch": 0.5213934685701535, "grad_norm": 2.625955581665039, "learning_rate": 4.896470918061992e-06, "loss": 0.4603, "step": 12559 }, { "epoch": 0.5214349840943648, "grad_norm": 2.9022791385650635, "learning_rate": 4.895798748324414e-06, "loss": 0.5339, "step": 12560 }, { "epoch": 0.5214764996185761, "grad_norm": 2.084300994873047, "learning_rate": 4.895126580470824e-06, "loss": 0.513, "step": 12561 }, { "epoch": 0.5215180151427875, "grad_norm": 2.4719910621643066, "learning_rate": 4.894454414513376e-06, "loss": 0.6449, "step": 12562 }, { "epoch": 0.5215595306669988, "grad_norm": 2.2876079082489014, "learning_rate": 4.893782250464224e-06, "loss": 0.3277, "step": 12563 }, { "epoch": 0.5216010461912102, "grad_norm": 2.388864517211914, "learning_rate": 4.893110088335519e-06, "loss": 0.5703, "step": 12564 }, { "epoch": 0.5216425617154214, "grad_norm": 2.44787859916687, "learning_rate": 4.892437928139413e-06, "loss": 0.5764, "step": 12565 }, { "epoch": 0.5216840772396328, "grad_norm": 2.2605834007263184, "learning_rate": 4.891765769888062e-06, "loss": 0.5968, "step": 12566 }, { "epoch": 0.5217255927638441, "grad_norm": 2.43312668800354, "learning_rate": 4.891093613593615e-06, "loss": 0.4343, "step": 12567 }, { "epoch": 0.5217671082880555, "grad_norm": 2.268153429031372, "learning_rate": 4.89042145926823e-06, "loss": 0.5253, "step": 12568 }, { "epoch": 0.5218086238122668, "grad_norm": 2.4505579471588135, "learning_rate": 4.889749306924054e-06, "loss": 0.5037, "step": 12569 }, { "epoch": 0.5218501393364782, "grad_norm": 2.147338390350342, "learning_rate": 4.889077156573242e-06, "loss": 0.4117, "step": 12570 }, { "epoch": 0.5218916548606894, "grad_norm": 2.376837968826294, "learning_rate": 4.888405008227948e-06, "loss": 0.5537, "step": 12571 }, { "epoch": 0.5219331703849008, "grad_norm": 2.6036884784698486, "learning_rate": 4.887732861900325e-06, "loss": 0.6713, "step": 12572 }, { "epoch": 0.5219746859091121, "grad_norm": 2.685518980026245, "learning_rate": 4.887060717602522e-06, "loss": 0.4425, "step": 12573 }, { "epoch": 0.5220162014333235, "grad_norm": 2.3464395999908447, "learning_rate": 4.886388575346694e-06, "loss": 0.5405, "step": 12574 }, { "epoch": 0.5220577169575348, "grad_norm": 3.3372886180877686, "learning_rate": 4.8857164351449935e-06, "loss": 0.6544, "step": 12575 }, { "epoch": 0.5220992324817462, "grad_norm": 2.221935272216797, "learning_rate": 4.885044297009575e-06, "loss": 0.6032, "step": 12576 }, { "epoch": 0.5221407480059574, "grad_norm": 2.203054904937744, "learning_rate": 4.884372160952587e-06, "loss": 0.5768, "step": 12577 }, { "epoch": 0.5221822635301688, "grad_norm": 2.3146049976348877, "learning_rate": 4.883700026986183e-06, "loss": 0.5559, "step": 12578 }, { "epoch": 0.5222237790543801, "grad_norm": 2.5314013957977295, "learning_rate": 4.883027895122517e-06, "loss": 0.7044, "step": 12579 }, { "epoch": 0.5222652945785915, "grad_norm": 2.8056209087371826, "learning_rate": 4.882355765373739e-06, "loss": 0.5283, "step": 12580 }, { "epoch": 0.5223068101028028, "grad_norm": 2.6646878719329834, "learning_rate": 4.881683637752005e-06, "loss": 0.6416, "step": 12581 }, { "epoch": 0.5223483256270142, "grad_norm": 2.421438217163086, "learning_rate": 4.881011512269464e-06, "loss": 0.4978, "step": 12582 }, { "epoch": 0.5223898411512254, "grad_norm": 1.9307612180709839, "learning_rate": 4.8803393889382675e-06, "loss": 0.4886, "step": 12583 }, { "epoch": 0.5224313566754368, "grad_norm": 2.269153118133545, "learning_rate": 4.8796672677705704e-06, "loss": 0.4865, "step": 12584 }, { "epoch": 0.5224728721996481, "grad_norm": 2.704496383666992, "learning_rate": 4.878995148778525e-06, "loss": 0.5922, "step": 12585 }, { "epoch": 0.5225143877238595, "grad_norm": 2.7563891410827637, "learning_rate": 4.878323031974282e-06, "loss": 0.5641, "step": 12586 }, { "epoch": 0.5225559032480708, "grad_norm": 2.8767237663269043, "learning_rate": 4.877650917369991e-06, "loss": 0.5378, "step": 12587 }, { "epoch": 0.5225974187722822, "grad_norm": 2.373784303665161, "learning_rate": 4.876978804977808e-06, "loss": 0.509, "step": 12588 }, { "epoch": 0.5226389342964934, "grad_norm": 2.5348174571990967, "learning_rate": 4.876306694809886e-06, "loss": 0.4089, "step": 12589 }, { "epoch": 0.5226804498207048, "grad_norm": 2.8821399211883545, "learning_rate": 4.875634586878373e-06, "loss": 0.4888, "step": 12590 }, { "epoch": 0.5227219653449162, "grad_norm": 2.8420863151550293, "learning_rate": 4.874962481195421e-06, "loss": 0.6335, "step": 12591 }, { "epoch": 0.5227634808691275, "grad_norm": 3.284379482269287, "learning_rate": 4.8742903777731854e-06, "loss": 0.711, "step": 12592 }, { "epoch": 0.5228049963933389, "grad_norm": 2.1861979961395264, "learning_rate": 4.873618276623814e-06, "loss": 0.4174, "step": 12593 }, { "epoch": 0.5228465119175502, "grad_norm": 2.975477457046509, "learning_rate": 4.872946177759464e-06, "loss": 0.729, "step": 12594 }, { "epoch": 0.5228880274417615, "grad_norm": 3.125633716583252, "learning_rate": 4.872274081192281e-06, "loss": 0.5501, "step": 12595 }, { "epoch": 0.5229295429659728, "grad_norm": 2.5363121032714844, "learning_rate": 4.871601986934419e-06, "loss": 0.4869, "step": 12596 }, { "epoch": 0.5229710584901842, "grad_norm": 2.849388837814331, "learning_rate": 4.8709298949980314e-06, "loss": 0.574, "step": 12597 }, { "epoch": 0.5230125740143955, "grad_norm": 2.657114028930664, "learning_rate": 4.8702578053952695e-06, "loss": 0.4588, "step": 12598 }, { "epoch": 0.5230540895386069, "grad_norm": 2.375673770904541, "learning_rate": 4.869585718138282e-06, "loss": 0.4998, "step": 12599 }, { "epoch": 0.5230956050628182, "grad_norm": 2.3780839443206787, "learning_rate": 4.868913633239222e-06, "loss": 0.5865, "step": 12600 }, { "epoch": 0.5231371205870295, "grad_norm": 2.170848846435547, "learning_rate": 4.868241550710242e-06, "loss": 0.4755, "step": 12601 }, { "epoch": 0.5231786361112408, "grad_norm": 2.1765429973602295, "learning_rate": 4.867569470563495e-06, "loss": 0.5252, "step": 12602 }, { "epoch": 0.5232201516354522, "grad_norm": 2.6355674266815186, "learning_rate": 4.866897392811127e-06, "loss": 0.5668, "step": 12603 }, { "epoch": 0.5232616671596635, "grad_norm": 2.877995729446411, "learning_rate": 4.866225317465293e-06, "loss": 0.5352, "step": 12604 }, { "epoch": 0.5233031826838749, "grad_norm": 2.4488351345062256, "learning_rate": 4.8655532445381444e-06, "loss": 0.695, "step": 12605 }, { "epoch": 0.5233446982080862, "grad_norm": 2.9371981620788574, "learning_rate": 4.864881174041832e-06, "loss": 0.4771, "step": 12606 }, { "epoch": 0.5233862137322975, "grad_norm": 2.0680532455444336, "learning_rate": 4.864209105988508e-06, "loss": 0.5381, "step": 12607 }, { "epoch": 0.5234277292565088, "grad_norm": 2.5456953048706055, "learning_rate": 4.863537040390321e-06, "loss": 0.4463, "step": 12608 }, { "epoch": 0.5234692447807202, "grad_norm": 2.3916025161743164, "learning_rate": 4.862864977259423e-06, "loss": 0.4871, "step": 12609 }, { "epoch": 0.5235107603049315, "grad_norm": 2.6840689182281494, "learning_rate": 4.862192916607966e-06, "loss": 0.6633, "step": 12610 }, { "epoch": 0.5235522758291429, "grad_norm": 2.2558374404907227, "learning_rate": 4.861520858448103e-06, "loss": 0.5722, "step": 12611 }, { "epoch": 0.5235937913533542, "grad_norm": 2.6149091720581055, "learning_rate": 4.860848802791981e-06, "loss": 0.4424, "step": 12612 }, { "epoch": 0.5236353068775655, "grad_norm": 2.6673953533172607, "learning_rate": 4.860176749651753e-06, "loss": 0.5451, "step": 12613 }, { "epoch": 0.5236768224017768, "grad_norm": 2.7989602088928223, "learning_rate": 4.859504699039569e-06, "loss": 0.5472, "step": 12614 }, { "epoch": 0.5237183379259882, "grad_norm": 1.9732757806777954, "learning_rate": 4.858832650967582e-06, "loss": 0.4934, "step": 12615 }, { "epoch": 0.5237598534501995, "grad_norm": 2.265233039855957, "learning_rate": 4.85816060544794e-06, "loss": 0.5299, "step": 12616 }, { "epoch": 0.5238013689744109, "grad_norm": 2.668604612350464, "learning_rate": 4.857488562492796e-06, "loss": 0.5179, "step": 12617 }, { "epoch": 0.5238428844986222, "grad_norm": 2.5849452018737793, "learning_rate": 4.8568165221143e-06, "loss": 0.5252, "step": 12618 }, { "epoch": 0.5238844000228335, "grad_norm": 2.3087668418884277, "learning_rate": 4.856144484324602e-06, "loss": 0.4924, "step": 12619 }, { "epoch": 0.5239259155470448, "grad_norm": 2.5777480602264404, "learning_rate": 4.855472449135854e-06, "loss": 0.4109, "step": 12620 }, { "epoch": 0.5239674310712562, "grad_norm": 2.25913143157959, "learning_rate": 4.854800416560205e-06, "loss": 0.4045, "step": 12621 }, { "epoch": 0.5240089465954676, "grad_norm": 2.52300763130188, "learning_rate": 4.854128386609805e-06, "loss": 0.4945, "step": 12622 }, { "epoch": 0.5240504621196789, "grad_norm": 2.319417715072632, "learning_rate": 4.853456359296807e-06, "loss": 0.4251, "step": 12623 }, { "epoch": 0.5240919776438903, "grad_norm": 2.2117083072662354, "learning_rate": 4.852784334633363e-06, "loss": 0.4833, "step": 12624 }, { "epoch": 0.5241334931681015, "grad_norm": 2.5080513954162598, "learning_rate": 4.852112312631617e-06, "loss": 0.4593, "step": 12625 }, { "epoch": 0.5241750086923129, "grad_norm": 2.2160630226135254, "learning_rate": 4.851440293303723e-06, "loss": 0.413, "step": 12626 }, { "epoch": 0.5242165242165242, "grad_norm": 2.7983510494232178, "learning_rate": 4.8507682766618325e-06, "loss": 0.6504, "step": 12627 }, { "epoch": 0.5242580397407356, "grad_norm": 2.512641191482544, "learning_rate": 4.850096262718096e-06, "loss": 0.4297, "step": 12628 }, { "epoch": 0.5242995552649469, "grad_norm": 3.0075221061706543, "learning_rate": 4.84942425148466e-06, "loss": 0.4247, "step": 12629 }, { "epoch": 0.5243410707891583, "grad_norm": 2.269965887069702, "learning_rate": 4.848752242973677e-06, "loss": 0.4021, "step": 12630 }, { "epoch": 0.5243825863133695, "grad_norm": 2.6183066368103027, "learning_rate": 4.848080237197299e-06, "loss": 0.4998, "step": 12631 }, { "epoch": 0.5244241018375809, "grad_norm": 2.6024587154388428, "learning_rate": 4.847408234167673e-06, "loss": 0.5111, "step": 12632 }, { "epoch": 0.5244656173617922, "grad_norm": 2.2132010459899902, "learning_rate": 4.846736233896951e-06, "loss": 0.4679, "step": 12633 }, { "epoch": 0.5245071328860036, "grad_norm": 3.339311361312866, "learning_rate": 4.846064236397281e-06, "loss": 0.5326, "step": 12634 }, { "epoch": 0.5245486484102149, "grad_norm": 2.626325845718384, "learning_rate": 4.845392241680813e-06, "loss": 0.3741, "step": 12635 }, { "epoch": 0.5245901639344263, "grad_norm": 2.465534210205078, "learning_rate": 4.8447202497596975e-06, "loss": 0.402, "step": 12636 }, { "epoch": 0.5246316794586375, "grad_norm": 2.2588295936584473, "learning_rate": 4.844048260646088e-06, "loss": 0.428, "step": 12637 }, { "epoch": 0.5246731949828489, "grad_norm": 2.254300117492676, "learning_rate": 4.843376274352129e-06, "loss": 0.6036, "step": 12638 }, { "epoch": 0.5247147105070602, "grad_norm": 2.6135053634643555, "learning_rate": 4.842704290889971e-06, "loss": 0.6651, "step": 12639 }, { "epoch": 0.5247562260312716, "grad_norm": 2.291959524154663, "learning_rate": 4.8420323102717655e-06, "loss": 0.5712, "step": 12640 }, { "epoch": 0.5247977415554829, "grad_norm": 2.2411837577819824, "learning_rate": 4.841360332509663e-06, "loss": 0.4653, "step": 12641 }, { "epoch": 0.5248392570796943, "grad_norm": 2.666731595993042, "learning_rate": 4.840688357615809e-06, "loss": 0.403, "step": 12642 }, { "epoch": 0.5248807726039055, "grad_norm": 2.4010088443756104, "learning_rate": 4.840016385602356e-06, "loss": 0.6101, "step": 12643 }, { "epoch": 0.5249222881281169, "grad_norm": 2.6017444133758545, "learning_rate": 4.839344416481454e-06, "loss": 0.5519, "step": 12644 }, { "epoch": 0.5249638036523282, "grad_norm": 2.495603322982788, "learning_rate": 4.8386724502652515e-06, "loss": 0.5605, "step": 12645 }, { "epoch": 0.5250053191765396, "grad_norm": 2.250406503677368, "learning_rate": 4.838000486965897e-06, "loss": 0.4617, "step": 12646 }, { "epoch": 0.5250468347007509, "grad_norm": 2.7107772827148438, "learning_rate": 4.837328526595539e-06, "loss": 0.4677, "step": 12647 }, { "epoch": 0.5250883502249623, "grad_norm": 3.112004518508911, "learning_rate": 4.8366565691663294e-06, "loss": 0.6375, "step": 12648 }, { "epoch": 0.5251298657491735, "grad_norm": 2.017732858657837, "learning_rate": 4.835984614690415e-06, "loss": 0.5294, "step": 12649 }, { "epoch": 0.5251713812733849, "grad_norm": 2.1380224227905273, "learning_rate": 4.835312663179949e-06, "loss": 0.5406, "step": 12650 }, { "epoch": 0.5252128967975962, "grad_norm": 2.8286972045898438, "learning_rate": 4.834640714647075e-06, "loss": 0.6231, "step": 12651 }, { "epoch": 0.5252544123218076, "grad_norm": 2.0206971168518066, "learning_rate": 4.833968769103944e-06, "loss": 0.5047, "step": 12652 }, { "epoch": 0.525295927846019, "grad_norm": 2.834885835647583, "learning_rate": 4.833296826562706e-06, "loss": 0.4795, "step": 12653 }, { "epoch": 0.5253374433702303, "grad_norm": 2.4491589069366455, "learning_rate": 4.832624887035511e-06, "loss": 0.5354, "step": 12654 }, { "epoch": 0.5253789588944416, "grad_norm": 2.9646799564361572, "learning_rate": 4.831952950534505e-06, "loss": 0.5114, "step": 12655 }, { "epoch": 0.5254204744186529, "grad_norm": 2.348440408706665, "learning_rate": 4.8312810170718384e-06, "loss": 0.5828, "step": 12656 }, { "epoch": 0.5254619899428643, "grad_norm": 2.4746742248535156, "learning_rate": 4.830609086659659e-06, "loss": 0.5717, "step": 12657 }, { "epoch": 0.5255035054670756, "grad_norm": 2.724332571029663, "learning_rate": 4.829937159310118e-06, "loss": 0.5018, "step": 12658 }, { "epoch": 0.525545020991287, "grad_norm": 2.3505547046661377, "learning_rate": 4.829265235035362e-06, "loss": 0.4093, "step": 12659 }, { "epoch": 0.5255865365154982, "grad_norm": 2.1955761909484863, "learning_rate": 4.828593313847537e-06, "loss": 0.518, "step": 12660 }, { "epoch": 0.5256280520397096, "grad_norm": 2.2287070751190186, "learning_rate": 4.827921395758796e-06, "loss": 0.4907, "step": 12661 }, { "epoch": 0.5256695675639209, "grad_norm": 2.2457475662231445, "learning_rate": 4.827249480781285e-06, "loss": 0.5601, "step": 12662 }, { "epoch": 0.5257110830881323, "grad_norm": 2.309163808822632, "learning_rate": 4.826577568927155e-06, "loss": 0.6042, "step": 12663 }, { "epoch": 0.5257525986123436, "grad_norm": 2.128430128097534, "learning_rate": 4.825905660208551e-06, "loss": 0.3654, "step": 12664 }, { "epoch": 0.525794114136555, "grad_norm": 3.045644760131836, "learning_rate": 4.825233754637623e-06, "loss": 0.7115, "step": 12665 }, { "epoch": 0.5258356296607662, "grad_norm": 1.9665166139602661, "learning_rate": 4.824561852226518e-06, "loss": 0.4436, "step": 12666 }, { "epoch": 0.5258771451849776, "grad_norm": 2.1108317375183105, "learning_rate": 4.823889952987389e-06, "loss": 0.4434, "step": 12667 }, { "epoch": 0.5259186607091889, "grad_norm": 2.532829761505127, "learning_rate": 4.8232180569323776e-06, "loss": 0.4772, "step": 12668 }, { "epoch": 0.5259601762334003, "grad_norm": 2.281683921813965, "learning_rate": 4.822546164073635e-06, "loss": 0.454, "step": 12669 }, { "epoch": 0.5260016917576116, "grad_norm": 2.1675331592559814, "learning_rate": 4.821874274423309e-06, "loss": 0.5705, "step": 12670 }, { "epoch": 0.526043207281823, "grad_norm": 2.2859833240509033, "learning_rate": 4.821202387993548e-06, "loss": 0.4013, "step": 12671 }, { "epoch": 0.5260847228060342, "grad_norm": 2.115145683288574, "learning_rate": 4.820530504796499e-06, "loss": 0.3897, "step": 12672 }, { "epoch": 0.5261262383302456, "grad_norm": 2.3056223392486572, "learning_rate": 4.81985862484431e-06, "loss": 0.5534, "step": 12673 }, { "epoch": 0.5261677538544569, "grad_norm": 2.6563477516174316, "learning_rate": 4.819186748149128e-06, "loss": 0.5566, "step": 12674 }, { "epoch": 0.5262092693786683, "grad_norm": 2.4326183795928955, "learning_rate": 4.818514874723103e-06, "loss": 0.3676, "step": 12675 }, { "epoch": 0.5262507849028796, "grad_norm": 2.5569756031036377, "learning_rate": 4.817843004578382e-06, "loss": 0.4678, "step": 12676 }, { "epoch": 0.526292300427091, "grad_norm": 2.1760241985321045, "learning_rate": 4.817171137727111e-06, "loss": 0.4742, "step": 12677 }, { "epoch": 0.5263338159513022, "grad_norm": 2.6247596740722656, "learning_rate": 4.816499274181438e-06, "loss": 0.7433, "step": 12678 }, { "epoch": 0.5263753314755136, "grad_norm": 2.086854934692383, "learning_rate": 4.815827413953511e-06, "loss": 0.4121, "step": 12679 }, { "epoch": 0.5264168469997249, "grad_norm": 2.5024657249450684, "learning_rate": 4.8151555570554795e-06, "loss": 0.5574, "step": 12680 }, { "epoch": 0.5264583625239363, "grad_norm": 2.876826763153076, "learning_rate": 4.814483703499487e-06, "loss": 0.4583, "step": 12681 }, { "epoch": 0.5264998780481476, "grad_norm": 2.5166103839874268, "learning_rate": 4.813811853297683e-06, "loss": 0.4444, "step": 12682 }, { "epoch": 0.526541393572359, "grad_norm": 2.556291341781616, "learning_rate": 4.813140006462214e-06, "loss": 0.4365, "step": 12683 }, { "epoch": 0.5265829090965704, "grad_norm": 2.822326183319092, "learning_rate": 4.812468163005229e-06, "loss": 0.5521, "step": 12684 }, { "epoch": 0.5266244246207816, "grad_norm": 2.232945442199707, "learning_rate": 4.811796322938873e-06, "loss": 0.5474, "step": 12685 }, { "epoch": 0.526665940144993, "grad_norm": 2.5241341590881348, "learning_rate": 4.811124486275292e-06, "loss": 0.5852, "step": 12686 }, { "epoch": 0.5267074556692043, "grad_norm": 2.528136968612671, "learning_rate": 4.810452653026636e-06, "loss": 0.491, "step": 12687 }, { "epoch": 0.5267489711934157, "grad_norm": 2.5172529220581055, "learning_rate": 4.809780823205049e-06, "loss": 0.5364, "step": 12688 }, { "epoch": 0.526790486717627, "grad_norm": 2.216879367828369, "learning_rate": 4.8091089968226836e-06, "loss": 0.5334, "step": 12689 }, { "epoch": 0.5268320022418383, "grad_norm": 2.247657537460327, "learning_rate": 4.80843717389168e-06, "loss": 0.5308, "step": 12690 }, { "epoch": 0.5268735177660496, "grad_norm": 2.400563955307007, "learning_rate": 4.807765354424187e-06, "loss": 0.526, "step": 12691 }, { "epoch": 0.526915033290261, "grad_norm": 2.438087224960327, "learning_rate": 4.807093538432351e-06, "loss": 0.4887, "step": 12692 }, { "epoch": 0.5269565488144723, "grad_norm": 2.2244536876678467, "learning_rate": 4.806421725928323e-06, "loss": 0.5832, "step": 12693 }, { "epoch": 0.5269980643386837, "grad_norm": 2.8230936527252197, "learning_rate": 4.805749916924243e-06, "loss": 0.7299, "step": 12694 }, { "epoch": 0.527039579862895, "grad_norm": 2.6243736743927, "learning_rate": 4.805078111432263e-06, "loss": 0.6595, "step": 12695 }, { "epoch": 0.5270810953871063, "grad_norm": 2.7587850093841553, "learning_rate": 4.804406309464525e-06, "loss": 0.6265, "step": 12696 }, { "epoch": 0.5271226109113176, "grad_norm": 2.439718723297119, "learning_rate": 4.803734511033177e-06, "loss": 0.5663, "step": 12697 }, { "epoch": 0.527164126435529, "grad_norm": 2.5164499282836914, "learning_rate": 4.803062716150367e-06, "loss": 0.4642, "step": 12698 }, { "epoch": 0.5272056419597403, "grad_norm": 3.0204272270202637, "learning_rate": 4.802390924828239e-06, "loss": 0.4769, "step": 12699 }, { "epoch": 0.5272471574839517, "grad_norm": 2.3332152366638184, "learning_rate": 4.801719137078939e-06, "loss": 0.4716, "step": 12700 }, { "epoch": 0.527288673008163, "grad_norm": 2.0354244709014893, "learning_rate": 4.801047352914614e-06, "loss": 0.3412, "step": 12701 }, { "epoch": 0.5273301885323743, "grad_norm": 2.336244821548462, "learning_rate": 4.800375572347414e-06, "loss": 0.4763, "step": 12702 }, { "epoch": 0.5273717040565856, "grad_norm": 2.391465663909912, "learning_rate": 4.799703795389478e-06, "loss": 0.6297, "step": 12703 }, { "epoch": 0.527413219580797, "grad_norm": 2.2406420707702637, "learning_rate": 4.799032022052955e-06, "loss": 0.4613, "step": 12704 }, { "epoch": 0.5274547351050083, "grad_norm": 2.2167532444000244, "learning_rate": 4.798360252349991e-06, "loss": 0.4312, "step": 12705 }, { "epoch": 0.5274962506292197, "grad_norm": 2.8432962894439697, "learning_rate": 4.797688486292734e-06, "loss": 0.6335, "step": 12706 }, { "epoch": 0.527537766153431, "grad_norm": 3.028780460357666, "learning_rate": 4.797016723893326e-06, "loss": 0.4867, "step": 12707 }, { "epoch": 0.5275792816776423, "grad_norm": 2.256917715072632, "learning_rate": 4.796344965163914e-06, "loss": 0.4921, "step": 12708 }, { "epoch": 0.5276207972018536, "grad_norm": 2.369208574295044, "learning_rate": 4.795673210116644e-06, "loss": 0.5099, "step": 12709 }, { "epoch": 0.527662312726065, "grad_norm": 2.736393928527832, "learning_rate": 4.795001458763662e-06, "loss": 0.5545, "step": 12710 }, { "epoch": 0.5277038282502763, "grad_norm": 2.718280076980591, "learning_rate": 4.7943297111171115e-06, "loss": 0.4712, "step": 12711 }, { "epoch": 0.5277453437744877, "grad_norm": 2.3416144847869873, "learning_rate": 4.793657967189139e-06, "loss": 0.537, "step": 12712 }, { "epoch": 0.5277868592986991, "grad_norm": 2.4007768630981445, "learning_rate": 4.7929862269918895e-06, "loss": 0.5085, "step": 12713 }, { "epoch": 0.5278283748229103, "grad_norm": 2.7072744369506836, "learning_rate": 4.79231449053751e-06, "loss": 0.6064, "step": 12714 }, { "epoch": 0.5278698903471217, "grad_norm": 2.2908761501312256, "learning_rate": 4.791642757838145e-06, "loss": 0.5866, "step": 12715 }, { "epoch": 0.527911405871333, "grad_norm": 2.583383083343506, "learning_rate": 4.790971028905937e-06, "loss": 0.5954, "step": 12716 }, { "epoch": 0.5279529213955444, "grad_norm": 2.707449197769165, "learning_rate": 4.790299303753034e-06, "loss": 0.4633, "step": 12717 }, { "epoch": 0.5279944369197557, "grad_norm": 2.7619831562042236, "learning_rate": 4.789627582391579e-06, "loss": 0.4521, "step": 12718 }, { "epoch": 0.5280359524439671, "grad_norm": 2.5926148891448975, "learning_rate": 4.78895586483372e-06, "loss": 0.4802, "step": 12719 }, { "epoch": 0.5280774679681783, "grad_norm": 2.2352418899536133, "learning_rate": 4.788284151091599e-06, "loss": 0.4621, "step": 12720 }, { "epoch": 0.5281189834923897, "grad_norm": 2.15228009223938, "learning_rate": 4.787612441177361e-06, "loss": 0.4805, "step": 12721 }, { "epoch": 0.528160499016601, "grad_norm": 2.1032726764678955, "learning_rate": 4.786940735103152e-06, "loss": 0.471, "step": 12722 }, { "epoch": 0.5282020145408124, "grad_norm": 2.3152687549591064, "learning_rate": 4.7862690328811156e-06, "loss": 0.5253, "step": 12723 }, { "epoch": 0.5282435300650237, "grad_norm": 2.196690320968628, "learning_rate": 4.785597334523397e-06, "loss": 0.5665, "step": 12724 }, { "epoch": 0.5282850455892351, "grad_norm": 2.151543617248535, "learning_rate": 4.7849256400421394e-06, "loss": 0.5378, "step": 12725 }, { "epoch": 0.5283265611134463, "grad_norm": 2.856476306915283, "learning_rate": 4.784253949449488e-06, "loss": 0.5351, "step": 12726 }, { "epoch": 0.5283680766376577, "grad_norm": 2.4942734241485596, "learning_rate": 4.783582262757588e-06, "loss": 0.5665, "step": 12727 }, { "epoch": 0.528409592161869, "grad_norm": 2.0452663898468018, "learning_rate": 4.782910579978585e-06, "loss": 0.5121, "step": 12728 }, { "epoch": 0.5284511076860804, "grad_norm": 2.4171786308288574, "learning_rate": 4.782238901124618e-06, "loss": 0.4817, "step": 12729 }, { "epoch": 0.5284926232102917, "grad_norm": 2.39801287651062, "learning_rate": 4.781567226207836e-06, "loss": 0.5108, "step": 12730 }, { "epoch": 0.5285341387345031, "grad_norm": 2.3974273204803467, "learning_rate": 4.780895555240381e-06, "loss": 0.4655, "step": 12731 }, { "epoch": 0.5285756542587143, "grad_norm": 2.4743616580963135, "learning_rate": 4.780223888234399e-06, "loss": 0.6141, "step": 12732 }, { "epoch": 0.5286171697829257, "grad_norm": 2.256963014602661, "learning_rate": 4.7795522252020315e-06, "loss": 0.5132, "step": 12733 }, { "epoch": 0.528658685307137, "grad_norm": 2.6667492389678955, "learning_rate": 4.778880566155424e-06, "loss": 0.4688, "step": 12734 }, { "epoch": 0.5287002008313484, "grad_norm": 2.403167724609375, "learning_rate": 4.778208911106718e-06, "loss": 0.5568, "step": 12735 }, { "epoch": 0.5287417163555597, "grad_norm": 2.460559844970703, "learning_rate": 4.7775372600680604e-06, "loss": 0.6337, "step": 12736 }, { "epoch": 0.528783231879771, "grad_norm": 2.4559073448181152, "learning_rate": 4.776865613051592e-06, "loss": 0.4828, "step": 12737 }, { "epoch": 0.5288247474039823, "grad_norm": 2.4687137603759766, "learning_rate": 4.7761939700694584e-06, "loss": 0.5574, "step": 12738 }, { "epoch": 0.5288662629281937, "grad_norm": 4.3427629470825195, "learning_rate": 4.775522331133801e-06, "loss": 0.7017, "step": 12739 }, { "epoch": 0.528907778452405, "grad_norm": 2.6093263626098633, "learning_rate": 4.774850696256765e-06, "loss": 0.5189, "step": 12740 }, { "epoch": 0.5289492939766164, "grad_norm": 2.5704615116119385, "learning_rate": 4.774179065450496e-06, "loss": 0.5983, "step": 12741 }, { "epoch": 0.5289908095008277, "grad_norm": 2.5497076511383057, "learning_rate": 4.773507438727132e-06, "loss": 0.5124, "step": 12742 }, { "epoch": 0.529032325025039, "grad_norm": 2.25014066696167, "learning_rate": 4.772835816098819e-06, "loss": 0.5666, "step": 12743 }, { "epoch": 0.5290738405492504, "grad_norm": 2.2369186878204346, "learning_rate": 4.7721641975777e-06, "loss": 0.4771, "step": 12744 }, { "epoch": 0.5291153560734617, "grad_norm": 2.6179745197296143, "learning_rate": 4.771492583175919e-06, "loss": 0.4837, "step": 12745 }, { "epoch": 0.5291568715976731, "grad_norm": 2.308957815170288, "learning_rate": 4.770820972905617e-06, "loss": 0.5655, "step": 12746 }, { "epoch": 0.5291983871218844, "grad_norm": 2.5019307136535645, "learning_rate": 4.770149366778938e-06, "loss": 0.6013, "step": 12747 }, { "epoch": 0.5292399026460958, "grad_norm": 2.5312576293945312, "learning_rate": 4.769477764808025e-06, "loss": 0.6892, "step": 12748 }, { "epoch": 0.529281418170307, "grad_norm": 2.4419984817504883, "learning_rate": 4.76880616700502e-06, "loss": 0.5191, "step": 12749 }, { "epoch": 0.5293229336945184, "grad_norm": 2.466452121734619, "learning_rate": 4.768134573382067e-06, "loss": 0.5292, "step": 12750 }, { "epoch": 0.5293644492187297, "grad_norm": 2.3611512184143066, "learning_rate": 4.767462983951307e-06, "loss": 0.5343, "step": 12751 }, { "epoch": 0.5294059647429411, "grad_norm": 2.721404552459717, "learning_rate": 4.766791398724882e-06, "loss": 0.6832, "step": 12752 }, { "epoch": 0.5294474802671524, "grad_norm": 2.461026191711426, "learning_rate": 4.766119817714937e-06, "loss": 0.4995, "step": 12753 }, { "epoch": 0.5294889957913638, "grad_norm": 2.6743738651275635, "learning_rate": 4.765448240933615e-06, "loss": 0.5221, "step": 12754 }, { "epoch": 0.529530511315575, "grad_norm": 2.8888237476348877, "learning_rate": 4.764776668393054e-06, "loss": 0.5478, "step": 12755 }, { "epoch": 0.5295720268397864, "grad_norm": 2.3296165466308594, "learning_rate": 4.764105100105398e-06, "loss": 0.6652, "step": 12756 }, { "epoch": 0.5296135423639977, "grad_norm": 3.255863904953003, "learning_rate": 4.76343353608279e-06, "loss": 0.5373, "step": 12757 }, { "epoch": 0.5296550578882091, "grad_norm": 2.4162769317626953, "learning_rate": 4.7627619763373754e-06, "loss": 0.6556, "step": 12758 }, { "epoch": 0.5296965734124204, "grad_norm": 2.6776771545410156, "learning_rate": 4.762090420881289e-06, "loss": 0.4479, "step": 12759 }, { "epoch": 0.5297380889366318, "grad_norm": 2.638523578643799, "learning_rate": 4.761418869726678e-06, "loss": 0.6178, "step": 12760 }, { "epoch": 0.529779604460843, "grad_norm": 2.693193197250366, "learning_rate": 4.760747322885682e-06, "loss": 0.5277, "step": 12761 }, { "epoch": 0.5298211199850544, "grad_norm": 2.4533228874206543, "learning_rate": 4.760075780370444e-06, "loss": 0.5534, "step": 12762 }, { "epoch": 0.5298626355092657, "grad_norm": 3.4342477321624756, "learning_rate": 4.7594042421931045e-06, "loss": 0.5112, "step": 12763 }, { "epoch": 0.5299041510334771, "grad_norm": 2.8812448978424072, "learning_rate": 4.758732708365805e-06, "loss": 0.5144, "step": 12764 }, { "epoch": 0.5299456665576884, "grad_norm": 2.9772861003875732, "learning_rate": 4.758061178900687e-06, "loss": 0.6955, "step": 12765 }, { "epoch": 0.5299871820818998, "grad_norm": 2.4451427459716797, "learning_rate": 4.757389653809894e-06, "loss": 0.6022, "step": 12766 }, { "epoch": 0.530028697606111, "grad_norm": 3.038229465484619, "learning_rate": 4.7567181331055676e-06, "loss": 0.6488, "step": 12767 }, { "epoch": 0.5300702131303224, "grad_norm": 1.858174204826355, "learning_rate": 4.756046616799845e-06, "loss": 0.4643, "step": 12768 }, { "epoch": 0.5301117286545337, "grad_norm": 1.9458205699920654, "learning_rate": 4.755375104904871e-06, "loss": 0.3885, "step": 12769 }, { "epoch": 0.5301532441787451, "grad_norm": 2.2079312801361084, "learning_rate": 4.754703597432784e-06, "loss": 0.4984, "step": 12770 }, { "epoch": 0.5301947597029564, "grad_norm": 2.363555908203125, "learning_rate": 4.75403209439573e-06, "loss": 0.5941, "step": 12771 }, { "epoch": 0.5302362752271678, "grad_norm": 2.449669361114502, "learning_rate": 4.753360595805843e-06, "loss": 0.5242, "step": 12772 }, { "epoch": 0.530277790751379, "grad_norm": 2.3612372875213623, "learning_rate": 4.75268910167527e-06, "loss": 0.4987, "step": 12773 }, { "epoch": 0.5303193062755904, "grad_norm": 2.787029981613159, "learning_rate": 4.752017612016147e-06, "loss": 0.4668, "step": 12774 }, { "epoch": 0.5303608217998018, "grad_norm": 2.645045042037964, "learning_rate": 4.7513461268406205e-06, "loss": 0.4451, "step": 12775 }, { "epoch": 0.5304023373240131, "grad_norm": 2.7193214893341064, "learning_rate": 4.750674646160825e-06, "loss": 0.3028, "step": 12776 }, { "epoch": 0.5304438528482245, "grad_norm": 2.880502223968506, "learning_rate": 4.750003169988904e-06, "loss": 0.5361, "step": 12777 }, { "epoch": 0.5304853683724358, "grad_norm": 2.4719934463500977, "learning_rate": 4.749331698336998e-06, "loss": 0.5473, "step": 12778 }, { "epoch": 0.5305268838966472, "grad_norm": 2.0788869857788086, "learning_rate": 4.748660231217249e-06, "loss": 0.5054, "step": 12779 }, { "epoch": 0.5305683994208584, "grad_norm": 2.201491117477417, "learning_rate": 4.747988768641793e-06, "loss": 0.482, "step": 12780 }, { "epoch": 0.5306099149450698, "grad_norm": 2.202162504196167, "learning_rate": 4.747317310622773e-06, "loss": 0.4844, "step": 12781 }, { "epoch": 0.5306514304692811, "grad_norm": 2.6019411087036133, "learning_rate": 4.746645857172328e-06, "loss": 0.4957, "step": 12782 }, { "epoch": 0.5306929459934925, "grad_norm": 2.2695438861846924, "learning_rate": 4.745974408302602e-06, "loss": 0.4704, "step": 12783 }, { "epoch": 0.5307344615177038, "grad_norm": 2.4133565425872803, "learning_rate": 4.74530296402573e-06, "loss": 0.4835, "step": 12784 }, { "epoch": 0.5307759770419151, "grad_norm": 2.319000005722046, "learning_rate": 4.744631524353853e-06, "loss": 0.5744, "step": 12785 }, { "epoch": 0.5308174925661264, "grad_norm": 2.635972738265991, "learning_rate": 4.743960089299113e-06, "loss": 0.5147, "step": 12786 }, { "epoch": 0.5308590080903378, "grad_norm": 3.4873032569885254, "learning_rate": 4.7432886588736485e-06, "loss": 0.5442, "step": 12787 }, { "epoch": 0.5309005236145491, "grad_norm": 2.4573817253112793, "learning_rate": 4.7426172330895994e-06, "loss": 0.5266, "step": 12788 }, { "epoch": 0.5309420391387605, "grad_norm": 2.5897562503814697, "learning_rate": 4.741945811959104e-06, "loss": 0.5456, "step": 12789 }, { "epoch": 0.5309835546629718, "grad_norm": 2.507020950317383, "learning_rate": 4.7412743954943015e-06, "loss": 0.7074, "step": 12790 }, { "epoch": 0.5310250701871831, "grad_norm": 2.2270562648773193, "learning_rate": 4.740602983707334e-06, "loss": 0.504, "step": 12791 }, { "epoch": 0.5310665857113944, "grad_norm": 2.7395741939544678, "learning_rate": 4.73993157661034e-06, "loss": 0.6007, "step": 12792 }, { "epoch": 0.5311081012356058, "grad_norm": 2.724740743637085, "learning_rate": 4.739260174215457e-06, "loss": 0.4956, "step": 12793 }, { "epoch": 0.5311496167598171, "grad_norm": 2.128631114959717, "learning_rate": 4.738588776534825e-06, "loss": 0.481, "step": 12794 }, { "epoch": 0.5311911322840285, "grad_norm": 2.538252592086792, "learning_rate": 4.737917383580583e-06, "loss": 0.5774, "step": 12795 }, { "epoch": 0.5312326478082398, "grad_norm": 2.052255392074585, "learning_rate": 4.737245995364871e-06, "loss": 0.5209, "step": 12796 }, { "epoch": 0.5312741633324511, "grad_norm": 2.317656993865967, "learning_rate": 4.736574611899828e-06, "loss": 0.4758, "step": 12797 }, { "epoch": 0.5313156788566624, "grad_norm": 2.5093979835510254, "learning_rate": 4.73590323319759e-06, "loss": 0.5005, "step": 12798 }, { "epoch": 0.5313571943808738, "grad_norm": 2.613415479660034, "learning_rate": 4.735231859270299e-06, "loss": 0.5389, "step": 12799 }, { "epoch": 0.5313987099050851, "grad_norm": 2.152698040008545, "learning_rate": 4.7345604901300915e-06, "loss": 0.4221, "step": 12800 }, { "epoch": 0.5314402254292965, "grad_norm": 2.2639663219451904, "learning_rate": 4.7338891257891085e-06, "loss": 0.4583, "step": 12801 }, { "epoch": 0.5314817409535078, "grad_norm": 2.3134377002716064, "learning_rate": 4.733217766259485e-06, "loss": 0.4984, "step": 12802 }, { "epoch": 0.5315232564777191, "grad_norm": 2.1134982109069824, "learning_rate": 4.7325464115533614e-06, "loss": 0.4764, "step": 12803 }, { "epoch": 0.5315647720019304, "grad_norm": 2.5051825046539307, "learning_rate": 4.731875061682875e-06, "loss": 0.6009, "step": 12804 }, { "epoch": 0.5316062875261418, "grad_norm": 2.1144309043884277, "learning_rate": 4.731203716660167e-06, "loss": 0.4498, "step": 12805 }, { "epoch": 0.5316478030503532, "grad_norm": 2.47070050239563, "learning_rate": 4.730532376497372e-06, "loss": 0.5767, "step": 12806 }, { "epoch": 0.5316893185745645, "grad_norm": 2.365311861038208, "learning_rate": 4.729861041206629e-06, "loss": 0.5098, "step": 12807 }, { "epoch": 0.5317308340987759, "grad_norm": 2.328483819961548, "learning_rate": 4.7291897108000765e-06, "loss": 0.4191, "step": 12808 }, { "epoch": 0.5317723496229871, "grad_norm": 2.310115337371826, "learning_rate": 4.728518385289853e-06, "loss": 0.4804, "step": 12809 }, { "epoch": 0.5318138651471985, "grad_norm": 2.717836618423462, "learning_rate": 4.727847064688094e-06, "loss": 0.5731, "step": 12810 }, { "epoch": 0.5318553806714098, "grad_norm": 2.3413257598876953, "learning_rate": 4.727175749006939e-06, "loss": 0.4028, "step": 12811 }, { "epoch": 0.5318968961956212, "grad_norm": 2.3134312629699707, "learning_rate": 4.726504438258526e-06, "loss": 0.4744, "step": 12812 }, { "epoch": 0.5319384117198325, "grad_norm": 2.1272788047790527, "learning_rate": 4.7258331324549906e-06, "loss": 0.5043, "step": 12813 }, { "epoch": 0.5319799272440439, "grad_norm": 3.2633678913116455, "learning_rate": 4.725161831608473e-06, "loss": 0.4745, "step": 12814 }, { "epoch": 0.5320214427682551, "grad_norm": 2.886204242706299, "learning_rate": 4.724490535731106e-06, "loss": 0.5379, "step": 12815 }, { "epoch": 0.5320629582924665, "grad_norm": 2.3384053707122803, "learning_rate": 4.72381924483503e-06, "loss": 0.4845, "step": 12816 }, { "epoch": 0.5321044738166778, "grad_norm": 2.386322259902954, "learning_rate": 4.723147958932382e-06, "loss": 0.4461, "step": 12817 }, { "epoch": 0.5321459893408892, "grad_norm": 2.5695724487304688, "learning_rate": 4.7224766780353005e-06, "loss": 0.6779, "step": 12818 }, { "epoch": 0.5321875048651005, "grad_norm": 2.019946575164795, "learning_rate": 4.721805402155919e-06, "loss": 0.5018, "step": 12819 }, { "epoch": 0.5322290203893119, "grad_norm": 2.841069221496582, "learning_rate": 4.721134131306375e-06, "loss": 0.4523, "step": 12820 }, { "epoch": 0.5322705359135231, "grad_norm": 2.869256019592285, "learning_rate": 4.720462865498807e-06, "loss": 0.6198, "step": 12821 }, { "epoch": 0.5323120514377345, "grad_norm": 1.9789111614227295, "learning_rate": 4.7197916047453514e-06, "loss": 0.5811, "step": 12822 }, { "epoch": 0.5323535669619458, "grad_norm": 2.206211566925049, "learning_rate": 4.719120349058146e-06, "loss": 0.5155, "step": 12823 }, { "epoch": 0.5323950824861572, "grad_norm": 2.4641058444976807, "learning_rate": 4.7184490984493235e-06, "loss": 0.4564, "step": 12824 }, { "epoch": 0.5324365980103685, "grad_norm": 2.3975508213043213, "learning_rate": 4.717777852931023e-06, "loss": 0.5493, "step": 12825 }, { "epoch": 0.5324781135345799, "grad_norm": 2.3789124488830566, "learning_rate": 4.717106612515379e-06, "loss": 0.6076, "step": 12826 }, { "epoch": 0.5325196290587911, "grad_norm": 2.1651930809020996, "learning_rate": 4.7164353772145324e-06, "loss": 0.5179, "step": 12827 }, { "epoch": 0.5325611445830025, "grad_norm": 2.33471941947937, "learning_rate": 4.715764147040613e-06, "loss": 0.5245, "step": 12828 }, { "epoch": 0.5326026601072138, "grad_norm": 2.3488075733184814, "learning_rate": 4.715092922005759e-06, "loss": 0.5918, "step": 12829 }, { "epoch": 0.5326441756314252, "grad_norm": 2.3782143592834473, "learning_rate": 4.7144217021221086e-06, "loss": 0.4366, "step": 12830 }, { "epoch": 0.5326856911556365, "grad_norm": 2.606403350830078, "learning_rate": 4.713750487401798e-06, "loss": 0.4533, "step": 12831 }, { "epoch": 0.5327272066798479, "grad_norm": 2.4192821979522705, "learning_rate": 4.713079277856958e-06, "loss": 0.6017, "step": 12832 }, { "epoch": 0.5327687222040591, "grad_norm": 2.5095908641815186, "learning_rate": 4.712408073499728e-06, "loss": 0.5801, "step": 12833 }, { "epoch": 0.5328102377282705, "grad_norm": 2.7498178482055664, "learning_rate": 4.7117368743422435e-06, "loss": 0.6137, "step": 12834 }, { "epoch": 0.5328517532524818, "grad_norm": 2.3245561122894287, "learning_rate": 4.71106568039664e-06, "loss": 0.5997, "step": 12835 }, { "epoch": 0.5328932687766932, "grad_norm": 2.7062134742736816, "learning_rate": 4.710394491675052e-06, "loss": 0.5962, "step": 12836 }, { "epoch": 0.5329347843009046, "grad_norm": 2.351654052734375, "learning_rate": 4.709723308189614e-06, "loss": 0.5495, "step": 12837 }, { "epoch": 0.5329762998251159, "grad_norm": 2.5012545585632324, "learning_rate": 4.709052129952463e-06, "loss": 0.4976, "step": 12838 }, { "epoch": 0.5330178153493272, "grad_norm": 2.081207036972046, "learning_rate": 4.708380956975733e-06, "loss": 0.4576, "step": 12839 }, { "epoch": 0.5330593308735385, "grad_norm": 2.43522310256958, "learning_rate": 4.707709789271561e-06, "loss": 0.516, "step": 12840 }, { "epoch": 0.5331008463977499, "grad_norm": 2.4152798652648926, "learning_rate": 4.707038626852079e-06, "loss": 0.6377, "step": 12841 }, { "epoch": 0.5331423619219612, "grad_norm": 2.385803461074829, "learning_rate": 4.706367469729422e-06, "loss": 0.5028, "step": 12842 }, { "epoch": 0.5331838774461726, "grad_norm": 2.753290891647339, "learning_rate": 4.705696317915726e-06, "loss": 0.4755, "step": 12843 }, { "epoch": 0.5332253929703838, "grad_norm": 2.4414377212524414, "learning_rate": 4.705025171423128e-06, "loss": 0.5512, "step": 12844 }, { "epoch": 0.5332669084945952, "grad_norm": 2.098092555999756, "learning_rate": 4.704354030263757e-06, "loss": 0.4895, "step": 12845 }, { "epoch": 0.5333084240188065, "grad_norm": 2.1511969566345215, "learning_rate": 4.703682894449751e-06, "loss": 0.5629, "step": 12846 }, { "epoch": 0.5333499395430179, "grad_norm": 2.424705982208252, "learning_rate": 4.703011763993244e-06, "loss": 0.5787, "step": 12847 }, { "epoch": 0.5333914550672292, "grad_norm": 2.4209256172180176, "learning_rate": 4.70234063890637e-06, "loss": 0.5322, "step": 12848 }, { "epoch": 0.5334329705914406, "grad_norm": 2.9732110500335693, "learning_rate": 4.701669519201264e-06, "loss": 0.5338, "step": 12849 }, { "epoch": 0.5334744861156518, "grad_norm": 2.2007577419281006, "learning_rate": 4.700998404890057e-06, "loss": 0.5111, "step": 12850 }, { "epoch": 0.5335160016398632, "grad_norm": 2.5037333965301514, "learning_rate": 4.700327295984887e-06, "loss": 0.4831, "step": 12851 }, { "epoch": 0.5335575171640745, "grad_norm": 2.923154354095459, "learning_rate": 4.699656192497885e-06, "loss": 0.5915, "step": 12852 }, { "epoch": 0.5335990326882859, "grad_norm": 2.3292064666748047, "learning_rate": 4.698985094441187e-06, "loss": 0.4515, "step": 12853 }, { "epoch": 0.5336405482124972, "grad_norm": 2.3290529251098633, "learning_rate": 4.698314001826923e-06, "loss": 0.7177, "step": 12854 }, { "epoch": 0.5336820637367086, "grad_norm": 2.552614450454712, "learning_rate": 4.697642914667229e-06, "loss": 0.4679, "step": 12855 }, { "epoch": 0.5337235792609198, "grad_norm": 2.1911330223083496, "learning_rate": 4.696971832974239e-06, "loss": 0.6669, "step": 12856 }, { "epoch": 0.5337650947851312, "grad_norm": 1.9427355527877808, "learning_rate": 4.696300756760087e-06, "loss": 0.3813, "step": 12857 }, { "epoch": 0.5338066103093425, "grad_norm": 2.456852674484253, "learning_rate": 4.6956296860369026e-06, "loss": 0.5637, "step": 12858 }, { "epoch": 0.5338481258335539, "grad_norm": 2.701983690261841, "learning_rate": 4.694958620816821e-06, "loss": 0.6017, "step": 12859 }, { "epoch": 0.5338896413577652, "grad_norm": 2.732008457183838, "learning_rate": 4.694287561111976e-06, "loss": 0.5909, "step": 12860 }, { "epoch": 0.5339311568819766, "grad_norm": 2.38942289352417, "learning_rate": 4.693616506934502e-06, "loss": 0.3724, "step": 12861 }, { "epoch": 0.5339726724061878, "grad_norm": 2.0045557022094727, "learning_rate": 4.6929454582965275e-06, "loss": 0.4189, "step": 12862 }, { "epoch": 0.5340141879303992, "grad_norm": 2.3852927684783936, "learning_rate": 4.692274415210189e-06, "loss": 0.4511, "step": 12863 }, { "epoch": 0.5340557034546105, "grad_norm": 2.000706911087036, "learning_rate": 4.691603377687617e-06, "loss": 0.6328, "step": 12864 }, { "epoch": 0.5340972189788219, "grad_norm": 2.6358041763305664, "learning_rate": 4.690932345740945e-06, "loss": 0.4861, "step": 12865 }, { "epoch": 0.5341387345030332, "grad_norm": 3.0271148681640625, "learning_rate": 4.690261319382306e-06, "loss": 0.552, "step": 12866 }, { "epoch": 0.5341802500272446, "grad_norm": 2.5427536964416504, "learning_rate": 4.689590298623831e-06, "loss": 0.4604, "step": 12867 }, { "epoch": 0.534221765551456, "grad_norm": 2.5662951469421387, "learning_rate": 4.688919283477652e-06, "loss": 0.6059, "step": 12868 }, { "epoch": 0.5342632810756672, "grad_norm": 2.5891273021698, "learning_rate": 4.6882482739559025e-06, "loss": 0.5415, "step": 12869 }, { "epoch": 0.5343047965998786, "grad_norm": 2.1141419410705566, "learning_rate": 4.687577270070716e-06, "loss": 0.4716, "step": 12870 }, { "epoch": 0.5343463121240899, "grad_norm": 2.3011484146118164, "learning_rate": 4.68690627183422e-06, "loss": 0.63, "step": 12871 }, { "epoch": 0.5343878276483013, "grad_norm": 2.0867230892181396, "learning_rate": 4.686235279258549e-06, "loss": 0.5205, "step": 12872 }, { "epoch": 0.5344293431725126, "grad_norm": 2.7138142585754395, "learning_rate": 4.6855642923558345e-06, "loss": 0.5286, "step": 12873 }, { "epoch": 0.534470858696724, "grad_norm": 2.0452630519866943, "learning_rate": 4.68489331113821e-06, "loss": 0.6369, "step": 12874 }, { "epoch": 0.5345123742209352, "grad_norm": 2.0747039318084717, "learning_rate": 4.684222335617805e-06, "loss": 0.4628, "step": 12875 }, { "epoch": 0.5345538897451466, "grad_norm": 2.626893997192383, "learning_rate": 4.6835513658067515e-06, "loss": 0.5523, "step": 12876 }, { "epoch": 0.5345954052693579, "grad_norm": 3.1208629608154297, "learning_rate": 4.682880401717178e-06, "loss": 0.6141, "step": 12877 }, { "epoch": 0.5346369207935693, "grad_norm": 2.629417657852173, "learning_rate": 4.682209443361219e-06, "loss": 0.6711, "step": 12878 }, { "epoch": 0.5346784363177806, "grad_norm": 2.5578348636627197, "learning_rate": 4.681538490751008e-06, "loss": 0.5672, "step": 12879 }, { "epoch": 0.534719951841992, "grad_norm": 2.2518489360809326, "learning_rate": 4.68086754389867e-06, "loss": 0.5472, "step": 12880 }, { "epoch": 0.5347614673662032, "grad_norm": 2.561486005783081, "learning_rate": 4.680196602816338e-06, "loss": 0.5246, "step": 12881 }, { "epoch": 0.5348029828904146, "grad_norm": 1.9076670408248901, "learning_rate": 4.6795256675161445e-06, "loss": 0.3574, "step": 12882 }, { "epoch": 0.5348444984146259, "grad_norm": 2.827230215072632, "learning_rate": 4.678854738010221e-06, "loss": 0.5884, "step": 12883 }, { "epoch": 0.5348860139388373, "grad_norm": 2.6764113903045654, "learning_rate": 4.678183814310695e-06, "loss": 0.6567, "step": 12884 }, { "epoch": 0.5349275294630486, "grad_norm": 2.224294662475586, "learning_rate": 4.677512896429698e-06, "loss": 0.6116, "step": 12885 }, { "epoch": 0.53496904498726, "grad_norm": 2.627394914627075, "learning_rate": 4.67684198437936e-06, "loss": 0.6726, "step": 12886 }, { "epoch": 0.5350105605114712, "grad_norm": 2.2261340618133545, "learning_rate": 4.6761710781718136e-06, "loss": 0.3861, "step": 12887 }, { "epoch": 0.5350520760356826, "grad_norm": 2.4569458961486816, "learning_rate": 4.675500177819188e-06, "loss": 0.4795, "step": 12888 }, { "epoch": 0.5350935915598939, "grad_norm": 2.5126352310180664, "learning_rate": 4.6748292833336125e-06, "loss": 0.7986, "step": 12889 }, { "epoch": 0.5351351070841053, "grad_norm": 2.7088727951049805, "learning_rate": 4.6741583947272165e-06, "loss": 0.6106, "step": 12890 }, { "epoch": 0.5351766226083166, "grad_norm": 2.098726987838745, "learning_rate": 4.67348751201213e-06, "loss": 0.3886, "step": 12891 }, { "epoch": 0.5352181381325279, "grad_norm": 1.9328529834747314, "learning_rate": 4.672816635200486e-06, "loss": 0.3675, "step": 12892 }, { "epoch": 0.5352596536567392, "grad_norm": 1.9130759239196777, "learning_rate": 4.6721457643044085e-06, "loss": 0.4687, "step": 12893 }, { "epoch": 0.5353011691809506, "grad_norm": 2.4490766525268555, "learning_rate": 4.671474899336031e-06, "loss": 0.5509, "step": 12894 }, { "epoch": 0.5353426847051619, "grad_norm": 2.2764601707458496, "learning_rate": 4.670804040307482e-06, "loss": 0.4548, "step": 12895 }, { "epoch": 0.5353842002293733, "grad_norm": 2.0720441341400146, "learning_rate": 4.6701331872308925e-06, "loss": 0.5464, "step": 12896 }, { "epoch": 0.5354257157535846, "grad_norm": 2.3185887336730957, "learning_rate": 4.669462340118388e-06, "loss": 0.46, "step": 12897 }, { "epoch": 0.5354672312777959, "grad_norm": 2.0500478744506836, "learning_rate": 4.6687914989821e-06, "loss": 0.4116, "step": 12898 }, { "epoch": 0.5355087468020073, "grad_norm": 2.4120283126831055, "learning_rate": 4.668120663834157e-06, "loss": 0.7098, "step": 12899 }, { "epoch": 0.5355502623262186, "grad_norm": 2.892812728881836, "learning_rate": 4.667449834686689e-06, "loss": 0.5715, "step": 12900 }, { "epoch": 0.53559177785043, "grad_norm": 2.1720926761627197, "learning_rate": 4.666779011551824e-06, "loss": 0.4785, "step": 12901 }, { "epoch": 0.5356332933746413, "grad_norm": 2.1521522998809814, "learning_rate": 4.666108194441691e-06, "loss": 0.5341, "step": 12902 }, { "epoch": 0.5356748088988527, "grad_norm": 1.944356918334961, "learning_rate": 4.665437383368416e-06, "loss": 0.4441, "step": 12903 }, { "epoch": 0.5357163244230639, "grad_norm": 2.3784666061401367, "learning_rate": 4.6647665783441295e-06, "loss": 0.6022, "step": 12904 }, { "epoch": 0.5357578399472753, "grad_norm": 2.129465341567993, "learning_rate": 4.664095779380963e-06, "loss": 0.4005, "step": 12905 }, { "epoch": 0.5357993554714866, "grad_norm": 2.653384208679199, "learning_rate": 4.663424986491039e-06, "loss": 0.5005, "step": 12906 }, { "epoch": 0.535840870995698, "grad_norm": 2.4655063152313232, "learning_rate": 4.662754199686488e-06, "loss": 0.3931, "step": 12907 }, { "epoch": 0.5358823865199093, "grad_norm": 2.751817226409912, "learning_rate": 4.662083418979439e-06, "loss": 0.4491, "step": 12908 }, { "epoch": 0.5359239020441207, "grad_norm": 2.1792168617248535, "learning_rate": 4.661412644382021e-06, "loss": 0.4541, "step": 12909 }, { "epoch": 0.5359654175683319, "grad_norm": 2.38209867477417, "learning_rate": 4.660741875906358e-06, "loss": 0.6237, "step": 12910 }, { "epoch": 0.5360069330925433, "grad_norm": 2.2061166763305664, "learning_rate": 4.660071113564579e-06, "loss": 0.5392, "step": 12911 }, { "epoch": 0.5360484486167546, "grad_norm": 2.324833869934082, "learning_rate": 4.659400357368813e-06, "loss": 0.4915, "step": 12912 }, { "epoch": 0.536089964140966, "grad_norm": 2.594162702560425, "learning_rate": 4.658729607331187e-06, "loss": 0.5479, "step": 12913 }, { "epoch": 0.5361314796651773, "grad_norm": 2.0250446796417236, "learning_rate": 4.658058863463828e-06, "loss": 0.449, "step": 12914 }, { "epoch": 0.5361729951893887, "grad_norm": 2.3542919158935547, "learning_rate": 4.657388125778864e-06, "loss": 0.6378, "step": 12915 }, { "epoch": 0.5362145107135999, "grad_norm": 2.4424471855163574, "learning_rate": 4.656717394288421e-06, "loss": 0.5639, "step": 12916 }, { "epoch": 0.5362560262378113, "grad_norm": 2.372195243835449, "learning_rate": 4.656046669004625e-06, "loss": 0.4558, "step": 12917 }, { "epoch": 0.5362975417620226, "grad_norm": 2.422729730606079, "learning_rate": 4.655375949939607e-06, "loss": 0.5624, "step": 12918 }, { "epoch": 0.536339057286234, "grad_norm": 2.5467865467071533, "learning_rate": 4.654705237105489e-06, "loss": 0.5507, "step": 12919 }, { "epoch": 0.5363805728104453, "grad_norm": 2.9716150760650635, "learning_rate": 4.6540345305144e-06, "loss": 0.6379, "step": 12920 }, { "epoch": 0.5364220883346567, "grad_norm": 2.2321889400482178, "learning_rate": 4.653363830178467e-06, "loss": 0.5263, "step": 12921 }, { "epoch": 0.5364636038588679, "grad_norm": 2.8958089351654053, "learning_rate": 4.652693136109817e-06, "loss": 0.5317, "step": 12922 }, { "epoch": 0.5365051193830793, "grad_norm": 2.1042604446411133, "learning_rate": 4.652022448320573e-06, "loss": 0.5408, "step": 12923 }, { "epoch": 0.5365466349072906, "grad_norm": 2.5525877475738525, "learning_rate": 4.651351766822864e-06, "loss": 0.5042, "step": 12924 }, { "epoch": 0.536588150431502, "grad_norm": 2.2320735454559326, "learning_rate": 4.6506810916288156e-06, "loss": 0.4556, "step": 12925 }, { "epoch": 0.5366296659557133, "grad_norm": 2.622840404510498, "learning_rate": 4.650010422750556e-06, "loss": 0.4537, "step": 12926 }, { "epoch": 0.5366711814799247, "grad_norm": 2.4561474323272705, "learning_rate": 4.649339760200206e-06, "loss": 0.4778, "step": 12927 }, { "epoch": 0.5367126970041359, "grad_norm": 2.5017096996307373, "learning_rate": 4.6486691039898955e-06, "loss": 0.6302, "step": 12928 }, { "epoch": 0.5367542125283473, "grad_norm": 1.9919853210449219, "learning_rate": 4.647998454131748e-06, "loss": 0.4354, "step": 12929 }, { "epoch": 0.5367957280525587, "grad_norm": 2.2890822887420654, "learning_rate": 4.647327810637891e-06, "loss": 0.5562, "step": 12930 }, { "epoch": 0.53683724357677, "grad_norm": 2.5326521396636963, "learning_rate": 4.646657173520449e-06, "loss": 0.5233, "step": 12931 }, { "epoch": 0.5368787591009814, "grad_norm": 2.793994903564453, "learning_rate": 4.645986542791546e-06, "loss": 0.6373, "step": 12932 }, { "epoch": 0.5369202746251927, "grad_norm": 2.784139394760132, "learning_rate": 4.645315918463308e-06, "loss": 0.6623, "step": 12933 }, { "epoch": 0.536961790149404, "grad_norm": 2.77826189994812, "learning_rate": 4.644645300547861e-06, "loss": 0.417, "step": 12934 }, { "epoch": 0.5370033056736153, "grad_norm": 2.771249294281006, "learning_rate": 4.643974689057331e-06, "loss": 0.5911, "step": 12935 }, { "epoch": 0.5370448211978267, "grad_norm": 2.3775622844696045, "learning_rate": 4.643304084003839e-06, "loss": 0.4596, "step": 12936 }, { "epoch": 0.537086336722038, "grad_norm": 1.8862056732177734, "learning_rate": 4.642633485399513e-06, "loss": 0.433, "step": 12937 }, { "epoch": 0.5371278522462494, "grad_norm": 2.313802719116211, "learning_rate": 4.641962893256475e-06, "loss": 0.3997, "step": 12938 }, { "epoch": 0.5371693677704606, "grad_norm": 2.3712363243103027, "learning_rate": 4.641292307586854e-06, "loss": 0.4635, "step": 12939 }, { "epoch": 0.537210883294672, "grad_norm": 2.495177745819092, "learning_rate": 4.64062172840277e-06, "loss": 0.4328, "step": 12940 }, { "epoch": 0.5372523988188833, "grad_norm": 2.6992902755737305, "learning_rate": 4.639951155716349e-06, "loss": 0.4324, "step": 12941 }, { "epoch": 0.5372939143430947, "grad_norm": 2.521414279937744, "learning_rate": 4.639280589539714e-06, "loss": 0.3779, "step": 12942 }, { "epoch": 0.537335429867306, "grad_norm": 2.9155452251434326, "learning_rate": 4.638610029884989e-06, "loss": 0.577, "step": 12943 }, { "epoch": 0.5373769453915174, "grad_norm": 2.523965835571289, "learning_rate": 4.637939476764303e-06, "loss": 0.4256, "step": 12944 }, { "epoch": 0.5374184609157286, "grad_norm": 2.2734599113464355, "learning_rate": 4.637268930189772e-06, "loss": 0.4876, "step": 12945 }, { "epoch": 0.53745997643994, "grad_norm": 2.304337739944458, "learning_rate": 4.636598390173523e-06, "loss": 0.4748, "step": 12946 }, { "epoch": 0.5375014919641513, "grad_norm": 1.9580881595611572, "learning_rate": 4.635927856727681e-06, "loss": 0.5025, "step": 12947 }, { "epoch": 0.5375430074883627, "grad_norm": 1.7668269872665405, "learning_rate": 4.63525732986437e-06, "loss": 0.4278, "step": 12948 }, { "epoch": 0.537584523012574, "grad_norm": 2.323094606399536, "learning_rate": 4.634586809595709e-06, "loss": 0.5587, "step": 12949 }, { "epoch": 0.5376260385367854, "grad_norm": 2.222691774368286, "learning_rate": 4.633916295933824e-06, "loss": 0.6267, "step": 12950 }, { "epoch": 0.5376675540609966, "grad_norm": 2.3203697204589844, "learning_rate": 4.6332457888908386e-06, "loss": 0.5608, "step": 12951 }, { "epoch": 0.537709069585208, "grad_norm": 2.492445468902588, "learning_rate": 4.6325752884788765e-06, "loss": 0.6216, "step": 12952 }, { "epoch": 0.5377505851094193, "grad_norm": 3.0338969230651855, "learning_rate": 4.6319047947100574e-06, "loss": 0.4735, "step": 12953 }, { "epoch": 0.5377921006336307, "grad_norm": 2.1182124614715576, "learning_rate": 4.631234307596506e-06, "loss": 0.4181, "step": 12954 }, { "epoch": 0.537833616157842, "grad_norm": 2.1092026233673096, "learning_rate": 4.6305638271503445e-06, "loss": 0.4106, "step": 12955 }, { "epoch": 0.5378751316820534, "grad_norm": 2.6061408519744873, "learning_rate": 4.629893353383695e-06, "loss": 0.4849, "step": 12956 }, { "epoch": 0.5379166472062646, "grad_norm": 2.294907569885254, "learning_rate": 4.629222886308682e-06, "loss": 0.6742, "step": 12957 }, { "epoch": 0.537958162730476, "grad_norm": 2.0242269039154053, "learning_rate": 4.628552425937425e-06, "loss": 0.371, "step": 12958 }, { "epoch": 0.5379996782546873, "grad_norm": 2.4785375595092773, "learning_rate": 4.627881972282046e-06, "loss": 0.5369, "step": 12959 }, { "epoch": 0.5380411937788987, "grad_norm": 2.2945520877838135, "learning_rate": 4.627211525354669e-06, "loss": 0.5029, "step": 12960 }, { "epoch": 0.5380827093031101, "grad_norm": 2.2866528034210205, "learning_rate": 4.626541085167417e-06, "loss": 0.5224, "step": 12961 }, { "epoch": 0.5381242248273214, "grad_norm": 2.47139310836792, "learning_rate": 4.625870651732408e-06, "loss": 0.4318, "step": 12962 }, { "epoch": 0.5381657403515328, "grad_norm": 2.556558609008789, "learning_rate": 4.625200225061765e-06, "loss": 0.4365, "step": 12963 }, { "epoch": 0.538207255875744, "grad_norm": 2.646942138671875, "learning_rate": 4.62452980516761e-06, "loss": 0.4433, "step": 12964 }, { "epoch": 0.5382487713999554, "grad_norm": 2.2893130779266357, "learning_rate": 4.623859392062067e-06, "loss": 0.381, "step": 12965 }, { "epoch": 0.5382902869241667, "grad_norm": 2.7723686695098877, "learning_rate": 4.623188985757252e-06, "loss": 0.3765, "step": 12966 }, { "epoch": 0.5383318024483781, "grad_norm": 2.4927027225494385, "learning_rate": 4.6225185862652895e-06, "loss": 0.5473, "step": 12967 }, { "epoch": 0.5383733179725894, "grad_norm": 2.3522133827209473, "learning_rate": 4.621848193598299e-06, "loss": 0.5011, "step": 12968 }, { "epoch": 0.5384148334968007, "grad_norm": 2.381139039993286, "learning_rate": 4.6211778077684024e-06, "loss": 0.5611, "step": 12969 }, { "epoch": 0.538456349021012, "grad_norm": 2.57328462600708, "learning_rate": 4.620507428787722e-06, "loss": 0.4959, "step": 12970 }, { "epoch": 0.5384978645452234, "grad_norm": 2.77197003364563, "learning_rate": 4.619837056668375e-06, "loss": 0.5194, "step": 12971 }, { "epoch": 0.5385393800694347, "grad_norm": 2.8069562911987305, "learning_rate": 4.619166691422483e-06, "loss": 0.5635, "step": 12972 }, { "epoch": 0.5385808955936461, "grad_norm": 2.33213472366333, "learning_rate": 4.618496333062168e-06, "loss": 0.4118, "step": 12973 }, { "epoch": 0.5386224111178574, "grad_norm": 2.532259702682495, "learning_rate": 4.6178259815995504e-06, "loss": 0.5614, "step": 12974 }, { "epoch": 0.5386639266420687, "grad_norm": 1.8968820571899414, "learning_rate": 4.617155637046748e-06, "loss": 0.5258, "step": 12975 }, { "epoch": 0.53870544216628, "grad_norm": 2.375643491744995, "learning_rate": 4.616485299415881e-06, "loss": 0.4418, "step": 12976 }, { "epoch": 0.5387469576904914, "grad_norm": 2.7967827320098877, "learning_rate": 4.615814968719071e-06, "loss": 0.5111, "step": 12977 }, { "epoch": 0.5387884732147027, "grad_norm": 2.552739143371582, "learning_rate": 4.615144644968439e-06, "loss": 0.484, "step": 12978 }, { "epoch": 0.5388299887389141, "grad_norm": 2.166391372680664, "learning_rate": 4.6144743281761e-06, "loss": 0.4282, "step": 12979 }, { "epoch": 0.5388715042631254, "grad_norm": 2.50947642326355, "learning_rate": 4.613804018354179e-06, "loss": 0.516, "step": 12980 }, { "epoch": 0.5389130197873367, "grad_norm": 2.4432334899902344, "learning_rate": 4.61313371551479e-06, "loss": 0.5062, "step": 12981 }, { "epoch": 0.538954535311548, "grad_norm": 2.918058156967163, "learning_rate": 4.6124634196700554e-06, "loss": 0.4033, "step": 12982 }, { "epoch": 0.5389960508357594, "grad_norm": 2.6251790523529053, "learning_rate": 4.611793130832096e-06, "loss": 0.5339, "step": 12983 }, { "epoch": 0.5390375663599707, "grad_norm": 2.5116071701049805, "learning_rate": 4.6111228490130265e-06, "loss": 0.5595, "step": 12984 }, { "epoch": 0.5390790818841821, "grad_norm": 2.398453950881958, "learning_rate": 4.610452574224968e-06, "loss": 0.6384, "step": 12985 }, { "epoch": 0.5391205974083934, "grad_norm": 2.461698293685913, "learning_rate": 4.609782306480039e-06, "loss": 0.538, "step": 12986 }, { "epoch": 0.5391621129326047, "grad_norm": 2.922234535217285, "learning_rate": 4.609112045790361e-06, "loss": 0.5504, "step": 12987 }, { "epoch": 0.539203628456816, "grad_norm": 2.496584415435791, "learning_rate": 4.608441792168048e-06, "loss": 0.6209, "step": 12988 }, { "epoch": 0.5392451439810274, "grad_norm": 2.0854580402374268, "learning_rate": 4.60777154562522e-06, "loss": 0.5687, "step": 12989 }, { "epoch": 0.5392866595052387, "grad_norm": 2.746774196624756, "learning_rate": 4.6071013061739964e-06, "loss": 0.4368, "step": 12990 }, { "epoch": 0.5393281750294501, "grad_norm": 2.017707586288452, "learning_rate": 4.606431073826494e-06, "loss": 0.4607, "step": 12991 }, { "epoch": 0.5393696905536615, "grad_norm": 2.299156665802002, "learning_rate": 4.605760848594832e-06, "loss": 0.4061, "step": 12992 }, { "epoch": 0.5394112060778727, "grad_norm": 2.9989869594573975, "learning_rate": 4.605090630491126e-06, "loss": 0.6019, "step": 12993 }, { "epoch": 0.5394527216020841, "grad_norm": 2.1811604499816895, "learning_rate": 4.604420419527496e-06, "loss": 0.5586, "step": 12994 }, { "epoch": 0.5394942371262954, "grad_norm": 2.2997920513153076, "learning_rate": 4.603750215716057e-06, "loss": 0.4631, "step": 12995 }, { "epoch": 0.5395357526505068, "grad_norm": 2.3606576919555664, "learning_rate": 4.603080019068932e-06, "loss": 0.4054, "step": 12996 }, { "epoch": 0.5395772681747181, "grad_norm": 2.387735366821289, "learning_rate": 4.602409829598232e-06, "loss": 0.5544, "step": 12997 }, { "epoch": 0.5396187836989295, "grad_norm": 2.174750566482544, "learning_rate": 4.601739647316078e-06, "loss": 0.4253, "step": 12998 }, { "epoch": 0.5396602992231407, "grad_norm": 2.390336275100708, "learning_rate": 4.601069472234584e-06, "loss": 0.4576, "step": 12999 }, { "epoch": 0.5397018147473521, "grad_norm": 2.5899856090545654, "learning_rate": 4.6003993043658725e-06, "loss": 0.4479, "step": 13000 }, { "epoch": 0.5397433302715634, "grad_norm": 2.744826555252075, "learning_rate": 4.599729143722055e-06, "loss": 0.5207, "step": 13001 }, { "epoch": 0.5397848457957748, "grad_norm": 3.2026190757751465, "learning_rate": 4.599058990315248e-06, "loss": 0.5083, "step": 13002 }, { "epoch": 0.5398263613199861, "grad_norm": 2.6473262310028076, "learning_rate": 4.598388844157572e-06, "loss": 0.6559, "step": 13003 }, { "epoch": 0.5398678768441975, "grad_norm": 2.2501132488250732, "learning_rate": 4.597718705261142e-06, "loss": 0.5591, "step": 13004 }, { "epoch": 0.5399093923684087, "grad_norm": 1.896813988685608, "learning_rate": 4.597048573638072e-06, "loss": 0.3568, "step": 13005 }, { "epoch": 0.5399509078926201, "grad_norm": 2.4245357513427734, "learning_rate": 4.596378449300481e-06, "loss": 0.5806, "step": 13006 }, { "epoch": 0.5399924234168314, "grad_norm": 2.3778390884399414, "learning_rate": 4.595708332260484e-06, "loss": 0.5854, "step": 13007 }, { "epoch": 0.5400339389410428, "grad_norm": 2.526860237121582, "learning_rate": 4.595038222530196e-06, "loss": 0.5578, "step": 13008 }, { "epoch": 0.5400754544652541, "grad_norm": 2.367865800857544, "learning_rate": 4.594368120121736e-06, "loss": 0.4716, "step": 13009 }, { "epoch": 0.5401169699894655, "grad_norm": 2.709155797958374, "learning_rate": 4.593698025047215e-06, "loss": 0.5747, "step": 13010 }, { "epoch": 0.5401584855136767, "grad_norm": 2.5543131828308105, "learning_rate": 4.593027937318751e-06, "loss": 0.6281, "step": 13011 }, { "epoch": 0.5402000010378881, "grad_norm": 3.362809896469116, "learning_rate": 4.592357856948459e-06, "loss": 0.4705, "step": 13012 }, { "epoch": 0.5402415165620994, "grad_norm": 2.1810009479522705, "learning_rate": 4.5916877839484565e-06, "loss": 0.5195, "step": 13013 }, { "epoch": 0.5402830320863108, "grad_norm": 1.9392180442810059, "learning_rate": 4.591017718330854e-06, "loss": 0.3565, "step": 13014 }, { "epoch": 0.5403245476105221, "grad_norm": 2.3720908164978027, "learning_rate": 4.59034766010777e-06, "loss": 0.4556, "step": 13015 }, { "epoch": 0.5403660631347335, "grad_norm": 2.566659927368164, "learning_rate": 4.589677609291319e-06, "loss": 0.549, "step": 13016 }, { "epoch": 0.5404075786589447, "grad_norm": 2.5926342010498047, "learning_rate": 4.589007565893615e-06, "loss": 0.5655, "step": 13017 }, { "epoch": 0.5404490941831561, "grad_norm": 2.3643150329589844, "learning_rate": 4.5883375299267725e-06, "loss": 0.4465, "step": 13018 }, { "epoch": 0.5404906097073674, "grad_norm": 2.219552516937256, "learning_rate": 4.587667501402906e-06, "loss": 0.5772, "step": 13019 }, { "epoch": 0.5405321252315788, "grad_norm": 2.2501580715179443, "learning_rate": 4.5869974803341296e-06, "loss": 0.5136, "step": 13020 }, { "epoch": 0.5405736407557901, "grad_norm": 2.9529037475585938, "learning_rate": 4.586327466732557e-06, "loss": 0.5097, "step": 13021 }, { "epoch": 0.5406151562800015, "grad_norm": 2.313772439956665, "learning_rate": 4.585657460610306e-06, "loss": 0.5769, "step": 13022 }, { "epoch": 0.5406566718042128, "grad_norm": 2.3083834648132324, "learning_rate": 4.5849874619794855e-06, "loss": 0.5353, "step": 13023 }, { "epoch": 0.5406981873284241, "grad_norm": 2.4262595176696777, "learning_rate": 4.58431747085221e-06, "loss": 0.5298, "step": 13024 }, { "epoch": 0.5407397028526355, "grad_norm": 2.724327325820923, "learning_rate": 4.583647487240596e-06, "loss": 0.5385, "step": 13025 }, { "epoch": 0.5407812183768468, "grad_norm": 2.3666975498199463, "learning_rate": 4.582977511156757e-06, "loss": 0.6023, "step": 13026 }, { "epoch": 0.5408227339010582, "grad_norm": 2.598663091659546, "learning_rate": 4.582307542612802e-06, "loss": 0.4439, "step": 13027 }, { "epoch": 0.5408642494252694, "grad_norm": 2.4814414978027344, "learning_rate": 4.581637581620848e-06, "loss": 0.4327, "step": 13028 }, { "epoch": 0.5409057649494808, "grad_norm": 2.3295602798461914, "learning_rate": 4.580967628193008e-06, "loss": 0.5732, "step": 13029 }, { "epoch": 0.5409472804736921, "grad_norm": 2.047646999359131, "learning_rate": 4.580297682341394e-06, "loss": 0.5288, "step": 13030 }, { "epoch": 0.5409887959979035, "grad_norm": 2.3927688598632812, "learning_rate": 4.579627744078118e-06, "loss": 0.5193, "step": 13031 }, { "epoch": 0.5410303115221148, "grad_norm": 2.223188877105713, "learning_rate": 4.578957813415293e-06, "loss": 0.4915, "step": 13032 }, { "epoch": 0.5410718270463262, "grad_norm": 2.7973594665527344, "learning_rate": 4.578287890365032e-06, "loss": 0.5438, "step": 13033 }, { "epoch": 0.5411133425705374, "grad_norm": 2.3366506099700928, "learning_rate": 4.577617974939449e-06, "loss": 0.4659, "step": 13034 }, { "epoch": 0.5411548580947488, "grad_norm": 2.2833962440490723, "learning_rate": 4.576948067150655e-06, "loss": 0.5499, "step": 13035 }, { "epoch": 0.5411963736189601, "grad_norm": 2.372652292251587, "learning_rate": 4.5762781670107605e-06, "loss": 0.5039, "step": 13036 }, { "epoch": 0.5412378891431715, "grad_norm": 2.4859373569488525, "learning_rate": 4.575608274531878e-06, "loss": 0.4854, "step": 13037 }, { "epoch": 0.5412794046673828, "grad_norm": 2.020069122314453, "learning_rate": 4.574938389726121e-06, "loss": 0.4206, "step": 13038 }, { "epoch": 0.5413209201915942, "grad_norm": 2.0271341800689697, "learning_rate": 4.574268512605603e-06, "loss": 0.3155, "step": 13039 }, { "epoch": 0.5413624357158054, "grad_norm": 2.4870407581329346, "learning_rate": 4.5735986431824305e-06, "loss": 0.5787, "step": 13040 }, { "epoch": 0.5414039512400168, "grad_norm": 2.409428119659424, "learning_rate": 4.5729287814687175e-06, "loss": 0.5566, "step": 13041 }, { "epoch": 0.5414454667642281, "grad_norm": 2.2874414920806885, "learning_rate": 4.572258927476576e-06, "loss": 0.5116, "step": 13042 }, { "epoch": 0.5414869822884395, "grad_norm": 2.1067440509796143, "learning_rate": 4.571589081218116e-06, "loss": 0.5037, "step": 13043 }, { "epoch": 0.5415284978126508, "grad_norm": 2.4507384300231934, "learning_rate": 4.570919242705449e-06, "loss": 0.4547, "step": 13044 }, { "epoch": 0.5415700133368622, "grad_norm": 2.347827196121216, "learning_rate": 4.5702494119506866e-06, "loss": 0.4906, "step": 13045 }, { "epoch": 0.5416115288610734, "grad_norm": 2.533339738845825, "learning_rate": 4.569579588965936e-06, "loss": 0.5124, "step": 13046 }, { "epoch": 0.5416530443852848, "grad_norm": 2.5650715827941895, "learning_rate": 4.568909773763312e-06, "loss": 0.6335, "step": 13047 }, { "epoch": 0.5416945599094961, "grad_norm": 2.032412528991699, "learning_rate": 4.568239966354926e-06, "loss": 0.3663, "step": 13048 }, { "epoch": 0.5417360754337075, "grad_norm": 2.554429054260254, "learning_rate": 4.567570166752882e-06, "loss": 0.556, "step": 13049 }, { "epoch": 0.5417775909579188, "grad_norm": 2.3022897243499756, "learning_rate": 4.5669003749692955e-06, "loss": 0.5624, "step": 13050 }, { "epoch": 0.5418191064821302, "grad_norm": 2.12850022315979, "learning_rate": 4.566230591016275e-06, "loss": 0.4356, "step": 13051 }, { "epoch": 0.5418606220063414, "grad_norm": 2.5418291091918945, "learning_rate": 4.5655608149059326e-06, "loss": 0.4716, "step": 13052 }, { "epoch": 0.5419021375305528, "grad_norm": 2.3141632080078125, "learning_rate": 4.564891046650373e-06, "loss": 0.5254, "step": 13053 }, { "epoch": 0.5419436530547642, "grad_norm": 2.382110357284546, "learning_rate": 4.564221286261709e-06, "loss": 0.4496, "step": 13054 }, { "epoch": 0.5419851685789755, "grad_norm": 2.311556100845337, "learning_rate": 4.563551533752051e-06, "loss": 0.4294, "step": 13055 }, { "epoch": 0.5420266841031869, "grad_norm": 2.424011468887329, "learning_rate": 4.562881789133507e-06, "loss": 0.5287, "step": 13056 }, { "epoch": 0.5420681996273982, "grad_norm": 3.220088243484497, "learning_rate": 4.5622120524181855e-06, "loss": 0.5286, "step": 13057 }, { "epoch": 0.5421097151516096, "grad_norm": 2.7229130268096924, "learning_rate": 4.561542323618197e-06, "loss": 0.5809, "step": 13058 }, { "epoch": 0.5421512306758208, "grad_norm": 2.1742477416992188, "learning_rate": 4.560872602745648e-06, "loss": 0.5114, "step": 13059 }, { "epoch": 0.5421927462000322, "grad_norm": 1.9755407571792603, "learning_rate": 4.56020288981265e-06, "loss": 0.5122, "step": 13060 }, { "epoch": 0.5422342617242435, "grad_norm": 2.567781925201416, "learning_rate": 4.559533184831311e-06, "loss": 0.5254, "step": 13061 }, { "epoch": 0.5422757772484549, "grad_norm": 2.196033477783203, "learning_rate": 4.5588634878137385e-06, "loss": 0.4306, "step": 13062 }, { "epoch": 0.5423172927726662, "grad_norm": 2.4774646759033203, "learning_rate": 4.558193798772041e-06, "loss": 0.4333, "step": 13063 }, { "epoch": 0.5423588082968775, "grad_norm": 2.6172497272491455, "learning_rate": 4.557524117718327e-06, "loss": 0.4835, "step": 13064 }, { "epoch": 0.5424003238210888, "grad_norm": 2.6960010528564453, "learning_rate": 4.556854444664706e-06, "loss": 0.594, "step": 13065 }, { "epoch": 0.5424418393453002, "grad_norm": 2.162262201309204, "learning_rate": 4.556184779623283e-06, "loss": 0.4904, "step": 13066 }, { "epoch": 0.5424833548695115, "grad_norm": 2.559699296951294, "learning_rate": 4.555515122606167e-06, "loss": 0.4561, "step": 13067 }, { "epoch": 0.5425248703937229, "grad_norm": 2.1445724964141846, "learning_rate": 4.554845473625466e-06, "loss": 0.3773, "step": 13068 }, { "epoch": 0.5425663859179342, "grad_norm": 2.1317765712738037, "learning_rate": 4.554175832693288e-06, "loss": 0.5458, "step": 13069 }, { "epoch": 0.5426079014421455, "grad_norm": 2.620673179626465, "learning_rate": 4.553506199821738e-06, "loss": 0.5568, "step": 13070 }, { "epoch": 0.5426494169663568, "grad_norm": 2.3816514015197754, "learning_rate": 4.552836575022925e-06, "loss": 0.5925, "step": 13071 }, { "epoch": 0.5426909324905682, "grad_norm": 2.7303061485290527, "learning_rate": 4.552166958308955e-06, "loss": 0.4993, "step": 13072 }, { "epoch": 0.5427324480147795, "grad_norm": 2.1625816822052, "learning_rate": 4.551497349691939e-06, "loss": 0.5212, "step": 13073 }, { "epoch": 0.5427739635389909, "grad_norm": 2.1656601428985596, "learning_rate": 4.550827749183977e-06, "loss": 0.5707, "step": 13074 }, { "epoch": 0.5428154790632022, "grad_norm": 3.517169713973999, "learning_rate": 4.550158156797178e-06, "loss": 0.565, "step": 13075 }, { "epoch": 0.5428569945874135, "grad_norm": 3.0272297859191895, "learning_rate": 4.54948857254365e-06, "loss": 0.7645, "step": 13076 }, { "epoch": 0.5428985101116248, "grad_norm": 2.3095216751098633, "learning_rate": 4.548818996435498e-06, "loss": 0.4564, "step": 13077 }, { "epoch": 0.5429400256358362, "grad_norm": 2.2837255001068115, "learning_rate": 4.548149428484831e-06, "loss": 0.5445, "step": 13078 }, { "epoch": 0.5429815411600475, "grad_norm": 2.2023959159851074, "learning_rate": 4.54747986870375e-06, "loss": 0.4584, "step": 13079 }, { "epoch": 0.5430230566842589, "grad_norm": 2.6901345252990723, "learning_rate": 4.546810317104364e-06, "loss": 0.5868, "step": 13080 }, { "epoch": 0.5430645722084702, "grad_norm": 2.6174166202545166, "learning_rate": 4.546140773698779e-06, "loss": 0.7228, "step": 13081 }, { "epoch": 0.5431060877326815, "grad_norm": 2.324601888656616, "learning_rate": 4.5454712384991e-06, "loss": 0.6432, "step": 13082 }, { "epoch": 0.5431476032568928, "grad_norm": 2.757793426513672, "learning_rate": 4.544801711517431e-06, "loss": 0.5531, "step": 13083 }, { "epoch": 0.5431891187811042, "grad_norm": 2.3872745037078857, "learning_rate": 4.544132192765878e-06, "loss": 0.6598, "step": 13084 }, { "epoch": 0.5432306343053156, "grad_norm": 1.9869508743286133, "learning_rate": 4.5434626822565465e-06, "loss": 0.4017, "step": 13085 }, { "epoch": 0.5432721498295269, "grad_norm": 2.2504658699035645, "learning_rate": 4.5427931800015435e-06, "loss": 0.4235, "step": 13086 }, { "epoch": 0.5433136653537383, "grad_norm": 2.16338849067688, "learning_rate": 4.542123686012969e-06, "loss": 0.522, "step": 13087 }, { "epoch": 0.5433551808779495, "grad_norm": 2.065612316131592, "learning_rate": 4.54145420030293e-06, "loss": 0.5567, "step": 13088 }, { "epoch": 0.5433966964021609, "grad_norm": 2.6490225791931152, "learning_rate": 4.540784722883532e-06, "loss": 0.444, "step": 13089 }, { "epoch": 0.5434382119263722, "grad_norm": 2.051586866378784, "learning_rate": 4.5401152537668775e-06, "loss": 0.5207, "step": 13090 }, { "epoch": 0.5434797274505836, "grad_norm": 2.112720251083374, "learning_rate": 4.539445792965075e-06, "loss": 0.5799, "step": 13091 }, { "epoch": 0.5435212429747949, "grad_norm": 2.2764477729797363, "learning_rate": 4.538776340490222e-06, "loss": 0.5479, "step": 13092 }, { "epoch": 0.5435627584990063, "grad_norm": 2.204030752182007, "learning_rate": 4.538106896354427e-06, "loss": 0.5355, "step": 13093 }, { "epoch": 0.5436042740232175, "grad_norm": 2.4133546352386475, "learning_rate": 4.537437460569792e-06, "loss": 0.5001, "step": 13094 }, { "epoch": 0.5436457895474289, "grad_norm": 2.4563941955566406, "learning_rate": 4.536768033148422e-06, "loss": 0.5652, "step": 13095 }, { "epoch": 0.5436873050716402, "grad_norm": 2.157310962677002, "learning_rate": 4.53609861410242e-06, "loss": 0.4845, "step": 13096 }, { "epoch": 0.5437288205958516, "grad_norm": 2.4538168907165527, "learning_rate": 4.535429203443887e-06, "loss": 0.503, "step": 13097 }, { "epoch": 0.5437703361200629, "grad_norm": 2.728316307067871, "learning_rate": 4.534759801184928e-06, "loss": 0.5293, "step": 13098 }, { "epoch": 0.5438118516442743, "grad_norm": 2.6620595455169678, "learning_rate": 4.534090407337648e-06, "loss": 0.5018, "step": 13099 }, { "epoch": 0.5438533671684855, "grad_norm": 2.2560927867889404, "learning_rate": 4.5334210219141455e-06, "loss": 0.44, "step": 13100 }, { "epoch": 0.5438948826926969, "grad_norm": 2.4340732097625732, "learning_rate": 4.532751644926525e-06, "loss": 0.5077, "step": 13101 }, { "epoch": 0.5439363982169082, "grad_norm": 2.539421319961548, "learning_rate": 4.532082276386891e-06, "loss": 0.5323, "step": 13102 }, { "epoch": 0.5439779137411196, "grad_norm": 1.9008638858795166, "learning_rate": 4.531412916307343e-06, "loss": 0.4206, "step": 13103 }, { "epoch": 0.5440194292653309, "grad_norm": 2.258904218673706, "learning_rate": 4.530743564699986e-06, "loss": 0.5098, "step": 13104 }, { "epoch": 0.5440609447895423, "grad_norm": 2.3078787326812744, "learning_rate": 4.53007422157692e-06, "loss": 0.4916, "step": 13105 }, { "epoch": 0.5441024603137535, "grad_norm": 2.2078969478607178, "learning_rate": 4.529404886950247e-06, "loss": 0.4984, "step": 13106 }, { "epoch": 0.5441439758379649, "grad_norm": 2.2576892375946045, "learning_rate": 4.528735560832071e-06, "loss": 0.3114, "step": 13107 }, { "epoch": 0.5441854913621762, "grad_norm": 2.1850857734680176, "learning_rate": 4.5280662432344904e-06, "loss": 0.6117, "step": 13108 }, { "epoch": 0.5442270068863876, "grad_norm": 2.446678400039673, "learning_rate": 4.527396934169609e-06, "loss": 0.5408, "step": 13109 }, { "epoch": 0.5442685224105989, "grad_norm": 2.60477614402771, "learning_rate": 4.526727633649526e-06, "loss": 0.5061, "step": 13110 }, { "epoch": 0.5443100379348103, "grad_norm": 2.473754644393921, "learning_rate": 4.526058341686344e-06, "loss": 0.4599, "step": 13111 }, { "epoch": 0.5443515534590215, "grad_norm": 2.752492666244507, "learning_rate": 4.525389058292165e-06, "loss": 0.4972, "step": 13112 }, { "epoch": 0.5443930689832329, "grad_norm": 2.4595255851745605, "learning_rate": 4.524719783479088e-06, "loss": 0.5035, "step": 13113 }, { "epoch": 0.5444345845074442, "grad_norm": 2.2372348308563232, "learning_rate": 4.524050517259214e-06, "loss": 0.5462, "step": 13114 }, { "epoch": 0.5444761000316556, "grad_norm": 2.329871892929077, "learning_rate": 4.523381259644643e-06, "loss": 0.6256, "step": 13115 }, { "epoch": 0.544517615555867, "grad_norm": 2.2634992599487305, "learning_rate": 4.522712010647476e-06, "loss": 0.6138, "step": 13116 }, { "epoch": 0.5445591310800783, "grad_norm": 2.4463953971862793, "learning_rate": 4.522042770279817e-06, "loss": 0.5402, "step": 13117 }, { "epoch": 0.5446006466042896, "grad_norm": 2.0793230533599854, "learning_rate": 4.52137353855376e-06, "loss": 0.4467, "step": 13118 }, { "epoch": 0.5446421621285009, "grad_norm": 2.1948490142822266, "learning_rate": 4.520704315481406e-06, "loss": 0.573, "step": 13119 }, { "epoch": 0.5446836776527123, "grad_norm": 2.7838213443756104, "learning_rate": 4.520035101074858e-06, "loss": 0.6761, "step": 13120 }, { "epoch": 0.5447251931769236, "grad_norm": 2.185532808303833, "learning_rate": 4.5193658953462145e-06, "loss": 0.5517, "step": 13121 }, { "epoch": 0.544766708701135, "grad_norm": 2.204357624053955, "learning_rate": 4.518696698307573e-06, "loss": 0.3817, "step": 13122 }, { "epoch": 0.5448082242253462, "grad_norm": 2.3804163932800293, "learning_rate": 4.518027509971034e-06, "loss": 0.4732, "step": 13123 }, { "epoch": 0.5448497397495576, "grad_norm": 3.928581476211548, "learning_rate": 4.517358330348696e-06, "loss": 0.6243, "step": 13124 }, { "epoch": 0.5448912552737689, "grad_norm": 2.2991583347320557, "learning_rate": 4.51668915945266e-06, "loss": 0.4836, "step": 13125 }, { "epoch": 0.5449327707979803, "grad_norm": 2.9080562591552734, "learning_rate": 4.516019997295022e-06, "loss": 0.4401, "step": 13126 }, { "epoch": 0.5449742863221916, "grad_norm": 2.233549118041992, "learning_rate": 4.515350843887882e-06, "loss": 0.5013, "step": 13127 }, { "epoch": 0.545015801846403, "grad_norm": 2.5189437866210938, "learning_rate": 4.514681699243337e-06, "loss": 0.6556, "step": 13128 }, { "epoch": 0.5450573173706142, "grad_norm": 2.253394603729248, "learning_rate": 4.514012563373488e-06, "loss": 0.6036, "step": 13129 }, { "epoch": 0.5450988328948256, "grad_norm": 2.611440420150757, "learning_rate": 4.513343436290434e-06, "loss": 0.6743, "step": 13130 }, { "epoch": 0.5451403484190369, "grad_norm": 1.9680578708648682, "learning_rate": 4.512674318006268e-06, "loss": 0.526, "step": 13131 }, { "epoch": 0.5451818639432483, "grad_norm": 2.4707984924316406, "learning_rate": 4.512005208533092e-06, "loss": 0.5313, "step": 13132 }, { "epoch": 0.5452233794674596, "grad_norm": 1.853411078453064, "learning_rate": 4.511336107883003e-06, "loss": 0.3899, "step": 13133 }, { "epoch": 0.545264894991671, "grad_norm": 3.049912929534912, "learning_rate": 4.5106670160680985e-06, "loss": 0.5594, "step": 13134 }, { "epoch": 0.5453064105158822, "grad_norm": 3.282052755355835, "learning_rate": 4.509997933100474e-06, "loss": 0.5227, "step": 13135 }, { "epoch": 0.5453479260400936, "grad_norm": 3.438441514968872, "learning_rate": 4.509328858992228e-06, "loss": 0.5927, "step": 13136 }, { "epoch": 0.5453894415643049, "grad_norm": 2.6114022731781006, "learning_rate": 4.508659793755458e-06, "loss": 0.5826, "step": 13137 }, { "epoch": 0.5454309570885163, "grad_norm": 2.3563718795776367, "learning_rate": 4.507990737402262e-06, "loss": 0.5319, "step": 13138 }, { "epoch": 0.5454724726127276, "grad_norm": 2.885957956314087, "learning_rate": 4.507321689944733e-06, "loss": 0.509, "step": 13139 }, { "epoch": 0.545513988136939, "grad_norm": 2.7408154010772705, "learning_rate": 4.506652651394971e-06, "loss": 0.5705, "step": 13140 }, { "epoch": 0.5455555036611502, "grad_norm": 2.8942058086395264, "learning_rate": 4.50598362176507e-06, "loss": 0.5162, "step": 13141 }, { "epoch": 0.5455970191853616, "grad_norm": 2.3255913257598877, "learning_rate": 4.505314601067129e-06, "loss": 0.426, "step": 13142 }, { "epoch": 0.5456385347095729, "grad_norm": 2.2892017364501953, "learning_rate": 4.504645589313243e-06, "loss": 0.4632, "step": 13143 }, { "epoch": 0.5456800502337843, "grad_norm": 2.5632221698760986, "learning_rate": 4.503976586515507e-06, "loss": 0.6961, "step": 13144 }, { "epoch": 0.5457215657579957, "grad_norm": 2.147494316101074, "learning_rate": 4.5033075926860155e-06, "loss": 0.5531, "step": 13145 }, { "epoch": 0.545763081282207, "grad_norm": 2.5331716537475586, "learning_rate": 4.502638607836868e-06, "loss": 0.5449, "step": 13146 }, { "epoch": 0.5458045968064184, "grad_norm": 2.363126039505005, "learning_rate": 4.5019696319801585e-06, "loss": 0.4497, "step": 13147 }, { "epoch": 0.5458461123306296, "grad_norm": 2.5769600868225098, "learning_rate": 4.5013006651279805e-06, "loss": 0.6341, "step": 13148 }, { "epoch": 0.545887627854841, "grad_norm": 2.4296913146972656, "learning_rate": 4.50063170729243e-06, "loss": 0.5356, "step": 13149 }, { "epoch": 0.5459291433790523, "grad_norm": 2.442507743835449, "learning_rate": 4.499962758485601e-06, "loss": 0.4356, "step": 13150 }, { "epoch": 0.5459706589032637, "grad_norm": 2.1635637283325195, "learning_rate": 4.499293818719593e-06, "loss": 0.4517, "step": 13151 }, { "epoch": 0.546012174427475, "grad_norm": 2.941486120223999, "learning_rate": 4.4986248880064934e-06, "loss": 0.6124, "step": 13152 }, { "epoch": 0.5460536899516863, "grad_norm": 2.3871243000030518, "learning_rate": 4.497955966358401e-06, "loss": 0.4475, "step": 13153 }, { "epoch": 0.5460952054758976, "grad_norm": 2.484031915664673, "learning_rate": 4.49728705378741e-06, "loss": 0.444, "step": 13154 }, { "epoch": 0.546136721000109, "grad_norm": 2.397054433822632, "learning_rate": 4.496618150305614e-06, "loss": 0.3744, "step": 13155 }, { "epoch": 0.5461782365243203, "grad_norm": 2.569965362548828, "learning_rate": 4.495949255925108e-06, "loss": 0.4906, "step": 13156 }, { "epoch": 0.5462197520485317, "grad_norm": 2.110705852508545, "learning_rate": 4.495280370657983e-06, "loss": 0.5095, "step": 13157 }, { "epoch": 0.546261267572743, "grad_norm": 2.3766555786132812, "learning_rate": 4.494611494516335e-06, "loss": 0.4476, "step": 13158 }, { "epoch": 0.5463027830969543, "grad_norm": 2.332998275756836, "learning_rate": 4.493942627512258e-06, "loss": 0.5041, "step": 13159 }, { "epoch": 0.5463442986211656, "grad_norm": 2.341059923171997, "learning_rate": 4.493273769657843e-06, "loss": 0.5418, "step": 13160 }, { "epoch": 0.546385814145377, "grad_norm": 2.3660240173339844, "learning_rate": 4.492604920965185e-06, "loss": 0.5611, "step": 13161 }, { "epoch": 0.5464273296695883, "grad_norm": 2.517714738845825, "learning_rate": 4.491936081446375e-06, "loss": 0.5722, "step": 13162 }, { "epoch": 0.5464688451937997, "grad_norm": 2.695222854614258, "learning_rate": 4.491267251113507e-06, "loss": 0.4501, "step": 13163 }, { "epoch": 0.546510360718011, "grad_norm": 2.5384793281555176, "learning_rate": 4.490598429978676e-06, "loss": 0.585, "step": 13164 }, { "epoch": 0.5465518762422223, "grad_norm": 2.42868971824646, "learning_rate": 4.489929618053971e-06, "loss": 0.5707, "step": 13165 }, { "epoch": 0.5465933917664336, "grad_norm": 2.810295343399048, "learning_rate": 4.489260815351484e-06, "loss": 0.3495, "step": 13166 }, { "epoch": 0.546634907290645, "grad_norm": 2.4403388500213623, "learning_rate": 4.48859202188331e-06, "loss": 0.4522, "step": 13167 }, { "epoch": 0.5466764228148563, "grad_norm": 2.738778829574585, "learning_rate": 4.487923237661539e-06, "loss": 0.5351, "step": 13168 }, { "epoch": 0.5467179383390677, "grad_norm": 2.5742287635803223, "learning_rate": 4.4872544626982656e-06, "loss": 0.5313, "step": 13169 }, { "epoch": 0.546759453863279, "grad_norm": 2.042012929916382, "learning_rate": 4.486585697005577e-06, "loss": 0.5286, "step": 13170 }, { "epoch": 0.5468009693874903, "grad_norm": 2.4680252075195312, "learning_rate": 4.485916940595567e-06, "loss": 0.5539, "step": 13171 }, { "epoch": 0.5468424849117016, "grad_norm": 2.313415765762329, "learning_rate": 4.485248193480328e-06, "loss": 0.4317, "step": 13172 }, { "epoch": 0.546884000435913, "grad_norm": 2.4229657649993896, "learning_rate": 4.48457945567195e-06, "loss": 0.5774, "step": 13173 }, { "epoch": 0.5469255159601243, "grad_norm": 2.5765650272369385, "learning_rate": 4.483910727182525e-06, "loss": 0.5668, "step": 13174 }, { "epoch": 0.5469670314843357, "grad_norm": 2.81477689743042, "learning_rate": 4.483242008024141e-06, "loss": 0.6753, "step": 13175 }, { "epoch": 0.5470085470085471, "grad_norm": 2.257925510406494, "learning_rate": 4.48257329820889e-06, "loss": 0.5006, "step": 13176 }, { "epoch": 0.5470500625327583, "grad_norm": 2.75258469581604, "learning_rate": 4.481904597748865e-06, "loss": 0.4182, "step": 13177 }, { "epoch": 0.5470915780569697, "grad_norm": 2.405682325363159, "learning_rate": 4.481235906656153e-06, "loss": 0.5989, "step": 13178 }, { "epoch": 0.547133093581181, "grad_norm": 2.4559335708618164, "learning_rate": 4.480567224942845e-06, "loss": 0.5113, "step": 13179 }, { "epoch": 0.5471746091053924, "grad_norm": 2.100074052810669, "learning_rate": 4.47989855262103e-06, "loss": 0.5186, "step": 13180 }, { "epoch": 0.5472161246296037, "grad_norm": 2.3009777069091797, "learning_rate": 4.479229889702801e-06, "loss": 0.489, "step": 13181 }, { "epoch": 0.5472576401538151, "grad_norm": 2.7964518070220947, "learning_rate": 4.478561236200246e-06, "loss": 0.6599, "step": 13182 }, { "epoch": 0.5472991556780263, "grad_norm": 2.020878314971924, "learning_rate": 4.477892592125453e-06, "loss": 0.5322, "step": 13183 }, { "epoch": 0.5473406712022377, "grad_norm": 2.1028172969818115, "learning_rate": 4.477223957490513e-06, "loss": 0.5311, "step": 13184 }, { "epoch": 0.547382186726449, "grad_norm": 2.377795696258545, "learning_rate": 4.476555332307515e-06, "loss": 0.4571, "step": 13185 }, { "epoch": 0.5474237022506604, "grad_norm": 2.339306354522705, "learning_rate": 4.475886716588548e-06, "loss": 0.3745, "step": 13186 }, { "epoch": 0.5474652177748717, "grad_norm": 2.141500949859619, "learning_rate": 4.475218110345699e-06, "loss": 0.5035, "step": 13187 }, { "epoch": 0.5475067332990831, "grad_norm": 2.214327573776245, "learning_rate": 4.4745495135910575e-06, "loss": 0.5192, "step": 13188 }, { "epoch": 0.5475482488232943, "grad_norm": 2.317145824432373, "learning_rate": 4.473880926336712e-06, "loss": 0.5173, "step": 13189 }, { "epoch": 0.5475897643475057, "grad_norm": 2.654825448989868, "learning_rate": 4.473212348594754e-06, "loss": 0.5622, "step": 13190 }, { "epoch": 0.547631279871717, "grad_norm": 2.4608957767486572, "learning_rate": 4.472543780377265e-06, "loss": 0.4697, "step": 13191 }, { "epoch": 0.5476727953959284, "grad_norm": 2.4328370094299316, "learning_rate": 4.471875221696338e-06, "loss": 0.4016, "step": 13192 }, { "epoch": 0.5477143109201397, "grad_norm": 2.3307433128356934, "learning_rate": 4.471206672564058e-06, "loss": 0.5614, "step": 13193 }, { "epoch": 0.5477558264443511, "grad_norm": 2.7937662601470947, "learning_rate": 4.470538132992514e-06, "loss": 0.4712, "step": 13194 }, { "epoch": 0.5477973419685623, "grad_norm": 2.519369602203369, "learning_rate": 4.469869602993796e-06, "loss": 0.4206, "step": 13195 }, { "epoch": 0.5478388574927737, "grad_norm": 2.444413185119629, "learning_rate": 4.469201082579986e-06, "loss": 0.4834, "step": 13196 }, { "epoch": 0.547880373016985, "grad_norm": 2.5239667892456055, "learning_rate": 4.468532571763174e-06, "loss": 0.5094, "step": 13197 }, { "epoch": 0.5479218885411964, "grad_norm": 2.3402514457702637, "learning_rate": 4.467864070555446e-06, "loss": 0.5389, "step": 13198 }, { "epoch": 0.5479634040654077, "grad_norm": 2.9049293994903564, "learning_rate": 4.467195578968889e-06, "loss": 0.5175, "step": 13199 }, { "epoch": 0.548004919589619, "grad_norm": 2.1323013305664062, "learning_rate": 4.46652709701559e-06, "loss": 0.5967, "step": 13200 }, { "epoch": 0.5480464351138303, "grad_norm": 2.107187271118164, "learning_rate": 4.465858624707634e-06, "loss": 0.5317, "step": 13201 }, { "epoch": 0.5480879506380417, "grad_norm": 2.2504916191101074, "learning_rate": 4.4651901620571075e-06, "loss": 0.4386, "step": 13202 }, { "epoch": 0.548129466162253, "grad_norm": 3.4196298122406006, "learning_rate": 4.464521709076099e-06, "loss": 0.6601, "step": 13203 }, { "epoch": 0.5481709816864644, "grad_norm": 2.4735655784606934, "learning_rate": 4.463853265776691e-06, "loss": 0.4018, "step": 13204 }, { "epoch": 0.5482124972106757, "grad_norm": 2.3755056858062744, "learning_rate": 4.46318483217097e-06, "loss": 0.5967, "step": 13205 }, { "epoch": 0.548254012734887, "grad_norm": 2.132171392440796, "learning_rate": 4.462516408271023e-06, "loss": 0.5384, "step": 13206 }, { "epoch": 0.5482955282590984, "grad_norm": 2.6095693111419678, "learning_rate": 4.461847994088933e-06, "loss": 0.4456, "step": 13207 }, { "epoch": 0.5483370437833097, "grad_norm": 2.257655143737793, "learning_rate": 4.4611795896367885e-06, "loss": 0.5151, "step": 13208 }, { "epoch": 0.5483785593075211, "grad_norm": 2.78312087059021, "learning_rate": 4.4605111949266715e-06, "loss": 0.4726, "step": 13209 }, { "epoch": 0.5484200748317324, "grad_norm": 2.3861443996429443, "learning_rate": 4.459842809970667e-06, "loss": 0.5947, "step": 13210 }, { "epoch": 0.5484615903559438, "grad_norm": 2.742753744125366, "learning_rate": 4.45917443478086e-06, "loss": 0.5415, "step": 13211 }, { "epoch": 0.548503105880155, "grad_norm": 2.358218193054199, "learning_rate": 4.458506069369336e-06, "loss": 0.4215, "step": 13212 }, { "epoch": 0.5485446214043664, "grad_norm": 2.5143051147460938, "learning_rate": 4.457837713748179e-06, "loss": 0.6409, "step": 13213 }, { "epoch": 0.5485861369285777, "grad_norm": 2.3091368675231934, "learning_rate": 4.45716936792947e-06, "loss": 0.4711, "step": 13214 }, { "epoch": 0.5486276524527891, "grad_norm": 2.6328561305999756, "learning_rate": 4.456501031925297e-06, "loss": 0.5353, "step": 13215 }, { "epoch": 0.5486691679770004, "grad_norm": 1.9600071907043457, "learning_rate": 4.455832705747742e-06, "loss": 0.4675, "step": 13216 }, { "epoch": 0.5487106835012118, "grad_norm": 2.3533780574798584, "learning_rate": 4.455164389408888e-06, "loss": 0.626, "step": 13217 }, { "epoch": 0.548752199025423, "grad_norm": 2.8824291229248047, "learning_rate": 4.454496082920818e-06, "loss": 0.5421, "step": 13218 }, { "epoch": 0.5487937145496344, "grad_norm": 2.5550789833068848, "learning_rate": 4.453827786295617e-06, "loss": 0.5355, "step": 13219 }, { "epoch": 0.5488352300738457, "grad_norm": 2.2804298400878906, "learning_rate": 4.453159499545369e-06, "loss": 0.5778, "step": 13220 }, { "epoch": 0.5488767455980571, "grad_norm": 2.320728302001953, "learning_rate": 4.452491222682152e-06, "loss": 0.5238, "step": 13221 }, { "epoch": 0.5489182611222684, "grad_norm": 2.3578553199768066, "learning_rate": 4.451822955718052e-06, "loss": 0.5657, "step": 13222 }, { "epoch": 0.5489597766464798, "grad_norm": 2.356908082962036, "learning_rate": 4.4511546986651526e-06, "loss": 0.5546, "step": 13223 }, { "epoch": 0.549001292170691, "grad_norm": 2.932903528213501, "learning_rate": 4.450486451535532e-06, "loss": 0.6162, "step": 13224 }, { "epoch": 0.5490428076949024, "grad_norm": 2.8393170833587646, "learning_rate": 4.449818214341277e-06, "loss": 0.4145, "step": 13225 }, { "epoch": 0.5490843232191137, "grad_norm": 2.126255512237549, "learning_rate": 4.449149987094467e-06, "loss": 0.5628, "step": 13226 }, { "epoch": 0.5491258387433251, "grad_norm": 2.1216683387756348, "learning_rate": 4.448481769807182e-06, "loss": 0.59, "step": 13227 }, { "epoch": 0.5491673542675364, "grad_norm": 2.373661756515503, "learning_rate": 4.447813562491507e-06, "loss": 0.5744, "step": 13228 }, { "epoch": 0.5492088697917478, "grad_norm": 2.3493919372558594, "learning_rate": 4.447145365159523e-06, "loss": 0.483, "step": 13229 }, { "epoch": 0.549250385315959, "grad_norm": 2.2685465812683105, "learning_rate": 4.446477177823308e-06, "loss": 0.4361, "step": 13230 }, { "epoch": 0.5492919008401704, "grad_norm": 2.7794010639190674, "learning_rate": 4.445809000494945e-06, "loss": 0.5028, "step": 13231 }, { "epoch": 0.5493334163643817, "grad_norm": 2.711057186126709, "learning_rate": 4.445140833186516e-06, "loss": 0.5643, "step": 13232 }, { "epoch": 0.5493749318885931, "grad_norm": 2.7764732837677, "learning_rate": 4.444472675910103e-06, "loss": 0.4936, "step": 13233 }, { "epoch": 0.5494164474128044, "grad_norm": 2.681878089904785, "learning_rate": 4.4438045286777806e-06, "loss": 0.4434, "step": 13234 }, { "epoch": 0.5494579629370158, "grad_norm": 2.8067588806152344, "learning_rate": 4.4431363915016335e-06, "loss": 0.581, "step": 13235 }, { "epoch": 0.549499478461227, "grad_norm": 2.447983741760254, "learning_rate": 4.442468264393742e-06, "loss": 0.6031, "step": 13236 }, { "epoch": 0.5495409939854384, "grad_norm": 2.2218523025512695, "learning_rate": 4.4418001473661835e-06, "loss": 0.4427, "step": 13237 }, { "epoch": 0.5495825095096498, "grad_norm": 2.3635659217834473, "learning_rate": 4.4411320404310406e-06, "loss": 0.4773, "step": 13238 }, { "epoch": 0.5496240250338611, "grad_norm": 2.3831019401550293, "learning_rate": 4.440463943600392e-06, "loss": 0.5678, "step": 13239 }, { "epoch": 0.5496655405580725, "grad_norm": 2.448443651199341, "learning_rate": 4.4397958568863146e-06, "loss": 0.3923, "step": 13240 }, { "epoch": 0.5497070560822838, "grad_norm": 2.905668020248413, "learning_rate": 4.439127780300891e-06, "loss": 0.6926, "step": 13241 }, { "epoch": 0.5497485716064952, "grad_norm": 3.503314256668091, "learning_rate": 4.438459713856199e-06, "loss": 0.458, "step": 13242 }, { "epoch": 0.5497900871307064, "grad_norm": 3.8753418922424316, "learning_rate": 4.437791657564316e-06, "loss": 0.5036, "step": 13243 }, { "epoch": 0.5498316026549178, "grad_norm": 1.9216195344924927, "learning_rate": 4.437123611437322e-06, "loss": 0.4624, "step": 13244 }, { "epoch": 0.5498731181791291, "grad_norm": 2.356132745742798, "learning_rate": 4.436455575487295e-06, "loss": 0.4693, "step": 13245 }, { "epoch": 0.5499146337033405, "grad_norm": 2.3935673236846924, "learning_rate": 4.435787549726316e-06, "loss": 0.6974, "step": 13246 }, { "epoch": 0.5499561492275518, "grad_norm": 2.0156819820404053, "learning_rate": 4.435119534166459e-06, "loss": 0.4293, "step": 13247 }, { "epoch": 0.5499976647517631, "grad_norm": 3.1047163009643555, "learning_rate": 4.4344515288198025e-06, "loss": 0.5045, "step": 13248 }, { "epoch": 0.5500391802759744, "grad_norm": 2.916224241256714, "learning_rate": 4.433783533698428e-06, "loss": 0.6298, "step": 13249 }, { "epoch": 0.5500806958001858, "grad_norm": 2.5207204818725586, "learning_rate": 4.433115548814408e-06, "loss": 0.6799, "step": 13250 }, { "epoch": 0.5501222113243971, "grad_norm": 2.0679447650909424, "learning_rate": 4.4324475741798235e-06, "loss": 0.4888, "step": 13251 }, { "epoch": 0.5501637268486085, "grad_norm": 2.233008623123169, "learning_rate": 4.431779609806751e-06, "loss": 0.4145, "step": 13252 }, { "epoch": 0.5502052423728198, "grad_norm": 2.468358039855957, "learning_rate": 4.431111655707265e-06, "loss": 0.4666, "step": 13253 }, { "epoch": 0.5502467578970311, "grad_norm": 2.6366822719573975, "learning_rate": 4.430443711893446e-06, "loss": 0.6457, "step": 13254 }, { "epoch": 0.5502882734212424, "grad_norm": 2.5313313007354736, "learning_rate": 4.42977577837737e-06, "loss": 0.6086, "step": 13255 }, { "epoch": 0.5503297889454538, "grad_norm": 2.1550042629241943, "learning_rate": 4.429107855171109e-06, "loss": 0.5663, "step": 13256 }, { "epoch": 0.5503713044696651, "grad_norm": 2.5693187713623047, "learning_rate": 4.4284399422867444e-06, "loss": 0.5916, "step": 13257 }, { "epoch": 0.5504128199938765, "grad_norm": 2.491650104522705, "learning_rate": 4.427772039736349e-06, "loss": 0.4613, "step": 13258 }, { "epoch": 0.5504543355180878, "grad_norm": 2.847891330718994, "learning_rate": 4.427104147532002e-06, "loss": 0.5464, "step": 13259 }, { "epoch": 0.5504958510422991, "grad_norm": 2.367403030395508, "learning_rate": 4.426436265685776e-06, "loss": 0.5067, "step": 13260 }, { "epoch": 0.5505373665665104, "grad_norm": 3.2439987659454346, "learning_rate": 4.425768394209748e-06, "loss": 0.6075, "step": 13261 }, { "epoch": 0.5505788820907218, "grad_norm": 2.4376895427703857, "learning_rate": 4.425100533115993e-06, "loss": 0.5065, "step": 13262 }, { "epoch": 0.5506203976149331, "grad_norm": 2.7089390754699707, "learning_rate": 4.424432682416585e-06, "loss": 0.4736, "step": 13263 }, { "epoch": 0.5506619131391445, "grad_norm": 2.6587085723876953, "learning_rate": 4.423764842123602e-06, "loss": 0.5564, "step": 13264 }, { "epoch": 0.5507034286633558, "grad_norm": 2.3829901218414307, "learning_rate": 4.4230970122491164e-06, "loss": 0.4999, "step": 13265 }, { "epoch": 0.5507449441875671, "grad_norm": 2.5722427368164062, "learning_rate": 4.422429192805202e-06, "loss": 0.6201, "step": 13266 }, { "epoch": 0.5507864597117784, "grad_norm": 2.7383434772491455, "learning_rate": 4.421761383803933e-06, "loss": 0.4626, "step": 13267 }, { "epoch": 0.5508279752359898, "grad_norm": 2.2780632972717285, "learning_rate": 4.4210935852573886e-06, "loss": 0.5412, "step": 13268 }, { "epoch": 0.5508694907602012, "grad_norm": 2.5227346420288086, "learning_rate": 4.420425797177637e-06, "loss": 0.4046, "step": 13269 }, { "epoch": 0.5509110062844125, "grad_norm": 2.3576200008392334, "learning_rate": 4.419758019576752e-06, "loss": 0.5202, "step": 13270 }, { "epoch": 0.5509525218086239, "grad_norm": 2.126645565032959, "learning_rate": 4.419090252466811e-06, "loss": 0.5022, "step": 13271 }, { "epoch": 0.5509940373328351, "grad_norm": 2.047341823577881, "learning_rate": 4.418422495859887e-06, "loss": 0.4507, "step": 13272 }, { "epoch": 0.5510355528570465, "grad_norm": 2.4751718044281006, "learning_rate": 4.417754749768051e-06, "loss": 0.4824, "step": 13273 }, { "epoch": 0.5510770683812578, "grad_norm": 2.5861709117889404, "learning_rate": 4.417087014203376e-06, "loss": 0.5117, "step": 13274 }, { "epoch": 0.5511185839054692, "grad_norm": 2.493309497833252, "learning_rate": 4.416419289177938e-06, "loss": 0.5782, "step": 13275 }, { "epoch": 0.5511600994296805, "grad_norm": 2.4852819442749023, "learning_rate": 4.415751574703804e-06, "loss": 0.4873, "step": 13276 }, { "epoch": 0.5512016149538919, "grad_norm": 2.8395373821258545, "learning_rate": 4.415083870793053e-06, "loss": 0.6847, "step": 13277 }, { "epoch": 0.5512431304781031, "grad_norm": 2.341118097305298, "learning_rate": 4.414416177457754e-06, "loss": 0.5212, "step": 13278 }, { "epoch": 0.5512846460023145, "grad_norm": 2.4478960037231445, "learning_rate": 4.413748494709978e-06, "loss": 0.5952, "step": 13279 }, { "epoch": 0.5513261615265258, "grad_norm": 2.5363516807556152, "learning_rate": 4.413080822561797e-06, "loss": 0.5344, "step": 13280 }, { "epoch": 0.5513676770507372, "grad_norm": 2.6320478916168213, "learning_rate": 4.412413161025287e-06, "loss": 0.4278, "step": 13281 }, { "epoch": 0.5514091925749485, "grad_norm": 2.4380064010620117, "learning_rate": 4.411745510112515e-06, "loss": 0.6322, "step": 13282 }, { "epoch": 0.5514507080991599, "grad_norm": 2.501685619354248, "learning_rate": 4.411077869835552e-06, "loss": 0.4647, "step": 13283 }, { "epoch": 0.5514922236233711, "grad_norm": 2.352428674697876, "learning_rate": 4.410410240206473e-06, "loss": 0.6283, "step": 13284 }, { "epoch": 0.5515337391475825, "grad_norm": 2.3858771324157715, "learning_rate": 4.409742621237348e-06, "loss": 0.5685, "step": 13285 }, { "epoch": 0.5515752546717938, "grad_norm": 2.6965506076812744, "learning_rate": 4.409075012940244e-06, "loss": 0.4773, "step": 13286 }, { "epoch": 0.5516167701960052, "grad_norm": 2.8116767406463623, "learning_rate": 4.4084074153272346e-06, "loss": 0.456, "step": 13287 }, { "epoch": 0.5516582857202165, "grad_norm": 2.664943218231201, "learning_rate": 4.40773982841039e-06, "loss": 0.6883, "step": 13288 }, { "epoch": 0.5516998012444279, "grad_norm": 2.5923452377319336, "learning_rate": 4.40707225220178e-06, "loss": 0.6129, "step": 13289 }, { "epoch": 0.5517413167686391, "grad_norm": 3.03926420211792, "learning_rate": 4.4064046867134755e-06, "loss": 0.6239, "step": 13290 }, { "epoch": 0.5517828322928505, "grad_norm": 2.7554519176483154, "learning_rate": 4.405737131957545e-06, "loss": 0.5335, "step": 13291 }, { "epoch": 0.5518243478170618, "grad_norm": 2.397308588027954, "learning_rate": 4.4050695879460565e-06, "loss": 0.5533, "step": 13292 }, { "epoch": 0.5518658633412732, "grad_norm": 2.374389171600342, "learning_rate": 4.404402054691082e-06, "loss": 0.6281, "step": 13293 }, { "epoch": 0.5519073788654845, "grad_norm": 2.372210741043091, "learning_rate": 4.403734532204692e-06, "loss": 0.49, "step": 13294 }, { "epoch": 0.5519488943896959, "grad_norm": 2.329831838607788, "learning_rate": 4.403067020498952e-06, "loss": 0.4326, "step": 13295 }, { "epoch": 0.5519904099139071, "grad_norm": 3.2052321434020996, "learning_rate": 4.402399519585932e-06, "loss": 0.5311, "step": 13296 }, { "epoch": 0.5520319254381185, "grad_norm": 2.2786989212036133, "learning_rate": 4.401732029477701e-06, "loss": 0.3857, "step": 13297 }, { "epoch": 0.5520734409623298, "grad_norm": 1.9992934465408325, "learning_rate": 4.401064550186329e-06, "loss": 0.4973, "step": 13298 }, { "epoch": 0.5521149564865412, "grad_norm": 2.3059873580932617, "learning_rate": 4.400397081723881e-06, "loss": 0.4965, "step": 13299 }, { "epoch": 0.5521564720107526, "grad_norm": 2.133920907974243, "learning_rate": 4.399729624102426e-06, "loss": 0.5285, "step": 13300 }, { "epoch": 0.5521979875349639, "grad_norm": 2.327831983566284, "learning_rate": 4.3990621773340345e-06, "loss": 0.661, "step": 13301 }, { "epoch": 0.5522395030591752, "grad_norm": 2.4813196659088135, "learning_rate": 4.39839474143077e-06, "loss": 0.471, "step": 13302 }, { "epoch": 0.5522810185833865, "grad_norm": 2.5782392024993896, "learning_rate": 4.3977273164047045e-06, "loss": 0.5271, "step": 13303 }, { "epoch": 0.5523225341075979, "grad_norm": 1.7252085208892822, "learning_rate": 4.397059902267901e-06, "loss": 0.5491, "step": 13304 }, { "epoch": 0.5523640496318092, "grad_norm": 2.508915424346924, "learning_rate": 4.396392499032428e-06, "loss": 0.4551, "step": 13305 }, { "epoch": 0.5524055651560206, "grad_norm": 2.2874114513397217, "learning_rate": 4.395725106710352e-06, "loss": 0.5985, "step": 13306 }, { "epoch": 0.5524470806802319, "grad_norm": 2.162753105163574, "learning_rate": 4.395057725313743e-06, "loss": 0.5113, "step": 13307 }, { "epoch": 0.5524885962044432, "grad_norm": 2.238959550857544, "learning_rate": 4.3943903548546615e-06, "loss": 0.5578, "step": 13308 }, { "epoch": 0.5525301117286545, "grad_norm": 3.4882407188415527, "learning_rate": 4.393722995345178e-06, "loss": 0.4499, "step": 13309 }, { "epoch": 0.5525716272528659, "grad_norm": 2.397611618041992, "learning_rate": 4.393055646797358e-06, "loss": 0.7061, "step": 13310 }, { "epoch": 0.5526131427770772, "grad_norm": 2.3163795471191406, "learning_rate": 4.392388309223268e-06, "loss": 0.5734, "step": 13311 }, { "epoch": 0.5526546583012886, "grad_norm": 2.0655016899108887, "learning_rate": 4.3917209826349705e-06, "loss": 0.4401, "step": 13312 }, { "epoch": 0.5526961738254998, "grad_norm": 2.743182420730591, "learning_rate": 4.391053667044533e-06, "loss": 0.5156, "step": 13313 }, { "epoch": 0.5527376893497112, "grad_norm": 2.060309410095215, "learning_rate": 4.390386362464022e-06, "loss": 0.4778, "step": 13314 }, { "epoch": 0.5527792048739225, "grad_norm": 2.428439140319824, "learning_rate": 4.3897190689055e-06, "loss": 0.5499, "step": 13315 }, { "epoch": 0.5528207203981339, "grad_norm": 2.332566738128662, "learning_rate": 4.389051786381035e-06, "loss": 0.5171, "step": 13316 }, { "epoch": 0.5528622359223452, "grad_norm": 2.5409677028656006, "learning_rate": 4.388384514902689e-06, "loss": 0.5252, "step": 13317 }, { "epoch": 0.5529037514465566, "grad_norm": 2.909879207611084, "learning_rate": 4.3877172544825255e-06, "loss": 0.5854, "step": 13318 }, { "epoch": 0.5529452669707678, "grad_norm": 2.359933376312256, "learning_rate": 4.387050005132611e-06, "loss": 0.4692, "step": 13319 }, { "epoch": 0.5529867824949792, "grad_norm": 2.3095035552978516, "learning_rate": 4.386382766865012e-06, "loss": 0.4396, "step": 13320 }, { "epoch": 0.5530282980191905, "grad_norm": 2.686086893081665, "learning_rate": 4.385715539691787e-06, "loss": 0.6621, "step": 13321 }, { "epoch": 0.5530698135434019, "grad_norm": 2.0079550743103027, "learning_rate": 4.3850483236250015e-06, "loss": 0.4115, "step": 13322 }, { "epoch": 0.5531113290676132, "grad_norm": 3.156345844268799, "learning_rate": 4.38438111867672e-06, "loss": 0.4629, "step": 13323 }, { "epoch": 0.5531528445918246, "grad_norm": 2.9394261837005615, "learning_rate": 4.383713924859007e-06, "loss": 0.4811, "step": 13324 }, { "epoch": 0.5531943601160358, "grad_norm": 2.6172821521759033, "learning_rate": 4.383046742183922e-06, "loss": 0.5909, "step": 13325 }, { "epoch": 0.5532358756402472, "grad_norm": 2.1538541316986084, "learning_rate": 4.38237957066353e-06, "loss": 0.5628, "step": 13326 }, { "epoch": 0.5532773911644585, "grad_norm": 2.3536789417266846, "learning_rate": 4.3817124103098944e-06, "loss": 0.6276, "step": 13327 }, { "epoch": 0.5533189066886699, "grad_norm": 2.4589438438415527, "learning_rate": 4.381045261135075e-06, "loss": 0.4988, "step": 13328 }, { "epoch": 0.5533604222128812, "grad_norm": 2.613473892211914, "learning_rate": 4.380378123151139e-06, "loss": 0.3956, "step": 13329 }, { "epoch": 0.5534019377370926, "grad_norm": 2.4194955825805664, "learning_rate": 4.379710996370143e-06, "loss": 0.6906, "step": 13330 }, { "epoch": 0.553443453261304, "grad_norm": 2.304517984390259, "learning_rate": 4.379043880804149e-06, "loss": 0.5274, "step": 13331 }, { "epoch": 0.5534849687855152, "grad_norm": 2.5634608268737793, "learning_rate": 4.378376776465223e-06, "loss": 0.6051, "step": 13332 }, { "epoch": 0.5535264843097266, "grad_norm": 3.020043134689331, "learning_rate": 4.3777096833654245e-06, "loss": 0.6208, "step": 13333 }, { "epoch": 0.5535679998339379, "grad_norm": 2.0285301208496094, "learning_rate": 4.3770426015168125e-06, "loss": 0.4689, "step": 13334 }, { "epoch": 0.5536095153581493, "grad_norm": 2.348914861679077, "learning_rate": 4.37637553093145e-06, "loss": 0.5655, "step": 13335 }, { "epoch": 0.5536510308823606, "grad_norm": 2.3988871574401855, "learning_rate": 4.375708471621398e-06, "loss": 0.5055, "step": 13336 }, { "epoch": 0.553692546406572, "grad_norm": 2.268601417541504, "learning_rate": 4.375041423598718e-06, "loss": 0.4899, "step": 13337 }, { "epoch": 0.5537340619307832, "grad_norm": 2.4707274436950684, "learning_rate": 4.374374386875468e-06, "loss": 0.4984, "step": 13338 }, { "epoch": 0.5537755774549946, "grad_norm": 2.116342544555664, "learning_rate": 4.373707361463709e-06, "loss": 0.5452, "step": 13339 }, { "epoch": 0.5538170929792059, "grad_norm": 2.037309169769287, "learning_rate": 4.373040347375503e-06, "loss": 0.4377, "step": 13340 }, { "epoch": 0.5538586085034173, "grad_norm": 2.432563066482544, "learning_rate": 4.372373344622906e-06, "loss": 0.5203, "step": 13341 }, { "epoch": 0.5539001240276286, "grad_norm": 2.5009961128234863, "learning_rate": 4.371706353217983e-06, "loss": 0.3977, "step": 13342 }, { "epoch": 0.55394163955184, "grad_norm": 2.6792032718658447, "learning_rate": 4.371039373172788e-06, "loss": 0.528, "step": 13343 }, { "epoch": 0.5539831550760512, "grad_norm": 2.3424479961395264, "learning_rate": 4.370372404499382e-06, "loss": 0.4762, "step": 13344 }, { "epoch": 0.5540246706002626, "grad_norm": 2.223763942718506, "learning_rate": 4.369705447209824e-06, "loss": 0.5364, "step": 13345 }, { "epoch": 0.5540661861244739, "grad_norm": 2.450138568878174, "learning_rate": 4.369038501316176e-06, "loss": 0.5351, "step": 13346 }, { "epoch": 0.5541077016486853, "grad_norm": 2.5994701385498047, "learning_rate": 4.368371566830491e-06, "loss": 0.4066, "step": 13347 }, { "epoch": 0.5541492171728966, "grad_norm": 2.1921546459198, "learning_rate": 4.3677046437648315e-06, "loss": 0.4454, "step": 13348 }, { "epoch": 0.554190732697108, "grad_norm": 2.384570360183716, "learning_rate": 4.367037732131254e-06, "loss": 0.546, "step": 13349 }, { "epoch": 0.5542322482213192, "grad_norm": 3.204582452774048, "learning_rate": 4.366370831941818e-06, "loss": 0.3803, "step": 13350 }, { "epoch": 0.5542737637455306, "grad_norm": 2.160238265991211, "learning_rate": 4.365703943208578e-06, "loss": 0.3295, "step": 13351 }, { "epoch": 0.5543152792697419, "grad_norm": 2.49359393119812, "learning_rate": 4.365037065943594e-06, "loss": 0.4585, "step": 13352 }, { "epoch": 0.5543567947939533, "grad_norm": 2.5649051666259766, "learning_rate": 4.364370200158923e-06, "loss": 0.5303, "step": 13353 }, { "epoch": 0.5543983103181646, "grad_norm": 2.1433892250061035, "learning_rate": 4.3637033458666214e-06, "loss": 0.4816, "step": 13354 }, { "epoch": 0.5544398258423759, "grad_norm": 2.311767816543579, "learning_rate": 4.363036503078749e-06, "loss": 0.3257, "step": 13355 }, { "epoch": 0.5544813413665872, "grad_norm": 2.4000558853149414, "learning_rate": 4.3623696718073585e-06, "loss": 0.478, "step": 13356 }, { "epoch": 0.5545228568907986, "grad_norm": 2.6429691314697266, "learning_rate": 4.361702852064507e-06, "loss": 0.5462, "step": 13357 }, { "epoch": 0.5545643724150099, "grad_norm": 2.6142585277557373, "learning_rate": 4.361036043862253e-06, "loss": 0.4665, "step": 13358 }, { "epoch": 0.5546058879392213, "grad_norm": 2.011518716812134, "learning_rate": 4.360369247212653e-06, "loss": 0.4322, "step": 13359 }, { "epoch": 0.5546474034634326, "grad_norm": 2.3448596000671387, "learning_rate": 4.35970246212776e-06, "loss": 0.5479, "step": 13360 }, { "epoch": 0.5546889189876439, "grad_norm": 2.624359130859375, "learning_rate": 4.359035688619631e-06, "loss": 0.4484, "step": 13361 }, { "epoch": 0.5547304345118553, "grad_norm": 2.1798617839813232, "learning_rate": 4.358368926700321e-06, "loss": 0.4451, "step": 13362 }, { "epoch": 0.5547719500360666, "grad_norm": 2.411095142364502, "learning_rate": 4.357702176381887e-06, "loss": 0.5746, "step": 13363 }, { "epoch": 0.554813465560278, "grad_norm": 2.287956953048706, "learning_rate": 4.357035437676382e-06, "loss": 0.5105, "step": 13364 }, { "epoch": 0.5548549810844893, "grad_norm": 3.0456175804138184, "learning_rate": 4.356368710595861e-06, "loss": 0.391, "step": 13365 }, { "epoch": 0.5548964966087007, "grad_norm": 2.623560667037964, "learning_rate": 4.355701995152381e-06, "loss": 0.5381, "step": 13366 }, { "epoch": 0.5549380121329119, "grad_norm": 2.334804058074951, "learning_rate": 4.355035291357993e-06, "loss": 0.5764, "step": 13367 }, { "epoch": 0.5549795276571233, "grad_norm": 2.528193235397339, "learning_rate": 4.3543685992247546e-06, "loss": 0.4407, "step": 13368 }, { "epoch": 0.5550210431813346, "grad_norm": 2.1569957733154297, "learning_rate": 4.3537019187647165e-06, "loss": 0.5847, "step": 13369 }, { "epoch": 0.555062558705546, "grad_norm": 2.652655601501465, "learning_rate": 4.353035249989933e-06, "loss": 0.4414, "step": 13370 }, { "epoch": 0.5551040742297573, "grad_norm": 3.018097162246704, "learning_rate": 4.35236859291246e-06, "loss": 0.5822, "step": 13371 }, { "epoch": 0.5551455897539687, "grad_norm": 2.4094841480255127, "learning_rate": 4.351701947544351e-06, "loss": 0.5088, "step": 13372 }, { "epoch": 0.5551871052781799, "grad_norm": 2.416142702102661, "learning_rate": 4.351035313897655e-06, "loss": 0.4134, "step": 13373 }, { "epoch": 0.5552286208023913, "grad_norm": 2.6924209594726562, "learning_rate": 4.350368691984428e-06, "loss": 0.5749, "step": 13374 }, { "epoch": 0.5552701363266026, "grad_norm": 2.5279946327209473, "learning_rate": 4.349702081816723e-06, "loss": 0.3879, "step": 13375 }, { "epoch": 0.555311651850814, "grad_norm": 2.6806273460388184, "learning_rate": 4.349035483406593e-06, "loss": 0.5311, "step": 13376 }, { "epoch": 0.5553531673750253, "grad_norm": 2.713223457336426, "learning_rate": 4.3483688967660875e-06, "loss": 0.6313, "step": 13377 }, { "epoch": 0.5553946828992367, "grad_norm": 2.7345356941223145, "learning_rate": 4.34770232190726e-06, "loss": 0.4782, "step": 13378 }, { "epoch": 0.5554361984234479, "grad_norm": 2.2300264835357666, "learning_rate": 4.347035758842164e-06, "loss": 0.5908, "step": 13379 }, { "epoch": 0.5554777139476593, "grad_norm": 2.44640851020813, "learning_rate": 4.346369207582849e-06, "loss": 0.5279, "step": 13380 }, { "epoch": 0.5555192294718706, "grad_norm": 2.067340612411499, "learning_rate": 4.345702668141367e-06, "loss": 0.4411, "step": 13381 }, { "epoch": 0.555560744996082, "grad_norm": 2.8994956016540527, "learning_rate": 4.345036140529769e-06, "loss": 0.645, "step": 13382 }, { "epoch": 0.5556022605202933, "grad_norm": 2.7251029014587402, "learning_rate": 4.3443696247601056e-06, "loss": 0.5376, "step": 13383 }, { "epoch": 0.5556437760445047, "grad_norm": 2.3573203086853027, "learning_rate": 4.343703120844429e-06, "loss": 0.5913, "step": 13384 }, { "epoch": 0.5556852915687159, "grad_norm": 2.1847031116485596, "learning_rate": 4.343036628794791e-06, "loss": 0.4942, "step": 13385 }, { "epoch": 0.5557268070929273, "grad_norm": 2.4657065868377686, "learning_rate": 4.342370148623238e-06, "loss": 0.4794, "step": 13386 }, { "epoch": 0.5557683226171386, "grad_norm": 2.1813950538635254, "learning_rate": 4.341703680341822e-06, "loss": 0.3879, "step": 13387 }, { "epoch": 0.55580983814135, "grad_norm": 2.166063070297241, "learning_rate": 4.341037223962594e-06, "loss": 0.4598, "step": 13388 }, { "epoch": 0.5558513536655613, "grad_norm": 2.603053092956543, "learning_rate": 4.340370779497605e-06, "loss": 0.4551, "step": 13389 }, { "epoch": 0.5558928691897727, "grad_norm": 2.043383836746216, "learning_rate": 4.339704346958899e-06, "loss": 0.592, "step": 13390 }, { "epoch": 0.5559343847139839, "grad_norm": 2.9203579425811768, "learning_rate": 4.3390379263585304e-06, "loss": 0.6113, "step": 13391 }, { "epoch": 0.5559759002381953, "grad_norm": 2.6383516788482666, "learning_rate": 4.338371517708546e-06, "loss": 0.4643, "step": 13392 }, { "epoch": 0.5560174157624067, "grad_norm": 2.160372257232666, "learning_rate": 4.3377051210209965e-06, "loss": 0.5051, "step": 13393 }, { "epoch": 0.556058931286618, "grad_norm": 2.1132025718688965, "learning_rate": 4.3370387363079296e-06, "loss": 0.4075, "step": 13394 }, { "epoch": 0.5561004468108294, "grad_norm": 2.609037160873413, "learning_rate": 4.336372363581391e-06, "loss": 0.5997, "step": 13395 }, { "epoch": 0.5561419623350407, "grad_norm": 2.5233852863311768, "learning_rate": 4.335706002853432e-06, "loss": 0.4356, "step": 13396 }, { "epoch": 0.556183477859252, "grad_norm": 2.2320196628570557, "learning_rate": 4.3350396541361005e-06, "loss": 0.4093, "step": 13397 }, { "epoch": 0.5562249933834633, "grad_norm": 2.322636842727661, "learning_rate": 4.334373317441445e-06, "loss": 0.5123, "step": 13398 }, { "epoch": 0.5562665089076747, "grad_norm": 3.7230172157287598, "learning_rate": 4.33370699278151e-06, "loss": 0.6433, "step": 13399 }, { "epoch": 0.556308024431886, "grad_norm": 2.5504393577575684, "learning_rate": 4.3330406801683444e-06, "loss": 0.5003, "step": 13400 }, { "epoch": 0.5563495399560974, "grad_norm": 2.1190950870513916, "learning_rate": 4.332374379613995e-06, "loss": 0.4256, "step": 13401 }, { "epoch": 0.5563910554803086, "grad_norm": 2.143436908721924, "learning_rate": 4.3317080911305136e-06, "loss": 0.4526, "step": 13402 }, { "epoch": 0.55643257100452, "grad_norm": 2.0774195194244385, "learning_rate": 4.331041814729938e-06, "loss": 0.6009, "step": 13403 }, { "epoch": 0.5564740865287313, "grad_norm": 1.8962821960449219, "learning_rate": 4.330375550424321e-06, "loss": 0.5064, "step": 13404 }, { "epoch": 0.5565156020529427, "grad_norm": 2.3110830783843994, "learning_rate": 4.329709298225707e-06, "loss": 0.5545, "step": 13405 }, { "epoch": 0.556557117577154, "grad_norm": 2.6791880130767822, "learning_rate": 4.329043058146142e-06, "loss": 0.5333, "step": 13406 }, { "epoch": 0.5565986331013654, "grad_norm": 2.3103702068328857, "learning_rate": 4.328376830197672e-06, "loss": 0.4392, "step": 13407 }, { "epoch": 0.5566401486255766, "grad_norm": 2.7560818195343018, "learning_rate": 4.327710614392341e-06, "loss": 0.6407, "step": 13408 }, { "epoch": 0.556681664149788, "grad_norm": 2.805790901184082, "learning_rate": 4.3270444107421976e-06, "loss": 0.5251, "step": 13409 }, { "epoch": 0.5567231796739993, "grad_norm": 2.514596700668335, "learning_rate": 4.326378219259283e-06, "loss": 0.4894, "step": 13410 }, { "epoch": 0.5567646951982107, "grad_norm": 3.614734411239624, "learning_rate": 4.325712039955648e-06, "loss": 0.5356, "step": 13411 }, { "epoch": 0.556806210722422, "grad_norm": 1.9886231422424316, "learning_rate": 4.325045872843331e-06, "loss": 0.3961, "step": 13412 }, { "epoch": 0.5568477262466334, "grad_norm": 2.3283839225769043, "learning_rate": 4.3243797179343795e-06, "loss": 0.4316, "step": 13413 }, { "epoch": 0.5568892417708446, "grad_norm": 2.6297452449798584, "learning_rate": 4.323713575240837e-06, "loss": 0.5785, "step": 13414 }, { "epoch": 0.556930757295056, "grad_norm": 2.519329071044922, "learning_rate": 4.32304744477475e-06, "loss": 0.4831, "step": 13415 }, { "epoch": 0.5569722728192673, "grad_norm": 2.0712828636169434, "learning_rate": 4.32238132654816e-06, "loss": 0.5254, "step": 13416 }, { "epoch": 0.5570137883434787, "grad_norm": 2.866892099380493, "learning_rate": 4.32171522057311e-06, "loss": 0.5662, "step": 13417 }, { "epoch": 0.55705530386769, "grad_norm": 1.8250471353530884, "learning_rate": 4.321049126861644e-06, "loss": 0.5315, "step": 13418 }, { "epoch": 0.5570968193919014, "grad_norm": 2.3424699306488037, "learning_rate": 4.320383045425806e-06, "loss": 0.4668, "step": 13419 }, { "epoch": 0.5571383349161126, "grad_norm": 2.3522653579711914, "learning_rate": 4.319716976277639e-06, "loss": 0.5041, "step": 13420 }, { "epoch": 0.557179850440324, "grad_norm": 2.812138795852661, "learning_rate": 4.319050919429183e-06, "loss": 0.6382, "step": 13421 }, { "epoch": 0.5572213659645353, "grad_norm": 2.8703980445861816, "learning_rate": 4.318384874892484e-06, "loss": 0.4632, "step": 13422 }, { "epoch": 0.5572628814887467, "grad_norm": 2.335841655731201, "learning_rate": 4.317718842679583e-06, "loss": 0.475, "step": 13423 }, { "epoch": 0.5573043970129581, "grad_norm": 2.6309545040130615, "learning_rate": 4.317052822802523e-06, "loss": 0.5063, "step": 13424 }, { "epoch": 0.5573459125371694, "grad_norm": 2.765782117843628, "learning_rate": 4.316386815273343e-06, "loss": 0.5954, "step": 13425 }, { "epoch": 0.5573874280613808, "grad_norm": 2.003669023513794, "learning_rate": 4.3157208201040865e-06, "loss": 0.4997, "step": 13426 }, { "epoch": 0.557428943585592, "grad_norm": 2.550657272338867, "learning_rate": 4.315054837306795e-06, "loss": 0.5666, "step": 13427 }, { "epoch": 0.5574704591098034, "grad_norm": 3.192417860031128, "learning_rate": 4.314388866893512e-06, "loss": 0.484, "step": 13428 }, { "epoch": 0.5575119746340147, "grad_norm": 2.3842718601226807, "learning_rate": 4.313722908876273e-06, "loss": 0.5712, "step": 13429 }, { "epoch": 0.5575534901582261, "grad_norm": 2.339738607406616, "learning_rate": 4.313056963267123e-06, "loss": 0.568, "step": 13430 }, { "epoch": 0.5575950056824374, "grad_norm": 2.375792980194092, "learning_rate": 4.3123910300781e-06, "loss": 0.5182, "step": 13431 }, { "epoch": 0.5576365212066487, "grad_norm": 1.8933902978897095, "learning_rate": 4.311725109321247e-06, "loss": 0.4662, "step": 13432 }, { "epoch": 0.55767803673086, "grad_norm": 2.866253137588501, "learning_rate": 4.3110592010086014e-06, "loss": 0.5051, "step": 13433 }, { "epoch": 0.5577195522550714, "grad_norm": 2.091341257095337, "learning_rate": 4.3103933051522025e-06, "loss": 0.588, "step": 13434 }, { "epoch": 0.5577610677792827, "grad_norm": 2.3177034854888916, "learning_rate": 4.309727421764093e-06, "loss": 0.396, "step": 13435 }, { "epoch": 0.5578025833034941, "grad_norm": 2.1531879901885986, "learning_rate": 4.30906155085631e-06, "loss": 0.541, "step": 13436 }, { "epoch": 0.5578440988277054, "grad_norm": 2.8994009494781494, "learning_rate": 4.308395692440895e-06, "loss": 0.5494, "step": 13437 }, { "epoch": 0.5578856143519167, "grad_norm": 2.0759174823760986, "learning_rate": 4.307729846529884e-06, "loss": 0.5086, "step": 13438 }, { "epoch": 0.557927129876128, "grad_norm": 2.3295648097991943, "learning_rate": 4.3070640131353165e-06, "loss": 0.5688, "step": 13439 }, { "epoch": 0.5579686454003394, "grad_norm": 2.941474437713623, "learning_rate": 4.306398192269231e-06, "loss": 0.626, "step": 13440 }, { "epoch": 0.5580101609245507, "grad_norm": 2.3414018154144287, "learning_rate": 4.305732383943669e-06, "loss": 0.5034, "step": 13441 }, { "epoch": 0.5580516764487621, "grad_norm": 2.015244722366333, "learning_rate": 4.305066588170663e-06, "loss": 0.4551, "step": 13442 }, { "epoch": 0.5580931919729734, "grad_norm": 2.579824447631836, "learning_rate": 4.304400804962255e-06, "loss": 0.4693, "step": 13443 }, { "epoch": 0.5581347074971847, "grad_norm": 1.8603699207305908, "learning_rate": 4.303735034330478e-06, "loss": 0.3928, "step": 13444 }, { "epoch": 0.558176223021396, "grad_norm": 2.2745847702026367, "learning_rate": 4.303069276287375e-06, "loss": 0.3848, "step": 13445 }, { "epoch": 0.5582177385456074, "grad_norm": 2.98178768157959, "learning_rate": 4.30240353084498e-06, "loss": 0.455, "step": 13446 }, { "epoch": 0.5582592540698187, "grad_norm": 2.102247953414917, "learning_rate": 4.301737798015329e-06, "loss": 0.4154, "step": 13447 }, { "epoch": 0.5583007695940301, "grad_norm": 2.4956226348876953, "learning_rate": 4.30107207781046e-06, "loss": 0.5673, "step": 13448 }, { "epoch": 0.5583422851182414, "grad_norm": 2.2205581665039062, "learning_rate": 4.300406370242409e-06, "loss": 0.4266, "step": 13449 }, { "epoch": 0.5583838006424527, "grad_norm": 2.18638277053833, "learning_rate": 4.299740675323216e-06, "loss": 0.4487, "step": 13450 }, { "epoch": 0.558425316166664, "grad_norm": 2.3657760620117188, "learning_rate": 4.29907499306491e-06, "loss": 0.4778, "step": 13451 }, { "epoch": 0.5584668316908754, "grad_norm": 2.308300495147705, "learning_rate": 4.29840932347953e-06, "loss": 0.4621, "step": 13452 }, { "epoch": 0.5585083472150867, "grad_norm": 2.359506130218506, "learning_rate": 4.297743666579112e-06, "loss": 0.4825, "step": 13453 }, { "epoch": 0.5585498627392981, "grad_norm": 2.441722869873047, "learning_rate": 4.297078022375693e-06, "loss": 0.5671, "step": 13454 }, { "epoch": 0.5585913782635095, "grad_norm": 2.078138589859009, "learning_rate": 4.296412390881304e-06, "loss": 0.4774, "step": 13455 }, { "epoch": 0.5586328937877207, "grad_norm": 2.936339855194092, "learning_rate": 4.295746772107982e-06, "loss": 0.6916, "step": 13456 }, { "epoch": 0.5586744093119321, "grad_norm": 2.4352123737335205, "learning_rate": 4.295081166067761e-06, "loss": 0.4747, "step": 13457 }, { "epoch": 0.5587159248361434, "grad_norm": 2.320645809173584, "learning_rate": 4.294415572772678e-06, "loss": 0.4629, "step": 13458 }, { "epoch": 0.5587574403603548, "grad_norm": 2.6090304851531982, "learning_rate": 4.293749992234764e-06, "loss": 0.5295, "step": 13459 }, { "epoch": 0.5587989558845661, "grad_norm": 2.3070790767669678, "learning_rate": 4.293084424466052e-06, "loss": 0.4135, "step": 13460 }, { "epoch": 0.5588404714087775, "grad_norm": 2.1245784759521484, "learning_rate": 4.292418869478577e-06, "loss": 0.4894, "step": 13461 }, { "epoch": 0.5588819869329887, "grad_norm": 2.5441832542419434, "learning_rate": 4.2917533272843734e-06, "loss": 0.5913, "step": 13462 }, { "epoch": 0.5589235024572001, "grad_norm": 2.4036240577697754, "learning_rate": 4.2910877978954764e-06, "loss": 0.4648, "step": 13463 }, { "epoch": 0.5589650179814114, "grad_norm": 2.461787462234497, "learning_rate": 4.290422281323913e-06, "loss": 0.5437, "step": 13464 }, { "epoch": 0.5590065335056228, "grad_norm": 2.3623082637786865, "learning_rate": 4.2897567775817204e-06, "loss": 0.4569, "step": 13465 }, { "epoch": 0.5590480490298341, "grad_norm": 2.435231924057007, "learning_rate": 4.28909128668093e-06, "loss": 0.5753, "step": 13466 }, { "epoch": 0.5590895645540455, "grad_norm": 2.6000072956085205, "learning_rate": 4.2884258086335755e-06, "loss": 0.4593, "step": 13467 }, { "epoch": 0.5591310800782567, "grad_norm": 2.376354217529297, "learning_rate": 4.287760343451686e-06, "loss": 0.5821, "step": 13468 }, { "epoch": 0.5591725956024681, "grad_norm": 2.269545078277588, "learning_rate": 4.287094891147295e-06, "loss": 0.617, "step": 13469 }, { "epoch": 0.5592141111266794, "grad_norm": 2.24853253364563, "learning_rate": 4.286429451732433e-06, "loss": 0.4514, "step": 13470 }, { "epoch": 0.5592556266508908, "grad_norm": 2.8423609733581543, "learning_rate": 4.285764025219133e-06, "loss": 0.6226, "step": 13471 }, { "epoch": 0.5592971421751021, "grad_norm": 2.4132187366485596, "learning_rate": 4.285098611619427e-06, "loss": 0.6828, "step": 13472 }, { "epoch": 0.5593386576993135, "grad_norm": 2.1315293312072754, "learning_rate": 4.284433210945341e-06, "loss": 0.3941, "step": 13473 }, { "epoch": 0.5593801732235247, "grad_norm": 2.7065553665161133, "learning_rate": 4.28376782320891e-06, "loss": 0.5717, "step": 13474 }, { "epoch": 0.5594216887477361, "grad_norm": 2.495466709136963, "learning_rate": 4.283102448422163e-06, "loss": 0.4847, "step": 13475 }, { "epoch": 0.5594632042719474, "grad_norm": 2.4738497734069824, "learning_rate": 4.282437086597132e-06, "loss": 0.6063, "step": 13476 }, { "epoch": 0.5595047197961588, "grad_norm": 2.2886695861816406, "learning_rate": 4.2817717377458435e-06, "loss": 0.5085, "step": 13477 }, { "epoch": 0.5595462353203701, "grad_norm": 1.9582111835479736, "learning_rate": 4.281106401880329e-06, "loss": 0.529, "step": 13478 }, { "epoch": 0.5595877508445815, "grad_norm": 3.10830020904541, "learning_rate": 4.2804410790126184e-06, "loss": 0.7294, "step": 13479 }, { "epoch": 0.5596292663687927, "grad_norm": 2.383504629135132, "learning_rate": 4.279775769154742e-06, "loss": 0.3223, "step": 13480 }, { "epoch": 0.5596707818930041, "grad_norm": 2.422866106033325, "learning_rate": 4.279110472318725e-06, "loss": 0.5833, "step": 13481 }, { "epoch": 0.5597122974172154, "grad_norm": 2.297757863998413, "learning_rate": 4.2784451885166e-06, "loss": 0.5697, "step": 13482 }, { "epoch": 0.5597538129414268, "grad_norm": 2.3253278732299805, "learning_rate": 4.277779917760393e-06, "loss": 0.5383, "step": 13483 }, { "epoch": 0.5597953284656381, "grad_norm": 2.5186169147491455, "learning_rate": 4.277114660062134e-06, "loss": 0.4875, "step": 13484 }, { "epoch": 0.5598368439898495, "grad_norm": 2.273235559463501, "learning_rate": 4.276449415433851e-06, "loss": 0.4528, "step": 13485 }, { "epoch": 0.5598783595140608, "grad_norm": 1.9617884159088135, "learning_rate": 4.275784183887568e-06, "loss": 0.4122, "step": 13486 }, { "epoch": 0.5599198750382721, "grad_norm": 2.1058974266052246, "learning_rate": 4.275118965435317e-06, "loss": 0.5093, "step": 13487 }, { "epoch": 0.5599613905624835, "grad_norm": 2.373258352279663, "learning_rate": 4.274453760089125e-06, "loss": 0.4993, "step": 13488 }, { "epoch": 0.5600029060866948, "grad_norm": 2.4085779190063477, "learning_rate": 4.273788567861019e-06, "loss": 0.4539, "step": 13489 }, { "epoch": 0.5600444216109062, "grad_norm": 2.293983221054077, "learning_rate": 4.273123388763024e-06, "loss": 0.4366, "step": 13490 }, { "epoch": 0.5600859371351175, "grad_norm": 2.285099983215332, "learning_rate": 4.272458222807166e-06, "loss": 0.4123, "step": 13491 }, { "epoch": 0.5601274526593288, "grad_norm": 2.463775634765625, "learning_rate": 4.271793070005474e-06, "loss": 0.6531, "step": 13492 }, { "epoch": 0.5601689681835401, "grad_norm": 2.3997485637664795, "learning_rate": 4.271127930369976e-06, "loss": 0.5443, "step": 13493 }, { "epoch": 0.5602104837077515, "grad_norm": 1.9072866439819336, "learning_rate": 4.270462803912692e-06, "loss": 0.3591, "step": 13494 }, { "epoch": 0.5602519992319628, "grad_norm": 2.52154803276062, "learning_rate": 4.269797690645653e-06, "loss": 0.5645, "step": 13495 }, { "epoch": 0.5602935147561742, "grad_norm": 2.0434861183166504, "learning_rate": 4.269132590580879e-06, "loss": 0.3864, "step": 13496 }, { "epoch": 0.5603350302803854, "grad_norm": 2.584362745285034, "learning_rate": 4.268467503730403e-06, "loss": 0.4532, "step": 13497 }, { "epoch": 0.5603765458045968, "grad_norm": 2.4776766300201416, "learning_rate": 4.267802430106241e-06, "loss": 0.5155, "step": 13498 }, { "epoch": 0.5604180613288081, "grad_norm": 2.6594736576080322, "learning_rate": 4.267137369720424e-06, "loss": 0.7593, "step": 13499 }, { "epoch": 0.5604595768530195, "grad_norm": 2.516848564147949, "learning_rate": 4.2664723225849736e-06, "loss": 0.6199, "step": 13500 }, { "epoch": 0.5605010923772308, "grad_norm": 1.961548089981079, "learning_rate": 4.265807288711918e-06, "loss": 0.4715, "step": 13501 }, { "epoch": 0.5605426079014422, "grad_norm": 2.2291154861450195, "learning_rate": 4.265142268113275e-06, "loss": 0.5097, "step": 13502 }, { "epoch": 0.5605841234256534, "grad_norm": 2.5097403526306152, "learning_rate": 4.264477260801072e-06, "loss": 0.3653, "step": 13503 }, { "epoch": 0.5606256389498648, "grad_norm": 2.1039061546325684, "learning_rate": 4.2638122667873315e-06, "loss": 0.509, "step": 13504 }, { "epoch": 0.5606671544740761, "grad_norm": 2.6427032947540283, "learning_rate": 4.263147286084079e-06, "loss": 0.5177, "step": 13505 }, { "epoch": 0.5607086699982875, "grad_norm": 2.542921781539917, "learning_rate": 4.262482318703337e-06, "loss": 0.4876, "step": 13506 }, { "epoch": 0.5607501855224988, "grad_norm": 2.459057331085205, "learning_rate": 4.261817364657125e-06, "loss": 0.4999, "step": 13507 }, { "epoch": 0.5607917010467102, "grad_norm": 3.029608964920044, "learning_rate": 4.261152423957469e-06, "loss": 0.5815, "step": 13508 }, { "epoch": 0.5608332165709214, "grad_norm": 2.3549225330352783, "learning_rate": 4.26048749661639e-06, "loss": 0.4576, "step": 13509 }, { "epoch": 0.5608747320951328, "grad_norm": 2.1119840145111084, "learning_rate": 4.259822582645911e-06, "loss": 0.4002, "step": 13510 }, { "epoch": 0.5609162476193441, "grad_norm": 2.5135350227355957, "learning_rate": 4.259157682058052e-06, "loss": 0.4758, "step": 13511 }, { "epoch": 0.5609577631435555, "grad_norm": 3.0336310863494873, "learning_rate": 4.258492794864835e-06, "loss": 0.3915, "step": 13512 }, { "epoch": 0.5609992786677668, "grad_norm": 2.1839077472686768, "learning_rate": 4.257827921078282e-06, "loss": 0.4218, "step": 13513 }, { "epoch": 0.5610407941919782, "grad_norm": 2.5856680870056152, "learning_rate": 4.257163060710417e-06, "loss": 0.5846, "step": 13514 }, { "epoch": 0.5610823097161894, "grad_norm": 2.0988121032714844, "learning_rate": 4.256498213773255e-06, "loss": 0.5272, "step": 13515 }, { "epoch": 0.5611238252404008, "grad_norm": 2.2571237087249756, "learning_rate": 4.255833380278819e-06, "loss": 0.4976, "step": 13516 }, { "epoch": 0.5611653407646122, "grad_norm": 2.2829108238220215, "learning_rate": 4.255168560239131e-06, "loss": 0.4393, "step": 13517 }, { "epoch": 0.5612068562888235, "grad_norm": 2.36169171333313, "learning_rate": 4.254503753666211e-06, "loss": 0.5298, "step": 13518 }, { "epoch": 0.5612483718130349, "grad_norm": 3.0441765785217285, "learning_rate": 4.253838960572077e-06, "loss": 0.5366, "step": 13519 }, { "epoch": 0.5612898873372462, "grad_norm": 2.4026944637298584, "learning_rate": 4.253174180968751e-06, "loss": 0.3848, "step": 13520 }, { "epoch": 0.5613314028614576, "grad_norm": 2.2303574085235596, "learning_rate": 4.25250941486825e-06, "loss": 0.4347, "step": 13521 }, { "epoch": 0.5613729183856688, "grad_norm": 2.227602005004883, "learning_rate": 4.251844662282594e-06, "loss": 0.492, "step": 13522 }, { "epoch": 0.5614144339098802, "grad_norm": 1.8820655345916748, "learning_rate": 4.251179923223804e-06, "loss": 0.3867, "step": 13523 }, { "epoch": 0.5614559494340915, "grad_norm": 2.17752742767334, "learning_rate": 4.250515197703895e-06, "loss": 0.4738, "step": 13524 }, { "epoch": 0.5614974649583029, "grad_norm": 3.164276123046875, "learning_rate": 4.249850485734887e-06, "loss": 0.5445, "step": 13525 }, { "epoch": 0.5615389804825142, "grad_norm": 2.3419644832611084, "learning_rate": 4.249185787328798e-06, "loss": 0.4175, "step": 13526 }, { "epoch": 0.5615804960067255, "grad_norm": 2.3865787982940674, "learning_rate": 4.248521102497649e-06, "loss": 0.4876, "step": 13527 }, { "epoch": 0.5616220115309368, "grad_norm": 2.747135877609253, "learning_rate": 4.247856431253453e-06, "loss": 0.4126, "step": 13528 }, { "epoch": 0.5616635270551482, "grad_norm": 2.2229042053222656, "learning_rate": 4.24719177360823e-06, "loss": 0.5285, "step": 13529 }, { "epoch": 0.5617050425793595, "grad_norm": 1.9585658311843872, "learning_rate": 4.2465271295739965e-06, "loss": 0.3796, "step": 13530 }, { "epoch": 0.5617465581035709, "grad_norm": 2.5110702514648438, "learning_rate": 4.24586249916277e-06, "loss": 0.4592, "step": 13531 }, { "epoch": 0.5617880736277822, "grad_norm": 2.4186408519744873, "learning_rate": 4.245197882386567e-06, "loss": 0.56, "step": 13532 }, { "epoch": 0.5618295891519935, "grad_norm": 1.8835992813110352, "learning_rate": 4.244533279257403e-06, "loss": 0.4118, "step": 13533 }, { "epoch": 0.5618711046762048, "grad_norm": 2.3096370697021484, "learning_rate": 4.243868689787296e-06, "loss": 0.4103, "step": 13534 }, { "epoch": 0.5619126202004162, "grad_norm": 2.034925699234009, "learning_rate": 4.243204113988261e-06, "loss": 0.4515, "step": 13535 }, { "epoch": 0.5619541357246275, "grad_norm": 2.3667922019958496, "learning_rate": 4.242539551872315e-06, "loss": 0.567, "step": 13536 }, { "epoch": 0.5619956512488389, "grad_norm": 2.191023111343384, "learning_rate": 4.24187500345147e-06, "loss": 0.4255, "step": 13537 }, { "epoch": 0.5620371667730502, "grad_norm": 2.2683537006378174, "learning_rate": 4.241210468737743e-06, "loss": 0.5335, "step": 13538 }, { "epoch": 0.5620786822972615, "grad_norm": 2.6451973915100098, "learning_rate": 4.2405459477431505e-06, "loss": 0.5459, "step": 13539 }, { "epoch": 0.5621201978214728, "grad_norm": 2.3939225673675537, "learning_rate": 4.239881440479708e-06, "loss": 0.6377, "step": 13540 }, { "epoch": 0.5621617133456842, "grad_norm": 2.4855523109436035, "learning_rate": 4.239216946959426e-06, "loss": 0.5491, "step": 13541 }, { "epoch": 0.5622032288698955, "grad_norm": 2.8470685482025146, "learning_rate": 4.23855246719432e-06, "loss": 0.5799, "step": 13542 }, { "epoch": 0.5622447443941069, "grad_norm": 2.5174100399017334, "learning_rate": 4.237888001196406e-06, "loss": 0.5201, "step": 13543 }, { "epoch": 0.5622862599183182, "grad_norm": 2.4454760551452637, "learning_rate": 4.237223548977697e-06, "loss": 0.517, "step": 13544 }, { "epoch": 0.5623277754425295, "grad_norm": 2.489786148071289, "learning_rate": 4.236559110550207e-06, "loss": 0.5174, "step": 13545 }, { "epoch": 0.5623692909667408, "grad_norm": 2.4523561000823975, "learning_rate": 4.235894685925946e-06, "loss": 0.4174, "step": 13546 }, { "epoch": 0.5624108064909522, "grad_norm": 2.404115676879883, "learning_rate": 4.235230275116931e-06, "loss": 0.4542, "step": 13547 }, { "epoch": 0.5624523220151636, "grad_norm": 2.6237244606018066, "learning_rate": 4.234565878135172e-06, "loss": 0.5021, "step": 13548 }, { "epoch": 0.5624938375393749, "grad_norm": 2.197288990020752, "learning_rate": 4.233901494992685e-06, "loss": 0.4324, "step": 13549 }, { "epoch": 0.5625353530635863, "grad_norm": 2.3921115398406982, "learning_rate": 4.233237125701477e-06, "loss": 0.6493, "step": 13550 }, { "epoch": 0.5625768685877975, "grad_norm": 2.4750688076019287, "learning_rate": 4.232572770273563e-06, "loss": 0.4428, "step": 13551 }, { "epoch": 0.5626183841120089, "grad_norm": 2.176563024520874, "learning_rate": 4.231908428720955e-06, "loss": 0.5683, "step": 13552 }, { "epoch": 0.5626598996362202, "grad_norm": 2.473475933074951, "learning_rate": 4.231244101055665e-06, "loss": 0.6142, "step": 13553 }, { "epoch": 0.5627014151604316, "grad_norm": 2.2532100677490234, "learning_rate": 4.2305797872897025e-06, "loss": 0.474, "step": 13554 }, { "epoch": 0.5627429306846429, "grad_norm": 3.0810587406158447, "learning_rate": 4.229915487435078e-06, "loss": 0.5725, "step": 13555 }, { "epoch": 0.5627844462088543, "grad_norm": 2.5584542751312256, "learning_rate": 4.229251201503805e-06, "loss": 0.5884, "step": 13556 }, { "epoch": 0.5628259617330655, "grad_norm": 2.673758029937744, "learning_rate": 4.228586929507892e-06, "loss": 0.5086, "step": 13557 }, { "epoch": 0.5628674772572769, "grad_norm": 2.1399543285369873, "learning_rate": 4.227922671459351e-06, "loss": 0.3914, "step": 13558 }, { "epoch": 0.5629089927814882, "grad_norm": 2.6735165119171143, "learning_rate": 4.2272584273701875e-06, "loss": 0.598, "step": 13559 }, { "epoch": 0.5629505083056996, "grad_norm": 2.431445598602295, "learning_rate": 4.226594197252417e-06, "loss": 0.3927, "step": 13560 }, { "epoch": 0.5629920238299109, "grad_norm": 2.2357771396636963, "learning_rate": 4.225929981118045e-06, "loss": 0.6141, "step": 13561 }, { "epoch": 0.5630335393541223, "grad_norm": 2.666934013366699, "learning_rate": 4.225265778979084e-06, "loss": 0.444, "step": 13562 }, { "epoch": 0.5630750548783335, "grad_norm": 2.8820042610168457, "learning_rate": 4.224601590847539e-06, "loss": 0.64, "step": 13563 }, { "epoch": 0.5631165704025449, "grad_norm": 2.8415653705596924, "learning_rate": 4.223937416735421e-06, "loss": 0.5826, "step": 13564 }, { "epoch": 0.5631580859267562, "grad_norm": 2.449340343475342, "learning_rate": 4.223273256654737e-06, "loss": 0.4772, "step": 13565 }, { "epoch": 0.5631996014509676, "grad_norm": 2.3174567222595215, "learning_rate": 4.2226091106175e-06, "loss": 0.4795, "step": 13566 }, { "epoch": 0.5632411169751789, "grad_norm": 2.590536117553711, "learning_rate": 4.2219449786357114e-06, "loss": 0.5588, "step": 13567 }, { "epoch": 0.5632826324993903, "grad_norm": 2.720097541809082, "learning_rate": 4.2212808607213826e-06, "loss": 0.3564, "step": 13568 }, { "epoch": 0.5633241480236015, "grad_norm": 2.411939859390259, "learning_rate": 4.220616756886519e-06, "loss": 0.5305, "step": 13569 }, { "epoch": 0.5633656635478129, "grad_norm": 2.6728498935699463, "learning_rate": 4.219952667143131e-06, "loss": 0.5841, "step": 13570 }, { "epoch": 0.5634071790720242, "grad_norm": 2.012401819229126, "learning_rate": 4.219288591503224e-06, "loss": 0.5539, "step": 13571 }, { "epoch": 0.5634486945962356, "grad_norm": 2.7431752681732178, "learning_rate": 4.2186245299788026e-06, "loss": 0.5843, "step": 13572 }, { "epoch": 0.5634902101204469, "grad_norm": 2.4179792404174805, "learning_rate": 4.217960482581876e-06, "loss": 0.5278, "step": 13573 }, { "epoch": 0.5635317256446583, "grad_norm": 2.339718818664551, "learning_rate": 4.217296449324449e-06, "loss": 0.5192, "step": 13574 }, { "epoch": 0.5635732411688695, "grad_norm": 2.1662304401397705, "learning_rate": 4.216632430218528e-06, "loss": 0.5193, "step": 13575 }, { "epoch": 0.5636147566930809, "grad_norm": 2.3480803966522217, "learning_rate": 4.215968425276118e-06, "loss": 0.3945, "step": 13576 }, { "epoch": 0.5636562722172923, "grad_norm": 2.3233673572540283, "learning_rate": 4.215304434509225e-06, "loss": 0.5103, "step": 13577 }, { "epoch": 0.5636977877415036, "grad_norm": 2.3687379360198975, "learning_rate": 4.214640457929853e-06, "loss": 0.5348, "step": 13578 }, { "epoch": 0.563739303265715, "grad_norm": 2.3174448013305664, "learning_rate": 4.2139764955500105e-06, "loss": 0.4147, "step": 13579 }, { "epoch": 0.5637808187899263, "grad_norm": 2.914658784866333, "learning_rate": 4.2133125473816975e-06, "loss": 0.523, "step": 13580 }, { "epoch": 0.5638223343141376, "grad_norm": 2.128082275390625, "learning_rate": 4.21264861343692e-06, "loss": 0.4908, "step": 13581 }, { "epoch": 0.5638638498383489, "grad_norm": 1.9083523750305176, "learning_rate": 4.211984693727683e-06, "loss": 0.4358, "step": 13582 }, { "epoch": 0.5639053653625603, "grad_norm": 2.4392106533050537, "learning_rate": 4.211320788265991e-06, "loss": 0.5534, "step": 13583 }, { "epoch": 0.5639468808867716, "grad_norm": 2.5862364768981934, "learning_rate": 4.210656897063846e-06, "loss": 0.6374, "step": 13584 }, { "epoch": 0.563988396410983, "grad_norm": 1.9609239101409912, "learning_rate": 4.209993020133251e-06, "loss": 0.5229, "step": 13585 }, { "epoch": 0.5640299119351943, "grad_norm": 2.271563768386841, "learning_rate": 4.209329157486211e-06, "loss": 0.5439, "step": 13586 }, { "epoch": 0.5640714274594056, "grad_norm": 2.666637659072876, "learning_rate": 4.208665309134726e-06, "loss": 0.5715, "step": 13587 }, { "epoch": 0.5641129429836169, "grad_norm": 2.027914047241211, "learning_rate": 4.208001475090803e-06, "loss": 0.5711, "step": 13588 }, { "epoch": 0.5641544585078283, "grad_norm": 2.5316970348358154, "learning_rate": 4.207337655366439e-06, "loss": 0.4531, "step": 13589 }, { "epoch": 0.5641959740320396, "grad_norm": 2.46873140335083, "learning_rate": 4.20667384997364e-06, "loss": 0.4709, "step": 13590 }, { "epoch": 0.564237489556251, "grad_norm": 2.346173048019409, "learning_rate": 4.206010058924405e-06, "loss": 0.5732, "step": 13591 }, { "epoch": 0.5642790050804622, "grad_norm": 2.3609976768493652, "learning_rate": 4.20534628223074e-06, "loss": 0.4632, "step": 13592 }, { "epoch": 0.5643205206046736, "grad_norm": 2.096345901489258, "learning_rate": 4.204682519904641e-06, "loss": 0.5543, "step": 13593 }, { "epoch": 0.5643620361288849, "grad_norm": 2.8746256828308105, "learning_rate": 4.204018771958111e-06, "loss": 0.5653, "step": 13594 }, { "epoch": 0.5644035516530963, "grad_norm": 2.5949766635894775, "learning_rate": 4.2033550384031504e-06, "loss": 0.4947, "step": 13595 }, { "epoch": 0.5644450671773076, "grad_norm": 2.1859006881713867, "learning_rate": 4.202691319251763e-06, "loss": 0.4313, "step": 13596 }, { "epoch": 0.564486582701519, "grad_norm": 2.615576982498169, "learning_rate": 4.202027614515945e-06, "loss": 0.4213, "step": 13597 }, { "epoch": 0.5645280982257302, "grad_norm": 2.6373801231384277, "learning_rate": 4.201363924207697e-06, "loss": 0.5428, "step": 13598 }, { "epoch": 0.5645696137499416, "grad_norm": 2.8383586406707764, "learning_rate": 4.200700248339021e-06, "loss": 0.4627, "step": 13599 }, { "epoch": 0.5646111292741529, "grad_norm": 2.6651179790496826, "learning_rate": 4.200036586921913e-06, "loss": 0.5921, "step": 13600 }, { "epoch": 0.5646526447983643, "grad_norm": 2.2884764671325684, "learning_rate": 4.1993729399683765e-06, "loss": 0.6545, "step": 13601 }, { "epoch": 0.5646941603225756, "grad_norm": 2.5688610076904297, "learning_rate": 4.198709307490407e-06, "loss": 0.4716, "step": 13602 }, { "epoch": 0.564735675846787, "grad_norm": 2.0185861587524414, "learning_rate": 4.198045689500003e-06, "loss": 0.5047, "step": 13603 }, { "epoch": 0.5647771913709982, "grad_norm": 2.1940455436706543, "learning_rate": 4.197382086009164e-06, "loss": 0.6177, "step": 13604 }, { "epoch": 0.5648187068952096, "grad_norm": 2.5247385501861572, "learning_rate": 4.196718497029891e-06, "loss": 0.5225, "step": 13605 }, { "epoch": 0.5648602224194209, "grad_norm": 2.2306525707244873, "learning_rate": 4.196054922574177e-06, "loss": 0.6028, "step": 13606 }, { "epoch": 0.5649017379436323, "grad_norm": 2.7627503871917725, "learning_rate": 4.195391362654021e-06, "loss": 0.4382, "step": 13607 }, { "epoch": 0.5649432534678437, "grad_norm": 2.482229709625244, "learning_rate": 4.194727817281421e-06, "loss": 0.4852, "step": 13608 }, { "epoch": 0.564984768992055, "grad_norm": 2.570650577545166, "learning_rate": 4.194064286468376e-06, "loss": 0.4888, "step": 13609 }, { "epoch": 0.5650262845162664, "grad_norm": 2.578993320465088, "learning_rate": 4.19340077022688e-06, "loss": 0.5479, "step": 13610 }, { "epoch": 0.5650678000404776, "grad_norm": 1.9979356527328491, "learning_rate": 4.1927372685689315e-06, "loss": 0.5849, "step": 13611 }, { "epoch": 0.565109315564689, "grad_norm": 2.3182806968688965, "learning_rate": 4.1920737815065246e-06, "loss": 0.4157, "step": 13612 }, { "epoch": 0.5651508310889003, "grad_norm": 2.1175858974456787, "learning_rate": 4.1914103090516565e-06, "loss": 0.5125, "step": 13613 }, { "epoch": 0.5651923466131117, "grad_norm": 2.640016555786133, "learning_rate": 4.190746851216325e-06, "loss": 0.5472, "step": 13614 }, { "epoch": 0.565233862137323, "grad_norm": 2.21030855178833, "learning_rate": 4.1900834080125216e-06, "loss": 0.5858, "step": 13615 }, { "epoch": 0.5652753776615344, "grad_norm": 2.1697275638580322, "learning_rate": 4.189419979452244e-06, "loss": 0.3803, "step": 13616 }, { "epoch": 0.5653168931857456, "grad_norm": 2.855288028717041, "learning_rate": 4.188756565547487e-06, "loss": 0.5841, "step": 13617 }, { "epoch": 0.565358408709957, "grad_norm": 2.7971136569976807, "learning_rate": 4.188093166310247e-06, "loss": 0.4671, "step": 13618 }, { "epoch": 0.5653999242341683, "grad_norm": 2.582163095474243, "learning_rate": 4.1874297817525145e-06, "loss": 0.6234, "step": 13619 }, { "epoch": 0.5654414397583797, "grad_norm": 2.1795825958251953, "learning_rate": 4.186766411886285e-06, "loss": 0.5811, "step": 13620 }, { "epoch": 0.565482955282591, "grad_norm": 2.2933413982391357, "learning_rate": 4.186103056723554e-06, "loss": 0.4011, "step": 13621 }, { "epoch": 0.5655244708068023, "grad_norm": 3.0471863746643066, "learning_rate": 4.185439716276315e-06, "loss": 0.5983, "step": 13622 }, { "epoch": 0.5655659863310136, "grad_norm": 2.757563829421997, "learning_rate": 4.184776390556561e-06, "loss": 0.4953, "step": 13623 }, { "epoch": 0.565607501855225, "grad_norm": 2.5634939670562744, "learning_rate": 4.184113079576285e-06, "loss": 0.7045, "step": 13624 }, { "epoch": 0.5656490173794363, "grad_norm": 1.9986658096313477, "learning_rate": 4.183449783347478e-06, "loss": 0.5265, "step": 13625 }, { "epoch": 0.5656905329036477, "grad_norm": 2.4157962799072266, "learning_rate": 4.182786501882135e-06, "loss": 0.5237, "step": 13626 }, { "epoch": 0.565732048427859, "grad_norm": 2.640190839767456, "learning_rate": 4.182123235192248e-06, "loss": 0.6975, "step": 13627 }, { "epoch": 0.5657735639520703, "grad_norm": 2.4794888496398926, "learning_rate": 4.181459983289808e-06, "loss": 0.4585, "step": 13628 }, { "epoch": 0.5658150794762816, "grad_norm": 2.947551727294922, "learning_rate": 4.180796746186808e-06, "loss": 0.5998, "step": 13629 }, { "epoch": 0.565856595000493, "grad_norm": 2.6473419666290283, "learning_rate": 4.180133523895238e-06, "loss": 0.5632, "step": 13630 }, { "epoch": 0.5658981105247043, "grad_norm": 2.3719801902770996, "learning_rate": 4.1794703164270925e-06, "loss": 0.5054, "step": 13631 }, { "epoch": 0.5659396260489157, "grad_norm": 2.314155101776123, "learning_rate": 4.178807123794358e-06, "loss": 0.4417, "step": 13632 }, { "epoch": 0.565981141573127, "grad_norm": 2.718649387359619, "learning_rate": 4.1781439460090275e-06, "loss": 0.5646, "step": 13633 }, { "epoch": 0.5660226570973383, "grad_norm": 2.320931911468506, "learning_rate": 4.177480783083091e-06, "loss": 0.5562, "step": 13634 }, { "epoch": 0.5660641726215496, "grad_norm": 1.9773553609848022, "learning_rate": 4.176817635028541e-06, "loss": 0.5724, "step": 13635 }, { "epoch": 0.566105688145761, "grad_norm": 2.3019819259643555, "learning_rate": 4.176154501857365e-06, "loss": 0.4403, "step": 13636 }, { "epoch": 0.5661472036699723, "grad_norm": 3.615182876586914, "learning_rate": 4.175491383581552e-06, "loss": 0.5421, "step": 13637 }, { "epoch": 0.5661887191941837, "grad_norm": 2.5358781814575195, "learning_rate": 4.174828280213093e-06, "loss": 0.4908, "step": 13638 }, { "epoch": 0.5662302347183951, "grad_norm": 2.013237953186035, "learning_rate": 4.174165191763976e-06, "loss": 0.4429, "step": 13639 }, { "epoch": 0.5662717502426063, "grad_norm": 2.432751417160034, "learning_rate": 4.173502118246191e-06, "loss": 0.5997, "step": 13640 }, { "epoch": 0.5663132657668177, "grad_norm": 2.4685659408569336, "learning_rate": 4.1728390596717255e-06, "loss": 0.5415, "step": 13641 }, { "epoch": 0.566354781291029, "grad_norm": 2.1796092987060547, "learning_rate": 4.172176016052568e-06, "loss": 0.5961, "step": 13642 }, { "epoch": 0.5663962968152404, "grad_norm": 2.426424026489258, "learning_rate": 4.171512987400706e-06, "loss": 0.4168, "step": 13643 }, { "epoch": 0.5664378123394517, "grad_norm": 2.2641940116882324, "learning_rate": 4.1708499737281305e-06, "loss": 0.4684, "step": 13644 }, { "epoch": 0.5664793278636631, "grad_norm": 2.017958879470825, "learning_rate": 4.170186975046825e-06, "loss": 0.4935, "step": 13645 }, { "epoch": 0.5665208433878743, "grad_norm": 2.366358518600464, "learning_rate": 4.169523991368778e-06, "loss": 0.5572, "step": 13646 }, { "epoch": 0.5665623589120857, "grad_norm": 2.8550806045532227, "learning_rate": 4.168861022705976e-06, "loss": 0.6932, "step": 13647 }, { "epoch": 0.566603874436297, "grad_norm": 2.676485776901245, "learning_rate": 4.168198069070409e-06, "loss": 0.4972, "step": 13648 }, { "epoch": 0.5666453899605084, "grad_norm": 2.430750608444214, "learning_rate": 4.167535130474058e-06, "loss": 0.5826, "step": 13649 }, { "epoch": 0.5666869054847197, "grad_norm": 2.2804901599884033, "learning_rate": 4.166872206928912e-06, "loss": 0.5808, "step": 13650 }, { "epoch": 0.5667284210089311, "grad_norm": 2.270275592803955, "learning_rate": 4.166209298446957e-06, "loss": 0.5387, "step": 13651 }, { "epoch": 0.5667699365331423, "grad_norm": 2.1180260181427, "learning_rate": 4.165546405040177e-06, "loss": 0.5524, "step": 13652 }, { "epoch": 0.5668114520573537, "grad_norm": 3.4689128398895264, "learning_rate": 4.164883526720562e-06, "loss": 0.6292, "step": 13653 }, { "epoch": 0.566852967581565, "grad_norm": 2.1530449390411377, "learning_rate": 4.164220663500091e-06, "loss": 0.435, "step": 13654 }, { "epoch": 0.5668944831057764, "grad_norm": 2.3394763469696045, "learning_rate": 4.163557815390751e-06, "loss": 0.4931, "step": 13655 }, { "epoch": 0.5669359986299877, "grad_norm": 2.1849985122680664, "learning_rate": 4.162894982404527e-06, "loss": 0.5126, "step": 13656 }, { "epoch": 0.5669775141541991, "grad_norm": 2.4793245792388916, "learning_rate": 4.162232164553405e-06, "loss": 0.4834, "step": 13657 }, { "epoch": 0.5670190296784103, "grad_norm": 2.590670108795166, "learning_rate": 4.161569361849365e-06, "loss": 0.5598, "step": 13658 }, { "epoch": 0.5670605452026217, "grad_norm": 2.1721134185791016, "learning_rate": 4.160906574304392e-06, "loss": 0.3334, "step": 13659 }, { "epoch": 0.567102060726833, "grad_norm": 3.298185348510742, "learning_rate": 4.160243801930471e-06, "loss": 0.6355, "step": 13660 }, { "epoch": 0.5671435762510444, "grad_norm": 2.197160482406616, "learning_rate": 4.159581044739585e-06, "loss": 0.476, "step": 13661 }, { "epoch": 0.5671850917752557, "grad_norm": 2.7523434162139893, "learning_rate": 4.158918302743715e-06, "loss": 0.449, "step": 13662 }, { "epoch": 0.5672266072994671, "grad_norm": 2.5736799240112305, "learning_rate": 4.158255575954845e-06, "loss": 0.4815, "step": 13663 }, { "epoch": 0.5672681228236783, "grad_norm": 2.4282782077789307, "learning_rate": 4.1575928643849555e-06, "loss": 0.5893, "step": 13664 }, { "epoch": 0.5673096383478897, "grad_norm": 2.2751288414001465, "learning_rate": 4.1569301680460304e-06, "loss": 0.5872, "step": 13665 }, { "epoch": 0.567351153872101, "grad_norm": 2.678715467453003, "learning_rate": 4.156267486950053e-06, "loss": 0.6083, "step": 13666 }, { "epoch": 0.5673926693963124, "grad_norm": 2.1944007873535156, "learning_rate": 4.155604821109e-06, "loss": 0.4994, "step": 13667 }, { "epoch": 0.5674341849205237, "grad_norm": 2.7738871574401855, "learning_rate": 4.154942170534856e-06, "loss": 0.5478, "step": 13668 }, { "epoch": 0.567475700444735, "grad_norm": 2.0578174591064453, "learning_rate": 4.1542795352395995e-06, "loss": 0.4677, "step": 13669 }, { "epoch": 0.5675172159689464, "grad_norm": 2.3484182357788086, "learning_rate": 4.153616915235216e-06, "loss": 0.4522, "step": 13670 }, { "epoch": 0.5675587314931577, "grad_norm": 2.232283353805542, "learning_rate": 4.15295431053368e-06, "loss": 0.498, "step": 13671 }, { "epoch": 0.5676002470173691, "grad_norm": 2.64096999168396, "learning_rate": 4.1522917211469754e-06, "loss": 0.6791, "step": 13672 }, { "epoch": 0.5676417625415804, "grad_norm": 2.474428176879883, "learning_rate": 4.15162914708708e-06, "loss": 0.6348, "step": 13673 }, { "epoch": 0.5676832780657918, "grad_norm": 2.1175734996795654, "learning_rate": 4.150966588365977e-06, "loss": 0.5897, "step": 13674 }, { "epoch": 0.567724793590003, "grad_norm": 2.563730239868164, "learning_rate": 4.1503040449956395e-06, "loss": 0.4708, "step": 13675 }, { "epoch": 0.5677663091142144, "grad_norm": 1.9159436225891113, "learning_rate": 4.149641516988051e-06, "loss": 0.3862, "step": 13676 }, { "epoch": 0.5678078246384257, "grad_norm": 2.258943796157837, "learning_rate": 4.148979004355189e-06, "loss": 0.4757, "step": 13677 }, { "epoch": 0.5678493401626371, "grad_norm": 2.036956548690796, "learning_rate": 4.1483165071090305e-06, "loss": 0.4554, "step": 13678 }, { "epoch": 0.5678908556868484, "grad_norm": 2.2842652797698975, "learning_rate": 4.147654025261558e-06, "loss": 0.5883, "step": 13679 }, { "epoch": 0.5679323712110598, "grad_norm": 2.4046640396118164, "learning_rate": 4.1469915588247445e-06, "loss": 0.4349, "step": 13680 }, { "epoch": 0.567973886735271, "grad_norm": 2.5334417819976807, "learning_rate": 4.146329107810569e-06, "loss": 0.4, "step": 13681 }, { "epoch": 0.5680154022594824, "grad_norm": 2.983715295791626, "learning_rate": 4.145666672231009e-06, "loss": 0.5411, "step": 13682 }, { "epoch": 0.5680569177836937, "grad_norm": 2.5541388988494873, "learning_rate": 4.145004252098044e-06, "loss": 0.5843, "step": 13683 }, { "epoch": 0.5680984333079051, "grad_norm": 2.320870876312256, "learning_rate": 4.144341847423647e-06, "loss": 0.6761, "step": 13684 }, { "epoch": 0.5681399488321164, "grad_norm": 2.46494722366333, "learning_rate": 4.143679458219796e-06, "loss": 0.6516, "step": 13685 }, { "epoch": 0.5681814643563278, "grad_norm": 2.527672529220581, "learning_rate": 4.143017084498467e-06, "loss": 0.6456, "step": 13686 }, { "epoch": 0.568222979880539, "grad_norm": 2.278787136077881, "learning_rate": 4.142354726271638e-06, "loss": 0.5746, "step": 13687 }, { "epoch": 0.5682644954047504, "grad_norm": 2.5678553581237793, "learning_rate": 4.141692383551281e-06, "loss": 0.6265, "step": 13688 }, { "epoch": 0.5683060109289617, "grad_norm": 2.867507219314575, "learning_rate": 4.1410300563493736e-06, "loss": 0.5963, "step": 13689 }, { "epoch": 0.5683475264531731, "grad_norm": 2.468024969100952, "learning_rate": 4.14036774467789e-06, "loss": 0.6029, "step": 13690 }, { "epoch": 0.5683890419773844, "grad_norm": 2.323793411254883, "learning_rate": 4.139705448548805e-06, "loss": 0.5652, "step": 13691 }, { "epoch": 0.5684305575015958, "grad_norm": 2.3444595336914062, "learning_rate": 4.139043167974096e-06, "loss": 0.5195, "step": 13692 }, { "epoch": 0.568472073025807, "grad_norm": 2.2659144401550293, "learning_rate": 4.1383809029657315e-06, "loss": 0.4933, "step": 13693 }, { "epoch": 0.5685135885500184, "grad_norm": 2.90844464302063, "learning_rate": 4.137718653535689e-06, "loss": 0.5875, "step": 13694 }, { "epoch": 0.5685551040742297, "grad_norm": 2.1658291816711426, "learning_rate": 4.137056419695943e-06, "loss": 0.6124, "step": 13695 }, { "epoch": 0.5685966195984411, "grad_norm": 2.2832207679748535, "learning_rate": 4.136394201458467e-06, "loss": 0.5741, "step": 13696 }, { "epoch": 0.5686381351226524, "grad_norm": 2.468782901763916, "learning_rate": 4.13573199883523e-06, "loss": 0.5171, "step": 13697 }, { "epoch": 0.5686796506468638, "grad_norm": 2.57623553276062, "learning_rate": 4.135069811838208e-06, "loss": 0.4653, "step": 13698 }, { "epoch": 0.568721166171075, "grad_norm": 2.091269016265869, "learning_rate": 4.1344076404793736e-06, "loss": 0.3945, "step": 13699 }, { "epoch": 0.5687626816952864, "grad_norm": 2.4240036010742188, "learning_rate": 4.1337454847707e-06, "loss": 0.6681, "step": 13700 }, { "epoch": 0.5688041972194978, "grad_norm": 2.307218551635742, "learning_rate": 4.133083344724156e-06, "loss": 0.5139, "step": 13701 }, { "epoch": 0.5688457127437091, "grad_norm": 2.734682321548462, "learning_rate": 4.132421220351716e-06, "loss": 0.5407, "step": 13702 }, { "epoch": 0.5688872282679205, "grad_norm": 2.271355628967285, "learning_rate": 4.131759111665349e-06, "loss": 0.4641, "step": 13703 }, { "epoch": 0.5689287437921318, "grad_norm": 2.4133987426757812, "learning_rate": 4.131097018677028e-06, "loss": 0.433, "step": 13704 }, { "epoch": 0.5689702593163432, "grad_norm": 2.3194570541381836, "learning_rate": 4.1304349413987246e-06, "loss": 0.6206, "step": 13705 }, { "epoch": 0.5690117748405544, "grad_norm": 2.438124179840088, "learning_rate": 4.129772879842407e-06, "loss": 0.4176, "step": 13706 }, { "epoch": 0.5690532903647658, "grad_norm": 2.160315752029419, "learning_rate": 4.129110834020046e-06, "loss": 0.3685, "step": 13707 }, { "epoch": 0.5690948058889771, "grad_norm": 2.558047294616699, "learning_rate": 4.128448803943612e-06, "loss": 0.6008, "step": 13708 }, { "epoch": 0.5691363214131885, "grad_norm": 3.381713628768921, "learning_rate": 4.127786789625077e-06, "loss": 0.5451, "step": 13709 }, { "epoch": 0.5691778369373998, "grad_norm": 2.414339780807495, "learning_rate": 4.127124791076407e-06, "loss": 0.5063, "step": 13710 }, { "epoch": 0.5692193524616111, "grad_norm": 2.3658719062805176, "learning_rate": 4.126462808309572e-06, "loss": 0.6176, "step": 13711 }, { "epoch": 0.5692608679858224, "grad_norm": 3.3987419605255127, "learning_rate": 4.12580084133654e-06, "loss": 0.6618, "step": 13712 }, { "epoch": 0.5693023835100338, "grad_norm": 2.4240810871124268, "learning_rate": 4.1251388901692846e-06, "loss": 0.53, "step": 13713 }, { "epoch": 0.5693438990342451, "grad_norm": 2.73431658744812, "learning_rate": 4.124476954819767e-06, "loss": 0.5674, "step": 13714 }, { "epoch": 0.5693854145584565, "grad_norm": 2.178109645843506, "learning_rate": 4.12381503529996e-06, "loss": 0.4899, "step": 13715 }, { "epoch": 0.5694269300826678, "grad_norm": 2.726776361465454, "learning_rate": 4.123153131621828e-06, "loss": 0.6244, "step": 13716 }, { "epoch": 0.5694684456068791, "grad_norm": 2.2214179039001465, "learning_rate": 4.1224912437973405e-06, "loss": 0.6091, "step": 13717 }, { "epoch": 0.5695099611310904, "grad_norm": 2.268242835998535, "learning_rate": 4.121829371838467e-06, "loss": 0.5324, "step": 13718 }, { "epoch": 0.5695514766553018, "grad_norm": 2.1439778804779053, "learning_rate": 4.121167515757168e-06, "loss": 0.5325, "step": 13719 }, { "epoch": 0.5695929921795131, "grad_norm": 2.3957679271698, "learning_rate": 4.120505675565415e-06, "loss": 0.5378, "step": 13720 }, { "epoch": 0.5696345077037245, "grad_norm": 2.573432207107544, "learning_rate": 4.119843851275173e-06, "loss": 0.5459, "step": 13721 }, { "epoch": 0.5696760232279358, "grad_norm": 2.2377774715423584, "learning_rate": 4.119182042898408e-06, "loss": 0.546, "step": 13722 }, { "epoch": 0.5697175387521471, "grad_norm": 2.0930192470550537, "learning_rate": 4.118520250447085e-06, "loss": 0.5102, "step": 13723 }, { "epoch": 0.5697590542763584, "grad_norm": 2.6221930980682373, "learning_rate": 4.11785847393317e-06, "loss": 0.4781, "step": 13724 }, { "epoch": 0.5698005698005698, "grad_norm": 2.9553654193878174, "learning_rate": 4.117196713368629e-06, "loss": 0.6647, "step": 13725 }, { "epoch": 0.5698420853247811, "grad_norm": 3.652104616165161, "learning_rate": 4.116534968765426e-06, "loss": 0.6909, "step": 13726 }, { "epoch": 0.5698836008489925, "grad_norm": 2.7755937576293945, "learning_rate": 4.115873240135524e-06, "loss": 0.5278, "step": 13727 }, { "epoch": 0.5699251163732038, "grad_norm": 2.352769136428833, "learning_rate": 4.1152115274908895e-06, "loss": 0.4994, "step": 13728 }, { "epoch": 0.5699666318974151, "grad_norm": 2.418139696121216, "learning_rate": 4.1145498308434854e-06, "loss": 0.5011, "step": 13729 }, { "epoch": 0.5700081474216264, "grad_norm": 2.975884199142456, "learning_rate": 4.113888150205274e-06, "loss": 0.5476, "step": 13730 }, { "epoch": 0.5700496629458378, "grad_norm": 2.153820753097534, "learning_rate": 4.113226485588224e-06, "loss": 0.5258, "step": 13731 }, { "epoch": 0.5700911784700492, "grad_norm": 2.3011367321014404, "learning_rate": 4.1125648370042915e-06, "loss": 0.5391, "step": 13732 }, { "epoch": 0.5701326939942605, "grad_norm": 2.758981227874756, "learning_rate": 4.111903204465443e-06, "loss": 0.4921, "step": 13733 }, { "epoch": 0.5701742095184719, "grad_norm": 2.393378973007202, "learning_rate": 4.111241587983641e-06, "loss": 0.5618, "step": 13734 }, { "epoch": 0.5702157250426831, "grad_norm": 2.520073890686035, "learning_rate": 4.110579987570848e-06, "loss": 0.5334, "step": 13735 }, { "epoch": 0.5702572405668945, "grad_norm": 2.2484400272369385, "learning_rate": 4.109918403239024e-06, "loss": 0.5136, "step": 13736 }, { "epoch": 0.5702987560911058, "grad_norm": 2.3476359844207764, "learning_rate": 4.109256835000132e-06, "loss": 0.5049, "step": 13737 }, { "epoch": 0.5703402716153172, "grad_norm": 2.080214738845825, "learning_rate": 4.108595282866135e-06, "loss": 0.432, "step": 13738 }, { "epoch": 0.5703817871395285, "grad_norm": 2.5947744846343994, "learning_rate": 4.107933746848991e-06, "loss": 0.6145, "step": 13739 }, { "epoch": 0.5704233026637399, "grad_norm": 2.301164388656616, "learning_rate": 4.107272226960661e-06, "loss": 0.5885, "step": 13740 }, { "epoch": 0.5704648181879511, "grad_norm": 2.8042593002319336, "learning_rate": 4.106610723213107e-06, "loss": 0.5638, "step": 13741 }, { "epoch": 0.5705063337121625, "grad_norm": 2.0286550521850586, "learning_rate": 4.105949235618288e-06, "loss": 0.5751, "step": 13742 }, { "epoch": 0.5705478492363738, "grad_norm": 2.624375343322754, "learning_rate": 4.105287764188165e-06, "loss": 0.4777, "step": 13743 }, { "epoch": 0.5705893647605852, "grad_norm": 2.35079288482666, "learning_rate": 4.1046263089347e-06, "loss": 0.4811, "step": 13744 }, { "epoch": 0.5706308802847965, "grad_norm": 2.88022780418396, "learning_rate": 4.103964869869846e-06, "loss": 0.5953, "step": 13745 }, { "epoch": 0.5706723958090079, "grad_norm": 2.313624620437622, "learning_rate": 4.1033034470055655e-06, "loss": 0.6085, "step": 13746 }, { "epoch": 0.5707139113332191, "grad_norm": 2.4437265396118164, "learning_rate": 4.102642040353817e-06, "loss": 0.5284, "step": 13747 }, { "epoch": 0.5707554268574305, "grad_norm": 2.313472032546997, "learning_rate": 4.101980649926561e-06, "loss": 0.4081, "step": 13748 }, { "epoch": 0.5707969423816418, "grad_norm": 2.4260404109954834, "learning_rate": 4.101319275735752e-06, "loss": 0.6276, "step": 13749 }, { "epoch": 0.5708384579058532, "grad_norm": 2.2585365772247314, "learning_rate": 4.100657917793351e-06, "loss": 0.5514, "step": 13750 }, { "epoch": 0.5708799734300645, "grad_norm": 2.163628101348877, "learning_rate": 4.099996576111313e-06, "loss": 0.5006, "step": 13751 }, { "epoch": 0.5709214889542759, "grad_norm": 2.716043710708618, "learning_rate": 4.099335250701597e-06, "loss": 0.6306, "step": 13752 }, { "epoch": 0.5709630044784871, "grad_norm": 1.8571439981460571, "learning_rate": 4.098673941576158e-06, "loss": 0.4256, "step": 13753 }, { "epoch": 0.5710045200026985, "grad_norm": 1.9637634754180908, "learning_rate": 4.098012648746955e-06, "loss": 0.5543, "step": 13754 }, { "epoch": 0.5710460355269098, "grad_norm": 2.7370636463165283, "learning_rate": 4.097351372225943e-06, "loss": 0.5593, "step": 13755 }, { "epoch": 0.5710875510511212, "grad_norm": 2.266277313232422, "learning_rate": 4.096690112025077e-06, "loss": 0.4459, "step": 13756 }, { "epoch": 0.5711290665753325, "grad_norm": 2.614086866378784, "learning_rate": 4.096028868156318e-06, "loss": 0.6953, "step": 13757 }, { "epoch": 0.5711705820995439, "grad_norm": 2.315281391143799, "learning_rate": 4.095367640631614e-06, "loss": 0.506, "step": 13758 }, { "epoch": 0.5712120976237551, "grad_norm": 2.139615297317505, "learning_rate": 4.094706429462925e-06, "loss": 0.4911, "step": 13759 }, { "epoch": 0.5712536131479665, "grad_norm": 2.421680212020874, "learning_rate": 4.094045234662203e-06, "loss": 0.457, "step": 13760 }, { "epoch": 0.5712951286721778, "grad_norm": 2.5177597999572754, "learning_rate": 4.093384056241408e-06, "loss": 0.4444, "step": 13761 }, { "epoch": 0.5713366441963892, "grad_norm": 2.4610002040863037, "learning_rate": 4.092722894212488e-06, "loss": 0.3831, "step": 13762 }, { "epoch": 0.5713781597206006, "grad_norm": 2.315514087677002, "learning_rate": 4.0920617485873995e-06, "loss": 0.5659, "step": 13763 }, { "epoch": 0.5714196752448119, "grad_norm": 2.102248430252075, "learning_rate": 4.091400619378097e-06, "loss": 0.4068, "step": 13764 }, { "epoch": 0.5714611907690232, "grad_norm": 2.1597769260406494, "learning_rate": 4.0907395065965336e-06, "loss": 0.3616, "step": 13765 }, { "epoch": 0.5715027062932345, "grad_norm": 2.3637943267822266, "learning_rate": 4.090078410254661e-06, "loss": 0.6164, "step": 13766 }, { "epoch": 0.5715442218174459, "grad_norm": 2.0227246284484863, "learning_rate": 4.0894173303644335e-06, "loss": 0.5444, "step": 13767 }, { "epoch": 0.5715857373416572, "grad_norm": 2.3789074420928955, "learning_rate": 4.088756266937803e-06, "loss": 0.585, "step": 13768 }, { "epoch": 0.5716272528658686, "grad_norm": 2.4752097129821777, "learning_rate": 4.088095219986721e-06, "loss": 0.6214, "step": 13769 }, { "epoch": 0.5716687683900799, "grad_norm": 2.169760227203369, "learning_rate": 4.087434189523143e-06, "loss": 0.6289, "step": 13770 }, { "epoch": 0.5717102839142912, "grad_norm": 2.596935749053955, "learning_rate": 4.086773175559015e-06, "loss": 0.4567, "step": 13771 }, { "epoch": 0.5717517994385025, "grad_norm": 2.2915494441986084, "learning_rate": 4.086112178106292e-06, "loss": 0.5163, "step": 13772 }, { "epoch": 0.5717933149627139, "grad_norm": 2.5300791263580322, "learning_rate": 4.085451197176924e-06, "loss": 0.5695, "step": 13773 }, { "epoch": 0.5718348304869252, "grad_norm": 2.5798165798187256, "learning_rate": 4.084790232782864e-06, "loss": 0.6567, "step": 13774 }, { "epoch": 0.5718763460111366, "grad_norm": 2.4034268856048584, "learning_rate": 4.084129284936058e-06, "loss": 0.4826, "step": 13775 }, { "epoch": 0.5719178615353478, "grad_norm": 1.9839165210723877, "learning_rate": 4.083468353648459e-06, "loss": 0.3934, "step": 13776 }, { "epoch": 0.5719593770595592, "grad_norm": 2.590625286102295, "learning_rate": 4.082807438932017e-06, "loss": 0.5623, "step": 13777 }, { "epoch": 0.5720008925837705, "grad_norm": 3.0020980834960938, "learning_rate": 4.082146540798681e-06, "loss": 0.4869, "step": 13778 }, { "epoch": 0.5720424081079819, "grad_norm": 2.1161017417907715, "learning_rate": 4.081485659260399e-06, "loss": 0.4672, "step": 13779 }, { "epoch": 0.5720839236321932, "grad_norm": 2.2543387413024902, "learning_rate": 4.080824794329122e-06, "loss": 0.5805, "step": 13780 }, { "epoch": 0.5721254391564046, "grad_norm": 2.096607208251953, "learning_rate": 4.080163946016797e-06, "loss": 0.4679, "step": 13781 }, { "epoch": 0.5721669546806158, "grad_norm": 2.4320099353790283, "learning_rate": 4.079503114335372e-06, "loss": 0.4953, "step": 13782 }, { "epoch": 0.5722084702048272, "grad_norm": 2.6363275051116943, "learning_rate": 4.078842299296799e-06, "loss": 0.4966, "step": 13783 }, { "epoch": 0.5722499857290385, "grad_norm": 2.2537055015563965, "learning_rate": 4.078181500913021e-06, "loss": 0.4637, "step": 13784 }, { "epoch": 0.5722915012532499, "grad_norm": 1.92079496383667, "learning_rate": 4.077520719195986e-06, "loss": 0.3693, "step": 13785 }, { "epoch": 0.5723330167774612, "grad_norm": 2.7387142181396484, "learning_rate": 4.076859954157643e-06, "loss": 0.5351, "step": 13786 }, { "epoch": 0.5723745323016726, "grad_norm": 2.147616147994995, "learning_rate": 4.07619920580994e-06, "loss": 0.3974, "step": 13787 }, { "epoch": 0.5724160478258838, "grad_norm": 2.151240348815918, "learning_rate": 4.07553847416482e-06, "loss": 0.5238, "step": 13788 }, { "epoch": 0.5724575633500952, "grad_norm": 2.4661223888397217, "learning_rate": 4.07487775923423e-06, "loss": 0.3916, "step": 13789 }, { "epoch": 0.5724990788743065, "grad_norm": 2.204840898513794, "learning_rate": 4.074217061030119e-06, "loss": 0.4584, "step": 13790 }, { "epoch": 0.5725405943985179, "grad_norm": 2.2586004734039307, "learning_rate": 4.073556379564429e-06, "loss": 0.5192, "step": 13791 }, { "epoch": 0.5725821099227292, "grad_norm": 2.241323709487915, "learning_rate": 4.072895714849106e-06, "loss": 0.537, "step": 13792 }, { "epoch": 0.5726236254469406, "grad_norm": 2.633668899536133, "learning_rate": 4.072235066896098e-06, "loss": 0.6153, "step": 13793 }, { "epoch": 0.572665140971152, "grad_norm": 2.6787211894989014, "learning_rate": 4.071574435717345e-06, "loss": 0.5282, "step": 13794 }, { "epoch": 0.5727066564953632, "grad_norm": 2.3152873516082764, "learning_rate": 4.070913821324797e-06, "loss": 0.5648, "step": 13795 }, { "epoch": 0.5727481720195746, "grad_norm": 2.451622486114502, "learning_rate": 4.0702532237303915e-06, "loss": 0.5554, "step": 13796 }, { "epoch": 0.5727896875437859, "grad_norm": 2.866511583328247, "learning_rate": 4.069592642946077e-06, "loss": 0.5235, "step": 13797 }, { "epoch": 0.5728312030679973, "grad_norm": 2.5310304164886475, "learning_rate": 4.068932078983795e-06, "loss": 0.5223, "step": 13798 }, { "epoch": 0.5728727185922086, "grad_norm": 2.237135410308838, "learning_rate": 4.068271531855489e-06, "loss": 0.5552, "step": 13799 }, { "epoch": 0.57291423411642, "grad_norm": 3.0758519172668457, "learning_rate": 4.067611001573104e-06, "loss": 0.5212, "step": 13800 }, { "epoch": 0.5729557496406312, "grad_norm": 2.512787103652954, "learning_rate": 4.06695048814858e-06, "loss": 0.6195, "step": 13801 }, { "epoch": 0.5729972651648426, "grad_norm": 2.842252254486084, "learning_rate": 4.066289991593859e-06, "loss": 0.5384, "step": 13802 }, { "epoch": 0.5730387806890539, "grad_norm": 2.1681408882141113, "learning_rate": 4.065629511920886e-06, "loss": 0.564, "step": 13803 }, { "epoch": 0.5730802962132653, "grad_norm": 2.471006393432617, "learning_rate": 4.0649690491416e-06, "loss": 0.5002, "step": 13804 }, { "epoch": 0.5731218117374766, "grad_norm": 2.7999978065490723, "learning_rate": 4.064308603267943e-06, "loss": 0.451, "step": 13805 }, { "epoch": 0.573163327261688, "grad_norm": 2.2622671127319336, "learning_rate": 4.063648174311856e-06, "loss": 0.4305, "step": 13806 }, { "epoch": 0.5732048427858992, "grad_norm": 2.6097922325134277, "learning_rate": 4.06298776228528e-06, "loss": 0.5029, "step": 13807 }, { "epoch": 0.5732463583101106, "grad_norm": 2.3542325496673584, "learning_rate": 4.062327367200157e-06, "loss": 0.3765, "step": 13808 }, { "epoch": 0.5732878738343219, "grad_norm": 2.970952033996582, "learning_rate": 4.061666989068423e-06, "loss": 0.593, "step": 13809 }, { "epoch": 0.5733293893585333, "grad_norm": 3.3531339168548584, "learning_rate": 4.061006627902021e-06, "loss": 0.701, "step": 13810 }, { "epoch": 0.5733709048827446, "grad_norm": 2.3957841396331787, "learning_rate": 4.06034628371289e-06, "loss": 0.5161, "step": 13811 }, { "epoch": 0.573412420406956, "grad_norm": 2.5749051570892334, "learning_rate": 4.059685956512968e-06, "loss": 0.4565, "step": 13812 }, { "epoch": 0.5734539359311672, "grad_norm": 2.5060489177703857, "learning_rate": 4.059025646314197e-06, "loss": 0.5654, "step": 13813 }, { "epoch": 0.5734954514553786, "grad_norm": 2.3407368659973145, "learning_rate": 4.058365353128513e-06, "loss": 0.4408, "step": 13814 }, { "epoch": 0.5735369669795899, "grad_norm": 2.281520128250122, "learning_rate": 4.057705076967853e-06, "loss": 0.5503, "step": 13815 }, { "epoch": 0.5735784825038013, "grad_norm": 2.1315882205963135, "learning_rate": 4.057044817844159e-06, "loss": 0.4422, "step": 13816 }, { "epoch": 0.5736199980280126, "grad_norm": 2.7792563438415527, "learning_rate": 4.056384575769366e-06, "loss": 0.5403, "step": 13817 }, { "epoch": 0.573661513552224, "grad_norm": 2.4765188694000244, "learning_rate": 4.055724350755412e-06, "loss": 0.6628, "step": 13818 }, { "epoch": 0.5737030290764352, "grad_norm": 2.1657469272613525, "learning_rate": 4.0550641428142325e-06, "loss": 0.4453, "step": 13819 }, { "epoch": 0.5737445446006466, "grad_norm": 2.003513813018799, "learning_rate": 4.054403951957766e-06, "loss": 0.3548, "step": 13820 }, { "epoch": 0.5737860601248579, "grad_norm": 2.229815721511841, "learning_rate": 4.053743778197951e-06, "loss": 0.5691, "step": 13821 }, { "epoch": 0.5738275756490693, "grad_norm": 2.542253017425537, "learning_rate": 4.053083621546718e-06, "loss": 0.5569, "step": 13822 }, { "epoch": 0.5738690911732806, "grad_norm": 2.4442670345306396, "learning_rate": 4.052423482016007e-06, "loss": 0.437, "step": 13823 }, { "epoch": 0.5739106066974919, "grad_norm": 2.4365785121917725, "learning_rate": 4.051763359617753e-06, "loss": 0.6256, "step": 13824 }, { "epoch": 0.5739521222217033, "grad_norm": 2.4532792568206787, "learning_rate": 4.05110325436389e-06, "loss": 0.6094, "step": 13825 }, { "epoch": 0.5739936377459146, "grad_norm": 2.2140724658966064, "learning_rate": 4.050443166266356e-06, "loss": 0.4072, "step": 13826 }, { "epoch": 0.574035153270126, "grad_norm": 2.1257331371307373, "learning_rate": 4.049783095337081e-06, "loss": 0.4657, "step": 13827 }, { "epoch": 0.5740766687943373, "grad_norm": 4.10096549987793, "learning_rate": 4.049123041588002e-06, "loss": 0.5146, "step": 13828 }, { "epoch": 0.5741181843185487, "grad_norm": 2.6576426029205322, "learning_rate": 4.048463005031053e-06, "loss": 0.646, "step": 13829 }, { "epoch": 0.5741596998427599, "grad_norm": 2.9741973876953125, "learning_rate": 4.047802985678167e-06, "loss": 0.5208, "step": 13830 }, { "epoch": 0.5742012153669713, "grad_norm": 2.5565345287323, "learning_rate": 4.047142983541278e-06, "loss": 0.4695, "step": 13831 }, { "epoch": 0.5742427308911826, "grad_norm": 2.0968337059020996, "learning_rate": 4.046482998632317e-06, "loss": 0.4185, "step": 13832 }, { "epoch": 0.574284246415394, "grad_norm": 2.4172556400299072, "learning_rate": 4.045823030963218e-06, "loss": 0.4427, "step": 13833 }, { "epoch": 0.5743257619396053, "grad_norm": 2.2036633491516113, "learning_rate": 4.045163080545917e-06, "loss": 0.5293, "step": 13834 }, { "epoch": 0.5743672774638167, "grad_norm": 2.232564926147461, "learning_rate": 4.044503147392339e-06, "loss": 0.6042, "step": 13835 }, { "epoch": 0.5744087929880279, "grad_norm": 2.716456651687622, "learning_rate": 4.043843231514421e-06, "loss": 0.3728, "step": 13836 }, { "epoch": 0.5744503085122393, "grad_norm": 2.6272716522216797, "learning_rate": 4.0431833329240915e-06, "loss": 0.5157, "step": 13837 }, { "epoch": 0.5744918240364506, "grad_norm": 3.180896520614624, "learning_rate": 4.042523451633284e-06, "loss": 0.7239, "step": 13838 }, { "epoch": 0.574533339560662, "grad_norm": 2.468541145324707, "learning_rate": 4.04186358765393e-06, "loss": 0.5407, "step": 13839 }, { "epoch": 0.5745748550848733, "grad_norm": 2.650200366973877, "learning_rate": 4.041203740997957e-06, "loss": 0.7048, "step": 13840 }, { "epoch": 0.5746163706090847, "grad_norm": 3.450270891189575, "learning_rate": 4.040543911677297e-06, "loss": 0.3966, "step": 13841 }, { "epoch": 0.5746578861332959, "grad_norm": 2.168170928955078, "learning_rate": 4.03988409970388e-06, "loss": 0.5412, "step": 13842 }, { "epoch": 0.5746994016575073, "grad_norm": 2.349320888519287, "learning_rate": 4.039224305089635e-06, "loss": 0.5497, "step": 13843 }, { "epoch": 0.5747409171817186, "grad_norm": 2.266674041748047, "learning_rate": 4.038564527846492e-06, "loss": 0.3822, "step": 13844 }, { "epoch": 0.57478243270593, "grad_norm": 2.163017988204956, "learning_rate": 4.037904767986378e-06, "loss": 0.6465, "step": 13845 }, { "epoch": 0.5748239482301413, "grad_norm": 2.4727039337158203, "learning_rate": 4.037245025521224e-06, "loss": 0.457, "step": 13846 }, { "epoch": 0.5748654637543527, "grad_norm": 2.5537145137786865, "learning_rate": 4.036585300462959e-06, "loss": 0.4539, "step": 13847 }, { "epoch": 0.5749069792785639, "grad_norm": 2.120586395263672, "learning_rate": 4.035925592823508e-06, "loss": 0.5385, "step": 13848 }, { "epoch": 0.5749484948027753, "grad_norm": 2.62663197517395, "learning_rate": 4.035265902614799e-06, "loss": 0.4584, "step": 13849 }, { "epoch": 0.5749900103269866, "grad_norm": 2.654533863067627, "learning_rate": 4.034606229848761e-06, "loss": 0.5208, "step": 13850 }, { "epoch": 0.575031525851198, "grad_norm": 2.6031861305236816, "learning_rate": 4.03394657453732e-06, "loss": 0.582, "step": 13851 }, { "epoch": 0.5750730413754093, "grad_norm": 2.3925185203552246, "learning_rate": 4.033286936692406e-06, "loss": 0.4747, "step": 13852 }, { "epoch": 0.5751145568996207, "grad_norm": 2.1351771354675293, "learning_rate": 4.032627316325941e-06, "loss": 0.501, "step": 13853 }, { "epoch": 0.5751560724238319, "grad_norm": 2.0606095790863037, "learning_rate": 4.031967713449853e-06, "loss": 0.4532, "step": 13854 }, { "epoch": 0.5751975879480433, "grad_norm": 2.081824541091919, "learning_rate": 4.031308128076068e-06, "loss": 0.5031, "step": 13855 }, { "epoch": 0.5752391034722547, "grad_norm": 2.7304532527923584, "learning_rate": 4.030648560216513e-06, "loss": 0.4619, "step": 13856 }, { "epoch": 0.575280618996466, "grad_norm": 2.0088813304901123, "learning_rate": 4.0299890098831096e-06, "loss": 0.6194, "step": 13857 }, { "epoch": 0.5753221345206774, "grad_norm": 2.207611560821533, "learning_rate": 4.029329477087784e-06, "loss": 0.5556, "step": 13858 }, { "epoch": 0.5753636500448887, "grad_norm": 2.6849775314331055, "learning_rate": 4.028669961842461e-06, "loss": 0.4048, "step": 13859 }, { "epoch": 0.5754051655691, "grad_norm": 2.3412439823150635, "learning_rate": 4.028010464159068e-06, "loss": 0.4395, "step": 13860 }, { "epoch": 0.5754466810933113, "grad_norm": 2.48140025138855, "learning_rate": 4.0273509840495225e-06, "loss": 0.5553, "step": 13861 }, { "epoch": 0.5754881966175227, "grad_norm": 2.3620455265045166, "learning_rate": 4.026691521525753e-06, "loss": 0.4791, "step": 13862 }, { "epoch": 0.575529712141734, "grad_norm": 2.3398966789245605, "learning_rate": 4.026032076599681e-06, "loss": 0.6151, "step": 13863 }, { "epoch": 0.5755712276659454, "grad_norm": 2.3142106533050537, "learning_rate": 4.025372649283229e-06, "loss": 0.5407, "step": 13864 }, { "epoch": 0.5756127431901567, "grad_norm": 2.0815134048461914, "learning_rate": 4.024713239588323e-06, "loss": 0.4842, "step": 13865 }, { "epoch": 0.575654258714368, "grad_norm": 2.22917103767395, "learning_rate": 4.024053847526881e-06, "loss": 0.524, "step": 13866 }, { "epoch": 0.5756957742385793, "grad_norm": 2.3728106021881104, "learning_rate": 4.023394473110827e-06, "loss": 0.463, "step": 13867 }, { "epoch": 0.5757372897627907, "grad_norm": 2.5414960384368896, "learning_rate": 4.022735116352082e-06, "loss": 0.5576, "step": 13868 }, { "epoch": 0.575778805287002, "grad_norm": 3.027590751647949, "learning_rate": 4.022075777262569e-06, "loss": 0.5845, "step": 13869 }, { "epoch": 0.5758203208112134, "grad_norm": 2.469649314880371, "learning_rate": 4.0214164558542076e-06, "loss": 0.5611, "step": 13870 }, { "epoch": 0.5758618363354246, "grad_norm": 2.3886661529541016, "learning_rate": 4.020757152138917e-06, "loss": 0.4706, "step": 13871 }, { "epoch": 0.575903351859636, "grad_norm": 2.88822865486145, "learning_rate": 4.02009786612862e-06, "loss": 0.6137, "step": 13872 }, { "epoch": 0.5759448673838473, "grad_norm": 2.31272554397583, "learning_rate": 4.019438597835238e-06, "loss": 0.4615, "step": 13873 }, { "epoch": 0.5759863829080587, "grad_norm": 2.23653244972229, "learning_rate": 4.018779347270687e-06, "loss": 0.3823, "step": 13874 }, { "epoch": 0.57602789843227, "grad_norm": 2.2899041175842285, "learning_rate": 4.018120114446888e-06, "loss": 0.6718, "step": 13875 }, { "epoch": 0.5760694139564814, "grad_norm": 2.615981340408325, "learning_rate": 4.0174608993757606e-06, "loss": 0.4934, "step": 13876 }, { "epoch": 0.5761109294806926, "grad_norm": 2.404024124145508, "learning_rate": 4.016801702069223e-06, "loss": 0.4605, "step": 13877 }, { "epoch": 0.576152445004904, "grad_norm": 2.402218818664551, "learning_rate": 4.016142522539195e-06, "loss": 0.5449, "step": 13878 }, { "epoch": 0.5761939605291153, "grad_norm": 2.5004642009735107, "learning_rate": 4.015483360797593e-06, "loss": 0.5092, "step": 13879 }, { "epoch": 0.5762354760533267, "grad_norm": 2.3557662963867188, "learning_rate": 4.014824216856336e-06, "loss": 0.6001, "step": 13880 }, { "epoch": 0.576276991577538, "grad_norm": 2.85886287689209, "learning_rate": 4.014165090727341e-06, "loss": 0.5642, "step": 13881 }, { "epoch": 0.5763185071017494, "grad_norm": 2.6142117977142334, "learning_rate": 4.013505982422526e-06, "loss": 0.5337, "step": 13882 }, { "epoch": 0.5763600226259606, "grad_norm": 2.267056941986084, "learning_rate": 4.012846891953806e-06, "loss": 0.4518, "step": 13883 }, { "epoch": 0.576401538150172, "grad_norm": 2.332834243774414, "learning_rate": 4.0121878193331e-06, "loss": 0.6948, "step": 13884 }, { "epoch": 0.5764430536743833, "grad_norm": 2.819946527481079, "learning_rate": 4.011528764572321e-06, "loss": 0.4874, "step": 13885 }, { "epoch": 0.5764845691985947, "grad_norm": 2.6504387855529785, "learning_rate": 4.0108697276833896e-06, "loss": 0.6499, "step": 13886 }, { "epoch": 0.5765260847228061, "grad_norm": 2.7347233295440674, "learning_rate": 4.010210708678216e-06, "loss": 0.6269, "step": 13887 }, { "epoch": 0.5765676002470174, "grad_norm": 2.225316286087036, "learning_rate": 4.009551707568719e-06, "loss": 0.4854, "step": 13888 }, { "epoch": 0.5766091157712288, "grad_norm": 2.362520933151245, "learning_rate": 4.008892724366813e-06, "loss": 0.5214, "step": 13889 }, { "epoch": 0.57665063129544, "grad_norm": 2.4276087284088135, "learning_rate": 4.00823375908441e-06, "loss": 0.5431, "step": 13890 }, { "epoch": 0.5766921468196514, "grad_norm": 2.8921220302581787, "learning_rate": 4.00757481173343e-06, "loss": 0.4008, "step": 13891 }, { "epoch": 0.5767336623438627, "grad_norm": 2.5383851528167725, "learning_rate": 4.006915882325782e-06, "loss": 0.5544, "step": 13892 }, { "epoch": 0.5767751778680741, "grad_norm": 2.4245505332946777, "learning_rate": 4.00625697087338e-06, "loss": 0.4743, "step": 13893 }, { "epoch": 0.5768166933922854, "grad_norm": 2.458162546157837, "learning_rate": 4.00559807738814e-06, "loss": 0.5614, "step": 13894 }, { "epoch": 0.5768582089164968, "grad_norm": 2.2091124057769775, "learning_rate": 4.004939201881974e-06, "loss": 0.4294, "step": 13895 }, { "epoch": 0.576899724440708, "grad_norm": 2.3752689361572266, "learning_rate": 4.004280344366793e-06, "loss": 0.558, "step": 13896 }, { "epoch": 0.5769412399649194, "grad_norm": 2.345862865447998, "learning_rate": 4.00362150485451e-06, "loss": 0.5284, "step": 13897 }, { "epoch": 0.5769827554891307, "grad_norm": 2.2400400638580322, "learning_rate": 4.002962683357036e-06, "loss": 0.5062, "step": 13898 }, { "epoch": 0.5770242710133421, "grad_norm": 2.3182871341705322, "learning_rate": 4.002303879886288e-06, "loss": 0.4844, "step": 13899 }, { "epoch": 0.5770657865375534, "grad_norm": 2.134061574935913, "learning_rate": 4.00164509445417e-06, "loss": 0.4461, "step": 13900 }, { "epoch": 0.5771073020617647, "grad_norm": 2.5036568641662598, "learning_rate": 4.000986327072597e-06, "loss": 0.5549, "step": 13901 }, { "epoch": 0.577148817585976, "grad_norm": 2.536834239959717, "learning_rate": 4.000327577753479e-06, "loss": 0.5696, "step": 13902 }, { "epoch": 0.5771903331101874, "grad_norm": 2.500044584274292, "learning_rate": 3.999668846508726e-06, "loss": 0.4989, "step": 13903 }, { "epoch": 0.5772318486343987, "grad_norm": 2.907088041305542, "learning_rate": 3.999010133350252e-06, "loss": 0.5862, "step": 13904 }, { "epoch": 0.5772733641586101, "grad_norm": 2.4997146129608154, "learning_rate": 3.9983514382899605e-06, "loss": 0.5754, "step": 13905 }, { "epoch": 0.5773148796828214, "grad_norm": 2.0937142372131348, "learning_rate": 3.997692761339763e-06, "loss": 0.4321, "step": 13906 }, { "epoch": 0.5773563952070327, "grad_norm": 2.708669900894165, "learning_rate": 3.997034102511572e-06, "loss": 0.4967, "step": 13907 }, { "epoch": 0.577397910731244, "grad_norm": 2.3293039798736572, "learning_rate": 3.996375461817292e-06, "loss": 0.5133, "step": 13908 }, { "epoch": 0.5774394262554554, "grad_norm": 2.054783344268799, "learning_rate": 3.995716839268833e-06, "loss": 0.4573, "step": 13909 }, { "epoch": 0.5774809417796667, "grad_norm": 2.671820640563965, "learning_rate": 3.995058234878103e-06, "loss": 0.4635, "step": 13910 }, { "epoch": 0.5775224573038781, "grad_norm": 2.117633819580078, "learning_rate": 3.994399648657009e-06, "loss": 0.6162, "step": 13911 }, { "epoch": 0.5775639728280894, "grad_norm": 2.543287515640259, "learning_rate": 3.993741080617461e-06, "loss": 0.4336, "step": 13912 }, { "epoch": 0.5776054883523007, "grad_norm": 2.725507974624634, "learning_rate": 3.993082530771363e-06, "loss": 0.4085, "step": 13913 }, { "epoch": 0.577647003876512, "grad_norm": 2.1988885402679443, "learning_rate": 3.992423999130623e-06, "loss": 0.4355, "step": 13914 }, { "epoch": 0.5776885194007234, "grad_norm": 2.668391704559326, "learning_rate": 3.991765485707147e-06, "loss": 0.5774, "step": 13915 }, { "epoch": 0.5777300349249347, "grad_norm": 2.3995275497436523, "learning_rate": 3.991106990512842e-06, "loss": 0.7491, "step": 13916 }, { "epoch": 0.5777715504491461, "grad_norm": 2.7180967330932617, "learning_rate": 3.990448513559615e-06, "loss": 0.5414, "step": 13917 }, { "epoch": 0.5778130659733575, "grad_norm": 2.417722702026367, "learning_rate": 3.989790054859368e-06, "loss": 0.4722, "step": 13918 }, { "epoch": 0.5778545814975687, "grad_norm": 2.1604297161102295, "learning_rate": 3.989131614424009e-06, "loss": 0.4013, "step": 13919 }, { "epoch": 0.5778960970217801, "grad_norm": 2.493155002593994, "learning_rate": 3.988473192265442e-06, "loss": 0.5922, "step": 13920 }, { "epoch": 0.5779376125459914, "grad_norm": 2.548631429672241, "learning_rate": 3.987814788395571e-06, "loss": 0.4941, "step": 13921 }, { "epoch": 0.5779791280702028, "grad_norm": 2.6874759197235107, "learning_rate": 3.9871564028263e-06, "loss": 0.5135, "step": 13922 }, { "epoch": 0.5780206435944141, "grad_norm": 2.535278558731079, "learning_rate": 3.986498035569533e-06, "loss": 0.4749, "step": 13923 }, { "epoch": 0.5780621591186255, "grad_norm": 2.2820167541503906, "learning_rate": 3.985839686637173e-06, "loss": 0.4831, "step": 13924 }, { "epoch": 0.5781036746428367, "grad_norm": 2.548147678375244, "learning_rate": 3.985181356041126e-06, "loss": 0.4705, "step": 13925 }, { "epoch": 0.5781451901670481, "grad_norm": 2.4078457355499268, "learning_rate": 3.984523043793292e-06, "loss": 0.6179, "step": 13926 }, { "epoch": 0.5781867056912594, "grad_norm": 2.184699773788452, "learning_rate": 3.983864749905571e-06, "loss": 0.5005, "step": 13927 }, { "epoch": 0.5782282212154708, "grad_norm": 2.1223607063293457, "learning_rate": 3.9832064743898704e-06, "loss": 0.5996, "step": 13928 }, { "epoch": 0.5782697367396821, "grad_norm": 2.8600058555603027, "learning_rate": 3.9825482172580895e-06, "loss": 0.4747, "step": 13929 }, { "epoch": 0.5783112522638935, "grad_norm": 2.526435136795044, "learning_rate": 3.981889978522132e-06, "loss": 0.531, "step": 13930 }, { "epoch": 0.5783527677881047, "grad_norm": 2.308502197265625, "learning_rate": 3.9812317581938955e-06, "loss": 0.6177, "step": 13931 }, { "epoch": 0.5783942833123161, "grad_norm": 2.302064895629883, "learning_rate": 3.980573556285282e-06, "loss": 0.5167, "step": 13932 }, { "epoch": 0.5784357988365274, "grad_norm": 2.6670663356781006, "learning_rate": 3.979915372808193e-06, "loss": 0.4613, "step": 13933 }, { "epoch": 0.5784773143607388, "grad_norm": 2.3631439208984375, "learning_rate": 3.979257207774529e-06, "loss": 0.4841, "step": 13934 }, { "epoch": 0.5785188298849501, "grad_norm": 2.4845237731933594, "learning_rate": 3.978599061196188e-06, "loss": 0.4366, "step": 13935 }, { "epoch": 0.5785603454091615, "grad_norm": 3.0222327709198, "learning_rate": 3.977940933085071e-06, "loss": 0.5963, "step": 13936 }, { "epoch": 0.5786018609333727, "grad_norm": 1.9871467351913452, "learning_rate": 3.977282823453075e-06, "loss": 0.5196, "step": 13937 }, { "epoch": 0.5786433764575841, "grad_norm": 2.804872512817383, "learning_rate": 3.9766247323121025e-06, "loss": 0.5083, "step": 13938 }, { "epoch": 0.5786848919817954, "grad_norm": 2.2452075481414795, "learning_rate": 3.975966659674048e-06, "loss": 0.5613, "step": 13939 }, { "epoch": 0.5787264075060068, "grad_norm": 2.649174928665161, "learning_rate": 3.975308605550811e-06, "loss": 0.5196, "step": 13940 }, { "epoch": 0.5787679230302181, "grad_norm": 2.334247350692749, "learning_rate": 3.97465056995429e-06, "loss": 0.5181, "step": 13941 }, { "epoch": 0.5788094385544295, "grad_norm": 2.0231058597564697, "learning_rate": 3.973992552896385e-06, "loss": 0.3955, "step": 13942 }, { "epoch": 0.5788509540786407, "grad_norm": 2.194209575653076, "learning_rate": 3.9733345543889875e-06, "loss": 0.4786, "step": 13943 }, { "epoch": 0.5788924696028521, "grad_norm": 2.483846664428711, "learning_rate": 3.972676574443998e-06, "loss": 0.5147, "step": 13944 }, { "epoch": 0.5789339851270634, "grad_norm": 2.8561644554138184, "learning_rate": 3.9720186130733115e-06, "loss": 0.6855, "step": 13945 }, { "epoch": 0.5789755006512748, "grad_norm": 2.154024839401245, "learning_rate": 3.971360670288825e-06, "loss": 0.4621, "step": 13946 }, { "epoch": 0.5790170161754861, "grad_norm": 2.4553050994873047, "learning_rate": 3.9707027461024355e-06, "loss": 0.4815, "step": 13947 }, { "epoch": 0.5790585316996975, "grad_norm": 2.551893949508667, "learning_rate": 3.970044840526036e-06, "loss": 0.4012, "step": 13948 }, { "epoch": 0.5791000472239088, "grad_norm": 2.364048957824707, "learning_rate": 3.969386953571522e-06, "loss": 0.469, "step": 13949 }, { "epoch": 0.5791415627481201, "grad_norm": 2.4912474155426025, "learning_rate": 3.96872908525079e-06, "loss": 0.5628, "step": 13950 }, { "epoch": 0.5791830782723315, "grad_norm": 2.7837836742401123, "learning_rate": 3.968071235575734e-06, "loss": 0.3939, "step": 13951 }, { "epoch": 0.5792245937965428, "grad_norm": 2.529942274093628, "learning_rate": 3.967413404558247e-06, "loss": 0.6365, "step": 13952 }, { "epoch": 0.5792661093207542, "grad_norm": 2.555706024169922, "learning_rate": 3.9667555922102214e-06, "loss": 0.6214, "step": 13953 }, { "epoch": 0.5793076248449655, "grad_norm": 2.5803279876708984, "learning_rate": 3.966097798543554e-06, "loss": 0.5721, "step": 13954 }, { "epoch": 0.5793491403691768, "grad_norm": 2.7270545959472656, "learning_rate": 3.965440023570136e-06, "loss": 0.5598, "step": 13955 }, { "epoch": 0.5793906558933881, "grad_norm": 2.644148588180542, "learning_rate": 3.964782267301861e-06, "loss": 0.4653, "step": 13956 }, { "epoch": 0.5794321714175995, "grad_norm": 2.476757526397705, "learning_rate": 3.9641245297506205e-06, "loss": 0.4981, "step": 13957 }, { "epoch": 0.5794736869418108, "grad_norm": 2.6255087852478027, "learning_rate": 3.963466810928308e-06, "loss": 0.5758, "step": 13958 }, { "epoch": 0.5795152024660222, "grad_norm": 3.3028564453125, "learning_rate": 3.962809110846813e-06, "loss": 0.6214, "step": 13959 }, { "epoch": 0.5795567179902334, "grad_norm": 2.428603410720825, "learning_rate": 3.96215142951803e-06, "loss": 0.4283, "step": 13960 }, { "epoch": 0.5795982335144448, "grad_norm": 2.2343058586120605, "learning_rate": 3.961493766953847e-06, "loss": 0.4794, "step": 13961 }, { "epoch": 0.5796397490386561, "grad_norm": 2.3025965690612793, "learning_rate": 3.960836123166156e-06, "loss": 0.4999, "step": 13962 }, { "epoch": 0.5796812645628675, "grad_norm": 2.789865493774414, "learning_rate": 3.960178498166846e-06, "loss": 0.5926, "step": 13963 }, { "epoch": 0.5797227800870788, "grad_norm": 2.7303473949432373, "learning_rate": 3.959520891967813e-06, "loss": 0.4811, "step": 13964 }, { "epoch": 0.5797642956112902, "grad_norm": 2.8036093711853027, "learning_rate": 3.958863304580939e-06, "loss": 0.6614, "step": 13965 }, { "epoch": 0.5798058111355014, "grad_norm": 2.401641845703125, "learning_rate": 3.958205736018116e-06, "loss": 0.397, "step": 13966 }, { "epoch": 0.5798473266597128, "grad_norm": 2.3563313484191895, "learning_rate": 3.9575481862912335e-06, "loss": 0.5492, "step": 13967 }, { "epoch": 0.5798888421839241, "grad_norm": 2.996596574783325, "learning_rate": 3.956890655412183e-06, "loss": 0.5023, "step": 13968 }, { "epoch": 0.5799303577081355, "grad_norm": 2.1881344318389893, "learning_rate": 3.956233143392847e-06, "loss": 0.4926, "step": 13969 }, { "epoch": 0.5799718732323468, "grad_norm": 2.0546863079071045, "learning_rate": 3.955575650245117e-06, "loss": 0.4274, "step": 13970 }, { "epoch": 0.5800133887565582, "grad_norm": 2.1430232524871826, "learning_rate": 3.954918175980882e-06, "loss": 0.5807, "step": 13971 }, { "epoch": 0.5800549042807694, "grad_norm": 2.7155282497406006, "learning_rate": 3.954260720612026e-06, "loss": 0.7002, "step": 13972 }, { "epoch": 0.5800964198049808, "grad_norm": 2.8633759021759033, "learning_rate": 3.953603284150439e-06, "loss": 0.516, "step": 13973 }, { "epoch": 0.5801379353291921, "grad_norm": 2.354661464691162, "learning_rate": 3.952945866608007e-06, "loss": 0.5335, "step": 13974 }, { "epoch": 0.5801794508534035, "grad_norm": 2.2919390201568604, "learning_rate": 3.952288467996613e-06, "loss": 0.4681, "step": 13975 }, { "epoch": 0.5802209663776148, "grad_norm": 2.362776756286621, "learning_rate": 3.951631088328146e-06, "loss": 0.4407, "step": 13976 }, { "epoch": 0.5802624819018262, "grad_norm": 2.462414026260376, "learning_rate": 3.950973727614494e-06, "loss": 0.4335, "step": 13977 }, { "epoch": 0.5803039974260374, "grad_norm": 2.2465312480926514, "learning_rate": 3.950316385867536e-06, "loss": 0.517, "step": 13978 }, { "epoch": 0.5803455129502488, "grad_norm": 2.1896514892578125, "learning_rate": 3.94965906309916e-06, "loss": 0.4546, "step": 13979 }, { "epoch": 0.5803870284744602, "grad_norm": 2.456207513809204, "learning_rate": 3.9490017593212524e-06, "loss": 0.4373, "step": 13980 }, { "epoch": 0.5804285439986715, "grad_norm": 2.2116382122039795, "learning_rate": 3.948344474545697e-06, "loss": 0.4992, "step": 13981 }, { "epoch": 0.5804700595228829, "grad_norm": 2.685391426086426, "learning_rate": 3.947687208784375e-06, "loss": 0.431, "step": 13982 }, { "epoch": 0.5805115750470942, "grad_norm": 2.6985719203948975, "learning_rate": 3.947029962049172e-06, "loss": 0.5537, "step": 13983 }, { "epoch": 0.5805530905713056, "grad_norm": 2.5637898445129395, "learning_rate": 3.9463727343519706e-06, "loss": 0.5357, "step": 13984 }, { "epoch": 0.5805946060955168, "grad_norm": 2.6990864276885986, "learning_rate": 3.945715525704653e-06, "loss": 0.5376, "step": 13985 }, { "epoch": 0.5806361216197282, "grad_norm": 2.2949557304382324, "learning_rate": 3.945058336119104e-06, "loss": 0.4546, "step": 13986 }, { "epoch": 0.5806776371439395, "grad_norm": 2.0896365642547607, "learning_rate": 3.944401165607205e-06, "loss": 0.588, "step": 13987 }, { "epoch": 0.5807191526681509, "grad_norm": 1.9488507509231567, "learning_rate": 3.9437440141808354e-06, "loss": 0.4483, "step": 13988 }, { "epoch": 0.5807606681923622, "grad_norm": 2.6129281520843506, "learning_rate": 3.9430868818518786e-06, "loss": 0.4164, "step": 13989 }, { "epoch": 0.5808021837165736, "grad_norm": 2.150913715362549, "learning_rate": 3.942429768632218e-06, "loss": 0.4975, "step": 13990 }, { "epoch": 0.5808436992407848, "grad_norm": 2.8453452587127686, "learning_rate": 3.941772674533729e-06, "loss": 0.5252, "step": 13991 }, { "epoch": 0.5808852147649962, "grad_norm": 2.5414528846740723, "learning_rate": 3.941115599568296e-06, "loss": 0.5725, "step": 13992 }, { "epoch": 0.5809267302892075, "grad_norm": 2.4490838050842285, "learning_rate": 3.940458543747798e-06, "loss": 0.4547, "step": 13993 }, { "epoch": 0.5809682458134189, "grad_norm": 2.5529565811157227, "learning_rate": 3.939801507084117e-06, "loss": 0.4487, "step": 13994 }, { "epoch": 0.5810097613376302, "grad_norm": 2.0662667751312256, "learning_rate": 3.9391444895891275e-06, "loss": 0.4043, "step": 13995 }, { "epoch": 0.5810512768618415, "grad_norm": 2.694632053375244, "learning_rate": 3.9384874912747115e-06, "loss": 0.5283, "step": 13996 }, { "epoch": 0.5810927923860528, "grad_norm": 2.2168524265289307, "learning_rate": 3.93783051215275e-06, "loss": 0.6253, "step": 13997 }, { "epoch": 0.5811343079102642, "grad_norm": 2.0265016555786133, "learning_rate": 3.937173552235117e-06, "loss": 0.3991, "step": 13998 }, { "epoch": 0.5811758234344755, "grad_norm": 2.9209232330322266, "learning_rate": 3.936516611533693e-06, "loss": 0.5383, "step": 13999 }, { "epoch": 0.5812173389586869, "grad_norm": 2.1076536178588867, "learning_rate": 3.935859690060356e-06, "loss": 0.4436, "step": 14000 }, { "epoch": 0.5812588544828982, "grad_norm": 2.0274322032928467, "learning_rate": 3.935202787826981e-06, "loss": 0.4506, "step": 14001 }, { "epoch": 0.5813003700071095, "grad_norm": 2.198246955871582, "learning_rate": 3.934545904845448e-06, "loss": 0.4322, "step": 14002 }, { "epoch": 0.5813418855313208, "grad_norm": 2.723543405532837, "learning_rate": 3.933889041127633e-06, "loss": 0.5006, "step": 14003 }, { "epoch": 0.5813834010555322, "grad_norm": 1.944467544555664, "learning_rate": 3.933232196685409e-06, "loss": 0.5156, "step": 14004 }, { "epoch": 0.5814249165797435, "grad_norm": 2.3738620281219482, "learning_rate": 3.932575371530655e-06, "loss": 0.559, "step": 14005 }, { "epoch": 0.5814664321039549, "grad_norm": 2.263139009475708, "learning_rate": 3.931918565675245e-06, "loss": 0.562, "step": 14006 }, { "epoch": 0.5815079476281662, "grad_norm": 2.573117733001709, "learning_rate": 3.931261779131058e-06, "loss": 0.5057, "step": 14007 }, { "epoch": 0.5815494631523775, "grad_norm": 2.592531681060791, "learning_rate": 3.930605011909964e-06, "loss": 0.5712, "step": 14008 }, { "epoch": 0.5815909786765889, "grad_norm": 2.1629669666290283, "learning_rate": 3.92994826402384e-06, "loss": 0.4564, "step": 14009 }, { "epoch": 0.5816324942008002, "grad_norm": 2.231890916824341, "learning_rate": 3.9292915354845606e-06, "loss": 0.535, "step": 14010 }, { "epoch": 0.5816740097250116, "grad_norm": 2.0948238372802734, "learning_rate": 3.9286348263039975e-06, "loss": 0.5877, "step": 14011 }, { "epoch": 0.5817155252492229, "grad_norm": 2.236314058303833, "learning_rate": 3.927978136494027e-06, "loss": 0.4429, "step": 14012 }, { "epoch": 0.5817570407734343, "grad_norm": 2.5475621223449707, "learning_rate": 3.92732146606652e-06, "loss": 0.647, "step": 14013 }, { "epoch": 0.5817985562976455, "grad_norm": 2.2794926166534424, "learning_rate": 3.926664815033349e-06, "loss": 0.5801, "step": 14014 }, { "epoch": 0.5818400718218569, "grad_norm": 2.089139223098755, "learning_rate": 3.9260081834063885e-06, "loss": 0.3532, "step": 14015 }, { "epoch": 0.5818815873460682, "grad_norm": 2.3754658699035645, "learning_rate": 3.9253515711975115e-06, "loss": 0.5327, "step": 14016 }, { "epoch": 0.5819231028702796, "grad_norm": 2.430454969406128, "learning_rate": 3.924694978418586e-06, "loss": 0.6015, "step": 14017 }, { "epoch": 0.5819646183944909, "grad_norm": 2.448110580444336, "learning_rate": 3.924038405081484e-06, "loss": 0.5033, "step": 14018 }, { "epoch": 0.5820061339187023, "grad_norm": 2.492859363555908, "learning_rate": 3.9233818511980786e-06, "loss": 0.5042, "step": 14019 }, { "epoch": 0.5820476494429135, "grad_norm": 2.482485771179199, "learning_rate": 3.9227253167802415e-06, "loss": 0.6364, "step": 14020 }, { "epoch": 0.5820891649671249, "grad_norm": 2.3728277683258057, "learning_rate": 3.922068801839839e-06, "loss": 0.4905, "step": 14021 }, { "epoch": 0.5821306804913362, "grad_norm": 2.2666401863098145, "learning_rate": 3.921412306388744e-06, "loss": 0.3673, "step": 14022 }, { "epoch": 0.5821721960155476, "grad_norm": 2.3995590209960938, "learning_rate": 3.920755830438826e-06, "loss": 0.4589, "step": 14023 }, { "epoch": 0.5822137115397589, "grad_norm": 2.678311586380005, "learning_rate": 3.920099374001952e-06, "loss": 0.4005, "step": 14024 }, { "epoch": 0.5822552270639703, "grad_norm": 2.2570993900299072, "learning_rate": 3.919442937089996e-06, "loss": 0.4506, "step": 14025 }, { "epoch": 0.5822967425881815, "grad_norm": 2.2716684341430664, "learning_rate": 3.918786519714819e-06, "loss": 0.5064, "step": 14026 }, { "epoch": 0.5823382581123929, "grad_norm": 2.525728464126587, "learning_rate": 3.918130121888295e-06, "loss": 0.5732, "step": 14027 }, { "epoch": 0.5823797736366042, "grad_norm": 2.317976951599121, "learning_rate": 3.91747374362229e-06, "loss": 0.5738, "step": 14028 }, { "epoch": 0.5824212891608156, "grad_norm": 2.3694679737091064, "learning_rate": 3.916817384928673e-06, "loss": 0.5664, "step": 14029 }, { "epoch": 0.5824628046850269, "grad_norm": 2.204458713531494, "learning_rate": 3.916161045819309e-06, "loss": 0.5636, "step": 14030 }, { "epoch": 0.5825043202092383, "grad_norm": 2.1926684379577637, "learning_rate": 3.915504726306066e-06, "loss": 0.5333, "step": 14031 }, { "epoch": 0.5825458357334495, "grad_norm": 2.2600224018096924, "learning_rate": 3.914848426400808e-06, "loss": 0.5533, "step": 14032 }, { "epoch": 0.5825873512576609, "grad_norm": 2.3885748386383057, "learning_rate": 3.914192146115407e-06, "loss": 0.5971, "step": 14033 }, { "epoch": 0.5826288667818722, "grad_norm": 2.195481538772583, "learning_rate": 3.913535885461722e-06, "loss": 0.4725, "step": 14034 }, { "epoch": 0.5826703823060836, "grad_norm": 2.422875165939331, "learning_rate": 3.912879644451622e-06, "loss": 0.5606, "step": 14035 }, { "epoch": 0.5827118978302949, "grad_norm": 2.5797977447509766, "learning_rate": 3.912223423096972e-06, "loss": 0.5747, "step": 14036 }, { "epoch": 0.5827534133545063, "grad_norm": 2.05013108253479, "learning_rate": 3.911567221409636e-06, "loss": 0.5351, "step": 14037 }, { "epoch": 0.5827949288787175, "grad_norm": 2.230433225631714, "learning_rate": 3.910911039401479e-06, "loss": 0.5254, "step": 14038 }, { "epoch": 0.5828364444029289, "grad_norm": 2.194201707839966, "learning_rate": 3.910254877084363e-06, "loss": 0.4086, "step": 14039 }, { "epoch": 0.5828779599271403, "grad_norm": 2.400003671646118, "learning_rate": 3.909598734470152e-06, "loss": 0.6199, "step": 14040 }, { "epoch": 0.5829194754513516, "grad_norm": 2.12618088722229, "learning_rate": 3.908942611570712e-06, "loss": 0.3882, "step": 14041 }, { "epoch": 0.582960990975563, "grad_norm": 2.4231181144714355, "learning_rate": 3.908286508397905e-06, "loss": 0.5516, "step": 14042 }, { "epoch": 0.5830025064997743, "grad_norm": 2.6567647457122803, "learning_rate": 3.9076304249635905e-06, "loss": 0.5923, "step": 14043 }, { "epoch": 0.5830440220239856, "grad_norm": 2.340437412261963, "learning_rate": 3.906974361279633e-06, "loss": 0.6499, "step": 14044 }, { "epoch": 0.5830855375481969, "grad_norm": 2.528975009918213, "learning_rate": 3.906318317357894e-06, "loss": 0.5156, "step": 14045 }, { "epoch": 0.5831270530724083, "grad_norm": 2.6857314109802246, "learning_rate": 3.905662293210237e-06, "loss": 0.5048, "step": 14046 }, { "epoch": 0.5831685685966196, "grad_norm": 2.1244611740112305, "learning_rate": 3.905006288848519e-06, "loss": 0.5481, "step": 14047 }, { "epoch": 0.583210084120831, "grad_norm": 2.3727164268493652, "learning_rate": 3.9043503042846035e-06, "loss": 0.5811, "step": 14048 }, { "epoch": 0.5832515996450423, "grad_norm": 2.457012891769409, "learning_rate": 3.9036943395303505e-06, "loss": 0.5975, "step": 14049 }, { "epoch": 0.5832931151692536, "grad_norm": 2.1675751209259033, "learning_rate": 3.903038394597619e-06, "loss": 0.4648, "step": 14050 }, { "epoch": 0.5833346306934649, "grad_norm": 2.927762031555176, "learning_rate": 3.902382469498272e-06, "loss": 0.5502, "step": 14051 }, { "epoch": 0.5833761462176763, "grad_norm": 2.8283851146698, "learning_rate": 3.9017265642441645e-06, "loss": 0.5214, "step": 14052 }, { "epoch": 0.5834176617418876, "grad_norm": 2.513009786605835, "learning_rate": 3.901070678847156e-06, "loss": 0.4986, "step": 14053 }, { "epoch": 0.583459177266099, "grad_norm": 2.2614126205444336, "learning_rate": 3.900414813319108e-06, "loss": 0.5014, "step": 14054 }, { "epoch": 0.5835006927903102, "grad_norm": 2.2285823822021484, "learning_rate": 3.899758967671879e-06, "loss": 0.458, "step": 14055 }, { "epoch": 0.5835422083145216, "grad_norm": 2.1940219402313232, "learning_rate": 3.899103141917321e-06, "loss": 0.5519, "step": 14056 }, { "epoch": 0.5835837238387329, "grad_norm": 2.667780876159668, "learning_rate": 3.898447336067297e-06, "loss": 0.5966, "step": 14057 }, { "epoch": 0.5836252393629443, "grad_norm": 1.6908729076385498, "learning_rate": 3.897791550133663e-06, "loss": 0.4233, "step": 14058 }, { "epoch": 0.5836667548871556, "grad_norm": 1.887117624282837, "learning_rate": 3.897135784128276e-06, "loss": 0.431, "step": 14059 }, { "epoch": 0.583708270411367, "grad_norm": 2.6811108589172363, "learning_rate": 3.896480038062991e-06, "loss": 0.4347, "step": 14060 }, { "epoch": 0.5837497859355782, "grad_norm": 2.5298938751220703, "learning_rate": 3.895824311949665e-06, "loss": 0.6399, "step": 14061 }, { "epoch": 0.5837913014597896, "grad_norm": 2.1469533443450928, "learning_rate": 3.895168605800155e-06, "loss": 0.4023, "step": 14062 }, { "epoch": 0.5838328169840009, "grad_norm": 2.347759246826172, "learning_rate": 3.894512919626313e-06, "loss": 0.4797, "step": 14063 }, { "epoch": 0.5838743325082123, "grad_norm": 2.602910280227661, "learning_rate": 3.89385725344e-06, "loss": 0.5703, "step": 14064 }, { "epoch": 0.5839158480324236, "grad_norm": 2.108119249343872, "learning_rate": 3.893201607253063e-06, "loss": 0.4599, "step": 14065 }, { "epoch": 0.583957363556635, "grad_norm": 2.288555145263672, "learning_rate": 3.89254598107736e-06, "loss": 0.6493, "step": 14066 }, { "epoch": 0.5839988790808462, "grad_norm": 1.964287281036377, "learning_rate": 3.891890374924745e-06, "loss": 0.4112, "step": 14067 }, { "epoch": 0.5840403946050576, "grad_norm": 2.6904525756835938, "learning_rate": 3.891234788807073e-06, "loss": 0.7096, "step": 14068 }, { "epoch": 0.5840819101292689, "grad_norm": 2.08577036857605, "learning_rate": 3.890579222736194e-06, "loss": 0.4816, "step": 14069 }, { "epoch": 0.5841234256534803, "grad_norm": 2.3948874473571777, "learning_rate": 3.889923676723963e-06, "loss": 0.4366, "step": 14070 }, { "epoch": 0.5841649411776917, "grad_norm": 2.462686777114868, "learning_rate": 3.889268150782232e-06, "loss": 0.5392, "step": 14071 }, { "epoch": 0.584206456701903, "grad_norm": 2.0333542823791504, "learning_rate": 3.888612644922854e-06, "loss": 0.5963, "step": 14072 }, { "epoch": 0.5842479722261144, "grad_norm": 2.9441566467285156, "learning_rate": 3.887957159157678e-06, "loss": 0.5481, "step": 14073 }, { "epoch": 0.5842894877503256, "grad_norm": 2.0400431156158447, "learning_rate": 3.887301693498558e-06, "loss": 0.5832, "step": 14074 }, { "epoch": 0.584331003274537, "grad_norm": 2.3723349571228027, "learning_rate": 3.886646247957344e-06, "loss": 0.4437, "step": 14075 }, { "epoch": 0.5843725187987483, "grad_norm": 2.7251951694488525, "learning_rate": 3.885990822545886e-06, "loss": 0.5478, "step": 14076 }, { "epoch": 0.5844140343229597, "grad_norm": 2.4650278091430664, "learning_rate": 3.885335417276037e-06, "loss": 0.5734, "step": 14077 }, { "epoch": 0.584455549847171, "grad_norm": 2.163022041320801, "learning_rate": 3.884680032159643e-06, "loss": 0.4207, "step": 14078 }, { "epoch": 0.5844970653713824, "grad_norm": 2.074080228805542, "learning_rate": 3.884024667208556e-06, "loss": 0.5205, "step": 14079 }, { "epoch": 0.5845385808955936, "grad_norm": 2.20550537109375, "learning_rate": 3.883369322434624e-06, "loss": 0.4976, "step": 14080 }, { "epoch": 0.584580096419805, "grad_norm": 2.1862690448760986, "learning_rate": 3.8827139978496985e-06, "loss": 0.5234, "step": 14081 }, { "epoch": 0.5846216119440163, "grad_norm": 2.286720037460327, "learning_rate": 3.882058693465624e-06, "loss": 0.5132, "step": 14082 }, { "epoch": 0.5846631274682277, "grad_norm": 2.6563777923583984, "learning_rate": 3.8814034092942506e-06, "loss": 0.497, "step": 14083 }, { "epoch": 0.584704642992439, "grad_norm": 2.5572116374969482, "learning_rate": 3.880748145347426e-06, "loss": 0.4776, "step": 14084 }, { "epoch": 0.5847461585166503, "grad_norm": 2.080674409866333, "learning_rate": 3.880092901637e-06, "loss": 0.3268, "step": 14085 }, { "epoch": 0.5847876740408616, "grad_norm": 2.262901544570923, "learning_rate": 3.879437678174814e-06, "loss": 0.5792, "step": 14086 }, { "epoch": 0.584829189565073, "grad_norm": 2.221829414367676, "learning_rate": 3.878782474972719e-06, "loss": 0.4344, "step": 14087 }, { "epoch": 0.5848707050892843, "grad_norm": 2.0324296951293945, "learning_rate": 3.8781272920425605e-06, "loss": 0.5073, "step": 14088 }, { "epoch": 0.5849122206134957, "grad_norm": 2.5215470790863037, "learning_rate": 3.877472129396183e-06, "loss": 0.4417, "step": 14089 }, { "epoch": 0.584953736137707, "grad_norm": 2.3628597259521484, "learning_rate": 3.876816987045435e-06, "loss": 0.6972, "step": 14090 }, { "epoch": 0.5849952516619183, "grad_norm": 2.4112789630889893, "learning_rate": 3.876161865002157e-06, "loss": 0.5135, "step": 14091 }, { "epoch": 0.5850367671861296, "grad_norm": 2.4412243366241455, "learning_rate": 3.8755067632781975e-06, "loss": 0.5026, "step": 14092 }, { "epoch": 0.585078282710341, "grad_norm": 2.059558391571045, "learning_rate": 3.874851681885399e-06, "loss": 0.4395, "step": 14093 }, { "epoch": 0.5851197982345523, "grad_norm": 2.6615829467773438, "learning_rate": 3.874196620835608e-06, "loss": 0.6067, "step": 14094 }, { "epoch": 0.5851613137587637, "grad_norm": 2.464600086212158, "learning_rate": 3.873541580140666e-06, "loss": 0.5129, "step": 14095 }, { "epoch": 0.585202829282975, "grad_norm": 2.785609722137451, "learning_rate": 3.872886559812416e-06, "loss": 0.5098, "step": 14096 }, { "epoch": 0.5852443448071863, "grad_norm": 2.9078712463378906, "learning_rate": 3.872231559862702e-06, "loss": 0.5291, "step": 14097 }, { "epoch": 0.5852858603313976, "grad_norm": 3.2845730781555176, "learning_rate": 3.871576580303369e-06, "loss": 0.5181, "step": 14098 }, { "epoch": 0.585327375855609, "grad_norm": 2.9281044006347656, "learning_rate": 3.870921621146254e-06, "loss": 0.5664, "step": 14099 }, { "epoch": 0.5853688913798203, "grad_norm": 2.07137131690979, "learning_rate": 3.870266682403201e-06, "loss": 0.5307, "step": 14100 }, { "epoch": 0.5854104069040317, "grad_norm": 2.4102306365966797, "learning_rate": 3.8696117640860536e-06, "loss": 0.6075, "step": 14101 }, { "epoch": 0.5854519224282431, "grad_norm": 2.8291683197021484, "learning_rate": 3.868956866206652e-06, "loss": 0.4118, "step": 14102 }, { "epoch": 0.5854934379524543, "grad_norm": 2.4363245964050293, "learning_rate": 3.868301988776835e-06, "loss": 0.4902, "step": 14103 }, { "epoch": 0.5855349534766657, "grad_norm": 2.250688314437866, "learning_rate": 3.867647131808444e-06, "loss": 0.542, "step": 14104 }, { "epoch": 0.585576469000877, "grad_norm": 2.7758262157440186, "learning_rate": 3.866992295313319e-06, "loss": 0.4759, "step": 14105 }, { "epoch": 0.5856179845250884, "grad_norm": 2.0397727489471436, "learning_rate": 3.8663374793033e-06, "loss": 0.619, "step": 14106 }, { "epoch": 0.5856595000492997, "grad_norm": 2.7820231914520264, "learning_rate": 3.8656826837902285e-06, "loss": 0.6036, "step": 14107 }, { "epoch": 0.5857010155735111, "grad_norm": 2.2427306175231934, "learning_rate": 3.8650279087859384e-06, "loss": 0.5274, "step": 14108 }, { "epoch": 0.5857425310977223, "grad_norm": 2.217341423034668, "learning_rate": 3.864373154302271e-06, "loss": 0.4323, "step": 14109 }, { "epoch": 0.5857840466219337, "grad_norm": 2.4665539264678955, "learning_rate": 3.863718420351065e-06, "loss": 0.5385, "step": 14110 }, { "epoch": 0.585825562146145, "grad_norm": 2.1104328632354736, "learning_rate": 3.863063706944159e-06, "loss": 0.5021, "step": 14111 }, { "epoch": 0.5858670776703564, "grad_norm": 1.8694441318511963, "learning_rate": 3.862409014093386e-06, "loss": 0.4406, "step": 14112 }, { "epoch": 0.5859085931945677, "grad_norm": 2.8732082843780518, "learning_rate": 3.861754341810586e-06, "loss": 0.4685, "step": 14113 }, { "epoch": 0.5859501087187791, "grad_norm": 2.498624086380005, "learning_rate": 3.861099690107598e-06, "loss": 0.4898, "step": 14114 }, { "epoch": 0.5859916242429903, "grad_norm": 2.2227988243103027, "learning_rate": 3.860445058996255e-06, "loss": 0.5296, "step": 14115 }, { "epoch": 0.5860331397672017, "grad_norm": 2.420576572418213, "learning_rate": 3.859790448488394e-06, "loss": 0.4961, "step": 14116 }, { "epoch": 0.586074655291413, "grad_norm": 2.8369343280792236, "learning_rate": 3.859135858595849e-06, "loss": 0.4691, "step": 14117 }, { "epoch": 0.5861161708156244, "grad_norm": 2.066189765930176, "learning_rate": 3.858481289330457e-06, "loss": 0.498, "step": 14118 }, { "epoch": 0.5861576863398357, "grad_norm": 2.0665276050567627, "learning_rate": 3.857826740704052e-06, "loss": 0.5598, "step": 14119 }, { "epoch": 0.5861992018640471, "grad_norm": 2.4073002338409424, "learning_rate": 3.857172212728471e-06, "loss": 0.5781, "step": 14120 }, { "epoch": 0.5862407173882583, "grad_norm": 2.224850654602051, "learning_rate": 3.856517705415543e-06, "loss": 0.4671, "step": 14121 }, { "epoch": 0.5862822329124697, "grad_norm": 2.914689540863037, "learning_rate": 3.855863218777105e-06, "loss": 0.5041, "step": 14122 }, { "epoch": 0.586323748436681, "grad_norm": 2.242033004760742, "learning_rate": 3.85520875282499e-06, "loss": 0.4746, "step": 14123 }, { "epoch": 0.5863652639608924, "grad_norm": 3.7128210067749023, "learning_rate": 3.854554307571032e-06, "loss": 0.6763, "step": 14124 }, { "epoch": 0.5864067794851037, "grad_norm": 2.4007728099823, "learning_rate": 3.853899883027061e-06, "loss": 0.5176, "step": 14125 }, { "epoch": 0.5864482950093151, "grad_norm": 2.185900926589966, "learning_rate": 3.85324547920491e-06, "loss": 0.5239, "step": 14126 }, { "epoch": 0.5864898105335263, "grad_norm": 1.946893334388733, "learning_rate": 3.852591096116413e-06, "loss": 0.4301, "step": 14127 }, { "epoch": 0.5865313260577377, "grad_norm": 2.6799702644348145, "learning_rate": 3.851936733773399e-06, "loss": 0.4017, "step": 14128 }, { "epoch": 0.586572841581949, "grad_norm": 2.7306437492370605, "learning_rate": 3.8512823921877e-06, "loss": 0.5944, "step": 14129 }, { "epoch": 0.5866143571061604, "grad_norm": 2.3490169048309326, "learning_rate": 3.850628071371145e-06, "loss": 0.5012, "step": 14130 }, { "epoch": 0.5866558726303717, "grad_norm": 2.153499126434326, "learning_rate": 3.849973771335567e-06, "loss": 0.44, "step": 14131 }, { "epoch": 0.586697388154583, "grad_norm": 2.561669111251831, "learning_rate": 3.849319492092794e-06, "loss": 0.4919, "step": 14132 }, { "epoch": 0.5867389036787944, "grad_norm": 2.5773720741271973, "learning_rate": 3.848665233654658e-06, "loss": 0.5368, "step": 14133 }, { "epoch": 0.5867804192030057, "grad_norm": 2.8943495750427246, "learning_rate": 3.848010996032984e-06, "loss": 0.4839, "step": 14134 }, { "epoch": 0.5868219347272171, "grad_norm": 2.3572824001312256, "learning_rate": 3.847356779239603e-06, "loss": 0.4914, "step": 14135 }, { "epoch": 0.5868634502514284, "grad_norm": 2.463698387145996, "learning_rate": 3.846702583286345e-06, "loss": 0.661, "step": 14136 }, { "epoch": 0.5869049657756398, "grad_norm": 2.9359171390533447, "learning_rate": 3.846048408185039e-06, "loss": 0.5427, "step": 14137 }, { "epoch": 0.586946481299851, "grad_norm": 2.872459650039673, "learning_rate": 3.845394253947507e-06, "loss": 0.5056, "step": 14138 }, { "epoch": 0.5869879968240624, "grad_norm": 2.6170058250427246, "learning_rate": 3.8447401205855815e-06, "loss": 0.4659, "step": 14139 }, { "epoch": 0.5870295123482737, "grad_norm": 2.294132947921753, "learning_rate": 3.844086008111086e-06, "loss": 0.553, "step": 14140 }, { "epoch": 0.5870710278724851, "grad_norm": 2.7012083530426025, "learning_rate": 3.8434319165358505e-06, "loss": 0.5742, "step": 14141 }, { "epoch": 0.5871125433966964, "grad_norm": 2.6496198177337646, "learning_rate": 3.842777845871698e-06, "loss": 0.5653, "step": 14142 }, { "epoch": 0.5871540589209078, "grad_norm": 2.991201162338257, "learning_rate": 3.842123796130455e-06, "loss": 0.4105, "step": 14143 }, { "epoch": 0.587195574445119, "grad_norm": 2.6823394298553467, "learning_rate": 3.841469767323948e-06, "loss": 0.4849, "step": 14144 }, { "epoch": 0.5872370899693304, "grad_norm": 2.3577804565429688, "learning_rate": 3.840815759464002e-06, "loss": 0.5199, "step": 14145 }, { "epoch": 0.5872786054935417, "grad_norm": 2.4486136436462402, "learning_rate": 3.8401617725624425e-06, "loss": 0.4516, "step": 14146 }, { "epoch": 0.5873201210177531, "grad_norm": 2.2023253440856934, "learning_rate": 3.83950780663109e-06, "loss": 0.446, "step": 14147 }, { "epoch": 0.5873616365419644, "grad_norm": 2.2463674545288086, "learning_rate": 3.838853861681772e-06, "loss": 0.462, "step": 14148 }, { "epoch": 0.5874031520661758, "grad_norm": 2.796952724456787, "learning_rate": 3.83819993772631e-06, "loss": 0.4398, "step": 14149 }, { "epoch": 0.587444667590387, "grad_norm": 2.142584800720215, "learning_rate": 3.83754603477653e-06, "loss": 0.5536, "step": 14150 }, { "epoch": 0.5874861831145984, "grad_norm": 2.4181995391845703, "learning_rate": 3.836892152844251e-06, "loss": 0.5487, "step": 14151 }, { "epoch": 0.5875276986388097, "grad_norm": 2.0433425903320312, "learning_rate": 3.836238291941298e-06, "loss": 0.407, "step": 14152 }, { "epoch": 0.5875692141630211, "grad_norm": 2.3982601165771484, "learning_rate": 3.835584452079491e-06, "loss": 0.5016, "step": 14153 }, { "epoch": 0.5876107296872324, "grad_norm": 2.305063486099243, "learning_rate": 3.834930633270654e-06, "loss": 0.3613, "step": 14154 }, { "epoch": 0.5876522452114438, "grad_norm": 2.440892219543457, "learning_rate": 3.834276835526607e-06, "loss": 0.725, "step": 14155 }, { "epoch": 0.587693760735655, "grad_norm": 2.2126376628875732, "learning_rate": 3.83362305885917e-06, "loss": 0.3329, "step": 14156 }, { "epoch": 0.5877352762598664, "grad_norm": 2.0406830310821533, "learning_rate": 3.8329693032801635e-06, "loss": 0.4472, "step": 14157 }, { "epoch": 0.5877767917840777, "grad_norm": 2.214580535888672, "learning_rate": 3.8323155688014095e-06, "loss": 0.5556, "step": 14158 }, { "epoch": 0.5878183073082891, "grad_norm": 2.3595638275146484, "learning_rate": 3.831661855434727e-06, "loss": 0.6124, "step": 14159 }, { "epoch": 0.5878598228325004, "grad_norm": 2.695716381072998, "learning_rate": 3.831008163191935e-06, "loss": 0.5465, "step": 14160 }, { "epoch": 0.5879013383567118, "grad_norm": 2.750195264816284, "learning_rate": 3.83035449208485e-06, "loss": 0.6521, "step": 14161 }, { "epoch": 0.587942853880923, "grad_norm": 2.210232973098755, "learning_rate": 3.829700842125294e-06, "loss": 0.5192, "step": 14162 }, { "epoch": 0.5879843694051344, "grad_norm": 2.0357067584991455, "learning_rate": 3.829047213325086e-06, "loss": 0.5262, "step": 14163 }, { "epoch": 0.5880258849293458, "grad_norm": 2.400815010070801, "learning_rate": 3.828393605696041e-06, "loss": 0.6884, "step": 14164 }, { "epoch": 0.5880674004535571, "grad_norm": 2.459627866744995, "learning_rate": 3.827740019249977e-06, "loss": 0.5269, "step": 14165 }, { "epoch": 0.5881089159777685, "grad_norm": 2.7499167919158936, "learning_rate": 3.82708645399871e-06, "loss": 0.4541, "step": 14166 }, { "epoch": 0.5881504315019798, "grad_norm": 2.1985585689544678, "learning_rate": 3.82643290995406e-06, "loss": 0.4771, "step": 14167 }, { "epoch": 0.5881919470261912, "grad_norm": 2.4955780506134033, "learning_rate": 3.8257793871278416e-06, "loss": 0.5793, "step": 14168 }, { "epoch": 0.5882334625504024, "grad_norm": 2.413813352584839, "learning_rate": 3.825125885531869e-06, "loss": 0.6519, "step": 14169 }, { "epoch": 0.5882749780746138, "grad_norm": 2.9943621158599854, "learning_rate": 3.824472405177958e-06, "loss": 0.5544, "step": 14170 }, { "epoch": 0.5883164935988251, "grad_norm": 2.375507354736328, "learning_rate": 3.823818946077926e-06, "loss": 0.548, "step": 14171 }, { "epoch": 0.5883580091230365, "grad_norm": 2.106548547744751, "learning_rate": 3.823165508243588e-06, "loss": 0.4708, "step": 14172 }, { "epoch": 0.5883995246472478, "grad_norm": 2.3530569076538086, "learning_rate": 3.822512091686754e-06, "loss": 0.6381, "step": 14173 }, { "epoch": 0.5884410401714592, "grad_norm": 2.748680591583252, "learning_rate": 3.821858696419241e-06, "loss": 0.5576, "step": 14174 }, { "epoch": 0.5884825556956704, "grad_norm": 2.221694231033325, "learning_rate": 3.821205322452863e-06, "loss": 0.5353, "step": 14175 }, { "epoch": 0.5885240712198818, "grad_norm": 2.3473000526428223, "learning_rate": 3.820551969799435e-06, "loss": 0.5392, "step": 14176 }, { "epoch": 0.5885655867440931, "grad_norm": 2.2428433895111084, "learning_rate": 3.819898638470765e-06, "loss": 0.57, "step": 14177 }, { "epoch": 0.5886071022683045, "grad_norm": 1.9924037456512451, "learning_rate": 3.8192453284786675e-06, "loss": 0.4691, "step": 14178 }, { "epoch": 0.5886486177925158, "grad_norm": 1.8876184225082397, "learning_rate": 3.818592039834955e-06, "loss": 0.4382, "step": 14179 }, { "epoch": 0.5886901333167271, "grad_norm": 2.137720823287964, "learning_rate": 3.8179387725514396e-06, "loss": 0.4256, "step": 14180 }, { "epoch": 0.5887316488409384, "grad_norm": 2.713742733001709, "learning_rate": 3.817285526639933e-06, "loss": 0.5241, "step": 14181 }, { "epoch": 0.5887731643651498, "grad_norm": 2.4545087814331055, "learning_rate": 3.816632302112242e-06, "loss": 0.4448, "step": 14182 }, { "epoch": 0.5888146798893611, "grad_norm": 2.5715785026550293, "learning_rate": 3.815979098980181e-06, "loss": 0.5417, "step": 14183 }, { "epoch": 0.5888561954135725, "grad_norm": 2.5664234161376953, "learning_rate": 3.815325917255559e-06, "loss": 0.4679, "step": 14184 }, { "epoch": 0.5888977109377838, "grad_norm": 2.27793550491333, "learning_rate": 3.8146727569501884e-06, "loss": 0.4494, "step": 14185 }, { "epoch": 0.5889392264619951, "grad_norm": 2.559661865234375, "learning_rate": 3.814019618075874e-06, "loss": 0.6536, "step": 14186 }, { "epoch": 0.5889807419862064, "grad_norm": 2.5855467319488525, "learning_rate": 3.813366500644426e-06, "loss": 0.4606, "step": 14187 }, { "epoch": 0.5890222575104178, "grad_norm": 2.0992941856384277, "learning_rate": 3.812713404667654e-06, "loss": 0.5468, "step": 14188 }, { "epoch": 0.5890637730346291, "grad_norm": 2.2137274742126465, "learning_rate": 3.812060330157368e-06, "loss": 0.4582, "step": 14189 }, { "epoch": 0.5891052885588405, "grad_norm": 2.194012403488159, "learning_rate": 3.811407277125372e-06, "loss": 0.3905, "step": 14190 }, { "epoch": 0.5891468040830518, "grad_norm": 3.182389974594116, "learning_rate": 3.810754245583475e-06, "loss": 0.4169, "step": 14191 }, { "epoch": 0.5891883196072631, "grad_norm": 2.1288630962371826, "learning_rate": 3.810101235543484e-06, "loss": 0.5882, "step": 14192 }, { "epoch": 0.5892298351314744, "grad_norm": 2.3693552017211914, "learning_rate": 3.8094482470172063e-06, "loss": 0.5679, "step": 14193 }, { "epoch": 0.5892713506556858, "grad_norm": 2.094115972518921, "learning_rate": 3.8087952800164474e-06, "loss": 0.5247, "step": 14194 }, { "epoch": 0.5893128661798972, "grad_norm": 2.7070631980895996, "learning_rate": 3.808142334553012e-06, "loss": 0.4243, "step": 14195 }, { "epoch": 0.5893543817041085, "grad_norm": 1.916878581047058, "learning_rate": 3.8074894106387068e-06, "loss": 0.5045, "step": 14196 }, { "epoch": 0.5893958972283199, "grad_norm": 2.3646810054779053, "learning_rate": 3.8068365082853366e-06, "loss": 0.5796, "step": 14197 }, { "epoch": 0.5894374127525311, "grad_norm": 2.4629201889038086, "learning_rate": 3.806183627504709e-06, "loss": 0.4897, "step": 14198 }, { "epoch": 0.5894789282767425, "grad_norm": 2.2548606395721436, "learning_rate": 3.805530768308623e-06, "loss": 0.5777, "step": 14199 }, { "epoch": 0.5895204438009538, "grad_norm": 2.2755508422851562, "learning_rate": 3.8048779307088844e-06, "loss": 0.5491, "step": 14200 }, { "epoch": 0.5895619593251652, "grad_norm": 2.470663547515869, "learning_rate": 3.804225114717298e-06, "loss": 0.4863, "step": 14201 }, { "epoch": 0.5896034748493765, "grad_norm": 2.3438777923583984, "learning_rate": 3.803572320345668e-06, "loss": 0.5889, "step": 14202 }, { "epoch": 0.5896449903735879, "grad_norm": 2.3658745288848877, "learning_rate": 3.8029195476057932e-06, "loss": 0.5131, "step": 14203 }, { "epoch": 0.5896865058977991, "grad_norm": 2.486358404159546, "learning_rate": 3.8022667965094796e-06, "loss": 0.6849, "step": 14204 }, { "epoch": 0.5897280214220105, "grad_norm": 2.6202924251556396, "learning_rate": 3.8016140670685263e-06, "loss": 0.3909, "step": 14205 }, { "epoch": 0.5897695369462218, "grad_norm": 3.1568503379821777, "learning_rate": 3.8009613592947368e-06, "loss": 0.5745, "step": 14206 }, { "epoch": 0.5898110524704332, "grad_norm": 2.1759033203125, "learning_rate": 3.8003086731999115e-06, "loss": 0.5686, "step": 14207 }, { "epoch": 0.5898525679946445, "grad_norm": 2.2239346504211426, "learning_rate": 3.79965600879585e-06, "loss": 0.5683, "step": 14208 }, { "epoch": 0.5898940835188559, "grad_norm": 2.291229248046875, "learning_rate": 3.799003366094354e-06, "loss": 0.6647, "step": 14209 }, { "epoch": 0.5899355990430671, "grad_norm": 2.2249934673309326, "learning_rate": 3.7983507451072236e-06, "loss": 0.613, "step": 14210 }, { "epoch": 0.5899771145672785, "grad_norm": 2.9812417030334473, "learning_rate": 3.7976981458462605e-06, "loss": 0.4756, "step": 14211 }, { "epoch": 0.5900186300914898, "grad_norm": 1.9797961711883545, "learning_rate": 3.7970455683232586e-06, "loss": 0.5022, "step": 14212 }, { "epoch": 0.5900601456157012, "grad_norm": 2.3568031787872314, "learning_rate": 3.79639301255002e-06, "loss": 0.6108, "step": 14213 }, { "epoch": 0.5901016611399125, "grad_norm": 2.2440342903137207, "learning_rate": 3.7957404785383424e-06, "loss": 0.4902, "step": 14214 }, { "epoch": 0.5901431766641239, "grad_norm": 2.3319456577301025, "learning_rate": 3.7950879663000263e-06, "loss": 0.5859, "step": 14215 }, { "epoch": 0.5901846921883351, "grad_norm": 2.5669617652893066, "learning_rate": 3.794435475846865e-06, "loss": 0.5272, "step": 14216 }, { "epoch": 0.5902262077125465, "grad_norm": 2.1564037799835205, "learning_rate": 3.7937830071906585e-06, "loss": 0.5039, "step": 14217 }, { "epoch": 0.5902677232367578, "grad_norm": 2.2367148399353027, "learning_rate": 3.793130560343202e-06, "loss": 0.4088, "step": 14218 }, { "epoch": 0.5903092387609692, "grad_norm": 2.9965338706970215, "learning_rate": 3.792478135316295e-06, "loss": 0.5061, "step": 14219 }, { "epoch": 0.5903507542851805, "grad_norm": 2.478692054748535, "learning_rate": 3.791825732121729e-06, "loss": 0.5024, "step": 14220 }, { "epoch": 0.5903922698093919, "grad_norm": 2.338318109512329, "learning_rate": 3.791173350771302e-06, "loss": 0.5975, "step": 14221 }, { "epoch": 0.5904337853336031, "grad_norm": 1.9060357809066772, "learning_rate": 3.79052099127681e-06, "loss": 0.4095, "step": 14222 }, { "epoch": 0.5904753008578145, "grad_norm": 2.4590773582458496, "learning_rate": 3.789868653650046e-06, "loss": 0.4669, "step": 14223 }, { "epoch": 0.5905168163820258, "grad_norm": 2.3959827423095703, "learning_rate": 3.7892163379028083e-06, "loss": 0.3443, "step": 14224 }, { "epoch": 0.5905583319062372, "grad_norm": 2.384448289871216, "learning_rate": 3.7885640440468863e-06, "loss": 0.5414, "step": 14225 }, { "epoch": 0.5905998474304486, "grad_norm": 2.664789915084839, "learning_rate": 3.787911772094075e-06, "loss": 0.5677, "step": 14226 }, { "epoch": 0.5906413629546599, "grad_norm": 2.1955666542053223, "learning_rate": 3.7872595220561685e-06, "loss": 0.6223, "step": 14227 }, { "epoch": 0.5906828784788712, "grad_norm": 2.2667856216430664, "learning_rate": 3.7866072939449618e-06, "loss": 0.4431, "step": 14228 }, { "epoch": 0.5907243940030825, "grad_norm": 1.8945252895355225, "learning_rate": 3.785955087772242e-06, "loss": 0.474, "step": 14229 }, { "epoch": 0.5907659095272939, "grad_norm": 2.514376163482666, "learning_rate": 3.785302903549806e-06, "loss": 0.5133, "step": 14230 }, { "epoch": 0.5908074250515052, "grad_norm": 2.2566802501678467, "learning_rate": 3.7846507412894425e-06, "loss": 0.4791, "step": 14231 }, { "epoch": 0.5908489405757166, "grad_norm": 2.572690725326538, "learning_rate": 3.783998601002946e-06, "loss": 0.542, "step": 14232 }, { "epoch": 0.5908904560999279, "grad_norm": 2.163810968399048, "learning_rate": 3.7833464827021028e-06, "loss": 0.4922, "step": 14233 }, { "epoch": 0.5909319716241392, "grad_norm": 2.6055901050567627, "learning_rate": 3.782694386398706e-06, "loss": 0.5593, "step": 14234 }, { "epoch": 0.5909734871483505, "grad_norm": 2.1613855361938477, "learning_rate": 3.7820423121045456e-06, "loss": 0.5118, "step": 14235 }, { "epoch": 0.5910150026725619, "grad_norm": 2.2833008766174316, "learning_rate": 3.7813902598314135e-06, "loss": 0.4806, "step": 14236 }, { "epoch": 0.5910565181967732, "grad_norm": 2.046261787414551, "learning_rate": 3.7807382295910945e-06, "loss": 0.5744, "step": 14237 }, { "epoch": 0.5910980337209846, "grad_norm": 3.132215976715088, "learning_rate": 3.7800862213953792e-06, "loss": 0.5677, "step": 14238 }, { "epoch": 0.5911395492451958, "grad_norm": 2.199307918548584, "learning_rate": 3.7794342352560576e-06, "loss": 0.4636, "step": 14239 }, { "epoch": 0.5911810647694072, "grad_norm": 2.5454959869384766, "learning_rate": 3.7787822711849165e-06, "loss": 0.5518, "step": 14240 }, { "epoch": 0.5912225802936185, "grad_norm": 2.2995193004608154, "learning_rate": 3.7781303291937453e-06, "loss": 0.6007, "step": 14241 }, { "epoch": 0.5912640958178299, "grad_norm": 2.397353410720825, "learning_rate": 3.777478409294329e-06, "loss": 0.4882, "step": 14242 }, { "epoch": 0.5913056113420412, "grad_norm": 2.356611490249634, "learning_rate": 3.7768265114984557e-06, "loss": 0.5433, "step": 14243 }, { "epoch": 0.5913471268662526, "grad_norm": 2.551050901412964, "learning_rate": 3.7761746358179106e-06, "loss": 0.602, "step": 14244 }, { "epoch": 0.5913886423904638, "grad_norm": 2.1889195442199707, "learning_rate": 3.7755227822644834e-06, "loss": 0.4328, "step": 14245 }, { "epoch": 0.5914301579146752, "grad_norm": 2.7407331466674805, "learning_rate": 3.7748709508499547e-06, "loss": 0.6742, "step": 14246 }, { "epoch": 0.5914716734388865, "grad_norm": 2.8484268188476562, "learning_rate": 3.774219141586113e-06, "loss": 0.6221, "step": 14247 }, { "epoch": 0.5915131889630979, "grad_norm": 2.4865851402282715, "learning_rate": 3.773567354484742e-06, "loss": 0.499, "step": 14248 }, { "epoch": 0.5915547044873092, "grad_norm": 2.6606879234313965, "learning_rate": 3.7729155895576285e-06, "loss": 0.4494, "step": 14249 }, { "epoch": 0.5915962200115206, "grad_norm": 2.1504759788513184, "learning_rate": 3.7722638468165535e-06, "loss": 0.4433, "step": 14250 }, { "epoch": 0.5916377355357318, "grad_norm": 2.9197824001312256, "learning_rate": 3.7716121262733013e-06, "loss": 0.4784, "step": 14251 }, { "epoch": 0.5916792510599432, "grad_norm": 2.207608222961426, "learning_rate": 3.770960427939656e-06, "loss": 0.5787, "step": 14252 }, { "epoch": 0.5917207665841545, "grad_norm": 2.0392794609069824, "learning_rate": 3.770308751827402e-06, "loss": 0.4004, "step": 14253 }, { "epoch": 0.5917622821083659, "grad_norm": 2.2555654048919678, "learning_rate": 3.76965709794832e-06, "loss": 0.5879, "step": 14254 }, { "epoch": 0.5918037976325772, "grad_norm": 2.5725789070129395, "learning_rate": 3.7690054663141907e-06, "loss": 0.4119, "step": 14255 }, { "epoch": 0.5918453131567886, "grad_norm": 2.6909921169281006, "learning_rate": 3.768353856936799e-06, "loss": 0.3201, "step": 14256 }, { "epoch": 0.591886828681, "grad_norm": 2.9577348232269287, "learning_rate": 3.767702269827923e-06, "loss": 0.5924, "step": 14257 }, { "epoch": 0.5919283442052112, "grad_norm": 2.69484281539917, "learning_rate": 3.7670507049993483e-06, "loss": 0.6121, "step": 14258 }, { "epoch": 0.5919698597294226, "grad_norm": 2.1665902137756348, "learning_rate": 3.7663991624628495e-06, "loss": 0.5579, "step": 14259 }, { "epoch": 0.5920113752536339, "grad_norm": 2.108330249786377, "learning_rate": 3.765747642230209e-06, "loss": 0.4452, "step": 14260 }, { "epoch": 0.5920528907778453, "grad_norm": 2.4573991298675537, "learning_rate": 3.765096144313207e-06, "loss": 0.604, "step": 14261 }, { "epoch": 0.5920944063020566, "grad_norm": 2.6200785636901855, "learning_rate": 3.764444668723625e-06, "loss": 0.5188, "step": 14262 }, { "epoch": 0.592135921826268, "grad_norm": 2.1482436656951904, "learning_rate": 3.7637932154732376e-06, "loss": 0.4424, "step": 14263 }, { "epoch": 0.5921774373504792, "grad_norm": 2.3610053062438965, "learning_rate": 3.763141784573825e-06, "loss": 0.5824, "step": 14264 }, { "epoch": 0.5922189528746906, "grad_norm": 2.5215630531311035, "learning_rate": 3.7624903760371655e-06, "loss": 0.6117, "step": 14265 }, { "epoch": 0.5922604683989019, "grad_norm": 2.010099172592163, "learning_rate": 3.7618389898750376e-06, "loss": 0.5432, "step": 14266 }, { "epoch": 0.5923019839231133, "grad_norm": 2.187992811203003, "learning_rate": 3.7611876260992175e-06, "loss": 0.5346, "step": 14267 }, { "epoch": 0.5923434994473246, "grad_norm": 2.503239393234253, "learning_rate": 3.7605362847214816e-06, "loss": 0.5649, "step": 14268 }, { "epoch": 0.592385014971536, "grad_norm": 2.5700509548187256, "learning_rate": 3.7598849657536075e-06, "loss": 0.4955, "step": 14269 }, { "epoch": 0.5924265304957472, "grad_norm": 2.285487413406372, "learning_rate": 3.75923366920737e-06, "loss": 0.546, "step": 14270 }, { "epoch": 0.5924680460199586, "grad_norm": 2.0041933059692383, "learning_rate": 3.758582395094547e-06, "loss": 0.5369, "step": 14271 }, { "epoch": 0.5925095615441699, "grad_norm": 2.659250020980835, "learning_rate": 3.7579311434269106e-06, "loss": 0.5367, "step": 14272 }, { "epoch": 0.5925510770683813, "grad_norm": 2.4709866046905518, "learning_rate": 3.7572799142162363e-06, "loss": 0.4371, "step": 14273 }, { "epoch": 0.5925925925925926, "grad_norm": 2.411449432373047, "learning_rate": 3.7566287074743e-06, "loss": 0.5677, "step": 14274 }, { "epoch": 0.592634108116804, "grad_norm": 2.2321975231170654, "learning_rate": 3.755977523212877e-06, "loss": 0.5305, "step": 14275 }, { "epoch": 0.5926756236410152, "grad_norm": 2.857545852661133, "learning_rate": 3.755326361443737e-06, "loss": 0.5961, "step": 14276 }, { "epoch": 0.5927171391652266, "grad_norm": 2.3374342918395996, "learning_rate": 3.7546752221786553e-06, "loss": 0.4641, "step": 14277 }, { "epoch": 0.5927586546894379, "grad_norm": 1.8651063442230225, "learning_rate": 3.7540241054294043e-06, "loss": 0.5252, "step": 14278 }, { "epoch": 0.5928001702136493, "grad_norm": 2.2782843112945557, "learning_rate": 3.7533730112077572e-06, "loss": 0.6081, "step": 14279 }, { "epoch": 0.5928416857378606, "grad_norm": 2.3836567401885986, "learning_rate": 3.7527219395254854e-06, "loss": 0.5596, "step": 14280 }, { "epoch": 0.592883201262072, "grad_norm": 2.189920663833618, "learning_rate": 3.7520708903943604e-06, "loss": 0.474, "step": 14281 }, { "epoch": 0.5929247167862832, "grad_norm": 2.1039986610412598, "learning_rate": 3.7514198638261535e-06, "loss": 0.4703, "step": 14282 }, { "epoch": 0.5929662323104946, "grad_norm": 2.250460386276245, "learning_rate": 3.7507688598326346e-06, "loss": 0.5604, "step": 14283 }, { "epoch": 0.5930077478347059, "grad_norm": 1.8152751922607422, "learning_rate": 3.750117878425577e-06, "loss": 0.4414, "step": 14284 }, { "epoch": 0.5930492633589173, "grad_norm": 2.766669988632202, "learning_rate": 3.749466919616746e-06, "loss": 0.6068, "step": 14285 }, { "epoch": 0.5930907788831286, "grad_norm": 2.5038199424743652, "learning_rate": 3.748815983417914e-06, "loss": 0.4376, "step": 14286 }, { "epoch": 0.5931322944073399, "grad_norm": 2.093179225921631, "learning_rate": 3.748165069840849e-06, "loss": 0.5381, "step": 14287 }, { "epoch": 0.5931738099315513, "grad_norm": 2.3223085403442383, "learning_rate": 3.747514178897323e-06, "loss": 0.5736, "step": 14288 }, { "epoch": 0.5932153254557626, "grad_norm": 3.28340220451355, "learning_rate": 3.7468633105990994e-06, "loss": 0.5987, "step": 14289 }, { "epoch": 0.593256840979974, "grad_norm": 2.7027909755706787, "learning_rate": 3.746212464957948e-06, "loss": 0.5651, "step": 14290 }, { "epoch": 0.5932983565041853, "grad_norm": 2.877310037612915, "learning_rate": 3.7455616419856366e-06, "loss": 0.5595, "step": 14291 }, { "epoch": 0.5933398720283967, "grad_norm": 2.4797542095184326, "learning_rate": 3.744910841693934e-06, "loss": 0.5902, "step": 14292 }, { "epoch": 0.5933813875526079, "grad_norm": 2.4692699909210205, "learning_rate": 3.7442600640946045e-06, "loss": 0.496, "step": 14293 }, { "epoch": 0.5934229030768193, "grad_norm": 2.876098155975342, "learning_rate": 3.7436093091994147e-06, "loss": 0.6617, "step": 14294 }, { "epoch": 0.5934644186010306, "grad_norm": 2.4898881912231445, "learning_rate": 3.7429585770201314e-06, "loss": 0.6219, "step": 14295 }, { "epoch": 0.593505934125242, "grad_norm": 2.1586978435516357, "learning_rate": 3.7423078675685175e-06, "loss": 0.4721, "step": 14296 }, { "epoch": 0.5935474496494533, "grad_norm": 2.295311212539673, "learning_rate": 3.741657180856343e-06, "loss": 0.4362, "step": 14297 }, { "epoch": 0.5935889651736647, "grad_norm": 2.1332008838653564, "learning_rate": 3.741006516895367e-06, "loss": 0.5651, "step": 14298 }, { "epoch": 0.5936304806978759, "grad_norm": 2.171027183532715, "learning_rate": 3.740355875697356e-06, "loss": 0.4441, "step": 14299 }, { "epoch": 0.5936719962220873, "grad_norm": 2.549593448638916, "learning_rate": 3.739705257274074e-06, "loss": 0.4768, "step": 14300 }, { "epoch": 0.5937135117462986, "grad_norm": 2.512747287750244, "learning_rate": 3.7390546616372857e-06, "loss": 0.4061, "step": 14301 }, { "epoch": 0.59375502727051, "grad_norm": 2.142585515975952, "learning_rate": 3.738404088798751e-06, "loss": 0.5155, "step": 14302 }, { "epoch": 0.5937965427947213, "grad_norm": 2.4053003787994385, "learning_rate": 3.7377535387702336e-06, "loss": 0.5924, "step": 14303 }, { "epoch": 0.5938380583189327, "grad_norm": 2.183854818344116, "learning_rate": 3.7371030115634965e-06, "loss": 0.4965, "step": 14304 }, { "epoch": 0.5938795738431439, "grad_norm": 2.275486946105957, "learning_rate": 3.736452507190302e-06, "loss": 0.3426, "step": 14305 }, { "epoch": 0.5939210893673553, "grad_norm": 2.150597333908081, "learning_rate": 3.73580202566241e-06, "loss": 0.5343, "step": 14306 }, { "epoch": 0.5939626048915666, "grad_norm": 2.55568528175354, "learning_rate": 3.7351515669915805e-06, "loss": 0.5624, "step": 14307 }, { "epoch": 0.594004120415778, "grad_norm": 2.4488942623138428, "learning_rate": 3.7345011311895774e-06, "loss": 0.558, "step": 14308 }, { "epoch": 0.5940456359399893, "grad_norm": 2.309180736541748, "learning_rate": 3.7338507182681568e-06, "loss": 0.5664, "step": 14309 }, { "epoch": 0.5940871514642007, "grad_norm": 2.4926459789276123, "learning_rate": 3.7332003282390814e-06, "loss": 0.5994, "step": 14310 }, { "epoch": 0.5941286669884119, "grad_norm": 2.164745330810547, "learning_rate": 3.7325499611141086e-06, "loss": 0.5805, "step": 14311 }, { "epoch": 0.5941701825126233, "grad_norm": 2.9917256832122803, "learning_rate": 3.731899616904997e-06, "loss": 0.5072, "step": 14312 }, { "epoch": 0.5942116980368346, "grad_norm": 2.4231436252593994, "learning_rate": 3.7312492956235058e-06, "loss": 0.4673, "step": 14313 }, { "epoch": 0.594253213561046, "grad_norm": 2.1191694736480713, "learning_rate": 3.730598997281396e-06, "loss": 0.4222, "step": 14314 }, { "epoch": 0.5942947290852573, "grad_norm": 2.916931390762329, "learning_rate": 3.7299487218904198e-06, "loss": 0.5135, "step": 14315 }, { "epoch": 0.5943362446094687, "grad_norm": 2.284799814224243, "learning_rate": 3.729298469462336e-06, "loss": 0.4316, "step": 14316 }, { "epoch": 0.5943777601336799, "grad_norm": 2.5029618740081787, "learning_rate": 3.7286482400089026e-06, "loss": 0.5739, "step": 14317 }, { "epoch": 0.5944192756578913, "grad_norm": 2.8096675872802734, "learning_rate": 3.7279980335418763e-06, "loss": 0.3979, "step": 14318 }, { "epoch": 0.5944607911821027, "grad_norm": 2.388087034225464, "learning_rate": 3.727347850073012e-06, "loss": 0.5637, "step": 14319 }, { "epoch": 0.594502306706314, "grad_norm": 2.8632187843322754, "learning_rate": 3.726697689614065e-06, "loss": 0.5569, "step": 14320 }, { "epoch": 0.5945438222305254, "grad_norm": 2.7110321521759033, "learning_rate": 3.7260475521767916e-06, "loss": 0.5509, "step": 14321 }, { "epoch": 0.5945853377547367, "grad_norm": 2.30548095703125, "learning_rate": 3.7253974377729445e-06, "loss": 0.5947, "step": 14322 }, { "epoch": 0.594626853278948, "grad_norm": 2.096622943878174, "learning_rate": 3.7247473464142814e-06, "loss": 0.5379, "step": 14323 }, { "epoch": 0.5946683688031593, "grad_norm": 2.3972103595733643, "learning_rate": 3.7240972781125518e-06, "loss": 0.5161, "step": 14324 }, { "epoch": 0.5947098843273707, "grad_norm": 2.3761179447174072, "learning_rate": 3.7234472328795108e-06, "loss": 0.586, "step": 14325 }, { "epoch": 0.594751399851582, "grad_norm": 2.624122381210327, "learning_rate": 3.722797210726912e-06, "loss": 0.3837, "step": 14326 }, { "epoch": 0.5947929153757934, "grad_norm": 2.2141120433807373, "learning_rate": 3.7221472116665096e-06, "loss": 0.5276, "step": 14327 }, { "epoch": 0.5948344309000047, "grad_norm": 1.9890806674957275, "learning_rate": 3.721497235710052e-06, "loss": 0.3658, "step": 14328 }, { "epoch": 0.594875946424216, "grad_norm": 2.8795089721679688, "learning_rate": 3.720847282869293e-06, "loss": 0.4329, "step": 14329 }, { "epoch": 0.5949174619484273, "grad_norm": 2.24784779548645, "learning_rate": 3.7201973531559832e-06, "loss": 0.5026, "step": 14330 }, { "epoch": 0.5949589774726387, "grad_norm": 2.0586929321289062, "learning_rate": 3.719547446581876e-06, "loss": 0.4582, "step": 14331 }, { "epoch": 0.59500049299685, "grad_norm": 2.0742547512054443, "learning_rate": 3.71889756315872e-06, "loss": 0.4672, "step": 14332 }, { "epoch": 0.5950420085210614, "grad_norm": 2.1642589569091797, "learning_rate": 3.718247702898265e-06, "loss": 0.5421, "step": 14333 }, { "epoch": 0.5950835240452726, "grad_norm": 3.064560890197754, "learning_rate": 3.71759786581226e-06, "loss": 0.6453, "step": 14334 }, { "epoch": 0.595125039569484, "grad_norm": 2.4616641998291016, "learning_rate": 3.716948051912455e-06, "loss": 0.638, "step": 14335 }, { "epoch": 0.5951665550936953, "grad_norm": 3.1402668952941895, "learning_rate": 3.7162982612106014e-06, "loss": 0.5203, "step": 14336 }, { "epoch": 0.5952080706179067, "grad_norm": 2.1813974380493164, "learning_rate": 3.715648493718443e-06, "loss": 0.5016, "step": 14337 }, { "epoch": 0.595249586142118, "grad_norm": 2.313720703125, "learning_rate": 3.71499874944773e-06, "loss": 0.4105, "step": 14338 }, { "epoch": 0.5952911016663294, "grad_norm": 2.649973154067993, "learning_rate": 3.71434902841021e-06, "loss": 0.4801, "step": 14339 }, { "epoch": 0.5953326171905406, "grad_norm": 2.588625431060791, "learning_rate": 3.7136993306176327e-06, "loss": 0.6918, "step": 14340 }, { "epoch": 0.595374132714752, "grad_norm": 2.2151153087615967, "learning_rate": 3.71304965608174e-06, "loss": 0.5405, "step": 14341 }, { "epoch": 0.5954156482389633, "grad_norm": 2.4708805084228516, "learning_rate": 3.7124000048142793e-06, "loss": 0.5285, "step": 14342 }, { "epoch": 0.5954571637631747, "grad_norm": 2.913081169128418, "learning_rate": 3.711750376826999e-06, "loss": 0.5426, "step": 14343 }, { "epoch": 0.595498679287386, "grad_norm": 2.888327121734619, "learning_rate": 3.711100772131644e-06, "loss": 0.4897, "step": 14344 }, { "epoch": 0.5955401948115974, "grad_norm": 2.4388427734375, "learning_rate": 3.7104511907399575e-06, "loss": 0.5816, "step": 14345 }, { "epoch": 0.5955817103358086, "grad_norm": 2.8810412883758545, "learning_rate": 3.7098016326636863e-06, "loss": 0.5827, "step": 14346 }, { "epoch": 0.59562322586002, "grad_norm": 2.0575244426727295, "learning_rate": 3.709152097914572e-06, "loss": 0.4392, "step": 14347 }, { "epoch": 0.5956647413842313, "grad_norm": 2.8213605880737305, "learning_rate": 3.70850258650436e-06, "loss": 0.5154, "step": 14348 }, { "epoch": 0.5957062569084427, "grad_norm": 2.3655784130096436, "learning_rate": 3.7078530984447956e-06, "loss": 0.4504, "step": 14349 }, { "epoch": 0.5957477724326541, "grad_norm": 2.32468318939209, "learning_rate": 3.7072036337476173e-06, "loss": 0.6231, "step": 14350 }, { "epoch": 0.5957892879568654, "grad_norm": 2.1540160179138184, "learning_rate": 3.7065541924245707e-06, "loss": 0.394, "step": 14351 }, { "epoch": 0.5958308034810768, "grad_norm": 2.340235471725464, "learning_rate": 3.705904774487396e-06, "loss": 0.5995, "step": 14352 }, { "epoch": 0.595872319005288, "grad_norm": 2.303093910217285, "learning_rate": 3.7052553799478387e-06, "loss": 0.4154, "step": 14353 }, { "epoch": 0.5959138345294994, "grad_norm": 2.4497551918029785, "learning_rate": 3.704606008817635e-06, "loss": 0.3903, "step": 14354 }, { "epoch": 0.5959553500537107, "grad_norm": 2.7015774250030518, "learning_rate": 3.703956661108528e-06, "loss": 0.5187, "step": 14355 }, { "epoch": 0.5959968655779221, "grad_norm": 2.375856876373291, "learning_rate": 3.7033073368322583e-06, "loss": 0.4488, "step": 14356 }, { "epoch": 0.5960383811021334, "grad_norm": 2.56964111328125, "learning_rate": 3.702658036000567e-06, "loss": 0.4394, "step": 14357 }, { "epoch": 0.5960798966263448, "grad_norm": 2.379941940307617, "learning_rate": 3.7020087586251917e-06, "loss": 0.5339, "step": 14358 }, { "epoch": 0.596121412150556, "grad_norm": 2.283460855484009, "learning_rate": 3.701359504717873e-06, "loss": 0.435, "step": 14359 }, { "epoch": 0.5961629276747674, "grad_norm": 2.4778640270233154, "learning_rate": 3.7007102742903466e-06, "loss": 0.6818, "step": 14360 }, { "epoch": 0.5962044431989787, "grad_norm": 2.6297686100006104, "learning_rate": 3.7000610673543535e-06, "loss": 0.5269, "step": 14361 }, { "epoch": 0.5962459587231901, "grad_norm": 1.9152164459228516, "learning_rate": 3.699411883921633e-06, "loss": 0.4319, "step": 14362 }, { "epoch": 0.5962874742474014, "grad_norm": 2.5304436683654785, "learning_rate": 3.698762724003919e-06, "loss": 0.4964, "step": 14363 }, { "epoch": 0.5963289897716127, "grad_norm": 3.163316488265991, "learning_rate": 3.698113587612949e-06, "loss": 0.6372, "step": 14364 }, { "epoch": 0.596370505295824, "grad_norm": 2.3315939903259277, "learning_rate": 3.6974644747604614e-06, "loss": 0.5904, "step": 14365 }, { "epoch": 0.5964120208200354, "grad_norm": 2.4344944953918457, "learning_rate": 3.6968153854581933e-06, "loss": 0.6547, "step": 14366 }, { "epoch": 0.5964535363442467, "grad_norm": 2.1861023902893066, "learning_rate": 3.6961663197178767e-06, "loss": 0.539, "step": 14367 }, { "epoch": 0.5964950518684581, "grad_norm": 2.65476131439209, "learning_rate": 3.695517277551249e-06, "loss": 0.5072, "step": 14368 }, { "epoch": 0.5965365673926694, "grad_norm": 2.4028594493865967, "learning_rate": 3.694868258970045e-06, "loss": 0.4977, "step": 14369 }, { "epoch": 0.5965780829168807, "grad_norm": 2.0472962856292725, "learning_rate": 3.694219263985999e-06, "loss": 0.4712, "step": 14370 }, { "epoch": 0.596619598441092, "grad_norm": 2.3476014137268066, "learning_rate": 3.693570292610846e-06, "loss": 0.5063, "step": 14371 }, { "epoch": 0.5966611139653034, "grad_norm": 2.4032578468322754, "learning_rate": 3.6929213448563183e-06, "loss": 0.5693, "step": 14372 }, { "epoch": 0.5967026294895147, "grad_norm": 1.9313212633132935, "learning_rate": 3.6922724207341486e-06, "loss": 0.455, "step": 14373 }, { "epoch": 0.5967441450137261, "grad_norm": 2.4252378940582275, "learning_rate": 3.69162352025607e-06, "loss": 0.461, "step": 14374 }, { "epoch": 0.5967856605379374, "grad_norm": 2.2260518074035645, "learning_rate": 3.6909746434338184e-06, "loss": 0.4497, "step": 14375 }, { "epoch": 0.5968271760621487, "grad_norm": 2.186018228530884, "learning_rate": 3.69032579027912e-06, "loss": 0.5499, "step": 14376 }, { "epoch": 0.59686869158636, "grad_norm": 2.4669063091278076, "learning_rate": 3.6896769608037088e-06, "loss": 0.6877, "step": 14377 }, { "epoch": 0.5969102071105714, "grad_norm": 2.1350715160369873, "learning_rate": 3.6890281550193152e-06, "loss": 0.5531, "step": 14378 }, { "epoch": 0.5969517226347827, "grad_norm": 2.383460521697998, "learning_rate": 3.6883793729376734e-06, "loss": 0.6442, "step": 14379 }, { "epoch": 0.5969932381589941, "grad_norm": 2.4498496055603027, "learning_rate": 3.6877306145705078e-06, "loss": 0.6228, "step": 14380 }, { "epoch": 0.5970347536832055, "grad_norm": 2.4551546573638916, "learning_rate": 3.687081879929551e-06, "loss": 0.5378, "step": 14381 }, { "epoch": 0.5970762692074167, "grad_norm": 2.2960054874420166, "learning_rate": 3.686433169026533e-06, "loss": 0.5819, "step": 14382 }, { "epoch": 0.5971177847316281, "grad_norm": 2.1825973987579346, "learning_rate": 3.6857844818731833e-06, "loss": 0.4665, "step": 14383 }, { "epoch": 0.5971593002558394, "grad_norm": 2.073214054107666, "learning_rate": 3.6851358184812267e-06, "loss": 0.5338, "step": 14384 }, { "epoch": 0.5972008157800508, "grad_norm": 2.059776782989502, "learning_rate": 3.6844871788623946e-06, "loss": 0.4796, "step": 14385 }, { "epoch": 0.5972423313042621, "grad_norm": 2.3256852626800537, "learning_rate": 3.683838563028413e-06, "loss": 0.4526, "step": 14386 }, { "epoch": 0.5972838468284735, "grad_norm": 1.96620774269104, "learning_rate": 3.683189970991009e-06, "loss": 0.4175, "step": 14387 }, { "epoch": 0.5973253623526847, "grad_norm": 2.085732936859131, "learning_rate": 3.682541402761912e-06, "loss": 0.5013, "step": 14388 }, { "epoch": 0.5973668778768961, "grad_norm": 2.4404470920562744, "learning_rate": 3.6818928583528447e-06, "loss": 0.573, "step": 14389 }, { "epoch": 0.5974083934011074, "grad_norm": 2.3377811908721924, "learning_rate": 3.6812443377755334e-06, "loss": 0.6556, "step": 14390 }, { "epoch": 0.5974499089253188, "grad_norm": 2.1868391036987305, "learning_rate": 3.680595841041706e-06, "loss": 0.4405, "step": 14391 }, { "epoch": 0.5974914244495301, "grad_norm": 2.081205368041992, "learning_rate": 3.679947368163087e-06, "loss": 0.5035, "step": 14392 }, { "epoch": 0.5975329399737415, "grad_norm": 2.1040966510772705, "learning_rate": 3.6792989191513983e-06, "loss": 0.4515, "step": 14393 }, { "epoch": 0.5975744554979527, "grad_norm": 2.377589225769043, "learning_rate": 3.678650494018366e-06, "loss": 0.7, "step": 14394 }, { "epoch": 0.5976159710221641, "grad_norm": 3.05243182182312, "learning_rate": 3.6780020927757134e-06, "loss": 0.5956, "step": 14395 }, { "epoch": 0.5976574865463754, "grad_norm": 2.0619335174560547, "learning_rate": 3.677353715435166e-06, "loss": 0.537, "step": 14396 }, { "epoch": 0.5976990020705868, "grad_norm": 2.5052287578582764, "learning_rate": 3.6767053620084433e-06, "loss": 0.4861, "step": 14397 }, { "epoch": 0.5977405175947981, "grad_norm": 2.9138243198394775, "learning_rate": 3.6760570325072692e-06, "loss": 0.4472, "step": 14398 }, { "epoch": 0.5977820331190095, "grad_norm": 2.4809746742248535, "learning_rate": 3.675408726943365e-06, "loss": 0.7128, "step": 14399 }, { "epoch": 0.5978235486432207, "grad_norm": 2.4456980228424072, "learning_rate": 3.6747604453284526e-06, "loss": 0.4956, "step": 14400 }, { "epoch": 0.5978650641674321, "grad_norm": 2.313234329223633, "learning_rate": 3.674112187674255e-06, "loss": 0.518, "step": 14401 }, { "epoch": 0.5979065796916434, "grad_norm": 2.157278537750244, "learning_rate": 3.6734639539924897e-06, "loss": 0.5426, "step": 14402 }, { "epoch": 0.5979480952158548, "grad_norm": 2.525620937347412, "learning_rate": 3.6728157442948786e-06, "loss": 0.6171, "step": 14403 }, { "epoch": 0.5979896107400661, "grad_norm": 2.2187938690185547, "learning_rate": 3.672167558593141e-06, "loss": 0.6045, "step": 14404 }, { "epoch": 0.5980311262642775, "grad_norm": 2.3821613788604736, "learning_rate": 3.6715193968989992e-06, "loss": 0.4934, "step": 14405 }, { "epoch": 0.5980726417884887, "grad_norm": 2.0505199432373047, "learning_rate": 3.6708712592241667e-06, "loss": 0.6276, "step": 14406 }, { "epoch": 0.5981141573127001, "grad_norm": 2.5923290252685547, "learning_rate": 3.6702231455803662e-06, "loss": 0.5222, "step": 14407 }, { "epoch": 0.5981556728369114, "grad_norm": 2.3107948303222656, "learning_rate": 3.669575055979313e-06, "loss": 0.6832, "step": 14408 }, { "epoch": 0.5981971883611228, "grad_norm": 2.6555426120758057, "learning_rate": 3.6689269904327297e-06, "loss": 0.5582, "step": 14409 }, { "epoch": 0.5982387038853342, "grad_norm": 2.5395944118499756, "learning_rate": 3.6682789489523267e-06, "loss": 0.6322, "step": 14410 }, { "epoch": 0.5982802194095455, "grad_norm": 2.161423444747925, "learning_rate": 3.667630931549826e-06, "loss": 0.4369, "step": 14411 }, { "epoch": 0.5983217349337568, "grad_norm": 1.9743196964263916, "learning_rate": 3.666982938236941e-06, "loss": 0.5241, "step": 14412 }, { "epoch": 0.5983632504579681, "grad_norm": 2.7823894023895264, "learning_rate": 3.6663349690253886e-06, "loss": 0.5211, "step": 14413 }, { "epoch": 0.5984047659821795, "grad_norm": 2.659170627593994, "learning_rate": 3.6656870239268865e-06, "loss": 0.6097, "step": 14414 }, { "epoch": 0.5984462815063908, "grad_norm": 2.6932570934295654, "learning_rate": 3.6650391029531452e-06, "loss": 0.5834, "step": 14415 }, { "epoch": 0.5984877970306022, "grad_norm": 2.0879297256469727, "learning_rate": 3.6643912061158822e-06, "loss": 0.4918, "step": 14416 }, { "epoch": 0.5985293125548135, "grad_norm": 2.485821008682251, "learning_rate": 3.6637433334268107e-06, "loss": 0.5185, "step": 14417 }, { "epoch": 0.5985708280790248, "grad_norm": 2.857466459274292, "learning_rate": 3.6630954848976472e-06, "loss": 0.4782, "step": 14418 }, { "epoch": 0.5986123436032361, "grad_norm": 2.5754141807556152, "learning_rate": 3.6624476605401004e-06, "loss": 0.4893, "step": 14419 }, { "epoch": 0.5986538591274475, "grad_norm": 2.14121413230896, "learning_rate": 3.6617998603658854e-06, "loss": 0.491, "step": 14420 }, { "epoch": 0.5986953746516588, "grad_norm": 2.519730567932129, "learning_rate": 3.6611520843867155e-06, "loss": 0.5837, "step": 14421 }, { "epoch": 0.5987368901758702, "grad_norm": 2.6704070568084717, "learning_rate": 3.660504332614303e-06, "loss": 0.432, "step": 14422 }, { "epoch": 0.5987784057000815, "grad_norm": 2.0521090030670166, "learning_rate": 3.659856605060357e-06, "loss": 0.3848, "step": 14423 }, { "epoch": 0.5988199212242928, "grad_norm": 2.2757019996643066, "learning_rate": 3.6592089017365896e-06, "loss": 0.4807, "step": 14424 }, { "epoch": 0.5988614367485041, "grad_norm": 2.42353892326355, "learning_rate": 3.6585612226547117e-06, "loss": 0.5398, "step": 14425 }, { "epoch": 0.5989029522727155, "grad_norm": 2.652630567550659, "learning_rate": 3.6579135678264335e-06, "loss": 0.4848, "step": 14426 }, { "epoch": 0.5989444677969268, "grad_norm": 2.790356159210205, "learning_rate": 3.6572659372634663e-06, "loss": 0.5166, "step": 14427 }, { "epoch": 0.5989859833211382, "grad_norm": 2.6163747310638428, "learning_rate": 3.656618330977516e-06, "loss": 0.5264, "step": 14428 }, { "epoch": 0.5990274988453494, "grad_norm": 2.3666985034942627, "learning_rate": 3.6559707489802933e-06, "loss": 0.4664, "step": 14429 }, { "epoch": 0.5990690143695608, "grad_norm": 2.4306833744049072, "learning_rate": 3.6553231912835074e-06, "loss": 0.5632, "step": 14430 }, { "epoch": 0.5991105298937721, "grad_norm": 3.117457151412964, "learning_rate": 3.654675657898867e-06, "loss": 0.4605, "step": 14431 }, { "epoch": 0.5991520454179835, "grad_norm": 2.507598400115967, "learning_rate": 3.654028148838078e-06, "loss": 0.487, "step": 14432 }, { "epoch": 0.5991935609421948, "grad_norm": 2.7523224353790283, "learning_rate": 3.6533806641128466e-06, "loss": 0.5671, "step": 14433 }, { "epoch": 0.5992350764664062, "grad_norm": 2.6452925205230713, "learning_rate": 3.6527332037348816e-06, "loss": 0.3579, "step": 14434 }, { "epoch": 0.5992765919906174, "grad_norm": 2.5075197219848633, "learning_rate": 3.6520857677158905e-06, "loss": 0.483, "step": 14435 }, { "epoch": 0.5993181075148288, "grad_norm": 2.1843507289886475, "learning_rate": 3.6514383560675747e-06, "loss": 0.5583, "step": 14436 }, { "epoch": 0.5993596230390401, "grad_norm": 2.638892412185669, "learning_rate": 3.650790968801644e-06, "loss": 0.59, "step": 14437 }, { "epoch": 0.5994011385632515, "grad_norm": 2.487583875656128, "learning_rate": 3.6501436059298e-06, "loss": 0.6077, "step": 14438 }, { "epoch": 0.5994426540874628, "grad_norm": 2.2724146842956543, "learning_rate": 3.649496267463749e-06, "loss": 0.5058, "step": 14439 }, { "epoch": 0.5994841696116742, "grad_norm": 2.455883264541626, "learning_rate": 3.648848953415196e-06, "loss": 0.3774, "step": 14440 }, { "epoch": 0.5995256851358856, "grad_norm": 2.8201122283935547, "learning_rate": 3.6482016637958424e-06, "loss": 0.7065, "step": 14441 }, { "epoch": 0.5995672006600968, "grad_norm": 2.9786601066589355, "learning_rate": 3.6475543986173913e-06, "loss": 0.5404, "step": 14442 }, { "epoch": 0.5996087161843082, "grad_norm": 2.551102876663208, "learning_rate": 3.6469071578915473e-06, "loss": 0.4562, "step": 14443 }, { "epoch": 0.5996502317085195, "grad_norm": 2.2531826496124268, "learning_rate": 3.6462599416300138e-06, "loss": 0.509, "step": 14444 }, { "epoch": 0.5996917472327309, "grad_norm": 2.3521008491516113, "learning_rate": 3.6456127498444883e-06, "loss": 0.6877, "step": 14445 }, { "epoch": 0.5997332627569422, "grad_norm": 2.0199649333953857, "learning_rate": 3.6449655825466743e-06, "loss": 0.5562, "step": 14446 }, { "epoch": 0.5997747782811536, "grad_norm": 3.7018580436706543, "learning_rate": 3.6443184397482745e-06, "loss": 0.5462, "step": 14447 }, { "epoch": 0.5998162938053648, "grad_norm": 2.8094277381896973, "learning_rate": 3.6436713214609888e-06, "loss": 0.5779, "step": 14448 }, { "epoch": 0.5998578093295762, "grad_norm": 2.4621026515960693, "learning_rate": 3.6430242276965148e-06, "loss": 0.4552, "step": 14449 }, { "epoch": 0.5998993248537875, "grad_norm": 1.9431557655334473, "learning_rate": 3.642377158466556e-06, "loss": 0.5022, "step": 14450 }, { "epoch": 0.5999408403779989, "grad_norm": 2.660104274749756, "learning_rate": 3.641730113782807e-06, "loss": 0.4292, "step": 14451 }, { "epoch": 0.5999823559022102, "grad_norm": 2.7839977741241455, "learning_rate": 3.6410830936569695e-06, "loss": 0.5635, "step": 14452 }, { "epoch": 0.6000238714264216, "grad_norm": 2.3890628814697266, "learning_rate": 3.6404360981007438e-06, "loss": 0.5168, "step": 14453 }, { "epoch": 0.6000653869506328, "grad_norm": 3.114788293838501, "learning_rate": 3.6397891271258224e-06, "loss": 0.5913, "step": 14454 }, { "epoch": 0.6001069024748442, "grad_norm": 2.0633411407470703, "learning_rate": 3.6391421807439064e-06, "loss": 0.4649, "step": 14455 }, { "epoch": 0.6001484179990555, "grad_norm": 2.622194290161133, "learning_rate": 3.6384952589666924e-06, "loss": 0.4708, "step": 14456 }, { "epoch": 0.6001899335232669, "grad_norm": 2.4261770248413086, "learning_rate": 3.6378483618058774e-06, "loss": 0.4201, "step": 14457 }, { "epoch": 0.6002314490474782, "grad_norm": 2.4333033561706543, "learning_rate": 3.6372014892731557e-06, "loss": 0.3726, "step": 14458 }, { "epoch": 0.6002729645716895, "grad_norm": 2.698070526123047, "learning_rate": 3.6365546413802235e-06, "loss": 0.3892, "step": 14459 }, { "epoch": 0.6003144800959008, "grad_norm": 2.7487051486968994, "learning_rate": 3.635907818138776e-06, "loss": 0.4146, "step": 14460 }, { "epoch": 0.6003559956201122, "grad_norm": 2.768368721008301, "learning_rate": 3.6352610195605104e-06, "loss": 0.628, "step": 14461 }, { "epoch": 0.6003975111443235, "grad_norm": 2.2045977115631104, "learning_rate": 3.634614245657117e-06, "loss": 0.4821, "step": 14462 }, { "epoch": 0.6004390266685349, "grad_norm": 2.4205048084259033, "learning_rate": 3.6339674964402925e-06, "loss": 0.5079, "step": 14463 }, { "epoch": 0.6004805421927462, "grad_norm": 2.510599374771118, "learning_rate": 3.633320771921729e-06, "loss": 0.5303, "step": 14464 }, { "epoch": 0.6005220577169575, "grad_norm": 2.5150485038757324, "learning_rate": 3.6326740721131195e-06, "loss": 0.6086, "step": 14465 }, { "epoch": 0.6005635732411688, "grad_norm": 2.8253791332244873, "learning_rate": 3.632027397026159e-06, "loss": 0.5526, "step": 14466 }, { "epoch": 0.6006050887653802, "grad_norm": 2.6785781383514404, "learning_rate": 3.6313807466725347e-06, "loss": 0.5949, "step": 14467 }, { "epoch": 0.6006466042895915, "grad_norm": 2.644944429397583, "learning_rate": 3.6307341210639415e-06, "loss": 0.3843, "step": 14468 }, { "epoch": 0.6006881198138029, "grad_norm": 2.320079803466797, "learning_rate": 3.63008752021207e-06, "loss": 0.585, "step": 14469 }, { "epoch": 0.6007296353380142, "grad_norm": 2.2456252574920654, "learning_rate": 3.6294409441286133e-06, "loss": 0.5473, "step": 14470 }, { "epoch": 0.6007711508622255, "grad_norm": 2.170466423034668, "learning_rate": 3.6287943928252573e-06, "loss": 0.6015, "step": 14471 }, { "epoch": 0.6008126663864369, "grad_norm": 2.8193771839141846, "learning_rate": 3.628147866313694e-06, "loss": 0.4606, "step": 14472 }, { "epoch": 0.6008541819106482, "grad_norm": 2.315001964569092, "learning_rate": 3.6275013646056136e-06, "loss": 0.5632, "step": 14473 }, { "epoch": 0.6008956974348596, "grad_norm": 2.6520471572875977, "learning_rate": 3.6268548877127036e-06, "loss": 0.5342, "step": 14474 }, { "epoch": 0.6009372129590709, "grad_norm": 2.425466537475586, "learning_rate": 3.626208435646652e-06, "loss": 0.5195, "step": 14475 }, { "epoch": 0.6009787284832823, "grad_norm": 2.0831656455993652, "learning_rate": 3.6255620084191495e-06, "loss": 0.3377, "step": 14476 }, { "epoch": 0.6010202440074935, "grad_norm": 2.500504493713379, "learning_rate": 3.624915606041881e-06, "loss": 0.6396, "step": 14477 }, { "epoch": 0.6010617595317049, "grad_norm": 2.3740272521972656, "learning_rate": 3.6242692285265343e-06, "loss": 0.5045, "step": 14478 }, { "epoch": 0.6011032750559162, "grad_norm": 2.200265645980835, "learning_rate": 3.6236228758847987e-06, "loss": 0.4608, "step": 14479 }, { "epoch": 0.6011447905801276, "grad_norm": 2.287475347518921, "learning_rate": 3.6229765481283563e-06, "loss": 0.4211, "step": 14480 }, { "epoch": 0.6011863061043389, "grad_norm": 2.6099884510040283, "learning_rate": 3.622330245268895e-06, "loss": 0.6048, "step": 14481 }, { "epoch": 0.6012278216285503, "grad_norm": 2.282719373703003, "learning_rate": 3.6216839673180994e-06, "loss": 0.6278, "step": 14482 }, { "epoch": 0.6012693371527615, "grad_norm": 2.0812973976135254, "learning_rate": 3.621037714287657e-06, "loss": 0.536, "step": 14483 }, { "epoch": 0.6013108526769729, "grad_norm": 2.355440616607666, "learning_rate": 3.6203914861892483e-06, "loss": 0.5946, "step": 14484 }, { "epoch": 0.6013523682011842, "grad_norm": 1.9442936182022095, "learning_rate": 3.6197452830345592e-06, "loss": 0.4128, "step": 14485 }, { "epoch": 0.6013938837253956, "grad_norm": 2.578232526779175, "learning_rate": 3.6190991048352744e-06, "loss": 0.5079, "step": 14486 }, { "epoch": 0.6014353992496069, "grad_norm": 2.477487087249756, "learning_rate": 3.6184529516030755e-06, "loss": 0.5434, "step": 14487 }, { "epoch": 0.6014769147738183, "grad_norm": 2.6843297481536865, "learning_rate": 3.6178068233496444e-06, "loss": 0.5522, "step": 14488 }, { "epoch": 0.6015184302980295, "grad_norm": 2.3430838584899902, "learning_rate": 3.6171607200866655e-06, "loss": 0.4199, "step": 14489 }, { "epoch": 0.6015599458222409, "grad_norm": 2.3111369609832764, "learning_rate": 3.6165146418258187e-06, "loss": 0.5511, "step": 14490 }, { "epoch": 0.6016014613464522, "grad_norm": 2.275481939315796, "learning_rate": 3.615868588578785e-06, "loss": 0.4286, "step": 14491 }, { "epoch": 0.6016429768706636, "grad_norm": 2.590684652328491, "learning_rate": 3.6152225603572487e-06, "loss": 0.5557, "step": 14492 }, { "epoch": 0.6016844923948749, "grad_norm": 2.312849283218384, "learning_rate": 3.6145765571728863e-06, "loss": 0.4721, "step": 14493 }, { "epoch": 0.6017260079190863, "grad_norm": 2.2809128761291504, "learning_rate": 3.613930579037378e-06, "loss": 0.5825, "step": 14494 }, { "epoch": 0.6017675234432975, "grad_norm": 3.1083457469940186, "learning_rate": 3.6132846259624054e-06, "loss": 0.5143, "step": 14495 }, { "epoch": 0.6018090389675089, "grad_norm": 2.1225454807281494, "learning_rate": 3.6126386979596477e-06, "loss": 0.421, "step": 14496 }, { "epoch": 0.6018505544917202, "grad_norm": 2.2412192821502686, "learning_rate": 3.6119927950407805e-06, "loss": 0.5199, "step": 14497 }, { "epoch": 0.6018920700159316, "grad_norm": 2.2502682209014893, "learning_rate": 3.6113469172174832e-06, "loss": 0.5183, "step": 14498 }, { "epoch": 0.6019335855401429, "grad_norm": 2.4055635929107666, "learning_rate": 3.6107010645014355e-06, "loss": 0.4189, "step": 14499 }, { "epoch": 0.6019751010643543, "grad_norm": 2.42185640335083, "learning_rate": 3.610055236904313e-06, "loss": 0.6175, "step": 14500 }, { "epoch": 0.6020166165885655, "grad_norm": 2.1813602447509766, "learning_rate": 3.6094094344377905e-06, "loss": 0.5714, "step": 14501 }, { "epoch": 0.6020581321127769, "grad_norm": 2.4572980403900146, "learning_rate": 3.608763657113549e-06, "loss": 0.6631, "step": 14502 }, { "epoch": 0.6020996476369883, "grad_norm": 2.6529006958007812, "learning_rate": 3.6081179049432592e-06, "loss": 0.5794, "step": 14503 }, { "epoch": 0.6021411631611996, "grad_norm": 2.5222525596618652, "learning_rate": 3.607472177938599e-06, "loss": 0.5322, "step": 14504 }, { "epoch": 0.602182678685411, "grad_norm": 2.2226369380950928, "learning_rate": 3.606826476111245e-06, "loss": 0.6182, "step": 14505 }, { "epoch": 0.6022241942096223, "grad_norm": 2.3401529788970947, "learning_rate": 3.6061807994728682e-06, "loss": 0.4662, "step": 14506 }, { "epoch": 0.6022657097338336, "grad_norm": 2.205726385116577, "learning_rate": 3.6055351480351443e-06, "loss": 0.4818, "step": 14507 }, { "epoch": 0.6023072252580449, "grad_norm": 2.3288819789886475, "learning_rate": 3.604889521809747e-06, "loss": 0.4967, "step": 14508 }, { "epoch": 0.6023487407822563, "grad_norm": 1.9068580865859985, "learning_rate": 3.6042439208083513e-06, "loss": 0.4569, "step": 14509 }, { "epoch": 0.6023902563064676, "grad_norm": 2.037665605545044, "learning_rate": 3.6035983450426256e-06, "loss": 0.4542, "step": 14510 }, { "epoch": 0.602431771830679, "grad_norm": 2.5849151611328125, "learning_rate": 3.6029527945242436e-06, "loss": 0.531, "step": 14511 }, { "epoch": 0.6024732873548903, "grad_norm": 2.0729358196258545, "learning_rate": 3.6023072692648797e-06, "loss": 0.5183, "step": 14512 }, { "epoch": 0.6025148028791016, "grad_norm": 2.2137808799743652, "learning_rate": 3.6016617692762033e-06, "loss": 0.4491, "step": 14513 }, { "epoch": 0.6025563184033129, "grad_norm": 2.4316935539245605, "learning_rate": 3.6010162945698833e-06, "loss": 0.4917, "step": 14514 }, { "epoch": 0.6025978339275243, "grad_norm": 3.1128504276275635, "learning_rate": 3.6003708451575937e-06, "loss": 0.5204, "step": 14515 }, { "epoch": 0.6026393494517356, "grad_norm": 2.5419530868530273, "learning_rate": 3.5997254210510014e-06, "loss": 0.5053, "step": 14516 }, { "epoch": 0.602680864975947, "grad_norm": 2.5259058475494385, "learning_rate": 3.5990800222617774e-06, "loss": 0.7836, "step": 14517 }, { "epoch": 0.6027223805001583, "grad_norm": 2.1267504692077637, "learning_rate": 3.598434648801592e-06, "loss": 0.5042, "step": 14518 }, { "epoch": 0.6027638960243696, "grad_norm": 2.308781385421753, "learning_rate": 3.597789300682111e-06, "loss": 0.5361, "step": 14519 }, { "epoch": 0.6028054115485809, "grad_norm": 2.596278190612793, "learning_rate": 3.597143977915002e-06, "loss": 0.5877, "step": 14520 }, { "epoch": 0.6028469270727923, "grad_norm": 1.9303451776504517, "learning_rate": 3.596498680511935e-06, "loss": 0.435, "step": 14521 }, { "epoch": 0.6028884425970036, "grad_norm": 2.3974523544311523, "learning_rate": 3.595853408484579e-06, "loss": 0.2877, "step": 14522 }, { "epoch": 0.602929958121215, "grad_norm": 2.170269727706909, "learning_rate": 3.5952081618445954e-06, "loss": 0.407, "step": 14523 }, { "epoch": 0.6029714736454262, "grad_norm": 2.363983631134033, "learning_rate": 3.594562940603653e-06, "loss": 0.5413, "step": 14524 }, { "epoch": 0.6030129891696376, "grad_norm": 2.043821096420288, "learning_rate": 3.593917744773419e-06, "loss": 0.531, "step": 14525 }, { "epoch": 0.6030545046938489, "grad_norm": 2.231414794921875, "learning_rate": 3.593272574365557e-06, "loss": 0.4802, "step": 14526 }, { "epoch": 0.6030960202180603, "grad_norm": 2.696478843688965, "learning_rate": 3.592627429391732e-06, "loss": 0.474, "step": 14527 }, { "epoch": 0.6031375357422716, "grad_norm": 2.2325124740600586, "learning_rate": 3.5919823098636096e-06, "loss": 0.3994, "step": 14528 }, { "epoch": 0.603179051266483, "grad_norm": 2.099980115890503, "learning_rate": 3.5913372157928515e-06, "loss": 0.4082, "step": 14529 }, { "epoch": 0.6032205667906942, "grad_norm": 2.4946722984313965, "learning_rate": 3.5906921471911247e-06, "loss": 0.5922, "step": 14530 }, { "epoch": 0.6032620823149056, "grad_norm": 2.4646522998809814, "learning_rate": 3.590047104070089e-06, "loss": 0.4325, "step": 14531 }, { "epoch": 0.6033035978391169, "grad_norm": 2.1951537132263184, "learning_rate": 3.589402086441407e-06, "loss": 0.5464, "step": 14532 }, { "epoch": 0.6033451133633283, "grad_norm": 2.171470880508423, "learning_rate": 3.5887570943167423e-06, "loss": 0.5722, "step": 14533 }, { "epoch": 0.6033866288875397, "grad_norm": 2.4501748085021973, "learning_rate": 3.588112127707756e-06, "loss": 0.626, "step": 14534 }, { "epoch": 0.603428144411751, "grad_norm": 2.6386542320251465, "learning_rate": 3.5874671866261112e-06, "loss": 0.659, "step": 14535 }, { "epoch": 0.6034696599359624, "grad_norm": 2.483105421066284, "learning_rate": 3.586822271083464e-06, "loss": 0.5398, "step": 14536 }, { "epoch": 0.6035111754601736, "grad_norm": 2.589083433151245, "learning_rate": 3.586177381091479e-06, "loss": 0.4938, "step": 14537 }, { "epoch": 0.603552690984385, "grad_norm": 2.3882646560668945, "learning_rate": 3.585532516661815e-06, "loss": 0.5216, "step": 14538 }, { "epoch": 0.6035942065085963, "grad_norm": 2.451693296432495, "learning_rate": 3.5848876778061304e-06, "loss": 0.4268, "step": 14539 }, { "epoch": 0.6036357220328077, "grad_norm": 2.387929916381836, "learning_rate": 3.584242864536085e-06, "loss": 0.4779, "step": 14540 }, { "epoch": 0.603677237557019, "grad_norm": 2.173475503921509, "learning_rate": 3.583598076863335e-06, "loss": 0.4754, "step": 14541 }, { "epoch": 0.6037187530812304, "grad_norm": 2.460625410079956, "learning_rate": 3.5829533147995403e-06, "loss": 0.4811, "step": 14542 }, { "epoch": 0.6037602686054416, "grad_norm": 2.137751579284668, "learning_rate": 3.5823085783563603e-06, "loss": 0.6358, "step": 14543 }, { "epoch": 0.603801784129653, "grad_norm": 2.503323793411255, "learning_rate": 3.5816638675454476e-06, "loss": 0.4847, "step": 14544 }, { "epoch": 0.6038432996538643, "grad_norm": 3.7901852130889893, "learning_rate": 3.581019182378461e-06, "loss": 0.377, "step": 14545 }, { "epoch": 0.6038848151780757, "grad_norm": 2.652095079421997, "learning_rate": 3.5803745228670566e-06, "loss": 0.456, "step": 14546 }, { "epoch": 0.603926330702287, "grad_norm": 2.162874698638916, "learning_rate": 3.5797298890228903e-06, "loss": 0.3829, "step": 14547 }, { "epoch": 0.6039678462264984, "grad_norm": 3.4389047622680664, "learning_rate": 3.579085280857619e-06, "loss": 0.4841, "step": 14548 }, { "epoch": 0.6040093617507096, "grad_norm": 2.393070936203003, "learning_rate": 3.5784406983828927e-06, "loss": 0.4845, "step": 14549 }, { "epoch": 0.604050877274921, "grad_norm": 3.03954815864563, "learning_rate": 3.577796141610369e-06, "loss": 0.4341, "step": 14550 }, { "epoch": 0.6040923927991323, "grad_norm": 2.671416997909546, "learning_rate": 3.577151610551701e-06, "loss": 0.6398, "step": 14551 }, { "epoch": 0.6041339083233437, "grad_norm": 2.6864309310913086, "learning_rate": 3.5765071052185428e-06, "loss": 0.5207, "step": 14552 }, { "epoch": 0.604175423847555, "grad_norm": 3.029043674468994, "learning_rate": 3.575862625622547e-06, "loss": 0.5686, "step": 14553 }, { "epoch": 0.6042169393717663, "grad_norm": 2.2955901622772217, "learning_rate": 3.575218171775363e-06, "loss": 0.4979, "step": 14554 }, { "epoch": 0.6042584548959776, "grad_norm": 2.270350933074951, "learning_rate": 3.5745737436886465e-06, "loss": 0.5294, "step": 14555 }, { "epoch": 0.604299970420189, "grad_norm": 2.4625911712646484, "learning_rate": 3.5739293413740483e-06, "loss": 0.4187, "step": 14556 }, { "epoch": 0.6043414859444003, "grad_norm": 2.2445526123046875, "learning_rate": 3.5732849648432177e-06, "loss": 0.4994, "step": 14557 }, { "epoch": 0.6043830014686117, "grad_norm": 2.3360965251922607, "learning_rate": 3.5726406141078057e-06, "loss": 0.4954, "step": 14558 }, { "epoch": 0.604424516992823, "grad_norm": 2.7125017642974854, "learning_rate": 3.5719962891794624e-06, "loss": 0.3777, "step": 14559 }, { "epoch": 0.6044660325170343, "grad_norm": 2.7550246715545654, "learning_rate": 3.5713519900698386e-06, "loss": 0.5273, "step": 14560 }, { "epoch": 0.6045075480412456, "grad_norm": 2.4387383460998535, "learning_rate": 3.5707077167905846e-06, "loss": 0.4453, "step": 14561 }, { "epoch": 0.604549063565457, "grad_norm": 2.0594875812530518, "learning_rate": 3.5700634693533443e-06, "loss": 0.5415, "step": 14562 }, { "epoch": 0.6045905790896683, "grad_norm": 2.273552656173706, "learning_rate": 3.569419247769769e-06, "loss": 0.4046, "step": 14563 }, { "epoch": 0.6046320946138797, "grad_norm": 2.828568696975708, "learning_rate": 3.568775052051507e-06, "loss": 0.4836, "step": 14564 }, { "epoch": 0.6046736101380911, "grad_norm": 2.2599430084228516, "learning_rate": 3.5681308822102046e-06, "loss": 0.5208, "step": 14565 }, { "epoch": 0.6047151256623023, "grad_norm": 2.108670234680176, "learning_rate": 3.5674867382575083e-06, "loss": 0.5022, "step": 14566 }, { "epoch": 0.6047566411865137, "grad_norm": 2.551992416381836, "learning_rate": 3.5668426202050645e-06, "loss": 0.4779, "step": 14567 }, { "epoch": 0.604798156710725, "grad_norm": 3.5789668560028076, "learning_rate": 3.5661985280645185e-06, "loss": 0.6051, "step": 14568 }, { "epoch": 0.6048396722349364, "grad_norm": 2.4860243797302246, "learning_rate": 3.5655544618475184e-06, "loss": 0.4616, "step": 14569 }, { "epoch": 0.6048811877591477, "grad_norm": 2.6875226497650146, "learning_rate": 3.564910421565705e-06, "loss": 0.5103, "step": 14570 }, { "epoch": 0.6049227032833591, "grad_norm": 2.2663843631744385, "learning_rate": 3.5642664072307256e-06, "loss": 0.5286, "step": 14571 }, { "epoch": 0.6049642188075703, "grad_norm": 2.5454370975494385, "learning_rate": 3.5636224188542224e-06, "loss": 0.5301, "step": 14572 }, { "epoch": 0.6050057343317817, "grad_norm": 2.1298210620880127, "learning_rate": 3.5629784564478397e-06, "loss": 0.5824, "step": 14573 }, { "epoch": 0.605047249855993, "grad_norm": 2.162990093231201, "learning_rate": 3.562334520023224e-06, "loss": 0.4819, "step": 14574 }, { "epoch": 0.6050887653802044, "grad_norm": 2.066826820373535, "learning_rate": 3.561690609592012e-06, "loss": 0.4467, "step": 14575 }, { "epoch": 0.6051302809044157, "grad_norm": 2.3810935020446777, "learning_rate": 3.5610467251658476e-06, "loss": 0.4942, "step": 14576 }, { "epoch": 0.6051717964286271, "grad_norm": 2.3489491939544678, "learning_rate": 3.560402866756375e-06, "loss": 0.5068, "step": 14577 }, { "epoch": 0.6052133119528383, "grad_norm": 2.2091522216796875, "learning_rate": 3.559759034375233e-06, "loss": 0.5307, "step": 14578 }, { "epoch": 0.6052548274770497, "grad_norm": 2.319906234741211, "learning_rate": 3.5591152280340636e-06, "loss": 0.4891, "step": 14579 }, { "epoch": 0.605296343001261, "grad_norm": 2.5780322551727295, "learning_rate": 3.5584714477445045e-06, "loss": 0.5768, "step": 14580 }, { "epoch": 0.6053378585254724, "grad_norm": 2.1522040367126465, "learning_rate": 3.5578276935181966e-06, "loss": 0.456, "step": 14581 }, { "epoch": 0.6053793740496837, "grad_norm": 1.8795658349990845, "learning_rate": 3.557183965366783e-06, "loss": 0.3605, "step": 14582 }, { "epoch": 0.6054208895738951, "grad_norm": 2.2268965244293213, "learning_rate": 3.5565402633018963e-06, "loss": 0.3655, "step": 14583 }, { "epoch": 0.6054624050981063, "grad_norm": 2.3122763633728027, "learning_rate": 3.5558965873351783e-06, "loss": 0.556, "step": 14584 }, { "epoch": 0.6055039206223177, "grad_norm": 2.304527997970581, "learning_rate": 3.5552529374782652e-06, "loss": 0.5586, "step": 14585 }, { "epoch": 0.605545436146529, "grad_norm": 2.426252841949463, "learning_rate": 3.5546093137427963e-06, "loss": 0.4584, "step": 14586 }, { "epoch": 0.6055869516707404, "grad_norm": 2.893031358718872, "learning_rate": 3.553965716140409e-06, "loss": 0.5835, "step": 14587 }, { "epoch": 0.6056284671949517, "grad_norm": 2.088791847229004, "learning_rate": 3.553322144682737e-06, "loss": 0.5566, "step": 14588 }, { "epoch": 0.6056699827191631, "grad_norm": 2.067582607269287, "learning_rate": 3.552678599381417e-06, "loss": 0.5678, "step": 14589 }, { "epoch": 0.6057114982433743, "grad_norm": 2.691509962081909, "learning_rate": 3.5520350802480864e-06, "loss": 0.6293, "step": 14590 }, { "epoch": 0.6057530137675857, "grad_norm": 2.400052785873413, "learning_rate": 3.5513915872943788e-06, "loss": 0.6094, "step": 14591 }, { "epoch": 0.605794529291797, "grad_norm": 2.541471481323242, "learning_rate": 3.550748120531929e-06, "loss": 0.4905, "step": 14592 }, { "epoch": 0.6058360448160084, "grad_norm": 2.6783363819122314, "learning_rate": 3.550104679972369e-06, "loss": 0.4317, "step": 14593 }, { "epoch": 0.6058775603402197, "grad_norm": 2.9633848667144775, "learning_rate": 3.549461265627336e-06, "loss": 0.561, "step": 14594 }, { "epoch": 0.6059190758644311, "grad_norm": 2.175091028213501, "learning_rate": 3.5488178775084626e-06, "loss": 0.3698, "step": 14595 }, { "epoch": 0.6059605913886424, "grad_norm": 2.161288261413574, "learning_rate": 3.548174515627378e-06, "loss": 0.5287, "step": 14596 }, { "epoch": 0.6060021069128537, "grad_norm": 1.9660680294036865, "learning_rate": 3.547531179995717e-06, "loss": 0.474, "step": 14597 }, { "epoch": 0.6060436224370651, "grad_norm": 2.5863959789276123, "learning_rate": 3.5468878706251104e-06, "loss": 0.5242, "step": 14598 }, { "epoch": 0.6060851379612764, "grad_norm": 2.513671398162842, "learning_rate": 3.54624458752719e-06, "loss": 0.606, "step": 14599 }, { "epoch": 0.6061266534854878, "grad_norm": 2.184464931488037, "learning_rate": 3.5456013307135884e-06, "loss": 0.5887, "step": 14600 }, { "epoch": 0.606168169009699, "grad_norm": 2.4647955894470215, "learning_rate": 3.5449581001959327e-06, "loss": 0.5808, "step": 14601 }, { "epoch": 0.6062096845339104, "grad_norm": 2.103098154067993, "learning_rate": 3.544314895985853e-06, "loss": 0.575, "step": 14602 }, { "epoch": 0.6062512000581217, "grad_norm": 2.838545322418213, "learning_rate": 3.543671718094981e-06, "loss": 0.4345, "step": 14603 }, { "epoch": 0.6062927155823331, "grad_norm": 2.127293348312378, "learning_rate": 3.5430285665349444e-06, "loss": 0.4138, "step": 14604 }, { "epoch": 0.6063342311065444, "grad_norm": 2.335036039352417, "learning_rate": 3.5423854413173707e-06, "loss": 0.4765, "step": 14605 }, { "epoch": 0.6063757466307558, "grad_norm": 2.221386432647705, "learning_rate": 3.5417423424538877e-06, "loss": 0.4395, "step": 14606 }, { "epoch": 0.606417262154967, "grad_norm": 2.5710089206695557, "learning_rate": 3.5410992699561237e-06, "loss": 0.4132, "step": 14607 }, { "epoch": 0.6064587776791784, "grad_norm": 2.170705556869507, "learning_rate": 3.540456223835707e-06, "loss": 0.5874, "step": 14608 }, { "epoch": 0.6065002932033897, "grad_norm": 2.4148266315460205, "learning_rate": 3.5398132041042605e-06, "loss": 0.5638, "step": 14609 }, { "epoch": 0.6065418087276011, "grad_norm": 2.6987674236297607, "learning_rate": 3.5391702107734123e-06, "loss": 0.5545, "step": 14610 }, { "epoch": 0.6065833242518124, "grad_norm": 1.9804037809371948, "learning_rate": 3.5385272438547874e-06, "loss": 0.5525, "step": 14611 }, { "epoch": 0.6066248397760238, "grad_norm": 2.3442091941833496, "learning_rate": 3.537884303360012e-06, "loss": 0.5429, "step": 14612 }, { "epoch": 0.606666355300235, "grad_norm": 2.4782650470733643, "learning_rate": 3.5372413893007106e-06, "loss": 0.5536, "step": 14613 }, { "epoch": 0.6067078708244464, "grad_norm": 2.3540256023406982, "learning_rate": 3.5365985016885053e-06, "loss": 0.5232, "step": 14614 }, { "epoch": 0.6067493863486577, "grad_norm": 2.459958076477051, "learning_rate": 3.5359556405350204e-06, "loss": 0.4626, "step": 14615 }, { "epoch": 0.6067909018728691, "grad_norm": 2.250005006790161, "learning_rate": 3.535312805851881e-06, "loss": 0.3709, "step": 14616 }, { "epoch": 0.6068324173970804, "grad_norm": 2.6522610187530518, "learning_rate": 3.534669997650708e-06, "loss": 0.6635, "step": 14617 }, { "epoch": 0.6068739329212918, "grad_norm": 1.993748664855957, "learning_rate": 3.5340272159431243e-06, "loss": 0.3976, "step": 14618 }, { "epoch": 0.606915448445503, "grad_norm": 2.4068567752838135, "learning_rate": 3.5333844607407497e-06, "loss": 0.4853, "step": 14619 }, { "epoch": 0.6069569639697144, "grad_norm": 2.6017673015594482, "learning_rate": 3.532741732055206e-06, "loss": 0.5817, "step": 14620 }, { "epoch": 0.6069984794939257, "grad_norm": 2.6384613513946533, "learning_rate": 3.5320990298981177e-06, "loss": 0.432, "step": 14621 }, { "epoch": 0.6070399950181371, "grad_norm": 2.1795847415924072, "learning_rate": 3.5314563542810996e-06, "loss": 0.4339, "step": 14622 }, { "epoch": 0.6070815105423484, "grad_norm": 2.415886640548706, "learning_rate": 3.5308137052157742e-06, "loss": 0.5866, "step": 14623 }, { "epoch": 0.6071230260665598, "grad_norm": 1.9611612558364868, "learning_rate": 3.5301710827137604e-06, "loss": 0.497, "step": 14624 }, { "epoch": 0.607164541590771, "grad_norm": 1.9750609397888184, "learning_rate": 3.5295284867866764e-06, "loss": 0.4667, "step": 14625 }, { "epoch": 0.6072060571149824, "grad_norm": 2.4325597286224365, "learning_rate": 3.5288859174461444e-06, "loss": 0.606, "step": 14626 }, { "epoch": 0.6072475726391938, "grad_norm": 2.0241751670837402, "learning_rate": 3.528243374703776e-06, "loss": 0.5778, "step": 14627 }, { "epoch": 0.6072890881634051, "grad_norm": 2.2518515586853027, "learning_rate": 3.527600858571192e-06, "loss": 0.5223, "step": 14628 }, { "epoch": 0.6073306036876165, "grad_norm": 2.8121683597564697, "learning_rate": 3.5269583690600097e-06, "loss": 0.5824, "step": 14629 }, { "epoch": 0.6073721192118278, "grad_norm": 2.205573320388794, "learning_rate": 3.526315906181845e-06, "loss": 0.5009, "step": 14630 }, { "epoch": 0.6074136347360392, "grad_norm": 2.465837240219116, "learning_rate": 3.525673469948313e-06, "loss": 0.5088, "step": 14631 }, { "epoch": 0.6074551502602504, "grad_norm": 2.819552183151245, "learning_rate": 3.525031060371029e-06, "loss": 0.4527, "step": 14632 }, { "epoch": 0.6074966657844618, "grad_norm": 1.8583496809005737, "learning_rate": 3.524388677461609e-06, "loss": 0.4745, "step": 14633 }, { "epoch": 0.6075381813086731, "grad_norm": 2.920469045639038, "learning_rate": 3.523746321231668e-06, "loss": 0.4935, "step": 14634 }, { "epoch": 0.6075796968328845, "grad_norm": 2.406175136566162, "learning_rate": 3.523103991692818e-06, "loss": 0.5649, "step": 14635 }, { "epoch": 0.6076212123570958, "grad_norm": 2.305866003036499, "learning_rate": 3.5224616888566727e-06, "loss": 0.4686, "step": 14636 }, { "epoch": 0.6076627278813072, "grad_norm": 3.5866034030914307, "learning_rate": 3.521819412734846e-06, "loss": 0.4683, "step": 14637 }, { "epoch": 0.6077042434055184, "grad_norm": 2.47359561920166, "learning_rate": 3.521177163338951e-06, "loss": 0.4815, "step": 14638 }, { "epoch": 0.6077457589297298, "grad_norm": 2.491635799407959, "learning_rate": 3.5205349406806004e-06, "loss": 0.3854, "step": 14639 }, { "epoch": 0.6077872744539411, "grad_norm": 2.104349374771118, "learning_rate": 3.5198927447714034e-06, "loss": 0.5193, "step": 14640 }, { "epoch": 0.6078287899781525, "grad_norm": 2.2760496139526367, "learning_rate": 3.519250575622971e-06, "loss": 0.5127, "step": 14641 }, { "epoch": 0.6078703055023638, "grad_norm": 2.4227821826934814, "learning_rate": 3.5186084332469174e-06, "loss": 0.3469, "step": 14642 }, { "epoch": 0.6079118210265751, "grad_norm": 2.340298652648926, "learning_rate": 3.5179663176548494e-06, "loss": 0.6271, "step": 14643 }, { "epoch": 0.6079533365507864, "grad_norm": 2.432645559310913, "learning_rate": 3.5173242288583777e-06, "loss": 0.4452, "step": 14644 }, { "epoch": 0.6079948520749978, "grad_norm": 2.488464832305908, "learning_rate": 3.5166821668691096e-06, "loss": 0.4482, "step": 14645 }, { "epoch": 0.6080363675992091, "grad_norm": 2.4720566272735596, "learning_rate": 3.5160401316986564e-06, "loss": 0.5723, "step": 14646 }, { "epoch": 0.6080778831234205, "grad_norm": 2.379546642303467, "learning_rate": 3.5153981233586277e-06, "loss": 0.4789, "step": 14647 }, { "epoch": 0.6081193986476318, "grad_norm": 2.069678544998169, "learning_rate": 3.514756141860626e-06, "loss": 0.488, "step": 14648 }, { "epoch": 0.6081609141718431, "grad_norm": 2.209444284439087, "learning_rate": 3.5141141872162613e-06, "loss": 0.5129, "step": 14649 }, { "epoch": 0.6082024296960544, "grad_norm": 2.1639227867126465, "learning_rate": 3.5134722594371408e-06, "loss": 0.4413, "step": 14650 }, { "epoch": 0.6082439452202658, "grad_norm": 2.465579032897949, "learning_rate": 3.51283035853487e-06, "loss": 0.4525, "step": 14651 }, { "epoch": 0.6082854607444771, "grad_norm": 2.4203507900238037, "learning_rate": 3.5121884845210576e-06, "loss": 0.6001, "step": 14652 }, { "epoch": 0.6083269762686885, "grad_norm": 2.556612253189087, "learning_rate": 3.5115466374073037e-06, "loss": 0.5403, "step": 14653 }, { "epoch": 0.6083684917928998, "grad_norm": 2.2501795291900635, "learning_rate": 3.5109048172052163e-06, "loss": 0.4882, "step": 14654 }, { "epoch": 0.6084100073171111, "grad_norm": 2.3940911293029785, "learning_rate": 3.5102630239263986e-06, "loss": 0.5902, "step": 14655 }, { "epoch": 0.6084515228413224, "grad_norm": 3.0546181201934814, "learning_rate": 3.5096212575824556e-06, "loss": 0.6218, "step": 14656 }, { "epoch": 0.6084930383655338, "grad_norm": 2.231444835662842, "learning_rate": 3.5089795181849897e-06, "loss": 0.6299, "step": 14657 }, { "epoch": 0.6085345538897452, "grad_norm": 2.319885730743408, "learning_rate": 3.508337805745603e-06, "loss": 0.4469, "step": 14658 }, { "epoch": 0.6085760694139565, "grad_norm": 3.1615219116210938, "learning_rate": 3.5076961202758988e-06, "loss": 0.4332, "step": 14659 }, { "epoch": 0.6086175849381679, "grad_norm": 2.1210172176361084, "learning_rate": 3.5070544617874807e-06, "loss": 0.6275, "step": 14660 }, { "epoch": 0.6086591004623791, "grad_norm": 2.7599635124206543, "learning_rate": 3.5064128302919466e-06, "loss": 0.5583, "step": 14661 }, { "epoch": 0.6087006159865905, "grad_norm": 2.765106201171875, "learning_rate": 3.5057712258008985e-06, "loss": 0.5173, "step": 14662 }, { "epoch": 0.6087421315108018, "grad_norm": 2.0138611793518066, "learning_rate": 3.505129648325938e-06, "loss": 0.5245, "step": 14663 }, { "epoch": 0.6087836470350132, "grad_norm": 2.783777952194214, "learning_rate": 3.5044880978786633e-06, "loss": 0.7151, "step": 14664 }, { "epoch": 0.6088251625592245, "grad_norm": 2.5159218311309814, "learning_rate": 3.503846574470678e-06, "loss": 0.5147, "step": 14665 }, { "epoch": 0.6088666780834359, "grad_norm": 2.633208751678467, "learning_rate": 3.503205078113575e-06, "loss": 0.4883, "step": 14666 }, { "epoch": 0.6089081936076471, "grad_norm": 2.299630880355835, "learning_rate": 3.5025636088189575e-06, "loss": 0.4171, "step": 14667 }, { "epoch": 0.6089497091318585, "grad_norm": 2.2038445472717285, "learning_rate": 3.5019221665984195e-06, "loss": 0.5276, "step": 14668 }, { "epoch": 0.6089912246560698, "grad_norm": 2.1790895462036133, "learning_rate": 3.501280751463562e-06, "loss": 0.6588, "step": 14669 }, { "epoch": 0.6090327401802812, "grad_norm": 2.4485089778900146, "learning_rate": 3.5006393634259807e-06, "loss": 0.553, "step": 14670 }, { "epoch": 0.6090742557044925, "grad_norm": 1.8889007568359375, "learning_rate": 3.4999980024972703e-06, "loss": 0.4595, "step": 14671 }, { "epoch": 0.6091157712287039, "grad_norm": 2.6215550899505615, "learning_rate": 3.499356668689029e-06, "loss": 0.6382, "step": 14672 }, { "epoch": 0.6091572867529151, "grad_norm": 2.5570461750030518, "learning_rate": 3.4987153620128534e-06, "loss": 0.5103, "step": 14673 }, { "epoch": 0.6091988022771265, "grad_norm": 2.2363269329071045, "learning_rate": 3.498074082480335e-06, "loss": 0.5121, "step": 14674 }, { "epoch": 0.6092403178013378, "grad_norm": 2.5521912574768066, "learning_rate": 3.4974328301030696e-06, "loss": 0.6321, "step": 14675 }, { "epoch": 0.6092818333255492, "grad_norm": 2.212822437286377, "learning_rate": 3.496791604892652e-06, "loss": 0.4821, "step": 14676 }, { "epoch": 0.6093233488497605, "grad_norm": 2.6206023693084717, "learning_rate": 3.4961504068606778e-06, "loss": 0.5288, "step": 14677 }, { "epoch": 0.6093648643739719, "grad_norm": 2.44332218170166, "learning_rate": 3.495509236018735e-06, "loss": 0.4608, "step": 14678 }, { "epoch": 0.6094063798981831, "grad_norm": 2.14148211479187, "learning_rate": 3.4948680923784197e-06, "loss": 0.4258, "step": 14679 }, { "epoch": 0.6094478954223945, "grad_norm": 2.486658811569214, "learning_rate": 3.494226975951323e-06, "loss": 0.5653, "step": 14680 }, { "epoch": 0.6094894109466058, "grad_norm": 1.9796799421310425, "learning_rate": 3.4935858867490365e-06, "loss": 0.423, "step": 14681 }, { "epoch": 0.6095309264708172, "grad_norm": 2.1520533561706543, "learning_rate": 3.4929448247831523e-06, "loss": 0.4434, "step": 14682 }, { "epoch": 0.6095724419950285, "grad_norm": 2.8569936752319336, "learning_rate": 3.4923037900652596e-06, "loss": 0.508, "step": 14683 }, { "epoch": 0.6096139575192399, "grad_norm": 2.5558743476867676, "learning_rate": 3.491662782606948e-06, "loss": 0.5159, "step": 14684 }, { "epoch": 0.6096554730434511, "grad_norm": 2.3398759365081787, "learning_rate": 3.4910218024198084e-06, "loss": 0.6513, "step": 14685 }, { "epoch": 0.6096969885676625, "grad_norm": 2.17596173286438, "learning_rate": 3.4903808495154314e-06, "loss": 0.3995, "step": 14686 }, { "epoch": 0.6097385040918738, "grad_norm": 2.3968665599823, "learning_rate": 3.4897399239054015e-06, "loss": 0.5336, "step": 14687 }, { "epoch": 0.6097800196160852, "grad_norm": 2.573220729827881, "learning_rate": 3.489099025601309e-06, "loss": 0.4523, "step": 14688 }, { "epoch": 0.6098215351402966, "grad_norm": 2.7150518894195557, "learning_rate": 3.4884581546147424e-06, "loss": 0.5938, "step": 14689 }, { "epoch": 0.6098630506645079, "grad_norm": 2.365987539291382, "learning_rate": 3.4878173109572893e-06, "loss": 0.4946, "step": 14690 }, { "epoch": 0.6099045661887192, "grad_norm": 2.8443703651428223, "learning_rate": 3.487176494640533e-06, "loss": 0.5298, "step": 14691 }, { "epoch": 0.6099460817129305, "grad_norm": 2.3326950073242188, "learning_rate": 3.4865357056760617e-06, "loss": 0.3825, "step": 14692 }, { "epoch": 0.6099875972371419, "grad_norm": 2.796194314956665, "learning_rate": 3.485894944075463e-06, "loss": 0.475, "step": 14693 }, { "epoch": 0.6100291127613532, "grad_norm": 2.6299588680267334, "learning_rate": 3.4852542098503183e-06, "loss": 0.4504, "step": 14694 }, { "epoch": 0.6100706282855646, "grad_norm": 2.9591546058654785, "learning_rate": 3.484613503012215e-06, "loss": 0.5183, "step": 14695 }, { "epoch": 0.6101121438097759, "grad_norm": 2.409203290939331, "learning_rate": 3.4839728235727372e-06, "loss": 0.5382, "step": 14696 }, { "epoch": 0.6101536593339872, "grad_norm": 2.3859457969665527, "learning_rate": 3.4833321715434657e-06, "loss": 0.5221, "step": 14697 }, { "epoch": 0.6101951748581985, "grad_norm": 3.5350773334503174, "learning_rate": 3.482691546935987e-06, "loss": 0.6522, "step": 14698 }, { "epoch": 0.6102366903824099, "grad_norm": 2.28193736076355, "learning_rate": 3.482050949761883e-06, "loss": 0.4831, "step": 14699 }, { "epoch": 0.6102782059066212, "grad_norm": 3.3706257343292236, "learning_rate": 3.481410380032734e-06, "loss": 0.6332, "step": 14700 }, { "epoch": 0.6103197214308326, "grad_norm": 2.7946670055389404, "learning_rate": 3.480769837760123e-06, "loss": 0.5478, "step": 14701 }, { "epoch": 0.6103612369550439, "grad_norm": 2.503838300704956, "learning_rate": 3.480129322955631e-06, "loss": 0.4293, "step": 14702 }, { "epoch": 0.6104027524792552, "grad_norm": 2.2592923641204834, "learning_rate": 3.4794888356308415e-06, "loss": 0.5928, "step": 14703 }, { "epoch": 0.6104442680034665, "grad_norm": 2.737501859664917, "learning_rate": 3.4788483757973298e-06, "loss": 0.5116, "step": 14704 }, { "epoch": 0.6104857835276779, "grad_norm": 2.3701047897338867, "learning_rate": 3.4782079434666776e-06, "loss": 0.6133, "step": 14705 }, { "epoch": 0.6105272990518892, "grad_norm": 2.607074499130249, "learning_rate": 3.477567538650466e-06, "loss": 0.5342, "step": 14706 }, { "epoch": 0.6105688145761006, "grad_norm": 2.5528619289398193, "learning_rate": 3.4769271613602705e-06, "loss": 0.5064, "step": 14707 }, { "epoch": 0.6106103301003118, "grad_norm": 2.188999891281128, "learning_rate": 3.4762868116076722e-06, "loss": 0.3902, "step": 14708 }, { "epoch": 0.6106518456245232, "grad_norm": 2.3256688117980957, "learning_rate": 3.4756464894042475e-06, "loss": 0.5122, "step": 14709 }, { "epoch": 0.6106933611487345, "grad_norm": 2.357696771621704, "learning_rate": 3.475006194761573e-06, "loss": 0.4577, "step": 14710 }, { "epoch": 0.6107348766729459, "grad_norm": 2.3910984992980957, "learning_rate": 3.4743659276912258e-06, "loss": 0.4901, "step": 14711 }, { "epoch": 0.6107763921971572, "grad_norm": 2.4029574394226074, "learning_rate": 3.4737256882047837e-06, "loss": 0.455, "step": 14712 }, { "epoch": 0.6108179077213686, "grad_norm": 2.346043825149536, "learning_rate": 3.47308547631382e-06, "loss": 0.5643, "step": 14713 }, { "epoch": 0.6108594232455798, "grad_norm": 2.5178890228271484, "learning_rate": 3.47244529202991e-06, "loss": 0.5984, "step": 14714 }, { "epoch": 0.6109009387697912, "grad_norm": 2.1992032527923584, "learning_rate": 3.4718051353646304e-06, "loss": 0.5915, "step": 14715 }, { "epoch": 0.6109424542940025, "grad_norm": 1.8133138418197632, "learning_rate": 3.471165006329556e-06, "loss": 0.5576, "step": 14716 }, { "epoch": 0.6109839698182139, "grad_norm": 2.646320104598999, "learning_rate": 3.470524904936257e-06, "loss": 0.5296, "step": 14717 }, { "epoch": 0.6110254853424252, "grad_norm": 2.80157208442688, "learning_rate": 3.4698848311963085e-06, "loss": 0.4768, "step": 14718 }, { "epoch": 0.6110670008666366, "grad_norm": 2.44974946975708, "learning_rate": 3.4692447851212844e-06, "loss": 0.4078, "step": 14719 }, { "epoch": 0.611108516390848, "grad_norm": 2.2693285942077637, "learning_rate": 3.4686047667227553e-06, "loss": 0.6643, "step": 14720 }, { "epoch": 0.6111500319150592, "grad_norm": 2.105090379714966, "learning_rate": 3.4679647760122936e-06, "loss": 0.481, "step": 14721 }, { "epoch": 0.6111915474392706, "grad_norm": 2.397780179977417, "learning_rate": 3.4673248130014704e-06, "loss": 0.5053, "step": 14722 }, { "epoch": 0.6112330629634819, "grad_norm": 2.719095468521118, "learning_rate": 3.4666848777018556e-06, "loss": 0.4372, "step": 14723 }, { "epoch": 0.6112745784876933, "grad_norm": 2.2631399631500244, "learning_rate": 3.4660449701250197e-06, "loss": 0.5053, "step": 14724 }, { "epoch": 0.6113160940119046, "grad_norm": 2.1919169425964355, "learning_rate": 3.4654050902825354e-06, "loss": 0.4685, "step": 14725 }, { "epoch": 0.611357609536116, "grad_norm": 2.601860523223877, "learning_rate": 3.4647652381859675e-06, "loss": 0.5751, "step": 14726 }, { "epoch": 0.6113991250603272, "grad_norm": 2.4475271701812744, "learning_rate": 3.464125413846886e-06, "loss": 0.5362, "step": 14727 }, { "epoch": 0.6114406405845386, "grad_norm": 2.698499917984009, "learning_rate": 3.4634856172768605e-06, "loss": 0.5774, "step": 14728 }, { "epoch": 0.6114821561087499, "grad_norm": 2.6056411266326904, "learning_rate": 3.4628458484874594e-06, "loss": 0.5548, "step": 14729 }, { "epoch": 0.6115236716329613, "grad_norm": 2.7373697757720947, "learning_rate": 3.4622061074902465e-06, "loss": 0.5567, "step": 14730 }, { "epoch": 0.6115651871571726, "grad_norm": 1.7762337923049927, "learning_rate": 3.4615663942967905e-06, "loss": 0.4847, "step": 14731 }, { "epoch": 0.611606702681384, "grad_norm": 2.3581173419952393, "learning_rate": 3.460926708918658e-06, "loss": 0.5238, "step": 14732 }, { "epoch": 0.6116482182055952, "grad_norm": 2.1052045822143555, "learning_rate": 3.4602870513674135e-06, "loss": 0.47, "step": 14733 }, { "epoch": 0.6116897337298066, "grad_norm": 2.479482889175415, "learning_rate": 3.4596474216546246e-06, "loss": 0.5035, "step": 14734 }, { "epoch": 0.6117312492540179, "grad_norm": 2.410475015640259, "learning_rate": 3.4590078197918538e-06, "loss": 0.3579, "step": 14735 }, { "epoch": 0.6117727647782293, "grad_norm": 2.007256031036377, "learning_rate": 3.4583682457906643e-06, "loss": 0.332, "step": 14736 }, { "epoch": 0.6118142803024406, "grad_norm": 1.913957953453064, "learning_rate": 3.4577286996626216e-06, "loss": 0.4452, "step": 14737 }, { "epoch": 0.611855795826652, "grad_norm": 2.3663206100463867, "learning_rate": 3.457089181419291e-06, "loss": 0.6169, "step": 14738 }, { "epoch": 0.6118973113508632, "grad_norm": 2.2541613578796387, "learning_rate": 3.45644969107223e-06, "loss": 0.5403, "step": 14739 }, { "epoch": 0.6119388268750746, "grad_norm": 2.269212245941162, "learning_rate": 3.455810228633004e-06, "loss": 0.5666, "step": 14740 }, { "epoch": 0.6119803423992859, "grad_norm": 2.6019420623779297, "learning_rate": 3.455170794113174e-06, "loss": 0.5026, "step": 14741 }, { "epoch": 0.6120218579234973, "grad_norm": 2.3983943462371826, "learning_rate": 3.454531387524303e-06, "loss": 0.4106, "step": 14742 }, { "epoch": 0.6120633734477086, "grad_norm": 2.699833869934082, "learning_rate": 3.4538920088779483e-06, "loss": 0.6857, "step": 14743 }, { "epoch": 0.61210488897192, "grad_norm": 2.1763830184936523, "learning_rate": 3.4532526581856718e-06, "loss": 0.4638, "step": 14744 }, { "epoch": 0.6121464044961312, "grad_norm": 2.128040075302124, "learning_rate": 3.4526133354590342e-06, "loss": 0.3188, "step": 14745 }, { "epoch": 0.6121879200203426, "grad_norm": 2.465270757675171, "learning_rate": 3.4519740407095913e-06, "loss": 0.5236, "step": 14746 }, { "epoch": 0.6122294355445539, "grad_norm": 1.9392157793045044, "learning_rate": 3.451334773948908e-06, "loss": 0.4231, "step": 14747 }, { "epoch": 0.6122709510687653, "grad_norm": 2.3863158226013184, "learning_rate": 3.4506955351885346e-06, "loss": 0.5511, "step": 14748 }, { "epoch": 0.6123124665929766, "grad_norm": 2.7866051197052, "learning_rate": 3.450056324440033e-06, "loss": 0.5879, "step": 14749 }, { "epoch": 0.612353982117188, "grad_norm": 3.296412229537964, "learning_rate": 3.4494171417149604e-06, "loss": 0.3493, "step": 14750 }, { "epoch": 0.6123954976413993, "grad_norm": 1.8898334503173828, "learning_rate": 3.4487779870248738e-06, "loss": 0.3939, "step": 14751 }, { "epoch": 0.6124370131656106, "grad_norm": 2.187741994857788, "learning_rate": 3.448138860381327e-06, "loss": 0.4812, "step": 14752 }, { "epoch": 0.612478528689822, "grad_norm": 2.384890079498291, "learning_rate": 3.4474997617958763e-06, "loss": 0.5218, "step": 14753 }, { "epoch": 0.6125200442140333, "grad_norm": 2.1980416774749756, "learning_rate": 3.446860691280078e-06, "loss": 0.5199, "step": 14754 }, { "epoch": 0.6125615597382447, "grad_norm": 2.2802398204803467, "learning_rate": 3.446221648845488e-06, "loss": 0.4822, "step": 14755 }, { "epoch": 0.6126030752624559, "grad_norm": 2.0856616497039795, "learning_rate": 3.445582634503656e-06, "loss": 0.5597, "step": 14756 }, { "epoch": 0.6126445907866673, "grad_norm": 2.701172113418579, "learning_rate": 3.444943648266138e-06, "loss": 0.5198, "step": 14757 }, { "epoch": 0.6126861063108786, "grad_norm": 2.356423854827881, "learning_rate": 3.4443046901444886e-06, "loss": 0.6125, "step": 14758 }, { "epoch": 0.61272762183509, "grad_norm": 2.038774251937866, "learning_rate": 3.4436657601502578e-06, "loss": 0.5011, "step": 14759 }, { "epoch": 0.6127691373593013, "grad_norm": 2.371941328048706, "learning_rate": 3.443026858295e-06, "loss": 0.4971, "step": 14760 }, { "epoch": 0.6128106528835127, "grad_norm": 2.3921899795532227, "learning_rate": 3.442387984590264e-06, "loss": 0.4736, "step": 14761 }, { "epoch": 0.6128521684077239, "grad_norm": 3.939973831176758, "learning_rate": 3.441749139047602e-06, "loss": 0.5196, "step": 14762 }, { "epoch": 0.6128936839319353, "grad_norm": 2.4520184993743896, "learning_rate": 3.4411103216785645e-06, "loss": 0.5379, "step": 14763 }, { "epoch": 0.6129351994561466, "grad_norm": 2.5604825019836426, "learning_rate": 3.4404715324947037e-06, "loss": 0.5206, "step": 14764 }, { "epoch": 0.612976714980358, "grad_norm": 2.2458653450012207, "learning_rate": 3.439832771507565e-06, "loss": 0.3588, "step": 14765 }, { "epoch": 0.6130182305045693, "grad_norm": 2.237586259841919, "learning_rate": 3.4391940387287005e-06, "loss": 0.4724, "step": 14766 }, { "epoch": 0.6130597460287807, "grad_norm": 2.302046775817871, "learning_rate": 3.4385553341696564e-06, "loss": 0.4914, "step": 14767 }, { "epoch": 0.6131012615529919, "grad_norm": 2.4403178691864014, "learning_rate": 3.437916657841984e-06, "loss": 0.562, "step": 14768 }, { "epoch": 0.6131427770772033, "grad_norm": 2.059856653213501, "learning_rate": 3.437278009757228e-06, "loss": 0.4759, "step": 14769 }, { "epoch": 0.6131842926014146, "grad_norm": 2.319911479949951, "learning_rate": 3.4366393899269347e-06, "loss": 0.4631, "step": 14770 }, { "epoch": 0.613225808125626, "grad_norm": 2.2745182514190674, "learning_rate": 3.4360007983626533e-06, "loss": 0.3947, "step": 14771 }, { "epoch": 0.6132673236498373, "grad_norm": 2.170316219329834, "learning_rate": 3.435362235075927e-06, "loss": 0.5082, "step": 14772 }, { "epoch": 0.6133088391740487, "grad_norm": 2.200631856918335, "learning_rate": 3.4347237000783047e-06, "loss": 0.479, "step": 14773 }, { "epoch": 0.6133503546982599, "grad_norm": 1.856735348701477, "learning_rate": 3.434085193381327e-06, "loss": 0.4678, "step": 14774 }, { "epoch": 0.6133918702224713, "grad_norm": 3.075897216796875, "learning_rate": 3.43344671499654e-06, "loss": 0.5145, "step": 14775 }, { "epoch": 0.6134333857466826, "grad_norm": 2.099184036254883, "learning_rate": 3.432808264935488e-06, "loss": 0.339, "step": 14776 }, { "epoch": 0.613474901270894, "grad_norm": 2.6758084297180176, "learning_rate": 3.4321698432097172e-06, "loss": 0.4357, "step": 14777 }, { "epoch": 0.6135164167951053, "grad_norm": 2.3165578842163086, "learning_rate": 3.4315314498307646e-06, "loss": 0.5739, "step": 14778 }, { "epoch": 0.6135579323193167, "grad_norm": 2.467266321182251, "learning_rate": 3.4308930848101755e-06, "loss": 0.5514, "step": 14779 }, { "epoch": 0.6135994478435279, "grad_norm": 1.8984191417694092, "learning_rate": 3.4302547481594922e-06, "loss": 0.5579, "step": 14780 }, { "epoch": 0.6136409633677393, "grad_norm": 2.6785898208618164, "learning_rate": 3.4296164398902576e-06, "loss": 0.63, "step": 14781 }, { "epoch": 0.6136824788919507, "grad_norm": 2.5514285564422607, "learning_rate": 3.4289781600140075e-06, "loss": 0.393, "step": 14782 }, { "epoch": 0.613723994416162, "grad_norm": 2.360919952392578, "learning_rate": 3.428339908542286e-06, "loss": 0.4697, "step": 14783 }, { "epoch": 0.6137655099403734, "grad_norm": 2.2521443367004395, "learning_rate": 3.4277016854866326e-06, "loss": 0.4934, "step": 14784 }, { "epoch": 0.6138070254645847, "grad_norm": 2.4952175617218018, "learning_rate": 3.4270634908585845e-06, "loss": 0.4172, "step": 14785 }, { "epoch": 0.613848540988796, "grad_norm": 2.045994997024536, "learning_rate": 3.4264253246696845e-06, "loss": 0.5086, "step": 14786 }, { "epoch": 0.6138900565130073, "grad_norm": 2.4241487979888916, "learning_rate": 3.425787186931466e-06, "loss": 0.5178, "step": 14787 }, { "epoch": 0.6139315720372187, "grad_norm": 2.2134602069854736, "learning_rate": 3.4251490776554685e-06, "loss": 0.4924, "step": 14788 }, { "epoch": 0.61397308756143, "grad_norm": 2.519190788269043, "learning_rate": 3.42451099685323e-06, "loss": 0.5501, "step": 14789 }, { "epoch": 0.6140146030856414, "grad_norm": 2.5282723903656006, "learning_rate": 3.4238729445362884e-06, "loss": 0.6232, "step": 14790 }, { "epoch": 0.6140561186098527, "grad_norm": 2.2191600799560547, "learning_rate": 3.423234920716177e-06, "loss": 0.518, "step": 14791 }, { "epoch": 0.614097634134064, "grad_norm": 1.9428074359893799, "learning_rate": 3.422596925404432e-06, "loss": 0.4359, "step": 14792 }, { "epoch": 0.6141391496582753, "grad_norm": 2.416048765182495, "learning_rate": 3.4219589586125895e-06, "loss": 0.5341, "step": 14793 }, { "epoch": 0.6141806651824867, "grad_norm": 2.043602705001831, "learning_rate": 3.4213210203521863e-06, "loss": 0.4123, "step": 14794 }, { "epoch": 0.614222180706698, "grad_norm": 2.818251609802246, "learning_rate": 3.4206831106347517e-06, "loss": 0.5811, "step": 14795 }, { "epoch": 0.6142636962309094, "grad_norm": 2.189490795135498, "learning_rate": 3.420045229471821e-06, "loss": 0.5193, "step": 14796 }, { "epoch": 0.6143052117551207, "grad_norm": 2.183950424194336, "learning_rate": 3.41940737687493e-06, "loss": 0.5757, "step": 14797 }, { "epoch": 0.614346727279332, "grad_norm": 2.8413877487182617, "learning_rate": 3.4187695528556073e-06, "loss": 0.591, "step": 14798 }, { "epoch": 0.6143882428035433, "grad_norm": 2.459543466567993, "learning_rate": 3.418131757425389e-06, "loss": 0.6052, "step": 14799 }, { "epoch": 0.6144297583277547, "grad_norm": 2.0445334911346436, "learning_rate": 3.4174939905958033e-06, "loss": 0.571, "step": 14800 }, { "epoch": 0.614471273851966, "grad_norm": 2.704746961593628, "learning_rate": 3.4168562523783823e-06, "loss": 0.5645, "step": 14801 }, { "epoch": 0.6145127893761774, "grad_norm": 2.181441068649292, "learning_rate": 3.4162185427846555e-06, "loss": 0.5097, "step": 14802 }, { "epoch": 0.6145543049003886, "grad_norm": 2.3458266258239746, "learning_rate": 3.4155808618261564e-06, "loss": 0.6655, "step": 14803 }, { "epoch": 0.6145958204246, "grad_norm": 2.597567319869995, "learning_rate": 3.4149432095144108e-06, "loss": 0.7381, "step": 14804 }, { "epoch": 0.6146373359488113, "grad_norm": 2.1971466541290283, "learning_rate": 3.4143055858609475e-06, "loss": 0.6184, "step": 14805 }, { "epoch": 0.6146788514730227, "grad_norm": 2.130462646484375, "learning_rate": 3.413667990877297e-06, "loss": 0.4908, "step": 14806 }, { "epoch": 0.614720366997234, "grad_norm": 2.6363351345062256, "learning_rate": 3.413030424574989e-06, "loss": 0.6228, "step": 14807 }, { "epoch": 0.6147618825214454, "grad_norm": 1.6985970735549927, "learning_rate": 3.4123928869655455e-06, "loss": 0.4982, "step": 14808 }, { "epoch": 0.6148033980456566, "grad_norm": 2.1504290103912354, "learning_rate": 3.411755378060496e-06, "loss": 0.5775, "step": 14809 }, { "epoch": 0.614844913569868, "grad_norm": 2.025550603866577, "learning_rate": 3.411117897871369e-06, "loss": 0.4076, "step": 14810 }, { "epoch": 0.6148864290940793, "grad_norm": 2.130340099334717, "learning_rate": 3.4104804464096863e-06, "loss": 0.4864, "step": 14811 }, { "epoch": 0.6149279446182907, "grad_norm": 2.4767167568206787, "learning_rate": 3.4098430236869783e-06, "loss": 0.5037, "step": 14812 }, { "epoch": 0.6149694601425021, "grad_norm": 2.3794186115264893, "learning_rate": 3.409205629714765e-06, "loss": 0.6275, "step": 14813 }, { "epoch": 0.6150109756667134, "grad_norm": 2.4712321758270264, "learning_rate": 3.408568264504571e-06, "loss": 0.4493, "step": 14814 }, { "epoch": 0.6150524911909248, "grad_norm": 1.857805609703064, "learning_rate": 3.407930928067922e-06, "loss": 0.3958, "step": 14815 }, { "epoch": 0.615094006715136, "grad_norm": 2.691300868988037, "learning_rate": 3.407293620416343e-06, "loss": 0.5459, "step": 14816 }, { "epoch": 0.6151355222393474, "grad_norm": 2.744593858718872, "learning_rate": 3.4066563415613523e-06, "loss": 0.5282, "step": 14817 }, { "epoch": 0.6151770377635587, "grad_norm": 2.2129945755004883, "learning_rate": 3.4060190915144742e-06, "loss": 0.6526, "step": 14818 }, { "epoch": 0.6152185532877701, "grad_norm": 2.9972729682922363, "learning_rate": 3.4053818702872303e-06, "loss": 0.5856, "step": 14819 }, { "epoch": 0.6152600688119814, "grad_norm": 2.4333059787750244, "learning_rate": 3.4047446778911443e-06, "loss": 0.4657, "step": 14820 }, { "epoch": 0.6153015843361928, "grad_norm": 2.421055793762207, "learning_rate": 3.4041075143377316e-06, "loss": 0.4441, "step": 14821 }, { "epoch": 0.615343099860404, "grad_norm": 2.1337804794311523, "learning_rate": 3.4034703796385154e-06, "loss": 0.563, "step": 14822 }, { "epoch": 0.6153846153846154, "grad_norm": 2.221283197402954, "learning_rate": 3.4028332738050156e-06, "loss": 0.464, "step": 14823 }, { "epoch": 0.6154261309088267, "grad_norm": 2.5951249599456787, "learning_rate": 3.402196196848751e-06, "loss": 0.4851, "step": 14824 }, { "epoch": 0.6154676464330381, "grad_norm": 2.3426012992858887, "learning_rate": 3.4015591487812395e-06, "loss": 0.4545, "step": 14825 }, { "epoch": 0.6155091619572494, "grad_norm": 2.830399513244629, "learning_rate": 3.400922129613998e-06, "loss": 0.5944, "step": 14826 }, { "epoch": 0.6155506774814608, "grad_norm": 3.053133487701416, "learning_rate": 3.4002851393585455e-06, "loss": 0.5014, "step": 14827 }, { "epoch": 0.615592193005672, "grad_norm": 2.3781075477600098, "learning_rate": 3.399648178026399e-06, "loss": 0.5901, "step": 14828 }, { "epoch": 0.6156337085298834, "grad_norm": 2.459235191345215, "learning_rate": 3.399011245629077e-06, "loss": 0.5302, "step": 14829 }, { "epoch": 0.6156752240540947, "grad_norm": 2.5353357791900635, "learning_rate": 3.3983743421780906e-06, "loss": 0.5167, "step": 14830 }, { "epoch": 0.6157167395783061, "grad_norm": 2.3733789920806885, "learning_rate": 3.397737467684958e-06, "loss": 0.5566, "step": 14831 }, { "epoch": 0.6157582551025174, "grad_norm": 2.2415616512298584, "learning_rate": 3.3971006221611933e-06, "loss": 0.4333, "step": 14832 }, { "epoch": 0.6157997706267287, "grad_norm": 2.516173839569092, "learning_rate": 3.396463805618314e-06, "loss": 0.4892, "step": 14833 }, { "epoch": 0.61584128615094, "grad_norm": 2.4059994220733643, "learning_rate": 3.395827018067829e-06, "loss": 0.4087, "step": 14834 }, { "epoch": 0.6158828016751514, "grad_norm": 2.0471832752227783, "learning_rate": 3.395190259521254e-06, "loss": 0.4501, "step": 14835 }, { "epoch": 0.6159243171993627, "grad_norm": 2.4271786212921143, "learning_rate": 3.3945535299901023e-06, "loss": 0.5552, "step": 14836 }, { "epoch": 0.6159658327235741, "grad_norm": 2.12469482421875, "learning_rate": 3.393916829485886e-06, "loss": 0.587, "step": 14837 }, { "epoch": 0.6160073482477854, "grad_norm": 2.1572232246398926, "learning_rate": 3.3932801580201158e-06, "loss": 0.4571, "step": 14838 }, { "epoch": 0.6160488637719967, "grad_norm": 2.1533591747283936, "learning_rate": 3.392643515604303e-06, "loss": 0.5174, "step": 14839 }, { "epoch": 0.616090379296208, "grad_norm": 2.7170157432556152, "learning_rate": 3.392006902249958e-06, "loss": 0.6841, "step": 14840 }, { "epoch": 0.6161318948204194, "grad_norm": 2.3718655109405518, "learning_rate": 3.3913703179685924e-06, "loss": 0.4307, "step": 14841 }, { "epoch": 0.6161734103446308, "grad_norm": 2.398535966873169, "learning_rate": 3.3907337627717176e-06, "loss": 0.5417, "step": 14842 }, { "epoch": 0.6162149258688421, "grad_norm": 2.904463291168213, "learning_rate": 3.3900972366708375e-06, "loss": 0.6035, "step": 14843 }, { "epoch": 0.6162564413930535, "grad_norm": 2.8459479808807373, "learning_rate": 3.3894607396774628e-06, "loss": 0.5104, "step": 14844 }, { "epoch": 0.6162979569172647, "grad_norm": 2.529045820236206, "learning_rate": 3.388824271803103e-06, "loss": 0.4957, "step": 14845 }, { "epoch": 0.6163394724414761, "grad_norm": 2.5138497352600098, "learning_rate": 3.3881878330592667e-06, "loss": 0.5783, "step": 14846 }, { "epoch": 0.6163809879656874, "grad_norm": 2.2434351444244385, "learning_rate": 3.387551423457456e-06, "loss": 0.6783, "step": 14847 }, { "epoch": 0.6164225034898988, "grad_norm": 2.466555118560791, "learning_rate": 3.386915043009181e-06, "loss": 0.6295, "step": 14848 }, { "epoch": 0.6164640190141101, "grad_norm": 2.3096070289611816, "learning_rate": 3.3862786917259483e-06, "loss": 0.6206, "step": 14849 }, { "epoch": 0.6165055345383215, "grad_norm": 2.199535608291626, "learning_rate": 3.385642369619262e-06, "loss": 0.4957, "step": 14850 }, { "epoch": 0.6165470500625327, "grad_norm": 2.2431440353393555, "learning_rate": 3.3850060767006265e-06, "loss": 0.5968, "step": 14851 }, { "epoch": 0.6165885655867441, "grad_norm": 2.469580888748169, "learning_rate": 3.3843698129815455e-06, "loss": 0.4072, "step": 14852 }, { "epoch": 0.6166300811109554, "grad_norm": 2.442257881164551, "learning_rate": 3.3837335784735244e-06, "loss": 0.5736, "step": 14853 }, { "epoch": 0.6166715966351668, "grad_norm": 2.163849353790283, "learning_rate": 3.383097373188066e-06, "loss": 0.5979, "step": 14854 }, { "epoch": 0.6167131121593781, "grad_norm": 2.7510921955108643, "learning_rate": 3.382461197136675e-06, "loss": 0.4747, "step": 14855 }, { "epoch": 0.6167546276835895, "grad_norm": 2.3054380416870117, "learning_rate": 3.3818250503308494e-06, "loss": 0.5076, "step": 14856 }, { "epoch": 0.6167961432078007, "grad_norm": 2.7814440727233887, "learning_rate": 3.3811889327820935e-06, "loss": 0.7097, "step": 14857 }, { "epoch": 0.6168376587320121, "grad_norm": 2.672867774963379, "learning_rate": 3.380552844501909e-06, "loss": 0.6143, "step": 14858 }, { "epoch": 0.6168791742562234, "grad_norm": 2.9793546199798584, "learning_rate": 3.3799167855017974e-06, "loss": 0.5061, "step": 14859 }, { "epoch": 0.6169206897804348, "grad_norm": 1.8195034265518188, "learning_rate": 3.379280755793255e-06, "loss": 0.3842, "step": 14860 }, { "epoch": 0.6169622053046461, "grad_norm": 2.2067532539367676, "learning_rate": 3.3786447553877848e-06, "loss": 0.4377, "step": 14861 }, { "epoch": 0.6170037208288575, "grad_norm": 2.339190721511841, "learning_rate": 3.378008784296884e-06, "loss": 0.4605, "step": 14862 }, { "epoch": 0.6170452363530687, "grad_norm": 2.289069890975952, "learning_rate": 3.3773728425320527e-06, "loss": 0.6398, "step": 14863 }, { "epoch": 0.6170867518772801, "grad_norm": 2.57517671585083, "learning_rate": 3.3767369301047883e-06, "loss": 0.5433, "step": 14864 }, { "epoch": 0.6171282674014914, "grad_norm": 2.7502903938293457, "learning_rate": 3.3761010470265865e-06, "loss": 0.39, "step": 14865 }, { "epoch": 0.6171697829257028, "grad_norm": 3.027696132659912, "learning_rate": 3.3754651933089468e-06, "loss": 0.5212, "step": 14866 }, { "epoch": 0.6172112984499141, "grad_norm": 1.8906854391098022, "learning_rate": 3.3748293689633637e-06, "loss": 0.4369, "step": 14867 }, { "epoch": 0.6172528139741255, "grad_norm": 2.324432611465454, "learning_rate": 3.3741935740013366e-06, "loss": 0.6365, "step": 14868 }, { "epoch": 0.6172943294983367, "grad_norm": 2.2893669605255127, "learning_rate": 3.3735578084343556e-06, "loss": 0.4205, "step": 14869 }, { "epoch": 0.6173358450225481, "grad_norm": 2.187495708465576, "learning_rate": 3.3729220722739185e-06, "loss": 0.5507, "step": 14870 }, { "epoch": 0.6173773605467594, "grad_norm": 2.7311131954193115, "learning_rate": 3.37228636553152e-06, "loss": 0.6983, "step": 14871 }, { "epoch": 0.6174188760709708, "grad_norm": 2.3821334838867188, "learning_rate": 3.371650688218655e-06, "loss": 0.4431, "step": 14872 }, { "epoch": 0.6174603915951822, "grad_norm": 2.461655855178833, "learning_rate": 3.371015040346812e-06, "loss": 0.4914, "step": 14873 }, { "epoch": 0.6175019071193935, "grad_norm": 2.1241934299468994, "learning_rate": 3.370379421927489e-06, "loss": 0.6757, "step": 14874 }, { "epoch": 0.6175434226436048, "grad_norm": 2.5959343910217285, "learning_rate": 3.369743832972173e-06, "loss": 0.5369, "step": 14875 }, { "epoch": 0.6175849381678161, "grad_norm": 2.7891030311584473, "learning_rate": 3.3691082734923607e-06, "loss": 0.5489, "step": 14876 }, { "epoch": 0.6176264536920275, "grad_norm": 2.7903170585632324, "learning_rate": 3.36847274349954e-06, "loss": 0.5409, "step": 14877 }, { "epoch": 0.6176679692162388, "grad_norm": 2.293454170227051, "learning_rate": 3.367837243005202e-06, "loss": 0.3836, "step": 14878 }, { "epoch": 0.6177094847404502, "grad_norm": 2.077618360519409, "learning_rate": 3.3672017720208372e-06, "loss": 0.4507, "step": 14879 }, { "epoch": 0.6177510002646615, "grad_norm": 2.130742073059082, "learning_rate": 3.366566330557935e-06, "loss": 0.6199, "step": 14880 }, { "epoch": 0.6177925157888728, "grad_norm": 2.748483419418335, "learning_rate": 3.365930918627986e-06, "loss": 0.4281, "step": 14881 }, { "epoch": 0.6178340313130841, "grad_norm": 2.688582181930542, "learning_rate": 3.365295536242475e-06, "loss": 0.5543, "step": 14882 }, { "epoch": 0.6178755468372955, "grad_norm": 2.5570437908172607, "learning_rate": 3.3646601834128924e-06, "loss": 0.5118, "step": 14883 }, { "epoch": 0.6179170623615068, "grad_norm": 2.0278661251068115, "learning_rate": 3.3640248601507243e-06, "loss": 0.4443, "step": 14884 }, { "epoch": 0.6179585778857182, "grad_norm": 2.415044069290161, "learning_rate": 3.3633895664674604e-06, "loss": 0.5568, "step": 14885 }, { "epoch": 0.6180000934099295, "grad_norm": 2.271977424621582, "learning_rate": 3.362754302374583e-06, "loss": 0.5946, "step": 14886 }, { "epoch": 0.6180416089341408, "grad_norm": 2.93790864944458, "learning_rate": 3.362119067883581e-06, "loss": 0.5353, "step": 14887 }, { "epoch": 0.6180831244583521, "grad_norm": 1.9389476776123047, "learning_rate": 3.3614838630059364e-06, "loss": 0.4801, "step": 14888 }, { "epoch": 0.6181246399825635, "grad_norm": 2.582932710647583, "learning_rate": 3.360848687753138e-06, "loss": 0.5015, "step": 14889 }, { "epoch": 0.6181661555067748, "grad_norm": 2.011457681655884, "learning_rate": 3.3602135421366673e-06, "loss": 0.4813, "step": 14890 }, { "epoch": 0.6182076710309862, "grad_norm": 2.2812726497650146, "learning_rate": 3.3595784261680076e-06, "loss": 0.5069, "step": 14891 }, { "epoch": 0.6182491865551974, "grad_norm": 2.0812103748321533, "learning_rate": 3.3589433398586423e-06, "loss": 0.5042, "step": 14892 }, { "epoch": 0.6182907020794088, "grad_norm": 2.3517560958862305, "learning_rate": 3.3583082832200543e-06, "loss": 0.4716, "step": 14893 }, { "epoch": 0.6183322176036201, "grad_norm": 2.490140438079834, "learning_rate": 3.3576732562637284e-06, "loss": 0.5367, "step": 14894 }, { "epoch": 0.6183737331278315, "grad_norm": 2.791473150253296, "learning_rate": 3.3570382590011407e-06, "loss": 0.6039, "step": 14895 }, { "epoch": 0.6184152486520428, "grad_norm": 2.3044614791870117, "learning_rate": 3.3564032914437753e-06, "loss": 0.5138, "step": 14896 }, { "epoch": 0.6184567641762542, "grad_norm": 2.477614641189575, "learning_rate": 3.355768353603113e-06, "loss": 0.5632, "step": 14897 }, { "epoch": 0.6184982797004654, "grad_norm": 2.004575252532959, "learning_rate": 3.355133445490633e-06, "loss": 0.362, "step": 14898 }, { "epoch": 0.6185397952246768, "grad_norm": 2.381718635559082, "learning_rate": 3.354498567117813e-06, "loss": 0.6116, "step": 14899 }, { "epoch": 0.6185813107488881, "grad_norm": 1.9529087543487549, "learning_rate": 3.353863718496135e-06, "loss": 0.5244, "step": 14900 }, { "epoch": 0.6186228262730995, "grad_norm": 2.155618190765381, "learning_rate": 3.3532288996370736e-06, "loss": 0.5489, "step": 14901 }, { "epoch": 0.6186643417973108, "grad_norm": 2.3154263496398926, "learning_rate": 3.35259411055211e-06, "loss": 0.4753, "step": 14902 }, { "epoch": 0.6187058573215222, "grad_norm": 3.4705045223236084, "learning_rate": 3.3519593512527194e-06, "loss": 0.4356, "step": 14903 }, { "epoch": 0.6187473728457336, "grad_norm": 2.249706268310547, "learning_rate": 3.351324621750377e-06, "loss": 0.492, "step": 14904 }, { "epoch": 0.6187888883699448, "grad_norm": 3.2383134365081787, "learning_rate": 3.3506899220565615e-06, "loss": 0.4873, "step": 14905 }, { "epoch": 0.6188304038941562, "grad_norm": 2.1318001747131348, "learning_rate": 3.350055252182748e-06, "loss": 0.5753, "step": 14906 }, { "epoch": 0.6188719194183675, "grad_norm": 2.693922281265259, "learning_rate": 3.349420612140412e-06, "loss": 0.4807, "step": 14907 }, { "epoch": 0.6189134349425789, "grad_norm": 2.0669379234313965, "learning_rate": 3.3487860019410256e-06, "loss": 0.4237, "step": 14908 }, { "epoch": 0.6189549504667902, "grad_norm": 2.302574872970581, "learning_rate": 3.348151421596064e-06, "loss": 0.4802, "step": 14909 }, { "epoch": 0.6189964659910016, "grad_norm": 2.2648894786834717, "learning_rate": 3.3475168711170015e-06, "loss": 0.4858, "step": 14910 }, { "epoch": 0.6190379815152128, "grad_norm": 2.8031163215637207, "learning_rate": 3.3468823505153114e-06, "loss": 0.4699, "step": 14911 }, { "epoch": 0.6190794970394242, "grad_norm": 2.4249444007873535, "learning_rate": 3.3462478598024632e-06, "loss": 0.4278, "step": 14912 }, { "epoch": 0.6191210125636355, "grad_norm": 2.5784480571746826, "learning_rate": 3.345613398989932e-06, "loss": 0.5539, "step": 14913 }, { "epoch": 0.6191625280878469, "grad_norm": 2.1065611839294434, "learning_rate": 3.3449789680891855e-06, "loss": 0.5056, "step": 14914 }, { "epoch": 0.6192040436120582, "grad_norm": 2.764543294906616, "learning_rate": 3.3443445671116977e-06, "loss": 0.4478, "step": 14915 }, { "epoch": 0.6192455591362696, "grad_norm": 2.267885684967041, "learning_rate": 3.343710196068937e-06, "loss": 0.4988, "step": 14916 }, { "epoch": 0.6192870746604808, "grad_norm": 2.831516742706299, "learning_rate": 3.343075854972373e-06, "loss": 0.6559, "step": 14917 }, { "epoch": 0.6193285901846922, "grad_norm": 2.3290228843688965, "learning_rate": 3.3424415438334745e-06, "loss": 0.6949, "step": 14918 }, { "epoch": 0.6193701057089035, "grad_norm": 2.292013645172119, "learning_rate": 3.341807262663711e-06, "loss": 0.4366, "step": 14919 }, { "epoch": 0.6194116212331149, "grad_norm": 2.2958457469940186, "learning_rate": 3.3411730114745522e-06, "loss": 0.4521, "step": 14920 }, { "epoch": 0.6194531367573262, "grad_norm": 2.105982542037964, "learning_rate": 3.3405387902774612e-06, "loss": 0.5259, "step": 14921 }, { "epoch": 0.6194946522815376, "grad_norm": 1.743682861328125, "learning_rate": 3.339904599083907e-06, "loss": 0.4341, "step": 14922 }, { "epoch": 0.6195361678057488, "grad_norm": 2.1931142807006836, "learning_rate": 3.3392704379053564e-06, "loss": 0.4512, "step": 14923 }, { "epoch": 0.6195776833299602, "grad_norm": 2.91398286819458, "learning_rate": 3.338636306753277e-06, "loss": 0.4615, "step": 14924 }, { "epoch": 0.6196191988541715, "grad_norm": 2.81510853767395, "learning_rate": 3.33800220563913e-06, "loss": 0.6137, "step": 14925 }, { "epoch": 0.6196607143783829, "grad_norm": 2.868335485458374, "learning_rate": 3.337368134574383e-06, "loss": 0.4701, "step": 14926 }, { "epoch": 0.6197022299025942, "grad_norm": 1.6282144784927368, "learning_rate": 3.3367340935704984e-06, "loss": 0.3496, "step": 14927 }, { "epoch": 0.6197437454268055, "grad_norm": 3.360995054244995, "learning_rate": 3.336100082638942e-06, "loss": 0.5728, "step": 14928 }, { "epoch": 0.6197852609510168, "grad_norm": 2.0355050563812256, "learning_rate": 3.3354661017911756e-06, "loss": 0.4372, "step": 14929 }, { "epoch": 0.6198267764752282, "grad_norm": 2.3762547969818115, "learning_rate": 3.3348321510386604e-06, "loss": 0.5236, "step": 14930 }, { "epoch": 0.6198682919994395, "grad_norm": 1.8983170986175537, "learning_rate": 3.3341982303928606e-06, "loss": 0.4323, "step": 14931 }, { "epoch": 0.6199098075236509, "grad_norm": 2.289565086364746, "learning_rate": 3.333564339865236e-06, "loss": 0.4267, "step": 14932 }, { "epoch": 0.6199513230478622, "grad_norm": 2.4378252029418945, "learning_rate": 3.3329304794672518e-06, "loss": 0.4736, "step": 14933 }, { "epoch": 0.6199928385720735, "grad_norm": 2.0341227054595947, "learning_rate": 3.3322966492103626e-06, "loss": 0.4229, "step": 14934 }, { "epoch": 0.6200343540962849, "grad_norm": 2.509073495864868, "learning_rate": 3.33166284910603e-06, "loss": 0.5536, "step": 14935 }, { "epoch": 0.6200758696204962, "grad_norm": 2.3999016284942627, "learning_rate": 3.331029079165714e-06, "loss": 0.5198, "step": 14936 }, { "epoch": 0.6201173851447076, "grad_norm": 2.34074068069458, "learning_rate": 3.3303953394008758e-06, "loss": 0.5855, "step": 14937 }, { "epoch": 0.6201589006689189, "grad_norm": 2.578589677810669, "learning_rate": 3.329761629822969e-06, "loss": 0.5455, "step": 14938 }, { "epoch": 0.6202004161931303, "grad_norm": 2.7970950603485107, "learning_rate": 3.3291279504434543e-06, "loss": 0.5286, "step": 14939 }, { "epoch": 0.6202419317173415, "grad_norm": 2.106053352355957, "learning_rate": 3.328494301273787e-06, "loss": 0.4904, "step": 14940 }, { "epoch": 0.6202834472415529, "grad_norm": 2.714106321334839, "learning_rate": 3.3278606823254252e-06, "loss": 0.4804, "step": 14941 }, { "epoch": 0.6203249627657642, "grad_norm": 2.2119312286376953, "learning_rate": 3.3272270936098243e-06, "loss": 0.572, "step": 14942 }, { "epoch": 0.6203664782899756, "grad_norm": 2.6239495277404785, "learning_rate": 3.3265935351384386e-06, "loss": 0.4522, "step": 14943 }, { "epoch": 0.6204079938141869, "grad_norm": 2.2551724910736084, "learning_rate": 3.3259600069227242e-06, "loss": 0.4119, "step": 14944 }, { "epoch": 0.6204495093383983, "grad_norm": 2.789482593536377, "learning_rate": 3.325326508974136e-06, "loss": 0.5821, "step": 14945 }, { "epoch": 0.6204910248626095, "grad_norm": 2.21022367477417, "learning_rate": 3.324693041304128e-06, "loss": 0.5203, "step": 14946 }, { "epoch": 0.6205325403868209, "grad_norm": 2.369441509246826, "learning_rate": 3.3240596039241512e-06, "loss": 0.6124, "step": 14947 }, { "epoch": 0.6205740559110322, "grad_norm": 2.720942258834839, "learning_rate": 3.3234261968456603e-06, "loss": 0.6102, "step": 14948 }, { "epoch": 0.6206155714352436, "grad_norm": 2.161100387573242, "learning_rate": 3.322792820080107e-06, "loss": 0.5984, "step": 14949 }, { "epoch": 0.6206570869594549, "grad_norm": 2.663011312484741, "learning_rate": 3.322159473638944e-06, "loss": 0.5694, "step": 14950 }, { "epoch": 0.6206986024836663, "grad_norm": 2.283088207244873, "learning_rate": 3.32152615753362e-06, "loss": 0.5854, "step": 14951 }, { "epoch": 0.6207401180078775, "grad_norm": 2.0878005027770996, "learning_rate": 3.3208928717755886e-06, "loss": 0.3604, "step": 14952 }, { "epoch": 0.6207816335320889, "grad_norm": 2.580544948577881, "learning_rate": 3.3202596163762955e-06, "loss": 0.5141, "step": 14953 }, { "epoch": 0.6208231490563002, "grad_norm": 2.0124237537384033, "learning_rate": 3.319626391347196e-06, "loss": 0.4807, "step": 14954 }, { "epoch": 0.6208646645805116, "grad_norm": 3.026176691055298, "learning_rate": 3.3189931966997324e-06, "loss": 0.5137, "step": 14955 }, { "epoch": 0.6209061801047229, "grad_norm": 2.5226242542266846, "learning_rate": 3.3183600324453574e-06, "loss": 0.5987, "step": 14956 }, { "epoch": 0.6209476956289343, "grad_norm": 2.650402307510376, "learning_rate": 3.317726898595517e-06, "loss": 0.51, "step": 14957 }, { "epoch": 0.6209892111531455, "grad_norm": 2.724518060684204, "learning_rate": 3.31709379516166e-06, "loss": 0.6217, "step": 14958 }, { "epoch": 0.6210307266773569, "grad_norm": 2.6381609439849854, "learning_rate": 3.3164607221552336e-06, "loss": 0.5759, "step": 14959 }, { "epoch": 0.6210722422015682, "grad_norm": 2.300955057144165, "learning_rate": 3.315827679587681e-06, "loss": 0.4049, "step": 14960 }, { "epoch": 0.6211137577257796, "grad_norm": 2.1614058017730713, "learning_rate": 3.3151946674704487e-06, "loss": 0.5357, "step": 14961 }, { "epoch": 0.6211552732499909, "grad_norm": 2.2042126655578613, "learning_rate": 3.314561685814983e-06, "loss": 0.3626, "step": 14962 }, { "epoch": 0.6211967887742023, "grad_norm": 2.6134731769561768, "learning_rate": 3.31392873463273e-06, "loss": 0.3552, "step": 14963 }, { "epoch": 0.6212383042984135, "grad_norm": 2.4896371364593506, "learning_rate": 3.313295813935129e-06, "loss": 0.4904, "step": 14964 }, { "epoch": 0.6212798198226249, "grad_norm": 2.341848611831665, "learning_rate": 3.3126629237336266e-06, "loss": 0.5957, "step": 14965 }, { "epoch": 0.6213213353468363, "grad_norm": 3.668111801147461, "learning_rate": 3.312030064039664e-06, "loss": 0.4329, "step": 14966 }, { "epoch": 0.6213628508710476, "grad_norm": 2.8161234855651855, "learning_rate": 3.3113972348646873e-06, "loss": 0.495, "step": 14967 }, { "epoch": 0.621404366395259, "grad_norm": 2.5449304580688477, "learning_rate": 3.310764436220132e-06, "loss": 0.3841, "step": 14968 }, { "epoch": 0.6214458819194703, "grad_norm": 2.1922075748443604, "learning_rate": 3.310131668117443e-06, "loss": 0.4227, "step": 14969 }, { "epoch": 0.6214873974436816, "grad_norm": 2.6502175331115723, "learning_rate": 3.3094989305680613e-06, "loss": 0.4483, "step": 14970 }, { "epoch": 0.6215289129678929, "grad_norm": 2.1670377254486084, "learning_rate": 3.308866223583427e-06, "loss": 0.4564, "step": 14971 }, { "epoch": 0.6215704284921043, "grad_norm": 2.1724023818969727, "learning_rate": 3.308233547174978e-06, "loss": 0.5539, "step": 14972 }, { "epoch": 0.6216119440163156, "grad_norm": 2.76690936088562, "learning_rate": 3.307600901354153e-06, "loss": 0.6201, "step": 14973 }, { "epoch": 0.621653459540527, "grad_norm": 2.4923863410949707, "learning_rate": 3.3069682861323927e-06, "loss": 0.6535, "step": 14974 }, { "epoch": 0.6216949750647383, "grad_norm": 1.8594399690628052, "learning_rate": 3.306335701521134e-06, "loss": 0.5277, "step": 14975 }, { "epoch": 0.6217364905889496, "grad_norm": 2.4620070457458496, "learning_rate": 3.3057031475318135e-06, "loss": 0.5371, "step": 14976 }, { "epoch": 0.6217780061131609, "grad_norm": 2.6592912673950195, "learning_rate": 3.305070624175868e-06, "loss": 0.5475, "step": 14977 }, { "epoch": 0.6218195216373723, "grad_norm": 2.2738561630249023, "learning_rate": 3.3044381314647356e-06, "loss": 0.5044, "step": 14978 }, { "epoch": 0.6218610371615836, "grad_norm": 2.711249351501465, "learning_rate": 3.3038056694098485e-06, "loss": 0.537, "step": 14979 }, { "epoch": 0.621902552685795, "grad_norm": 1.938384771347046, "learning_rate": 3.303173238022647e-06, "loss": 0.5566, "step": 14980 }, { "epoch": 0.6219440682100063, "grad_norm": 2.172985315322876, "learning_rate": 3.3025408373145596e-06, "loss": 0.5193, "step": 14981 }, { "epoch": 0.6219855837342176, "grad_norm": 2.9623498916625977, "learning_rate": 3.301908467297023e-06, "loss": 0.593, "step": 14982 }, { "epoch": 0.6220270992584289, "grad_norm": 2.83298659324646, "learning_rate": 3.301276127981471e-06, "loss": 0.5431, "step": 14983 }, { "epoch": 0.6220686147826403, "grad_norm": 2.6659975051879883, "learning_rate": 3.3006438193793387e-06, "loss": 0.6808, "step": 14984 }, { "epoch": 0.6221101303068516, "grad_norm": 2.674898624420166, "learning_rate": 3.3000115415020534e-06, "loss": 0.5466, "step": 14985 }, { "epoch": 0.622151645831063, "grad_norm": 2.768242597579956, "learning_rate": 3.2993792943610493e-06, "loss": 0.5027, "step": 14986 }, { "epoch": 0.6221931613552742, "grad_norm": 2.1130499839782715, "learning_rate": 3.298747077967758e-06, "loss": 0.4805, "step": 14987 }, { "epoch": 0.6222346768794856, "grad_norm": 2.468053102493286, "learning_rate": 3.2981148923336108e-06, "loss": 0.6293, "step": 14988 }, { "epoch": 0.6222761924036969, "grad_norm": 2.7426607608795166, "learning_rate": 3.297482737470037e-06, "loss": 0.5081, "step": 14989 }, { "epoch": 0.6223177079279083, "grad_norm": 2.4359703063964844, "learning_rate": 3.2968506133884646e-06, "loss": 0.4824, "step": 14990 }, { "epoch": 0.6223592234521196, "grad_norm": 2.089182138442993, "learning_rate": 3.296218520100326e-06, "loss": 0.5102, "step": 14991 }, { "epoch": 0.622400738976331, "grad_norm": 2.9145259857177734, "learning_rate": 3.295586457617046e-06, "loss": 0.6756, "step": 14992 }, { "epoch": 0.6224422545005422, "grad_norm": 2.4051878452301025, "learning_rate": 3.294954425950056e-06, "loss": 0.5514, "step": 14993 }, { "epoch": 0.6224837700247536, "grad_norm": 1.9869654178619385, "learning_rate": 3.2943224251107795e-06, "loss": 0.3994, "step": 14994 }, { "epoch": 0.6225252855489649, "grad_norm": 2.1184043884277344, "learning_rate": 3.293690455110645e-06, "loss": 0.4578, "step": 14995 }, { "epoch": 0.6225668010731763, "grad_norm": 2.056941032409668, "learning_rate": 3.2930585159610794e-06, "loss": 0.5345, "step": 14996 }, { "epoch": 0.6226083165973877, "grad_norm": 2.5325074195861816, "learning_rate": 3.2924266076735094e-06, "loss": 0.6355, "step": 14997 }, { "epoch": 0.622649832121599, "grad_norm": 2.676574230194092, "learning_rate": 3.2917947302593565e-06, "loss": 0.6917, "step": 14998 }, { "epoch": 0.6226913476458104, "grad_norm": 2.4293758869171143, "learning_rate": 3.291162883730048e-06, "loss": 0.5493, "step": 14999 }, { "epoch": 0.6227328631700216, "grad_norm": 2.0591254234313965, "learning_rate": 3.290531068097006e-06, "loss": 0.4835, "step": 15000 }, { "epoch": 0.622774378694233, "grad_norm": 2.3462822437286377, "learning_rate": 3.289899283371657e-06, "loss": 0.5733, "step": 15001 }, { "epoch": 0.6228158942184443, "grad_norm": 2.171651601791382, "learning_rate": 3.289267529565421e-06, "loss": 0.5428, "step": 15002 }, { "epoch": 0.6228574097426557, "grad_norm": 2.21769642829895, "learning_rate": 3.2886358066897205e-06, "loss": 0.4617, "step": 15003 }, { "epoch": 0.622898925266867, "grad_norm": 2.110492467880249, "learning_rate": 3.2880041147559794e-06, "loss": 0.4221, "step": 15004 }, { "epoch": 0.6229404407910784, "grad_norm": 2.3960516452789307, "learning_rate": 3.2873724537756158e-06, "loss": 0.5164, "step": 15005 }, { "epoch": 0.6229819563152896, "grad_norm": 2.2986555099487305, "learning_rate": 3.286740823760054e-06, "loss": 0.4278, "step": 15006 }, { "epoch": 0.623023471839501, "grad_norm": 2.928955554962158, "learning_rate": 3.286109224720709e-06, "loss": 0.4945, "step": 15007 }, { "epoch": 0.6230649873637123, "grad_norm": 2.1967289447784424, "learning_rate": 3.2854776566690044e-06, "loss": 0.4455, "step": 15008 }, { "epoch": 0.6231065028879237, "grad_norm": 2.336848735809326, "learning_rate": 3.284846119616358e-06, "loss": 0.5608, "step": 15009 }, { "epoch": 0.623148018412135, "grad_norm": 2.466674327850342, "learning_rate": 3.2842146135741895e-06, "loss": 0.4971, "step": 15010 }, { "epoch": 0.6231895339363464, "grad_norm": 2.085601568222046, "learning_rate": 3.2835831385539136e-06, "loss": 0.4504, "step": 15011 }, { "epoch": 0.6232310494605576, "grad_norm": 2.2537600994110107, "learning_rate": 3.2829516945669493e-06, "loss": 0.5407, "step": 15012 }, { "epoch": 0.623272564984769, "grad_norm": 2.3901638984680176, "learning_rate": 3.282320281624713e-06, "loss": 0.6251, "step": 15013 }, { "epoch": 0.6233140805089803, "grad_norm": 2.2304537296295166, "learning_rate": 3.281688899738622e-06, "loss": 0.3972, "step": 15014 }, { "epoch": 0.6233555960331917, "grad_norm": 2.867307186126709, "learning_rate": 3.281057548920091e-06, "loss": 0.405, "step": 15015 }, { "epoch": 0.623397111557403, "grad_norm": 3.2429144382476807, "learning_rate": 3.2804262291805346e-06, "loss": 0.6247, "step": 15016 }, { "epoch": 0.6234386270816143, "grad_norm": 2.3482558727264404, "learning_rate": 3.2797949405313685e-06, "loss": 0.6349, "step": 15017 }, { "epoch": 0.6234801426058256, "grad_norm": 2.6890037059783936, "learning_rate": 3.2791636829840046e-06, "loss": 0.6191, "step": 15018 }, { "epoch": 0.623521658130037, "grad_norm": 2.571049213409424, "learning_rate": 3.2785324565498587e-06, "loss": 0.6409, "step": 15019 }, { "epoch": 0.6235631736542483, "grad_norm": 1.9336276054382324, "learning_rate": 3.2779012612403403e-06, "loss": 0.4286, "step": 15020 }, { "epoch": 0.6236046891784597, "grad_norm": 2.4757065773010254, "learning_rate": 3.277270097066864e-06, "loss": 0.5032, "step": 15021 }, { "epoch": 0.623646204702671, "grad_norm": 2.3854143619537354, "learning_rate": 3.276638964040841e-06, "loss": 0.524, "step": 15022 }, { "epoch": 0.6236877202268823, "grad_norm": 3.3122060298919678, "learning_rate": 3.2760078621736837e-06, "loss": 0.4748, "step": 15023 }, { "epoch": 0.6237292357510936, "grad_norm": 2.0919578075408936, "learning_rate": 3.2753767914767997e-06, "loss": 0.4712, "step": 15024 }, { "epoch": 0.623770751275305, "grad_norm": 2.3098762035369873, "learning_rate": 3.2747457519616e-06, "loss": 0.4498, "step": 15025 }, { "epoch": 0.6238122667995163, "grad_norm": 2.44473934173584, "learning_rate": 3.274114743639494e-06, "loss": 0.6054, "step": 15026 }, { "epoch": 0.6238537823237277, "grad_norm": 2.0382936000823975, "learning_rate": 3.273483766521893e-06, "loss": 0.3985, "step": 15027 }, { "epoch": 0.6238952978479391, "grad_norm": 2.3682868480682373, "learning_rate": 3.272852820620202e-06, "loss": 0.5545, "step": 15028 }, { "epoch": 0.6239368133721503, "grad_norm": 2.0803377628326416, "learning_rate": 3.2722219059458295e-06, "loss": 0.5725, "step": 15029 }, { "epoch": 0.6239783288963617, "grad_norm": 2.357977867126465, "learning_rate": 3.271591022510183e-06, "loss": 0.6058, "step": 15030 }, { "epoch": 0.624019844420573, "grad_norm": 2.3269171714782715, "learning_rate": 3.2709601703246697e-06, "loss": 0.5635, "step": 15031 }, { "epoch": 0.6240613599447844, "grad_norm": 3.178347110748291, "learning_rate": 3.270329349400696e-06, "loss": 0.4876, "step": 15032 }, { "epoch": 0.6241028754689957, "grad_norm": 2.15295147895813, "learning_rate": 3.2696985597496633e-06, "loss": 0.4719, "step": 15033 }, { "epoch": 0.6241443909932071, "grad_norm": 2.297586679458618, "learning_rate": 3.26906780138298e-06, "loss": 0.5964, "step": 15034 }, { "epoch": 0.6241859065174183, "grad_norm": 2.413550853729248, "learning_rate": 3.2684370743120507e-06, "loss": 0.6368, "step": 15035 }, { "epoch": 0.6242274220416297, "grad_norm": 2.921659231185913, "learning_rate": 3.2678063785482793e-06, "loss": 0.6134, "step": 15036 }, { "epoch": 0.624268937565841, "grad_norm": 2.659653663635254, "learning_rate": 3.2671757141030657e-06, "loss": 0.5437, "step": 15037 }, { "epoch": 0.6243104530900524, "grad_norm": 2.690955638885498, "learning_rate": 3.2665450809878154e-06, "loss": 0.5802, "step": 15038 }, { "epoch": 0.6243519686142637, "grad_norm": 2.3189141750335693, "learning_rate": 3.2659144792139284e-06, "loss": 0.4981, "step": 15039 }, { "epoch": 0.6243934841384751, "grad_norm": 2.5687577724456787, "learning_rate": 3.26528390879281e-06, "loss": 0.5182, "step": 15040 }, { "epoch": 0.6244349996626863, "grad_norm": 2.4398038387298584, "learning_rate": 3.2646533697358573e-06, "loss": 0.5273, "step": 15041 }, { "epoch": 0.6244765151868977, "grad_norm": 2.299595355987549, "learning_rate": 3.264022862054471e-06, "loss": 0.5611, "step": 15042 }, { "epoch": 0.624518030711109, "grad_norm": 2.672346830368042, "learning_rate": 3.2633923857600534e-06, "loss": 0.4586, "step": 15043 }, { "epoch": 0.6245595462353204, "grad_norm": 2.5765252113342285, "learning_rate": 3.262761940864001e-06, "loss": 0.4751, "step": 15044 }, { "epoch": 0.6246010617595317, "grad_norm": 2.348996639251709, "learning_rate": 3.262131527377715e-06, "loss": 0.5519, "step": 15045 }, { "epoch": 0.6246425772837431, "grad_norm": 2.126906633377075, "learning_rate": 3.26150114531259e-06, "loss": 0.4512, "step": 15046 }, { "epoch": 0.6246840928079543, "grad_norm": 2.832534074783325, "learning_rate": 3.2608707946800257e-06, "loss": 0.4168, "step": 15047 }, { "epoch": 0.6247256083321657, "grad_norm": 2.274200201034546, "learning_rate": 3.2602404754914173e-06, "loss": 0.5855, "step": 15048 }, { "epoch": 0.624767123856377, "grad_norm": 2.49725604057312, "learning_rate": 3.2596101877581664e-06, "loss": 0.5067, "step": 15049 }, { "epoch": 0.6248086393805884, "grad_norm": 2.0244522094726562, "learning_rate": 3.2589799314916613e-06, "loss": 0.5084, "step": 15050 }, { "epoch": 0.6248501549047997, "grad_norm": 2.422025203704834, "learning_rate": 3.258349706703302e-06, "loss": 0.5315, "step": 15051 }, { "epoch": 0.6248916704290111, "grad_norm": 2.947927951812744, "learning_rate": 3.257719513404482e-06, "loss": 0.5243, "step": 15052 }, { "epoch": 0.6249331859532223, "grad_norm": 2.5849242210388184, "learning_rate": 3.257089351606596e-06, "loss": 0.5319, "step": 15053 }, { "epoch": 0.6249747014774337, "grad_norm": 2.2598302364349365, "learning_rate": 3.2564592213210368e-06, "loss": 0.5457, "step": 15054 }, { "epoch": 0.625016217001645, "grad_norm": 2.340592384338379, "learning_rate": 3.2558291225591965e-06, "loss": 0.4034, "step": 15055 }, { "epoch": 0.6250577325258564, "grad_norm": 2.625042200088501, "learning_rate": 3.2551990553324696e-06, "loss": 0.4822, "step": 15056 }, { "epoch": 0.6250992480500677, "grad_norm": 2.292357921600342, "learning_rate": 3.2545690196522455e-06, "loss": 0.4782, "step": 15057 }, { "epoch": 0.6251407635742791, "grad_norm": 2.950007438659668, "learning_rate": 3.2539390155299187e-06, "loss": 0.5898, "step": 15058 }, { "epoch": 0.6251822790984904, "grad_norm": 2.480445623397827, "learning_rate": 3.253309042976876e-06, "loss": 0.5352, "step": 15059 }, { "epoch": 0.6252237946227017, "grad_norm": 2.8174381256103516, "learning_rate": 3.252679102004509e-06, "loss": 0.5146, "step": 15060 }, { "epoch": 0.6252653101469131, "grad_norm": 2.283045768737793, "learning_rate": 3.252049192624207e-06, "loss": 0.4614, "step": 15061 }, { "epoch": 0.6253068256711244, "grad_norm": 2.627997636795044, "learning_rate": 3.2514193148473623e-06, "loss": 0.5045, "step": 15062 }, { "epoch": 0.6253483411953358, "grad_norm": 2.1553494930267334, "learning_rate": 3.250789468685358e-06, "loss": 0.4734, "step": 15063 }, { "epoch": 0.625389856719547, "grad_norm": 2.3128485679626465, "learning_rate": 3.250159654149584e-06, "loss": 0.4738, "step": 15064 }, { "epoch": 0.6254313722437584, "grad_norm": 2.245609760284424, "learning_rate": 3.2495298712514278e-06, "loss": 0.5094, "step": 15065 }, { "epoch": 0.6254728877679697, "grad_norm": 2.451720714569092, "learning_rate": 3.248900120002277e-06, "loss": 0.6326, "step": 15066 }, { "epoch": 0.6255144032921811, "grad_norm": 2.5990498065948486, "learning_rate": 3.2482704004135178e-06, "loss": 0.5929, "step": 15067 }, { "epoch": 0.6255559188163924, "grad_norm": 2.551985502243042, "learning_rate": 3.2476407124965336e-06, "loss": 0.5762, "step": 15068 }, { "epoch": 0.6255974343406038, "grad_norm": 2.5653374195098877, "learning_rate": 3.24701105626271e-06, "loss": 0.6229, "step": 15069 }, { "epoch": 0.625638949864815, "grad_norm": 2.5155088901519775, "learning_rate": 3.246381431723431e-06, "loss": 0.6551, "step": 15070 }, { "epoch": 0.6256804653890264, "grad_norm": 2.3037166595458984, "learning_rate": 3.2457518388900846e-06, "loss": 0.5578, "step": 15071 }, { "epoch": 0.6257219809132377, "grad_norm": 2.6355671882629395, "learning_rate": 3.245122277774047e-06, "loss": 0.4964, "step": 15072 }, { "epoch": 0.6257634964374491, "grad_norm": 2.758769989013672, "learning_rate": 3.244492748386705e-06, "loss": 0.5276, "step": 15073 }, { "epoch": 0.6258050119616604, "grad_norm": 2.6223182678222656, "learning_rate": 3.24386325073944e-06, "loss": 0.4966, "step": 15074 }, { "epoch": 0.6258465274858718, "grad_norm": 2.367025852203369, "learning_rate": 3.2432337848436362e-06, "loss": 0.5401, "step": 15075 }, { "epoch": 0.625888043010083, "grad_norm": 2.2526769638061523, "learning_rate": 3.2426043507106695e-06, "loss": 0.4736, "step": 15076 }, { "epoch": 0.6259295585342944, "grad_norm": 2.330030679702759, "learning_rate": 3.2419749483519225e-06, "loss": 0.4091, "step": 15077 }, { "epoch": 0.6259710740585057, "grad_norm": 2.337355852127075, "learning_rate": 3.241345577778775e-06, "loss": 0.6688, "step": 15078 }, { "epoch": 0.6260125895827171, "grad_norm": 2.6919937133789062, "learning_rate": 3.2407162390026083e-06, "loss": 0.5042, "step": 15079 }, { "epoch": 0.6260541051069284, "grad_norm": 2.779449462890625, "learning_rate": 3.2400869320347984e-06, "loss": 0.4927, "step": 15080 }, { "epoch": 0.6260956206311398, "grad_norm": 2.351820230484009, "learning_rate": 3.2394576568867243e-06, "loss": 0.4583, "step": 15081 }, { "epoch": 0.626137136155351, "grad_norm": 2.1532161235809326, "learning_rate": 3.2388284135697612e-06, "loss": 0.4952, "step": 15082 }, { "epoch": 0.6261786516795624, "grad_norm": 2.196798324584961, "learning_rate": 3.238199202095289e-06, "loss": 0.4602, "step": 15083 }, { "epoch": 0.6262201672037737, "grad_norm": 2.795367479324341, "learning_rate": 3.2375700224746853e-06, "loss": 0.5366, "step": 15084 }, { "epoch": 0.6262616827279851, "grad_norm": 2.4738364219665527, "learning_rate": 3.236940874719321e-06, "loss": 0.5111, "step": 15085 }, { "epoch": 0.6263031982521964, "grad_norm": 2.237518548965454, "learning_rate": 3.2363117588405734e-06, "loss": 0.5654, "step": 15086 }, { "epoch": 0.6263447137764078, "grad_norm": 2.5710043907165527, "learning_rate": 3.2356826748498182e-06, "loss": 0.6164, "step": 15087 }, { "epoch": 0.626386229300619, "grad_norm": 2.200727939605713, "learning_rate": 3.2350536227584306e-06, "loss": 0.4581, "step": 15088 }, { "epoch": 0.6264277448248304, "grad_norm": 2.3587210178375244, "learning_rate": 3.2344246025777802e-06, "loss": 0.587, "step": 15089 }, { "epoch": 0.6264692603490418, "grad_norm": 2.530546188354492, "learning_rate": 3.2337956143192423e-06, "loss": 0.4312, "step": 15090 }, { "epoch": 0.6265107758732531, "grad_norm": 2.502915382385254, "learning_rate": 3.2331666579941883e-06, "loss": 0.4797, "step": 15091 }, { "epoch": 0.6265522913974645, "grad_norm": 2.273893117904663, "learning_rate": 3.2325377336139917e-06, "loss": 0.4762, "step": 15092 }, { "epoch": 0.6265938069216758, "grad_norm": 2.427164077758789, "learning_rate": 3.2319088411900222e-06, "loss": 0.3723, "step": 15093 }, { "epoch": 0.6266353224458872, "grad_norm": 2.423335552215576, "learning_rate": 3.2312799807336505e-06, "loss": 0.5789, "step": 15094 }, { "epoch": 0.6266768379700984, "grad_norm": 1.8990684747695923, "learning_rate": 3.2306511522562458e-06, "loss": 0.44, "step": 15095 }, { "epoch": 0.6267183534943098, "grad_norm": 2.190957546234131, "learning_rate": 3.2300223557691774e-06, "loss": 0.4626, "step": 15096 }, { "epoch": 0.6267598690185211, "grad_norm": 2.291771650314331, "learning_rate": 3.2293935912838175e-06, "loss": 0.3948, "step": 15097 }, { "epoch": 0.6268013845427325, "grad_norm": 2.1927285194396973, "learning_rate": 3.2287648588115293e-06, "loss": 0.4874, "step": 15098 }, { "epoch": 0.6268429000669438, "grad_norm": 2.3281469345092773, "learning_rate": 3.228136158363684e-06, "loss": 0.5204, "step": 15099 }, { "epoch": 0.6268844155911552, "grad_norm": 2.659902334213257, "learning_rate": 3.2275074899516466e-06, "loss": 0.502, "step": 15100 }, { "epoch": 0.6269259311153664, "grad_norm": 2.453303337097168, "learning_rate": 3.226878853586788e-06, "loss": 0.4217, "step": 15101 }, { "epoch": 0.6269674466395778, "grad_norm": 2.281404972076416, "learning_rate": 3.2262502492804676e-06, "loss": 0.4901, "step": 15102 }, { "epoch": 0.6270089621637891, "grad_norm": 2.3285932540893555, "learning_rate": 3.225621677044054e-06, "loss": 0.4262, "step": 15103 }, { "epoch": 0.6270504776880005, "grad_norm": 2.0826504230499268, "learning_rate": 3.2249931368889125e-06, "loss": 0.4357, "step": 15104 }, { "epoch": 0.6270919932122118, "grad_norm": 2.1612794399261475, "learning_rate": 3.2243646288264073e-06, "loss": 0.4697, "step": 15105 }, { "epoch": 0.6271335087364232, "grad_norm": 2.547100305557251, "learning_rate": 3.2237361528679017e-06, "loss": 0.4539, "step": 15106 }, { "epoch": 0.6271750242606344, "grad_norm": 2.65500545501709, "learning_rate": 3.2231077090247588e-06, "loss": 0.608, "step": 15107 }, { "epoch": 0.6272165397848458, "grad_norm": 2.375115156173706, "learning_rate": 3.2224792973083384e-06, "loss": 0.5476, "step": 15108 }, { "epoch": 0.6272580553090571, "grad_norm": 2.512589454650879, "learning_rate": 3.2218509177300058e-06, "loss": 0.4654, "step": 15109 }, { "epoch": 0.6272995708332685, "grad_norm": 2.1155591011047363, "learning_rate": 3.2212225703011224e-06, "loss": 0.615, "step": 15110 }, { "epoch": 0.6273410863574798, "grad_norm": 2.373380422592163, "learning_rate": 3.220594255033046e-06, "loss": 0.4505, "step": 15111 }, { "epoch": 0.6273826018816911, "grad_norm": 2.6956427097320557, "learning_rate": 3.2199659719371386e-06, "loss": 0.4948, "step": 15112 }, { "epoch": 0.6274241174059024, "grad_norm": 2.1925060749053955, "learning_rate": 3.219337721024759e-06, "loss": 0.523, "step": 15113 }, { "epoch": 0.6274656329301138, "grad_norm": 2.2876510620117188, "learning_rate": 3.218709502307269e-06, "loss": 0.5301, "step": 15114 }, { "epoch": 0.6275071484543251, "grad_norm": 2.321032762527466, "learning_rate": 3.2180813157960223e-06, "loss": 0.4688, "step": 15115 }, { "epoch": 0.6275486639785365, "grad_norm": 2.659158945083618, "learning_rate": 3.217453161502379e-06, "loss": 0.5715, "step": 15116 }, { "epoch": 0.6275901795027478, "grad_norm": 2.5176758766174316, "learning_rate": 3.2168250394376964e-06, "loss": 0.5529, "step": 15117 }, { "epoch": 0.6276316950269591, "grad_norm": 2.4810791015625, "learning_rate": 3.2161969496133337e-06, "loss": 0.5002, "step": 15118 }, { "epoch": 0.6276732105511704, "grad_norm": 2.3859524726867676, "learning_rate": 3.2155688920406415e-06, "loss": 0.6708, "step": 15119 }, { "epoch": 0.6277147260753818, "grad_norm": 2.5583126544952393, "learning_rate": 3.214940866730979e-06, "loss": 0.4223, "step": 15120 }, { "epoch": 0.6277562415995932, "grad_norm": 2.303823947906494, "learning_rate": 3.2143128736957e-06, "loss": 0.5374, "step": 15121 }, { "epoch": 0.6277977571238045, "grad_norm": 2.5861711502075195, "learning_rate": 3.213684912946158e-06, "loss": 0.5037, "step": 15122 }, { "epoch": 0.6278392726480159, "grad_norm": 2.106623649597168, "learning_rate": 3.2130569844937097e-06, "loss": 0.6016, "step": 15123 }, { "epoch": 0.6278807881722271, "grad_norm": 2.811471700668335, "learning_rate": 3.2124290883497043e-06, "loss": 0.5246, "step": 15124 }, { "epoch": 0.6279223036964385, "grad_norm": 2.8183701038360596, "learning_rate": 3.2118012245254972e-06, "loss": 0.4573, "step": 15125 }, { "epoch": 0.6279638192206498, "grad_norm": 1.9453017711639404, "learning_rate": 3.2111733930324386e-06, "loss": 0.4009, "step": 15126 }, { "epoch": 0.6280053347448612, "grad_norm": 2.070542097091675, "learning_rate": 3.2105455938818825e-06, "loss": 0.4835, "step": 15127 }, { "epoch": 0.6280468502690725, "grad_norm": 2.1633925437927246, "learning_rate": 3.209917827085176e-06, "loss": 0.5176, "step": 15128 }, { "epoch": 0.6280883657932839, "grad_norm": 2.471405267715454, "learning_rate": 3.2092900926536705e-06, "loss": 0.4894, "step": 15129 }, { "epoch": 0.6281298813174951, "grad_norm": 2.618630886077881, "learning_rate": 3.2086623905987172e-06, "loss": 0.5651, "step": 15130 }, { "epoch": 0.6281713968417065, "grad_norm": 2.18316650390625, "learning_rate": 3.2080347209316657e-06, "loss": 0.4386, "step": 15131 }, { "epoch": 0.6282129123659178, "grad_norm": 2.8231446743011475, "learning_rate": 3.2074070836638616e-06, "loss": 0.4762, "step": 15132 }, { "epoch": 0.6282544278901292, "grad_norm": 2.655010223388672, "learning_rate": 3.2067794788066543e-06, "loss": 0.4263, "step": 15133 }, { "epoch": 0.6282959434143405, "grad_norm": 2.0324106216430664, "learning_rate": 3.20615190637139e-06, "loss": 0.5543, "step": 15134 }, { "epoch": 0.6283374589385519, "grad_norm": 2.281585931777954, "learning_rate": 3.205524366369417e-06, "loss": 0.43, "step": 15135 }, { "epoch": 0.6283789744627631, "grad_norm": 1.9933940172195435, "learning_rate": 3.204896858812082e-06, "loss": 0.4561, "step": 15136 }, { "epoch": 0.6284204899869745, "grad_norm": 2.985531806945801, "learning_rate": 3.2042693837107275e-06, "loss": 0.4921, "step": 15137 }, { "epoch": 0.6284620055111858, "grad_norm": 1.9981971979141235, "learning_rate": 3.2036419410766994e-06, "loss": 0.5248, "step": 15138 }, { "epoch": 0.6285035210353972, "grad_norm": 2.406372308731079, "learning_rate": 3.203014530921343e-06, "loss": 0.4546, "step": 15139 }, { "epoch": 0.6285450365596085, "grad_norm": 2.4108669757843018, "learning_rate": 3.2023871532560046e-06, "loss": 0.4675, "step": 15140 }, { "epoch": 0.6285865520838199, "grad_norm": 2.5203680992126465, "learning_rate": 3.2017598080920224e-06, "loss": 0.7768, "step": 15141 }, { "epoch": 0.6286280676080311, "grad_norm": 2.598949432373047, "learning_rate": 3.201132495440741e-06, "loss": 0.5175, "step": 15142 }, { "epoch": 0.6286695831322425, "grad_norm": 2.4733214378356934, "learning_rate": 3.200505215313503e-06, "loss": 0.5636, "step": 15143 }, { "epoch": 0.6287110986564538, "grad_norm": 2.7265286445617676, "learning_rate": 3.1998779677216508e-06, "loss": 0.4293, "step": 15144 }, { "epoch": 0.6287526141806652, "grad_norm": 2.2814598083496094, "learning_rate": 3.1992507526765215e-06, "loss": 0.5021, "step": 15145 }, { "epoch": 0.6287941297048765, "grad_norm": 2.60654878616333, "learning_rate": 3.1986235701894596e-06, "loss": 0.4887, "step": 15146 }, { "epoch": 0.6288356452290879, "grad_norm": 2.507139205932617, "learning_rate": 3.1979964202718004e-06, "loss": 0.4359, "step": 15147 }, { "epoch": 0.6288771607532991, "grad_norm": 2.8357415199279785, "learning_rate": 3.1973693029348863e-06, "loss": 0.6526, "step": 15148 }, { "epoch": 0.6289186762775105, "grad_norm": 2.144232988357544, "learning_rate": 3.196742218190057e-06, "loss": 0.454, "step": 15149 }, { "epoch": 0.6289601918017218, "grad_norm": 2.504429817199707, "learning_rate": 3.1961151660486455e-06, "loss": 0.7063, "step": 15150 }, { "epoch": 0.6290017073259332, "grad_norm": 2.6061272621154785, "learning_rate": 3.1954881465219922e-06, "loss": 0.6213, "step": 15151 }, { "epoch": 0.6290432228501446, "grad_norm": 2.2182693481445312, "learning_rate": 3.194861159621433e-06, "loss": 0.585, "step": 15152 }, { "epoch": 0.6290847383743559, "grad_norm": 2.6510391235351562, "learning_rate": 3.1942342053583065e-06, "loss": 0.6723, "step": 15153 }, { "epoch": 0.6291262538985672, "grad_norm": 2.2040789127349854, "learning_rate": 3.1936072837439436e-06, "loss": 0.5097, "step": 15154 }, { "epoch": 0.6291677694227785, "grad_norm": 2.4772541522979736, "learning_rate": 3.192980394789682e-06, "loss": 0.5319, "step": 15155 }, { "epoch": 0.6292092849469899, "grad_norm": 2.2634568214416504, "learning_rate": 3.192353538506856e-06, "loss": 0.4728, "step": 15156 }, { "epoch": 0.6292508004712012, "grad_norm": 2.3825387954711914, "learning_rate": 3.191726714906801e-06, "loss": 0.4728, "step": 15157 }, { "epoch": 0.6292923159954126, "grad_norm": 2.6533279418945312, "learning_rate": 3.1910999240008457e-06, "loss": 0.5566, "step": 15158 }, { "epoch": 0.6293338315196239, "grad_norm": 2.7002952098846436, "learning_rate": 3.1904731658003264e-06, "loss": 0.51, "step": 15159 }, { "epoch": 0.6293753470438352, "grad_norm": 2.3551008701324463, "learning_rate": 3.189846440316573e-06, "loss": 0.6048, "step": 15160 }, { "epoch": 0.6294168625680465, "grad_norm": 2.2437305450439453, "learning_rate": 3.1892197475609176e-06, "loss": 0.546, "step": 15161 }, { "epoch": 0.6294583780922579, "grad_norm": 2.182189702987671, "learning_rate": 3.188593087544693e-06, "loss": 0.642, "step": 15162 }, { "epoch": 0.6294998936164692, "grad_norm": 2.5842819213867188, "learning_rate": 3.187966460279226e-06, "loss": 0.4684, "step": 15163 }, { "epoch": 0.6295414091406806, "grad_norm": 2.412003517150879, "learning_rate": 3.187339865775847e-06, "loss": 0.5064, "step": 15164 }, { "epoch": 0.6295829246648919, "grad_norm": 2.212540864944458, "learning_rate": 3.1867133040458854e-06, "loss": 0.545, "step": 15165 }, { "epoch": 0.6296244401891032, "grad_norm": 2.2508935928344727, "learning_rate": 3.186086775100673e-06, "loss": 0.5071, "step": 15166 }, { "epoch": 0.6296659557133145, "grad_norm": 2.0716545581817627, "learning_rate": 3.1854602789515313e-06, "loss": 0.4622, "step": 15167 }, { "epoch": 0.6297074712375259, "grad_norm": 2.4516799449920654, "learning_rate": 3.184833815609792e-06, "loss": 0.4909, "step": 15168 }, { "epoch": 0.6297489867617372, "grad_norm": 2.2730977535247803, "learning_rate": 3.1842073850867793e-06, "loss": 0.4734, "step": 15169 }, { "epoch": 0.6297905022859486, "grad_norm": 2.362834930419922, "learning_rate": 3.1835809873938234e-06, "loss": 0.4498, "step": 15170 }, { "epoch": 0.6298320178101598, "grad_norm": 2.312304735183716, "learning_rate": 3.182954622542245e-06, "loss": 0.6259, "step": 15171 }, { "epoch": 0.6298735333343712, "grad_norm": 2.3907504081726074, "learning_rate": 3.1823282905433707e-06, "loss": 0.5881, "step": 15172 }, { "epoch": 0.6299150488585825, "grad_norm": 2.189436435699463, "learning_rate": 3.1817019914085245e-06, "loss": 0.5176, "step": 15173 }, { "epoch": 0.6299565643827939, "grad_norm": 2.212331533432007, "learning_rate": 3.1810757251490298e-06, "loss": 0.5386, "step": 15174 }, { "epoch": 0.6299980799070052, "grad_norm": 2.1631789207458496, "learning_rate": 3.180449491776213e-06, "loss": 0.3758, "step": 15175 }, { "epoch": 0.6300395954312166, "grad_norm": 2.0982601642608643, "learning_rate": 3.1798232913013916e-06, "loss": 0.5546, "step": 15176 }, { "epoch": 0.6300811109554278, "grad_norm": 2.679077386856079, "learning_rate": 3.1791971237358893e-06, "loss": 0.4334, "step": 15177 }, { "epoch": 0.6301226264796392, "grad_norm": 2.238497495651245, "learning_rate": 3.178570989091028e-06, "loss": 0.5123, "step": 15178 }, { "epoch": 0.6301641420038505, "grad_norm": 2.8870689868927, "learning_rate": 3.1779448873781303e-06, "loss": 0.4924, "step": 15179 }, { "epoch": 0.6302056575280619, "grad_norm": 2.421309471130371, "learning_rate": 3.1773188186085126e-06, "loss": 0.5841, "step": 15180 }, { "epoch": 0.6302471730522732, "grad_norm": 2.2149453163146973, "learning_rate": 3.1766927827934957e-06, "loss": 0.4017, "step": 15181 }, { "epoch": 0.6302886885764846, "grad_norm": 2.066659688949585, "learning_rate": 3.1760667799444e-06, "loss": 0.6157, "step": 15182 }, { "epoch": 0.630330204100696, "grad_norm": 2.7245190143585205, "learning_rate": 3.175440810072542e-06, "loss": 0.4489, "step": 15183 }, { "epoch": 0.6303717196249072, "grad_norm": 2.286686420440674, "learning_rate": 3.1748148731892396e-06, "loss": 0.5062, "step": 15184 }, { "epoch": 0.6304132351491186, "grad_norm": 2.4899961948394775, "learning_rate": 3.1741889693058112e-06, "loss": 0.6136, "step": 15185 }, { "epoch": 0.6304547506733299, "grad_norm": 2.318441390991211, "learning_rate": 3.1735630984335718e-06, "loss": 0.4579, "step": 15186 }, { "epoch": 0.6304962661975413, "grad_norm": 2.415642023086548, "learning_rate": 3.1729372605838376e-06, "loss": 0.576, "step": 15187 }, { "epoch": 0.6305377817217526, "grad_norm": 1.9313268661499023, "learning_rate": 3.1723114557679267e-06, "loss": 0.437, "step": 15188 }, { "epoch": 0.630579297245964, "grad_norm": 2.3313167095184326, "learning_rate": 3.1716856839971487e-06, "loss": 0.4104, "step": 15189 }, { "epoch": 0.6306208127701752, "grad_norm": 2.1178712844848633, "learning_rate": 3.1710599452828216e-06, "loss": 0.3894, "step": 15190 }, { "epoch": 0.6306623282943866, "grad_norm": 2.315706968307495, "learning_rate": 3.1704342396362577e-06, "loss": 0.449, "step": 15191 }, { "epoch": 0.6307038438185979, "grad_norm": 2.342674493789673, "learning_rate": 3.169808567068771e-06, "loss": 0.4775, "step": 15192 }, { "epoch": 0.6307453593428093, "grad_norm": 2.662947177886963, "learning_rate": 3.169182927591672e-06, "loss": 0.4248, "step": 15193 }, { "epoch": 0.6307868748670206, "grad_norm": 2.191815137863159, "learning_rate": 3.168557321216273e-06, "loss": 0.4317, "step": 15194 }, { "epoch": 0.630828390391232, "grad_norm": 2.7601590156555176, "learning_rate": 3.1679317479538864e-06, "loss": 0.5858, "step": 15195 }, { "epoch": 0.6308699059154432, "grad_norm": 2.6458277702331543, "learning_rate": 3.1673062078158222e-06, "loss": 0.5481, "step": 15196 }, { "epoch": 0.6309114214396546, "grad_norm": 2.666810989379883, "learning_rate": 3.1666807008133884e-06, "loss": 0.4931, "step": 15197 }, { "epoch": 0.6309529369638659, "grad_norm": 2.2317914962768555, "learning_rate": 3.1660552269578978e-06, "loss": 0.5654, "step": 15198 }, { "epoch": 0.6309944524880773, "grad_norm": 2.1001462936401367, "learning_rate": 3.1654297862606554e-06, "loss": 0.4753, "step": 15199 }, { "epoch": 0.6310359680122886, "grad_norm": 2.1421544551849365, "learning_rate": 3.1648043787329717e-06, "loss": 0.4036, "step": 15200 }, { "epoch": 0.6310774835365, "grad_norm": 2.4952504634857178, "learning_rate": 3.1641790043861557e-06, "loss": 0.5044, "step": 15201 }, { "epoch": 0.6311189990607112, "grad_norm": 1.8968424797058105, "learning_rate": 3.1635536632315104e-06, "loss": 0.4568, "step": 15202 }, { "epoch": 0.6311605145849226, "grad_norm": 2.4142239093780518, "learning_rate": 3.162928355280345e-06, "loss": 0.5184, "step": 15203 }, { "epoch": 0.6312020301091339, "grad_norm": 2.5598816871643066, "learning_rate": 3.1623030805439636e-06, "loss": 0.4857, "step": 15204 }, { "epoch": 0.6312435456333453, "grad_norm": 2.1032841205596924, "learning_rate": 3.1616778390336737e-06, "loss": 0.4817, "step": 15205 }, { "epoch": 0.6312850611575566, "grad_norm": 2.2313365936279297, "learning_rate": 3.161052630760777e-06, "loss": 0.4532, "step": 15206 }, { "epoch": 0.631326576681768, "grad_norm": 2.282557487487793, "learning_rate": 3.160427455736579e-06, "loss": 0.4771, "step": 15207 }, { "epoch": 0.6313680922059792, "grad_norm": 2.2113585472106934, "learning_rate": 3.1598023139723833e-06, "loss": 0.4876, "step": 15208 }, { "epoch": 0.6314096077301906, "grad_norm": 2.1756603717803955, "learning_rate": 3.159177205479492e-06, "loss": 0.4131, "step": 15209 }, { "epoch": 0.6314511232544019, "grad_norm": 2.45808744430542, "learning_rate": 3.1585521302692073e-06, "loss": 0.5344, "step": 15210 }, { "epoch": 0.6314926387786133, "grad_norm": 2.4473884105682373, "learning_rate": 3.1579270883528314e-06, "loss": 0.4482, "step": 15211 }, { "epoch": 0.6315341543028246, "grad_norm": 2.7951197624206543, "learning_rate": 3.1573020797416636e-06, "loss": 0.5033, "step": 15212 }, { "epoch": 0.631575669827036, "grad_norm": 2.821291446685791, "learning_rate": 3.1566771044470057e-06, "loss": 0.7116, "step": 15213 }, { "epoch": 0.6316171853512473, "grad_norm": 3.8103866577148438, "learning_rate": 3.156052162480159e-06, "loss": 0.56, "step": 15214 }, { "epoch": 0.6316587008754586, "grad_norm": 3.5295779705047607, "learning_rate": 3.155427253852418e-06, "loss": 0.5214, "step": 15215 }, { "epoch": 0.63170021639967, "grad_norm": 2.348707437515259, "learning_rate": 3.1548023785750843e-06, "loss": 0.4974, "step": 15216 }, { "epoch": 0.6317417319238813, "grad_norm": 2.310553789138794, "learning_rate": 3.154177536659455e-06, "loss": 0.359, "step": 15217 }, { "epoch": 0.6317832474480927, "grad_norm": 2.133761167526245, "learning_rate": 3.153552728116831e-06, "loss": 0.5187, "step": 15218 }, { "epoch": 0.6318247629723039, "grad_norm": 2.7518246173858643, "learning_rate": 3.1529279529585034e-06, "loss": 0.4669, "step": 15219 }, { "epoch": 0.6318662784965153, "grad_norm": 2.2763631343841553, "learning_rate": 3.1523032111957703e-06, "loss": 0.4085, "step": 15220 }, { "epoch": 0.6319077940207266, "grad_norm": 3.0302846431732178, "learning_rate": 3.151678502839929e-06, "loss": 0.5163, "step": 15221 }, { "epoch": 0.631949309544938, "grad_norm": 2.1200923919677734, "learning_rate": 3.1510538279022727e-06, "loss": 0.4696, "step": 15222 }, { "epoch": 0.6319908250691493, "grad_norm": 2.9415225982666016, "learning_rate": 3.150429186394096e-06, "loss": 0.5869, "step": 15223 }, { "epoch": 0.6320323405933607, "grad_norm": 2.3871240615844727, "learning_rate": 3.1498045783266928e-06, "loss": 0.5554, "step": 15224 }, { "epoch": 0.6320738561175719, "grad_norm": 2.38006591796875, "learning_rate": 3.1491800037113555e-06, "loss": 0.6304, "step": 15225 }, { "epoch": 0.6321153716417833, "grad_norm": 2.6322996616363525, "learning_rate": 3.1485554625593774e-06, "loss": 0.4968, "step": 15226 }, { "epoch": 0.6321568871659946, "grad_norm": 2.0570037364959717, "learning_rate": 3.1479309548820525e-06, "loss": 0.4263, "step": 15227 }, { "epoch": 0.632198402690206, "grad_norm": 2.165238380432129, "learning_rate": 3.1473064806906674e-06, "loss": 0.4817, "step": 15228 }, { "epoch": 0.6322399182144173, "grad_norm": 2.2407045364379883, "learning_rate": 3.146682039996515e-06, "loss": 0.6453, "step": 15229 }, { "epoch": 0.6322814337386287, "grad_norm": 2.4142110347747803, "learning_rate": 3.146057632810885e-06, "loss": 0.597, "step": 15230 }, { "epoch": 0.6323229492628399, "grad_norm": 2.2628068923950195, "learning_rate": 3.1454332591450697e-06, "loss": 0.517, "step": 15231 }, { "epoch": 0.6323644647870513, "grad_norm": 2.4049360752105713, "learning_rate": 3.1448089190103535e-06, "loss": 0.514, "step": 15232 }, { "epoch": 0.6324059803112626, "grad_norm": 2.2914888858795166, "learning_rate": 3.1441846124180277e-06, "loss": 0.4492, "step": 15233 }, { "epoch": 0.632447495835474, "grad_norm": 2.2745819091796875, "learning_rate": 3.143560339379379e-06, "loss": 0.711, "step": 15234 }, { "epoch": 0.6324890113596853, "grad_norm": 2.3006722927093506, "learning_rate": 3.1429360999056953e-06, "loss": 0.5213, "step": 15235 }, { "epoch": 0.6325305268838967, "grad_norm": 2.8627841472625732, "learning_rate": 3.142311894008261e-06, "loss": 0.4911, "step": 15236 }, { "epoch": 0.6325720424081079, "grad_norm": 2.7347207069396973, "learning_rate": 3.141687721698363e-06, "loss": 0.5253, "step": 15237 }, { "epoch": 0.6326135579323193, "grad_norm": 2.3985671997070312, "learning_rate": 3.141063582987287e-06, "loss": 0.5951, "step": 15238 }, { "epoch": 0.6326550734565306, "grad_norm": 2.166057586669922, "learning_rate": 3.1404394778863166e-06, "loss": 0.5695, "step": 15239 }, { "epoch": 0.632696588980742, "grad_norm": 2.6220829486846924, "learning_rate": 3.139815406406738e-06, "loss": 0.5697, "step": 15240 }, { "epoch": 0.6327381045049533, "grad_norm": 2.58512806892395, "learning_rate": 3.139191368559832e-06, "loss": 0.4909, "step": 15241 }, { "epoch": 0.6327796200291647, "grad_norm": 2.5646684169769287, "learning_rate": 3.1385673643568813e-06, "loss": 0.663, "step": 15242 }, { "epoch": 0.6328211355533759, "grad_norm": 2.2134625911712646, "learning_rate": 3.1379433938091695e-06, "loss": 0.5228, "step": 15243 }, { "epoch": 0.6328626510775873, "grad_norm": 2.5313990116119385, "learning_rate": 3.1373194569279808e-06, "loss": 0.4937, "step": 15244 }, { "epoch": 0.6329041666017987, "grad_norm": 1.9156354665756226, "learning_rate": 3.1366955537245903e-06, "loss": 0.5271, "step": 15245 }, { "epoch": 0.63294568212601, "grad_norm": 2.541437864303589, "learning_rate": 3.136071684210281e-06, "loss": 0.6128, "step": 15246 }, { "epoch": 0.6329871976502214, "grad_norm": 2.4507241249084473, "learning_rate": 3.1354478483963348e-06, "loss": 0.7515, "step": 15247 }, { "epoch": 0.6330287131744327, "grad_norm": 2.3760790824890137, "learning_rate": 3.1348240462940282e-06, "loss": 0.483, "step": 15248 }, { "epoch": 0.633070228698644, "grad_norm": 2.1050400733947754, "learning_rate": 3.1342002779146398e-06, "loss": 0.3643, "step": 15249 }, { "epoch": 0.6331117442228553, "grad_norm": 2.090902328491211, "learning_rate": 3.1335765432694493e-06, "loss": 0.6338, "step": 15250 }, { "epoch": 0.6331532597470667, "grad_norm": 2.374824285507202, "learning_rate": 3.132952842369732e-06, "loss": 0.524, "step": 15251 }, { "epoch": 0.633194775271278, "grad_norm": 2.353140354156494, "learning_rate": 3.132329175226765e-06, "loss": 0.6424, "step": 15252 }, { "epoch": 0.6332362907954894, "grad_norm": 2.4608118534088135, "learning_rate": 3.1317055418518264e-06, "loss": 0.5554, "step": 15253 }, { "epoch": 0.6332778063197007, "grad_norm": 2.3042430877685547, "learning_rate": 3.131081942256189e-06, "loss": 0.4476, "step": 15254 }, { "epoch": 0.633319321843912, "grad_norm": 2.588452100753784, "learning_rate": 3.130458376451128e-06, "loss": 0.4927, "step": 15255 }, { "epoch": 0.6333608373681233, "grad_norm": 2.4366960525512695, "learning_rate": 3.129834844447919e-06, "loss": 0.5863, "step": 15256 }, { "epoch": 0.6334023528923347, "grad_norm": 1.973507046699524, "learning_rate": 3.1292113462578367e-06, "loss": 0.456, "step": 15257 }, { "epoch": 0.633443868416546, "grad_norm": 2.0238189697265625, "learning_rate": 3.1285878818921506e-06, "loss": 0.3839, "step": 15258 }, { "epoch": 0.6334853839407574, "grad_norm": 2.686328887939453, "learning_rate": 3.1279644513621352e-06, "loss": 0.4349, "step": 15259 }, { "epoch": 0.6335268994649687, "grad_norm": 2.8042194843292236, "learning_rate": 3.1273410546790627e-06, "loss": 0.7482, "step": 15260 }, { "epoch": 0.63356841498918, "grad_norm": 2.825988531112671, "learning_rate": 3.1267176918542043e-06, "loss": 0.608, "step": 15261 }, { "epoch": 0.6336099305133913, "grad_norm": 1.8855047225952148, "learning_rate": 3.1260943628988283e-06, "loss": 0.5102, "step": 15262 }, { "epoch": 0.6336514460376027, "grad_norm": 2.5794718265533447, "learning_rate": 3.1254710678242073e-06, "loss": 0.5566, "step": 15263 }, { "epoch": 0.633692961561814, "grad_norm": 2.1075425148010254, "learning_rate": 3.1248478066416094e-06, "loss": 0.5142, "step": 15264 }, { "epoch": 0.6337344770860254, "grad_norm": 2.6526989936828613, "learning_rate": 3.1242245793623048e-06, "loss": 0.4885, "step": 15265 }, { "epoch": 0.6337759926102366, "grad_norm": 2.470069646835327, "learning_rate": 3.123601385997559e-06, "loss": 0.5324, "step": 15266 }, { "epoch": 0.633817508134448, "grad_norm": 2.8500001430511475, "learning_rate": 3.122978226558641e-06, "loss": 0.5758, "step": 15267 }, { "epoch": 0.6338590236586593, "grad_norm": 1.908326268196106, "learning_rate": 3.1223551010568177e-06, "loss": 0.5043, "step": 15268 }, { "epoch": 0.6339005391828707, "grad_norm": 2.4061179161071777, "learning_rate": 3.1217320095033553e-06, "loss": 0.4926, "step": 15269 }, { "epoch": 0.633942054707082, "grad_norm": 1.9287234544754028, "learning_rate": 3.1211089519095216e-06, "loss": 0.4968, "step": 15270 }, { "epoch": 0.6339835702312934, "grad_norm": 2.4133596420288086, "learning_rate": 3.1204859282865773e-06, "loss": 0.5899, "step": 15271 }, { "epoch": 0.6340250857555046, "grad_norm": 2.454981803894043, "learning_rate": 3.119862938645789e-06, "loss": 0.4902, "step": 15272 }, { "epoch": 0.634066601279716, "grad_norm": 2.343021869659424, "learning_rate": 3.119239982998422e-06, "loss": 0.5644, "step": 15273 }, { "epoch": 0.6341081168039274, "grad_norm": 2.208507537841797, "learning_rate": 3.1186170613557386e-06, "loss": 0.5887, "step": 15274 }, { "epoch": 0.6341496323281387, "grad_norm": 2.0010995864868164, "learning_rate": 3.1179941737290006e-06, "loss": 0.452, "step": 15275 }, { "epoch": 0.6341911478523501, "grad_norm": 1.9458508491516113, "learning_rate": 3.117371320129469e-06, "loss": 0.5351, "step": 15276 }, { "epoch": 0.6342326633765614, "grad_norm": 2.825408697128296, "learning_rate": 3.1167485005684073e-06, "loss": 0.4152, "step": 15277 }, { "epoch": 0.6342741789007728, "grad_norm": 2.2704391479492188, "learning_rate": 3.116125715057077e-06, "loss": 0.5324, "step": 15278 }, { "epoch": 0.634315694424984, "grad_norm": 2.4167845249176025, "learning_rate": 3.1155029636067347e-06, "loss": 0.5162, "step": 15279 }, { "epoch": 0.6343572099491954, "grad_norm": 2.4790115356445312, "learning_rate": 3.1148802462286427e-06, "loss": 0.56, "step": 15280 }, { "epoch": 0.6343987254734067, "grad_norm": 2.319305896759033, "learning_rate": 3.114257562934059e-06, "loss": 0.4767, "step": 15281 }, { "epoch": 0.6344402409976181, "grad_norm": 2.319002389907837, "learning_rate": 3.1136349137342412e-06, "loss": 0.4316, "step": 15282 }, { "epoch": 0.6344817565218294, "grad_norm": 2.642798900604248, "learning_rate": 3.113012298640451e-06, "loss": 0.4671, "step": 15283 }, { "epoch": 0.6345232720460408, "grad_norm": 2.915443181991577, "learning_rate": 3.1123897176639395e-06, "loss": 0.4951, "step": 15284 }, { "epoch": 0.634564787570252, "grad_norm": 2.6999430656433105, "learning_rate": 3.1117671708159665e-06, "loss": 0.444, "step": 15285 }, { "epoch": 0.6346063030944634, "grad_norm": 2.680375814437866, "learning_rate": 3.1111446581077876e-06, "loss": 0.5224, "step": 15286 }, { "epoch": 0.6346478186186747, "grad_norm": 2.2115490436553955, "learning_rate": 3.1105221795506584e-06, "loss": 0.4906, "step": 15287 }, { "epoch": 0.6346893341428861, "grad_norm": 2.639753580093384, "learning_rate": 3.1098997351558325e-06, "loss": 0.5778, "step": 15288 }, { "epoch": 0.6347308496670974, "grad_norm": 2.165234088897705, "learning_rate": 3.109277324934563e-06, "loss": 0.4288, "step": 15289 }, { "epoch": 0.6347723651913088, "grad_norm": 2.389101266860962, "learning_rate": 3.1086549488981057e-06, "loss": 0.5169, "step": 15290 }, { "epoch": 0.63481388071552, "grad_norm": 2.04015851020813, "learning_rate": 3.1080326070577125e-06, "loss": 0.4514, "step": 15291 }, { "epoch": 0.6348553962397314, "grad_norm": 3.0519912242889404, "learning_rate": 3.1074102994246336e-06, "loss": 0.522, "step": 15292 }, { "epoch": 0.6348969117639427, "grad_norm": 2.236504316329956, "learning_rate": 3.1067880260101225e-06, "loss": 0.4044, "step": 15293 }, { "epoch": 0.6349384272881541, "grad_norm": 2.489480972290039, "learning_rate": 3.1061657868254287e-06, "loss": 0.4319, "step": 15294 }, { "epoch": 0.6349799428123654, "grad_norm": 2.824618101119995, "learning_rate": 3.1055435818818034e-06, "loss": 0.5211, "step": 15295 }, { "epoch": 0.6350214583365767, "grad_norm": 2.6762075424194336, "learning_rate": 3.104921411190499e-06, "loss": 0.533, "step": 15296 }, { "epoch": 0.635062973860788, "grad_norm": 2.27299427986145, "learning_rate": 3.1042992747627593e-06, "loss": 0.6029, "step": 15297 }, { "epoch": 0.6351044893849994, "grad_norm": 2.3594532012939453, "learning_rate": 3.1036771726098347e-06, "loss": 0.4824, "step": 15298 }, { "epoch": 0.6351460049092107, "grad_norm": 2.427536964416504, "learning_rate": 3.1030551047429747e-06, "loss": 0.506, "step": 15299 }, { "epoch": 0.6351875204334221, "grad_norm": 2.9454641342163086, "learning_rate": 3.1024330711734253e-06, "loss": 0.5666, "step": 15300 }, { "epoch": 0.6352290359576334, "grad_norm": 2.483666181564331, "learning_rate": 3.101811071912433e-06, "loss": 0.6133, "step": 15301 }, { "epoch": 0.6352705514818447, "grad_norm": 2.514381170272827, "learning_rate": 3.1011891069712424e-06, "loss": 0.5383, "step": 15302 }, { "epoch": 0.635312067006056, "grad_norm": 2.1643123626708984, "learning_rate": 3.1005671763611003e-06, "loss": 0.5439, "step": 15303 }, { "epoch": 0.6353535825302674, "grad_norm": 2.3502588272094727, "learning_rate": 3.099945280093253e-06, "loss": 0.4261, "step": 15304 }, { "epoch": 0.6353950980544788, "grad_norm": 2.649656295776367, "learning_rate": 3.0993234181789405e-06, "loss": 0.494, "step": 15305 }, { "epoch": 0.6354366135786901, "grad_norm": 2.4826931953430176, "learning_rate": 3.0987015906294083e-06, "loss": 0.5613, "step": 15306 }, { "epoch": 0.6354781291029015, "grad_norm": 2.2541282176971436, "learning_rate": 3.0980797974558997e-06, "loss": 0.5793, "step": 15307 }, { "epoch": 0.6355196446271127, "grad_norm": 3.044769525527954, "learning_rate": 3.0974580386696562e-06, "loss": 0.5426, "step": 15308 }, { "epoch": 0.6355611601513241, "grad_norm": 2.2193992137908936, "learning_rate": 3.0968363142819226e-06, "loss": 0.4902, "step": 15309 }, { "epoch": 0.6356026756755354, "grad_norm": 2.3258724212646484, "learning_rate": 3.0962146243039343e-06, "loss": 0.4986, "step": 15310 }, { "epoch": 0.6356441911997468, "grad_norm": 2.324524402618408, "learning_rate": 3.0955929687469344e-06, "loss": 0.5745, "step": 15311 }, { "epoch": 0.6356857067239581, "grad_norm": 2.2340126037597656, "learning_rate": 3.0949713476221634e-06, "loss": 0.5054, "step": 15312 }, { "epoch": 0.6357272222481695, "grad_norm": 2.549893617630005, "learning_rate": 3.0943497609408595e-06, "loss": 0.3972, "step": 15313 }, { "epoch": 0.6357687377723807, "grad_norm": 2.117454767227173, "learning_rate": 3.0937282087142618e-06, "loss": 0.5123, "step": 15314 }, { "epoch": 0.6358102532965921, "grad_norm": 2.486555337905884, "learning_rate": 3.0931066909536058e-06, "loss": 0.4735, "step": 15315 }, { "epoch": 0.6358517688208034, "grad_norm": 2.390346050262451, "learning_rate": 3.092485207670131e-06, "loss": 0.5407, "step": 15316 }, { "epoch": 0.6358932843450148, "grad_norm": 2.285586357116699, "learning_rate": 3.091863758875075e-06, "loss": 0.4528, "step": 15317 }, { "epoch": 0.6359347998692261, "grad_norm": 2.1496551036834717, "learning_rate": 3.0912423445796704e-06, "loss": 0.6214, "step": 15318 }, { "epoch": 0.6359763153934375, "grad_norm": 2.2364845275878906, "learning_rate": 3.0906209647951546e-06, "loss": 0.5829, "step": 15319 }, { "epoch": 0.6360178309176487, "grad_norm": 2.2109365463256836, "learning_rate": 3.0899996195327618e-06, "loss": 0.4688, "step": 15320 }, { "epoch": 0.6360593464418601, "grad_norm": 1.9955874681472778, "learning_rate": 3.0893783088037264e-06, "loss": 0.525, "step": 15321 }, { "epoch": 0.6361008619660714, "grad_norm": 2.7858173847198486, "learning_rate": 3.088757032619284e-06, "loss": 0.4759, "step": 15322 }, { "epoch": 0.6361423774902828, "grad_norm": 2.5124828815460205, "learning_rate": 3.0881357909906628e-06, "loss": 0.3707, "step": 15323 }, { "epoch": 0.6361838930144941, "grad_norm": 2.4265296459198, "learning_rate": 3.0875145839290987e-06, "loss": 0.5898, "step": 15324 }, { "epoch": 0.6362254085387055, "grad_norm": 2.6495556831359863, "learning_rate": 3.0868934114458226e-06, "loss": 0.6645, "step": 15325 }, { "epoch": 0.6362669240629167, "grad_norm": 2.6450016498565674, "learning_rate": 3.0862722735520644e-06, "loss": 0.5329, "step": 15326 }, { "epoch": 0.6363084395871281, "grad_norm": 2.390697479248047, "learning_rate": 3.0856511702590558e-06, "loss": 0.5518, "step": 15327 }, { "epoch": 0.6363499551113394, "grad_norm": 2.2400927543640137, "learning_rate": 3.085030101578025e-06, "loss": 0.5029, "step": 15328 }, { "epoch": 0.6363914706355508, "grad_norm": 2.0990993976593018, "learning_rate": 3.084409067520201e-06, "loss": 0.5491, "step": 15329 }, { "epoch": 0.6364329861597621, "grad_norm": 2.3522701263427734, "learning_rate": 3.083788068096816e-06, "loss": 0.6487, "step": 15330 }, { "epoch": 0.6364745016839735, "grad_norm": 2.620542287826538, "learning_rate": 3.083167103319093e-06, "loss": 0.5154, "step": 15331 }, { "epoch": 0.6365160172081847, "grad_norm": 1.886765718460083, "learning_rate": 3.082546173198261e-06, "loss": 0.4052, "step": 15332 }, { "epoch": 0.6365575327323961, "grad_norm": 2.080864906311035, "learning_rate": 3.0819252777455467e-06, "loss": 0.4499, "step": 15333 }, { "epoch": 0.6365990482566074, "grad_norm": 2.537236452102661, "learning_rate": 3.081304416972177e-06, "loss": 0.6102, "step": 15334 }, { "epoch": 0.6366405637808188, "grad_norm": 2.27347469329834, "learning_rate": 3.0806835908893774e-06, "loss": 0.4149, "step": 15335 }, { "epoch": 0.6366820793050302, "grad_norm": 2.2761292457580566, "learning_rate": 3.0800627995083703e-06, "loss": 0.506, "step": 15336 }, { "epoch": 0.6367235948292415, "grad_norm": 2.3785605430603027, "learning_rate": 3.0794420428403816e-06, "loss": 0.4035, "step": 15337 }, { "epoch": 0.6367651103534528, "grad_norm": 2.370579957962036, "learning_rate": 3.0788213208966343e-06, "loss": 0.4379, "step": 15338 }, { "epoch": 0.6368066258776641, "grad_norm": 3.1260788440704346, "learning_rate": 3.078200633688352e-06, "loss": 0.4859, "step": 15339 }, { "epoch": 0.6368481414018755, "grad_norm": 2.2569520473480225, "learning_rate": 3.077579981226756e-06, "loss": 0.4752, "step": 15340 }, { "epoch": 0.6368896569260868, "grad_norm": 2.5227723121643066, "learning_rate": 3.0769593635230666e-06, "loss": 0.4411, "step": 15341 }, { "epoch": 0.6369311724502982, "grad_norm": 2.2315642833709717, "learning_rate": 3.076338780588507e-06, "loss": 0.5935, "step": 15342 }, { "epoch": 0.6369726879745095, "grad_norm": 2.8265631198883057, "learning_rate": 3.075718232434298e-06, "loss": 0.4456, "step": 15343 }, { "epoch": 0.6370142034987208, "grad_norm": 1.895674228668213, "learning_rate": 3.0750977190716563e-06, "loss": 0.5063, "step": 15344 }, { "epoch": 0.6370557190229321, "grad_norm": 1.9225589036941528, "learning_rate": 3.0744772405118027e-06, "loss": 0.4074, "step": 15345 }, { "epoch": 0.6370972345471435, "grad_norm": 2.56276273727417, "learning_rate": 3.0738567967659557e-06, "loss": 0.6068, "step": 15346 }, { "epoch": 0.6371387500713548, "grad_norm": 2.3020434379577637, "learning_rate": 3.0732363878453326e-06, "loss": 0.4384, "step": 15347 }, { "epoch": 0.6371802655955662, "grad_norm": 1.8031922578811646, "learning_rate": 3.0726160137611527e-06, "loss": 0.5421, "step": 15348 }, { "epoch": 0.6372217811197775, "grad_norm": 2.5759081840515137, "learning_rate": 3.0719956745246295e-06, "loss": 0.4941, "step": 15349 }, { "epoch": 0.6372632966439888, "grad_norm": 2.7050318717956543, "learning_rate": 3.07137537014698e-06, "loss": 0.4966, "step": 15350 }, { "epoch": 0.6373048121682001, "grad_norm": 2.441990375518799, "learning_rate": 3.070755100639421e-06, "loss": 0.4886, "step": 15351 }, { "epoch": 0.6373463276924115, "grad_norm": 2.1982955932617188, "learning_rate": 3.070134866013165e-06, "loss": 0.4751, "step": 15352 }, { "epoch": 0.6373878432166228, "grad_norm": 3.1233081817626953, "learning_rate": 3.0695146662794273e-06, "loss": 0.5141, "step": 15353 }, { "epoch": 0.6374293587408342, "grad_norm": 2.1509406566619873, "learning_rate": 3.0688945014494202e-06, "loss": 0.3573, "step": 15354 }, { "epoch": 0.6374708742650455, "grad_norm": 1.9267629384994507, "learning_rate": 3.0682743715343565e-06, "loss": 0.4645, "step": 15355 }, { "epoch": 0.6375123897892568, "grad_norm": 3.149384021759033, "learning_rate": 3.067654276545452e-06, "loss": 0.7566, "step": 15356 }, { "epoch": 0.6375539053134681, "grad_norm": 2.4222614765167236, "learning_rate": 3.0670342164939126e-06, "loss": 0.5277, "step": 15357 }, { "epoch": 0.6375954208376795, "grad_norm": 2.7469048500061035, "learning_rate": 3.0664141913909513e-06, "loss": 0.4585, "step": 15358 }, { "epoch": 0.6376369363618908, "grad_norm": 2.4292306900024414, "learning_rate": 3.065794201247779e-06, "loss": 0.4672, "step": 15359 }, { "epoch": 0.6376784518861022, "grad_norm": 2.403546094894409, "learning_rate": 3.065174246075605e-06, "loss": 0.5752, "step": 15360 }, { "epoch": 0.6377199674103134, "grad_norm": 2.6669228076934814, "learning_rate": 3.0645543258856404e-06, "loss": 0.6138, "step": 15361 }, { "epoch": 0.6377614829345248, "grad_norm": 2.129528045654297, "learning_rate": 3.0639344406890893e-06, "loss": 0.4598, "step": 15362 }, { "epoch": 0.6378029984587361, "grad_norm": 2.8049798011779785, "learning_rate": 3.063314590497162e-06, "loss": 0.4223, "step": 15363 }, { "epoch": 0.6378445139829475, "grad_norm": 2.4005305767059326, "learning_rate": 3.0626947753210654e-06, "loss": 0.4724, "step": 15364 }, { "epoch": 0.6378860295071588, "grad_norm": 2.728003978729248, "learning_rate": 3.0620749951720065e-06, "loss": 0.5216, "step": 15365 }, { "epoch": 0.6379275450313702, "grad_norm": 2.231093406677246, "learning_rate": 3.0614552500611896e-06, "loss": 0.4483, "step": 15366 }, { "epoch": 0.6379690605555816, "grad_norm": 2.2296454906463623, "learning_rate": 3.060835539999819e-06, "loss": 0.4765, "step": 15367 }, { "epoch": 0.6380105760797928, "grad_norm": 2.2089245319366455, "learning_rate": 3.060215864999102e-06, "loss": 0.4579, "step": 15368 }, { "epoch": 0.6380520916040042, "grad_norm": 2.78252911567688, "learning_rate": 3.059596225070243e-06, "loss": 0.51, "step": 15369 }, { "epoch": 0.6380936071282155, "grad_norm": 2.452584743499756, "learning_rate": 3.058976620224441e-06, "loss": 0.4798, "step": 15370 }, { "epoch": 0.6381351226524269, "grad_norm": 2.7278614044189453, "learning_rate": 3.058357050472901e-06, "loss": 0.6028, "step": 15371 }, { "epoch": 0.6381766381766382, "grad_norm": 2.005563735961914, "learning_rate": 3.057737515826826e-06, "loss": 0.4994, "step": 15372 }, { "epoch": 0.6382181537008496, "grad_norm": 2.5617620944976807, "learning_rate": 3.0571180162974158e-06, "loss": 0.5951, "step": 15373 }, { "epoch": 0.6382596692250608, "grad_norm": 2.408724069595337, "learning_rate": 3.0564985518958744e-06, "loss": 0.604, "step": 15374 }, { "epoch": 0.6383011847492722, "grad_norm": 2.7417399883270264, "learning_rate": 3.0558791226333974e-06, "loss": 0.4322, "step": 15375 }, { "epoch": 0.6383427002734835, "grad_norm": 2.5201685428619385, "learning_rate": 3.055259728521186e-06, "loss": 0.601, "step": 15376 }, { "epoch": 0.6383842157976949, "grad_norm": 2.7389378547668457, "learning_rate": 3.0546403695704396e-06, "loss": 0.5568, "step": 15377 }, { "epoch": 0.6384257313219062, "grad_norm": 2.1359035968780518, "learning_rate": 3.054021045792357e-06, "loss": 0.6045, "step": 15378 }, { "epoch": 0.6384672468461176, "grad_norm": 2.0715184211730957, "learning_rate": 3.0534017571981343e-06, "loss": 0.4301, "step": 15379 }, { "epoch": 0.6385087623703288, "grad_norm": 2.1076014041900635, "learning_rate": 3.0527825037989676e-06, "loss": 0.4401, "step": 15380 }, { "epoch": 0.6385502778945402, "grad_norm": 2.5262670516967773, "learning_rate": 3.0521632856060558e-06, "loss": 0.5836, "step": 15381 }, { "epoch": 0.6385917934187515, "grad_norm": 2.0612058639526367, "learning_rate": 3.0515441026305936e-06, "loss": 0.4443, "step": 15382 }, { "epoch": 0.6386333089429629, "grad_norm": 2.355790138244629, "learning_rate": 3.050924954883775e-06, "loss": 0.6011, "step": 15383 }, { "epoch": 0.6386748244671742, "grad_norm": 2.034152030944824, "learning_rate": 3.0503058423767938e-06, "loss": 0.4648, "step": 15384 }, { "epoch": 0.6387163399913856, "grad_norm": 2.8997533321380615, "learning_rate": 3.049686765120846e-06, "loss": 0.6595, "step": 15385 }, { "epoch": 0.6387578555155968, "grad_norm": 2.564995050430298, "learning_rate": 3.0490677231271228e-06, "loss": 0.5976, "step": 15386 }, { "epoch": 0.6387993710398082, "grad_norm": 2.0381553173065186, "learning_rate": 3.0484487164068198e-06, "loss": 0.414, "step": 15387 }, { "epoch": 0.6388408865640195, "grad_norm": 2.1398000717163086, "learning_rate": 3.0478297449711247e-06, "loss": 0.4472, "step": 15388 }, { "epoch": 0.6388824020882309, "grad_norm": 2.1728901863098145, "learning_rate": 3.047210808831231e-06, "loss": 0.5042, "step": 15389 }, { "epoch": 0.6389239176124422, "grad_norm": 2.3378639221191406, "learning_rate": 3.0465919079983284e-06, "loss": 0.4496, "step": 15390 }, { "epoch": 0.6389654331366535, "grad_norm": 2.798609733581543, "learning_rate": 3.0459730424836075e-06, "loss": 0.5473, "step": 15391 }, { "epoch": 0.6390069486608648, "grad_norm": 2.9742445945739746, "learning_rate": 3.0453542122982577e-06, "loss": 0.5443, "step": 15392 }, { "epoch": 0.6390484641850762, "grad_norm": 2.3693501949310303, "learning_rate": 3.044735417453466e-06, "loss": 0.6909, "step": 15393 }, { "epoch": 0.6390899797092875, "grad_norm": 1.9403663873672485, "learning_rate": 3.044116657960421e-06, "loss": 0.3721, "step": 15394 }, { "epoch": 0.6391314952334989, "grad_norm": 2.0448250770568848, "learning_rate": 3.043497933830313e-06, "loss": 0.5064, "step": 15395 }, { "epoch": 0.6391730107577102, "grad_norm": 2.8287651538848877, "learning_rate": 3.0428792450743243e-06, "loss": 0.5628, "step": 15396 }, { "epoch": 0.6392145262819215, "grad_norm": 2.5243890285491943, "learning_rate": 3.042260591703643e-06, "loss": 0.4844, "step": 15397 }, { "epoch": 0.6392560418061329, "grad_norm": 2.4681036472320557, "learning_rate": 3.0416419737294545e-06, "loss": 0.5172, "step": 15398 }, { "epoch": 0.6392975573303442, "grad_norm": 2.3427681922912598, "learning_rate": 3.0410233911629454e-06, "loss": 0.5352, "step": 15399 }, { "epoch": 0.6393390728545556, "grad_norm": 2.029268503189087, "learning_rate": 3.0404048440152965e-06, "loss": 0.5202, "step": 15400 }, { "epoch": 0.6393805883787669, "grad_norm": 2.6619510650634766, "learning_rate": 3.0397863322976923e-06, "loss": 0.5245, "step": 15401 }, { "epoch": 0.6394221039029783, "grad_norm": 2.7519826889038086, "learning_rate": 3.039167856021317e-06, "loss": 0.5919, "step": 15402 }, { "epoch": 0.6394636194271895, "grad_norm": 3.171069622039795, "learning_rate": 3.0385494151973515e-06, "loss": 0.5191, "step": 15403 }, { "epoch": 0.6395051349514009, "grad_norm": 2.19744610786438, "learning_rate": 3.037931009836979e-06, "loss": 0.5337, "step": 15404 }, { "epoch": 0.6395466504756122, "grad_norm": 2.7111780643463135, "learning_rate": 3.0373126399513793e-06, "loss": 0.5227, "step": 15405 }, { "epoch": 0.6395881659998236, "grad_norm": 2.5890285968780518, "learning_rate": 3.036694305551732e-06, "loss": 0.6384, "step": 15406 }, { "epoch": 0.6396296815240349, "grad_norm": 2.0915560722351074, "learning_rate": 3.036076006649217e-06, "loss": 0.4078, "step": 15407 }, { "epoch": 0.6396711970482463, "grad_norm": 2.2879319190979004, "learning_rate": 3.035457743255016e-06, "loss": 0.4703, "step": 15408 }, { "epoch": 0.6397127125724575, "grad_norm": 1.776393175125122, "learning_rate": 3.0348395153803034e-06, "loss": 0.4321, "step": 15409 }, { "epoch": 0.6397542280966689, "grad_norm": 2.218505620956421, "learning_rate": 3.034221323036258e-06, "loss": 0.4163, "step": 15410 }, { "epoch": 0.6397957436208802, "grad_norm": 2.4067153930664062, "learning_rate": 3.033603166234059e-06, "loss": 0.4905, "step": 15411 }, { "epoch": 0.6398372591450916, "grad_norm": 2.14648175239563, "learning_rate": 3.032985044984883e-06, "loss": 0.4818, "step": 15412 }, { "epoch": 0.6398787746693029, "grad_norm": 2.4435155391693115, "learning_rate": 3.032366959299902e-06, "loss": 0.4862, "step": 15413 }, { "epoch": 0.6399202901935143, "grad_norm": 1.9251269102096558, "learning_rate": 3.0317489091902936e-06, "loss": 0.5271, "step": 15414 }, { "epoch": 0.6399618057177255, "grad_norm": 2.1609299182891846, "learning_rate": 3.0311308946672334e-06, "loss": 0.448, "step": 15415 }, { "epoch": 0.6400033212419369, "grad_norm": 2.21903657913208, "learning_rate": 3.0305129157418933e-06, "loss": 0.4498, "step": 15416 }, { "epoch": 0.6400448367661482, "grad_norm": 3.041431188583374, "learning_rate": 3.0298949724254477e-06, "loss": 0.5512, "step": 15417 }, { "epoch": 0.6400863522903596, "grad_norm": 3.009429454803467, "learning_rate": 3.0292770647290694e-06, "loss": 0.6317, "step": 15418 }, { "epoch": 0.6401278678145709, "grad_norm": 2.421238899230957, "learning_rate": 3.0286591926639287e-06, "loss": 0.5354, "step": 15419 }, { "epoch": 0.6401693833387823, "grad_norm": 2.729678153991699, "learning_rate": 3.0280413562411983e-06, "loss": 0.4993, "step": 15420 }, { "epoch": 0.6402108988629935, "grad_norm": 2.0338311195373535, "learning_rate": 3.0274235554720498e-06, "loss": 0.5303, "step": 15421 }, { "epoch": 0.6402524143872049, "grad_norm": 2.6055727005004883, "learning_rate": 3.026805790367651e-06, "loss": 0.525, "step": 15422 }, { "epoch": 0.6402939299114162, "grad_norm": 2.55000901222229, "learning_rate": 3.0261880609391713e-06, "loss": 0.4491, "step": 15423 }, { "epoch": 0.6403354454356276, "grad_norm": 2.4787490367889404, "learning_rate": 3.025570367197781e-06, "loss": 0.6224, "step": 15424 }, { "epoch": 0.6403769609598389, "grad_norm": 2.308436632156372, "learning_rate": 3.0249527091546493e-06, "loss": 0.5237, "step": 15425 }, { "epoch": 0.6404184764840503, "grad_norm": 2.9697823524475098, "learning_rate": 3.0243350868209397e-06, "loss": 0.5247, "step": 15426 }, { "epoch": 0.6404599920082615, "grad_norm": 2.294095754623413, "learning_rate": 3.023717500207821e-06, "loss": 0.5252, "step": 15427 }, { "epoch": 0.6405015075324729, "grad_norm": 2.3404414653778076, "learning_rate": 3.0230999493264614e-06, "loss": 0.7019, "step": 15428 }, { "epoch": 0.6405430230566843, "grad_norm": 2.420475959777832, "learning_rate": 3.0224824341880226e-06, "loss": 0.6613, "step": 15429 }, { "epoch": 0.6405845385808956, "grad_norm": 2.4364523887634277, "learning_rate": 3.021864954803673e-06, "loss": 0.5542, "step": 15430 }, { "epoch": 0.640626054105107, "grad_norm": 2.478487968444824, "learning_rate": 3.0212475111845753e-06, "loss": 0.4322, "step": 15431 }, { "epoch": 0.6406675696293183, "grad_norm": 2.705949068069458, "learning_rate": 3.0206301033418913e-06, "loss": 0.6191, "step": 15432 }, { "epoch": 0.6407090851535296, "grad_norm": 2.372187852859497, "learning_rate": 3.0200127312867855e-06, "loss": 0.5342, "step": 15433 }, { "epoch": 0.6407506006777409, "grad_norm": 1.9866751432418823, "learning_rate": 3.0193953950304226e-06, "loss": 0.4391, "step": 15434 }, { "epoch": 0.6407921162019523, "grad_norm": 2.126459836959839, "learning_rate": 3.0187780945839595e-06, "loss": 0.5307, "step": 15435 }, { "epoch": 0.6408336317261636, "grad_norm": 2.5879499912261963, "learning_rate": 3.0181608299585604e-06, "loss": 0.7621, "step": 15436 }, { "epoch": 0.640875147250375, "grad_norm": 2.398252487182617, "learning_rate": 3.0175436011653836e-06, "loss": 0.6475, "step": 15437 }, { "epoch": 0.6409166627745863, "grad_norm": 2.2507269382476807, "learning_rate": 3.0169264082155923e-06, "loss": 0.4232, "step": 15438 }, { "epoch": 0.6409581782987976, "grad_norm": 2.3792097568511963, "learning_rate": 3.016309251120342e-06, "loss": 0.5131, "step": 15439 }, { "epoch": 0.6409996938230089, "grad_norm": 2.5373103618621826, "learning_rate": 3.015692129890791e-06, "loss": 0.537, "step": 15440 }, { "epoch": 0.6410412093472203, "grad_norm": 2.7636806964874268, "learning_rate": 3.0150750445380995e-06, "loss": 0.4928, "step": 15441 }, { "epoch": 0.6410827248714316, "grad_norm": 2.186102867126465, "learning_rate": 3.014457995073423e-06, "loss": 0.4945, "step": 15442 }, { "epoch": 0.641124240395643, "grad_norm": 1.8665934801101685, "learning_rate": 3.0138409815079184e-06, "loss": 0.4902, "step": 15443 }, { "epoch": 0.6411657559198543, "grad_norm": 2.2698476314544678, "learning_rate": 3.0132240038527417e-06, "loss": 0.5691, "step": 15444 }, { "epoch": 0.6412072714440656, "grad_norm": 2.5570740699768066, "learning_rate": 3.012607062119046e-06, "loss": 0.5783, "step": 15445 }, { "epoch": 0.6412487869682769, "grad_norm": 2.352320909500122, "learning_rate": 3.011990156317989e-06, "loss": 0.6228, "step": 15446 }, { "epoch": 0.6412903024924883, "grad_norm": 2.279648542404175, "learning_rate": 3.0113732864607236e-06, "loss": 0.4547, "step": 15447 }, { "epoch": 0.6413318180166996, "grad_norm": 2.137857437133789, "learning_rate": 3.010756452558401e-06, "loss": 0.5606, "step": 15448 }, { "epoch": 0.641373333540911, "grad_norm": 2.7024049758911133, "learning_rate": 3.0101396546221743e-06, "loss": 0.5306, "step": 15449 }, { "epoch": 0.6414148490651223, "grad_norm": 2.323941469192505, "learning_rate": 3.009522892663197e-06, "loss": 0.4157, "step": 15450 }, { "epoch": 0.6414563645893336, "grad_norm": 3.2146832942962646, "learning_rate": 3.008906166692621e-06, "loss": 0.5854, "step": 15451 }, { "epoch": 0.6414978801135449, "grad_norm": 2.4963266849517822, "learning_rate": 3.008289476721594e-06, "loss": 0.5902, "step": 15452 }, { "epoch": 0.6415393956377563, "grad_norm": 2.488081216812134, "learning_rate": 3.0076728227612674e-06, "loss": 0.5422, "step": 15453 }, { "epoch": 0.6415809111619676, "grad_norm": 2.283963680267334, "learning_rate": 3.0070562048227904e-06, "loss": 0.6051, "step": 15454 }, { "epoch": 0.641622426686179, "grad_norm": 2.718817949295044, "learning_rate": 3.0064396229173113e-06, "loss": 0.4237, "step": 15455 }, { "epoch": 0.6416639422103902, "grad_norm": 2.0726733207702637, "learning_rate": 3.0058230770559794e-06, "loss": 0.4567, "step": 15456 }, { "epoch": 0.6417054577346016, "grad_norm": 2.3443386554718018, "learning_rate": 3.005206567249941e-06, "loss": 0.5026, "step": 15457 }, { "epoch": 0.6417469732588129, "grad_norm": 2.127272844314575, "learning_rate": 3.0045900935103424e-06, "loss": 0.6181, "step": 15458 }, { "epoch": 0.6417884887830243, "grad_norm": 2.8004510402679443, "learning_rate": 3.003973655848329e-06, "loss": 0.3712, "step": 15459 }, { "epoch": 0.6418300043072357, "grad_norm": 2.6794862747192383, "learning_rate": 3.00335725427505e-06, "loss": 0.5482, "step": 15460 }, { "epoch": 0.641871519831447, "grad_norm": 2.3451104164123535, "learning_rate": 3.0027408888016453e-06, "loss": 0.4034, "step": 15461 }, { "epoch": 0.6419130353556584, "grad_norm": 2.5968008041381836, "learning_rate": 3.00212455943926e-06, "loss": 0.4262, "step": 15462 }, { "epoch": 0.6419545508798696, "grad_norm": 2.4732003211975098, "learning_rate": 3.0015082661990397e-06, "loss": 0.5028, "step": 15463 }, { "epoch": 0.641996066404081, "grad_norm": 2.4185757637023926, "learning_rate": 3.000892009092128e-06, "loss": 0.4747, "step": 15464 }, { "epoch": 0.6420375819282923, "grad_norm": 2.311591863632202, "learning_rate": 3.000275788129662e-06, "loss": 0.5185, "step": 15465 }, { "epoch": 0.6420790974525037, "grad_norm": 2.2060348987579346, "learning_rate": 2.9996596033227864e-06, "loss": 0.2605, "step": 15466 }, { "epoch": 0.642120612976715, "grad_norm": 2.5720555782318115, "learning_rate": 2.999043454682643e-06, "loss": 0.561, "step": 15467 }, { "epoch": 0.6421621285009264, "grad_norm": 2.4669981002807617, "learning_rate": 2.9984273422203703e-06, "loss": 0.4916, "step": 15468 }, { "epoch": 0.6422036440251376, "grad_norm": 2.6041207313537598, "learning_rate": 2.997811265947108e-06, "loss": 0.456, "step": 15469 }, { "epoch": 0.642245159549349, "grad_norm": 2.430493116378784, "learning_rate": 2.9971952258739963e-06, "loss": 0.5491, "step": 15470 }, { "epoch": 0.6422866750735603, "grad_norm": 2.38582181930542, "learning_rate": 2.9965792220121702e-06, "loss": 0.5932, "step": 15471 }, { "epoch": 0.6423281905977717, "grad_norm": 2.730954647064209, "learning_rate": 2.9959632543727696e-06, "loss": 0.5999, "step": 15472 }, { "epoch": 0.642369706121983, "grad_norm": 2.3433215618133545, "learning_rate": 2.995347322966933e-06, "loss": 0.5665, "step": 15473 }, { "epoch": 0.6424112216461944, "grad_norm": 2.274027109146118, "learning_rate": 2.9947314278057927e-06, "loss": 0.5568, "step": 15474 }, { "epoch": 0.6424527371704056, "grad_norm": 2.3441638946533203, "learning_rate": 2.994115568900486e-06, "loss": 0.6779, "step": 15475 }, { "epoch": 0.642494252694617, "grad_norm": 2.0910232067108154, "learning_rate": 2.9934997462621477e-06, "loss": 0.4505, "step": 15476 }, { "epoch": 0.6425357682188283, "grad_norm": 2.5270941257476807, "learning_rate": 2.9928839599019144e-06, "loss": 0.4777, "step": 15477 }, { "epoch": 0.6425772837430397, "grad_norm": 1.8396949768066406, "learning_rate": 2.9922682098309164e-06, "loss": 0.3791, "step": 15478 }, { "epoch": 0.642618799267251, "grad_norm": 2.1488897800445557, "learning_rate": 2.9916524960602873e-06, "loss": 0.4702, "step": 15479 }, { "epoch": 0.6426603147914624, "grad_norm": 2.521524667739868, "learning_rate": 2.9910368186011606e-06, "loss": 0.5661, "step": 15480 }, { "epoch": 0.6427018303156736, "grad_norm": 2.2079381942749023, "learning_rate": 2.9904211774646664e-06, "loss": 0.4927, "step": 15481 }, { "epoch": 0.642743345839885, "grad_norm": 2.0649776458740234, "learning_rate": 2.989805572661939e-06, "loss": 0.4405, "step": 15482 }, { "epoch": 0.6427848613640963, "grad_norm": 2.4225881099700928, "learning_rate": 2.9891900042041043e-06, "loss": 0.6223, "step": 15483 }, { "epoch": 0.6428263768883077, "grad_norm": 2.1943516731262207, "learning_rate": 2.9885744721022934e-06, "loss": 0.478, "step": 15484 }, { "epoch": 0.642867892412519, "grad_norm": 2.0736286640167236, "learning_rate": 2.987958976367636e-06, "loss": 0.3689, "step": 15485 }, { "epoch": 0.6429094079367303, "grad_norm": 2.6916463375091553, "learning_rate": 2.987343517011262e-06, "loss": 0.4837, "step": 15486 }, { "epoch": 0.6429509234609416, "grad_norm": 2.4302690029144287, "learning_rate": 2.9867280940442954e-06, "loss": 0.5092, "step": 15487 }, { "epoch": 0.642992438985153, "grad_norm": 2.717852830886841, "learning_rate": 2.9861127074778656e-06, "loss": 0.6764, "step": 15488 }, { "epoch": 0.6430339545093643, "grad_norm": 2.532325506210327, "learning_rate": 2.9854973573230983e-06, "loss": 0.4977, "step": 15489 }, { "epoch": 0.6430754700335757, "grad_norm": 2.4068737030029297, "learning_rate": 2.9848820435911208e-06, "loss": 0.5507, "step": 15490 }, { "epoch": 0.6431169855577871, "grad_norm": 2.0584604740142822, "learning_rate": 2.984266766293056e-06, "loss": 0.482, "step": 15491 }, { "epoch": 0.6431585010819983, "grad_norm": 2.6633784770965576, "learning_rate": 2.983651525440028e-06, "loss": 0.5141, "step": 15492 }, { "epoch": 0.6432000166062097, "grad_norm": 2.404151439666748, "learning_rate": 2.9830363210431627e-06, "loss": 0.5051, "step": 15493 }, { "epoch": 0.643241532130421, "grad_norm": 2.721310615539551, "learning_rate": 2.9824211531135816e-06, "loss": 0.6442, "step": 15494 }, { "epoch": 0.6432830476546324, "grad_norm": 2.764418125152588, "learning_rate": 2.981806021662409e-06, "loss": 0.5865, "step": 15495 }, { "epoch": 0.6433245631788437, "grad_norm": 2.353437900543213, "learning_rate": 2.9811909267007638e-06, "loss": 0.409, "step": 15496 }, { "epoch": 0.6433660787030551, "grad_norm": 2.6004440784454346, "learning_rate": 2.980575868239768e-06, "loss": 0.5306, "step": 15497 }, { "epoch": 0.6434075942272663, "grad_norm": 2.834681510925293, "learning_rate": 2.979960846290543e-06, "loss": 0.3988, "step": 15498 }, { "epoch": 0.6434491097514777, "grad_norm": 1.9485186338424683, "learning_rate": 2.9793458608642103e-06, "loss": 0.4681, "step": 15499 }, { "epoch": 0.643490625275689, "grad_norm": 2.2191219329833984, "learning_rate": 2.9787309119718843e-06, "loss": 0.6679, "step": 15500 }, { "epoch": 0.6435321407999004, "grad_norm": 2.1659882068634033, "learning_rate": 2.978115999624686e-06, "loss": 0.4445, "step": 15501 }, { "epoch": 0.6435736563241117, "grad_norm": 2.539762258529663, "learning_rate": 2.9775011238337335e-06, "loss": 0.5339, "step": 15502 }, { "epoch": 0.6436151718483231, "grad_norm": 2.5330846309661865, "learning_rate": 2.9768862846101454e-06, "loss": 0.511, "step": 15503 }, { "epoch": 0.6436566873725343, "grad_norm": 2.610609769821167, "learning_rate": 2.976271481965035e-06, "loss": 0.6357, "step": 15504 }, { "epoch": 0.6436982028967457, "grad_norm": 2.272510051727295, "learning_rate": 2.9756567159095185e-06, "loss": 0.6401, "step": 15505 }, { "epoch": 0.643739718420957, "grad_norm": 2.4936928749084473, "learning_rate": 2.9750419864547143e-06, "loss": 0.5309, "step": 15506 }, { "epoch": 0.6437812339451684, "grad_norm": 2.4464266300201416, "learning_rate": 2.9744272936117323e-06, "loss": 0.4548, "step": 15507 }, { "epoch": 0.6438227494693797, "grad_norm": 2.933830499649048, "learning_rate": 2.9738126373916915e-06, "loss": 0.5292, "step": 15508 }, { "epoch": 0.6438642649935911, "grad_norm": 2.733412742614746, "learning_rate": 2.9731980178057004e-06, "loss": 0.5069, "step": 15509 }, { "epoch": 0.6439057805178023, "grad_norm": 2.5971548557281494, "learning_rate": 2.972583434864873e-06, "loss": 0.5716, "step": 15510 }, { "epoch": 0.6439472960420137, "grad_norm": 2.401907444000244, "learning_rate": 2.9719688885803212e-06, "loss": 0.6316, "step": 15511 }, { "epoch": 0.643988811566225, "grad_norm": 2.482403039932251, "learning_rate": 2.971354378963158e-06, "loss": 0.5665, "step": 15512 }, { "epoch": 0.6440303270904364, "grad_norm": 2.2761740684509277, "learning_rate": 2.9707399060244914e-06, "loss": 0.5049, "step": 15513 }, { "epoch": 0.6440718426146477, "grad_norm": 2.75130295753479, "learning_rate": 2.970125469775432e-06, "loss": 0.6418, "step": 15514 }, { "epoch": 0.6441133581388591, "grad_norm": 2.364644765853882, "learning_rate": 2.969511070227089e-06, "loss": 0.5765, "step": 15515 }, { "epoch": 0.6441548736630703, "grad_norm": 2.772545337677002, "learning_rate": 2.968896707390573e-06, "loss": 0.5018, "step": 15516 }, { "epoch": 0.6441963891872817, "grad_norm": 2.9981184005737305, "learning_rate": 2.9682823812769878e-06, "loss": 0.5574, "step": 15517 }, { "epoch": 0.644237904711493, "grad_norm": 2.1817281246185303, "learning_rate": 2.9676680918974432e-06, "loss": 0.5248, "step": 15518 }, { "epoch": 0.6442794202357044, "grad_norm": 2.3542468547821045, "learning_rate": 2.967053839263046e-06, "loss": 0.4712, "step": 15519 }, { "epoch": 0.6443209357599157, "grad_norm": 2.276675224304199, "learning_rate": 2.9664396233849e-06, "loss": 0.5505, "step": 15520 }, { "epoch": 0.6443624512841271, "grad_norm": 2.4795215129852295, "learning_rate": 2.9658254442741147e-06, "loss": 0.442, "step": 15521 }, { "epoch": 0.6444039668083384, "grad_norm": 2.262892007827759, "learning_rate": 2.9652113019417895e-06, "loss": 0.6088, "step": 15522 }, { "epoch": 0.6444454823325497, "grad_norm": 2.4379420280456543, "learning_rate": 2.9645971963990295e-06, "loss": 0.6165, "step": 15523 }, { "epoch": 0.6444869978567611, "grad_norm": 2.737457275390625, "learning_rate": 2.96398312765694e-06, "loss": 0.4707, "step": 15524 }, { "epoch": 0.6445285133809724, "grad_norm": 3.4142630100250244, "learning_rate": 2.9633690957266235e-06, "loss": 0.623, "step": 15525 }, { "epoch": 0.6445700289051838, "grad_norm": 2.814864158630371, "learning_rate": 2.962755100619179e-06, "loss": 0.661, "step": 15526 }, { "epoch": 0.6446115444293951, "grad_norm": 2.062426805496216, "learning_rate": 2.9621411423457092e-06, "loss": 0.377, "step": 15527 }, { "epoch": 0.6446530599536064, "grad_norm": 2.448421001434326, "learning_rate": 2.9615272209173155e-06, "loss": 0.5456, "step": 15528 }, { "epoch": 0.6446945754778177, "grad_norm": 2.4348580837249756, "learning_rate": 2.9609133363450994e-06, "loss": 0.4379, "step": 15529 }, { "epoch": 0.6447360910020291, "grad_norm": 2.8010053634643555, "learning_rate": 2.9602994886401554e-06, "loss": 0.6118, "step": 15530 }, { "epoch": 0.6447776065262404, "grad_norm": 2.477052927017212, "learning_rate": 2.9596856778135847e-06, "loss": 0.5175, "step": 15531 }, { "epoch": 0.6448191220504518, "grad_norm": 3.337054491043091, "learning_rate": 2.959071903876486e-06, "loss": 0.5583, "step": 15532 }, { "epoch": 0.644860637574663, "grad_norm": 2.2417047023773193, "learning_rate": 2.958458166839954e-06, "loss": 0.5438, "step": 15533 }, { "epoch": 0.6449021530988744, "grad_norm": 2.303391456604004, "learning_rate": 2.95784446671509e-06, "loss": 0.5572, "step": 15534 }, { "epoch": 0.6449436686230857, "grad_norm": 2.6092934608459473, "learning_rate": 2.9572308035129838e-06, "loss": 0.4898, "step": 15535 }, { "epoch": 0.6449851841472971, "grad_norm": 2.4625051021575928, "learning_rate": 2.9566171772447334e-06, "loss": 0.4993, "step": 15536 }, { "epoch": 0.6450266996715084, "grad_norm": 2.5936617851257324, "learning_rate": 2.956003587921433e-06, "loss": 0.6078, "step": 15537 }, { "epoch": 0.6450682151957198, "grad_norm": 2.595785140991211, "learning_rate": 2.95539003555418e-06, "loss": 0.5753, "step": 15538 }, { "epoch": 0.645109730719931, "grad_norm": 1.920844316482544, "learning_rate": 2.954776520154062e-06, "loss": 0.4551, "step": 15539 }, { "epoch": 0.6451512462441424, "grad_norm": 2.3340187072753906, "learning_rate": 2.954163041732174e-06, "loss": 0.4398, "step": 15540 }, { "epoch": 0.6451927617683537, "grad_norm": 2.2235965728759766, "learning_rate": 2.953549600299608e-06, "loss": 0.4714, "step": 15541 }, { "epoch": 0.6452342772925651, "grad_norm": 3.0773606300354004, "learning_rate": 2.952936195867457e-06, "loss": 0.561, "step": 15542 }, { "epoch": 0.6452757928167764, "grad_norm": 2.248352289199829, "learning_rate": 2.9523228284468077e-06, "loss": 0.4521, "step": 15543 }, { "epoch": 0.6453173083409878, "grad_norm": 2.3520302772521973, "learning_rate": 2.9517094980487514e-06, "loss": 0.5644, "step": 15544 }, { "epoch": 0.645358823865199, "grad_norm": 2.9409940242767334, "learning_rate": 2.9510962046843795e-06, "loss": 0.6935, "step": 15545 }, { "epoch": 0.6454003393894104, "grad_norm": 2.0982234477996826, "learning_rate": 2.950482948364778e-06, "loss": 0.4556, "step": 15546 }, { "epoch": 0.6454418549136217, "grad_norm": 2.4369606971740723, "learning_rate": 2.949869729101036e-06, "loss": 0.4391, "step": 15547 }, { "epoch": 0.6454833704378331, "grad_norm": 2.186274290084839, "learning_rate": 2.949256546904239e-06, "loss": 0.464, "step": 15548 }, { "epoch": 0.6455248859620444, "grad_norm": 2.5066981315612793, "learning_rate": 2.9486434017854746e-06, "loss": 0.7314, "step": 15549 }, { "epoch": 0.6455664014862558, "grad_norm": 2.1032490730285645, "learning_rate": 2.948030293755829e-06, "loss": 0.5077, "step": 15550 }, { "epoch": 0.645607917010467, "grad_norm": 2.2079381942749023, "learning_rate": 2.9474172228263886e-06, "loss": 0.3719, "step": 15551 }, { "epoch": 0.6456494325346784, "grad_norm": 2.145073413848877, "learning_rate": 2.9468041890082354e-06, "loss": 0.5114, "step": 15552 }, { "epoch": 0.6456909480588898, "grad_norm": 2.3718676567077637, "learning_rate": 2.9461911923124536e-06, "loss": 0.4918, "step": 15553 }, { "epoch": 0.6457324635831011, "grad_norm": 2.323690176010132, "learning_rate": 2.945578232750127e-06, "loss": 0.539, "step": 15554 }, { "epoch": 0.6457739791073125, "grad_norm": 2.2728824615478516, "learning_rate": 2.9449653103323405e-06, "loss": 0.4324, "step": 15555 }, { "epoch": 0.6458154946315238, "grad_norm": 2.110856294631958, "learning_rate": 2.9443524250701714e-06, "loss": 0.5213, "step": 15556 }, { "epoch": 0.6458570101557352, "grad_norm": 2.596510648727417, "learning_rate": 2.943739576974703e-06, "loss": 0.5417, "step": 15557 }, { "epoch": 0.6458985256799464, "grad_norm": 2.4438841342926025, "learning_rate": 2.943126766057016e-06, "loss": 0.6045, "step": 15558 }, { "epoch": 0.6459400412041578, "grad_norm": 3.1769497394561768, "learning_rate": 2.942513992328191e-06, "loss": 0.6588, "step": 15559 }, { "epoch": 0.6459815567283691, "grad_norm": 2.8855886459350586, "learning_rate": 2.9419012557993053e-06, "loss": 0.5943, "step": 15560 }, { "epoch": 0.6460230722525805, "grad_norm": 2.414623737335205, "learning_rate": 2.9412885564814375e-06, "loss": 0.5041, "step": 15561 }, { "epoch": 0.6460645877767918, "grad_norm": 2.1744110584259033, "learning_rate": 2.940675894385666e-06, "loss": 0.5716, "step": 15562 }, { "epoch": 0.6461061033010032, "grad_norm": 2.39715313911438, "learning_rate": 2.940063269523068e-06, "loss": 0.5151, "step": 15563 }, { "epoch": 0.6461476188252144, "grad_norm": 2.3704495429992676, "learning_rate": 2.9394506819047207e-06, "loss": 0.5374, "step": 15564 }, { "epoch": 0.6461891343494258, "grad_norm": 2.3007593154907227, "learning_rate": 2.938838131541698e-06, "loss": 0.4547, "step": 15565 }, { "epoch": 0.6462306498736371, "grad_norm": 2.1781792640686035, "learning_rate": 2.9382256184450765e-06, "loss": 0.603, "step": 15566 }, { "epoch": 0.6462721653978485, "grad_norm": 2.8816516399383545, "learning_rate": 2.937613142625929e-06, "loss": 0.5792, "step": 15567 }, { "epoch": 0.6463136809220598, "grad_norm": 2.317863702774048, "learning_rate": 2.937000704095333e-06, "loss": 0.5384, "step": 15568 }, { "epoch": 0.6463551964462712, "grad_norm": 2.2875125408172607, "learning_rate": 2.9363883028643566e-06, "loss": 0.4412, "step": 15569 }, { "epoch": 0.6463967119704824, "grad_norm": 2.037569284439087, "learning_rate": 2.935775938944074e-06, "loss": 0.4733, "step": 15570 }, { "epoch": 0.6464382274946938, "grad_norm": 2.659989833831787, "learning_rate": 2.9351636123455594e-06, "loss": 0.6078, "step": 15571 }, { "epoch": 0.6464797430189051, "grad_norm": 2.292693614959717, "learning_rate": 2.934551323079881e-06, "loss": 0.4822, "step": 15572 }, { "epoch": 0.6465212585431165, "grad_norm": 2.0939135551452637, "learning_rate": 2.9339390711581105e-06, "loss": 0.4229, "step": 15573 }, { "epoch": 0.6465627740673278, "grad_norm": 2.5895934104919434, "learning_rate": 2.9333268565913158e-06, "loss": 0.508, "step": 15574 }, { "epoch": 0.6466042895915391, "grad_norm": 1.9791861772537231, "learning_rate": 2.9327146793905676e-06, "loss": 0.4305, "step": 15575 }, { "epoch": 0.6466458051157504, "grad_norm": 2.309427499771118, "learning_rate": 2.9321025395669334e-06, "loss": 0.3448, "step": 15576 }, { "epoch": 0.6466873206399618, "grad_norm": 2.3034603595733643, "learning_rate": 2.9314904371314834e-06, "loss": 0.5305, "step": 15577 }, { "epoch": 0.6467288361641731, "grad_norm": 2.2303249835968018, "learning_rate": 2.9308783720952803e-06, "loss": 0.5727, "step": 15578 }, { "epoch": 0.6467703516883845, "grad_norm": 2.117259979248047, "learning_rate": 2.9302663444693935e-06, "loss": 0.4562, "step": 15579 }, { "epoch": 0.6468118672125958, "grad_norm": 2.1614084243774414, "learning_rate": 2.929654354264887e-06, "loss": 0.4112, "step": 15580 }, { "epoch": 0.6468533827368071, "grad_norm": 2.6309115886688232, "learning_rate": 2.9290424014928285e-06, "loss": 0.5325, "step": 15581 }, { "epoch": 0.6468948982610184, "grad_norm": 2.3959531784057617, "learning_rate": 2.9284304861642787e-06, "loss": 0.5036, "step": 15582 }, { "epoch": 0.6469364137852298, "grad_norm": 2.0273547172546387, "learning_rate": 2.9278186082903027e-06, "loss": 0.448, "step": 15583 }, { "epoch": 0.6469779293094412, "grad_norm": 2.7721219062805176, "learning_rate": 2.9272067678819648e-06, "loss": 0.4589, "step": 15584 }, { "epoch": 0.6470194448336525, "grad_norm": 2.7350826263427734, "learning_rate": 2.9265949649503257e-06, "loss": 0.6862, "step": 15585 }, { "epoch": 0.6470609603578639, "grad_norm": 2.394557476043701, "learning_rate": 2.925983199506447e-06, "loss": 0.5212, "step": 15586 }, { "epoch": 0.6471024758820751, "grad_norm": 2.7186059951782227, "learning_rate": 2.925371471561389e-06, "loss": 0.2948, "step": 15587 }, { "epoch": 0.6471439914062865, "grad_norm": 2.030240058898926, "learning_rate": 2.924759781126213e-06, "loss": 0.576, "step": 15588 }, { "epoch": 0.6471855069304978, "grad_norm": 2.396005153656006, "learning_rate": 2.924148128211978e-06, "loss": 0.5012, "step": 15589 }, { "epoch": 0.6472270224547092, "grad_norm": 2.349882125854492, "learning_rate": 2.923536512829745e-06, "loss": 0.419, "step": 15590 }, { "epoch": 0.6472685379789205, "grad_norm": 2.8190579414367676, "learning_rate": 2.9229249349905686e-06, "loss": 0.6715, "step": 15591 }, { "epoch": 0.6473100535031319, "grad_norm": 2.421670436859131, "learning_rate": 2.922313394705508e-06, "loss": 0.5002, "step": 15592 }, { "epoch": 0.6473515690273431, "grad_norm": 2.4795382022857666, "learning_rate": 2.92170189198562e-06, "loss": 0.5855, "step": 15593 }, { "epoch": 0.6473930845515545, "grad_norm": 2.3155200481414795, "learning_rate": 2.921090426841963e-06, "loss": 0.439, "step": 15594 }, { "epoch": 0.6474346000757658, "grad_norm": 2.427128314971924, "learning_rate": 2.9204789992855886e-06, "loss": 0.4956, "step": 15595 }, { "epoch": 0.6474761155999772, "grad_norm": 2.609795331954956, "learning_rate": 2.9198676093275535e-06, "loss": 0.4866, "step": 15596 }, { "epoch": 0.6475176311241885, "grad_norm": 2.0830767154693604, "learning_rate": 2.919256256978911e-06, "loss": 0.5711, "step": 15597 }, { "epoch": 0.6475591466483999, "grad_norm": 2.3673174381256104, "learning_rate": 2.9186449422507163e-06, "loss": 0.4275, "step": 15598 }, { "epoch": 0.6476006621726111, "grad_norm": 2.781982421875, "learning_rate": 2.918033665154021e-06, "loss": 0.3983, "step": 15599 }, { "epoch": 0.6476421776968225, "grad_norm": 2.863602876663208, "learning_rate": 2.9174224256998747e-06, "loss": 0.5535, "step": 15600 }, { "epoch": 0.6476836932210338, "grad_norm": 2.578042984008789, "learning_rate": 2.9168112238993333e-06, "loss": 0.5494, "step": 15601 }, { "epoch": 0.6477252087452452, "grad_norm": 2.104170799255371, "learning_rate": 2.9162000597634456e-06, "loss": 0.7732, "step": 15602 }, { "epoch": 0.6477667242694565, "grad_norm": 2.573291301727295, "learning_rate": 2.9155889333032616e-06, "loss": 0.5259, "step": 15603 }, { "epoch": 0.6478082397936679, "grad_norm": 2.079014539718628, "learning_rate": 2.91497784452983e-06, "loss": 0.4542, "step": 15604 }, { "epoch": 0.6478497553178791, "grad_norm": 1.9309576749801636, "learning_rate": 2.914366793454198e-06, "loss": 0.4201, "step": 15605 }, { "epoch": 0.6478912708420905, "grad_norm": 2.2406978607177734, "learning_rate": 2.9137557800874177e-06, "loss": 0.4487, "step": 15606 }, { "epoch": 0.6479327863663018, "grad_norm": 2.5288209915161133, "learning_rate": 2.9131448044405342e-06, "loss": 0.54, "step": 15607 }, { "epoch": 0.6479743018905132, "grad_norm": 2.265900135040283, "learning_rate": 2.912533866524594e-06, "loss": 0.6003, "step": 15608 }, { "epoch": 0.6480158174147245, "grad_norm": 2.6333863735198975, "learning_rate": 2.9119229663506417e-06, "loss": 0.4912, "step": 15609 }, { "epoch": 0.6480573329389359, "grad_norm": 2.703589677810669, "learning_rate": 2.9113121039297253e-06, "loss": 0.5567, "step": 15610 }, { "epoch": 0.6480988484631471, "grad_norm": 2.48573637008667, "learning_rate": 2.910701279272888e-06, "loss": 0.4296, "step": 15611 }, { "epoch": 0.6481403639873585, "grad_norm": 2.240185260772705, "learning_rate": 2.9100904923911743e-06, "loss": 0.4684, "step": 15612 }, { "epoch": 0.6481818795115698, "grad_norm": 2.279883623123169, "learning_rate": 2.9094797432956252e-06, "loss": 0.6176, "step": 15613 }, { "epoch": 0.6482233950357812, "grad_norm": 2.324511766433716, "learning_rate": 2.9088690319972862e-06, "loss": 0.4621, "step": 15614 }, { "epoch": 0.6482649105599926, "grad_norm": 2.24322772026062, "learning_rate": 2.9082583585071983e-06, "loss": 0.5771, "step": 15615 }, { "epoch": 0.6483064260842039, "grad_norm": 2.5157546997070312, "learning_rate": 2.907647722836402e-06, "loss": 0.5463, "step": 15616 }, { "epoch": 0.6483479416084152, "grad_norm": 2.756697177886963, "learning_rate": 2.907037124995938e-06, "loss": 0.5641, "step": 15617 }, { "epoch": 0.6483894571326265, "grad_norm": 2.480323553085327, "learning_rate": 2.906426564996844e-06, "loss": 0.5381, "step": 15618 }, { "epoch": 0.6484309726568379, "grad_norm": 1.921841025352478, "learning_rate": 2.9058160428501625e-06, "loss": 0.4397, "step": 15619 }, { "epoch": 0.6484724881810492, "grad_norm": 2.406928539276123, "learning_rate": 2.9052055585669316e-06, "loss": 0.5021, "step": 15620 }, { "epoch": 0.6485140037052606, "grad_norm": 1.8626055717468262, "learning_rate": 2.9045951121581874e-06, "loss": 0.3813, "step": 15621 }, { "epoch": 0.6485555192294719, "grad_norm": 2.4812519550323486, "learning_rate": 2.9039847036349657e-06, "loss": 0.6073, "step": 15622 }, { "epoch": 0.6485970347536832, "grad_norm": 2.2928521633148193, "learning_rate": 2.9033743330083064e-06, "loss": 0.495, "step": 15623 }, { "epoch": 0.6486385502778945, "grad_norm": 2.185701370239258, "learning_rate": 2.902764000289244e-06, "loss": 0.5043, "step": 15624 }, { "epoch": 0.6486800658021059, "grad_norm": 3.106726884841919, "learning_rate": 2.9021537054888126e-06, "loss": 0.5833, "step": 15625 }, { "epoch": 0.6487215813263172, "grad_norm": 1.9687755107879639, "learning_rate": 2.901543448618045e-06, "loss": 0.4613, "step": 15626 }, { "epoch": 0.6487630968505286, "grad_norm": 4.309963703155518, "learning_rate": 2.900933229687978e-06, "loss": 0.5594, "step": 15627 }, { "epoch": 0.6488046123747399, "grad_norm": 2.081155300140381, "learning_rate": 2.9003230487096444e-06, "loss": 0.6154, "step": 15628 }, { "epoch": 0.6488461278989512, "grad_norm": 2.6859683990478516, "learning_rate": 2.899712905694074e-06, "loss": 0.4789, "step": 15629 }, { "epoch": 0.6488876434231625, "grad_norm": 2.3485841751098633, "learning_rate": 2.899102800652301e-06, "loss": 0.574, "step": 15630 }, { "epoch": 0.6489291589473739, "grad_norm": 3.1821975708007812, "learning_rate": 2.898492733595352e-06, "loss": 0.4131, "step": 15631 }, { "epoch": 0.6489706744715852, "grad_norm": 2.6662445068359375, "learning_rate": 2.897882704534263e-06, "loss": 0.7994, "step": 15632 }, { "epoch": 0.6490121899957966, "grad_norm": 2.3748626708984375, "learning_rate": 2.8972727134800595e-06, "loss": 0.4887, "step": 15633 }, { "epoch": 0.6490537055200079, "grad_norm": 2.0905637741088867, "learning_rate": 2.8966627604437717e-06, "loss": 0.5283, "step": 15634 }, { "epoch": 0.6490952210442192, "grad_norm": 2.5397303104400635, "learning_rate": 2.8960528454364257e-06, "loss": 0.4869, "step": 15635 }, { "epoch": 0.6491367365684305, "grad_norm": 2.1640613079071045, "learning_rate": 2.895442968469053e-06, "loss": 0.5428, "step": 15636 }, { "epoch": 0.6491782520926419, "grad_norm": 2.672431468963623, "learning_rate": 2.8948331295526776e-06, "loss": 0.5722, "step": 15637 }, { "epoch": 0.6492197676168532, "grad_norm": 2.2403550148010254, "learning_rate": 2.894223328698326e-06, "loss": 0.5866, "step": 15638 }, { "epoch": 0.6492612831410646, "grad_norm": 2.0809848308563232, "learning_rate": 2.8936135659170217e-06, "loss": 0.5412, "step": 15639 }, { "epoch": 0.6493027986652758, "grad_norm": 2.0256001949310303, "learning_rate": 2.8930038412197927e-06, "loss": 0.5914, "step": 15640 }, { "epoch": 0.6493443141894872, "grad_norm": 2.7641441822052, "learning_rate": 2.892394154617662e-06, "loss": 0.4588, "step": 15641 }, { "epoch": 0.6493858297136985, "grad_norm": 2.0039305686950684, "learning_rate": 2.8917845061216525e-06, "loss": 0.5822, "step": 15642 }, { "epoch": 0.6494273452379099, "grad_norm": 2.329683303833008, "learning_rate": 2.8911748957427865e-06, "loss": 0.4754, "step": 15643 }, { "epoch": 0.6494688607621212, "grad_norm": 2.4964306354522705, "learning_rate": 2.890565323492085e-06, "loss": 0.5323, "step": 15644 }, { "epoch": 0.6495103762863326, "grad_norm": 2.4579787254333496, "learning_rate": 2.889955789380572e-06, "loss": 0.5744, "step": 15645 }, { "epoch": 0.649551891810544, "grad_norm": 2.193542957305908, "learning_rate": 2.889346293419266e-06, "loss": 0.3223, "step": 15646 }, { "epoch": 0.6495934073347552, "grad_norm": 1.8913718461990356, "learning_rate": 2.888736835619188e-06, "loss": 0.4955, "step": 15647 }, { "epoch": 0.6496349228589666, "grad_norm": 2.457066774368286, "learning_rate": 2.888127415991354e-06, "loss": 0.5933, "step": 15648 }, { "epoch": 0.6496764383831779, "grad_norm": 2.273477792739868, "learning_rate": 2.887518034546787e-06, "loss": 0.5326, "step": 15649 }, { "epoch": 0.6497179539073893, "grad_norm": 2.751713275909424, "learning_rate": 2.886908691296504e-06, "loss": 0.4588, "step": 15650 }, { "epoch": 0.6497594694316006, "grad_norm": 2.119922399520874, "learning_rate": 2.88629938625152e-06, "loss": 0.5038, "step": 15651 }, { "epoch": 0.649800984955812, "grad_norm": 2.3945765495300293, "learning_rate": 2.8856901194228503e-06, "loss": 0.5233, "step": 15652 }, { "epoch": 0.6498425004800232, "grad_norm": 2.061265707015991, "learning_rate": 2.8850808908215144e-06, "loss": 0.41, "step": 15653 }, { "epoch": 0.6498840160042346, "grad_norm": 2.6899187564849854, "learning_rate": 2.8844717004585264e-06, "loss": 0.4225, "step": 15654 }, { "epoch": 0.6499255315284459, "grad_norm": 2.3561816215515137, "learning_rate": 2.883862548344899e-06, "loss": 0.502, "step": 15655 }, { "epoch": 0.6499670470526573, "grad_norm": 1.8773711919784546, "learning_rate": 2.8832534344916474e-06, "loss": 0.5171, "step": 15656 }, { "epoch": 0.6500085625768686, "grad_norm": 2.4156644344329834, "learning_rate": 2.8826443589097806e-06, "loss": 0.6442, "step": 15657 }, { "epoch": 0.65005007810108, "grad_norm": 2.6459412574768066, "learning_rate": 2.8820353216103175e-06, "loss": 0.6303, "step": 15658 }, { "epoch": 0.6500915936252912, "grad_norm": 2.4057421684265137, "learning_rate": 2.881426322604266e-06, "loss": 0.5595, "step": 15659 }, { "epoch": 0.6501331091495026, "grad_norm": 2.5158205032348633, "learning_rate": 2.880817361902637e-06, "loss": 0.5272, "step": 15660 }, { "epoch": 0.6501746246737139, "grad_norm": 2.361346960067749, "learning_rate": 2.8802084395164387e-06, "loss": 0.6145, "step": 15661 }, { "epoch": 0.6502161401979253, "grad_norm": 2.2886135578155518, "learning_rate": 2.8795995554566847e-06, "loss": 0.7067, "step": 15662 }, { "epoch": 0.6502576557221366, "grad_norm": 2.340463399887085, "learning_rate": 2.8789907097343818e-06, "loss": 0.4346, "step": 15663 }, { "epoch": 0.650299171246348, "grad_norm": 2.2422118186950684, "learning_rate": 2.8783819023605388e-06, "loss": 0.4704, "step": 15664 }, { "epoch": 0.6503406867705592, "grad_norm": 2.9455387592315674, "learning_rate": 2.87777313334616e-06, "loss": 0.5336, "step": 15665 }, { "epoch": 0.6503822022947706, "grad_norm": 2.6401944160461426, "learning_rate": 2.8771644027022572e-06, "loss": 0.5703, "step": 15666 }, { "epoch": 0.6504237178189819, "grad_norm": 2.4611928462982178, "learning_rate": 2.8765557104398335e-06, "loss": 0.4738, "step": 15667 }, { "epoch": 0.6504652333431933, "grad_norm": 2.5882108211517334, "learning_rate": 2.8759470565698942e-06, "loss": 0.6123, "step": 15668 }, { "epoch": 0.6505067488674046, "grad_norm": 2.441329002380371, "learning_rate": 2.875338441103445e-06, "loss": 0.4665, "step": 15669 }, { "epoch": 0.650548264391616, "grad_norm": 2.0045411586761475, "learning_rate": 2.874729864051487e-06, "loss": 0.4736, "step": 15670 }, { "epoch": 0.6505897799158272, "grad_norm": 2.408217191696167, "learning_rate": 2.874121325425028e-06, "loss": 0.5985, "step": 15671 }, { "epoch": 0.6506312954400386, "grad_norm": 2.44488787651062, "learning_rate": 2.8735128252350677e-06, "loss": 0.5432, "step": 15672 }, { "epoch": 0.6506728109642499, "grad_norm": 3.0290443897247314, "learning_rate": 2.872904363492608e-06, "loss": 0.5362, "step": 15673 }, { "epoch": 0.6507143264884613, "grad_norm": 2.8831393718719482, "learning_rate": 2.8722959402086493e-06, "loss": 0.6271, "step": 15674 }, { "epoch": 0.6507558420126726, "grad_norm": 2.4475231170654297, "learning_rate": 2.8716875553941946e-06, "loss": 0.4146, "step": 15675 }, { "epoch": 0.650797357536884, "grad_norm": 1.929227590560913, "learning_rate": 2.871079209060243e-06, "loss": 0.3549, "step": 15676 }, { "epoch": 0.6508388730610953, "grad_norm": 2.4613003730773926, "learning_rate": 2.870470901217793e-06, "loss": 0.5644, "step": 15677 }, { "epoch": 0.6508803885853066, "grad_norm": 2.31030535697937, "learning_rate": 2.86986263187784e-06, "loss": 0.7399, "step": 15678 }, { "epoch": 0.650921904109518, "grad_norm": 2.1871085166931152, "learning_rate": 2.8692544010513872e-06, "loss": 0.5308, "step": 15679 }, { "epoch": 0.6509634196337293, "grad_norm": 2.3454105854034424, "learning_rate": 2.868646208749429e-06, "loss": 0.5947, "step": 15680 }, { "epoch": 0.6510049351579407, "grad_norm": 2.358323097229004, "learning_rate": 2.868038054982962e-06, "loss": 0.5253, "step": 15681 }, { "epoch": 0.651046450682152, "grad_norm": 2.177565574645996, "learning_rate": 2.8674299397629807e-06, "loss": 0.4768, "step": 15682 }, { "epoch": 0.6510879662063633, "grad_norm": 2.378392219543457, "learning_rate": 2.866821863100479e-06, "loss": 0.4591, "step": 15683 }, { "epoch": 0.6511294817305746, "grad_norm": 2.2352309226989746, "learning_rate": 2.8662138250064555e-06, "loss": 0.5427, "step": 15684 }, { "epoch": 0.651170997254786, "grad_norm": 2.5759975910186768, "learning_rate": 2.8656058254919e-06, "loss": 0.6186, "step": 15685 }, { "epoch": 0.6512125127789973, "grad_norm": 2.3490183353424072, "learning_rate": 2.864997864567807e-06, "loss": 0.4201, "step": 15686 }, { "epoch": 0.6512540283032087, "grad_norm": 2.4017224311828613, "learning_rate": 2.864389942245166e-06, "loss": 0.5147, "step": 15687 }, { "epoch": 0.6512955438274199, "grad_norm": 2.0424699783325195, "learning_rate": 2.863782058534972e-06, "loss": 0.5959, "step": 15688 }, { "epoch": 0.6513370593516313, "grad_norm": 2.546058177947998, "learning_rate": 2.863174213448214e-06, "loss": 0.4568, "step": 15689 }, { "epoch": 0.6513785748758426, "grad_norm": 3.195967197418213, "learning_rate": 2.862566406995883e-06, "loss": 0.4749, "step": 15690 }, { "epoch": 0.651420090400054, "grad_norm": 2.360283374786377, "learning_rate": 2.8619586391889653e-06, "loss": 0.4039, "step": 15691 }, { "epoch": 0.6514616059242653, "grad_norm": 2.773791790008545, "learning_rate": 2.861350910038453e-06, "loss": 0.6415, "step": 15692 }, { "epoch": 0.6515031214484767, "grad_norm": 2.0546648502349854, "learning_rate": 2.860743219555333e-06, "loss": 0.53, "step": 15693 }, { "epoch": 0.6515446369726879, "grad_norm": 2.3955020904541016, "learning_rate": 2.860135567750592e-06, "loss": 0.5945, "step": 15694 }, { "epoch": 0.6515861524968993, "grad_norm": 2.4456632137298584, "learning_rate": 2.8595279546352174e-06, "loss": 0.5372, "step": 15695 }, { "epoch": 0.6516276680211106, "grad_norm": 2.4559149742126465, "learning_rate": 2.8589203802201914e-06, "loss": 0.4664, "step": 15696 }, { "epoch": 0.651669183545322, "grad_norm": 2.302016019821167, "learning_rate": 2.8583128445165044e-06, "loss": 0.6045, "step": 15697 }, { "epoch": 0.6517106990695333, "grad_norm": 2.1469879150390625, "learning_rate": 2.857705347535139e-06, "loss": 0.4438, "step": 15698 }, { "epoch": 0.6517522145937447, "grad_norm": 2.4522628784179688, "learning_rate": 2.8570978892870777e-06, "loss": 0.6031, "step": 15699 }, { "epoch": 0.6517937301179559, "grad_norm": 2.2720422744750977, "learning_rate": 2.8564904697833023e-06, "loss": 0.4086, "step": 15700 }, { "epoch": 0.6518352456421673, "grad_norm": 2.1024394035339355, "learning_rate": 2.8558830890347987e-06, "loss": 0.5647, "step": 15701 }, { "epoch": 0.6518767611663786, "grad_norm": 2.1612229347229004, "learning_rate": 2.8552757470525476e-06, "loss": 0.4981, "step": 15702 }, { "epoch": 0.65191827669059, "grad_norm": 3.3656299114227295, "learning_rate": 2.8546684438475293e-06, "loss": 0.4412, "step": 15703 }, { "epoch": 0.6519597922148013, "grad_norm": 2.1883280277252197, "learning_rate": 2.8540611794307216e-06, "loss": 0.5223, "step": 15704 }, { "epoch": 0.6520013077390127, "grad_norm": 2.088641405105591, "learning_rate": 2.853453953813108e-06, "loss": 0.4261, "step": 15705 }, { "epoch": 0.652042823263224, "grad_norm": 2.4327235221862793, "learning_rate": 2.8528467670056658e-06, "loss": 0.5046, "step": 15706 }, { "epoch": 0.6520843387874353, "grad_norm": 2.0909509658813477, "learning_rate": 2.8522396190193723e-06, "loss": 0.5081, "step": 15707 }, { "epoch": 0.6521258543116467, "grad_norm": 2.3763904571533203, "learning_rate": 2.8516325098652065e-06, "loss": 0.4135, "step": 15708 }, { "epoch": 0.652167369835858, "grad_norm": 3.227800130844116, "learning_rate": 2.851025439554142e-06, "loss": 0.767, "step": 15709 }, { "epoch": 0.6522088853600694, "grad_norm": 2.545818567276001, "learning_rate": 2.8504184080971586e-06, "loss": 0.5501, "step": 15710 }, { "epoch": 0.6522504008842807, "grad_norm": 2.313354253768921, "learning_rate": 2.8498114155052304e-06, "loss": 0.6015, "step": 15711 }, { "epoch": 0.652291916408492, "grad_norm": 2.486436367034912, "learning_rate": 2.849204461789331e-06, "loss": 0.511, "step": 15712 }, { "epoch": 0.6523334319327033, "grad_norm": 2.137310743331909, "learning_rate": 2.848597546960433e-06, "loss": 0.4867, "step": 15713 }, { "epoch": 0.6523749474569147, "grad_norm": 2.139010190963745, "learning_rate": 2.8479906710295134e-06, "loss": 0.4265, "step": 15714 }, { "epoch": 0.652416462981126, "grad_norm": 2.2553517818450928, "learning_rate": 2.8473838340075433e-06, "loss": 0.5574, "step": 15715 }, { "epoch": 0.6524579785053374, "grad_norm": 2.492856025695801, "learning_rate": 2.8467770359054937e-06, "loss": 0.4718, "step": 15716 }, { "epoch": 0.6524994940295487, "grad_norm": 2.192301034927368, "learning_rate": 2.8461702767343336e-06, "loss": 0.3954, "step": 15717 }, { "epoch": 0.65254100955376, "grad_norm": 3.24934720993042, "learning_rate": 2.8455635565050377e-06, "loss": 0.4986, "step": 15718 }, { "epoch": 0.6525825250779713, "grad_norm": 2.578244209289551, "learning_rate": 2.8449568752285738e-06, "loss": 0.4834, "step": 15719 }, { "epoch": 0.6526240406021827, "grad_norm": 1.7876030206680298, "learning_rate": 2.8443502329159113e-06, "loss": 0.4162, "step": 15720 }, { "epoch": 0.652665556126394, "grad_norm": 2.2339136600494385, "learning_rate": 2.8437436295780176e-06, "loss": 0.4818, "step": 15721 }, { "epoch": 0.6527070716506054, "grad_norm": 2.7239997386932373, "learning_rate": 2.8431370652258584e-06, "loss": 0.5225, "step": 15722 }, { "epoch": 0.6527485871748167, "grad_norm": 2.533402919769287, "learning_rate": 2.842530539870404e-06, "loss": 0.5876, "step": 15723 }, { "epoch": 0.652790102699028, "grad_norm": 2.2208077907562256, "learning_rate": 2.84192405352262e-06, "loss": 0.4531, "step": 15724 }, { "epoch": 0.6528316182232393, "grad_norm": 2.010251522064209, "learning_rate": 2.841317606193471e-06, "loss": 0.5349, "step": 15725 }, { "epoch": 0.6528731337474507, "grad_norm": 2.5360405445098877, "learning_rate": 2.8407111978939205e-06, "loss": 0.3933, "step": 15726 }, { "epoch": 0.652914649271662, "grad_norm": 2.6046993732452393, "learning_rate": 2.8401048286349353e-06, "loss": 0.5765, "step": 15727 }, { "epoch": 0.6529561647958734, "grad_norm": 2.014397382736206, "learning_rate": 2.8394984984274776e-06, "loss": 0.5794, "step": 15728 }, { "epoch": 0.6529976803200847, "grad_norm": 3.523639440536499, "learning_rate": 2.83889220728251e-06, "loss": 0.4726, "step": 15729 }, { "epoch": 0.653039195844296, "grad_norm": 3.3435046672821045, "learning_rate": 2.8382859552109916e-06, "loss": 0.489, "step": 15730 }, { "epoch": 0.6530807113685073, "grad_norm": 2.36039662361145, "learning_rate": 2.837679742223888e-06, "loss": 0.4947, "step": 15731 }, { "epoch": 0.6531222268927187, "grad_norm": 2.0474045276641846, "learning_rate": 2.8370735683321586e-06, "loss": 0.6094, "step": 15732 }, { "epoch": 0.65316374241693, "grad_norm": 2.010307550430298, "learning_rate": 2.8364674335467625e-06, "loss": 0.4198, "step": 15733 }, { "epoch": 0.6532052579411414, "grad_norm": 2.6141343116760254, "learning_rate": 2.8358613378786583e-06, "loss": 0.504, "step": 15734 }, { "epoch": 0.6532467734653526, "grad_norm": 1.9558378458023071, "learning_rate": 2.8352552813388035e-06, "loss": 0.4794, "step": 15735 }, { "epoch": 0.653288288989564, "grad_norm": 2.631239414215088, "learning_rate": 2.8346492639381585e-06, "loss": 0.5588, "step": 15736 }, { "epoch": 0.6533298045137754, "grad_norm": 2.5356171131134033, "learning_rate": 2.8340432856876795e-06, "loss": 0.6418, "step": 15737 }, { "epoch": 0.6533713200379867, "grad_norm": 2.937983751296997, "learning_rate": 2.8334373465983216e-06, "loss": 0.5883, "step": 15738 }, { "epoch": 0.6534128355621981, "grad_norm": 2.2661287784576416, "learning_rate": 2.832831446681039e-06, "loss": 0.4481, "step": 15739 }, { "epoch": 0.6534543510864094, "grad_norm": 2.538548469543457, "learning_rate": 2.832225585946791e-06, "loss": 0.4649, "step": 15740 }, { "epoch": 0.6534958666106208, "grad_norm": 1.8678756952285767, "learning_rate": 2.831619764406529e-06, "loss": 0.5476, "step": 15741 }, { "epoch": 0.653537382134832, "grad_norm": 2.0342483520507812, "learning_rate": 2.831013982071207e-06, "loss": 0.6457, "step": 15742 }, { "epoch": 0.6535788976590434, "grad_norm": 2.0818514823913574, "learning_rate": 2.830408238951775e-06, "loss": 0.5414, "step": 15743 }, { "epoch": 0.6536204131832547, "grad_norm": 2.7031240463256836, "learning_rate": 2.8298025350591894e-06, "loss": 0.4763, "step": 15744 }, { "epoch": 0.6536619287074661, "grad_norm": 2.477219820022583, "learning_rate": 2.829196870404399e-06, "loss": 0.5574, "step": 15745 }, { "epoch": 0.6537034442316774, "grad_norm": 2.7483725547790527, "learning_rate": 2.8285912449983553e-06, "loss": 0.4759, "step": 15746 }, { "epoch": 0.6537449597558888, "grad_norm": 2.7592525482177734, "learning_rate": 2.827985658852008e-06, "loss": 0.485, "step": 15747 }, { "epoch": 0.6537864752801, "grad_norm": 2.095613956451416, "learning_rate": 2.8273801119763043e-06, "loss": 0.5038, "step": 15748 }, { "epoch": 0.6538279908043114, "grad_norm": 2.702106475830078, "learning_rate": 2.8267746043821952e-06, "loss": 0.6184, "step": 15749 }, { "epoch": 0.6538695063285227, "grad_norm": 2.397420644760132, "learning_rate": 2.8261691360806286e-06, "loss": 0.4675, "step": 15750 }, { "epoch": 0.6539110218527341, "grad_norm": 2.2249433994293213, "learning_rate": 2.8255637070825504e-06, "loss": 0.412, "step": 15751 }, { "epoch": 0.6539525373769454, "grad_norm": 2.5563488006591797, "learning_rate": 2.8249583173989054e-06, "loss": 0.3362, "step": 15752 }, { "epoch": 0.6539940529011568, "grad_norm": 2.0338895320892334, "learning_rate": 2.824352967040642e-06, "loss": 0.6018, "step": 15753 }, { "epoch": 0.654035568425368, "grad_norm": 2.796520233154297, "learning_rate": 2.823747656018705e-06, "loss": 0.5849, "step": 15754 }, { "epoch": 0.6540770839495794, "grad_norm": 2.521099328994751, "learning_rate": 2.8231423843440374e-06, "loss": 0.642, "step": 15755 }, { "epoch": 0.6541185994737907, "grad_norm": 2.2894344329833984, "learning_rate": 2.8225371520275813e-06, "loss": 0.5511, "step": 15756 }, { "epoch": 0.6541601149980021, "grad_norm": 3.1516408920288086, "learning_rate": 2.8219319590802822e-06, "loss": 0.3666, "step": 15757 }, { "epoch": 0.6542016305222134, "grad_norm": 2.333829402923584, "learning_rate": 2.8213268055130814e-06, "loss": 0.4789, "step": 15758 }, { "epoch": 0.6542431460464248, "grad_norm": 2.7093770503997803, "learning_rate": 2.8207216913369204e-06, "loss": 0.635, "step": 15759 }, { "epoch": 0.654284661570636, "grad_norm": 2.6128947734832764, "learning_rate": 2.820116616562739e-06, "loss": 0.5315, "step": 15760 }, { "epoch": 0.6543261770948474, "grad_norm": 2.7107067108154297, "learning_rate": 2.819511581201475e-06, "loss": 0.4237, "step": 15761 }, { "epoch": 0.6543676926190587, "grad_norm": 3.999114751815796, "learning_rate": 2.818906585264073e-06, "loss": 0.4563, "step": 15762 }, { "epoch": 0.6544092081432701, "grad_norm": 2.542750835418701, "learning_rate": 2.8183016287614684e-06, "loss": 0.3742, "step": 15763 }, { "epoch": 0.6544507236674814, "grad_norm": 2.5556693077087402, "learning_rate": 2.817696711704598e-06, "loss": 0.4752, "step": 15764 }, { "epoch": 0.6544922391916927, "grad_norm": 2.3234646320343018, "learning_rate": 2.8170918341043997e-06, "loss": 0.6091, "step": 15765 }, { "epoch": 0.654533754715904, "grad_norm": 2.4200501441955566, "learning_rate": 2.816486995971811e-06, "loss": 0.6358, "step": 15766 }, { "epoch": 0.6545752702401154, "grad_norm": 2.6257505416870117, "learning_rate": 2.8158821973177675e-06, "loss": 0.7066, "step": 15767 }, { "epoch": 0.6546167857643268, "grad_norm": 2.545095205307007, "learning_rate": 2.8152774381532033e-06, "loss": 0.4454, "step": 15768 }, { "epoch": 0.6546583012885381, "grad_norm": 3.168607234954834, "learning_rate": 2.814672718489051e-06, "loss": 0.5777, "step": 15769 }, { "epoch": 0.6546998168127495, "grad_norm": 2.4994540214538574, "learning_rate": 2.8140680383362473e-06, "loss": 0.4498, "step": 15770 }, { "epoch": 0.6547413323369607, "grad_norm": 2.3972203731536865, "learning_rate": 2.8134633977057236e-06, "loss": 0.462, "step": 15771 }, { "epoch": 0.6547828478611721, "grad_norm": 2.745220422744751, "learning_rate": 2.812858796608412e-06, "loss": 0.5273, "step": 15772 }, { "epoch": 0.6548243633853834, "grad_norm": 2.994520664215088, "learning_rate": 2.812254235055244e-06, "loss": 0.599, "step": 15773 }, { "epoch": 0.6548658789095948, "grad_norm": 2.460993766784668, "learning_rate": 2.8116497130571473e-06, "loss": 0.3741, "step": 15774 }, { "epoch": 0.6549073944338061, "grad_norm": 2.122483968734741, "learning_rate": 2.8110452306250568e-06, "loss": 0.4235, "step": 15775 }, { "epoch": 0.6549489099580175, "grad_norm": 2.4881880283355713, "learning_rate": 2.8104407877698996e-06, "loss": 0.7265, "step": 15776 }, { "epoch": 0.6549904254822287, "grad_norm": 2.2041616439819336, "learning_rate": 2.8098363845026044e-06, "loss": 0.5885, "step": 15777 }, { "epoch": 0.6550319410064401, "grad_norm": 2.2828404903411865, "learning_rate": 2.809232020834096e-06, "loss": 0.6406, "step": 15778 }, { "epoch": 0.6550734565306514, "grad_norm": 3.0155131816864014, "learning_rate": 2.808627696775307e-06, "loss": 0.5252, "step": 15779 }, { "epoch": 0.6551149720548628, "grad_norm": 2.3184986114501953, "learning_rate": 2.8080234123371596e-06, "loss": 0.5302, "step": 15780 }, { "epoch": 0.6551564875790741, "grad_norm": 1.785868525505066, "learning_rate": 2.8074191675305816e-06, "loss": 0.4572, "step": 15781 }, { "epoch": 0.6551980031032855, "grad_norm": 2.181185483932495, "learning_rate": 2.8068149623664953e-06, "loss": 0.5575, "step": 15782 }, { "epoch": 0.6552395186274967, "grad_norm": 2.5501887798309326, "learning_rate": 2.806210796855828e-06, "loss": 0.5346, "step": 15783 }, { "epoch": 0.6552810341517081, "grad_norm": 2.586357355117798, "learning_rate": 2.805606671009502e-06, "loss": 0.6357, "step": 15784 }, { "epoch": 0.6553225496759194, "grad_norm": 2.2044901847839355, "learning_rate": 2.80500258483844e-06, "loss": 0.4336, "step": 15785 }, { "epoch": 0.6553640652001308, "grad_norm": 2.4610555171966553, "learning_rate": 2.804398538353563e-06, "loss": 0.415, "step": 15786 }, { "epoch": 0.6554055807243421, "grad_norm": 2.3174173831939697, "learning_rate": 2.803794531565792e-06, "loss": 0.3455, "step": 15787 }, { "epoch": 0.6554470962485535, "grad_norm": 2.4028830528259277, "learning_rate": 2.8031905644860514e-06, "loss": 0.5494, "step": 15788 }, { "epoch": 0.6554886117727647, "grad_norm": 2.6380228996276855, "learning_rate": 2.802586637125258e-06, "loss": 0.5926, "step": 15789 }, { "epoch": 0.6555301272969761, "grad_norm": 2.357623338699341, "learning_rate": 2.801982749494332e-06, "loss": 0.5168, "step": 15790 }, { "epoch": 0.6555716428211874, "grad_norm": 2.647723436355591, "learning_rate": 2.8013789016041886e-06, "loss": 0.428, "step": 15791 }, { "epoch": 0.6556131583453988, "grad_norm": 2.374079704284668, "learning_rate": 2.800775093465751e-06, "loss": 0.7187, "step": 15792 }, { "epoch": 0.6556546738696101, "grad_norm": 2.7584218978881836, "learning_rate": 2.8001713250899332e-06, "loss": 0.6478, "step": 15793 }, { "epoch": 0.6556961893938215, "grad_norm": 2.3858954906463623, "learning_rate": 2.799567596487652e-06, "loss": 0.4402, "step": 15794 }, { "epoch": 0.6557377049180327, "grad_norm": 2.223451852798462, "learning_rate": 2.7989639076698216e-06, "loss": 0.3558, "step": 15795 }, { "epoch": 0.6557792204422441, "grad_norm": 2.7253499031066895, "learning_rate": 2.798360258647359e-06, "loss": 0.4609, "step": 15796 }, { "epoch": 0.6558207359664554, "grad_norm": 2.149843692779541, "learning_rate": 2.7977566494311786e-06, "loss": 0.5412, "step": 15797 }, { "epoch": 0.6558622514906668, "grad_norm": 2.362182855606079, "learning_rate": 2.797153080032192e-06, "loss": 0.4759, "step": 15798 }, { "epoch": 0.6559037670148782, "grad_norm": 2.2202653884887695, "learning_rate": 2.7965495504613126e-06, "loss": 0.3818, "step": 15799 }, { "epoch": 0.6559452825390895, "grad_norm": 2.395186185836792, "learning_rate": 2.795946060729451e-06, "loss": 0.4676, "step": 15800 }, { "epoch": 0.6559867980633008, "grad_norm": 2.344583034515381, "learning_rate": 2.7953426108475216e-06, "loss": 0.6532, "step": 15801 }, { "epoch": 0.6560283135875121, "grad_norm": 2.7275214195251465, "learning_rate": 2.7947392008264335e-06, "loss": 0.4858, "step": 15802 }, { "epoch": 0.6560698291117235, "grad_norm": 2.579594135284424, "learning_rate": 2.794135830677096e-06, "loss": 0.4638, "step": 15803 }, { "epoch": 0.6561113446359348, "grad_norm": 2.641714572906494, "learning_rate": 2.7935325004104164e-06, "loss": 0.529, "step": 15804 }, { "epoch": 0.6561528601601462, "grad_norm": 2.854339599609375, "learning_rate": 2.7929292100373074e-06, "loss": 0.5602, "step": 15805 }, { "epoch": 0.6561943756843575, "grad_norm": 2.4317805767059326, "learning_rate": 2.792325959568674e-06, "loss": 0.6461, "step": 15806 }, { "epoch": 0.6562358912085688, "grad_norm": 1.9572721719741821, "learning_rate": 2.791722749015424e-06, "loss": 0.473, "step": 15807 }, { "epoch": 0.6562774067327801, "grad_norm": 2.7316744327545166, "learning_rate": 2.7911195783884615e-06, "loss": 0.5088, "step": 15808 }, { "epoch": 0.6563189222569915, "grad_norm": 2.496544361114502, "learning_rate": 2.7905164476986945e-06, "loss": 0.4852, "step": 15809 }, { "epoch": 0.6563604377812028, "grad_norm": 2.1665186882019043, "learning_rate": 2.789913356957028e-06, "loss": 0.4679, "step": 15810 }, { "epoch": 0.6564019533054142, "grad_norm": 2.2644782066345215, "learning_rate": 2.7893103061743647e-06, "loss": 0.483, "step": 15811 }, { "epoch": 0.6564434688296255, "grad_norm": 2.712373733520508, "learning_rate": 2.7887072953616085e-06, "loss": 0.5263, "step": 15812 }, { "epoch": 0.6564849843538368, "grad_norm": 1.948040246963501, "learning_rate": 2.7881043245296595e-06, "loss": 0.4611, "step": 15813 }, { "epoch": 0.6565264998780481, "grad_norm": 2.325984001159668, "learning_rate": 2.787501393689424e-06, "loss": 0.6923, "step": 15814 }, { "epoch": 0.6565680154022595, "grad_norm": 1.8333197832107544, "learning_rate": 2.7868985028518007e-06, "loss": 0.5145, "step": 15815 }, { "epoch": 0.6566095309264708, "grad_norm": 2.863220453262329, "learning_rate": 2.7862956520276906e-06, "loss": 0.5951, "step": 15816 }, { "epoch": 0.6566510464506822, "grad_norm": 1.8259915113449097, "learning_rate": 2.7856928412279917e-06, "loss": 0.4643, "step": 15817 }, { "epoch": 0.6566925619748935, "grad_norm": 2.1461496353149414, "learning_rate": 2.7850900704636053e-06, "loss": 0.4602, "step": 15818 }, { "epoch": 0.6567340774991048, "grad_norm": 2.467801570892334, "learning_rate": 2.7844873397454298e-06, "loss": 0.568, "step": 15819 }, { "epoch": 0.6567755930233161, "grad_norm": 2.2762088775634766, "learning_rate": 2.7838846490843614e-06, "loss": 0.4022, "step": 15820 }, { "epoch": 0.6568171085475275, "grad_norm": 2.552579879760742, "learning_rate": 2.783281998491295e-06, "loss": 0.5158, "step": 15821 }, { "epoch": 0.6568586240717388, "grad_norm": 2.215628147125244, "learning_rate": 2.782679387977132e-06, "loss": 0.4474, "step": 15822 }, { "epoch": 0.6569001395959502, "grad_norm": 2.6299562454223633, "learning_rate": 2.7820768175527636e-06, "loss": 0.466, "step": 15823 }, { "epoch": 0.6569416551201614, "grad_norm": 2.130174398422241, "learning_rate": 2.7814742872290864e-06, "loss": 0.5969, "step": 15824 }, { "epoch": 0.6569831706443728, "grad_norm": 2.4369776248931885, "learning_rate": 2.7808717970169928e-06, "loss": 0.6108, "step": 15825 }, { "epoch": 0.6570246861685841, "grad_norm": 2.973536968231201, "learning_rate": 2.7802693469273756e-06, "loss": 0.6144, "step": 15826 }, { "epoch": 0.6570662016927955, "grad_norm": 2.215256452560425, "learning_rate": 2.7796669369711294e-06, "loss": 0.4242, "step": 15827 }, { "epoch": 0.6571077172170068, "grad_norm": 2.0476748943328857, "learning_rate": 2.7790645671591453e-06, "loss": 0.5199, "step": 15828 }, { "epoch": 0.6571492327412182, "grad_norm": 2.462221145629883, "learning_rate": 2.7784622375023145e-06, "loss": 0.3737, "step": 15829 }, { "epoch": 0.6571907482654296, "grad_norm": 2.28483247756958, "learning_rate": 2.7778599480115244e-06, "loss": 0.619, "step": 15830 }, { "epoch": 0.6572322637896408, "grad_norm": 2.7478725910186768, "learning_rate": 2.777257698697669e-06, "loss": 0.4366, "step": 15831 }, { "epoch": 0.6572737793138522, "grad_norm": 2.4348113536834717, "learning_rate": 2.7766554895716353e-06, "loss": 0.552, "step": 15832 }, { "epoch": 0.6573152948380635, "grad_norm": 2.3715274333953857, "learning_rate": 2.776053320644311e-06, "loss": 0.5087, "step": 15833 }, { "epoch": 0.6573568103622749, "grad_norm": 2.4768245220184326, "learning_rate": 2.7754511919265812e-06, "loss": 0.5905, "step": 15834 }, { "epoch": 0.6573983258864862, "grad_norm": 2.417809247970581, "learning_rate": 2.7748491034293377e-06, "loss": 0.5226, "step": 15835 }, { "epoch": 0.6574398414106976, "grad_norm": 2.8863937854766846, "learning_rate": 2.7742470551634636e-06, "loss": 0.5743, "step": 15836 }, { "epoch": 0.6574813569349088, "grad_norm": 2.259182929992676, "learning_rate": 2.7736450471398435e-06, "loss": 0.5868, "step": 15837 }, { "epoch": 0.6575228724591202, "grad_norm": 2.2520039081573486, "learning_rate": 2.773043079369364e-06, "loss": 0.6476, "step": 15838 }, { "epoch": 0.6575643879833315, "grad_norm": 1.9015908241271973, "learning_rate": 2.7724411518629044e-06, "loss": 0.371, "step": 15839 }, { "epoch": 0.6576059035075429, "grad_norm": 4.599420070648193, "learning_rate": 2.771839264631355e-06, "loss": 0.5159, "step": 15840 }, { "epoch": 0.6576474190317542, "grad_norm": 3.169217348098755, "learning_rate": 2.7712374176855905e-06, "loss": 0.4641, "step": 15841 }, { "epoch": 0.6576889345559656, "grad_norm": 2.650662660598755, "learning_rate": 2.7706356110364973e-06, "loss": 0.5423, "step": 15842 }, { "epoch": 0.6577304500801768, "grad_norm": 2.0548431873321533, "learning_rate": 2.770033844694954e-06, "loss": 0.4963, "step": 15843 }, { "epoch": 0.6577719656043882, "grad_norm": 3.187030792236328, "learning_rate": 2.769432118671842e-06, "loss": 0.5868, "step": 15844 }, { "epoch": 0.6578134811285995, "grad_norm": 2.6791090965270996, "learning_rate": 2.768830432978041e-06, "loss": 0.492, "step": 15845 }, { "epoch": 0.6578549966528109, "grad_norm": 3.457159996032715, "learning_rate": 2.7682287876244284e-06, "loss": 0.5844, "step": 15846 }, { "epoch": 0.6578965121770222, "grad_norm": 2.544539451599121, "learning_rate": 2.7676271826218807e-06, "loss": 0.6526, "step": 15847 }, { "epoch": 0.6579380277012336, "grad_norm": 2.966398000717163, "learning_rate": 2.767025617981279e-06, "loss": 0.5075, "step": 15848 }, { "epoch": 0.6579795432254448, "grad_norm": 2.0087852478027344, "learning_rate": 2.7664240937134986e-06, "loss": 0.5055, "step": 15849 }, { "epoch": 0.6580210587496562, "grad_norm": 2.2652645111083984, "learning_rate": 2.765822609829414e-06, "loss": 0.407, "step": 15850 }, { "epoch": 0.6580625742738675, "grad_norm": 2.4433319568634033, "learning_rate": 2.7652211663398996e-06, "loss": 0.5493, "step": 15851 }, { "epoch": 0.6581040897980789, "grad_norm": 2.2025413513183594, "learning_rate": 2.7646197632558293e-06, "loss": 0.5231, "step": 15852 }, { "epoch": 0.6581456053222902, "grad_norm": 2.9689114093780518, "learning_rate": 2.7640184005880822e-06, "loss": 0.535, "step": 15853 }, { "epoch": 0.6581871208465015, "grad_norm": 2.530362606048584, "learning_rate": 2.7634170783475224e-06, "loss": 0.5117, "step": 15854 }, { "epoch": 0.6582286363707128, "grad_norm": 3.2958929538726807, "learning_rate": 2.7628157965450286e-06, "loss": 0.5677, "step": 15855 }, { "epoch": 0.6582701518949242, "grad_norm": 2.6557586193084717, "learning_rate": 2.762214555191468e-06, "loss": 0.3993, "step": 15856 }, { "epoch": 0.6583116674191355, "grad_norm": 2.426020860671997, "learning_rate": 2.761613354297716e-06, "loss": 0.5213, "step": 15857 }, { "epoch": 0.6583531829433469, "grad_norm": 2.496877670288086, "learning_rate": 2.7610121938746394e-06, "loss": 0.5004, "step": 15858 }, { "epoch": 0.6583946984675582, "grad_norm": 2.2086567878723145, "learning_rate": 2.760411073933107e-06, "loss": 0.6043, "step": 15859 }, { "epoch": 0.6584362139917695, "grad_norm": 2.647366523742676, "learning_rate": 2.7598099944839886e-06, "loss": 0.5027, "step": 15860 }, { "epoch": 0.6584777295159809, "grad_norm": 2.3060600757598877, "learning_rate": 2.7592089555381486e-06, "loss": 0.5502, "step": 15861 }, { "epoch": 0.6585192450401922, "grad_norm": 2.31927752494812, "learning_rate": 2.758607957106459e-06, "loss": 0.5547, "step": 15862 }, { "epoch": 0.6585607605644036, "grad_norm": 2.3843765258789062, "learning_rate": 2.758006999199783e-06, "loss": 0.4928, "step": 15863 }, { "epoch": 0.6586022760886149, "grad_norm": 1.886887788772583, "learning_rate": 2.7574060818289873e-06, "loss": 0.4676, "step": 15864 }, { "epoch": 0.6586437916128263, "grad_norm": 2.4559381008148193, "learning_rate": 2.756805205004934e-06, "loss": 0.5932, "step": 15865 }, { "epoch": 0.6586853071370375, "grad_norm": 2.4471819400787354, "learning_rate": 2.756204368738493e-06, "loss": 0.5651, "step": 15866 }, { "epoch": 0.6587268226612489, "grad_norm": 2.4621715545654297, "learning_rate": 2.7556035730405196e-06, "loss": 0.5076, "step": 15867 }, { "epoch": 0.6587683381854602, "grad_norm": 2.5574419498443604, "learning_rate": 2.7550028179218825e-06, "loss": 0.4854, "step": 15868 }, { "epoch": 0.6588098537096716, "grad_norm": 2.5936169624328613, "learning_rate": 2.75440210339344e-06, "loss": 0.5753, "step": 15869 }, { "epoch": 0.6588513692338829, "grad_norm": 2.9728147983551025, "learning_rate": 2.7538014294660564e-06, "loss": 0.5527, "step": 15870 }, { "epoch": 0.6588928847580943, "grad_norm": 7.969537258148193, "learning_rate": 2.7532007961505903e-06, "loss": 0.6407, "step": 15871 }, { "epoch": 0.6589344002823055, "grad_norm": 2.6205687522888184, "learning_rate": 2.7526002034579015e-06, "loss": 0.5562, "step": 15872 }, { "epoch": 0.6589759158065169, "grad_norm": 2.1290018558502197, "learning_rate": 2.751999651398849e-06, "loss": 0.4458, "step": 15873 }, { "epoch": 0.6590174313307282, "grad_norm": 2.1274428367614746, "learning_rate": 2.7513991399842883e-06, "loss": 0.5203, "step": 15874 }, { "epoch": 0.6590589468549396, "grad_norm": 2.6788175106048584, "learning_rate": 2.750798669225081e-06, "loss": 0.4813, "step": 15875 }, { "epoch": 0.6591004623791509, "grad_norm": 2.1821322441101074, "learning_rate": 2.7501982391320825e-06, "loss": 0.4702, "step": 15876 }, { "epoch": 0.6591419779033623, "grad_norm": 2.25783109664917, "learning_rate": 2.7495978497161486e-06, "loss": 0.5752, "step": 15877 }, { "epoch": 0.6591834934275735, "grad_norm": 2.3731749057769775, "learning_rate": 2.748997500988132e-06, "loss": 0.6413, "step": 15878 }, { "epoch": 0.6592250089517849, "grad_norm": 2.7390267848968506, "learning_rate": 2.748397192958893e-06, "loss": 0.5946, "step": 15879 }, { "epoch": 0.6592665244759962, "grad_norm": 1.8520385026931763, "learning_rate": 2.747796925639278e-06, "loss": 0.4275, "step": 15880 }, { "epoch": 0.6593080400002076, "grad_norm": 2.2331576347351074, "learning_rate": 2.747196699040146e-06, "loss": 0.4333, "step": 15881 }, { "epoch": 0.6593495555244189, "grad_norm": 2.781799077987671, "learning_rate": 2.746596513172345e-06, "loss": 0.5802, "step": 15882 }, { "epoch": 0.6593910710486303, "grad_norm": 2.3133413791656494, "learning_rate": 2.7459963680467304e-06, "loss": 0.5677, "step": 15883 }, { "epoch": 0.6594325865728415, "grad_norm": 2.4691290855407715, "learning_rate": 2.745396263674151e-06, "loss": 0.5367, "step": 15884 }, { "epoch": 0.6594741020970529, "grad_norm": 1.9051456451416016, "learning_rate": 2.744796200065457e-06, "loss": 0.424, "step": 15885 }, { "epoch": 0.6595156176212642, "grad_norm": 2.5293660163879395, "learning_rate": 2.744196177231498e-06, "loss": 0.548, "step": 15886 }, { "epoch": 0.6595571331454756, "grad_norm": 2.259650468826294, "learning_rate": 2.74359619518312e-06, "loss": 0.5083, "step": 15887 }, { "epoch": 0.6595986486696869, "grad_norm": 2.4355971813201904, "learning_rate": 2.7429962539311748e-06, "loss": 0.5885, "step": 15888 }, { "epoch": 0.6596401641938983, "grad_norm": 2.34965443611145, "learning_rate": 2.742396353486508e-06, "loss": 0.4802, "step": 15889 }, { "epoch": 0.6596816797181095, "grad_norm": 2.119433641433716, "learning_rate": 2.7417964938599668e-06, "loss": 0.4403, "step": 15890 }, { "epoch": 0.6597231952423209, "grad_norm": 2.155698299407959, "learning_rate": 2.741196675062393e-06, "loss": 0.5709, "step": 15891 }, { "epoch": 0.6597647107665323, "grad_norm": 2.493726968765259, "learning_rate": 2.740596897104639e-06, "loss": 0.5197, "step": 15892 }, { "epoch": 0.6598062262907436, "grad_norm": 2.252967119216919, "learning_rate": 2.73999715999754e-06, "loss": 0.4565, "step": 15893 }, { "epoch": 0.659847741814955, "grad_norm": 2.032954454421997, "learning_rate": 2.7393974637519457e-06, "loss": 0.5099, "step": 15894 }, { "epoch": 0.6598892573391663, "grad_norm": 2.1985936164855957, "learning_rate": 2.738797808378695e-06, "loss": 0.3699, "step": 15895 }, { "epoch": 0.6599307728633776, "grad_norm": 1.8746099472045898, "learning_rate": 2.7381981938886333e-06, "loss": 0.3955, "step": 15896 }, { "epoch": 0.6599722883875889, "grad_norm": 2.908848285675049, "learning_rate": 2.7375986202926003e-06, "loss": 0.5239, "step": 15897 }, { "epoch": 0.6600138039118003, "grad_norm": 2.842329502105713, "learning_rate": 2.736999087601436e-06, "loss": 0.8147, "step": 15898 }, { "epoch": 0.6600553194360116, "grad_norm": 2.4486985206604004, "learning_rate": 2.7363995958259813e-06, "loss": 0.431, "step": 15899 }, { "epoch": 0.660096834960223, "grad_norm": 1.9615803956985474, "learning_rate": 2.7358001449770722e-06, "loss": 0.4874, "step": 15900 }, { "epoch": 0.6601383504844343, "grad_norm": 2.428122043609619, "learning_rate": 2.735200735065551e-06, "loss": 0.5142, "step": 15901 }, { "epoch": 0.6601798660086456, "grad_norm": 2.161221504211426, "learning_rate": 2.734601366102253e-06, "loss": 0.3232, "step": 15902 }, { "epoch": 0.6602213815328569, "grad_norm": 2.593653678894043, "learning_rate": 2.734002038098015e-06, "loss": 0.4549, "step": 15903 }, { "epoch": 0.6602628970570683, "grad_norm": 2.3976755142211914, "learning_rate": 2.7334027510636718e-06, "loss": 0.461, "step": 15904 }, { "epoch": 0.6603044125812796, "grad_norm": 2.364190101623535, "learning_rate": 2.7328035050100643e-06, "loss": 0.4778, "step": 15905 }, { "epoch": 0.660345928105491, "grad_norm": 2.495309591293335, "learning_rate": 2.7322042999480182e-06, "loss": 0.6713, "step": 15906 }, { "epoch": 0.6603874436297023, "grad_norm": 2.639413595199585, "learning_rate": 2.7316051358883744e-06, "loss": 0.5731, "step": 15907 }, { "epoch": 0.6604289591539136, "grad_norm": 2.8354527950286865, "learning_rate": 2.7310060128419617e-06, "loss": 0.5576, "step": 15908 }, { "epoch": 0.6604704746781249, "grad_norm": 2.3334367275238037, "learning_rate": 2.7304069308196163e-06, "loss": 0.5386, "step": 15909 }, { "epoch": 0.6605119902023363, "grad_norm": 2.0522358417510986, "learning_rate": 2.7298078898321678e-06, "loss": 0.4328, "step": 15910 }, { "epoch": 0.6605535057265476, "grad_norm": 1.8703668117523193, "learning_rate": 2.7292088898904468e-06, "loss": 0.3654, "step": 15911 }, { "epoch": 0.660595021250759, "grad_norm": 2.4075326919555664, "learning_rate": 2.728609931005284e-06, "loss": 0.5433, "step": 15912 }, { "epoch": 0.6606365367749703, "grad_norm": 2.2670466899871826, "learning_rate": 2.7280110131875056e-06, "loss": 0.5575, "step": 15913 }, { "epoch": 0.6606780522991816, "grad_norm": 1.942994475364685, "learning_rate": 2.727412136447945e-06, "loss": 0.4717, "step": 15914 }, { "epoch": 0.6607195678233929, "grad_norm": 2.6771557331085205, "learning_rate": 2.7268133007974284e-06, "loss": 0.7271, "step": 15915 }, { "epoch": 0.6607610833476043, "grad_norm": 2.469900131225586, "learning_rate": 2.7262145062467828e-06, "loss": 0.5078, "step": 15916 }, { "epoch": 0.6608025988718156, "grad_norm": 2.1724705696105957, "learning_rate": 2.7256157528068317e-06, "loss": 0.4865, "step": 15917 }, { "epoch": 0.660844114396027, "grad_norm": 2.707994222640991, "learning_rate": 2.7250170404884074e-06, "loss": 0.6075, "step": 15918 }, { "epoch": 0.6608856299202382, "grad_norm": 2.3856096267700195, "learning_rate": 2.724418369302327e-06, "loss": 0.5312, "step": 15919 }, { "epoch": 0.6609271454444496, "grad_norm": 2.5908689498901367, "learning_rate": 2.72381973925942e-06, "loss": 0.5228, "step": 15920 }, { "epoch": 0.6609686609686609, "grad_norm": 2.090557098388672, "learning_rate": 2.7232211503705065e-06, "loss": 0.3837, "step": 15921 }, { "epoch": 0.6610101764928723, "grad_norm": 2.781439781188965, "learning_rate": 2.7226226026464132e-06, "loss": 0.5941, "step": 15922 }, { "epoch": 0.6610516920170837, "grad_norm": 2.3382551670074463, "learning_rate": 2.7220240960979586e-06, "loss": 0.5385, "step": 15923 }, { "epoch": 0.661093207541295, "grad_norm": 2.1468143463134766, "learning_rate": 2.7214256307359652e-06, "loss": 0.4317, "step": 15924 }, { "epoch": 0.6611347230655064, "grad_norm": 2.6519672870635986, "learning_rate": 2.720827206571254e-06, "loss": 0.4126, "step": 15925 }, { "epoch": 0.6611762385897176, "grad_norm": 2.512632131576538, "learning_rate": 2.7202288236146413e-06, "loss": 0.5584, "step": 15926 }, { "epoch": 0.661217754113929, "grad_norm": 2.332104444503784, "learning_rate": 2.7196304818769507e-06, "loss": 0.5698, "step": 15927 }, { "epoch": 0.6612592696381403, "grad_norm": 2.0575268268585205, "learning_rate": 2.719032181368998e-06, "loss": 0.4824, "step": 15928 }, { "epoch": 0.6613007851623517, "grad_norm": 2.1616170406341553, "learning_rate": 2.718433922101601e-06, "loss": 0.5289, "step": 15929 }, { "epoch": 0.661342300686563, "grad_norm": 2.7046656608581543, "learning_rate": 2.7178357040855753e-06, "loss": 0.3927, "step": 15930 }, { "epoch": 0.6613838162107744, "grad_norm": 2.0326380729675293, "learning_rate": 2.7172375273317403e-06, "loss": 0.3559, "step": 15931 }, { "epoch": 0.6614253317349856, "grad_norm": 2.377981662750244, "learning_rate": 2.7166393918509054e-06, "loss": 0.3971, "step": 15932 }, { "epoch": 0.661466847259197, "grad_norm": 2.3021833896636963, "learning_rate": 2.716041297653891e-06, "loss": 0.5366, "step": 15933 }, { "epoch": 0.6615083627834083, "grad_norm": 2.745697498321533, "learning_rate": 2.7154432447515055e-06, "loss": 0.5148, "step": 15934 }, { "epoch": 0.6615498783076197, "grad_norm": 2.0094094276428223, "learning_rate": 2.7148452331545665e-06, "loss": 0.3756, "step": 15935 }, { "epoch": 0.661591393831831, "grad_norm": 2.3614885807037354, "learning_rate": 2.7142472628738846e-06, "loss": 0.4791, "step": 15936 }, { "epoch": 0.6616329093560424, "grad_norm": 2.307117223739624, "learning_rate": 2.7136493339202715e-06, "loss": 0.5156, "step": 15937 }, { "epoch": 0.6616744248802536, "grad_norm": 1.7710663080215454, "learning_rate": 2.7130514463045365e-06, "loss": 0.4754, "step": 15938 }, { "epoch": 0.661715940404465, "grad_norm": 2.6984405517578125, "learning_rate": 2.712453600037489e-06, "loss": 0.4855, "step": 15939 }, { "epoch": 0.6617574559286763, "grad_norm": 2.5850658416748047, "learning_rate": 2.7118557951299417e-06, "loss": 0.6071, "step": 15940 }, { "epoch": 0.6617989714528877, "grad_norm": 2.236675977706909, "learning_rate": 2.7112580315927017e-06, "loss": 0.4204, "step": 15941 }, { "epoch": 0.661840486977099, "grad_norm": 3.1406538486480713, "learning_rate": 2.7106603094365757e-06, "loss": 0.6472, "step": 15942 }, { "epoch": 0.6618820025013104, "grad_norm": 2.9253060817718506, "learning_rate": 2.7100626286723685e-06, "loss": 0.5349, "step": 15943 }, { "epoch": 0.6619235180255216, "grad_norm": 2.045881748199463, "learning_rate": 2.7094649893108937e-06, "loss": 0.5264, "step": 15944 }, { "epoch": 0.661965033549733, "grad_norm": 2.4148542881011963, "learning_rate": 2.708867391362948e-06, "loss": 0.5499, "step": 15945 }, { "epoch": 0.6620065490739443, "grad_norm": 2.225839138031006, "learning_rate": 2.7082698348393428e-06, "loss": 0.5009, "step": 15946 }, { "epoch": 0.6620480645981557, "grad_norm": 2.476503849029541, "learning_rate": 2.707672319750877e-06, "loss": 0.5913, "step": 15947 }, { "epoch": 0.662089580122367, "grad_norm": 2.3959619998931885, "learning_rate": 2.7070748461083583e-06, "loss": 0.5645, "step": 15948 }, { "epoch": 0.6621310956465783, "grad_norm": 2.466813802719116, "learning_rate": 2.7064774139225874e-06, "loss": 0.4928, "step": 15949 }, { "epoch": 0.6621726111707896, "grad_norm": 2.3458023071289062, "learning_rate": 2.705880023204366e-06, "loss": 0.6012, "step": 15950 }, { "epoch": 0.662214126695001, "grad_norm": 2.339343786239624, "learning_rate": 2.705282673964495e-06, "loss": 0.6416, "step": 15951 }, { "epoch": 0.6622556422192123, "grad_norm": 2.501924753189087, "learning_rate": 2.7046853662137727e-06, "loss": 0.4473, "step": 15952 }, { "epoch": 0.6622971577434237, "grad_norm": 3.0210843086242676, "learning_rate": 2.7040880999630043e-06, "loss": 0.4866, "step": 15953 }, { "epoch": 0.6623386732676351, "grad_norm": 2.015909433364868, "learning_rate": 2.7034908752229807e-06, "loss": 0.3257, "step": 15954 }, { "epoch": 0.6623801887918463, "grad_norm": 2.3474903106689453, "learning_rate": 2.7028936920045066e-06, "loss": 0.5452, "step": 15955 }, { "epoch": 0.6624217043160577, "grad_norm": 2.4665720462799072, "learning_rate": 2.702296550318374e-06, "loss": 0.5815, "step": 15956 }, { "epoch": 0.662463219840269, "grad_norm": 2.531285047531128, "learning_rate": 2.701699450175387e-06, "loss": 0.3774, "step": 15957 }, { "epoch": 0.6625047353644804, "grad_norm": 2.4229440689086914, "learning_rate": 2.701102391586331e-06, "loss": 0.6281, "step": 15958 }, { "epoch": 0.6625462508886917, "grad_norm": 2.1717727184295654, "learning_rate": 2.700505374562009e-06, "loss": 0.4641, "step": 15959 }, { "epoch": 0.6625877664129031, "grad_norm": 2.502723455429077, "learning_rate": 2.6999083991132113e-06, "loss": 0.531, "step": 15960 }, { "epoch": 0.6626292819371143, "grad_norm": 2.5564215183258057, "learning_rate": 2.6993114652507333e-06, "loss": 0.5262, "step": 15961 }, { "epoch": 0.6626707974613257, "grad_norm": 2.26438570022583, "learning_rate": 2.6987145729853683e-06, "loss": 0.4228, "step": 15962 }, { "epoch": 0.662712312985537, "grad_norm": 1.8747870922088623, "learning_rate": 2.6981177223279065e-06, "loss": 0.3291, "step": 15963 }, { "epoch": 0.6627538285097484, "grad_norm": 2.3516857624053955, "learning_rate": 2.69752091328914e-06, "loss": 0.4919, "step": 15964 }, { "epoch": 0.6627953440339597, "grad_norm": 2.461418390274048, "learning_rate": 2.696924145879858e-06, "loss": 0.7068, "step": 15965 }, { "epoch": 0.6628368595581711, "grad_norm": 2.008523464202881, "learning_rate": 2.6963274201108536e-06, "loss": 0.4261, "step": 15966 }, { "epoch": 0.6628783750823823, "grad_norm": 2.2460525035858154, "learning_rate": 2.6957307359929103e-06, "loss": 0.447, "step": 15967 }, { "epoch": 0.6629198906065937, "grad_norm": 2.525127410888672, "learning_rate": 2.6951340935368213e-06, "loss": 0.5312, "step": 15968 }, { "epoch": 0.662961406130805, "grad_norm": 2.3016934394836426, "learning_rate": 2.69453749275337e-06, "loss": 0.4821, "step": 15969 }, { "epoch": 0.6630029216550164, "grad_norm": 2.60947585105896, "learning_rate": 2.6939409336533494e-06, "loss": 0.6462, "step": 15970 }, { "epoch": 0.6630444371792277, "grad_norm": 3.5045154094696045, "learning_rate": 2.693344416247538e-06, "loss": 0.6777, "step": 15971 }, { "epoch": 0.6630859527034391, "grad_norm": 2.617192506790161, "learning_rate": 2.6927479405467262e-06, "loss": 0.5145, "step": 15972 }, { "epoch": 0.6631274682276503, "grad_norm": 2.496553421020508, "learning_rate": 2.6921515065616945e-06, "loss": 0.6161, "step": 15973 }, { "epoch": 0.6631689837518617, "grad_norm": 2.7218406200408936, "learning_rate": 2.6915551143032304e-06, "loss": 0.4862, "step": 15974 }, { "epoch": 0.663210499276073, "grad_norm": 2.241401195526123, "learning_rate": 2.6909587637821157e-06, "loss": 0.5943, "step": 15975 }, { "epoch": 0.6632520148002844, "grad_norm": 2.1243741512298584, "learning_rate": 2.690362455009131e-06, "loss": 0.5421, "step": 15976 }, { "epoch": 0.6632935303244957, "grad_norm": 2.4282867908477783, "learning_rate": 2.6897661879950603e-06, "loss": 0.5691, "step": 15977 }, { "epoch": 0.6633350458487071, "grad_norm": 2.6543140411376953, "learning_rate": 2.68916996275068e-06, "loss": 0.5615, "step": 15978 }, { "epoch": 0.6633765613729183, "grad_norm": 2.0449087619781494, "learning_rate": 2.6885737792867772e-06, "loss": 0.6253, "step": 15979 }, { "epoch": 0.6634180768971297, "grad_norm": 2.419099807739258, "learning_rate": 2.6879776376141226e-06, "loss": 0.5001, "step": 15980 }, { "epoch": 0.663459592421341, "grad_norm": 2.215172052383423, "learning_rate": 2.6873815377435007e-06, "loss": 0.537, "step": 15981 }, { "epoch": 0.6635011079455524, "grad_norm": 2.2847766876220703, "learning_rate": 2.6867854796856852e-06, "loss": 0.5079, "step": 15982 }, { "epoch": 0.6635426234697637, "grad_norm": 2.2759368419647217, "learning_rate": 2.6861894634514586e-06, "loss": 0.6563, "step": 15983 }, { "epoch": 0.6635841389939751, "grad_norm": 3.363661050796509, "learning_rate": 2.68559348905159e-06, "loss": 0.5598, "step": 15984 }, { "epoch": 0.6636256545181864, "grad_norm": 3.3034074306488037, "learning_rate": 2.6849975564968595e-06, "loss": 0.4319, "step": 15985 }, { "epoch": 0.6636671700423977, "grad_norm": 2.5000176429748535, "learning_rate": 2.6844016657980385e-06, "loss": 0.3936, "step": 15986 }, { "epoch": 0.6637086855666091, "grad_norm": 2.383568048477173, "learning_rate": 2.6838058169659076e-06, "loss": 0.5724, "step": 15987 }, { "epoch": 0.6637502010908204, "grad_norm": 2.52659010887146, "learning_rate": 2.68321001001123e-06, "loss": 0.5474, "step": 15988 }, { "epoch": 0.6637917166150318, "grad_norm": 2.1913044452667236, "learning_rate": 2.682614244944786e-06, "loss": 0.3942, "step": 15989 }, { "epoch": 0.6638332321392431, "grad_norm": 1.9067376852035522, "learning_rate": 2.6820185217773442e-06, "loss": 0.4799, "step": 15990 }, { "epoch": 0.6638747476634544, "grad_norm": 2.5856895446777344, "learning_rate": 2.681422840519674e-06, "loss": 0.5695, "step": 15991 }, { "epoch": 0.6639162631876657, "grad_norm": 3.0136570930480957, "learning_rate": 2.680827201182551e-06, "loss": 0.7269, "step": 15992 }, { "epoch": 0.6639577787118771, "grad_norm": 2.0999581813812256, "learning_rate": 2.680231603776736e-06, "loss": 0.526, "step": 15993 }, { "epoch": 0.6639992942360884, "grad_norm": 2.670152425765991, "learning_rate": 2.6796360483130046e-06, "loss": 0.5273, "step": 15994 }, { "epoch": 0.6640408097602998, "grad_norm": 3.1545209884643555, "learning_rate": 2.6790405348021207e-06, "loss": 0.6616, "step": 15995 }, { "epoch": 0.664082325284511, "grad_norm": 2.363621711730957, "learning_rate": 2.6784450632548563e-06, "loss": 0.5936, "step": 15996 }, { "epoch": 0.6641238408087224, "grad_norm": 2.4037232398986816, "learning_rate": 2.6778496336819703e-06, "loss": 0.5434, "step": 15997 }, { "epoch": 0.6641653563329337, "grad_norm": 2.0189638137817383, "learning_rate": 2.6772542460942354e-06, "loss": 0.4474, "step": 15998 }, { "epoch": 0.6642068718571451, "grad_norm": 2.248093366622925, "learning_rate": 2.6766589005024097e-06, "loss": 0.5962, "step": 15999 }, { "epoch": 0.6642483873813564, "grad_norm": 2.524195909500122, "learning_rate": 2.6760635969172653e-06, "loss": 0.3978, "step": 16000 }, { "epoch": 0.6642899029055678, "grad_norm": 2.1396145820617676, "learning_rate": 2.675468335349557e-06, "loss": 0.4989, "step": 16001 }, { "epoch": 0.664331418429779, "grad_norm": 2.211261034011841, "learning_rate": 2.6748731158100528e-06, "loss": 0.4423, "step": 16002 }, { "epoch": 0.6643729339539904, "grad_norm": 2.1610045433044434, "learning_rate": 2.6742779383095136e-06, "loss": 0.6261, "step": 16003 }, { "epoch": 0.6644144494782017, "grad_norm": 2.46738862991333, "learning_rate": 2.673682802858697e-06, "loss": 0.5169, "step": 16004 }, { "epoch": 0.6644559650024131, "grad_norm": 2.0455312728881836, "learning_rate": 2.67308770946837e-06, "loss": 0.4916, "step": 16005 }, { "epoch": 0.6644974805266244, "grad_norm": 2.1814844608306885, "learning_rate": 2.6724926581492837e-06, "loss": 0.4709, "step": 16006 }, { "epoch": 0.6645389960508358, "grad_norm": 3.1322333812713623, "learning_rate": 2.671897648912204e-06, "loss": 0.5617, "step": 16007 }, { "epoch": 0.664580511575047, "grad_norm": 2.6309151649475098, "learning_rate": 2.671302681767883e-06, "loss": 0.4662, "step": 16008 }, { "epoch": 0.6646220270992584, "grad_norm": 2.097473621368408, "learning_rate": 2.6707077567270845e-06, "loss": 0.4198, "step": 16009 }, { "epoch": 0.6646635426234697, "grad_norm": 2.4352731704711914, "learning_rate": 2.6701128738005576e-06, "loss": 0.4906, "step": 16010 }, { "epoch": 0.6647050581476811, "grad_norm": 1.9452637434005737, "learning_rate": 2.6695180329990633e-06, "loss": 0.5287, "step": 16011 }, { "epoch": 0.6647465736718924, "grad_norm": 2.4352774620056152, "learning_rate": 2.6689232343333525e-06, "loss": 0.3788, "step": 16012 }, { "epoch": 0.6647880891961038, "grad_norm": 2.1532342433929443, "learning_rate": 2.6683284778141856e-06, "loss": 0.4514, "step": 16013 }, { "epoch": 0.664829604720315, "grad_norm": 2.39697003364563, "learning_rate": 2.667733763452308e-06, "loss": 0.5105, "step": 16014 }, { "epoch": 0.6648711202445264, "grad_norm": 2.4631118774414062, "learning_rate": 2.6671390912584783e-06, "loss": 0.568, "step": 16015 }, { "epoch": 0.6649126357687378, "grad_norm": 2.3013463020324707, "learning_rate": 2.666544461243446e-06, "loss": 0.4942, "step": 16016 }, { "epoch": 0.6649541512929491, "grad_norm": 2.1751062870025635, "learning_rate": 2.66594987341796e-06, "loss": 0.638, "step": 16017 }, { "epoch": 0.6649956668171605, "grad_norm": 2.21109938621521, "learning_rate": 2.665355327792778e-06, "loss": 0.5212, "step": 16018 }, { "epoch": 0.6650371823413718, "grad_norm": 2.190695285797119, "learning_rate": 2.6647608243786403e-06, "loss": 0.4532, "step": 16019 }, { "epoch": 0.6650786978655832, "grad_norm": 2.230802297592163, "learning_rate": 2.6641663631863014e-06, "loss": 0.5225, "step": 16020 }, { "epoch": 0.6651202133897944, "grad_norm": 2.0389914512634277, "learning_rate": 2.663571944226507e-06, "loss": 0.456, "step": 16021 }, { "epoch": 0.6651617289140058, "grad_norm": 2.697671890258789, "learning_rate": 2.6629775675100076e-06, "loss": 0.4958, "step": 16022 }, { "epoch": 0.6652032444382171, "grad_norm": 2.2488479614257812, "learning_rate": 2.6623832330475454e-06, "loss": 0.3647, "step": 16023 }, { "epoch": 0.6652447599624285, "grad_norm": 2.2302675247192383, "learning_rate": 2.6617889408498686e-06, "loss": 0.4716, "step": 16024 }, { "epoch": 0.6652862754866398, "grad_norm": 2.050992250442505, "learning_rate": 2.6611946909277205e-06, "loss": 0.5103, "step": 16025 }, { "epoch": 0.6653277910108512, "grad_norm": 2.2006819248199463, "learning_rate": 2.6606004832918496e-06, "loss": 0.5585, "step": 16026 }, { "epoch": 0.6653693065350624, "grad_norm": 2.695906639099121, "learning_rate": 2.6600063179529933e-06, "loss": 0.5586, "step": 16027 }, { "epoch": 0.6654108220592738, "grad_norm": 2.1738979816436768, "learning_rate": 2.6594121949218987e-06, "loss": 0.6087, "step": 16028 }, { "epoch": 0.6654523375834851, "grad_norm": 2.5381600856781006, "learning_rate": 2.658818114209306e-06, "loss": 0.557, "step": 16029 }, { "epoch": 0.6654938531076965, "grad_norm": 2.4919581413269043, "learning_rate": 2.6582240758259554e-06, "loss": 0.5406, "step": 16030 }, { "epoch": 0.6655353686319078, "grad_norm": 2.6862733364105225, "learning_rate": 2.657630079782592e-06, "loss": 0.6134, "step": 16031 }, { "epoch": 0.6655768841561192, "grad_norm": 2.36702823638916, "learning_rate": 2.657036126089948e-06, "loss": 0.5485, "step": 16032 }, { "epoch": 0.6656183996803304, "grad_norm": 2.499302864074707, "learning_rate": 2.656442214758768e-06, "loss": 0.612, "step": 16033 }, { "epoch": 0.6656599152045418, "grad_norm": 2.240295171737671, "learning_rate": 2.6558483457997863e-06, "loss": 0.388, "step": 16034 }, { "epoch": 0.6657014307287531, "grad_norm": 2.1943857669830322, "learning_rate": 2.655254519223746e-06, "loss": 0.5234, "step": 16035 }, { "epoch": 0.6657429462529645, "grad_norm": 2.088648557662964, "learning_rate": 2.654660735041377e-06, "loss": 0.5094, "step": 16036 }, { "epoch": 0.6657844617771758, "grad_norm": 2.339017391204834, "learning_rate": 2.6540669932634177e-06, "loss": 0.573, "step": 16037 }, { "epoch": 0.6658259773013872, "grad_norm": 2.039153575897217, "learning_rate": 2.6534732939006032e-06, "loss": 0.5376, "step": 16038 }, { "epoch": 0.6658674928255984, "grad_norm": 2.509795665740967, "learning_rate": 2.652879636963671e-06, "loss": 0.5622, "step": 16039 }, { "epoch": 0.6659090083498098, "grad_norm": 2.8768093585968018, "learning_rate": 2.6522860224633477e-06, "loss": 0.5801, "step": 16040 }, { "epoch": 0.6659505238740211, "grad_norm": 2.174675703048706, "learning_rate": 2.651692450410372e-06, "loss": 0.4913, "step": 16041 }, { "epoch": 0.6659920393982325, "grad_norm": 2.2989635467529297, "learning_rate": 2.6510989208154736e-06, "loss": 0.5903, "step": 16042 }, { "epoch": 0.6660335549224438, "grad_norm": 2.569948434829712, "learning_rate": 2.6505054336893815e-06, "loss": 0.5199, "step": 16043 }, { "epoch": 0.6660750704466551, "grad_norm": 2.7728381156921387, "learning_rate": 2.6499119890428328e-06, "loss": 0.6838, "step": 16044 }, { "epoch": 0.6661165859708664, "grad_norm": 2.534067392349243, "learning_rate": 2.649318586886548e-06, "loss": 0.5826, "step": 16045 }, { "epoch": 0.6661581014950778, "grad_norm": 2.0394349098205566, "learning_rate": 2.648725227231263e-06, "loss": 0.4944, "step": 16046 }, { "epoch": 0.6661996170192892, "grad_norm": 3.324300765991211, "learning_rate": 2.6481319100877014e-06, "loss": 0.6118, "step": 16047 }, { "epoch": 0.6662411325435005, "grad_norm": 2.331907272338867, "learning_rate": 2.647538635466597e-06, "loss": 0.4508, "step": 16048 }, { "epoch": 0.6662826480677119, "grad_norm": 3.069295883178711, "learning_rate": 2.646945403378668e-06, "loss": 0.4642, "step": 16049 }, { "epoch": 0.6663241635919231, "grad_norm": 2.4895081520080566, "learning_rate": 2.6463522138346463e-06, "loss": 0.4935, "step": 16050 }, { "epoch": 0.6663656791161345, "grad_norm": 2.4453015327453613, "learning_rate": 2.645759066845254e-06, "loss": 0.5027, "step": 16051 }, { "epoch": 0.6664071946403458, "grad_norm": 2.063938617706299, "learning_rate": 2.645165962421219e-06, "loss": 0.4027, "step": 16052 }, { "epoch": 0.6664487101645572, "grad_norm": 2.670020818710327, "learning_rate": 2.644572900573259e-06, "loss": 0.4471, "step": 16053 }, { "epoch": 0.6664902256887685, "grad_norm": 2.5144476890563965, "learning_rate": 2.643979881312101e-06, "loss": 0.5787, "step": 16054 }, { "epoch": 0.6665317412129799, "grad_norm": 1.8523375988006592, "learning_rate": 2.6433869046484663e-06, "loss": 0.4905, "step": 16055 }, { "epoch": 0.6665732567371911, "grad_norm": 2.5758063793182373, "learning_rate": 2.6427939705930737e-06, "loss": 0.5313, "step": 16056 }, { "epoch": 0.6666147722614025, "grad_norm": 2.5017991065979004, "learning_rate": 2.6422010791566495e-06, "loss": 0.5974, "step": 16057 }, { "epoch": 0.6666562877856138, "grad_norm": 2.747107982635498, "learning_rate": 2.6416082303499053e-06, "loss": 0.4836, "step": 16058 }, { "epoch": 0.6666978033098252, "grad_norm": 2.606238842010498, "learning_rate": 2.6410154241835663e-06, "loss": 0.4818, "step": 16059 }, { "epoch": 0.6667393188340365, "grad_norm": 1.978655219078064, "learning_rate": 2.640422660668346e-06, "loss": 0.5188, "step": 16060 }, { "epoch": 0.6667808343582479, "grad_norm": 2.241016387939453, "learning_rate": 2.639829939814968e-06, "loss": 0.4903, "step": 16061 }, { "epoch": 0.6668223498824591, "grad_norm": 2.272531032562256, "learning_rate": 2.639237261634141e-06, "loss": 0.4059, "step": 16062 }, { "epoch": 0.6668638654066705, "grad_norm": 2.2957875728607178, "learning_rate": 2.6386446261365874e-06, "loss": 0.5092, "step": 16063 }, { "epoch": 0.6669053809308818, "grad_norm": 2.339888334274292, "learning_rate": 2.638052033333017e-06, "loss": 0.4678, "step": 16064 }, { "epoch": 0.6669468964550932, "grad_norm": 2.324481964111328, "learning_rate": 2.6374594832341506e-06, "loss": 0.5158, "step": 16065 }, { "epoch": 0.6669884119793045, "grad_norm": 2.6234660148620605, "learning_rate": 2.6368669758506937e-06, "loss": 0.5369, "step": 16066 }, { "epoch": 0.6670299275035159, "grad_norm": 2.2470200061798096, "learning_rate": 2.6362745111933654e-06, "loss": 0.585, "step": 16067 }, { "epoch": 0.6670714430277271, "grad_norm": 2.326256513595581, "learning_rate": 2.6356820892728752e-06, "loss": 0.6315, "step": 16068 }, { "epoch": 0.6671129585519385, "grad_norm": 2.2683491706848145, "learning_rate": 2.635089710099932e-06, "loss": 0.5554, "step": 16069 }, { "epoch": 0.6671544740761498, "grad_norm": 2.4297034740448, "learning_rate": 2.634497373685252e-06, "loss": 0.5402, "step": 16070 }, { "epoch": 0.6671959896003612, "grad_norm": 2.176577568054199, "learning_rate": 2.6339050800395383e-06, "loss": 0.4685, "step": 16071 }, { "epoch": 0.6672375051245725, "grad_norm": 2.315922975540161, "learning_rate": 2.6333128291735042e-06, "loss": 0.5233, "step": 16072 }, { "epoch": 0.6672790206487839, "grad_norm": 2.4027092456817627, "learning_rate": 2.6327206210978547e-06, "loss": 0.5206, "step": 16073 }, { "epoch": 0.6673205361729951, "grad_norm": 2.006763219833374, "learning_rate": 2.632128455823302e-06, "loss": 0.5196, "step": 16074 }, { "epoch": 0.6673620516972065, "grad_norm": 2.3852813243865967, "learning_rate": 2.631536333360546e-06, "loss": 0.5981, "step": 16075 }, { "epoch": 0.6674035672214178, "grad_norm": 2.138054609298706, "learning_rate": 2.6309442537202973e-06, "loss": 0.4154, "step": 16076 }, { "epoch": 0.6674450827456292, "grad_norm": 2.1959903240203857, "learning_rate": 2.630352216913258e-06, "loss": 0.5853, "step": 16077 }, { "epoch": 0.6674865982698406, "grad_norm": 2.020169496536255, "learning_rate": 2.629760222950136e-06, "loss": 0.5707, "step": 16078 }, { "epoch": 0.6675281137940519, "grad_norm": 1.9973300695419312, "learning_rate": 2.62916827184163e-06, "loss": 0.4455, "step": 16079 }, { "epoch": 0.6675696293182632, "grad_norm": 2.23581862449646, "learning_rate": 2.628576363598446e-06, "loss": 0.5286, "step": 16080 }, { "epoch": 0.6676111448424745, "grad_norm": 2.7697339057922363, "learning_rate": 2.6279844982312857e-06, "loss": 0.5965, "step": 16081 }, { "epoch": 0.6676526603666859, "grad_norm": 2.0044093132019043, "learning_rate": 2.627392675750847e-06, "loss": 0.5107, "step": 16082 }, { "epoch": 0.6676941758908972, "grad_norm": 2.6546030044555664, "learning_rate": 2.6268008961678365e-06, "loss": 0.3834, "step": 16083 }, { "epoch": 0.6677356914151086, "grad_norm": 2.0413389205932617, "learning_rate": 2.6262091594929463e-06, "loss": 0.5589, "step": 16084 }, { "epoch": 0.6677772069393199, "grad_norm": 2.219111680984497, "learning_rate": 2.6256174657368797e-06, "loss": 0.4655, "step": 16085 }, { "epoch": 0.6678187224635312, "grad_norm": 2.2683515548706055, "learning_rate": 2.6250258149103327e-06, "loss": 0.3624, "step": 16086 }, { "epoch": 0.6678602379877425, "grad_norm": 2.3551526069641113, "learning_rate": 2.624434207024007e-06, "loss": 0.4445, "step": 16087 }, { "epoch": 0.6679017535119539, "grad_norm": 2.7414987087249756, "learning_rate": 2.6238426420885917e-06, "loss": 0.5912, "step": 16088 }, { "epoch": 0.6679432690361652, "grad_norm": 2.237748861312866, "learning_rate": 2.6232511201147883e-06, "loss": 0.5366, "step": 16089 }, { "epoch": 0.6679847845603766, "grad_norm": 1.9112743139266968, "learning_rate": 2.6226596411132876e-06, "loss": 0.4151, "step": 16090 }, { "epoch": 0.6680263000845879, "grad_norm": 2.225922107696533, "learning_rate": 2.62206820509479e-06, "loss": 0.6104, "step": 16091 }, { "epoch": 0.6680678156087992, "grad_norm": 2.176462173461914, "learning_rate": 2.62147681206998e-06, "loss": 0.406, "step": 16092 }, { "epoch": 0.6681093311330105, "grad_norm": 2.7382755279541016, "learning_rate": 2.620885462049557e-06, "loss": 0.5862, "step": 16093 }, { "epoch": 0.6681508466572219, "grad_norm": 2.024388074874878, "learning_rate": 2.620294155044211e-06, "loss": 0.3929, "step": 16094 }, { "epoch": 0.6681923621814332, "grad_norm": 2.052943706512451, "learning_rate": 2.6197028910646304e-06, "loss": 0.4475, "step": 16095 }, { "epoch": 0.6682338777056446, "grad_norm": 2.4469947814941406, "learning_rate": 2.6191116701215118e-06, "loss": 0.5787, "step": 16096 }, { "epoch": 0.6682753932298559, "grad_norm": 1.9763983488082886, "learning_rate": 2.6185204922255363e-06, "loss": 0.3857, "step": 16097 }, { "epoch": 0.6683169087540672, "grad_norm": 2.3068342208862305, "learning_rate": 2.6179293573873986e-06, "loss": 0.4813, "step": 16098 }, { "epoch": 0.6683584242782785, "grad_norm": 2.472154140472412, "learning_rate": 2.617338265617783e-06, "loss": 0.5164, "step": 16099 }, { "epoch": 0.6683999398024899, "grad_norm": 2.581759214401245, "learning_rate": 2.616747216927382e-06, "loss": 0.4258, "step": 16100 }, { "epoch": 0.6684414553267012, "grad_norm": 2.3391358852386475, "learning_rate": 2.616156211326875e-06, "loss": 0.4128, "step": 16101 }, { "epoch": 0.6684829708509126, "grad_norm": 2.6758289337158203, "learning_rate": 2.6155652488269525e-06, "loss": 0.6373, "step": 16102 }, { "epoch": 0.6685244863751238, "grad_norm": 2.356478452682495, "learning_rate": 2.6149743294382946e-06, "loss": 0.5139, "step": 16103 }, { "epoch": 0.6685660018993352, "grad_norm": 2.1408159732818604, "learning_rate": 2.6143834531715927e-06, "loss": 0.5697, "step": 16104 }, { "epoch": 0.6686075174235465, "grad_norm": 1.9728636741638184, "learning_rate": 2.613792620037522e-06, "loss": 0.4155, "step": 16105 }, { "epoch": 0.6686490329477579, "grad_norm": 2.508432149887085, "learning_rate": 2.6132018300467705e-06, "loss": 0.4448, "step": 16106 }, { "epoch": 0.6686905484719692, "grad_norm": 2.7606077194213867, "learning_rate": 2.612611083210018e-06, "loss": 0.3847, "step": 16107 }, { "epoch": 0.6687320639961806, "grad_norm": 2.028461217880249, "learning_rate": 2.6120203795379426e-06, "loss": 0.396, "step": 16108 }, { "epoch": 0.668773579520392, "grad_norm": 2.719604253768921, "learning_rate": 2.6114297190412315e-06, "loss": 0.5548, "step": 16109 }, { "epoch": 0.6688150950446032, "grad_norm": 2.3432302474975586, "learning_rate": 2.6108391017305552e-06, "loss": 0.5488, "step": 16110 }, { "epoch": 0.6688566105688146, "grad_norm": 3.1890757083892822, "learning_rate": 2.610248527616599e-06, "loss": 0.628, "step": 16111 }, { "epoch": 0.6688981260930259, "grad_norm": 2.272155523300171, "learning_rate": 2.6096579967100354e-06, "loss": 0.5291, "step": 16112 }, { "epoch": 0.6689396416172373, "grad_norm": 2.2675535678863525, "learning_rate": 2.609067509021549e-06, "loss": 0.5446, "step": 16113 }, { "epoch": 0.6689811571414486, "grad_norm": 2.1870787143707275, "learning_rate": 2.608477064561807e-06, "loss": 0.4955, "step": 16114 }, { "epoch": 0.66902267266566, "grad_norm": 2.585155487060547, "learning_rate": 2.607886663341491e-06, "loss": 0.544, "step": 16115 }, { "epoch": 0.6690641881898712, "grad_norm": 2.7937021255493164, "learning_rate": 2.607296305371271e-06, "loss": 0.5077, "step": 16116 }, { "epoch": 0.6691057037140826, "grad_norm": 2.399789571762085, "learning_rate": 2.6067059906618286e-06, "loss": 0.5588, "step": 16117 }, { "epoch": 0.6691472192382939, "grad_norm": 2.265521764755249, "learning_rate": 2.6061157192238266e-06, "loss": 0.4632, "step": 16118 }, { "epoch": 0.6691887347625053, "grad_norm": 1.8913111686706543, "learning_rate": 2.605525491067945e-06, "loss": 0.3637, "step": 16119 }, { "epoch": 0.6692302502867166, "grad_norm": 2.3086888790130615, "learning_rate": 2.6049353062048526e-06, "loss": 0.5108, "step": 16120 }, { "epoch": 0.669271765810928, "grad_norm": 2.4770467281341553, "learning_rate": 2.604345164645218e-06, "loss": 0.4819, "step": 16121 }, { "epoch": 0.6693132813351392, "grad_norm": 1.908146858215332, "learning_rate": 2.603755066399718e-06, "loss": 0.4915, "step": 16122 }, { "epoch": 0.6693547968593506, "grad_norm": 2.491314649581909, "learning_rate": 2.603165011479012e-06, "loss": 0.4358, "step": 16123 }, { "epoch": 0.6693963123835619, "grad_norm": 2.149406671524048, "learning_rate": 2.602574999893777e-06, "loss": 0.557, "step": 16124 }, { "epoch": 0.6694378279077733, "grad_norm": 2.3993027210235596, "learning_rate": 2.6019850316546737e-06, "loss": 0.5818, "step": 16125 }, { "epoch": 0.6694793434319846, "grad_norm": 2.3610148429870605, "learning_rate": 2.6013951067723763e-06, "loss": 0.5413, "step": 16126 }, { "epoch": 0.669520858956196, "grad_norm": 2.323336601257324, "learning_rate": 2.600805225257543e-06, "loss": 0.5633, "step": 16127 }, { "epoch": 0.6695623744804072, "grad_norm": 2.402855396270752, "learning_rate": 2.6002153871208446e-06, "loss": 0.5249, "step": 16128 }, { "epoch": 0.6696038900046186, "grad_norm": 2.8735029697418213, "learning_rate": 2.599625592372942e-06, "loss": 0.4898, "step": 16129 }, { "epoch": 0.6696454055288299, "grad_norm": 1.9100178480148315, "learning_rate": 2.5990358410245037e-06, "loss": 0.517, "step": 16130 }, { "epoch": 0.6696869210530413, "grad_norm": 2.380842447280884, "learning_rate": 2.5984461330861864e-06, "loss": 0.5878, "step": 16131 }, { "epoch": 0.6697284365772526, "grad_norm": 2.8684651851654053, "learning_rate": 2.597856468568657e-06, "loss": 0.4811, "step": 16132 }, { "epoch": 0.669769952101464, "grad_norm": 2.306851625442505, "learning_rate": 2.597266847482575e-06, "loss": 0.5219, "step": 16133 }, { "epoch": 0.6698114676256752, "grad_norm": 2.5409319400787354, "learning_rate": 2.5966772698386e-06, "loss": 0.4341, "step": 16134 }, { "epoch": 0.6698529831498866, "grad_norm": 2.4383327960968018, "learning_rate": 2.5960877356473933e-06, "loss": 0.4487, "step": 16135 }, { "epoch": 0.6698944986740979, "grad_norm": 2.3714394569396973, "learning_rate": 2.595498244919612e-06, "loss": 0.5657, "step": 16136 }, { "epoch": 0.6699360141983093, "grad_norm": 2.272041082382202, "learning_rate": 2.594908797665917e-06, "loss": 0.4679, "step": 16137 }, { "epoch": 0.6699775297225207, "grad_norm": 2.0330276489257812, "learning_rate": 2.5943193938969624e-06, "loss": 0.3769, "step": 16138 }, { "epoch": 0.670019045246732, "grad_norm": 2.619628667831421, "learning_rate": 2.5937300336234093e-06, "loss": 0.4046, "step": 16139 }, { "epoch": 0.6700605607709433, "grad_norm": 2.2987053394317627, "learning_rate": 2.5931407168559083e-06, "loss": 0.5033, "step": 16140 }, { "epoch": 0.6701020762951546, "grad_norm": 2.798830032348633, "learning_rate": 2.592551443605118e-06, "loss": 0.4999, "step": 16141 }, { "epoch": 0.670143591819366, "grad_norm": 2.8749468326568604, "learning_rate": 2.59196221388169e-06, "loss": 0.4573, "step": 16142 }, { "epoch": 0.6701851073435773, "grad_norm": 2.488598585128784, "learning_rate": 2.5913730276962826e-06, "loss": 0.5524, "step": 16143 }, { "epoch": 0.6702266228677887, "grad_norm": 2.3225021362304688, "learning_rate": 2.590783885059542e-06, "loss": 0.4476, "step": 16144 }, { "epoch": 0.670268138392, "grad_norm": 2.2628896236419678, "learning_rate": 2.5901947859821244e-06, "loss": 0.3988, "step": 16145 }, { "epoch": 0.6703096539162113, "grad_norm": 2.6861915588378906, "learning_rate": 2.58960573047468e-06, "loss": 0.444, "step": 16146 }, { "epoch": 0.6703511694404226, "grad_norm": 2.5725300312042236, "learning_rate": 2.5890167185478588e-06, "loss": 0.5533, "step": 16147 }, { "epoch": 0.670392684964634, "grad_norm": 2.850545883178711, "learning_rate": 2.5884277502123102e-06, "loss": 0.5247, "step": 16148 }, { "epoch": 0.6704342004888453, "grad_norm": 2.043839931488037, "learning_rate": 2.58783882547868e-06, "loss": 0.4138, "step": 16149 }, { "epoch": 0.6704757160130567, "grad_norm": 2.6219804286956787, "learning_rate": 2.5872499443576227e-06, "loss": 0.4416, "step": 16150 }, { "epoch": 0.6705172315372679, "grad_norm": 2.763076066970825, "learning_rate": 2.586661106859779e-06, "loss": 0.5745, "step": 16151 }, { "epoch": 0.6705587470614793, "grad_norm": 2.5335023403167725, "learning_rate": 2.586072312995802e-06, "loss": 0.525, "step": 16152 }, { "epoch": 0.6706002625856906, "grad_norm": 2.4614641666412354, "learning_rate": 2.5854835627763298e-06, "loss": 0.5597, "step": 16153 }, { "epoch": 0.670641778109902, "grad_norm": 1.8265446424484253, "learning_rate": 2.5848948562120123e-06, "loss": 0.4384, "step": 16154 }, { "epoch": 0.6706832936341133, "grad_norm": 2.374162435531616, "learning_rate": 2.58430619331349e-06, "loss": 0.6624, "step": 16155 }, { "epoch": 0.6707248091583247, "grad_norm": 2.847987651824951, "learning_rate": 2.583717574091412e-06, "loss": 0.4392, "step": 16156 }, { "epoch": 0.6707663246825359, "grad_norm": 2.523733615875244, "learning_rate": 2.583128998556413e-06, "loss": 0.5252, "step": 16157 }, { "epoch": 0.6708078402067473, "grad_norm": 2.6571860313415527, "learning_rate": 2.58254046671914e-06, "loss": 0.4542, "step": 16158 }, { "epoch": 0.6708493557309586, "grad_norm": 2.2378599643707275, "learning_rate": 2.5819519785902326e-06, "loss": 0.4713, "step": 16159 }, { "epoch": 0.67089087125517, "grad_norm": 2.3829221725463867, "learning_rate": 2.58136353418033e-06, "loss": 0.5635, "step": 16160 }, { "epoch": 0.6709323867793813, "grad_norm": 3.531679630279541, "learning_rate": 2.5807751335000716e-06, "loss": 0.4281, "step": 16161 }, { "epoch": 0.6709739023035927, "grad_norm": 2.351698637008667, "learning_rate": 2.580186776560094e-06, "loss": 0.4017, "step": 16162 }, { "epoch": 0.6710154178278039, "grad_norm": 2.063020944595337, "learning_rate": 2.5795984633710393e-06, "loss": 0.4786, "step": 16163 }, { "epoch": 0.6710569333520153, "grad_norm": 2.234959602355957, "learning_rate": 2.5790101939435403e-06, "loss": 0.419, "step": 16164 }, { "epoch": 0.6710984488762266, "grad_norm": 2.529670238494873, "learning_rate": 2.5784219682882383e-06, "loss": 0.4996, "step": 16165 }, { "epoch": 0.671139964400438, "grad_norm": 2.039844274520874, "learning_rate": 2.5778337864157615e-06, "loss": 0.5021, "step": 16166 }, { "epoch": 0.6711814799246493, "grad_norm": 2.137512683868408, "learning_rate": 2.57724564833675e-06, "loss": 0.4276, "step": 16167 }, { "epoch": 0.6712229954488607, "grad_norm": 2.243276596069336, "learning_rate": 2.576657554061833e-06, "loss": 0.4435, "step": 16168 }, { "epoch": 0.671264510973072, "grad_norm": 2.122357130050659, "learning_rate": 2.5760695036016503e-06, "loss": 0.5443, "step": 16169 }, { "epoch": 0.6713060264972833, "grad_norm": 2.588015556335449, "learning_rate": 2.575481496966825e-06, "loss": 0.7144, "step": 16170 }, { "epoch": 0.6713475420214947, "grad_norm": 2.4474985599517822, "learning_rate": 2.574893534167996e-06, "loss": 0.485, "step": 16171 }, { "epoch": 0.671389057545706, "grad_norm": 2.530221939086914, "learning_rate": 2.5743056152157903e-06, "loss": 0.3772, "step": 16172 }, { "epoch": 0.6714305730699174, "grad_norm": 2.507540464401245, "learning_rate": 2.5737177401208386e-06, "loss": 0.4262, "step": 16173 }, { "epoch": 0.6714720885941287, "grad_norm": 2.628453254699707, "learning_rate": 2.573129908893769e-06, "loss": 0.4251, "step": 16174 }, { "epoch": 0.67151360411834, "grad_norm": 2.3356845378875732, "learning_rate": 2.5725421215452086e-06, "loss": 0.4203, "step": 16175 }, { "epoch": 0.6715551196425513, "grad_norm": 2.0804848670959473, "learning_rate": 2.571954378085788e-06, "loss": 0.4594, "step": 16176 }, { "epoch": 0.6715966351667627, "grad_norm": 2.242295026779175, "learning_rate": 2.5713666785261304e-06, "loss": 0.4939, "step": 16177 }, { "epoch": 0.671638150690974, "grad_norm": 2.6651864051818848, "learning_rate": 2.570779022876867e-06, "loss": 0.6066, "step": 16178 }, { "epoch": 0.6716796662151854, "grad_norm": 2.5041286945343018, "learning_rate": 2.5701914111486147e-06, "loss": 0.4948, "step": 16179 }, { "epoch": 0.6717211817393967, "grad_norm": 2.3845269680023193, "learning_rate": 2.569603843352005e-06, "loss": 0.6238, "step": 16180 }, { "epoch": 0.671762697263608, "grad_norm": 2.1018784046173096, "learning_rate": 2.5690163194976576e-06, "loss": 0.4911, "step": 16181 }, { "epoch": 0.6718042127878193, "grad_norm": 2.555898904800415, "learning_rate": 2.568428839596196e-06, "loss": 0.5383, "step": 16182 }, { "epoch": 0.6718457283120307, "grad_norm": 2.7546775341033936, "learning_rate": 2.56784140365824e-06, "loss": 0.5755, "step": 16183 }, { "epoch": 0.671887243836242, "grad_norm": 2.222053289413452, "learning_rate": 2.5672540116944147e-06, "loss": 0.4511, "step": 16184 }, { "epoch": 0.6719287593604534, "grad_norm": 2.0591511726379395, "learning_rate": 2.566666663715337e-06, "loss": 0.5435, "step": 16185 }, { "epoch": 0.6719702748846647, "grad_norm": 2.675248384475708, "learning_rate": 2.566079359731628e-06, "loss": 0.5999, "step": 16186 }, { "epoch": 0.672011790408876, "grad_norm": 2.840911388397217, "learning_rate": 2.5654920997539056e-06, "loss": 0.4836, "step": 16187 }, { "epoch": 0.6720533059330873, "grad_norm": 2.0761752128601074, "learning_rate": 2.564904883792786e-06, "loss": 0.4918, "step": 16188 }, { "epoch": 0.6720948214572987, "grad_norm": 2.1250486373901367, "learning_rate": 2.5643177118588903e-06, "loss": 0.5335, "step": 16189 }, { "epoch": 0.67213633698151, "grad_norm": 2.503230333328247, "learning_rate": 2.5637305839628298e-06, "loss": 0.3861, "step": 16190 }, { "epoch": 0.6721778525057214, "grad_norm": 2.306248426437378, "learning_rate": 2.563143500115226e-06, "loss": 0.583, "step": 16191 }, { "epoch": 0.6722193680299327, "grad_norm": 2.2621023654937744, "learning_rate": 2.5625564603266873e-06, "loss": 0.4585, "step": 16192 }, { "epoch": 0.672260883554144, "grad_norm": 2.23311448097229, "learning_rate": 2.561969464607832e-06, "loss": 0.5138, "step": 16193 }, { "epoch": 0.6723023990783553, "grad_norm": 2.3321847915649414, "learning_rate": 2.561382512969271e-06, "loss": 0.5185, "step": 16194 }, { "epoch": 0.6723439146025667, "grad_norm": 2.7964794635772705, "learning_rate": 2.5607956054216176e-06, "loss": 0.4914, "step": 16195 }, { "epoch": 0.672385430126778, "grad_norm": 2.1512680053710938, "learning_rate": 2.5602087419754807e-06, "loss": 0.5986, "step": 16196 }, { "epoch": 0.6724269456509894, "grad_norm": 2.3044729232788086, "learning_rate": 2.5596219226414743e-06, "loss": 0.4497, "step": 16197 }, { "epoch": 0.6724684611752006, "grad_norm": 2.3353214263916016, "learning_rate": 2.5590351474302077e-06, "loss": 0.5665, "step": 16198 }, { "epoch": 0.672509976699412, "grad_norm": 2.437689781188965, "learning_rate": 2.558448416352289e-06, "loss": 0.7091, "step": 16199 }, { "epoch": 0.6725514922236234, "grad_norm": 2.4123315811157227, "learning_rate": 2.557861729418326e-06, "loss": 0.4358, "step": 16200 }, { "epoch": 0.6725930077478347, "grad_norm": 2.1482250690460205, "learning_rate": 2.5572750866389255e-06, "loss": 0.4877, "step": 16201 }, { "epoch": 0.6726345232720461, "grad_norm": 2.5827574729919434, "learning_rate": 2.556688488024698e-06, "loss": 0.4319, "step": 16202 }, { "epoch": 0.6726760387962574, "grad_norm": 2.1968507766723633, "learning_rate": 2.5561019335862435e-06, "loss": 0.4531, "step": 16203 }, { "epoch": 0.6727175543204688, "grad_norm": 2.305429458618164, "learning_rate": 2.5555154233341757e-06, "loss": 0.6058, "step": 16204 }, { "epoch": 0.67275906984468, "grad_norm": 1.9439741373062134, "learning_rate": 2.554928957279089e-06, "loss": 0.4958, "step": 16205 }, { "epoch": 0.6728005853688914, "grad_norm": 2.4877970218658447, "learning_rate": 2.554342535431594e-06, "loss": 0.4804, "step": 16206 }, { "epoch": 0.6728421008931027, "grad_norm": 2.5997581481933594, "learning_rate": 2.5537561578022902e-06, "loss": 0.6264, "step": 16207 }, { "epoch": 0.6728836164173141, "grad_norm": 2.27474045753479, "learning_rate": 2.55316982440178e-06, "loss": 0.4986, "step": 16208 }, { "epoch": 0.6729251319415254, "grad_norm": 2.5681352615356445, "learning_rate": 2.5525835352406637e-06, "loss": 0.5582, "step": 16209 }, { "epoch": 0.6729666474657368, "grad_norm": 2.330595016479492, "learning_rate": 2.5519972903295437e-06, "loss": 0.5454, "step": 16210 }, { "epoch": 0.673008162989948, "grad_norm": 2.3573708534240723, "learning_rate": 2.5514110896790186e-06, "loss": 0.5277, "step": 16211 }, { "epoch": 0.6730496785141594, "grad_norm": 1.7615975141525269, "learning_rate": 2.5508249332996865e-06, "loss": 0.315, "step": 16212 }, { "epoch": 0.6730911940383707, "grad_norm": 2.5607120990753174, "learning_rate": 2.5502388212021455e-06, "loss": 0.5508, "step": 16213 }, { "epoch": 0.6731327095625821, "grad_norm": 2.404571294784546, "learning_rate": 2.5496527533969907e-06, "loss": 0.465, "step": 16214 }, { "epoch": 0.6731742250867934, "grad_norm": 2.5772061347961426, "learning_rate": 2.5490667298948227e-06, "loss": 0.6551, "step": 16215 }, { "epoch": 0.6732157406110048, "grad_norm": 1.9800612926483154, "learning_rate": 2.5484807507062324e-06, "loss": 0.5139, "step": 16216 }, { "epoch": 0.673257256135216, "grad_norm": 2.275522232055664, "learning_rate": 2.547894815841821e-06, "loss": 0.5377, "step": 16217 }, { "epoch": 0.6732987716594274, "grad_norm": 2.5105788707733154, "learning_rate": 2.5473089253121742e-06, "loss": 0.5099, "step": 16218 }, { "epoch": 0.6733402871836387, "grad_norm": 2.1306138038635254, "learning_rate": 2.5467230791278906e-06, "loss": 0.3793, "step": 16219 }, { "epoch": 0.6733818027078501, "grad_norm": 2.2540547847747803, "learning_rate": 2.546137277299561e-06, "loss": 0.4846, "step": 16220 }, { "epoch": 0.6734233182320614, "grad_norm": 2.5506296157836914, "learning_rate": 2.545551519837777e-06, "loss": 0.5502, "step": 16221 }, { "epoch": 0.6734648337562728, "grad_norm": 2.3539671897888184, "learning_rate": 2.5449658067531263e-06, "loss": 0.4524, "step": 16222 }, { "epoch": 0.673506349280484, "grad_norm": 2.7483744621276855, "learning_rate": 2.544380138056204e-06, "loss": 0.629, "step": 16223 }, { "epoch": 0.6735478648046954, "grad_norm": 2.8438539505004883, "learning_rate": 2.543794513757596e-06, "loss": 0.7559, "step": 16224 }, { "epoch": 0.6735893803289067, "grad_norm": 2.455181360244751, "learning_rate": 2.5432089338678913e-06, "loss": 0.4781, "step": 16225 }, { "epoch": 0.6736308958531181, "grad_norm": 2.714813232421875, "learning_rate": 2.5426233983976777e-06, "loss": 0.6032, "step": 16226 }, { "epoch": 0.6736724113773294, "grad_norm": 2.413191556930542, "learning_rate": 2.542037907357539e-06, "loss": 0.514, "step": 16227 }, { "epoch": 0.6737139269015407, "grad_norm": 2.067732810974121, "learning_rate": 2.5414524607580653e-06, "loss": 0.4806, "step": 16228 }, { "epoch": 0.673755442425752, "grad_norm": 2.4242892265319824, "learning_rate": 2.540867058609838e-06, "loss": 0.3916, "step": 16229 }, { "epoch": 0.6737969579499634, "grad_norm": 2.3619041442871094, "learning_rate": 2.540281700923447e-06, "loss": 0.4416, "step": 16230 }, { "epoch": 0.6738384734741748, "grad_norm": 2.1952641010284424, "learning_rate": 2.5396963877094677e-06, "loss": 0.523, "step": 16231 }, { "epoch": 0.6738799889983861, "grad_norm": 2.014652967453003, "learning_rate": 2.539111118978489e-06, "loss": 0.3929, "step": 16232 }, { "epoch": 0.6739215045225975, "grad_norm": 2.2991716861724854, "learning_rate": 2.5385258947410908e-06, "loss": 0.536, "step": 16233 }, { "epoch": 0.6739630200468087, "grad_norm": 2.3639466762542725, "learning_rate": 2.537940715007853e-06, "loss": 0.5113, "step": 16234 }, { "epoch": 0.6740045355710201, "grad_norm": 2.749722719192505, "learning_rate": 2.5373555797893555e-06, "loss": 0.5679, "step": 16235 }, { "epoch": 0.6740460510952314, "grad_norm": 2.46720552444458, "learning_rate": 2.536770489096181e-06, "loss": 0.5219, "step": 16236 }, { "epoch": 0.6740875666194428, "grad_norm": 2.4098691940307617, "learning_rate": 2.5361854429389054e-06, "loss": 0.5096, "step": 16237 }, { "epoch": 0.6741290821436541, "grad_norm": 2.073782205581665, "learning_rate": 2.5356004413281077e-06, "loss": 0.4886, "step": 16238 }, { "epoch": 0.6741705976678655, "grad_norm": 2.2144935131073, "learning_rate": 2.5350154842743643e-06, "loss": 0.4677, "step": 16239 }, { "epoch": 0.6742121131920767, "grad_norm": 2.2354893684387207, "learning_rate": 2.5344305717882487e-06, "loss": 0.378, "step": 16240 }, { "epoch": 0.6742536287162881, "grad_norm": 2.9932186603546143, "learning_rate": 2.533845703880342e-06, "loss": 0.4737, "step": 16241 }, { "epoch": 0.6742951442404994, "grad_norm": 3.0604796409606934, "learning_rate": 2.5332608805612135e-06, "loss": 0.5013, "step": 16242 }, { "epoch": 0.6743366597647108, "grad_norm": 2.908545732498169, "learning_rate": 2.532676101841443e-06, "loss": 0.607, "step": 16243 }, { "epoch": 0.6743781752889221, "grad_norm": 2.113577127456665, "learning_rate": 2.5320913677315963e-06, "loss": 0.5829, "step": 16244 }, { "epoch": 0.6744196908131335, "grad_norm": 2.1855037212371826, "learning_rate": 2.5315066782422503e-06, "loss": 0.5088, "step": 16245 }, { "epoch": 0.6744612063373447, "grad_norm": 2.09891939163208, "learning_rate": 2.5309220333839757e-06, "loss": 0.4778, "step": 16246 }, { "epoch": 0.6745027218615561, "grad_norm": 2.257885217666626, "learning_rate": 2.5303374331673414e-06, "loss": 0.4, "step": 16247 }, { "epoch": 0.6745442373857674, "grad_norm": 2.2865042686462402, "learning_rate": 2.5297528776029167e-06, "loss": 0.3699, "step": 16248 }, { "epoch": 0.6745857529099788, "grad_norm": 2.400009870529175, "learning_rate": 2.5291683667012743e-06, "loss": 0.581, "step": 16249 }, { "epoch": 0.6746272684341901, "grad_norm": 2.5192456245422363, "learning_rate": 2.5285839004729797e-06, "loss": 0.445, "step": 16250 }, { "epoch": 0.6746687839584015, "grad_norm": 2.5102813243865967, "learning_rate": 2.527999478928601e-06, "loss": 0.5483, "step": 16251 }, { "epoch": 0.6747102994826127, "grad_norm": 2.5641188621520996, "learning_rate": 2.5274151020787034e-06, "loss": 0.4802, "step": 16252 }, { "epoch": 0.6747518150068241, "grad_norm": 1.8721102476119995, "learning_rate": 2.5268307699338524e-06, "loss": 0.3445, "step": 16253 }, { "epoch": 0.6747933305310354, "grad_norm": 2.7473435401916504, "learning_rate": 2.526246482504615e-06, "loss": 0.606, "step": 16254 }, { "epoch": 0.6748348460552468, "grad_norm": 2.5051300525665283, "learning_rate": 2.525662239801553e-06, "loss": 0.6153, "step": 16255 }, { "epoch": 0.6748763615794581, "grad_norm": 2.755582809448242, "learning_rate": 2.525078041835234e-06, "loss": 0.5377, "step": 16256 }, { "epoch": 0.6749178771036695, "grad_norm": 2.30192232131958, "learning_rate": 2.524493888616214e-06, "loss": 0.6572, "step": 16257 }, { "epoch": 0.6749593926278807, "grad_norm": 2.519747495651245, "learning_rate": 2.5239097801550595e-06, "loss": 0.4985, "step": 16258 }, { "epoch": 0.6750009081520921, "grad_norm": 2.4135348796844482, "learning_rate": 2.5233257164623302e-06, "loss": 0.459, "step": 16259 }, { "epoch": 0.6750424236763034, "grad_norm": 2.315802812576294, "learning_rate": 2.5227416975485864e-06, "loss": 0.4408, "step": 16260 }, { "epoch": 0.6750839392005148, "grad_norm": 2.229750156402588, "learning_rate": 2.5221577234243853e-06, "loss": 0.5424, "step": 16261 }, { "epoch": 0.6751254547247262, "grad_norm": 2.3065478801727295, "learning_rate": 2.521573794100286e-06, "loss": 0.437, "step": 16262 }, { "epoch": 0.6751669702489375, "grad_norm": 1.9807167053222656, "learning_rate": 2.5209899095868484e-06, "loss": 0.4729, "step": 16263 }, { "epoch": 0.6752084857731488, "grad_norm": 2.2461845874786377, "learning_rate": 2.5204060698946277e-06, "loss": 0.5544, "step": 16264 }, { "epoch": 0.6752500012973601, "grad_norm": 2.5946221351623535, "learning_rate": 2.5198222750341803e-06, "loss": 0.4945, "step": 16265 }, { "epoch": 0.6752915168215715, "grad_norm": 2.4095938205718994, "learning_rate": 2.5192385250160587e-06, "loss": 0.5415, "step": 16266 }, { "epoch": 0.6753330323457828, "grad_norm": 2.2449800968170166, "learning_rate": 2.5186548198508215e-06, "loss": 0.5327, "step": 16267 }, { "epoch": 0.6753745478699942, "grad_norm": 2.2030394077301025, "learning_rate": 2.5180711595490186e-06, "loss": 0.5799, "step": 16268 }, { "epoch": 0.6754160633942055, "grad_norm": 2.3495514392852783, "learning_rate": 2.517487544121209e-06, "loss": 0.6036, "step": 16269 }, { "epoch": 0.6754575789184168, "grad_norm": 2.3346965312957764, "learning_rate": 2.5169039735779356e-06, "loss": 0.4639, "step": 16270 }, { "epoch": 0.6754990944426281, "grad_norm": 2.950866937637329, "learning_rate": 2.5163204479297556e-06, "loss": 0.4639, "step": 16271 }, { "epoch": 0.6755406099668395, "grad_norm": 2.6378908157348633, "learning_rate": 2.515736967187218e-06, "loss": 0.5266, "step": 16272 }, { "epoch": 0.6755821254910508, "grad_norm": 3.3139984607696533, "learning_rate": 2.515153531360872e-06, "loss": 0.5105, "step": 16273 }, { "epoch": 0.6756236410152622, "grad_norm": 2.294088363647461, "learning_rate": 2.514570140461267e-06, "loss": 0.5976, "step": 16274 }, { "epoch": 0.6756651565394735, "grad_norm": 2.6364903450012207, "learning_rate": 2.5139867944989483e-06, "loss": 0.4706, "step": 16275 }, { "epoch": 0.6757066720636848, "grad_norm": 2.4101412296295166, "learning_rate": 2.5134034934844666e-06, "loss": 0.4098, "step": 16276 }, { "epoch": 0.6757481875878961, "grad_norm": 2.424098491668701, "learning_rate": 2.512820237428366e-06, "loss": 0.498, "step": 16277 }, { "epoch": 0.6757897031121075, "grad_norm": 2.2111406326293945, "learning_rate": 2.5122370263411926e-06, "loss": 0.5527, "step": 16278 }, { "epoch": 0.6758312186363188, "grad_norm": 2.5322041511535645, "learning_rate": 2.5116538602334897e-06, "loss": 0.4177, "step": 16279 }, { "epoch": 0.6758727341605302, "grad_norm": 1.9873026609420776, "learning_rate": 2.5110707391158028e-06, "loss": 0.5042, "step": 16280 }, { "epoch": 0.6759142496847415, "grad_norm": 2.276942014694214, "learning_rate": 2.510487662998675e-06, "loss": 0.479, "step": 16281 }, { "epoch": 0.6759557652089528, "grad_norm": 2.4878578186035156, "learning_rate": 2.509904631892648e-06, "loss": 0.454, "step": 16282 }, { "epoch": 0.6759972807331641, "grad_norm": 2.162309408187866, "learning_rate": 2.5093216458082603e-06, "loss": 0.5919, "step": 16283 }, { "epoch": 0.6760387962573755, "grad_norm": 2.6360511779785156, "learning_rate": 2.508738704756058e-06, "loss": 0.5483, "step": 16284 }, { "epoch": 0.6760803117815868, "grad_norm": 2.679293394088745, "learning_rate": 2.5081558087465776e-06, "loss": 0.4787, "step": 16285 }, { "epoch": 0.6761218273057982, "grad_norm": 2.5770111083984375, "learning_rate": 2.5075729577903584e-06, "loss": 0.7598, "step": 16286 }, { "epoch": 0.6761633428300095, "grad_norm": 2.0338001251220703, "learning_rate": 2.506990151897938e-06, "loss": 0.4455, "step": 16287 }, { "epoch": 0.6762048583542208, "grad_norm": 2.2129592895507812, "learning_rate": 2.5064073910798533e-06, "loss": 0.6195, "step": 16288 }, { "epoch": 0.6762463738784321, "grad_norm": 2.054441213607788, "learning_rate": 2.505824675346643e-06, "loss": 0.5123, "step": 16289 }, { "epoch": 0.6762878894026435, "grad_norm": 2.6230690479278564, "learning_rate": 2.5052420047088417e-06, "loss": 0.6131, "step": 16290 }, { "epoch": 0.6763294049268548, "grad_norm": 2.1817526817321777, "learning_rate": 2.5046593791769834e-06, "loss": 0.4714, "step": 16291 }, { "epoch": 0.6763709204510662, "grad_norm": 2.285048246383667, "learning_rate": 2.5040767987616023e-06, "loss": 0.5464, "step": 16292 }, { "epoch": 0.6764124359752776, "grad_norm": 2.2262299060821533, "learning_rate": 2.503494263473233e-06, "loss": 0.5418, "step": 16293 }, { "epoch": 0.6764539514994888, "grad_norm": 3.0002572536468506, "learning_rate": 2.502911773322406e-06, "loss": 0.6687, "step": 16294 }, { "epoch": 0.6764954670237002, "grad_norm": 2.4708313941955566, "learning_rate": 2.5023293283196552e-06, "loss": 0.5183, "step": 16295 }, { "epoch": 0.6765369825479115, "grad_norm": 2.1517486572265625, "learning_rate": 2.501746928475508e-06, "loss": 0.6712, "step": 16296 }, { "epoch": 0.6765784980721229, "grad_norm": 2.3165299892425537, "learning_rate": 2.5011645738004987e-06, "loss": 0.6537, "step": 16297 }, { "epoch": 0.6766200135963342, "grad_norm": 2.7522289752960205, "learning_rate": 2.5005822643051536e-06, "loss": 0.5257, "step": 16298 }, { "epoch": 0.6766615291205456, "grad_norm": 2.2017123699188232, "learning_rate": 2.5000000000000015e-06, "loss": 0.5769, "step": 16299 }, { "epoch": 0.6767030446447568, "grad_norm": 2.3658385276794434, "learning_rate": 2.49941778089557e-06, "loss": 0.5245, "step": 16300 }, { "epoch": 0.6767445601689682, "grad_norm": 2.0341796875, "learning_rate": 2.4988356070023838e-06, "loss": 0.4095, "step": 16301 }, { "epoch": 0.6767860756931795, "grad_norm": 2.5085604190826416, "learning_rate": 2.4982534783309725e-06, "loss": 0.6589, "step": 16302 }, { "epoch": 0.6768275912173909, "grad_norm": 2.5750033855438232, "learning_rate": 2.4976713948918597e-06, "loss": 0.5296, "step": 16303 }, { "epoch": 0.6768691067416022, "grad_norm": 1.9928529262542725, "learning_rate": 2.4970893566955696e-06, "loss": 0.4572, "step": 16304 }, { "epoch": 0.6769106222658136, "grad_norm": 2.2952394485473633, "learning_rate": 2.4965073637526225e-06, "loss": 0.5166, "step": 16305 }, { "epoch": 0.6769521377900248, "grad_norm": 2.2080187797546387, "learning_rate": 2.495925416073547e-06, "loss": 0.5889, "step": 16306 }, { "epoch": 0.6769936533142362, "grad_norm": 2.497560501098633, "learning_rate": 2.4953435136688607e-06, "loss": 0.504, "step": 16307 }, { "epoch": 0.6770351688384475, "grad_norm": 2.220459461212158, "learning_rate": 2.494761656549086e-06, "loss": 0.5246, "step": 16308 }, { "epoch": 0.6770766843626589, "grad_norm": 2.4440503120422363, "learning_rate": 2.4941798447247412e-06, "loss": 0.5434, "step": 16309 }, { "epoch": 0.6771181998868702, "grad_norm": 2.6158487796783447, "learning_rate": 2.4935980782063484e-06, "loss": 0.6139, "step": 16310 }, { "epoch": 0.6771597154110816, "grad_norm": 2.3419690132141113, "learning_rate": 2.4930163570044245e-06, "loss": 0.4939, "step": 16311 }, { "epoch": 0.6772012309352928, "grad_norm": 2.3587770462036133, "learning_rate": 2.492434681129488e-06, "loss": 0.6685, "step": 16312 }, { "epoch": 0.6772427464595042, "grad_norm": 2.2891628742218018, "learning_rate": 2.4918530505920552e-06, "loss": 0.4033, "step": 16313 }, { "epoch": 0.6772842619837155, "grad_norm": 2.125488042831421, "learning_rate": 2.49127146540264e-06, "loss": 0.3993, "step": 16314 }, { "epoch": 0.6773257775079269, "grad_norm": 1.859583854675293, "learning_rate": 2.490689925571762e-06, "loss": 0.4218, "step": 16315 }, { "epoch": 0.6773672930321382, "grad_norm": 2.7350549697875977, "learning_rate": 2.4901084311099333e-06, "loss": 0.4503, "step": 16316 }, { "epoch": 0.6774088085563496, "grad_norm": 2.0967326164245605, "learning_rate": 2.4895269820276674e-06, "loss": 0.5029, "step": 16317 }, { "epoch": 0.6774503240805608, "grad_norm": 3.092761516571045, "learning_rate": 2.4889455783354754e-06, "loss": 0.6538, "step": 16318 }, { "epoch": 0.6774918396047722, "grad_norm": 3.2535202503204346, "learning_rate": 2.488364220043873e-06, "loss": 0.5805, "step": 16319 }, { "epoch": 0.6775333551289835, "grad_norm": 2.006532669067383, "learning_rate": 2.48778290716337e-06, "loss": 0.4718, "step": 16320 }, { "epoch": 0.6775748706531949, "grad_norm": 2.4743146896362305, "learning_rate": 2.487201639704475e-06, "loss": 0.4606, "step": 16321 }, { "epoch": 0.6776163861774062, "grad_norm": 2.1859259605407715, "learning_rate": 2.4866204176776974e-06, "loss": 0.4396, "step": 16322 }, { "epoch": 0.6776579017016175, "grad_norm": 2.574308395385742, "learning_rate": 2.4860392410935492e-06, "loss": 0.507, "step": 16323 }, { "epoch": 0.6776994172258289, "grad_norm": 2.7069311141967773, "learning_rate": 2.4854581099625358e-06, "loss": 0.5818, "step": 16324 }, { "epoch": 0.6777409327500402, "grad_norm": 2.307023048400879, "learning_rate": 2.484877024295165e-06, "loss": 0.535, "step": 16325 }, { "epoch": 0.6777824482742516, "grad_norm": 2.942568778991699, "learning_rate": 2.484295984101942e-06, "loss": 0.4867, "step": 16326 }, { "epoch": 0.6778239637984629, "grad_norm": 2.6543691158294678, "learning_rate": 2.483714989393371e-06, "loss": 0.5722, "step": 16327 }, { "epoch": 0.6778654793226743, "grad_norm": 2.773599624633789, "learning_rate": 2.4831340401799604e-06, "loss": 0.6159, "step": 16328 }, { "epoch": 0.6779069948468855, "grad_norm": 2.5580427646636963, "learning_rate": 2.482553136472211e-06, "loss": 0.4879, "step": 16329 }, { "epoch": 0.6779485103710969, "grad_norm": 1.8371304273605347, "learning_rate": 2.4819722782806272e-06, "loss": 0.501, "step": 16330 }, { "epoch": 0.6779900258953082, "grad_norm": 2.1769747734069824, "learning_rate": 2.4813914656157086e-06, "loss": 0.5452, "step": 16331 }, { "epoch": 0.6780315414195196, "grad_norm": 2.490601062774658, "learning_rate": 2.4808106984879597e-06, "loss": 0.5817, "step": 16332 }, { "epoch": 0.6780730569437309, "grad_norm": 2.1794278621673584, "learning_rate": 2.48022997690788e-06, "loss": 0.4721, "step": 16333 }, { "epoch": 0.6781145724679423, "grad_norm": 2.6496124267578125, "learning_rate": 2.4796493008859677e-06, "loss": 0.5239, "step": 16334 }, { "epoch": 0.6781560879921535, "grad_norm": 2.6322193145751953, "learning_rate": 2.479068670432721e-06, "loss": 0.5643, "step": 16335 }, { "epoch": 0.6781976035163649, "grad_norm": 3.0406017303466797, "learning_rate": 2.4784880855586415e-06, "loss": 0.4539, "step": 16336 }, { "epoch": 0.6782391190405762, "grad_norm": 2.5288848876953125, "learning_rate": 2.477907546274224e-06, "loss": 0.5406, "step": 16337 }, { "epoch": 0.6782806345647876, "grad_norm": 2.120537757873535, "learning_rate": 2.477327052589965e-06, "loss": 0.4997, "step": 16338 }, { "epoch": 0.6783221500889989, "grad_norm": 2.54724383354187, "learning_rate": 2.47674660451636e-06, "loss": 0.6741, "step": 16339 }, { "epoch": 0.6783636656132103, "grad_norm": 2.2887017726898193, "learning_rate": 2.476166202063902e-06, "loss": 0.5808, "step": 16340 }, { "epoch": 0.6784051811374215, "grad_norm": 3.1176400184631348, "learning_rate": 2.475585845243088e-06, "loss": 0.6399, "step": 16341 }, { "epoch": 0.6784466966616329, "grad_norm": 2.5982258319854736, "learning_rate": 2.47500553406441e-06, "loss": 0.5849, "step": 16342 }, { "epoch": 0.6784882121858442, "grad_norm": 1.9018971920013428, "learning_rate": 2.4744252685383595e-06, "loss": 0.4639, "step": 16343 }, { "epoch": 0.6785297277100556, "grad_norm": 1.9618504047393799, "learning_rate": 2.473845048675426e-06, "loss": 0.4274, "step": 16344 }, { "epoch": 0.6785712432342669, "grad_norm": 2.3125855922698975, "learning_rate": 2.473264874486104e-06, "loss": 0.4444, "step": 16345 }, { "epoch": 0.6786127587584783, "grad_norm": 1.9832653999328613, "learning_rate": 2.4726847459808825e-06, "loss": 0.5792, "step": 16346 }, { "epoch": 0.6786542742826895, "grad_norm": 2.4066829681396484, "learning_rate": 2.4721046631702478e-06, "loss": 0.4617, "step": 16347 }, { "epoch": 0.6786957898069009, "grad_norm": 2.151026725769043, "learning_rate": 2.471524626064688e-06, "loss": 0.5055, "step": 16348 }, { "epoch": 0.6787373053311122, "grad_norm": 2.1188769340515137, "learning_rate": 2.470944634674694e-06, "loss": 0.5395, "step": 16349 }, { "epoch": 0.6787788208553236, "grad_norm": 3.107562303543091, "learning_rate": 2.4703646890107495e-06, "loss": 0.5487, "step": 16350 }, { "epoch": 0.6788203363795349, "grad_norm": 2.586385488510132, "learning_rate": 2.469784789083341e-06, "loss": 0.5158, "step": 16351 }, { "epoch": 0.6788618519037463, "grad_norm": 2.638899326324463, "learning_rate": 2.4692049349029523e-06, "loss": 0.4468, "step": 16352 }, { "epoch": 0.6789033674279575, "grad_norm": 2.4870800971984863, "learning_rate": 2.468625126480066e-06, "loss": 0.5698, "step": 16353 }, { "epoch": 0.6789448829521689, "grad_norm": 2.4047188758850098, "learning_rate": 2.4680453638251687e-06, "loss": 0.6673, "step": 16354 }, { "epoch": 0.6789863984763803, "grad_norm": 2.6749720573425293, "learning_rate": 2.467465646948742e-06, "loss": 0.6193, "step": 16355 }, { "epoch": 0.6790279140005916, "grad_norm": 2.1119208335876465, "learning_rate": 2.4668859758612657e-06, "loss": 0.4186, "step": 16356 }, { "epoch": 0.679069429524803, "grad_norm": 2.2802276611328125, "learning_rate": 2.466306350573219e-06, "loss": 0.5034, "step": 16357 }, { "epoch": 0.6791109450490143, "grad_norm": 2.1194815635681152, "learning_rate": 2.465726771095086e-06, "loss": 0.594, "step": 16358 }, { "epoch": 0.6791524605732256, "grad_norm": 2.476881742477417, "learning_rate": 2.465147237437343e-06, "loss": 0.5132, "step": 16359 }, { "epoch": 0.6791939760974369, "grad_norm": 2.127145767211914, "learning_rate": 2.4645677496104692e-06, "loss": 0.4792, "step": 16360 }, { "epoch": 0.6792354916216483, "grad_norm": 2.1843438148498535, "learning_rate": 2.463988307624939e-06, "loss": 0.5712, "step": 16361 }, { "epoch": 0.6792770071458596, "grad_norm": 2.4125242233276367, "learning_rate": 2.4634089114912328e-06, "loss": 0.4502, "step": 16362 }, { "epoch": 0.679318522670071, "grad_norm": 2.7934186458587646, "learning_rate": 2.462829561219825e-06, "loss": 0.5549, "step": 16363 }, { "epoch": 0.6793600381942823, "grad_norm": 2.3589892387390137, "learning_rate": 2.4622502568211896e-06, "loss": 0.5426, "step": 16364 }, { "epoch": 0.6794015537184936, "grad_norm": 2.5002264976501465, "learning_rate": 2.461670998305802e-06, "loss": 0.5261, "step": 16365 }, { "epoch": 0.6794430692427049, "grad_norm": 2.3548028469085693, "learning_rate": 2.4610917856841313e-06, "loss": 0.4926, "step": 16366 }, { "epoch": 0.6794845847669163, "grad_norm": 2.551901340484619, "learning_rate": 2.4605126189666554e-06, "loss": 0.4963, "step": 16367 }, { "epoch": 0.6795261002911276, "grad_norm": 2.635467052459717, "learning_rate": 2.4599334981638434e-06, "loss": 0.5776, "step": 16368 }, { "epoch": 0.679567615815339, "grad_norm": 2.5850160121917725, "learning_rate": 2.459354423286166e-06, "loss": 0.5271, "step": 16369 }, { "epoch": 0.6796091313395503, "grad_norm": 2.284071922302246, "learning_rate": 2.4587753943440906e-06, "loss": 0.534, "step": 16370 }, { "epoch": 0.6796506468637616, "grad_norm": 2.1313540935516357, "learning_rate": 2.4581964113480906e-06, "loss": 0.547, "step": 16371 }, { "epoch": 0.6796921623879729, "grad_norm": 2.3040122985839844, "learning_rate": 2.4576174743086318e-06, "loss": 0.5793, "step": 16372 }, { "epoch": 0.6797336779121843, "grad_norm": 2.404435396194458, "learning_rate": 2.4570385832361823e-06, "loss": 0.5312, "step": 16373 }, { "epoch": 0.6797751934363956, "grad_norm": 2.131154775619507, "learning_rate": 2.456459738141206e-06, "loss": 0.6107, "step": 16374 }, { "epoch": 0.679816708960607, "grad_norm": 2.753937244415283, "learning_rate": 2.455880939034173e-06, "loss": 0.6375, "step": 16375 }, { "epoch": 0.6798582244848183, "grad_norm": 3.0321109294891357, "learning_rate": 2.4553021859255455e-06, "loss": 0.4543, "step": 16376 }, { "epoch": 0.6798997400090296, "grad_norm": 2.2661142349243164, "learning_rate": 2.4547234788257883e-06, "loss": 0.393, "step": 16377 }, { "epoch": 0.6799412555332409, "grad_norm": 2.134587526321411, "learning_rate": 2.454144817745364e-06, "loss": 0.3666, "step": 16378 }, { "epoch": 0.6799827710574523, "grad_norm": 1.991827368736267, "learning_rate": 2.453566202694733e-06, "loss": 0.4595, "step": 16379 }, { "epoch": 0.6800242865816636, "grad_norm": 2.212941884994507, "learning_rate": 2.4529876336843615e-06, "loss": 0.5175, "step": 16380 }, { "epoch": 0.680065802105875, "grad_norm": 2.5579476356506348, "learning_rate": 2.4524091107247077e-06, "loss": 0.4489, "step": 16381 }, { "epoch": 0.6801073176300862, "grad_norm": 2.2653768062591553, "learning_rate": 2.451830633826231e-06, "loss": 0.5238, "step": 16382 }, { "epoch": 0.6801488331542976, "grad_norm": 2.2118606567382812, "learning_rate": 2.451252202999389e-06, "loss": 0.4907, "step": 16383 }, { "epoch": 0.6801903486785089, "grad_norm": 2.005215883255005, "learning_rate": 2.450673818254644e-06, "loss": 0.4734, "step": 16384 }, { "epoch": 0.6802318642027203, "grad_norm": 2.134669065475464, "learning_rate": 2.450095479602451e-06, "loss": 0.485, "step": 16385 }, { "epoch": 0.6802733797269317, "grad_norm": 2.828554153442383, "learning_rate": 2.4495171870532667e-06, "loss": 0.7277, "step": 16386 }, { "epoch": 0.680314895251143, "grad_norm": 2.2390241622924805, "learning_rate": 2.4489389406175457e-06, "loss": 0.4325, "step": 16387 }, { "epoch": 0.6803564107753544, "grad_norm": 2.074606418609619, "learning_rate": 2.4483607403057446e-06, "loss": 0.5693, "step": 16388 }, { "epoch": 0.6803979262995656, "grad_norm": 2.375819206237793, "learning_rate": 2.4477825861283177e-06, "loss": 0.358, "step": 16389 }, { "epoch": 0.680439441823777, "grad_norm": 2.2077648639678955, "learning_rate": 2.4472044780957164e-06, "loss": 0.5304, "step": 16390 }, { "epoch": 0.6804809573479883, "grad_norm": 2.6087052822113037, "learning_rate": 2.446626416218395e-06, "loss": 0.4861, "step": 16391 }, { "epoch": 0.6805224728721997, "grad_norm": 2.3330559730529785, "learning_rate": 2.446048400506801e-06, "loss": 0.4763, "step": 16392 }, { "epoch": 0.680563988396411, "grad_norm": 2.9106950759887695, "learning_rate": 2.4454704309713907e-06, "loss": 0.5465, "step": 16393 }, { "epoch": 0.6806055039206224, "grad_norm": 2.1650283336639404, "learning_rate": 2.444892507622611e-06, "loss": 0.4927, "step": 16394 }, { "epoch": 0.6806470194448336, "grad_norm": 2.738288402557373, "learning_rate": 2.4443146304709113e-06, "loss": 0.5941, "step": 16395 }, { "epoch": 0.680688534969045, "grad_norm": 2.49021053314209, "learning_rate": 2.4437367995267374e-06, "loss": 0.5343, "step": 16396 }, { "epoch": 0.6807300504932563, "grad_norm": 2.636948347091675, "learning_rate": 2.4431590148005414e-06, "loss": 0.5593, "step": 16397 }, { "epoch": 0.6807715660174677, "grad_norm": 2.337139844894409, "learning_rate": 2.4425812763027672e-06, "loss": 0.6186, "step": 16398 }, { "epoch": 0.680813081541679, "grad_norm": 2.2239878177642822, "learning_rate": 2.4420035840438606e-06, "loss": 0.4335, "step": 16399 }, { "epoch": 0.6808545970658904, "grad_norm": 2.2366702556610107, "learning_rate": 2.441425938034265e-06, "loss": 0.4831, "step": 16400 }, { "epoch": 0.6808961125901016, "grad_norm": 2.3608710765838623, "learning_rate": 2.440848338284427e-06, "loss": 0.5113, "step": 16401 }, { "epoch": 0.680937628114313, "grad_norm": 2.4453327655792236, "learning_rate": 2.440270784804789e-06, "loss": 0.6531, "step": 16402 }, { "epoch": 0.6809791436385243, "grad_norm": 2.094212532043457, "learning_rate": 2.4396932776057935e-06, "loss": 0.5059, "step": 16403 }, { "epoch": 0.6810206591627357, "grad_norm": 2.1675124168395996, "learning_rate": 2.4391158166978813e-06, "loss": 0.4271, "step": 16404 }, { "epoch": 0.681062174686947, "grad_norm": 2.5543017387390137, "learning_rate": 2.4385384020914908e-06, "loss": 0.5061, "step": 16405 }, { "epoch": 0.6811036902111584, "grad_norm": 2.0624523162841797, "learning_rate": 2.4379610337970667e-06, "loss": 0.426, "step": 16406 }, { "epoch": 0.6811452057353696, "grad_norm": 2.5147030353546143, "learning_rate": 2.4373837118250453e-06, "loss": 0.7209, "step": 16407 }, { "epoch": 0.681186721259581, "grad_norm": 2.558981418609619, "learning_rate": 2.436806436185865e-06, "loss": 0.5265, "step": 16408 }, { "epoch": 0.6812282367837923, "grad_norm": 2.329925775527954, "learning_rate": 2.4362292068899618e-06, "loss": 0.4593, "step": 16409 }, { "epoch": 0.6812697523080037, "grad_norm": 2.068929433822632, "learning_rate": 2.4356520239477748e-06, "loss": 0.512, "step": 16410 }, { "epoch": 0.681311267832215, "grad_norm": 2.130680799484253, "learning_rate": 2.435074887369738e-06, "loss": 0.3745, "step": 16411 }, { "epoch": 0.6813527833564264, "grad_norm": 2.4929144382476807, "learning_rate": 2.4344977971662875e-06, "loss": 0.6201, "step": 16412 }, { "epoch": 0.6813942988806376, "grad_norm": 2.1080195903778076, "learning_rate": 2.433920753347854e-06, "loss": 0.4733, "step": 16413 }, { "epoch": 0.681435814404849, "grad_norm": 2.548886775970459, "learning_rate": 2.4333437559248745e-06, "loss": 0.5157, "step": 16414 }, { "epoch": 0.6814773299290603, "grad_norm": 2.392179489135742, "learning_rate": 2.43276680490778e-06, "loss": 0.5256, "step": 16415 }, { "epoch": 0.6815188454532717, "grad_norm": 2.3748843669891357, "learning_rate": 2.4321899003070016e-06, "loss": 0.4062, "step": 16416 }, { "epoch": 0.6815603609774831, "grad_norm": 3.578789234161377, "learning_rate": 2.4316130421329696e-06, "loss": 0.5591, "step": 16417 }, { "epoch": 0.6816018765016943, "grad_norm": 2.4381814002990723, "learning_rate": 2.431036230396113e-06, "loss": 0.6149, "step": 16418 }, { "epoch": 0.6816433920259057, "grad_norm": 2.6402721405029297, "learning_rate": 2.4304594651068626e-06, "loss": 0.633, "step": 16419 }, { "epoch": 0.681684907550117, "grad_norm": 2.106323719024658, "learning_rate": 2.4298827462756462e-06, "loss": 0.4063, "step": 16420 }, { "epoch": 0.6817264230743284, "grad_norm": 2.398350238800049, "learning_rate": 2.4293060739128903e-06, "loss": 0.658, "step": 16421 }, { "epoch": 0.6817679385985397, "grad_norm": 2.1018762588500977, "learning_rate": 2.42872944802902e-06, "loss": 0.5234, "step": 16422 }, { "epoch": 0.6818094541227511, "grad_norm": 2.3177387714385986, "learning_rate": 2.4281528686344645e-06, "loss": 0.4135, "step": 16423 }, { "epoch": 0.6818509696469623, "grad_norm": 2.262789726257324, "learning_rate": 2.4275763357396458e-06, "loss": 0.5407, "step": 16424 }, { "epoch": 0.6818924851711737, "grad_norm": 2.44948410987854, "learning_rate": 2.426999849354989e-06, "loss": 0.5441, "step": 16425 }, { "epoch": 0.681934000695385, "grad_norm": 2.4685709476470947, "learning_rate": 2.4264234094909143e-06, "loss": 0.47, "step": 16426 }, { "epoch": 0.6819755162195964, "grad_norm": 2.5287883281707764, "learning_rate": 2.425847016157848e-06, "loss": 0.509, "step": 16427 }, { "epoch": 0.6820170317438077, "grad_norm": 2.483863353729248, "learning_rate": 2.4252706693662097e-06, "loss": 0.4266, "step": 16428 }, { "epoch": 0.6820585472680191, "grad_norm": 2.415689706802368, "learning_rate": 2.4246943691264197e-06, "loss": 0.4687, "step": 16429 }, { "epoch": 0.6821000627922303, "grad_norm": 2.065629720687866, "learning_rate": 2.4241181154488978e-06, "loss": 0.4849, "step": 16430 }, { "epoch": 0.6821415783164417, "grad_norm": 2.3718183040618896, "learning_rate": 2.4235419083440615e-06, "loss": 0.4904, "step": 16431 }, { "epoch": 0.682183093840653, "grad_norm": 2.4976561069488525, "learning_rate": 2.4229657478223312e-06, "loss": 0.6406, "step": 16432 }, { "epoch": 0.6822246093648644, "grad_norm": 2.36167049407959, "learning_rate": 2.422389633894123e-06, "loss": 0.461, "step": 16433 }, { "epoch": 0.6822661248890757, "grad_norm": 2.8208954334259033, "learning_rate": 2.421813566569854e-06, "loss": 0.6453, "step": 16434 }, { "epoch": 0.6823076404132871, "grad_norm": 2.4851438999176025, "learning_rate": 2.421237545859936e-06, "loss": 0.5723, "step": 16435 }, { "epoch": 0.6823491559374983, "grad_norm": 2.673445224761963, "learning_rate": 2.420661571774789e-06, "loss": 0.6032, "step": 16436 }, { "epoch": 0.6823906714617097, "grad_norm": 2.563621759414673, "learning_rate": 2.420085644324824e-06, "loss": 0.5162, "step": 16437 }, { "epoch": 0.682432186985921, "grad_norm": 2.5902957916259766, "learning_rate": 2.419509763520454e-06, "loss": 0.4858, "step": 16438 }, { "epoch": 0.6824737025101324, "grad_norm": 2.3312127590179443, "learning_rate": 2.4189339293720897e-06, "loss": 0.5301, "step": 16439 }, { "epoch": 0.6825152180343437, "grad_norm": 2.2963643074035645, "learning_rate": 2.4183581418901453e-06, "loss": 0.5584, "step": 16440 }, { "epoch": 0.6825567335585551, "grad_norm": 2.3126327991485596, "learning_rate": 2.4177824010850302e-06, "loss": 0.4146, "step": 16441 }, { "epoch": 0.6825982490827663, "grad_norm": 1.6753755807876587, "learning_rate": 2.4172067069671535e-06, "loss": 0.5867, "step": 16442 }, { "epoch": 0.6826397646069777, "grad_norm": 2.0688154697418213, "learning_rate": 2.416631059546924e-06, "loss": 0.3302, "step": 16443 }, { "epoch": 0.682681280131189, "grad_norm": 2.4017906188964844, "learning_rate": 2.416055458834747e-06, "loss": 0.6082, "step": 16444 }, { "epoch": 0.6827227956554004, "grad_norm": 2.443866729736328, "learning_rate": 2.4154799048410355e-06, "loss": 0.5939, "step": 16445 }, { "epoch": 0.6827643111796117, "grad_norm": 2.4435834884643555, "learning_rate": 2.414904397576191e-06, "loss": 0.4104, "step": 16446 }, { "epoch": 0.6828058267038231, "grad_norm": 2.5332133769989014, "learning_rate": 2.4143289370506205e-06, "loss": 0.4411, "step": 16447 }, { "epoch": 0.6828473422280344, "grad_norm": 2.453464984893799, "learning_rate": 2.413753523274726e-06, "loss": 0.5533, "step": 16448 }, { "epoch": 0.6828888577522457, "grad_norm": 2.416410446166992, "learning_rate": 2.413178156258915e-06, "loss": 0.5025, "step": 16449 }, { "epoch": 0.6829303732764571, "grad_norm": 1.926421880722046, "learning_rate": 2.412602836013589e-06, "loss": 0.4126, "step": 16450 }, { "epoch": 0.6829718888006684, "grad_norm": 2.2545855045318604, "learning_rate": 2.412027562549149e-06, "loss": 0.5997, "step": 16451 }, { "epoch": 0.6830134043248798, "grad_norm": 2.554482936859131, "learning_rate": 2.4114523358759952e-06, "loss": 0.4829, "step": 16452 }, { "epoch": 0.6830549198490911, "grad_norm": 2.196152687072754, "learning_rate": 2.4108771560045314e-06, "loss": 0.5018, "step": 16453 }, { "epoch": 0.6830964353733024, "grad_norm": 2.2580833435058594, "learning_rate": 2.410302022945154e-06, "loss": 0.4028, "step": 16454 }, { "epoch": 0.6831379508975137, "grad_norm": 2.7389023303985596, "learning_rate": 2.409726936708263e-06, "loss": 0.6829, "step": 16455 }, { "epoch": 0.6831794664217251, "grad_norm": 2.897003650665283, "learning_rate": 2.4091518973042556e-06, "loss": 0.5403, "step": 16456 }, { "epoch": 0.6832209819459364, "grad_norm": 2.5141313076019287, "learning_rate": 2.4085769047435266e-06, "loss": 0.3988, "step": 16457 }, { "epoch": 0.6832624974701478, "grad_norm": 2.389373540878296, "learning_rate": 2.4080019590364754e-06, "loss": 0.5408, "step": 16458 }, { "epoch": 0.6833040129943591, "grad_norm": 2.3126473426818848, "learning_rate": 2.407427060193497e-06, "loss": 0.6434, "step": 16459 }, { "epoch": 0.6833455285185704, "grad_norm": 2.3382680416107178, "learning_rate": 2.406852208224984e-06, "loss": 0.4945, "step": 16460 }, { "epoch": 0.6833870440427817, "grad_norm": 2.531191349029541, "learning_rate": 2.4062774031413287e-06, "loss": 0.5095, "step": 16461 }, { "epoch": 0.6834285595669931, "grad_norm": 2.278719186782837, "learning_rate": 2.4057026449529277e-06, "loss": 0.5569, "step": 16462 }, { "epoch": 0.6834700750912044, "grad_norm": 2.2923107147216797, "learning_rate": 2.40512793367017e-06, "loss": 0.5465, "step": 16463 }, { "epoch": 0.6835115906154158, "grad_norm": 1.9998137950897217, "learning_rate": 2.404553269303448e-06, "loss": 0.4701, "step": 16464 }, { "epoch": 0.683553106139627, "grad_norm": 2.2541379928588867, "learning_rate": 2.4039786518631485e-06, "loss": 0.5577, "step": 16465 }, { "epoch": 0.6835946216638384, "grad_norm": 2.494630813598633, "learning_rate": 2.4034040813596655e-06, "loss": 0.4798, "step": 16466 }, { "epoch": 0.6836361371880497, "grad_norm": 2.29902720451355, "learning_rate": 2.4028295578033846e-06, "loss": 0.6789, "step": 16467 }, { "epoch": 0.6836776527122611, "grad_norm": 2.168100595474243, "learning_rate": 2.4022550812046943e-06, "loss": 0.4864, "step": 16468 }, { "epoch": 0.6837191682364724, "grad_norm": 2.1876096725463867, "learning_rate": 2.4016806515739805e-06, "loss": 0.4896, "step": 16469 }, { "epoch": 0.6837606837606838, "grad_norm": 2.2921392917633057, "learning_rate": 2.401106268921628e-06, "loss": 0.6294, "step": 16470 }, { "epoch": 0.683802199284895, "grad_norm": 2.225879192352295, "learning_rate": 2.4005319332580247e-06, "loss": 0.4449, "step": 16471 }, { "epoch": 0.6838437148091064, "grad_norm": 2.2801010608673096, "learning_rate": 2.3999576445935537e-06, "loss": 0.6021, "step": 16472 }, { "epoch": 0.6838852303333177, "grad_norm": 2.499805212020874, "learning_rate": 2.3993834029385976e-06, "loss": 0.7279, "step": 16473 }, { "epoch": 0.6839267458575291, "grad_norm": 2.267580509185791, "learning_rate": 2.3988092083035374e-06, "loss": 0.5838, "step": 16474 }, { "epoch": 0.6839682613817404, "grad_norm": 1.9574271440505981, "learning_rate": 2.398235060698759e-06, "loss": 0.4156, "step": 16475 }, { "epoch": 0.6840097769059518, "grad_norm": 2.223743200302124, "learning_rate": 2.3976609601346395e-06, "loss": 0.4512, "step": 16476 }, { "epoch": 0.684051292430163, "grad_norm": 2.489137649536133, "learning_rate": 2.39708690662156e-06, "loss": 0.4789, "step": 16477 }, { "epoch": 0.6840928079543744, "grad_norm": 2.402536392211914, "learning_rate": 2.3965129001698983e-06, "loss": 0.6464, "step": 16478 }, { "epoch": 0.6841343234785858, "grad_norm": 2.088294506072998, "learning_rate": 2.3959389407900353e-06, "loss": 0.3736, "step": 16479 }, { "epoch": 0.6841758390027971, "grad_norm": 2.290769100189209, "learning_rate": 2.395365028492347e-06, "loss": 0.4716, "step": 16480 }, { "epoch": 0.6842173545270085, "grad_norm": 3.363093376159668, "learning_rate": 2.3947911632872096e-06, "loss": 0.717, "step": 16481 }, { "epoch": 0.6842588700512198, "grad_norm": 2.213801383972168, "learning_rate": 2.3942173451849988e-06, "loss": 0.4418, "step": 16482 }, { "epoch": 0.6843003855754312, "grad_norm": 2.638385772705078, "learning_rate": 2.3936435741960874e-06, "loss": 0.6304, "step": 16483 }, { "epoch": 0.6843419010996424, "grad_norm": 2.2080583572387695, "learning_rate": 2.3930698503308537e-06, "loss": 0.5691, "step": 16484 }, { "epoch": 0.6843834166238538, "grad_norm": 1.8559807538986206, "learning_rate": 2.3924961735996684e-06, "loss": 0.5004, "step": 16485 }, { "epoch": 0.6844249321480651, "grad_norm": 2.6718034744262695, "learning_rate": 2.3919225440129037e-06, "loss": 0.505, "step": 16486 }, { "epoch": 0.6844664476722765, "grad_norm": 2.0153799057006836, "learning_rate": 2.3913489615809286e-06, "loss": 0.4895, "step": 16487 }, { "epoch": 0.6845079631964878, "grad_norm": 1.8887815475463867, "learning_rate": 2.3907754263141193e-06, "loss": 0.4704, "step": 16488 }, { "epoch": 0.6845494787206992, "grad_norm": 2.1301302909851074, "learning_rate": 2.3902019382228413e-06, "loss": 0.4975, "step": 16489 }, { "epoch": 0.6845909942449104, "grad_norm": 2.6593148708343506, "learning_rate": 2.3896284973174653e-06, "loss": 0.4742, "step": 16490 }, { "epoch": 0.6846325097691218, "grad_norm": 2.3806705474853516, "learning_rate": 2.3890551036083564e-06, "loss": 0.4938, "step": 16491 }, { "epoch": 0.6846740252933331, "grad_norm": 2.393947124481201, "learning_rate": 2.3884817571058853e-06, "loss": 0.7088, "step": 16492 }, { "epoch": 0.6847155408175445, "grad_norm": 2.5221197605133057, "learning_rate": 2.387908457820417e-06, "loss": 0.6228, "step": 16493 }, { "epoch": 0.6847570563417558, "grad_norm": 2.3774051666259766, "learning_rate": 2.3873352057623166e-06, "loss": 0.6566, "step": 16494 }, { "epoch": 0.6847985718659672, "grad_norm": 2.345806360244751, "learning_rate": 2.386762000941949e-06, "loss": 0.5604, "step": 16495 }, { "epoch": 0.6848400873901784, "grad_norm": 2.225231885910034, "learning_rate": 2.3861888433696757e-06, "loss": 0.408, "step": 16496 }, { "epoch": 0.6848816029143898, "grad_norm": 2.0613248348236084, "learning_rate": 2.3856157330558625e-06, "loss": 0.4571, "step": 16497 }, { "epoch": 0.6849231184386011, "grad_norm": 2.4426441192626953, "learning_rate": 2.3850426700108713e-06, "loss": 0.583, "step": 16498 }, { "epoch": 0.6849646339628125, "grad_norm": 2.6257245540618896, "learning_rate": 2.3844696542450623e-06, "loss": 0.4472, "step": 16499 }, { "epoch": 0.6850061494870238, "grad_norm": 2.2331326007843018, "learning_rate": 2.383896685768794e-06, "loss": 0.5544, "step": 16500 }, { "epoch": 0.6850476650112352, "grad_norm": 2.0705087184906006, "learning_rate": 2.383323764592429e-06, "loss": 0.5563, "step": 16501 }, { "epoch": 0.6850891805354464, "grad_norm": 3.0079538822174072, "learning_rate": 2.3827508907263246e-06, "loss": 0.4816, "step": 16502 }, { "epoch": 0.6851306960596578, "grad_norm": 2.5227391719818115, "learning_rate": 2.382178064180839e-06, "loss": 0.5567, "step": 16503 }, { "epoch": 0.6851722115838691, "grad_norm": 2.511894941329956, "learning_rate": 2.3816052849663264e-06, "loss": 0.5923, "step": 16504 }, { "epoch": 0.6852137271080805, "grad_norm": 2.5008249282836914, "learning_rate": 2.3810325530931473e-06, "loss": 0.3662, "step": 16505 }, { "epoch": 0.6852552426322918, "grad_norm": 2.0629165172576904, "learning_rate": 2.3804598685716537e-06, "loss": 0.5376, "step": 16506 }, { "epoch": 0.6852967581565031, "grad_norm": 2.751075506210327, "learning_rate": 2.379887231412201e-06, "loss": 0.4262, "step": 16507 }, { "epoch": 0.6853382736807144, "grad_norm": 2.214285135269165, "learning_rate": 2.3793146416251424e-06, "loss": 0.3632, "step": 16508 }, { "epoch": 0.6853797892049258, "grad_norm": 2.477534294128418, "learning_rate": 2.378742099220829e-06, "loss": 0.5074, "step": 16509 }, { "epoch": 0.6854213047291372, "grad_norm": 2.051100969314575, "learning_rate": 2.3781696042096146e-06, "loss": 0.4996, "step": 16510 }, { "epoch": 0.6854628202533485, "grad_norm": 1.9340991973876953, "learning_rate": 2.377597156601851e-06, "loss": 0.3549, "step": 16511 }, { "epoch": 0.6855043357775599, "grad_norm": 2.3388702869415283, "learning_rate": 2.3770247564078856e-06, "loss": 0.4985, "step": 16512 }, { "epoch": 0.6855458513017711, "grad_norm": 2.307896614074707, "learning_rate": 2.3764524036380672e-06, "loss": 0.5093, "step": 16513 }, { "epoch": 0.6855873668259825, "grad_norm": 2.548109769821167, "learning_rate": 2.375880098302747e-06, "loss": 0.5866, "step": 16514 }, { "epoch": 0.6856288823501938, "grad_norm": 2.4638254642486572, "learning_rate": 2.3753078404122713e-06, "loss": 0.6748, "step": 16515 }, { "epoch": 0.6856703978744052, "grad_norm": 2.249817132949829, "learning_rate": 2.3747356299769863e-06, "loss": 0.6038, "step": 16516 }, { "epoch": 0.6857119133986165, "grad_norm": 2.4530251026153564, "learning_rate": 2.374163467007236e-06, "loss": 0.626, "step": 16517 }, { "epoch": 0.6857534289228279, "grad_norm": 2.391792058944702, "learning_rate": 2.373591351513369e-06, "loss": 0.6054, "step": 16518 }, { "epoch": 0.6857949444470391, "grad_norm": 2.388512134552002, "learning_rate": 2.373019283505727e-06, "loss": 0.429, "step": 16519 }, { "epoch": 0.6858364599712505, "grad_norm": 2.6222236156463623, "learning_rate": 2.372447262994654e-06, "loss": 0.6051, "step": 16520 }, { "epoch": 0.6858779754954618, "grad_norm": 2.4016687870025635, "learning_rate": 2.3718752899904917e-06, "loss": 0.5391, "step": 16521 }, { "epoch": 0.6859194910196732, "grad_norm": 2.63369083404541, "learning_rate": 2.3713033645035793e-06, "loss": 0.6055, "step": 16522 }, { "epoch": 0.6859610065438845, "grad_norm": 2.1261038780212402, "learning_rate": 2.370731486544262e-06, "loss": 0.4817, "step": 16523 }, { "epoch": 0.6860025220680959, "grad_norm": 2.282762289047241, "learning_rate": 2.3701596561228773e-06, "loss": 0.6146, "step": 16524 }, { "epoch": 0.6860440375923071, "grad_norm": 2.2043755054473877, "learning_rate": 2.369587873249764e-06, "loss": 0.4518, "step": 16525 }, { "epoch": 0.6860855531165185, "grad_norm": 2.428936719894409, "learning_rate": 2.3690161379352583e-06, "loss": 0.5876, "step": 16526 }, { "epoch": 0.6861270686407298, "grad_norm": 2.529132127761841, "learning_rate": 2.3684444501897012e-06, "loss": 0.5444, "step": 16527 }, { "epoch": 0.6861685841649412, "grad_norm": 2.2681422233581543, "learning_rate": 2.3678728100234265e-06, "loss": 0.5399, "step": 16528 }, { "epoch": 0.6862100996891525, "grad_norm": 2.4322144985198975, "learning_rate": 2.3673012174467703e-06, "loss": 0.5214, "step": 16529 }, { "epoch": 0.6862516152133639, "grad_norm": 2.3086910247802734, "learning_rate": 2.366729672470065e-06, "loss": 0.6599, "step": 16530 }, { "epoch": 0.6862931307375751, "grad_norm": 1.960091233253479, "learning_rate": 2.3661581751036484e-06, "loss": 0.6419, "step": 16531 }, { "epoch": 0.6863346462617865, "grad_norm": 2.5586488246917725, "learning_rate": 2.3655867253578505e-06, "loss": 0.5916, "step": 16532 }, { "epoch": 0.6863761617859978, "grad_norm": 2.9468228816986084, "learning_rate": 2.3650153232430044e-06, "loss": 0.6339, "step": 16533 }, { "epoch": 0.6864176773102092, "grad_norm": 2.687014579772949, "learning_rate": 2.3644439687694405e-06, "loss": 0.4118, "step": 16534 }, { "epoch": 0.6864591928344205, "grad_norm": 2.14506459236145, "learning_rate": 2.363872661947488e-06, "loss": 0.5078, "step": 16535 }, { "epoch": 0.6865007083586319, "grad_norm": 2.962571620941162, "learning_rate": 2.3633014027874786e-06, "loss": 0.5332, "step": 16536 }, { "epoch": 0.6865422238828431, "grad_norm": 2.2657785415649414, "learning_rate": 2.36273019129974e-06, "loss": 0.4504, "step": 16537 }, { "epoch": 0.6865837394070545, "grad_norm": 2.0948381423950195, "learning_rate": 2.3621590274946e-06, "loss": 0.5786, "step": 16538 }, { "epoch": 0.6866252549312658, "grad_norm": 2.313567876815796, "learning_rate": 2.361587911382383e-06, "loss": 0.4117, "step": 16539 }, { "epoch": 0.6866667704554772, "grad_norm": 1.8265068531036377, "learning_rate": 2.3610168429734194e-06, "loss": 0.3884, "step": 16540 }, { "epoch": 0.6867082859796886, "grad_norm": 2.3595752716064453, "learning_rate": 2.3604458222780312e-06, "loss": 0.3666, "step": 16541 }, { "epoch": 0.6867498015038999, "grad_norm": 2.0499861240386963, "learning_rate": 2.359874849306544e-06, "loss": 0.529, "step": 16542 }, { "epoch": 0.6867913170281112, "grad_norm": 2.3135106563568115, "learning_rate": 2.3593039240692782e-06, "loss": 0.6064, "step": 16543 }, { "epoch": 0.6868328325523225, "grad_norm": 2.5324387550354004, "learning_rate": 2.3587330465765607e-06, "loss": 0.4534, "step": 16544 }, { "epoch": 0.6868743480765339, "grad_norm": 2.6451873779296875, "learning_rate": 2.3581622168387107e-06, "loss": 0.3628, "step": 16545 }, { "epoch": 0.6869158636007452, "grad_norm": 2.7224230766296387, "learning_rate": 2.3575914348660494e-06, "loss": 0.5493, "step": 16546 }, { "epoch": 0.6869573791249566, "grad_norm": 2.1359550952911377, "learning_rate": 2.3570207006688966e-06, "loss": 0.4906, "step": 16547 }, { "epoch": 0.6869988946491679, "grad_norm": 2.3334403038024902, "learning_rate": 2.356450014257569e-06, "loss": 0.26, "step": 16548 }, { "epoch": 0.6870404101733792, "grad_norm": 3.0181825160980225, "learning_rate": 2.35587937564239e-06, "loss": 0.3225, "step": 16549 }, { "epoch": 0.6870819256975905, "grad_norm": 2.7728383541107178, "learning_rate": 2.355308784833673e-06, "loss": 0.6006, "step": 16550 }, { "epoch": 0.6871234412218019, "grad_norm": 2.5805912017822266, "learning_rate": 2.3547382418417362e-06, "loss": 0.5447, "step": 16551 }, { "epoch": 0.6871649567460132, "grad_norm": 2.5112791061401367, "learning_rate": 2.354167746676892e-06, "loss": 0.5291, "step": 16552 }, { "epoch": 0.6872064722702246, "grad_norm": 2.209765911102295, "learning_rate": 2.35359729934946e-06, "loss": 0.5192, "step": 16553 }, { "epoch": 0.6872479877944359, "grad_norm": 2.480285882949829, "learning_rate": 2.353026899869751e-06, "loss": 0.621, "step": 16554 }, { "epoch": 0.6872895033186472, "grad_norm": 2.188835859298706, "learning_rate": 2.352456548248079e-06, "loss": 0.4928, "step": 16555 }, { "epoch": 0.6873310188428585, "grad_norm": 2.4873430728912354, "learning_rate": 2.3518862444947536e-06, "loss": 0.4509, "step": 16556 }, { "epoch": 0.6873725343670699, "grad_norm": 2.1619973182678223, "learning_rate": 2.35131598862009e-06, "loss": 0.4405, "step": 16557 }, { "epoch": 0.6874140498912812, "grad_norm": 1.8862426280975342, "learning_rate": 2.350745780634397e-06, "loss": 0.377, "step": 16558 }, { "epoch": 0.6874555654154926, "grad_norm": 2.5479495525360107, "learning_rate": 2.3501756205479838e-06, "loss": 0.5698, "step": 16559 }, { "epoch": 0.6874970809397039, "grad_norm": 4.01724100112915, "learning_rate": 2.3496055083711592e-06, "loss": 0.6783, "step": 16560 }, { "epoch": 0.6875385964639152, "grad_norm": 2.643842935562134, "learning_rate": 2.349035444114229e-06, "loss": 0.4935, "step": 16561 }, { "epoch": 0.6875801119881265, "grad_norm": 2.4330222606658936, "learning_rate": 2.348465427787504e-06, "loss": 0.5063, "step": 16562 }, { "epoch": 0.6876216275123379, "grad_norm": 2.174978733062744, "learning_rate": 2.3478954594012884e-06, "loss": 0.6371, "step": 16563 }, { "epoch": 0.6876631430365492, "grad_norm": 2.676058530807495, "learning_rate": 2.3473255389658865e-06, "loss": 0.55, "step": 16564 }, { "epoch": 0.6877046585607606, "grad_norm": 2.7132458686828613, "learning_rate": 2.3467556664916023e-06, "loss": 0.5071, "step": 16565 }, { "epoch": 0.6877461740849719, "grad_norm": 1.8820544481277466, "learning_rate": 2.3461858419887423e-06, "loss": 0.4289, "step": 16566 }, { "epoch": 0.6877876896091832, "grad_norm": 2.7981643676757812, "learning_rate": 2.3456160654676073e-06, "loss": 0.5979, "step": 16567 }, { "epoch": 0.6878292051333945, "grad_norm": 2.8251793384552, "learning_rate": 2.3450463369384985e-06, "loss": 0.6065, "step": 16568 }, { "epoch": 0.6878707206576059, "grad_norm": 2.786670684814453, "learning_rate": 2.3444766564117154e-06, "loss": 0.5478, "step": 16569 }, { "epoch": 0.6879122361818173, "grad_norm": 2.397225856781006, "learning_rate": 2.343907023897562e-06, "loss": 0.3848, "step": 16570 }, { "epoch": 0.6879537517060286, "grad_norm": 2.3031880855560303, "learning_rate": 2.3433374394063345e-06, "loss": 0.3808, "step": 16571 }, { "epoch": 0.68799526723024, "grad_norm": 2.612124443054199, "learning_rate": 2.3427679029483327e-06, "loss": 0.5681, "step": 16572 }, { "epoch": 0.6880367827544512, "grad_norm": 2.48317551612854, "learning_rate": 2.342198414533853e-06, "loss": 0.6426, "step": 16573 }, { "epoch": 0.6880782982786626, "grad_norm": 2.510065793991089, "learning_rate": 2.341628974173189e-06, "loss": 0.4641, "step": 16574 }, { "epoch": 0.6881198138028739, "grad_norm": 2.3089964389801025, "learning_rate": 2.341059581876644e-06, "loss": 0.4333, "step": 16575 }, { "epoch": 0.6881613293270853, "grad_norm": 2.670689105987549, "learning_rate": 2.340490237654505e-06, "loss": 0.4592, "step": 16576 }, { "epoch": 0.6882028448512966, "grad_norm": 2.717082977294922, "learning_rate": 2.33992094151707e-06, "loss": 0.4807, "step": 16577 }, { "epoch": 0.688244360375508, "grad_norm": 2.821748733520508, "learning_rate": 2.339351693474629e-06, "loss": 0.6416, "step": 16578 }, { "epoch": 0.6882858758997192, "grad_norm": 2.350712776184082, "learning_rate": 2.3387824935374787e-06, "loss": 0.4447, "step": 16579 }, { "epoch": 0.6883273914239306, "grad_norm": 2.4050545692443848, "learning_rate": 2.338213341715908e-06, "loss": 0.704, "step": 16580 }, { "epoch": 0.6883689069481419, "grad_norm": 2.26725172996521, "learning_rate": 2.337644238020207e-06, "loss": 0.5198, "step": 16581 }, { "epoch": 0.6884104224723533, "grad_norm": 2.34741473197937, "learning_rate": 2.3370751824606657e-06, "loss": 0.5658, "step": 16582 }, { "epoch": 0.6884519379965646, "grad_norm": 2.839188814163208, "learning_rate": 2.336506175047571e-06, "loss": 0.4523, "step": 16583 }, { "epoch": 0.688493453520776, "grad_norm": 3.1450588703155518, "learning_rate": 2.3359372157912134e-06, "loss": 0.6382, "step": 16584 }, { "epoch": 0.6885349690449872, "grad_norm": 2.287271738052368, "learning_rate": 2.33536830470188e-06, "loss": 0.5563, "step": 16585 }, { "epoch": 0.6885764845691986, "grad_norm": 2.350379705429077, "learning_rate": 2.334799441789855e-06, "loss": 0.5857, "step": 16586 }, { "epoch": 0.6886180000934099, "grad_norm": 2.199247360229492, "learning_rate": 2.3342306270654226e-06, "loss": 0.445, "step": 16587 }, { "epoch": 0.6886595156176213, "grad_norm": 2.70051646232605, "learning_rate": 2.333661860538872e-06, "loss": 0.3866, "step": 16588 }, { "epoch": 0.6887010311418326, "grad_norm": 2.5229852199554443, "learning_rate": 2.3330931422204806e-06, "loss": 0.4916, "step": 16589 }, { "epoch": 0.688742546666044, "grad_norm": 2.495748281478882, "learning_rate": 2.3325244721205353e-06, "loss": 0.5158, "step": 16590 }, { "epoch": 0.6887840621902552, "grad_norm": 2.1517457962036133, "learning_rate": 2.3319558502493144e-06, "loss": 0.4128, "step": 16591 }, { "epoch": 0.6888255777144666, "grad_norm": 2.146367073059082, "learning_rate": 2.3313872766171027e-06, "loss": 0.5032, "step": 16592 }, { "epoch": 0.6888670932386779, "grad_norm": 2.3856253623962402, "learning_rate": 2.3308187512341785e-06, "loss": 0.4649, "step": 16593 }, { "epoch": 0.6889086087628893, "grad_norm": 2.3726296424865723, "learning_rate": 2.33025027411082e-06, "loss": 0.5925, "step": 16594 }, { "epoch": 0.6889501242871006, "grad_norm": 3.6632087230682373, "learning_rate": 2.3296818452573066e-06, "loss": 0.4735, "step": 16595 }, { "epoch": 0.688991639811312, "grad_norm": 2.2269129753112793, "learning_rate": 2.329113464683913e-06, "loss": 0.4941, "step": 16596 }, { "epoch": 0.6890331553355232, "grad_norm": 2.381929874420166, "learning_rate": 2.3285451324009202e-06, "loss": 0.5724, "step": 16597 }, { "epoch": 0.6890746708597346, "grad_norm": 2.539898157119751, "learning_rate": 2.3279768484186006e-06, "loss": 0.4798, "step": 16598 }, { "epoch": 0.6891161863839459, "grad_norm": 2.5567173957824707, "learning_rate": 2.32740861274723e-06, "loss": 0.5917, "step": 16599 }, { "epoch": 0.6891577019081573, "grad_norm": 2.7324440479278564, "learning_rate": 2.3268404253970805e-06, "loss": 0.4828, "step": 16600 }, { "epoch": 0.6891992174323687, "grad_norm": 2.559644937515259, "learning_rate": 2.32627228637843e-06, "loss": 0.5994, "step": 16601 }, { "epoch": 0.68924073295658, "grad_norm": 2.6630666255950928, "learning_rate": 2.3257041957015437e-06, "loss": 0.5545, "step": 16602 }, { "epoch": 0.6892822484807913, "grad_norm": 1.9955501556396484, "learning_rate": 2.325136153376698e-06, "loss": 0.502, "step": 16603 }, { "epoch": 0.6893237640050026, "grad_norm": 2.0818324089050293, "learning_rate": 2.3245681594141597e-06, "loss": 0.4055, "step": 16604 }, { "epoch": 0.689365279529214, "grad_norm": 2.1892459392547607, "learning_rate": 2.3240002138242024e-06, "loss": 0.4346, "step": 16605 }, { "epoch": 0.6894067950534253, "grad_norm": 2.9047470092773438, "learning_rate": 2.323432316617092e-06, "loss": 0.5222, "step": 16606 }, { "epoch": 0.6894483105776367, "grad_norm": 2.4736461639404297, "learning_rate": 2.322864467803097e-06, "loss": 0.5217, "step": 16607 }, { "epoch": 0.689489826101848, "grad_norm": 2.412229061126709, "learning_rate": 2.3222966673924846e-06, "loss": 0.4886, "step": 16608 }, { "epoch": 0.6895313416260593, "grad_norm": 2.8043484687805176, "learning_rate": 2.321728915395518e-06, "loss": 0.5897, "step": 16609 }, { "epoch": 0.6895728571502706, "grad_norm": 2.6515252590179443, "learning_rate": 2.3211612118224664e-06, "loss": 0.5565, "step": 16610 }, { "epoch": 0.689614372674482, "grad_norm": 2.047314405441284, "learning_rate": 2.3205935566835918e-06, "loss": 0.4486, "step": 16611 }, { "epoch": 0.6896558881986933, "grad_norm": 2.321908712387085, "learning_rate": 2.3200259499891585e-06, "loss": 0.4805, "step": 16612 }, { "epoch": 0.6896974037229047, "grad_norm": 2.5165579319000244, "learning_rate": 2.3194583917494267e-06, "loss": 0.4301, "step": 16613 }, { "epoch": 0.689738919247116, "grad_norm": 2.4929614067077637, "learning_rate": 2.3188908819746637e-06, "loss": 0.4881, "step": 16614 }, { "epoch": 0.6897804347713273, "grad_norm": 2.1868197917938232, "learning_rate": 2.3183234206751223e-06, "loss": 0.411, "step": 16615 }, { "epoch": 0.6898219502955386, "grad_norm": 2.414227247238159, "learning_rate": 2.3177560078610677e-06, "loss": 0.5554, "step": 16616 }, { "epoch": 0.68986346581975, "grad_norm": 2.501572608947754, "learning_rate": 2.3171886435427567e-06, "loss": 0.5977, "step": 16617 }, { "epoch": 0.6899049813439613, "grad_norm": 2.518646717071533, "learning_rate": 2.3166213277304494e-06, "loss": 0.4403, "step": 16618 }, { "epoch": 0.6899464968681727, "grad_norm": 2.158895254135132, "learning_rate": 2.3160540604344025e-06, "loss": 0.5242, "step": 16619 }, { "epoch": 0.6899880123923839, "grad_norm": 1.9686362743377686, "learning_rate": 2.3154868416648714e-06, "loss": 0.5119, "step": 16620 }, { "epoch": 0.6900295279165953, "grad_norm": 2.481229543685913, "learning_rate": 2.314919671432112e-06, "loss": 0.5772, "step": 16621 }, { "epoch": 0.6900710434408066, "grad_norm": 3.1983203887939453, "learning_rate": 2.314352549746377e-06, "loss": 0.5236, "step": 16622 }, { "epoch": 0.690112558965018, "grad_norm": 2.465632438659668, "learning_rate": 2.3137854766179237e-06, "loss": 0.4837, "step": 16623 }, { "epoch": 0.6901540744892293, "grad_norm": 2.9840192794799805, "learning_rate": 2.3132184520570034e-06, "loss": 0.5846, "step": 16624 }, { "epoch": 0.6901955900134407, "grad_norm": 1.9728447198867798, "learning_rate": 2.312651476073868e-06, "loss": 0.4469, "step": 16625 }, { "epoch": 0.6902371055376519, "grad_norm": 2.6424925327301025, "learning_rate": 2.312084548678767e-06, "loss": 0.635, "step": 16626 }, { "epoch": 0.6902786210618633, "grad_norm": 2.5760374069213867, "learning_rate": 2.311517669881955e-06, "loss": 0.4772, "step": 16627 }, { "epoch": 0.6903201365860746, "grad_norm": 2.527186632156372, "learning_rate": 2.310950839693675e-06, "loss": 0.587, "step": 16628 }, { "epoch": 0.690361652110286, "grad_norm": 2.371027708053589, "learning_rate": 2.310384058124181e-06, "loss": 0.5383, "step": 16629 }, { "epoch": 0.6904031676344973, "grad_norm": 2.2675600051879883, "learning_rate": 2.3098173251837163e-06, "loss": 0.4976, "step": 16630 }, { "epoch": 0.6904446831587087, "grad_norm": 2.38533353805542, "learning_rate": 2.3092506408825323e-06, "loss": 0.5972, "step": 16631 }, { "epoch": 0.69048619868292, "grad_norm": 2.8529441356658936, "learning_rate": 2.308684005230871e-06, "loss": 0.6118, "step": 16632 }, { "epoch": 0.6905277142071313, "grad_norm": 1.8761790990829468, "learning_rate": 2.30811741823898e-06, "loss": 0.4149, "step": 16633 }, { "epoch": 0.6905692297313427, "grad_norm": 2.353883743286133, "learning_rate": 2.3075508799171007e-06, "loss": 0.5466, "step": 16634 }, { "epoch": 0.690610745255554, "grad_norm": 2.525601387023926, "learning_rate": 2.3069843902754767e-06, "loss": 0.4972, "step": 16635 }, { "epoch": 0.6906522607797654, "grad_norm": 2.509493589401245, "learning_rate": 2.306417949324353e-06, "loss": 0.5155, "step": 16636 }, { "epoch": 0.6906937763039767, "grad_norm": 2.4826889038085938, "learning_rate": 2.305851557073968e-06, "loss": 0.638, "step": 16637 }, { "epoch": 0.690735291828188, "grad_norm": 2.328864574432373, "learning_rate": 2.3052852135345644e-06, "loss": 0.4703, "step": 16638 }, { "epoch": 0.6907768073523993, "grad_norm": 2.0073912143707275, "learning_rate": 2.304718918716379e-06, "loss": 0.4164, "step": 16639 }, { "epoch": 0.6908183228766107, "grad_norm": 2.1813557147979736, "learning_rate": 2.3041526726296552e-06, "loss": 0.4485, "step": 16640 }, { "epoch": 0.690859838400822, "grad_norm": 2.4759185314178467, "learning_rate": 2.3035864752846255e-06, "loss": 0.5398, "step": 16641 }, { "epoch": 0.6909013539250334, "grad_norm": 3.007728099822998, "learning_rate": 2.303020326691531e-06, "loss": 0.4451, "step": 16642 }, { "epoch": 0.6909428694492447, "grad_norm": 2.6890833377838135, "learning_rate": 2.302454226860604e-06, "loss": 0.4416, "step": 16643 }, { "epoch": 0.690984384973456, "grad_norm": 2.4695122241973877, "learning_rate": 2.3018881758020846e-06, "loss": 0.6329, "step": 16644 }, { "epoch": 0.6910259004976673, "grad_norm": 2.269885540008545, "learning_rate": 2.301322173526204e-06, "loss": 0.4831, "step": 16645 }, { "epoch": 0.6910674160218787, "grad_norm": 2.1627798080444336, "learning_rate": 2.300756220043197e-06, "loss": 0.4507, "step": 16646 }, { "epoch": 0.69110893154609, "grad_norm": 2.394850254058838, "learning_rate": 2.3001903153632955e-06, "loss": 0.4773, "step": 16647 }, { "epoch": 0.6911504470703014, "grad_norm": 2.040398597717285, "learning_rate": 2.2996244594967288e-06, "loss": 0.4631, "step": 16648 }, { "epoch": 0.6911919625945127, "grad_norm": 2.4311602115631104, "learning_rate": 2.2990586524537323e-06, "loss": 0.5388, "step": 16649 }, { "epoch": 0.691233478118724, "grad_norm": 2.504432439804077, "learning_rate": 2.298492894244534e-06, "loss": 0.4458, "step": 16650 }, { "epoch": 0.6912749936429353, "grad_norm": 2.410487651824951, "learning_rate": 2.2979271848793633e-06, "loss": 0.4724, "step": 16651 }, { "epoch": 0.6913165091671467, "grad_norm": 2.696371555328369, "learning_rate": 2.2973615243684454e-06, "loss": 0.6326, "step": 16652 }, { "epoch": 0.691358024691358, "grad_norm": 2.4327826499938965, "learning_rate": 2.296795912722014e-06, "loss": 0.5162, "step": 16653 }, { "epoch": 0.6913995402155694, "grad_norm": 2.4340007305145264, "learning_rate": 2.296230349950288e-06, "loss": 0.5041, "step": 16654 }, { "epoch": 0.6914410557397807, "grad_norm": 2.201631784439087, "learning_rate": 2.295664836063498e-06, "loss": 0.5241, "step": 16655 }, { "epoch": 0.691482571263992, "grad_norm": 2.7256970405578613, "learning_rate": 2.2950993710718663e-06, "loss": 0.5612, "step": 16656 }, { "epoch": 0.6915240867882033, "grad_norm": 1.974707841873169, "learning_rate": 2.294533954985619e-06, "loss": 0.554, "step": 16657 }, { "epoch": 0.6915656023124147, "grad_norm": 2.412320613861084, "learning_rate": 2.2939685878149775e-06, "loss": 0.5028, "step": 16658 }, { "epoch": 0.691607117836626, "grad_norm": 1.8897501230239868, "learning_rate": 2.2934032695701635e-06, "loss": 0.4793, "step": 16659 }, { "epoch": 0.6916486333608374, "grad_norm": 2.371710777282715, "learning_rate": 2.292838000261399e-06, "loss": 0.4323, "step": 16660 }, { "epoch": 0.6916901488850487, "grad_norm": 1.9793648719787598, "learning_rate": 2.292272779898902e-06, "loss": 0.4766, "step": 16661 }, { "epoch": 0.69173166440926, "grad_norm": 2.4648003578186035, "learning_rate": 2.2917076084928953e-06, "loss": 0.5433, "step": 16662 }, { "epoch": 0.6917731799334714, "grad_norm": 2.6979856491088867, "learning_rate": 2.2911424860535957e-06, "loss": 0.5347, "step": 16663 }, { "epoch": 0.6918146954576827, "grad_norm": 2.5312516689300537, "learning_rate": 2.2905774125912205e-06, "loss": 0.5329, "step": 16664 }, { "epoch": 0.6918562109818941, "grad_norm": 2.520624876022339, "learning_rate": 2.290012388115985e-06, "loss": 0.6336, "step": 16665 }, { "epoch": 0.6918977265061054, "grad_norm": 2.3299291133880615, "learning_rate": 2.2894474126381107e-06, "loss": 0.3861, "step": 16666 }, { "epoch": 0.6919392420303168, "grad_norm": 2.2686009407043457, "learning_rate": 2.288882486167804e-06, "loss": 0.5582, "step": 16667 }, { "epoch": 0.691980757554528, "grad_norm": 2.5278403759002686, "learning_rate": 2.288317608715286e-06, "loss": 0.4234, "step": 16668 }, { "epoch": 0.6920222730787394, "grad_norm": 2.666701078414917, "learning_rate": 2.2877527802907646e-06, "loss": 0.6279, "step": 16669 }, { "epoch": 0.6920637886029507, "grad_norm": 2.41096830368042, "learning_rate": 2.2871880009044567e-06, "loss": 0.5158, "step": 16670 }, { "epoch": 0.6921053041271621, "grad_norm": 2.8275063037872314, "learning_rate": 2.286623270566572e-06, "loss": 0.5236, "step": 16671 }, { "epoch": 0.6921468196513734, "grad_norm": 2.180366039276123, "learning_rate": 2.2860585892873204e-06, "loss": 0.5672, "step": 16672 }, { "epoch": 0.6921883351755848, "grad_norm": 2.2245638370513916, "learning_rate": 2.285493957076912e-06, "loss": 0.4999, "step": 16673 }, { "epoch": 0.692229850699796, "grad_norm": 1.749174952507019, "learning_rate": 2.284929373945553e-06, "loss": 0.4635, "step": 16674 }, { "epoch": 0.6922713662240074, "grad_norm": 2.2517282962799072, "learning_rate": 2.2843648399034573e-06, "loss": 0.5676, "step": 16675 }, { "epoch": 0.6923128817482187, "grad_norm": 2.0047316551208496, "learning_rate": 2.2838003549608245e-06, "loss": 0.4304, "step": 16676 }, { "epoch": 0.6923543972724301, "grad_norm": 2.4071671962738037, "learning_rate": 2.283235919127866e-06, "loss": 0.5618, "step": 16677 }, { "epoch": 0.6923959127966414, "grad_norm": 2.12497615814209, "learning_rate": 2.282671532414783e-06, "loss": 0.5035, "step": 16678 }, { "epoch": 0.6924374283208528, "grad_norm": 2.3500986099243164, "learning_rate": 2.2821071948317857e-06, "loss": 0.5489, "step": 16679 }, { "epoch": 0.692478943845064, "grad_norm": 2.2812960147857666, "learning_rate": 2.2815429063890703e-06, "loss": 0.5745, "step": 16680 }, { "epoch": 0.6925204593692754, "grad_norm": 2.415888786315918, "learning_rate": 2.2809786670968447e-06, "loss": 0.4795, "step": 16681 }, { "epoch": 0.6925619748934867, "grad_norm": 2.64129376411438, "learning_rate": 2.2804144769653065e-06, "loss": 0.3936, "step": 16682 }, { "epoch": 0.6926034904176981, "grad_norm": 2.253837823867798, "learning_rate": 2.2798503360046598e-06, "loss": 0.5367, "step": 16683 }, { "epoch": 0.6926450059419094, "grad_norm": 2.4339916706085205, "learning_rate": 2.2792862442251037e-06, "loss": 0.8018, "step": 16684 }, { "epoch": 0.6926865214661208, "grad_norm": 2.563401460647583, "learning_rate": 2.278722201636836e-06, "loss": 0.468, "step": 16685 }, { "epoch": 0.692728036990332, "grad_norm": 2.440051794052124, "learning_rate": 2.2781582082500552e-06, "loss": 0.537, "step": 16686 }, { "epoch": 0.6927695525145434, "grad_norm": 2.47823166847229, "learning_rate": 2.2775942640749573e-06, "loss": 0.4863, "step": 16687 }, { "epoch": 0.6928110680387547, "grad_norm": 2.2549386024475098, "learning_rate": 2.2770303691217427e-06, "loss": 0.4393, "step": 16688 }, { "epoch": 0.6928525835629661, "grad_norm": 2.2890732288360596, "learning_rate": 2.2764665234006008e-06, "loss": 0.5754, "step": 16689 }, { "epoch": 0.6928940990871774, "grad_norm": 2.0440969467163086, "learning_rate": 2.2759027269217303e-06, "loss": 0.3948, "step": 16690 }, { "epoch": 0.6929356146113888, "grad_norm": 2.115020990371704, "learning_rate": 2.275338979695323e-06, "loss": 0.3717, "step": 16691 }, { "epoch": 0.6929771301356, "grad_norm": 2.092996597290039, "learning_rate": 2.274775281731575e-06, "loss": 0.5174, "step": 16692 }, { "epoch": 0.6930186456598114, "grad_norm": 2.414170026779175, "learning_rate": 2.2742116330406723e-06, "loss": 0.5109, "step": 16693 }, { "epoch": 0.6930601611840228, "grad_norm": 2.2049055099487305, "learning_rate": 2.2736480336328096e-06, "loss": 0.6836, "step": 16694 }, { "epoch": 0.6931016767082341, "grad_norm": 2.3684346675872803, "learning_rate": 2.273084483518176e-06, "loss": 0.6, "step": 16695 }, { "epoch": 0.6931431922324455, "grad_norm": 2.0461440086364746, "learning_rate": 2.2725209827069616e-06, "loss": 0.5994, "step": 16696 }, { "epoch": 0.6931847077566567, "grad_norm": 2.0188848972320557, "learning_rate": 2.2719575312093546e-06, "loss": 0.4337, "step": 16697 }, { "epoch": 0.6932262232808681, "grad_norm": 1.8311564922332764, "learning_rate": 2.2713941290355417e-06, "loss": 0.4706, "step": 16698 }, { "epoch": 0.6932677388050794, "grad_norm": 2.413072109222412, "learning_rate": 2.2708307761957093e-06, "loss": 0.5607, "step": 16699 }, { "epoch": 0.6933092543292908, "grad_norm": 8.157424926757812, "learning_rate": 2.2702674727000412e-06, "loss": 0.3984, "step": 16700 }, { "epoch": 0.6933507698535021, "grad_norm": 2.3306167125701904, "learning_rate": 2.2697042185587274e-06, "loss": 0.5781, "step": 16701 }, { "epoch": 0.6933922853777135, "grad_norm": 2.954263210296631, "learning_rate": 2.2691410137819453e-06, "loss": 0.5413, "step": 16702 }, { "epoch": 0.6934338009019247, "grad_norm": 2.4968676567077637, "learning_rate": 2.2685778583798817e-06, "loss": 0.4811, "step": 16703 }, { "epoch": 0.6934753164261361, "grad_norm": 2.5468108654022217, "learning_rate": 2.2680147523627167e-06, "loss": 0.5783, "step": 16704 }, { "epoch": 0.6935168319503474, "grad_norm": 2.646878957748413, "learning_rate": 2.2674516957406355e-06, "loss": 0.6162, "step": 16705 }, { "epoch": 0.6935583474745588, "grad_norm": 2.4383087158203125, "learning_rate": 2.2668886885238123e-06, "loss": 0.5015, "step": 16706 }, { "epoch": 0.6935998629987701, "grad_norm": 2.337836980819702, "learning_rate": 2.2663257307224308e-06, "loss": 0.5041, "step": 16707 }, { "epoch": 0.6936413785229815, "grad_norm": 2.667793035507202, "learning_rate": 2.265762822346666e-06, "loss": 0.615, "step": 16708 }, { "epoch": 0.6936828940471927, "grad_norm": 2.4478046894073486, "learning_rate": 2.2651999634066994e-06, "loss": 0.5185, "step": 16709 }, { "epoch": 0.6937244095714041, "grad_norm": 2.4850213527679443, "learning_rate": 2.264637153912706e-06, "loss": 0.4995, "step": 16710 }, { "epoch": 0.6937659250956154, "grad_norm": 2.3887476921081543, "learning_rate": 2.264074393874861e-06, "loss": 0.6518, "step": 16711 }, { "epoch": 0.6938074406198268, "grad_norm": 3.049133062362671, "learning_rate": 2.263511683303339e-06, "loss": 0.8261, "step": 16712 }, { "epoch": 0.6938489561440381, "grad_norm": 2.1770997047424316, "learning_rate": 2.262949022208314e-06, "loss": 0.4722, "step": 16713 }, { "epoch": 0.6938904716682495, "grad_norm": 2.115926742553711, "learning_rate": 2.262386410599962e-06, "loss": 0.5103, "step": 16714 }, { "epoch": 0.6939319871924607, "grad_norm": 2.2755653858184814, "learning_rate": 2.26182384848845e-06, "loss": 0.5587, "step": 16715 }, { "epoch": 0.6939735027166721, "grad_norm": 2.0553205013275146, "learning_rate": 2.261261335883953e-06, "loss": 0.4474, "step": 16716 }, { "epoch": 0.6940150182408834, "grad_norm": 2.497220754623413, "learning_rate": 2.2606988727966395e-06, "loss": 0.5759, "step": 16717 }, { "epoch": 0.6940565337650948, "grad_norm": 2.934729814529419, "learning_rate": 2.260136459236683e-06, "loss": 0.6189, "step": 16718 }, { "epoch": 0.6940980492893061, "grad_norm": 2.4373159408569336, "learning_rate": 2.2595740952142452e-06, "loss": 0.4951, "step": 16719 }, { "epoch": 0.6941395648135175, "grad_norm": 2.114502191543579, "learning_rate": 2.2590117807395003e-06, "loss": 0.4366, "step": 16720 }, { "epoch": 0.6941810803377287, "grad_norm": 2.205439567565918, "learning_rate": 2.25844951582261e-06, "loss": 0.5479, "step": 16721 }, { "epoch": 0.6942225958619401, "grad_norm": 2.604128122329712, "learning_rate": 2.257887300473747e-06, "loss": 0.3328, "step": 16722 }, { "epoch": 0.6942641113861514, "grad_norm": 2.2354485988616943, "learning_rate": 2.2573251347030673e-06, "loss": 0.5948, "step": 16723 }, { "epoch": 0.6943056269103628, "grad_norm": 2.4231560230255127, "learning_rate": 2.256763018520742e-06, "loss": 0.5984, "step": 16724 }, { "epoch": 0.6943471424345742, "grad_norm": 2.3956503868103027, "learning_rate": 2.2562009519369314e-06, "loss": 0.5353, "step": 16725 }, { "epoch": 0.6943886579587855, "grad_norm": 2.240628957748413, "learning_rate": 2.2556389349617975e-06, "loss": 0.5471, "step": 16726 }, { "epoch": 0.6944301734829968, "grad_norm": 2.5179810523986816, "learning_rate": 2.2550769676055064e-06, "loss": 0.6513, "step": 16727 }, { "epoch": 0.6944716890072081, "grad_norm": 2.215123176574707, "learning_rate": 2.25451504987821e-06, "loss": 0.5729, "step": 16728 }, { "epoch": 0.6945132045314195, "grad_norm": 2.3377153873443604, "learning_rate": 2.253953181790076e-06, "loss": 0.5177, "step": 16729 }, { "epoch": 0.6945547200556308, "grad_norm": 2.54790997505188, "learning_rate": 2.253391363351258e-06, "loss": 0.6335, "step": 16730 }, { "epoch": 0.6945962355798422, "grad_norm": 2.1085917949676514, "learning_rate": 2.2528295945719186e-06, "loss": 0.5393, "step": 16731 }, { "epoch": 0.6946377511040535, "grad_norm": 1.9630153179168701, "learning_rate": 2.252267875462209e-06, "loss": 0.4149, "step": 16732 }, { "epoch": 0.6946792666282648, "grad_norm": 2.726886749267578, "learning_rate": 2.2517062060322896e-06, "loss": 0.5004, "step": 16733 }, { "epoch": 0.6947207821524761, "grad_norm": 1.9999804496765137, "learning_rate": 2.2511445862923124e-06, "loss": 0.5341, "step": 16734 }, { "epoch": 0.6947622976766875, "grad_norm": 2.415019989013672, "learning_rate": 2.2505830162524377e-06, "loss": 0.5513, "step": 16735 }, { "epoch": 0.6948038132008988, "grad_norm": 2.6506903171539307, "learning_rate": 2.25002149592281e-06, "loss": 0.6283, "step": 16736 }, { "epoch": 0.6948453287251102, "grad_norm": 2.1171581745147705, "learning_rate": 2.249460025313589e-06, "loss": 0.4375, "step": 16737 }, { "epoch": 0.6948868442493215, "grad_norm": 1.9521734714508057, "learning_rate": 2.2488986044349224e-06, "loss": 0.5183, "step": 16738 }, { "epoch": 0.6949283597735328, "grad_norm": 2.4897620677948, "learning_rate": 2.2483372332969613e-06, "loss": 0.6686, "step": 16739 }, { "epoch": 0.6949698752977441, "grad_norm": 2.5545570850372314, "learning_rate": 2.247775911909859e-06, "loss": 0.5135, "step": 16740 }, { "epoch": 0.6950113908219555, "grad_norm": 2.7548091411590576, "learning_rate": 2.247214640283758e-06, "loss": 0.5424, "step": 16741 }, { "epoch": 0.6950529063461668, "grad_norm": 2.9347007274627686, "learning_rate": 2.246653418428812e-06, "loss": 0.5161, "step": 16742 }, { "epoch": 0.6950944218703782, "grad_norm": 2.660062313079834, "learning_rate": 2.246092246355163e-06, "loss": 0.6068, "step": 16743 }, { "epoch": 0.6951359373945895, "grad_norm": 2.4807496070861816, "learning_rate": 2.245531124072965e-06, "loss": 0.5492, "step": 16744 }, { "epoch": 0.6951774529188008, "grad_norm": 2.089817762374878, "learning_rate": 2.2449700515923534e-06, "loss": 0.5157, "step": 16745 }, { "epoch": 0.6952189684430121, "grad_norm": 1.9389595985412598, "learning_rate": 2.244409028923479e-06, "loss": 0.4758, "step": 16746 }, { "epoch": 0.6952604839672235, "grad_norm": 2.0634846687316895, "learning_rate": 2.243848056076483e-06, "loss": 0.5743, "step": 16747 }, { "epoch": 0.6953019994914348, "grad_norm": 2.809998035430908, "learning_rate": 2.2432871330615115e-06, "loss": 0.4747, "step": 16748 }, { "epoch": 0.6953435150156462, "grad_norm": 2.6650893688201904, "learning_rate": 2.2427262598886996e-06, "loss": 0.4974, "step": 16749 }, { "epoch": 0.6953850305398575, "grad_norm": 1.944053292274475, "learning_rate": 2.2421654365681934e-06, "loss": 0.4281, "step": 16750 }, { "epoch": 0.6954265460640688, "grad_norm": 2.327007293701172, "learning_rate": 2.241604663110131e-06, "loss": 0.5998, "step": 16751 }, { "epoch": 0.6954680615882801, "grad_norm": 2.8858771324157715, "learning_rate": 2.2410439395246497e-06, "loss": 0.4134, "step": 16752 }, { "epoch": 0.6955095771124915, "grad_norm": 2.041290283203125, "learning_rate": 2.2404832658218927e-06, "loss": 0.482, "step": 16753 }, { "epoch": 0.6955510926367028, "grad_norm": 2.393423318862915, "learning_rate": 2.2399226420119903e-06, "loss": 0.4748, "step": 16754 }, { "epoch": 0.6955926081609142, "grad_norm": 2.139187812805176, "learning_rate": 2.239362068105083e-06, "loss": 0.6254, "step": 16755 }, { "epoch": 0.6956341236851256, "grad_norm": 2.3908865451812744, "learning_rate": 2.238801544111304e-06, "loss": 0.4914, "step": 16756 }, { "epoch": 0.6956756392093368, "grad_norm": 2.344632625579834, "learning_rate": 2.2382410700407932e-06, "loss": 0.4005, "step": 16757 }, { "epoch": 0.6957171547335482, "grad_norm": 2.2498278617858887, "learning_rate": 2.2376806459036755e-06, "loss": 0.622, "step": 16758 }, { "epoch": 0.6957586702577595, "grad_norm": 1.8154902458190918, "learning_rate": 2.23712027171009e-06, "loss": 0.5562, "step": 16759 }, { "epoch": 0.6958001857819709, "grad_norm": 2.156057357788086, "learning_rate": 2.236559947470164e-06, "loss": 0.4295, "step": 16760 }, { "epoch": 0.6958417013061822, "grad_norm": 2.921635389328003, "learning_rate": 2.2359996731940348e-06, "loss": 0.5339, "step": 16761 }, { "epoch": 0.6958832168303936, "grad_norm": 2.1722311973571777, "learning_rate": 2.2354394488918242e-06, "loss": 0.6383, "step": 16762 }, { "epoch": 0.6959247323546048, "grad_norm": 3.055511951446533, "learning_rate": 2.2348792745736674e-06, "loss": 0.596, "step": 16763 }, { "epoch": 0.6959662478788162, "grad_norm": 3.5478203296661377, "learning_rate": 2.2343191502496902e-06, "loss": 0.5719, "step": 16764 }, { "epoch": 0.6960077634030275, "grad_norm": 2.27827787399292, "learning_rate": 2.2337590759300176e-06, "loss": 0.661, "step": 16765 }, { "epoch": 0.6960492789272389, "grad_norm": 2.3461403846740723, "learning_rate": 2.2331990516247826e-06, "loss": 0.585, "step": 16766 }, { "epoch": 0.6960907944514502, "grad_norm": 2.380509376525879, "learning_rate": 2.232639077344102e-06, "loss": 0.5009, "step": 16767 }, { "epoch": 0.6961323099756616, "grad_norm": 2.355962038040161, "learning_rate": 2.2320791530981066e-06, "loss": 0.4884, "step": 16768 }, { "epoch": 0.6961738254998728, "grad_norm": 2.7843871116638184, "learning_rate": 2.2315192788969164e-06, "loss": 0.5357, "step": 16769 }, { "epoch": 0.6962153410240842, "grad_norm": 2.323833703994751, "learning_rate": 2.2309594547506592e-06, "loss": 0.5657, "step": 16770 }, { "epoch": 0.6962568565482955, "grad_norm": 2.813438892364502, "learning_rate": 2.230399680669449e-06, "loss": 0.5133, "step": 16771 }, { "epoch": 0.6962983720725069, "grad_norm": 2.114971160888672, "learning_rate": 2.2298399566634126e-06, "loss": 0.4456, "step": 16772 }, { "epoch": 0.6963398875967182, "grad_norm": 2.9300217628479004, "learning_rate": 2.229280282742667e-06, "loss": 0.5223, "step": 16773 }, { "epoch": 0.6963814031209296, "grad_norm": 1.9883793592453003, "learning_rate": 2.228720658917336e-06, "loss": 0.5903, "step": 16774 }, { "epoch": 0.6964229186451408, "grad_norm": 1.9325401782989502, "learning_rate": 2.2281610851975305e-06, "loss": 0.5052, "step": 16775 }, { "epoch": 0.6964644341693522, "grad_norm": 2.219257354736328, "learning_rate": 2.2276015615933734e-06, "loss": 0.5942, "step": 16776 }, { "epoch": 0.6965059496935635, "grad_norm": 2.182682752609253, "learning_rate": 2.227042088114979e-06, "loss": 0.4046, "step": 16777 }, { "epoch": 0.6965474652177749, "grad_norm": 2.3943891525268555, "learning_rate": 2.2264826647724614e-06, "loss": 0.5323, "step": 16778 }, { "epoch": 0.6965889807419862, "grad_norm": 2.2210028171539307, "learning_rate": 2.22592329157594e-06, "loss": 0.421, "step": 16779 }, { "epoch": 0.6966304962661976, "grad_norm": 1.9824820756912231, "learning_rate": 2.225363968535522e-06, "loss": 0.4198, "step": 16780 }, { "epoch": 0.6966720117904088, "grad_norm": 2.3999671936035156, "learning_rate": 2.224804695661324e-06, "loss": 0.4824, "step": 16781 }, { "epoch": 0.6967135273146202, "grad_norm": 2.320110559463501, "learning_rate": 2.2242454729634555e-06, "loss": 0.5645, "step": 16782 }, { "epoch": 0.6967550428388315, "grad_norm": 2.053032875061035, "learning_rate": 2.2236863004520325e-06, "loss": 0.4967, "step": 16783 }, { "epoch": 0.6967965583630429, "grad_norm": 2.125417470932007, "learning_rate": 2.2231271781371584e-06, "loss": 0.5073, "step": 16784 }, { "epoch": 0.6968380738872542, "grad_norm": 2.26155686378479, "learning_rate": 2.2225681060289468e-06, "loss": 0.6008, "step": 16785 }, { "epoch": 0.6968795894114655, "grad_norm": 2.595309257507324, "learning_rate": 2.2220090841375027e-06, "loss": 0.4468, "step": 16786 }, { "epoch": 0.6969211049356769, "grad_norm": 1.94894278049469, "learning_rate": 2.221450112472938e-06, "loss": 0.4065, "step": 16787 }, { "epoch": 0.6969626204598882, "grad_norm": 2.282597541809082, "learning_rate": 2.220891191045353e-06, "loss": 0.4598, "step": 16788 }, { "epoch": 0.6970041359840996, "grad_norm": 2.348545551300049, "learning_rate": 2.220332319864858e-06, "loss": 0.4795, "step": 16789 }, { "epoch": 0.6970456515083109, "grad_norm": 2.157766103744507, "learning_rate": 2.219773498941556e-06, "loss": 0.3999, "step": 16790 }, { "epoch": 0.6970871670325223, "grad_norm": 2.1614556312561035, "learning_rate": 2.2192147282855487e-06, "loss": 0.5605, "step": 16791 }, { "epoch": 0.6971286825567335, "grad_norm": 2.0263142585754395, "learning_rate": 2.2186560079069442e-06, "loss": 0.4628, "step": 16792 }, { "epoch": 0.6971701980809449, "grad_norm": 2.1621358394622803, "learning_rate": 2.218097337815837e-06, "loss": 0.6053, "step": 16793 }, { "epoch": 0.6972117136051562, "grad_norm": 2.2299375534057617, "learning_rate": 2.2175387180223333e-06, "loss": 0.4524, "step": 16794 }, { "epoch": 0.6972532291293676, "grad_norm": 1.9458200931549072, "learning_rate": 2.2169801485365307e-06, "loss": 0.4264, "step": 16795 }, { "epoch": 0.6972947446535789, "grad_norm": 2.4946718215942383, "learning_rate": 2.216421629368532e-06, "loss": 0.5538, "step": 16796 }, { "epoch": 0.6973362601777903, "grad_norm": 2.4451053142547607, "learning_rate": 2.215863160528429e-06, "loss": 0.6003, "step": 16797 }, { "epoch": 0.6973777757020015, "grad_norm": 2.9688720703125, "learning_rate": 2.2153047420263248e-06, "loss": 0.5571, "step": 16798 }, { "epoch": 0.6974192912262129, "grad_norm": 2.2349789142608643, "learning_rate": 2.2147463738723107e-06, "loss": 0.4488, "step": 16799 }, { "epoch": 0.6974608067504242, "grad_norm": 2.6498327255249023, "learning_rate": 2.2141880560764894e-06, "loss": 0.4014, "step": 16800 }, { "epoch": 0.6975023222746356, "grad_norm": 2.499800443649292, "learning_rate": 2.213629788648947e-06, "loss": 0.588, "step": 16801 }, { "epoch": 0.6975438377988469, "grad_norm": 2.318054437637329, "learning_rate": 2.2130715715997835e-06, "loss": 0.4419, "step": 16802 }, { "epoch": 0.6975853533230583, "grad_norm": 2.4758071899414062, "learning_rate": 2.212513404939089e-06, "loss": 0.494, "step": 16803 }, { "epoch": 0.6976268688472695, "grad_norm": 2.1025052070617676, "learning_rate": 2.2119552886769536e-06, "loss": 0.5859, "step": 16804 }, { "epoch": 0.6976683843714809, "grad_norm": 2.378979206085205, "learning_rate": 2.2113972228234736e-06, "loss": 0.507, "step": 16805 }, { "epoch": 0.6977098998956922, "grad_norm": 2.4526233673095703, "learning_rate": 2.2108392073887323e-06, "loss": 0.5188, "step": 16806 }, { "epoch": 0.6977514154199036, "grad_norm": 2.010565757751465, "learning_rate": 2.210281242382824e-06, "loss": 0.473, "step": 16807 }, { "epoch": 0.6977929309441149, "grad_norm": 2.710843324661255, "learning_rate": 2.209723327815833e-06, "loss": 0.4537, "step": 16808 }, { "epoch": 0.6978344464683263, "grad_norm": 2.157845973968506, "learning_rate": 2.209165463697852e-06, "loss": 0.561, "step": 16809 }, { "epoch": 0.6978759619925375, "grad_norm": 2.4759068489074707, "learning_rate": 2.2086076500389605e-06, "loss": 0.5765, "step": 16810 }, { "epoch": 0.6979174775167489, "grad_norm": 2.6700551509857178, "learning_rate": 2.2080498868492486e-06, "loss": 0.534, "step": 16811 }, { "epoch": 0.6979589930409602, "grad_norm": 1.9265602827072144, "learning_rate": 2.2074921741387976e-06, "loss": 0.3229, "step": 16812 }, { "epoch": 0.6980005085651716, "grad_norm": 2.4565634727478027, "learning_rate": 2.206934511917697e-06, "loss": 0.5419, "step": 16813 }, { "epoch": 0.6980420240893829, "grad_norm": 3.059007406234741, "learning_rate": 2.206376900196022e-06, "loss": 0.6275, "step": 16814 }, { "epoch": 0.6980835396135943, "grad_norm": 2.1588218212127686, "learning_rate": 2.205819338983859e-06, "loss": 0.4734, "step": 16815 }, { "epoch": 0.6981250551378055, "grad_norm": 2.272852897644043, "learning_rate": 2.205261828291288e-06, "loss": 0.6656, "step": 16816 }, { "epoch": 0.6981665706620169, "grad_norm": 2.581207752227783, "learning_rate": 2.204704368128387e-06, "loss": 0.5573, "step": 16817 }, { "epoch": 0.6982080861862283, "grad_norm": 2.2997353076934814, "learning_rate": 2.2041469585052404e-06, "loss": 0.4597, "step": 16818 }, { "epoch": 0.6982496017104396, "grad_norm": 2.5458085536956787, "learning_rate": 2.203589599431919e-06, "loss": 0.494, "step": 16819 }, { "epoch": 0.698291117234651, "grad_norm": 2.1904473304748535, "learning_rate": 2.2030322909185055e-06, "loss": 0.4383, "step": 16820 }, { "epoch": 0.6983326327588623, "grad_norm": 2.5834624767303467, "learning_rate": 2.2024750329750723e-06, "loss": 0.7011, "step": 16821 }, { "epoch": 0.6983741482830736, "grad_norm": 2.1901016235351562, "learning_rate": 2.2019178256117003e-06, "loss": 0.5506, "step": 16822 }, { "epoch": 0.6984156638072849, "grad_norm": 2.4716098308563232, "learning_rate": 2.2013606688384574e-06, "loss": 0.5959, "step": 16823 }, { "epoch": 0.6984571793314963, "grad_norm": 3.1479787826538086, "learning_rate": 2.2008035626654216e-06, "loss": 0.6696, "step": 16824 }, { "epoch": 0.6984986948557076, "grad_norm": 2.2700302600860596, "learning_rate": 2.200246507102663e-06, "loss": 0.4961, "step": 16825 }, { "epoch": 0.698540210379919, "grad_norm": 2.3163018226623535, "learning_rate": 2.1996895021602575e-06, "loss": 0.4696, "step": 16826 }, { "epoch": 0.6985817259041303, "grad_norm": 2.0354466438293457, "learning_rate": 2.1991325478482695e-06, "loss": 0.422, "step": 16827 }, { "epoch": 0.6986232414283416, "grad_norm": 2.4713287353515625, "learning_rate": 2.1985756441767743e-06, "loss": 0.4331, "step": 16828 }, { "epoch": 0.6986647569525529, "grad_norm": 2.661155939102173, "learning_rate": 2.198018791155839e-06, "loss": 0.59, "step": 16829 }, { "epoch": 0.6987062724767643, "grad_norm": 2.2769687175750732, "learning_rate": 2.1974619887955294e-06, "loss": 0.4966, "step": 16830 }, { "epoch": 0.6987477880009756, "grad_norm": 2.2762937545776367, "learning_rate": 2.1969052371059184e-06, "loss": 0.4684, "step": 16831 }, { "epoch": 0.698789303525187, "grad_norm": 1.9847418069839478, "learning_rate": 2.1963485360970652e-06, "loss": 0.3899, "step": 16832 }, { "epoch": 0.6988308190493983, "grad_norm": 3.0158936977386475, "learning_rate": 2.195791885779041e-06, "loss": 0.6461, "step": 16833 }, { "epoch": 0.6988723345736096, "grad_norm": 2.9852192401885986, "learning_rate": 2.1952352861619053e-06, "loss": 0.6247, "step": 16834 }, { "epoch": 0.6989138500978209, "grad_norm": 2.570223093032837, "learning_rate": 2.1946787372557275e-06, "loss": 0.455, "step": 16835 }, { "epoch": 0.6989553656220323, "grad_norm": 2.4660863876342773, "learning_rate": 2.1941222390705635e-06, "loss": 0.4703, "step": 16836 }, { "epoch": 0.6989968811462436, "grad_norm": 2.3799102306365967, "learning_rate": 2.19356579161648e-06, "loss": 0.6413, "step": 16837 }, { "epoch": 0.699038396670455, "grad_norm": 2.9582903385162354, "learning_rate": 2.1930093949035334e-06, "loss": 0.5839, "step": 16838 }, { "epoch": 0.6990799121946663, "grad_norm": 2.037689208984375, "learning_rate": 2.1924530489417906e-06, "loss": 0.5039, "step": 16839 }, { "epoch": 0.6991214277188776, "grad_norm": 2.431628704071045, "learning_rate": 2.1918967537413017e-06, "loss": 0.4996, "step": 16840 }, { "epoch": 0.6991629432430889, "grad_norm": 2.3536715507507324, "learning_rate": 2.1913405093121308e-06, "loss": 0.3865, "step": 16841 }, { "epoch": 0.6992044587673003, "grad_norm": 2.3493731021881104, "learning_rate": 2.1907843156643333e-06, "loss": 0.4822, "step": 16842 }, { "epoch": 0.6992459742915116, "grad_norm": 2.344503164291382, "learning_rate": 2.1902281728079626e-06, "loss": 0.6199, "step": 16843 }, { "epoch": 0.699287489815723, "grad_norm": 2.3085756301879883, "learning_rate": 2.189672080753081e-06, "loss": 0.4371, "step": 16844 }, { "epoch": 0.6993290053399343, "grad_norm": 2.2574682235717773, "learning_rate": 2.189116039509734e-06, "loss": 0.5338, "step": 16845 }, { "epoch": 0.6993705208641456, "grad_norm": 2.502004861831665, "learning_rate": 2.1885600490879804e-06, "loss": 0.3931, "step": 16846 }, { "epoch": 0.6994120363883569, "grad_norm": 2.473140239715576, "learning_rate": 2.1880041094978703e-06, "loss": 0.5335, "step": 16847 }, { "epoch": 0.6994535519125683, "grad_norm": 2.680180788040161, "learning_rate": 2.1874482207494603e-06, "loss": 0.5063, "step": 16848 }, { "epoch": 0.6994950674367797, "grad_norm": 2.0467536449432373, "learning_rate": 2.186892382852793e-06, "loss": 0.4391, "step": 16849 }, { "epoch": 0.699536582960991, "grad_norm": 2.080456256866455, "learning_rate": 2.1863365958179242e-06, "loss": 0.4281, "step": 16850 }, { "epoch": 0.6995780984852024, "grad_norm": 2.196509838104248, "learning_rate": 2.1857808596548992e-06, "loss": 0.4826, "step": 16851 }, { "epoch": 0.6996196140094136, "grad_norm": 2.717900037765503, "learning_rate": 2.1852251743737706e-06, "loss": 0.5729, "step": 16852 }, { "epoch": 0.699661129533625, "grad_norm": 2.2562103271484375, "learning_rate": 2.1846695399845786e-06, "loss": 0.5493, "step": 16853 }, { "epoch": 0.6997026450578363, "grad_norm": 2.2685608863830566, "learning_rate": 2.1841139564973746e-06, "loss": 0.523, "step": 16854 }, { "epoch": 0.6997441605820477, "grad_norm": 2.175570011138916, "learning_rate": 2.1835584239222022e-06, "loss": 0.5495, "step": 16855 }, { "epoch": 0.699785676106259, "grad_norm": 2.3341641426086426, "learning_rate": 2.1830029422691036e-06, "loss": 0.3748, "step": 16856 }, { "epoch": 0.6998271916304704, "grad_norm": 2.0997183322906494, "learning_rate": 2.182447511548127e-06, "loss": 0.5075, "step": 16857 }, { "epoch": 0.6998687071546816, "grad_norm": 2.3403947353363037, "learning_rate": 2.1818921317693077e-06, "loss": 0.4096, "step": 16858 }, { "epoch": 0.699910222678893, "grad_norm": 2.6651034355163574, "learning_rate": 2.181336802942693e-06, "loss": 0.7528, "step": 16859 }, { "epoch": 0.6999517382031043, "grad_norm": 2.8076820373535156, "learning_rate": 2.1807815250783194e-06, "loss": 0.592, "step": 16860 }, { "epoch": 0.6999932537273157, "grad_norm": 2.639068365097046, "learning_rate": 2.1802262981862323e-06, "loss": 0.5029, "step": 16861 }, { "epoch": 0.700034769251527, "grad_norm": 2.009565830230713, "learning_rate": 2.179671122276463e-06, "loss": 0.4769, "step": 16862 }, { "epoch": 0.7000762847757384, "grad_norm": 2.0495638847351074, "learning_rate": 2.179115997359054e-06, "loss": 0.6011, "step": 16863 }, { "epoch": 0.7001178002999496, "grad_norm": 3.1325736045837402, "learning_rate": 2.1785609234440384e-06, "loss": 0.5847, "step": 16864 }, { "epoch": 0.700159315824161, "grad_norm": 2.4255869388580322, "learning_rate": 2.1780059005414594e-06, "loss": 0.5164, "step": 16865 }, { "epoch": 0.7002008313483723, "grad_norm": 2.1101560592651367, "learning_rate": 2.177450928661343e-06, "loss": 0.4885, "step": 16866 }, { "epoch": 0.7002423468725837, "grad_norm": 2.618067979812622, "learning_rate": 2.176896007813729e-06, "loss": 0.547, "step": 16867 }, { "epoch": 0.700283862396795, "grad_norm": 2.5857458114624023, "learning_rate": 2.1763411380086492e-06, "loss": 0.5298, "step": 16868 }, { "epoch": 0.7003253779210064, "grad_norm": 2.6392576694488525, "learning_rate": 2.1757863192561356e-06, "loss": 0.5265, "step": 16869 }, { "epoch": 0.7003668934452176, "grad_norm": 2.706254005432129, "learning_rate": 2.1752315515662192e-06, "loss": 0.6788, "step": 16870 }, { "epoch": 0.700408408969429, "grad_norm": 2.4734623432159424, "learning_rate": 2.174676834948928e-06, "loss": 0.5939, "step": 16871 }, { "epoch": 0.7004499244936403, "grad_norm": 2.274017572402954, "learning_rate": 2.1741221694142973e-06, "loss": 0.4401, "step": 16872 }, { "epoch": 0.7004914400178517, "grad_norm": 2.3307900428771973, "learning_rate": 2.1735675549723494e-06, "loss": 0.5039, "step": 16873 }, { "epoch": 0.700532955542063, "grad_norm": 2.384290933609009, "learning_rate": 2.173012991633119e-06, "loss": 0.5241, "step": 16874 }, { "epoch": 0.7005744710662744, "grad_norm": 2.109938144683838, "learning_rate": 2.172458479406624e-06, "loss": 0.4636, "step": 16875 }, { "epoch": 0.7006159865904856, "grad_norm": 2.211094617843628, "learning_rate": 2.171904018302897e-06, "loss": 0.5671, "step": 16876 }, { "epoch": 0.700657502114697, "grad_norm": 2.707288980484009, "learning_rate": 2.1713496083319592e-06, "loss": 0.3937, "step": 16877 }, { "epoch": 0.7006990176389083, "grad_norm": 2.0606372356414795, "learning_rate": 2.1707952495038386e-06, "loss": 0.3751, "step": 16878 }, { "epoch": 0.7007405331631197, "grad_norm": 1.9566853046417236, "learning_rate": 2.170240941828552e-06, "loss": 0.5616, "step": 16879 }, { "epoch": 0.7007820486873311, "grad_norm": 2.3346009254455566, "learning_rate": 2.1696866853161267e-06, "loss": 0.4696, "step": 16880 }, { "epoch": 0.7008235642115423, "grad_norm": 2.9629385471343994, "learning_rate": 2.169132479976582e-06, "loss": 0.553, "step": 16881 }, { "epoch": 0.7008650797357537, "grad_norm": 2.4388043880462646, "learning_rate": 2.168578325819938e-06, "loss": 0.5267, "step": 16882 }, { "epoch": 0.700906595259965, "grad_norm": 2.1384389400482178, "learning_rate": 2.1680242228562137e-06, "loss": 0.5451, "step": 16883 }, { "epoch": 0.7009481107841764, "grad_norm": 2.193812131881714, "learning_rate": 2.167470171095426e-06, "loss": 0.3888, "step": 16884 }, { "epoch": 0.7009896263083877, "grad_norm": 2.1517865657806396, "learning_rate": 2.166916170547596e-06, "loss": 0.3649, "step": 16885 }, { "epoch": 0.7010311418325991, "grad_norm": 2.8200836181640625, "learning_rate": 2.1663622212227364e-06, "loss": 0.5592, "step": 16886 }, { "epoch": 0.7010726573568103, "grad_norm": 3.026902198791504, "learning_rate": 2.165808323130868e-06, "loss": 0.3797, "step": 16887 }, { "epoch": 0.7011141728810217, "grad_norm": 2.326603889465332, "learning_rate": 2.1652544762819983e-06, "loss": 0.4336, "step": 16888 }, { "epoch": 0.701155688405233, "grad_norm": 1.9750646352767944, "learning_rate": 2.1647006806861472e-06, "loss": 0.5248, "step": 16889 }, { "epoch": 0.7011972039294444, "grad_norm": 3.1135263442993164, "learning_rate": 2.1641469363533226e-06, "loss": 0.4977, "step": 16890 }, { "epoch": 0.7012387194536557, "grad_norm": 2.477663278579712, "learning_rate": 2.1635932432935423e-06, "loss": 0.6341, "step": 16891 }, { "epoch": 0.7012802349778671, "grad_norm": 1.9190601110458374, "learning_rate": 2.1630396015168102e-06, "loss": 0.4314, "step": 16892 }, { "epoch": 0.7013217505020783, "grad_norm": 2.678778648376465, "learning_rate": 2.162486011033142e-06, "loss": 0.6117, "step": 16893 }, { "epoch": 0.7013632660262897, "grad_norm": 2.943490505218506, "learning_rate": 2.1619324718525443e-06, "loss": 0.5399, "step": 16894 }, { "epoch": 0.701404781550501, "grad_norm": 2.4243721961975098, "learning_rate": 2.161378983985025e-06, "loss": 0.4214, "step": 16895 }, { "epoch": 0.7014462970747124, "grad_norm": 2.6498711109161377, "learning_rate": 2.1608255474405932e-06, "loss": 0.6317, "step": 16896 }, { "epoch": 0.7014878125989237, "grad_norm": 2.553037643432617, "learning_rate": 2.160272162229251e-06, "loss": 0.5742, "step": 16897 }, { "epoch": 0.7015293281231351, "grad_norm": 2.375667095184326, "learning_rate": 2.1597188283610088e-06, "loss": 0.4747, "step": 16898 }, { "epoch": 0.7015708436473463, "grad_norm": 2.383125066757202, "learning_rate": 2.1591655458458664e-06, "loss": 0.3066, "step": 16899 }, { "epoch": 0.7016123591715577, "grad_norm": 2.957127809524536, "learning_rate": 2.1586123146938343e-06, "loss": 0.6278, "step": 16900 }, { "epoch": 0.701653874695769, "grad_norm": 2.368076801300049, "learning_rate": 2.158059134914906e-06, "loss": 0.4638, "step": 16901 }, { "epoch": 0.7016953902199804, "grad_norm": 2.4537904262542725, "learning_rate": 2.1575060065190902e-06, "loss": 0.5839, "step": 16902 }, { "epoch": 0.7017369057441917, "grad_norm": 2.2355904579162598, "learning_rate": 2.156952929516385e-06, "loss": 0.5577, "step": 16903 }, { "epoch": 0.7017784212684031, "grad_norm": 2.799795150756836, "learning_rate": 2.15639990391679e-06, "loss": 0.5394, "step": 16904 }, { "epoch": 0.7018199367926143, "grad_norm": 2.490243434906006, "learning_rate": 2.1558469297303025e-06, "loss": 0.5425, "step": 16905 }, { "epoch": 0.7018614523168257, "grad_norm": 2.254746437072754, "learning_rate": 2.155294006966924e-06, "loss": 0.5797, "step": 16906 }, { "epoch": 0.701902967841037, "grad_norm": 2.3077569007873535, "learning_rate": 2.1547411356366504e-06, "loss": 0.4341, "step": 16907 }, { "epoch": 0.7019444833652484, "grad_norm": 2.359269380569458, "learning_rate": 2.154188315749477e-06, "loss": 0.4682, "step": 16908 }, { "epoch": 0.7019859988894597, "grad_norm": 1.93252694606781, "learning_rate": 2.1536355473153985e-06, "loss": 0.4765, "step": 16909 }, { "epoch": 0.7020275144136711, "grad_norm": 2.52059268951416, "learning_rate": 2.1530828303444086e-06, "loss": 0.4164, "step": 16910 }, { "epoch": 0.7020690299378824, "grad_norm": 2.34822940826416, "learning_rate": 2.152530164846503e-06, "loss": 0.4026, "step": 16911 }, { "epoch": 0.7021105454620937, "grad_norm": 2.258958578109741, "learning_rate": 2.1519775508316707e-06, "loss": 0.5448, "step": 16912 }, { "epoch": 0.7021520609863051, "grad_norm": 2.4160752296447754, "learning_rate": 2.1514249883099097e-06, "loss": 0.5165, "step": 16913 }, { "epoch": 0.7021935765105164, "grad_norm": 1.9722765684127808, "learning_rate": 2.150872477291202e-06, "loss": 0.4154, "step": 16914 }, { "epoch": 0.7022350920347278, "grad_norm": 2.9404029846191406, "learning_rate": 2.1503200177855427e-06, "loss": 0.6851, "step": 16915 }, { "epoch": 0.7022766075589391, "grad_norm": 3.0112552642822266, "learning_rate": 2.149767609802919e-06, "loss": 0.484, "step": 16916 }, { "epoch": 0.7023181230831504, "grad_norm": 2.381601333618164, "learning_rate": 2.1492152533533185e-06, "loss": 0.5091, "step": 16917 }, { "epoch": 0.7023596386073617, "grad_norm": 2.3076179027557373, "learning_rate": 2.1486629484467257e-06, "loss": 0.4787, "step": 16918 }, { "epoch": 0.7024011541315731, "grad_norm": 2.279636859893799, "learning_rate": 2.1481106950931306e-06, "loss": 0.4267, "step": 16919 }, { "epoch": 0.7024426696557844, "grad_norm": 2.4033420085906982, "learning_rate": 2.1475584933025166e-06, "loss": 0.5969, "step": 16920 }, { "epoch": 0.7024841851799958, "grad_norm": 2.7695627212524414, "learning_rate": 2.147006343084867e-06, "loss": 0.5591, "step": 16921 }, { "epoch": 0.7025257007042071, "grad_norm": 2.1933631896972656, "learning_rate": 2.1464542444501647e-06, "loss": 0.4789, "step": 16922 }, { "epoch": 0.7025672162284184, "grad_norm": 2.5050711631774902, "learning_rate": 2.1459021974083905e-06, "loss": 0.4912, "step": 16923 }, { "epoch": 0.7026087317526297, "grad_norm": 2.208292007446289, "learning_rate": 2.1453502019695295e-06, "loss": 0.4718, "step": 16924 }, { "epoch": 0.7026502472768411, "grad_norm": 2.65339994430542, "learning_rate": 2.1447982581435573e-06, "loss": 0.5764, "step": 16925 }, { "epoch": 0.7026917628010524, "grad_norm": 2.5250275135040283, "learning_rate": 2.1442463659404587e-06, "loss": 0.5421, "step": 16926 }, { "epoch": 0.7027332783252638, "grad_norm": 2.0049211978912354, "learning_rate": 2.143694525370206e-06, "loss": 0.4469, "step": 16927 }, { "epoch": 0.702774793849475, "grad_norm": 2.151258945465088, "learning_rate": 2.1431427364427813e-06, "loss": 0.3884, "step": 16928 }, { "epoch": 0.7028163093736864, "grad_norm": 2.0445988178253174, "learning_rate": 2.1425909991681594e-06, "loss": 0.4481, "step": 16929 }, { "epoch": 0.7028578248978977, "grad_norm": 2.203845500946045, "learning_rate": 2.1420393135563158e-06, "loss": 0.4131, "step": 16930 }, { "epoch": 0.7028993404221091, "grad_norm": 2.5372836589813232, "learning_rate": 2.141487679617223e-06, "loss": 0.4373, "step": 16931 }, { "epoch": 0.7029408559463204, "grad_norm": 2.478708505630493, "learning_rate": 2.140936097360859e-06, "loss": 0.5486, "step": 16932 }, { "epoch": 0.7029823714705318, "grad_norm": 2.8478004932403564, "learning_rate": 2.1403845667971944e-06, "loss": 0.467, "step": 16933 }, { "epoch": 0.703023886994743, "grad_norm": 2.428873300552368, "learning_rate": 2.139833087936201e-06, "loss": 0.4043, "step": 16934 }, { "epoch": 0.7030654025189544, "grad_norm": 2.740797519683838, "learning_rate": 2.13928166078785e-06, "loss": 0.5221, "step": 16935 }, { "epoch": 0.7031069180431657, "grad_norm": 2.6697943210601807, "learning_rate": 2.1387302853621087e-06, "loss": 0.5772, "step": 16936 }, { "epoch": 0.7031484335673771, "grad_norm": 3.536334753036499, "learning_rate": 2.1381789616689514e-06, "loss": 0.7047, "step": 16937 }, { "epoch": 0.7031899490915884, "grad_norm": 2.730262041091919, "learning_rate": 2.1376276897183407e-06, "loss": 0.4918, "step": 16938 }, { "epoch": 0.7032314646157998, "grad_norm": 2.674255132675171, "learning_rate": 2.13707646952025e-06, "loss": 0.4176, "step": 16939 }, { "epoch": 0.703272980140011, "grad_norm": 2.3493130207061768, "learning_rate": 2.136525301084639e-06, "loss": 0.5173, "step": 16940 }, { "epoch": 0.7033144956642224, "grad_norm": 2.689662456512451, "learning_rate": 2.135974184421477e-06, "loss": 0.5516, "step": 16941 }, { "epoch": 0.7033560111884338, "grad_norm": 2.4654951095581055, "learning_rate": 2.1354231195407272e-06, "loss": 0.5062, "step": 16942 }, { "epoch": 0.7033975267126451, "grad_norm": 2.3262810707092285, "learning_rate": 2.1348721064523536e-06, "loss": 0.5217, "step": 16943 }, { "epoch": 0.7034390422368565, "grad_norm": 2.004856824874878, "learning_rate": 2.1343211451663158e-06, "loss": 0.5139, "step": 16944 }, { "epoch": 0.7034805577610678, "grad_norm": 2.462874412536621, "learning_rate": 2.13377023569258e-06, "loss": 0.6481, "step": 16945 }, { "epoch": 0.7035220732852792, "grad_norm": 2.1547482013702393, "learning_rate": 2.1332193780411043e-06, "loss": 0.5882, "step": 16946 }, { "epoch": 0.7035635888094904, "grad_norm": 2.1112477779388428, "learning_rate": 2.1326685722218484e-06, "loss": 0.4344, "step": 16947 }, { "epoch": 0.7036051043337018, "grad_norm": 2.6679391860961914, "learning_rate": 2.132117818244771e-06, "loss": 0.4662, "step": 16948 }, { "epoch": 0.7036466198579131, "grad_norm": 2.4363436698913574, "learning_rate": 2.1315671161198287e-06, "loss": 0.6647, "step": 16949 }, { "epoch": 0.7036881353821245, "grad_norm": 2.694499969482422, "learning_rate": 2.131016465856981e-06, "loss": 0.5732, "step": 16950 }, { "epoch": 0.7037296509063358, "grad_norm": 2.1904919147491455, "learning_rate": 2.130465867466181e-06, "loss": 0.4903, "step": 16951 }, { "epoch": 0.7037711664305472, "grad_norm": 2.539501190185547, "learning_rate": 2.1299153209573893e-06, "loss": 0.4862, "step": 16952 }, { "epoch": 0.7038126819547584, "grad_norm": 2.2307252883911133, "learning_rate": 2.1293648263405513e-06, "loss": 0.5474, "step": 16953 }, { "epoch": 0.7038541974789698, "grad_norm": 1.924674153327942, "learning_rate": 2.128814383625627e-06, "loss": 0.4874, "step": 16954 }, { "epoch": 0.7038957130031811, "grad_norm": 2.41475772857666, "learning_rate": 2.1282639928225662e-06, "loss": 0.39, "step": 16955 }, { "epoch": 0.7039372285273925, "grad_norm": 2.3966352939605713, "learning_rate": 2.1277136539413203e-06, "loss": 0.442, "step": 16956 }, { "epoch": 0.7039787440516038, "grad_norm": 2.3212850093841553, "learning_rate": 2.1271633669918366e-06, "loss": 0.5595, "step": 16957 }, { "epoch": 0.7040202595758152, "grad_norm": 2.647550582885742, "learning_rate": 2.1266131319840706e-06, "loss": 0.5171, "step": 16958 }, { "epoch": 0.7040617751000264, "grad_norm": 3.0930492877960205, "learning_rate": 2.1260629489279662e-06, "loss": 0.4769, "step": 16959 }, { "epoch": 0.7041032906242378, "grad_norm": 2.300952196121216, "learning_rate": 2.125512817833473e-06, "loss": 0.6043, "step": 16960 }, { "epoch": 0.7041448061484491, "grad_norm": 2.375647783279419, "learning_rate": 2.1249627387105354e-06, "loss": 0.4619, "step": 16961 }, { "epoch": 0.7041863216726605, "grad_norm": 2.0334370136260986, "learning_rate": 2.1244127115690994e-06, "loss": 0.3871, "step": 16962 }, { "epoch": 0.7042278371968718, "grad_norm": 2.2218761444091797, "learning_rate": 2.1238627364191115e-06, "loss": 0.4592, "step": 16963 }, { "epoch": 0.7042693527210832, "grad_norm": 2.4625730514526367, "learning_rate": 2.123312813270513e-06, "loss": 0.4925, "step": 16964 }, { "epoch": 0.7043108682452944, "grad_norm": 2.3333191871643066, "learning_rate": 2.1227629421332513e-06, "loss": 0.619, "step": 16965 }, { "epoch": 0.7043523837695058, "grad_norm": 2.4166433811187744, "learning_rate": 2.122213123017261e-06, "loss": 0.4744, "step": 16966 }, { "epoch": 0.7043938992937171, "grad_norm": 2.199599504470825, "learning_rate": 2.1216633559324894e-06, "loss": 0.4496, "step": 16967 }, { "epoch": 0.7044354148179285, "grad_norm": 2.454587459564209, "learning_rate": 2.1211136408888737e-06, "loss": 0.5066, "step": 16968 }, { "epoch": 0.7044769303421398, "grad_norm": 2.0745201110839844, "learning_rate": 2.1205639778963533e-06, "loss": 0.4172, "step": 16969 }, { "epoch": 0.7045184458663512, "grad_norm": 2.2875869274139404, "learning_rate": 2.1200143669648635e-06, "loss": 0.4819, "step": 16970 }, { "epoch": 0.7045599613905624, "grad_norm": 2.61916446685791, "learning_rate": 2.119464808104347e-06, "loss": 0.4984, "step": 16971 }, { "epoch": 0.7046014769147738, "grad_norm": 2.3774619102478027, "learning_rate": 2.1189153013247364e-06, "loss": 0.4396, "step": 16972 }, { "epoch": 0.7046429924389852, "grad_norm": 2.41743540763855, "learning_rate": 2.1183658466359675e-06, "loss": 0.5661, "step": 16973 }, { "epoch": 0.7046845079631965, "grad_norm": 2.3595149517059326, "learning_rate": 2.117816444047975e-06, "loss": 0.5811, "step": 16974 }, { "epoch": 0.7047260234874079, "grad_norm": 2.1170198917388916, "learning_rate": 2.11726709357069e-06, "loss": 0.3984, "step": 16975 }, { "epoch": 0.7047675390116191, "grad_norm": 2.7775633335113525, "learning_rate": 2.116717795214049e-06, "loss": 0.6575, "step": 16976 }, { "epoch": 0.7048090545358305, "grad_norm": 2.629685640335083, "learning_rate": 2.1161685489879784e-06, "loss": 0.5163, "step": 16977 }, { "epoch": 0.7048505700600418, "grad_norm": 3.738743305206299, "learning_rate": 2.115619354902416e-06, "loss": 0.4621, "step": 16978 }, { "epoch": 0.7048920855842532, "grad_norm": 2.854572296142578, "learning_rate": 2.1150702129672835e-06, "loss": 0.5455, "step": 16979 }, { "epoch": 0.7049336011084645, "grad_norm": 2.101104259490967, "learning_rate": 2.114521123192515e-06, "loss": 0.4508, "step": 16980 }, { "epoch": 0.7049751166326759, "grad_norm": 2.0787858963012695, "learning_rate": 2.113972085588036e-06, "loss": 0.4316, "step": 16981 }, { "epoch": 0.7050166321568871, "grad_norm": 2.2722890377044678, "learning_rate": 2.1134231001637733e-06, "loss": 0.5245, "step": 16982 }, { "epoch": 0.7050581476810985, "grad_norm": 3.197693347930908, "learning_rate": 2.1128741669296515e-06, "loss": 0.3874, "step": 16983 }, { "epoch": 0.7050996632053098, "grad_norm": 2.1769583225250244, "learning_rate": 2.112325285895599e-06, "loss": 0.523, "step": 16984 }, { "epoch": 0.7051411787295212, "grad_norm": 2.0952017307281494, "learning_rate": 2.1117764570715373e-06, "loss": 0.5442, "step": 16985 }, { "epoch": 0.7051826942537325, "grad_norm": 2.499178647994995, "learning_rate": 2.1112276804673896e-06, "loss": 0.45, "step": 16986 }, { "epoch": 0.7052242097779439, "grad_norm": 2.4426238536834717, "learning_rate": 2.1106789560930786e-06, "loss": 0.5016, "step": 16987 }, { "epoch": 0.7052657253021551, "grad_norm": 2.0000545978546143, "learning_rate": 2.1101302839585226e-06, "loss": 0.5663, "step": 16988 }, { "epoch": 0.7053072408263665, "grad_norm": 1.9991687536239624, "learning_rate": 2.1095816640736454e-06, "loss": 0.4981, "step": 16989 }, { "epoch": 0.7053487563505778, "grad_norm": 2.885721206665039, "learning_rate": 2.109033096448364e-06, "loss": 0.4918, "step": 16990 }, { "epoch": 0.7053902718747892, "grad_norm": 2.208416223526001, "learning_rate": 2.1084845810926003e-06, "loss": 0.6527, "step": 16991 }, { "epoch": 0.7054317873990005, "grad_norm": 2.2503604888916016, "learning_rate": 2.1079361180162657e-06, "loss": 0.3485, "step": 16992 }, { "epoch": 0.7054733029232119, "grad_norm": 2.400902509689331, "learning_rate": 2.1073877072292816e-06, "loss": 0.3483, "step": 16993 }, { "epoch": 0.7055148184474231, "grad_norm": 2.1081175804138184, "learning_rate": 2.1068393487415607e-06, "loss": 0.5309, "step": 16994 }, { "epoch": 0.7055563339716345, "grad_norm": 3.040797233581543, "learning_rate": 2.106291042563019e-06, "loss": 0.5494, "step": 16995 }, { "epoch": 0.7055978494958458, "grad_norm": 2.6222851276397705, "learning_rate": 2.1057427887035687e-06, "loss": 0.6154, "step": 16996 }, { "epoch": 0.7056393650200572, "grad_norm": 2.501980781555176, "learning_rate": 2.105194587173122e-06, "loss": 0.6384, "step": 16997 }, { "epoch": 0.7056808805442685, "grad_norm": 2.067603349685669, "learning_rate": 2.104646437981593e-06, "loss": 0.6093, "step": 16998 }, { "epoch": 0.7057223960684799, "grad_norm": 2.6567165851593018, "learning_rate": 2.1040983411388902e-06, "loss": 0.5708, "step": 16999 }, { "epoch": 0.7057639115926911, "grad_norm": 2.6899638175964355, "learning_rate": 2.103550296654925e-06, "loss": 0.5936, "step": 17000 }, { "epoch": 0.7058054271169025, "grad_norm": 2.7619082927703857, "learning_rate": 2.103002304539603e-06, "loss": 0.5873, "step": 17001 }, { "epoch": 0.7058469426411139, "grad_norm": 2.922274589538574, "learning_rate": 2.1024543648028363e-06, "loss": 0.5599, "step": 17002 }, { "epoch": 0.7058884581653252, "grad_norm": 2.4048898220062256, "learning_rate": 2.1019064774545283e-06, "loss": 0.6286, "step": 17003 }, { "epoch": 0.7059299736895366, "grad_norm": 2.3721017837524414, "learning_rate": 2.1013586425045894e-06, "loss": 0.5106, "step": 17004 }, { "epoch": 0.7059714892137479, "grad_norm": 2.5235941410064697, "learning_rate": 2.100810859962919e-06, "loss": 0.5278, "step": 17005 }, { "epoch": 0.7060130047379592, "grad_norm": 2.7182648181915283, "learning_rate": 2.100263129839425e-06, "loss": 0.5096, "step": 17006 }, { "epoch": 0.7060545202621705, "grad_norm": 2.113840341567993, "learning_rate": 2.09971545214401e-06, "loss": 0.4981, "step": 17007 }, { "epoch": 0.7060960357863819, "grad_norm": 2.3464317321777344, "learning_rate": 2.099167826886575e-06, "loss": 0.5562, "step": 17008 }, { "epoch": 0.7061375513105932, "grad_norm": 2.2247061729431152, "learning_rate": 2.0986202540770225e-06, "loss": 0.4857, "step": 17009 }, { "epoch": 0.7061790668348046, "grad_norm": 2.613884925842285, "learning_rate": 2.0980727337252495e-06, "loss": 0.42, "step": 17010 }, { "epoch": 0.7062205823590159, "grad_norm": 1.9919689893722534, "learning_rate": 2.09752526584116e-06, "loss": 0.4736, "step": 17011 }, { "epoch": 0.7062620978832272, "grad_norm": 2.3717408180236816, "learning_rate": 2.0969778504346506e-06, "loss": 0.6222, "step": 17012 }, { "epoch": 0.7063036134074385, "grad_norm": 2.4343080520629883, "learning_rate": 2.096430487515618e-06, "loss": 0.7392, "step": 17013 }, { "epoch": 0.7063451289316499, "grad_norm": 2.469613790512085, "learning_rate": 2.0958831770939575e-06, "loss": 0.5024, "step": 17014 }, { "epoch": 0.7063866444558612, "grad_norm": 2.3046698570251465, "learning_rate": 2.0953359191795676e-06, "loss": 0.5714, "step": 17015 }, { "epoch": 0.7064281599800726, "grad_norm": 2.201401948928833, "learning_rate": 2.094788713782342e-06, "loss": 0.4077, "step": 17016 }, { "epoch": 0.7064696755042839, "grad_norm": 2.1463301181793213, "learning_rate": 2.0942415609121734e-06, "loss": 0.5702, "step": 17017 }, { "epoch": 0.7065111910284952, "grad_norm": 2.1544504165649414, "learning_rate": 2.093694460578953e-06, "loss": 0.4842, "step": 17018 }, { "epoch": 0.7065527065527065, "grad_norm": 2.3162858486175537, "learning_rate": 2.093147412792577e-06, "loss": 0.4318, "step": 17019 }, { "epoch": 0.7065942220769179, "grad_norm": 2.05134916305542, "learning_rate": 2.0926004175629332e-06, "loss": 0.5128, "step": 17020 }, { "epoch": 0.7066357376011292, "grad_norm": 2.572232723236084, "learning_rate": 2.092053474899912e-06, "loss": 0.4982, "step": 17021 }, { "epoch": 0.7066772531253406, "grad_norm": 2.1427764892578125, "learning_rate": 2.091506584813402e-06, "loss": 0.536, "step": 17022 }, { "epoch": 0.7067187686495519, "grad_norm": 2.4360296726226807, "learning_rate": 2.0909597473132893e-06, "loss": 0.5878, "step": 17023 }, { "epoch": 0.7067602841737632, "grad_norm": 2.1928420066833496, "learning_rate": 2.0904129624094648e-06, "loss": 0.4203, "step": 17024 }, { "epoch": 0.7068017996979745, "grad_norm": 2.438542366027832, "learning_rate": 2.089866230111813e-06, "loss": 0.4728, "step": 17025 }, { "epoch": 0.7068433152221859, "grad_norm": 2.082178831100464, "learning_rate": 2.089319550430218e-06, "loss": 0.4955, "step": 17026 }, { "epoch": 0.7068848307463972, "grad_norm": 2.3185298442840576, "learning_rate": 2.0887729233745626e-06, "loss": 0.4655, "step": 17027 }, { "epoch": 0.7069263462706086, "grad_norm": 2.5369820594787598, "learning_rate": 2.0882263489547337e-06, "loss": 0.6122, "step": 17028 }, { "epoch": 0.7069678617948199, "grad_norm": 2.526007652282715, "learning_rate": 2.087679827180612e-06, "loss": 0.507, "step": 17029 }, { "epoch": 0.7070093773190312, "grad_norm": 2.5183231830596924, "learning_rate": 2.0871333580620786e-06, "loss": 0.6366, "step": 17030 }, { "epoch": 0.7070508928432425, "grad_norm": 2.3826403617858887, "learning_rate": 2.086586941609011e-06, "loss": 0.6405, "step": 17031 }, { "epoch": 0.7070924083674539, "grad_norm": 2.7018134593963623, "learning_rate": 2.0860405778312935e-06, "loss": 0.5054, "step": 17032 }, { "epoch": 0.7071339238916653, "grad_norm": 2.2410988807678223, "learning_rate": 2.085494266738803e-06, "loss": 0.4503, "step": 17033 }, { "epoch": 0.7071754394158766, "grad_norm": 2.4391069412231445, "learning_rate": 2.0849480083414155e-06, "loss": 0.5538, "step": 17034 }, { "epoch": 0.707216954940088, "grad_norm": 2.810157060623169, "learning_rate": 2.0844018026490075e-06, "loss": 0.7219, "step": 17035 }, { "epoch": 0.7072584704642992, "grad_norm": 2.199294328689575, "learning_rate": 2.0838556496714545e-06, "loss": 0.5902, "step": 17036 }, { "epoch": 0.7072999859885106, "grad_norm": 2.3651809692382812, "learning_rate": 2.0833095494186336e-06, "loss": 0.3666, "step": 17037 }, { "epoch": 0.7073415015127219, "grad_norm": 2.718935251235962, "learning_rate": 2.0827635019004166e-06, "loss": 0.6271, "step": 17038 }, { "epoch": 0.7073830170369333, "grad_norm": 2.3743247985839844, "learning_rate": 2.0822175071266757e-06, "loss": 0.454, "step": 17039 }, { "epoch": 0.7074245325611446, "grad_norm": 2.1008729934692383, "learning_rate": 2.0816715651072822e-06, "loss": 0.5472, "step": 17040 }, { "epoch": 0.707466048085356, "grad_norm": 2.252978563308716, "learning_rate": 2.08112567585211e-06, "loss": 0.5466, "step": 17041 }, { "epoch": 0.7075075636095672, "grad_norm": 2.621985673904419, "learning_rate": 2.0805798393710265e-06, "loss": 0.4756, "step": 17042 }, { "epoch": 0.7075490791337786, "grad_norm": 2.66461181640625, "learning_rate": 2.0800340556739006e-06, "loss": 0.5267, "step": 17043 }, { "epoch": 0.7075905946579899, "grad_norm": 2.3996121883392334, "learning_rate": 2.0794883247705994e-06, "loss": 0.415, "step": 17044 }, { "epoch": 0.7076321101822013, "grad_norm": 1.9180063009262085, "learning_rate": 2.0789426466709924e-06, "loss": 0.4961, "step": 17045 }, { "epoch": 0.7076736257064126, "grad_norm": 2.4131500720977783, "learning_rate": 2.0783970213849443e-06, "loss": 0.6922, "step": 17046 }, { "epoch": 0.707715141230624, "grad_norm": 2.8085033893585205, "learning_rate": 2.0778514489223207e-06, "loss": 0.3801, "step": 17047 }, { "epoch": 0.7077566567548352, "grad_norm": 2.597095251083374, "learning_rate": 2.0773059292929847e-06, "loss": 0.4839, "step": 17048 }, { "epoch": 0.7077981722790466, "grad_norm": 2.66654896736145, "learning_rate": 2.076760462506798e-06, "loss": 0.5139, "step": 17049 }, { "epoch": 0.7078396878032579, "grad_norm": 2.4190568923950195, "learning_rate": 2.0762150485736265e-06, "loss": 0.4185, "step": 17050 }, { "epoch": 0.7078812033274693, "grad_norm": 2.5847198963165283, "learning_rate": 2.07566968750333e-06, "loss": 0.5363, "step": 17051 }, { "epoch": 0.7079227188516806, "grad_norm": 2.6986451148986816, "learning_rate": 2.0751243793057686e-06, "loss": 0.51, "step": 17052 }, { "epoch": 0.707964234375892, "grad_norm": 2.366703510284424, "learning_rate": 2.074579123990799e-06, "loss": 0.45, "step": 17053 }, { "epoch": 0.7080057499001032, "grad_norm": 3.0337979793548584, "learning_rate": 2.0740339215682842e-06, "loss": 0.4941, "step": 17054 }, { "epoch": 0.7080472654243146, "grad_norm": 2.4636528491973877, "learning_rate": 2.0734887720480793e-06, "loss": 0.5467, "step": 17055 }, { "epoch": 0.7080887809485259, "grad_norm": 2.4707555770874023, "learning_rate": 2.072943675440041e-06, "loss": 0.4114, "step": 17056 }, { "epoch": 0.7081302964727373, "grad_norm": 2.0694282054901123, "learning_rate": 2.0723986317540233e-06, "loss": 0.421, "step": 17057 }, { "epoch": 0.7081718119969486, "grad_norm": 2.309983015060425, "learning_rate": 2.0718536409998834e-06, "loss": 0.4419, "step": 17058 }, { "epoch": 0.70821332752116, "grad_norm": 1.9822025299072266, "learning_rate": 2.071308703187474e-06, "loss": 0.4885, "step": 17059 }, { "epoch": 0.7082548430453712, "grad_norm": 2.03800630569458, "learning_rate": 2.0707638183266476e-06, "loss": 0.338, "step": 17060 }, { "epoch": 0.7082963585695826, "grad_norm": 2.7852046489715576, "learning_rate": 2.0702189864272554e-06, "loss": 0.5688, "step": 17061 }, { "epoch": 0.7083378740937939, "grad_norm": 2.5821990966796875, "learning_rate": 2.069674207499146e-06, "loss": 0.5496, "step": 17062 }, { "epoch": 0.7083793896180053, "grad_norm": 2.9595582485198975, "learning_rate": 2.069129481552174e-06, "loss": 0.4753, "step": 17063 }, { "epoch": 0.7084209051422167, "grad_norm": 2.4677205085754395, "learning_rate": 2.0685848085961858e-06, "loss": 0.414, "step": 17064 }, { "epoch": 0.708462420666428, "grad_norm": 2.4050185680389404, "learning_rate": 2.068040188641029e-06, "loss": 0.5465, "step": 17065 }, { "epoch": 0.7085039361906393, "grad_norm": 2.4413793087005615, "learning_rate": 2.0674956216965484e-06, "loss": 0.4065, "step": 17066 }, { "epoch": 0.7085454517148506, "grad_norm": 2.7950217723846436, "learning_rate": 2.0669511077725945e-06, "loss": 0.5317, "step": 17067 }, { "epoch": 0.708586967239062, "grad_norm": 2.3861289024353027, "learning_rate": 2.06640664687901e-06, "loss": 0.57, "step": 17068 }, { "epoch": 0.7086284827632733, "grad_norm": 2.5970141887664795, "learning_rate": 2.0658622390256388e-06, "loss": 0.4701, "step": 17069 }, { "epoch": 0.7086699982874847, "grad_norm": 2.582204580307007, "learning_rate": 2.0653178842223225e-06, "loss": 0.5926, "step": 17070 }, { "epoch": 0.708711513811696, "grad_norm": 2.2745916843414307, "learning_rate": 2.0647735824789063e-06, "loss": 0.4557, "step": 17071 }, { "epoch": 0.7087530293359073, "grad_norm": 3.281266450881958, "learning_rate": 2.0642293338052294e-06, "loss": 0.6021, "step": 17072 }, { "epoch": 0.7087945448601186, "grad_norm": 2.2620596885681152, "learning_rate": 2.0636851382111326e-06, "loss": 0.4015, "step": 17073 }, { "epoch": 0.70883606038433, "grad_norm": 2.666918992996216, "learning_rate": 2.063140995706455e-06, "loss": 0.5199, "step": 17074 }, { "epoch": 0.7088775759085413, "grad_norm": 2.1705362796783447, "learning_rate": 2.0625969063010325e-06, "loss": 0.4855, "step": 17075 }, { "epoch": 0.7089190914327527, "grad_norm": 2.2392666339874268, "learning_rate": 2.062052870004707e-06, "loss": 0.5547, "step": 17076 }, { "epoch": 0.708960606956964, "grad_norm": 2.4477579593658447, "learning_rate": 2.061508886827312e-06, "loss": 0.3894, "step": 17077 }, { "epoch": 0.7090021224811753, "grad_norm": 2.408585548400879, "learning_rate": 2.060964956778683e-06, "loss": 0.5114, "step": 17078 }, { "epoch": 0.7090436380053866, "grad_norm": 2.7214949131011963, "learning_rate": 2.0604210798686536e-06, "loss": 0.5892, "step": 17079 }, { "epoch": 0.709085153529598, "grad_norm": 2.6692769527435303, "learning_rate": 2.0598772561070595e-06, "loss": 0.5222, "step": 17080 }, { "epoch": 0.7091266690538093, "grad_norm": 2.2839019298553467, "learning_rate": 2.0593334855037323e-06, "loss": 0.6387, "step": 17081 }, { "epoch": 0.7091681845780207, "grad_norm": 2.5811948776245117, "learning_rate": 2.0587897680685036e-06, "loss": 0.4798, "step": 17082 }, { "epoch": 0.7092097001022319, "grad_norm": 2.1640467643737793, "learning_rate": 2.058246103811202e-06, "loss": 0.3955, "step": 17083 }, { "epoch": 0.7092512156264433, "grad_norm": 2.1346611976623535, "learning_rate": 2.0577024927416595e-06, "loss": 0.5036, "step": 17084 }, { "epoch": 0.7092927311506546, "grad_norm": 2.213381052017212, "learning_rate": 2.0571589348697045e-06, "loss": 0.4979, "step": 17085 }, { "epoch": 0.709334246674866, "grad_norm": 2.843703508377075, "learning_rate": 2.0566154302051643e-06, "loss": 0.4279, "step": 17086 }, { "epoch": 0.7093757621990773, "grad_norm": 2.5047614574432373, "learning_rate": 2.0560719787578652e-06, "loss": 0.4799, "step": 17087 }, { "epoch": 0.7094172777232887, "grad_norm": 2.4227097034454346, "learning_rate": 2.0555285805376325e-06, "loss": 0.6121, "step": 17088 }, { "epoch": 0.7094587932474999, "grad_norm": 2.5229318141937256, "learning_rate": 2.0549852355542926e-06, "loss": 0.491, "step": 17089 }, { "epoch": 0.7095003087717113, "grad_norm": 2.057772397994995, "learning_rate": 2.0544419438176684e-06, "loss": 0.3991, "step": 17090 }, { "epoch": 0.7095418242959226, "grad_norm": 2.0869686603546143, "learning_rate": 2.053898705337583e-06, "loss": 0.5782, "step": 17091 }, { "epoch": 0.709583339820134, "grad_norm": 2.336604595184326, "learning_rate": 2.0533555201238562e-06, "loss": 0.4554, "step": 17092 }, { "epoch": 0.7096248553443453, "grad_norm": 1.9932327270507812, "learning_rate": 2.052812388186313e-06, "loss": 0.515, "step": 17093 }, { "epoch": 0.7096663708685567, "grad_norm": 2.5729892253875732, "learning_rate": 2.052269309534772e-06, "loss": 0.4323, "step": 17094 }, { "epoch": 0.709707886392768, "grad_norm": 2.214475154876709, "learning_rate": 2.0517262841790503e-06, "loss": 0.4349, "step": 17095 }, { "epoch": 0.7097494019169793, "grad_norm": 2.3027262687683105, "learning_rate": 2.051183312128966e-06, "loss": 0.4876, "step": 17096 }, { "epoch": 0.7097909174411907, "grad_norm": 2.075063943862915, "learning_rate": 2.0506403933943394e-06, "loss": 0.6081, "step": 17097 }, { "epoch": 0.709832432965402, "grad_norm": 2.3478147983551025, "learning_rate": 2.050097527984984e-06, "loss": 0.4491, "step": 17098 }, { "epoch": 0.7098739484896134, "grad_norm": 3.4938623905181885, "learning_rate": 2.0495547159107154e-06, "loss": 0.4256, "step": 17099 }, { "epoch": 0.7099154640138247, "grad_norm": 3.4364895820617676, "learning_rate": 2.0490119571813488e-06, "loss": 0.6695, "step": 17100 }, { "epoch": 0.709956979538036, "grad_norm": 2.0944101810455322, "learning_rate": 2.048469251806694e-06, "loss": 0.378, "step": 17101 }, { "epoch": 0.7099984950622473, "grad_norm": 2.2019405364990234, "learning_rate": 2.047926599796568e-06, "loss": 0.7399, "step": 17102 }, { "epoch": 0.7100400105864587, "grad_norm": 2.3739233016967773, "learning_rate": 2.04738400116078e-06, "loss": 0.412, "step": 17103 }, { "epoch": 0.71008152611067, "grad_norm": 2.390350103378296, "learning_rate": 2.0468414559091397e-06, "loss": 0.5531, "step": 17104 }, { "epoch": 0.7101230416348814, "grad_norm": 2.008477210998535, "learning_rate": 2.0462989640514562e-06, "loss": 0.4381, "step": 17105 }, { "epoch": 0.7101645571590927, "grad_norm": 2.0945329666137695, "learning_rate": 2.04575652559754e-06, "loss": 0.4622, "step": 17106 }, { "epoch": 0.710206072683304, "grad_norm": 2.4184157848358154, "learning_rate": 2.0452141405571973e-06, "loss": 0.4753, "step": 17107 }, { "epoch": 0.7102475882075153, "grad_norm": 3.2774438858032227, "learning_rate": 2.0446718089402353e-06, "loss": 0.5295, "step": 17108 }, { "epoch": 0.7102891037317267, "grad_norm": 2.3337419033050537, "learning_rate": 2.044129530756456e-06, "loss": 0.3897, "step": 17109 }, { "epoch": 0.710330619255938, "grad_norm": 2.3808610439300537, "learning_rate": 2.043587306015669e-06, "loss": 0.5644, "step": 17110 }, { "epoch": 0.7103721347801494, "grad_norm": 2.522294282913208, "learning_rate": 2.043045134727676e-06, "loss": 0.4765, "step": 17111 }, { "epoch": 0.7104136503043607, "grad_norm": 1.8196630477905273, "learning_rate": 2.0425030169022784e-06, "loss": 0.4349, "step": 17112 }, { "epoch": 0.710455165828572, "grad_norm": 2.755478858947754, "learning_rate": 2.0419609525492796e-06, "loss": 0.4813, "step": 17113 }, { "epoch": 0.7104966813527833, "grad_norm": 2.3158798217773438, "learning_rate": 2.041418941678477e-06, "loss": 0.5084, "step": 17114 }, { "epoch": 0.7105381968769947, "grad_norm": 2.1789097785949707, "learning_rate": 2.0408769842996745e-06, "loss": 0.4566, "step": 17115 }, { "epoch": 0.710579712401206, "grad_norm": 3.18285870552063, "learning_rate": 2.0403350804226694e-06, "loss": 0.4962, "step": 17116 }, { "epoch": 0.7106212279254174, "grad_norm": 2.2217323780059814, "learning_rate": 2.0397932300572587e-06, "loss": 0.6021, "step": 17117 }, { "epoch": 0.7106627434496287, "grad_norm": 2.8940768241882324, "learning_rate": 2.039251433213238e-06, "loss": 0.5709, "step": 17118 }, { "epoch": 0.71070425897384, "grad_norm": 1.9743884801864624, "learning_rate": 2.0387096899004066e-06, "loss": 0.4541, "step": 17119 }, { "epoch": 0.7107457744980513, "grad_norm": 2.2878379821777344, "learning_rate": 2.0381680001285574e-06, "loss": 0.411, "step": 17120 }, { "epoch": 0.7107872900222627, "grad_norm": 2.6803011894226074, "learning_rate": 2.037626363907485e-06, "loss": 0.4434, "step": 17121 }, { "epoch": 0.710828805546474, "grad_norm": 2.3263895511627197, "learning_rate": 2.0370847812469797e-06, "loss": 0.4884, "step": 17122 }, { "epoch": 0.7108703210706854, "grad_norm": 2.2309584617614746, "learning_rate": 2.0365432521568367e-06, "loss": 0.5356, "step": 17123 }, { "epoch": 0.7109118365948967, "grad_norm": 2.2939388751983643, "learning_rate": 2.0360017766468466e-06, "loss": 0.4821, "step": 17124 }, { "epoch": 0.710953352119108, "grad_norm": 2.2443618774414062, "learning_rate": 2.0354603547267985e-06, "loss": 0.3853, "step": 17125 }, { "epoch": 0.7109948676433194, "grad_norm": 2.403244733810425, "learning_rate": 2.0349189864064816e-06, "loss": 0.5742, "step": 17126 }, { "epoch": 0.7110363831675307, "grad_norm": 2.4926068782806396, "learning_rate": 2.0343776716956825e-06, "loss": 0.5209, "step": 17127 }, { "epoch": 0.7110778986917421, "grad_norm": 2.4673802852630615, "learning_rate": 2.033836410604191e-06, "loss": 0.5127, "step": 17128 }, { "epoch": 0.7111194142159534, "grad_norm": 2.4110047817230225, "learning_rate": 2.033295203141793e-06, "loss": 0.5128, "step": 17129 }, { "epoch": 0.7111609297401648, "grad_norm": 2.1272940635681152, "learning_rate": 2.0327540493182725e-06, "loss": 0.3558, "step": 17130 }, { "epoch": 0.711202445264376, "grad_norm": 2.4938080310821533, "learning_rate": 2.032212949143412e-06, "loss": 0.5715, "step": 17131 }, { "epoch": 0.7112439607885874, "grad_norm": 2.3578715324401855, "learning_rate": 2.0316719026269995e-06, "loss": 0.5182, "step": 17132 }, { "epoch": 0.7112854763127987, "grad_norm": 1.9486500024795532, "learning_rate": 2.0311309097788135e-06, "loss": 0.4357, "step": 17133 }, { "epoch": 0.7113269918370101, "grad_norm": 2.6107523441314697, "learning_rate": 2.0305899706086367e-06, "loss": 0.4885, "step": 17134 }, { "epoch": 0.7113685073612214, "grad_norm": 2.582995891571045, "learning_rate": 2.0300490851262474e-06, "loss": 0.4142, "step": 17135 }, { "epoch": 0.7114100228854328, "grad_norm": 2.5086052417755127, "learning_rate": 2.0295082533414284e-06, "loss": 0.529, "step": 17136 }, { "epoch": 0.711451538409644, "grad_norm": 2.8023252487182617, "learning_rate": 2.028967475263956e-06, "loss": 0.5172, "step": 17137 }, { "epoch": 0.7114930539338554, "grad_norm": 2.4796361923217773, "learning_rate": 2.0284267509036086e-06, "loss": 0.5984, "step": 17138 }, { "epoch": 0.7115345694580667, "grad_norm": 2.708012104034424, "learning_rate": 2.0278860802701616e-06, "loss": 0.569, "step": 17139 }, { "epoch": 0.7115760849822781, "grad_norm": 2.216989755630493, "learning_rate": 2.02734546337339e-06, "loss": 0.4612, "step": 17140 }, { "epoch": 0.7116176005064894, "grad_norm": 2.4267795085906982, "learning_rate": 2.02680490022307e-06, "loss": 0.4955, "step": 17141 }, { "epoch": 0.7116591160307008, "grad_norm": 2.1651015281677246, "learning_rate": 2.026264390828975e-06, "loss": 0.4155, "step": 17142 }, { "epoch": 0.711700631554912, "grad_norm": 2.280059576034546, "learning_rate": 2.025723935200876e-06, "loss": 0.5387, "step": 17143 }, { "epoch": 0.7117421470791234, "grad_norm": 2.685525417327881, "learning_rate": 2.0251835333485447e-06, "loss": 0.5162, "step": 17144 }, { "epoch": 0.7117836626033347, "grad_norm": 2.775129795074463, "learning_rate": 2.024643185281754e-06, "loss": 0.5882, "step": 17145 }, { "epoch": 0.7118251781275461, "grad_norm": 2.266422986984253, "learning_rate": 2.024102891010272e-06, "loss": 0.5417, "step": 17146 }, { "epoch": 0.7118666936517574, "grad_norm": 2.337285280227661, "learning_rate": 2.023562650543868e-06, "loss": 0.5146, "step": 17147 }, { "epoch": 0.7119082091759688, "grad_norm": 2.212172269821167, "learning_rate": 2.0230224638923067e-06, "loss": 0.4588, "step": 17148 }, { "epoch": 0.71194972470018, "grad_norm": 1.9930260181427002, "learning_rate": 2.0224823310653597e-06, "loss": 0.3908, "step": 17149 }, { "epoch": 0.7119912402243914, "grad_norm": 2.389634370803833, "learning_rate": 2.02194225207279e-06, "loss": 0.3703, "step": 17150 }, { "epoch": 0.7120327557486027, "grad_norm": 2.512838363647461, "learning_rate": 2.021402226924363e-06, "loss": 0.4799, "step": 17151 }, { "epoch": 0.7120742712728141, "grad_norm": 2.8344249725341797, "learning_rate": 2.020862255629842e-06, "loss": 0.6323, "step": 17152 }, { "epoch": 0.7121157867970254, "grad_norm": 2.6881768703460693, "learning_rate": 2.0203223381989882e-06, "loss": 0.6141, "step": 17153 }, { "epoch": 0.7121573023212368, "grad_norm": 1.995654582977295, "learning_rate": 2.019782474641567e-06, "loss": 0.3965, "step": 17154 }, { "epoch": 0.712198817845448, "grad_norm": 2.754115343093872, "learning_rate": 2.0192426649673386e-06, "loss": 0.4469, "step": 17155 }, { "epoch": 0.7122403333696594, "grad_norm": 3.177700996398926, "learning_rate": 2.018702909186061e-06, "loss": 0.5619, "step": 17156 }, { "epoch": 0.7122818488938708, "grad_norm": 2.3600523471832275, "learning_rate": 2.0181632073074925e-06, "loss": 0.5179, "step": 17157 }, { "epoch": 0.7123233644180821, "grad_norm": 2.670262336730957, "learning_rate": 2.017623559341395e-06, "loss": 0.4857, "step": 17158 }, { "epoch": 0.7123648799422935, "grad_norm": 2.479306697845459, "learning_rate": 2.0170839652975227e-06, "loss": 0.5837, "step": 17159 }, { "epoch": 0.7124063954665047, "grad_norm": 2.7119462490081787, "learning_rate": 2.016544425185632e-06, "loss": 0.4586, "step": 17160 }, { "epoch": 0.7124479109907161, "grad_norm": 2.1617069244384766, "learning_rate": 2.0160049390154766e-06, "loss": 0.6564, "step": 17161 }, { "epoch": 0.7124894265149274, "grad_norm": 2.6200225353240967, "learning_rate": 2.0154655067968137e-06, "loss": 0.6151, "step": 17162 }, { "epoch": 0.7125309420391388, "grad_norm": 2.526766777038574, "learning_rate": 2.0149261285393947e-06, "loss": 0.6857, "step": 17163 }, { "epoch": 0.7125724575633501, "grad_norm": 2.0682590007781982, "learning_rate": 2.014386804252971e-06, "loss": 0.529, "step": 17164 }, { "epoch": 0.7126139730875615, "grad_norm": 2.293175220489502, "learning_rate": 2.013847533947295e-06, "loss": 0.5156, "step": 17165 }, { "epoch": 0.7126554886117727, "grad_norm": 2.0045077800750732, "learning_rate": 2.0133083176321143e-06, "loss": 0.3421, "step": 17166 }, { "epoch": 0.7126970041359841, "grad_norm": 2.032268762588501, "learning_rate": 2.0127691553171825e-06, "loss": 0.3864, "step": 17167 }, { "epoch": 0.7127385196601954, "grad_norm": 2.3344531059265137, "learning_rate": 2.0122300470122445e-06, "loss": 0.3712, "step": 17168 }, { "epoch": 0.7127800351844068, "grad_norm": 2.054323196411133, "learning_rate": 2.011690992727049e-06, "loss": 0.4305, "step": 17169 }, { "epoch": 0.7128215507086181, "grad_norm": 2.3634703159332275, "learning_rate": 2.0111519924713398e-06, "loss": 0.6455, "step": 17170 }, { "epoch": 0.7128630662328295, "grad_norm": 2.186150312423706, "learning_rate": 2.010613046254866e-06, "loss": 0.6072, "step": 17171 }, { "epoch": 0.7129045817570407, "grad_norm": 2.2482006549835205, "learning_rate": 2.010074154087369e-06, "loss": 0.434, "step": 17172 }, { "epoch": 0.7129460972812521, "grad_norm": 2.2291243076324463, "learning_rate": 2.009535315978594e-06, "loss": 0.5621, "step": 17173 }, { "epoch": 0.7129876128054634, "grad_norm": 2.1578617095947266, "learning_rate": 2.008996531938281e-06, "loss": 0.4426, "step": 17174 }, { "epoch": 0.7130291283296748, "grad_norm": 2.219024896621704, "learning_rate": 2.0084578019761738e-06, "loss": 0.5126, "step": 17175 }, { "epoch": 0.7130706438538861, "grad_norm": 2.0048587322235107, "learning_rate": 2.007919126102012e-06, "loss": 0.4842, "step": 17176 }, { "epoch": 0.7131121593780975, "grad_norm": 2.3925490379333496, "learning_rate": 2.0073805043255347e-06, "loss": 0.5398, "step": 17177 }, { "epoch": 0.7131536749023087, "grad_norm": 2.204042911529541, "learning_rate": 2.0068419366564807e-06, "loss": 0.4885, "step": 17178 }, { "epoch": 0.7131951904265201, "grad_norm": 2.5775744915008545, "learning_rate": 2.0063034231045853e-06, "loss": 0.4888, "step": 17179 }, { "epoch": 0.7132367059507314, "grad_norm": 2.488461971282959, "learning_rate": 2.0057649636795885e-06, "loss": 0.5051, "step": 17180 }, { "epoch": 0.7132782214749428, "grad_norm": 2.7920053005218506, "learning_rate": 2.0052265583912245e-06, "loss": 0.6322, "step": 17181 }, { "epoch": 0.7133197369991541, "grad_norm": 2.231252431869507, "learning_rate": 2.0046882072492273e-06, "loss": 0.4484, "step": 17182 }, { "epoch": 0.7133612525233655, "grad_norm": 2.5267086029052734, "learning_rate": 2.004149910263329e-06, "loss": 0.4053, "step": 17183 }, { "epoch": 0.7134027680475767, "grad_norm": 2.2077789306640625, "learning_rate": 2.0036116674432653e-06, "loss": 0.5298, "step": 17184 }, { "epoch": 0.7134442835717881, "grad_norm": 2.4323980808258057, "learning_rate": 2.0030734787987665e-06, "loss": 0.6293, "step": 17185 }, { "epoch": 0.7134857990959994, "grad_norm": 2.1411609649658203, "learning_rate": 2.0025353443395634e-06, "loss": 0.6473, "step": 17186 }, { "epoch": 0.7135273146202108, "grad_norm": 2.363048791885376, "learning_rate": 2.0019972640753833e-06, "loss": 0.5318, "step": 17187 }, { "epoch": 0.7135688301444222, "grad_norm": 2.4013397693634033, "learning_rate": 2.001459238015958e-06, "loss": 0.4939, "step": 17188 }, { "epoch": 0.7136103456686335, "grad_norm": 2.535884141921997, "learning_rate": 2.0009212661710143e-06, "loss": 0.6338, "step": 17189 }, { "epoch": 0.7136518611928448, "grad_norm": 2.195796489715576, "learning_rate": 2.000383348550279e-06, "loss": 0.3576, "step": 17190 }, { "epoch": 0.7136933767170561, "grad_norm": 2.168041229248047, "learning_rate": 1.999845485163477e-06, "loss": 0.4555, "step": 17191 }, { "epoch": 0.7137348922412675, "grad_norm": 2.249861240386963, "learning_rate": 1.9993076760203313e-06, "loss": 0.4031, "step": 17192 }, { "epoch": 0.7137764077654788, "grad_norm": 1.8987942934036255, "learning_rate": 1.9987699211305696e-06, "loss": 0.3613, "step": 17193 }, { "epoch": 0.7138179232896902, "grad_norm": 2.504005193710327, "learning_rate": 1.998232220503913e-06, "loss": 0.5924, "step": 17194 }, { "epoch": 0.7138594388139015, "grad_norm": 2.0964813232421875, "learning_rate": 1.9976945741500823e-06, "loss": 0.4015, "step": 17195 }, { "epoch": 0.7139009543381128, "grad_norm": 2.5127673149108887, "learning_rate": 1.9971569820787983e-06, "loss": 0.511, "step": 17196 }, { "epoch": 0.7139424698623241, "grad_norm": 2.389709234237671, "learning_rate": 1.9966194442997826e-06, "loss": 0.483, "step": 17197 }, { "epoch": 0.7139839853865355, "grad_norm": 2.064831256866455, "learning_rate": 1.996081960822754e-06, "loss": 0.6352, "step": 17198 }, { "epoch": 0.7140255009107468, "grad_norm": 2.986963987350464, "learning_rate": 1.9955445316574285e-06, "loss": 0.6038, "step": 17199 }, { "epoch": 0.7140670164349582, "grad_norm": 2.160865545272827, "learning_rate": 1.9950071568135223e-06, "loss": 0.4918, "step": 17200 }, { "epoch": 0.7141085319591695, "grad_norm": 2.2951955795288086, "learning_rate": 1.9944698363007547e-06, "loss": 0.4322, "step": 17201 }, { "epoch": 0.7141500474833808, "grad_norm": 1.975498914718628, "learning_rate": 1.9939325701288387e-06, "loss": 0.3522, "step": 17202 }, { "epoch": 0.7141915630075921, "grad_norm": 2.4680488109588623, "learning_rate": 1.993395358307488e-06, "loss": 0.4951, "step": 17203 }, { "epoch": 0.7142330785318035, "grad_norm": 1.9321495294570923, "learning_rate": 1.992858200846416e-06, "loss": 0.4993, "step": 17204 }, { "epoch": 0.7142745940560148, "grad_norm": 2.0467922687530518, "learning_rate": 1.9923210977553325e-06, "loss": 0.5761, "step": 17205 }, { "epoch": 0.7143161095802262, "grad_norm": 2.351437568664551, "learning_rate": 1.9917840490439527e-06, "loss": 0.4914, "step": 17206 }, { "epoch": 0.7143576251044375, "grad_norm": 2.716618061065674, "learning_rate": 1.991247054721984e-06, "loss": 0.4466, "step": 17207 }, { "epoch": 0.7143991406286488, "grad_norm": 2.0066959857940674, "learning_rate": 1.990710114799136e-06, "loss": 0.5049, "step": 17208 }, { "epoch": 0.7144406561528601, "grad_norm": 2.3264007568359375, "learning_rate": 1.9901732292851145e-06, "loss": 0.4529, "step": 17209 }, { "epoch": 0.7144821716770715, "grad_norm": 3.12416410446167, "learning_rate": 1.9896363981896304e-06, "loss": 0.4697, "step": 17210 }, { "epoch": 0.7145236872012828, "grad_norm": 2.1219875812530518, "learning_rate": 1.9890996215223885e-06, "loss": 0.5028, "step": 17211 }, { "epoch": 0.7145652027254942, "grad_norm": 2.867371082305908, "learning_rate": 1.988562899293092e-06, "loss": 0.5292, "step": 17212 }, { "epoch": 0.7146067182497055, "grad_norm": 2.53548002243042, "learning_rate": 1.9880262315114455e-06, "loss": 0.6025, "step": 17213 }, { "epoch": 0.7146482337739168, "grad_norm": 2.4324562549591064, "learning_rate": 1.9874896181871543e-06, "loss": 0.5192, "step": 17214 }, { "epoch": 0.7146897492981281, "grad_norm": 2.811663866043091, "learning_rate": 1.9869530593299196e-06, "loss": 0.5224, "step": 17215 }, { "epoch": 0.7147312648223395, "grad_norm": 2.296388864517212, "learning_rate": 1.986416554949441e-06, "loss": 0.4583, "step": 17216 }, { "epoch": 0.7147727803465508, "grad_norm": 2.1143620014190674, "learning_rate": 1.9858801050554206e-06, "loss": 0.5148, "step": 17217 }, { "epoch": 0.7148142958707622, "grad_norm": 2.373655319213867, "learning_rate": 1.9853437096575546e-06, "loss": 0.6217, "step": 17218 }, { "epoch": 0.7148558113949736, "grad_norm": 2.4977900981903076, "learning_rate": 1.984807368765545e-06, "loss": 0.7009, "step": 17219 }, { "epoch": 0.7148973269191848, "grad_norm": 2.269503116607666, "learning_rate": 1.9842710823890877e-06, "loss": 0.4699, "step": 17220 }, { "epoch": 0.7149388424433962, "grad_norm": 2.2723002433776855, "learning_rate": 1.983734850537878e-06, "loss": 0.3539, "step": 17221 }, { "epoch": 0.7149803579676075, "grad_norm": 2.383136034011841, "learning_rate": 1.9831986732216097e-06, "loss": 0.6819, "step": 17222 }, { "epoch": 0.7150218734918189, "grad_norm": 2.6105234622955322, "learning_rate": 1.9826625504499807e-06, "loss": 0.6835, "step": 17223 }, { "epoch": 0.7150633890160302, "grad_norm": 2.3477845191955566, "learning_rate": 1.9821264822326823e-06, "loss": 0.4504, "step": 17224 }, { "epoch": 0.7151049045402416, "grad_norm": 2.226283550262451, "learning_rate": 1.9815904685794073e-06, "loss": 0.564, "step": 17225 }, { "epoch": 0.7151464200644528, "grad_norm": 2.435492992401123, "learning_rate": 1.981054509499845e-06, "loss": 0.4783, "step": 17226 }, { "epoch": 0.7151879355886642, "grad_norm": 2.585981845855713, "learning_rate": 1.980518605003689e-06, "loss": 0.5823, "step": 17227 }, { "epoch": 0.7152294511128755, "grad_norm": 2.2001209259033203, "learning_rate": 1.9799827551006266e-06, "loss": 0.504, "step": 17228 }, { "epoch": 0.7152709666370869, "grad_norm": 2.2952752113342285, "learning_rate": 1.979446959800347e-06, "loss": 0.4978, "step": 17229 }, { "epoch": 0.7153124821612982, "grad_norm": 2.6039328575134277, "learning_rate": 1.9789112191125363e-06, "loss": 0.4992, "step": 17230 }, { "epoch": 0.7153539976855096, "grad_norm": 2.6225786209106445, "learning_rate": 1.97837553304688e-06, "loss": 0.615, "step": 17231 }, { "epoch": 0.7153955132097208, "grad_norm": 2.944363594055176, "learning_rate": 1.9778399016130674e-06, "loss": 0.4867, "step": 17232 }, { "epoch": 0.7154370287339322, "grad_norm": 2.3582615852355957, "learning_rate": 1.9773043248207797e-06, "loss": 0.56, "step": 17233 }, { "epoch": 0.7154785442581435, "grad_norm": 2.2815465927124023, "learning_rate": 1.9767688026797016e-06, "loss": 0.4036, "step": 17234 }, { "epoch": 0.7155200597823549, "grad_norm": 2.4958555698394775, "learning_rate": 1.976233335199513e-06, "loss": 0.4796, "step": 17235 }, { "epoch": 0.7155615753065662, "grad_norm": 2.0777127742767334, "learning_rate": 1.975697922389899e-06, "loss": 0.4699, "step": 17236 }, { "epoch": 0.7156030908307776, "grad_norm": 1.8579860925674438, "learning_rate": 1.9751625642605375e-06, "loss": 0.4354, "step": 17237 }, { "epoch": 0.7156446063549888, "grad_norm": 2.482201099395752, "learning_rate": 1.9746272608211094e-06, "loss": 0.4205, "step": 17238 }, { "epoch": 0.7156861218792002, "grad_norm": 2.769209623336792, "learning_rate": 1.9740920120812907e-06, "loss": 0.451, "step": 17239 }, { "epoch": 0.7157276374034115, "grad_norm": 2.438891887664795, "learning_rate": 1.9735568180507617e-06, "loss": 0.5203, "step": 17240 }, { "epoch": 0.7157691529276229, "grad_norm": 2.020336151123047, "learning_rate": 1.9730216787391985e-06, "loss": 0.4423, "step": 17241 }, { "epoch": 0.7158106684518342, "grad_norm": 2.0996835231781006, "learning_rate": 1.972486594156276e-06, "loss": 0.4447, "step": 17242 }, { "epoch": 0.7158521839760456, "grad_norm": 2.322690725326538, "learning_rate": 1.971951564311668e-06, "loss": 0.4091, "step": 17243 }, { "epoch": 0.7158936995002568, "grad_norm": 1.9977222681045532, "learning_rate": 1.9714165892150467e-06, "loss": 0.584, "step": 17244 }, { "epoch": 0.7159352150244682, "grad_norm": 1.892353892326355, "learning_rate": 1.9708816688760883e-06, "loss": 0.5162, "step": 17245 }, { "epoch": 0.7159767305486795, "grad_norm": 2.427893877029419, "learning_rate": 1.9703468033044627e-06, "loss": 0.4161, "step": 17246 }, { "epoch": 0.7160182460728909, "grad_norm": 2.5607104301452637, "learning_rate": 1.9698119925098398e-06, "loss": 0.453, "step": 17247 }, { "epoch": 0.7160597615971022, "grad_norm": 2.779280424118042, "learning_rate": 1.9692772365018877e-06, "loss": 0.4746, "step": 17248 }, { "epoch": 0.7161012771213136, "grad_norm": 2.2861852645874023, "learning_rate": 1.9687425352902783e-06, "loss": 0.6127, "step": 17249 }, { "epoch": 0.7161427926455249, "grad_norm": 2.4216761589050293, "learning_rate": 1.968207888884678e-06, "loss": 0.6816, "step": 17250 }, { "epoch": 0.7161843081697362, "grad_norm": 2.6139650344848633, "learning_rate": 1.9676732972947525e-06, "loss": 0.3775, "step": 17251 }, { "epoch": 0.7162258236939476, "grad_norm": 2.312570571899414, "learning_rate": 1.967138760530166e-06, "loss": 0.4317, "step": 17252 }, { "epoch": 0.7162673392181589, "grad_norm": 2.233936071395874, "learning_rate": 1.9666042786005866e-06, "loss": 0.5184, "step": 17253 }, { "epoch": 0.7163088547423703, "grad_norm": 2.9175546169281006, "learning_rate": 1.9660698515156767e-06, "loss": 0.5353, "step": 17254 }, { "epoch": 0.7163503702665815, "grad_norm": 2.528903007507324, "learning_rate": 1.9655354792850983e-06, "loss": 0.4866, "step": 17255 }, { "epoch": 0.7163918857907929, "grad_norm": 2.6982009410858154, "learning_rate": 1.965001161918513e-06, "loss": 0.4899, "step": 17256 }, { "epoch": 0.7164334013150042, "grad_norm": 2.0825960636138916, "learning_rate": 1.9644668994255793e-06, "loss": 0.6237, "step": 17257 }, { "epoch": 0.7164749168392156, "grad_norm": 2.018723487854004, "learning_rate": 1.9639326918159613e-06, "loss": 0.4927, "step": 17258 }, { "epoch": 0.7165164323634269, "grad_norm": 2.243579864501953, "learning_rate": 1.963398539099315e-06, "loss": 0.5356, "step": 17259 }, { "epoch": 0.7165579478876383, "grad_norm": 2.313175916671753, "learning_rate": 1.962864441285298e-06, "loss": 0.5231, "step": 17260 }, { "epoch": 0.7165994634118495, "grad_norm": 1.9901759624481201, "learning_rate": 1.9623303983835664e-06, "loss": 0.4386, "step": 17261 }, { "epoch": 0.7166409789360609, "grad_norm": 2.3199174404144287, "learning_rate": 1.961796410403778e-06, "loss": 0.4698, "step": 17262 }, { "epoch": 0.7166824944602722, "grad_norm": 2.188309907913208, "learning_rate": 1.961262477355586e-06, "loss": 0.4381, "step": 17263 }, { "epoch": 0.7167240099844836, "grad_norm": 2.6888420581817627, "learning_rate": 1.9607285992486446e-06, "loss": 0.4752, "step": 17264 }, { "epoch": 0.7167655255086949, "grad_norm": 2.842862129211426, "learning_rate": 1.9601947760926044e-06, "loss": 0.6559, "step": 17265 }, { "epoch": 0.7168070410329063, "grad_norm": 2.475370407104492, "learning_rate": 1.9596610078971203e-06, "loss": 0.6374, "step": 17266 }, { "epoch": 0.7168485565571175, "grad_norm": 2.682514190673828, "learning_rate": 1.9591272946718417e-06, "loss": 0.4592, "step": 17267 }, { "epoch": 0.7168900720813289, "grad_norm": 2.122513771057129, "learning_rate": 1.958593636426418e-06, "loss": 0.4203, "step": 17268 }, { "epoch": 0.7169315876055402, "grad_norm": 2.1406209468841553, "learning_rate": 1.958060033170498e-06, "loss": 0.3786, "step": 17269 }, { "epoch": 0.7169731031297516, "grad_norm": 2.3370864391326904, "learning_rate": 1.9575264849137277e-06, "loss": 0.525, "step": 17270 }, { "epoch": 0.7170146186539629, "grad_norm": 2.2383973598480225, "learning_rate": 1.956992991665757e-06, "loss": 0.5009, "step": 17271 }, { "epoch": 0.7170561341781743, "grad_norm": 2.5838067531585693, "learning_rate": 1.956459553436231e-06, "loss": 0.5691, "step": 17272 }, { "epoch": 0.7170976497023855, "grad_norm": 2.6479337215423584, "learning_rate": 1.9559261702347925e-06, "loss": 0.5841, "step": 17273 }, { "epoch": 0.7171391652265969, "grad_norm": 2.3212549686431885, "learning_rate": 1.9553928420710843e-06, "loss": 0.5478, "step": 17274 }, { "epoch": 0.7171806807508082, "grad_norm": 2.1006805896759033, "learning_rate": 1.9548595689547535e-06, "loss": 0.5277, "step": 17275 }, { "epoch": 0.7172221962750196, "grad_norm": 2.1672165393829346, "learning_rate": 1.954326350895439e-06, "loss": 0.4542, "step": 17276 }, { "epoch": 0.7172637117992309, "grad_norm": 2.1234452724456787, "learning_rate": 1.9537931879027827e-06, "loss": 0.5315, "step": 17277 }, { "epoch": 0.7173052273234423, "grad_norm": 2.5637314319610596, "learning_rate": 1.953260079986421e-06, "loss": 0.4373, "step": 17278 }, { "epoch": 0.7173467428476535, "grad_norm": 2.168227195739746, "learning_rate": 1.952727027155997e-06, "loss": 0.5688, "step": 17279 }, { "epoch": 0.7173882583718649, "grad_norm": 2.1293013095855713, "learning_rate": 1.9521940294211473e-06, "loss": 0.603, "step": 17280 }, { "epoch": 0.7174297738960763, "grad_norm": 2.4491026401519775, "learning_rate": 1.9516610867915074e-06, "loss": 0.5543, "step": 17281 }, { "epoch": 0.7174712894202876, "grad_norm": 2.48098087310791, "learning_rate": 1.951128199276714e-06, "loss": 0.5063, "step": 17282 }, { "epoch": 0.717512804944499, "grad_norm": 2.26694393157959, "learning_rate": 1.9505953668863996e-06, "loss": 0.4236, "step": 17283 }, { "epoch": 0.7175543204687103, "grad_norm": 2.1681723594665527, "learning_rate": 1.9500625896302017e-06, "loss": 0.3967, "step": 17284 }, { "epoch": 0.7175958359929216, "grad_norm": 2.0641024112701416, "learning_rate": 1.9495298675177516e-06, "loss": 0.4093, "step": 17285 }, { "epoch": 0.7176373515171329, "grad_norm": 1.8404172658920288, "learning_rate": 1.9489972005586804e-06, "loss": 0.4836, "step": 17286 }, { "epoch": 0.7176788670413443, "grad_norm": 2.5735421180725098, "learning_rate": 1.9484645887626176e-06, "loss": 0.5095, "step": 17287 }, { "epoch": 0.7177203825655556, "grad_norm": 1.9447698593139648, "learning_rate": 1.947932032139196e-06, "loss": 0.4869, "step": 17288 }, { "epoch": 0.717761898089767, "grad_norm": 3.1389832496643066, "learning_rate": 1.947399530698043e-06, "loss": 0.5509, "step": 17289 }, { "epoch": 0.7178034136139783, "grad_norm": 2.213881015777588, "learning_rate": 1.946867084448787e-06, "loss": 0.4368, "step": 17290 }, { "epoch": 0.7178449291381896, "grad_norm": 2.5966176986694336, "learning_rate": 1.9463346934010517e-06, "loss": 0.6353, "step": 17291 }, { "epoch": 0.7178864446624009, "grad_norm": 2.380915641784668, "learning_rate": 1.9458023575644675e-06, "loss": 0.4726, "step": 17292 }, { "epoch": 0.7179279601866123, "grad_norm": 2.524381160736084, "learning_rate": 1.9452700769486567e-06, "loss": 0.5252, "step": 17293 }, { "epoch": 0.7179694757108236, "grad_norm": 2.5544497966766357, "learning_rate": 1.944737851563243e-06, "loss": 0.5132, "step": 17294 }, { "epoch": 0.718010991235035, "grad_norm": 2.1377053260803223, "learning_rate": 1.94420568141785e-06, "loss": 0.5498, "step": 17295 }, { "epoch": 0.7180525067592463, "grad_norm": 2.0074124336242676, "learning_rate": 1.9436735665220973e-06, "loss": 0.4487, "step": 17296 }, { "epoch": 0.7180940222834576, "grad_norm": 2.7803525924682617, "learning_rate": 1.943141506885611e-06, "loss": 0.5763, "step": 17297 }, { "epoch": 0.7181355378076689, "grad_norm": 3.067873001098633, "learning_rate": 1.942609502518004e-06, "loss": 0.6358, "step": 17298 }, { "epoch": 0.7181770533318803, "grad_norm": 2.0660107135772705, "learning_rate": 1.9420775534289e-06, "loss": 0.4369, "step": 17299 }, { "epoch": 0.7182185688560916, "grad_norm": 2.0293142795562744, "learning_rate": 1.9415456596279132e-06, "loss": 0.3796, "step": 17300 }, { "epoch": 0.718260084380303, "grad_norm": 2.4662926197052, "learning_rate": 1.9410138211246644e-06, "loss": 0.4279, "step": 17301 }, { "epoch": 0.7183015999045143, "grad_norm": 2.208378314971924, "learning_rate": 1.9404820379287677e-06, "loss": 0.3879, "step": 17302 }, { "epoch": 0.7183431154287256, "grad_norm": 2.567169427871704, "learning_rate": 1.9399503100498373e-06, "loss": 0.5092, "step": 17303 }, { "epoch": 0.7183846309529369, "grad_norm": 2.0962681770324707, "learning_rate": 1.9394186374974855e-06, "loss": 0.6557, "step": 17304 }, { "epoch": 0.7184261464771483, "grad_norm": 2.3107354640960693, "learning_rate": 1.9388870202813286e-06, "loss": 0.6095, "step": 17305 }, { "epoch": 0.7184676620013596, "grad_norm": 2.2383060455322266, "learning_rate": 1.9383554584109765e-06, "loss": 0.5685, "step": 17306 }, { "epoch": 0.718509177525571, "grad_norm": 2.7171261310577393, "learning_rate": 1.9378239518960406e-06, "loss": 0.5629, "step": 17307 }, { "epoch": 0.7185506930497823, "grad_norm": 1.9194289445877075, "learning_rate": 1.93729250074613e-06, "loss": 0.6292, "step": 17308 }, { "epoch": 0.7185922085739936, "grad_norm": 2.5374057292938232, "learning_rate": 1.9367611049708524e-06, "loss": 0.443, "step": 17309 }, { "epoch": 0.7186337240982049, "grad_norm": 2.748933792114258, "learning_rate": 1.93622976457982e-06, "loss": 0.5719, "step": 17310 }, { "epoch": 0.7186752396224163, "grad_norm": 2.8294997215270996, "learning_rate": 1.9356984795826334e-06, "loss": 0.4642, "step": 17311 }, { "epoch": 0.7187167551466277, "grad_norm": 2.3530242443084717, "learning_rate": 1.935167249988903e-06, "loss": 0.5501, "step": 17312 }, { "epoch": 0.718758270670839, "grad_norm": 2.929452896118164, "learning_rate": 1.9346360758082305e-06, "loss": 0.6134, "step": 17313 }, { "epoch": 0.7187997861950504, "grad_norm": 1.9785994291305542, "learning_rate": 1.934104957050223e-06, "loss": 0.4389, "step": 17314 }, { "epoch": 0.7188413017192616, "grad_norm": 2.204881191253662, "learning_rate": 1.9335738937244813e-06, "loss": 0.5342, "step": 17315 }, { "epoch": 0.718882817243473, "grad_norm": 1.7518181800842285, "learning_rate": 1.9330428858406073e-06, "loss": 0.4579, "step": 17316 }, { "epoch": 0.7189243327676843, "grad_norm": 1.9742205142974854, "learning_rate": 1.9325119334082027e-06, "loss": 0.4515, "step": 17317 }, { "epoch": 0.7189658482918957, "grad_norm": 2.2785377502441406, "learning_rate": 1.931981036436864e-06, "loss": 0.5593, "step": 17318 }, { "epoch": 0.719007363816107, "grad_norm": 2.7266952991485596, "learning_rate": 1.9314501949361946e-06, "loss": 0.6255, "step": 17319 }, { "epoch": 0.7190488793403184, "grad_norm": 2.460907459259033, "learning_rate": 1.93091940891579e-06, "loss": 0.5367, "step": 17320 }, { "epoch": 0.7190903948645296, "grad_norm": 2.2411515712738037, "learning_rate": 1.930388678385247e-06, "loss": 0.4978, "step": 17321 }, { "epoch": 0.719131910388741, "grad_norm": 2.334385395050049, "learning_rate": 1.92985800335416e-06, "loss": 0.5182, "step": 17322 }, { "epoch": 0.7191734259129523, "grad_norm": 2.801809787750244, "learning_rate": 1.9293273838321288e-06, "loss": 0.3827, "step": 17323 }, { "epoch": 0.7192149414371637, "grad_norm": 2.181311845779419, "learning_rate": 1.9287968198287392e-06, "loss": 0.4811, "step": 17324 }, { "epoch": 0.719256456961375, "grad_norm": 2.4808642864227295, "learning_rate": 1.928266311353591e-06, "loss": 0.6615, "step": 17325 }, { "epoch": 0.7192979724855864, "grad_norm": 2.0615618228912354, "learning_rate": 1.927735858416271e-06, "loss": 0.5797, "step": 17326 }, { "epoch": 0.7193394880097976, "grad_norm": 2.3509907722473145, "learning_rate": 1.9272054610263742e-06, "loss": 0.4967, "step": 17327 }, { "epoch": 0.719381003534009, "grad_norm": 2.912654161453247, "learning_rate": 1.9266751191934883e-06, "loss": 0.5458, "step": 17328 }, { "epoch": 0.7194225190582203, "grad_norm": 2.370619535446167, "learning_rate": 1.926144832927202e-06, "loss": 0.6419, "step": 17329 }, { "epoch": 0.7194640345824317, "grad_norm": 2.7109286785125732, "learning_rate": 1.9256146022371032e-06, "loss": 0.5384, "step": 17330 }, { "epoch": 0.719505550106643, "grad_norm": 2.49979305267334, "learning_rate": 1.9250844271327766e-06, "loss": 0.5914, "step": 17331 }, { "epoch": 0.7195470656308544, "grad_norm": 2.2687933444976807, "learning_rate": 1.9245543076238115e-06, "loss": 0.5175, "step": 17332 }, { "epoch": 0.7195885811550656, "grad_norm": 2.67610764503479, "learning_rate": 1.9240242437197913e-06, "loss": 0.6011, "step": 17333 }, { "epoch": 0.719630096679277, "grad_norm": 2.944610118865967, "learning_rate": 1.9234942354302993e-06, "loss": 0.5559, "step": 17334 }, { "epoch": 0.7196716122034883, "grad_norm": 2.3303494453430176, "learning_rate": 1.9229642827649164e-06, "loss": 0.4572, "step": 17335 }, { "epoch": 0.7197131277276997, "grad_norm": 2.3331258296966553, "learning_rate": 1.9224343857332295e-06, "loss": 0.6058, "step": 17336 }, { "epoch": 0.719754643251911, "grad_norm": 2.67280650138855, "learning_rate": 1.9219045443448133e-06, "loss": 0.5276, "step": 17337 }, { "epoch": 0.7197961587761224, "grad_norm": 2.2968618869781494, "learning_rate": 1.9213747586092505e-06, "loss": 0.3006, "step": 17338 }, { "epoch": 0.7198376743003336, "grad_norm": 2.5862460136413574, "learning_rate": 1.9208450285361187e-06, "loss": 0.5482, "step": 17339 }, { "epoch": 0.719879189824545, "grad_norm": 2.358356475830078, "learning_rate": 1.9203153541349976e-06, "loss": 0.4438, "step": 17340 }, { "epoch": 0.7199207053487563, "grad_norm": 2.6161248683929443, "learning_rate": 1.919785735415463e-06, "loss": 0.5031, "step": 17341 }, { "epoch": 0.7199622208729677, "grad_norm": 2.340445041656494, "learning_rate": 1.9192561723870896e-06, "loss": 0.4749, "step": 17342 }, { "epoch": 0.7200037363971791, "grad_norm": 2.507131338119507, "learning_rate": 1.918726665059453e-06, "loss": 0.588, "step": 17343 }, { "epoch": 0.7200452519213904, "grad_norm": 2.5081100463867188, "learning_rate": 1.9181972134421246e-06, "loss": 0.4426, "step": 17344 }, { "epoch": 0.7200867674456017, "grad_norm": 2.406855344772339, "learning_rate": 1.9176678175446806e-06, "loss": 0.4511, "step": 17345 }, { "epoch": 0.720128282969813, "grad_norm": 2.5276939868927, "learning_rate": 1.9171384773766903e-06, "loss": 0.5467, "step": 17346 }, { "epoch": 0.7201697984940244, "grad_norm": 2.3370959758758545, "learning_rate": 1.9166091929477256e-06, "loss": 0.4612, "step": 17347 }, { "epoch": 0.7202113140182357, "grad_norm": 2.3101468086242676, "learning_rate": 1.916079964267353e-06, "loss": 0.5129, "step": 17348 }, { "epoch": 0.7202528295424471, "grad_norm": 2.233501434326172, "learning_rate": 1.9155507913451475e-06, "loss": 0.4323, "step": 17349 }, { "epoch": 0.7202943450666583, "grad_norm": 2.8143441677093506, "learning_rate": 1.9150216741906696e-06, "loss": 0.5214, "step": 17350 }, { "epoch": 0.7203358605908697, "grad_norm": 2.6756591796875, "learning_rate": 1.91449261281349e-06, "loss": 0.4753, "step": 17351 }, { "epoch": 0.720377376115081, "grad_norm": 2.358004093170166, "learning_rate": 1.913963607223172e-06, "loss": 0.6497, "step": 17352 }, { "epoch": 0.7204188916392924, "grad_norm": 1.9519351720809937, "learning_rate": 1.9134346574292835e-06, "loss": 0.4095, "step": 17353 }, { "epoch": 0.7204604071635037, "grad_norm": 2.6816818714141846, "learning_rate": 1.9129057634413857e-06, "loss": 0.5852, "step": 17354 }, { "epoch": 0.7205019226877151, "grad_norm": 2.6910762786865234, "learning_rate": 1.912376925269041e-06, "loss": 0.49, "step": 17355 }, { "epoch": 0.7205434382119263, "grad_norm": 2.6671862602233887, "learning_rate": 1.9118481429218124e-06, "loss": 0.4161, "step": 17356 }, { "epoch": 0.7205849537361377, "grad_norm": 2.3693556785583496, "learning_rate": 1.911319416409257e-06, "loss": 0.4941, "step": 17357 }, { "epoch": 0.720626469260349, "grad_norm": 2.7589664459228516, "learning_rate": 1.9107907457409384e-06, "loss": 0.4856, "step": 17358 }, { "epoch": 0.7206679847845604, "grad_norm": 2.5397090911865234, "learning_rate": 1.910262130926414e-06, "loss": 0.4556, "step": 17359 }, { "epoch": 0.7207095003087717, "grad_norm": 2.0785834789276123, "learning_rate": 1.9097335719752407e-06, "loss": 0.4511, "step": 17360 }, { "epoch": 0.7207510158329831, "grad_norm": 2.571676254272461, "learning_rate": 1.9092050688969736e-06, "loss": 0.5413, "step": 17361 }, { "epoch": 0.7207925313571943, "grad_norm": 2.1813812255859375, "learning_rate": 1.9086766217011733e-06, "loss": 0.5345, "step": 17362 }, { "epoch": 0.7208340468814057, "grad_norm": 2.191122531890869, "learning_rate": 1.9081482303973866e-06, "loss": 0.533, "step": 17363 }, { "epoch": 0.720875562405617, "grad_norm": 2.509124994277954, "learning_rate": 1.9076198949951728e-06, "loss": 0.6055, "step": 17364 }, { "epoch": 0.7209170779298284, "grad_norm": 2.283006429672241, "learning_rate": 1.9070916155040815e-06, "loss": 0.5224, "step": 17365 }, { "epoch": 0.7209585934540397, "grad_norm": 2.4419121742248535, "learning_rate": 1.9065633919336668e-06, "loss": 0.587, "step": 17366 }, { "epoch": 0.7210001089782511, "grad_norm": 1.9356218576431274, "learning_rate": 1.9060352242934776e-06, "loss": 0.5427, "step": 17367 }, { "epoch": 0.7210416245024623, "grad_norm": 2.194866418838501, "learning_rate": 1.905507112593063e-06, "loss": 0.4436, "step": 17368 }, { "epoch": 0.7210831400266737, "grad_norm": 2.387320041656494, "learning_rate": 1.9049790568419717e-06, "loss": 0.3714, "step": 17369 }, { "epoch": 0.721124655550885, "grad_norm": 2.671625852584839, "learning_rate": 1.9044510570497498e-06, "loss": 0.5809, "step": 17370 }, { "epoch": 0.7211661710750964, "grad_norm": 2.172767400741577, "learning_rate": 1.9039231132259467e-06, "loss": 0.5246, "step": 17371 }, { "epoch": 0.7212076865993077, "grad_norm": 2.3997182846069336, "learning_rate": 1.9033952253801062e-06, "loss": 0.5497, "step": 17372 }, { "epoch": 0.7212492021235191, "grad_norm": 2.2292230129241943, "learning_rate": 1.9028673935217723e-06, "loss": 0.5214, "step": 17373 }, { "epoch": 0.7212907176477305, "grad_norm": 2.1554877758026123, "learning_rate": 1.9023396176604869e-06, "loss": 0.5005, "step": 17374 }, { "epoch": 0.7213322331719417, "grad_norm": 2.6154847145080566, "learning_rate": 1.9018118978057975e-06, "loss": 0.477, "step": 17375 }, { "epoch": 0.7213737486961531, "grad_norm": 1.889265537261963, "learning_rate": 1.9012842339672388e-06, "loss": 0.4347, "step": 17376 }, { "epoch": 0.7214152642203644, "grad_norm": 2.40556001663208, "learning_rate": 1.900756626154356e-06, "loss": 0.5989, "step": 17377 }, { "epoch": 0.7214567797445758, "grad_norm": 1.981650710105896, "learning_rate": 1.9002290743766843e-06, "loss": 0.5071, "step": 17378 }, { "epoch": 0.7214982952687871, "grad_norm": 2.4622576236724854, "learning_rate": 1.8997015786437666e-06, "loss": 0.5654, "step": 17379 }, { "epoch": 0.7215398107929984, "grad_norm": 2.5772933959960938, "learning_rate": 1.899174138965138e-06, "loss": 0.4773, "step": 17380 }, { "epoch": 0.7215813263172097, "grad_norm": 3.101414203643799, "learning_rate": 1.8986467553503352e-06, "loss": 0.5592, "step": 17381 }, { "epoch": 0.7216228418414211, "grad_norm": 2.8284835815429688, "learning_rate": 1.898119427808892e-06, "loss": 0.5064, "step": 17382 }, { "epoch": 0.7216643573656324, "grad_norm": 2.1503050327301025, "learning_rate": 1.8975921563503425e-06, "loss": 0.5764, "step": 17383 }, { "epoch": 0.7217058728898438, "grad_norm": 2.1709659099578857, "learning_rate": 1.897064940984223e-06, "loss": 0.5639, "step": 17384 }, { "epoch": 0.7217473884140551, "grad_norm": 2.5027832984924316, "learning_rate": 1.896537781720063e-06, "loss": 0.3234, "step": 17385 }, { "epoch": 0.7217889039382664, "grad_norm": 2.303635358810425, "learning_rate": 1.8960106785673949e-06, "loss": 0.3931, "step": 17386 }, { "epoch": 0.7218304194624777, "grad_norm": 2.2977073192596436, "learning_rate": 1.8954836315357461e-06, "loss": 0.635, "step": 17387 }, { "epoch": 0.7218719349866891, "grad_norm": 2.6568002700805664, "learning_rate": 1.894956640634652e-06, "loss": 0.5022, "step": 17388 }, { "epoch": 0.7219134505109004, "grad_norm": 2.3360419273376465, "learning_rate": 1.8944297058736333e-06, "loss": 0.5173, "step": 17389 }, { "epoch": 0.7219549660351118, "grad_norm": 2.4502789974212646, "learning_rate": 1.8939028272622223e-06, "loss": 0.5143, "step": 17390 }, { "epoch": 0.7219964815593231, "grad_norm": 2.7368173599243164, "learning_rate": 1.893376004809942e-06, "loss": 0.7402, "step": 17391 }, { "epoch": 0.7220379970835344, "grad_norm": 1.787886142730713, "learning_rate": 1.8928492385263202e-06, "loss": 0.4781, "step": 17392 }, { "epoch": 0.7220795126077457, "grad_norm": 2.3106157779693604, "learning_rate": 1.8923225284208801e-06, "loss": 0.611, "step": 17393 }, { "epoch": 0.7221210281319571, "grad_norm": 1.964547872543335, "learning_rate": 1.8917958745031445e-06, "loss": 0.4427, "step": 17394 }, { "epoch": 0.7221625436561684, "grad_norm": 2.7723708152770996, "learning_rate": 1.891269276782635e-06, "loss": 0.4225, "step": 17395 }, { "epoch": 0.7222040591803798, "grad_norm": 2.426269054412842, "learning_rate": 1.8907427352688718e-06, "loss": 0.4913, "step": 17396 }, { "epoch": 0.722245574704591, "grad_norm": 3.0804121494293213, "learning_rate": 1.8902162499713773e-06, "loss": 0.5378, "step": 17397 }, { "epoch": 0.7222870902288024, "grad_norm": 2.1782753467559814, "learning_rate": 1.8896898208996695e-06, "loss": 0.4578, "step": 17398 }, { "epoch": 0.7223286057530137, "grad_norm": 2.4141037464141846, "learning_rate": 1.8891634480632664e-06, "loss": 0.4993, "step": 17399 }, { "epoch": 0.7223701212772251, "grad_norm": 2.323498249053955, "learning_rate": 1.888637131471683e-06, "loss": 0.4774, "step": 17400 }, { "epoch": 0.7224116368014364, "grad_norm": 2.031292200088501, "learning_rate": 1.8881108711344403e-06, "loss": 0.5322, "step": 17401 }, { "epoch": 0.7224531523256478, "grad_norm": 2.390665292739868, "learning_rate": 1.8875846670610464e-06, "loss": 0.3964, "step": 17402 }, { "epoch": 0.722494667849859, "grad_norm": 2.3222296237945557, "learning_rate": 1.8870585192610213e-06, "loss": 0.5924, "step": 17403 }, { "epoch": 0.7225361833740704, "grad_norm": 2.1911988258361816, "learning_rate": 1.8865324277438729e-06, "loss": 0.4699, "step": 17404 }, { "epoch": 0.7225776988982818, "grad_norm": 2.5633201599121094, "learning_rate": 1.886006392519118e-06, "loss": 0.6585, "step": 17405 }, { "epoch": 0.7226192144224931, "grad_norm": 2.226473808288574, "learning_rate": 1.8854804135962646e-06, "loss": 0.4824, "step": 17406 }, { "epoch": 0.7226607299467045, "grad_norm": 2.5489675998687744, "learning_rate": 1.8849544909848232e-06, "loss": 0.5368, "step": 17407 }, { "epoch": 0.7227022454709158, "grad_norm": 2.742154598236084, "learning_rate": 1.8844286246943023e-06, "loss": 0.6189, "step": 17408 }, { "epoch": 0.7227437609951272, "grad_norm": 1.938452124595642, "learning_rate": 1.8839028147342087e-06, "loss": 0.4164, "step": 17409 }, { "epoch": 0.7227852765193384, "grad_norm": 2.76200270652771, "learning_rate": 1.883377061114053e-06, "loss": 0.6308, "step": 17410 }, { "epoch": 0.7228267920435498, "grad_norm": 2.5729734897613525, "learning_rate": 1.8828513638433355e-06, "loss": 0.6154, "step": 17411 }, { "epoch": 0.7228683075677611, "grad_norm": 2.153921365737915, "learning_rate": 1.882325722931565e-06, "loss": 0.5042, "step": 17412 }, { "epoch": 0.7229098230919725, "grad_norm": 2.4654409885406494, "learning_rate": 1.8818001383882428e-06, "loss": 0.3958, "step": 17413 }, { "epoch": 0.7229513386161838, "grad_norm": 2.1910533905029297, "learning_rate": 1.8812746102228763e-06, "loss": 0.541, "step": 17414 }, { "epoch": 0.7229928541403952, "grad_norm": 2.4438774585723877, "learning_rate": 1.88074913844496e-06, "loss": 0.5771, "step": 17415 }, { "epoch": 0.7230343696646064, "grad_norm": 2.4053328037261963, "learning_rate": 1.8802237230640003e-06, "loss": 0.4768, "step": 17416 }, { "epoch": 0.7230758851888178, "grad_norm": 2.349926233291626, "learning_rate": 1.8796983640894928e-06, "loss": 0.5623, "step": 17417 }, { "epoch": 0.7231174007130291, "grad_norm": 2.26301646232605, "learning_rate": 1.8791730615309405e-06, "loss": 0.3499, "step": 17418 }, { "epoch": 0.7231589162372405, "grad_norm": 2.4388554096221924, "learning_rate": 1.8786478153978383e-06, "loss": 0.6685, "step": 17419 }, { "epoch": 0.7232004317614518, "grad_norm": 2.316932439804077, "learning_rate": 1.8781226256996838e-06, "loss": 0.5882, "step": 17420 }, { "epoch": 0.7232419472856632, "grad_norm": 2.5469954013824463, "learning_rate": 1.8775974924459716e-06, "loss": 0.5463, "step": 17421 }, { "epoch": 0.7232834628098744, "grad_norm": 2.5181031227111816, "learning_rate": 1.877072415646195e-06, "loss": 0.3897, "step": 17422 }, { "epoch": 0.7233249783340858, "grad_norm": 2.36612606048584, "learning_rate": 1.876547395309853e-06, "loss": 0.5741, "step": 17423 }, { "epoch": 0.7233664938582971, "grad_norm": 2.2411117553710938, "learning_rate": 1.8760224314464303e-06, "loss": 0.6131, "step": 17424 }, { "epoch": 0.7234080093825085, "grad_norm": 2.483222484588623, "learning_rate": 1.875497524065425e-06, "loss": 0.4596, "step": 17425 }, { "epoch": 0.7234495249067198, "grad_norm": 2.060997724533081, "learning_rate": 1.874972673176323e-06, "loss": 0.5623, "step": 17426 }, { "epoch": 0.7234910404309312, "grad_norm": 2.5959312915802, "learning_rate": 1.8744478787886188e-06, "loss": 0.5068, "step": 17427 }, { "epoch": 0.7235325559551424, "grad_norm": 2.172952175140381, "learning_rate": 1.8739231409117948e-06, "loss": 0.4036, "step": 17428 }, { "epoch": 0.7235740714793538, "grad_norm": 3.072645902633667, "learning_rate": 1.8733984595553433e-06, "loss": 0.7208, "step": 17429 }, { "epoch": 0.7236155870035651, "grad_norm": 2.269853115081787, "learning_rate": 1.8728738347287468e-06, "loss": 0.3765, "step": 17430 }, { "epoch": 0.7236571025277765, "grad_norm": 2.275824785232544, "learning_rate": 1.872349266441495e-06, "loss": 0.4344, "step": 17431 }, { "epoch": 0.7236986180519878, "grad_norm": 2.3056082725524902, "learning_rate": 1.8718247547030694e-06, "loss": 0.5743, "step": 17432 }, { "epoch": 0.7237401335761992, "grad_norm": 3.0342087745666504, "learning_rate": 1.871300299522954e-06, "loss": 0.6035, "step": 17433 }, { "epoch": 0.7237816491004105, "grad_norm": 1.9068225622177124, "learning_rate": 1.8707759009106307e-06, "loss": 0.4719, "step": 17434 }, { "epoch": 0.7238231646246218, "grad_norm": 2.7171616554260254, "learning_rate": 1.8702515588755798e-06, "loss": 0.5831, "step": 17435 }, { "epoch": 0.7238646801488332, "grad_norm": 2.658167600631714, "learning_rate": 1.8697272734272858e-06, "loss": 0.5114, "step": 17436 }, { "epoch": 0.7239061956730445, "grad_norm": 2.8512604236602783, "learning_rate": 1.8692030445752218e-06, "loss": 0.5106, "step": 17437 }, { "epoch": 0.7239477111972559, "grad_norm": 2.188873052597046, "learning_rate": 1.8686788723288711e-06, "loss": 0.4847, "step": 17438 }, { "epoch": 0.7239892267214671, "grad_norm": 1.999292016029358, "learning_rate": 1.8681547566977071e-06, "loss": 0.4258, "step": 17439 }, { "epoch": 0.7240307422456785, "grad_norm": 3.096431255340576, "learning_rate": 1.8676306976912106e-06, "loss": 0.5351, "step": 17440 }, { "epoch": 0.7240722577698898, "grad_norm": 2.361544370651245, "learning_rate": 1.867106695318851e-06, "loss": 0.5709, "step": 17441 }, { "epoch": 0.7241137732941012, "grad_norm": 2.1067657470703125, "learning_rate": 1.8665827495901074e-06, "loss": 0.3842, "step": 17442 }, { "epoch": 0.7241552888183125, "grad_norm": 1.9582806825637817, "learning_rate": 1.8660588605144487e-06, "loss": 0.5731, "step": 17443 }, { "epoch": 0.7241968043425239, "grad_norm": 2.712329149246216, "learning_rate": 1.8655350281013523e-06, "loss": 0.523, "step": 17444 }, { "epoch": 0.7242383198667351, "grad_norm": 2.4218695163726807, "learning_rate": 1.8650112523602832e-06, "loss": 0.5605, "step": 17445 }, { "epoch": 0.7242798353909465, "grad_norm": 2.2774226665496826, "learning_rate": 1.8644875333007156e-06, "loss": 0.5428, "step": 17446 }, { "epoch": 0.7243213509151578, "grad_norm": 2.4918506145477295, "learning_rate": 1.8639638709321173e-06, "loss": 0.416, "step": 17447 }, { "epoch": 0.7243628664393692, "grad_norm": 3.191305637359619, "learning_rate": 1.8634402652639538e-06, "loss": 0.5782, "step": 17448 }, { "epoch": 0.7244043819635805, "grad_norm": 2.1156225204467773, "learning_rate": 1.862916716305699e-06, "loss": 0.477, "step": 17449 }, { "epoch": 0.7244458974877919, "grad_norm": 2.4148526191711426, "learning_rate": 1.8623932240668097e-06, "loss": 0.6678, "step": 17450 }, { "epoch": 0.7244874130120031, "grad_norm": 2.02249813079834, "learning_rate": 1.861869788556757e-06, "loss": 0.5336, "step": 17451 }, { "epoch": 0.7245289285362145, "grad_norm": 2.30698823928833, "learning_rate": 1.861346409785002e-06, "loss": 0.529, "step": 17452 }, { "epoch": 0.7245704440604258, "grad_norm": 2.3014252185821533, "learning_rate": 1.8608230877610117e-06, "loss": 0.4421, "step": 17453 }, { "epoch": 0.7246119595846372, "grad_norm": 2.011664867401123, "learning_rate": 1.860299822494241e-06, "loss": 0.5447, "step": 17454 }, { "epoch": 0.7246534751088485, "grad_norm": 2.06489896774292, "learning_rate": 1.8597766139941564e-06, "loss": 0.466, "step": 17455 }, { "epoch": 0.7246949906330599, "grad_norm": 2.162357807159424, "learning_rate": 1.859253462270214e-06, "loss": 0.5781, "step": 17456 }, { "epoch": 0.7247365061572711, "grad_norm": 2.3365371227264404, "learning_rate": 1.858730367331878e-06, "loss": 0.5222, "step": 17457 }, { "epoch": 0.7247780216814825, "grad_norm": 2.5120270252227783, "learning_rate": 1.8582073291885987e-06, "loss": 0.4911, "step": 17458 }, { "epoch": 0.7248195372056938, "grad_norm": 2.73702335357666, "learning_rate": 1.857684347849838e-06, "loss": 0.5857, "step": 17459 }, { "epoch": 0.7248610527299052, "grad_norm": 2.386774778366089, "learning_rate": 1.8571614233250507e-06, "loss": 0.6854, "step": 17460 }, { "epoch": 0.7249025682541165, "grad_norm": 2.348689317703247, "learning_rate": 1.8566385556236883e-06, "loss": 0.5817, "step": 17461 }, { "epoch": 0.7249440837783279, "grad_norm": 2.5327882766723633, "learning_rate": 1.8561157447552102e-06, "loss": 0.6738, "step": 17462 }, { "epoch": 0.7249855993025391, "grad_norm": 2.496112108230591, "learning_rate": 1.8555929907290627e-06, "loss": 0.5379, "step": 17463 }, { "epoch": 0.7250271148267505, "grad_norm": 2.003878593444824, "learning_rate": 1.8550702935547015e-06, "loss": 0.4085, "step": 17464 }, { "epoch": 0.7250686303509619, "grad_norm": 2.192894220352173, "learning_rate": 1.8545476532415746e-06, "loss": 0.5409, "step": 17465 }, { "epoch": 0.7251101458751732, "grad_norm": 2.948207139968872, "learning_rate": 1.8540250697991357e-06, "loss": 0.5575, "step": 17466 }, { "epoch": 0.7251516613993846, "grad_norm": 2.4090042114257812, "learning_rate": 1.853502543236827e-06, "loss": 0.6671, "step": 17467 }, { "epoch": 0.7251931769235959, "grad_norm": 2.216942071914673, "learning_rate": 1.8529800735641012e-06, "loss": 0.5207, "step": 17468 }, { "epoch": 0.7252346924478072, "grad_norm": 2.892108678817749, "learning_rate": 1.852457660790401e-06, "loss": 0.5262, "step": 17469 }, { "epoch": 0.7252762079720185, "grad_norm": 2.3111398220062256, "learning_rate": 1.8519353049251766e-06, "loss": 0.5665, "step": 17470 }, { "epoch": 0.7253177234962299, "grad_norm": 2.092585325241089, "learning_rate": 1.8514130059778663e-06, "loss": 0.5232, "step": 17471 }, { "epoch": 0.7253592390204412, "grad_norm": 2.9374821186065674, "learning_rate": 1.8508907639579176e-06, "loss": 0.4793, "step": 17472 }, { "epoch": 0.7254007545446526, "grad_norm": 2.531730890274048, "learning_rate": 1.8503685788747716e-06, "loss": 0.5092, "step": 17473 }, { "epoch": 0.7254422700688639, "grad_norm": 2.4578068256378174, "learning_rate": 1.8498464507378682e-06, "loss": 0.4732, "step": 17474 }, { "epoch": 0.7254837855930752, "grad_norm": 2.2646305561065674, "learning_rate": 1.8493243795566517e-06, "loss": 0.5381, "step": 17475 }, { "epoch": 0.7255253011172865, "grad_norm": 2.275972366333008, "learning_rate": 1.8488023653405557e-06, "loss": 0.4649, "step": 17476 }, { "epoch": 0.7255668166414979, "grad_norm": 2.010214328765869, "learning_rate": 1.848280408099023e-06, "loss": 0.5598, "step": 17477 }, { "epoch": 0.7256083321657092, "grad_norm": 2.474548101425171, "learning_rate": 1.8477585078414866e-06, "loss": 0.4186, "step": 17478 }, { "epoch": 0.7256498476899206, "grad_norm": 1.8232303857803345, "learning_rate": 1.8472366645773892e-06, "loss": 0.4829, "step": 17479 }, { "epoch": 0.7256913632141319, "grad_norm": 2.729151487350464, "learning_rate": 1.8467148783161576e-06, "loss": 0.5425, "step": 17480 }, { "epoch": 0.7257328787383432, "grad_norm": 2.2411553859710693, "learning_rate": 1.846193149067232e-06, "loss": 0.424, "step": 17481 }, { "epoch": 0.7257743942625545, "grad_norm": 2.1485249996185303, "learning_rate": 1.8456714768400414e-06, "loss": 0.481, "step": 17482 }, { "epoch": 0.7258159097867659, "grad_norm": 2.711752414703369, "learning_rate": 1.8451498616440227e-06, "loss": 0.6426, "step": 17483 }, { "epoch": 0.7258574253109772, "grad_norm": 2.660625696182251, "learning_rate": 1.844628303488601e-06, "loss": 0.6332, "step": 17484 }, { "epoch": 0.7258989408351886, "grad_norm": 2.2968146800994873, "learning_rate": 1.8441068023832104e-06, "loss": 0.5127, "step": 17485 }, { "epoch": 0.7259404563593999, "grad_norm": 2.287869930267334, "learning_rate": 1.8435853583372786e-06, "loss": 0.4165, "step": 17486 }, { "epoch": 0.7259819718836112, "grad_norm": 3.200155735015869, "learning_rate": 1.8430639713602317e-06, "loss": 0.6608, "step": 17487 }, { "epoch": 0.7260234874078225, "grad_norm": 2.0190253257751465, "learning_rate": 1.8425426414615012e-06, "loss": 0.5288, "step": 17488 }, { "epoch": 0.7260650029320339, "grad_norm": 2.022761106491089, "learning_rate": 1.8420213686505068e-06, "loss": 0.4578, "step": 17489 }, { "epoch": 0.7261065184562452, "grad_norm": 1.8284940719604492, "learning_rate": 1.8415001529366778e-06, "loss": 0.4141, "step": 17490 }, { "epoch": 0.7261480339804566, "grad_norm": 2.4061620235443115, "learning_rate": 1.8409789943294343e-06, "loss": 0.4059, "step": 17491 }, { "epoch": 0.7261895495046679, "grad_norm": 2.3879146575927734, "learning_rate": 1.8404578928382049e-06, "loss": 0.5677, "step": 17492 }, { "epoch": 0.7262310650288792, "grad_norm": 2.5664467811584473, "learning_rate": 1.8399368484724045e-06, "loss": 0.5875, "step": 17493 }, { "epoch": 0.7262725805530905, "grad_norm": 2.3905065059661865, "learning_rate": 1.8394158612414575e-06, "loss": 0.5043, "step": 17494 }, { "epoch": 0.7263140960773019, "grad_norm": 2.338926076889038, "learning_rate": 1.8388949311547815e-06, "loss": 0.4903, "step": 17495 }, { "epoch": 0.7263556116015133, "grad_norm": 3.1145639419555664, "learning_rate": 1.8383740582218002e-06, "loss": 0.6962, "step": 17496 }, { "epoch": 0.7263971271257246, "grad_norm": 3.168236017227173, "learning_rate": 1.837853242451923e-06, "loss": 0.6226, "step": 17497 }, { "epoch": 0.726438642649936, "grad_norm": 2.101863145828247, "learning_rate": 1.8373324838545726e-06, "loss": 0.5854, "step": 17498 }, { "epoch": 0.7264801581741472, "grad_norm": 2.386082649230957, "learning_rate": 1.8368117824391623e-06, "loss": 0.477, "step": 17499 }, { "epoch": 0.7265216736983586, "grad_norm": 2.7335259914398193, "learning_rate": 1.8362911382151054e-06, "loss": 0.5619, "step": 17500 } ], "logging_steps": 1.0, "max_steps": 24087, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.185261553938596e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }