diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7594 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 2523, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 0.6576, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 0.6592, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 0.6936, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 0.6209, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 0.7266, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.0, + "loss": 0.583, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.0, + "loss": 0.5872, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.0, + "loss": 0.6971, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 0.0, + "loss": 0.6075, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 1.3333333333333334e-06, + "loss": 0.6744, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 2.666666666666667e-06, + "loss": 0.5986, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6547, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 5.333333333333334e-06, + "loss": 0.5527, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 6.666666666666667e-06, + "loss": 0.4789, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 8.000000000000001e-06, + "loss": 0.5411, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 9.333333333333334e-06, + "loss": 0.5627, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 1.0666666666666667e-05, + "loss": 0.5674, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 1.2e-05, + "loss": 0.5058, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5078, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 1.4666666666666666e-05, + "loss": 0.5258, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.5611, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 1.7333333333333336e-05, + "loss": 0.4715, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 1.866666666666667e-05, + "loss": 0.7397, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 2e-05, + "loss": 0.5526, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 1.999996823967381e-05, + "loss": 0.5387, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 1.9999872958896982e-05, + "loss": 0.5412, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 1.9999714158274743e-05, + "loss": 0.487, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 1.9999491838815805e-05, + "loss": 0.5345, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 1.999920600193236e-05, + "loss": 0.5062, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 1.9998856649440058e-05, + "loss": 0.5077, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 1.999844378355801e-05, + "loss": 0.5029, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 1.999796740690877e-05, + "loss": 0.5374, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 1.9997427522518315e-05, + "loss": 0.5321, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 1.999682413381602e-05, + "loss": 0.4695, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 1.9996157244634647e-05, + "loss": 0.4798, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 1.9995799988672346e-05, + "loss": 0.4682, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 1.999503785684114e-05, + "loss": 0.5138, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 1.9994212235877407e-05, + "loss": 0.5271, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 1.999332313102555e-05, + "loss": 0.5526, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 1.999237054793322e-05, + "loss": 0.5385, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 1.9991354492651283e-05, + "loss": 0.5259, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 1.9990274971633787e-05, + "loss": 0.5054, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 1.9989131991737928e-05, + "loss": 0.4698, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 1.998792556022398e-05, + "loss": 0.4909, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 1.998665568475528e-05, + "loss": 0.462, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 1.998532237339816e-05, + "loss": 0.4898, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 1.9983925634621894e-05, + "loss": 0.4771, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 1.998246547729867e-05, + "loss": 0.4774, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 1.998094191070349e-05, + "loss": 0.421, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 1.997935494451416e-05, + "loss": 0.4991, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 1.9977704588811183e-05, + "loss": 0.5349, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 1.9975990854077733e-05, + "loss": 0.4717, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 1.9974213751199556e-05, + "loss": 0.5024, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 1.9972373291464933e-05, + "loss": 0.5904, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 1.9970469486564585e-05, + "loss": 0.4817, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 1.99685023485916e-05, + "loss": 0.488, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 1.9966471890041374e-05, + "loss": 0.5262, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 1.9964378123811502e-05, + "loss": 0.4721, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 1.9962221063201734e-05, + "loss": 0.4594, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 1.996000072191385e-05, + "loss": 0.5068, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 1.9957717114051608e-05, + "loss": 0.5169, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 1.9955370254120635e-05, + "loss": 0.4781, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 1.9952960157028335e-05, + "loss": 0.4682, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 1.9950486838083807e-05, + "loss": 0.4754, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 1.994795031299773e-05, + "loss": 0.479, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 1.9945350597882275e-05, + "loss": 0.5381, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 1.9942687709251006e-05, + "loss": 0.5457, + "step": 134 + }, + { + "epoch": 0.16, + "learning_rate": 1.993996166401877e-05, + "loss": 0.4653, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 1.9937172479501573e-05, + "loss": 0.4686, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 1.9934320173416502e-05, + "loss": 0.5098, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 1.9931404763881598e-05, + "loss": 0.4823, + "step": 142 + }, + { + "epoch": 0.17, + "learning_rate": 1.992842626941572e-05, + "loss": 0.4999, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 1.992538470893847e-05, + "loss": 0.4868, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 1.992228010177003e-05, + "loss": 0.4668, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 1.9919112467631074e-05, + "loss": 0.4955, + "step": 150 + }, + { + "epoch": 0.18, + "learning_rate": 1.991588182664262e-05, + "loss": 0.4855, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 1.99125881993259e-05, + "loss": 0.4646, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 1.9909231606602266e-05, + "loss": 0.5182, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 1.9905812069793002e-05, + "loss": 0.4857, + "step": 158 + }, + { + "epoch": 0.19, + "learning_rate": 1.990232961061924e-05, + "loss": 0.5048, + "step": 160 + }, + { + "epoch": 0.19, + "learning_rate": 1.989878425120177e-05, + "loss": 0.4597, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 1.9895176014060964e-05, + "loss": 0.4387, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 1.9891504922116572e-05, + "loss": 0.4556, + "step": 166 + }, + { + "epoch": 0.2, + "learning_rate": 1.9887770998687607e-05, + "loss": 0.4937, + "step": 168 + }, + { + "epoch": 0.2, + "learning_rate": 1.9883974267492202e-05, + "loss": 0.5738, + "step": 170 + }, + { + "epoch": 0.2, + "learning_rate": 1.9880114752647434e-05, + "loss": 0.4769, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 1.9876192478669197e-05, + "loss": 0.5109, + "step": 174 + }, + { + "epoch": 0.21, + "learning_rate": 1.987220747047203e-05, + "loss": 0.4926, + "step": 176 + }, + { + "epoch": 0.21, + "learning_rate": 1.9868159753368964e-05, + "loss": 0.4726, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 1.9864049353071365e-05, + "loss": 0.4675, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 1.985987629568876e-05, + "loss": 0.4723, + "step": 182 + }, + { + "epoch": 0.22, + "learning_rate": 1.9855640607728684e-05, + "loss": 0.4215, + "step": 184 + }, + { + "epoch": 0.22, + "learning_rate": 1.9851342316096503e-05, + "loss": 0.505, + "step": 186 + }, + { + "epoch": 0.22, + "learning_rate": 1.984698144809525e-05, + "loss": 0.5278, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 1.9842558031425434e-05, + "loss": 0.4836, + "step": 190 + }, + { + "epoch": 0.23, + "learning_rate": 1.983807209418489e-05, + "loss": 0.445, + "step": 192 + }, + { + "epoch": 0.23, + "learning_rate": 1.9833523664868587e-05, + "loss": 0.4585, + "step": 194 + }, + { + "epoch": 0.23, + "learning_rate": 1.982891277236845e-05, + "loss": 0.4934, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 1.982423944597315e-05, + "loss": 0.4899, + "step": 198 + }, + { + "epoch": 0.24, + "learning_rate": 1.981950371536798e-05, + "loss": 0.5321, + "step": 200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9814705610634602e-05, + "loss": 0.4883, + "step": 202 + }, + { + "epoch": 0.24, + "learning_rate": 1.980984516225089e-05, + "loss": 0.4805, + "step": 204 + }, + { + "epoch": 0.24, + "learning_rate": 1.9804922401090732e-05, + "loss": 0.4507, + "step": 206 + }, + { + "epoch": 0.25, + "learning_rate": 1.9799937358423826e-05, + "loss": 0.4872, + "step": 208 + }, + { + "epoch": 0.25, + "learning_rate": 1.9794890065915486e-05, + "loss": 0.5495, + "step": 210 + }, + { + "epoch": 0.25, + "learning_rate": 1.9789780555626444e-05, + "loss": 0.4936, + "step": 212 + }, + { + "epoch": 0.25, + "learning_rate": 1.9784608860012652e-05, + "loss": 0.4769, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 1.9779375011925046e-05, + "loss": 0.4714, + "step": 216 + }, + { + "epoch": 0.26, + "learning_rate": 1.9774079044609373e-05, + "loss": 0.5064, + "step": 218 + }, + { + "epoch": 0.26, + "learning_rate": 1.976872099170597e-05, + "loss": 0.4629, + "step": 220 + }, + { + "epoch": 0.26, + "learning_rate": 1.976330088724953e-05, + "loss": 0.4149, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 1.9757818765668916e-05, + "loss": 0.4565, + "step": 224 + }, + { + "epoch": 0.27, + "learning_rate": 1.9752274661786916e-05, + "loss": 0.4233, + "step": 226 + }, + { + "epoch": 0.27, + "learning_rate": 1.9746668610820047e-05, + "loss": 0.4557, + "step": 228 + }, + { + "epoch": 0.27, + "learning_rate": 1.9741000648378303e-05, + "loss": 0.5186, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 1.9735270810464958e-05, + "loss": 0.4425, + "step": 232 + }, + { + "epoch": 0.28, + "learning_rate": 1.972947913347631e-05, + "loss": 0.5274, + "step": 234 + }, + { + "epoch": 0.28, + "learning_rate": 1.9723625654201472e-05, + "loss": 0.4087, + "step": 236 + }, + { + "epoch": 0.28, + "learning_rate": 1.971771040982213e-05, + "loss": 0.4412, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 1.9711733437912293e-05, + "loss": 0.4998, + "step": 240 + }, + { + "epoch": 0.29, + "learning_rate": 1.9705694776438084e-05, + "loss": 0.5343, + "step": 242 + }, + { + "epoch": 0.29, + "learning_rate": 1.9699594463757475e-05, + "loss": 0.4365, + "step": 244 + }, + { + "epoch": 0.29, + "learning_rate": 1.9693432538620046e-05, + "loss": 0.4877, + "step": 246 + }, + { + "epoch": 0.29, + "learning_rate": 1.9687209040166748e-05, + "loss": 0.4361, + "step": 248 + }, + { + "epoch": 0.3, + "learning_rate": 1.968092400792965e-05, + "loss": 0.4555, + "step": 250 + }, + { + "epoch": 0.3, + "learning_rate": 1.967457748183169e-05, + "loss": 0.4353, + "step": 252 + }, + { + "epoch": 0.3, + "learning_rate": 1.966816950218641e-05, + "loss": 0.3476, + "step": 254 + }, + { + "epoch": 0.3, + "learning_rate": 1.9661700109697718e-05, + "loss": 0.4342, + "step": 256 + }, + { + "epoch": 0.31, + "learning_rate": 1.9655169345459622e-05, + "loss": 0.4603, + "step": 258 + }, + { + "epoch": 0.31, + "learning_rate": 1.964857725095595e-05, + "loss": 0.4514, + "step": 260 + }, + { + "epoch": 0.31, + "learning_rate": 1.964192386806013e-05, + "loss": 0.4889, + "step": 262 + }, + { + "epoch": 0.31, + "learning_rate": 1.9635209239034872e-05, + "loss": 0.5255, + "step": 264 + }, + { + "epoch": 0.32, + "learning_rate": 1.962843340653195e-05, + "loss": 0.5325, + "step": 266 + }, + { + "epoch": 0.32, + "learning_rate": 1.9621596413591885e-05, + "loss": 0.5163, + "step": 268 + }, + { + "epoch": 0.32, + "learning_rate": 1.96146983036437e-05, + "loss": 0.3708, + "step": 270 + }, + { + "epoch": 0.32, + "learning_rate": 1.960773912050465e-05, + "loss": 0.5378, + "step": 272 + }, + { + "epoch": 0.33, + "learning_rate": 1.960071890837991e-05, + "loss": 0.4389, + "step": 274 + }, + { + "epoch": 0.33, + "learning_rate": 1.9593637711862335e-05, + "loss": 0.4565, + "step": 276 + }, + { + "epoch": 0.33, + "learning_rate": 1.9586495575932137e-05, + "loss": 0.5446, + "step": 278 + }, + { + "epoch": 0.33, + "learning_rate": 1.957929254595664e-05, + "loss": 0.5079, + "step": 280 + }, + { + "epoch": 0.34, + "learning_rate": 1.957202866768995e-05, + "loss": 0.3997, + "step": 282 + }, + { + "epoch": 0.34, + "learning_rate": 1.9564703987272703e-05, + "loss": 0.4934, + "step": 284 + }, + { + "epoch": 0.34, + "learning_rate": 1.9557318551231745e-05, + "loss": 0.5076, + "step": 286 + }, + { + "epoch": 0.34, + "learning_rate": 1.9549872406479843e-05, + "loss": 0.4487, + "step": 288 + }, + { + "epoch": 0.34, + "learning_rate": 1.9542365600315403e-05, + "loss": 0.5515, + "step": 290 + }, + { + "epoch": 0.35, + "learning_rate": 1.953479818042214e-05, + "loss": 0.5262, + "step": 292 + }, + { + "epoch": 0.35, + "learning_rate": 1.95271701948688e-05, + "loss": 0.4953, + "step": 294 + }, + { + "epoch": 0.35, + "learning_rate": 1.951948169210885e-05, + "loss": 0.5127, + "step": 296 + }, + { + "epoch": 0.35, + "learning_rate": 1.9511732720980156e-05, + "loss": 0.4796, + "step": 298 + }, + { + "epoch": 0.36, + "learning_rate": 1.950392333070469e-05, + "loss": 0.5016, + "step": 300 + }, + { + "epoch": 0.36, + "learning_rate": 1.9496053570888205e-05, + "loss": 0.5114, + "step": 302 + }, + { + "epoch": 0.36, + "learning_rate": 1.9488123491519935e-05, + "loss": 0.4471, + "step": 304 + }, + { + "epoch": 0.36, + "learning_rate": 1.9480133142972257e-05, + "loss": 0.4427, + "step": 306 + }, + { + "epoch": 0.37, + "learning_rate": 1.947208257600039e-05, + "loss": 0.457, + "step": 308 + }, + { + "epoch": 0.37, + "learning_rate": 1.9463971841742057e-05, + "loss": 0.5193, + "step": 310 + }, + { + "epoch": 0.37, + "learning_rate": 1.945580099171717e-05, + "loss": 0.5337, + "step": 312 + }, + { + "epoch": 0.37, + "learning_rate": 1.9447570077827503e-05, + "loss": 0.4758, + "step": 314 + }, + { + "epoch": 0.38, + "learning_rate": 1.9439279152356363e-05, + "loss": 0.4223, + "step": 316 + }, + { + "epoch": 0.38, + "learning_rate": 1.943092826796824e-05, + "loss": 0.4909, + "step": 318 + }, + { + "epoch": 0.38, + "learning_rate": 1.9422517477708506e-05, + "loss": 0.4615, + "step": 320 + }, + { + "epoch": 0.38, + "learning_rate": 1.9414046835003043e-05, + "loss": 0.3863, + "step": 322 + }, + { + "epoch": 0.39, + "learning_rate": 1.940551639365793e-05, + "loss": 0.4676, + "step": 324 + }, + { + "epoch": 0.39, + "learning_rate": 1.9396926207859085e-05, + "loss": 0.488, + "step": 326 + }, + { + "epoch": 0.39, + "learning_rate": 1.938827633217193e-05, + "loss": 0.4888, + "step": 328 + }, + { + "epoch": 0.39, + "learning_rate": 1.9379566821541034e-05, + "loss": 0.4754, + "step": 330 + }, + { + "epoch": 0.39, + "learning_rate": 1.9370797731289784e-05, + "loss": 0.7974, + "step": 332 + }, + { + "epoch": 0.4, + "learning_rate": 1.936196911712001e-05, + "loss": 0.3944, + "step": 334 + }, + { + "epoch": 0.4, + "learning_rate": 1.9353081035111644e-05, + "loss": 0.4883, + "step": 336 + }, + { + "epoch": 0.4, + "learning_rate": 1.9344133541722368e-05, + "loss": 0.5429, + "step": 338 + }, + { + "epoch": 0.4, + "learning_rate": 1.9335126693787237e-05, + "loss": 0.4573, + "step": 340 + }, + { + "epoch": 0.41, + "learning_rate": 1.9326060548518342e-05, + "loss": 0.4276, + "step": 342 + }, + { + "epoch": 0.41, + "learning_rate": 1.9316935163504424e-05, + "loss": 0.5089, + "step": 344 + }, + { + "epoch": 0.41, + "learning_rate": 1.930775059671053e-05, + "loss": 0.4285, + "step": 346 + }, + { + "epoch": 0.41, + "learning_rate": 1.9298506906477623e-05, + "loss": 0.4438, + "step": 348 + }, + { + "epoch": 0.42, + "learning_rate": 1.9289204151522227e-05, + "loss": 0.4644, + "step": 350 + }, + { + "epoch": 0.42, + "learning_rate": 1.927984239093605e-05, + "loss": 0.3904, + "step": 352 + }, + { + "epoch": 0.42, + "learning_rate": 1.9270421684185603e-05, + "loss": 0.5486, + "step": 354 + }, + { + "epoch": 0.42, + "learning_rate": 1.9260942091111836e-05, + "loss": 0.5429, + "step": 356 + }, + { + "epoch": 0.43, + "learning_rate": 1.9251403671929738e-05, + "loss": 0.4597, + "step": 358 + }, + { + "epoch": 0.43, + "learning_rate": 1.9241806487227967e-05, + "loss": 0.4721, + "step": 360 + }, + { + "epoch": 0.43, + "learning_rate": 1.923215059796847e-05, + "loss": 0.4689, + "step": 362 + }, + { + "epoch": 0.43, + "learning_rate": 1.922243606548609e-05, + "loss": 0.4716, + "step": 364 + }, + { + "epoch": 0.44, + "learning_rate": 1.9212662951488162e-05, + "loss": 0.4993, + "step": 366 + }, + { + "epoch": 0.44, + "learning_rate": 1.9202831318054153e-05, + "loss": 0.4557, + "step": 368 + }, + { + "epoch": 0.44, + "learning_rate": 1.9192941227635232e-05, + "loss": 0.4701, + "step": 370 + }, + { + "epoch": 0.44, + "learning_rate": 1.91829927430539e-05, + "loss": 0.4032, + "step": 372 + }, + { + "epoch": 0.44, + "learning_rate": 1.9172985927503584e-05, + "loss": 0.4743, + "step": 374 + }, + { + "epoch": 0.45, + "learning_rate": 1.9162920844548227e-05, + "loss": 0.4491, + "step": 376 + }, + { + "epoch": 0.45, + "learning_rate": 1.9152797558121894e-05, + "loss": 0.429, + "step": 378 + }, + { + "epoch": 0.45, + "learning_rate": 1.9142616132528356e-05, + "loss": 0.4707, + "step": 380 + }, + { + "epoch": 0.45, + "learning_rate": 1.91323766324407e-05, + "loss": 0.4371, + "step": 382 + }, + { + "epoch": 0.46, + "learning_rate": 1.912207912290089e-05, + "loss": 0.5172, + "step": 384 + }, + { + "epoch": 0.46, + "learning_rate": 1.9111723669319385e-05, + "loss": 0.4482, + "step": 386 + }, + { + "epoch": 0.46, + "learning_rate": 1.91013103374747e-05, + "loss": 0.4701, + "step": 388 + }, + { + "epoch": 0.46, + "learning_rate": 1.9090839193513e-05, + "loss": 0.4737, + "step": 390 + }, + { + "epoch": 0.47, + "learning_rate": 1.9080310303947668e-05, + "loss": 0.4922, + "step": 392 + }, + { + "epoch": 0.47, + "learning_rate": 1.9069723735658903e-05, + "loss": 0.4081, + "step": 394 + }, + { + "epoch": 0.47, + "learning_rate": 1.9059079555893277e-05, + "loss": 0.486, + "step": 396 + }, + { + "epoch": 0.47, + "learning_rate": 1.9048377832263314e-05, + "loss": 0.4674, + "step": 398 + }, + { + "epoch": 0.48, + "learning_rate": 1.903761863274706e-05, + "loss": 0.4528, + "step": 400 + }, + { + "epoch": 0.48, + "learning_rate": 1.902680202568765e-05, + "loss": 0.4716, + "step": 402 + }, + { + "epoch": 0.48, + "learning_rate": 1.9015928079792884e-05, + "loss": 0.5213, + "step": 404 + }, + { + "epoch": 0.48, + "learning_rate": 1.9004996864134767e-05, + "loss": 0.4836, + "step": 406 + }, + { + "epoch": 0.49, + "learning_rate": 1.8994008448149103e-05, + "loss": 0.5513, + "step": 408 + }, + { + "epoch": 0.49, + "learning_rate": 1.8982962901635022e-05, + "loss": 0.4634, + "step": 410 + }, + { + "epoch": 0.49, + "learning_rate": 1.8971860294754554e-05, + "loss": 0.5111, + "step": 412 + }, + { + "epoch": 0.49, + "learning_rate": 1.8960700698032194e-05, + "loss": 0.4474, + "step": 414 + }, + { + "epoch": 0.49, + "learning_rate": 1.894948418235441e-05, + "loss": 0.4785, + "step": 416 + }, + { + "epoch": 0.5, + "learning_rate": 1.8938210818969257e-05, + "loss": 0.4977, + "step": 418 + }, + { + "epoch": 0.5, + "learning_rate": 1.8926880679485865e-05, + "loss": 0.4346, + "step": 420 + }, + { + "epoch": 0.5, + "learning_rate": 1.8915493835874026e-05, + "loss": 0.4139, + "step": 422 + }, + { + "epoch": 0.5, + "learning_rate": 1.8904050360463708e-05, + "loss": 0.4975, + "step": 424 + }, + { + "epoch": 0.51, + "learning_rate": 1.8892550325944617e-05, + "loss": 0.4766, + "step": 426 + }, + { + "epoch": 0.51, + "learning_rate": 1.888099380536572e-05, + "loss": 0.484, + "step": 428 + }, + { + "epoch": 0.51, + "learning_rate": 1.886938087213479e-05, + "loss": 0.4301, + "step": 430 + }, + { + "epoch": 0.51, + "learning_rate": 1.885771160001794e-05, + "loss": 0.4474, + "step": 432 + }, + { + "epoch": 0.52, + "learning_rate": 1.8845986063139144e-05, + "loss": 0.4445, + "step": 434 + }, + { + "epoch": 0.52, + "learning_rate": 1.8834204335979777e-05, + "loss": 0.4422, + "step": 436 + }, + { + "epoch": 0.52, + "learning_rate": 1.8822366493378143e-05, + "loss": 0.5337, + "step": 438 + }, + { + "epoch": 0.52, + "learning_rate": 1.8810472610528987e-05, + "loss": 0.4704, + "step": 440 + }, + { + "epoch": 0.53, + "learning_rate": 1.8798522762983026e-05, + "loss": 0.458, + "step": 442 + }, + { + "epoch": 0.53, + "learning_rate": 1.8786517026646474e-05, + "loss": 0.3866, + "step": 444 + }, + { + "epoch": 0.53, + "learning_rate": 1.8774455477780557e-05, + "loss": 0.4939, + "step": 446 + }, + { + "epoch": 0.53, + "learning_rate": 1.8762338193001013e-05, + "loss": 0.496, + "step": 448 + }, + { + "epoch": 0.54, + "learning_rate": 1.8750165249277625e-05, + "loss": 0.4171, + "step": 450 + }, + { + "epoch": 0.54, + "learning_rate": 1.873793672393373e-05, + "loss": 0.4582, + "step": 452 + }, + { + "epoch": 0.54, + "learning_rate": 1.8725652694645714e-05, + "loss": 0.4762, + "step": 454 + }, + { + "epoch": 0.54, + "learning_rate": 1.871331323944254e-05, + "loss": 0.3859, + "step": 456 + }, + { + "epoch": 0.54, + "learning_rate": 1.8700918436705226e-05, + "loss": 0.4165, + "step": 458 + }, + { + "epoch": 0.55, + "learning_rate": 1.868846836516637e-05, + "loss": 0.3933, + "step": 460 + }, + { + "epoch": 0.55, + "learning_rate": 1.8675963103909636e-05, + "loss": 0.4746, + "step": 462 + }, + { + "epoch": 0.55, + "learning_rate": 1.866340273236926e-05, + "loss": 0.4893, + "step": 464 + }, + { + "epoch": 0.55, + "learning_rate": 1.8650787330329546e-05, + "loss": 0.516, + "step": 466 + }, + { + "epoch": 0.56, + "learning_rate": 1.8638116977924346e-05, + "loss": 0.4391, + "step": 468 + }, + { + "epoch": 0.56, + "learning_rate": 1.862539175563657e-05, + "loss": 0.3998, + "step": 470 + }, + { + "epoch": 0.56, + "learning_rate": 1.861261174429765e-05, + "loss": 0.4525, + "step": 472 + }, + { + "epoch": 0.56, + "learning_rate": 1.8599777025087068e-05, + "loss": 0.4023, + "step": 474 + }, + { + "epoch": 0.57, + "learning_rate": 1.858688767953178e-05, + "loss": 0.397, + "step": 476 + }, + { + "epoch": 0.57, + "learning_rate": 1.8573943789505762e-05, + "loss": 0.4845, + "step": 478 + }, + { + "epoch": 0.57, + "learning_rate": 1.8560945437229443e-05, + "loss": 0.4518, + "step": 480 + }, + { + "epoch": 0.57, + "learning_rate": 1.8547892705269207e-05, + "loss": 0.5362, + "step": 482 + }, + { + "epoch": 0.58, + "learning_rate": 1.8534785676536856e-05, + "loss": 0.4601, + "step": 484 + }, + { + "epoch": 0.58, + "learning_rate": 1.8521624434289094e-05, + "loss": 0.4814, + "step": 486 + }, + { + "epoch": 0.58, + "learning_rate": 1.850840906212699e-05, + "loss": 0.4707, + "step": 488 + }, + { + "epoch": 0.58, + "learning_rate": 1.849513964399545e-05, + "loss": 0.4144, + "step": 490 + }, + { + "epoch": 0.59, + "learning_rate": 1.8481816264182678e-05, + "loss": 0.434, + "step": 492 + }, + { + "epoch": 0.59, + "learning_rate": 1.8468439007319663e-05, + "loss": 0.4782, + "step": 494 + }, + { + "epoch": 0.59, + "learning_rate": 1.8455007958379604e-05, + "loss": 0.3848, + "step": 496 + }, + { + "epoch": 0.59, + "learning_rate": 1.8441523202677406e-05, + "loss": 0.4541, + "step": 498 + }, + { + "epoch": 0.59, + "learning_rate": 1.8427984825869114e-05, + "loss": 0.4708, + "step": 500 + }, + { + "epoch": 0.6, + "learning_rate": 1.8414392913951382e-05, + "loss": 0.5103, + "step": 502 + }, + { + "epoch": 0.6, + "learning_rate": 1.8400747553260915e-05, + "loss": 0.4201, + "step": 504 + }, + { + "epoch": 0.6, + "learning_rate": 1.8387048830473948e-05, + "loss": 0.4586, + "step": 506 + }, + { + "epoch": 0.6, + "learning_rate": 1.8373296832605647e-05, + "loss": 0.4667, + "step": 508 + }, + { + "epoch": 0.61, + "learning_rate": 1.8359491647009608e-05, + "loss": 0.4846, + "step": 510 + }, + { + "epoch": 0.61, + "learning_rate": 1.834563336137727e-05, + "loss": 0.5255, + "step": 512 + }, + { + "epoch": 0.61, + "learning_rate": 1.8331722063737365e-05, + "loss": 0.482, + "step": 514 + }, + { + "epoch": 0.61, + "learning_rate": 1.8317757842455363e-05, + "loss": 0.4211, + "step": 516 + }, + { + "epoch": 0.62, + "learning_rate": 1.830374078623291e-05, + "loss": 0.4852, + "step": 518 + }, + { + "epoch": 0.62, + "learning_rate": 1.8289670984107263e-05, + "loss": 0.4299, + "step": 520 + }, + { + "epoch": 0.62, + "learning_rate": 1.8275548525450722e-05, + "loss": 0.5044, + "step": 522 + }, + { + "epoch": 0.62, + "learning_rate": 1.8261373499970064e-05, + "loss": 0.4072, + "step": 524 + }, + { + "epoch": 0.63, + "learning_rate": 1.8247145997705977e-05, + "loss": 0.4478, + "step": 526 + }, + { + "epoch": 0.63, + "learning_rate": 1.823286610903248e-05, + "loss": 0.4962, + "step": 528 + }, + { + "epoch": 0.63, + "learning_rate": 1.8218533924656367e-05, + "loss": 0.4658, + "step": 530 + }, + { + "epoch": 0.63, + "learning_rate": 1.8204149535616596e-05, + "loss": 0.4124, + "step": 532 + }, + { + "epoch": 0.63, + "learning_rate": 1.8189713033283755e-05, + "loss": 0.4149, + "step": 534 + }, + { + "epoch": 0.64, + "learning_rate": 1.817522450935944e-05, + "loss": 0.4327, + "step": 536 + }, + { + "epoch": 0.64, + "learning_rate": 1.8160684055875704e-05, + "loss": 0.4469, + "step": 538 + }, + { + "epoch": 0.64, + "learning_rate": 1.8146091765194458e-05, + "loss": 0.44, + "step": 540 + }, + { + "epoch": 0.64, + "learning_rate": 1.8131447730006885e-05, + "loss": 0.4911, + "step": 542 + }, + { + "epoch": 0.65, + "learning_rate": 1.8116752043332848e-05, + "loss": 0.4848, + "step": 544 + }, + { + "epoch": 0.65, + "learning_rate": 1.810200479852031e-05, + "loss": 0.4297, + "step": 546 + }, + { + "epoch": 0.65, + "learning_rate": 1.8087206089244728e-05, + "loss": 0.4205, + "step": 548 + }, + { + "epoch": 0.65, + "learning_rate": 1.8072356009508473e-05, + "loss": 0.3892, + "step": 550 + }, + { + "epoch": 0.66, + "learning_rate": 1.805745465364022e-05, + "loss": 0.4519, + "step": 552 + }, + { + "epoch": 0.66, + "learning_rate": 1.8042502116294355e-05, + "loss": 0.4376, + "step": 554 + }, + { + "epoch": 0.66, + "learning_rate": 1.8027498492450367e-05, + "loss": 0.4538, + "step": 556 + }, + { + "epoch": 0.66, + "learning_rate": 1.8012443877412253e-05, + "loss": 0.4672, + "step": 558 + }, + { + "epoch": 0.67, + "learning_rate": 1.799733836680791e-05, + "loss": 0.4034, + "step": 560 + }, + { + "epoch": 0.67, + "learning_rate": 1.7982182056588536e-05, + "loss": 0.4613, + "step": 562 + }, + { + "epoch": 0.67, + "learning_rate": 1.796697504302799e-05, + "loss": 0.4664, + "step": 564 + }, + { + "epoch": 0.67, + "learning_rate": 1.795171742272222e-05, + "loss": 0.4271, + "step": 566 + }, + { + "epoch": 0.68, + "learning_rate": 1.7936409292588627e-05, + "loss": 0.4741, + "step": 568 + }, + { + "epoch": 0.68, + "learning_rate": 1.792105074986545e-05, + "loss": 0.3175, + "step": 570 + }, + { + "epoch": 0.68, + "learning_rate": 1.7905641892111152e-05, + "loss": 0.4154, + "step": 572 + }, + { + "epoch": 0.68, + "learning_rate": 1.7890182817203806e-05, + "loss": 0.4558, + "step": 574 + }, + { + "epoch": 0.68, + "learning_rate": 1.7874673623340463e-05, + "loss": 0.465, + "step": 576 + }, + { + "epoch": 0.69, + "learning_rate": 1.785911440903653e-05, + "loss": 0.4688, + "step": 578 + }, + { + "epoch": 0.69, + "learning_rate": 1.7843505273125164e-05, + "loss": 0.4411, + "step": 580 + }, + { + "epoch": 0.69, + "learning_rate": 1.7827846314756604e-05, + "loss": 0.4286, + "step": 582 + }, + { + "epoch": 0.69, + "learning_rate": 1.7812137633397577e-05, + "loss": 0.4425, + "step": 584 + }, + { + "epoch": 0.7, + "learning_rate": 1.7796379328830652e-05, + "loss": 0.4126, + "step": 586 + }, + { + "epoch": 0.7, + "learning_rate": 1.778057150115361e-05, + "loss": 0.479, + "step": 588 + }, + { + "epoch": 0.7, + "learning_rate": 1.77647142507788e-05, + "loss": 0.3994, + "step": 590 + }, + { + "epoch": 0.7, + "learning_rate": 1.7748807678432514e-05, + "loss": 0.4574, + "step": 592 + }, + { + "epoch": 0.71, + "learning_rate": 1.7732851885154336e-05, + "loss": 0.3901, + "step": 594 + }, + { + "epoch": 0.71, + "learning_rate": 1.7716846972296505e-05, + "loss": 0.435, + "step": 596 + }, + { + "epoch": 0.71, + "learning_rate": 1.7700793041523272e-05, + "loss": 0.4337, + "step": 598 + }, + { + "epoch": 0.71, + "learning_rate": 1.7684690194810256e-05, + "loss": 0.4196, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 1.7668538534443782e-05, + "loss": 0.4508, + "step": 602 + }, + { + "epoch": 0.72, + "learning_rate": 1.7652338163020257e-05, + "loss": 0.4583, + "step": 604 + }, + { + "epoch": 0.72, + "learning_rate": 1.76360891834455e-05, + "loss": 0.4499, + "step": 606 + }, + { + "epoch": 0.72, + "learning_rate": 1.7619791698934077e-05, + "loss": 0.4263, + "step": 608 + }, + { + "epoch": 0.73, + "learning_rate": 1.7603445813008685e-05, + "loss": 0.3721, + "step": 610 + }, + { + "epoch": 0.73, + "learning_rate": 1.7587051629499452e-05, + "loss": 0.3788, + "step": 612 + }, + { + "epoch": 0.73, + "learning_rate": 1.7570609252543302e-05, + "loss": 0.4405, + "step": 614 + }, + { + "epoch": 0.73, + "learning_rate": 1.755411878658329e-05, + "loss": 0.4939, + "step": 616 + }, + { + "epoch": 0.73, + "learning_rate": 1.7537580336367925e-05, + "loss": 0.4188, + "step": 618 + }, + { + "epoch": 0.74, + "learning_rate": 1.7520994006950526e-05, + "loss": 0.4425, + "step": 620 + }, + { + "epoch": 0.74, + "learning_rate": 1.7504359903688537e-05, + "loss": 0.37, + "step": 622 + }, + { + "epoch": 0.74, + "learning_rate": 1.748767813224287e-05, + "loss": 0.4633, + "step": 624 + }, + { + "epoch": 0.74, + "learning_rate": 1.747094879857722e-05, + "loss": 0.3729, + "step": 626 + }, + { + "epoch": 0.75, + "learning_rate": 1.7454172008957417e-05, + "loss": 0.4312, + "step": 628 + }, + { + "epoch": 0.75, + "learning_rate": 1.7437347869950713e-05, + "loss": 0.4367, + "step": 630 + }, + { + "epoch": 0.75, + "learning_rate": 1.7420476488425138e-05, + "loss": 0.4091, + "step": 632 + }, + { + "epoch": 0.75, + "learning_rate": 1.740355797154881e-05, + "loss": 0.469, + "step": 634 + }, + { + "epoch": 0.76, + "learning_rate": 1.7386592426789252e-05, + "loss": 0.4872, + "step": 636 + }, + { + "epoch": 0.76, + "learning_rate": 1.7369579961912712e-05, + "loss": 0.4932, + "step": 638 + }, + { + "epoch": 0.76, + "learning_rate": 1.7352520684983474e-05, + "loss": 0.3848, + "step": 640 + }, + { + "epoch": 0.76, + "learning_rate": 1.7335414704363178e-05, + "loss": 0.3694, + "step": 642 + }, + { + "epoch": 0.77, + "learning_rate": 1.7318262128710132e-05, + "loss": 0.5099, + "step": 644 + }, + { + "epoch": 0.77, + "learning_rate": 1.7301063066978617e-05, + "loss": 0.4407, + "step": 646 + }, + { + "epoch": 0.77, + "learning_rate": 1.728381762841819e-05, + "loss": 0.4409, + "step": 648 + }, + { + "epoch": 0.77, + "learning_rate": 1.7266525922573e-05, + "loss": 0.4444, + "step": 650 + }, + { + "epoch": 0.78, + "learning_rate": 1.72491880592811e-05, + "loss": 0.4079, + "step": 652 + }, + { + "epoch": 0.78, + "learning_rate": 1.7231804148673717e-05, + "loss": 0.4502, + "step": 654 + }, + { + "epoch": 0.78, + "learning_rate": 1.7214374301174594e-05, + "loss": 0.49, + "step": 656 + }, + { + "epoch": 0.78, + "learning_rate": 1.719689862749926e-05, + "loss": 0.4778, + "step": 658 + }, + { + "epoch": 0.78, + "learning_rate": 1.7179377238654325e-05, + "loss": 0.3734, + "step": 660 + }, + { + "epoch": 0.79, + "learning_rate": 1.716181024593681e-05, + "loss": 0.4956, + "step": 662 + }, + { + "epoch": 0.79, + "learning_rate": 1.714419776093338e-05, + "loss": 0.3712, + "step": 664 + }, + { + "epoch": 0.79, + "learning_rate": 1.7126539895519698e-05, + "loss": 0.3779, + "step": 666 + }, + { + "epoch": 0.79, + "learning_rate": 1.710883676185968e-05, + "loss": 0.4457, + "step": 668 + }, + { + "epoch": 0.8, + "learning_rate": 1.709108847240478e-05, + "loss": 0.4161, + "step": 670 + }, + { + "epoch": 0.8, + "learning_rate": 1.7073295139893296e-05, + "loss": 0.4459, + "step": 672 + }, + { + "epoch": 0.8, + "learning_rate": 1.705545687734963e-05, + "loss": 0.4465, + "step": 674 + }, + { + "epoch": 0.8, + "learning_rate": 1.7037573798083598e-05, + "loss": 0.4284, + "step": 676 + }, + { + "epoch": 0.81, + "learning_rate": 1.701964601568968e-05, + "loss": 0.3806, + "step": 678 + }, + { + "epoch": 0.81, + "learning_rate": 1.7001673644046322e-05, + "loss": 0.4591, + "step": 680 + }, + { + "epoch": 0.81, + "learning_rate": 1.6983656797315197e-05, + "loss": 0.4809, + "step": 682 + }, + { + "epoch": 0.81, + "learning_rate": 1.6965595589940496e-05, + "loss": 0.3811, + "step": 684 + }, + { + "epoch": 0.82, + "learning_rate": 1.6947490136648182e-05, + "loss": 0.4223, + "step": 686 + }, + { + "epoch": 0.82, + "learning_rate": 1.6929340552445283e-05, + "loss": 0.4698, + "step": 688 + }, + { + "epoch": 0.82, + "learning_rate": 1.6911146952619132e-05, + "loss": 0.4059, + "step": 690 + }, + { + "epoch": 0.82, + "learning_rate": 1.689290945273667e-05, + "loss": 0.4451, + "step": 692 + }, + { + "epoch": 0.83, + "learning_rate": 1.6874628168643683e-05, + "loss": 0.3428, + "step": 694 + }, + { + "epoch": 0.83, + "learning_rate": 1.685630321646408e-05, + "loss": 0.4552, + "step": 696 + }, + { + "epoch": 0.83, + "learning_rate": 1.683793471259915e-05, + "loss": 0.6389, + "step": 698 + }, + { + "epoch": 0.83, + "learning_rate": 1.681952277372683e-05, + "loss": 0.4356, + "step": 700 + }, + { + "epoch": 0.83, + "learning_rate": 1.680106751680096e-05, + "loss": 0.4199, + "step": 702 + }, + { + "epoch": 0.84, + "learning_rate": 1.6782569059050535e-05, + "loss": 0.4369, + "step": 704 + }, + { + "epoch": 0.84, + "learning_rate": 1.676402751797896e-05, + "loss": 0.3924, + "step": 706 + }, + { + "epoch": 0.84, + "learning_rate": 1.674544301136332e-05, + "loss": 0.386, + "step": 708 + }, + { + "epoch": 0.84, + "learning_rate": 1.672681565725361e-05, + "loss": 0.464, + "step": 710 + }, + { + "epoch": 0.85, + "learning_rate": 1.6708145573972005e-05, + "loss": 0.4597, + "step": 712 + }, + { + "epoch": 0.85, + "learning_rate": 1.6689432880112078e-05, + "loss": 0.4164, + "step": 714 + }, + { + "epoch": 0.85, + "learning_rate": 1.6670677694538096e-05, + "loss": 0.3761, + "step": 716 + }, + { + "epoch": 0.85, + "learning_rate": 1.6651880136384215e-05, + "loss": 0.4499, + "step": 718 + }, + { + "epoch": 0.86, + "learning_rate": 1.6633040325053746e-05, + "loss": 0.438, + "step": 720 + }, + { + "epoch": 0.86, + "learning_rate": 1.661415838021841e-05, + "loss": 0.4526, + "step": 722 + }, + { + "epoch": 0.86, + "learning_rate": 1.659523442181754e-05, + "loss": 0.4427, + "step": 724 + }, + { + "epoch": 0.86, + "learning_rate": 1.6576268570057363e-05, + "loss": 0.5268, + "step": 726 + }, + { + "epoch": 0.87, + "learning_rate": 1.655726094541021e-05, + "loss": 0.4135, + "step": 728 + }, + { + "epoch": 0.87, + "learning_rate": 1.653821166861374e-05, + "loss": 0.42, + "step": 730 + }, + { + "epoch": 0.87, + "learning_rate": 1.6519120860670215e-05, + "loss": 0.449, + "step": 732 + }, + { + "epoch": 0.87, + "learning_rate": 1.6499988642845686e-05, + "loss": 0.4751, + "step": 734 + }, + { + "epoch": 0.88, + "learning_rate": 1.6480815136669248e-05, + "loss": 0.3826, + "step": 736 + }, + { + "epoch": 0.88, + "learning_rate": 1.6461600463932266e-05, + "loss": 0.4712, + "step": 738 + }, + { + "epoch": 0.88, + "learning_rate": 1.6442344746687594e-05, + "loss": 0.4128, + "step": 740 + }, + { + "epoch": 0.88, + "learning_rate": 1.64230481072488e-05, + "loss": 0.4679, + "step": 742 + }, + { + "epoch": 0.88, + "learning_rate": 1.640371066818941e-05, + "loss": 0.4768, + "step": 744 + }, + { + "epoch": 0.89, + "learning_rate": 1.638433255234208e-05, + "loss": 0.4785, + "step": 746 + }, + { + "epoch": 0.89, + "learning_rate": 1.6364913882797875e-05, + "loss": 0.4334, + "step": 748 + }, + { + "epoch": 0.89, + "learning_rate": 1.6345454782905454e-05, + "loss": 0.4015, + "step": 750 + }, + { + "epoch": 0.89, + "learning_rate": 1.6325955376270286e-05, + "loss": 0.439, + "step": 752 + }, + { + "epoch": 0.9, + "learning_rate": 1.630641578675387e-05, + "loss": 0.3951, + "step": 754 + }, + { + "epoch": 0.9, + "learning_rate": 1.6296630962191733e-05, + "loss": 0.5453, + "step": 756 + }, + { + "epoch": 0.9, + "learning_rate": 1.62770313311519e-05, + "loss": 0.3902, + "step": 758 + }, + { + "epoch": 0.9, + "learning_rate": 1.625739182799955e-05, + "loss": 0.3943, + "step": 760 + }, + { + "epoch": 0.91, + "learning_rate": 1.6237712577486092e-05, + "loss": 0.3312, + "step": 762 + }, + { + "epoch": 0.91, + "learning_rate": 1.62179937046154e-05, + "loss": 0.4366, + "step": 764 + }, + { + "epoch": 0.91, + "learning_rate": 1.6198235334643045e-05, + "loss": 0.3924, + "step": 766 + }, + { + "epoch": 0.91, + "learning_rate": 1.6178437593075487e-05, + "loss": 0.378, + "step": 768 + }, + { + "epoch": 0.92, + "learning_rate": 1.6158600605669264e-05, + "loss": 0.4624, + "step": 770 + }, + { + "epoch": 0.92, + "learning_rate": 1.613872449843022e-05, + "loss": 0.4411, + "step": 772 + }, + { + "epoch": 0.92, + "learning_rate": 1.6118809397612678e-05, + "loss": 0.4695, + "step": 774 + }, + { + "epoch": 0.92, + "learning_rate": 1.6098855429718662e-05, + "loss": 0.4348, + "step": 776 + }, + { + "epoch": 0.93, + "learning_rate": 1.607886272149708e-05, + "loss": 0.4048, + "step": 778 + }, + { + "epoch": 0.93, + "learning_rate": 1.6058831399942917e-05, + "loss": 0.3485, + "step": 780 + }, + { + "epoch": 0.93, + "learning_rate": 1.6038761592296435e-05, + "loss": 0.4146, + "step": 782 + }, + { + "epoch": 0.93, + "learning_rate": 1.6018653426042357e-05, + "loss": 0.4398, + "step": 784 + }, + { + "epoch": 0.93, + "learning_rate": 1.5998507028909074e-05, + "loss": 0.5815, + "step": 786 + }, + { + "epoch": 0.94, + "learning_rate": 1.597832252886781e-05, + "loss": 0.4502, + "step": 788 + }, + { + "epoch": 0.94, + "learning_rate": 1.5958100054131828e-05, + "loss": 0.4275, + "step": 790 + }, + { + "epoch": 0.94, + "learning_rate": 1.5937839733155603e-05, + "loss": 0.4269, + "step": 792 + }, + { + "epoch": 0.94, + "learning_rate": 1.591754169463402e-05, + "loss": 0.4211, + "step": 794 + }, + { + "epoch": 0.95, + "learning_rate": 1.5897206067501544e-05, + "loss": 0.4194, + "step": 796 + }, + { + "epoch": 0.95, + "learning_rate": 1.5876832980931405e-05, + "loss": 0.3833, + "step": 798 + }, + { + "epoch": 0.95, + "learning_rate": 1.5856422564334772e-05, + "loss": 0.4176, + "step": 800 + }, + { + "epoch": 0.95, + "learning_rate": 1.5835974947359952e-05, + "loss": 0.5327, + "step": 802 + }, + { + "epoch": 0.96, + "learning_rate": 1.581549025989154e-05, + "loss": 0.4776, + "step": 804 + }, + { + "epoch": 0.96, + "learning_rate": 1.5794968632049598e-05, + "loss": 0.3573, + "step": 806 + }, + { + "epoch": 0.96, + "learning_rate": 1.5774410194188856e-05, + "loss": 0.464, + "step": 808 + }, + { + "epoch": 0.96, + "learning_rate": 1.5753815076897848e-05, + "loss": 0.4549, + "step": 810 + }, + { + "epoch": 0.97, + "learning_rate": 1.57331834109981e-05, + "loss": 0.463, + "step": 812 + }, + { + "epoch": 0.97, + "learning_rate": 1.5712515327543307e-05, + "loss": 0.4438, + "step": 814 + }, + { + "epoch": 0.97, + "learning_rate": 1.5691810957818475e-05, + "loss": 0.4306, + "step": 816 + }, + { + "epoch": 0.97, + "learning_rate": 1.5671070433339116e-05, + "loss": 0.4135, + "step": 818 + }, + { + "epoch": 0.98, + "learning_rate": 1.5650293885850393e-05, + "loss": 0.4706, + "step": 820 + }, + { + "epoch": 0.98, + "learning_rate": 1.5629481447326297e-05, + "loss": 0.4427, + "step": 822 + }, + { + "epoch": 0.98, + "learning_rate": 1.5608633249968783e-05, + "loss": 0.4661, + "step": 824 + }, + { + "epoch": 0.98, + "learning_rate": 1.558774942620697e-05, + "loss": 0.3674, + "step": 826 + }, + { + "epoch": 0.98, + "learning_rate": 1.5566830108696265e-05, + "loss": 0.4204, + "step": 828 + }, + { + "epoch": 0.99, + "learning_rate": 1.5545875430317546e-05, + "loss": 0.4685, + "step": 830 + }, + { + "epoch": 0.99, + "learning_rate": 1.5524885524176287e-05, + "loss": 0.4583, + "step": 832 + }, + { + "epoch": 0.99, + "learning_rate": 1.550386052360174e-05, + "loss": 0.4306, + "step": 834 + }, + { + "epoch": 0.99, + "learning_rate": 1.548280056214609e-05, + "loss": 0.4203, + "step": 836 + }, + { + "epoch": 1.0, + "learning_rate": 1.546170577358358e-05, + "loss": 0.4664, + "step": 838 + }, + { + "epoch": 1.0, + "learning_rate": 1.544057629190969e-05, + "loss": 0.3553, + "step": 840 + }, + { + "epoch": 1.0, + "learning_rate": 1.541941225134025e-05, + "loss": 0.368, + "step": 842 + }, + { + "epoch": 1.0, + "learning_rate": 1.5398213786310643e-05, + "loss": 0.3176, + "step": 844 + }, + { + "epoch": 1.01, + "learning_rate": 1.537698103147489e-05, + "loss": 0.2801, + "step": 846 + }, + { + "epoch": 1.01, + "learning_rate": 1.5355714121704846e-05, + "loss": 0.2576, + "step": 848 + }, + { + "epoch": 1.01, + "learning_rate": 1.53344131920893e-05, + "loss": 0.2689, + "step": 850 + }, + { + "epoch": 1.01, + "learning_rate": 1.531307837793315e-05, + "loss": 0.3045, + "step": 852 + }, + { + "epoch": 1.02, + "learning_rate": 1.529170981475653e-05, + "loss": 0.2506, + "step": 854 + }, + { + "epoch": 1.02, + "learning_rate": 1.5270307638293943e-05, + "loss": 0.2546, + "step": 856 + }, + { + "epoch": 1.02, + "learning_rate": 1.524887198449341e-05, + "loss": 0.2853, + "step": 858 + }, + { + "epoch": 1.02, + "learning_rate": 1.5227402989515607e-05, + "loss": 0.2772, + "step": 860 + }, + { + "epoch": 1.02, + "learning_rate": 1.5205900789732986e-05, + "loss": 0.2763, + "step": 862 + }, + { + "epoch": 1.03, + "learning_rate": 1.5184365521728928e-05, + "loss": 0.2578, + "step": 864 + }, + { + "epoch": 1.03, + "learning_rate": 1.5162797322296855e-05, + "loss": 0.3121, + "step": 866 + }, + { + "epoch": 1.03, + "learning_rate": 1.5141196328439377e-05, + "loss": 0.3037, + "step": 868 + }, + { + "epoch": 1.03, + "learning_rate": 1.5119562677367421e-05, + "loss": 0.2877, + "step": 870 + }, + { + "epoch": 1.04, + "learning_rate": 1.5097896506499349e-05, + "loss": 0.2856, + "step": 872 + }, + { + "epoch": 1.04, + "learning_rate": 1.5076197953460087e-05, + "loss": 0.3417, + "step": 874 + }, + { + "epoch": 1.04, + "learning_rate": 1.5054467156080262e-05, + "loss": 0.285, + "step": 876 + }, + { + "epoch": 1.04, + "learning_rate": 1.5032704252395315e-05, + "loss": 0.3137, + "step": 878 + }, + { + "epoch": 1.05, + "learning_rate": 1.5010909380644636e-05, + "loss": 0.2204, + "step": 880 + }, + { + "epoch": 1.05, + "learning_rate": 1.4989082679270668e-05, + "loss": 0.2808, + "step": 882 + }, + { + "epoch": 1.05, + "learning_rate": 1.496722428691804e-05, + "loss": 0.2691, + "step": 884 + }, + { + "epoch": 1.05, + "learning_rate": 1.4945334342432688e-05, + "loss": 0.2638, + "step": 886 + }, + { + "epoch": 1.06, + "learning_rate": 1.492341298486097e-05, + "loss": 0.26, + "step": 888 + }, + { + "epoch": 1.06, + "learning_rate": 1.490146035344878e-05, + "loss": 0.2764, + "step": 890 + }, + { + "epoch": 1.06, + "learning_rate": 1.4879476587640657e-05, + "loss": 0.2558, + "step": 892 + }, + { + "epoch": 1.06, + "learning_rate": 1.4868473072968645e-05, + "loss": 0.5349, + "step": 894 + }, + { + "epoch": 1.07, + "learning_rate": 1.4846442867457533e-05, + "loss": 0.2937, + "step": 896 + }, + { + "epoch": 1.07, + "learning_rate": 1.4824381877025154e-05, + "loss": 0.2684, + "step": 898 + }, + { + "epoch": 1.07, + "learning_rate": 1.4802290241804355e-05, + "loss": 0.2491, + "step": 900 + }, + { + "epoch": 1.07, + "learning_rate": 1.478016810212265e-05, + "loss": 0.2634, + "step": 902 + }, + { + "epoch": 1.07, + "learning_rate": 1.4758015598501308e-05, + "loss": 0.2889, + "step": 904 + }, + { + "epoch": 1.08, + "learning_rate": 1.473583287165448e-05, + "loss": 0.2843, + "step": 906 + }, + { + "epoch": 1.08, + "learning_rate": 1.4713620062488296e-05, + "loss": 0.2705, + "step": 908 + }, + { + "epoch": 1.08, + "learning_rate": 1.4691377312099965e-05, + "loss": 0.2765, + "step": 910 + }, + { + "epoch": 1.08, + "learning_rate": 1.4669104761776892e-05, + "loss": 0.2595, + "step": 912 + }, + { + "epoch": 1.09, + "learning_rate": 1.4646802552995767e-05, + "loss": 0.2101, + "step": 914 + }, + { + "epoch": 1.09, + "learning_rate": 1.4624470827421675e-05, + "loss": 0.263, + "step": 916 + }, + { + "epoch": 1.09, + "learning_rate": 1.4602109726907197e-05, + "loss": 0.2592, + "step": 918 + }, + { + "epoch": 1.09, + "learning_rate": 1.4579719393491496e-05, + "loss": 0.2732, + "step": 920 + }, + { + "epoch": 1.1, + "learning_rate": 1.455729996939944e-05, + "loss": 0.3056, + "step": 922 + }, + { + "epoch": 1.1, + "learning_rate": 1.4534851597040666e-05, + "loss": 0.2886, + "step": 924 + }, + { + "epoch": 1.1, + "learning_rate": 1.45123744190087e-05, + "loss": 0.2493, + "step": 926 + }, + { + "epoch": 1.1, + "learning_rate": 1.4489868578080046e-05, + "loss": 0.271, + "step": 928 + }, + { + "epoch": 1.11, + "learning_rate": 1.4467334217213274e-05, + "loss": 0.2752, + "step": 930 + }, + { + "epoch": 1.11, + "learning_rate": 1.4444771479548115e-05, + "loss": 0.3108, + "step": 932 + }, + { + "epoch": 1.11, + "learning_rate": 1.4422180508404544e-05, + "loss": 0.2946, + "step": 934 + }, + { + "epoch": 1.11, + "learning_rate": 1.439956144728189e-05, + "loss": 0.2401, + "step": 936 + }, + { + "epoch": 1.12, + "learning_rate": 1.4376914439857905e-05, + "loss": 0.3501, + "step": 938 + }, + { + "epoch": 1.12, + "learning_rate": 1.4354239629987857e-05, + "loss": 0.2895, + "step": 940 + }, + { + "epoch": 1.12, + "learning_rate": 1.4331537161703612e-05, + "loss": 0.2632, + "step": 942 + }, + { + "epoch": 1.12, + "learning_rate": 1.4308807179212736e-05, + "loss": 0.261, + "step": 944 + }, + { + "epoch": 1.12, + "learning_rate": 1.4286049826897559e-05, + "loss": 0.3207, + "step": 946 + }, + { + "epoch": 1.13, + "learning_rate": 1.4263265249314269e-05, + "loss": 0.2592, + "step": 948 + }, + { + "epoch": 1.13, + "learning_rate": 1.4240453591191984e-05, + "loss": 0.2468, + "step": 950 + }, + { + "epoch": 1.13, + "learning_rate": 1.4217614997431847e-05, + "loss": 0.2483, + "step": 952 + }, + { + "epoch": 1.13, + "learning_rate": 1.41947496131061e-05, + "loss": 0.2657, + "step": 954 + }, + { + "epoch": 1.14, + "learning_rate": 1.4171857583457154e-05, + "loss": 0.2389, + "step": 956 + }, + { + "epoch": 1.14, + "learning_rate": 1.4148939053896669e-05, + "loss": 0.2404, + "step": 958 + }, + { + "epoch": 1.14, + "learning_rate": 1.4125994170004644e-05, + "loss": 0.2539, + "step": 960 + }, + { + "epoch": 1.14, + "learning_rate": 1.4103023077528482e-05, + "loss": 0.2721, + "step": 962 + }, + { + "epoch": 1.15, + "learning_rate": 1.4080025922382056e-05, + "loss": 0.3314, + "step": 964 + }, + { + "epoch": 1.15, + "learning_rate": 1.4057002850644796e-05, + "loss": 0.2668, + "step": 966 + }, + { + "epoch": 1.15, + "learning_rate": 1.4033954008560758e-05, + "loss": 0.2295, + "step": 968 + }, + { + "epoch": 1.15, + "learning_rate": 1.401087954253769e-05, + "loss": 0.284, + "step": 970 + }, + { + "epoch": 1.16, + "learning_rate": 1.3987779599146105e-05, + "loss": 0.2595, + "step": 972 + }, + { + "epoch": 1.16, + "learning_rate": 1.396465432511835e-05, + "loss": 0.2849, + "step": 974 + }, + { + "epoch": 1.16, + "learning_rate": 1.3941503867347672e-05, + "loss": 0.271, + "step": 976 + }, + { + "epoch": 1.16, + "learning_rate": 1.3918328372887295e-05, + "loss": 0.2943, + "step": 978 + }, + { + "epoch": 1.17, + "learning_rate": 1.3895127988949471e-05, + "loss": 0.2751, + "step": 980 + }, + { + "epoch": 1.17, + "learning_rate": 1.3871902862904544e-05, + "loss": 0.276, + "step": 982 + }, + { + "epoch": 1.17, + "learning_rate": 1.3848653142280037e-05, + "loss": 0.2251, + "step": 984 + }, + { + "epoch": 1.17, + "learning_rate": 1.3825378974759696e-05, + "loss": 0.2722, + "step": 986 + }, + { + "epoch": 1.17, + "learning_rate": 1.3802080508182543e-05, + "loss": 0.2927, + "step": 988 + }, + { + "epoch": 1.18, + "learning_rate": 1.377875789054196e-05, + "loss": 0.2473, + "step": 990 + }, + { + "epoch": 1.18, + "learning_rate": 1.376708757136279e-05, + "loss": 0.3166, + "step": 992 + }, + { + "epoch": 1.18, + "learning_rate": 1.3743729004949972e-05, + "loss": 0.3079, + "step": 994 + }, + { + "epoch": 1.18, + "learning_rate": 1.3720346658126286e-05, + "loss": 0.2695, + "step": 996 + }, + { + "epoch": 1.19, + "learning_rate": 1.3696940679417918e-05, + "loss": 0.3125, + "step": 998 + }, + { + "epoch": 1.19, + "learning_rate": 1.3673511217501172e-05, + "loss": 0.2874, + "step": 1000 + }, + { + "epoch": 1.19, + "learning_rate": 1.3650058421201517e-05, + "loss": 0.31, + "step": 1002 + }, + { + "epoch": 1.19, + "learning_rate": 1.362658243949265e-05, + "loss": 0.2795, + "step": 1004 + }, + { + "epoch": 1.2, + "learning_rate": 1.3603083421495535e-05, + "loss": 0.2693, + "step": 1006 + }, + { + "epoch": 1.2, + "learning_rate": 1.3579561516477467e-05, + "loss": 0.2659, + "step": 1008 + }, + { + "epoch": 1.2, + "learning_rate": 1.355601687385112e-05, + "loss": 0.2909, + "step": 1010 + }, + { + "epoch": 1.2, + "learning_rate": 1.3532449643173604e-05, + "loss": 0.262, + "step": 1012 + }, + { + "epoch": 1.21, + "learning_rate": 1.3508859974145504e-05, + "loss": 0.2538, + "step": 1014 + }, + { + "epoch": 1.21, + "learning_rate": 1.3485248016609937e-05, + "loss": 0.2674, + "step": 1016 + }, + { + "epoch": 1.21, + "learning_rate": 1.3461613920551598e-05, + "loss": 0.2863, + "step": 1018 + }, + { + "epoch": 1.21, + "learning_rate": 1.3437957836095804e-05, + "loss": 0.3213, + "step": 1020 + }, + { + "epoch": 1.22, + "learning_rate": 1.3414279913507548e-05, + "loss": 0.2932, + "step": 1022 + }, + { + "epoch": 1.22, + "learning_rate": 1.3390580303190541e-05, + "loss": 0.2604, + "step": 1024 + }, + { + "epoch": 1.22, + "learning_rate": 1.3366859155686253e-05, + "loss": 0.275, + "step": 1026 + }, + { + "epoch": 1.22, + "learning_rate": 1.3343116621672959e-05, + "loss": 0.2625, + "step": 1028 + }, + { + "epoch": 1.22, + "learning_rate": 1.3319352851964787e-05, + "loss": 0.2664, + "step": 1030 + }, + { + "epoch": 1.23, + "learning_rate": 1.3295567997510747e-05, + "loss": 0.2567, + "step": 1032 + }, + { + "epoch": 1.23, + "learning_rate": 1.3271762209393793e-05, + "loss": 0.2469, + "step": 1034 + }, + { + "epoch": 1.23, + "learning_rate": 1.3247935638829838e-05, + "loss": 0.2596, + "step": 1036 + }, + { + "epoch": 1.23, + "learning_rate": 1.3224088437166818e-05, + "loss": 0.3033, + "step": 1038 + }, + { + "epoch": 1.24, + "learning_rate": 1.320022075588371e-05, + "loss": 0.3582, + "step": 1040 + }, + { + "epoch": 1.24, + "learning_rate": 1.3176332746589587e-05, + "loss": 0.2339, + "step": 1042 + }, + { + "epoch": 1.24, + "learning_rate": 1.3152424561022634e-05, + "loss": 0.2622, + "step": 1044 + }, + { + "epoch": 1.24, + "learning_rate": 1.3128496351049216e-05, + "loss": 0.2388, + "step": 1046 + }, + { + "epoch": 1.25, + "learning_rate": 1.3104548268662873e-05, + "loss": 0.2322, + "step": 1048 + }, + { + "epoch": 1.25, + "learning_rate": 1.3080580465983397e-05, + "loss": 0.3108, + "step": 1050 + }, + { + "epoch": 1.25, + "learning_rate": 1.3056593095255825e-05, + "loss": 0.2339, + "step": 1052 + }, + { + "epoch": 1.25, + "learning_rate": 1.3032586308849512e-05, + "loss": 0.2731, + "step": 1054 + }, + { + "epoch": 1.26, + "learning_rate": 1.3008560259257117e-05, + "loss": 0.2677, + "step": 1056 + }, + { + "epoch": 1.26, + "learning_rate": 1.2984515099093687e-05, + "loss": 0.2907, + "step": 1058 + }, + { + "epoch": 1.26, + "learning_rate": 1.2960450981095643e-05, + "loss": 0.2836, + "step": 1060 + }, + { + "epoch": 1.26, + "learning_rate": 1.2936368058119828e-05, + "loss": 0.2621, + "step": 1062 + }, + { + "epoch": 1.27, + "learning_rate": 1.2912266483142545e-05, + "loss": 0.3009, + "step": 1064 + }, + { + "epoch": 1.27, + "learning_rate": 1.2888146409258575e-05, + "loss": 0.252, + "step": 1066 + }, + { + "epoch": 1.27, + "learning_rate": 1.2864007989680194e-05, + "loss": 0.3354, + "step": 1068 + }, + { + "epoch": 1.27, + "learning_rate": 1.2839851377736216e-05, + "loss": 0.2908, + "step": 1070 + }, + { + "epoch": 1.27, + "learning_rate": 1.281567672687102e-05, + "loss": 0.284, + "step": 1072 + }, + { + "epoch": 1.28, + "learning_rate": 1.2791484190643571e-05, + "loss": 0.2882, + "step": 1074 + }, + { + "epoch": 1.28, + "learning_rate": 1.2767273922726427e-05, + "loss": 0.3096, + "step": 1076 + }, + { + "epoch": 1.28, + "learning_rate": 1.2743046076904795e-05, + "loss": 0.2674, + "step": 1078 + }, + { + "epoch": 1.28, + "learning_rate": 1.271880080707553e-05, + "loss": 0.2207, + "step": 1080 + }, + { + "epoch": 1.29, + "learning_rate": 1.2694538267246168e-05, + "loss": 0.2605, + "step": 1082 + }, + { + "epoch": 1.29, + "learning_rate": 1.2670258611533947e-05, + "loss": 0.2598, + "step": 1084 + }, + { + "epoch": 1.29, + "learning_rate": 1.2645961994164822e-05, + "loss": 0.2856, + "step": 1086 + }, + { + "epoch": 1.29, + "learning_rate": 1.2621648569472491e-05, + "loss": 0.2436, + "step": 1088 + }, + { + "epoch": 1.3, + "learning_rate": 1.2597318491897416e-05, + "loss": 0.2606, + "step": 1090 + }, + { + "epoch": 1.3, + "learning_rate": 1.257297191598584e-05, + "loss": 0.2602, + "step": 1092 + }, + { + "epoch": 1.3, + "learning_rate": 1.2548608996388792e-05, + "loss": 0.2465, + "step": 1094 + }, + { + "epoch": 1.3, + "learning_rate": 1.2524229887861132e-05, + "loss": 0.2536, + "step": 1096 + }, + { + "epoch": 1.31, + "learning_rate": 1.2499834745260553e-05, + "loss": 0.2859, + "step": 1098 + }, + { + "epoch": 1.31, + "learning_rate": 1.2475423723546584e-05, + "loss": 0.2539, + "step": 1100 + }, + { + "epoch": 1.31, + "learning_rate": 1.245099697777963e-05, + "loss": 0.2299, + "step": 1102 + }, + { + "epoch": 1.31, + "learning_rate": 1.2426554663119975e-05, + "loss": 0.4215, + "step": 1104 + }, + { + "epoch": 1.32, + "learning_rate": 1.2402096934826794e-05, + "loss": 0.2575, + "step": 1106 + }, + { + "epoch": 1.32, + "learning_rate": 1.237762394825718e-05, + "loss": 0.3257, + "step": 1108 + }, + { + "epoch": 1.32, + "learning_rate": 1.2353135858865128e-05, + "loss": 0.2778, + "step": 1110 + }, + { + "epoch": 1.32, + "learning_rate": 1.232863282220059e-05, + "loss": 0.2592, + "step": 1112 + }, + { + "epoch": 1.32, + "learning_rate": 1.230411499390845e-05, + "loss": 0.2539, + "step": 1114 + }, + { + "epoch": 1.33, + "learning_rate": 1.2279582529727552e-05, + "loss": 0.2831, + "step": 1116 + }, + { + "epoch": 1.33, + "learning_rate": 1.2255035585489705e-05, + "loss": 0.2806, + "step": 1118 + }, + { + "epoch": 1.33, + "learning_rate": 1.2230474317118708e-05, + "loss": 0.2777, + "step": 1120 + }, + { + "epoch": 1.33, + "learning_rate": 1.2205898880629336e-05, + "loss": 0.3334, + "step": 1122 + }, + { + "epoch": 1.34, + "learning_rate": 1.2181309432126366e-05, + "loss": 0.302, + "step": 1124 + }, + { + "epoch": 1.34, + "learning_rate": 1.2156706127803578e-05, + "loss": 0.2659, + "step": 1126 + }, + { + "epoch": 1.34, + "learning_rate": 1.2132089123942764e-05, + "loss": 0.297, + "step": 1128 + }, + { + "epoch": 1.34, + "learning_rate": 1.2107458576912743e-05, + "loss": 0.3207, + "step": 1130 + }, + { + "epoch": 1.35, + "learning_rate": 1.2082814643168357e-05, + "loss": 0.2224, + "step": 1132 + }, + { + "epoch": 1.35, + "learning_rate": 1.2058157479249475e-05, + "loss": 0.295, + "step": 1134 + }, + { + "epoch": 1.35, + "learning_rate": 1.2033487241780014e-05, + "loss": 0.2238, + "step": 1136 + }, + { + "epoch": 1.35, + "learning_rate": 1.2008804087466931e-05, + "loss": 0.277, + "step": 1138 + }, + { + "epoch": 1.36, + "learning_rate": 1.1984108173099238e-05, + "loss": 0.2906, + "step": 1140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1959399655546989e-05, + "loss": 0.2649, + "step": 1142 + }, + { + "epoch": 1.36, + "learning_rate": 1.1934678691760296e-05, + "loss": 0.3147, + "step": 1144 + }, + { + "epoch": 1.36, + "learning_rate": 1.190994543876834e-05, + "loss": 0.2761, + "step": 1146 + }, + { + "epoch": 1.37, + "learning_rate": 1.188520005367836e-05, + "loss": 0.2252, + "step": 1148 + }, + { + "epoch": 1.37, + "learning_rate": 1.1860442693674648e-05, + "loss": 0.2521, + "step": 1150 + }, + { + "epoch": 1.37, + "learning_rate": 1.1835673516017571e-05, + "loss": 0.2618, + "step": 1152 + }, + { + "epoch": 1.37, + "learning_rate": 1.1810892678042565e-05, + "loss": 0.2869, + "step": 1154 + }, + { + "epoch": 1.37, + "learning_rate": 1.1786100337159132e-05, + "loss": 0.2124, + "step": 1156 + }, + { + "epoch": 1.38, + "learning_rate": 1.177369990233723e-05, + "loss": 0.3826, + "step": 1158 + }, + { + "epoch": 1.38, + "learning_rate": 1.1748890602393521e-05, + "loss": 0.2805, + "step": 1160 + }, + { + "epoch": 1.38, + "learning_rate": 1.172407019338261e-05, + "loss": 0.2467, + "step": 1162 + }, + { + "epoch": 1.38, + "learning_rate": 1.1699238832965358e-05, + "loss": 0.2405, + "step": 1164 + }, + { + "epoch": 1.39, + "learning_rate": 1.1674396678872186e-05, + "loss": 0.3017, + "step": 1166 + }, + { + "epoch": 1.39, + "learning_rate": 1.164954388890207e-05, + "loss": 0.2584, + "step": 1168 + }, + { + "epoch": 1.39, + "learning_rate": 1.162468062092156e-05, + "loss": 0.3141, + "step": 1170 + }, + { + "epoch": 1.39, + "learning_rate": 1.1599807032863756e-05, + "loss": 0.3254, + "step": 1172 + }, + { + "epoch": 1.4, + "learning_rate": 1.1574923282727314e-05, + "loss": 0.2703, + "step": 1174 + }, + { + "epoch": 1.4, + "learning_rate": 1.1550029528575428e-05, + "loss": 0.2207, + "step": 1176 + }, + { + "epoch": 1.4, + "learning_rate": 1.152512592853486e-05, + "loss": 0.2634, + "step": 1178 + }, + { + "epoch": 1.4, + "learning_rate": 1.1500212640794895e-05, + "loss": 0.3368, + "step": 1180 + }, + { + "epoch": 1.41, + "learning_rate": 1.1475289823606364e-05, + "loss": 0.2535, + "step": 1182 + }, + { + "epoch": 1.41, + "learning_rate": 1.1450357635280628e-05, + "loss": 0.287, + "step": 1184 + }, + { + "epoch": 1.41, + "learning_rate": 1.1425416234188578e-05, + "loss": 0.3052, + "step": 1186 + }, + { + "epoch": 1.41, + "learning_rate": 1.1400465778759611e-05, + "loss": 0.2909, + "step": 1188 + }, + { + "epoch": 1.41, + "learning_rate": 1.1375506427480658e-05, + "loss": 0.2904, + "step": 1190 + }, + { + "epoch": 1.42, + "learning_rate": 1.135053833889514e-05, + "loss": 0.339, + "step": 1192 + }, + { + "epoch": 1.42, + "learning_rate": 1.1325561671601987e-05, + "loss": 0.292, + "step": 1194 + }, + { + "epoch": 1.42, + "learning_rate": 1.1300576584254617e-05, + "loss": 0.2424, + "step": 1196 + }, + { + "epoch": 1.42, + "learning_rate": 1.127558323555994e-05, + "loss": 0.267, + "step": 1198 + }, + { + "epoch": 1.43, + "learning_rate": 1.125058178427733e-05, + "loss": 0.2641, + "step": 1200 + }, + { + "epoch": 1.43, + "learning_rate": 1.1225572389217643e-05, + "loss": 0.269, + "step": 1202 + }, + { + "epoch": 1.43, + "learning_rate": 1.1200555209242182e-05, + "loss": 0.2903, + "step": 1204 + }, + { + "epoch": 1.43, + "learning_rate": 1.1175530403261716e-05, + "loss": 0.2622, + "step": 1206 + }, + { + "epoch": 1.44, + "learning_rate": 1.1150498130235435e-05, + "loss": 0.3199, + "step": 1208 + }, + { + "epoch": 1.44, + "learning_rate": 1.1125458549169977e-05, + "loss": 0.2469, + "step": 1210 + }, + { + "epoch": 1.44, + "learning_rate": 1.1100411819118387e-05, + "loss": 0.2781, + "step": 1212 + }, + { + "epoch": 1.44, + "learning_rate": 1.1075358099179136e-05, + "loss": 0.3293, + "step": 1214 + }, + { + "epoch": 1.45, + "learning_rate": 1.1050297548495084e-05, + "loss": 0.2065, + "step": 1216 + }, + { + "epoch": 1.45, + "learning_rate": 1.1025230326252484e-05, + "loss": 0.2548, + "step": 1218 + }, + { + "epoch": 1.45, + "learning_rate": 1.1000156591679971e-05, + "loss": 0.3063, + "step": 1220 + }, + { + "epoch": 1.45, + "learning_rate": 1.0975076504047535e-05, + "loss": 0.3099, + "step": 1222 + }, + { + "epoch": 1.46, + "learning_rate": 1.0949990222665532e-05, + "loss": 0.2805, + "step": 1224 + }, + { + "epoch": 1.46, + "learning_rate": 1.0924897906883663e-05, + "loss": 0.288, + "step": 1226 + }, + { + "epoch": 1.46, + "learning_rate": 1.0899799716089949e-05, + "loss": 0.3014, + "step": 1228 + }, + { + "epoch": 1.46, + "learning_rate": 1.0874695809709737e-05, + "loss": 0.2768, + "step": 1230 + }, + { + "epoch": 1.46, + "learning_rate": 1.0849586347204677e-05, + "loss": 0.2894, + "step": 1232 + }, + { + "epoch": 1.47, + "learning_rate": 1.0824471488071714e-05, + "loss": 0.2718, + "step": 1234 + }, + { + "epoch": 1.47, + "learning_rate": 1.0799351391842074e-05, + "loss": 0.2476, + "step": 1236 + }, + { + "epoch": 1.47, + "learning_rate": 1.0774226218080244e-05, + "loss": 0.2318, + "step": 1238 + }, + { + "epoch": 1.47, + "learning_rate": 1.0749096126382965e-05, + "loss": 0.2545, + "step": 1240 + }, + { + "epoch": 1.48, + "learning_rate": 1.0723961276378225e-05, + "loss": 0.2708, + "step": 1242 + }, + { + "epoch": 1.48, + "learning_rate": 1.0698821827724225e-05, + "loss": 0.3471, + "step": 1244 + }, + { + "epoch": 1.48, + "learning_rate": 1.0673677940108386e-05, + "loss": 0.2528, + "step": 1246 + }, + { + "epoch": 1.48, + "learning_rate": 1.0648529773246324e-05, + "loss": 0.2625, + "step": 1248 + }, + { + "epoch": 1.49, + "learning_rate": 1.0623377486880831e-05, + "loss": 0.2634, + "step": 1250 + }, + { + "epoch": 1.49, + "learning_rate": 1.0598221240780874e-05, + "loss": 0.2506, + "step": 1252 + }, + { + "epoch": 1.49, + "learning_rate": 1.0573061194740568e-05, + "loss": 0.2659, + "step": 1254 + }, + { + "epoch": 1.49, + "learning_rate": 1.054789750857817e-05, + "loss": 0.239, + "step": 1256 + }, + { + "epoch": 1.5, + "learning_rate": 1.052273034213505e-05, + "loss": 0.2465, + "step": 1258 + }, + { + "epoch": 1.5, + "learning_rate": 1.0497559855274699e-05, + "loss": 0.2512, + "step": 1260 + }, + { + "epoch": 1.5, + "learning_rate": 1.0472386207881684e-05, + "loss": 0.303, + "step": 1262 + }, + { + "epoch": 1.5, + "learning_rate": 1.0447209559860658e-05, + "loss": 0.2542, + "step": 1264 + }, + { + "epoch": 1.51, + "learning_rate": 1.0422030071135336e-05, + "loss": 0.2995, + "step": 1266 + }, + { + "epoch": 1.51, + "learning_rate": 1.0396847901647469e-05, + "loss": 0.2597, + "step": 1268 + }, + { + "epoch": 1.51, + "learning_rate": 1.037166321135584e-05, + "loss": 0.2773, + "step": 1270 + }, + { + "epoch": 1.51, + "learning_rate": 1.0346476160235246e-05, + "loss": 0.2771, + "step": 1272 + }, + { + "epoch": 1.51, + "learning_rate": 1.0321286908275476e-05, + "loss": 0.2906, + "step": 1274 + }, + { + "epoch": 1.52, + "learning_rate": 1.0296095615480309e-05, + "loss": 0.3002, + "step": 1276 + }, + { + "epoch": 1.52, + "learning_rate": 1.0270902441866474e-05, + "loss": 0.2267, + "step": 1278 + }, + { + "epoch": 1.52, + "learning_rate": 1.0245707547462654e-05, + "loss": 0.2545, + "step": 1280 + }, + { + "epoch": 1.52, + "learning_rate": 1.0220511092308463e-05, + "loss": 0.2412, + "step": 1282 + }, + { + "epoch": 1.53, + "learning_rate": 1.0195313236453431e-05, + "loss": 0.2567, + "step": 1284 + }, + { + "epoch": 1.53, + "learning_rate": 1.0170114139955975e-05, + "loss": 0.2589, + "step": 1286 + }, + { + "epoch": 1.53, + "learning_rate": 1.0144913962882406e-05, + "loss": 0.2834, + "step": 1288 + }, + { + "epoch": 1.53, + "learning_rate": 1.0119712865305891e-05, + "loss": 0.2504, + "step": 1290 + }, + { + "epoch": 1.54, + "learning_rate": 1.0094511007305445e-05, + "loss": 0.2788, + "step": 1292 + }, + { + "epoch": 1.54, + "learning_rate": 1.0069308548964915e-05, + "loss": 0.2664, + "step": 1294 + }, + { + "epoch": 1.54, + "learning_rate": 1.0044105650371961e-05, + "loss": 0.2695, + "step": 1296 + }, + { + "epoch": 1.54, + "learning_rate": 1.0018902471617037e-05, + "loss": 0.2309, + "step": 1298 + }, + { + "epoch": 1.55, + "learning_rate": 9.993699172792381e-06, + "loss": 0.2949, + "step": 1300 + }, + { + "epoch": 1.55, + "learning_rate": 9.96849591399099e-06, + "loss": 0.3248, + "step": 1302 + }, + { + "epoch": 1.55, + "learning_rate": 9.943292855305611e-06, + "loss": 0.2719, + "step": 1304 + }, + { + "epoch": 1.55, + "learning_rate": 9.918090156827712e-06, + "loss": 0.2417, + "step": 1306 + }, + { + "epoch": 1.56, + "learning_rate": 9.892887978646483e-06, + "loss": 0.2371, + "step": 1308 + }, + { + "epoch": 1.56, + "learning_rate": 9.867686480847801e-06, + "loss": 0.271, + "step": 1310 + }, + { + "epoch": 1.56, + "learning_rate": 9.842485823513222e-06, + "loss": 0.2791, + "step": 1312 + }, + { + "epoch": 1.56, + "learning_rate": 9.817286166718971e-06, + "loss": 0.2759, + "step": 1314 + }, + { + "epoch": 1.56, + "learning_rate": 9.792087670534908e-06, + "loss": 0.2867, + "step": 1316 + }, + { + "epoch": 1.57, + "learning_rate": 9.766890495023522e-06, + "loss": 0.2964, + "step": 1318 + }, + { + "epoch": 1.57, + "learning_rate": 9.741694800238923e-06, + "loss": 0.2466, + "step": 1320 + }, + { + "epoch": 1.57, + "learning_rate": 9.716500746225802e-06, + "loss": 0.2745, + "step": 1322 + }, + { + "epoch": 1.57, + "learning_rate": 9.691308493018439e-06, + "loss": 0.2429, + "step": 1324 + }, + { + "epoch": 1.58, + "learning_rate": 9.666118200639667e-06, + "loss": 0.2561, + "step": 1326 + }, + { + "epoch": 1.58, + "learning_rate": 9.640930029099863e-06, + "loss": 0.2462, + "step": 1328 + }, + { + "epoch": 1.58, + "learning_rate": 9.615744138395941e-06, + "loss": 0.2294, + "step": 1330 + }, + { + "epoch": 1.58, + "learning_rate": 9.590560688510323e-06, + "loss": 0.2462, + "step": 1332 + }, + { + "epoch": 1.59, + "learning_rate": 9.565379839409916e-06, + "loss": 0.2755, + "step": 1334 + }, + { + "epoch": 1.59, + "learning_rate": 9.540201751045127e-06, + "loss": 0.2623, + "step": 1336 + }, + { + "epoch": 1.59, + "learning_rate": 9.515026583348811e-06, + "loss": 0.3047, + "step": 1338 + }, + { + "epoch": 1.59, + "learning_rate": 9.489854496235278e-06, + "loss": 0.2489, + "step": 1340 + }, + { + "epoch": 1.6, + "learning_rate": 9.464685649599266e-06, + "loss": 0.23, + "step": 1342 + }, + { + "epoch": 1.6, + "learning_rate": 9.439520203314927e-06, + "loss": 0.2517, + "step": 1344 + }, + { + "epoch": 1.6, + "learning_rate": 9.414358317234826e-06, + "loss": 0.3041, + "step": 1346 + }, + { + "epoch": 1.6, + "learning_rate": 9.3892001511889e-06, + "loss": 0.2922, + "step": 1348 + }, + { + "epoch": 1.61, + "learning_rate": 9.364045864983454e-06, + "loss": 0.2451, + "step": 1350 + }, + { + "epoch": 1.61, + "learning_rate": 9.338895618400168e-06, + "loss": 0.2457, + "step": 1352 + }, + { + "epoch": 1.61, + "learning_rate": 9.313749571195041e-06, + "loss": 0.2488, + "step": 1354 + }, + { + "epoch": 1.61, + "learning_rate": 9.28860788309741e-06, + "loss": 0.2599, + "step": 1356 + }, + { + "epoch": 1.61, + "learning_rate": 9.263470713808917e-06, + "loss": 0.2192, + "step": 1358 + }, + { + "epoch": 1.62, + "learning_rate": 9.238338223002496e-06, + "loss": 0.2296, + "step": 1360 + }, + { + "epoch": 1.62, + "learning_rate": 9.213210570321374e-06, + "loss": 0.2907, + "step": 1362 + }, + { + "epoch": 1.62, + "learning_rate": 9.188087915378037e-06, + "loss": 0.2644, + "step": 1364 + }, + { + "epoch": 1.62, + "learning_rate": 9.162970417753229e-06, + "loss": 0.2307, + "step": 1366 + }, + { + "epoch": 1.63, + "learning_rate": 9.137858236994932e-06, + "loss": 0.2493, + "step": 1368 + }, + { + "epoch": 1.63, + "learning_rate": 9.112751532617361e-06, + "loss": 0.2546, + "step": 1370 + }, + { + "epoch": 1.63, + "learning_rate": 9.087650464099937e-06, + "loss": 0.2835, + "step": 1372 + }, + { + "epoch": 1.63, + "learning_rate": 9.062555190886287e-06, + "loss": 0.2701, + "step": 1374 + }, + { + "epoch": 1.64, + "learning_rate": 9.037465872383219e-06, + "loss": 0.2514, + "step": 1376 + }, + { + "epoch": 1.64, + "learning_rate": 9.012382667959724e-06, + "loss": 0.2423, + "step": 1378 + }, + { + "epoch": 1.64, + "learning_rate": 8.987305736945955e-06, + "loss": 0.2289, + "step": 1380 + }, + { + "epoch": 1.64, + "learning_rate": 8.962235238632208e-06, + "loss": 0.2504, + "step": 1382 + }, + { + "epoch": 1.65, + "learning_rate": 8.937171332267927e-06, + "loss": 0.2912, + "step": 1384 + }, + { + "epoch": 1.65, + "learning_rate": 8.912114177060681e-06, + "loss": 0.2368, + "step": 1386 + }, + { + "epoch": 1.65, + "learning_rate": 8.887063932175156e-06, + "loss": 0.2823, + "step": 1388 + }, + { + "epoch": 1.65, + "learning_rate": 8.862020756732141e-06, + "loss": 0.2289, + "step": 1390 + }, + { + "epoch": 1.66, + "learning_rate": 8.836984809807514e-06, + "loss": 0.2332, + "step": 1392 + }, + { + "epoch": 1.66, + "learning_rate": 8.811956250431253e-06, + "loss": 0.2627, + "step": 1394 + }, + { + "epoch": 1.66, + "learning_rate": 8.786935237586394e-06, + "loss": 0.2613, + "step": 1396 + }, + { + "epoch": 1.66, + "learning_rate": 8.761921930208044e-06, + "loss": 0.2353, + "step": 1398 + }, + { + "epoch": 1.66, + "learning_rate": 8.73691648718236e-06, + "loss": 0.2668, + "step": 1400 + }, + { + "epoch": 1.67, + "learning_rate": 8.71191906734555e-06, + "loss": 0.2292, + "step": 1402 + }, + { + "epoch": 1.67, + "learning_rate": 8.686929829482862e-06, + "loss": 0.2577, + "step": 1404 + }, + { + "epoch": 1.67, + "learning_rate": 8.661948932327558e-06, + "loss": 0.2177, + "step": 1406 + }, + { + "epoch": 1.67, + "learning_rate": 8.636976534559926e-06, + "loss": 0.2849, + "step": 1408 + }, + { + "epoch": 1.68, + "learning_rate": 8.61201279480627e-06, + "loss": 0.2575, + "step": 1410 + }, + { + "epoch": 1.68, + "learning_rate": 8.587057871637891e-06, + "loss": 0.267, + "step": 1412 + }, + { + "epoch": 1.68, + "learning_rate": 8.562111923570091e-06, + "loss": 0.2572, + "step": 1414 + }, + { + "epoch": 1.68, + "learning_rate": 8.537175109061154e-06, + "loss": 0.2546, + "step": 1416 + }, + { + "epoch": 1.69, + "learning_rate": 8.512247586511354e-06, + "loss": 0.3069, + "step": 1418 + }, + { + "epoch": 1.69, + "learning_rate": 8.487329514261948e-06, + "loss": 0.2915, + "step": 1420 + }, + { + "epoch": 1.69, + "learning_rate": 8.46242105059415e-06, + "loss": 0.3033, + "step": 1422 + }, + { + "epoch": 1.69, + "learning_rate": 8.437522353728147e-06, + "loss": 0.2389, + "step": 1424 + }, + { + "epoch": 1.7, + "learning_rate": 8.412633581822086e-06, + "loss": 0.4257, + "step": 1426 + }, + { + "epoch": 1.7, + "learning_rate": 8.387754892971073e-06, + "loss": 0.2206, + "step": 1428 + }, + { + "epoch": 1.7, + "learning_rate": 8.36288644520616e-06, + "loss": 0.2217, + "step": 1430 + }, + { + "epoch": 1.7, + "learning_rate": 8.338028396493345e-06, + "loss": 0.27, + "step": 1432 + }, + { + "epoch": 1.71, + "learning_rate": 8.313180904732578e-06, + "loss": 0.266, + "step": 1434 + }, + { + "epoch": 1.71, + "learning_rate": 8.288344127756755e-06, + "loss": 0.2922, + "step": 1436 + }, + { + "epoch": 1.71, + "learning_rate": 8.263518223330698e-06, + "loss": 0.2122, + "step": 1438 + }, + { + "epoch": 1.71, + "learning_rate": 8.238703349150169e-06, + "loss": 0.2822, + "step": 1440 + }, + { + "epoch": 1.71, + "learning_rate": 8.213899662840871e-06, + "loss": 0.2687, + "step": 1442 + }, + { + "epoch": 1.72, + "learning_rate": 8.189107321957437e-06, + "loss": 0.2783, + "step": 1444 + }, + { + "epoch": 1.72, + "learning_rate": 8.164326483982434e-06, + "loss": 0.2499, + "step": 1446 + }, + { + "epoch": 1.72, + "learning_rate": 8.139557306325359e-06, + "loss": 0.2408, + "step": 1448 + }, + { + "epoch": 1.72, + "learning_rate": 8.114799946321647e-06, + "loss": 0.2584, + "step": 1450 + }, + { + "epoch": 1.73, + "learning_rate": 8.090054561231659e-06, + "loss": 0.237, + "step": 1452 + }, + { + "epoch": 1.73, + "learning_rate": 8.065321308239706e-06, + "loss": 0.2553, + "step": 1454 + }, + { + "epoch": 1.73, + "learning_rate": 8.040600344453013e-06, + "loss": 0.2224, + "step": 1456 + }, + { + "epoch": 1.73, + "learning_rate": 8.015891826900764e-06, + "loss": 0.3115, + "step": 1458 + }, + { + "epoch": 1.74, + "learning_rate": 7.99119591253307e-06, + "loss": 0.223, + "step": 1460 + }, + { + "epoch": 1.74, + "learning_rate": 7.966512758219991e-06, + "loss": 0.2604, + "step": 1462 + }, + { + "epoch": 1.74, + "learning_rate": 7.941842520750529e-06, + "loss": 0.262, + "step": 1464 + }, + { + "epoch": 1.74, + "learning_rate": 7.91718535683165e-06, + "loss": 0.2583, + "step": 1466 + }, + { + "epoch": 1.75, + "learning_rate": 7.892541423087258e-06, + "loss": 0.2318, + "step": 1468 + }, + { + "epoch": 1.75, + "learning_rate": 7.867910876057238e-06, + "loss": 0.2489, + "step": 1470 + }, + { + "epoch": 1.75, + "learning_rate": 7.843293872196425e-06, + "loss": 0.2609, + "step": 1472 + }, + { + "epoch": 1.75, + "learning_rate": 7.818690567873637e-06, + "loss": 0.2592, + "step": 1474 + }, + { + "epoch": 1.76, + "learning_rate": 7.794101119370668e-06, + "loss": 0.269, + "step": 1476 + }, + { + "epoch": 1.76, + "learning_rate": 7.769525682881295e-06, + "loss": 0.2532, + "step": 1478 + }, + { + "epoch": 1.76, + "learning_rate": 7.744964414510297e-06, + "loss": 0.2223, + "step": 1480 + }, + { + "epoch": 1.76, + "learning_rate": 7.720417470272455e-06, + "loss": 0.2525, + "step": 1482 + }, + { + "epoch": 1.76, + "learning_rate": 7.695885006091552e-06, + "loss": 0.2701, + "step": 1484 + }, + { + "epoch": 1.77, + "learning_rate": 7.67136717779941e-06, + "loss": 0.3059, + "step": 1486 + }, + { + "epoch": 1.77, + "learning_rate": 7.646864141134874e-06, + "loss": 0.2591, + "step": 1488 + }, + { + "epoch": 1.77, + "learning_rate": 7.622376051742824e-06, + "loss": 0.2618, + "step": 1490 + }, + { + "epoch": 1.77, + "learning_rate": 7.5979030651732065e-06, + "loss": 0.2814, + "step": 1492 + }, + { + "epoch": 1.78, + "learning_rate": 7.573445336880029e-06, + "loss": 0.2389, + "step": 1494 + }, + { + "epoch": 1.78, + "learning_rate": 7.549003022220374e-06, + "loss": 0.2347, + "step": 1496 + }, + { + "epoch": 1.78, + "learning_rate": 7.524576276453422e-06, + "loss": 0.2642, + "step": 1498 + }, + { + "epoch": 1.78, + "learning_rate": 7.500165254739453e-06, + "loss": 0.2591, + "step": 1500 + }, + { + "epoch": 1.79, + "learning_rate": 7.475770112138867e-06, + "loss": 0.2591, + "step": 1502 + }, + { + "epoch": 1.79, + "learning_rate": 7.4513910036112105e-06, + "loss": 0.2878, + "step": 1504 + }, + { + "epoch": 1.79, + "learning_rate": 7.427028084014163e-06, + "loss": 0.305, + "step": 1506 + }, + { + "epoch": 1.79, + "learning_rate": 7.402681508102585e-06, + "loss": 0.263, + "step": 1508 + }, + { + "epoch": 1.8, + "learning_rate": 7.378351430527511e-06, + "loss": 0.2868, + "step": 1510 + }, + { + "epoch": 1.8, + "learning_rate": 7.35403800583518e-06, + "loss": 0.2299, + "step": 1512 + }, + { + "epoch": 1.8, + "learning_rate": 7.329741388466056e-06, + "loss": 0.2942, + "step": 1514 + }, + { + "epoch": 1.8, + "learning_rate": 7.305461732753836e-06, + "loss": 0.2993, + "step": 1516 + }, + { + "epoch": 1.8, + "learning_rate": 7.281199192924473e-06, + "loss": 0.263, + "step": 1518 + }, + { + "epoch": 1.81, + "learning_rate": 7.256953923095209e-06, + "loss": 0.2591, + "step": 1520 + }, + { + "epoch": 1.81, + "learning_rate": 7.232726077273575e-06, + "loss": 0.2612, + "step": 1522 + }, + { + "epoch": 1.81, + "learning_rate": 7.208515809356434e-06, + "loss": 0.2354, + "step": 1524 + }, + { + "epoch": 1.81, + "learning_rate": 7.184323273128981e-06, + "loss": 0.2504, + "step": 1526 + }, + { + "epoch": 1.82, + "learning_rate": 7.160148622263786e-06, + "loss": 0.2437, + "step": 1528 + }, + { + "epoch": 1.82, + "learning_rate": 7.135992010319812e-06, + "loss": 0.2154, + "step": 1530 + }, + { + "epoch": 1.82, + "learning_rate": 7.123920516899151e-06, + "loss": 0.2963, + "step": 1532 + }, + { + "epoch": 1.82, + "learning_rate": 7.0997912510091335e-06, + "loss": 0.2307, + "step": 1534 + }, + { + "epoch": 1.83, + "learning_rate": 7.075680407434289e-06, + "loss": 0.2826, + "step": 1536 + }, + { + "epoch": 1.83, + "learning_rate": 7.051588139328276e-06, + "loss": 0.2344, + "step": 1538 + }, + { + "epoch": 1.83, + "learning_rate": 7.0275145997267544e-06, + "loss": 0.3482, + "step": 1540 + }, + { + "epoch": 1.83, + "learning_rate": 7.0034599415464135e-06, + "loss": 0.2818, + "step": 1542 + }, + { + "epoch": 1.84, + "learning_rate": 6.979424317584014e-06, + "loss": 0.3444, + "step": 1544 + }, + { + "epoch": 1.84, + "learning_rate": 6.955407880515404e-06, + "loss": 0.2712, + "step": 1546 + }, + { + "epoch": 1.84, + "learning_rate": 6.931410782894563e-06, + "loss": 0.2794, + "step": 1548 + }, + { + "epoch": 1.84, + "learning_rate": 6.907433177152618e-06, + "loss": 0.2701, + "step": 1550 + }, + { + "epoch": 1.85, + "learning_rate": 6.883475215596882e-06, + "loss": 0.2943, + "step": 1552 + }, + { + "epoch": 1.85, + "learning_rate": 6.859537050409895e-06, + "loss": 0.2488, + "step": 1554 + }, + { + "epoch": 1.85, + "learning_rate": 6.835618833648443e-06, + "loss": 0.2451, + "step": 1556 + }, + { + "epoch": 1.85, + "learning_rate": 6.8117207172425996e-06, + "loss": 0.3123, + "step": 1558 + }, + { + "epoch": 1.85, + "learning_rate": 6.787842852994757e-06, + "loss": 0.2522, + "step": 1560 + }, + { + "epoch": 1.86, + "learning_rate": 6.763985392578667e-06, + "loss": 0.2766, + "step": 1562 + }, + { + "epoch": 1.86, + "learning_rate": 6.740148487538476e-06, + "loss": 0.2473, + "step": 1564 + }, + { + "epoch": 1.86, + "learning_rate": 6.716332289287759e-06, + "loss": 0.214, + "step": 1566 + }, + { + "epoch": 1.86, + "learning_rate": 6.692536949108562e-06, + "loss": 0.2742, + "step": 1568 + }, + { + "epoch": 1.87, + "learning_rate": 6.6687626181504315e-06, + "loss": 0.2348, + "step": 1570 + }, + { + "epoch": 1.87, + "learning_rate": 6.64500944742948e-06, + "loss": 0.2363, + "step": 1572 + }, + { + "epoch": 1.87, + "learning_rate": 6.6212775878273925e-06, + "loss": 0.2686, + "step": 1574 + }, + { + "epoch": 1.87, + "learning_rate": 6.59756719009049e-06, + "loss": 0.2852, + "step": 1576 + }, + { + "epoch": 1.88, + "learning_rate": 6.5738784048287615e-06, + "loss": 0.2272, + "step": 1578 + }, + { + "epoch": 1.88, + "learning_rate": 6.550211382514922e-06, + "loss": 0.2975, + "step": 1580 + }, + { + "epoch": 1.88, + "learning_rate": 6.526566273483439e-06, + "loss": 0.2563, + "step": 1582 + }, + { + "epoch": 1.88, + "learning_rate": 6.502943227929586e-06, + "loss": 0.2971, + "step": 1584 + }, + { + "epoch": 1.89, + "learning_rate": 6.479342395908487e-06, + "loss": 0.2601, + "step": 1586 + }, + { + "epoch": 1.89, + "learning_rate": 6.455763927334177e-06, + "loss": 0.258, + "step": 1588 + }, + { + "epoch": 1.89, + "learning_rate": 6.432207971978619e-06, + "loss": 0.2953, + "step": 1590 + }, + { + "epoch": 1.89, + "learning_rate": 6.4086746794707795e-06, + "loss": 0.2961, + "step": 1592 + }, + { + "epoch": 1.9, + "learning_rate": 6.385164199295666e-06, + "loss": 0.262, + "step": 1594 + }, + { + "epoch": 1.9, + "learning_rate": 6.3616766807933875e-06, + "loss": 0.2151, + "step": 1596 + }, + { + "epoch": 1.9, + "learning_rate": 6.338212273158188e-06, + "loss": 0.2525, + "step": 1598 + }, + { + "epoch": 1.9, + "learning_rate": 6.314771125437517e-06, + "loss": 0.2393, + "step": 1600 + }, + { + "epoch": 1.9, + "learning_rate": 6.291353386531074e-06, + "loss": 0.2758, + "step": 1602 + }, + { + "epoch": 1.91, + "learning_rate": 6.2679592051898685e-06, + "loss": 0.2312, + "step": 1604 + }, + { + "epoch": 1.91, + "learning_rate": 6.244588730015264e-06, + "loss": 0.2428, + "step": 1606 + }, + { + "epoch": 1.91, + "learning_rate": 6.221242109458043e-06, + "loss": 0.2551, + "step": 1608 + }, + { + "epoch": 1.91, + "learning_rate": 6.197919491817459e-06, + "loss": 0.2715, + "step": 1610 + }, + { + "epoch": 1.92, + "learning_rate": 6.174621025240307e-06, + "loss": 0.2359, + "step": 1612 + }, + { + "epoch": 1.92, + "learning_rate": 6.151346857719964e-06, + "loss": 0.2671, + "step": 1614 + }, + { + "epoch": 1.92, + "learning_rate": 6.128097137095458e-06, + "loss": 0.1967, + "step": 1616 + }, + { + "epoch": 1.92, + "learning_rate": 6.104872011050534e-06, + "loss": 0.2563, + "step": 1618 + }, + { + "epoch": 1.93, + "learning_rate": 6.081671627112704e-06, + "loss": 0.2613, + "step": 1620 + }, + { + "epoch": 1.93, + "learning_rate": 6.0584961326523285e-06, + "loss": 0.2962, + "step": 1622 + }, + { + "epoch": 1.93, + "learning_rate": 6.0353456748816545e-06, + "loss": 0.2087, + "step": 1624 + }, + { + "epoch": 1.93, + "learning_rate": 6.012220400853899e-06, + "loss": 0.2902, + "step": 1626 + }, + { + "epoch": 1.94, + "learning_rate": 5.989120457462314e-06, + "loss": 0.2565, + "step": 1628 + }, + { + "epoch": 1.94, + "learning_rate": 5.9660459914392465e-06, + "loss": 0.243, + "step": 1630 + }, + { + "epoch": 1.94, + "learning_rate": 5.942997149355208e-06, + "loss": 0.2895, + "step": 1632 + }, + { + "epoch": 1.94, + "learning_rate": 5.9199740776179494e-06, + "loss": 0.3008, + "step": 1634 + }, + { + "epoch": 1.95, + "learning_rate": 5.89697692247152e-06, + "loss": 0.2124, + "step": 1636 + }, + { + "epoch": 1.95, + "learning_rate": 5.874005829995358e-06, + "loss": 0.2094, + "step": 1638 + }, + { + "epoch": 1.95, + "learning_rate": 5.851060946103334e-06, + "loss": 0.2355, + "step": 1640 + }, + { + "epoch": 1.95, + "learning_rate": 5.828142416542852e-06, + "loss": 0.304, + "step": 1642 + }, + { + "epoch": 1.95, + "learning_rate": 5.8052503868939005e-06, + "loss": 0.213, + "step": 1644 + }, + { + "epoch": 1.96, + "learning_rate": 5.782385002568153e-06, + "loss": 0.2471, + "step": 1646 + }, + { + "epoch": 1.96, + "learning_rate": 5.759546408808019e-06, + "loss": 0.2432, + "step": 1648 + }, + { + "epoch": 1.96, + "learning_rate": 5.736734750685737e-06, + "loss": 0.2565, + "step": 1650 + }, + { + "epoch": 1.96, + "learning_rate": 5.713950173102441e-06, + "loss": 0.2502, + "step": 1652 + }, + { + "epoch": 1.97, + "learning_rate": 5.691192820787266e-06, + "loss": 0.2105, + "step": 1654 + }, + { + "epoch": 1.97, + "learning_rate": 5.6684628382963905e-06, + "loss": 0.2437, + "step": 1656 + }, + { + "epoch": 1.97, + "learning_rate": 5.645760370012149e-06, + "loss": 0.2149, + "step": 1658 + }, + { + "epoch": 1.97, + "learning_rate": 5.623085560142099e-06, + "loss": 0.29, + "step": 1660 + }, + { + "epoch": 1.98, + "learning_rate": 5.60043855271811e-06, + "loss": 0.2718, + "step": 1662 + }, + { + "epoch": 1.98, + "learning_rate": 5.577819491595457e-06, + "loss": 0.2147, + "step": 1664 + }, + { + "epoch": 1.98, + "learning_rate": 5.555228520451891e-06, + "loss": 0.1925, + "step": 1666 + }, + { + "epoch": 1.98, + "learning_rate": 5.53266578278673e-06, + "loss": 0.2404, + "step": 1668 + }, + { + "epoch": 1.99, + "learning_rate": 5.510131421919955e-06, + "loss": 0.2405, + "step": 1670 + }, + { + "epoch": 1.99, + "learning_rate": 5.487625580991303e-06, + "loss": 0.1999, + "step": 1672 + }, + { + "epoch": 1.99, + "learning_rate": 5.465148402959339e-06, + "loss": 0.2185, + "step": 1674 + }, + { + "epoch": 1.99, + "learning_rate": 5.442700030600565e-06, + "loss": 0.3006, + "step": 1676 + }, + { + "epoch": 2.0, + "learning_rate": 5.420280606508503e-06, + "loss": 0.2406, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 5.397890273092807e-06, + "loss": 0.2317, + "step": 1680 + }, + { + "epoch": 2.0, + "learning_rate": 5.375529172578329e-06, + "loss": 0.2616, + "step": 1682 + }, + { + "epoch": 2.0, + "learning_rate": 5.353197447004239e-06, + "loss": 0.1408, + "step": 1684 + }, + { + "epoch": 2.0, + "learning_rate": 5.33089523822311e-06, + "loss": 0.1778, + "step": 1686 + }, + { + "epoch": 2.01, + "learning_rate": 5.308622687900038e-06, + "loss": 0.1207, + "step": 1688 + }, + { + "epoch": 2.01, + "learning_rate": 5.286379937511707e-06, + "loss": 0.1327, + "step": 1690 + }, + { + "epoch": 2.01, + "learning_rate": 5.264167128345523e-06, + "loss": 0.125, + "step": 1692 + }, + { + "epoch": 2.01, + "learning_rate": 5.241984401498693e-06, + "loss": 0.17, + "step": 1694 + }, + { + "epoch": 2.02, + "learning_rate": 5.219831897877353e-06, + "loss": 0.159, + "step": 1696 + }, + { + "epoch": 2.02, + "learning_rate": 5.197709758195648e-06, + "loss": 0.1156, + "step": 1698 + }, + { + "epoch": 2.02, + "learning_rate": 5.175618122974851e-06, + "loss": 0.1356, + "step": 1700 + }, + { + "epoch": 2.02, + "learning_rate": 5.153557132542473e-06, + "loss": 0.1645, + "step": 1702 + }, + { + "epoch": 2.03, + "learning_rate": 5.131526927031356e-06, + "loss": 0.1626, + "step": 1704 + }, + { + "epoch": 2.03, + "learning_rate": 5.109527646378815e-06, + "loss": 0.1278, + "step": 1706 + }, + { + "epoch": 2.03, + "learning_rate": 5.087559430325708e-06, + "loss": 0.136, + "step": 1708 + }, + { + "epoch": 2.03, + "learning_rate": 5.0656224184155764e-06, + "loss": 0.1141, + "step": 1710 + }, + { + "epoch": 2.04, + "learning_rate": 5.043716749993757e-06, + "loss": 0.1444, + "step": 1712 + }, + { + "epoch": 2.04, + "learning_rate": 5.02184256420648e-06, + "loss": 0.1151, + "step": 1714 + }, + { + "epoch": 2.04, + "learning_rate": 5.000000000000003e-06, + "loss": 0.1474, + "step": 1716 + }, + { + "epoch": 2.04, + "learning_rate": 4.978189196119716e-06, + "loss": 0.121, + "step": 1718 + }, + { + "epoch": 2.05, + "learning_rate": 4.9564102911092646e-06, + "loss": 0.1284, + "step": 1720 + }, + { + "epoch": 2.05, + "learning_rate": 4.934663423309685e-06, + "loss": 0.153, + "step": 1722 + }, + { + "epoch": 2.05, + "learning_rate": 4.912948730858492e-06, + "loss": 0.145, + "step": 1724 + }, + { + "epoch": 2.05, + "learning_rate": 4.891266351688829e-06, + "loss": 0.1167, + "step": 1726 + }, + { + "epoch": 2.05, + "learning_rate": 4.869616423528588e-06, + "loss": 0.1338, + "step": 1728 + }, + { + "epoch": 2.06, + "learning_rate": 4.847999083899522e-06, + "loss": 0.1208, + "step": 1730 + }, + { + "epoch": 2.06, + "learning_rate": 4.826414470116382e-06, + "loss": 0.1403, + "step": 1732 + }, + { + "epoch": 2.06, + "learning_rate": 4.804862719286044e-06, + "loss": 0.1463, + "step": 1734 + }, + { + "epoch": 2.06, + "learning_rate": 4.783343968306631e-06, + "loss": 0.1276, + "step": 1736 + }, + { + "epoch": 2.07, + "learning_rate": 4.7618583538666605e-06, + "loss": 0.1242, + "step": 1738 + }, + { + "epoch": 2.07, + "learning_rate": 4.740406012444153e-06, + "loss": 0.1402, + "step": 1740 + }, + { + "epoch": 2.07, + "learning_rate": 4.718987080305778e-06, + "loss": 0.3846, + "step": 1742 + }, + { + "epoch": 2.07, + "learning_rate": 4.697601693505996e-06, + "loss": 0.161, + "step": 1744 + }, + { + "epoch": 2.08, + "learning_rate": 4.6762499878861764e-06, + "loss": 0.1102, + "step": 1746 + }, + { + "epoch": 2.08, + "learning_rate": 4.654932099073746e-06, + "loss": 0.1343, + "step": 1748 + }, + { + "epoch": 2.08, + "learning_rate": 4.633648162481326e-06, + "loss": 0.146, + "step": 1750 + }, + { + "epoch": 2.08, + "learning_rate": 4.612398313305867e-06, + "loss": 0.1533, + "step": 1752 + }, + { + "epoch": 2.09, + "learning_rate": 4.5911826865277975e-06, + "loss": 0.1346, + "step": 1754 + }, + { + "epoch": 2.09, + "learning_rate": 4.570001416910168e-06, + "loss": 0.1579, + "step": 1756 + }, + { + "epoch": 2.09, + "learning_rate": 4.548854638997778e-06, + "loss": 0.1382, + "step": 1758 + }, + { + "epoch": 2.09, + "learning_rate": 4.527742487116349e-06, + "loss": 0.133, + "step": 1760 + }, + { + "epoch": 2.1, + "learning_rate": 4.506665095371642e-06, + "loss": 0.121, + "step": 1762 + }, + { + "epoch": 2.1, + "learning_rate": 4.485622597648624e-06, + "loss": 0.1225, + "step": 1764 + }, + { + "epoch": 2.1, + "learning_rate": 4.464615127610615e-06, + "loss": 0.1139, + "step": 1766 + }, + { + "epoch": 2.1, + "learning_rate": 4.443642818698434e-06, + "loss": 0.1329, + "step": 1768 + }, + { + "epoch": 2.1, + "learning_rate": 4.4227058041295515e-06, + "loss": 0.1131, + "step": 1770 + }, + { + "epoch": 2.11, + "learning_rate": 4.401804216897258e-06, + "loss": 0.1301, + "step": 1772 + }, + { + "epoch": 2.11, + "learning_rate": 4.380938189769791e-06, + "loss": 0.1443, + "step": 1774 + }, + { + "epoch": 2.11, + "learning_rate": 4.3601078552895245e-06, + "loss": 0.1306, + "step": 1776 + }, + { + "epoch": 2.11, + "learning_rate": 4.339313345772098e-06, + "loss": 0.1501, + "step": 1778 + }, + { + "epoch": 2.12, + "learning_rate": 4.318554793305592e-06, + "loss": 0.1076, + "step": 1780 + }, + { + "epoch": 2.12, + "learning_rate": 4.297832329749687e-06, + "loss": 0.1671, + "step": 1782 + }, + { + "epoch": 2.12, + "learning_rate": 4.277146086734823e-06, + "loss": 0.1364, + "step": 1784 + }, + { + "epoch": 2.12, + "learning_rate": 4.2564961956613605e-06, + "loss": 0.1347, + "step": 1786 + }, + { + "epoch": 2.13, + "learning_rate": 4.235882787698763e-06, + "loss": 0.1667, + "step": 1788 + }, + { + "epoch": 2.13, + "learning_rate": 4.2153059937847355e-06, + "loss": 0.1366, + "step": 1790 + }, + { + "epoch": 2.13, + "learning_rate": 4.194765944624423e-06, + "loss": 0.1142, + "step": 1792 + }, + { + "epoch": 2.13, + "learning_rate": 4.174262770689552e-06, + "loss": 0.1188, + "step": 1794 + }, + { + "epoch": 2.14, + "learning_rate": 4.153796602217623e-06, + "loss": 0.1068, + "step": 1796 + }, + { + "epoch": 2.14, + "learning_rate": 4.133367569211074e-06, + "loss": 0.1359, + "step": 1798 + }, + { + "epoch": 2.14, + "learning_rate": 4.112975801436454e-06, + "loss": 0.1584, + "step": 1800 + }, + { + "epoch": 2.14, + "learning_rate": 4.092621428423601e-06, + "loss": 0.1308, + "step": 1802 + }, + { + "epoch": 2.15, + "learning_rate": 4.07230457946482e-06, + "loss": 0.1324, + "step": 1804 + }, + { + "epoch": 2.15, + "learning_rate": 4.052025383614061e-06, + "loss": 0.1339, + "step": 1806 + }, + { + "epoch": 2.15, + "learning_rate": 4.031783969686105e-06, + "loss": 0.1178, + "step": 1808 + }, + { + "epoch": 2.15, + "learning_rate": 4.011580466255729e-06, + "loss": 0.127, + "step": 1810 + }, + { + "epoch": 2.15, + "learning_rate": 3.991415001656906e-06, + "loss": 0.1334, + "step": 1812 + }, + { + "epoch": 2.16, + "learning_rate": 3.971287703981982e-06, + "loss": 0.1201, + "step": 1814 + }, + { + "epoch": 2.16, + "learning_rate": 3.9511987010808635e-06, + "loss": 0.1435, + "step": 1816 + }, + { + "epoch": 2.16, + "learning_rate": 3.931148120560211e-06, + "loss": 0.1394, + "step": 1818 + }, + { + "epoch": 2.16, + "learning_rate": 3.911136089782613e-06, + "loss": 0.1229, + "step": 1820 + }, + { + "epoch": 2.17, + "learning_rate": 3.8911627358658e-06, + "loss": 0.123, + "step": 1822 + }, + { + "epoch": 2.17, + "learning_rate": 3.871228185681822e-06, + "loss": 0.1433, + "step": 1824 + }, + { + "epoch": 2.17, + "learning_rate": 3.8513325658562395e-06, + "loss": 0.1238, + "step": 1826 + }, + { + "epoch": 2.17, + "learning_rate": 3.831476002767327e-06, + "loss": 0.1456, + "step": 1828 + }, + { + "epoch": 2.18, + "learning_rate": 3.811658622545268e-06, + "loss": 0.1478, + "step": 1830 + }, + { + "epoch": 2.18, + "learning_rate": 3.7918805510713553e-06, + "loss": 0.1269, + "step": 1832 + }, + { + "epoch": 2.18, + "learning_rate": 3.7721419139771886e-06, + "loss": 0.1379, + "step": 1834 + }, + { + "epoch": 2.18, + "learning_rate": 3.7524428366438757e-06, + "loss": 0.1365, + "step": 1836 + }, + { + "epoch": 2.19, + "learning_rate": 3.7327834442012433e-06, + "loss": 0.1263, + "step": 1838 + }, + { + "epoch": 2.19, + "learning_rate": 3.7131638615270404e-06, + "loss": 0.1055, + "step": 1840 + }, + { + "epoch": 2.19, + "learning_rate": 3.6935842132461307e-06, + "loss": 0.1053, + "step": 1842 + }, + { + "epoch": 2.19, + "learning_rate": 3.6740446237297177e-06, + "loss": 0.1259, + "step": 1844 + }, + { + "epoch": 2.2, + "learning_rate": 3.6545452170945496e-06, + "loss": 0.1428, + "step": 1846 + }, + { + "epoch": 2.2, + "learning_rate": 3.635086117202128e-06, + "loss": 0.1375, + "step": 1848 + }, + { + "epoch": 2.2, + "learning_rate": 3.6156674476579266e-06, + "loss": 0.1469, + "step": 1850 + }, + { + "epoch": 2.2, + "learning_rate": 3.5962893318105963e-06, + "loss": 0.123, + "step": 1852 + }, + { + "epoch": 2.2, + "learning_rate": 3.576951892751197e-06, + "loss": 0.128, + "step": 1854 + }, + { + "epoch": 2.21, + "learning_rate": 3.5576552533124074e-06, + "loss": 0.1403, + "step": 1856 + }, + { + "epoch": 2.21, + "learning_rate": 3.538399536067736e-06, + "loss": 0.1392, + "step": 1858 + }, + { + "epoch": 2.21, + "learning_rate": 3.5191848633307545e-06, + "loss": 0.1259, + "step": 1860 + }, + { + "epoch": 2.21, + "learning_rate": 3.5000113571543183e-06, + "loss": 0.1252, + "step": 1862 + }, + { + "epoch": 2.22, + "learning_rate": 3.480879139329789e-06, + "loss": 0.1641, + "step": 1864 + }, + { + "epoch": 2.22, + "learning_rate": 3.4617883313862633e-06, + "loss": 0.1396, + "step": 1866 + }, + { + "epoch": 2.22, + "learning_rate": 3.4427390545897955e-06, + "loss": 0.1284, + "step": 1868 + }, + { + "epoch": 2.22, + "learning_rate": 3.423731429942636e-06, + "loss": 0.1255, + "step": 1870 + }, + { + "epoch": 2.23, + "learning_rate": 3.4047655781824605e-06, + "loss": 0.1381, + "step": 1872 + }, + { + "epoch": 2.23, + "learning_rate": 3.3858416197815947e-06, + "loss": 0.1587, + "step": 1874 + }, + { + "epoch": 2.23, + "learning_rate": 3.3669596749462562e-06, + "loss": 0.1148, + "step": 1876 + }, + { + "epoch": 2.23, + "learning_rate": 3.3481198636157908e-06, + "loss": 0.1187, + "step": 1878 + }, + { + "epoch": 2.24, + "learning_rate": 3.3293223054619073e-06, + "loss": 0.1328, + "step": 1880 + }, + { + "epoch": 2.24, + "learning_rate": 3.3105671198879243e-06, + "loss": 0.1166, + "step": 1882 + }, + { + "epoch": 2.24, + "learning_rate": 3.2918544260279985e-06, + "loss": 0.133, + "step": 1884 + }, + { + "epoch": 2.24, + "learning_rate": 3.2731843427463894e-06, + "loss": 0.127, + "step": 1886 + }, + { + "epoch": 2.24, + "learning_rate": 3.254556988636678e-06, + "loss": 0.1678, + "step": 1888 + }, + { + "epoch": 2.25, + "learning_rate": 3.2359724820210394e-06, + "loss": 0.1156, + "step": 1890 + }, + { + "epoch": 2.25, + "learning_rate": 3.2174309409494675e-06, + "loss": 0.1384, + "step": 1892 + }, + { + "epoch": 2.25, + "learning_rate": 3.198932483199041e-06, + "loss": 0.1324, + "step": 1894 + }, + { + "epoch": 2.25, + "learning_rate": 3.180477226273172e-06, + "loss": 0.1498, + "step": 1896 + }, + { + "epoch": 2.26, + "learning_rate": 3.162065287400855e-06, + "loss": 0.1482, + "step": 1898 + }, + { + "epoch": 2.26, + "learning_rate": 3.1436967835359245e-06, + "loss": 0.1001, + "step": 1900 + }, + { + "epoch": 2.26, + "learning_rate": 3.1253718313563207e-06, + "loss": 0.1328, + "step": 1902 + }, + { + "epoch": 2.26, + "learning_rate": 3.1070905472633307e-06, + "loss": 0.1343, + "step": 1904 + }, + { + "epoch": 2.27, + "learning_rate": 3.0888530473808677e-06, + "loss": 0.1721, + "step": 1906 + }, + { + "epoch": 2.27, + "learning_rate": 3.070659447554719e-06, + "loss": 0.1211, + "step": 1908 + }, + { + "epoch": 2.27, + "learning_rate": 3.052509863351818e-06, + "loss": 0.1267, + "step": 1910 + }, + { + "epoch": 2.27, + "learning_rate": 3.0344044100595073e-06, + "loss": 0.1257, + "step": 1912 + }, + { + "epoch": 2.28, + "learning_rate": 3.016343202684807e-06, + "loss": 0.1769, + "step": 1914 + }, + { + "epoch": 2.28, + "learning_rate": 2.9983263559536813e-06, + "loss": 0.1398, + "step": 1916 + }, + { + "epoch": 2.28, + "learning_rate": 2.9803539843103226e-06, + "loss": 0.0818, + "step": 1918 + }, + { + "epoch": 2.28, + "learning_rate": 2.962426201916402e-06, + "loss": 0.1552, + "step": 1920 + }, + { + "epoch": 2.29, + "learning_rate": 2.9445431226503683e-06, + "loss": 0.1296, + "step": 1922 + }, + { + "epoch": 2.29, + "learning_rate": 2.926704860106706e-06, + "loss": 0.1082, + "step": 1924 + }, + { + "epoch": 2.29, + "learning_rate": 2.9089115275952217e-06, + "loss": 0.143, + "step": 1926 + }, + { + "epoch": 2.29, + "learning_rate": 2.891163238140323e-06, + "loss": 0.1019, + "step": 1928 + }, + { + "epoch": 2.29, + "learning_rate": 2.8734601044803056e-06, + "loss": 0.1256, + "step": 1930 + }, + { + "epoch": 2.3, + "learning_rate": 2.855802239066623e-06, + "loss": 0.1159, + "step": 1932 + }, + { + "epoch": 2.3, + "learning_rate": 2.8381897540631964e-06, + "loss": 0.1187, + "step": 1934 + }, + { + "epoch": 2.3, + "learning_rate": 2.820622761345676e-06, + "loss": 0.1375, + "step": 1936 + }, + { + "epoch": 2.3, + "learning_rate": 2.8031013725007415e-06, + "loss": 0.1305, + "step": 1938 + }, + { + "epoch": 2.31, + "learning_rate": 2.785625698825406e-06, + "loss": 0.1443, + "step": 1940 + }, + { + "epoch": 2.31, + "learning_rate": 2.768195851326285e-06, + "loss": 0.1351, + "step": 1942 + }, + { + "epoch": 2.31, + "learning_rate": 2.750811940718906e-06, + "loss": 0.1378, + "step": 1944 + }, + { + "epoch": 2.31, + "learning_rate": 2.733474077427004e-06, + "loss": 0.0981, + "step": 1946 + }, + { + "epoch": 2.32, + "learning_rate": 2.716182371581814e-06, + "loss": 0.146, + "step": 1948 + }, + { + "epoch": 2.32, + "learning_rate": 2.6989369330213865e-06, + "loss": 0.1286, + "step": 1950 + }, + { + "epoch": 2.32, + "learning_rate": 2.681737871289869e-06, + "loss": 0.1551, + "step": 1952 + }, + { + "epoch": 2.32, + "learning_rate": 2.6645852956368214e-06, + "loss": 0.1166, + "step": 1954 + }, + { + "epoch": 2.33, + "learning_rate": 2.647479315016528e-06, + "loss": 0.1181, + "step": 1956 + }, + { + "epoch": 2.33, + "learning_rate": 2.6304200380872913e-06, + "loss": 0.1341, + "step": 1958 + }, + { + "epoch": 2.33, + "learning_rate": 2.61340757321075e-06, + "loss": 0.1196, + "step": 1960 + }, + { + "epoch": 2.33, + "learning_rate": 2.596442028451194e-06, + "loss": 0.1364, + "step": 1962 + }, + { + "epoch": 2.34, + "learning_rate": 2.579523511574864e-06, + "loss": 0.1209, + "step": 1964 + }, + { + "epoch": 2.34, + "learning_rate": 2.56265213004929e-06, + "loss": 0.1174, + "step": 1966 + }, + { + "epoch": 2.34, + "learning_rate": 2.5458279910425865e-06, + "loss": 0.1383, + "step": 1968 + }, + { + "epoch": 2.34, + "learning_rate": 2.5290512014227774e-06, + "loss": 0.1044, + "step": 1970 + }, + { + "epoch": 2.34, + "learning_rate": 2.5123218677571313e-06, + "loss": 0.1163, + "step": 1972 + }, + { + "epoch": 2.35, + "learning_rate": 2.4956400963114647e-06, + "loss": 0.137, + "step": 1974 + }, + { + "epoch": 2.35, + "learning_rate": 2.479005993049478e-06, + "loss": 0.1591, + "step": 1976 + }, + { + "epoch": 2.35, + "learning_rate": 2.4624196636320795e-06, + "loss": 0.137, + "step": 1978 + }, + { + "epoch": 2.35, + "learning_rate": 2.445881213416713e-06, + "loss": 0.1583, + "step": 1980 + }, + { + "epoch": 2.36, + "learning_rate": 2.429390747456699e-06, + "loss": 0.1252, + "step": 1982 + }, + { + "epoch": 2.36, + "learning_rate": 2.412948370500551e-06, + "loss": 0.1552, + "step": 1984 + }, + { + "epoch": 2.36, + "learning_rate": 2.3965541869913188e-06, + "loss": 0.1481, + "step": 1986 + }, + { + "epoch": 2.36, + "learning_rate": 2.3802083010659238e-06, + "loss": 0.1243, + "step": 1988 + }, + { + "epoch": 2.37, + "learning_rate": 2.3639108165545057e-06, + "loss": 0.1273, + "step": 1990 + }, + { + "epoch": 2.37, + "learning_rate": 2.3476618369797457e-06, + "loss": 0.1403, + "step": 1992 + }, + { + "epoch": 2.37, + "learning_rate": 2.331461465556222e-06, + "loss": 0.1391, + "step": 1994 + }, + { + "epoch": 2.37, + "learning_rate": 2.315309805189748e-06, + "loss": 0.1376, + "step": 1996 + }, + { + "epoch": 2.38, + "learning_rate": 2.299206958476731e-06, + "loss": 0.1253, + "step": 1998 + }, + { + "epoch": 2.38, + "learning_rate": 2.2831530277034985e-06, + "loss": 0.131, + "step": 2000 + }, + { + "epoch": 2.38, + "learning_rate": 2.2671481148456685e-06, + "loss": 0.1377, + "step": 2002 + }, + { + "epoch": 2.38, + "learning_rate": 2.251192321567488e-06, + "loss": 0.1077, + "step": 2004 + }, + { + "epoch": 2.39, + "learning_rate": 2.235285749221201e-06, + "loss": 0.1253, + "step": 2006 + }, + { + "epoch": 2.39, + "learning_rate": 2.219428498846393e-06, + "loss": 0.1271, + "step": 2008 + }, + { + "epoch": 2.39, + "learning_rate": 2.2036206711693508e-06, + "loss": 0.1449, + "step": 2010 + }, + { + "epoch": 2.39, + "learning_rate": 2.1878623666024233e-06, + "loss": 0.1024, + "step": 2012 + }, + { + "epoch": 2.39, + "learning_rate": 2.1721536852433976e-06, + "loss": 0.1141, + "step": 2014 + }, + { + "epoch": 2.4, + "learning_rate": 2.1564947268748382e-06, + "loss": 0.1023, + "step": 2016 + }, + { + "epoch": 2.4, + "learning_rate": 2.1408855909634696e-06, + "loss": 0.1113, + "step": 2018 + }, + { + "epoch": 2.4, + "learning_rate": 2.125326376659539e-06, + "loss": 0.1467, + "step": 2020 + }, + { + "epoch": 2.4, + "learning_rate": 2.1098171827961965e-06, + "loss": 0.1194, + "step": 2022 + }, + { + "epoch": 2.41, + "learning_rate": 2.094358107888852e-06, + "loss": 0.1676, + "step": 2024 + }, + { + "epoch": 2.41, + "learning_rate": 2.0789492501345553e-06, + "loss": 0.1367, + "step": 2026 + }, + { + "epoch": 2.41, + "learning_rate": 2.0635907074113737e-06, + "loss": 0.1231, + "step": 2028 + }, + { + "epoch": 2.41, + "learning_rate": 2.0482825772777804e-06, + "loss": 0.1626, + "step": 2030 + }, + { + "epoch": 2.42, + "learning_rate": 2.0330249569720116e-06, + "loss": 0.1117, + "step": 2032 + }, + { + "epoch": 2.42, + "learning_rate": 2.0178179434114674e-06, + "loss": 0.1303, + "step": 2034 + }, + { + "epoch": 2.42, + "learning_rate": 2.00266163319209e-06, + "loss": 0.1336, + "step": 2036 + }, + { + "epoch": 2.42, + "learning_rate": 1.9875561225877482e-06, + "loss": 0.1143, + "step": 2038 + }, + { + "epoch": 2.43, + "learning_rate": 1.972501507549637e-06, + "loss": 0.1449, + "step": 2040 + }, + { + "epoch": 2.43, + "learning_rate": 1.957497883705649e-06, + "loss": 0.1331, + "step": 2042 + }, + { + "epoch": 2.43, + "learning_rate": 1.9425453463597798e-06, + "loss": 0.0957, + "step": 2044 + }, + { + "epoch": 2.43, + "learning_rate": 1.927643990491528e-06, + "loss": 0.1182, + "step": 2046 + }, + { + "epoch": 2.44, + "learning_rate": 1.912793910755275e-06, + "loss": 0.1394, + "step": 2048 + }, + { + "epoch": 2.44, + "learning_rate": 1.8979952014796954e-06, + "loss": 0.1155, + "step": 2050 + }, + { + "epoch": 2.44, + "learning_rate": 1.883247956667157e-06, + "loss": 0.1681, + "step": 2052 + }, + { + "epoch": 2.44, + "learning_rate": 1.8685522699931169e-06, + "loss": 0.1517, + "step": 2054 + }, + { + "epoch": 2.44, + "learning_rate": 1.8539082348055427e-06, + "loss": 0.1491, + "step": 2056 + }, + { + "epoch": 2.45, + "learning_rate": 1.839315944124298e-06, + "loss": 0.1276, + "step": 2058 + }, + { + "epoch": 2.45, + "learning_rate": 1.8247754906405624e-06, + "loss": 0.1343, + "step": 2060 + }, + { + "epoch": 2.45, + "learning_rate": 1.8102869667162494e-06, + "loss": 0.1477, + "step": 2062 + }, + { + "epoch": 2.45, + "learning_rate": 1.7958504643834062e-06, + "loss": 0.1584, + "step": 2064 + }, + { + "epoch": 2.46, + "learning_rate": 1.7814660753436386e-06, + "loss": 0.1316, + "step": 2066 + }, + { + "epoch": 2.46, + "learning_rate": 1.7671338909675218e-06, + "loss": 0.1373, + "step": 2068 + }, + { + "epoch": 2.46, + "learning_rate": 1.7528540022940288e-06, + "loss": 0.131, + "step": 2070 + }, + { + "epoch": 2.46, + "learning_rate": 1.7386265000299385e-06, + "loss": 0.1206, + "step": 2072 + }, + { + "epoch": 2.47, + "learning_rate": 1.7244514745492813e-06, + "loss": 0.117, + "step": 2074 + }, + { + "epoch": 2.47, + "learning_rate": 1.71032901589274e-06, + "loss": 0.1368, + "step": 2076 + }, + { + "epoch": 2.47, + "learning_rate": 1.6962592137670897e-06, + "loss": 0.1176, + "step": 2078 + }, + { + "epoch": 2.47, + "learning_rate": 1.6822421575446378e-06, + "loss": 0.1501, + "step": 2080 + }, + { + "epoch": 2.48, + "learning_rate": 1.6682779362626378e-06, + "loss": 0.1326, + "step": 2082 + }, + { + "epoch": 2.48, + "learning_rate": 1.6543666386227343e-06, + "loss": 0.1357, + "step": 2084 + }, + { + "epoch": 2.48, + "learning_rate": 1.6405083529903954e-06, + "loss": 0.1039, + "step": 2086 + }, + { + "epoch": 2.48, + "learning_rate": 1.6267031673943546e-06, + "loss": 0.1407, + "step": 2088 + }, + { + "epoch": 2.49, + "learning_rate": 1.6129511695260558e-06, + "loss": 0.1312, + "step": 2090 + }, + { + "epoch": 2.49, + "learning_rate": 1.5992524467390858e-06, + "loss": 0.1198, + "step": 2092 + }, + { + "epoch": 2.49, + "learning_rate": 1.5856070860486205e-06, + "loss": 0.1091, + "step": 2094 + }, + { + "epoch": 2.49, + "learning_rate": 1.5720151741308875e-06, + "loss": 0.119, + "step": 2096 + }, + { + "epoch": 2.49, + "learning_rate": 1.5584767973225967e-06, + "loss": 0.1316, + "step": 2098 + }, + { + "epoch": 2.5, + "learning_rate": 1.544992041620398e-06, + "loss": 0.1108, + "step": 2100 + }, + { + "epoch": 2.5, + "learning_rate": 1.531560992680341e-06, + "loss": 0.1267, + "step": 2102 + }, + { + "epoch": 2.5, + "learning_rate": 1.5181837358173223e-06, + "loss": 0.1292, + "step": 2104 + }, + { + "epoch": 2.5, + "learning_rate": 1.5048603560045549e-06, + "loss": 0.124, + "step": 2106 + }, + { + "epoch": 2.51, + "learning_rate": 1.4915909378730143e-06, + "loss": 0.1466, + "step": 2108 + }, + { + "epoch": 2.51, + "learning_rate": 1.4783755657109079e-06, + "loss": 0.103, + "step": 2110 + }, + { + "epoch": 2.51, + "learning_rate": 1.4652143234631465e-06, + "loss": 0.1478, + "step": 2112 + }, + { + "epoch": 2.51, + "learning_rate": 1.4521072947307957e-06, + "loss": 0.1196, + "step": 2114 + }, + { + "epoch": 2.52, + "learning_rate": 1.4390545627705588e-06, + "loss": 0.1203, + "step": 2116 + }, + { + "epoch": 2.52, + "learning_rate": 1.426056210494241e-06, + "loss": 0.125, + "step": 2118 + }, + { + "epoch": 2.52, + "learning_rate": 1.413112320468223e-06, + "loss": 0.1612, + "step": 2120 + }, + { + "epoch": 2.52, + "learning_rate": 1.400222974912936e-06, + "loss": 0.1226, + "step": 2122 + }, + { + "epoch": 2.53, + "learning_rate": 1.3873882557023488e-06, + "loss": 0.1304, + "step": 2124 + }, + { + "epoch": 2.53, + "learning_rate": 1.3746082443634311e-06, + "loss": 0.1172, + "step": 2126 + }, + { + "epoch": 2.53, + "learning_rate": 1.361883022075653e-06, + "loss": 0.1441, + "step": 2128 + }, + { + "epoch": 2.53, + "learning_rate": 1.3492126696704544e-06, + "loss": 0.1232, + "step": 2130 + }, + { + "epoch": 2.54, + "learning_rate": 1.3365972676307403e-06, + "loss": 0.1127, + "step": 2132 + }, + { + "epoch": 2.54, + "learning_rate": 1.3240368960903671e-06, + "loss": 0.1298, + "step": 2134 + }, + { + "epoch": 2.54, + "learning_rate": 1.3115316348336348e-06, + "loss": 0.1358, + "step": 2136 + }, + { + "epoch": 2.54, + "learning_rate": 1.2990815632947763e-06, + "loss": 0.1689, + "step": 2138 + }, + { + "epoch": 2.54, + "learning_rate": 1.2866867605574628e-06, + "loss": 0.1101, + "step": 2140 + }, + { + "epoch": 2.55, + "learning_rate": 1.2743473053542842e-06, + "loss": 0.1308, + "step": 2142 + }, + { + "epoch": 2.55, + "learning_rate": 1.262063276066272e-06, + "loss": 0.1472, + "step": 2144 + }, + { + "epoch": 2.55, + "learning_rate": 1.2498347507223763e-06, + "loss": 0.1298, + "step": 2146 + }, + { + "epoch": 2.55, + "learning_rate": 1.237661806998991e-06, + "loss": 0.1323, + "step": 2148 + }, + { + "epoch": 2.56, + "learning_rate": 1.2255445222194462e-06, + "loss": 0.0947, + "step": 2150 + }, + { + "epoch": 2.56, + "learning_rate": 1.2134829733535269e-06, + "loss": 0.1199, + "step": 2152 + }, + { + "epoch": 2.56, + "learning_rate": 1.2014772370169747e-06, + "loss": 0.1284, + "step": 2154 + }, + { + "epoch": 2.56, + "learning_rate": 1.1895273894710157e-06, + "loss": 0.1323, + "step": 2156 + }, + { + "epoch": 2.57, + "learning_rate": 1.177633506621857e-06, + "loss": 0.1188, + "step": 2158 + }, + { + "epoch": 2.57, + "learning_rate": 1.1657956640202217e-06, + "loss": 0.1448, + "step": 2160 + }, + { + "epoch": 2.57, + "learning_rate": 1.1540139368608572e-06, + "loss": 0.1819, + "step": 2162 + }, + { + "epoch": 2.57, + "learning_rate": 1.142288399982061e-06, + "loss": 0.2198, + "step": 2164 + }, + { + "epoch": 2.58, + "learning_rate": 1.1306191278652112e-06, + "loss": 0.1341, + "step": 2166 + }, + { + "epoch": 2.58, + "learning_rate": 1.1190061946342835e-06, + "loss": 0.2365, + "step": 2168 + }, + { + "epoch": 2.58, + "learning_rate": 1.1074496740553853e-06, + "loss": 0.1264, + "step": 2170 + }, + { + "epoch": 2.58, + "learning_rate": 1.0959496395362946e-06, + "loss": 0.1328, + "step": 2172 + }, + { + "epoch": 2.59, + "learning_rate": 1.0845061641259757e-06, + "loss": 0.1076, + "step": 2174 + }, + { + "epoch": 2.59, + "learning_rate": 1.0731193205141354e-06, + "loss": 0.1372, + "step": 2176 + }, + { + "epoch": 2.59, + "learning_rate": 1.0617891810307458e-06, + "loss": 0.1536, + "step": 2178 + }, + { + "epoch": 2.59, + "learning_rate": 1.050515817645591e-06, + "loss": 0.1243, + "step": 2180 + }, + { + "epoch": 2.59, + "learning_rate": 1.039299301967811e-06, + "loss": 0.2138, + "step": 2182 + }, + { + "epoch": 2.6, + "learning_rate": 1.0281397052454457e-06, + "loss": 0.1357, + "step": 2184 + }, + { + "epoch": 2.6, + "learning_rate": 1.0170370983649792e-06, + "loss": 0.1623, + "step": 2186 + }, + { + "epoch": 2.6, + "learning_rate": 1.005991551850899e-06, + "loss": 0.1314, + "step": 2188 + }, + { + "epoch": 2.6, + "learning_rate": 9.950031358652313e-07, + "loss": 0.1163, + "step": 2190 + }, + { + "epoch": 2.61, + "learning_rate": 9.84071920207118e-07, + "loss": 0.1045, + "step": 2192 + }, + { + "epoch": 2.61, + "learning_rate": 9.73197974312351e-07, + "loss": 0.1449, + "step": 2194 + }, + { + "epoch": 2.61, + "learning_rate": 9.623813672529437e-07, + "loss": 0.1287, + "step": 2196 + }, + { + "epoch": 2.61, + "learning_rate": 9.516221677366888e-07, + "loss": 0.1193, + "step": 2198 + }, + { + "epoch": 2.62, + "learning_rate": 9.409204441067254e-07, + "loss": 0.1306, + "step": 2200 + }, + { + "epoch": 2.62, + "learning_rate": 9.302762643411e-07, + "loss": 0.1151, + "step": 2202 + }, + { + "epoch": 2.62, + "learning_rate": 9.196896960523349e-07, + "loss": 0.1287, + "step": 2204 + }, + { + "epoch": 2.62, + "learning_rate": 9.091608064870028e-07, + "loss": 0.099, + "step": 2206 + }, + { + "epoch": 2.63, + "learning_rate": 8.986896625253006e-07, + "loss": 0.1151, + "step": 2208 + }, + { + "epoch": 2.63, + "learning_rate": 8.882763306806163e-07, + "loss": 0.1466, + "step": 2210 + }, + { + "epoch": 2.63, + "learning_rate": 8.779208770991121e-07, + "loss": 0.1133, + "step": 2212 + }, + { + "epoch": 2.63, + "learning_rate": 8.676233675593038e-07, + "loss": 0.157, + "step": 2214 + }, + { + "epoch": 2.63, + "learning_rate": 8.573838674716461e-07, + "loss": 0.1184, + "step": 2216 + }, + { + "epoch": 2.64, + "learning_rate": 8.472024418781099e-07, + "loss": 0.125, + "step": 2218 + }, + { + "epoch": 2.64, + "learning_rate": 8.370791554517743e-07, + "loss": 0.146, + "step": 2220 + }, + { + "epoch": 2.64, + "learning_rate": 8.270140724964159e-07, + "loss": 0.0981, + "step": 2222 + }, + { + "epoch": 2.64, + "learning_rate": 8.170072569460996e-07, + "loss": 0.1288, + "step": 2224 + }, + { + "epoch": 2.65, + "learning_rate": 8.070587723647705e-07, + "loss": 0.1714, + "step": 2226 + }, + { + "epoch": 2.65, + "learning_rate": 7.971686819458502e-07, + "loss": 0.1147, + "step": 2228 + }, + { + "epoch": 2.65, + "learning_rate": 7.873370485118381e-07, + "loss": 0.1307, + "step": 2230 + }, + { + "epoch": 2.65, + "learning_rate": 7.77563934513913e-07, + "loss": 0.1278, + "step": 2232 + }, + { + "epoch": 2.66, + "learning_rate": 7.678494020315308e-07, + "loss": 0.118, + "step": 2234 + }, + { + "epoch": 2.66, + "learning_rate": 7.581935127720352e-07, + "loss": 0.1289, + "step": 2236 + }, + { + "epoch": 2.66, + "learning_rate": 7.485963280702646e-07, + "loss": 0.1139, + "step": 2238 + }, + { + "epoch": 2.66, + "learning_rate": 7.390579088881655e-07, + "loss": 0.1164, + "step": 2240 + }, + { + "epoch": 2.67, + "learning_rate": 7.295783158143976e-07, + "loss": 0.0974, + "step": 2242 + }, + { + "epoch": 2.67, + "learning_rate": 7.201576090639529e-07, + "loss": 0.1444, + "step": 2244 + }, + { + "epoch": 2.67, + "learning_rate": 7.107958484777755e-07, + "loss": 0.1599, + "step": 2246 + }, + { + "epoch": 2.67, + "learning_rate": 7.014930935223807e-07, + "loss": 0.1482, + "step": 2248 + }, + { + "epoch": 2.68, + "learning_rate": 6.922494032894744e-07, + "loss": 0.1095, + "step": 2250 + }, + { + "epoch": 2.68, + "learning_rate": 6.830648364955772e-07, + "loss": 0.1398, + "step": 2252 + }, + { + "epoch": 2.68, + "learning_rate": 6.739394514816622e-07, + "loss": 0.1333, + "step": 2254 + }, + { + "epoch": 2.68, + "learning_rate": 6.648733062127643e-07, + "loss": 0.1209, + "step": 2256 + }, + { + "epoch": 2.68, + "learning_rate": 6.558664582776341e-07, + "loss": 0.1019, + "step": 2258 + }, + { + "epoch": 2.69, + "learning_rate": 6.469189648883567e-07, + "loss": 0.1099, + "step": 2260 + }, + { + "epoch": 2.69, + "learning_rate": 6.380308828799919e-07, + "loss": 0.1176, + "step": 2262 + }, + { + "epoch": 2.69, + "learning_rate": 6.292022687102184e-07, + "loss": 0.1138, + "step": 2264 + }, + { + "epoch": 2.69, + "learning_rate": 6.204331784589679e-07, + "loss": 0.1311, + "step": 2266 + }, + { + "epoch": 2.7, + "learning_rate": 6.117236678280736e-07, + "loss": 0.1296, + "step": 2268 + }, + { + "epoch": 2.7, + "learning_rate": 6.030737921409169e-07, + "loss": 0.1377, + "step": 2270 + }, + { + "epoch": 2.7, + "learning_rate": 5.9448360634207e-07, + "loss": 0.1579, + "step": 2272 + }, + { + "epoch": 2.7, + "learning_rate": 5.859531649969563e-07, + "loss": 0.1133, + "step": 2274 + }, + { + "epoch": 2.71, + "learning_rate": 5.774825222914948e-07, + "loss": 0.1324, + "step": 2276 + }, + { + "epoch": 2.71, + "learning_rate": 5.690717320317595e-07, + "loss": 0.1227, + "step": 2278 + }, + { + "epoch": 2.71, + "learning_rate": 5.60720847643641e-07, + "loss": 0.116, + "step": 2280 + }, + { + "epoch": 2.71, + "learning_rate": 5.524299221724993e-07, + "loss": 0.1574, + "step": 2282 + }, + { + "epoch": 2.72, + "learning_rate": 5.44199008282833e-07, + "loss": 0.1149, + "step": 2284 + }, + { + "epoch": 2.72, + "learning_rate": 5.360281582579474e-07, + "loss": 0.0964, + "step": 2286 + }, + { + "epoch": 2.72, + "learning_rate": 5.279174239996132e-07, + "loss": 0.1096, + "step": 2288 + }, + { + "epoch": 2.72, + "learning_rate": 5.198668570277443e-07, + "loss": 0.1395, + "step": 2290 + }, + { + "epoch": 2.73, + "learning_rate": 5.11876508480067e-07, + "loss": 0.1232, + "step": 2292 + }, + { + "epoch": 2.73, + "learning_rate": 5.039464291117968e-07, + "loss": 0.1222, + "step": 2294 + }, + { + "epoch": 2.73, + "learning_rate": 4.960766692953145e-07, + "loss": 0.16, + "step": 2296 + }, + { + "epoch": 2.73, + "learning_rate": 4.882672790198473e-07, + "loss": 0.1558, + "step": 2298 + }, + { + "epoch": 2.73, + "learning_rate": 4.805183078911524e-07, + "loss": 0.1193, + "step": 2300 + }, + { + "epoch": 2.74, + "learning_rate": 4.728298051312008e-07, + "loss": 0.1342, + "step": 2302 + }, + { + "epoch": 2.74, + "learning_rate": 4.652018195778629e-07, + "loss": 0.1598, + "step": 2304 + }, + { + "epoch": 2.74, + "learning_rate": 4.576343996845989e-07, + "loss": 0.1324, + "step": 2306 + }, + { + "epoch": 2.74, + "learning_rate": 4.5012759352015766e-07, + "loss": 0.0991, + "step": 2308 + }, + { + "epoch": 2.75, + "learning_rate": 4.4268144876825846e-07, + "loss": 0.1399, + "step": 2310 + }, + { + "epoch": 2.75, + "learning_rate": 4.352960127272987e-07, + "loss": 0.1098, + "step": 2312 + }, + { + "epoch": 2.75, + "learning_rate": 4.2797133231005207e-07, + "loss": 0.1343, + "step": 2314 + }, + { + "epoch": 2.75, + "learning_rate": 4.207074540433631e-07, + "loss": 0.1038, + "step": 2316 + }, + { + "epoch": 2.76, + "learning_rate": 4.1350442406786317e-07, + "loss": 0.1445, + "step": 2318 + }, + { + "epoch": 2.76, + "learning_rate": 4.063622881376683e-07, + "loss": 0.1484, + "step": 2320 + }, + { + "epoch": 2.76, + "learning_rate": 3.9928109162008953e-07, + "loss": 0.1116, + "step": 2322 + }, + { + "epoch": 2.76, + "learning_rate": 3.922608794953531e-07, + "loss": 0.1271, + "step": 2324 + }, + { + "epoch": 2.77, + "learning_rate": 3.8530169635630055e-07, + "loss": 0.1471, + "step": 2326 + }, + { + "epoch": 2.77, + "learning_rate": 3.7840358640812036e-07, + "loss": 0.1074, + "step": 2328 + }, + { + "epoch": 2.77, + "learning_rate": 3.715665934680546e-07, + "loss": 0.1571, + "step": 2330 + }, + { + "epoch": 2.77, + "learning_rate": 3.64790760965128e-07, + "loss": 0.0928, + "step": 2332 + }, + { + "epoch": 2.78, + "learning_rate": 3.580761319398729e-07, + "loss": 0.1362, + "step": 2334 + }, + { + "epoch": 2.78, + "learning_rate": 3.514227490440503e-07, + "loss": 0.1168, + "step": 2336 + }, + { + "epoch": 2.78, + "learning_rate": 3.4483065454038123e-07, + "loss": 0.1497, + "step": 2338 + }, + { + "epoch": 2.78, + "learning_rate": 3.3829989030228163e-07, + "loss": 0.1244, + "step": 2340 + }, + { + "epoch": 2.78, + "learning_rate": 3.3183049781359e-07, + "loss": 0.1058, + "step": 2342 + }, + { + "epoch": 2.79, + "learning_rate": 3.2542251816831237e-07, + "loss": 0.1158, + "step": 2344 + }, + { + "epoch": 2.79, + "learning_rate": 3.190759920703512e-07, + "loss": 0.1208, + "step": 2346 + }, + { + "epoch": 2.79, + "learning_rate": 3.127909598332535e-07, + "loss": 0.1214, + "step": 2348 + }, + { + "epoch": 2.79, + "learning_rate": 3.065674613799574e-07, + "loss": 0.1258, + "step": 2350 + }, + { + "epoch": 2.8, + "learning_rate": 3.0040553624252844e-07, + "loss": 0.136, + "step": 2352 + }, + { + "epoch": 2.8, + "learning_rate": 2.9430522356191814e-07, + "loss": 0.1553, + "step": 2354 + }, + { + "epoch": 2.8, + "learning_rate": 2.88266562087709e-07, + "loss": 0.164, + "step": 2356 + }, + { + "epoch": 2.8, + "learning_rate": 2.822895901778744e-07, + "loss": 0.1372, + "step": 2358 + }, + { + "epoch": 2.81, + "learning_rate": 2.7637434579853016e-07, + "loss": 0.1196, + "step": 2360 + }, + { + "epoch": 2.81, + "learning_rate": 2.7052086652369356e-07, + "loss": 0.1351, + "step": 2362 + }, + { + "epoch": 2.81, + "learning_rate": 2.6472918953504566e-07, + "loss": 0.1407, + "step": 2364 + }, + { + "epoch": 2.81, + "learning_rate": 2.589993516216993e-07, + "loss": 0.1181, + "step": 2366 + }, + { + "epoch": 2.82, + "learning_rate": 2.5333138917995714e-07, + "loss": 0.1484, + "step": 2368 + }, + { + "epoch": 2.82, + "learning_rate": 2.477253382130862e-07, + "loss": 0.125, + "step": 2370 + }, + { + "epoch": 2.82, + "learning_rate": 2.4218123433108696e-07, + "loss": 0.1309, + "step": 2372 + }, + { + "epoch": 2.82, + "learning_rate": 2.366991127504714e-07, + "loss": 0.1325, + "step": 2374 + }, + { + "epoch": 2.83, + "learning_rate": 2.3127900829403305e-07, + "loss": 0.1297, + "step": 2376 + }, + { + "epoch": 2.83, + "learning_rate": 2.259209553906272e-07, + "loss": 0.1242, + "step": 2378 + }, + { + "epoch": 2.83, + "learning_rate": 2.2062498807495669e-07, + "loss": 0.155, + "step": 2380 + }, + { + "epoch": 2.83, + "learning_rate": 2.1539113998735094e-07, + "loss": 0.1266, + "step": 2382 + }, + { + "epoch": 2.83, + "learning_rate": 2.10219444373555e-07, + "loss": 0.1459, + "step": 2384 + }, + { + "epoch": 2.84, + "learning_rate": 2.051099340845164e-07, + "loss": 0.1544, + "step": 2386 + }, + { + "epoch": 2.84, + "learning_rate": 2.000626415761786e-07, + "loss": 0.1347, + "step": 2388 + }, + { + "epoch": 2.84, + "learning_rate": 1.9507759890927125e-07, + "loss": 0.146, + "step": 2390 + }, + { + "epoch": 2.84, + "learning_rate": 1.9015483774911249e-07, + "loss": 0.1342, + "step": 2392 + }, + { + "epoch": 2.85, + "learning_rate": 1.8529438936540022e-07, + "loss": 0.1166, + "step": 2394 + }, + { + "epoch": 2.85, + "learning_rate": 1.8049628463202128e-07, + "loss": 0.1313, + "step": 2396 + }, + { + "epoch": 2.85, + "learning_rate": 1.7576055402685034e-07, + "loss": 0.1251, + "step": 2398 + }, + { + "epoch": 2.85, + "learning_rate": 1.710872276315556e-07, + "loss": 0.1077, + "step": 2400 + }, + { + "epoch": 2.86, + "learning_rate": 1.664763351314125e-07, + "loss": 0.1095, + "step": 2402 + }, + { + "epoch": 2.86, + "learning_rate": 1.619279058151102e-07, + "loss": 0.0939, + "step": 2404 + }, + { + "epoch": 2.86, + "learning_rate": 1.5744196857456874e-07, + "loss": 0.1118, + "step": 2406 + }, + { + "epoch": 2.86, + "learning_rate": 1.5301855190475445e-07, + "loss": 0.1477, + "step": 2408 + }, + { + "epoch": 2.87, + "learning_rate": 1.4865768390349812e-07, + "loss": 0.1073, + "step": 2410 + }, + { + "epoch": 2.87, + "learning_rate": 1.4435939227131712e-07, + "loss": 0.1194, + "step": 2412 + }, + { + "epoch": 2.87, + "learning_rate": 1.4012370431124133e-07, + "loss": 0.1222, + "step": 2414 + }, + { + "epoch": 2.87, + "learning_rate": 1.3595064692863757e-07, + "loss": 0.1367, + "step": 2416 + }, + { + "epoch": 2.88, + "learning_rate": 1.3184024663103755e-07, + "loss": 0.1182, + "step": 2418 + }, + { + "epoch": 2.88, + "learning_rate": 1.277925295279725e-07, + "loss": 0.1297, + "step": 2420 + }, + { + "epoch": 2.88, + "learning_rate": 1.2380752133080433e-07, + "loss": 0.1067, + "step": 2422 + }, + { + "epoch": 2.88, + "learning_rate": 1.198852473525669e-07, + "loss": 0.1483, + "step": 2424 + }, + { + "epoch": 2.88, + "learning_rate": 1.1602573250779958e-07, + "loss": 0.1534, + "step": 2426 + }, + { + "epoch": 2.89, + "learning_rate": 1.1222900131239279e-07, + "loss": 0.1177, + "step": 2428 + }, + { + "epoch": 2.89, + "learning_rate": 1.0849507788343038e-07, + "loss": 0.2059, + "step": 2430 + }, + { + "epoch": 2.89, + "learning_rate": 1.0482398593903764e-07, + "loss": 0.1369, + "step": 2432 + }, + { + "epoch": 2.89, + "learning_rate": 1.0121574879823015e-07, + "loss": 0.1178, + "step": 2434 + }, + { + "epoch": 2.9, + "learning_rate": 9.767038938076511e-08, + "loss": 0.1323, + "step": 2436 + }, + { + "epoch": 2.9, + "learning_rate": 9.418793020699813e-08, + "loss": 0.1272, + "step": 2438 + }, + { + "epoch": 2.9, + "learning_rate": 9.076839339773547e-08, + "loss": 0.112, + "step": 2440 + }, + { + "epoch": 2.9, + "learning_rate": 8.741180067409982e-08, + "loss": 0.1444, + "step": 2442 + }, + { + "epoch": 2.91, + "learning_rate": 8.411817335738482e-08, + "loss": 0.1269, + "step": 2444 + }, + { + "epoch": 2.91, + "learning_rate": 8.088753236892843e-08, + "loss": 0.1206, + "step": 2446 + }, + { + "epoch": 2.91, + "learning_rate": 7.771989822997206e-08, + "loss": 0.1349, + "step": 2448 + }, + { + "epoch": 2.91, + "learning_rate": 7.461529106153387e-08, + "loss": 0.1141, + "step": 2450 + }, + { + "epoch": 2.92, + "learning_rate": 7.15737305842823e-08, + "loss": 0.1265, + "step": 2452 + }, + { + "epoch": 2.92, + "learning_rate": 6.859523611840612e-08, + "loss": 0.12, + "step": 2454 + }, + { + "epoch": 2.92, + "learning_rate": 6.567982658349792e-08, + "loss": 0.1161, + "step": 2456 + }, + { + "epoch": 2.92, + "learning_rate": 6.282752049842855e-08, + "loss": 0.1389, + "step": 2458 + }, + { + "epoch": 2.93, + "learning_rate": 6.003833598123287e-08, + "loss": 0.1444, + "step": 2460 + }, + { + "epoch": 2.93, + "learning_rate": 5.731229074899203e-08, + "loss": 0.11, + "step": 2462 + }, + { + "epoch": 2.93, + "learning_rate": 5.464940211772574e-08, + "loss": 0.1272, + "step": 2464 + }, + { + "epoch": 2.93, + "learning_rate": 5.204968700227242e-08, + "loss": 0.1016, + "step": 2466 + }, + { + "epoch": 2.93, + "learning_rate": 4.951316191619593e-08, + "loss": 0.1521, + "step": 2468 + }, + { + "epoch": 2.94, + "learning_rate": 4.703984297166564e-08, + "loss": 0.142, + "step": 2470 + }, + { + "epoch": 2.94, + "learning_rate": 4.4629745879367634e-08, + "loss": 0.1034, + "step": 2472 + }, + { + "epoch": 2.94, + "learning_rate": 4.228288594839369e-08, + "loss": 0.118, + "step": 2474 + }, + { + "epoch": 2.94, + "learning_rate": 3.999927808615245e-08, + "loss": 0.1166, + "step": 2476 + }, + { + "epoch": 2.95, + "learning_rate": 3.777893679827061e-08, + "loss": 0.1518, + "step": 2478 + }, + { + "epoch": 2.95, + "learning_rate": 3.562187618849855e-08, + "loss": 0.1538, + "step": 2480 + }, + { + "epoch": 2.95, + "learning_rate": 3.352810995862932e-08, + "loss": 0.1053, + "step": 2482 + }, + { + "epoch": 2.95, + "learning_rate": 3.1497651408399774e-08, + "loss": 0.1095, + "step": 2484 + }, + { + "epoch": 2.96, + "learning_rate": 2.9530513435416243e-08, + "loss": 0.1574, + "step": 2486 + }, + { + "epoch": 2.96, + "learning_rate": 2.762670853506677e-08, + "loss": 0.1147, + "step": 2488 + }, + { + "epoch": 2.96, + "learning_rate": 2.578624880044567e-08, + "loss": 0.143, + "step": 2490 + }, + { + "epoch": 2.96, + "learning_rate": 2.4009145922271327e-08, + "loss": 0.0848, + "step": 2492 + }, + { + "epoch": 2.97, + "learning_rate": 2.2295411188819616e-08, + "loss": 0.1291, + "step": 2494 + }, + { + "epoch": 2.97, + "learning_rate": 2.0645055485842837e-08, + "loss": 0.132, + "step": 2496 + }, + { + "epoch": 2.97, + "learning_rate": 1.9058089296509762e-08, + "loss": 0.106, + "step": 2498 + }, + { + "epoch": 2.97, + "learning_rate": 1.753452270133238e-08, + "loss": 0.119, + "step": 2500 + }, + { + "epoch": 2.98, + "learning_rate": 1.6074365378105915e-08, + "loss": 0.1213, + "step": 2502 + }, + { + "epoch": 2.98, + "learning_rate": 1.4677626601843353e-08, + "loss": 0.1392, + "step": 2504 + }, + { + "epoch": 2.98, + "learning_rate": 1.3344315244722128e-08, + "loss": 0.1121, + "step": 2506 + }, + { + "epoch": 2.98, + "learning_rate": 1.2074439776021962e-08, + "loss": 0.1163, + "step": 2508 + }, + { + "epoch": 2.98, + "learning_rate": 1.0868008262076013e-08, + "loss": 0.149, + "step": 2510 + }, + { + "epoch": 2.99, + "learning_rate": 9.725028366214251e-09, + "loss": 0.1174, + "step": 2512 + }, + { + "epoch": 2.99, + "learning_rate": 8.64550734872016e-09, + "loss": 0.1226, + "step": 2514 + }, + { + "epoch": 2.99, + "learning_rate": 7.629452066783006e-09, + "loss": 0.1374, + "step": 2516 + }, + { + "epoch": 2.99, + "learning_rate": 6.6768689744500796e-09, + "loss": 0.112, + "step": 2518 + }, + { + "epoch": 3.0, + "learning_rate": 5.787764122592299e-09, + "loss": 0.1154, + "step": 2520 + }, + { + "epoch": 3.0, + "learning_rate": 4.9621431588620096e-09, + "loss": 0.1275, + "step": 2522 + }, + { + "epoch": 3.0, + "step": 2523, + "total_flos": 2232048114991104.0, + "train_loss": 0.2896275484415448, + "train_runtime": 144327.3604, + "train_samples_per_second": 0.559, + "train_steps_per_second": 0.017 + } + ], + "logging_steps": 2, + "max_steps": 2523, + "num_train_epochs": 3, + "save_steps": 1000, + "total_flos": 2232048114991104.0, + "trial_name": null, + "trial_params": null +}