| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 468696, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.967996313175278e-05, | |
| "loss": 2.3652, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.5228402614593506, | |
| "eval_runtime": 19.0224, | |
| "eval_samples_per_second": 105.139, | |
| "eval_steps_per_second": 1.682, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.935992626350556e-05, | |
| "loss": 2.3257, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_loss": 2.4702858924865723, | |
| "eval_runtime": 19.2339, | |
| "eval_samples_per_second": 103.983, | |
| "eval_steps_per_second": 1.664, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.903988939525834e-05, | |
| "loss": 2.3123, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_loss": 2.5149893760681152, | |
| "eval_runtime": 18.8526, | |
| "eval_samples_per_second": 106.086, | |
| "eval_steps_per_second": 1.697, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.871985252701111e-05, | |
| "loss": 2.3068, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.4867441654205322, | |
| "eval_runtime": 18.7454, | |
| "eval_samples_per_second": 106.693, | |
| "eval_steps_per_second": 1.707, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.8399815658763894e-05, | |
| "loss": 2.2906, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 2.4581546783447266, | |
| "eval_runtime": 19.2143, | |
| "eval_samples_per_second": 104.089, | |
| "eval_steps_per_second": 1.665, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.807977879051667e-05, | |
| "loss": 2.2817, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.477738380432129, | |
| "eval_runtime": 18.9443, | |
| "eval_samples_per_second": 105.573, | |
| "eval_steps_per_second": 1.689, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 4.7759741922269444e-05, | |
| "loss": 2.2713, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_loss": 2.490509271621704, | |
| "eval_runtime": 19.0513, | |
| "eval_samples_per_second": 104.98, | |
| "eval_steps_per_second": 1.68, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.743970505402223e-05, | |
| "loss": 2.2653, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_loss": 2.472813129425049, | |
| "eval_runtime": 19.2806, | |
| "eval_samples_per_second": 103.731, | |
| "eval_steps_per_second": 1.66, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.7119668185775e-05, | |
| "loss": 2.2581, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.4772017002105713, | |
| "eval_runtime": 19.5407, | |
| "eval_samples_per_second": 102.351, | |
| "eval_steps_per_second": 1.638, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.679963131752778e-05, | |
| "loss": 2.2687, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 2.4584801197052, | |
| "eval_runtime": 18.8505, | |
| "eval_samples_per_second": 106.098, | |
| "eval_steps_per_second": 1.698, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.6479594449280565e-05, | |
| "loss": 2.2473, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_loss": 2.450211524963379, | |
| "eval_runtime": 19.0047, | |
| "eval_samples_per_second": 105.237, | |
| "eval_steps_per_second": 1.684, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.6159557581033336e-05, | |
| "loss": 2.2536, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.402937650680542, | |
| "eval_runtime": 19.1856, | |
| "eval_samples_per_second": 104.245, | |
| "eval_steps_per_second": 1.668, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4.5839520712786115e-05, | |
| "loss": 2.2355, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_loss": 2.5034797191619873, | |
| "eval_runtime": 18.9351, | |
| "eval_samples_per_second": 105.624, | |
| "eval_steps_per_second": 1.69, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.551948384453889e-05, | |
| "loss": 2.2356, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_loss": 2.443594217300415, | |
| "eval_runtime": 19.1979, | |
| "eval_samples_per_second": 104.178, | |
| "eval_steps_per_second": 1.667, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.519944697629167e-05, | |
| "loss": 2.2385, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.4230918884277344, | |
| "eval_runtime": 19.1941, | |
| "eval_samples_per_second": 104.199, | |
| "eval_steps_per_second": 1.667, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.487941010804445e-05, | |
| "loss": 2.229, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_loss": 2.435939073562622, | |
| "eval_runtime": 18.7132, | |
| "eval_samples_per_second": 106.876, | |
| "eval_steps_per_second": 1.71, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.455937323979723e-05, | |
| "loss": 2.2308, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_loss": 2.379002094268799, | |
| "eval_runtime": 18.8323, | |
| "eval_samples_per_second": 106.2, | |
| "eval_steps_per_second": 1.699, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.4239336371550006e-05, | |
| "loss": 2.2247, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 2.440680742263794, | |
| "eval_runtime": 18.8124, | |
| "eval_samples_per_second": 106.313, | |
| "eval_steps_per_second": 1.701, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.391929950330278e-05, | |
| "loss": 2.2262, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_loss": 2.401104211807251, | |
| "eval_runtime": 18.8589, | |
| "eval_samples_per_second": 106.051, | |
| "eval_steps_per_second": 1.697, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.359926263505556e-05, | |
| "loss": 2.2074, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.382688522338867, | |
| "eval_runtime": 18.7139, | |
| "eval_samples_per_second": 106.872, | |
| "eval_steps_per_second": 1.71, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.327922576680834e-05, | |
| "loss": 2.2204, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 2.421189308166504, | |
| "eval_runtime": 18.9386, | |
| "eval_samples_per_second": 105.604, | |
| "eval_steps_per_second": 1.69, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.295918889856111e-05, | |
| "loss": 2.2123, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_loss": 2.4362740516662598, | |
| "eval_runtime": 18.9745, | |
| "eval_samples_per_second": 105.405, | |
| "eval_steps_per_second": 1.686, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.263915203031389e-05, | |
| "loss": 2.2225, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.426682710647583, | |
| "eval_runtime": 19.1794, | |
| "eval_samples_per_second": 104.278, | |
| "eval_steps_per_second": 1.668, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.231911516206668e-05, | |
| "loss": 2.2137, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_loss": 2.4169669151306152, | |
| "eval_runtime": 18.8197, | |
| "eval_samples_per_second": 106.272, | |
| "eval_steps_per_second": 1.7, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.199907829381945e-05, | |
| "loss": 2.2143, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_loss": 2.4082441329956055, | |
| "eval_runtime": 18.9737, | |
| "eval_samples_per_second": 105.409, | |
| "eval_steps_per_second": 1.687, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.167904142557223e-05, | |
| "loss": 2.2131, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.4836766719818115, | |
| "eval_runtime": 19.0574, | |
| "eval_samples_per_second": 104.946, | |
| "eval_steps_per_second": 1.679, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.1359004557325005e-05, | |
| "loss": 2.1954, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_loss": 2.43381404876709, | |
| "eval_runtime": 18.8859, | |
| "eval_samples_per_second": 105.899, | |
| "eval_steps_per_second": 1.694, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.1038967689077783e-05, | |
| "loss": 2.1934, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_loss": 2.4075064659118652, | |
| "eval_runtime": 18.689, | |
| "eval_samples_per_second": 107.015, | |
| "eval_steps_per_second": 1.712, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.071893082083056e-05, | |
| "loss": 2.1943, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 2.383098602294922, | |
| "eval_runtime": 18.8218, | |
| "eval_samples_per_second": 106.26, | |
| "eval_steps_per_second": 1.7, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.039889395258334e-05, | |
| "loss": 2.1944, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 2.3953185081481934, | |
| "eval_runtime": 18.9451, | |
| "eval_samples_per_second": 105.568, | |
| "eval_steps_per_second": 1.689, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.007885708433612e-05, | |
| "loss": 2.1914, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.411050796508789, | |
| "eval_runtime": 18.7128, | |
| "eval_samples_per_second": 106.878, | |
| "eval_steps_per_second": 1.71, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 3.975882021608889e-05, | |
| "loss": 2.1865, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_loss": 2.390427827835083, | |
| "eval_runtime": 18.9045, | |
| "eval_samples_per_second": 105.795, | |
| "eval_steps_per_second": 1.693, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 3.9438783347841675e-05, | |
| "loss": 2.1871, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_loss": 2.401388168334961, | |
| "eval_runtime": 18.7096, | |
| "eval_samples_per_second": 106.897, | |
| "eval_steps_per_second": 1.71, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.9118746479594454e-05, | |
| "loss": 2.1792, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.4562745094299316, | |
| "eval_runtime": 18.8567, | |
| "eval_samples_per_second": 106.063, | |
| "eval_steps_per_second": 1.697, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 3.8798709611347225e-05, | |
| "loss": 2.1921, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_loss": 2.399921417236328, | |
| "eval_runtime": 18.7883, | |
| "eval_samples_per_second": 106.449, | |
| "eval_steps_per_second": 1.703, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 3.847867274310001e-05, | |
| "loss": 2.1831, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_loss": 2.3935768604278564, | |
| "eval_runtime": 18.8237, | |
| "eval_samples_per_second": 106.249, | |
| "eval_steps_per_second": 1.7, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.815863587485278e-05, | |
| "loss": 2.169, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 2.385082960128784, | |
| "eval_runtime": 18.9677, | |
| "eval_samples_per_second": 105.442, | |
| "eval_steps_per_second": 1.687, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 3.783859900660556e-05, | |
| "loss": 2.1619, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_loss": 2.3289620876312256, | |
| "eval_runtime": 19.0182, | |
| "eval_samples_per_second": 105.162, | |
| "eval_steps_per_second": 1.683, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 3.7518562138358346e-05, | |
| "loss": 2.1651, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_loss": 2.3818867206573486, | |
| "eval_runtime": 18.9593, | |
| "eval_samples_per_second": 105.489, | |
| "eval_steps_per_second": 1.688, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.719852527011112e-05, | |
| "loss": 2.1704, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 2.3583953380584717, | |
| "eval_runtime": 18.8577, | |
| "eval_samples_per_second": 106.057, | |
| "eval_steps_per_second": 1.697, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 3.6878488401863896e-05, | |
| "loss": 2.1601, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 2.3705227375030518, | |
| "eval_runtime": 19.0038, | |
| "eval_samples_per_second": 105.242, | |
| "eval_steps_per_second": 1.684, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 3.6558451533616674e-05, | |
| "loss": 2.1819, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_loss": 2.3806064128875732, | |
| "eval_runtime": 19.09, | |
| "eval_samples_per_second": 104.767, | |
| "eval_steps_per_second": 1.676, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.623841466536945e-05, | |
| "loss": 2.1666, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 2.3670589923858643, | |
| "eval_runtime": 18.9485, | |
| "eval_samples_per_second": 105.549, | |
| "eval_steps_per_second": 1.689, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.591837779712223e-05, | |
| "loss": 2.1718, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_loss": 2.364011764526367, | |
| "eval_runtime": 18.7665, | |
| "eval_samples_per_second": 106.573, | |
| "eval_steps_per_second": 1.705, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.559834092887501e-05, | |
| "loss": 2.1521, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 2.373670816421509, | |
| "eval_runtime": 18.9014, | |
| "eval_samples_per_second": 105.812, | |
| "eval_steps_per_second": 1.693, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.527830406062779e-05, | |
| "loss": 2.148, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 2.377063035964966, | |
| "eval_runtime": 19.012, | |
| "eval_samples_per_second": 105.197, | |
| "eval_steps_per_second": 1.683, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.495826719238056e-05, | |
| "loss": 2.1438, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_loss": 2.3637826442718506, | |
| "eval_runtime": 18.982, | |
| "eval_samples_per_second": 105.363, | |
| "eval_steps_per_second": 1.686, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.4638230324133344e-05, | |
| "loss": 2.1536, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 2.3571810722351074, | |
| "eval_runtime": 18.8471, | |
| "eval_samples_per_second": 106.117, | |
| "eval_steps_per_second": 1.698, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.431819345588612e-05, | |
| "loss": 2.1505, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_loss": 2.3516790866851807, | |
| "eval_runtime": 18.8575, | |
| "eval_samples_per_second": 106.059, | |
| "eval_steps_per_second": 1.697, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.3998156587638894e-05, | |
| "loss": 2.1319, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 2.3615307807922363, | |
| "eval_runtime": 19.1166, | |
| "eval_samples_per_second": 104.621, | |
| "eval_steps_per_second": 1.674, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.367811971939168e-05, | |
| "loss": 2.123, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 2.3522212505340576, | |
| "eval_runtime": 19.1501, | |
| "eval_samples_per_second": 104.438, | |
| "eval_steps_per_second": 1.671, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 3.335808285114445e-05, | |
| "loss": 2.1513, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_loss": 2.388401746749878, | |
| "eval_runtime": 18.8344, | |
| "eval_samples_per_second": 106.189, | |
| "eval_steps_per_second": 1.699, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.303804598289723e-05, | |
| "loss": 2.1419, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_loss": 2.32639479637146, | |
| "eval_runtime": 18.8162, | |
| "eval_samples_per_second": 106.292, | |
| "eval_steps_per_second": 1.701, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.271800911465001e-05, | |
| "loss": 2.1404, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 2.3595259189605713, | |
| "eval_runtime": 18.8272, | |
| "eval_samples_per_second": 106.229, | |
| "eval_steps_per_second": 1.7, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.2397972246402786e-05, | |
| "loss": 2.128, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_loss": 2.3471484184265137, | |
| "eval_runtime": 18.9594, | |
| "eval_samples_per_second": 105.489, | |
| "eval_steps_per_second": 1.688, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.2077935378155565e-05, | |
| "loss": 2.1287, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 2.347370147705078, | |
| "eval_runtime": 18.9278, | |
| "eval_samples_per_second": 105.665, | |
| "eval_steps_per_second": 1.691, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.175789850990834e-05, | |
| "loss": 2.1372, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_loss": 2.3139336109161377, | |
| "eval_runtime": 19.0473, | |
| "eval_samples_per_second": 105.002, | |
| "eval_steps_per_second": 1.68, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.143786164166112e-05, | |
| "loss": 2.1301, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_loss": 2.3145127296447754, | |
| "eval_runtime": 18.831, | |
| "eval_samples_per_second": 106.208, | |
| "eval_steps_per_second": 1.699, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.11178247734139e-05, | |
| "loss": 2.128, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 2.3634743690490723, | |
| "eval_runtime": 19.0052, | |
| "eval_samples_per_second": 105.234, | |
| "eval_steps_per_second": 1.684, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.079778790516668e-05, | |
| "loss": 2.1088, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_loss": 2.3068251609802246, | |
| "eval_runtime": 18.9935, | |
| "eval_samples_per_second": 105.299, | |
| "eval_steps_per_second": 1.685, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.0477751036919456e-05, | |
| "loss": 2.122, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 2.312502145767212, | |
| "eval_runtime": 18.8963, | |
| "eval_samples_per_second": 105.841, | |
| "eval_steps_per_second": 1.693, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.015771416867223e-05, | |
| "loss": 2.1113, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 2.3446314334869385, | |
| "eval_runtime": 18.8671, | |
| "eval_samples_per_second": 106.005, | |
| "eval_steps_per_second": 1.696, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 2.983767730042501e-05, | |
| "loss": 2.1108, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_loss": 2.3173420429229736, | |
| "eval_runtime": 18.7418, | |
| "eval_samples_per_second": 106.713, | |
| "eval_steps_per_second": 1.707, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.951764043217779e-05, | |
| "loss": 2.125, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_loss": 2.363111734390259, | |
| "eval_runtime": 18.789, | |
| "eval_samples_per_second": 106.445, | |
| "eval_steps_per_second": 1.703, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.9197603563930563e-05, | |
| "loss": 2.1106, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 2.331869602203369, | |
| "eval_runtime": 18.9057, | |
| "eval_samples_per_second": 105.788, | |
| "eval_steps_per_second": 1.693, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 2.8877566695683345e-05, | |
| "loss": 2.1143, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_loss": 2.300299882888794, | |
| "eval_runtime": 18.7948, | |
| "eval_samples_per_second": 106.413, | |
| "eval_steps_per_second": 1.703, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 2.8557529827436123e-05, | |
| "loss": 2.0982, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_loss": 2.3044443130493164, | |
| "eval_runtime": 19.1803, | |
| "eval_samples_per_second": 104.273, | |
| "eval_steps_per_second": 1.668, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.82374929591889e-05, | |
| "loss": 2.1026, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 2.305398464202881, | |
| "eval_runtime": 18.9121, | |
| "eval_samples_per_second": 105.752, | |
| "eval_steps_per_second": 1.692, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 2.791745609094168e-05, | |
| "loss": 2.0995, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_loss": 2.3068206310272217, | |
| "eval_runtime": 18.8989, | |
| "eval_samples_per_second": 105.826, | |
| "eval_steps_per_second": 1.693, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7597419222694455e-05, | |
| "loss": 2.0844, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 2.3477184772491455, | |
| "eval_runtime": 19.0274, | |
| "eval_samples_per_second": 105.111, | |
| "eval_steps_per_second": 1.682, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 2.7277382354447233e-05, | |
| "loss": 2.1008, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 2.339860439300537, | |
| "eval_runtime": 18.7939, | |
| "eval_samples_per_second": 106.418, | |
| "eval_steps_per_second": 1.703, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 2.6957345486200015e-05, | |
| "loss": 2.092, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_loss": 2.3236074447631836, | |
| "eval_runtime": 18.7746, | |
| "eval_samples_per_second": 106.527, | |
| "eval_steps_per_second": 1.704, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.663730861795279e-05, | |
| "loss": 2.09, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 2.3070333003997803, | |
| "eval_runtime": 19.3882, | |
| "eval_samples_per_second": 103.155, | |
| "eval_steps_per_second": 1.65, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.631727174970557e-05, | |
| "loss": 2.0984, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_loss": 2.31845760345459, | |
| "eval_runtime": 19.5362, | |
| "eval_samples_per_second": 102.374, | |
| "eval_steps_per_second": 1.638, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.5997234881458344e-05, | |
| "loss": 2.0965, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_loss": 2.306812047958374, | |
| "eval_runtime": 19.4702, | |
| "eval_samples_per_second": 102.721, | |
| "eval_steps_per_second": 1.644, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5677198013211122e-05, | |
| "loss": 2.081, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 2.274367570877075, | |
| "eval_runtime": 19.5806, | |
| "eval_samples_per_second": 102.142, | |
| "eval_steps_per_second": 1.634, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.5357161144963904e-05, | |
| "loss": 2.0871, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_loss": 2.254237651824951, | |
| "eval_runtime": 19.7552, | |
| "eval_samples_per_second": 101.239, | |
| "eval_steps_per_second": 1.62, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.503712427671668e-05, | |
| "loss": 2.0751, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_loss": 2.2817015647888184, | |
| "eval_runtime": 19.5765, | |
| "eval_samples_per_second": 102.163, | |
| "eval_steps_per_second": 1.635, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4717087408469457e-05, | |
| "loss": 2.0875, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 2.288637161254883, | |
| "eval_runtime": 19.6173, | |
| "eval_samples_per_second": 101.951, | |
| "eval_steps_per_second": 1.631, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.4397050540222236e-05, | |
| "loss": 2.0847, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 2.3093936443328857, | |
| "eval_runtime": 19.3962, | |
| "eval_samples_per_second": 103.113, | |
| "eval_steps_per_second": 1.65, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.4077013671975014e-05, | |
| "loss": 2.0861, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 2.294950246810913, | |
| "eval_runtime": 19.5483, | |
| "eval_samples_per_second": 102.311, | |
| "eval_steps_per_second": 1.637, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.375697680372779e-05, | |
| "loss": 2.0689, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_loss": 2.293389320373535, | |
| "eval_runtime": 19.51, | |
| "eval_samples_per_second": 102.512, | |
| "eval_steps_per_second": 1.64, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.3436939935480567e-05, | |
| "loss": 2.0767, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_loss": 2.304983615875244, | |
| "eval_runtime": 19.26, | |
| "eval_samples_per_second": 103.842, | |
| "eval_steps_per_second": 1.661, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.311690306723335e-05, | |
| "loss": 2.0711, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 2.2823355197906494, | |
| "eval_runtime": 20.4429, | |
| "eval_samples_per_second": 97.834, | |
| "eval_steps_per_second": 1.565, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.2796866198986124e-05, | |
| "loss": 2.0654, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_loss": 2.280226469039917, | |
| "eval_runtime": 19.5501, | |
| "eval_samples_per_second": 102.301, | |
| "eval_steps_per_second": 1.637, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.2476829330738902e-05, | |
| "loss": 2.0627, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_loss": 2.2770543098449707, | |
| "eval_runtime": 19.4549, | |
| "eval_samples_per_second": 102.802, | |
| "eval_steps_per_second": 1.645, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.215679246249168e-05, | |
| "loss": 2.0656, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 2.2922134399414062, | |
| "eval_runtime": 19.3407, | |
| "eval_samples_per_second": 103.409, | |
| "eval_steps_per_second": 1.655, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.1836755594244456e-05, | |
| "loss": 2.07, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_loss": 2.268709897994995, | |
| "eval_runtime": 19.4551, | |
| "eval_samples_per_second": 102.801, | |
| "eval_steps_per_second": 1.645, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.1516718725997238e-05, | |
| "loss": 2.0661, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_loss": 2.247802972793579, | |
| "eval_runtime": 19.273, | |
| "eval_samples_per_second": 103.772, | |
| "eval_steps_per_second": 1.66, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.1196681857750016e-05, | |
| "loss": 2.0511, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 2.3074941635131836, | |
| "eval_runtime": 19.2075, | |
| "eval_samples_per_second": 104.126, | |
| "eval_steps_per_second": 1.666, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.087664498950279e-05, | |
| "loss": 2.0582, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 2.248690605163574, | |
| "eval_runtime": 19.2432, | |
| "eval_samples_per_second": 103.933, | |
| "eval_steps_per_second": 1.663, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.055660812125557e-05, | |
| "loss": 2.0626, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 2.2588484287261963, | |
| "eval_runtime": 19.4441, | |
| "eval_samples_per_second": 102.859, | |
| "eval_steps_per_second": 1.646, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.0236571253008348e-05, | |
| "loss": 2.0562, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 2.274319887161255, | |
| "eval_runtime": 19.4979, | |
| "eval_samples_per_second": 102.575, | |
| "eval_steps_per_second": 1.641, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.9916534384761126e-05, | |
| "loss": 2.0511, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_loss": 2.276171922683716, | |
| "eval_runtime": 19.331, | |
| "eval_samples_per_second": 103.461, | |
| "eval_steps_per_second": 1.655, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9596497516513904e-05, | |
| "loss": 2.0413, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 2.2398881912231445, | |
| "eval_runtime": 19.5099, | |
| "eval_samples_per_second": 102.512, | |
| "eval_steps_per_second": 1.64, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.927646064826668e-05, | |
| "loss": 2.0496, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_loss": 2.271150588989258, | |
| "eval_runtime": 19.317, | |
| "eval_samples_per_second": 103.536, | |
| "eval_steps_per_second": 1.657, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8956423780019458e-05, | |
| "loss": 2.0564, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_loss": 2.2770469188690186, | |
| "eval_runtime": 19.1141, | |
| "eval_samples_per_second": 104.635, | |
| "eval_steps_per_second": 1.674, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.863638691177224e-05, | |
| "loss": 2.0505, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 2.2885847091674805, | |
| "eval_runtime": 19.6608, | |
| "eval_samples_per_second": 101.725, | |
| "eval_steps_per_second": 1.628, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.8316350043525015e-05, | |
| "loss": 2.0504, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_loss": 2.3180038928985596, | |
| "eval_runtime": 19.4021, | |
| "eval_samples_per_second": 103.082, | |
| "eval_steps_per_second": 1.649, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7996313175277793e-05, | |
| "loss": 2.0439, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 2.2651731967926025, | |
| "eval_runtime": 19.3214, | |
| "eval_samples_per_second": 103.512, | |
| "eval_steps_per_second": 1.656, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.767627630703057e-05, | |
| "loss": 2.0461, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 2.222968101501465, | |
| "eval_runtime": 19.2774, | |
| "eval_samples_per_second": 103.749, | |
| "eval_steps_per_second": 1.66, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.7356239438783346e-05, | |
| "loss": 2.0405, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_loss": 2.2448790073394775, | |
| "eval_runtime": 21.2727, | |
| "eval_samples_per_second": 94.017, | |
| "eval_steps_per_second": 1.504, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.7036202570536128e-05, | |
| "loss": 2.038, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_loss": 2.2096433639526367, | |
| "eval_runtime": 19.292, | |
| "eval_samples_per_second": 103.67, | |
| "eval_steps_per_second": 1.659, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.6716165702288906e-05, | |
| "loss": 2.0205, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 2.2131240367889404, | |
| "eval_runtime": 19.1995, | |
| "eval_samples_per_second": 104.169, | |
| "eval_steps_per_second": 1.667, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.639612883404168e-05, | |
| "loss": 2.0196, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_loss": 2.2505383491516113, | |
| "eval_runtime": 19.4936, | |
| "eval_samples_per_second": 102.598, | |
| "eval_steps_per_second": 1.642, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.607609196579446e-05, | |
| "loss": 2.0272, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 2.243058681488037, | |
| "eval_runtime": 19.4712, | |
| "eval_samples_per_second": 102.716, | |
| "eval_steps_per_second": 1.643, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.5756055097547238e-05, | |
| "loss": 2.0276, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_loss": 2.2137022018432617, | |
| "eval_runtime": 18.6801, | |
| "eval_samples_per_second": 107.066, | |
| "eval_steps_per_second": 1.713, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.5436018229300017e-05, | |
| "loss": 2.0224, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_loss": 2.2309203147888184, | |
| "eval_runtime": 18.8357, | |
| "eval_samples_per_second": 106.181, | |
| "eval_steps_per_second": 1.699, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.5115981361052795e-05, | |
| "loss": 2.0253, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 2.2213120460510254, | |
| "eval_runtime": 19.2801, | |
| "eval_samples_per_second": 103.734, | |
| "eval_steps_per_second": 1.66, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.4795944492805572e-05, | |
| "loss": 2.0199, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_loss": 2.2416763305664062, | |
| "eval_runtime": 18.8526, | |
| "eval_samples_per_second": 106.086, | |
| "eval_steps_per_second": 1.697, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.4475907624558348e-05, | |
| "loss": 2.0216, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 2.24078369140625, | |
| "eval_runtime": 18.5093, | |
| "eval_samples_per_second": 108.054, | |
| "eval_steps_per_second": 1.729, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.4155870756311127e-05, | |
| "loss": 2.0236, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 2.2598512172698975, | |
| "eval_runtime": 19.0496, | |
| "eval_samples_per_second": 104.989, | |
| "eval_steps_per_second": 1.68, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.3835833888063907e-05, | |
| "loss": 2.0247, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_loss": 2.2282919883728027, | |
| "eval_runtime": 18.7751, | |
| "eval_samples_per_second": 106.524, | |
| "eval_steps_per_second": 1.704, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.3515797019816683e-05, | |
| "loss": 2.0263, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_loss": 2.248234748840332, | |
| "eval_runtime": 18.8567, | |
| "eval_samples_per_second": 106.063, | |
| "eval_steps_per_second": 1.697, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.3195760151569462e-05, | |
| "loss": 2.014, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 2.194716691970825, | |
| "eval_runtime": 18.7872, | |
| "eval_samples_per_second": 106.455, | |
| "eval_steps_per_second": 1.703, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.2875723283322239e-05, | |
| "loss": 2.0076, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_loss": 2.233458995819092, | |
| "eval_runtime": 18.8711, | |
| "eval_samples_per_second": 105.982, | |
| "eval_steps_per_second": 1.696, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.2555686415075015e-05, | |
| "loss": 2.011, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_loss": 2.213284492492676, | |
| "eval_runtime": 19.5167, | |
| "eval_samples_per_second": 102.477, | |
| "eval_steps_per_second": 1.64, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.2235649546827795e-05, | |
| "loss": 2.0216, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 2.210317373275757, | |
| "eval_runtime": 18.6333, | |
| "eval_samples_per_second": 107.334, | |
| "eval_steps_per_second": 1.717, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.1915612678580574e-05, | |
| "loss": 2.0097, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_loss": 2.241175651550293, | |
| "eval_runtime": 18.694, | |
| "eval_samples_per_second": 106.986, | |
| "eval_steps_per_second": 1.712, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.159557581033335e-05, | |
| "loss": 2.0076, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 2.2543113231658936, | |
| "eval_runtime": 18.8082, | |
| "eval_samples_per_second": 106.336, | |
| "eval_steps_per_second": 1.701, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.1275538942086129e-05, | |
| "loss": 2.01, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 2.2642598152160645, | |
| "eval_runtime": 19.4522, | |
| "eval_samples_per_second": 102.816, | |
| "eval_steps_per_second": 1.645, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.0955502073838907e-05, | |
| "loss": 2.0074, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_loss": 2.2413113117218018, | |
| "eval_runtime": 18.8078, | |
| "eval_samples_per_second": 106.339, | |
| "eval_steps_per_second": 1.701, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0635465205591686e-05, | |
| "loss": 1.9898, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_loss": 2.2442147731781006, | |
| "eval_runtime": 18.8179, | |
| "eval_samples_per_second": 106.282, | |
| "eval_steps_per_second": 1.701, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.0315428337344462e-05, | |
| "loss": 2.0119, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_loss": 2.227520704269409, | |
| "eval_runtime": 18.9241, | |
| "eval_samples_per_second": 105.686, | |
| "eval_steps_per_second": 1.691, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 9.99539146909724e-06, | |
| "loss": 1.993, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_loss": 2.2116286754608154, | |
| "eval_runtime": 19.6066, | |
| "eval_samples_per_second": 102.007, | |
| "eval_steps_per_second": 1.632, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.675354600850019e-06, | |
| "loss": 2.0092, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 2.2108232975006104, | |
| "eval_runtime": 18.7069, | |
| "eval_samples_per_second": 106.912, | |
| "eval_steps_per_second": 1.711, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.355317732602796e-06, | |
| "loss": 2.0019, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_loss": 2.2236363887786865, | |
| "eval_runtime": 18.8801, | |
| "eval_samples_per_second": 105.931, | |
| "eval_steps_per_second": 1.695, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.035280864355574e-06, | |
| "loss": 1.9931, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_loss": 2.2105228900909424, | |
| "eval_runtime": 21.3819, | |
| "eval_samples_per_second": 93.537, | |
| "eval_steps_per_second": 1.497, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.715243996108352e-06, | |
| "loss": 1.9851, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 2.2179064750671387, | |
| "eval_runtime": 19.3741, | |
| "eval_samples_per_second": 103.231, | |
| "eval_steps_per_second": 1.652, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.395207127861129e-06, | |
| "loss": 1.9882, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 2.2303926944732666, | |
| "eval_runtime": 18.846, | |
| "eval_samples_per_second": 106.123, | |
| "eval_steps_per_second": 1.698, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.075170259613907e-06, | |
| "loss": 1.999, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 2.202813148498535, | |
| "eval_runtime": 19.3498, | |
| "eval_samples_per_second": 103.36, | |
| "eval_steps_per_second": 1.654, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.755133391366686e-06, | |
| "loss": 1.9848, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_loss": 2.1549251079559326, | |
| "eval_runtime": 20.1588, | |
| "eval_samples_per_second": 99.212, | |
| "eval_steps_per_second": 1.587, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 7.435096523119464e-06, | |
| "loss": 1.9962, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_loss": 2.2457568645477295, | |
| "eval_runtime": 19.1213, | |
| "eval_samples_per_second": 104.595, | |
| "eval_steps_per_second": 1.674, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.115059654872242e-06, | |
| "loss": 1.991, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 2.1861023902893066, | |
| "eval_runtime": 19.2023, | |
| "eval_samples_per_second": 104.154, | |
| "eval_steps_per_second": 1.666, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 6.79502278662502e-06, | |
| "loss": 1.9901, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_loss": 2.2025179862976074, | |
| "eval_runtime": 19.2167, | |
| "eval_samples_per_second": 104.076, | |
| "eval_steps_per_second": 1.665, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 6.474985918377798e-06, | |
| "loss": 1.9698, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_loss": 2.2299790382385254, | |
| "eval_runtime": 20.1153, | |
| "eval_samples_per_second": 99.427, | |
| "eval_steps_per_second": 1.591, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.154949050130575e-06, | |
| "loss": 1.9772, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 2.1934893131256104, | |
| "eval_runtime": 19.0706, | |
| "eval_samples_per_second": 104.874, | |
| "eval_steps_per_second": 1.678, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 5.8349121818833536e-06, | |
| "loss": 1.974, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 2.201178789138794, | |
| "eval_runtime": 18.9851, | |
| "eval_samples_per_second": 105.346, | |
| "eval_steps_per_second": 1.686, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.514875313636131e-06, | |
| "loss": 1.9906, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_loss": 2.2042794227600098, | |
| "eval_runtime": 19.1406, | |
| "eval_samples_per_second": 104.49, | |
| "eval_steps_per_second": 1.672, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.194838445388909e-06, | |
| "loss": 1.9899, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 2.187676191329956, | |
| "eval_runtime": 19.4746, | |
| "eval_samples_per_second": 102.698, | |
| "eval_steps_per_second": 1.643, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.874801577141687e-06, | |
| "loss": 1.9785, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 2.2104039192199707, | |
| "eval_runtime": 19.2016, | |
| "eval_samples_per_second": 104.158, | |
| "eval_steps_per_second": 1.667, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.5547647088944646e-06, | |
| "loss": 1.9682, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_loss": 2.1898605823516846, | |
| "eval_runtime": 19.2296, | |
| "eval_samples_per_second": 104.006, | |
| "eval_steps_per_second": 1.664, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.234727840647243e-06, | |
| "loss": 1.9785, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 2.183152437210083, | |
| "eval_runtime": 19.1118, | |
| "eval_samples_per_second": 104.647, | |
| "eval_steps_per_second": 1.674, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.914690972400021e-06, | |
| "loss": 1.9795, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_loss": 2.199709415435791, | |
| "eval_runtime": 19.352, | |
| "eval_samples_per_second": 103.348, | |
| "eval_steps_per_second": 1.654, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.5946541041527984e-06, | |
| "loss": 1.9656, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 2.200268507003784, | |
| "eval_runtime": 19.3103, | |
| "eval_samples_per_second": 103.572, | |
| "eval_steps_per_second": 1.657, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.2746172359055764e-06, | |
| "loss": 1.9813, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_loss": 2.1825687885284424, | |
| "eval_runtime": 19.0952, | |
| "eval_samples_per_second": 104.739, | |
| "eval_steps_per_second": 1.676, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.9545803676583543e-06, | |
| "loss": 1.9719, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_loss": 2.1915125846862793, | |
| "eval_runtime": 19.3108, | |
| "eval_samples_per_second": 103.569, | |
| "eval_steps_per_second": 1.657, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.6345434994111323e-06, | |
| "loss": 1.962, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 2.196523904800415, | |
| "eval_runtime": 19.1234, | |
| "eval_samples_per_second": 104.584, | |
| "eval_steps_per_second": 1.673, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 2.3145066311639102e-06, | |
| "loss": 1.9657, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_loss": 2.1772007942199707, | |
| "eval_runtime": 19.0921, | |
| "eval_samples_per_second": 104.756, | |
| "eval_steps_per_second": 1.676, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.994469762916688e-06, | |
| "loss": 1.9662, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 2.151597261428833, | |
| "eval_runtime": 19.1055, | |
| "eval_samples_per_second": 104.682, | |
| "eval_steps_per_second": 1.675, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.674432894669466e-06, | |
| "loss": 1.9631, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 2.1692702770233154, | |
| "eval_runtime": 19.4031, | |
| "eval_samples_per_second": 103.077, | |
| "eval_steps_per_second": 1.649, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.354396026422244e-06, | |
| "loss": 1.9651, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_loss": 2.174436330795288, | |
| "eval_runtime": 19.3133, | |
| "eval_samples_per_second": 103.555, | |
| "eval_steps_per_second": 1.657, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.0343591581750219e-06, | |
| "loss": 1.9761, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_loss": 2.1922249794006348, | |
| "eval_runtime": 19.2415, | |
| "eval_samples_per_second": 103.942, | |
| "eval_steps_per_second": 1.663, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 7.143222899277997e-07, | |
| "loss": 1.9602, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 2.177457571029663, | |
| "eval_runtime": 19.1279, | |
| "eval_samples_per_second": 104.559, | |
| "eval_steps_per_second": 1.673, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.9428542168057766e-07, | |
| "loss": 1.9429, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_loss": 2.167567491531372, | |
| "eval_runtime": 19.5087, | |
| "eval_samples_per_second": 102.518, | |
| "eval_steps_per_second": 1.64, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 7.424855343335553e-08, | |
| "loss": 1.9662, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.179702043533325, | |
| "eval_runtime": 19.3655, | |
| "eval_samples_per_second": 103.276, | |
| "eval_steps_per_second": 1.652, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.4877105842593068e-05, | |
| "loss": 1.9883, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 2.1984949111938477, | |
| "eval_runtime": 19.5918, | |
| "eval_samples_per_second": 102.084, | |
| "eval_steps_per_second": 1.633, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 2.4717087408469457e-05, | |
| "loss": 2.0127, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_loss": 2.264371633529663, | |
| "eval_runtime": 19.1742, | |
| "eval_samples_per_second": 104.307, | |
| "eval_steps_per_second": 1.669, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.4557068974345846e-05, | |
| "loss": 2.013, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 2.263242721557617, | |
| "eval_runtime": 19.0844, | |
| "eval_samples_per_second": 104.798, | |
| "eval_steps_per_second": 1.677, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 2.4397050540222236e-05, | |
| "loss": 2.0243, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_loss": 2.267091751098633, | |
| "eval_runtime": 19.3957, | |
| "eval_samples_per_second": 103.116, | |
| "eval_steps_per_second": 1.65, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.423703210609862e-05, | |
| "loss": 2.021, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 2.2471094131469727, | |
| "eval_runtime": 19.2438, | |
| "eval_samples_per_second": 103.929, | |
| "eval_steps_per_second": 1.663, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.4077013671975014e-05, | |
| "loss": 2.0278, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 2.2140402793884277, | |
| "eval_runtime": 19.0312, | |
| "eval_samples_per_second": 105.091, | |
| "eval_steps_per_second": 1.681, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 2.3916995237851403e-05, | |
| "loss": 2.0109, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_loss": 2.2622554302215576, | |
| "eval_runtime": 19.0334, | |
| "eval_samples_per_second": 105.078, | |
| "eval_steps_per_second": 1.681, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 2.375697680372779e-05, | |
| "loss": 2.023, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_loss": 2.245877981185913, | |
| "eval_runtime": 19.4264, | |
| "eval_samples_per_second": 102.953, | |
| "eval_steps_per_second": 1.647, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.359695836960418e-05, | |
| "loss": 2.0187, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 2.25624942779541, | |
| "eval_runtime": 19.303, | |
| "eval_samples_per_second": 103.611, | |
| "eval_steps_per_second": 1.658, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 2.3436939935480567e-05, | |
| "loss": 2.019, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_loss": 2.2587056159973145, | |
| "eval_runtime": 18.8102, | |
| "eval_samples_per_second": 106.325, | |
| "eval_steps_per_second": 1.701, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 2.3276921501356956e-05, | |
| "loss": 2.0208, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_loss": 2.2842631340026855, | |
| "eval_runtime": 19.22, | |
| "eval_samples_per_second": 104.058, | |
| "eval_steps_per_second": 1.665, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.311690306723335e-05, | |
| "loss": 2.0043, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 2.2638208866119385, | |
| "eval_runtime": 19.6646, | |
| "eval_samples_per_second": 101.706, | |
| "eval_steps_per_second": 1.627, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 2.2956884633109735e-05, | |
| "loss": 2.0171, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_loss": 2.2604892253875732, | |
| "eval_runtime": 19.2438, | |
| "eval_samples_per_second": 103.93, | |
| "eval_steps_per_second": 1.663, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.2796866198986124e-05, | |
| "loss": 2.0351, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 2.2608911991119385, | |
| "eval_runtime": 19.3036, | |
| "eval_samples_per_second": 103.607, | |
| "eval_steps_per_second": 1.658, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.2636847764862513e-05, | |
| "loss": 2.0166, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_loss": 2.2317748069763184, | |
| "eval_runtime": 19.0555, | |
| "eval_samples_per_second": 104.957, | |
| "eval_steps_per_second": 1.679, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 2.2476829330738902e-05, | |
| "loss": 2.0102, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "eval_loss": 2.2210681438446045, | |
| "eval_runtime": 19.4253, | |
| "eval_samples_per_second": 102.958, | |
| "eval_steps_per_second": 1.647, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.231681089661529e-05, | |
| "loss": 2.0226, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 2.2446329593658447, | |
| "eval_runtime": 19.1758, | |
| "eval_samples_per_second": 104.298, | |
| "eval_steps_per_second": 1.669, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 2.215679246249168e-05, | |
| "loss": 2.0293, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_loss": 2.2327494621276855, | |
| "eval_runtime": 19.0577, | |
| "eval_samples_per_second": 104.945, | |
| "eval_steps_per_second": 1.679, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 2.199677402836807e-05, | |
| "loss": 2.0269, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_loss": 2.223355293273926, | |
| "eval_runtime": 19.372, | |
| "eval_samples_per_second": 103.242, | |
| "eval_steps_per_second": 1.652, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.1836755594244456e-05, | |
| "loss": 2.0232, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 2.2283060550689697, | |
| "eval_runtime": 19.4986, | |
| "eval_samples_per_second": 102.572, | |
| "eval_steps_per_second": 1.641, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 2.167673716012085e-05, | |
| "loss": 2.0155, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "eval_loss": 2.241269588470459, | |
| "eval_runtime": 19.5594, | |
| "eval_samples_per_second": 102.253, | |
| "eval_steps_per_second": 1.636, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 2.1516718725997238e-05, | |
| "loss": 2.0148, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_loss": 2.2584030628204346, | |
| "eval_runtime": 18.9767, | |
| "eval_samples_per_second": 105.392, | |
| "eval_steps_per_second": 1.686, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.1356700291873623e-05, | |
| "loss": 2.0167, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 2.2308297157287598, | |
| "eval_runtime": 19.318, | |
| "eval_samples_per_second": 103.531, | |
| "eval_steps_per_second": 1.656, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.1196681857750016e-05, | |
| "loss": 2.0204, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "eval_loss": 2.2320470809936523, | |
| "eval_runtime": 19.5088, | |
| "eval_samples_per_second": 102.518, | |
| "eval_steps_per_second": 1.64, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.1036663423626402e-05, | |
| "loss": 2.014, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 2.25752854347229, | |
| "eval_runtime": 19.1454, | |
| "eval_samples_per_second": 104.464, | |
| "eval_steps_per_second": 1.671, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 2.087664498950279e-05, | |
| "loss": 2.0149, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_loss": 2.2161190509796143, | |
| "eval_runtime": 19.0879, | |
| "eval_samples_per_second": 104.779, | |
| "eval_steps_per_second": 1.676, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 2.071662655537918e-05, | |
| "loss": 2.0082, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 2.2062742710113525, | |
| "eval_runtime": 19.2713, | |
| "eval_samples_per_second": 103.781, | |
| "eval_steps_per_second": 1.66, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.055660812125557e-05, | |
| "loss": 2.0017, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 2.2289586067199707, | |
| "eval_runtime": 19.4499, | |
| "eval_samples_per_second": 102.828, | |
| "eval_steps_per_second": 1.645, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 2.039658968713196e-05, | |
| "loss": 2.0146, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "eval_loss": 2.2288384437561035, | |
| "eval_runtime": 19.335, | |
| "eval_samples_per_second": 103.439, | |
| "eval_steps_per_second": 1.655, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 2.0236571253008348e-05, | |
| "loss": 2.024, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_loss": 2.194934606552124, | |
| "eval_runtime": 19.5009, | |
| "eval_samples_per_second": 102.559, | |
| "eval_steps_per_second": 1.641, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.0076552818884737e-05, | |
| "loss": 2.0016, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 2.197631597518921, | |
| "eval_runtime": 19.2128, | |
| "eval_samples_per_second": 104.097, | |
| "eval_steps_per_second": 1.666, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.9916534384761126e-05, | |
| "loss": 2.0066, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_loss": 2.238746166229248, | |
| "eval_runtime": 19.4524, | |
| "eval_samples_per_second": 102.815, | |
| "eval_steps_per_second": 1.645, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 1.9756515950637515e-05, | |
| "loss": 2.0168, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_loss": 2.2261757850646973, | |
| "eval_runtime": 19.645, | |
| "eval_samples_per_second": 101.807, | |
| "eval_steps_per_second": 1.629, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.9596497516513904e-05, | |
| "loss": 2.0023, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 2.2070722579956055, | |
| "eval_runtime": 19.0874, | |
| "eval_samples_per_second": 104.781, | |
| "eval_steps_per_second": 1.676, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.943647908239029e-05, | |
| "loss": 1.9917, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_loss": 2.2613461017608643, | |
| "eval_runtime": 19.1099, | |
| "eval_samples_per_second": 104.658, | |
| "eval_steps_per_second": 1.675, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.927646064826668e-05, | |
| "loss": 2.01, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "eval_loss": 2.2324349880218506, | |
| "eval_runtime": 20.8611, | |
| "eval_samples_per_second": 95.872, | |
| "eval_steps_per_second": 1.534, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.9116442214143072e-05, | |
| "loss": 2.0023, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 2.2707834243774414, | |
| "eval_runtime": 19.7356, | |
| "eval_samples_per_second": 101.34, | |
| "eval_steps_per_second": 1.621, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.8956423780019458e-05, | |
| "loss": 2.0037, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_loss": 2.2384769916534424, | |
| "eval_runtime": 19.0414, | |
| "eval_samples_per_second": 105.034, | |
| "eval_steps_per_second": 1.681, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.8796405345895847e-05, | |
| "loss": 1.9994, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 2.192796230316162, | |
| "eval_runtime": 19.0496, | |
| "eval_samples_per_second": 104.989, | |
| "eval_steps_per_second": 1.68, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.863638691177224e-05, | |
| "loss": 1.994, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_loss": 2.170961618423462, | |
| "eval_runtime": 19.6903, | |
| "eval_samples_per_second": 101.573, | |
| "eval_steps_per_second": 1.625, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.8476368477648625e-05, | |
| "loss": 2.0016, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_loss": 2.2660317420959473, | |
| "eval_runtime": 19.6654, | |
| "eval_samples_per_second": 101.702, | |
| "eval_steps_per_second": 1.627, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.8316350043525015e-05, | |
| "loss": 2.0044, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 2.204163074493408, | |
| "eval_runtime": 18.9759, | |
| "eval_samples_per_second": 105.397, | |
| "eval_steps_per_second": 1.686, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.8156331609401404e-05, | |
| "loss": 1.9962, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_loss": 2.214494228363037, | |
| "eval_runtime": 19.1044, | |
| "eval_samples_per_second": 104.688, | |
| "eval_steps_per_second": 1.675, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.7996313175277793e-05, | |
| "loss": 2.002, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_loss": 2.231771230697632, | |
| "eval_runtime": 19.3683, | |
| "eval_samples_per_second": 103.262, | |
| "eval_steps_per_second": 1.652, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.7836294741154182e-05, | |
| "loss": 1.9933, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 2.2037816047668457, | |
| "eval_runtime": 19.3894, | |
| "eval_samples_per_second": 103.149, | |
| "eval_steps_per_second": 1.65, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "learning_rate": 1.767627630703057e-05, | |
| "loss": 2.01, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_loss": 2.1932146549224854, | |
| "eval_runtime": 19.0804, | |
| "eval_samples_per_second": 104.819, | |
| "eval_steps_per_second": 1.677, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.751625787290696e-05, | |
| "loss": 1.9876, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_loss": 2.1909868717193604, | |
| "eval_runtime": 19.2334, | |
| "eval_samples_per_second": 103.986, | |
| "eval_steps_per_second": 1.664, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.7356239438783346e-05, | |
| "loss": 1.9959, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 2.226149559020996, | |
| "eval_runtime": 19.403, | |
| "eval_samples_per_second": 103.077, | |
| "eval_steps_per_second": 1.649, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.719622100465974e-05, | |
| "loss": 1.9966, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_loss": 2.250934600830078, | |
| "eval_runtime": 19.4964, | |
| "eval_samples_per_second": 102.583, | |
| "eval_steps_per_second": 1.641, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.7036202570536128e-05, | |
| "loss": 2.001, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 2.1994211673736572, | |
| "eval_runtime": 19.1839, | |
| "eval_samples_per_second": 104.254, | |
| "eval_steps_per_second": 1.668, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 1.6876184136412514e-05, | |
| "loss": 1.9883, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_loss": 2.196751356124878, | |
| "eval_runtime": 19.6979, | |
| "eval_samples_per_second": 101.534, | |
| "eval_steps_per_second": 1.625, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.6716165702288906e-05, | |
| "loss": 1.9968, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_loss": 2.248135805130005, | |
| "eval_runtime": 19.2411, | |
| "eval_samples_per_second": 103.944, | |
| "eval_steps_per_second": 1.663, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.6556147268165292e-05, | |
| "loss": 1.9951, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 2.213362216949463, | |
| "eval_runtime": 19.146, | |
| "eval_samples_per_second": 104.46, | |
| "eval_steps_per_second": 1.671, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 1.639612883404168e-05, | |
| "loss": 1.9941, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_loss": 2.219302177429199, | |
| "eval_runtime": 19.0054, | |
| "eval_samples_per_second": 105.233, | |
| "eval_steps_per_second": 1.684, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.6236110399918074e-05, | |
| "loss": 1.9875, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_loss": 2.2148916721343994, | |
| "eval_runtime": 19.4732, | |
| "eval_samples_per_second": 102.705, | |
| "eval_steps_per_second": 1.643, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.607609196579446e-05, | |
| "loss": 2.0026, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 2.197999954223633, | |
| "eval_runtime": 19.3649, | |
| "eval_samples_per_second": 103.28, | |
| "eval_steps_per_second": 1.652, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.591607353167085e-05, | |
| "loss": 1.9908, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_loss": 2.2245354652404785, | |
| "eval_runtime": 19.4688, | |
| "eval_samples_per_second": 102.728, | |
| "eval_steps_per_second": 1.644, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.5756055097547238e-05, | |
| "loss": 1.979, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_loss": 2.186586856842041, | |
| "eval_runtime": 19.6234, | |
| "eval_samples_per_second": 101.919, | |
| "eval_steps_per_second": 1.631, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.5596036663423627e-05, | |
| "loss": 1.99, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 2.182631015777588, | |
| "eval_runtime": 19.4018, | |
| "eval_samples_per_second": 103.083, | |
| "eval_steps_per_second": 1.649, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 1.5436018229300017e-05, | |
| "loss": 1.9816, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_loss": 2.187858819961548, | |
| "eval_runtime": 19.4098, | |
| "eval_samples_per_second": 103.041, | |
| "eval_steps_per_second": 1.649, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.5275999795176406e-05, | |
| "loss": 1.989, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_loss": 2.232002019882202, | |
| "eval_runtime": 19.4529, | |
| "eval_samples_per_second": 102.813, | |
| "eval_steps_per_second": 1.645, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.5115981361052795e-05, | |
| "loss": 1.9931, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 2.1929688453674316, | |
| "eval_runtime": 19.3402, | |
| "eval_samples_per_second": 103.411, | |
| "eval_steps_per_second": 1.655, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 1.4955962926929182e-05, | |
| "loss": 1.9804, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_loss": 2.2313404083251953, | |
| "eval_runtime": 19.6691, | |
| "eval_samples_per_second": 101.682, | |
| "eval_steps_per_second": 1.627, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.4795944492805572e-05, | |
| "loss": 1.9902, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 2.1808815002441406, | |
| "eval_runtime": 19.8875, | |
| "eval_samples_per_second": 100.566, | |
| "eval_steps_per_second": 1.609, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.4635926058681963e-05, | |
| "loss": 1.9791, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "eval_loss": 2.1454262733459473, | |
| "eval_runtime": 19.9595, | |
| "eval_samples_per_second": 100.203, | |
| "eval_steps_per_second": 1.603, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.4475907624558348e-05, | |
| "loss": 1.9702, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_loss": 2.220078468322754, | |
| "eval_runtime": 19.5477, | |
| "eval_samples_per_second": 102.314, | |
| "eval_steps_per_second": 1.637, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.431588919043474e-05, | |
| "loss": 1.9848, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 2.198873281478882, | |
| "eval_runtime": 19.8165, | |
| "eval_samples_per_second": 100.926, | |
| "eval_steps_per_second": 1.615, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.4155870756311127e-05, | |
| "loss": 1.9813, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_loss": 2.197327136993408, | |
| "eval_runtime": 21.9598, | |
| "eval_samples_per_second": 91.076, | |
| "eval_steps_per_second": 1.457, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.3995852322187516e-05, | |
| "loss": 1.9784, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "eval_loss": 2.189138889312744, | |
| "eval_runtime": 19.3319, | |
| "eval_samples_per_second": 103.456, | |
| "eval_steps_per_second": 1.655, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.3835833888063907e-05, | |
| "loss": 1.9766, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 2.20912504196167, | |
| "eval_runtime": 19.5253, | |
| "eval_samples_per_second": 102.431, | |
| "eval_steps_per_second": 1.639, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 1.3675815453940294e-05, | |
| "loss": 1.9732, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_loss": 2.140838384628296, | |
| "eval_runtime": 19.1497, | |
| "eval_samples_per_second": 104.44, | |
| "eval_steps_per_second": 1.671, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.3515797019816683e-05, | |
| "loss": 1.9621, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 2.226170063018799, | |
| "eval_runtime": 19.0166, | |
| "eval_samples_per_second": 105.171, | |
| "eval_steps_per_second": 1.683, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.3355778585693071e-05, | |
| "loss": 1.9739, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 2.2281548976898193, | |
| "eval_runtime": 19.3581, | |
| "eval_samples_per_second": 103.316, | |
| "eval_steps_per_second": 1.653, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.3195760151569462e-05, | |
| "loss": 1.968, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_loss": 2.205911636352539, | |
| "eval_runtime": 19.2592, | |
| "eval_samples_per_second": 103.846, | |
| "eval_steps_per_second": 1.662, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.3035741717445851e-05, | |
| "loss": 1.9656, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 2.2183620929718018, | |
| "eval_runtime": 19.2973, | |
| "eval_samples_per_second": 103.641, | |
| "eval_steps_per_second": 1.658, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.2875723283322239e-05, | |
| "loss": 1.9728, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_loss": 2.1920948028564453, | |
| "eval_runtime": 19.4211, | |
| "eval_samples_per_second": 102.981, | |
| "eval_steps_per_second": 1.648, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.271570484919863e-05, | |
| "loss": 1.9577, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "eval_loss": 2.191782236099243, | |
| "eval_runtime": 19.3617, | |
| "eval_samples_per_second": 103.296, | |
| "eval_steps_per_second": 1.653, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.2555686415075015e-05, | |
| "loss": 1.9777, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 2.209336042404175, | |
| "eval_runtime": 19.3939, | |
| "eval_samples_per_second": 103.125, | |
| "eval_steps_per_second": 1.65, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.2395667980951406e-05, | |
| "loss": 1.9662, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_loss": 2.152353048324585, | |
| "eval_runtime": 19.7245, | |
| "eval_samples_per_second": 101.397, | |
| "eval_steps_per_second": 1.622, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 1.2235649546827795e-05, | |
| "loss": 1.9681, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "eval_loss": 2.1999175548553467, | |
| "eval_runtime": 18.9532, | |
| "eval_samples_per_second": 105.523, | |
| "eval_steps_per_second": 1.688, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.2075631112704184e-05, | |
| "loss": 1.9543, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 2.1981661319732666, | |
| "eval_runtime": 19.2785, | |
| "eval_samples_per_second": 103.742, | |
| "eval_steps_per_second": 1.66, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.1915612678580574e-05, | |
| "loss": 1.9636, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_loss": 2.197685956954956, | |
| "eval_runtime": 19.3506, | |
| "eval_samples_per_second": 103.356, | |
| "eval_steps_per_second": 1.654, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.1755594244456961e-05, | |
| "loss": 1.9623, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_loss": 2.207620620727539, | |
| "eval_runtime": 19.1912, | |
| "eval_samples_per_second": 104.214, | |
| "eval_steps_per_second": 1.667, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.159557581033335e-05, | |
| "loss": 1.9645, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 2.1756386756896973, | |
| "eval_runtime": 19.1978, | |
| "eval_samples_per_second": 104.178, | |
| "eval_steps_per_second": 1.667, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.143555737620974e-05, | |
| "loss": 1.9676, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_loss": 2.1699678897857666, | |
| "eval_runtime": 19.2027, | |
| "eval_samples_per_second": 104.152, | |
| "eval_steps_per_second": 1.666, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.1275538942086129e-05, | |
| "loss": 1.9552, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_loss": 2.1813385486602783, | |
| "eval_runtime": 19.1939, | |
| "eval_samples_per_second": 104.2, | |
| "eval_steps_per_second": 1.667, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.1115520507962518e-05, | |
| "loss": 1.9675, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 2.1804428100585938, | |
| "eval_runtime": 19.3246, | |
| "eval_samples_per_second": 103.495, | |
| "eval_steps_per_second": 1.656, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 1.0955502073838907e-05, | |
| "loss": 1.9707, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_loss": 2.1776347160339355, | |
| "eval_runtime": 19.4613, | |
| "eval_samples_per_second": 102.768, | |
| "eval_steps_per_second": 1.644, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.0795483639715295e-05, | |
| "loss": 1.9609, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 2.2101809978485107, | |
| "eval_runtime": 19.232, | |
| "eval_samples_per_second": 103.993, | |
| "eval_steps_per_second": 1.664, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.0635465205591686e-05, | |
| "loss": 1.9584, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_loss": 2.18208384513855, | |
| "eval_runtime": 19.1408, | |
| "eval_samples_per_second": 104.489, | |
| "eval_steps_per_second": 1.672, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.0475446771468075e-05, | |
| "loss": 1.9568, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_loss": 2.164984941482544, | |
| "eval_runtime": 19.2986, | |
| "eval_samples_per_second": 103.634, | |
| "eval_steps_per_second": 1.658, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.0315428337344462e-05, | |
| "loss": 1.9514, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 2.218735456466675, | |
| "eval_runtime": 19.5707, | |
| "eval_samples_per_second": 102.193, | |
| "eval_steps_per_second": 1.635, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.0155409903220851e-05, | |
| "loss": 1.9567, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_loss": 2.1572988033294678, | |
| "eval_runtime": 19.0634, | |
| "eval_samples_per_second": 104.913, | |
| "eval_steps_per_second": 1.679, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 9.99539146909724e-06, | |
| "loss": 1.9555, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_loss": 2.1475002765655518, | |
| "eval_runtime": 19.0267, | |
| "eval_samples_per_second": 105.115, | |
| "eval_steps_per_second": 1.682, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.83537303497363e-06, | |
| "loss": 1.965, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 2.1785731315612793, | |
| "eval_runtime": 19.7697, | |
| "eval_samples_per_second": 101.165, | |
| "eval_steps_per_second": 1.619, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 9.675354600850019e-06, | |
| "loss": 1.9508, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_loss": 2.1723153591156006, | |
| "eval_runtime": 19.1786, | |
| "eval_samples_per_second": 104.283, | |
| "eval_steps_per_second": 1.669, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 9.515336166726408e-06, | |
| "loss": 1.9522, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_loss": 2.180307626724243, | |
| "eval_runtime": 18.9009, | |
| "eval_samples_per_second": 105.815, | |
| "eval_steps_per_second": 1.693, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.355317732602796e-06, | |
| "loss": 1.9637, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 2.179806709289551, | |
| "eval_runtime": 19.3455, | |
| "eval_samples_per_second": 103.383, | |
| "eval_steps_per_second": 1.654, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.195299298479185e-06, | |
| "loss": 1.9588, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_loss": 2.200853109359741, | |
| "eval_runtime": 19.4782, | |
| "eval_samples_per_second": 102.679, | |
| "eval_steps_per_second": 1.643, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 9.035280864355574e-06, | |
| "loss": 1.9553, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 2.1626343727111816, | |
| "eval_runtime": 19.24, | |
| "eval_samples_per_second": 103.95, | |
| "eval_steps_per_second": 1.663, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 8.875262430231963e-06, | |
| "loss": 1.946, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_loss": 2.1843950748443604, | |
| "eval_runtime": 19.1181, | |
| "eval_samples_per_second": 104.613, | |
| "eval_steps_per_second": 1.674, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 8.715243996108352e-06, | |
| "loss": 1.9493, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_loss": 2.150207757949829, | |
| "eval_runtime": 19.2502, | |
| "eval_samples_per_second": 103.895, | |
| "eval_steps_per_second": 1.662, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 8.55522556198474e-06, | |
| "loss": 1.9442, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 2.1614534854888916, | |
| "eval_runtime": 19.2393, | |
| "eval_samples_per_second": 103.954, | |
| "eval_steps_per_second": 1.663, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 8.395207127861129e-06, | |
| "loss": 1.945, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_loss": 2.178889751434326, | |
| "eval_runtime": 19.4657, | |
| "eval_samples_per_second": 102.745, | |
| "eval_steps_per_second": 1.644, | |
| "step": 260000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 8.23518869373752e-06, | |
| "loss": 1.9368, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "eval_loss": 2.172461986541748, | |
| "eval_runtime": 19.2788, | |
| "eval_samples_per_second": 103.741, | |
| "eval_steps_per_second": 1.66, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.075170259613907e-06, | |
| "loss": 1.9393, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 2.169734001159668, | |
| "eval_runtime": 19.3666, | |
| "eval_samples_per_second": 103.27, | |
| "eval_steps_per_second": 1.652, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 7.915151825490297e-06, | |
| "loss": 1.9525, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_loss": 2.1597206592559814, | |
| "eval_runtime": 19.3459, | |
| "eval_samples_per_second": 103.381, | |
| "eval_steps_per_second": 1.654, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 7.755133391366686e-06, | |
| "loss": 1.9444, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_loss": 2.1798765659332275, | |
| "eval_runtime": 19.0083, | |
| "eval_samples_per_second": 105.217, | |
| "eval_steps_per_second": 1.683, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.595114957243074e-06, | |
| "loss": 1.9352, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 2.164872169494629, | |
| "eval_runtime": 19.1384, | |
| "eval_samples_per_second": 104.502, | |
| "eval_steps_per_second": 1.672, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 7.435096523119464e-06, | |
| "loss": 1.9537, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_loss": 2.1663596630096436, | |
| "eval_runtime": 19.6791, | |
| "eval_samples_per_second": 101.63, | |
| "eval_steps_per_second": 1.626, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 7.2750780889958526e-06, | |
| "loss": 1.9399, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_loss": 2.1855850219726562, | |
| "eval_runtime": 19.3954, | |
| "eval_samples_per_second": 103.117, | |
| "eval_steps_per_second": 1.65, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.115059654872242e-06, | |
| "loss": 1.9325, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 2.1838717460632324, | |
| "eval_runtime": 19.1074, | |
| "eval_samples_per_second": 104.671, | |
| "eval_steps_per_second": 1.675, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 6.95504122074863e-06, | |
| "loss": 1.9466, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "eval_loss": 2.1524887084960938, | |
| "eval_runtime": 19.375, | |
| "eval_samples_per_second": 103.226, | |
| "eval_steps_per_second": 1.652, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 6.79502278662502e-06, | |
| "loss": 1.9403, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 2.1773369312286377, | |
| "eval_runtime": 19.1103, | |
| "eval_samples_per_second": 104.656, | |
| "eval_steps_per_second": 1.674, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 6.6350043525014085e-06, | |
| "loss": 1.9391, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_loss": 2.212693452835083, | |
| "eval_runtime": 19.2143, | |
| "eval_samples_per_second": 104.089, | |
| "eval_steps_per_second": 1.665, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 6.474985918377798e-06, | |
| "loss": 1.9419, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_loss": 2.1781909465789795, | |
| "eval_runtime": 19.4708, | |
| "eval_samples_per_second": 102.718, | |
| "eval_steps_per_second": 1.643, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 6.314967484254186e-06, | |
| "loss": 1.9454, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 2.1962130069732666, | |
| "eval_runtime": 18.6565, | |
| "eval_samples_per_second": 107.201, | |
| "eval_steps_per_second": 1.715, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 6.154949050130575e-06, | |
| "loss": 1.946, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_loss": 2.157792091369629, | |
| "eval_runtime": 19.0429, | |
| "eval_samples_per_second": 105.026, | |
| "eval_steps_per_second": 1.68, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 5.994930616006964e-06, | |
| "loss": 1.9339, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_loss": 2.190920829772949, | |
| "eval_runtime": 18.7174, | |
| "eval_samples_per_second": 106.853, | |
| "eval_steps_per_second": 1.71, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 5.8349121818833536e-06, | |
| "loss": 1.9289, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 2.169802665710449, | |
| "eval_runtime": 19.7624, | |
| "eval_samples_per_second": 101.202, | |
| "eval_steps_per_second": 1.619, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 5.674893747759742e-06, | |
| "loss": 1.9284, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_loss": 2.149372100830078, | |
| "eval_runtime": 18.847, | |
| "eval_samples_per_second": 106.118, | |
| "eval_steps_per_second": 1.698, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 5.514875313636131e-06, | |
| "loss": 1.9423, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_loss": 2.163377046585083, | |
| "eval_runtime": 19.097, | |
| "eval_samples_per_second": 104.728, | |
| "eval_steps_per_second": 1.676, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 5.35485687951252e-06, | |
| "loss": 1.9317, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 2.129027843475342, | |
| "eval_runtime": 18.715, | |
| "eval_samples_per_second": 106.866, | |
| "eval_steps_per_second": 1.71, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 5.194838445388909e-06, | |
| "loss": 1.9216, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_loss": 2.171983480453491, | |
| "eval_runtime": 18.8986, | |
| "eval_samples_per_second": 105.828, | |
| "eval_steps_per_second": 1.693, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 5.034820011265298e-06, | |
| "loss": 1.9176, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "eval_loss": 2.1561877727508545, | |
| "eval_runtime": 18.6229, | |
| "eval_samples_per_second": 107.395, | |
| "eval_steps_per_second": 1.718, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 4.874801577141687e-06, | |
| "loss": 1.9345, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 2.1655592918395996, | |
| "eval_runtime": 18.6917, | |
| "eval_samples_per_second": 106.999, | |
| "eval_steps_per_second": 1.712, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 4.714783143018076e-06, | |
| "loss": 1.9431, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_loss": 2.1130497455596924, | |
| "eval_runtime": 18.7533, | |
| "eval_samples_per_second": 106.648, | |
| "eval_steps_per_second": 1.706, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.5547647088944646e-06, | |
| "loss": 1.936, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 2.1281943321228027, | |
| "eval_runtime": 18.4643, | |
| "eval_samples_per_second": 108.317, | |
| "eval_steps_per_second": 1.733, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 4.394746274770854e-06, | |
| "loss": 1.9344, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_loss": 2.142157554626465, | |
| "eval_runtime": 18.6731, | |
| "eval_samples_per_second": 107.106, | |
| "eval_steps_per_second": 1.714, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.234727840647243e-06, | |
| "loss": 1.9237, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_loss": 2.1462085247039795, | |
| "eval_runtime": 18.787, | |
| "eval_samples_per_second": 106.457, | |
| "eval_steps_per_second": 1.703, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 4.074709406523631e-06, | |
| "loss": 1.9309, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 2.1435041427612305, | |
| "eval_runtime": 18.7845, | |
| "eval_samples_per_second": 106.471, | |
| "eval_steps_per_second": 1.704, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 3.914690972400021e-06, | |
| "loss": 1.9239, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_loss": 2.152646064758301, | |
| "eval_runtime": 18.6983, | |
| "eval_samples_per_second": 106.961, | |
| "eval_steps_per_second": 1.711, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 3.7546725382764097e-06, | |
| "loss": 1.9168, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_loss": 2.1280956268310547, | |
| "eval_runtime": 18.8639, | |
| "eval_samples_per_second": 106.023, | |
| "eval_steps_per_second": 1.696, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.5946541041527984e-06, | |
| "loss": 1.9232, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 2.143430471420288, | |
| "eval_runtime": 18.873, | |
| "eval_samples_per_second": 105.971, | |
| "eval_steps_per_second": 1.696, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.4346356700291876e-06, | |
| "loss": 1.9338, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_loss": 2.1642520427703857, | |
| "eval_runtime": 18.6105, | |
| "eval_samples_per_second": 107.466, | |
| "eval_steps_per_second": 1.719, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 3.2746172359055764e-06, | |
| "loss": 1.9241, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_loss": 2.120400905609131, | |
| "eval_runtime": 18.6654, | |
| "eval_samples_per_second": 107.15, | |
| "eval_steps_per_second": 1.714, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 3.114598801781965e-06, | |
| "loss": 1.9209, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 2.1418490409851074, | |
| "eval_runtime": 18.986, | |
| "eval_samples_per_second": 105.341, | |
| "eval_steps_per_second": 1.685, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 2.9545803676583543e-06, | |
| "loss": 1.928, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_loss": 2.1255481243133545, | |
| "eval_runtime": 18.6322, | |
| "eval_samples_per_second": 107.341, | |
| "eval_steps_per_second": 1.717, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.7945619335347435e-06, | |
| "loss": 1.9482, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 2.185188055038452, | |
| "eval_runtime": 18.6065, | |
| "eval_samples_per_second": 107.489, | |
| "eval_steps_per_second": 1.72, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 2.6345434994111323e-06, | |
| "loss": 1.9276, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_loss": 2.1754209995269775, | |
| "eval_runtime": 18.6892, | |
| "eval_samples_per_second": 107.014, | |
| "eval_steps_per_second": 1.712, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 2.4745250652875215e-06, | |
| "loss": 1.9214, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_loss": 2.124568462371826, | |
| "eval_runtime": 18.6607, | |
| "eval_samples_per_second": 107.177, | |
| "eval_steps_per_second": 1.715, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.3145066311639102e-06, | |
| "loss": 1.9296, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_loss": 2.1418752670288086, | |
| "eval_runtime": 18.8993, | |
| "eval_samples_per_second": 105.824, | |
| "eval_steps_per_second": 1.693, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 2.154488197040299e-06, | |
| "loss": 1.9182, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_loss": 2.1427695751190186, | |
| "eval_runtime": 18.6439, | |
| "eval_samples_per_second": 107.273, | |
| "eval_steps_per_second": 1.716, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.994469762916688e-06, | |
| "loss": 1.9172, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_loss": 2.17488956451416, | |
| "eval_runtime": 20.0248, | |
| "eval_samples_per_second": 99.876, | |
| "eval_steps_per_second": 1.598, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.834451328793077e-06, | |
| "loss": 1.9054, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 2.1516401767730713, | |
| "eval_runtime": 19.1509, | |
| "eval_samples_per_second": 104.434, | |
| "eval_steps_per_second": 1.671, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.674432894669466e-06, | |
| "loss": 1.9209, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "eval_loss": 2.1247944831848145, | |
| "eval_runtime": 19.0766, | |
| "eval_samples_per_second": 104.84, | |
| "eval_steps_per_second": 1.677, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.5144144605458551e-06, | |
| "loss": 1.9191, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_loss": 2.1422977447509766, | |
| "eval_runtime": 19.0887, | |
| "eval_samples_per_second": 104.774, | |
| "eval_steps_per_second": 1.676, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.354396026422244e-06, | |
| "loss": 1.9143, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 2.1302106380462646, | |
| "eval_runtime": 19.5033, | |
| "eval_samples_per_second": 102.547, | |
| "eval_steps_per_second": 1.641, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 1.1943775922986329e-06, | |
| "loss": 1.9163, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_loss": 2.16552472114563, | |
| "eval_runtime": 18.815, | |
| "eval_samples_per_second": 106.298, | |
| "eval_steps_per_second": 1.701, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.0343591581750219e-06, | |
| "loss": 1.915, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_loss": 2.1272425651550293, | |
| "eval_runtime": 19.1159, | |
| "eval_samples_per_second": 104.625, | |
| "eval_steps_per_second": 1.674, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 8.743407240514107e-07, | |
| "loss": 1.9193, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 2.151264190673828, | |
| "eval_runtime": 18.961, | |
| "eval_samples_per_second": 105.48, | |
| "eval_steps_per_second": 1.688, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 7.143222899277997e-07, | |
| "loss": 1.9238, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_loss": 2.145237922668457, | |
| "eval_runtime": 19.4596, | |
| "eval_samples_per_second": 102.777, | |
| "eval_steps_per_second": 1.644, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 5.543038558041887e-07, | |
| "loss": 1.9129, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 2.132681369781494, | |
| "eval_runtime": 18.9129, | |
| "eval_samples_per_second": 105.748, | |
| "eval_steps_per_second": 1.692, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.9428542168057766e-07, | |
| "loss": 1.92, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_loss": 2.1479594707489014, | |
| "eval_runtime": 18.8663, | |
| "eval_samples_per_second": 106.009, | |
| "eval_steps_per_second": 1.696, | |
| "step": 310000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 2.342669875569666e-07, | |
| "loss": 1.9098, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_loss": 2.171926736831665, | |
| "eval_runtime": 19.0151, | |
| "eval_samples_per_second": 105.179, | |
| "eval_steps_per_second": 1.683, | |
| "step": 311000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 7.424855343335553e-08, | |
| "loss": 1.9105, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.1461212635040283, | |
| "eval_runtime": 19.4871, | |
| "eval_samples_per_second": 102.632, | |
| "eval_steps_per_second": 1.642, | |
| "step": 312000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.6609486746206498e-05, | |
| "loss": 1.9453, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 2.190183162689209, | |
| "eval_runtime": 16.3864, | |
| "eval_samples_per_second": 122.052, | |
| "eval_steps_per_second": 1.953, | |
| "step": 313000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.650280779012409e-05, | |
| "loss": 1.9458, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "eval_loss": 2.1692402362823486, | |
| "eval_runtime": 15.9646, | |
| "eval_samples_per_second": 125.277, | |
| "eval_steps_per_second": 2.004, | |
| "step": 314000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.639612883404168e-05, | |
| "loss": 1.9428, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_loss": 2.1538236141204834, | |
| "eval_runtime": 16.0153, | |
| "eval_samples_per_second": 124.881, | |
| "eval_steps_per_second": 1.998, | |
| "step": 315000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.6289449877959276e-05, | |
| "loss": 1.9488, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_loss": 2.153665542602539, | |
| "eval_runtime": 16.1132, | |
| "eval_samples_per_second": 124.122, | |
| "eval_steps_per_second": 1.986, | |
| "step": 316000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "learning_rate": 1.6182770921876865e-05, | |
| "loss": 1.9437, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "eval_loss": 2.1973447799682617, | |
| "eval_runtime": 17.5461, | |
| "eval_samples_per_second": 113.986, | |
| "eval_steps_per_second": 1.824, | |
| "step": 317000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.607609196579446e-05, | |
| "loss": 1.9487, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 2.1677041053771973, | |
| "eval_runtime": 15.9539, | |
| "eval_samples_per_second": 125.361, | |
| "eval_steps_per_second": 2.006, | |
| "step": 318000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.596941300971205e-05, | |
| "loss": 1.9559, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_loss": 2.155820369720459, | |
| "eval_runtime": 16.1853, | |
| "eval_samples_per_second": 123.569, | |
| "eval_steps_per_second": 1.977, | |
| "step": 319000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5862734053629647e-05, | |
| "loss": 1.9662, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_loss": 2.1629369258880615, | |
| "eval_runtime": 16.6642, | |
| "eval_samples_per_second": 120.018, | |
| "eval_steps_per_second": 1.92, | |
| "step": 320000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 1.5756055097547238e-05, | |
| "loss": 1.9556, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_loss": 2.1815614700317383, | |
| "eval_runtime": 16.5814, | |
| "eval_samples_per_second": 120.617, | |
| "eval_steps_per_second": 1.93, | |
| "step": 321000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.564937614146483e-05, | |
| "loss": 1.9512, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_loss": 2.1164066791534424, | |
| "eval_runtime": 15.9298, | |
| "eval_samples_per_second": 125.551, | |
| "eval_steps_per_second": 2.009, | |
| "step": 322000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5542697185382425e-05, | |
| "loss": 1.9544, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 2.165865659713745, | |
| "eval_runtime": 15.883, | |
| "eval_samples_per_second": 125.921, | |
| "eval_steps_per_second": 2.015, | |
| "step": 323000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.5436018229300017e-05, | |
| "loss": 1.9568, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_loss": 2.1747090816497803, | |
| "eval_runtime": 15.9331, | |
| "eval_samples_per_second": 125.525, | |
| "eval_steps_per_second": 2.008, | |
| "step": 324000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 1.5329339273217608e-05, | |
| "loss": 1.9449, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "eval_loss": 2.1862990856170654, | |
| "eval_runtime": 16.6952, | |
| "eval_samples_per_second": 119.795, | |
| "eval_steps_per_second": 1.917, | |
| "step": 325000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5222660317135202e-05, | |
| "loss": 1.9491, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 2.1621673107147217, | |
| "eval_runtime": 15.9325, | |
| "eval_samples_per_second": 125.529, | |
| "eval_steps_per_second": 2.008, | |
| "step": 326000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5115981361052795e-05, | |
| "loss": 1.9526, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_loss": 2.1826863288879395, | |
| "eval_runtime": 15.8228, | |
| "eval_samples_per_second": 126.4, | |
| "eval_steps_per_second": 2.022, | |
| "step": 327000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 1.5009302404970388e-05, | |
| "loss": 1.952, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_loss": 2.1913392543792725, | |
| "eval_runtime": 16.1458, | |
| "eval_samples_per_second": 123.871, | |
| "eval_steps_per_second": 1.982, | |
| "step": 328000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4902623448887978e-05, | |
| "loss": 1.9545, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 2.18023681640625, | |
| "eval_runtime": 16.3141, | |
| "eval_samples_per_second": 122.593, | |
| "eval_steps_per_second": 1.961, | |
| "step": 329000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4795944492805572e-05, | |
| "loss": 1.9616, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_loss": 2.178854465484619, | |
| "eval_runtime": 15.9442, | |
| "eval_samples_per_second": 125.438, | |
| "eval_steps_per_second": 2.007, | |
| "step": 330000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.4689265536723165e-05, | |
| "loss": 1.9515, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_loss": 2.1725854873657227, | |
| "eval_runtime": 15.939, | |
| "eval_samples_per_second": 125.478, | |
| "eval_steps_per_second": 2.008, | |
| "step": 331000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4582586580640758e-05, | |
| "loss": 1.9484, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 2.1632540225982666, | |
| "eval_runtime": 16.7042, | |
| "eval_samples_per_second": 119.731, | |
| "eval_steps_per_second": 1.916, | |
| "step": 332000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.4475907624558348e-05, | |
| "loss": 1.962, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_loss": 2.1514594554901123, | |
| "eval_runtime": 15.9872, | |
| "eval_samples_per_second": 125.1, | |
| "eval_steps_per_second": 2.002, | |
| "step": 333000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4369228668475942e-05, | |
| "loss": 1.9563, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 2.18198299407959, | |
| "eval_runtime": 16.1962, | |
| "eval_samples_per_second": 123.486, | |
| "eval_steps_per_second": 1.976, | |
| "step": 334000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4262549712393535e-05, | |
| "loss": 1.9544, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_loss": 2.168269634246826, | |
| "eval_runtime": 16.3411, | |
| "eval_samples_per_second": 122.391, | |
| "eval_steps_per_second": 1.958, | |
| "step": 335000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.4155870756311127e-05, | |
| "loss": 1.9509, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_loss": 2.157496690750122, | |
| "eval_runtime": 16.7663, | |
| "eval_samples_per_second": 119.287, | |
| "eval_steps_per_second": 1.909, | |
| "step": 336000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.404919180022872e-05, | |
| "loss": 1.9527, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 2.162778377532959, | |
| "eval_runtime": 16.4574, | |
| "eval_samples_per_second": 121.526, | |
| "eval_steps_per_second": 1.944, | |
| "step": 337000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3942512844146313e-05, | |
| "loss": 1.9455, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_loss": 2.2115304470062256, | |
| "eval_runtime": 15.9425, | |
| "eval_samples_per_second": 125.451, | |
| "eval_steps_per_second": 2.007, | |
| "step": 338000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.3835833888063907e-05, | |
| "loss": 1.9443, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_loss": 2.1575698852539062, | |
| "eval_runtime": 16.1638, | |
| "eval_samples_per_second": 123.734, | |
| "eval_steps_per_second": 1.98, | |
| "step": 339000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.3729154931981497e-05, | |
| "loss": 1.9471, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 2.163440465927124, | |
| "eval_runtime": 16.5887, | |
| "eval_samples_per_second": 120.564, | |
| "eval_steps_per_second": 1.929, | |
| "step": 340000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.362247597589909e-05, | |
| "loss": 1.9385, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_loss": 2.1808547973632812, | |
| "eval_runtime": 16.0292, | |
| "eval_samples_per_second": 124.773, | |
| "eval_steps_per_second": 1.996, | |
| "step": 341000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.3515797019816683e-05, | |
| "loss": 1.9472, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_loss": 2.1804370880126953, | |
| "eval_runtime": 16.1599, | |
| "eval_samples_per_second": 123.763, | |
| "eval_steps_per_second": 1.98, | |
| "step": 342000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3409118063734277e-05, | |
| "loss": 1.9578, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 2.172938346862793, | |
| "eval_runtime": 16.4066, | |
| "eval_samples_per_second": 121.902, | |
| "eval_steps_per_second": 1.95, | |
| "step": 343000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.3302439107651868e-05, | |
| "loss": 1.9501, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_loss": 2.1206016540527344, | |
| "eval_runtime": 16.7126, | |
| "eval_samples_per_second": 119.67, | |
| "eval_steps_per_second": 1.915, | |
| "step": 344000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3195760151569462e-05, | |
| "loss": 1.9363, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 2.1700916290283203, | |
| "eval_runtime": 15.9452, | |
| "eval_samples_per_second": 125.43, | |
| "eval_steps_per_second": 2.007, | |
| "step": 345000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3089081195487055e-05, | |
| "loss": 1.9452, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_loss": 2.1466197967529297, | |
| "eval_runtime": 16.0688, | |
| "eval_samples_per_second": 124.465, | |
| "eval_steps_per_second": 1.991, | |
| "step": 346000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.2982402239404649e-05, | |
| "loss": 1.9544, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_loss": 2.118955135345459, | |
| "eval_runtime": 16.4559, | |
| "eval_samples_per_second": 121.537, | |
| "eval_steps_per_second": 1.945, | |
| "step": 347000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2875723283322239e-05, | |
| "loss": 1.9442, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 2.2223548889160156, | |
| "eval_runtime": 15.9342, | |
| "eval_samples_per_second": 125.516, | |
| "eval_steps_per_second": 2.008, | |
| "step": 348000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.2769044327239832e-05, | |
| "loss": 1.949, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_loss": 2.1240313053131104, | |
| "eval_runtime": 16.1322, | |
| "eval_samples_per_second": 123.975, | |
| "eval_steps_per_second": 1.984, | |
| "step": 349000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2662365371157425e-05, | |
| "loss": 1.9524, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_loss": 2.2078564167022705, | |
| "eval_runtime": 15.9714, | |
| "eval_samples_per_second": 125.224, | |
| "eval_steps_per_second": 2.004, | |
| "step": 350000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2555686415075015e-05, | |
| "loss": 1.9371, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 2.1884605884552, | |
| "eval_runtime": 17.7436, | |
| "eval_samples_per_second": 112.717, | |
| "eval_steps_per_second": 1.803, | |
| "step": 351000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 1.2449007458992609e-05, | |
| "loss": 1.9474, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_loss": 2.165747880935669, | |
| "eval_runtime": 15.9774, | |
| "eval_samples_per_second": 125.177, | |
| "eval_steps_per_second": 2.003, | |
| "step": 352000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2342328502910202e-05, | |
| "loss": 1.9444, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_loss": 2.180070161819458, | |
| "eval_runtime": 15.9059, | |
| "eval_samples_per_second": 125.74, | |
| "eval_steps_per_second": 2.012, | |
| "step": 353000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2235649546827795e-05, | |
| "loss": 1.9381, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 2.195138931274414, | |
| "eval_runtime": 15.4982, | |
| "eval_samples_per_second": 129.047, | |
| "eval_steps_per_second": 2.065, | |
| "step": 354000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.2128970590745389e-05, | |
| "loss": 1.9462, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_loss": 2.197645902633667, | |
| "eval_runtime": 16.2722, | |
| "eval_samples_per_second": 122.909, | |
| "eval_steps_per_second": 1.967, | |
| "step": 355000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.202229163466298e-05, | |
| "loss": 1.9312, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_loss": 2.1800289154052734, | |
| "eval_runtime": 15.5962, | |
| "eval_samples_per_second": 128.236, | |
| "eval_steps_per_second": 2.052, | |
| "step": 356000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1915612678580574e-05, | |
| "loss": 1.9379, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 2.175736427307129, | |
| "eval_runtime": 15.9665, | |
| "eval_samples_per_second": 125.262, | |
| "eval_steps_per_second": 2.004, | |
| "step": 357000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.1808933722498165e-05, | |
| "loss": 1.9435, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "eval_loss": 2.205449104309082, | |
| "eval_runtime": 15.7121, | |
| "eval_samples_per_second": 127.291, | |
| "eval_steps_per_second": 2.037, | |
| "step": 358000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1702254766415759e-05, | |
| "loss": 1.9448, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 2.173300266265869, | |
| "eval_runtime": 16.397, | |
| "eval_samples_per_second": 121.974, | |
| "eval_steps_per_second": 1.952, | |
| "step": 359000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.159557581033335e-05, | |
| "loss": 1.9529, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_loss": 2.145735263824463, | |
| "eval_runtime": 15.5694, | |
| "eval_samples_per_second": 128.457, | |
| "eval_steps_per_second": 2.055, | |
| "step": 360000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1488896854250944e-05, | |
| "loss": 1.9444, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_loss": 2.1839778423309326, | |
| "eval_runtime": 15.6504, | |
| "eval_samples_per_second": 127.792, | |
| "eval_steps_per_second": 2.045, | |
| "step": 361000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1382217898168535e-05, | |
| "loss": 1.9439, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 2.128485918045044, | |
| "eval_runtime": 15.7866, | |
| "eval_samples_per_second": 126.69, | |
| "eval_steps_per_second": 2.027, | |
| "step": 362000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 1.1275538942086129e-05, | |
| "loss": 1.9345, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "eval_loss": 2.16981840133667, | |
| "eval_runtime": 16.0509, | |
| "eval_samples_per_second": 124.604, | |
| "eval_steps_per_second": 1.994, | |
| "step": 363000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1168859986003722e-05, | |
| "loss": 1.9355, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_loss": 2.1235830783843994, | |
| "eval_runtime": 15.5068, | |
| "eval_samples_per_second": 128.975, | |
| "eval_steps_per_second": 2.064, | |
| "step": 364000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.1062181029921315e-05, | |
| "loss": 1.9385, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 2.1465463638305664, | |
| "eval_runtime": 15.3143, | |
| "eval_samples_per_second": 130.597, | |
| "eval_steps_per_second": 2.09, | |
| "step": 365000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0955502073838907e-05, | |
| "loss": 1.9425, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_loss": 2.1613283157348633, | |
| "eval_runtime": 15.466, | |
| "eval_samples_per_second": 129.316, | |
| "eval_steps_per_second": 2.069, | |
| "step": 366000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.08488231177565e-05, | |
| "loss": 1.9304, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_loss": 2.172750949859619, | |
| "eval_runtime": 15.5842, | |
| "eval_samples_per_second": 128.335, | |
| "eval_steps_per_second": 2.053, | |
| "step": 367000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0742144161674092e-05, | |
| "loss": 1.9339, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 2.148078680038452, | |
| "eval_runtime": 15.9481, | |
| "eval_samples_per_second": 125.407, | |
| "eval_steps_per_second": 2.007, | |
| "step": 368000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0635465205591686e-05, | |
| "loss": 1.9463, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_loss": 2.1650550365448, | |
| "eval_runtime": 15.3617, | |
| "eval_samples_per_second": 130.194, | |
| "eval_steps_per_second": 2.083, | |
| "step": 369000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0528786249509277e-05, | |
| "loss": 1.9407, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 2.1432077884674072, | |
| "eval_runtime": 15.1001, | |
| "eval_samples_per_second": 132.45, | |
| "eval_steps_per_second": 2.119, | |
| "step": 370000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.0422107293426869e-05, | |
| "loss": 1.9453, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "eval_loss": 2.147706985473633, | |
| "eval_runtime": 15.9626, | |
| "eval_samples_per_second": 125.293, | |
| "eval_steps_per_second": 2.005, | |
| "step": 371000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0315428337344462e-05, | |
| "loss": 1.9368, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_loss": 2.184664249420166, | |
| "eval_runtime": 15.5454, | |
| "eval_samples_per_second": 128.656, | |
| "eval_steps_per_second": 2.058, | |
| "step": 372000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0208749381262054e-05, | |
| "loss": 1.9407, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 2.1857311725616455, | |
| "eval_runtime": 15.3498, | |
| "eval_samples_per_second": 130.295, | |
| "eval_steps_per_second": 2.085, | |
| "step": 373000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0102070425179647e-05, | |
| "loss": 1.934, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "eval_loss": 2.119173765182495, | |
| "eval_runtime": 15.4006, | |
| "eval_samples_per_second": 129.865, | |
| "eval_steps_per_second": 2.078, | |
| "step": 374000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 9.99539146909724e-06, | |
| "loss": 1.9297, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_loss": 2.1658694744110107, | |
| "eval_runtime": 15.796, | |
| "eval_samples_per_second": 126.615, | |
| "eval_steps_per_second": 2.026, | |
| "step": 375000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.888712513014834e-06, | |
| "loss": 1.9298, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 2.171632766723633, | |
| "eval_runtime": 15.3482, | |
| "eval_samples_per_second": 130.308, | |
| "eval_steps_per_second": 2.085, | |
| "step": 376000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.782033556932426e-06, | |
| "loss": 1.9267, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_loss": 2.1282413005828857, | |
| "eval_runtime": 15.2611, | |
| "eval_samples_per_second": 131.052, | |
| "eval_steps_per_second": 2.097, | |
| "step": 377000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 9.675354600850019e-06, | |
| "loss": 1.9387, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_loss": 2.175699472427368, | |
| "eval_runtime": 15.3352, | |
| "eval_samples_per_second": 130.419, | |
| "eval_steps_per_second": 2.087, | |
| "step": 378000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.56867564476761e-06, | |
| "loss": 1.9235, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 2.1758999824523926, | |
| "eval_runtime": 16.089, | |
| "eval_samples_per_second": 124.309, | |
| "eval_steps_per_second": 1.989, | |
| "step": 379000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.461996688685204e-06, | |
| "loss": 1.9265, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_loss": 2.163534164428711, | |
| "eval_runtime": 15.2326, | |
| "eval_samples_per_second": 131.297, | |
| "eval_steps_per_second": 2.101, | |
| "step": 380000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 9.355317732602796e-06, | |
| "loss": 1.9151, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_loss": 2.1671011447906494, | |
| "eval_runtime": 15.2621, | |
| "eval_samples_per_second": 131.044, | |
| "eval_steps_per_second": 2.097, | |
| "step": 381000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.248638776520389e-06, | |
| "loss": 1.9262, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_loss": 2.144550323486328, | |
| "eval_runtime": 15.6946, | |
| "eval_samples_per_second": 127.432, | |
| "eval_steps_per_second": 2.039, | |
| "step": 382000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 9.14195982043798e-06, | |
| "loss": 1.9311, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_loss": 2.1890273094177246, | |
| "eval_runtime": 15.377, | |
| "eval_samples_per_second": 130.065, | |
| "eval_steps_per_second": 2.081, | |
| "step": 383000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.035280864355574e-06, | |
| "loss": 1.9305, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 2.166837692260742, | |
| "eval_runtime": 15.3262, | |
| "eval_samples_per_second": 130.496, | |
| "eval_steps_per_second": 2.088, | |
| "step": 384000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 8.928601908273167e-06, | |
| "loss": 1.9237, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_loss": 2.0922629833221436, | |
| "eval_runtime": 15.1049, | |
| "eval_samples_per_second": 132.408, | |
| "eval_steps_per_second": 2.119, | |
| "step": 385000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 8.82192295219076e-06, | |
| "loss": 1.9256, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_loss": 2.1387295722961426, | |
| "eval_runtime": 15.8611, | |
| "eval_samples_per_second": 126.095, | |
| "eval_steps_per_second": 2.018, | |
| "step": 386000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.715243996108352e-06, | |
| "loss": 1.9339, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 2.160367250442505, | |
| "eval_runtime": 15.4895, | |
| "eval_samples_per_second": 129.12, | |
| "eval_steps_per_second": 2.066, | |
| "step": 387000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.608565040025944e-06, | |
| "loss": 1.925, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_loss": 2.1711387634277344, | |
| "eval_runtime": 15.39, | |
| "eval_samples_per_second": 129.955, | |
| "eval_steps_per_second": 2.079, | |
| "step": 388000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 8.501886083943537e-06, | |
| "loss": 1.9185, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_loss": 2.1491212844848633, | |
| "eval_runtime": 15.607, | |
| "eval_samples_per_second": 128.147, | |
| "eval_steps_per_second": 2.05, | |
| "step": 389000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.395207127861129e-06, | |
| "loss": 1.9214, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_loss": 2.1444971561431885, | |
| "eval_runtime": 15.4605, | |
| "eval_samples_per_second": 129.362, | |
| "eval_steps_per_second": 2.07, | |
| "step": 390000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.288528171778722e-06, | |
| "loss": 1.928, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "eval_loss": 2.1359145641326904, | |
| "eval_runtime": 15.6126, | |
| "eval_samples_per_second": 128.102, | |
| "eval_steps_per_second": 2.05, | |
| "step": 391000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.181849215696314e-06, | |
| "loss": 1.9243, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_loss": 2.156005620956421, | |
| "eval_runtime": 15.7238, | |
| "eval_samples_per_second": 127.196, | |
| "eval_steps_per_second": 2.035, | |
| "step": 392000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.075170259613907e-06, | |
| "loss": 1.9096, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_loss": 2.1110196113586426, | |
| "eval_runtime": 15.176, | |
| "eval_samples_per_second": 131.787, | |
| "eval_steps_per_second": 2.109, | |
| "step": 393000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 7.9684913035315e-06, | |
| "loss": 1.9254, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_loss": 2.135141611099243, | |
| "eval_runtime": 15.4036, | |
| "eval_samples_per_second": 129.84, | |
| "eval_steps_per_second": 2.077, | |
| "step": 394000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.861812347449094e-06, | |
| "loss": 1.9214, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 2.1366610527038574, | |
| "eval_runtime": 15.7136, | |
| "eval_samples_per_second": 127.279, | |
| "eval_steps_per_second": 2.036, | |
| "step": 395000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.755133391366686e-06, | |
| "loss": 1.9229, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_loss": 2.1293559074401855, | |
| "eval_runtime": 15.3708, | |
| "eval_samples_per_second": 130.117, | |
| "eval_steps_per_second": 2.082, | |
| "step": 396000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 7.64845443528428e-06, | |
| "loss": 1.9166, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_loss": 2.1272215843200684, | |
| "eval_runtime": 15.7644, | |
| "eval_samples_per_second": 126.868, | |
| "eval_steps_per_second": 2.03, | |
| "step": 397000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.541775479201871e-06, | |
| "loss": 1.9152, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 2.1080117225646973, | |
| "eval_runtime": 15.3816, | |
| "eval_samples_per_second": 130.026, | |
| "eval_steps_per_second": 2.08, | |
| "step": 398000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 7.435096523119464e-06, | |
| "loss": 1.9138, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_loss": 2.156583309173584, | |
| "eval_runtime": 15.5093, | |
| "eval_samples_per_second": 128.955, | |
| "eval_steps_per_second": 2.063, | |
| "step": 399000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.328417567037056e-06, | |
| "loss": 1.9193, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_loss": 2.1462528705596924, | |
| "eval_runtime": 15.6345, | |
| "eval_samples_per_second": 127.923, | |
| "eval_steps_per_second": 2.047, | |
| "step": 400000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.221738610954649e-06, | |
| "loss": 1.9216, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 2.1311724185943604, | |
| "eval_runtime": 15.5304, | |
| "eval_samples_per_second": 128.78, | |
| "eval_steps_per_second": 2.06, | |
| "step": 401000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 7.115059654872242e-06, | |
| "loss": 1.9171, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_loss": 2.1334073543548584, | |
| "eval_runtime": 15.6034, | |
| "eval_samples_per_second": 128.177, | |
| "eval_steps_per_second": 2.051, | |
| "step": 402000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 7.008380698789835e-06, | |
| "loss": 1.9148, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "eval_loss": 2.1480307579040527, | |
| "eval_runtime": 15.4786, | |
| "eval_samples_per_second": 129.211, | |
| "eval_steps_per_second": 2.067, | |
| "step": 403000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.901701742707427e-06, | |
| "loss": 1.9204, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 2.1620922088623047, | |
| "eval_runtime": 17.9933, | |
| "eval_samples_per_second": 111.152, | |
| "eval_steps_per_second": 1.778, | |
| "step": 404000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 6.79502278662502e-06, | |
| "loss": 1.9163, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_loss": 2.1261579990386963, | |
| "eval_runtime": 15.7916, | |
| "eval_samples_per_second": 126.65, | |
| "eval_steps_per_second": 2.026, | |
| "step": 405000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "learning_rate": 6.688343830542612e-06, | |
| "loss": 1.9147, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "eval_loss": 2.134714365005493, | |
| "eval_runtime": 15.563, | |
| "eval_samples_per_second": 128.51, | |
| "eval_steps_per_second": 2.056, | |
| "step": 406000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.581664874460204e-06, | |
| "loss": 1.9107, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 2.094939947128296, | |
| "eval_runtime": 15.3395, | |
| "eval_samples_per_second": 130.383, | |
| "eval_steps_per_second": 2.086, | |
| "step": 407000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.474985918377798e-06, | |
| "loss": 1.9185, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_loss": 2.1135287284851074, | |
| "eval_runtime": 15.2587, | |
| "eval_samples_per_second": 131.072, | |
| "eval_steps_per_second": 2.097, | |
| "step": 408000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.368306962295389e-06, | |
| "loss": 1.9134, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 2.1412642002105713, | |
| "eval_runtime": 15.702, | |
| "eval_samples_per_second": 127.372, | |
| "eval_steps_per_second": 2.038, | |
| "step": 409000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 6.261628006212983e-06, | |
| "loss": 1.9144, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_loss": 2.1682534217834473, | |
| "eval_runtime": 15.4072, | |
| "eval_samples_per_second": 129.81, | |
| "eval_steps_per_second": 2.077, | |
| "step": 410000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.154949050130575e-06, | |
| "loss": 1.9086, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "eval_loss": 2.141894578933716, | |
| "eval_runtime": 15.208, | |
| "eval_samples_per_second": 131.51, | |
| "eval_steps_per_second": 2.104, | |
| "step": 411000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 6.0482700940481686e-06, | |
| "loss": 1.9101, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 2.1342506408691406, | |
| "eval_runtime": 15.2405, | |
| "eval_samples_per_second": 131.229, | |
| "eval_steps_per_second": 2.1, | |
| "step": 412000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 5.941591137965761e-06, | |
| "loss": 1.9086, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_loss": 2.097320318222046, | |
| "eval_runtime": 15.5657, | |
| "eval_samples_per_second": 128.488, | |
| "eval_steps_per_second": 2.056, | |
| "step": 413000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.8349121818833536e-06, | |
| "loss": 1.9089, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "eval_loss": 2.1229472160339355, | |
| "eval_runtime": 15.1808, | |
| "eval_samples_per_second": 131.746, | |
| "eval_steps_per_second": 2.108, | |
| "step": 414000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.728233225800946e-06, | |
| "loss": 1.915, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 2.1642491817474365, | |
| "eval_runtime": 15.6522, | |
| "eval_samples_per_second": 127.777, | |
| "eval_steps_per_second": 2.044, | |
| "step": 415000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 5.621554269718539e-06, | |
| "loss": 1.914, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_loss": 2.1208455562591553, | |
| "eval_runtime": 15.453, | |
| "eval_samples_per_second": 129.425, | |
| "eval_steps_per_second": 2.071, | |
| "step": 416000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 5.514875313636131e-06, | |
| "loss": 1.9031, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_loss": 2.103487253189087, | |
| "eval_runtime": 15.4394, | |
| "eval_samples_per_second": 129.539, | |
| "eval_steps_per_second": 2.073, | |
| "step": 417000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.408196357553724e-06, | |
| "loss": 1.9015, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 2.1312220096588135, | |
| "eval_runtime": 15.3068, | |
| "eval_samples_per_second": 130.661, | |
| "eval_steps_per_second": 2.091, | |
| "step": 418000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.301517401471316e-06, | |
| "loss": 1.9069, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_loss": 2.1444790363311768, | |
| "eval_runtime": 15.4574, | |
| "eval_samples_per_second": 129.388, | |
| "eval_steps_per_second": 2.07, | |
| "step": 419000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.194838445388909e-06, | |
| "loss": 1.9016, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 2.1105127334594727, | |
| "eval_runtime": 15.3042, | |
| "eval_samples_per_second": 130.683, | |
| "eval_steps_per_second": 2.091, | |
| "step": 420000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 5.088159489306501e-06, | |
| "loss": 1.8882, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_loss": 2.151632785797119, | |
| "eval_runtime": 15.8977, | |
| "eval_samples_per_second": 125.805, | |
| "eval_steps_per_second": 2.013, | |
| "step": 421000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.9814805332240945e-06, | |
| "loss": 1.9158, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "eval_loss": 2.1242105960845947, | |
| "eval_runtime": 15.298, | |
| "eval_samples_per_second": 130.736, | |
| "eval_steps_per_second": 2.092, | |
| "step": 422000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.874801577141687e-06, | |
| "loss": 1.9136, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 2.1192123889923096, | |
| "eval_runtime": 15.1175, | |
| "eval_samples_per_second": 132.297, | |
| "eval_steps_per_second": 2.117, | |
| "step": 423000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 4.7681226210592795e-06, | |
| "loss": 1.916, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_loss": 2.1400868892669678, | |
| "eval_runtime": 15.3165, | |
| "eval_samples_per_second": 130.578, | |
| "eval_steps_per_second": 2.089, | |
| "step": 424000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 4.661443664976872e-06, | |
| "loss": 1.8986, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_loss": 2.158984899520874, | |
| "eval_runtime": 15.2786, | |
| "eval_samples_per_second": 130.902, | |
| "eval_steps_per_second": 2.094, | |
| "step": 425000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.5547647088944646e-06, | |
| "loss": 1.9046, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_loss": 2.1008715629577637, | |
| "eval_runtime": 15.3482, | |
| "eval_samples_per_second": 130.309, | |
| "eval_steps_per_second": 2.085, | |
| "step": 426000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.448085752812058e-06, | |
| "loss": 1.9019, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_loss": 2.1234779357910156, | |
| "eval_runtime": 15.3947, | |
| "eval_samples_per_second": 129.915, | |
| "eval_steps_per_second": 2.079, | |
| "step": 427000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.34140679672965e-06, | |
| "loss": 1.9075, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_loss": 2.1445555686950684, | |
| "eval_runtime": 15.263, | |
| "eval_samples_per_second": 131.036, | |
| "eval_steps_per_second": 2.097, | |
| "step": 428000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.234727840647243e-06, | |
| "loss": 1.9023, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 2.1059927940368652, | |
| "eval_runtime": 15.6241, | |
| "eval_samples_per_second": 128.007, | |
| "eval_steps_per_second": 2.048, | |
| "step": 429000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.1280488845648354e-06, | |
| "loss": 1.9096, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_loss": 2.124612331390381, | |
| "eval_runtime": 15.4182, | |
| "eval_samples_per_second": 129.717, | |
| "eval_steps_per_second": 2.075, | |
| "step": 430000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 4.021369928482428e-06, | |
| "loss": 1.9021, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_loss": 2.1339197158813477, | |
| "eval_runtime": 15.3184, | |
| "eval_samples_per_second": 130.562, | |
| "eval_steps_per_second": 2.089, | |
| "step": 431000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.914690972400021e-06, | |
| "loss": 1.9051, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 2.150739908218384, | |
| "eval_runtime": 15.3685, | |
| "eval_samples_per_second": 130.137, | |
| "eval_steps_per_second": 2.082, | |
| "step": 432000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 3.808012016317614e-06, | |
| "loss": 1.8959, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_loss": 2.1340439319610596, | |
| "eval_runtime": 15.9351, | |
| "eval_samples_per_second": 125.509, | |
| "eval_steps_per_second": 2.008, | |
| "step": 433000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.7013330602352055e-06, | |
| "loss": 1.8924, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 2.1609554290771484, | |
| "eval_runtime": 15.2114, | |
| "eval_samples_per_second": 131.48, | |
| "eval_steps_per_second": 2.104, | |
| "step": 434000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.5946541041527984e-06, | |
| "loss": 1.9091, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_loss": 2.147794008255005, | |
| "eval_runtime": 15.5411, | |
| "eval_samples_per_second": 128.691, | |
| "eval_steps_per_second": 2.059, | |
| "step": 435000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 3.487975148070391e-06, | |
| "loss": 1.8908, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_loss": 2.100537061691284, | |
| "eval_runtime": 15.6967, | |
| "eval_samples_per_second": 127.415, | |
| "eval_steps_per_second": 2.039, | |
| "step": 436000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.3812961919879834e-06, | |
| "loss": 1.8946, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 2.111453056335449, | |
| "eval_runtime": 15.3824, | |
| "eval_samples_per_second": 130.019, | |
| "eval_steps_per_second": 2.08, | |
| "step": 437000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.2746172359055764e-06, | |
| "loss": 1.8977, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_loss": 2.130976676940918, | |
| "eval_runtime": 15.1954, | |
| "eval_samples_per_second": 131.618, | |
| "eval_steps_per_second": 2.106, | |
| "step": 438000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 3.167938279823169e-06, | |
| "loss": 1.9021, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "eval_loss": 2.1252684593200684, | |
| "eval_runtime": 15.3946, | |
| "eval_samples_per_second": 129.916, | |
| "eval_steps_per_second": 2.079, | |
| "step": 439000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.061259323740762e-06, | |
| "loss": 1.9019, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 2.1282765865325928, | |
| "eval_runtime": 15.6319, | |
| "eval_samples_per_second": 127.943, | |
| "eval_steps_per_second": 2.047, | |
| "step": 440000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9545803676583543e-06, | |
| "loss": 1.8947, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_loss": 2.1524507999420166, | |
| "eval_runtime": 15.3337, | |
| "eval_samples_per_second": 130.432, | |
| "eval_steps_per_second": 2.087, | |
| "step": 441000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.847901411575947e-06, | |
| "loss": 1.8854, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_loss": 2.1064517498016357, | |
| "eval_runtime": 15.2656, | |
| "eval_samples_per_second": 131.013, | |
| "eval_steps_per_second": 2.096, | |
| "step": 442000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.7412224554935398e-06, | |
| "loss": 1.9007, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 2.0694828033447266, | |
| "eval_runtime": 15.8869, | |
| "eval_samples_per_second": 125.89, | |
| "eval_steps_per_second": 2.014, | |
| "step": 443000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 2.6345434994111323e-06, | |
| "loss": 1.8981, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_loss": 2.1273715496063232, | |
| "eval_runtime": 15.1985, | |
| "eval_samples_per_second": 131.592, | |
| "eval_steps_per_second": 2.105, | |
| "step": 444000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.527864543328725e-06, | |
| "loss": 1.8872, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 2.1042518615722656, | |
| "eval_runtime": 15.3793, | |
| "eval_samples_per_second": 130.045, | |
| "eval_steps_per_second": 2.081, | |
| "step": 445000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.4211855872463177e-06, | |
| "loss": 1.8957, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_loss": 2.0750997066497803, | |
| "eval_runtime": 15.5989, | |
| "eval_samples_per_second": 128.214, | |
| "eval_steps_per_second": 2.051, | |
| "step": 446000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "learning_rate": 2.3145066311639102e-06, | |
| "loss": 1.9031, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_loss": 2.127918004989624, | |
| "eval_runtime": 15.6223, | |
| "eval_samples_per_second": 128.022, | |
| "eval_steps_per_second": 2.048, | |
| "step": 447000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.2078276750815028e-06, | |
| "loss": 1.9001, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 2.1019787788391113, | |
| "eval_runtime": 15.4843, | |
| "eval_samples_per_second": 129.163, | |
| "eval_steps_per_second": 2.067, | |
| "step": 448000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 2.1011487189990953e-06, | |
| "loss": 1.8964, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_loss": 2.0935049057006836, | |
| "eval_runtime": 15.5998, | |
| "eval_samples_per_second": 128.206, | |
| "eval_steps_per_second": 2.051, | |
| "step": 449000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.994469762916688e-06, | |
| "loss": 1.9003, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_loss": 2.1466352939605713, | |
| "eval_runtime": 15.1432, | |
| "eval_samples_per_second": 132.072, | |
| "eval_steps_per_second": 2.113, | |
| "step": 450000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.8877908068342807e-06, | |
| "loss": 1.9041, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 2.1213934421539307, | |
| "eval_runtime": 15.5486, | |
| "eval_samples_per_second": 128.629, | |
| "eval_steps_per_second": 2.058, | |
| "step": 451000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.7811118507518734e-06, | |
| "loss": 1.8972, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "eval_loss": 2.139911651611328, | |
| "eval_runtime": 17.4254, | |
| "eval_samples_per_second": 114.775, | |
| "eval_steps_per_second": 1.836, | |
| "step": 452000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.674432894669466e-06, | |
| "loss": 1.9001, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_loss": 2.1135449409484863, | |
| "eval_runtime": 15.3928, | |
| "eval_samples_per_second": 129.931, | |
| "eval_steps_per_second": 2.079, | |
| "step": 453000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.5677539385870587e-06, | |
| "loss": 1.9034, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 2.0974974632263184, | |
| "eval_runtime": 15.5392, | |
| "eval_samples_per_second": 128.707, | |
| "eval_steps_per_second": 2.059, | |
| "step": 454000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.4610749825046512e-06, | |
| "loss": 1.88, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_loss": 2.086946725845337, | |
| "eval_runtime": 15.3909, | |
| "eval_samples_per_second": 129.947, | |
| "eval_steps_per_second": 2.079, | |
| "step": 455000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 1.354396026422244e-06, | |
| "loss": 1.894, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_loss": 2.0814855098724365, | |
| "eval_runtime": 16.0281, | |
| "eval_samples_per_second": 124.781, | |
| "eval_steps_per_second": 1.996, | |
| "step": 456000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2477170703398366e-06, | |
| "loss": 1.8956, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 2.1207478046417236, | |
| "eval_runtime": 16.265, | |
| "eval_samples_per_second": 122.964, | |
| "eval_steps_per_second": 1.967, | |
| "step": 457000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.1410381142574291e-06, | |
| "loss": 1.8882, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_loss": 2.1136324405670166, | |
| "eval_runtime": 15.2771, | |
| "eval_samples_per_second": 130.915, | |
| "eval_steps_per_second": 2.095, | |
| "step": 458000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 1.0343591581750219e-06, | |
| "loss": 1.8924, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 2.137352466583252, | |
| "eval_runtime": 15.981, | |
| "eval_samples_per_second": 125.149, | |
| "eval_steps_per_second": 2.002, | |
| "step": 459000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 9.276802020926144e-07, | |
| "loss": 1.8953, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "eval_loss": 2.1012661457061768, | |
| "eval_runtime": 15.3369, | |
| "eval_samples_per_second": 130.404, | |
| "eval_steps_per_second": 2.086, | |
| "step": 460000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.210012460102071e-07, | |
| "loss": 1.893, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_loss": 2.135178804397583, | |
| "eval_runtime": 15.8046, | |
| "eval_samples_per_second": 126.546, | |
| "eval_steps_per_second": 2.025, | |
| "step": 461000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.143222899277997e-07, | |
| "loss": 1.8903, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 2.1333072185516357, | |
| "eval_runtime": 15.5282, | |
| "eval_samples_per_second": 128.798, | |
| "eval_steps_per_second": 2.061, | |
| "step": 462000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 6.076433338453923e-07, | |
| "loss": 1.8895, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "eval_loss": 2.1294093132019043, | |
| "eval_runtime": 15.3716, | |
| "eval_samples_per_second": 130.11, | |
| "eval_steps_per_second": 2.082, | |
| "step": 463000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 5.009643777629849e-07, | |
| "loss": 1.8939, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_loss": 2.1235413551330566, | |
| "eval_runtime": 15.3293, | |
| "eval_samples_per_second": 130.469, | |
| "eval_steps_per_second": 2.088, | |
| "step": 464000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 3.9428542168057766e-07, | |
| "loss": 1.8915, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 2.0933895111083984, | |
| "eval_runtime": 15.9617, | |
| "eval_samples_per_second": 125.3, | |
| "eval_steps_per_second": 2.005, | |
| "step": 465000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 2.8760646559817023e-07, | |
| "loss": 1.8884, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_loss": 2.1353940963745117, | |
| "eval_runtime": 15.6819, | |
| "eval_samples_per_second": 127.536, | |
| "eval_steps_per_second": 2.041, | |
| "step": 466000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 1.809275095157629e-07, | |
| "loss": 1.8932, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_loss": 2.1101338863372803, | |
| "eval_runtime": 15.545, | |
| "eval_samples_per_second": 128.659, | |
| "eval_steps_per_second": 2.059, | |
| "step": 467000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 7.424855343335553e-08, | |
| "loss": 1.9, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 2.130716562271118, | |
| "eval_runtime": 15.4114, | |
| "eval_samples_per_second": 129.774, | |
| "eval_steps_per_second": 2.076, | |
| "step": 468000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 468696, | |
| "total_flos": 6.219491681834838e+18, | |
| "train_loss": 0.6429893864398178, | |
| "train_runtime": 172266.1403, | |
| "train_samples_per_second": 174.128, | |
| "train_steps_per_second": 2.721 | |
| } | |
| ], | |
| "max_steps": 468696, | |
| "num_train_epochs": 3, | |
| "total_flos": 6.219491681834838e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |