32k_test_dummy / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 3
26ab506 verified
{"current_steps": 1, "total_steps": 96, "loss": 0.5701, "lr": 1.0000000000000002e-06, "epoch": 0.03125, "percentage": 1.04, "elapsed_time": "0:00:12", "remaining_time": "0:20:29"}
{"current_steps": 2, "total_steps": 96, "loss": 0.6002, "lr": 2.0000000000000003e-06, "epoch": 0.0625, "percentage": 2.08, "elapsed_time": "0:00:15", "remaining_time": "0:12:24"}
{"current_steps": 3, "total_steps": 96, "loss": 0.578, "lr": 3e-06, "epoch": 0.09375, "percentage": 3.12, "elapsed_time": "0:00:18", "remaining_time": "0:09:24"}
{"current_steps": 4, "total_steps": 96, "loss": 0.5471, "lr": 4.000000000000001e-06, "epoch": 0.125, "percentage": 4.17, "elapsed_time": "0:00:20", "remaining_time": "0:07:53"}
{"current_steps": 5, "total_steps": 96, "loss": 0.6588, "lr": 5e-06, "epoch": 0.15625, "percentage": 5.21, "elapsed_time": "0:00:23", "remaining_time": "0:07:02"}
{"current_steps": 6, "total_steps": 96, "loss": 0.5074, "lr": 6e-06, "epoch": 0.1875, "percentage": 6.25, "elapsed_time": "0:00:25", "remaining_time": "0:06:22"}
{"current_steps": 7, "total_steps": 96, "loss": 0.484, "lr": 7e-06, "epoch": 0.21875, "percentage": 7.29, "elapsed_time": "0:00:27", "remaining_time": "0:05:54"}
{"current_steps": 8, "total_steps": 96, "loss": 0.6328, "lr": 8.000000000000001e-06, "epoch": 0.25, "percentage": 8.33, "elapsed_time": "0:00:30", "remaining_time": "0:05:32"}
{"current_steps": 9, "total_steps": 96, "loss": 0.47, "lr": 9e-06, "epoch": 0.28125, "percentage": 9.38, "elapsed_time": "0:00:32", "remaining_time": "0:05:14"}
{"current_steps": 10, "total_steps": 96, "loss": 0.4675, "lr": 1e-05, "epoch": 0.3125, "percentage": 10.42, "elapsed_time": "0:00:34", "remaining_time": "0:04:59"}
{"current_steps": 11, "total_steps": 96, "loss": 0.6794, "lr": 9.996664241851197e-06, "epoch": 0.34375, "percentage": 11.46, "elapsed_time": "0:00:37", "remaining_time": "0:04:47"}
{"current_steps": 12, "total_steps": 96, "loss": 0.4242, "lr": 9.986661418317759e-06, "epoch": 0.375, "percentage": 12.5, "elapsed_time": "0:00:39", "remaining_time": "0:04:36"}
{"current_steps": 13, "total_steps": 96, "loss": 0.4481, "lr": 9.970004876199731e-06, "epoch": 0.40625, "percentage": 13.54, "elapsed_time": "0:00:41", "remaining_time": "0:04:27"}
{"current_steps": 14, "total_steps": 96, "loss": 0.5388, "lr": 9.946716840375552e-06, "epoch": 0.4375, "percentage": 14.58, "elapsed_time": "0:00:44", "remaining_time": "0:04:18"}
{"current_steps": 15, "total_steps": 96, "loss": 0.4292, "lr": 9.91682838414733e-06, "epoch": 0.46875, "percentage": 15.62, "elapsed_time": "0:00:46", "remaining_time": "0:04:11"}
{"current_steps": 16, "total_steps": 96, "loss": 0.3977, "lr": 9.880379387779637e-06, "epoch": 0.5, "percentage": 16.67, "elapsed_time": "0:00:48", "remaining_time": "0:04:04"}
{"current_steps": 17, "total_steps": 96, "loss": 0.418, "lr": 9.837418485287126e-06, "epoch": 0.53125, "percentage": 17.71, "elapsed_time": "0:00:51", "remaining_time": "0:03:57"}
{"current_steps": 18, "total_steps": 96, "loss": 0.382, "lr": 9.78800299954203e-06, "epoch": 0.5625, "percentage": 18.75, "elapsed_time": "0:00:53", "remaining_time": "0:03:51"}
{"current_steps": 19, "total_steps": 96, "loss": 0.3844, "lr": 9.732198865788047e-06, "epoch": 0.59375, "percentage": 19.79, "elapsed_time": "0:00:55", "remaining_time": "0:03:46"}
{"current_steps": 20, "total_steps": 96, "loss": 0.3707, "lr": 9.670080543662742e-06, "epoch": 0.625, "percentage": 20.83, "elapsed_time": "0:00:58", "remaining_time": "0:03:41"}
{"current_steps": 21, "total_steps": 96, "loss": 0.5049, "lr": 9.601730917845798e-06, "epoch": 0.65625, "percentage": 21.88, "elapsed_time": "0:01:00", "remaining_time": "0:03:36"}
{"current_steps": 22, "total_steps": 96, "loss": 0.3874, "lr": 9.527241187465735e-06, "epoch": 0.6875, "percentage": 22.92, "elapsed_time": "0:01:02", "remaining_time": "0:03:31"}
{"current_steps": 23, "total_steps": 96, "loss": 0.3295, "lr": 9.446710744412595e-06, "epoch": 0.71875, "percentage": 23.96, "elapsed_time": "0:01:05", "remaining_time": "0:03:26"}
{"current_steps": 24, "total_steps": 96, "loss": 0.3313, "lr": 9.36024704071904e-06, "epoch": 0.75, "percentage": 25.0, "elapsed_time": "0:01:07", "remaining_time": "0:03:22"}
{"current_steps": 25, "total_steps": 96, "loss": 0.3649, "lr": 9.267965445186733e-06, "epoch": 0.78125, "percentage": 26.04, "elapsed_time": "0:01:09", "remaining_time": "0:03:18"}
{"current_steps": 26, "total_steps": 96, "loss": 0.4084, "lr": 9.16998908944939e-06, "epoch": 0.8125, "percentage": 27.08, "elapsed_time": "0:01:12", "remaining_time": "0:03:14"}
{"current_steps": 27, "total_steps": 96, "loss": 0.3463, "lr": 9.066448703677828e-06, "epoch": 0.84375, "percentage": 28.12, "elapsed_time": "0:01:14", "remaining_time": "0:03:10"}
{"current_steps": 28, "total_steps": 96, "loss": 0.4789, "lr": 8.957482442146271e-06, "epoch": 0.875, "percentage": 29.17, "elapsed_time": "0:01:16", "remaining_time": "0:03:06"}
{"current_steps": 29, "total_steps": 96, "loss": 0.4739, "lr": 8.843235698892661e-06, "epoch": 0.90625, "percentage": 30.21, "elapsed_time": "0:01:19", "remaining_time": "0:03:02"}
{"current_steps": 30, "total_steps": 96, "loss": 0.3864, "lr": 8.72386091371891e-06, "epoch": 0.9375, "percentage": 31.25, "elapsed_time": "0:01:21", "remaining_time": "0:02:59"}
{"current_steps": 31, "total_steps": 96, "loss": 0.3181, "lr": 8.599517368789981e-06, "epoch": 0.96875, "percentage": 32.29, "elapsed_time": "0:01:23", "remaining_time": "0:02:55"}
{"current_steps": 32, "total_steps": 96, "loss": 0.2821, "lr": 8.470370976103171e-06, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:01:26", "remaining_time": "0:02:52"}
{"current_steps": 33, "total_steps": 96, "loss": 0.288, "lr": 8.336594056111197e-06, "epoch": 1.03125, "percentage": 34.38, "elapsed_time": "0:02:14", "remaining_time": "0:04:16"}
{"current_steps": 34, "total_steps": 96, "loss": 0.275, "lr": 8.198365107794457e-06, "epoch": 1.0625, "percentage": 35.42, "elapsed_time": "0:02:16", "remaining_time": "0:04:09"}
{"current_steps": 35, "total_steps": 96, "loss": 0.2725, "lr": 8.055868570489247e-06, "epoch": 1.09375, "percentage": 36.46, "elapsed_time": "0:02:20", "remaining_time": "0:04:04"}
{"current_steps": 36, "total_steps": 96, "loss": 0.2298, "lr": 7.909294577789765e-06, "epoch": 1.125, "percentage": 37.5, "elapsed_time": "0:02:22", "remaining_time": "0:03:57"}
{"current_steps": 37, "total_steps": 96, "loss": 0.2389, "lr": 7.75883870385223e-06, "epoch": 1.15625, "percentage": 38.54, "elapsed_time": "0:02:25", "remaining_time": "0:03:51"}
{"current_steps": 38, "total_steps": 96, "loss": 0.2394, "lr": 7.604701702439652e-06, "epoch": 1.1875, "percentage": 39.58, "elapsed_time": "0:02:27", "remaining_time": "0:03:45"}
{"current_steps": 39, "total_steps": 96, "loss": 0.2535, "lr": 7.447089239055428e-06, "epoch": 1.21875, "percentage": 40.62, "elapsed_time": "0:02:30", "remaining_time": "0:03:39"}
{"current_steps": 40, "total_steps": 96, "loss": 0.2704, "lr": 7.286211616523193e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:02:32", "remaining_time": "0:03:34"}
{"current_steps": 41, "total_steps": 96, "loss": 0.2416, "lr": 7.122283494379076e-06, "epoch": 1.28125, "percentage": 42.71, "elapsed_time": "0:02:35", "remaining_time": "0:03:28"}
{"current_steps": 42, "total_steps": 96, "loss": 0.2793, "lr": 6.95552360245078e-06, "epoch": 1.3125, "percentage": 43.75, "elapsed_time": "0:02:38", "remaining_time": "0:03:23"}
{"current_steps": 43, "total_steps": 96, "loss": 0.2504, "lr": 6.786154449005664e-06, "epoch": 1.34375, "percentage": 44.79, "elapsed_time": "0:02:41", "remaining_time": "0:03:18"}
{"current_steps": 44, "total_steps": 96, "loss": 0.2549, "lr": 6.614402023857231e-06, "epoch": 1.375, "percentage": 45.83, "elapsed_time": "0:02:43", "remaining_time": "0:03:13"}
{"current_steps": 45, "total_steps": 96, "loss": 0.2179, "lr": 6.440495496826189e-06, "epoch": 1.40625, "percentage": 46.88, "elapsed_time": "0:02:46", "remaining_time": "0:03:08"}
{"current_steps": 46, "total_steps": 96, "loss": 0.1993, "lr": 6.264666911958404e-06, "epoch": 1.4375, "percentage": 47.92, "elapsed_time": "0:02:48", "remaining_time": "0:03:03"}
{"current_steps": 47, "total_steps": 96, "loss": 0.2553, "lr": 6.087150877907786e-06, "epoch": 1.46875, "percentage": 48.96, "elapsed_time": "0:02:51", "remaining_time": "0:02:58"}
{"current_steps": 48, "total_steps": 96, "loss": 0.2949, "lr": 5.908184254897183e-06, "epoch": 1.5, "percentage": 50.0, "elapsed_time": "0:02:53", "remaining_time": "0:02:53"}
{"current_steps": 49, "total_steps": 96, "loss": 0.2204, "lr": 5.728005838675026e-06, "epoch": 1.53125, "percentage": 51.04, "elapsed_time": "0:02:56", "remaining_time": "0:02:48"}
{"current_steps": 50, "total_steps": 96, "loss": 0.2082, "lr": 5.546856041889374e-06, "epoch": 1.5625, "percentage": 52.08, "elapsed_time": "0:02:58", "remaining_time": "0:02:44"}
{"current_steps": 51, "total_steps": 96, "loss": 0.2052, "lr": 5.364976573304538e-06, "epoch": 1.59375, "percentage": 53.12, "elapsed_time": "0:03:01", "remaining_time": "0:02:39"}
{"current_steps": 52, "total_steps": 96, "loss": 0.2308, "lr": 5.182610115288296e-06, "epoch": 1.625, "percentage": 54.17, "elapsed_time": "0:03:03", "remaining_time": "0:02:35"}
{"current_steps": 53, "total_steps": 96, "loss": 0.2448, "lr": 5e-06, "epoch": 1.65625, "percentage": 55.21, "elapsed_time": "0:03:06", "remaining_time": "0:02:30"}
{"current_steps": 54, "total_steps": 96, "loss": 0.2229, "lr": 4.817389884711706e-06, "epoch": 1.6875, "percentage": 56.25, "elapsed_time": "0:03:08", "remaining_time": "0:02:26"}
{"current_steps": 55, "total_steps": 96, "loss": 0.216, "lr": 4.635023426695462e-06, "epoch": 1.71875, "percentage": 57.29, "elapsed_time": "0:03:10", "remaining_time": "0:02:22"}
{"current_steps": 56, "total_steps": 96, "loss": 0.1926, "lr": 4.4531439581106295e-06, "epoch": 1.75, "percentage": 58.33, "elapsed_time": "0:03:13", "remaining_time": "0:02:18"}
{"current_steps": 57, "total_steps": 96, "loss": 0.2045, "lr": 4.271994161324977e-06, "epoch": 1.78125, "percentage": 59.38, "elapsed_time": "0:03:15", "remaining_time": "0:02:14"}
{"current_steps": 58, "total_steps": 96, "loss": 0.2125, "lr": 4.091815745102818e-06, "epoch": 1.8125, "percentage": 60.42, "elapsed_time": "0:03:18", "remaining_time": "0:02:09"}
{"current_steps": 59, "total_steps": 96, "loss": 0.2282, "lr": 3.912849122092216e-06, "epoch": 1.84375, "percentage": 61.46, "elapsed_time": "0:03:20", "remaining_time": "0:02:05"}
{"current_steps": 60, "total_steps": 96, "loss": 0.2317, "lr": 3.7353330880415963e-06, "epoch": 1.875, "percentage": 62.5, "elapsed_time": "0:03:23", "remaining_time": "0:02:02"}
{"current_steps": 61, "total_steps": 96, "loss": 0.2459, "lr": 3.5595045031738123e-06, "epoch": 1.90625, "percentage": 63.54, "elapsed_time": "0:03:25", "remaining_time": "0:01:58"}
{"current_steps": 62, "total_steps": 96, "loss": 0.2836, "lr": 3.3855979761427705e-06, "epoch": 1.9375, "percentage": 64.58, "elapsed_time": "0:03:28", "remaining_time": "0:01:54"}
{"current_steps": 63, "total_steps": 96, "loss": 0.2508, "lr": 3.2138455509943365e-06, "epoch": 1.96875, "percentage": 65.62, "elapsed_time": "0:03:30", "remaining_time": "0:01:50"}
{"current_steps": 64, "total_steps": 96, "loss": 0.1681, "lr": 3.044476397549221e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:03:33", "remaining_time": "0:01:46"}
{"current_steps": 65, "total_steps": 96, "loss": 0.1305, "lr": 2.8777165056209256e-06, "epoch": 2.03125, "percentage": 67.71, "elapsed_time": "0:04:09", "remaining_time": "0:01:58"}
{"current_steps": 66, "total_steps": 96, "loss": 0.1462, "lr": 2.7137883834768076e-06, "epoch": 2.0625, "percentage": 68.75, "elapsed_time": "0:04:11", "remaining_time": "0:01:54"}
{"current_steps": 67, "total_steps": 96, "loss": 0.1384, "lr": 2.5529107609445737e-06, "epoch": 2.09375, "percentage": 69.79, "elapsed_time": "0:04:15", "remaining_time": "0:01:50"}
{"current_steps": 68, "total_steps": 96, "loss": 0.1262, "lr": 2.3952982975603494e-06, "epoch": 2.125, "percentage": 70.83, "elapsed_time": "0:04:17", "remaining_time": "0:01:46"}
{"current_steps": 69, "total_steps": 96, "loss": 0.1297, "lr": 2.2411612961477704e-06, "epoch": 2.15625, "percentage": 71.88, "elapsed_time": "0:04:20", "remaining_time": "0:01:41"}
{"current_steps": 70, "total_steps": 96, "loss": 0.127, "lr": 2.0907054222102367e-06, "epoch": 2.1875, "percentage": 72.92, "elapsed_time": "0:04:22", "remaining_time": "0:01:37"}
{"current_steps": 71, "total_steps": 96, "loss": 0.131, "lr": 1.944131429510754e-06, "epoch": 2.21875, "percentage": 73.96, "elapsed_time": "0:04:25", "remaining_time": "0:01:33"}
{"current_steps": 72, "total_steps": 96, "loss": 0.1119, "lr": 1.8016348922055448e-06, "epoch": 2.25, "percentage": 75.0, "elapsed_time": "0:04:27", "remaining_time": "0:01:29"}
{"current_steps": 73, "total_steps": 96, "loss": 0.1131, "lr": 1.6634059438888034e-06, "epoch": 2.28125, "percentage": 76.04, "elapsed_time": "0:04:30", "remaining_time": "0:01:25"}
{"current_steps": 74, "total_steps": 96, "loss": 0.1147, "lr": 1.5296290238968303e-06, "epoch": 2.3125, "percentage": 77.08, "elapsed_time": "0:04:33", "remaining_time": "0:01:21"}
{"current_steps": 75, "total_steps": 96, "loss": 0.1219, "lr": 1.4004826312100218e-06, "epoch": 2.34375, "percentage": 78.12, "elapsed_time": "0:04:35", "remaining_time": "0:01:17"}
{"current_steps": 76, "total_steps": 96, "loss": 0.1178, "lr": 1.2761390862810907e-06, "epoch": 2.375, "percentage": 79.17, "elapsed_time": "0:04:37", "remaining_time": "0:01:13"}
{"current_steps": 77, "total_steps": 96, "loss": 0.1102, "lr": 1.1567643011073393e-06, "epoch": 2.40625, "percentage": 80.21, "elapsed_time": "0:04:40", "remaining_time": "0:01:09"}
{"current_steps": 78, "total_steps": 96, "loss": 0.1077, "lr": 1.04251755785373e-06, "epoch": 2.4375, "percentage": 81.25, "elapsed_time": "0:04:42", "remaining_time": "0:01:05"}
{"current_steps": 79, "total_steps": 96, "loss": 0.1045, "lr": 9.335512963221732e-07, "epoch": 2.46875, "percentage": 82.29, "elapsed_time": "0:04:45", "remaining_time": "0:01:01"}
{"current_steps": 80, "total_steps": 96, "loss": 0.1298, "lr": 8.30010910550611e-07, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:04:47", "remaining_time": "0:00:57"}
{"current_steps": 81, "total_steps": 96, "loss": 0.1336, "lr": 7.320345548132679e-07, "epoch": 2.53125, "percentage": 84.38, "elapsed_time": "0:04:50", "remaining_time": "0:00:53"}
{"current_steps": 82, "total_steps": 96, "loss": 0.1017, "lr": 6.397529592809615e-07, "epoch": 2.5625, "percentage": 85.42, "elapsed_time": "0:04:52", "remaining_time": "0:00:49"}
{"current_steps": 83, "total_steps": 96, "loss": 0.1058, "lr": 5.532892555874059e-07, "epoch": 2.59375, "percentage": 86.46, "elapsed_time": "0:04:54", "remaining_time": "0:00:46"}
{"current_steps": 84, "total_steps": 96, "loss": 0.1174, "lr": 4.727588125342669e-07, "epoch": 2.625, "percentage": 87.5, "elapsed_time": "0:04:57", "remaining_time": "0:00:42"}
{"current_steps": 85, "total_steps": 96, "loss": 0.1076, "lr": 3.9826908215420344e-07, "epoch": 2.65625, "percentage": 88.54, "elapsed_time": "0:04:59", "remaining_time": "0:00:38"}
{"current_steps": 86, "total_steps": 96, "loss": 0.1411, "lr": 3.299194563372604e-07, "epoch": 2.6875, "percentage": 89.58, "elapsed_time": "0:05:02", "remaining_time": "0:00:35"}
{"current_steps": 87, "total_steps": 96, "loss": 0.1101, "lr": 2.67801134211953e-07, "epoch": 2.71875, "percentage": 90.62, "elapsed_time": "0:05:04", "remaining_time": "0:00:31"}
{"current_steps": 88, "total_steps": 96, "loss": 0.1397, "lr": 2.1199700045797077e-07, "epoch": 2.75, "percentage": 91.67, "elapsed_time": "0:05:06", "remaining_time": "0:00:27"}
{"current_steps": 89, "total_steps": 96, "loss": 0.1053, "lr": 1.6258151471287397e-07, "epoch": 2.78125, "percentage": 92.71, "elapsed_time": "0:05:09", "remaining_time": "0:00:24"}
{"current_steps": 90, "total_steps": 96, "loss": 0.1197, "lr": 1.196206122203647e-07, "epoch": 2.8125, "percentage": 93.75, "elapsed_time": "0:05:11", "remaining_time": "0:00:20"}
{"current_steps": 91, "total_steps": 96, "loss": 0.1004, "lr": 8.317161585266964e-08, "epoch": 2.84375, "percentage": 94.79, "elapsed_time": "0:05:14", "remaining_time": "0:00:17"}
{"current_steps": 92, "total_steps": 96, "loss": 0.1161, "lr": 5.3283159624448745e-08, "epoch": 2.875, "percentage": 95.83, "elapsed_time": "0:05:16", "remaining_time": "0:00:13"}
{"current_steps": 93, "total_steps": 96, "loss": 0.1109, "lr": 2.9995123800270476e-08, "epoch": 2.90625, "percentage": 96.88, "elapsed_time": "0:05:19", "remaining_time": "0:00:10"}
{"current_steps": 94, "total_steps": 96, "loss": 0.109, "lr": 1.333858168224178e-08, "epoch": 2.9375, "percentage": 97.92, "elapsed_time": "0:05:21", "remaining_time": "0:00:06"}
{"current_steps": 95, "total_steps": 96, "loss": 0.1002, "lr": 3.3357581488030476e-09, "epoch": 2.96875, "percentage": 98.96, "elapsed_time": "0:05:23", "remaining_time": "0:00:03"}
{"current_steps": 96, "total_steps": 96, "loss": 0.0853, "lr": 0.0, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:05:26", "remaining_time": "0:00:00"}
{"current_steps": 96, "total_steps": 96, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:07:00", "remaining_time": "0:00:00"}