| { | |
| "best_metric": 85.6858, | |
| "best_model_checkpoint": "AraT5_FT_MSA_Transaltion/checkpoint-74500", | |
| "epoch": 60.0, | |
| "eval_steps": 500, | |
| "global_step": 75000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 4.9836065573770496e-05, | |
| "loss": 3.9102, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_bleu": 18.6972, | |
| "eval_gen_len": 9.4035, | |
| "eval_loss": 1.9062472581863403, | |
| "eval_runtime": 106.2615, | |
| "eval_samples_per_second": 94.107, | |
| "eval_steps_per_second": 1.477, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 4.967213114754098e-05, | |
| "loss": 2.3273, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_bleu": 22.5788, | |
| "eval_gen_len": 9.3259, | |
| "eval_loss": 1.6005295515060425, | |
| "eval_runtime": 107.2854, | |
| "eval_samples_per_second": 93.209, | |
| "eval_steps_per_second": 1.463, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.9508196721311476e-05, | |
| "loss": 1.996, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "eval_bleu": 25.6973, | |
| "eval_gen_len": 9.4844, | |
| "eval_loss": 1.413317084312439, | |
| "eval_runtime": 107.9077, | |
| "eval_samples_per_second": 92.672, | |
| "eval_steps_per_second": 1.455, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.934426229508197e-05, | |
| "loss": 1.7747, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_bleu": 29.1098, | |
| "eval_gen_len": 9.4334, | |
| "eval_loss": 1.2736828327178955, | |
| "eval_runtime": 108.0256, | |
| "eval_samples_per_second": 92.571, | |
| "eval_steps_per_second": 1.453, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 4.918032786885246e-05, | |
| "loss": 1.6363, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_bleu": 32.4975, | |
| "eval_gen_len": 9.5307, | |
| "eval_loss": 1.154405951499939, | |
| "eval_runtime": 108.0346, | |
| "eval_samples_per_second": 92.563, | |
| "eval_steps_per_second": 1.453, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 4.9016393442622957e-05, | |
| "loss": 1.4614, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_bleu": 35.6534, | |
| "eval_gen_len": 9.5125, | |
| "eval_loss": 1.0677547454833984, | |
| "eval_runtime": 106.6888, | |
| "eval_samples_per_second": 93.731, | |
| "eval_steps_per_second": 1.472, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 4.885245901639344e-05, | |
| "loss": 1.3627, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_bleu": 39.0667, | |
| "eval_gen_len": 9.5759, | |
| "eval_loss": 0.9860268235206604, | |
| "eval_runtime": 108.9156, | |
| "eval_samples_per_second": 91.814, | |
| "eval_steps_per_second": 1.441, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 4.868852459016394e-05, | |
| "loss": 1.2627, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "eval_bleu": 42.4036, | |
| "eval_gen_len": 9.6225, | |
| "eval_loss": 0.9212129712104797, | |
| "eval_runtime": 108.2436, | |
| "eval_samples_per_second": 92.384, | |
| "eval_steps_per_second": 1.45, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 4.852459016393443e-05, | |
| "loss": 1.1616, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_bleu": 44.7376, | |
| "eval_gen_len": 9.6448, | |
| "eval_loss": 0.8675327897071838, | |
| "eval_runtime": 109.0598, | |
| "eval_samples_per_second": 91.693, | |
| "eval_steps_per_second": 1.44, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 4.836065573770492e-05, | |
| "loss": 1.1226, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_bleu": 47.2213, | |
| "eval_gen_len": 9.6337, | |
| "eval_loss": 0.816310703754425, | |
| "eval_runtime": 106.4424, | |
| "eval_samples_per_second": 93.947, | |
| "eval_steps_per_second": 1.475, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 4.819672131147541e-05, | |
| "loss": 1.006, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "eval_bleu": 49.5563, | |
| "eval_gen_len": 9.7168, | |
| "eval_loss": 0.7709316611289978, | |
| "eval_runtime": 112.4236, | |
| "eval_samples_per_second": 88.949, | |
| "eval_steps_per_second": 1.397, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 4.8032786885245904e-05, | |
| "loss": 0.978, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "eval_bleu": 50.775, | |
| "eval_gen_len": 9.6925, | |
| "eval_loss": 0.7373432517051697, | |
| "eval_runtime": 109.2099, | |
| "eval_samples_per_second": 91.567, | |
| "eval_steps_per_second": 1.438, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "learning_rate": 4.78688524590164e-05, | |
| "loss": 0.9099, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.2, | |
| "eval_bleu": 52.697, | |
| "eval_gen_len": 9.7017, | |
| "eval_loss": 0.7020449042320251, | |
| "eval_runtime": 109.0697, | |
| "eval_samples_per_second": 91.684, | |
| "eval_steps_per_second": 1.439, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "learning_rate": 4.770491803278689e-05, | |
| "loss": 0.8483, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 5.6, | |
| "eval_bleu": 53.9571, | |
| "eval_gen_len": 9.693, | |
| "eval_loss": 0.6663933992385864, | |
| "eval_runtime": 109.7349, | |
| "eval_samples_per_second": 91.129, | |
| "eval_steps_per_second": 1.431, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 4.754098360655738e-05, | |
| "loss": 0.8293, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_bleu": 55.874, | |
| "eval_gen_len": 9.7475, | |
| "eval_loss": 0.630104124546051, | |
| "eval_runtime": 109.6322, | |
| "eval_samples_per_second": 91.214, | |
| "eval_steps_per_second": 1.432, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "learning_rate": 4.737704918032787e-05, | |
| "loss": 0.7493, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.4, | |
| "eval_bleu": 56.7427, | |
| "eval_gen_len": 9.7239, | |
| "eval_loss": 0.6072443723678589, | |
| "eval_runtime": 113.2863, | |
| "eval_samples_per_second": 88.272, | |
| "eval_steps_per_second": 1.386, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 4.7213114754098365e-05, | |
| "loss": 0.7294, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_bleu": 57.9936, | |
| "eval_gen_len": 9.7521, | |
| "eval_loss": 0.5758106112480164, | |
| "eval_runtime": 110.1034, | |
| "eval_samples_per_second": 90.824, | |
| "eval_steps_per_second": 1.426, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 4.704918032786885e-05, | |
| "loss": 0.6904, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_bleu": 59.0065, | |
| "eval_gen_len": 9.7544, | |
| "eval_loss": 0.5612244606018066, | |
| "eval_runtime": 117.8275, | |
| "eval_samples_per_second": 84.87, | |
| "eval_steps_per_second": 1.332, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "learning_rate": 4.6885245901639345e-05, | |
| "loss": 0.6478, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 7.6, | |
| "eval_bleu": 60.1129, | |
| "eval_gen_len": 9.7827, | |
| "eval_loss": 0.525496780872345, | |
| "eval_runtime": 119.5262, | |
| "eval_samples_per_second": 83.664, | |
| "eval_steps_per_second": 1.314, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 4.672131147540984e-05, | |
| "loss": 0.6257, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_bleu": 61.0568, | |
| "eval_gen_len": 9.7663, | |
| "eval_loss": 0.5063189268112183, | |
| "eval_runtime": 118.6711, | |
| "eval_samples_per_second": 84.267, | |
| "eval_steps_per_second": 1.323, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 4.655737704918033e-05, | |
| "loss": 0.5696, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "eval_bleu": 61.9169, | |
| "eval_gen_len": 9.776, | |
| "eval_loss": 0.4885226786136627, | |
| "eval_runtime": 121.031, | |
| "eval_samples_per_second": 82.623, | |
| "eval_steps_per_second": 1.297, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 4.6393442622950825e-05, | |
| "loss": 0.5636, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "eval_bleu": 62.5974, | |
| "eval_gen_len": 9.7975, | |
| "eval_loss": 0.471066951751709, | |
| "eval_runtime": 119.0897, | |
| "eval_samples_per_second": 83.97, | |
| "eval_steps_per_second": 1.318, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "learning_rate": 4.622950819672132e-05, | |
| "loss": 0.5258, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.2, | |
| "eval_bleu": 63.7967, | |
| "eval_gen_len": 9.8122, | |
| "eval_loss": 0.449593722820282, | |
| "eval_runtime": 120.8927, | |
| "eval_samples_per_second": 82.718, | |
| "eval_steps_per_second": 1.299, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "learning_rate": 4.6065573770491805e-05, | |
| "loss": 0.4979, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 9.6, | |
| "eval_bleu": 64.6212, | |
| "eval_gen_len": 9.7674, | |
| "eval_loss": 0.43481728434562683, | |
| "eval_runtime": 119.3174, | |
| "eval_samples_per_second": 83.81, | |
| "eval_steps_per_second": 1.316, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 4.59016393442623e-05, | |
| "loss": 0.4987, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_bleu": 65.3736, | |
| "eval_gen_len": 9.8562, | |
| "eval_loss": 0.4133751392364502, | |
| "eval_runtime": 122.0111, | |
| "eval_samples_per_second": 81.96, | |
| "eval_steps_per_second": 1.287, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "learning_rate": 4.5737704918032786e-05, | |
| "loss": 0.4497, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 10.4, | |
| "eval_bleu": 66.4415, | |
| "eval_gen_len": 9.8254, | |
| "eval_loss": 0.39948710799217224, | |
| "eval_runtime": 121.3113, | |
| "eval_samples_per_second": 82.433, | |
| "eval_steps_per_second": 1.294, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "learning_rate": 4.557377049180328e-05, | |
| "loss": 0.4382, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 10.8, | |
| "eval_bleu": 66.8785, | |
| "eval_gen_len": 9.8152, | |
| "eval_loss": 0.3892167806625366, | |
| "eval_runtime": 120.977, | |
| "eval_samples_per_second": 82.66, | |
| "eval_steps_per_second": 1.298, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "learning_rate": 4.540983606557377e-05, | |
| "loss": 0.4146, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "eval_bleu": 67.6836, | |
| "eval_gen_len": 9.8031, | |
| "eval_loss": 0.374174565076828, | |
| "eval_runtime": 123.4198, | |
| "eval_samples_per_second": 81.024, | |
| "eval_steps_per_second": 1.272, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "learning_rate": 4.524590163934426e-05, | |
| "loss": 0.3895, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 11.6, | |
| "eval_bleu": 68.4895, | |
| "eval_gen_len": 9.8325, | |
| "eval_loss": 0.3638547658920288, | |
| "eval_runtime": 123.9996, | |
| "eval_samples_per_second": 80.645, | |
| "eval_steps_per_second": 1.266, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 4.508196721311476e-05, | |
| "loss": 0.3881, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_bleu": 68.9665, | |
| "eval_gen_len": 9.8444, | |
| "eval_loss": 0.3532446026802063, | |
| "eval_runtime": 123.044, | |
| "eval_samples_per_second": 81.272, | |
| "eval_steps_per_second": 1.276, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "learning_rate": 4.491803278688525e-05, | |
| "loss": 0.3495, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 12.4, | |
| "eval_bleu": 69.8231, | |
| "eval_gen_len": 9.8346, | |
| "eval_loss": 0.34260880947113037, | |
| "eval_runtime": 122.4901, | |
| "eval_samples_per_second": 81.639, | |
| "eval_steps_per_second": 1.282, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "learning_rate": 4.475409836065574e-05, | |
| "loss": 0.3474, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "eval_bleu": 70.4124, | |
| "eval_gen_len": 9.8408, | |
| "eval_loss": 0.3283344805240631, | |
| "eval_runtime": 122.1563, | |
| "eval_samples_per_second": 81.862, | |
| "eval_steps_per_second": 1.285, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "learning_rate": 4.459016393442623e-05, | |
| "loss": 0.3264, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 13.2, | |
| "eval_bleu": 70.991, | |
| "eval_gen_len": 9.8374, | |
| "eval_loss": 0.3219762444496155, | |
| "eval_runtime": 122.3026, | |
| "eval_samples_per_second": 81.764, | |
| "eval_steps_per_second": 1.284, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 4.442622950819673e-05, | |
| "loss": 0.3095, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "eval_bleu": 71.7934, | |
| "eval_gen_len": 9.8704, | |
| "eval_loss": 0.3138624131679535, | |
| "eval_runtime": 124.2274, | |
| "eval_samples_per_second": 80.498, | |
| "eval_steps_per_second": 1.264, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 4.426229508196721e-05, | |
| "loss": 0.3138, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_bleu": 72.3896, | |
| "eval_gen_len": 9.8585, | |
| "eval_loss": 0.3009161949157715, | |
| "eval_runtime": 122.1372, | |
| "eval_samples_per_second": 81.875, | |
| "eval_steps_per_second": 1.285, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "learning_rate": 4.409836065573771e-05, | |
| "loss": 0.2828, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 14.4, | |
| "eval_bleu": 72.6457, | |
| "eval_gen_len": 9.8585, | |
| "eval_loss": 0.301722913980484, | |
| "eval_runtime": 123.5238, | |
| "eval_samples_per_second": 80.956, | |
| "eval_steps_per_second": 1.271, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "learning_rate": 4.3934426229508194e-05, | |
| "loss": 0.2776, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 14.8, | |
| "eval_bleu": 73.1631, | |
| "eval_gen_len": 9.8606, | |
| "eval_loss": 0.2890518307685852, | |
| "eval_runtime": 123.7854, | |
| "eval_samples_per_second": 80.785, | |
| "eval_steps_per_second": 1.268, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "learning_rate": 4.377049180327869e-05, | |
| "loss": 0.2653, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 15.2, | |
| "eval_bleu": 73.6086, | |
| "eval_gen_len": 9.8775, | |
| "eval_loss": 0.2824092507362366, | |
| "eval_runtime": 122.3472, | |
| "eval_samples_per_second": 81.735, | |
| "eval_steps_per_second": 1.283, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "learning_rate": 4.360655737704919e-05, | |
| "loss": 0.2561, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 15.6, | |
| "eval_bleu": 74.2558, | |
| "eval_gen_len": 9.8651, | |
| "eval_loss": 0.27599573135375977, | |
| "eval_runtime": 123.4989, | |
| "eval_samples_per_second": 80.972, | |
| "eval_steps_per_second": 1.271, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 4.3442622950819674e-05, | |
| "loss": 0.2534, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_bleu": 74.6646, | |
| "eval_gen_len": 9.8609, | |
| "eval_loss": 0.2678174674510956, | |
| "eval_runtime": 121.7684, | |
| "eval_samples_per_second": 82.123, | |
| "eval_steps_per_second": 1.289, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "learning_rate": 4.327868852459017e-05, | |
| "loss": 0.229, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 16.4, | |
| "eval_bleu": 75.1771, | |
| "eval_gen_len": 9.8587, | |
| "eval_loss": 0.26594653725624084, | |
| "eval_runtime": 122.704, | |
| "eval_samples_per_second": 81.497, | |
| "eval_steps_per_second": 1.28, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "learning_rate": 4.311475409836066e-05, | |
| "loss": 0.23, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "eval_bleu": 75.2663, | |
| "eval_gen_len": 9.8656, | |
| "eval_loss": 0.25894829630851746, | |
| "eval_runtime": 123.4498, | |
| "eval_samples_per_second": 81.005, | |
| "eval_steps_per_second": 1.272, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "learning_rate": 4.295081967213115e-05, | |
| "loss": 0.2177, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "eval_bleu": 75.7616, | |
| "eval_gen_len": 9.8622, | |
| "eval_loss": 0.260859876871109, | |
| "eval_runtime": 124.3613, | |
| "eval_samples_per_second": 80.411, | |
| "eval_steps_per_second": 1.262, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "learning_rate": 4.278688524590164e-05, | |
| "loss": 0.2069, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 17.6, | |
| "eval_bleu": 76.485, | |
| "eval_gen_len": 9.8688, | |
| "eval_loss": 0.25088420510292053, | |
| "eval_runtime": 124.1311, | |
| "eval_samples_per_second": 80.56, | |
| "eval_steps_per_second": 1.265, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 4.262295081967213e-05, | |
| "loss": 0.2092, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_bleu": 76.8358, | |
| "eval_gen_len": 9.8662, | |
| "eval_loss": 0.24580596387386322, | |
| "eval_runtime": 123.4291, | |
| "eval_samples_per_second": 81.018, | |
| "eval_steps_per_second": 1.272, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "learning_rate": 4.245901639344262e-05, | |
| "loss": 0.1882, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 18.4, | |
| "eval_bleu": 77.0551, | |
| "eval_gen_len": 9.885, | |
| "eval_loss": 0.24451805651187897, | |
| "eval_runtime": 124.2811, | |
| "eval_samples_per_second": 80.463, | |
| "eval_steps_per_second": 1.263, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "learning_rate": 4.229508196721312e-05, | |
| "loss": 0.1896, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "eval_bleu": 77.6142, | |
| "eval_gen_len": 9.8917, | |
| "eval_loss": 0.23918285965919495, | |
| "eval_runtime": 123.8288, | |
| "eval_samples_per_second": 80.757, | |
| "eval_steps_per_second": 1.268, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "learning_rate": 4.213114754098361e-05, | |
| "loss": 0.1789, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 19.2, | |
| "eval_bleu": 77.6144, | |
| "eval_gen_len": 9.8919, | |
| "eval_loss": 0.2408699244260788, | |
| "eval_runtime": 122.8605, | |
| "eval_samples_per_second": 81.393, | |
| "eval_steps_per_second": 1.278, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "learning_rate": 4.19672131147541e-05, | |
| "loss": 0.175, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "eval_bleu": 78.0091, | |
| "eval_gen_len": 9.8878, | |
| "eval_loss": 0.23325826227664948, | |
| "eval_runtime": 123.7004, | |
| "eval_samples_per_second": 80.841, | |
| "eval_steps_per_second": 1.269, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 4.1803278688524595e-05, | |
| "loss": 0.1734, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_bleu": 78.4943, | |
| "eval_gen_len": 9.9012, | |
| "eval_loss": 0.2311151772737503, | |
| "eval_runtime": 124.6349, | |
| "eval_samples_per_second": 80.234, | |
| "eval_steps_per_second": 1.26, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "learning_rate": 4.163934426229508e-05, | |
| "loss": 0.1543, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "eval_bleu": 78.4902, | |
| "eval_gen_len": 9.8748, | |
| "eval_loss": 0.22952136397361755, | |
| "eval_runtime": 125.3963, | |
| "eval_samples_per_second": 79.747, | |
| "eval_steps_per_second": 1.252, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "learning_rate": 4.1475409836065575e-05, | |
| "loss": 0.1585, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 20.8, | |
| "eval_bleu": 79.0041, | |
| "eval_gen_len": 9.8936, | |
| "eval_loss": 0.22459650039672852, | |
| "eval_runtime": 125.0293, | |
| "eval_samples_per_second": 79.981, | |
| "eval_steps_per_second": 1.256, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "learning_rate": 4.131147540983607e-05, | |
| "loss": 0.1476, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 21.2, | |
| "eval_bleu": 78.922, | |
| "eval_gen_len": 9.8887, | |
| "eval_loss": 0.22683905065059662, | |
| "eval_runtime": 124.4553, | |
| "eval_samples_per_second": 80.35, | |
| "eval_steps_per_second": 1.261, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "learning_rate": 4.1147540983606556e-05, | |
| "loss": 0.1425, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 21.6, | |
| "eval_bleu": 79.2218, | |
| "eval_gen_len": 9.9064, | |
| "eval_loss": 0.2226884663105011, | |
| "eval_runtime": 124.563, | |
| "eval_samples_per_second": 80.281, | |
| "eval_steps_per_second": 1.26, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 4.098360655737705e-05, | |
| "loss": 0.1452, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_bleu": 79.6707, | |
| "eval_gen_len": 9.9056, | |
| "eval_loss": 0.21725259721279144, | |
| "eval_runtime": 124.3401, | |
| "eval_samples_per_second": 80.425, | |
| "eval_steps_per_second": 1.263, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "learning_rate": 4.081967213114754e-05, | |
| "loss": 0.1321, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 22.4, | |
| "eval_bleu": 79.7907, | |
| "eval_gen_len": 9.898, | |
| "eval_loss": 0.21729987859725952, | |
| "eval_runtime": 125.8166, | |
| "eval_samples_per_second": 79.481, | |
| "eval_steps_per_second": 1.248, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "learning_rate": 4.0655737704918036e-05, | |
| "loss": 0.1361, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "eval_bleu": 80.2256, | |
| "eval_gen_len": 9.911, | |
| "eval_loss": 0.20989477634429932, | |
| "eval_runtime": 126.9115, | |
| "eval_samples_per_second": 78.795, | |
| "eval_steps_per_second": 1.237, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "learning_rate": 4.049180327868853e-05, | |
| "loss": 0.128, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 23.2, | |
| "eval_bleu": 80.2837, | |
| "eval_gen_len": 9.9044, | |
| "eval_loss": 0.21322031319141388, | |
| "eval_runtime": 126.5113, | |
| "eval_samples_per_second": 79.044, | |
| "eval_steps_per_second": 1.241, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "learning_rate": 4.0327868852459016e-05, | |
| "loss": 0.1218, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 23.6, | |
| "eval_bleu": 80.6259, | |
| "eval_gen_len": 9.9151, | |
| "eval_loss": 0.21201317012310028, | |
| "eval_runtime": 124.3696, | |
| "eval_samples_per_second": 80.405, | |
| "eval_steps_per_second": 1.262, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 4.016393442622951e-05, | |
| "loss": 0.1248, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_bleu": 81.0878, | |
| "eval_gen_len": 9.9092, | |
| "eval_loss": 0.20630747079849243, | |
| "eval_runtime": 126.764, | |
| "eval_samples_per_second": 78.887, | |
| "eval_steps_per_second": 1.239, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1113, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 24.4, | |
| "eval_bleu": 81.0524, | |
| "eval_gen_len": 9.8915, | |
| "eval_loss": 0.2094167023897171, | |
| "eval_runtime": 125.0054, | |
| "eval_samples_per_second": 79.997, | |
| "eval_steps_per_second": 1.256, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "learning_rate": 3.983606557377049e-05, | |
| "loss": 0.1149, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 24.8, | |
| "eval_bleu": 81.0803, | |
| "eval_gen_len": 9.9123, | |
| "eval_loss": 0.20733323693275452, | |
| "eval_runtime": 125.3281, | |
| "eval_samples_per_second": 79.791, | |
| "eval_steps_per_second": 1.253, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "learning_rate": 3.9672131147540983e-05, | |
| "loss": 0.1085, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "eval_bleu": 81.1504, | |
| "eval_gen_len": 9.8832, | |
| "eval_loss": 0.2083030790090561, | |
| "eval_runtime": 126.8593, | |
| "eval_samples_per_second": 78.828, | |
| "eval_steps_per_second": 1.238, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "learning_rate": 3.950819672131148e-05, | |
| "loss": 0.1057, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 25.6, | |
| "eval_bleu": 81.4995, | |
| "eval_gen_len": 9.901, | |
| "eval_loss": 0.20488029718399048, | |
| "eval_runtime": 124.973, | |
| "eval_samples_per_second": 80.017, | |
| "eval_steps_per_second": 1.256, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 3.934426229508197e-05, | |
| "loss": 0.1075, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_bleu": 81.5477, | |
| "eval_gen_len": 9.9016, | |
| "eval_loss": 0.20381322503089905, | |
| "eval_runtime": 122.9039, | |
| "eval_samples_per_second": 81.364, | |
| "eval_steps_per_second": 1.277, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "learning_rate": 3.9180327868852464e-05, | |
| "loss": 0.0964, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "eval_bleu": 81.7251, | |
| "eval_gen_len": 9.8988, | |
| "eval_loss": 0.20466774702072144, | |
| "eval_runtime": 124.1671, | |
| "eval_samples_per_second": 80.537, | |
| "eval_steps_per_second": 1.264, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "learning_rate": 3.901639344262295e-05, | |
| "loss": 0.0969, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 26.8, | |
| "eval_bleu": 81.8982, | |
| "eval_gen_len": 9.8988, | |
| "eval_loss": 0.20009790360927582, | |
| "eval_runtime": 123.4229, | |
| "eval_samples_per_second": 81.022, | |
| "eval_steps_per_second": 1.272, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "learning_rate": 3.8852459016393444e-05, | |
| "loss": 0.095, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "eval_bleu": 82.1864, | |
| "eval_gen_len": 9.9051, | |
| "eval_loss": 0.2042824774980545, | |
| "eval_runtime": 128.3513, | |
| "eval_samples_per_second": 77.911, | |
| "eval_steps_per_second": 1.223, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "learning_rate": 3.868852459016394e-05, | |
| "loss": 0.0898, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 27.6, | |
| "eval_bleu": 82.2154, | |
| "eval_gen_len": 9.9245, | |
| "eval_loss": 0.2033461481332779, | |
| "eval_runtime": 126.5658, | |
| "eval_samples_per_second": 79.01, | |
| "eval_steps_per_second": 1.24, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 3.8524590163934424e-05, | |
| "loss": 0.0915, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_bleu": 82.3736, | |
| "eval_gen_len": 9.9161, | |
| "eval_loss": 0.19727951288223267, | |
| "eval_runtime": 105.7319, | |
| "eval_samples_per_second": 94.579, | |
| "eval_steps_per_second": 1.485, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 28.4, | |
| "learning_rate": 3.836065573770492e-05, | |
| "loss": 0.0848, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 28.4, | |
| "eval_bleu": 82.495, | |
| "eval_gen_len": 9.9302, | |
| "eval_loss": 0.19799815118312836, | |
| "eval_runtime": 104.9507, | |
| "eval_samples_per_second": 95.283, | |
| "eval_steps_per_second": 1.496, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "learning_rate": 3.819672131147541e-05, | |
| "loss": 0.0845, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 28.8, | |
| "eval_bleu": 82.4837, | |
| "eval_gen_len": 9.9116, | |
| "eval_loss": 0.19767090678215027, | |
| "eval_runtime": 106.5888, | |
| "eval_samples_per_second": 93.818, | |
| "eval_steps_per_second": 1.473, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "learning_rate": 3.8032786885245905e-05, | |
| "loss": 0.0815, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 29.2, | |
| "eval_bleu": 82.4047, | |
| "eval_gen_len": 9.9089, | |
| "eval_loss": 0.19588139653205872, | |
| "eval_runtime": 107.9278, | |
| "eval_samples_per_second": 92.655, | |
| "eval_steps_per_second": 1.455, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "learning_rate": 3.78688524590164e-05, | |
| "loss": 0.0795, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 29.6, | |
| "eval_bleu": 82.722, | |
| "eval_gen_len": 9.9046, | |
| "eval_loss": 0.1979523003101349, | |
| "eval_runtime": 109.3961, | |
| "eval_samples_per_second": 91.411, | |
| "eval_steps_per_second": 1.435, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 3.7704918032786885e-05, | |
| "loss": 0.0808, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_bleu": 82.6578, | |
| "eval_gen_len": 9.8982, | |
| "eval_loss": 0.19637715816497803, | |
| "eval_runtime": 109.5695, | |
| "eval_samples_per_second": 91.266, | |
| "eval_steps_per_second": 1.433, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "learning_rate": 3.754098360655738e-05, | |
| "loss": 0.0732, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 30.4, | |
| "eval_bleu": 82.9919, | |
| "eval_gen_len": 9.9044, | |
| "eval_loss": 0.19727516174316406, | |
| "eval_runtime": 114.1436, | |
| "eval_samples_per_second": 87.609, | |
| "eval_steps_per_second": 1.375, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 30.8, | |
| "learning_rate": 3.737704918032787e-05, | |
| "loss": 0.0746, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 30.8, | |
| "eval_bleu": 82.6143, | |
| "eval_gen_len": 9.9165, | |
| "eval_loss": 0.19912780821323395, | |
| "eval_runtime": 111.6449, | |
| "eval_samples_per_second": 89.57, | |
| "eval_steps_per_second": 1.406, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 31.2, | |
| "learning_rate": 3.721311475409836e-05, | |
| "loss": 0.0707, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 31.2, | |
| "eval_bleu": 82.9765, | |
| "eval_gen_len": 9.9001, | |
| "eval_loss": 0.19909825921058655, | |
| "eval_runtime": 116.7381, | |
| "eval_samples_per_second": 85.662, | |
| "eval_steps_per_second": 1.345, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 31.6, | |
| "learning_rate": 3.704918032786885e-05, | |
| "loss": 0.0709, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 31.6, | |
| "eval_bleu": 83.0914, | |
| "eval_gen_len": 9.8965, | |
| "eval_loss": 0.19552859663963318, | |
| "eval_runtime": 112.5699, | |
| "eval_samples_per_second": 88.834, | |
| "eval_steps_per_second": 1.395, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 3.6885245901639346e-05, | |
| "loss": 0.0719, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_bleu": 83.2552, | |
| "eval_gen_len": 9.9192, | |
| "eval_loss": 0.1897631287574768, | |
| "eval_runtime": 111.6178, | |
| "eval_samples_per_second": 89.591, | |
| "eval_steps_per_second": 1.407, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 32.4, | |
| "learning_rate": 3.672131147540984e-05, | |
| "loss": 0.0645, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 32.4, | |
| "eval_bleu": 83.2469, | |
| "eval_gen_len": 9.9243, | |
| "eval_loss": 0.19487988948822021, | |
| "eval_runtime": 113.1976, | |
| "eval_samples_per_second": 88.341, | |
| "eval_steps_per_second": 1.387, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "learning_rate": 3.655737704918033e-05, | |
| "loss": 0.0668, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "eval_bleu": 83.554, | |
| "eval_gen_len": 9.927, | |
| "eval_loss": 0.19293373823165894, | |
| "eval_runtime": 115.9999, | |
| "eval_samples_per_second": 86.207, | |
| "eval_steps_per_second": 1.353, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "learning_rate": 3.6393442622950826e-05, | |
| "loss": 0.0626, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "eval_bleu": 83.451, | |
| "eval_gen_len": 9.919, | |
| "eval_loss": 0.19273407757282257, | |
| "eval_runtime": 118.7666, | |
| "eval_samples_per_second": 84.199, | |
| "eval_steps_per_second": 1.322, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 33.6, | |
| "learning_rate": 3.622950819672131e-05, | |
| "loss": 0.0616, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 33.6, | |
| "eval_bleu": 83.7799, | |
| "eval_gen_len": 9.9142, | |
| "eval_loss": 0.1896318793296814, | |
| "eval_runtime": 119.0485, | |
| "eval_samples_per_second": 83.999, | |
| "eval_steps_per_second": 1.319, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 3.6065573770491806e-05, | |
| "loss": 0.0636, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_bleu": 83.5126, | |
| "eval_gen_len": 9.8988, | |
| "eval_loss": 0.19057811796665192, | |
| "eval_runtime": 119.0601, | |
| "eval_samples_per_second": 83.991, | |
| "eval_steps_per_second": 1.319, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 34.4, | |
| "learning_rate": 3.590163934426229e-05, | |
| "loss": 0.0576, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 34.4, | |
| "eval_bleu": 83.5655, | |
| "eval_gen_len": 9.9154, | |
| "eval_loss": 0.19394218921661377, | |
| "eval_runtime": 119.5237, | |
| "eval_samples_per_second": 83.665, | |
| "eval_steps_per_second": 1.314, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 34.8, | |
| "learning_rate": 3.5737704918032786e-05, | |
| "loss": 0.0585, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 34.8, | |
| "eval_bleu": 83.6287, | |
| "eval_gen_len": 9.9233, | |
| "eval_loss": 0.19454576075077057, | |
| "eval_runtime": 119.4134, | |
| "eval_samples_per_second": 83.743, | |
| "eval_steps_per_second": 1.315, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 35.2, | |
| "learning_rate": 3.557377049180328e-05, | |
| "loss": 0.0568, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 35.2, | |
| "eval_bleu": 83.6904, | |
| "eval_gen_len": 9.907, | |
| "eval_loss": 0.19391930103302002, | |
| "eval_runtime": 119.702, | |
| "eval_samples_per_second": 83.541, | |
| "eval_steps_per_second": 1.312, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 35.6, | |
| "learning_rate": 3.5409836065573773e-05, | |
| "loss": 0.0551, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 35.6, | |
| "eval_bleu": 83.9373, | |
| "eval_gen_len": 9.9202, | |
| "eval_loss": 0.19054347276687622, | |
| "eval_runtime": 118.199, | |
| "eval_samples_per_second": 84.603, | |
| "eval_steps_per_second": 1.328, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 3.524590163934427e-05, | |
| "loss": 0.0563, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_bleu": 84.1348, | |
| "eval_gen_len": 9.9207, | |
| "eval_loss": 0.1921459436416626, | |
| "eval_runtime": 119.546, | |
| "eval_samples_per_second": 83.65, | |
| "eval_steps_per_second": 1.313, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 36.4, | |
| "learning_rate": 3.508196721311476e-05, | |
| "loss": 0.0514, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 36.4, | |
| "eval_bleu": 84.1097, | |
| "eval_gen_len": 9.9185, | |
| "eval_loss": 0.19464583694934845, | |
| "eval_runtime": 119.8131, | |
| "eval_samples_per_second": 83.463, | |
| "eval_steps_per_second": 1.31, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "learning_rate": 3.491803278688525e-05, | |
| "loss": 0.0534, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 36.8, | |
| "eval_bleu": 84.0075, | |
| "eval_gen_len": 9.9111, | |
| "eval_loss": 0.19089433550834656, | |
| "eval_runtime": 118.1118, | |
| "eval_samples_per_second": 84.666, | |
| "eval_steps_per_second": 1.329, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 37.2, | |
| "learning_rate": 3.475409836065574e-05, | |
| "loss": 0.05, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 37.2, | |
| "eval_bleu": 84.0187, | |
| "eval_gen_len": 9.9198, | |
| "eval_loss": 0.1975044161081314, | |
| "eval_runtime": 118.0704, | |
| "eval_samples_per_second": 84.695, | |
| "eval_steps_per_second": 1.33, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 37.6, | |
| "learning_rate": 3.459016393442623e-05, | |
| "loss": 0.0498, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 37.6, | |
| "eval_bleu": 84.0124, | |
| "eval_gen_len": 9.9205, | |
| "eval_loss": 0.19323572516441345, | |
| "eval_runtime": 117.9012, | |
| "eval_samples_per_second": 84.817, | |
| "eval_steps_per_second": 1.332, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 3.442622950819672e-05, | |
| "loss": 0.0496, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_bleu": 84.2227, | |
| "eval_gen_len": 9.9151, | |
| "eval_loss": 0.1907936930656433, | |
| "eval_runtime": 118.1549, | |
| "eval_samples_per_second": 84.635, | |
| "eval_steps_per_second": 1.329, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 38.4, | |
| "learning_rate": 3.4262295081967214e-05, | |
| "loss": 0.0474, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 38.4, | |
| "eval_bleu": 84.1768, | |
| "eval_gen_len": 9.9068, | |
| "eval_loss": 0.192445769906044, | |
| "eval_runtime": 121.5905, | |
| "eval_samples_per_second": 82.243, | |
| "eval_steps_per_second": 1.291, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 38.8, | |
| "learning_rate": 3.409836065573771e-05, | |
| "loss": 0.0473, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 38.8, | |
| "eval_bleu": 84.2946, | |
| "eval_gen_len": 9.9193, | |
| "eval_loss": 0.1934969127178192, | |
| "eval_runtime": 119.2854, | |
| "eval_samples_per_second": 83.833, | |
| "eval_steps_per_second": 1.316, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 39.2, | |
| "learning_rate": 3.39344262295082e-05, | |
| "loss": 0.0454, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 39.2, | |
| "eval_bleu": 84.3262, | |
| "eval_gen_len": 9.9164, | |
| "eval_loss": 0.1953597515821457, | |
| "eval_runtime": 119.0304, | |
| "eval_samples_per_second": 84.012, | |
| "eval_steps_per_second": 1.319, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 39.6, | |
| "learning_rate": 3.3770491803278695e-05, | |
| "loss": 0.0453, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 39.6, | |
| "eval_bleu": 84.4165, | |
| "eval_gen_len": 9.9107, | |
| "eval_loss": 0.19031359255313873, | |
| "eval_runtime": 120.2198, | |
| "eval_samples_per_second": 83.181, | |
| "eval_steps_per_second": 1.306, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 3.360655737704918e-05, | |
| "loss": 0.0461, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_bleu": 84.4289, | |
| "eval_gen_len": 9.9216, | |
| "eval_loss": 0.1906299889087677, | |
| "eval_runtime": 118.8797, | |
| "eval_samples_per_second": 84.119, | |
| "eval_steps_per_second": 1.321, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 40.4, | |
| "learning_rate": 3.3442622950819675e-05, | |
| "loss": 0.0415, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 40.4, | |
| "eval_bleu": 84.4636, | |
| "eval_gen_len": 9.9082, | |
| "eval_loss": 0.19302137196063995, | |
| "eval_runtime": 118.8718, | |
| "eval_samples_per_second": 84.124, | |
| "eval_steps_per_second": 1.321, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 40.8, | |
| "learning_rate": 3.327868852459017e-05, | |
| "loss": 0.044, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 40.8, | |
| "eval_bleu": 84.5092, | |
| "eval_gen_len": 9.9237, | |
| "eval_loss": 0.18893210589885712, | |
| "eval_runtime": 120.9835, | |
| "eval_samples_per_second": 82.656, | |
| "eval_steps_per_second": 1.298, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 41.2, | |
| "learning_rate": 3.3114754098360655e-05, | |
| "loss": 0.043, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 41.2, | |
| "eval_bleu": 84.5908, | |
| "eval_gen_len": 9.9221, | |
| "eval_loss": 0.1906319111585617, | |
| "eval_runtime": 122.6906, | |
| "eval_samples_per_second": 81.506, | |
| "eval_steps_per_second": 1.28, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 41.6, | |
| "learning_rate": 3.295081967213115e-05, | |
| "loss": 0.0413, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 41.6, | |
| "eval_bleu": 84.7197, | |
| "eval_gen_len": 9.9113, | |
| "eval_loss": 0.19282755255699158, | |
| "eval_runtime": 121.4487, | |
| "eval_samples_per_second": 82.339, | |
| "eval_steps_per_second": 1.293, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "learning_rate": 3.2786885245901635e-05, | |
| "loss": 0.0401, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_bleu": 84.7895, | |
| "eval_gen_len": 9.9215, | |
| "eval_loss": 0.19361305236816406, | |
| "eval_runtime": 120.7154, | |
| "eval_samples_per_second": 82.839, | |
| "eval_steps_per_second": 1.301, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 42.4, | |
| "learning_rate": 3.2622950819672136e-05, | |
| "loss": 0.0385, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 42.4, | |
| "eval_bleu": 84.7187, | |
| "eval_gen_len": 9.9239, | |
| "eval_loss": 0.19195546209812164, | |
| "eval_runtime": 119.559, | |
| "eval_samples_per_second": 83.641, | |
| "eval_steps_per_second": 1.313, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 42.8, | |
| "learning_rate": 3.245901639344263e-05, | |
| "loss": 0.0387, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 42.8, | |
| "eval_bleu": 84.7193, | |
| "eval_gen_len": 9.9146, | |
| "eval_loss": 0.19131682813167572, | |
| "eval_runtime": 121.1877, | |
| "eval_samples_per_second": 82.517, | |
| "eval_steps_per_second": 1.296, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 43.2, | |
| "learning_rate": 3.2295081967213116e-05, | |
| "loss": 0.0389, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 43.2, | |
| "eval_bleu": 84.6862, | |
| "eval_gen_len": 9.9225, | |
| "eval_loss": 0.19187390804290771, | |
| "eval_runtime": 120.9642, | |
| "eval_samples_per_second": 82.669, | |
| "eval_steps_per_second": 1.298, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "learning_rate": 3.213114754098361e-05, | |
| "loss": 0.0372, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 43.6, | |
| "eval_bleu": 84.7769, | |
| "eval_gen_len": 9.9285, | |
| "eval_loss": 0.19241966307163239, | |
| "eval_runtime": 125.5973, | |
| "eval_samples_per_second": 79.62, | |
| "eval_steps_per_second": 1.25, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "learning_rate": 3.19672131147541e-05, | |
| "loss": 0.0383, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_bleu": 84.9535, | |
| "eval_gen_len": 9.9347, | |
| "eval_loss": 0.19236235320568085, | |
| "eval_runtime": 122.9448, | |
| "eval_samples_per_second": 81.337, | |
| "eval_steps_per_second": 1.277, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 44.4, | |
| "learning_rate": 3.180327868852459e-05, | |
| "loss": 0.0347, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 44.4, | |
| "eval_bleu": 84.9326, | |
| "eval_gen_len": 9.9288, | |
| "eval_loss": 0.1917337328195572, | |
| "eval_runtime": 121.8141, | |
| "eval_samples_per_second": 82.092, | |
| "eval_steps_per_second": 1.289, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 44.8, | |
| "learning_rate": 3.163934426229508e-05, | |
| "loss": 0.0364, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 44.8, | |
| "eval_bleu": 85.0653, | |
| "eval_gen_len": 9.9159, | |
| "eval_loss": 0.19078262150287628, | |
| "eval_runtime": 122.743, | |
| "eval_samples_per_second": 81.471, | |
| "eval_steps_per_second": 1.279, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 45.2, | |
| "learning_rate": 3.1475409836065576e-05, | |
| "loss": 0.035, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 45.2, | |
| "eval_bleu": 84.8097, | |
| "eval_gen_len": 9.9093, | |
| "eval_loss": 0.19484488666057587, | |
| "eval_runtime": 121.3548, | |
| "eval_samples_per_second": 82.403, | |
| "eval_steps_per_second": 1.294, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 45.6, | |
| "learning_rate": 3.131147540983606e-05, | |
| "loss": 0.0338, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 45.6, | |
| "eval_bleu": 84.9659, | |
| "eval_gen_len": 9.9217, | |
| "eval_loss": 0.1974213719367981, | |
| "eval_runtime": 120.7302, | |
| "eval_samples_per_second": 82.829, | |
| "eval_steps_per_second": 1.3, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "learning_rate": 3.114754098360656e-05, | |
| "loss": 0.0353, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_bleu": 85.0476, | |
| "eval_gen_len": 9.9244, | |
| "eval_loss": 0.19343513250350952, | |
| "eval_runtime": 118.747, | |
| "eval_samples_per_second": 84.213, | |
| "eval_steps_per_second": 1.322, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 46.4, | |
| "learning_rate": 3.098360655737705e-05, | |
| "loss": 0.0331, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 46.4, | |
| "eval_bleu": 85.0708, | |
| "eval_gen_len": 9.9146, | |
| "eval_loss": 0.19627127051353455, | |
| "eval_runtime": 121.8836, | |
| "eval_samples_per_second": 82.046, | |
| "eval_steps_per_second": 1.288, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 46.8, | |
| "learning_rate": 3.0819672131147544e-05, | |
| "loss": 0.0333, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 46.8, | |
| "eval_bleu": 84.9386, | |
| "eval_gen_len": 9.9224, | |
| "eval_loss": 0.19614210724830627, | |
| "eval_runtime": 121.7048, | |
| "eval_samples_per_second": 82.166, | |
| "eval_steps_per_second": 1.29, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "learning_rate": 3.065573770491804e-05, | |
| "loss": 0.0326, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "eval_bleu": 84.9433, | |
| "eval_gen_len": 9.918, | |
| "eval_loss": 0.19616641104221344, | |
| "eval_runtime": 120.7919, | |
| "eval_samples_per_second": 82.787, | |
| "eval_steps_per_second": 1.3, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 47.6, | |
| "learning_rate": 3.0491803278688524e-05, | |
| "loss": 0.0312, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 47.6, | |
| "eval_bleu": 84.8756, | |
| "eval_gen_len": 9.9365, | |
| "eval_loss": 0.1943608969449997, | |
| "eval_runtime": 120.9831, | |
| "eval_samples_per_second": 82.656, | |
| "eval_steps_per_second": 1.298, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "learning_rate": 3.0327868852459017e-05, | |
| "loss": 0.0327, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_bleu": 84.8764, | |
| "eval_gen_len": 9.9289, | |
| "eval_loss": 0.19547414779663086, | |
| "eval_runtime": 120.3613, | |
| "eval_samples_per_second": 83.083, | |
| "eval_steps_per_second": 1.304, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 48.4, | |
| "learning_rate": 3.016393442622951e-05, | |
| "loss": 0.0303, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 48.4, | |
| "eval_bleu": 85.0585, | |
| "eval_gen_len": 9.9279, | |
| "eval_loss": 0.19307781755924225, | |
| "eval_runtime": 121.2016, | |
| "eval_samples_per_second": 82.507, | |
| "eval_steps_per_second": 1.295, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 48.8, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0305, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 48.8, | |
| "eval_bleu": 85.2865, | |
| "eval_gen_len": 9.9287, | |
| "eval_loss": 0.19452740252017975, | |
| "eval_runtime": 123.6999, | |
| "eval_samples_per_second": 80.841, | |
| "eval_steps_per_second": 1.269, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 49.2, | |
| "learning_rate": 2.9836065573770494e-05, | |
| "loss": 0.0296, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 49.2, | |
| "eval_bleu": 85.1538, | |
| "eval_gen_len": 9.9253, | |
| "eval_loss": 0.19456754624843597, | |
| "eval_runtime": 123.3373, | |
| "eval_samples_per_second": 81.078, | |
| "eval_steps_per_second": 1.273, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 49.6, | |
| "learning_rate": 2.967213114754098e-05, | |
| "loss": 0.0295, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 49.6, | |
| "eval_bleu": 85.3376, | |
| "eval_gen_len": 9.9427, | |
| "eval_loss": 0.19405782222747803, | |
| "eval_runtime": 122.2953, | |
| "eval_samples_per_second": 81.769, | |
| "eval_steps_per_second": 1.284, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 2.9508196721311478e-05, | |
| "loss": 0.03, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_bleu": 85.0825, | |
| "eval_gen_len": 9.918, | |
| "eval_loss": 0.19235928356647491, | |
| "eval_runtime": 121.5967, | |
| "eval_samples_per_second": 82.239, | |
| "eval_steps_per_second": 1.291, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 50.4, | |
| "learning_rate": 2.934426229508197e-05, | |
| "loss": 0.028, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 50.4, | |
| "eval_bleu": 85.2126, | |
| "eval_gen_len": 9.9178, | |
| "eval_loss": 0.1952826976776123, | |
| "eval_runtime": 121.3203, | |
| "eval_samples_per_second": 82.426, | |
| "eval_steps_per_second": 1.294, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 50.8, | |
| "learning_rate": 2.9180327868852458e-05, | |
| "loss": 0.0295, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 50.8, | |
| "eval_bleu": 85.1624, | |
| "eval_gen_len": 9.9343, | |
| "eval_loss": 0.1901182234287262, | |
| "eval_runtime": 122.2317, | |
| "eval_samples_per_second": 81.812, | |
| "eval_steps_per_second": 1.284, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 51.2, | |
| "learning_rate": 2.901639344262295e-05, | |
| "loss": 0.028, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 51.2, | |
| "eval_bleu": 85.092, | |
| "eval_gen_len": 9.9193, | |
| "eval_loss": 0.19715240597724915, | |
| "eval_runtime": 121.6797, | |
| "eval_samples_per_second": 82.183, | |
| "eval_steps_per_second": 1.29, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 51.6, | |
| "learning_rate": 2.8852459016393445e-05, | |
| "loss": 0.0279, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 51.6, | |
| "eval_bleu": 85.3237, | |
| "eval_gen_len": 9.9341, | |
| "eval_loss": 0.19140399992465973, | |
| "eval_runtime": 122.9196, | |
| "eval_samples_per_second": 81.354, | |
| "eval_steps_per_second": 1.277, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "learning_rate": 2.8688524590163935e-05, | |
| "loss": 0.0275, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_bleu": 85.3125, | |
| "eval_gen_len": 9.9184, | |
| "eval_loss": 0.19335660338401794, | |
| "eval_runtime": 120.7401, | |
| "eval_samples_per_second": 82.823, | |
| "eval_steps_per_second": 1.3, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 52.4, | |
| "learning_rate": 2.852459016393443e-05, | |
| "loss": 0.0266, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 52.4, | |
| "eval_bleu": 85.3497, | |
| "eval_gen_len": 9.9376, | |
| "eval_loss": 0.19615261256694794, | |
| "eval_runtime": 122.3086, | |
| "eval_samples_per_second": 81.76, | |
| "eval_steps_per_second": 1.284, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 52.8, | |
| "learning_rate": 2.8360655737704922e-05, | |
| "loss": 0.0269, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 52.8, | |
| "eval_bleu": 85.2571, | |
| "eval_gen_len": 9.9076, | |
| "eval_loss": 0.19512337446212769, | |
| "eval_runtime": 122.2496, | |
| "eval_samples_per_second": 81.8, | |
| "eval_steps_per_second": 1.284, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 53.2, | |
| "learning_rate": 2.819672131147541e-05, | |
| "loss": 0.026, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 53.2, | |
| "eval_bleu": 85.3837, | |
| "eval_gen_len": 9.9211, | |
| "eval_loss": 0.195496067404747, | |
| "eval_runtime": 122.1131, | |
| "eval_samples_per_second": 81.891, | |
| "eval_steps_per_second": 1.286, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 53.6, | |
| "learning_rate": 2.8032786885245906e-05, | |
| "loss": 0.0257, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 53.6, | |
| "eval_bleu": 85.3563, | |
| "eval_gen_len": 9.9245, | |
| "eval_loss": 0.19710040092468262, | |
| "eval_runtime": 125.1877, | |
| "eval_samples_per_second": 79.88, | |
| "eval_steps_per_second": 1.254, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "learning_rate": 2.7868852459016392e-05, | |
| "loss": 0.0263, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_bleu": 85.3548, | |
| "eval_gen_len": 9.9285, | |
| "eval_loss": 0.19391243159770966, | |
| "eval_runtime": 121.9609, | |
| "eval_samples_per_second": 81.993, | |
| "eval_steps_per_second": 1.287, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 54.4, | |
| "learning_rate": 2.7704918032786886e-05, | |
| "loss": 0.0251, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 54.4, | |
| "eval_bleu": 85.3867, | |
| "eval_gen_len": 9.9223, | |
| "eval_loss": 0.19642965495586395, | |
| "eval_runtime": 122.2878, | |
| "eval_samples_per_second": 81.774, | |
| "eval_steps_per_second": 1.284, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 54.8, | |
| "learning_rate": 2.754098360655738e-05, | |
| "loss": 0.0258, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 54.8, | |
| "eval_bleu": 85.3325, | |
| "eval_gen_len": 9.9353, | |
| "eval_loss": 0.19264063239097595, | |
| "eval_runtime": 123.3008, | |
| "eval_samples_per_second": 81.103, | |
| "eval_steps_per_second": 1.273, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 55.2, | |
| "learning_rate": 2.737704918032787e-05, | |
| "loss": 0.0251, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 55.2, | |
| "eval_bleu": 85.4551, | |
| "eval_gen_len": 9.9308, | |
| "eval_loss": 0.19382888078689575, | |
| "eval_runtime": 126.2193, | |
| "eval_samples_per_second": 79.227, | |
| "eval_steps_per_second": 1.244, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 55.6, | |
| "learning_rate": 2.7213114754098363e-05, | |
| "loss": 0.0244, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 55.6, | |
| "eval_bleu": 85.309, | |
| "eval_gen_len": 9.9219, | |
| "eval_loss": 0.19579891860485077, | |
| "eval_runtime": 117.3342, | |
| "eval_samples_per_second": 85.227, | |
| "eval_steps_per_second": 1.338, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "learning_rate": 2.7049180327868856e-05, | |
| "loss": 0.0255, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_bleu": 85.3467, | |
| "eval_gen_len": 9.9309, | |
| "eval_loss": 0.19363795220851898, | |
| "eval_runtime": 117.3138, | |
| "eval_samples_per_second": 85.241, | |
| "eval_steps_per_second": 1.338, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 56.4, | |
| "learning_rate": 2.6885245901639343e-05, | |
| "loss": 0.0237, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 56.4, | |
| "eval_bleu": 85.4309, | |
| "eval_gen_len": 9.919, | |
| "eval_loss": 0.19596821069717407, | |
| "eval_runtime": 116.9886, | |
| "eval_samples_per_second": 85.478, | |
| "eval_steps_per_second": 1.342, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 56.8, | |
| "learning_rate": 2.6721311475409837e-05, | |
| "loss": 0.0239, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 56.8, | |
| "eval_bleu": 85.4014, | |
| "eval_gen_len": 9.934, | |
| "eval_loss": 0.1943485587835312, | |
| "eval_runtime": 121.5859, | |
| "eval_samples_per_second": 82.246, | |
| "eval_steps_per_second": 1.291, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 57.2, | |
| "learning_rate": 2.6557377049180327e-05, | |
| "loss": 0.0231, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 57.2, | |
| "eval_bleu": 85.621, | |
| "eval_gen_len": 9.9301, | |
| "eval_loss": 0.19711793959140778, | |
| "eval_runtime": 118.739, | |
| "eval_samples_per_second": 84.218, | |
| "eval_steps_per_second": 1.322, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 57.6, | |
| "learning_rate": 2.639344262295082e-05, | |
| "loss": 0.0229, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 57.6, | |
| "eval_bleu": 85.5557, | |
| "eval_gen_len": 9.9331, | |
| "eval_loss": 0.19833779335021973, | |
| "eval_runtime": 119.0068, | |
| "eval_samples_per_second": 84.029, | |
| "eval_steps_per_second": 1.319, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "learning_rate": 2.6229508196721314e-05, | |
| "loss": 0.0231, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_bleu": 85.6012, | |
| "eval_gen_len": 9.9243, | |
| "eval_loss": 0.1915123611688614, | |
| "eval_runtime": 115.0294, | |
| "eval_samples_per_second": 86.934, | |
| "eval_steps_per_second": 1.365, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 58.4, | |
| "learning_rate": 2.6065573770491804e-05, | |
| "loss": 0.0219, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 58.4, | |
| "eval_bleu": 85.4561, | |
| "eval_gen_len": 9.9202, | |
| "eval_loss": 0.19812047481536865, | |
| "eval_runtime": 114.3888, | |
| "eval_samples_per_second": 87.421, | |
| "eval_steps_per_second": 1.373, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 58.8, | |
| "learning_rate": 2.5901639344262297e-05, | |
| "loss": 0.0227, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 58.8, | |
| "eval_bleu": 85.442, | |
| "eval_gen_len": 9.9242, | |
| "eval_loss": 0.19562363624572754, | |
| "eval_runtime": 112.3308, | |
| "eval_samples_per_second": 89.023, | |
| "eval_steps_per_second": 1.398, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 59.2, | |
| "learning_rate": 2.573770491803279e-05, | |
| "loss": 0.0226, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 59.2, | |
| "eval_bleu": 85.4558, | |
| "eval_gen_len": 9.9199, | |
| "eval_loss": 0.19775182008743286, | |
| "eval_runtime": 112.3894, | |
| "eval_samples_per_second": 88.976, | |
| "eval_steps_per_second": 1.397, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 59.6, | |
| "learning_rate": 2.5573770491803277e-05, | |
| "loss": 0.0213, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 59.6, | |
| "eval_bleu": 85.6858, | |
| "eval_gen_len": 9.9401, | |
| "eval_loss": 0.19891192018985748, | |
| "eval_runtime": 111.4365, | |
| "eval_samples_per_second": 89.737, | |
| "eval_steps_per_second": 1.409, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "learning_rate": 2.540983606557377e-05, | |
| "loss": 0.0222, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_bleu": 85.5013, | |
| "eval_gen_len": 9.9235, | |
| "eval_loss": 0.19413892924785614, | |
| "eval_runtime": 107.6887, | |
| "eval_samples_per_second": 92.86, | |
| "eval_steps_per_second": 1.458, | |
| "step": 75000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 152500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 122, | |
| "save_steps": 500, | |
| "total_flos": 2.3617192120005427e+17, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |