{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.905829596412556,
  "eval_steps": 500,
  "global_step": 1700,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11210762331838565,
      "grad_norm": 2.0963618755340576,
      "learning_rate": 2.793296089385475e-05,
      "loss": 4.9671,
      "step": 100
    },
    {
      "epoch": 0.2242152466367713,
      "grad_norm": 0.3255109190940857,
      "learning_rate": 4.9408099688473526e-05,
      "loss": 4.7197,
      "step": 200
    },
    {
      "epoch": 0.336322869955157,
      "grad_norm": 1.1150901317596436,
      "learning_rate": 4.632398753894081e-05,
      "loss": 4.5727,
      "step": 300
    },
    {
      "epoch": 0.4484304932735426,
      "grad_norm": 4.738381862640381,
      "learning_rate": 4.320872274143302e-05,
      "loss": 4.5585,
      "step": 400
    },
    {
      "epoch": 0.5605381165919282,
      "grad_norm": 0.3568013906478882,
      "learning_rate": 4.0093457943925236e-05,
      "loss": 4.5399,
      "step": 500
    },
    {
      "epoch": 0.672645739910314,
      "grad_norm": 0.558647871017456,
      "learning_rate": 3.700934579439253e-05,
      "loss": 4.4905,
      "step": 600
    },
    {
      "epoch": 0.7847533632286996,
      "grad_norm": 19.64227867126465,
      "learning_rate": 3.3894080996884734e-05,
      "loss": 4.4371,
      "step": 700
    },
    {
      "epoch": 0.8968609865470852,
      "grad_norm": 2.4077465534210205,
      "learning_rate": 3.077881619937695e-05,
      "loss": 4.4867,
      "step": 800
    },
    {
      "epoch": 1.0089686098654709,
      "grad_norm": 1.3140878677368164,
      "learning_rate": 2.7663551401869157e-05,
      "loss": 4.4675,
      "step": 900
    },
    {
      "epoch": 1.1210762331838564,
      "grad_norm": 1.7104419469833374,
      "learning_rate": 2.4579439252336452e-05,
      "loss": 4.432,
      "step": 1000
    },
    {
      "epoch": 1.2331838565022422,
      "grad_norm": 7.763595104217529,
      "learning_rate": 2.1464174454828662e-05,
      "loss": 4.4185,
      "step": 1100
    },
    {
      "epoch": 1.3452914798206277,
      "grad_norm": 0.4148617088794708,
      "learning_rate": 1.8348909657320872e-05,
      "loss": 4.428,
      "step": 1200
    },
    {
      "epoch": 1.4573991031390134,
      "grad_norm": 2.4640185832977295,
      "learning_rate": 1.5233644859813085e-05,
      "loss": 4.4133,
      "step": 1300
    },
    {
      "epoch": 1.5695067264573992,
      "grad_norm": 0.4486638903617859,
      "learning_rate": 1.2118380062305297e-05,
      "loss": 4.3019,
      "step": 1400
    },
    {
      "epoch": 1.6816143497757847,
      "grad_norm": 191.92808532714844,
      "learning_rate": 9.003115264797508e-06,
      "loss": 4.4209,
      "step": 1500
    },
    {
      "epoch": 1.7937219730941703,
      "grad_norm": 2.8736190795898438,
      "learning_rate": 5.88785046728972e-06,
      "loss": 4.3696,
      "step": 1600
    },
    {
      "epoch": 1.905829596412556,
      "grad_norm": 0.13583791255950928,
      "learning_rate": 2.7725856697819316e-06,
      "loss": 4.3962,
      "step": 1700
    }
  ],
  "logging_steps": 100,
  "max_steps": 1784,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}