{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9822485207100593, "eval_steps": 500, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23668639053254437, "grad_norm": 7.8563451765334875, "learning_rate": 5e-06, "loss": 0.7715, "step": 10 }, { "epoch": 0.47337278106508873, "grad_norm": 1.2653404754348714, "learning_rate": 5e-06, "loss": 0.6752, "step": 20 }, { "epoch": 0.7100591715976331, "grad_norm": 0.7722723244728256, "learning_rate": 5e-06, "loss": 0.6462, "step": 30 }, { "epoch": 0.9467455621301775, "grad_norm": 0.5467401391418546, "learning_rate": 5e-06, "loss": 0.629, "step": 40 }, { "epoch": 0.9940828402366864, "eval_loss": 0.6316158175468445, "eval_runtime": 29.2463, "eval_samples_per_second": 38.911, "eval_steps_per_second": 0.615, "step": 42 }, { "epoch": 1.183431952662722, "grad_norm": 0.6046791079019197, "learning_rate": 5e-06, "loss": 0.6475, "step": 50 }, { "epoch": 1.4201183431952662, "grad_norm": 0.5148633963275753, "learning_rate": 5e-06, "loss": 0.5883, "step": 60 }, { "epoch": 1.6568047337278107, "grad_norm": 0.5201018481905905, "learning_rate": 5e-06, "loss": 0.5813, "step": 70 }, { "epoch": 1.893491124260355, "grad_norm": 0.876067375733763, "learning_rate": 5e-06, "loss": 0.5807, "step": 80 }, { "epoch": 1.9881656804733727, "eval_loss": 0.6118831038475037, "eval_runtime": 29.9679, "eval_samples_per_second": 37.974, "eval_steps_per_second": 0.601, "step": 84 }, { "epoch": 2.1301775147928996, "grad_norm": 1.237055001310703, "learning_rate": 5e-06, "loss": 0.5993, "step": 90 }, { "epoch": 2.366863905325444, "grad_norm": 0.657072150861852, "learning_rate": 5e-06, "loss": 0.5376, "step": 100 }, { "epoch": 2.603550295857988, "grad_norm": 0.6298128100266783, "learning_rate": 5e-06, "loss": 0.5388, "step": 110 }, { "epoch": 2.8402366863905324, "grad_norm": 0.6849566956882345, "learning_rate": 5e-06, "loss": 0.5369, "step": 120 }, { "epoch": 2.9822485207100593, "eval_loss": 0.6119207143783569, "eval_runtime": 28.1349, "eval_samples_per_second": 40.448, "eval_steps_per_second": 0.64, "step": 126 }, { "epoch": 2.9822485207100593, "step": 126, "total_flos": 210845313269760.0, "train_loss": 0.6075608276185536, "train_runtime": 4413.1086, "train_samples_per_second": 14.697, "train_steps_per_second": 0.029 } ], "logging_steps": 10, "max_steps": 126, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 210845313269760.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }