| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.05790622665655238, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002895311332827619, |
| "grad_norm": 1.7338896989822388, |
| "learning_rate": 4.985523443335862e-05, |
| "loss": 3.1507, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.005790622665655238, |
| "grad_norm": 1.2579742670059204, |
| "learning_rate": 4.9710468866717244e-05, |
| "loss": 2.6357, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.008685933998482857, |
| "grad_norm": 0.9996505975723267, |
| "learning_rate": 4.956570330007586e-05, |
| "loss": 2.4288, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.011581245331310476, |
| "grad_norm": 0.9657206535339355, |
| "learning_rate": 4.942093773343448e-05, |
| "loss": 2.2119, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.014476556664138095, |
| "grad_norm": 1.044640064239502, |
| "learning_rate": 4.92761721667931e-05, |
| "loss": 2.1225, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.017371867996965714, |
| "grad_norm": 0.8428456783294678, |
| "learning_rate": 4.913140660015172e-05, |
| "loss": 2.0409, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.02026717932979333, |
| "grad_norm": 1.2827749252319336, |
| "learning_rate": 4.898664103351033e-05, |
| "loss": 1.9031, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.023162490662620952, |
| "grad_norm": 0.9291247129440308, |
| "learning_rate": 4.884187546686895e-05, |
| "loss": 1.8438, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.02605780199544857, |
| "grad_norm": 0.7757951617240906, |
| "learning_rate": 4.8697109900227575e-05, |
| "loss": 1.7833, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.02895311332827619, |
| "grad_norm": 1.520269751548767, |
| "learning_rate": 4.855234433358619e-05, |
| "loss": 1.6942, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.03184842466110381, |
| "grad_norm": 1.2558553218841553, |
| "learning_rate": 4.840757876694481e-05, |
| "loss": 1.6669, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.03474373599393143, |
| "grad_norm": 1.3526251316070557, |
| "learning_rate": 4.826281320030343e-05, |
| "loss": 1.6115, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.037639047326759045, |
| "grad_norm": 2.5751099586486816, |
| "learning_rate": 4.811804763366205e-05, |
| "loss": 1.5629, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.04053435865958666, |
| "grad_norm": 1.3569105863571167, |
| "learning_rate": 4.797328206702067e-05, |
| "loss": 1.5258, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.04342966999241429, |
| "grad_norm": 1.7734428644180298, |
| "learning_rate": 4.782851650037929e-05, |
| "loss": 1.475, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.046324981325241904, |
| "grad_norm": 0.7975415587425232, |
| "learning_rate": 4.7683750933737905e-05, |
| "loss": 1.437, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.04922029265806952, |
| "grad_norm": 0.8590123057365417, |
| "learning_rate": 4.753898536709653e-05, |
| "loss": 1.4263, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.05211560399089714, |
| "grad_norm": 1.1225152015686035, |
| "learning_rate": 4.739421980045515e-05, |
| "loss": 1.3528, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.05501091532372476, |
| "grad_norm": 1.1342971324920654, |
| "learning_rate": 4.7249454233813765e-05, |
| "loss": 1.3576, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.05790622665655238, |
| "grad_norm": 0.9445364475250244, |
| "learning_rate": 4.710468866717238e-05, |
| "loss": 1.3283, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 172693, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.180672512e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|