{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.975609756097561,
  "eval_steps": 500,
  "global_step": 15,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06504065040650407,
      "grad_norm": 0.5584165453910828,
      "learning_rate": 4.9453690018345144e-05,
      "loss": 0.7344,
      "num_input_tokens_seen": 2097152,
      "step": 1
    },
    {
      "epoch": 0.13008130081300814,
      "grad_norm": 0.4928242862224579,
      "learning_rate": 4.783863644106502e-05,
      "loss": 0.7113,
      "num_input_tokens_seen": 4194304,
      "step": 2
    },
    {
      "epoch": 0.1951219512195122,
      "grad_norm": 0.4566553831100464,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.7009,
      "num_input_tokens_seen": 6291456,
      "step": 3
    },
    {
      "epoch": 0.2601626016260163,
      "grad_norm": 0.38513678312301636,
      "learning_rate": 4.172826515897146e-05,
      "loss": 0.6704,
      "num_input_tokens_seen": 8388608,
      "step": 4
    },
    {
      "epoch": 0.3252032520325203,
      "grad_norm": 0.36934641003608704,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6504,
      "num_input_tokens_seen": 10485760,
      "step": 5
    },
    {
      "epoch": 0.3902439024390244,
      "grad_norm": 0.33891424536705017,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.6217,
      "num_input_tokens_seen": 12582912,
      "step": 6
    },
    {
      "epoch": 0.45528455284552843,
      "grad_norm": 0.30929532647132874,
      "learning_rate": 2.761321158169134e-05,
      "loss": 0.6052,
      "num_input_tokens_seen": 14680064,
      "step": 7
    },
    {
      "epoch": 0.5203252032520326,
      "grad_norm": 0.28951436281204224,
      "learning_rate": 2.238678841830867e-05,
      "loss": 0.6302,
      "num_input_tokens_seen": 16777216,
      "step": 8
    },
    {
      "epoch": 0.5853658536585366,
      "grad_norm": 0.29053160548210144,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 0.5964,
      "num_input_tokens_seen": 18874368,
      "step": 9
    },
    {
      "epoch": 0.6504065040650406,
      "grad_norm": 0.2903811037540436,
      "learning_rate": 1.2500000000000006e-05,
      "loss": 0.6233,
      "num_input_tokens_seen": 20971520,
      "step": 10
    },
    {
      "epoch": 0.7154471544715447,
      "grad_norm": 0.28426122665405273,
      "learning_rate": 8.271734841028553e-06,
      "loss": 0.5964,
      "num_input_tokens_seen": 23068672,
      "step": 11
    },
    {
      "epoch": 0.7804878048780488,
      "grad_norm": 0.25174424052238464,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 0.6106,
      "num_input_tokens_seen": 25165824,
      "step": 12
    },
    {
      "epoch": 0.8455284552845529,
      "grad_norm": 0.24576599895954132,
      "learning_rate": 2.1613635589349756e-06,
      "loss": 0.5916,
      "num_input_tokens_seen": 27262976,
      "step": 13
    },
    {
      "epoch": 0.9105691056910569,
      "grad_norm": 0.24116040766239166,
      "learning_rate": 5.463099816548579e-07,
      "loss": 0.5893,
      "num_input_tokens_seen": 29360128,
      "step": 14
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 0.23365961015224457,
      "learning_rate": 0.0,
      "loss": 0.6298,
      "num_input_tokens_seen": 31457280,
      "step": 15
    },
    {
      "epoch": 0.975609756097561,
      "num_input_tokens_seen": 31457280,
      "step": 15,
      "total_flos": 1.2251230144089293e+18,
      "train_loss": 0.6374592224756876,
      "train_runtime": 1520.8651,
      "train_samples_per_second": 5.175,
      "train_steps_per_second": 0.01
    }
  ],
  "logging_steps": 1,
  "max_steps": 15,
  "num_input_tokens_seen": 31457280,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2251230144089293e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}