|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5616522431373596, |
|
"learning_rate": 4.82e-05, |
|
"loss": 1.1747, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.6579018831253052, |
|
"learning_rate": 4.6200000000000005e-05, |
|
"loss": 1.0469, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.7981964349746704, |
|
"learning_rate": 4.4200000000000004e-05, |
|
"loss": 1.0567, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.9774985909461975, |
|
"learning_rate": 4.22e-05, |
|
"loss": 0.9834, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.6699696779251099, |
|
"learning_rate": 4.02e-05, |
|
"loss": 1.033, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.7246522903442383, |
|
"learning_rate": 3.82e-05, |
|
"loss": 0.9588, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.6503073573112488, |
|
"learning_rate": 3.62e-05, |
|
"loss": 1.1316, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.7243292331695557, |
|
"learning_rate": 3.4200000000000005e-05, |
|
"loss": 1.105, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.6837774515151978, |
|
"learning_rate": 3.2200000000000003e-05, |
|
"loss": 1.0431, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.7276621460914612, |
|
"learning_rate": 3.02e-05, |
|
"loss": 1.0878, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.5852382183074951, |
|
"learning_rate": 2.8199999999999998e-05, |
|
"loss": 1.0713, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.6434891223907471, |
|
"learning_rate": 2.6200000000000003e-05, |
|
"loss": 1.0564, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.8133268356323242, |
|
"learning_rate": 2.4200000000000002e-05, |
|
"loss": 1.0184, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.6674674153327942, |
|
"learning_rate": 2.22e-05, |
|
"loss": 0.9579, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.7289815545082092, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 1.0709, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.8111977577209473, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 1.0948, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.822685182094574, |
|
"learning_rate": 1.62e-05, |
|
"loss": 1.0823, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.7154224514961243, |
|
"learning_rate": 1.42e-05, |
|
"loss": 0.9634, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.6793033480644226, |
|
"learning_rate": 1.22e-05, |
|
"loss": 0.9934, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.6630302667617798, |
|
"learning_rate": 1.02e-05, |
|
"loss": 0.9866, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.8769239187240601, |
|
"learning_rate": 8.200000000000001e-06, |
|
"loss": 0.8853, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.7216284275054932, |
|
"learning_rate": 6.2e-06, |
|
"loss": 0.9867, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.9310891032218933, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 0.9694, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.816041111946106, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.9702, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.7796522974967957, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 0.9702, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3181482344448000.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|