|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9893390191897654, |
|
"eval_steps": 100, |
|
"global_step": 58, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 465.1015853881836, |
|
"epoch": 0.017057569296375266, |
|
"grad_norm": 0.46851029992103577, |
|
"kl": 0.0, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0311, |
|
"reward": 0.3024553693830967, |
|
"reward_std": 0.2792186979204416, |
|
"rewards/accuracy_reward": 0.3024553693830967, |
|
"rewards/format_reward": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 479.50532627105713, |
|
"epoch": 0.08528784648187633, |
|
"grad_norm": 4.368417263031006, |
|
"kl": 0.011774897575378418, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0178, |
|
"reward": 0.3108259071595967, |
|
"reward_std": 0.282580086030066, |
|
"rewards/accuracy_reward": 0.3108259071595967, |
|
"rewards/format_reward": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 493.8828353881836, |
|
"epoch": 0.17057569296375266, |
|
"grad_norm": 0.43728089332580566, |
|
"kl": 0.0563232421875, |
|
"learning_rate": 2.956412726139078e-06, |
|
"loss": 0.0367, |
|
"reward": 0.5104910925030708, |
|
"reward_std": 0.28240326046943665, |
|
"rewards/accuracy_reward": 0.5104910925030708, |
|
"rewards/format_reward": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 539.2189956665039, |
|
"epoch": 0.255863539445629, |
|
"grad_norm": 3.359680652618408, |
|
"kl": 0.243646240234375, |
|
"learning_rate": 2.7836719084521715e-06, |
|
"loss": 0.0724, |
|
"reward": 0.44196430668234826, |
|
"reward_std": 0.2963893756270409, |
|
"rewards/accuracy_reward": 0.44196430668234826, |
|
"rewards/format_reward": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 636.4513717651367, |
|
"epoch": 0.3411513859275053, |
|
"grad_norm": 2.16961669921875, |
|
"kl": 0.4969482421875, |
|
"learning_rate": 2.4946839873611927e-06, |
|
"loss": 0.1423, |
|
"reward": 0.31763394474983214, |
|
"reward_std": 0.28798535354435445, |
|
"rewards/accuracy_reward": 0.31763394474983214, |
|
"rewards/format_reward": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 533.4765838623047, |
|
"epoch": 0.42643923240938164, |
|
"grad_norm": 8.851544380187988, |
|
"kl": 0.75478515625, |
|
"learning_rate": 2.1156192081791355e-06, |
|
"loss": 0.0974, |
|
"reward": 0.3165178719907999, |
|
"reward_std": 0.2697611689567566, |
|
"rewards/accuracy_reward": 0.3165178719907999, |
|
"rewards/format_reward": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 486.86542587280275, |
|
"epoch": 0.511727078891258, |
|
"grad_norm": 1.5197241306304932, |
|
"kl": 0.635107421875, |
|
"learning_rate": 1.6808050203829845e-06, |
|
"loss": -0.0795, |
|
"reward": 0.37879465594887735, |
|
"reward_std": 0.28313881531357765, |
|
"rewards/accuracy_reward": 0.37879465594887735, |
|
"rewards/format_reward": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 651.0556098937989, |
|
"epoch": 0.5970149253731343, |
|
"grad_norm": 251.43299865722656, |
|
"kl": 1.340673828125, |
|
"learning_rate": 1.2296174432791415e-06, |
|
"loss": -0.1861, |
|
"reward": 0.35892858989536763, |
|
"reward_std": 0.2909108128398657, |
|
"rewards/accuracy_reward": 0.35892858989536763, |
|
"rewards/format_reward": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 531.6004737854004, |
|
"epoch": 0.6823027718550106, |
|
"grad_norm": 0.8638615012168884, |
|
"kl": 1.24169921875, |
|
"learning_rate": 8.029152419343472e-07, |
|
"loss": -0.2823, |
|
"reward": 0.2919642984867096, |
|
"reward_std": 0.30123363584280016, |
|
"rewards/accuracy_reward": 0.2919642984867096, |
|
"rewards/format_reward": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 535.4821655273438, |
|
"epoch": 0.767590618336887, |
|
"grad_norm": 0.8260723948478699, |
|
"kl": 1.6283203125, |
|
"learning_rate": 4.3933982822017883e-07, |
|
"loss": -0.2886, |
|
"reward": 0.2575892962515354, |
|
"reward_std": 0.28938031755387783, |
|
"rewards/accuracy_reward": 0.2575892962515354, |
|
"rewards/format_reward": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 574.5600708007812, |
|
"epoch": 0.8528784648187633, |
|
"grad_norm": 0.8665443658828735, |
|
"kl": 1.6455078125, |
|
"learning_rate": 1.718159615201853e-07, |
|
"loss": -0.2452, |
|
"reward": 0.2620535843074322, |
|
"reward_std": 0.2727984722703695, |
|
"rewards/accuracy_reward": 0.2620535843074322, |
|
"rewards/format_reward": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 633.8752510070801, |
|
"epoch": 0.9381663113006397, |
|
"grad_norm": 0.7306084036827087, |
|
"kl": 1.2185546875, |
|
"learning_rate": 2.4570139579284723e-08, |
|
"loss": -0.1955, |
|
"reward": 0.2723214395344257, |
|
"reward_std": 0.2689169988036156, |
|
"rewards/accuracy_reward": 0.2723214395344257, |
|
"rewards/format_reward": 0.0, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completion_length": 655.2425740559896, |
|
"epoch": 0.9893390191897654, |
|
"kl": 1.1414388020833333, |
|
"reward": 0.2812500136593978, |
|
"reward_std": 0.2797961321969827, |
|
"rewards/accuracy_reward": 0.2812500136593978, |
|
"rewards/format_reward": 0.0, |
|
"step": 58, |
|
"total_flos": 0.0, |
|
"train_loss": -0.0886706564703892, |
|
"train_runtime": 9867.8585, |
|
"train_samples_per_second": 0.76, |
|
"train_steps_per_second": 0.006 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 58, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|