{ "best_metric": null, "best_model_checkpoint": null, "epoch": 32.0, "eval_steps": 400, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.6666666666666665, "grad_norm": 7.78125, "learning_rate": 6.666666666666667e-08, "logits/chosen": -0.6206714510917664, "logits/rejected": -0.6167551279067993, "logps/chosen": -1.6594607830047607, "logps/rejected": -1.8626664876937866, "loss": 0.7092, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.6594607830047607, "rewards/margins": 0.2032059133052826, "rewards/rejected": -1.8626664876937866, "semantic_entropy": 0.6521000862121582, "step": 5 }, { "epoch": 5.333333333333333, "grad_norm": 6.78125, "learning_rate": 7.892179482319296e-08, "logits/chosen": -0.644172191619873, "logits/rejected": -0.5970994234085083, "logps/chosen": -1.6529489755630493, "logps/rejected": -2.02937912940979, "loss": 0.6576, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.6529489755630493, "rewards/margins": 0.3764302134513855, "rewards/rejected": -2.02937912940979, "semantic_entropy": 0.6556634902954102, "step": 10 }, { "epoch": 8.0, "grad_norm": 8.6875, "learning_rate": 7.464101615137755e-08, "logits/chosen": -0.6107379794120789, "logits/rejected": -0.6173809766769409, "logps/chosen": -1.6537139415740967, "logps/rejected": -1.878178596496582, "loss": 0.6978, "rewards/accuracies": 0.5, "rewards/chosen": -1.6537139415740967, "rewards/margins": 0.22446465492248535, "rewards/rejected": -1.878178596496582, "semantic_entropy": 0.6528152227401733, "step": 15 }, { "epoch": 10.666666666666666, "grad_norm": 8.625, "learning_rate": 6.744966551474935e-08, "logits/chosen": -0.6249920129776001, "logits/rejected": -0.5978578925132751, "logps/chosen": -1.661948561668396, "logps/rejected": -1.9520155191421509, "loss": 0.6819, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.661948561668396, "rewards/margins": 0.2900669574737549, "rewards/rejected": -1.9520155191421509, "semantic_entropy": 0.6477808952331543, "step": 20 }, { "epoch": 13.333333333333334, "grad_norm": 8.875, "learning_rate": 5.7951967208018495e-08, "logits/chosen": -0.6556390523910522, "logits/rejected": -0.6272687911987305, "logps/chosen": -1.6880241632461548, "logps/rejected": -1.9340057373046875, "loss": 0.7142, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.6880241632461548, "rewards/margins": 0.24598172307014465, "rewards/rejected": -1.9340057373046875, "semantic_entropy": 0.6515553593635559, "step": 25 }, { "epoch": 16.0, "grad_norm": 6.875, "learning_rate": 4.6945927106677224e-08, "logits/chosen": -0.5940297842025757, "logits/rejected": -0.6073416471481323, "logps/chosen": -1.6197277307510376, "logps/rejected": -1.888943076133728, "loss": 0.6684, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -1.6197277307510376, "rewards/margins": 0.26921549439430237, "rewards/rejected": -1.888943076133728, "semantic_entropy": 0.6606020927429199, "step": 30 }, { "epoch": 18.666666666666668, "grad_norm": 9.1875, "learning_rate": 3.535628343499079e-08, "logits/chosen": -0.6252874732017517, "logits/rejected": -0.6270566582679749, "logps/chosen": -1.6999114751815796, "logps/rejected": -1.9409929513931274, "loss": 0.7074, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.6999114751815796, "rewards/margins": 0.24108140170574188, "rewards/rejected": -1.9409929513931274, "semantic_entropy": 0.6408571004867554, "step": 35 }, { "epoch": 21.333333333333332, "grad_norm": 8.125, "learning_rate": 2.4156809358433726e-08, "logits/chosen": -0.6158267259597778, "logits/rejected": -0.5839654803276062, "logps/chosen": -1.583505392074585, "logps/rejected": -1.8752552270889282, "loss": 0.6656, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -1.583505392074585, "rewards/margins": 0.29174983501434326, "rewards/rejected": -1.8752552270889282, "semantic_entropy": 0.6757909059524536, "step": 40 }, { "epoch": 24.0, "grad_norm": 8.75, "learning_rate": 1.4288495612538426e-08, "logits/chosen": -0.6340750455856323, "logits/rejected": -0.6243816018104553, "logps/chosen": -1.68179452419281, "logps/rejected": -1.9539234638214111, "loss": 0.6902, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -1.68179452419281, "rewards/margins": 0.2721291184425354, "rewards/rejected": -1.9539234638214111, "semantic_entropy": 0.6442986130714417, "step": 45 }, { "epoch": 26.666666666666668, "grad_norm": 8.1875, "learning_rate": 6.58048754348255e-09, "logits/chosen": -0.6418130397796631, "logits/rejected": -0.6143754720687866, "logps/chosen": -1.6074516773223877, "logps/rejected": -1.830583930015564, "loss": 0.694, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.6074516773223877, "rewards/margins": 0.22313210368156433, "rewards/rejected": -1.830583930015564, "semantic_entropy": 0.6700640916824341, "step": 50 }, { "epoch": 29.333333333333332, "grad_norm": 8.0625, "learning_rate": 1.680419507380444e-09, "logits/chosen": -0.6013139486312866, "logits/rejected": -0.5867229700088501, "logps/chosen": -1.689805030822754, "logps/rejected": -2.002596616744995, "loss": 0.6768, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.689805030822754, "rewards/margins": 0.3127916753292084, "rewards/rejected": -2.002596616744995, "semantic_entropy": 0.6430121064186096, "step": 55 }, { "epoch": 32.0, "grad_norm": 8.4375, "learning_rate": 0.0, "logits/chosen": -0.6301103830337524, "logits/rejected": -0.6289348006248474, "logps/chosen": -1.665967345237732, "logps/rejected": -1.932885766029358, "loss": 0.6937, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -1.665967345237732, "rewards/margins": 0.2669183611869812, "rewards/rejected": -1.932885766029358, "semantic_entropy": 0.6483110785484314, "step": 60 }, { "epoch": 32.0, "step": 60, "total_flos": 0.0, "train_loss": 0.6880494674046834, "train_runtime": 138.3641, "train_samples_per_second": 6.938, "train_steps_per_second": 0.434 } ], "logging_steps": 5, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 60, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }