jz666's picture
End of training
ea4a2b6 verified
{
"epoch": 0.9989094874591058,
"eval_logits/chosen": -24.043106079101562,
"eval_logits/rejected": -25.065006256103516,
"eval_logps/chosen": -18.249919891357422,
"eval_logps/rejected": -27.7159366607666,
"eval_loss": 0.6501222848892212,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -0.18249918520450592,
"eval_rewards/margins": 0.09466017782688141,
"eval_rewards/rejected": -0.27715936303138733,
"eval_runtime": 86.4169,
"eval_samples": 1941,
"eval_samples_per_second": 22.461,
"eval_steps_per_second": 1.412
}