{ "epoch": 1.0, "eval_logits/chosen": -1.1438143253326416, "eval_logits/rejected": -1.0389952659606934, "eval_logps/chosen": -116.79230499267578, "eval_logps/rejected": -110.2491226196289, "eval_loss": 0.5636938810348511, "eval_rewards/accuracies": 0.7720000147819519, "eval_rewards/chosen": 0.22718606889247894, "eval_rewards/margins": 0.688110888004303, "eval_rewards/rejected": -0.46092477440834045, "eval_runtime": 65.8827, "eval_samples_per_second": 15.178, "eval_steps_per_second": 1.897 }