{ "epoch": 2.986666666666667, "eval_logits/chosen": -0.26991021633148193, "eval_logits/rejected": 0.17896275222301483, "eval_logps/chosen": -0.16233521699905396, "eval_logps/rejected": -0.90810227394104, "eval_loss": 0.20663729310035706, "eval_odds_ratio_loss": 1.8880867958068848, "eval_rewards/accuracies": 0.8299999833106995, "eval_rewards/chosen": -0.016233522444963455, "eval_rewards/margins": 0.0745767131447792, "eval_rewards/rejected": -0.09081023186445236, "eval_runtime": 6.9404, "eval_samples_per_second": 14.408, "eval_sft_loss": 0.017828578129410744, "eval_steps_per_second": 7.204, "total_flos": 3.471912421559501e+16, "train_loss": 1.00575195536727, "train_runtime": 639.1511, "train_samples_per_second": 4.224, "train_steps_per_second": 0.263 }