| { | |
| "epoch": 0.9989094874591058, | |
| "eval_logits/chosen": -24.043106079101562, | |
| "eval_logits/rejected": -25.065006256103516, | |
| "eval_logps/chosen": -18.249919891357422, | |
| "eval_logps/rejected": -27.7159366607666, | |
| "eval_loss": 0.6501222848892212, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -0.18249918520450592, | |
| "eval_rewards/margins": 0.09466017782688141, | |
| "eval_rewards/rejected": -0.27715936303138733, | |
| "eval_runtime": 86.4169, | |
| "eval_samples": 1941, | |
| "eval_samples_per_second": 22.461, | |
| "eval_steps_per_second": 1.412, | |
| "total_flos": 0.0, | |
| "train_loss": 0.6719389883191305, | |
| "train_runtime": 8958.0836, | |
| "train_samples": 58684, | |
| "train_samples_per_second": 6.551, | |
| "train_steps_per_second": 0.051 | |
| } |