| { | |
| "epoch": 3.0, | |
| "eval_logits/chosen": -4.902731895446777, | |
| "eval_logits/rejected": -4.834568977355957, | |
| "eval_logps/chosen": -402.26611328125, | |
| "eval_logps/rejected": -267.3836975097656, | |
| "eval_loss": 0.1433890014886856, | |
| "eval_rewards/accuracies": 0.916201114654541, | |
| "eval_rewards/chosen": 4.818836688995361, | |
| "eval_rewards/margins": 5.867211818695068, | |
| "eval_rewards/rejected": -1.0483758449554443, | |
| "eval_runtime": 328.4143, | |
| "eval_samples": 2862, | |
| "eval_samples_per_second": 8.715, | |
| "eval_steps_per_second": 0.545, | |
| "train_loss": 0.15624731566807995, | |
| "train_runtime": 68304.4343, | |
| "train_samples": 140201, | |
| "train_samples_per_second": 6.158, | |
| "train_steps_per_second": 0.096 | |
| } |