| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 25, | |
| "global_step": 52, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.333333333333333e-08, | |
| "logits/generated": -2.788468599319458, | |
| "logits/real": -2.8911099433898926, | |
| "logps/generated": -226.66921997070312, | |
| "logps/real": -283.6243896484375, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/generated": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/real": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.5652173913043473e-07, | |
| "logits/generated": -2.8515138626098633, | |
| "logits/real": -2.8768396377563477, | |
| "logps/generated": -354.09619140625, | |
| "logps/real": -350.52911376953125, | |
| "loss": 0.522, | |
| "rewards/accuracies": 0.7569444179534912, | |
| "rewards/generated": -0.21337264776229858, | |
| "rewards/margins": 0.5563015341758728, | |
| "rewards/real": 0.3429288864135742, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.478260869565217e-07, | |
| "logits/generated": -2.792628049850464, | |
| "logits/real": -2.7778868675231934, | |
| "logps/generated": -351.04638671875, | |
| "logps/real": -327.13482666015625, | |
| "loss": 0.3011, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/generated": -0.7889599800109863, | |
| "rewards/margins": 1.546514868736267, | |
| "rewards/real": 0.7575550675392151, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_logits/generated": -2.764375686645508, | |
| "eval_logits/real": -2.7640507221221924, | |
| "eval_logps/generated": -310.69891357421875, | |
| "eval_logps/real": -306.61572265625, | |
| "eval_loss": 0.24416939914226532, | |
| "eval_rewards/accuracies": 0.9791666865348816, | |
| "eval_rewards/generated": -0.9850902557373047, | |
| "eval_rewards/margins": 2.14570689201355, | |
| "eval_rewards/real": 1.1606166362762451, | |
| "eval_runtime": 27.6861, | |
| "eval_samples_per_second": 6.646, | |
| "eval_steps_per_second": 0.217, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 2.391304347826087e-07, | |
| "logits/generated": -2.7530007362365723, | |
| "logits/real": -2.734692096710205, | |
| "logps/generated": -310.22607421875, | |
| "logps/real": -306.02044677734375, | |
| "loss": 0.1788, | |
| "rewards/accuracies": 0.9312499761581421, | |
| "rewards/generated": -1.791497826576233, | |
| "rewards/margins": 3.7750840187072754, | |
| "rewards/real": 1.9835857152938843, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.3043478260869563e-07, | |
| "logits/generated": -2.7655322551727295, | |
| "logits/real": -2.776773691177368, | |
| "logps/generated": -358.19403076171875, | |
| "logps/real": -309.92767333984375, | |
| "loss": 0.0384, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/generated": -3.1963627338409424, | |
| "rewards/margins": 6.872523307800293, | |
| "rewards/real": 3.6761608123779297, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 2.1739130434782606e-08, | |
| "logits/generated": -2.7564454078674316, | |
| "logits/real": -2.7757363319396973, | |
| "logps/generated": -357.3354797363281, | |
| "logps/real": -296.8515930175781, | |
| "loss": 0.0376, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/generated": -2.9351892471313477, | |
| "rewards/margins": 6.2575507164001465, | |
| "rewards/real": 3.322361707687378, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_logits/generated": -2.7557647228240967, | |
| "eval_logits/real": -2.7546520233154297, | |
| "eval_logps/generated": -309.8145446777344, | |
| "eval_logps/real": -304.967041015625, | |
| "eval_loss": 0.23592980206012726, | |
| "eval_rewards/accuracies": 0.9791666865348816, | |
| "eval_rewards/generated": -0.8966498374938965, | |
| "eval_rewards/margins": 2.2221336364746094, | |
| "eval_rewards/real": 1.3254839181900024, | |
| "eval_runtime": 27.8272, | |
| "eval_samples_per_second": 6.612, | |
| "eval_steps_per_second": 0.216, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "step": 52, | |
| "total_flos": 0.0, | |
| "train_loss": 0.2113667087486157, | |
| "train_runtime": 1162.1581, | |
| "train_samples_per_second": 2.836, | |
| "train_steps_per_second": 0.045 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 52, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |