| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2, |
| "eval_steps": 500, |
| "global_step": 50, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 361.75, |
| "epoch": 0.004, |
| "grad_norm": 0.0693558007478714, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "loss": -0.0, |
| "reward": 0.02345000021159649, |
| "reward_std": 0.04690000042319298, |
| "rewards/pot_combined_reward": 0.02345000021159649, |
| "step": 1 |
| }, |
| { |
| "completion_length": 371.375, |
| "epoch": 0.008, |
| "grad_norm": 0.08951307833194733, |
| "kl": 0.0, |
| "learning_rate": 5.000000000000001e-07, |
| "loss": -0.0, |
| "reward": 0.026133334264159203, |
| "reward_std": 0.052266668528318405, |
| "rewards/pot_combined_reward": 0.026133334264159203, |
| "step": 2 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.012, |
| "grad_norm": 0.001663331058807671, |
| "kl": 0.0005528016190510243, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 3 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.016, |
| "grad_norm": 0.0016158577054738998, |
| "kl": 0.0005013404006604105, |
| "learning_rate": 1.5e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 4 |
| }, |
| { |
| "completion_length": 373.375, |
| "epoch": 0.02, |
| "grad_norm": 0.0030017346143722534, |
| "kl": 0.0005660907772835344, |
| "learning_rate": 2.0000000000000003e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 5 |
| }, |
| { |
| "completion_length": 373.5625, |
| "epoch": 0.024, |
| "grad_norm": 0.0015043334569782019, |
| "kl": 0.0005426810312201269, |
| "learning_rate": 2.5e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 6 |
| }, |
| { |
| "completion_length": 364.5, |
| "epoch": 0.028, |
| "grad_norm": 0.08007726073265076, |
| "kl": 0.0005026786457165144, |
| "learning_rate": 3e-06, |
| "loss": 0.0001, |
| "reward": 0.03146666660904884, |
| "reward_std": 0.06293333321809769, |
| "rewards/pot_combined_reward": 0.03146666660904884, |
| "step": 7 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.032, |
| "grad_norm": 0.0016093035228550434, |
| "kl": 0.0005015511706005782, |
| "learning_rate": 3.5e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 8 |
| }, |
| { |
| "completion_length": 353.0625, |
| "epoch": 0.036, |
| "grad_norm": 0.001784446300007403, |
| "kl": 0.0003549655375536531, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 9 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.04, |
| "grad_norm": 0.0020515809301286936, |
| "kl": 0.0005762250584666617, |
| "learning_rate": 4.5e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 10 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.044, |
| "grad_norm": 0.06735244393348694, |
| "kl": 0.0005008808220736682, |
| "learning_rate": 5e-06, |
| "loss": 0.0001, |
| "reward": 0.01808333396911621, |
| "reward_std": 0.03616666793823242, |
| "rewards/pot_combined_reward": 0.01808333396911621, |
| "step": 11 |
| }, |
| { |
| "completion_length": 342.5, |
| "epoch": 0.048, |
| "grad_norm": 0.0024033007211983204, |
| "kl": 0.00046441886661341414, |
| "learning_rate": 4.99847706754774e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 12 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.052, |
| "grad_norm": 0.0014815045287832618, |
| "kl": 0.0004604290661518462, |
| "learning_rate": 4.993910125649561e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 13 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.056, |
| "grad_norm": 0.001548771746456623, |
| "kl": 0.0004989306180505082, |
| "learning_rate": 4.986304738420684e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 14 |
| }, |
| { |
| "completion_length": 360.8125, |
| "epoch": 0.06, |
| "grad_norm": 0.001822226564399898, |
| "kl": 0.00046228048086049967, |
| "learning_rate": 4.975670171853926e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 15 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.064, |
| "grad_norm": 0.09072617441415787, |
| "kl": 0.000476861278002616, |
| "learning_rate": 4.962019382530521e-06, |
| "loss": 0.0, |
| "reward": 0.012600000016391277, |
| "reward_std": 0.025200000032782555, |
| "rewards/pot_combined_reward": 0.012600000016391277, |
| "step": 16 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.068, |
| "grad_norm": 0.0015437575057148933, |
| "kl": 0.0005299622716847807, |
| "learning_rate": 4.9453690018345144e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 17 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.072, |
| "grad_norm": 0.0016528957057744265, |
| "kl": 0.0005346549514797516, |
| "learning_rate": 4.925739315689991e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 18 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.076, |
| "grad_norm": 0.0017183530144393444, |
| "kl": 0.0004841076224693097, |
| "learning_rate": 4.903154239845798e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 19 |
| }, |
| { |
| "completion_length": 373.5625, |
| "epoch": 0.08, |
| "grad_norm": 0.08476348221302032, |
| "kl": 0.000539450986252632, |
| "learning_rate": 4.8776412907378845e-06, |
| "loss": 0.0001, |
| "reward": 0.026249999180436134, |
| "reward_std": 0.05249999836087227, |
| "rewards/pot_combined_reward": 0.026249999180436134, |
| "step": 20 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.084, |
| "grad_norm": 0.07820821553468704, |
| "kl": 0.0005957721295999363, |
| "learning_rate": 4.849231551964771e-06, |
| "loss": 0.0001, |
| "reward": 0.07116249948740005, |
| "reward_std": 0.1423249989748001, |
| "rewards/pot_combined_reward": 0.07116249948740005, |
| "step": 21 |
| }, |
| { |
| "completion_length": 373.5625, |
| "epoch": 0.088, |
| "grad_norm": 0.0016008545644581318, |
| "kl": 0.000533243379322812, |
| "learning_rate": 4.817959636416969e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 22 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.092, |
| "grad_norm": 0.0018651616992428899, |
| "kl": 0.0005620143201667815, |
| "learning_rate": 4.783863644106502e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 23 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.096, |
| "grad_norm": 0.001895356923341751, |
| "kl": 0.00047788477240828797, |
| "learning_rate": 4.746985115747918e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 24 |
| }, |
| { |
| "completion_length": 369.75, |
| "epoch": 0.1, |
| "grad_norm": 0.11337540298700333, |
| "kl": 0.00047047801490407437, |
| "learning_rate": 4.707368982147318e-06, |
| "loss": 0.0, |
| "reward": 0.04736666567623615, |
| "reward_std": 0.0947333313524723, |
| "rewards/pot_combined_reward": 0.04736666567623615, |
| "step": 25 |
| }, |
| { |
| "completion_length": 368.625, |
| "epoch": 0.104, |
| "grad_norm": 0.01079186424612999, |
| "kl": 0.0006513141634059139, |
| "learning_rate": 4.665063509461098e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 26 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.108, |
| "grad_norm": 0.0016566955018788576, |
| "kl": 0.0005965056043351069, |
| "learning_rate": 4.620120240391065e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 27 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.112, |
| "grad_norm": 0.001792517607100308, |
| "kl": 0.0005393773099058308, |
| "learning_rate": 4.572593931387604e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 28 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.116, |
| "grad_norm": 0.0017438618233427405, |
| "kl": 0.0005095232809253503, |
| "learning_rate": 4.522542485937369e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 29 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.12, |
| "grad_norm": 0.0015684061218053102, |
| "kl": 0.00047055614413693547, |
| "learning_rate": 4.470026884016805e-06, |
| "loss": 0.0, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 30 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.124, |
| "grad_norm": 0.0019608919974416494, |
| "kl": 0.0005961552087683231, |
| "learning_rate": 4.415111107797445e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 31 |
| }, |
| { |
| "completion_length": 372.0625, |
| "epoch": 0.128, |
| "grad_norm": 0.0015237935585901141, |
| "kl": 0.0005326158570824191, |
| "learning_rate": 4.357862063693486e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 32 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.132, |
| "grad_norm": 0.002203061943873763, |
| "kl": 0.0006071907628211193, |
| "learning_rate": 4.2983495008466285e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 33 |
| }, |
| { |
| "completion_length": 370.5, |
| "epoch": 0.136, |
| "grad_norm": 0.10126212984323502, |
| "kl": 0.0006130525580374524, |
| "learning_rate": 4.236645926147493e-06, |
| "loss": 0.0001, |
| "reward": 0.061249999329447746, |
| "reward_std": 0.12249999865889549, |
| "rewards/pot_combined_reward": 0.061249999329447746, |
| "step": 34 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.14, |
| "grad_norm": 0.08621055632829666, |
| "kl": 0.0004927485424559563, |
| "learning_rate": 4.172826515897146e-06, |
| "loss": 0.0, |
| "reward": 0.024966666474938393, |
| "reward_std": 0.049933332949876785, |
| "rewards/pot_combined_reward": 0.024966666474938393, |
| "step": 35 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.144, |
| "grad_norm": 0.0017127083847299218, |
| "kl": 0.0005035524372942746, |
| "learning_rate": 4.106969024216348e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 36 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.148, |
| "grad_norm": 0.0018064226023852825, |
| "kl": 0.0005530964990612119, |
| "learning_rate": 4.039153688314146e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 37 |
| }, |
| { |
| "completion_length": 373.8125, |
| "epoch": 0.152, |
| "grad_norm": 0.0019408023217692971, |
| "kl": 0.0006417437689378858, |
| "learning_rate": 3.969463130731183e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 38 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.156, |
| "grad_norm": 0.08421237021684647, |
| "kl": 0.0006241849769139662, |
| "learning_rate": 3.897982258676867e-06, |
| "loss": 0.0001, |
| "reward": 0.07303333282470703, |
| "reward_std": 0.057444244623184204, |
| "rewards/pot_combined_reward": 0.07303333282470703, |
| "step": 39 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.16, |
| "grad_norm": 0.08112610131502151, |
| "kl": 0.0005679467285517603, |
| "learning_rate": 3.824798160583012e-06, |
| "loss": 0.0001, |
| "reward": 0.014233333058655262, |
| "reward_std": 0.028466666117310524, |
| "rewards/pot_combined_reward": 0.014233333058655262, |
| "step": 40 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.164, |
| "grad_norm": 0.001669050194323063, |
| "kl": 0.0005617116403300315, |
| "learning_rate": 3.7500000000000005e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 41 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.168, |
| "grad_norm": 0.002355287317186594, |
| "kl": 0.0006023006426403299, |
| "learning_rate": 3.6736789069647273e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 42 |
| }, |
| { |
| "completion_length": 328.8125, |
| "epoch": 0.172, |
| "grad_norm": 0.0023054229095578194, |
| "kl": 0.0005200250307098031, |
| "learning_rate": 3.595927866972694e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 43 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.176, |
| "grad_norm": 0.11641528457403183, |
| "kl": 0.0005873750924365595, |
| "learning_rate": 3.516841607689501e-06, |
| "loss": 0.0001, |
| "reward": 0.07468749955296516, |
| "reward_std": 0.14937499910593033, |
| "rewards/pot_combined_reward": 0.07468749955296516, |
| "step": 44 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.18, |
| "grad_norm": 0.001737726735882461, |
| "kl": 0.0005044558856752701, |
| "learning_rate": 3.436516483539781e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 45 |
| }, |
| { |
| "completion_length": 370.9375, |
| "epoch": 0.184, |
| "grad_norm": 0.0019563438836485147, |
| "kl": 0.000592117925407365, |
| "learning_rate": 3.3550503583141726e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 46 |
| }, |
| { |
| "completion_length": 372.4375, |
| "epoch": 0.188, |
| "grad_norm": 0.0026125519070774317, |
| "kl": 0.0005959889385849237, |
| "learning_rate": 3.272542485937369e-06, |
| "loss": 0.0001, |
| "reward": 0.0, |
| "reward_std": 0.0, |
| "rewards/pot_combined_reward": 0.0, |
| "step": 47 |
| }, |
| { |
| "completion_length": 372.75, |
| "epoch": 0.192, |
| "grad_norm": 0.0820600688457489, |
| "kl": 0.0005973696243017912, |
| "learning_rate": 3.189093389542498e-06, |
| "loss": 0.0001, |
| "reward": 0.03968749940395355, |
| "reward_std": 0.0793749988079071, |
| "rewards/pot_combined_reward": 0.03968749940395355, |
| "step": 48 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.196, |
| "grad_norm": 0.06782057881355286, |
| "kl": 0.0007923852826934308, |
| "learning_rate": 3.1048047389991693e-06, |
| "loss": 0.0001, |
| "reward": 0.026249999180436134, |
| "reward_std": 0.05249999836087227, |
| "rewards/pot_combined_reward": 0.026249999180436134, |
| "step": 49 |
| }, |
| { |
| "completion_length": 374.0, |
| "epoch": 0.2, |
| "grad_norm": 0.10189752280712128, |
| "kl": 0.0004997247888240963, |
| "learning_rate": 3.019779227044398e-06, |
| "loss": 0.0, |
| "reward": 0.06999999843537807, |
| "reward_std": 0.13999999687075615, |
| "rewards/pot_combined_reward": 0.06999999843537807, |
| "step": 50 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 100, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|