{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9865470852017937, "eval_steps": 500, "global_step": 55, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.017937219730941704, "grad_norm": 0.6536183953285217, "learning_rate": 4.995922759815339e-05, "loss": 0.8371, "num_input_tokens_seen": 2097152, "step": 1 }, { "epoch": 0.03587443946188341, "grad_norm": 0.517680823802948, "learning_rate": 4.9837043383713753e-05, "loss": 0.7804, "num_input_tokens_seen": 4194304, "step": 2 }, { "epoch": 0.053811659192825115, "grad_norm": 0.4423481225967407, "learning_rate": 4.963384589619233e-05, "loss": 0.7695, "num_input_tokens_seen": 6291456, "step": 3 }, { "epoch": 0.07174887892376682, "grad_norm": 0.39828750491142273, "learning_rate": 4.935029792355834e-05, "loss": 0.7419, "num_input_tokens_seen": 8388608, "step": 4 }, { "epoch": 0.08968609865470852, "grad_norm": 0.31201115250587463, "learning_rate": 4.898732434036244e-05, "loss": 0.7166, "num_input_tokens_seen": 10485760, "step": 5 }, { "epoch": 0.10762331838565023, "grad_norm": 0.2536958158016205, "learning_rate": 4.854610909098812e-05, "loss": 0.7194, "num_input_tokens_seen": 12582912, "step": 6 }, { "epoch": 0.12556053811659193, "grad_norm": 0.2193588763475418, "learning_rate": 4.802809132787125e-05, "loss": 0.6975, "num_input_tokens_seen": 14680064, "step": 7 }, { "epoch": 0.14349775784753363, "grad_norm": 0.18916621804237366, "learning_rate": 4.743496071728396e-05, "loss": 0.7168, "num_input_tokens_seen": 16777216, "step": 8 }, { "epoch": 0.16143497757847533, "grad_norm": 0.1561172604560852, "learning_rate": 4.6768651927994434e-05, "loss": 0.6707, "num_input_tokens_seen": 18874368, "step": 9 }, { "epoch": 0.17937219730941703, "grad_norm": 0.12857139110565186, "learning_rate": 4.6031338320779534e-05, "loss": 0.6769, "num_input_tokens_seen": 20971520, "step": 10 }, { "epoch": 0.19730941704035873, "grad_norm": 0.11340289562940598, "learning_rate": 4.522542485937369e-05, "loss": 0.6873, "num_input_tokens_seen": 23068672, "step": 11 }, { "epoch": 0.21524663677130046, "grad_norm": 0.10658581554889679, "learning_rate": 4.4353540265977064e-05, "loss": 0.6643, "num_input_tokens_seen": 25165824, "step": 12 }, { "epoch": 0.23318385650224216, "grad_norm": 0.08937722444534302, "learning_rate": 4.341852844691012e-05, "loss": 0.6849, "num_input_tokens_seen": 27262976, "step": 13 }, { "epoch": 0.25112107623318386, "grad_norm": 0.07756289094686508, "learning_rate": 4.242343921638234e-05, "loss": 0.6461, "num_input_tokens_seen": 29360128, "step": 14 }, { "epoch": 0.26905829596412556, "grad_norm": 0.07581546157598495, "learning_rate": 4.137151834863213e-05, "loss": 0.6623, "num_input_tokens_seen": 31457280, "step": 15 }, { "epoch": 0.28699551569506726, "grad_norm": 0.07386067509651184, "learning_rate": 4.0266196990885955e-05, "loss": 0.6751, "num_input_tokens_seen": 33554432, "step": 16 }, { "epoch": 0.30493273542600896, "grad_norm": 0.06293580681085587, "learning_rate": 3.911108047166924e-05, "loss": 0.6472, "num_input_tokens_seen": 35651584, "step": 17 }, { "epoch": 0.32286995515695066, "grad_norm": 0.06199085712432861, "learning_rate": 3.790993654097405e-05, "loss": 0.6728, "num_input_tokens_seen": 37748736, "step": 18 }, { "epoch": 0.34080717488789236, "grad_norm": 0.060734592378139496, "learning_rate": 3.6666683080641846e-05, "loss": 0.7017, "num_input_tokens_seen": 39845888, "step": 19 }, { "epoch": 0.35874439461883406, "grad_norm": 0.05623164027929306, "learning_rate": 3.5385375325047166e-05, "loss": 0.6502, "num_input_tokens_seen": 41943040, "step": 20 }, { "epoch": 0.37668161434977576, "grad_norm": 0.0574677549302578, "learning_rate": 3.4070192633766025e-05, "loss": 0.6476, "num_input_tokens_seen": 44040192, "step": 21 }, { "epoch": 0.39461883408071746, "grad_norm": 0.05185185372829437, "learning_rate": 3.272542485937369e-05, "loss": 0.6411, "num_input_tokens_seen": 46137344, "step": 22 }, { "epoch": 0.4125560538116592, "grad_norm": 0.05139186978340149, "learning_rate": 3.135545835483718e-05, "loss": 0.6428, "num_input_tokens_seen": 48234496, "step": 23 }, { "epoch": 0.4304932735426009, "grad_norm": 0.050159115344285965, "learning_rate": 2.996476166614364e-05, "loss": 0.6661, "num_input_tokens_seen": 50331648, "step": 24 }, { "epoch": 0.4484304932735426, "grad_norm": 0.04851464927196503, "learning_rate": 2.8557870956832132e-05, "loss": 0.6378, "num_input_tokens_seen": 52428800, "step": 25 }, { "epoch": 0.4663677130044843, "grad_norm": 0.04896726831793785, "learning_rate": 2.7139375211970996e-05, "loss": 0.6532, "num_input_tokens_seen": 54525952, "step": 26 }, { "epoch": 0.484304932735426, "grad_norm": 0.04698600620031357, "learning_rate": 2.5713901269842404e-05, "loss": 0.6403, "num_input_tokens_seen": 56623104, "step": 27 }, { "epoch": 0.5022421524663677, "grad_norm": 0.048034097999334335, "learning_rate": 2.42860987301576e-05, "loss": 0.6248, "num_input_tokens_seen": 58720256, "step": 28 }, { "epoch": 0.5201793721973094, "grad_norm": 0.044828303158283234, "learning_rate": 2.2860624788029013e-05, "loss": 0.6583, "num_input_tokens_seen": 60817408, "step": 29 }, { "epoch": 0.5381165919282511, "grad_norm": 0.04563640430569649, "learning_rate": 2.1442129043167874e-05, "loss": 0.6579, "num_input_tokens_seen": 62914560, "step": 30 }, { "epoch": 0.5560538116591929, "grad_norm": 0.044318560510873795, "learning_rate": 2.003523833385637e-05, "loss": 0.6659, "num_input_tokens_seen": 65011712, "step": 31 }, { "epoch": 0.5739910313901345, "grad_norm": 0.04331167787313461, "learning_rate": 1.8644541645162834e-05, "loss": 0.6423, "num_input_tokens_seen": 67108864, "step": 32 }, { "epoch": 0.5919282511210763, "grad_norm": 0.04475367069244385, "learning_rate": 1.7274575140626318e-05, "loss": 0.6509, "num_input_tokens_seen": 69206016, "step": 33 }, { "epoch": 0.6098654708520179, "grad_norm": 0.045547887682914734, "learning_rate": 1.5929807366233977e-05, "loss": 0.6551, "num_input_tokens_seen": 71303168, "step": 34 }, { "epoch": 0.6278026905829597, "grad_norm": 0.043985530734062195, "learning_rate": 1.4614624674952842e-05, "loss": 0.6232, "num_input_tokens_seen": 73400320, "step": 35 }, { "epoch": 0.6457399103139013, "grad_norm": 0.0414094403386116, "learning_rate": 1.3333316919358157e-05, "loss": 0.6137, "num_input_tokens_seen": 75497472, "step": 36 }, { "epoch": 0.6636771300448431, "grad_norm": 0.041019294410943985, "learning_rate": 1.2090063459025955e-05, "loss": 0.6426, "num_input_tokens_seen": 77594624, "step": 37 }, { "epoch": 0.6816143497757847, "grad_norm": 0.04383592680096626, "learning_rate": 1.0888919528330777e-05, "loss": 0.6512, "num_input_tokens_seen": 79691776, "step": 38 }, { "epoch": 0.6995515695067265, "grad_norm": 0.040539514273405075, "learning_rate": 9.733803009114045e-06, "loss": 0.6269, "num_input_tokens_seen": 81788928, "step": 39 }, { "epoch": 0.7174887892376681, "grad_norm": 0.04238974675536156, "learning_rate": 8.628481651367876e-06, "loss": 0.6201, "num_input_tokens_seen": 83886080, "step": 40 }, { "epoch": 0.7354260089686099, "grad_norm": 0.04115669056773186, "learning_rate": 7.576560783617668e-06, "loss": 0.642, "num_input_tokens_seen": 85983232, "step": 41 }, { "epoch": 0.7533632286995515, "grad_norm": 0.04178008437156677, "learning_rate": 6.5814715530898745e-06, "loss": 0.648, "num_input_tokens_seen": 88080384, "step": 42 }, { "epoch": 0.7713004484304933, "grad_norm": 0.04329155012965202, "learning_rate": 5.646459734022938e-06, "loss": 0.6442, "num_input_tokens_seen": 90177536, "step": 43 }, { "epoch": 0.7892376681614349, "grad_norm": 0.043740272521972656, "learning_rate": 4.7745751406263165e-06, "loss": 0.6488, "num_input_tokens_seen": 92274688, "step": 44 }, { "epoch": 0.8071748878923767, "grad_norm": 0.04263562709093094, "learning_rate": 3.968661679220468e-06, "loss": 0.65, "num_input_tokens_seen": 94371840, "step": 45 }, { "epoch": 0.8251121076233184, "grad_norm": 0.041693028062582016, "learning_rate": 3.2313480720055745e-06, "loss": 0.6584, "num_input_tokens_seen": 96468992, "step": 46 }, { "epoch": 0.8430493273542601, "grad_norm": 0.04151754826307297, "learning_rate": 2.565039282716045e-06, "loss": 0.6392, "num_input_tokens_seen": 98566144, "step": 47 }, { "epoch": 0.8609865470852018, "grad_norm": 0.04260968416929245, "learning_rate": 1.97190867212875e-06, "loss": 0.6524, "num_input_tokens_seen": 100663296, "step": 48 }, { "epoch": 0.8789237668161435, "grad_norm": 0.04022514820098877, "learning_rate": 1.4538909090118846e-06, "loss": 0.6276, "num_input_tokens_seen": 102760448, "step": 49 }, { "epoch": 0.8968609865470852, "grad_norm": 0.039072513580322266, "learning_rate": 1.0126756596375686e-06, "loss": 0.6282, "num_input_tokens_seen": 104857600, "step": 50 }, { "epoch": 0.9147982062780269, "grad_norm": 0.03952722251415253, "learning_rate": 6.497020764416633e-07, "loss": 0.6344, "num_input_tokens_seen": 106954752, "step": 51 }, { "epoch": 0.9327354260089686, "grad_norm": 0.04045777767896652, "learning_rate": 3.6615410380767544e-07, "loss": 0.6464, "num_input_tokens_seen": 109051904, "step": 52 }, { "epoch": 0.9506726457399103, "grad_norm": 0.03984501212835312, "learning_rate": 1.6295661628624447e-07, "loss": 0.6253, "num_input_tokens_seen": 111149056, "step": 53 }, { "epoch": 0.968609865470852, "grad_norm": 0.040761884301900864, "learning_rate": 4.07724018466088e-08, "loss": 0.6375, "num_input_tokens_seen": 113246208, "step": 54 }, { "epoch": 0.9865470852017937, "grad_norm": 0.04142209142446518, "learning_rate": 0.0, "loss": 0.6419, "num_input_tokens_seen": 115343360, "step": 55 }, { "epoch": 0.9865470852017937, "num_input_tokens_seen": 115343360, "step": 55, "total_flos": 5.104238176512246e+18, "train_loss": 0.6637221011248502, "train_runtime": 9208.1472, "train_samples_per_second": 3.097, "train_steps_per_second": 0.006 } ], "logging_steps": 1.0, "max_steps": 55, "num_input_tokens_seen": 115343360, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.104238176512246e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }